Comandos em Linguagem R
Regressão Linear Simples
EXEMPLO 1 - Tempo de reação
Y = tempo de reação a certo estímulo (em segundos)
X = idade (em anos)
Arquivo: Exemplo1s.txt (Fonte: Bussab, 1988)
rm(list=ls())
dados<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exemplo1s.txt", h=T)
attach(dados)
head(dados)
## tempo idade
## 1 96 20
## 2 92 20
## 3 106 20
## 4 100 20
## 5 98 25
## 6 104 25
plot(idade,tempo, pch=16, col=4)
points(c(20,25,30,35,40),c(98.5,103.25,107.75,110.75,117.25), pch=15, col=2)
cor(idade,tempo)
## [1] 0.7680814
cor.test(idade,tempo)
##
## Pearson's product-moment correlation
##
## data: idade and tempo
## t = 5.0889, df = 18, p-value = 7.662e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4931929 0.9035073
## sample estimates:
## cor
## 0.7680814
mod<-lm(tempo~idade, data=dados)
summary(mod)
##
## Call:
## lm(formula = tempo ~ idade, data = dados)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.500 -4.125 -0.750 2.625 10.500
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 80.5000 5.4510 14.768 1.67e-11 ***
## idade 0.9000 0.1769 5.089 7.66e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.593 on 18 degrees of freedom
## Multiple R-squared: 0.5899, Adjusted R-squared: 0.5672
## F-statistic: 25.9 on 1 and 18 DF, p-value: 7.662e-05
anova(mod)
## Analysis of Variance Table
##
## Response: tempo
## Df Sum Sq Mean Sq F value Pr(>F)
## idade 1 810 810.00 25.897 7.662e-05 ***
## Residuals 18 563 31.28
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
x<-20:40
ye<-80.5+0.9*x
lines(x,ye)
e1 <- resid(mod)
round(e1,3) # resíduos brutos ou ordinários
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
## -2.5 -6.5 7.5 1.5 -5.0 1.0 7.0 -2.0 8.5 -1.5 1.5 -7.5 0.0 -7.0 6.0
## 16 17 18 19 20
## -4.0 -3.5 -4.5 10.5 0.5
plot(mod$fitted,e1, pch=16)
abline(h=0, col=2, lty=2)
sigma <- summary(mod)$sigma # sqrt(Qmres)
sigma
## [1] 5.592654
d1 <- e1/sigma
round(d1,1) # resíduos padronizados
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
## -0.4 -1.2 1.3 0.3 -0.9 0.2 1.3 -0.4 1.5 -0.3 0.3 -1.3 0.0 -1.3 1.1
## 16 17 18 19 20
## -0.7 -0.6 -0.8 1.9 0.1
plot(mod$fitted,d1, pch=16)
abline(h=0, col=2, lty=2)
r1 <- rstandard(mod) # resíduos studentizados (internamente)
round(r1,3)
## 1 2 3 4 5 6 7 8 9 10
## -0.485 -1.261 1.455 0.291 -0.930 0.186 1.301 -0.372 1.559 -0.275
## 11 12 13 14 15 16 17 18 19 20
## 0.275 -1.376 0.000 -1.301 1.115 -0.744 -0.679 -0.873 2.036 0.097
plot(mod$fitted, r1, pch=16)
abline(h=0, col=2, lty=2)
r2 <- rstudent(mod)
round(r2,3) # resíduos studentizados (externamente)
## 1 2 3 4 5 6 7 8 9 10
## -0.474 -1.283 1.505 0.283 -0.926 0.181 1.329 -0.363 1.629 -0.268
## 11 12 13 14 15 16 17 18 19 20
## 0.268 -1.414 0.000 -1.329 1.124 -0.734 -0.668 -0.867 2.256 0.094
plot(mod$fitted, r1, pch=16)
abline(h=0, col=2, lty=2)
par(mfrow=c(2,2))
plot(mod, which=c(1:4), add.smooth=FALSE, pch=20)
shapiro.test(mod$resid)
##
## Shapiro-Wilk normality test
##
## data: mod$resid
## W = 0.93762, p-value = 0.2161
## QQplot com envelope simulado
fit.model<-mod
source("https://www.ime.usp.br/~giapaula/envel_norm")
# Obs: Como há mais de um y observado para cada valor de x (repetições), é possível utilizar o teste de Bartlett para testar a homogeneidade de variâncias.
bartlett.test(tempo~idade)
##
## Bartlett test of homogeneity of variances
##
## data: tempo by idade
## Bartlett's K-squared = 0.29867, df = 4, p-value = 0.9899
predict(lm(tempo~idade), interval="confidence", se.fit=T, level=0.95)
## $fit
## fit lwr upr
## 1 98.5 93.94935 103.0507
## 2 98.5 93.94935 103.0507
## 3 98.5 93.94935 103.0507
## 4 98.5 93.94935 103.0507
## 5 103.0 99.78220 106.2178
## 6 103.0 99.78220 106.2178
## 7 103.0 99.78220 106.2178
## 8 103.0 99.78220 106.2178
## 9 107.5 104.87268 110.1273
## 10 107.5 104.87268 110.1273
## 11 107.5 104.87268 110.1273
## 12 107.5 104.87268 110.1273
## 13 112.0 108.78220 115.2178
## 14 112.0 108.78220 115.2178
## 15 112.0 108.78220 115.2178
## 16 112.0 108.78220 115.2178
## 17 116.5 111.94935 121.0507
## 18 116.5 111.94935 121.0507
## 19 116.5 111.94935 121.0507
## 20 116.5 111.94935 121.0507
##
## $se.fit
## [1] 2.166026 2.166026 2.166026 2.166026 1.531611 1.531611 1.531611
## [8] 1.531611 1.250555 1.250555 1.250555 1.250555 1.531611 1.531611
## [15] 1.531611 1.531611 2.166026 2.166026 2.166026 2.166026
##
## $df
## [1] 18
##
## $residual.scale
## [1] 5.592654
new <- data.frame(idade = seq(24,28,1))
new
## idade
## 1 24
## 2 25
## 3 26
## 4 27
## 5 28
predict(lm(tempo~idade), new, interval="prediction", se.fit=T, level=0.95)
## $fit
## fit lwr upr
## 1 102.1 89.85545 114.3445
## 2 103.0 90.81762 115.1824
## 3 103.9 91.76872 116.0313
## 4 104.8 92.70862 116.8914
## 5 105.7 93.63720 117.7628
##
## $se.fit
## 1 2 3 4 5
## 1.640088 1.531611 1.436779 1.358451 1.299615
##
## $df
## [1] 18
##
## $residual.scale
## [1] 5.592654
pred<- predict(lm(tempo~idade), interval="confidence", level=0.95)
pred1<- predict(lm(tempo~idade), interval="prediction", level=0.95)
matplot(idade, cbind(pred, pred1[,-1]), lty=c(1,2,2,3,3), col=c(1,2,2,4,4),type = "l",
ylab = "valores preditos de y")
EXEMPLO 2 - Tempo para estocar refrigerantes
Y = tempo necessário para um comerciante estocar uma prateleira da mercearia com refrigerantes (em minutos)
X = quantidade da mercadoria (em unidades)
Arquivo: Exemplo2s.txt (Fonte: Montgomery e Peck (1992))
rm(list=ls())
ex2<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exemplo2s.txt",h=T)
attach(ex2)
plot(x,y, pch=16, col=4)
cor(x,y)
cor.test(x,y)
mod1<-lm(y~x)
anova(mod1)
summary(mod1)
par(mfrow=c(2,2))
plot(mod1, which=c(1:4), add.smooth=FALSE, pch=20)
shapiro.test(mod1$resid)
plot(x,y, pch=16, col=4)
x1<-1:30
ye<- -0.093756+0.407107*x1
lines(x1,ye)
# OBS: o limite inferior de x é muito próximo de zero fazendo sentido que quando x = 0 se tenha y = 0.
# Foi, então, ajustado o modelo sem intercepto.
mod2<-lm(y ~ -1 + x) # modelo sem intercepto
summary(mod2)
1 - (sum(y^2)-(sum((y*x))^2/sum(x^2)))/sum((y-mean(y))^2) # R^2*(0)
anova(mod2)
par(mfrow=c(2,2))
plot(mod2, which=c(1:4), add.smooth=FALSE, pch=20)
shapiro.test(mod2$resid)
fit.model<-mod2
source("https://www.ime.usp.br/~giapaula/envel_norm")
par(mfrow=c(1,1))
plot(x,y, pch=16, col=2)
x1<-min(x):max(x)
ye<- mod2$coef[1]*x1
lines(x1,ye)
ye1<- mod1$coef[1] + mod1$coef[2]*x1
lines(x1,ye1, lty=4)
EXEMPLO 3 - Máquinas automáticas de vendas de refrigerantes
Y = tempo (minutos) para entregador repor e executar pequenos serviços em máquinas de vendas de refrigerantes.
X1 = quantidade de volumes repostos nas máquinas (em unidades)
Arquivo: Exemplo3s.txt (Fonte: Montgomery e Peck (1992))
ex3<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exemplo3s.txt", h=T)
attach(ex3)
head(ex3)
plot(X1, Y, pch=16, col=4)
cor(X1,Y)
cor.test(X1,Y)
mod1<-lm(Y~X1)
anova(mod1)
summary(mod1)
par(mfrow=c(2,2))
plot(mod1, which=c(1:4), add.smooth=FALSE, pch=20)
shapiro.test(mod1$resid)
x1<-2:30
ye<-3.321+2.176*x1
lines(x1,ye)
################################################
# Transformações em Y
################################################
mod2<-lm(sqrt(Y)~X1)
par(mfrow=c(2,2))
plot(mod2, which=c(1:4), add.smooth=FALSE, pch=20)
plot(mod2$fitted.values, mod2$resid, pch=16)
summary(mod2)
shapiro.test(mod2$resid)
require(MASS)
boxcox(Y~X1,data=ex3,plotit=T)
boxcox(Y~X1,data=ex3,lam=seq(-0.5,1.5,1/10))
boxcox(Y~X1,data=ex3,plotit=F)
EXEMPLO 5 - Dados de árvores
Girth = diâmetro das árvores (em polegadas)
Volume = volume de madeira (em pés cúbicos)
Height = altura das árvores (em pés)
Arquivo: dados disponíveis no R (Fonte: Ryan, T.A.; Joiner, B.L.; Ryan, B.F., 1976)
rm(list = ls())
data(trees)
# help(trees)
attach(trees)
trees
plot(Girth,Volume)
cor(Girth,Volume)
cor.test(Girth,Volume)
fit1<-lm(Volume~Girth)
summary(fit1)
anova(fit1)
shapiro.test(fit1$res)
par(mfrow=c(2,2))
plot(fit1, which=c(1:4), add.smooth=FALSE, pch=20)
fit.model<-fit1
source("https://www.ime.usp.br/~giapaula/envel_norm")
################################################
# Transformações em Y
################################################
fit2<-lm(sqrt(Volume)~Girth)
summary(fit2)
anova(fit2)
shapiro.test(fit2$res)
par(mfrow=c(2,2))
plot(fit2, which=c(1:4), add.smooth=FALSE, pch=20)
fit.model<-fit2
source("https://www.ime.usp.br/~giapaula/envel_norm")
require(MASS)
boxcox(Volume~Girth,data=trees,plotit=T)
boxcox(Volume~Girth,data=trees,lam=seq(-0.5,1.5,1/10))
bc<-boxcox(Volume~Girth,data=trees,plotit=F)
cbind(bc$x,bc$y)
fit3<-lm((Volume^0.4)~Girth)
par(mfrow=c(2,2))
plot(fit3, which=c(1:4), add.smooth=FALSE, pch=20)
fit.model<-fit3
source("https://www.ime.usp.br/~giapaula/envel_norm")
EXEMPLO 6 - Bactérias sobreviventes em produto alimentício enlatado
Y = número médio de bactérias sobreviventes em um produto alimentício enlatado
X = minutos de exposição a 300º F
Arquivo: Fonte: Montgomery & Peck (1992)
rm(list = ls())
y<-c(175,108,95,82,71,50,49,31,28,17,16,11)
x<-1:12
cbind(y,x)
plot(x,y, pch=16, col=2)
yt<-log(y)
plot(x,yt, pch=16, col=2)
cor.test(x,yt)
mod1<-lm(yt~x)
summary(mod1)
anova(mod1)
par(mfrow=c(2,2))
plot(mod1, which=c(1:4), add.smooth=FALSE, pch=20)
shapiro.test(mod1$resid)
par(mfrow=c(1,2))
xe<-min(x):max(x)
yte<-mod1$coef[1] + mod1$coef[2]*xe
plot(x, yt, pch=16, col=4, ylab="ln(y)")
lines(xe, yte)
plot(x, y, pch=16, col=4, ylab="ln(y)")
ye<-exp(mod1$coef[1])*exp(mod1$coef[2]*xe)
lines(xe, ye)
EXEMPLO 7 - Energia gerada por moinho de vento
Y = energia elétrica gerada por um moinho de vento
X = velocidade do vento (em MPH)
Arquivo: Fonte: Montgomery & Peck (1992)
rm(list = ls())
ex7<-read.table("http://www.ufpr.br/~giolo/CE071/Exemplos/Exemplo7s.txt",h=T)
attach(ex7)
plot(x,y, pch=16)
## Transformação em x: procedimento de Box-Tidwell
## Chute inicial: alpha0 = 1
mod1<-lm(y~x)
summary(mod1)
w<-x*log(x)
mod2<-lm(y~x+w)
summary(mod2)
alpha1<-(mod2$coef[3]/mod1$coef[2]) + 1
alpha1
x1<-x^alpha1
mod11<-lm(y~x1)
summary(mod11)
w1<-x1*log(x1)
mod22<-lm(y~x1+w1)
summary(mod22)
alpha2<-(mod22$coef[3]/mod11$coef[2]) + alpha1
alpha2
# Transformação indicada: x^-1 = 1/x
xt<-1/x
plot(xt,y, pch=16,col=2)
mod3<-lm(y~xt)
summary(mod3)
par(mfrow=c(2,2))
plot(mod3, which=c(1:4), add.smooth=FALSE, pch=20)
shapiro.test(mod3$resid)
par(mfrow=c(1,1))
plot(xt,y, pch=16, col=4, xlab="1/x")
xe<-seq(0,0.41,by=0.05)
ye<-mod3$coef[1]+mod3$coef[2]*xe
lines(xe,ye)
EXEMPLO 8 - Mínimos Quadrados Ponderados (Dados Exemplo 4)
Y = Renda média mensal sobre vendas de refeições em restaurantes (em mil dólares)
X = Despesas anual com propagandas (em mil dólares)
Fonte: Montgomery & Peck (1992)
rm(list = ls())
ex4<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exemplo4s.txt",h=T)
attach(ex4)
mod1<-lm(y~x)
par(mfrow=c(1,2))
plot(x, mod1$resid, pch = 20)
plot(mod1$fitted, mod1$resid, pch = 20)
m1<-mean(x[1:3])
s1<-var(y[1:3])
m2<-mean(x[4:5])
s2<-var(y[4:5])
m3<-mean(x[7:11])
s3<-var(y[7:11])
m4<-mean(x[13:16])
s4<-var(y[13:16])
m5<-mean(x[18:23])
s5<-var(y[18:23])
m6<-mean(x[24:25])
s6<-var(y[24:25])
m7<-mean(x[27:30])
s7<-var(y[27:30])
medias<-as.vector(c(m1,m2,m3,m4,m5,m6,m7))
s<-as.vector(c(s1,s2,s3,s4,s5,s6,s7))
medias
s
plot(medias, s, pch=16, col=4)
lm(s~medias)
aux<- -7.376+7.820*x
wi<-1/aux
wi
mod3<-lm(y~x,weights=wi)
summary(mod3)
anova(mod3)
rqwi<-sqrt(wi)
we<-rqwi*mod3$resid
wx<-rqwi*x
wy<-rqwi*mod3$fitted.values
plot(wy,we, pch=16)
mod1<-lm(y~x)
mod2<-lm(sqrt(y)~x)
mod2a<-lm(log(y)~x)
mod3<-lm(y~x,weights=wi)
par(mfrow=c(1,4))
plot(mod1$fitted.values,mod1$resid, pch=16)
plot(mod2$fitted.values,mod2$resid, pch=16)
plot(mod2a$fitted.values,mod2a$resid, pch=16)
plot(wy,we, pch=16)
par(mfrow=c(1,2))
plot(x,sqrt(y), pch=16,col=2)
xe<- min(x):max(x)
ye2<- mod2$coef[1]+mod2$coef[2]*xe
ye3<- mod3$coef[1]+mod3$coef[2]*xe
lines(xe,ye2)
title("y^0.5 = 7.8079 + 0.3454*x")
plot(x,y, pch=16,col=2)
lines(xe,ye3)
title("y = 50.975 + 7.922*x")
predict(lm(y~x), interval="confidence", level=0.95)
(predict(lm(sqrt(y)~x), interval="confidence", level=0.95))^2
exp(predict(lm(log(y)~x), interval="confidence", level=0.95))
pred<-predict(lm(y~x, weight=wi), interval="confidence", level=0.95)
pred
Comandos em Linguagem R
Regressão Linear Múltipla
EXEMPLO 1 - Manutenção de máquinas de bebidas
Y = tempo gasto para execução dos serviços (em minutos)
X1 = quantidade de bebida estocada em máquinas acionadas por moeda (em unidades)
X2 = distância percorrida pelo profissional responsavel pelos servicos (em pés)
Arquivo: Exemplo1m.txt (Fonte: Montgomery & Peck, 1992)
rm(list=ls())
ex1<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exemplo1m.txt", h=T)
ex1
attach(ex1)
# instalar package scatterplot3d
require(scatterplot3d)
par(mfrow=c(1,2))
scatterplot3d(X1,X2,Y, type="h", highlight.3d=TRUE, angle= 30, scale.y=0.8, pch=16)
scatterplot3d(X1,X2,Y, type="h", highlight.3d=TRUE, angle=320, scale.y=0.8, pch=16)
X<-as.matrix(cbind(X1,X2))
cor(X)
mod1 <- lm(Y~X1+X2)
anova(mod1)
summary(mod1)
mod2 <- lm(Y~X1)
anova(mod2)
summary(mod2)
mod3 <- lm(Y~X2)
anova(mod3)
summary(mod3)
source("https://docs.ufpr.br/~giolo/CE071/Exemplos/vif.R")
mod1 <- lm(Y~X1+X2)
vif(mod1)
summary(mod1)
anova(mod1)
par(mfrow=c(2,2))
plot(mod1, which=c(1:4),pch=16, add.smooth=FALSE)
par(mfrow=c(1,1))
plot(mod1$residuals,pch=16,ylab="Residuals")
par(mfrow=c(1,2))
plot(X1,mod1$residuals,pch=16,ylab="Residuals")
plot(X2,mod1$residuals,pch=16,ylab="Residuals")
resparc1<-mod1$residuals + 1.615*X1
resparc2<-mod1$residuals + 0.014*X2
plot(X1,resparc1,pch=16,ylab="Parcial Residuals")
plot(X2,resparc2,pch=16,ylab="Parcial Residuals")
inflm.mod1 <- influence.measures(mod1)
inflm.mod1
summary(inflm.mod1)
ex1s9<-ex1[-9,]
detach(ex1)
attach(ex1s9)
mod1s9<-lm(Y~X1+X2)
summary(mod1s9)
anova(mod1s9)
par(mfrow=c(2,2))
plot(mod1s9, which=c(1:4),pch=16, add.smooth=FALSE)
ex1s22<-ex1[-22,]
detach(ex1s9)
attach(ex1s22)
mod1s22<-lm(Y~X1+X2)
summary(mod1s22)
anova(mod1s22)
par(mfrow=c(2,2))
plot(mod1s22, which=c(1:4),pch=16, add.smooth=FALSE)
ex1s9s22<-ex1s9[-21,]
detach(ex1s22)
attach(ex1s9s22)
mod1s9s22<-lm(Y~X1+X2)
summary(mod1s9s22)
anova(mod1s9s22)
par(mfrow=c(2,2))
plot(mod1s9s22, which=c(1:4),pch=16, add.smooth=FALSE)
detach(ex1s9s22)
attach(ex1s9)
mod2s9<-lm(Y~X1)
anova(mod2s9)
summary(mod2s9)
par(mfrow=c(2,2))
plot(mod2s9, which=c(1:4),pch=16, add.smooth=FALSE)
detach(ex1s9)
attach(ex1)
require(scatterplot3d)
par(mfrow=c(1,1))
s3d <- scatterplot3d(X1,X2,Y, type="h", highlight.3d=TRUE,
angle=55, scale.y=0.7, pch=16)
my.lm <- lm(Y ~ X1 + X2)
s3d$plane3d(my.lm,col=4)
par(mfrow=c(1,1))
x1 <- seq(2,30,length=50)
x2 <- seq(30,1500,length=50)
f <- function(x1,x2){
r <- 4.447+1.498*x1+0.0103*x2
}
y <- outer(x1,x2,f)
y[is.na(y)] <- 1
par(bg = "white")
persp(x1,x2,y, theta=30, phi=20, expand=0.5, col="blue",
ltheta=120, shade=0.15, ticktype="detailed",
xlab="X1", ylab="X2", zlab="Y")
new<-data.frame(cbind(8,275))
predict(mod1s9,new,interval="confidence")
predict(mod1s9,interval="confidence")
predict(mod1s9,new,interval="prediction")
predict(mod1s9,interval="prediction")
EXEMPLO 2 - Calor liberado, em calorias, por grama de cimento
Y = heat evolved in calories per gram of cement
X1 = tricalcium aluminate
X2 = tricalcium silicate
X3 = tetracalcium alumino ferrite
X4 = dicalcium silicate
Arquivo: Exemplo2m.txt (Fonte: Montgomery & Peck, 1992).
rm(list=ls())
ex2<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exemplo2m.txt", h=T)
ex2
attach(ex2)
x<-as.matrix(cbind(X1,X2,X3,X4))
rxx<-cor(x)
rxx
mod1<-lm(Y~X1+X2+X3+X4,data=ex2)
source("https://docs.ufpr.br/~giolo/CE071/Exemplos/vif.R")
vif(mod1)
2.1 Procedimento de seleção forward (passo a frente)
modp<-lm(Y~X1+X2+X3+X4, data=ex2)
anova(modp)
mod0<-lm(Y~1, data=ex2)
step(mod0, ~X1 + X2 + X3 + X4, direction=c("forward"), scale=5.98, test="F")
## scale = estimativa de (sigma)^2 = QMres do modelo com as 4 regressoras
## use help(step) no R para mais detalhes
2.2 Procedimento de seleção backward (passo atrás)
step(modp, ~X1 + X2 + X3 + X4, direction=c("backward"), scale=5.98, test="F")
2.3 Procedimento de seleção stepwise (passo a passo)
step(mod0, ~X1 + X2 + X3 + X4, direction=c("both"), scale=5.98, test="F")
EXEMPLO 3 - Resistência do papel kraft (Regressão Polinomial)
Y = Tensile strength (psi)
X = Hardwood concentration (%)
Arquivo: Exemplo3m.txt (Fonte: Montgomery & Peck, 1992)
rm(list=ls())
ex3<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exemplo3m.txt",h=T)
attach(ex3)
plot(X,Y, pch=20)
cor(X,X^2)
X2<-X*X
mod1<-lm(Y~X+X2)
source("https://docs.ufpr.br/~giolo/CE071/Exemplos/vif.R")
vif(mod1)
Xb<-X-mean(X)
Xb2<-(X-mean(X))^2
plot(Xb,Y, pch=20)
cor(Xb,Xb2)
mod<-lm(Y~Xb+Xb2)
vif(mod)
anova(mod)
summary(mod)
par(mfrow=c(2,2))
plot(mod, which=c(1:4),add.smooth=FALSE, pch=20)
shapiro.test(mod$resid)
influence.measures(mod)
summary(influence.measures(mod))
par(mfrow=c(1,1))
plot(Xb,Y, pch=16)
xb<--6:8
yest<-45.295+2.546*xb-0.635*(xb^2)
lines(xb,yest)
pred<- predict(mod, interval="confidence", level=0.95)
pred1<- predict(mod, interval="prediction", level=0.95)
matplot(Xb, cbind(pred, pred1[,-1]), lty=c(1,2,2,3,3), col=c(1,2,2,4,4),type = "l",
ylab = "valores preditos de y")
EXEMPLO 4 - Custo de manutenção e produção de um equipamento
Y = custo anual médio de manutenção de um equipamento (em dólares)
X = produção (em unidades)
Arquivo: Exemplo4m.txt (Fonte: Montgomery & Peck, 1992)
rm(list=ls())
ex4<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exemplo4m.txt",h=T)
ex4
attach(ex4)
plot(x,y, pch=16)
x2<-x*x
cor(x,x2)
mod<-lm(y~x+x2)
source("https://docs.ufpr.br/~giolo/CE071/Exemplos/vif.R")
vif(mod)
a1<-(x-mean(x))/25
p1<-2*a1
p1
a2<-(a1^2)-((10^2 - 1)/12)
p2<-1/2*a2
p2
mod1<-lm(y~p1+p2)
vif(mod1)
anova(mod1)
summary(mod1)
par(mfrow=c(2,2))
plot(mod1, which=c(1:4),add.smooth=FALSE, pch=20)
influence.measures(mod1)
summary(influence.measures(mod1))
mod1$fitted
xe<-50:275
yest<-312.7686+0.0595*(xe-162.5)+0.0022*(xe-162.5)^2
par(mfrow=c(1,1))
plot(x,y, pch=16)
lines(xe,yest)
pred<- predict(mod1, interval="confidence", level=0.95)
pred1<- predict(mod1, interval="prediction", level=0.95)
matplot(x, cbind(pred, pred1[,-1]), lty=c(1,2,2,3,3), col=c(1,2,2,4,4),type = "l",
ylab = "valores preditos de y")
# usando a função poly
fit<-lm(y~poly(x,degree=2))
fit$model
summary(fit)
anova(fit)
par(mfrow=c(2,2))
plot(fit, which=c(1:4),add.smooth=FALSE, pch=20)
influence.measures(mod1)
summary(influence.measures(mod1))
fit$fitted
par(mfrow=c(1,1))
pred<- predict(fit, interval="confidence", level=0.95)
pred1<- predict(fit, interval="prediction", level=0.95)
matplot(x, cbind(pred, pred1[,-1]), lty=c(1,2,2,3,3), col=c(1,2,2,4,4),type = "l",
ylab = "valores preditos de y")
EXEMPLO 5 - Black cherry trees
Y = Volume (volume de madeira)
X1 = Girth (diâmetro da árvore a uma determinada altura do solo)
X2 = Height (altura da árvore)
Arquivo: Exemplo5m.txt ou data(trees) no R
rm(list=ls())
data(trees)
attach(trees)
require(scatterplot3d)
par(mfrow=c(1,2))
scatterplot3d(Girth,Height,Volume, type="h", highlight.3d=TRUE,angle=30, scale.y=0.7, pch=16)
scatterplot3d(Girth,Height,Volume, type="h", highlight.3d=TRUE,angle=320,scale.y=0.7, pch=16)
cor(Girth,Height)
mod1<-lm(Volume~Height)
anova(mod1)
summary(mod1)
par(mfrow=c(2,2))
plot(mod1,which=c(1:4),add.smooth=FALSE, pch=20)
summary(influence.measures(mod1))
mod2<-lm(Volume~Girth)
anova(mod2)
summary(mod2)
par(mfrow=c(2,2))
plot(mod2, which=c(1:4),add.smooth=FALSE, pch=20)
summary(influence.measures(mod2))
mod3<-lm(Volume~Height +Girth)
anova(mod3)
summary(mod3)
par(mfrow=c(2,2))
plot(mod3, which=c(1:4),add.smooth=FALSE, pch=20)
summary(influence.measures(mod3))
mod4<-lm(Volume~Height+Girth+I(Girth^2))
anova(mod4)
summary(mod4)
par(mfrow=c(2,2))
plot(mod4, which=c(1:4),add.smooth=FALSE, pch=20)
summary(influence.measures(mod4))
mod5<-lm(Volume~Girth+I(Girth^2)+I(Girth^3))
anova(mod5)
summary(mod5)
par(mfrow=c(2,2))
plot(mod5, which=c(1:4),add.smooth=FALSE, pch=20)
summary(influence.measures(mod5))
mod6<-lm(Volume~Height+I(Height^2)+Girth+I(Girth^2))
anova(mod6)
summary(mod6)
par(mfrow=c(2,2))
plot(mod6, which=c(1:4),add.smooth=FALSE, pch=20)
summary(influence.measures(mod6))
source("https://docs.ufpr.br/~giolo/CE071/Exemplos/vif.R")
vif(mod4)
Gb<-Girth-mean(Girth)
model<-lm(Volume~Height+Gb+I(Gb^2),data=trees)
vif(model)
summary(model)
anova(model)
par(mfrow=c(2,2))
plot(model, which=c(1:4),add.smooth=FALSE, pch=20)
shapiro.test(model$residuals)
pred<- predict(mod4, interval="confidence", level=0.95)
cbind(Volume, pred)
x <- seq(63,87,length=30)
y <- seq(8,21,length=30)
z <- function(x,y){v <-9.9204 +0.3764*x -2.8851*y +0.2686*y^2}
z <- outer(x, y, z)
persp(x,y,z, theta = -50, phi = 5,
expand = 0.5, col = "lightblue",
xlab="Height",ylab="Girth", zlab="Volume",
scale=T, ticktype = "detailed")
#install.packages('rgl')
library(rgl)
x <- seq(63,87,length=30)
y <- seq(8,21,length=30)
z <- function(x,y){v <-9.9204 +0.3764*x -2.8851*y +0.2686*y^2}
z <- outer(x, y, z)
open3d()
persp3d(x,y,z, theta = -50, phi = 5,
expand = 0.5, col = "lightblue",
xlab="Height",ylab="Girth", zlab="Volume",
scale=T, ticktype = "detailed")
play3d(spin3d(axis=c(0,0,1), rpm=5), duration=13)
EXEMPLO 6 - Comparação de ferramentas
Y = tempo efetivo de vida (horas)
X1 = velocidade do torno (rpm)
X2 = tipo de ferramenta (tipo A = 0, tipo B = 1)
Arquivo: Exemplo6m.txt (Fonte: Montgomery & Peck, 1992)
rm(list=ls())
ex6<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exemplo6m.txt", h=T)
attach(ex6)
par(mfrow=c(1,2))
plot(X1[X2==1],Y[X2==1],pch="B",col=2,ylim=range(Y),xlab="Velocidade",ylab="Tempo")
points(X1[X2==0],Y[X2==0],pch="A",col=4)
plot(X1[X2==1],Y[X2==1],pch=16,col=2,ylim=range(Y),xlab="Velocidade",ylab="Tempo")
points(X1[X2==0],Y[X2==0],pch=16,col=4)
Esquema 1: dummy do tipo 0 e 1
X12<-X1*X2
mod1<-lm(Y~X1+X2+X12)
summary(mod1)
anova(mod1)
mod2<-lm(Y~X1+X2)
summary(mod2)
anova(mod2)
par(mfrow=c(2,2))
plot(mod2,which=c(1:4),add.smooth=FALSE, pch=20)
shapiro.test(mod2$resid)
x1<-min(X1):max(X1)
ya<-mod2$coef[1]+ mod2$coef[2]*x1
yb<-mod2$coef[1]+ mod2$coef[3]+ mod2$coef[2]*x1
par(mfrow=c(1,1))
plot(X1[X2==1],Y[X2==1],pch=16,col=2,ylim=range(c(0,50)),xlab="Velocidade",ylab="Tempo")
points(X1[X2==0],Y[X2==0],pch=16,col=4)
lines(x1,ya,lty=1)
lines(x1,yb,lty=2)
legend(850,45,lty=c(1,2),c("Ferramenta A","Ferramenta B"),bty="n",cex=0.8)
Esquema 2: dummy do tipo 1 e -1
ex6a<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exemplo6m.txt", h=T)
attach(ex6a)
x2<-ifelse(X2==0,1,-1) # A = 1 e B = -1
x12<-X1*x2
mod1<-lm(Y~X1+x2+x12)
summary(mod1)
anova(mod1)
mod2<-lm(Y~X1+x2)
summary(mod2)
anova(mod2)
par(mfrow=c(2,2))
plot(mod2,which=c(1:4),add.smooth=FALSE, pch=20)
shapiro.test(mod1$resid)
x1<-min(X1):max(X1)
ya<-mod2$coef[1]+ mod2$coef[2]*x1 + mod2$coef[3]
yb<-mod2$coef[1]+ mod2$coef[2]*x1 - mod2$coef[3]
par(mfrow=c(1,1))
plot(X1[x2==-1],Y[x2==-1],pch=16,col=2,ylim=range(c(0,50)),xlab="Velocidade", ylab="Tempo")
points(X1[x2==1],Y[x2==1],pch=16, col=4)
lines(x1,ya,lty=1)
lines(x1,yb,lty=2)
legend(850,45,lty=c(1,2),c("Ferramenta A","Ferramenta B"),bty="n",cex=0.8)
Esquema 3: sem intercepto e dummy do tipo 0 e 1
ex6b<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exemplo6m.txt",h=T)
attach(ex6b)
x21<-ifelse(X2==0,1,0)
x22<-ifelse(X2==0,0,1)
cbind(x21,x22)
x1x21<-X1*x21
x1x22<-X1*x22
mod<-lm(Y~-1+X1+x21+x22+x1x21) # Como x1x21 + x1x22 = x1, incluir somente x1x21 ou x1x22
summary(mod)
anova(mod)
mod1<-lm(Y~-1+X1+x21+x22)
summary(mod1)
anova(mod1)
par(mfrow=c(2,2))
plot(mod1,which=c(1:4),add.smooth=FALSE, pch=20)
shapiro.test(mod1$resid)
x1<-min(X1):max(X1)
ya<-mod1$coef[1]*x1 + mod1$coef[2]
yb<-mod1$coef[1]*x1 + mod1$coef[3]
par(mfrow=c(1,1))
plot(X1[x21==1],Y[x21==1],pch=16,col=2,ylim=range(c(0,50)),xlab="Velocidade",ylab="Tempo")
points(X1[x22==1],Y[x22==1],pch=16,col=4)
lines(x1,ya,lty=1)
lines(x1,yb,lty=2)
legend(850,45,lty=c(1,2),c("Ferramenta A","Ferramenta B"),bty="n",cex=0.8)
EXERCÍCIO 1 - Produção de processo químico
Y = produção de um processo químico
X1 = temperatura
X2 = concentração
Arquivo: Exercicio1m.txt (Fonte: Montgomery & Peck (1992); Charnet et al.(2008))
ex1<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exercicio1m.txt", h=T)
attach(ex1)
mod1<-lm(Y~X1)
mod1
summary(mod1)
anova(mod1)
mod2<-lm(Y~X2)
mod2
summary(mod2)
anova(mod2)
mod3<-lm(Y~X1+X2)
mod3
summary(mod3)
anova(mod3)
mod4<-lm(Y~X2+X1)
mod4
summary(mod4)
anova(mod4)
X3<-X1*X2
mod5<-lm(Y~X1+X2+X3)
mod5
summary(mod5)
anova(mod5)
mod6<-lm(Y~X2+X1+X3)
mod6
summary(mod6)
anova(mod6)
plot(X1,X2)
cor(X1,X2)
X<-as.matrix(cbind(X1,X2))
rxx<- cor(X)
det(rxx)
eigen(rxx)
resid<- mod3$residuals
resid
par(mfrow=c(2,2))
plot(mod3, which=c(1:4),pch=16, add.smooth=FALSE, id.n = 0)
shapiro.test(mod3$resid)
par(mfrow=c(1,1))
plot(mod3$residuals,pch=16,ylab="Residuals")
par(mfrow=c(1,2))
plot(X1,mod3$residuals,pch=16,ylab="Residuals")
plot(X2,mod3$residuals,pch=16,ylab="Residuals")
par(mfrow=c(1,2))
resparc1<-mod3$residuals + 0.87*X1
resparc2<-mod3$residuals + 2.424*X2
plot(X1,resparc1,pch=16,ylab="Parcial Residuals")
plot(X2,resparc2,pch=16,ylab="Parcial Residuals")
par(mfrow=c(1,1))
plot(X1*X2,mod3$residuals,pch=16,ylab="Residuals")
inflm.mod3 <- influence.measures(mod3)
inflm.mod3
summary(inflm.mod3)
summary(mod3)
anova(mod3)
obspred<-cbind(Y,mod3$fitted.values)
obspred
x1<-c(0,200)
y<-c(117.42, 291.42)
plot(x1,y,type="l",lty=1,ylim=range(c(100,300)),ylab="Y=Produção",
xlab = "X1 = Temperatura", bty="n")
x1<-c(0,200)
y<-c(153.78, 327.78)
lines(x1,y, lty=2)
legend(150,150,lty=c(1,2),c("X2=10","X2=25"), lwd=1, bty="n")
x2<-c(0,25)
y<-c(180.18, 240.78)
plot(x2,y,type="l",lty=1,ylim=range(c(100,300)),ylab="Y=Produção",
xlab = "X2 = Concentracao", bty="n")
x2<-c(0,25)
y<-c(206.28, 266.88)
lines(x2,y, lty=2)
legend(17,150,lty=c(1,2),c("X1=100","X1=130"), lwd=1, bty="n")
new<-data.frame(cbind(80,10))
predict(mod3, new,interval="confidence")
predict(mod3, interval="confidence")
require(scatterplot3d)
s3d <- scatterplot3d(X1,X2,Y,type="h",highlight.3d=TRUE,
ngle=55, scale.y =0.7, pch=16)
my.lm <- lm(Y~X1+X2)
s3d$plane3d(my.lm, col=4)
x1 <- seq(80, 160, length=50)
x2 <- seq(10,25,length=50)
f <- function(x1,x2){r <- 93.18+0.87*x1+2.424*x2}
y <- outer(x1,x2,f)
y[is.na(y)] <- 1
par(bg = "white")
persp(x1,x2,y,theta = 30, phi = 20,expand = 0.5,col = "blue",
ltheta = 120, shade = 0.75, ticktype = "detailed",
xlab = "X1", ylab = "X2", zlab = "Y")
EXERCÍCIO 2 - Estudo sobre comparação de ferramentas
Y = tempo de vida efetivo (em horas)
X1 = velocidade do torno (rpm)
X2 = tipo da ferramenta
Arquivos: (a) Exercicio2ma.txt (b) Exercicio2mb.txt (c) Exercicio2mc.txt
## (a) Considerando ferramentas: A, B, C
rm(list=ls())
ex2a<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exercicio2ma.txt", h=T)
attach(ex2a)
plot(X1[X2=="B"],Y[X2=="B"],pch="B",col=2,ylim=range(Y),xlab="Velocidade",ylab="Tempo")
points(X1[X2=="A"],Y[X2=="A"],pch="A",col=4)
points(X1[X2=="C"],Y[X2=="C"],pch="C",col=6)
mod1<-lm(Y~X1+factor(X2))
summary(mod1)
anova(mod1)
par(mfrow=c(2,2))
plot(mod1, which=c(1:4),pch=16, add.smooth=FALSE)
shapiro.test(mod1$resid)
mod2<-lm(Y~X1+factor(X2)+X1*factor(X2)) ## verificando a significancia da interacao
summary(mod2)
inflm.mod1 <- influence.measures(mod1)
inflm.mod1
summary(inflm.mod1)
x1<-min(X1):max(X1)
ya<-mod1$coef[1]+ mod1$coef[2]*x1
yb<-mod1$coef[1]+ mod1$coef[3]+ mod1$coef[2]*x1
yc<-mod1$coef[1]+ mod1$coef[4]+ mod1$coef[2]*x1
par(mfrow=c(1,1))
plot(X1[X2=="B"],Y[X2=="B"],pch="B",col=2,ylim=range(c(0,50)),xlab="Velocidade",ylab="Tempo")
points(X1[X2=="A"],Y[X2=="A"],pch="A",col=4)
points(X1[X2=="C"],Y[X2=="C"],pch="C",col=6)
lines(x1,ya,lty=1)
lines(x1,yb,lty=2)
lines(x1,yc,lty=3)
legend(850,45,lty=c(1,2,3),c("Ferramenta A","Ferramenta B","Ferramenta C"),bty="n",cex=0.8)
## (b) Considerando ferramentas: A, B, D
ex2b<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exercicio2mb.txt", h=T)
attach(ex2b)
plot(X1[X2=="B"],Y[X2=="B"],pch="B",col=2,ylim=range(Y),xlab="Velocidade",ylab="Tempo")
points(X1[X2=="A"],Y[X2=="A"],pch="A",col=4)
points(X1[X2=="D"],Y[X2=="D"],pch="D",col=6)
mod1<-lm(Y~X1+factor(X2)+X1*factor(X2))
summary(mod1)
par(mfrow=c(2,2))
plot(mod1, which=c(1:4),pch=16, add.smooth=FALSE)
shapiro.test(mod1$resid)
inflm.mod1 <- influence.measures(mod1)
inflm.mod1
summary(inflm.mod1)
x1<-min(X1):max(X1)
ya<-mod1$coef[1]+ mod1$coef[2]*x1
yb<-mod1$coef[1]+ mod1$coef[2]*x1 + mod1$coef[3]+ mod1$coef[5]*x1
yd<-mod1$coef[1]+ mod1$coef[2]*x1 + mod1$coef[4]+ mod1$coef[6]*x1
par(mfrow=c(1,1))
plot(X1[X2=="B"],Y[X2=="B"],pch="B",col=2,ylim=range(c(0,50)),xlab="Velocidade",ylab="Tempo")
points(X1[X2=="A"],Y[X2=="A"],pch="A",col=4)
points(X1[X2=="D"],Y[X2=="D"],pch="D",col=6)
lines(x1,ya,lty=1)
lines(x1,yb,lty=2)
lines(x1,yd,lty=3)
legend(850,45,lty=c(1,2,3),c("Ferramenta A","Ferramenta B","Ferramenta D"),bty="n",cex=0.8)
## (c) Considerando ferramentas: A, B, C, D
ex2c<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exercicio2mc.txt",h=T)
attach(ex2c)
mod1<-lm(Y~X1+factor(X2)+X1*factor(X2))
summary(mod1)
par(mfrow=c(2,2))
plot(mod1, which=c(1:4),pch=16, add.smooth=FALSE)
shapiro.test(mod1$resid)
inflm.mod1 <- influence.measures(mod1)
inflm.mod1
summary(inflm.mod1)
x1<-min(X1):max(X1)
ya<-mod1$coef[1]+ mod1$coef[2]*x1
yb<-mod1$coef[1]+ mod1$coef[2]*x1 + mod1$coef[3]+ mod1$coef[6]*x1
yc<-mod1$coef[1]+ mod1$coef[2]*x1 + mod1$coef[4]+ mod1$coef[7]*x1
yd<-mod1$coef[1]+ mod1$coef[2]*x1 + mod1$coef[5]+ mod1$coef[8]*x1
par(mfrow=c(1,1))
plot(X1[X2=="B"],Y[X2=="B"],pch="B",col=2,ylim=range(c(0,50)),xlab="Velocidade",ylab="Tempo")
points(X1[X2=="A"],Y[X2=="A"],pch="A",col=4)
points(X1[X2=="C"],Y[X2=="C"],pch="C",col=1)
points(X1[X2=="D"],Y[X2=="D"],pch="D",col=3)
lines(x1,ya,lty=1)
lines(x1,yb,lty=2)
lines(x1,yc,lty=3)
lines(x1,yd,lty=4)
legend(850,45,lty=c(1,2,3,4),c("Ferramenta A","Ferramenta B","Ferramenta C",
"Ferramenta D"),bty="n",cex=0.8)
EXERCÍCIO 3 - Performance para certo trabalho
Y = performance para o trabalho sob avaliação,
X1 = teste para verificar as habilidades relacionadas ao trabalho
X2 = raça (branca = 0 e outras = 1)
Arquivo: Exercicio3m.txt Fonte: Montgomery & Peck (1992)
rm(list=ls())
ex3<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exercicio3m.txt", h=T)
attach(ex3)
plot(X1[X2==1],Y[X2==1],pch=16,col=2,ylim=range(c(0,9)),xlab="Teste",ylab="Aptidão")
points(X1[X2==0],Y[X2==0],pch=16,col=4)
legend(0.5,7,pch=c(16,16),col=c(4,2),c("Branca","Outras"),bty="n",cex=0.8)
mod1<-lm(Y~X1+factor(X2)+X1*(factor(X2)))
summary(mod1)
par(mfrow=c(2,2))
plot(mod1, which=c(1:4),pch=16, add.smooth=FALSE, id.n = 0)
shapiro.test(mod1$resid)
inflm.mod1 <- influence.measures(mod1)
inflm.mod1
summary(inflm.mod1)
x1<-0:3
yb<-mod1$coef[1]+ mod1$coef[2]*x1
yo<-mod1$coef[1]+ mod1$coef[2]*x1 + mod1$coef[3] + mod1$coef[4]*x1
par(mfrow=c(1,1))
plot(X1[X2==1],Y[X2==1],pch=16,col=2,ylim=range(c(0,9)),xlab="Teste",ylab="Aptidão")
points(X1[X2==0],Y[X2==0],pch=16,col=4)
lines(x1,yb,lty=1)
lines(x1,yo,lty=2)
legend(0.5,7,lty=c(1,2),c("Branca","Outras"),bty="n",cex=0.8)
EXERCÍCIO 4 - Estudo sobre salário
Y: Salário anual (SAL) [em dólares]
X1: Anos de Experiência (EXP)
X2: Nível Educacional (EDUC) [1 = básico, 2 = médio, 3 = superior]
X3: Função administrativa (MGT) [0 = subordinado, 1 = chefe]
Arquivo: Exercicio4m.txt Fonte: Neter & Wassermam (1990?)
rm(list=ls())
ex4<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exercicio4m.txt",h=T)
attach(ex4)
table(EDUC,MGT)
par(mfrow=c(1,2))
plot(EXP, SAL,pch=16,col=1, xlab="Experiencia (anos)",ylab="Salario Anual")
plot(EXP[EDUC==1 & MGT==1],SAL[EDUC==1 & MGT==1],pch="1",col=1,ylim=range(SAL),
xlim=range(EXP), xlab="Experiencia (anos)",ylab="Salario Anual")
points(EXP[EDUC==2 & MGT==1],SAL[EDUC==2 & MGT==1],pch="2",col=2)
points(EXP[EDUC==3 & MGT==1],SAL[EDUC==3 & MGT==1],pch="3",col=3)
points(EXP[EDUC==1 & MGT==0],SAL[EDUC==1 & MGT==0],pch="4",col=4)
points(EXP[EDUC==2 & MGT==0],SAL[EDUC==2 & MGT==0],pch="5",col=5)
points(EXP[EDUC==3 & MGT==0],SAL[EDUC==3 & MGT==0],pch="6",col=6)
mod1<-lm(SAL~EXP+factor(EDUC)+factor(MGT))
summary(mod1)
par(mfrow=c(2,2))
plot(mod1, which=c(1:4), add.smooth=FALSE, pch=20)
shapiro.test(mod1$resid)
influence.measures(mod1)
summary(influence.measures(mod1))
x1<-min(EXP):max(EXP)
y10<-mod1$coef[1]+mod1$coef[2]*x1 #EDUC=1 e MGT=0
y20<-mod1$coef[1]+mod1$coef[2]*x1+mod1$coef[3] #EDUC=2 e MGT=0
y21<-mod1$coef[1]+mod1$coef[2]*x1+mod1$coef[3]+mod1$coef[5] #EDUC=2 e MGT=1
y31<-mod1$coef[1]+mod1$coef[2]*x1+mod1$coef[4]+mod1$coef[5] #EDUC=3 e MGT=1
x6<-min(EXP):5
y30<-mod1$coef[1]+mod1$coef[2]*x6+mod1$coef[4] #EDUC=3 e MGT=0
x11<-min(EXP):8
y11<-mod1$coef[1]+mod1$coef[2]*x11+mod1$coef[5] #EDUC=1 e MGT=1
par(mfrow=c(1,1))
plot(EXP[EDUC==1 & MGT==1],SAL[EDUC==1 & MGT==1],pch="1",col=1,ylim=range(SAL),
xlim=range(EXP), xlab="Experiencia (anos)",ylab="Salario Anual")
points(EXP[EDUC==2 & MGT==1],SAL[EDUC==2 & MGT==1],pch="2",col=2)
points(EXP[EDUC==3 & MGT==1],SAL[EDUC==3 & MGT==1],pch="3",col=3)
points(EXP[EDUC==1 & MGT==0],SAL[EDUC==1 & MGT==0],pch="4",col=4)
points(EXP[EDUC==2 & MGT==0],SAL[EDUC==2 & MGT==0],pch="5",col=5)
points(EXP[EDUC==3 & MGT==0],SAL[EDUC==3 & MGT==0],pch="6",col=6)
lines(x1,y10)
lines(x1,y20)
lines(x6,y30)
lines(x11,y11)
lines(x1,y21)
lines(x1,y31)
# incluindo interação entre EDUC e MGT
mod2<-lm(SAL~EXP+factor(EDUC)+factor(MGT)+factor(EDUC)*factor(MGT))
summary(mod2)
par(mfrow=c(2,2))
plot(mod2, which=c(1:4), add.smooth=FALSE, pch=20)
shapiro.test(mod2$resid)
summary(influence.measures(mod2))
ex4a<-ex4[-33,]
mod3<-lm(SAL~EXP+factor(EDUC)+factor(MGT)+factor(EDUC)*factor(MGT),data=ex4a)
summary(mod3)
par(mfrow=c(2,2))
plot(mod3, which=c(1:4), add.smooth=FALSE, pch=20)
shapiro.test(mod3$resid)
summary(influence.measures(mod3))
x1<-min(EXP):max(EXP)
y10<-mod3$coef[1]+mod3$coef[2]*x1 #EDUC=1 e MGT=0
y20<-mod3$coef[1]+mod3$coef[2]*x1+mod3$coef[3] #EDUC=2 e MGT=0
y21<-mod3$coef[1]+mod3$coef[2]*x1+mod3$coef[3]+mod3$coef[5]+mod3$coef[6] #EDUC=2 e MGT=1
y31<-mod3$coef[1]+mod3$coef[2]*x1+mod3$coef[4]+mod3$coef[5]+mod3$coef[7] #EDUC=3 e MGT=1
x6<-min(EXP):5
y30<-mod3$coef[1]+mod3$coef[2]*x6+mod3$coef[4] #EDUC=3 e MGT=0
x11<-min(EXP):8
y11<-mod3$coef[1]+mod3$coef[2]*x11+mod3$coef[5] #EDUC=1 e MGT=1
par(mfrow=c(1,1))
plot(EXP[EDUC==1 & MGT==1],SAL[EDUC==1 & MGT==1],pch="1",col=1,ylim=range(SAL),
xlim=range(EXP), xlab="Experiencia (anos)",ylab="Salario Anual")
points(EXP[EDUC==2 & MGT==1],SAL[EDUC==2 & MGT==1],pch="2",col=2)
points(EXP[EDUC==3 & MGT==1],SAL[EDUC==3 & MGT==1],pch="3",col=3)
points(EXP[EDUC==1 & MGT==0],SAL[EDUC==1 & MGT==0],pch="4",col=4)
points(EXP[EDUC==2 & MGT==0],SAL[EDUC==2 & MGT==0],pch="5",col=5)
points(EXP[EDUC==3 & MGT==0],SAL[EDUC==3 & MGT==0],pch="6",col=6)
lines(x1,y10)
lines(x1,y20)
lines(x6,y30)
lines(x11,y11)
lines(x1,y21)
lines(x1,y31)
EXERCÍCIO 5 - Tensão de bateria
Y = queda de tensão da bateria (em voltagem) de um motor de míssil guiado
X = tempo (em segundos)
rm(list=ls())
ex5<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exercicio5m.txt", h=T)
attach(ex5)
plot(X,Y,pch=16, col=4)
X2<-X*X
X3<-X2*X
mod1<-lm(Y~X+X2+X3)
summary(mod1)
anova(mod1)
par(mfrow=c(2,2))
plot(mod1, which=c(1:4), add.smooth=FALSE, pch=20)
shapiro.test(mod1$resid)
source("https://docs.ufpr.br/~giolo/CE071/Exemplos/vif.R")
vif(mod1)
Xb<-X-mean(X)
Xb2<-Xb*Xb
Xb3<-Xb2*Xb
mod2<-lm(Y~Xb+Xb2+Xb3)
vif(mod2)
summary(mod2)
anova(mod2)
par(mfrow=c(2,2))
plot(mod2, which=c(1:4), add.smooth=FALSE, pch=20)
shapiro.test(mod2$resid)
Xb4<-Xb3*Xb
mod3<-lm(Y~Xb+Xb2+Xb3+Xb4)
vif(mod3)
anova(mod3)
summary(mod3)
par(mfrow=c(2,2))
plot(mod3, which=c(1:4), add.smooth=FALSE, pch=20)
shapiro.test(mod2$resid)
inflm.mod3 <- influence.measures(mod3)
inflm.mod3
summary(inflm.mod3)
par(mfrow=c(1,1))
plot(Xb, Y, pch=20, col=4)
lines(Xb, mod3$fitted, type="l", col=2, lwd=2)
pred<-predict(mod3, interval="confidence", level=0.95)
pred1<- predict(mod3, interval="prediction", level=0.95)
matplot(Xb, cbind(pred, pred1[,-1]), lty=c(1,2,2,3,3), col=c(1,2,2,4,4), type = "l",
ylab = "valores preditos de y", lwd=2)
EXERCÍCIO 6 - Demanda de supervisores
Y = no de supervisores em 27 estabelecimentos industriais
X = no de trabalhadores supervisionados
Arquivo: Exercicio6m.txt
## Modelo 1: Y em X
rm(list=ls())
ex6<- read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exercicio6m.txt", h=T)
attach(ex6)
plot(X,Y,pch=16, col=4)
mod1<-lm(Y~X)
summary(mod1)
summary(influence.measures(mod1))
par(mfrow=c(2,2))
plot(mod1, which=c(1:4), add.smooth=FALSE, pch=16)
shapiro.test(mod1$resid)
## Modelo 2: transformação raiz quadrada em Y
require(MASS)
boxcox(Y~X,data=ex6,plotit=T)
boxcox(Y~X,data=ex6,lam=seq(-0.5,1.5,1/10))
mod2<-lm(sqrt(Y)~X)
summary(mod2)
anova(mod2)
summary(influence.measures(mod2))
par(mfrow=c(2,2))
plot(mod2, which=c(1:4), add.smooth=FALSE, pch=16)
shapiro.test(mod2$resid)
## Modelo 3: MQP (Mínimos Quadrados Ponderados)
wi<-1/(X^2) # pesos
mod3<-lm(Y~X,weights=wi)
summary(mod3)
anova(mod3)
summary(influence.measures(mod3))
rqwi<-sqrt(wi)
we<-rqwi*mod3$resid
wx<-rqwi*X
wy<-rqwi*mod3$fitted.values
plot(wy,we, pch=16)
par(mfrow=c(1,3))
plot(mod1$fitted.values,mod1$resid, pch=16)
plot(mod2$fitted.values,mod2$resid, pch=16)
plot(wy,we, pch=16)
# Modelo 4: log(Y) e inclusão do termo X2
X2<-X*X
mod4<-lm(log(Y)~X+X2)
source("https://docs.ufpr.br/~giolo/CE071/Exemplos/vif.R")
vif(mod4)
Xb<-X-mean(X)
Xb2<-Xb*Xb
mod5<-lm(log(Y)~Xb+Xb2)
vif(mod5)
summary(mod5)
anova(mod5)
par(mfrow=c(2,2))
plot(mod5, which=c(1:4), add.smooth=FALSE, pch=16)
shapiro.test(mod5$resid)
summary(influence.measures(mod5))
## resíduos versus valores preditos para os 4 modelos ajustados!
par(mfrow=c(1,4))
plot(mod1$fitted.values,mod1$resid, pch=16)
plot(mod2$fitted.values,mod2$resid, pch=16)
plot(wy,we, pch=16)
plot(mod5$fitted.values,mod5$resid, pch=16)
lines(X,logyest, type="l", col=2, lwd=2)
plot(X,log(Y), pch=16) logyest<-4.58+1.439e-03*Xb-1.101e-06*Xb2 lines(X,logyest, type="l", col=2, lwd=2)
yest<-exp(4.58+1.439e-03*Xb-1.101e-06*Xb2)
plot(X,Y, pch=16)
lines(X,yest, type="l", col=2, lwd=2)
logY<-log(Y)
mod5<-lm(logY~Xb+Xb2)
pred<-predict(mod5, interval="confidence", level=0.95)
pred1<- predict(mod5, interval="prediction", level=0.95)
matplot(Xb, cbind(pred, pred1[,-1]), lty=c(1,2,2,3,3), col=c(1,2,2,4,4),type = "l",
ylab = "valores preditos de log(y)", lwd=2)
pred<- exp(predict(mod5, interval="confidence", level=0.95))
pred1<- exp(predict(mod5, interval="prediction", level=0.95))
matplot(Xb, cbind(pred, pred1[,-1]), lty=c(1,2,2,3,3), col=c(1,2,2,4,4),type = "l",
ylab = "valores preditos de y", lwd=2)