library(MASS)
data(Animals)
plot(brain~body,data=Animals)
plot(brain~body,data=Animals, log="xy")

plot(log(brain)~log(body), data=Animals)

Animals[log(Animals$body)>8&log(Animals$brain)<6,]
anim.m1 <- lm(log(brain)~log(body),data=Animals)
abline(anim.m1, col="blue")

plot(anim.m1)
anim.m2 <- lm(log(brain)~log(body),data=Animals, 
subset=!(log(Animals$body)>8&log(Animals$brain)<6))

plot(log(brain)~log(body), data=Animals)
abline(anim.m1, col="blue", lty=2)
abline(anim.m2, col="red")

par(mfrow=c(2,2))
plot(anim.m2)
par(mfrow=c(1,1))

anova(anim.m2)
summary(anim.m2)

coef(anim.m2)
confint(anim.m2)

cf <- as.numeric(coef(anim.m2))
Animals$prev <- round(exp(cf[1])*Animals$body^cf[2],1)
Animals

set.seed(22)

x <- runif(30,0,10)
y <- rexp(30,rate=1/(2+5*x))

plot(y~x)
m1 <- lm(y~x)

par(mfrow=c(2,2))
plot(m1)
par(mfrow=c(1,1))
coef(m1)

result <- matrix(ncol=6,nrow=2000) 
for(i in 1:2000){
  yr <- rexp(30,rate=1/(2+5*x))
  m <- lm(yr~x)
  CIs <- confint(m)
  result[i,1:2] <- as.numeric(coef(m))# guarda os coeficientes nas duas primeiras colunas
  result[i,3:4] <- CIs[1,]#guarda limites dos ICs do intercepto
  result[i,5:6] <- CIs[2,]#guarda limites dos ICs da inclinacao
}

hist(result[,1])
abline(v=mean(result[,1]), lty=2, col="blue")
abline(v=2, col="red", lty=2)

hist(result[,2])
abline(v=mean(result[,2]), lty=2, col="blue")
abline(v=5, col="red", lty=2)

100*(2-mean(result[,1]))/2 ## -3%
100*(5-mean(result[,2]))/5 ## 1%

sum(result[,3]<=2&result[,4]>=2)
sum(result[,5]<=5&result[,6]>=5)

poluicao <- read.table("/Users/FAM/Downloads/Pollute.txt", header=T, sep="\t")
par(mfrow=c(1,3))
plot(Pollution~Industry, data=poluicao)
plot(Pollution~Temp, data=poluicao)
plot(Pollution~Wind, data=poluicao)
par(mfrow=c(1,1))

pol.m1 <- lm(Pollution~Industry, data=poluicao)
plot(Pollution~Industry, data=poluicao)
abline(pol.m1, col="blue")

abline(lm(Pollution~Industry, data=poluicao, subset=Industry<max(Industry)), col="red")
anova(pol.m1)

pol.m0 <- lm(Pollution~1, data=poluicao)
##compare:
anova(pol.m1,pol.m0)
anova(pol.m1)

summary(pol.m0)

mean(poluicao$Pollution)
sd(poluicao$Pollution)

plot(Pollution~Industry, data=poluicao)
abline(pol.m1, col="blue")
abline(h=mean(poluicao$Pollution),col="red")

pol.m2 <- lm(Pollution~Industry+Temp, data=poluicao)
anova(pol.m1,pol.m2)

pol.m3 <- update(pol.m2,.~.+Wind)
anova(pol.m2,pol.m3)
anova(pol.m3)

par(mfrow=c(2,2))
plot(pol.m2)
par(mfrow=c(1,1))

poluicao[41,c(1:3,5)]
par(mfrow=c(1,2))
plot(Pollution~Industry, data=poluicao)
points(Pollution~Industry, data=poluicao[41,], col="red", pch=19)
plot(Pollution~Temp, data=poluicao)
points(Pollution~Temp, data=poluicao[41,], col="red", pch=19)
par(mfrow=c(1,1))

summary(pol.m2)
anova(lm(Pollution~Temp+Wind+Industry, data=poluicao))
anova(lm(Pollution~Industry+Wind+Temp, data=poluicao))

anova(lm(Pollution~Temp+Industry, data=poluicao),lm(Pollution~Temp, data=poluicao))
anova(lm(Pollution~Temp+Industry, data=poluicao),lm(Pollution~Industry, data=poluicao))

library(lattice)
temp.shingle <- equal.count(poluicao$Temp,number=4)
summary(temp.shingle)

xyplot(Pollution~Industry|temp.shingle,data=poluicao,
   panel=function(x,y,...){
     panel.xyplot(x,y,...)
     panel.abline(lm(y~x),...)
   }
       )

xyplot(Pollution~Industry|equal.count(poluicao$Wind,number=4),data=poluicao,
   panel=function(x,y,...){
     panel.xyplot(x,y,...)
     panel.abline(lm(y~x),...)
   }
       )

pol.m4 <- update(pol.m2,.~.+Industry:Temp)## acrescenta apenas a interação
pol.m5 <- update(pol.m2,.~.+Wind+Industry:Wind)##acrescenta a interação e seus componentes
anova(pol.m2,pol.m4)
anova(pol.m2,pol.m5)


##Tercis da variavel Temp
Temp.tercis <- quantile(poluicao$Temp,c(0,1/3,2/3,1))
Temp.tercis
##Criando um fator com estes breakpoints, com a funcao cut
poluicao$Temp.c <- cut(poluicao$Temp, breaks=Temp.tercis)
## Verificando: três fatores ordenados de faixas de temperatura
levels(poluicao$Temp.c)

cf.m2 <- coef(pol.m2)
par(mfrow=c(1,3))
##Primeiro Grafico: primeiro nivel do fator faixa de temperatura
plot(Pollution~Industry, data=poluicao, subset=Temp.c==levels(poluicao$Temp.c)[1],
     main=paste("Temp: ",Temp.tercis[1]," a ",Temp.tercis[2],sep=""),
     xlim=c(min(poluicao$Industry),max(poluicao$Industry)),
     ylim=c(min(poluicao$Pollution),max(poluicao$Pollution)) )
##Linhas do previsto para ponto medio da faixa de temperatura
abline(cf.m2[1]+cf.m2[3]*mean(Temp.tercis[1:2]),cf.m2[2], col="blue", lwd=2)
##Segundo Grafico
plot(Pollution~Industry, data=poluicao, subset=Temp.c==levels(poluicao$Temp.c)[2],
     main=paste("Temp: ",round(Temp.tercis[2],1)," a ",round(Temp.tercis[3],1),sep=""),
     xlim=c(min(poluicao$Industry),max(poluicao$Industry)),
     ylim=c(min(poluicao$Pollution),max(poluicao$Pollution)) )
abline(cf.m2[1]+cf.m2[3]*mean(Temp.tercis[2:3]),cf.m2[2], col="blue", lwd=2)
##Terceiro grafico
plot(Pollution~Industry, data=poluicao, subset=Temp.c==levels(poluicao$Temp.c)[3],
     main=paste("Temp: ",round(Temp.tercis[3],1)," a ",round(Temp.tercis[4],1),sep=""),
     xlim=c(min(poluicao$Industry),max(poluicao$Industry)),
     ylim=c(min(poluicao$Pollution),max(poluicao$Pollution)) )
abline(cf.m2[1]+cf.m2[3]*mean(Temp.tercis[3:4]),cf.m2[2], col="blue", lwd=2)
par(mfrow=c(1,1))


init.h = c(600, 700, 800, 950, 1100, 1300, 1500)
h.d = c(253, 337, 395, 451, 495, 534, 573)
plot(h.d~init.h)

mod1 <- lm(h.d~init.h)
mod2 <- lm(h.d~init.h+I(init.h^2))
anova(mod1,mod2)

abline(mod1)
cf.m2 <- coef(mod2)
curve(cf.m2[1]+cf.m2[2]*x+cf.m2[3]*x^2, add=T, lty=2)
summary(mod2)
