# Birth-weight analysis of the "babies" data: load the file, drop rows that
# carry missing-value codes, screen each candidate predictor with a simple
# regression, then fit the additive and the full-interaction models.

babies <- read.csv("Planilha de dados Babies.txt", sep = " ")
head(babies)

# Adjustments ----
# Each column uses a numeric code for "unknown"; rows carrying that code are
# dropped, one column at a time, in the order listed here.
missing_codes <- c(
  bwt = 999, gestation = 999, parity = 9, age = 99,
  height = 99, weight = 999, smoke = 9
)
for (column in names(missing_codes)) {
  # which() keeps only rows where the comparison is TRUE, so rows where the
  # comparison is NA are dropped as well.
  babies <- babies[which(babies[[column]] != missing_codes[[column]]), ]
}

# Important variables ----
# One simple regression of bwt per candidate predictor: ANOVA table,
# scatter plot and fitted line.

# gestation
lm1 <- lm(bwt ~ gestation, data = babies)
anova(lm1)
plot(bwt ~ gestation, data = babies)
abline(lm1)

# parity
lm2 <- lm(bwt ~ parity, data = babies)
anova(lm2)
plot(bwt ~ parity, data = babies)
abline(lm2)

# age
lm3 <- lm(bwt ~ age, data = babies)
anova(lm3)
plot(bwt ~ age, data = babies)
abline(lm3)

# height
lm4 <- lm(bwt ~ height, data = babies)
anova(lm4)
plot(bwt ~ height, data = babies)
abline(lm4)

# weight
lm5 <- lm(bwt ~ weight, data = babies)
anova(lm5)
plot(bwt ~ weight, data = babies)
abline(lm5)

# smoke
lm6 <- lm(bwt ~ smoke, data = babies)
anova(lm6)
plot(bwt ~ smoke, data = babies)
abline(lm6)

# Predictor variables retained:
#   gestation
#   height
#   weight
#   smoke

# Biologically relevant interactions:
#   gestation:height
#   gestation:weight
#   gestation:smoke
#   height:weight
#   weight:smoke

# Model with the predictor variables but without interactions:
lm7 <- lm(bwt ~ gestation + height + weight + smoke, data = babies)
summary(lm7)

# Adding the interactions:
lmfull <- lm(
  bwt ~ gestation + height + weight + smoke +
    gestation:height + gestation:weight + gestation:smoke +
    height:weight + weight:smoke,
  data = babies
)
summary(lmfull)
anova(lmfull)

# Simplifying the model ----
## Interactions are removed one at a time and each reduced model is compared
## with lmfull or with the model retained so far. The removal order runs
## from the largest p-value (least significant) to the smallest p-value
## (most significant), according to summary(lmfull).
# Backward elimination of the interaction terms. Each step drops one
# interaction and compares the reduced model with the previously retained
# one through an F test (anova). The note after each anova() call is the
# decision recorded by the author for that comparison.

# lmbabies1 = lmfull - weight:smoke
lmbabies1 <- lm(
  bwt ~ gestation + height + weight + smoke +
    gestation:height + gestation:weight + gestation:smoke + height:weight,
  data = babies
)
summary(lmbabies1)
anova(lmfull, lmbabies1) # p-value > 0.05 -> keep the simpler model

# lmbabies2 = lmbabies1 - height:weight
lmbabies2 <- lm(
  bwt ~ gestation + height + weight + smoke +
    gestation:height + gestation:weight + gestation:smoke,
  data = babies
)
summary(lmbabies2)
anova(lmbabies1, lmbabies2) # p-value > 0.05 -> keep the simpler model

# lmbabies3 = lmbabies2 - gestation:height
lmbabies3 <- lm(
  bwt ~ gestation + height + weight + smoke +
    gestation:weight + gestation:smoke,
  data = babies
)
summary(lmbabies3)
anova(lmbabies2, lmbabies3) # p-value > 0.05 -> keep the simpler model

# lmbabies4 = lmbabies3 - gestation:weight
lmbabies4 <- lm(
  bwt ~ gestation + height + weight + smoke + gestation:smoke,
  data = babies
)
summary(lmbabies4)
anova(lmbabies3, lmbabies4) # p-value > 0.05 -> keep the simpler model

# lmbabies5 = lmbabies4 - gestation:smoke
lmbabies5 <- lm(bwt ~ gestation + height + weight + smoke, data = babies)
summary(lmbabies5)
anova(lmbabies4, lmbabies5) # p-value < 0.05 -> keep the more complex model
## Starting from the retained model, lmbabies4, the predictor variables are
## removed one at a time, using the same criterion applied to the
## interactions. Every reduced model is compared against lmbabies4.
# NOTE(review): lmbabies7 and lmbabies9 drop a main effect while keeping the
# gestation:smoke interaction, which does not respect the marginality
# principle - confirm this is intended.

# lmbabies6 = lmbabies4 - weight
lmbabies6 <- lm(
  bwt ~ gestation + height + smoke + gestation:smoke,
  data = babies
)
summary(lmbabies6)
anova(lmbabies4, lmbabies6) # p-value < 0.05 -> keep the more complex model

# lmbabies7 = lmbabies4 - smoke
lmbabies7 <- lm(
  bwt ~ gestation + height + weight + gestation:smoke,
  data = babies
)
summary(lmbabies7)
anova(lmbabies4, lmbabies7) # p-value < 0.05 -> keep the more complex model

# lmbabies8 = lmbabies4 - height
lmbabies8 <- lm(
  bwt ~ gestation + weight + smoke + gestation:smoke,
  data = babies
)
# Summarise the model that was just fitted (the original summarised
# lmbabies4 here by mistake).
summary(lmbabies8)
anova(lmbabies4, lmbabies8) # p-value < 0.05 -> keep the more complex model

# lmbabies9 = lmbabies4 - gestation
lmbabies9 <- lm(
  bwt ~ weight + smoke + height + gestation:smoke,
  data = babies
)
summary(lmbabies9)
anova(lmbabies4, lmbabies9) # p-value < 0.05 -> keep the more complex model

### Conclusion: the selected model is lmbabies4:
### lmbabies4 <- lm(bwt ~ gestation + height + weight + smoke +
###                   gestation:smoke, data = babies)

# Model diagnostics ----
# Draw the diagnostic plots on a 2 x 2 grid, then restore the previous
# graphics settings so later plots are not affected.
old_par <- par(mfrow = c(2, 2))
plot(lmbabies4)
par(old_par)