#####################
###### bebes (babies)
#####################

# Read the data (tab-separated file with a header row).
bebes <- read.csv("babies.csv", sep = "\t", header = TRUE, as.is = TRUE)
str(bebes)
head(bebes)

# Data cleaning: drop every row holding one of the codes 999 / 99 / 9.
# NOTE(review): these are presumably missing-value sentinels -- confirm
# against the data set's codebook.
bebes <- bebes[!(bebes$bwt == 999), ]
bebes <- bebes[!(bebes$gestation == 999), ]
bebes <- bebes[!(bebes$parity == 9), ]
bebes <- bebes[!(bebes$age == 99), ]
bebes <- bebes[!(bebes$height == 99), ]
bebes <- bebes[!(bebes$weight == 999), ]
bebes <- bebes[!(bebes$smoke == 9), ]

# Rescale birth weight by 35.274 (the number of ounces in one kilogram).
# NOTE(review): presumably converts ounces to kg -- confirm the raw unit.
bebes$bwt <- bebes$bwt / 35.274

# Simple linear regression of birth weight on gestation.
lm.bebes <- lm(bwt ~ gestation, data = bebes)
summary(lm.bebes)
confint(lm.bebes)

# Confidence band for the mean response, computed by hand:
#   se(x) = sqrt(sigma2 * (1 / n + (x - mean(x))^2 / Sxx))
# sigma2 has to be the residual variance of the fit (RSS / residual df).
# The raw variance of bwt still contains the variation explained by
# gestation, so using it makes the band too wide.
sigma2 <- sum(residuals(lm.bebes)^2) / df.residual(lm.bebes)
ssx <- sum((bebes$gestation - mean(bebes$gestation))^2)
x.mx <- (bebes$gestation - mean(bebes$gestation))^2
se <- sqrt(sigma2 * (1 / length(bebes$gestation) + x.mx / ssx))
t.value <- qt(0.975, df.residual(lm.bebes))
ic <- se * t.value

# Plot the data, the 95% confidence band and the fitted line.
y.pred <- predict(object = lm.bebes)
plot(bwt ~ gestation, data = bebes)

# Order x and both bounds with the SAME permutation so that every bound
# stays attached to its own x value. Sorting each vector independently only
# gives the right pairing when both bounds happen to increase with x.
ord <- order(bebes$gestation)
x.ord <- bebes$gestation[ord]
polygon(
  c(rev(x.ord), x.ord),
  c(rev((y.pred - ic)[ord]), (y.pred + ic)[ord]),
  col = rgb(1, 0, 0, 0.5),
  border = NA
)
abline(lm.bebes, col = "blue")
# lines(x = x.ord, y = (y.pred + ic)[ord], col = "blue")
# lines(x = x.ord, y = (y.pred - ic)[ord], col = "red")

#########################
#### galileu
#########################

init.h <- c(600, 700, 800, 950, 1100, 1300, 1500)
h.d <- c(253, 337, 395, 451, 495, 534, 573)
galileu <- data.frame(init.h, h.d)

# Models: polynomial fits of degree 3, 2 and 1.
lm3 <- lm(h.d ~ init.h + I(init.h^2) + I(init.h^3))
lm2 <- lm(h.d ~ init.h + I(init.h^2))
lm1 <- lm(h.d ~ init.h)

# Comparisons between nested models (the author's pick is noted after each).
anova(lm3, lm2) # lm3
anova(lm3, lm1) # lm3
summary(lm3)
# Author's conclusion: the third-degree model describes the data better than
# the other two models. It explains ~174 times more than the first-degree
# model and ~43 times more than the second-degree model.
# Plots: observed points plus the fitted curves of the three models
# (degree 1 in the default colour, degree 2 in red, degree 3 in blue).
plot(h.d ~ init.h, ylab = "Distancia", xlab = "Altura Inicial")
lines(galileu$init.h, predict(lm1))
lines(galileu$init.h, predict(lm2), col = "red")
lines(galileu$init.h, predict(lm3), col = "blue")

#############################
######## Recem Nascidos (newborns)
############################

bb <- read.table("babies.txt", header = TRUE)
head(bb)
str(bb)

# Data cleaning: drop, one column at a time and in this order, every row
# whose value equals the code listed for that column.
# NOTE(review): the codes are presumably missing-value sentinels -- confirm
# against the data set's codebook.
sentinel <- c(
  bwt = 999, gestation = 999, parity = 9, age = 99,
  height = 99, weight = 999, smoke = 9
)
bb <- Reduce(
  function(kept, column) kept[!(kept[[column]] == sentinel[[column]]), ],
  names(sentinel),
  bb
)

# Pairwise plots of all variables.
pairs(bb, lower.panel = NULL)
library(GGally)
ggpairs(bb)

# Linear models: start from the full model with every interaction.
lm.cheio <- lm(
  bwt ~ gestation * parity * age * height * weight * smoke,
  data = bb
)
# Started by dropping the variables that the author believes carry no
# biological meaning (here: parity).
lm1 <- lm(bwt ~ gestation * age * height * weight * smoke, data = bb)
anova(lm.cheio, lm1) # ok