############################################### # # Lab 1. PROBABILITY & STATISTICAL DISTRIBUTIONS # ############################################### setwd("C:/Bayesian Course/Day 1_Probability/Lab Probability") # question 1.1 t<-rbinom(8, 12, 0.2) hist(t) dbinom(0, 10, 0.2) dbinom(2, 10, 0.2) dbinom(5, 10, 0.2) 1-pbinom(4,10,0.2) t <- rbinom(800, 40, 0.2) mean(t) var(t) # Plotting our random draws from the binomial against the known actual density function. # For discrete distributions, easier to make a frequency table using table, and then barplot to chart it. # Then we can add the points on top of our jury-rigged histogram. x <-rbinom(20000, prob = 0.2, size = 12) tx <- table(factor(x, levels = 0:12))/20000 b1 <-barplot(tx, ylim = c(0, 0.3), ylab = "Probability") points(b1, dbinom(0:12, prob = 0.2, size = 12), pch = 16, col='red') # Negative binnomial draws # parameterization in terms of mu and k (size) mu = 2 # mean size = 0.5 # clustering parameter # question 1.2. Plotting the negative binomial x<- rnbinom(10000, size = .5, mu = 2) # check mean and variance from random draws mean(x) var(x) # theoretical mean= mu # theoretical variance = mu+mu^2/k (size)= (2+(4/0.5))=10 tx <- table(factor(x, levels = 0:12))/10000 b1 <-barplot(tx, ylab = "Probability", ylim = c(0, 0.5)) points(b1, dnbinom(0:12, size = .5, mu = 2), pch = 16, cex = 2, col = "red") ###################################################### # # 2. DATA DISTRIBUTION & METHOD OF MOMENTS # ###################################################### # 2.1 Sapling data sapling <- read.table("sapling_growth.txt", header=TRUE) hist(sapling$Growth, prob=T) # Looks lognormal or gamma. Let's try adding a gamma. # But how do we find the parameters of the distribution? # MOM gamma distribution. mean.gamma<-mean(sapling$Growth) var.gamma<-var(sapling$Growth) # now match moments to distribution parameters. #get prior shape parameters for prior gamma distribution of lambda: a=mean.gamma^2/var.gamma b=mean.gamma/var.gamma # Now, we can see how the gamma fits the data hist(sapling$Growth, prob=T) x<-seq(0,50,0.02) curve(dgamma(x, a,b), add = TRUE, col="blue") # how about the lognormal distribution? curve(dlnorm(x, mean(log(sapling$Growth)), sd(log(sapling$Growth))),col="red", add = TRUE) # 2.2 Tows data. tows <- read.table("HMtab43.txt", header=TRUE) attach(tows) # so that you can work with Hauls and Captures directly. # Don't forget to detach! # Take a look at the data. # Note that this looks a lot like a histogram already, # since it is sorted by # of Captures. barplot(Hauls, names = Captures, ylab= "#Hauls", xlab= "#Captures") totHauls <- sum(Hauls) Haulfreq <- Hauls/totHauls # expectation bp <- barplot(Haulfreq,names=Captures,ylab="Frequency",xlab="Captures") # Now let's calculate parameters using MOM from hauls data # We want to calculate the mean of this frequency distribution. m <- sum(Hauls*Captures)/sum(Hauls) # Alternatively, since we've already calculated Haulfreq, m <- sum(Haulfreq*Captures) sigma.2 <- sum(Hauls*(Captures-m)^2)/sum(Hauls) k <- m^2/(sigma.2-m) # Or you may also generate a data file with 807 0 captures, # 37 1 captures etc by using the rep command counts<-rep(Captures,Hauls) # Maybe negative binomial? nbHaulfreq <- dnbinom(0:17,mu=m,size=k) poisfr<-dpois(0:17,lambda=m) barplot(rbind(Haulfreq,nbHaulfreq,poisfr),col=c("red","yellow", "blue"),names=Captures,beside=TRUE) legend(10,0.6,c("Observed","Neg. bin.", "Poisson"),fill=c("red","yellow", "blue")) detach(tows) ############################### # #3. COMPOUND DISTRIBUTIONS # ############################## # Zero-inflated negative binomial dzinbinom = function(x, mu, size, zprob) { ifelse(x == 0, zprob + (1 - zprob) * dnbinom(0, mu = mu, size = size), (1 - zprob) * dnbinom(x, mu = mu, size = size)) } rzinbinom = function(n, mu, size, zprob) { ifelse(runif(n) < zprob, 0, rnbinom(n, mu = mu, size = size)) } x<-rzinbinom(n = 1000,mu= 2, size = .5, 0.2) tx <- table(factor(x, levels = 0:12))/1000 b1 <-barplot(tx, ylab = "Probability", ylim = c(0, 0.6)) points(b1, dzinbinom(0:12, mu = 2, size =0.5, 0.2), pch = 16) # Try it with a Poisson dzinpois = function(x, lambda, zprob) { ifelse(x == 0, zprob + (1 - zprob) * dpois(0, lambda = lambda), (1 - zprob) * dpois(x, lambda=lambda)) } rzinpois = function(n,lambda, zprob) { ifelse(runif(n) < zprob, 0, rpois(n, lambda=lambda)) } x<-rzinpois(10000, 2, 0.5) tx <- table(factor(x, levels = 0:12))/10000 b1 <-barplot(tx, ylab = "Probability", ylim = c(0, 0.6)) points(b1, dzinpois(0:12, 2, 0.5), pch = 16) # Normal with sd proportional to the mean d_prop_norm <- function(x, mean, prop) { dnorm(x, mean, sd = mean * prop) } r_prop_norm <- function(n, mean, prop) { rnorm(n, mean, sd = mean * prop) } xs <- r_prop_norm(1000, 17, 3) hist(xs, freq=FALSE, ylim=c(0,0.008)) curve(d_prop_norm(x, 17, 3), add=TRUE, lwd = 2, col = "blue") # Note that we can use curve, on a hist, # because this is a continuous probability function #(unlike the nbinom and pois, above).