###############################################
#
# Lab 1. PROBABILITY & STATISTICAL DISTRIBUTIONS
#
###############################################

setwd("C:/Bayesian Course/Day 1_Probability/Lab Probability")

# question 1.1

t<-rbinom(8, 12, 0.2)  
hist(t)

dbinom(0, 10, 0.2)
dbinom(2, 10, 0.2)
dbinom(5, 10, 0.2)
1-pbinom(4,10,0.2)

t <- rbinom(800, 40, 0.2)  
mean(t)
var(t)

# Plotting our random draws from the binomial against the known actual density function. 
# For discrete distributions, easier to make a frequency table using table, and then barplot to chart it. 
# Then we can add the points on top of our jury-rigged histogram.

x <-rbinom(20000, prob = 0.2, size = 12)
tx <- table(factor(x, levels = 0:12))/20000
b1 <-barplot(tx, ylim = c(0, 0.3), ylab = "Probability")
points(b1, dbinom(0:12, prob = 0.2, size = 12), pch = 16, col='red')

# Negative binnomial draws

# parameterization in terms of mu and k (size)

mu = 2  # mean
size = 0.5 # clustering parameter


# question 1.2. Plotting the negative binomial

x<- rnbinom(10000, size = .5, mu = 2)
# check mean and variance from random draws
mean(x)
var(x)

# theoretical mean= mu
# theoretical variance = mu+mu^2/k (size)= (2+(4/0.5))=10

tx <- table(factor(x, levels = 0:12))/10000
b1 <-barplot(tx, ylab = "Probability", ylim = c(0, 0.5))
points(b1, dnbinom(0:12, size = .5, mu = 2), pch = 16, cex = 2, col = "red")

######################################################
# 
# 2. DATA DISTRIBUTION & METHOD OF MOMENTS
#
######################################################

# 2.1 Sapling data

sapling <- read.table("sapling_growth.txt", header=TRUE)

hist(sapling$Growth, prob=T)

# Looks  lognormal or gamma. Let's try adding a gamma.
# But how do we find the parameters of the distribution? 

# MOM gamma distribution.

mean.gamma<-mean(sapling$Growth)
var.gamma<-var(sapling$Growth)

# now match moments to distribution parameters.
#get prior shape parameters for prior gamma distribution of lambda:

a=mean.gamma^2/var.gamma
b=mean.gamma/var.gamma

# Now, we can see how the gamma fits the data 

hist(sapling$Growth, prob=T)
x<-seq(0,50,0.02)
curve(dgamma(x, a,b), add = TRUE, col="blue")

# how about the lognormal distribution? 
curve(dlnorm(x, mean(log(sapling$Growth)), sd(log(sapling$Growth))),col="red", add = TRUE)

# 2.2 Tows data.

tows <- read.table("HMtab43.txt", header=TRUE)
attach(tows)  
# so that you can work with Hauls and Captures directly. 
# Don't forget to detach!

# Take a look at the data. 
# Note that this looks a lot like a histogram already,
# since it is sorted by # of Captures.

barplot(Hauls, names = Captures, ylab= "#Hauls", xlab= "#Captures")

totHauls <- sum(Hauls)
Haulfreq <- Hauls/totHauls # expectation
bp <- barplot(Haulfreq,names=Captures,ylab="Frequency",xlab="Captures")

# Now let's calculate parameters using MOM from hauls data
# We want to calculate the mean of this frequency distribution. 
 
m <- sum(Hauls*Captures)/sum(Hauls)

# Alternatively, since we've already calculated Haulfreq, 

m <- sum(Haulfreq*Captures)
sigma.2 <- sum(Hauls*(Captures-m)^2)/sum(Hauls)
k <- m^2/(sigma.2-m)

# Or you may also generate a data file with 807 0 captures,
# 37 1 captures etc by using the rep command

counts<-rep(Captures,Hauls)

# Maybe negative binomial? 

nbHaulfreq <- dnbinom(0:17,mu=m,size=k)
poisfr<-dpois(0:17,lambda=m)
barplot(rbind(Haulfreq,nbHaulfreq,poisfr),col=c("red","yellow", "blue"),names=Captures,beside=TRUE)
legend(10,0.6,c("Observed","Neg. bin.", "Poisson"),fill=c("red","yellow", "blue"))

detach(tows)


###############################
#
#3. COMPOUND DISTRIBUTIONS
#
##############################

# Zero-inflated negative binomial 

dzinbinom = function(x, mu, size, zprob) {
 ifelse(x == 0, 
 	zprob + (1 - zprob) * dnbinom(0, mu = mu,
 		size = size),
	  		(1 - zprob) * dnbinom(x, mu = mu, size = size))
 	}
 
 rzinbinom = function(n, mu, size, zprob) {
 ifelse(runif(n) < zprob, 0, rnbinom(n, mu = mu, size = size))
 }

x<-rzinbinom(n = 1000,mu= 2, size = .5, 0.2)
tx <- table(factor(x, levels = 0:12))/1000
b1 <-barplot(tx, ylab = "Probability", ylim = c(0, 0.6))
points(b1, dzinbinom(0:12, mu = 2, size =0.5, 0.2), pch = 16)

# Try it with a Poisson

dzinpois = function(x, lambda, zprob) {
 ifelse(x == 0, zprob + (1 - zprob) * dpois(0,
 lambda = lambda), (1 - zprob) * dpois(x, lambda=lambda))
 }
 
 rzinpois = function(n,lambda, zprob) {
 ifelse(runif(n) < zprob, 0, rpois(n, lambda=lambda))
 }

x<-rzinpois(10000, 2, 0.5)
tx <- table(factor(x, levels = 0:12))/10000
b1 <-barplot(tx, ylab = "Probability", ylim = c(0, 0.6))
points(b1, dzinpois(0:12, 2, 0.5), pch = 16)

# Normal with sd proportional to the mean

d_prop_norm <- function(x, mean, prop) {
	dnorm(x, mean, sd = mean * prop)
	}
r_prop_norm <- function(n, mean, prop) {
	rnorm(n, mean, sd = mean * prop)
	}

xs <- r_prop_norm(1000, 17, 3)

hist(xs, freq=FALSE, ylim=c(0,0.008))
curve(d_prop_norm(x, 17, 3), add=TRUE, lwd = 2, col = "blue")

# Note that we can use curve, on a hist, 
# because this is a continuous probability function 
#(unlike the nbinom and pois, above).