\documentclass{article} \usepackage[american]{babel} \usepackage{graphicx} \usepackage{alltt} \usepackage{url} \usepackage{amsmath} \newcommand{\codef}[1]{{\tt #1}} \newcommand{\code}[1]{\texttt{#1}} \usepackage{sober} \newcounter{exercise} %\numberwithin{exercise}{section} \newcommand{\exnumber}{\addtocounter{exercise}{1} \theexercise \thinspace} \newcommand{\R}{{\sf R}} \title{Lab 6: estimation (solutions)} \author{Ben Bolker} \begin{document} \bibliographystyle{plain} \maketitle \copyright 2005 Ben Bolker \textbf{Exercise \exnumber}: (Recreate negative binomial data, likelihood surface, etc.): <<>>= set.seed(1001) mu.true=1 k.true=0.4 x = rnbinom(50,mu=mu.true,size=k.true) NLLfun1 = function(p,dat=x) { mu=p[1] k=p[2] -sum(dnbinom(x,mu=mu,size=k,log=TRUE)) } @ Method-of-moments estimates, for starting point: <<>>= m = mean(x) v = var(x) mu.mom = m k.mom = m/(v/m-1) @ Using \code{optim()} to find best fit: <<>>= O1 = optim(fn=NLLfun1,par=c(mu=mu.mom,k=k.mom)); O1 @ Calculating likelihood surface: <<>>= muvec = seq(0.4,3,by=0.05) kvec = seq(0.01,0.7,by=0.01) resmat = matrix(nrow=length(muvec),ncol=length(kvec)) for (i in 1:length(muvec)) { for (j in 1:length(kvec)) { resmat[i,j] = NLLfun1(c(muvec[i],kvec[j])) } } @ The new part: (1) construct confidence levels (bivariate, so use \code{df=2} in \code{qchisq}): <<>>= alevels=c(0.5,0.9,0.95,0.99,0.999) minval = O1$value nll.levels = qchisq(alevels,df=2)/2+minval @ Draw the contour plot, then add MLEs, true values, and method-of-moments values <>= contour(muvec,kvec,resmat,levels=nll.levels,labels=alevels) points(O1$par["mu"],O1$par["k"],pch=16) points(mu.true,k.true,pch=1) points(mu.mom,k.mom,pch=2) @ \textbf{Exercise\exnumber:} Set up reef fish data, yet again: <<>>= a = 0.696 b = 9.79 recrprob = function(x,a=0.696,b=9.79) a/(1+(a/b)*x) scoefs = c(mu=25.32,k=0.932,zprob=0.123) rzinbinom = function(n,mu,size,zprob) { ifelse(runif(n)>= NLLfun3L = function(loga,logb,logd) { a = exp(loga) b = exp(logb) d = exp(logd) recrprob = a/(1+(a/b)*settlers^d) -sum(dbinom(recr,prob=recrprob,size=settlers,log=TRUE),na.rm=TRUE) } NLLfun4L = function(loga,logb) { a = exp(loga) b = exp(logb) recrprob = a/(1+(a/b)*settlers) -sum(dbinom(recr,prob=recrprob,size=settlers,log=TRUE),na.rm=TRUE) } NLLfun5L = function(loga) { a = exp(loga) recrprob = a r = -sum(dbinom(recr,prob=recrprob,size=settlers,log=TRUE),na.rm=TRUE) ## cat(loga,a,r,"\n") return(r) } @ Start by doing linear regression of recruits vs. settlers, with a zero intercept: <<>>= lm1 = lm(recr~settlers-1) lm.loga = list(log(coef(lm1))) rename=function(L,names) { for (i in seq(along=L)) { names(L[[i]]) = NULL } names(L)=names L } lm.loga=rename(lm.loga,"loga") @ First fit density-independent model, using BFGS <<>>= library(stats4) m5L = mle(minuslogl=NLLfun5L,start=list(loga=-1), method="BFGS",control=list(ndeps=0.01)) @ Had to use Nelder-Mead instead of BFGS: <<>>= library(stats4) m4L = mle(minuslogl=NLLfun4L,start=list(loga=log(0.5),logb=log(10)), method="Nelder-Mead") @ set new starting condition to coeffs of last fit: <<>>= s3 = c(coef(m4L),list(logd=0)) @ <<>>= m3L = mle(minuslogl=NLLfun3L,start=s3, method="Nelder-Mead") @ Table of coefficients and log-likelihoods: <<>>= lin = c(exp(coef(m5L)),NA,NA,-logLik(m5L)) BH = c(exp(coef(m4L)),NA,-logLik(m4L)) shep = c(exp(coef(m3L)),-logLik(m3L)) ptab = rbind(lin,BH,shep) colnames(ptab)=c("a","b","d","NLL") ptab @ Confidence intervals: <<>>= exp(confint(m3L)) exp(confint(m4L)) exp(confint(m5L)) @ \textbf{Exercise\exnumber:} ZINB, negative binomial, Poisson: <<>>= NLLpois = function(lambda) { -sum(dpois(settlers,lambda=lambda,log=TRUE)) } NLLnb = function(mu,k) { -sum(dnbinom(settlers,mu=mu,size=k,log=TRUE)) } @ Set up ZINB function again: <<>>= dzinbinom = function(x,mu,size,zprob,log=FALSE) { v = ifelse(x==0, zprob+(1-zprob)*dnbinom(0,mu=mu,size=size), (1-zprob)*dnbinom(x,mu=mu,size=size)) if (log) return(log(v)) else v } @ <<>>= NLLzinb = function(mu,k,zprob) { -sum(dzinbinom(settlers,mu=mu,size=k,zprob=zprob,log=TRUE)) } @ Fit all three functions, from simplest to most complex: <<>>= m = mean(settlers) mpois = mle(minuslogl=NLLpois,start=list(lambda=m)) mnbinom = mle(minuslogl=NLLnb,start=list(mu=m,k=0.5)) mzinbinom = mle(minuslogl=NLLzinb,start=list(mu=m,k=0.5,zprob=0.5)) @ Table of coefficients and log-likelihoods: <<>>= pois = c(coef(mpois),NA,NA,-logLik(mpois)) nbin = c(coef(mnbinom),NA,-logLik(mnbinom)) zinb = c(coef(mzinbinom),-logLik(mzinbinom)) ptab = rbind(pois,nbin,zinb) colnames(ptab)=c("lambda/mu","k","zprob","NLL") ptab @ The zero-inflated negative binomial wins by a very large margin (how small would we have to make the data set before we couldn't tell the difference any more?) --- 6000 log-likelihood units between neg bin and Poisson, and an additional 21 log-likelihood units between neg bin and z-i neg bin \ldots <<>>= confint(mpois) confint(mnbinom) confint(mzinbinom) @ As expected, confidence limits for $\mu$ get wider and wider as we add more parameters to the model. Still, even \code{zprob} is pretty well bounded from these data. \end{document}