data=read.table("http://www.stat.unm.edu/~ghuerta/stat574/Table9_1.txt",col.names=c("age","smoking","deaths","persons") ) logd=log(data$deaths) logp=log(data$persons) agecat=rep(NA,10) agecat[data$age=="35-44"]=1 agecat[data$age=="45-54"]=2 agecat[data$age=="55-64"]=3 agecat[data$age=="65-74"]=4 agecat[data$age=="75-84"]=5 plot(agecat,logd-logp,pch="*") agesq=agecat*agecat smoke=rep(0,10) smoke[data$smoking=="smoker"]=1 smkage=smoke*agecat par(mfrow=c(2,1)) plot(smoke,logd-logp,pch="*") plot(smkage,logd-logp,pch="*") f=glm(data$deaths ~ agecat + agesq + smoke + smkage, family=poisson,offset=logp) summary(f) print(cbind(agecat,smoke,data$deaths,f$fitted.values)) # Lets try the following f1=glm(deaths ~ age + smoke, family=poisson,offset=log(persons),data=data) summary(f1) # the same as f2=glm(deaths ~ factor(age) + factor(smoke), family=poisson,offset=log(persons),data=data) summary(f2) # age as numeric with interaction f3=glm(deaths ~ as.numeric(age) + factor(smoke) +as.numeric(age)*factor(smoke), family=poisson,offset=log(persons),data=data) summary(f3) # the same as f4=glm(data$deaths ~ agecat + smoke + agecat*smoke, family=poisson,offset=logp) summary(f4) # couldn't fit the quadratic age term via the data object f5=glm(deaths ~ as.numeric(age) + as.numeric(age)*as.numeric(age) +factor(smoke) +as.numeric(age)*factor(smoke), family=poisson,offset=log(persons),data=data) summary(f5) # looking back at the orginally fitted model... # To compute residuals and some other things fit_p=c(fitted.values(f)) pearsonresid = (data$deaths-fit_p)/sqrt(fit_p) chisq=sum(pearsonresid*pearsonresid) devres=sign(data$deaths-fit_p)*(sqrt(2*(data$deaths*log(data$deaths/fit_p)- (data$deaths-fit_p)))) deviance=sum(devres*devres) 1-pchisq(deviance,5) p=5 llikb=(f$aic-2*p)/(-2) fmin=glm(data$deaths ~ 1,family=poisson,offset=logp) p=1 llikm=(fmin$aic-2*p)/(-2) C=2*(llikb-llikm) Rsq=(llikm-llikb)/llikm # calculation of rate ratios s=summary(f) round(exp(s$coefficients[,1]),2) ll=s$coefficients[,1]-1.96*s$coefficients[,2] ul=s$coefficients[,1]+1.96*s$coefficients[,2] round(exp(ll),2) round(exp(ul),2) # Example taken from the BUGS book y=c(15,21,29,16,18,21,16,26,33,27,41,60,33,38,41,20,27,42) x=c(rep(0,3),rep(10,3),rep(33,3),rep(100,3),rep(333,3), rep(1000,3)) f=glm(y~ log(x+10) + x,family=poisson) summary(f) f$fitted.values names(f) # with some Bayesian model the resuls are: # node mean sd 2.5% median 97.5% # interc 2.182 0.2169 1.767 2.178 2.629 #log(x+10) 0.3169 0.0566 0.1993 0.3186 0.4254 # x -0.001006 1.06E-5 -0.0014 -0.001009 -5.044E-4 # mu[1] 18.48 1.793 15.23 18.39 22.35 # mu[2] 22.74 1.571 19.78 22.69 25.99 # mu[3] 28.29 1.506 25.43 28.26 31.31 # mu[4] 35.63 2.153 31.5 35.59 39.95 # mu[5] 40.43 2.739 35.18 40.38 45.95 # mu[6] 29.2 3.086 23.52 29.07 35.68