car=read.table("http://www.stat.unm.edu/~ghuerta/stat574/table8.1.txt",header=T) car # Some exploratory analysis to obtain Percentages of table 8.1 and figure 8.1 # Preliminary steps freq=car$freq; age=car$age sex=car$sex; resp=car$resp p1=freq[sex=="women"& age=="18-23"] p1=p1/sum(p1) p2=freq[sex=="women"& age=="24-40"] p2=p2/sum(p2) p3=freq[sex=="women"& age==">40"] p3=p3/sum(p3) tabw=rbind(p1,p2,p3) plot(c(1:3),tabw[,1],pch="o",type='b',lty=1,xlab="age",ylab="proportion",axes=F,ylim=c(0,0.8)) axis(1,at=c(1:3),labels=c("18-23","24-40",">40")); axis(2) points(c(1:3),tabw[,2],type='b',lty=2,pch="+") points(c(1:3),tabw[,3],type='b',lty=3,pch="x") title("Women") # now the same graph for 'Men' p1=freq[sex=="men"& age=="18-23"] p1=p1/sum(p1) p2=freq[sex=="men"& age=="24-40"] p2=p2/sum(p2) p3=freq[sex=="men"& age==">40"] p3=p3/sum(p3) tabm=rbind(p1,p2,p3) plot(c(1:3),tabm[,1],pch="o",type='b',lty=1,xlab="age",ylab="proportion",axes=F,ylim=c(0,0.8)) axis(1,at=c(1:3),labels=c("18-23","24-40",">40")); axis(2) points(c(1:3),tabm[,2],type='b',lty=2,pch="+") points(c(1:3),tabm[,3],type='b',lty=3,pch="x") title("Men") # Fitting logistic regressions library(nnet) res.car=multinom(response~factor(age)+factor(sex),weights=frequency,data=car) summary(res.car) factor(car$age) factor(car$sex) n=18 x1=rep(0,n); x1[car$sex=="men"]=1 x2=rep(0,n); x2[car$age=="24-40"]=1 x3=rep(0,n); x3[car$age==">40"]=1 res.car=multinom(car$response~x1+x2+x3,weights=car$frequency) car$response resp=rep(NA,n) resp[car$response=="no/little"]=1 resp[car$response=="important"]=2 resp[car$response=="very-important"]=3 resp=factor(resp) res.car=multinom(resp~x1+x2+x3,weights=car$frequency) # the negative log-lik is reported after the estimation is performed # or llik1=-res.car$value # Fitting the minimal model res.car0=multinom(resp~1,weights=car$frequency) llik0=-res.car0$value # likelihood ratio chi-square statistic C=2*(llik1-llik0) # pseudo R^2 r2=(llik0-llik1)/llik0 # Maximal model includes interactions res.carm=multinom(resp~x1+x2+x3 + x1*x2 + x1*x3+ x2*x3,weights=car$frequency) llikm=-res.carm$value D=2*(llikm-llik1) # cat = number of response categories # fc = number of factor combinations cat=3 fc=2*3 # fitted values from multinom fit=fitted.values(res.car) # Estimated probabilities prob=0 for(i in 1:fc){prob=c(prob,fit[cat*i,])} prob=as.vector(prob[-1]) # The actual fitted values # compute total freq per sex and age group total=rep(0,fc) total[1]=sum(freq[sex=="women"& age=="18-23"]) total[2]=sum(freq[sex=="women"& age=="24-40"]) total[3]=sum(freq[sex=="women"& age==">40"]) total[4]=sum(freq[sex=="men"& age=="18-23"]) total[5]=sum(freq[sex=="men"& age=="24-40"]) total[6]=sum(freq[sex=="men"& age==">40"]) e=0 for(i in 1:fc){e=c(e,fit[cat*i,]*total[i])} e=e[-1] # Pearson's residuals r=(freq-e)/sqrt(e) # Pearson's chisq X=sum(r^2) # Here are now some commands to deal with # the ordinal logistic regression model library(MASS) res.polr=polr(factor(response)~factor(age)+factor(sex),weights=frequency,data=car) summary(res.polr) res.polr=polr(resp~x1+x2+x3,weights=frequency) summary(res.polr) # model likelihood llik=res.polr$deviance/-2 # Fit of minimal model, C and R^2 res.polr0=polr(resp~1,weights=frequency) llik0=res.polr0$deviance/-2 C=2*(llik-llik0) r2=(llik0-llik)/llik0 fit=fitted.values(res.polr) # Estimated probabilities prob=0 for(i in 1:fc){prob=c(prob,fit[cat*i,])} prob=as.vector(prob[-1]) # The actual fitted values # compute total freq per sex and age group total=rep(0,fc) total[1]=sum(freq[sex=="women"& age=="18-23"]) total[2]=sum(freq[sex=="women"& age=="24-40"]) total[3]=sum(freq[sex=="women"& age==">40"]) total[4]=sum(freq[sex=="men"& age=="18-23"]) total[5]=sum(freq[sex=="men"& age=="24-40"]) total[6]=sum(freq[sex=="men"& age==">40"]) e=0 for(i in 1:fc){e=c(e,fit[cat*i,]*total[i])} e=e[-1] # Pearson's residuals r=(freq-e)/sqrt(e) # Pearson's chisq X=sum(r^2)