# functions for response tables # Tom Minka 11/8/01 # returns the elements of x not indexed by i not <- function(x,i) { if(is.numeric(i)) return(x[-i]) if(is.character(i)) return(x[setdiff(names(x),i)]) if(is.logical(i)) return(x[!i]) } # returns the name of the response variable # works for formulas, model frames, and fitted models response.var <- function(object) { if(inherits(object, "terms")) { a <- attributes(object) if(!a$response) return(character(0)) return(as.character(a$variables[2])) } response.var(terms(object)) } # returns only the predictors which appear as bare terms predictor.vars <- function(object) { if(inherits(object, "terms")) { a <- attributes(object) return(a$term.labels[a$order == 1]) } predictor.vars(terms(object)) } # should already be in R terms.data.frame <- function(df) { fmla <- attr(df,"terms") if(is.null(fmla)) { nam <- names(df) resp <- nam[length(nam)] pred <- nam[1:(length(nam)-1)] fmla <- terms(formula(paste(resp,"~",paste(pred,collapse="+")))) } fmla } rep.mat <- function(x,n,m) { array(rep(x,n*m),c(length(x)*n,m)) } rep.col <- function(x,n) { rep.mat(x,1,n) } rep.row <- function(x,n) { t(rep.col(x,n)) } ############################################################################# # given an aov fit, plot the effects for each predictor # as well as a boxplot of the residuals # you can do an F-test visually by comparing the spread of the effects # with the spread of the residuals effects.plot <- function(object,se=F) { mt <- model.tables(object,se=se) vars <- names(mt$tables) nvar <- length(vars) opar <- par(mar=c(2.5,4,0,0.1)) on.exit(par(opar)) plot.new() ylim <- c(min(sapply(mt$tables,min)), max(sapply(mt$tables,max))) plot.window(xlim=c(0.5,nvar+1+0.5),ylim=ylim) axis(1,1:(nvar+1), labels=c(vars,"residuals")) axis(2) title(ylab=response.var(object)) if(se) { if(!is.numeric(se)) se <- 1.96 p <- (1-2*pnorm(-se))*100 cat("Displaying ",format(p,digits=2),"% confidence intervals\n",sep="") } for(k in 1:nvar) { eff <- mt$tables[[k]] text(k, eff, names(eff)) if(se) { jitter <- runif(length(eff))*0.1 arrows(k+jitter, eff-se*mt$se[[k]], k+jitter, eff+se*mt$se[[k]], code = 3, col = "green", angle = 75, length = .1) } } res <- residuals(object) x <- model.frame(object) res <- tapply(res, x[vars], mean) boxplot(res, at=nvar+1, add=T) } ############################################################################# print.rtable <- function(rt,...) { cat(response.var(rt),"\n") # strip attributes and print as a matrix attributes(rt) <- attributes(rt)[c("dim","dimnames")] print(rt) } terms.rtable <- function(rt) { attr(rt,"terms") } is.ordered.rtable <- function(rt) { # "ordered" attribute is not guaranteed to be ordered correctly, # because of t() and aperm() attr(rt,"ordered")[names(dimnames(rt))] } as.data.frame.rtable <- function(rt) { df <- as.data.frame.table(rt) len <- length(names(df)) names(df)[len] <- response.var(rt) df } aov.rtable <- function(rt,med=F) { frame <- as.data.frame.rtable(rt) if(med) { p <- medpolish(rt,trace.iter=F) return(structure(terms=terms(rt), model=frame, class="aov")) } aov(terms(rt), frame) } # returns a table of responses, stratified according to the terms in fmla # and aggregated according to fun. rtable.terms <- function(fmla, x, fun = mean) { resp <- response.var(fmla) pred <- predictor.vars(fmla) rt <- tapply(x[[resp]], x[pred], fun) class(rt) <- "rtable" attr(rt,"terms") <- fmla attr(rt,"ordered") <- sapply(x[pred],is.ordered) rt } # model.frame is a data.frame with a "terms" attribute describing a model rtable.data.frame <- function(object, ...) { rtable(terms(object), object, ...) } rtable.aov <- function(object, ...) { x <- model.frame(object) resp <- response.var(x) pred <- predictor.vars(x) y <- expand.grid(lapply(x[pred],levels)) y[[resp]] <- predict(object,y) y <- model.frame(terms(x),y) rtable(y, ...) } # legal input: # rtable(y~f1+f2,x) # rtable(y~.,x) # rtable(x$y ~ x$f1 + x$f2) rtable.formula <- function(formula, data, ...) { expr <- match.call(expand = F) expr$... <- NULL expr[[1]] <- as.name("model.frame.default") model <- eval(expr, parent.frame()) return(rtable(model, ...)) } rtable <- function(object, ...) UseMethod("rtable") # plots rows of x as traces profile.plot <- function(y,standard=F,med=F,ylab,...) { reorder.cols <- function(y,i) { a <- attributes(y); y <- y[,i] a$dimnames[[2]] <- a$dimnames[[2]][i]; attributes(y) <- a y } #if(missing(ylab)) ylab <- deparse(substitute(y)) row.var <- names(dimnames(y))[1] col.var <- names(dimnames(y))[2] if(med) { library(eda) fit <- medpolish(y,trace.iter=F) col.centers <- fit$col + fit$overall resid <- fit$residuals } else { col.centers <- apply(y,2,function(z) mean(z,na.rm=T)) row.effects <- apply(y,1,function(z) mean(z,na.rm=T)) - mean(y) resid <- y - outer(row.effects, col.centers, "+") } if(standard) { # subtract column centers y <- y - rep.row(col.centers,length(rownames(y))) # choose x to make the profiles linear # fails if there are missing values if(F) { s <- La.svd(resid) v <- s$vt[1,] } else { s <- svd(resid) v <- s$v[,1] } #v <- v + rnorm(length(v))*1e-1 i <- order(v) if(!is.ordered.rtable(y)[2] || all(i == 1:length(i))) { y <- reorder.cols(y,i) x <- v[i] } else { x <- seq(along=colnames(y)) } if(missing(ylab)) ylab <- paste(row.var, "effect on", response.var(y)) } else { # sort columns by center i <- order(col.centers) if(!is.ordered.rtable(y)[2] || all(i == 1:length(col.centers))) { y <- reorder.cols(y,i) # space x axis by center x <- col.centers[i] } else { x <- seq(along=colnames(y)) } if(missing(ylab)) ylab <- response.var(y) } opar <- par(mar=c(4.5,4,0,0.1)) on.exit(par(opar)) plot.new() xlim <- range(x) w <- max(strwidth(rownames(y),units="inches"))/par("pin")[1] w <- w+0.05 xlim[2] <- xlim[2] + diff(xlim)*w/(1-w) xspc <- 0.05*diff(xlim) plot.window(xlim=xlim,ylim=range(as.vector(y),finite=T)) axis(1,x, labels=colnames(y)) axis(2) box() for(i in 1:nrow(y)) { color <- ((i-1) %% 6) + 1 lines(x, y[i,],col=color, type="o", ...) j <- rev(which(!is.na(y[i,])))[1] text(x[j]+xspc, y[i,j], rownames(y)[i], col=color, adj=0, ...) } title(xlab=col.var,ylab=ylab) }