# Functions for lab 5
# by Tom Minka

rep.mat <- function(x,n,m) {
  array(rep(x,n*m),c(length(x)*n,m))
}
rep.col <- function(x,n) {
  rep.mat(x,1,n)
}
rep.row <- function(x,n) {
  t(rep.col(x,n))
}

terms.data.frame <- function(x,env=parent.frame(),...) {
  fmla <- attr(x,"terms")
  if(is.null(fmla)) {
    # minka: assume the last column is the response
    nm <- make.names(names(x))
    if(length(nm) > 1) {
      lhs <- nm[length(nm)]
      rhs <- nm[-length(nm)]
    }
    else {
      lhs <- NULL
      rhs <- nm
    }
    fmla <- terms(formula(paste(lhs,"~",paste(rhs,collapse="+")),env=env,...))
  }
  fmla
}
formula.data.frame <- function(x,env=parent.frame(),...) {
  formula(terms(x,env=env),...)
}

# returns the name of the response variable
# works for formulas, model frames, and fitted models
response.var <- function(object) {
  if(inherits(object, "terms")) {
    a <- attributes(object)
    if(!a$response) return(character(0))
    return(as.character(a$variables[2]))
  }
  if(inherits(object,"data.frame") && is.null(attr(object,"terms"))) {
    # shortcut to avoid make.names
    return(names(object)[length(object)])
  }
  response.var(terms(object))
}

# returns only the predictors which appear as bare terms
predictor.vars <- function(object) {
  if(inherits(object, "terms")) {
    a <- attributes(object)
    return(a$term.labels[a$order == 1])
  }
  if(inherits(object,"data.frame") && is.null(attr(object,"terms"))) {
    # shortcut to avoid make.names
    return(names(object)[-length(object)])
  }
  predictor.vars(terms(object))
}
# returns all terms on the rhs, including higher-order terms
predictor.terms <- function(object) {
  attr(terms(object),"term.labels")
}

##############################################################################

auto.layout <- function(len) {
  layout <- c(1,len)
  if(len > 3) {
    din <- par("din")
    asp <- din[2]/din[1]
    layout[2] <- ceiling(sqrt(len/asp))
    layout[1] <- ceiling(len/layout[2])
  }
  layout
}

hist.data.frame <- function(x,layout,...) {
  if(missing(layout)) layout <- auto.layout(length(x))
  opar <- par(mfrow=layout)
  on.exit(par(opar))
  for(i in names(x)) {
    hist(x[[i]],xlab=i,main="",...)
  }
}

scale.data.frame <- function(x,...) {
  i <- sapply(x,is.numeric)
  x[i] <- data.frame(scale(data.matrix(x[i]),...))
  x
}

project <- function(x,w) {
  pred <- rownames(w)
  xw <- data.frame(data.matrix(x[pred]) %*% w)
  other <- setdiff(colnames(x),pred)
  if(length(other) > 0) cbind(xw, x[other])
  else xw
}

pca <- function(x,k=1) {
  x <- data.matrix(x[sapply(x,is.numeric)])
  s <- svd(x)
  cat("R-squared =",format(sum(s$d[1:k]^2)/sum(s$d^2)),"\n")
  w <- s$v[,1:k]
  if(k == 1) dim(w) <- c(length(w),1)
  rownames(w) <- colnames(x)
  colnames(w) <- paste("h",1:ncol(w),sep="")
  w
}
solve.chol <- function(a,b) {
  ch <- chol(a)
  backsolve(ch,forwardsolve(t(ch),b))
}
pca2 <- function(x,k=1) {
  # PCA vis Roweis's EM algorithm
  # takes O(dnk) time
  x <- data.matrix(x[sapply(x,is.numeric)])
  x <- t(x)
  d <- nrow(x)
  n <- ncol(x)
  w <- array(rnorm(d*k),c(d,k))
  # EM
  for(iter in 1:100) {
    old.w <- w
    if(d >= n) {
      # h is k by n
      # flops is kdk + kdn + kkn + dnk + knk + kkn = O(dnk)
      h <- solve.chol(t(w) %*% w, t(w) %*% x)
      w <- x %*% t(solve.chol(h %*% t(h), h))
    } else {
      # flops is kdk + kdn + kkd + knd + knk + kkd = O(dnk)
      h <- solve.chol(t(w) %*% w, t(w)) %*% x
      w <- t(solve.chol(h %*% t(h), h %*% t(x)))
    }
    if(max(abs(w - old.w)) < 1e-5) break
  }
  if(iter == 100) warning("not enough iterations")
  # postprocessing
  w <- qr.Q(qr(w))
  wx <- t(w) %*% x
  s <- svd(wx)
  cat("R-squared =",format(sum(s$d^2)/sum(x*x)),"\n")
  w <- w %*% s$u
  rownames(w) <- rownames(x)
  colnames(w) <- paste("h",1:ncol(w),sep="")
  w
}

plot.axes <- function(w,col=2,origin,keep=NULL,cex=par("cex")) {
  if(is.null(keep)) keep <- 0.2
  usr <- par("usr")
  # is the origin in the plot?
  if(missing(origin)) {
    origin <- (usr[1] < 0) && (usr[2] > 0) && (usr[3] < 0) && (usr[4] > 0)
  }
  if(origin) m <- c(0,0)
  else m <- c((usr[2]+usr[1])/2, (usr[4]+usr[3])/2)

  width <- strwidth(rownames(w),cex=cex)/2
  height <- strheight(rownames(w),cex=cex)/2
  # "a" is scale factor to place text at arrow tips
  a <- pmin(width/abs(w[,1]), height/abs(w[,2]))
  # txt.w is the offset of the text from the arrow tip
  txt.w <- w * rep.col(a,2)

  xlim <- usr[1:2]-m[1]
  # make room on left
  xlim[1] <- xlim[1] + diff(xlim)/par("pin")[1]*0.02
  xscale <- c()
  # find xscale so that
  #   xscale*w[,1] + txt.w[,1] - width > xlim[1]  (w < 0)
  #   xscale*w[,1] + txt.w[,1] + width < xlim[2]  (w > 0)
  i <- (w[,1] < 0)
  xscale[1] <- min((xlim[1] + width[i] - txt.w[i,1])/w[i,1])
  i <- (w[,1] > 0)
  xscale[2] <- min((xlim[2] - width[i] - txt.w[i,1])/w[i,1])
  # if one of the sets is empty, the scale will be NA
  xscale <- min(xscale,na.rm=T)

  ylim <- usr[3:4]-m[2]
  # make room for color key
  ylim[2] <- ylim[2] - diff(ylim)/par("pin")[2]*0.06
  yscale <- c()
  # find yscale so that
  #   yscale*w[,2] + txt.w[,2] - height > ylim[1]  (w < 0)
  i <- (w[,2] < 0)
  yscale[1] <- min((ylim[1] + height[i] - txt.w[i,2])/w[i,2])
  i <- (w[,2] > 0)
  yscale[2] <- min((ylim[2] - height[i] - txt.w[i,2])/w[i,2])
  yscale <- min(yscale,na.rm=T)

  # scale equally
  xscale <- min(c(xscale,yscale))
  yscale <- xscale
  w <- scale(w,center=F,scale=1/c(xscale,yscale))
  if(T) {
    # only plot arrows of significant length
    i <- (abs(w[,1])/(xlim[2]-xlim[1])*par("fin")[1] > keep) | 
         (abs(w[,2])/(ylim[2]-ylim[1])*par("fin")[2] > keep)
    if(any(!i)) cat("omitting arrow for",rownames(w)[!i],"\n")
  }
  if(!any(i)) { warning("all arrows omitted"); return() }
  len <- min(par("pin"))/20
  arrows(m[1], m[2], m[1]+w[i,1], m[2]+w[i,2], col=col, len=len)
  w <- scale(w,center=-m,scale=F)
  w <- w + txt.w
  # note: rownames(w[i,]) does not work if i picks only one
  text(w[i,1],w[i,2],labels=rownames(w)[i],col=col,cex=cex)
}

##############################################################################

# returns the minimum user limits for a plotting region of size pin
# which would enclose the given labels at the given positions
# with the justification adj at rotation srt.
range.text <- function(x,y,labels,cex=NULL,adj=NULL,srt=0,
                       pin=par("pin")) {
  lab.w <- strwidth(labels,"inches",cex=cex)
  lab.h <- strheight(labels,"inches",cex=cex)

  # bounding box of text, columns are (x1,y1)(x2,y1)(x2,y2)(x1,y2)
  n <- length(labels)
  ix <- seq(1,8,by=2)
  iy <- seq(2,8,by=2)
  hull <- array(0,c(n,8))
  hull[,c(1,7,2,4)] <- rep(0,n)
  hull[,c(3,5,6,8)] <- rep(1,n)
  # put adj point at origin and scale
  if(is.null(adj)) adj <- c(0.5,0.5)
  if(length(adj) == 1) adj <- c(adj,0.5)
  hull[,ix] <- (hull[,ix] - adj[1])*rep.col(lab.w,4)
  hull[,iy] <- (hull[,iy] - adj[2])*rep.col(lab.h,4)
  # rotate
  srt <- srt/180*pi
  cr <- cos(srt)
  sr <- sin(srt)
  R <- array(c(cr,-sr,sr,cr),c(2,2))
  R <- diag(4) %x% R
  hull <- hull %*% R
  bbox <- list()
  bbox$left <- apply(hull[,ix],1,min)/pin[1]
  bbox$right <- apply(hull[,ix],1,max)/pin[1]
  bbox$bottom <- apply(hull[,iy],1,min)/pin[2]
  bbox$top <- apply(hull[,iy],1,max)/pin[2]

  # solve constraints
  xmid <- mean(range(x))
  ymid <- mean(range(y))
  xlim <- c()
  # these come from the constraints
  # (x-xmin)/(xmax-xmin) + left > 0
  # (x-xmin)/(xmax-xmin) + right < 1
  # where xmax is fixed at 2*xmid - xmin
  min1 <- min((x + 2*bbox$left*xmid)/(1+2*bbox$left))
  min2 <- min((2*(1-bbox$right)*xmid - x)/(1-2*bbox$right))
  xlim[1] <- min(min1,min2)
  xlim[2] <- 2*xmid - xlim[1]
  ylim <- c()
  min1 <- min((y + 2*bbox$bottom*ymid)/(1+2*bbox$bottom))
  min2 <- min((2*(1-bbox$top)*ymid - y)/(1-2*bbox$top))
  ylim[1] <- min(min1,min2)
  ylim[2] <- 2*ymid - ylim[1]
  c(xlim,ylim)
}

text.plot <- function(object,...) UseMethod("text.plot")
text.plot.formula <- function(formula,data=parent.frame(),...) {
  x <- model.frame.default(formula,data,na.action=na.pass)
  text.plot.data.frame(x,...)
}
text.plot.data.frame <- function(x,...) {
  pred <- predictor.vars(x)[1]
  resp <- response.var(x)
  x1 <- x[[pred]]
  x2 <- x[[resp]]
  labels <- rownames(x)
  text.plot.default(x1,x2,labels,xlab=pred,ylab=resp,...)
}
text.plot.default <- function(x,y,labels,xlab,ylab,xlim=NULL,ylim=NULL,
                              cex=par("cex"),adj=NULL,srt=0,axes=T,...) {
  if(missing(xlab)) xlab <- deparse(substitute(x))
  if(missing(ylab)) ylab <- deparse(substitute(y))
  plot.new()
  lim <- range.text(x,y,labels,cex,adj,srt)
  # make room for labels
  if(is.null(xlim)) xlim <- lim[1:2]
  if(is.null(ylim)) ylim <- lim[3:4]
  plot(x,y,xlab=xlab,ylab=ylab,xlim=xlim,ylim=ylim,axes=axes,...,type="n")
  text(x,y,labels=labels,cex=cex,adj=adj,srt=srt,...)
}
# Functions for lab 6
# by Tom Minka

indicators.factor <- function(y) {
  # convert a factor into a matrix of indicators
  # result is level by case
  r <- array(0,c(length(levels(y)),length(y)),list(levels(y),names(y)))
  for(lev in levels(y)) r[lev,y==lev] <- 1
  r
}

prototypes <- function(x,y=NULL,fun=mean) {
  # basically a cleaned-up version of "aggregate"
  if(!is.null(y) && mode(y) == "function") {
    fun <- y
    y <- NULL
  }
  if(is.null(y)) {
    resp <- response.var(x)
    y <- x[[resp]]
    if(!is.factor(y)) stop("no class variable")
    x <- not(x,resp)
  } else {
    resp <- "Group.1"
  }
  y <- factor(y)
  if(identical(fun,sum)) {
    # fast implementation of special case
    as.data.frame(indicators.factor(y) %*% as.matrix(x))
  } else if(identical(fun,mean)) {
    # fast implementation of special case
    f <- indicators.factor(y)
    f <- scale.rows(f, 1/rowSums(f))
    as.data.frame(f %*% as.matrix(x))
  } else if(T) {
    # faster than below
    xs <- split(as.data.frame(x),y)
    xp <- sapply(xs,function(xl) apply(xl,2,fun))
    as.data.frame(t(xp))
  } else {
    yl <- list(y)
    names(yl) <- resp
    x <- aggregate(x,yl,fun)
    rownames(x) <- levels(y)
    x[setdiff(names(x),resp)]
  }
}

factor.isolate <- function(f,i) {
  nam <- names(f)
  f <- as.character(f)
  names(f) <- nam
  f[i] <- i
  factor(f)
}

##############################################################################

default.colors <- function(n) ((0:(n-1))%%6+1)
default.pch <- c(1,4,20,41,35,38,2,17)

color.key <- function(col,pch,names,breaks,digits=2,cex=0.75) {
  if(is.null(col)) col <- rep(1,length(pch))
  rx <- range(par("usr")[1:2])
  ry <- range(par("usr")[3:4])
  # key is always 0.06 of an inch
  ry[1] <- ry[2] - diff(ry)/par("pin")[2]*0.06
  if(!missing(names)) n <- length(names)
  else if(!missing(breaks)) n <- length(breaks)-1
  else n <- length(col)
  i <- seq(0,1,len=n+1)
  x1 <- rx[1]+i[-length(i)]*diff(rx)
  x2 <- rx[1]+i[-1]*diff(rx)
  if(n == 0) stop("n == 0")
  col <- rep(col,ceiling(n/length(col)))[1:n]
  rect(x1, ry[1], x2, ry[2], col=col, border=NA)
  if(!missing(pch) && !is.null(pch))
    points((x1+x2)/2, rep(mean(ry),n), col=8, pch=pch, cex=0.75)
  if(!missing(names))
    mtext(names,at=(x1+x2)/2,cex=cex)
  if(!missing(breaks)) {
    x <- rx[1] + i*diff(rx)
    names <- format(breaks,digits=digits,trim=T)
    mtext(names[1],at=x[1],cex=cex,adj=0)
    mtext(names[n+1],at=x[n+1],cex=cex,adj=1)
    mtext(names[2:n],at=x[2:n],cex=cex,adj=0.5)
  }
}

extend.inches <- function(r,upper=0,lower=0) {
  # extends the given range by a specified number of inches of each side.
  # upper and lower are actually inches/plot.size
  # constraints:
  # (r.new[2] - r[2])/diff(r.new) = upper
  # (r[1] - r.new[1])/diff(r.new) = lower
  r.new <- r
  a = lower/(1-upper)
  r.new[1] <- (r[1] + a*r[2])/(1-a)
  r.new[2] <- (r[2] - upper*r.new[1])/(1-upper)
  r.new
}

color.plot <- function(object, ...) UseMethod("color.plot")
color.plot.formula <- function(formula,data=parent.frame(),...) {
  x <- model.frame.default(formula,data,na.action=na.pass)
  color.plot.data.frame(x,...)
}
color.plot.data.frame <- function(x,z,zlab,xlab=NULL,ylab=NULL,labels=F,...) {
  if(missing(z)) {
    pred <- predictor.vars(x)
    resp <- response.var(x)
    z <- x[[resp]]
    if(missing(zlab)) zlab <- resp
  } else {
    pred <- colnames(x)
    if(missing(zlab)) zlab <- deparse(substitute(z))
  }
  if(length(pred) < 2) stop("must have two predictors to plot")
  if(is.logical(labels)) {
    labels <- if(labels) rownames(x) else NULL
  }
  if(is.null(xlab)) xlab <- pred[1]
  if(is.null(ylab)) ylab <- pred[2]
  color.plot.default(x[[pred[1]]],x[[pred[2]]],z,labels=labels,
                     xlab=xlab,ylab=ylab,zlab=zlab,...)
}
color.plot.default <- function(x,y,z,labels=NULL,data=parent.frame(),
                               xlab,ylab,zlab,
                               xlim=NULL,ylim=NULL,
                               axes=T,key=T,add=F,nlevels=4,
                               color.palette=default.colors,col,
                               pch.palette=default.pch,pch,
                               jitter=F,digits=2,mar,bg,
                               cex=par("cex"),yaxt="s",...) {
  # continous responses are automatically quantized into nlevels levels, with
  # colors given by col.
  # jitter is only a suggestion.  jittering will only be done if necessary.
  if(missing(xlab)) xlab <- deparse(substitute(x))
  if(missing(ylab)) ylab <- deparse(substitute(y))
  if(missing(zlab)) zlab <- deparse(substitute(z))
  x <- eval(substitute(x),data)
  y <- eval(substitute(y),data)
  z <- eval(substitute(z),data)
  # treat character as factor
  if(mode(x) == "character") x <- factor(x,levels=unique(x))
  if(mode(y) == "character") y <- factor(y,levels=unique(x))
  if(is.numeric(z)) {
    b <- as.numeric(quantile(unique(z), seq(0,1,len=nlevels+1),na.rm=T))
    f <- rcut(z,b)
  } else {
    f <- factor(z)
  }
  nlevels <- length(levels(f))
  if(nlevels == 0) stop("0 levels in factor")
  # lev.col is the color of each level
  # lev.pch is the symbol of each level
  # if there are too few colors, recycle them and change pch
  lev <- 1:nlevels
  # col is the color of the individual points
  q <- as.numeric(f)
  if(missing(col)) {
    if(mode(color.palette) == "function")
      color.palette <- color.palette(nlevels)
    lev.col <- color.palette[((lev-1) %% length(color.palette))+1]
    col <- lev.col[q]
  } else {
    lev.col <- NULL
  }
  if(missing(pch)) {
    lev.pch <- pch.palette[(((lev-1) %/% length(color.palette)) %% length(pch.palette))+1]
    pch <- lev.pch[q]
  } else {
    lev.pch <- NULL
  }

  # setup plot window
  if(!add) {
    if(missing(mar)) {
      # mar is c(bottom, left, top, right)
      if(axes) {
        mar <- c(4.5,if(yaxt=="n") 0 else 4,if(key) 2 else 1,0.05)
      } else {
        mar <- c(0.05,0,if(key) 1 else 0,0.05)
      }
    }
    # change mar permanently so we can add things on top
    par(mar=mar)
    if(!missing(bg)) {
      opar <- par(bg=bg)
      on.exit(par(opar))
    }
  }
  # xlim,ylim needed for jittering and label placement
  if(add) {
    xlim <- par("usr")[1:2]
    ylim <- par("usr")[3:4]
  } else {
    if(is.null(labels)) {
      if(is.null(xlim)) xlim <- range(as.numeric(x),na.rm=T)
      if(is.null(ylim)) ylim <- range(as.numeric(y),na.rm=T)
      if(key) {
        # make room at the top of the plot for the key
        if(length(unique(lev.pch)) == 1) {
          a = 0.06/par("pin")[2]
        } else {
          a = par("csi")*cex/par("pin")[2]
        }
        ylim = extend.inches(ylim,a)
      }
    } else {
      plot.new()
      lab.lim <- range.text(x,y,labels,cex)
      # make room for labels
      if(is.null(xlim)) xlim <- lab.lim[1:2]
      if(is.null(ylim)) ylim <- lab.lim[3:4]
    }
  }
  if(jitter) {
    # this test needs to be less strict.  need to consider actual figure size.
    if(length(levels(factor(x))) < length(x)) {
      cat("jittering",xlab,"\n")
      jitter1 <- (runif(length(x))-0.5)*diff(extend.pct(xlim))/100
      x <- x+jitter1
    }
    if(length(levels(factor(y))) < length(y)) {
      cat("jittering",ylab,"\n")
      jitter2 <- (runif(length(y))-0.5)*diff(extend.pct(ylim))/100
      y <- y+jitter2
    }
  }
  if(add) {
    if(!is.null(labels)) {
      return(text(x, y, labels=labels, col=col, cex=cex, ...))
    } else {
      return(points(x, y, pch=pch, col=col,...))
    }
  }
  # from here on, add=F
  if(!is.null(labels)) {
    plot.default(x, y, xlim=xlim, ylim=ylim, type="n",
         xlab=xlab,ylab=ylab,main="",axes=axes,yaxt=yaxt,...)
    text(x, y, labels=labels, col=col, cex=cex, ...)
  } else {
    plot.default(x, y, xlim=xlim, ylim=ylim, xlab=xlab,ylab=ylab,main="",
         pch=pch,col=col,axes=axes,yaxt=yaxt,...)
  }
  if(key) {
    # don't draw symbols in the key if there is only one type
    if(length(unique(lev.pch)) == 1) lev.pch <- NULL
    if(is.numeric(z))
      color.key(lev.col,lev.pch,breaks=b,digits=digits)
    else
      color.key(lev.col,lev.pch,levels(f))
    if(axes) title(zlab,line=1)
  }
  else if(axes) title(zlab,line=0)
}

##############################################################################

zip <- function(a,b,fun="*") {
  fun <- match.fun(fun)
  r <- lapply(1:length(a), function(i) fun(a[[i]],b[[i]]))
  names(r) <- names(a)
  r
}

accumulate <- function(lst,fun="+") {
  fun <- match.fun(fun)
  r <- lst[[1]]
  for(i in 2:length(lst)) {
    #r <- r + lst[[i]]
    r <- fun(r,lst[[i]])
  }
  r
}

eye <- function(n) {
  diag(rep(1,n))
}

# solves a*x = u*b*x
# k is desired number of eigenvectors (optional)
eigen2 <- function(a,b,orth=NULL,k) {
  # solve(b,a) is same as inv(b)*a
  if(F) iba <- solve(b,a)
  else {
    u <- chol(b)
    iba <- backsolve(u,backsolve(u,a,transpose=T))
  }
  if(is.null(orth)) e <- La.eigen(iba,symmetric=F)
  else {
    if(is.vector(orth)) {
      n <- length(orth)
      orth.k <- 1
    } else {
      n <- nrow(orth)
      orth.k <- ncol(orth)
    }
    orth <- eye(n) - (orth %*% t(orth))
    x <- iba %*% orth
    e <- La.eigen(x,symmetric=F)
    # remove zero eigenvalues from result
    e$vectors <- e$vectors[,1:(ncol(x)-orth.k),drop=F]
    e$values <- e$values[1:(ncol(x)-orth.k)]
    if(!missing(k)) {
      e$vectors <- e$vectors[,1:k,drop=F]
      e$values <- e$values[1:k,drop=F]
    }
  }
  # remove trivial imaginary parts
  e$vectors <- Re(e$vectors)
  e$values <- Re(e$values)
  e
}

ml.cov <- function(x) {
  n <- nrow(x)
  v <- cov(x)
  if(n == 1) v <- array(0,dim(v))
  # avoid singularity
  a <- max(eigen(v,only=T)$values)
  (v*(n-1) + eye(ncol(x))*a*1e-5)/n
}

#############################################################################

factor.dichotomy <- function(f,lev) {
  labels <- c(lev,paste("NOT",lev))
  fd <- rep(labels[2],length(f))
  fd[f == lev] <- labels[1]
  fd <- factor(fd,levels=labels)
  names(fd) <- names(f)
  fd
}

projection <- function(x,y,k=1,given=NULL,type=c("mv","m","v","r","lm")) {
  if(missing(y)) {
    resp <- response.var(x)
    y <- x[[resp]]
    pred <- predictor.vars(x)
    x <- x[pred]
  }
  if(missing(type)) {
    if(is.factor(y)) type <- "mv"
    else type <- "r"
    #cat("Projection type",type,"\n")
  } else type <- match.arg(type)

  if(type == "lm") {
    w <- projection.lm(x,y,k,given)
  } else if(type == "r") {
    w <- projection.reg(x,y,k,given)
  } else {
    xs <- split(x,y)
    vs <- lapply(xs,ml.cov)
    ns <- lapply(xs,nrow)
    n <- nrow(x)
    va <- ml.cov(x)
    fun <- switch(type, m=projection.Fisher.stats, 
                        v=projection.cov.stats,
                        mv=projection.mv.stats)
    w <- fun(va,vs,ns,k,given)
  }
  rownames(w) <- names(x)
  colnames(w) <- paste("h",1:ncol(w),sep="")
  w
}

# uses linear regression to get projection
projection.lm <- function(x,y,k=1,given=NULL) {
  vxy <- cov(x,y)
  vxx <- cov(x)
  e <- eigen2(vxy %*% t(vxy),vxx,given)
  w <- e$vectors[,1:k]
  if(k == 1) dim(w) <- c(length(w),1)
  if(is.null(given)) w
  else cbind(given,w)
}

# returns a projection which separates x values leading to different y's
# treats it as a classification problem with n classes
projection.reg <- function(x,y,k=1,given=NULL,span=NULL) {
  # estimate covariance matrix about each point
  if(is.null(span)) {
    span <- min(c(5*ncol(x),length(y)/2))
    #cat("projection.reg: using nearest ",span," responses\n")
  }
  vs <- list()
  for(i in 1:length(y)) {
    d <- abs(y - y[i])
    j <- order(d)
    # include all points whose distance is within that of last neighbor
    j <- which(d <= d[j[span]] + eps)
    #cat(i,"has",length(j),"neighbors\n")
    vs[[i]] <- ml.cov(x[j,])
  }
  ns <- as.list(rep(1,length(y)))
  va <- ml.cov(x)
  projection.mv.stats(va,vs,ns,k,given)
}

# returns a projection which separates the class means
projection.Fisher.stats <- function(va,vs,ns,k=1,given=NULL) {
  n <- accumulate(ns)
  vw <- accumulate(zip(vs,ns))/n
  e <- eigen2(va,vw,given)
  i <- rev(order(e$values))[1:k]
  w <- e$vectors[,i]
  if(k == 1) dim(w) <- c(length(w),1)
  if(is.null(given)) w
  else cbind(given,w)
}

# returns a projection which separates the class covariances
projection.cov.stats <- function(va,vs,ns,k=1,given=NULL) {
  if(length(vs) > 2) {
    n <- accumulate(ns)
    va <- accumulate(zip(vs,ns))/n
    return(projection.mv.stats(va,vs,ns,k,given))
  }
  v1 <- vs[[1]]
  v2 <- vs[[2]]
  n <- accumulate(ns)
  a <- ns[[1]]/n

  if(T) {
    tim <- system.time(e <- eigen2(v1,v2,given))[3]
    cat("time to compute",nrow(v1),"eigenvectors:",tim,"\n")
  } else {
    tim1 <- system.time(e1 <- eigen2.top(v1,v2,k))[3]
    tim2 <- system.time(e2 <- eigen2.top(v2,v1,k))[3]
    cat("time to compute",2*k,"eigenvectors:",tim1+tim2,"\n")
    e <- list(values=c(e1$values,1/e2$values),vectors=cbind(e1$vectors,e2$vectors))
  }
  lambda <- abs(e$values)
  J <- a*log(lambda) - log(a*lambda + 1-a)
  # keep eigenvectors with smallest J
  i <- order(J)[1:k]
  w <- e$vectors[,i]
  if(k == 1) dim(w) <- c(length(w),1)
  if(is.null(given)) w
  else cbind(given,w)
}

score.mv <- function(w,va,vs,ns) {
  n <- accumulate(ns)
  wva <- drop(t(w) %*% va %*% w)
  J <- log(wva)
  for(i in 1:length(vs)) {
    wv <- drop(t(w) %*% vs[[i]] %*% w)
    J <- J - ns[[i]]/n*log(wv)
  }
  J
}

# returns a projection which separates the class means and covariances
projection.mv.stats <- function(va,vs,ns,k=1,given=NULL) {
  if(k > 1) {
    # find projections one at a time
    w <- projection.mv.stats(va,vs,ns,k=1,given)
    for(i in 1:(k-1)) {
      w <- projection.mv.stats(va,vs,ns,k=1,given=w)
    }
    return(w)
  }
  n <- accumulate(ns)

  # initialize with best of m,v
  w1 <- projection.Fisher.stats(va,vs,ns,k=1,given)
  w1 <- w1[,ncol(w1)]
  J1 <- score.mv(w1,va,vs,ns)
  if(length(vs) == 2) {
    w2 <- projection.cov.stats(va,vs,ns,k=1,given)
    w2 <- w2[,ncol(w2)]
    J2 <- score.mv(w2,va,vs,ns)
  } else J2 <- 0
  if(J2 > J1) w <- w2
  else w <- w1

  # EM optimization
  iter <- 1
  repeat {
    oldw <- w
    vb <- array(0,dim(va))
    for(i in 1:length(vs)) {
      wv <- drop(w %*% vs[[i]] %*% w)
      vb <- vb + ns[[i]]/n*vs[[i]]/wv
    }
    # check that objective always increases
    #print(score.mv(w,va,vs,ns))
    e <- eigen2(va,vb,given,k=1)
    w <- e$vector[,1]
    if(max(abs(w - oldw)) < 1e-5) break
    iter <- iter + 1
  }
  #cat("projection.mv: ",iter," EM iterations\n")
  dim(w) <- c(length(w),1)
  if(is.null(given)) w
  else cbind(given,w)
}

##############################################################################

reorder.cols <- function(y,i) {
  a <- attributes(y); y <- y[,i]
  a$dimnames[[2]] <- a$dimnames[[2]][i]; attributes(y) <- a
  y
}

parallel.plot <- function(x,yscale=c("linear","range","none"),
                          xscale=c("linear","equal","none"),proto=T,
                          color.palette=default.colors,col,
                          lty.palette=1:6,lty,...) {
  x <- as.data.frame(x)
  resp <- response.var(x)
  if(is.factor(x[[resp]])) {
    if(proto) {
      cat("showing median of each",resp,"\n")
      f <- x[resp]
      x <- x[sapply(x,is.numeric)]
      x <- aggregate(x,f,median)
      rownames(x) <- levels(f[[1]])
      q <- 1:nrow(x)
    } else {
      # plot cases, but color by class
      q <- as.numeric(x[[resp]])
    }
    x <- not(x,resp)
  } else {
    q <- 1:nrow(x)
  }
  y <- x
  if(missing(col)) {
    if(mode(color.palette) == "function")
      color.palette <- color.palette(nrow(y))
    col <- color.palette[((q-1) %% length(color.palette))+1]
  }
  if(missing(lty)) {
    lty <- lty.palette[(((q-1) %/% length(color.palette)) %% length(lty.palette))+1]
  }

  # assume no factors
  y <- data.matrix(y)
  x <- 1:ncol(y)    

  yscale <- match.arg(yscale)
  xscale <- match.arg(xscale)
  if(yscale=="linear") {
    # do this to subtract means
    y <- scale(y)
    if(xscale != "none") {
      # Hartigan's linear profile algorithm
      # want yij*sj = ai + bi*xj
      # yij = ai*(1/sj) + bi*(xj/sj)
      # so 1/sj = v[j,1], xj/sj = v[j,2]
      # scaling by singular values is irrelevant
      s <- svd(y)
      x <- s$v[,2]/s$v[,1]
      s <- 1/s$v[,1]
      if(xscale == "equal") {
        x <- rank(x)
        # now act as if x was fixed and solve for scaling again
        xscale <- "none"
      }
    }
    if(xscale=="none") {
      # solve for sj when xj is known
      # this comes from solving for (ai,bi) analytically and substituting
      # to get eigenvector problem for sj
      d <- ncol(y)
      xz <- x - mean(x)
      a <- diag(d) - array(1/d,c(d,d)) - outer(xz,xz)/sum(xz*xz)
      # Hadamard product
      a <- (a %*% a) * (t(y) %*% y)
      s <- svd(a)
      # want the smallest eigenvector of a
      s <- s$u[,d]
    }
    # minimize number of reversals
    if(sum(s<0) > sum(s>0)) s <- -s
    y <- y * rep.row(s,nrow(y))
    i <- s<0
    if(any(i)) {
      cat("axis reversed for",colnames(y)[i],"\n")
      colnames(y)[i] <- sapply(colnames(y)[i], function(s) paste("-",s,sep=""))
    }
  } else {
    if(yscale == "range") {
      # map to [0,1]
      y <- y - rep.row(apply(y,2,min),nrow(y))
      y <- y / rep.row(apply(y,2,max),nrow(y))
    }
    if(xscale != "none") {
      # want yij = ai + bi*xj
      # subtract mean in each row to remove ai,
      # then xj is top eigenvector
      s <- svd(y - rep.col(apply(y,1,mean),ncol(y)))
      x <- s$v[,1]
      if(xscale == "equal") x <- rank(x)
    }
  }
  # remove x flip ambiguity by alphabetic sort
  if(colnames(y)[which.min(x)] > colnames(y)[which.max(x)]) {
    x <- -x
  }
  i <- order(x)
  y <- reorder.cols(y,i)
  x <- x[i]

  yaxis <- (yscale == "none")
  opar <- par(mar=c(2.5,if(yaxis) 2 else 0,0,0.1))
  on.exit(par(opar))
  plot.new()
  xlim <- range(x)
  # make space for labels
  # this part from profile.plot
  w <- max(strwidth(rownames(y),units="inches"))/par("pin")[1]
  w <- w+0.05
  xlim[2] <- xlim[2] + diff(xlim)*w/(1-w)
  xspc <- 0.05*diff(xlim)
  plot.window(xlim=xlim,ylim=range(as.vector(y),finite=T))
  cat("columns are",colnames(y),"\n")
  axis(1,x, labels=colnames(y),...)
  if(yaxis) axis(2,)
  box()
  for(i in 1:nrow(y)) {
    lines(x, y[i,],col=col[i],lty=lty[i], type="o", pch=20, ...)
    j <- length(x)
    text(x[j]+xspc, y[i,j], rownames(y)[i], col=col[i], adj=0, ...)
  }
}
# Functions for lab 7
# by Tom Minka

library(modreg)

# smallest number such that 1+eps > 1
eps <- 1e-15

as.matrix.polygon <- function(p) {
  if(is.list(p) && !is.data.frame(p)) p <- cbind(p$x,p$y)
  p
}

# returns T if x is inside the polygon with given vertices
# poly is a matrix where each row is a vertex
# polygon is closed automatically
# or a list of x and y (converted to a matrix)
in.polygon <- function(poly,x) {
  poly <- as.matrix.polygon(poly)
  if(is.list(x) && !is.data.frame(x)) x <- cbind(x$x,x$y)
  if(is.vector(x)) in.polygon1(poly,x)
  num.above <- rep(0,nrow(x))
  start <- 1
  end <- nrow(poly)
  for(i in 1:nrow(poly)) {
    x1 <- poly[i,1]
    if(is.na(x1)) start <- i+1
    else {
      if(i == nrow(poly) || is.na(poly[i+1,1])) { i2 <- start } 
      else { i2 <- i+1 }
      x2 <- poly[i2,1]
      y1 <- poly[i,2]
      y2 <- poly[i2,2]
      xmax <- max(x1,x2)
      xmin <- min(x1,x2)
      # xi indexes the points within the line extent
      xi <- which((x[,1]>=xmin) & (x[,1]<=xmax))
      # a is the height of the line at x[xi,1]
      a <- (x[xi,1]-x1)*(y2-y1)/(x2-x1) + y1
      vert <- (x1==x2)
      a[vert] <- y1[vert]
      # j indexes the points below the line
      j <- (a>x[xi,2])
      num.above[xi[j]] <- num.above[xi[j]] + 1
    }
  }
  (num.above %% 2) == 1
}
in.polygon1 <- function(poly,x) {
  x1 <- poly[,1]
  x2 <- c(poly[2:nrow(poly),1],poly[1,1])
  y1 <- poly[,2]
  y2 <- c(poly[2:nrow(poly),2],poly[1,2])
  breaks <- which(is.na(x1))
  if(length(breaks) > 0) {
    starts <- c(1,breaks+1)
    ends <- c(breaks-1,nrow(poly))
    x2[ends] <- x1[starts]; y2[ends] <- y1[starts]
    x1 <- x1[-breaks]; x2 <- x2[-breaks]; y1 <- y1[-breaks]; y2 <- y2[-breaks]
  }
  vert <- which(x1==x2)
  # a is the height of the lines at x[1]
  a <- (x[1]-x1)*(y2-y1)/(x2-x1) + y1
  a[vert] <- y1[vert]
  xmin <- pmin(x1,x2)
  xmax <- pmax(x1,x2)
  # i indexes the lines above x
  i <- (a>x[2]) & (x[1] >= xmin) & (x[1] <= xmax)
  (sum(i) %% 2) == 1
}

###############################################################################

# Plot a regression surface.
#
# clip specifies a polygon in (x,y) over which the surface is to be defined.
# possible values are F (no clipping), T (clip to the convex hull of the 
# data), a list(x,y), or a matrix with two columns.
#
# if fill=T, z values outside zlim will not be plotted, 
#   i.e. the corresponding regions will be white.
# the default zlim is the range of the data.
#
color.plot.loess <- function(object,x,res=50,add=F,fill=F,equal=F,drawlabels=F,
               levels=NULL,nlevels=if(is.null(levels)) 4 else length(levels),
               col, clip=T, zlim, bg=par("bg"), lwd=par("lwd"),
               color.palette=if(fill)topo.colors else default.colors,...) {
  if(missing(x)) x <- model.frame(object)
  else x <- model.frame(terms(object),x)
  pred <- predictor.vars(object)
  if(length(pred) == 1) return(plot.loess(object,x,add=add,lwd=lwd,...))

  if(mode(color.palette) == "function") {
    color.palette <- color.palette(nlevels)
  }
  
  if(!add && !fill) color.plot.data.frame(x,levels=levels,nlevels=nlevels,
                         color.palette=color.palette,bg=bg,...)
  # use the par options set by color.plot.data.frame

  # x is only used to get plotting range
  resp <- response.var(object)
  pred <- predictor.vars(object)
  # this is ugly, but otherwise recursively calling color.plot.data.frame is hard
  if(is.null(...$xlim)) {
    if(add || !fill) xlim <- range(par("usr")[1:2])
    else xlim <- range(x[[pred[1]]])
  } else xlim <- ...$xlim
  x1 <- seq(xlim[1],xlim[2],length=res)
  if(is.null(...$ylim)) {
    if(add || !fill) ylim <- range(par("usr")[3:4])
    else ylim <- range(x[[pred[2]]])
  } else ylim <- ...$ylim
  x2 <- seq(ylim[1],ylim[2],length=res)
  xt <- expand.grid(x1,x2)
  names(xt) <- pred[1:2]
  z <- predict(object,xt)
  if(clip != F && length(pred) >= 2) {
    if(clip == T) {
      i <- chull(x[pred])
      clip <- x[pred][i,]
    }
    z[!in.polygon(clip,xt)] <- NA
  }
  if(missing(zlim)) {
    zlim <- range(x[[resp]])
    z[z < zlim[1]] <- zlim[1]
    z[z > zlim[2]] <- zlim[2]
  }
  dim(z) <- c(length(x1),length(x2))
  if(is.null(levels)) {
    if(equal) 
      levels <- seq(zlim[1],zlim[2],len=nlevels+1)
    else {
      r <- unique(x[[resp]])
      #if(!missing(zlim)) {
        r <- r[r >= zlim[1] & r <= zlim[2]]
        r <- c(r,zlim)
      #}
      levels <- as.numeric(quantile(r, seq(0,1,len=nlevels+1),na.rm=T))
    }
  }
  if(fill) {
    filled.contour(x1,x2,z,levels=levels,
                   col=color.palette,xlab=pred[1],ylab=pred[2],main=resp,...)
    if(!add) color.plot.data.frame(x,col=1,add=T,...)
  }
  else {
    color.palette <- c(color.palette,color.palette[length(color.palette)])
    contour(x1,x2,z,add=T,col=color.palette,levels=levels,drawlabels=drawlabels,lwd=lwd,...)
  }
}

model.frame.glm <- 
function (formula, data, na.action, ...) 
{
    if (is.null(formula$model)) {
        fcall <- formula$call
        fcall$method <- "model.frame"
        fcall[[1]] <- as.name("glm")
        # minka
        if (!is.null(formula$terms)) 
            env <- environment(formula$terms)
        else
            env <- environment(fcall$formula)
        if (is.null(env)) 
            env <- parent.frame()
        eval(fcall, env)
    }
    else formula$model
}

model.frame.loess <- model.frame.glm

###############################################################################

YlGnBu.colors <- function(n) {
  if(n == 8) c("#ffffd9","#edf8b0","#c7e9b4","#7fcdbb","#41b6c3","#1d91c0","#225ea8","#0c2c83")
  else if(n == 7) c("#ffffcc","#c7e9b4","#7fcdbb","#41b6c3","#1d91c0","#386cb0","#0c2c83")
  else if(n == 6) c("#ffffcc","#c7e9b4","#7fcdbb","#41b6c3","#2c7fb8","#253494")
  else if(n == 5) c("#ffffcc","#a1dab4","#41b6c3","#2c7fb8","#253494")
  else if(n == 4) c("#ffffcc","#a1dab4","#41b6c3","#386cb0")
  else if(n == 3) c("#edf8b0","#7fcdbb","#2c7fb8")
  else if(n == 2) c("yellow","blue")
  else stop("need a different number of levels")
}

formals(color.plot.default)$color.palette <- YlGnBu.colors
formals(color.plot.default)$bg <- grey(0.5)
formals(color.plot.loess)$color.palette <- YlGnBu.colors
formals(color.plot.loess)$bg <- grey(0.5)
formals(color.plot.loess)$lwd <- 2

###############################################################################

# convert from interval to range notation
interval2range <- function(x) {
  x <- unlist(strsplit(x,","))
  x[1] <- substr(x[1],2,nchar(x[1]))
  x[1] <- format(as.numeric(x[1]))
  if(x[1] == "-Inf") x[1] <- "."
  x[2] <- substr(x[2],1,nchar(x[2])-1)
  x[2] <- format(as.numeric(x[2]))
  if(x[2] == "Inf") x[2] <- "."
  paste(x[1],x[2],sep="-")
}

# used by color.plot and predict.plot given
rcut <- function(x,b) {
  f <- cut(x,b,include.lowest=T)
  levels(f) <- sapply(levels(f),interval2range)
  f
}

expand.frame <- function(data,v) {
  if(length(v) == 0) return(data)
  vs = strsplit(v,":")
  for(i in 1:length(v)) {
    data[[v[[i]]]] <- apply(data[vs[[i]]],1,prod)
  }
  data
}
predict.plot.data.frame <- function(x,given,given.lab,nlevels=2,
                           layout,partial,type=c("prob","logit","probit"),
                           highlight,
                           identify.pred=F,scol="red",slwd=2,
                           span=2/3,mar=NA,bg=par("bg"),
                           color.palette=default.colors,
                           pch.palette=default.pch,...) {
  type <- match.arg(type)
  resp <- response.var(x)
  pred <- predictor.terms(x)
  x <- expand.frame(x,setdiff(pred,predictor.vars(x)))
  if(!missing(given) && !is.factor(given)) {
    # make given a factor
    b <- as.numeric(quantile(unique(given),seq(0,1,length=nlevels+1)))
    given <- rcut(given,b)
    if(is.function(color.palette)) color.palette = color.palette(nlevels)
    lev = 1:nlevels
    level.color = color.palette[((lev-1) %% length(color.palette))+1]
    level.pch <- pch.palette[(((lev-1) %/% length(color.palette)) %% length(pch.palette))+1]
  }
  if(missing(layout)) layout <- auto.layout(length(pred) + !missing(given))
  opar <- par(mfrow=layout,bg=bg)
  if(!is.null(mar)) {
    if(is.na(mar)) mar <- c(4.5,4,0,0.1)
    opar <- c(opar,par(mar=mar))
  }
  on.exit(par(opar))
  if(!missing(partial)) {
    pres <- partial.residuals(partial,pred)
    if(missing(highlight)) highlight <- predictor.terms(partial)
  }
  else if(missing(highlight)) highlight <- NULL
  for(i in pred) {
    if(!missing(partial)) {
      x[[resp]] <- pres[[i]]
      if(is.null(x[[resp]])) stop(paste("partial of",i,"not found"))
    }
    if(is.factor(x[[i]]) && !is.ordered(x[[i]])) {
      x[[i]] <- sort.levels(x[[i]],as.numeric(x[[resp]]))
    }
    # plot points
    if(missing(given)) {
      # no given
      if(is.factor(x[[resp]])) {
        # factor
        if(type=="prob") {
          if(is.factor(x[[i]])) {
            mosaicplot(table(x[[i]], x[[resp]]), xlab=i, ylab=resp)
          } else {
            plot(x[[i]], x[[resp]], xlab=i, ylab=resp, ...)
          }
        }
      } else {
        # numeric
        plot(x[[i]], x[[resp]], xlab="", ylab=resp, ...)
        if(i %in% highlight) title(xlab=i,font.lab=2)
        else title(xlab=i)
        if(!missing(partial)) {
          a <- coef(partial)
          a0 = a["(Intercept)"]
          if(is.null(a0)) a0 = 0
          #if(!is.na(a[i])) abline(a0,a[i],col=3,lty=2)
        }
      }
      # smooth curve
      k <- !is.na(x[,i]) & !is.na(x[[resp]])
      if(!is.na(scol)) {
        if(is.factor(x[[resp]])) {
          if(length(levels(x[[resp]]))==2 && !is.factor(x[[i]])) {
            if(type=="prob") {
              lines(loprob(x[k,i], x[k,resp]), col=scol)
            } else {
              xy <- loprob(x[k,i], x[k,resp])
              p <- xy$y-1
              p <- switch(type,logit=log(p/(1-p)),probit=qnorm(p))
              xy$y <- p+1.5
              plot(xy,col=scol,type="l",xlab=i,ylab=type)
              points(x[[i]],2*as.numeric(x[[resp]])-3)
            }
          }
        } else {
          lines(lowess(x[k,i], x[k,resp], f=span),col=scol,lwd=slwd)
        }
      }
      # identify
      if((identify.pred == T) || (i %in% identify.pred)) {
        identify(x[k,i],x[k,resp],labels=rownames(x)[k])
      }
    } else {
      # condition on the given variable
      plot.new()
      box()
      title(xlab=i, ylab=resp)
      if(is.factor(x[[i]])) {
        # setup window for factor
        xlim <- length(levels(x[[i]]))
        plot.window(xlim=c(0.5,xlim+0.5),ylim=range(x[[resp]]))
        axis(1,1:xlim,labels=levels(x[[i]]))
        axis(2)
        cat(paste("jittering",i,"\n"))
      } else {
        # setup window for numeric
        plot.window(xlim=range(x[[i]],na.rm=T),ylim=range(x[[resp]],na.rm=T))
        axis(1);axis(2)
      }
      lev <- levels(given)
      for(g in 1:length(lev)) {
        color <- level.color[g]
        pch <- level.pch[g]
        val <- lev[g]
        k <- (given == val)
        if(is.factor(x[[i]])) {
          jitter <- (runif(length(x[k,i]))-0.5)/5
          points(as.numeric(x[k,i])+jitter, x[k,resp], col=color,
                 pch=pch, ...)
        } else {
          points(x[k,i], x[k,resp], col=color, pch=pch, ...)
        }
        if(is.factor(x[[resp]])) {
          lines(loprob(x[k,i], x[k,resp]),col=color,lwd=slwd)
        } else {
          lines(lowess(x[k,i], x[k,resp], f=span),col=color,lwd=slwd)
          #abline(lm(x[k,resp]~x[k,i]),col=color)
        }
      }
    }
  }
  # show legend for the given variable, as a separate panel
  # this is better than putting a color key on each plot
  if(!missing(given)) {
    # legend
    plot.new()
    if(!missing(given.lab)) title(xlab=given.lab)
    y <- cumsum(strheight(lev)+0.02)
    for(i in 1:length(lev)) {
      color <- color.palette[i]
      val <- lev[i]
      text(0.5,0.5+y[i],val,col=color,adj=0.5)
      if(length(unique(level.pch))>1)
        points(0.6+strwidth(val)/2,0.5+y[i],pch=level.pch[i])
    }
  }
}
predict.plot.lm <- function(object,data,partial=F,...) {
  if(!partial) {
    if(missing(data)) {
      res <- residual.frame(object)
    } else {
      res <- residual.frame(object,data)
    }
    cat("plotting residuals\n")
    predict.plot.data.frame(res,highlight=predictor.terms(object),...)
  } else {
    if(missing(data)) data <- model.frame(object)
    cat("plotting partial residuals\n")
    predict.plot.data.frame(data,partial=object,...)
  }
}
predict.plot.formula <- function(formula,data=parent.frame(),...) {
  # formula has givens?
  rhs <- formula[[3]]
  if(is.call(rhs) && (deparse(rhs[[1]]) == "|")) {
    # remove givens from formula
    given <- deparse(rhs[[3]])
    formula[[3]] <- rhs[[2]]
    if(is.environment(data)) g <- get(given,env=data)
    else g <- data[[given]]
    if(is.null(g)) 
      stop(paste("variable \"",given,"\" not found",sep=""))
    return(predict.plot.formula(formula,data,
                                given=g,given.lab=given,...))
  }
  if(F) {
    expr <- match.call(expand = F)
    expr$... <- NULL
    expr$na.action <- na.pass
    expr[[1]] <- as.name("model.frame.default")
    x <- eval(expr, parent.frame())
  } else {
    # formula has its own environment 
    x <- model.frame.default(formula,data,na.action=na.pass)
  }
  predict.plot.data.frame(x,...)
}
predict.plot <- function(object, ...) UseMethod("predict.plot")
# Functions for lab 9
# by Tom Minka

sort.levels <- function(f,x,fun=median) {
  if(length(x) == length(f)) x <- tapply(x,f,fun)
  if(length(x) != length(levels(f))) stop("wrong number of values to sort by")
  factor(f,levels=levels(f)[order(x)])
}

update.default <-
    function (object, formula., ..., evaluate = TRUE)
{
    call <- object$call
    if (is.null(call))
	stop("need an object with call component")
    extras <- match.call(expand.dots = FALSE)$...
    if (!missing(formula.))
	call$formula <- update.formula(formula(object), formula.)
    if(length(extras) > 0) {
	existing <- !is.na(match(names(extras), names(call)))
	## do these individually to allow NULL to remove entries.
	for (a in names(extras)[existing]) call[[a]] <- extras[[a]]
	if(any(!existing)) {
	    call <- c(as.list(call), extras[!existing])
	    call <- as.call(call)
	}
    }
    if(evaluate) {
      # minka: use environment of formula instead of parent.frame
      env<-environment(call$formula)
      if (is.null(env)) env<-parent.frame()
      eval(call,env)
    }
    else call
}

#############################################################################

# returns a model frame where the responses have been replaced by their
# residuals under the fit
residual.frame <- function(object,data) {
  if(missing(data)) {
    x <- model.frame(object)
    resp <- response.var(x)
    if(inherits(object,"glm")) {
      p <- object$fitted.value
      x[[resp]] <- (object$y - p)/p/(1-p)
    }
    else x[[resp]] <- residuals(object)
  }
  else {
    x <- data
    resp <- response.var(object)
    # only for lm, not glm
    if(inherits(object,"glm")) stop("can't compute residuals for glm")
    # expand p to include NAs
    p <- predict(object,x)[rownames(x)]
    names(p) <- NULL
    x[[resp]] <- x[[resp]] - p
    #attr(x,"terms") <- terms(object)
  }
  x
}

partial.residuals <- function(object,omit) {
  resp <- response.var(object)
  pred <- predictor.vars(object)
  x <- list()
  for(i in 1:length(omit)) {
    fit <- update(object,paste(".~. -",omit[i]))
    #cat(pred[i],coef(fit),"\n")
    x[[i]] <- residuals(fit)
  }
  names(x) <- omit
  x
}
partial.residual.frame <- function(object,omit) {
  x = model.frame(object)
  resp <- response.var(x)
  x[[resp]] = partial.residuals(object,omit)[[1]]
  pred = predictor.vars(formula(paste(resp,"~",omit)))
  x[c(pred,resp)]
}

auto.legend <- function(labels, col=1, lty, pch, text.col, ...) {
  usr <- par("usr")
  top <- usr[4]
  inch.x <- diff(par("usr")[1:2])/par("pin")[1]
  inch.y <- diff(par("usr")[3:4])/par("pin")[2]
  left <- usr[2]-max(strwidth(labels))-0.04*inch.x
  y <- cumsum(strheight(labels)+0.04*inch.y)
  if(missing(lty) && missing(pch)) {
    text.only <- T
    if(missing(text.col)) text.col <- col
  }
  for(i in 1:length(labels)) {
    text(left,top-y[i],labels[i],col=text.col[i],adj=0,...) 
  }
}

##############################################################################

expand.frame <- function(data,v) {
  if(length(v) == 0) return(data)
  vs = strsplit(v,":")
  for(i in 1:length(v)) {
    data[[v[[i]]]] <- apply(data[vs[[i]]],1,prod)
  }
  data
}
predict.plot.data.frame <- function(x,given,given.lab,nlevels=2,
                           layout,partial,type=c("prob","logit","probit"),
                           highlight,
                           identify.pred=F,scol="red",slwd=2,
                           span=2/3,mar=NA,bg=par("bg"),
                           color.palette=default.colors,
                           pch.palette=default.pch,...) {
  type <- match.arg(type)
  resp <- response.var(x)
  pred <- predictor.terms(x)
  x <- expand.frame(x,setdiff(pred,predictor.vars(x)))
  if(!missing(given) && !is.factor(given)) {
    # make given a factor
    b <- as.numeric(quantile(unique(given),seq(0,1,length=nlevels+1)))
    given <- rcut(given,b)
    if(is.function(color.palette)) color.palette = color.palette(nlevels)
    lev = 1:nlevels
    level.color = color.palette[((lev-1) %% length(color.palette))+1]
    level.pch <- pch.palette[(((lev-1) %/% length(color.palette)) %% length(pch.palette))+1]
  }
  if(missing(layout)) layout <- auto.layout(length(pred) + !missing(given))
  opar <- par(mfrow=layout,bg=bg)
  if(!is.null(mar)) {
    if(is.na(mar)) mar <- c(4.5,4,0,0.1)
    opar <- c(opar,par(mar=mar))
  }
  on.exit(par(opar))
  if(!missing(partial)) {
    pres <- partial.residuals(partial,pred)
    if(missing(highlight)) highlight <- predictor.terms(partial)
  }
  else if(missing(highlight)) highlight <- NULL
  for(i in pred) {
    if(!missing(partial)) {
      x[[resp]] <- pres[[i]]
      if(is.null(x[[resp]])) stop(paste("partial of",i,"not found"))
    }
    if(is.factor(x[[i]]) && !is.ordered(x[[i]])) {
      x[[i]] <- sort.levels(x[[i]],as.numeric(x[[resp]]))
    }
    # plot points
    if(missing(given)) {
      # no given
      if(is.factor(x[[resp]])) {
        # factor
        if(type=="prob") {
          if(is.factor(x[[i]])) {
            mosaicplot(table(x[[i]], x[[resp]]), xlab=i, ylab=resp)
          } else {
            plot(x[[i]], x[[resp]], xlab=i, ylab=resp, ...)
          }
        }
      } else {
        # numeric
        plot(x[[i]], x[[resp]], xlab="", ylab=resp, ...)
        if(i %in% highlight) title(xlab=i,font.lab=2)
        else title(xlab=i)
        if(!missing(partial)) {
          a <- coef(partial)
          a0 = a["(Intercept)"]
          if(is.null(a0)) a0 = 0
          #if(!is.na(a[i])) abline(a0,a[i],col=3,lty=2)
        }
      }
      # smooth curve
      k <- !is.na(x[,i]) & !is.na(x[[resp]])
      if(!is.na(scol)) {
        if(is.factor(x[[resp]])) {
          if(length(levels(x[[resp]]))==2 && !is.factor(x[[i]])) {
            if(type=="prob") {
              lines(loprob(x[k,i], x[k,resp]), col=scol)
            } else {
              xy <- loprob(x[k,i], x[k,resp])
              p <- xy$y-1
              p <- switch(type,logit=log(p/(1-p)),probit=qnorm(p))
              xy$y <- p+1.5
              plot(xy,col=scol,type="l",xlab=i,ylab=type)
              points(x[[i]],2*as.numeric(x[[resp]])-3)
            }
          }
        } else {
          lines(lowess(x[k,i], x[k,resp], f=span),col=scol,lwd=slwd)
        }
      }
      # identify
      if((identify.pred == T) || (i %in% identify.pred)) {
        identify(x[k,i],x[k,resp],labels=rownames(x)[k])
      }
    } else {
      # condition on the given variable
      plot.new()
      box()
      title(xlab=i, ylab=resp)
      if(is.factor(x[[i]])) {
        # setup window for factor
        xlim <- length(levels(x[[i]]))
        plot.window(xlim=c(0.5,xlim+0.5),ylim=range(x[[resp]]))
        axis(1,1:xlim,labels=levels(x[[i]]))
        axis(2)
        cat(paste("jittering",i,"\n"))
      } else {
        # setup window for numeric
        plot.window(xlim=range(x[[i]],na.rm=T),ylim=range(x[[resp]],na.rm=T))
        axis(1);axis(2)
      }
      lev <- levels(given)
      for(g in 1:length(lev)) {
        color <- level.color[g]
        pch <- level.pch[g]
        val <- lev[g]
        k <- (given == val)
        if(is.factor(x[[i]])) {
          jitter <- (runif(length(x[k,i]))-0.5)/5
          points(as.numeric(x[k,i])+jitter, x[k,resp], col=color,
                 pch=pch, ...)
        } else {
          points(x[k,i], x[k,resp], col=color, pch=pch, ...)
        }
        if(is.factor(x[[resp]])) {
          lines(loprob(x[k,i], x[k,resp]),col=color,lwd=slwd)
        } else {
          lines(lowess(x[k,i], x[k,resp], f=span),col=color,lwd=slwd)
          #abline(lm(x[k,resp]~x[k,i]),col=color)
        }
      }
    }
  }
  # show legend for the given variable, as a separate panel
  # this is better than putting a color key on each plot
  if(!missing(given)) {
    # legend
    plot.new()
    if(!missing(given.lab)) title(xlab=given.lab)
    y <- cumsum(strheight(lev)+0.02)
    for(i in 1:length(lev)) {
      color <- color.palette[i]
      val <- lev[i]
      text(0.5,0.5+y[i],val,col=color,adj=0.5)
      if(length(unique(level.pch))>1)
        points(0.6+strwidth(val)/2,0.5+y[i],pch=level.pch[i])
    }
  }
}
predict.plot.lm <- function(object,data,partial=F,...) {
  if(!partial) {
    if(missing(data)) {
      res <- residual.frame(object)
    } else {
      res <- residual.frame(object,data)
    }
    cat("plotting residuals\n")
    predict.plot.data.frame(res,highlight=predictor.terms(object),...)
  } else {
    if(missing(data)) data <- model.frame(object)
    cat("plotting partial residuals\n")
    predict.plot.data.frame(data,partial=object,...)
  }
}
predict.plot.formula <- function(formula,data=parent.frame(),...) {
  # formula has givens?
  rhs <- formula[[3]]
  if(is.call(rhs) && (deparse(rhs[[1]]) == "|")) {
    # remove givens from formula
    given <- deparse(rhs[[3]])
    formula[[3]] <- rhs[[2]]
    if(is.environment(data)) g <- get(given,env=data)
    else g <- data[[given]]
    if(is.null(g)) 
      stop(paste("variable \"",given,"\" not found",sep=""))
    return(predict.plot.formula(formula,data,
                                given=g,given.lab=given,...))
  }
  if(F) {
    expr <- match.call(expand = F)
    expr$... <- NULL
    expr$na.action <- na.pass
    expr[[1]] <- as.name("model.frame.default")
    x <- eval(expr, parent.frame())
  } else {
    # formula has its own environment 
    x <- model.frame.default(formula,data,na.action=na.pass)
  }
  predict.plot.data.frame(x,...)
}
predict.plot <- function(object, ...) UseMethod("predict.plot")

##############################################################################
# Functions for lab 10
# by Tom Minka

# cat lab5.r lab6.r lab7.r lab9.r lab10e.r > lab10.r

expand.cross <- function(object) {
  resp <- response.var(object)
  pred <- predictor.vars(object)
  # quadratic terms only
  pred2 <- c()
  for(i in pred) {
    for(j in pred) {
      if(match(i,pred) < match(j,pred)) pred2 = c(pred2,paste(i,j,sep=":"))
    }
  }
  pred = c(pred,pred2)
  formula(paste(resp,"~",paste(pred,collapse="+")))
}

interact.plot.data.frame <- function(x,partial,highlight,span=0.75,
                                     scol="red",lwd=2,type=c("*",":"),
                                     bg=par("bg"),...) {
  library(modreg)
  type = match.arg(type)
  resp <- response.var(x)
  pred = predictor.vars(x)
  n = length(pred)
  mar=c(0.05,0.05,0,0.05)
  opar = par(mfrow=c(n,n),mar=mar,bg=bg)
  on.exit(par(opar))
  if(!missing(partial)) {
    if(missing(highlight)) highlight <- predictor.terms(partial)
  }
  else if(missing(highlight)) highlight <- NULL
  for(i in pred) {
    for(j in pred) {
      if(i == j) {
        plot.new()
        box()
        text(0.5,0.5,i,font=if(i %in% highlight) 2 else 1)
      } else if(match(i,pred) < match(j,pred)) {
        plot.new()
      } else {
        # contour plot
        y = x[c(j,i,resp)]
        if(!missing(partial)) {
          predij = paste(j,type,i,sep="")
          fit = update(partial,paste(".~.-",predij))
          y[[resp]] = residuals(fit)
        }
        fmla = formula(paste(resp,"~",j,"+",i))
        mar = c(0.05,0.05,0,0.05)
        color.plot(loess(fmla,y,span=span),xlab="",ylab="",mar=mar,key=F,axes=F,lwd=lwd,...)
        predji = paste(j,":",i,sep="")
        predij = paste(i,":",j,sep="")
        if(any(c(predij,predji) %in% highlight)) {
          box(col="red")
        } else {
          box()
        }
      }
    }
  }
}
interact.plot.lm <- function(object,data,partial=F,...) {
  if(!partial) {
    if(missing(data)) {
      res <- residual.frame(object)
    } else {
      res <- residual.frame(object,data)
    }
    cat("plotting residuals\n")
    interact.plot.data.frame(res,highlight=predictor.vars(object),...)
  } else {
    if(missing(data)) data <- model.frame(object)
    cat("plotting partial residuals\n")
    interact.plot.data.frame(data,partial=object,...)
  }
}
interact.plot <- function(object, ...) UseMethod("interact.plot")

##############################################################################

formals(interact.plot.data.frame)$bg <- grey(0.5)
formals(interact.plot.data.frame)$color.palette <- YlGnBu.colors