.packageName <- "biglm"
bigglm<-function(formula, data, family=gaussian(),...)
    UseMethod("bigglm", data)


bigglm.data.frame<-function(formula, data, ..., chunksize=5000){
    n<-nrow(data)
    cursor<-0
    datafun<-function(reset=FALSE){
        if (reset){
            cursor<<-0
            return(NULL)
        }
        if (cursor>=n)
            return(NULL)
        start<-cursor+1
        cursor<<-cursor+min(chunksize, n-cursor)
        data[start:cursor,]
    }
    bigglm(formula=formula, data=datafun,...)
}

bigglm.function<-function(formula, data, family=gaussian(), weights=NULL,
                            sandwich=FALSE, maxit=8, tolerance=1e-7,
                            start=NULL, ...){

    tt<-terms(formula)
    beta <- start
    etafun <- function(x) if(is.null(beta)) rep(0,nrow(x)) else x%*%beta
    
    
    converged<-FALSE
    for (i in 1:maxit){
        firstchunk <- TRUE
        deviance<-0
        rss<-0
        data(reset=TRUE)
        n<-0
        while(!is.null(chunk<-data(reset=FALSE))){
            n<-n+nrow(chunk)
            mf<-model.frame(tt,chunk)
            mm<-model.matrix(tt,mf)
            p<-NCOL(mm)
            if (!is.null(weights)){
                if (!inherits(weights, "formula"))
                    stop("`weights' must be a formula")
                w<-model.frame(weights, chunk)[[1]]
            } else w<-rep(1,nrow(mm))
            if (firstchunk) {
                qr<-bigqr.init(p)
                assn<-attr(mm,"assign")
                if(sandwich)
                    xyqr<-bigqr.init(p*(p+1))
            }
            if (!identical(assn, attr(mm,"assign")))
                stop("model matrices incompatible")
            y<-model.response(mf)
            eta<-etafun(mm)
            mu <- family$linkinv(eta)
            dmu <- family$mu.eta(eta)
            z<- eta+(y-mu)/dmu
            ww<-w*dmu*dmu/(family$variance(mu))
            qr<-update(qr,mm,z,ww)
            if(!is.null(beta)){
                deviance<-deviance+sum(family$dev.resids(y,mu,w))
                rss<-rss+sum((y-mu)^2/(w*family$variance(mu)))*(sum(w)/length(w))
                if (sandwich){
                    xx<-matrix(nrow=nrow(mm), ncol=p*(p+1))
                    xx[,1:p]<-mm*drop(z)
                    for(i in 1:p)
                        xx[,p*i+(1:p)]<-mm*mm[,i]
                    xyqr<-update(xyqr,xx,rep(0,nrow(mm)),ww)
                }
            }
            firstchunk <- FALSE
        }
        iwlm <- list(call=sys.call(-1), qr=qr, iterations=i,
                   assign=attr(mm,"assign"), terms=tt, 
                   n=n,names=colnames(mm), weights=weights,rss=rss)
        if(sandwich)
            iwlm$sandwich <- list(xy=xyqr)
        class(iwlm) <- "biglm"

        betaold <- beta
        beta <- coef(iwlm)
        if (i >= maxit)
            break
        
        if (!is.null(betaold)){
            delta <- (betaold-beta)/sqrt(diag(vcov(iwlm)))
            if (max(abs(delta)) < tolerance){
                iwlm$converged<-TRUE
                break
            }
        }

        
    }
    
    rval <- iwlm
    rval$family <- family
    rval$deviance <- deviance
    class(rval) <- c("bigglm","biglm")
    rval
}



"biglm" <-
function(formula, data, weights=NULL, sandwich=FALSE){
   tt<-terms(formula)
   if (!is.null(weights)){
     if (!inherits(weights, "formula"))
       stop("`weights' must be a formula")
     w<-model.frame(weights, data)[[1]]
   } else w<-NULL
   mf<-model.frame(tt,data)
   mm<-model.matrix(tt,mf)
   qr<-bigqr.init(NCOL(mm))
   qr<-update(qr,mm,model.response(mf),w)
   rval<-list(call=sys.call(), qr=qr,assign=attr(mm,"assign"), terms=tt, 
              n=NROW(mm),names=colnames(mm), weights=weights)

   if (sandwich){
     p<-ncol(mm)
     n<-nrow(mm)
     xyqr<-bigqr.init(p*(p+1))
     xx<-matrix(nrow=n, ncol=p*(p+1))
     xx[,1:p]<-mm*model.response(mf)
     for(i in 1:p)
       xx[,p*i+(1:p)]<-mm*mm[,i]
     xyqr<-update(xyqr,xx,rep(0,n),w)
     rval$sandwich<-list(xy=xyqr)
   }

   class(rval)<-"biglm"
   rval	
}



print.biglm<-function(x,...){
  cat("Large data regression model: ")
  print(x$call)
  cat("Sample size = ",x$n,"\n")
  invisible(x)
}

summary.biglm<-function(object,...){
   beta<-coef(object)
   se<-sqrt(diag(vcov(object)))
   mat<-cbind(`Coef`=beta, `(95%`=beta-2*se, `CI)`=beta+2*se, `SE`=se, 
               `p`=2*pnorm(abs(beta/se),lower.tail=FALSE))
   rownames(mat)<-object$names
   rval<-list(obj=object, mat=mat)
   class(rval)<-"summary.biglm"
   rval
}

print.summary.biglm<-function(x,digits=3,...){
   print(x$obj)
   print(round(x$mat,digits))
   if(!is.null(x$obj$sandwich))
     cat("Sandwich (model-robust) standard errors\n")
   invisible(x)
}


"bigqr.init" <-
function(p){

  rval<-list(D=numeric(p), rbar=numeric(choose(p,2)),
             thetab=numeric(p),
             ss=0, checked=FALSE,
             tol=numeric(p))
  class(rval)<-"bigqr"
  rval
}

"coef.biglm" <-
function(object,...){
    if (!object$qr$checked)
       object$qr<-singcheck.bigqr(object$qr)
    rval<-coef(object$qr)
    rval[object$qr$D==0]<-NA
    names(rval)<-object$names
    rval
}

"coef.bigqr" <-
function(bigQR,nvar=NULL,...){
  p <- length(bigQR$D)
  if (is.null(nvar))
    nvar <- p
  if (nvar <1 | nvar >p) stop("Invalid value of `nvar'")

  if (!bigQR$checked)
    bigQR<-singcheck.bigqr(bigQR)
  
  tmp <- .Fortran("regcf", as.integer(p),
                  as.integer(p*p/2),
                  bigQR$D,
                  bigQR$rbar,
                  bigQR$thetab,
                  bigQR$tol,
                  beta=numeric(p),
                  nreq=as.integer(nvar),
                  ier=integer(1), DUP=FALSE)

  if (tmp$ier!=0) stop("Error in REGCF: can't happen")

  tmp$beta
}

"singcheck.bigqr" <-
function(bigQR){
  bigQR <- .Call("singcheckQR", bigQR)
}


"update.biglm" <-
function(object,moredata,...){
    mf<-model.frame(object$terms, moredata)
    mm<-model.matrix(object$terms, mf)
    if (is.null(object$weights))
      w<-NULL
    else
      w<-model.frame(object$weights, moredata)[[1]]
    if (!identical(object$assign, attr(mm,"assign")))
        stop("model matrices incompatible")
    object$qr<-update.bigqr(object$qr, mm, model.response(mf),w)
    object$n<-object$n+NROW(mm)
    if(!is.null(object$sandwich)){
      p<-ncol(mm)
      n<-nrow(mm)
      xx<-matrix(nrow=n,ncol=p*(p+1))
      xx[,1:p]<-mm*model.response(mf)
      for(i in 1:p)
        xx[,p*i+(1:p)]<-mm*mm[,i]
      xyqr<-update(object$sandwich$xy,xx,rep(0,n),w)
      object$sandwich<-list(xy=xyqr)
    }
    object
  }

"update.bigqr" <-
function(bigQR, X, y, w=NULL,
                       singcheck=FALSE, add.intercept=FALSE){
  if (NCOL(X)+add.intercept!=length(bigQR$D))
    stop("Wrong number of columns")
  if (length(y)!=NROW(X))
    stop("Wrong number of rows")
  if (is.null(w)) w<-rep(1.0, length(y))
  if (length(y)!=length(w))
    stop("`weights' has wrong length")
  bigQR<-.Call("updateQR",X, y, w, bigQR, add.intercept)
  
  if (singcheck)
    bigQR<-.Call("singcheckQR",bigQR);

  bigQR
}

"vcov.biglm" <-
function(object,...){
   if(!object$qr$checked)
      object$qr<-singcheck.bigqr(object$qr)
   p<-length(object$qr$D)
   R<-diag(p)
   R[row(R)>col(R)]<-object$qr$rbar
   R<-t(R)
   R<-sqrt(object$qr$D)*R
   ok<-object$qr$D!=0
   R[ok,ok]<-chol2inv(R[ok,ok])
   R[!ok,]<-R[,!ok]<-NA
   dimnames(R)<-list(object$names, object$names)
   
   if(!is.null(object$sandwich)){
     xyqr<-singcheck.bigqr(object$sandwich$xy)
     rxy<-diag(p*(p+1))
     rxy[row(rxy)>col(rxy)]<-xyqr$rbar
     rxy<-t(rxy)
     rxy<-sqrt(xyqr$D)*rxy
     M<-t(rxy)%*%rxy
     
     beta<-coef(object)
     beta[!ok]<-0
     bbeta<-kronecker(diag(p),beta)
     ##FIXME: singularities in beta
     Vcenter<-M[1:p,1:p] + t(bbeta)%*%M[-(1:p),-(1:p)]%*%bbeta -
       t(bbeta)%*%M[-(1:p),1:p] - M[1:p,-(1:p)]%*%bbeta
     
     V<-matrix(NA,p,p)
     V[ok,ok]<-R[ok,ok]%*%Vcenter[ok,ok]%*%R[ok,ok]
     dimnames(V)<-list(object$names, object$names)
     attr(V,"model-based")<-R*object$qr$ss/(object$n-p+sum(!ok))
   } else {
     V<-R*object$qr$ss/(object$n-p+sum(!ok))
   }

   V
}

"vcov.bigglm" <-
function(object, dispersion=NULL,  ...){
   if(!object$qr$checked)
      object$qr<-singcheck.bigqr(object$qr)
   p<-length(object$qr$D)
   R<-diag(p)
   R[row(R)>col(R)]<-object$qr$rbar
   R<-t(R)
   R<-sqrt(object$qr$D)*R
   ok<-object$qr$D!=0
   R[ok,ok]<-chol2inv(R[ok,ok])
   R[!ok,]<-R[,!ok]<-NA
   dimnames(R)<-list(object$names, object$names)
   
   if(!is.null(object$sandwich)){
     xyqr<-singcheck.bigqr(object$sandwich$xy)
     rxy<-diag(p*(p+1))
     rxy[row(rxy)>col(rxy)]<-xyqr$rbar
     rxy<-t(rxy)
     rxy<-sqrt(xyqr$D)*rxy
     M<-t(rxy)%*%rxy
     
     beta<-coef(object)
     beta[!ok]<-0
     bbeta<-kronecker(diag(p),beta)
     ##FIXME: singularities in beta
     Vcenter<-M[1:p,1:p] + t(bbeta)%*%M[-(1:p),-(1:p)]%*%bbeta -
       t(bbeta)%*%M[-(1:p),1:p] - M[1:p,-(1:p)]%*%bbeta
     
     V<-matrix(NA,p,p)
     V[ok,ok]<-R[ok,ok]%*%Vcenter[ok,ok]%*%R[ok,ok]
     dimnames(V)<-list(object$names, object$names)
   }
   
   ddf<-object$n-p+sum(!ok)
   if (is.null(dispersion)){
       if (object$family$family %in% c("poisson", "binomial"))
           dispersion <-1
       else
           dispersion <- object$rss/ddf
   }

   if (is.null(object$sandwich))
       V<-R*dispersion
   else
       attr(V,"model-based") <- R*dispersion

   V
}

