Context

The scoop (Sparse cooperative regression) R package fits coop-Lasso, group-Lasso and Lasso solution paths for linear regression and logistic regression. The cooperative-Lasso (in short coop-Lasso) may be viewed as a modification of the group-Lasso penalty that promotes sign coherence and that allows zeros within groups.

Download

R manual (uncomplete)

package sources

windoz binary version

To install, run R CMD INSTALL scoop_0.x-x.tar.gz in a terminal (tested on Linux ubuntu and Mac OS 10.9).

R code to reproduce some Figures of the journal paper

Basal tumor

Just launch the demo script linked to the package: in R,

library(scoop)
demo(basal_tumor)

Breiman experiments

Launch the following script to produce Figure 5 of the paper (see below). Needs ‘‘scoop’’ package installed.

rm(list=ls())
library(scoop)
##BREIMAN    Breiman sample generator for testing linear regression.
##  [X,Y,BETA] = BREIMAN(CORR,H,T,SEED) produces a sample of 60 (X,Y) 
##  pairs, of 30 covariates X(i,:) and one explained variable Y(i). 
##  X is drawn from a multivariate normal distribution N(0,S2),
##  where S2(i,j) = CORR^abs(i-j). 
##  CORR (default = 0) should be in the range [-1 1];
##  Y = X * BETA + EPSILON
##  EPSILON is drawn from a normal distribution N(0,1)
##  BETA is the sum of three waves centered in 5, 15 and 25:
##  BETA(k+i) = C*(H-i)^2 , with abs(i) < H, k = [ 5 15 25], where 
##  C is a constant such that the  R^2 is about 0.75, and
##  H (default = 1) is an integer governing the wave width (within-group
##  sparsity).  H should be in {1,2,3,4,5} (corresponding to {3,9,15,21,27}
##  non-zero coefficients).
##  T (default = 3) sets the number of active groups. T should be in {1,2,3}. 
##  Default is the original Breiman setup
##  SEED is an optional parameter selecting the seed of the normal 
##  pseudo-random generator used to generate X and EPSILON.

## References:
## Breiman, L., Heuristics of instability and stabilization in model 
## selection, The Annals of Statistics, 24(6), pp 2350--2383, 1996.
breiman_coop <- function(corr=0, h=3, T=4, N = 40) {
                                        # seed=set.seed(floor(runif(1,1:10000)))) {

  d <- 90          # variables
  K <- c(5,14,23,31,40,49,58,67,77,86)  # cluster centers
  
  X    <- matrix(rnorm(N*d),N,d);
  EXX  <- corr^abs(cbind(1:d) %*% rep(1,d)-cbind(rep(1,d)) %*% c(1:d))
  out  <- eigen(EXX)
  D    <- diag(out$values)
  V    <- out$vectors
  X    <- X %*% sqrt(abs(D)) %*% t(V)  # just in case
  beta <- rep(0,d)

  for (i in 1:length(K)) {
    wave <- pmax( 0 , h - abs(cbind(1:d)-K[i]) )
    beta <- beta + wave^2
  }

  ## keep only t waves (group sparsity)
  if (T <=10 & T >= 0) {
    beta[c((T*9+1):d)] <- 0
  } else {
    stop("T should be in {1,...,10}")
  }

  noise <- rnorm(N,0,1)        # additive noise
  beta <- beta %*% sqrt(3/(beta %*% EXX %*% beta)) # beta normalization R2 = 0.75
  y <- X %*% beta + noise  # compute output

  R2 <- 1-sum(noise^2)/sum((y-mean(y))^2)
  
  return(list(X = X, y = y, beta = beta, R2 = R2))
} 

## ====================================================================

rm(list=ls())
library(scoop)
source("breiman.R")

## 999
set.seed(111)

## T = 3->8
## corrélation -: encore pire
## grande dimension ++ 

## d = 90, 10 groupes de 9, T groupes actifs
##
## plus h est grand, plus favorable ou group-Lasso
## plus h est petit, plus favorable ou Lasso
##
h <- 4
data <- breiman_coop(corr=0.4,h=h,T=3,N=45)

x <- data$X
y <- c(data$y)
beta.star <- data$beta
group <- rep(1:10,each=9)

## Lasso
cat("\n--------------------\n")
cat("Lasso\n")
las <- lasso(x, y, intercept=FALSE, lambda.min=0.4)
las@group <- group

## Group-Lasso
cat("\n--------------\n")
cat("Group-Lasso\n")
grp <- group.lasso(x, y, group, intercept=FALSE, lambda.min=0.4)

## Coop-Lasso
cat("\n--------------------\n")
cat("Cooperative-Lasso\n")
coo <- coop.lasso(x, y, group, intercept=FALSE, lambda.min=0.4)

## Sparse group-Lasso
cat("\n--------------------\n")
cat("Sparse group-Lasso\n")
sgl <- sparse.group.lasso(x, y, group, intercept=FALSE, lambda.min=0.4)

## compute BIC
sgrp <- selection(grp, sigma2=1)
scoo <- selection(coo, sigma2=1)
slas <- selection(las, sigma2=1)
ssgl <- crossval(sgl, K=5)
beta.las <- slas$beta.BIC
beta.grp <- sgrp$beta.BIC
beta.coo <- scoo$beta.BIC
beta.sgl1 <- ssgl@beta.1se
beta.sgl2 <- ssgl@beta.min

lambda.las <- slas$lambda.BIC
lambda.grp <- sgrp$lambda.BIC
lambda.sgl1 <- ssgl@lambda.1se
lambda.sgl2 <- ssgl@lambda.min
lambda.coo <- scoo$lambda.BIC
t.grp    <- "GroupLasso / BIC"
t.coo    <- "CoopLasso / BIC"
t.sgl    <- "Sparse Group-Lasso / CV"
t.las    <- "Lasso / BIC"

lim <- range(c(beta.star, beta.las, beta.sgl1, beta.sgl2, beta.grp))

x11(width=7,height=12)
par(mfrow=c(4,2))
## lasso
plot(las, main=t.las, lwd=2)
abline(v=log10(lambda.las))
plot(beta.las, main="Lasso", pch=16, ylim=lim, cex=2)
lines(beta.star, lty=3, col="black", lwd=2)
## group-lasso
plot(grp, main=t.grp, lwd=2)
abline(v=log10(lambda.grp))
plot(beta.grp, pch=16, main="group", ylim=lim, cex=2)
lines(beta.star, lty=3, col="black", lwd=2)
## sparse group-lasso
plot(sgl, main=t.sgl, lwd=2,
     col = 1+c(group),
     lty = c(group))
abline(v=log10(lambda.sgl1))
abline(v=log10(lambda.sgl2))
plot(beta.sgl1 , pch=16, main="spgroup", ylim=lim, cex=2)
lines(beta.star, lty=3, col="black", lwd=2)
## coop lasso
plot(coo, main=t.coo, lwd=2)
abline(v=log10(lambda.coo))
plot(beta.coo, pch=16,  main="coop", ylim=lim, cex=2)
lines(beta.star, lty=3, col="black", lwd=2)

breiman