Gallery: SeqControl

The fraction of yes votes (confidence that 6 lanes of sequencing will achieve 50x coverage) received by each single-lane testing input shows clear separation between true positive and true negative observations (black vs. grey bars). Covariates representing metric values for single-lane inputs show that multiple metrics influence the number of votes received.

### PREAMBLE #######################################################################################
library(BoutrosLab.plotting.general);
library(BoutrosLab.utilities);
 
# read in data
plot.data <- read.table('seqcontrol_machine_learning_plot_data.txt', header = TRUE);
 
### DATA PROCESSING ###############################################################################
# Calculate accuracy, sensitivity, specificity
testing.data.results <- table(
  factor(plot.data$prediction, levels = c(0,1)),
  factor(plot.data$outcome, levels = c(0,1)),
  dnn = c('pred', 'obs')
);
 
accuracy <- (testing.data.results[1,1] + testing.data.results[2,2]) / sum(testing.data.results);
sensitivity <- testing.data.results[2,2] / (testing.data.results[2,2] + testing.data.results[1,2]);
specificity <- testing.data.results[1,1] / (testing.data.results[1,1] + testing.data.results[2,1]);
 
# Flag for whether covariates plot should contain column grid or not 
gridcol <- ifelse(nrow(plot.data) < 250, TRUE, FALSE);
 
### HELPER FUNCTIONS ##############################################################################
# function for creating miscellanious legends
# col - colour, met - metric, lab - label
create.legend <- function(col, met, lab){
  # figure out the cuts
  data <- plot.data[,met];
  cuts <- pretty(
    c(min(data), max(data)),
    n = 10
  );
 
  # figure out a colour scheme
  ColourFunction <- colorRamp(c('white', col), space = 'Lab');
  my.palette <- rgb(ColourFunction(seq(0, 1, length.out = length(cuts) -1)), maxColorValue = 255);
 
  # middle index, used for getting the colours
  mid.idx <- ceiling(length(my.palette) / 2);
 
  # legend values
  vals <- format(
    c(
      cuts[3], 
      (cuts[mid.idx] + cuts[mid.idx + 1]) / 2, # we don't need this value for ranges
      cuts[length(cuts)-3] 
    ),
    scientific = FALSE
  );
  if(met == 'Unique.start.points'){
    print(lab)
    # reformat label
    # lower value in scientific notation
    l.s <- scientific.notation(x=as.numeric(vals[1]),digits=2);
    lb  <- l.s$base;
    le  <- l.s$exponent;
    # upper value in scientific notation
    u.s <- scientific.notation(x=as.numeric(vals[3]),digits=2);
    ub  <- u.s$base;
    ue  <- u.s$exponent;
    # make labels
    labels <- c(
      as.expression(bquote("<" ~ .(lb) %*% 10^.(le))),  
      as.expression(bquote(.(lb) %*% 10^.(le) ~ "-" ~ .(ub) %*% 10^.(ue))), 
      as.expression(bquote(">" ~ .(ub) %*% 10^.(ue)))
    )
  } else{
    labels <- as.character(
      c(
        sprintf("< %s", vals[1]), 
        sprintf("%s - %s", vals[1], vals[3]),
        sprintf("> %s", vals[3]))
    )
  }
 
  legend <- list(
    legend = list(
      colours = c(
        my.palette[1],
        my.palette[mid.idx],
        my.palette[length(my.palette)]
      ),
      labels = labels,
      title = as.expression(
        substitute(
          bold(
            underline(label)
          ),
          env = list(
            label = lab
          )
        )
      )
    )
  );
 
  return(legend);
}
 
# function for creating miscellanious covariate heatmaps
# col - colour, met - metric, lab - label
create.covariate <- function(col, met, lab){
  # figure out the cuts
  data <- plot.data[,met];
  cuts <- pretty(
    c(min(data), max(data)),
    n = 10
  );
 
  # figure out a colour scheme
  ColourFunction <- colorRamp(c('white', col), space = 'Lab');
  my.palette <- rgb(ColourFunction(seq(0, 1, length.out = length(cuts) -1)), maxColorValue = 255);
 
  heatmap <- create.heatmap(
    x = t(data),
    at = cuts,
    clustering.method = 'none',
    colour.scheme = my.palette,
    xaxis.lab = NULL,
    yaxis.lab = lab,
    yat = 1,
    yaxis.cex = 1,
    print.colour.key = FALSE,
    grid.col = gridcol
  );
 
  return(heatmap);
}
 
### COVARIATE LEGENDS #############################################################################
# names of covariates
covariates.names <- c(
  'Sample',
  'FFPE',
  '% Bases > 0 quality',
  'Unique start points',
  'Average reads/start'
  );
# sample type legend, in two columns
# sample covariate colours
colour.scheme.large <- c(
  'rosybrown1',
  'rosybrown4',
  'red',
  'darkred',
  'darkorange',
  'gold',
  'darkolivegreen3',
  'darkgreen',
  'aquamarine',
  'cyan4',
  'dodgerblue',
  'darkblue',
  'plum',
  'magenta',
  'darkorchid',
  'purple4',
  'gray70',
  'gray30'
);
num.samples <- length(levels(plot.data$CPCG));
colour.scheme <- colour.scheme.large[1:num.samples];
if (num.samples <= 10) {
  colour.scheme <- default.colours(num.samples, palette.type = 'survival');
}
# samples data
samples <- as.vector(sort(unique(plot.data$CPCG)));
# create the two legends
sample.legend.1 <- list(
  legend = list(
    colours = colour.scheme[1:6],
    labels = samples[1:6],
    title = expression(bold(underline('Sample')))
  )
);
sample.legend.2 <- list(
  legend = list(
    colours = colour.scheme[7:12],
    labels = samples[7:12],
    title = expression(' ')
  )
);
 
# sample preparation legend
prep.legend = list(
  legend = list(
    colours = c('white', 'black'), 
    labels = c('Frozen', 'FFPE'),
    title = expression(bold(underline('Sample preparation')))
  )
);
 
# base quality legend
base.legend <- create.legend(
  col='darkorange', 
  met='X..Bases...0.quality', 
  lab='% Bases > 0 quality'
);
 
# unique starts legend
uni.start.legend <- create.legend(
  col='darkblue',
  met='Unique.start.points',
  lab='Unique start points'
);
 
# average reads legend
reads.legend <- create.legend(
  col='deeppink',
  met='Average.reads.start',
  lab='Average reads/start'
);
 
# collect covariate legends together
legends <- c(
  sample.legend.1, 
  sample.legend.2, 
  prep.legend, 
  base.legend, 
  uni.start.legend, 
  reads.legend
);
covariate.legends <- BoutrosLab.plotting.general::legend.grob(
  legends = legends,
  title.cex = 0.75,
  title.just = 'left',
  label.cex = 0.65,
  size = 2,
  between.row = 1.5,
  between.col = 0.5,
  layout = c(2,3)
);
 
### BARPLOT LEGEND #################################################################################
obs.leg <- list(
  legend = list(
    colours = c('grey', 'black'),
    labels = c(
      as.expression(substitute(x < '50x',list(x = ''))),
      as.expression(substitute(x >= '50x',list(x = '')))
    ),
    title = expression(underline('Observed'))
  )
);
obs.leg.grob <- legend.grob(legends = obs.leg);
 
### COVARIATE HEATMAPS ###############################################################################
# sample covariate
sample.heatmap <- create.heatmap(
  x = t(as.numeric(plot.data$CPCG)),
  clustering.method = 'none',
  colour.scheme = colour.scheme,
  at = seq(0.5, num.samples + 0.5, 1),
  xaxis.lab = NULL,
  yaxis.lab = 'Sample',
  yat = 1,
  yaxis.cex = 1,
  total.colours = length(colour.scheme),
  print.colour.key = FALSE,
  grid.col = gridcol
);
 
# preparation method covariate
ffpe.heatmap <- BoutrosLab.plotting.general::create.heatmap(
  x = data.frame(plot.data$ffpe.status),
  clustering.method = 'none',
  colour.scheme = c('white', 'black'), 
  at = c(-0.5, 0.5, 1.5),
  xaxis.lab = NULL,
  yaxis.lab = 'FFPE',
  yat = 1,
  yaxis.cex = 1,
  total.colours = 2,
  print.colour.key = FALSE,
  grid.col = gridcol
);
 
# base quality covariate
base.covariate <- create.covariate(
  col='darkorange', 
  met='X..Bases...0.quality', 
  lab='% Bases > 0 quality'
);
 
# unique starts covariate
uni.start.covariate <- create.covariate(
  col='darkblue',
  met='Unique.start.points',
  lab='Unique start points'
);
 
# average reads covariate
reads.covariate <- create.covariate(
  col='deeppink',
  met='Average.reads.start',
  lab='Average reads/start'
);
 
### VOTES BARGRAPH ################################################################################
# bargraph colours
barplot.col.choices <- c('grey', 'black');
barplot.cols <- barplot.col.choices[factor(plot.data$outcome, levels = c(0,1))];
 
# create the bargraph
votes.barplot <- create.barplot(
  formula = votes ~ idx,
  border.col = 'transparent',
  data = plot.data[,c('yes.votes', 'idx')],
  col = barplot.cols,
  right.padding = 2,
  xaxis.lab = rep('', length(votes)),
  ylimits = c(0, 1.05),
  abline.h = 0.5,
  abline.lty = 2,
  abline.col = 'darkgrey'
);
 
### MULTIPLOT ######################################################################################
# objects to include in the plot
plot.objects <- list(
  reads.covariate,
  uni.start.covariate,
  base.covariate,
  ffpe.heatmap,
  sample.heatmap,
  votes.barplot
);
# identify where plotting objects should be placed in the multiplot
yat.vals <- list();
for (n in 1:(length(plot.objects)-1)) {
  yat.vals <- c(yat.vals, list(NULL));
}
# combine plots
create.multiplot(
  plot.objects = plot.objects,
  filename = 'testing_votes.tiff',
  panel.heights = c(1, rep(0.05, length(plot.objects)-1)),
  yat = c(yat.vals, list(seq(0,1,0.25))),
  yaxis.cex = 1.15,
  ylab.label = c(' ', 'Fraction Yes-Votes', ' ', ' ', ' '),
  ylab.padding = 6.5,
  right.padding = 35,
  bottom.padding = -5,
  xat = NULL,
  y.spacing = -1.5,
  ylimits = list(c(0.9,1), c(0.9,1), c(0.9,1), c(0.9,1), c(0.9,1), c(0.1,1)),
  legend = list(
    inside = list(
      fun = covariate.legends,
      x = 1.02,
      y = 1
    ),
    inside = list(
      x = 0.85,
      y = 0.98,
      fun = obs.leg.grob
    ),
    inside = list(
      fun = draw.key,
      args = list(
        key = list(
          text = list(
            lab = paste('',
                        c(
                          covariates.names
                        )
            ),
            cex = 0.75
          ),
          padding.text = 4.5
        )
      ),
      x = 1,
      y = 0.23
    )
  ),
  print.new.legend = TRUE,
  width = 12,
  height = 6,
  resolution = 1200,
);

Created by Pretty R at inside-R.org