# Copyright (c) 2014,
# Mathias Kuhring, KuhringM@rki.de, Robert Koch Institute, Germany, 
# All rights reserved. For details, please note the license.txt.

#
getExpoClassThresholds <- function(training, quantile=0.25){
  
  training$NormedMatchCount1 <- 1 - training$NormedMatchCount1
  training$NormedMatchCount2 <- 1 - training$NormedMatchCount2
  training$NormedContigLength1 <- 1 - training$NormedContigLength1
  
  firstMetric <- which(colnames(training)=="NormedMatchCount1")
  lastMetric <- ncol(training)
  
  quant <- vector(mode="numeric")

  quantiles <- rep(quantile, length(firstMetric:lastMetric))
  names(quantiles) <- names(training)[firstMetric:lastMetric]
  
  for (i in firstMetric:lastMetric){
    fit <- fitExpDist(training[[i]], plot=F, main=names(training)[i])
    quant <- append( quant, expPosByQuantile(fit, quantile=quantiles[names(training)[i]], plot=F) )
  }
  
  names(quant) <- names(training)[firstMetric:lastMetric]
  
  quant["NormedMatchCount1"] <- 1 - quant["NormedMatchCount1"]
  quant["NormedMatchCount2"] <- 1 - quant["NormedMatchCount2"]
  quant["NormedContigLength1"] <- 1 - quant["NormedContigLength1"]
  
  return(quant)
}


#
expoFit <- function(scores, pdf=TRUE, 
                    filename=paste("expofit_", 
                                   format(Sys.time(), "%Y-%m-%d_%H-%M-%S"),
                                   ".pdf", sep="")){
        
    fM <- which(colnames(scores)=="NormedMatchCount1")
    lM <- ncol(scores)
    metrics <- scores[ ,fM:lM]
    
    metrics$NormedMatchCount1 <- 1 - metrics$NormedMatchCount1
    metrics$NormedMatchCount2 <- 1 - metrics$NormedMatchCount2
    metrics$NormedContigLength1 <- 1 - metrics$NormedContigLength1
    
    if (pdf){
      pdf(file=filename, width=5.875, height=5.875, onefile=TRUE)
    }
    
    par.default <- par()
    par.mar <- c(3, 3, 2, 1)
    par.mgp <- c(1.5, 0.5, 0)
    par(mfrow=c(4,2),mar=par.mar, mgp=par.mgp)
    
    fM <- which(colnames(metrics)=="NormedMatchCount1")
    lM <- ncol(metrics)
        
    quantiles <- c(0.50, 0.50, 0.50, 0.50, 0.50, 0.25, 0.33, 0.50, 0.33, 0.25, 0.25, 0.25)
    quantiles <- rep(0.25, length(fM:lM))
    names(quantiles) <- names(metrics)[fM:lM]
    
    for (name in names(metrics)[fM:lM]){
      fit <- fitExpDist(metrics[[name]], plot=T, main=name)
      quant <- expPosByQuantile(fit, quantile=quantiles[name], metrics[[name]], plot=T)
    }
    
#     par(par.default)
    
    if (pdf) garbage <- dev.off()
}


# fit an exponential distribution to a histogram of metric
fitExpDist <- function(metric, plot=FALSE, logIt=FALSE, ...){ 
  
  # use logarithmic scale (+1 shift cause of possible zeros)
  if (logIt) metric <- log(metric +1)
  
  # fit exponential distribution
  require(MASS)
  fit <- fitdistr(metric, densfun="exponential")
  
  # plot it
  if (plot){
    breaks <- getBreaks(metric)
    
    h <- hist(metric, breaks=breaks, freq=F, plot=plot, ...)
    x <- h$mids
    y <- h$density
    
    lines(x, dexp(x, rate=fit$estimate), col="red") 
  }
  
  return(fit)
}

expPosByQuantile <- function(fit, quantile=0.50, metric, plot=FALSE, expIt=FALSE, ...){
  
  # calc quantiles from exponential fit
  quant <- qexp(quantile, rate=fit$estimate)
#   quant <- qnorm(quantile, mean=fit$estimate["mean"], sd=fit$estimate["sd"])
  
  # plot abline at quant positions
  if (plot){
    y <- diff(par("usr")[3:4])*0.6 + par("usr")[3]
    for (i in 1:length(quantile)){
      abline(v=quant[i], col="red", ...)
      lab <- paste(100*quantile[i], "% quantile", "\n# <=", ":", sum(metric<=quant[i]), "\n#  >", ":", sum(metric>quant[i]), sep="")
      text(x=quant[i],y=y, pos=4, labels=lab, col="black")
    }
  }
  
  # reverse logarithmic scale (-1 to reverse +1 shift)
  if (expIt) quants <- exp(quant) -1
  
  return(quant)
}


getBreaks <- function(metric){
  
  if (diff(range(metric)) <= 1){ # if normed metric
    breaks=seq(from=0.00, to=1.00, by=0.01)
  } else if (all(floor(metric) == metric, na.rm = TRUE)) { # if integer scale 
    breaks=(min(metric)-0.5):(max(metric)+0.5)
  } else { # other scales, e.g. log scale
    breaks=100
  }
  
  return(breaks)
}