\documentclass[10pt,twocolumn]{article}
%\VignetteIndexEntry{HistogramTools-QuickRef}

%\DefineVerbatimEnvironment{Sinput}{Verbatim} {xleftmargin=2em}
%\DefineVerbatimEnvironment{Soutput}{Verbatim}{xleftmargin=2em}
%\DefineVerbatimEnvironment{Scode}{Verbatim}{xleftmargin=2em}
%\fvset{listparameters={\setlength{\topsep}{0pt}}}
%\renewenvironment{Schunk}{\vspace{\topsep}}{\vspace{\topsep}}

% Page geometry for the two-column quick reference layout.
% Negative offsets shift the whole text block left and up, reclaiming
% most of the default margins.
\setlength{\hoffset}{-0.8in}
\setlength{\voffset}{-0.8in}

% No margin notes, running head or footer are used, so the space
% normally reserved for them is set to zero (or close to it).
\setlength{\marginparwidth}{0pt}
\setlength{\marginparsep}{0pt}
\setlength{\oddsidemargin}{0.1in}
\setlength{\headheight}{0pt}
\setlength{\topmargin}{1pt}
\setlength{\headsep}{0pt}
\setlength{\footskip}{0pt}
% Size of the text block; the commented-out lines are alternative
% values in inches.
\setlength{\textheight}{26.7cm}
%\setlength{\textheight}{10in}
\setlength{\textwidth}{18cm}
% Gap between the two text columns.
\setlength{\columnsep}{1cm}
%\setlength{\textwidth}{8in}

\usepackage[colorlinks]{hyperref}

<<echo=FALSE,print=FALSE>>=
# Hidden setup chunk.
# Capture the current graphics parameters and options first; the final
# chunk of the document puts both back.
oldpar <- par(no.readonly = TRUE)
oldoptions <- options(width = 50)

# Attach the package, then fix the RNG seed so the random example data
# is the same on every build.
library(HistogramTools)
set.seed(0)

# Installed package version, interpolated into the title block.
ht.version <- packageDescription("HistogramTools")$Version
@
% closing $ needed here

\author{Murray Stokely}
\title{HistogramTools \Sexpr{ht.version}: Quick Reference Guide}

% start right flush section
% redefine maketitle
%   \let\myMaketitle\maketitle
% Replace the standard title block with a compact, centered heading:
% package name and version, document name, then author and build date,
% closed off by a thin rule as wide as the current column.
\renewcommand\maketitle{%
  \begin{center}
    % \bfseries is the LaTeX2e font switch; the two-letter \bf form it
    % replaces is obsolete.
    {\bfseries
      {\Large HistogramTools \Sexpr{ht.version} \par}
      {\Large Quick Reference Guide \par}
      Murray Stokely \hspace{1cm} \today \par
    }
    % The rule is boxed on its own; a \par inside the \mbox would have
    % no effect in restricted horizontal mode, so none is used.
    \mbox{\rule{\columnwidth}{0.5pt}}
  \end{center}
}

\begin{document}
\maketitle
\thispagestyle{empty}

\paragraph{Histogram Manipulation}

This package includes a number of basic functions for subsetting, trimming,
merging, adding, and otherwise manipulating basic R histogram objects.

% Displayed but not evaluated (eval=FALSE): this is the example code the
% reader sees.  A hidden chunk further down builds h without plotting.
% NOTE(review): the figure chunk that follows calls MergeBuckets(h, 4),
% while this listing shows adj.buckets=2 -- confirm which is intended.
<<echo=TRUE, eval=FALSE>>=
h <- hist(runif(100, 0, 100),
          breaks=seq(from=0,to=200,by=5))
plot(TrimHistogram(h))
plot(SubsetHistogram(h, maxbreak=70))
plot(MergeBuckets(h, adj.buckets=2))
@

<<echo=FALSE, print=FALSE, eval=TRUE>>=
# Hidden, evaluated twin of the displayed example: build the histogram h
# without drawing it, so the later chunks have an object to work on.
# plot is spelled FALSE rather than F because F is an ordinary variable
# that can be reassigned.
h <- hist(runif(100, 0, 100),
          breaks = seq(from = 0, to = 200, by = 5), plot = FALSE)
@

<<exhist,fig=TRUE,echo=FALSE,include=FALSE>>=
# Draw a 2x2 panel: the original histogram h and three manipulated
# versions of it.  The figure file is pulled in manually later
# (include=FALSE).

# mar is c(bottom, left, top, right).  par() returns the previous
# settings as a named list, kept here so they can be restored below.
old.mar <- par(mar = c(3, 4, 1, 1))
par(mfrow = c(2, 2))

plot(h, main = "Histogram h")
plot(TrimHistogram(h), main = "TrimHistogram(h)")
# The argument is named in full (maxbreak) instead of relying on partial
# matching of "max"; the panel title is left as it was.
plot(SubsetHistogram(h, maxbreak = 70), main = "SubsetHistogram(h, max=70)")
plot(MergeBuckets(h, 4), main = "MergeBuckets(h, 4)")

# Restore by handing the saved list straight back to par().  Writing
# par(mar = old.mar) would pass the whole list as the value of mar.
par(old.mar)
@
\vspace{-.3cm}

% Four-panel histogram figure produced by the exhist chunk.
% [ht] is what LaTeX substitutes for a bare [h] anyway (with a warning).
\begin{figure}[ht]
% \centering instead of a center environment: it centers the graphic
% without adding the environment's extra vertical space inside the float.
\centering
\includegraphics[width=3.4in,height=3.4in]{HistogramTools-quickref-exhist}
% There is no \caption in this float, so a reference to this label
% will not resolve to a figure number; the label is kept as an anchor.
\label{fig:exhist}
\end{figure}

\vspace{-.3cm}

\paragraph{Information Loss}

Binning a dataset into a histogram introduces information loss.
The package provides standard histogram distance
measures as well as distance functions on the possible ECDFs that
could have been constructed from the original data: the
Kolmogorov-Smirnov Distance of the Cumulative Curves (KSDCC) and the
Earth Mover's Distance of the Cumulative Curves (EMDCC).  The plots
here show a visual representation of the returned values: EMDCC is the
area of the yellow boxes and KSDCC is the length of the red arrow.

% Side-by-side EMDCC and KSDCC plots of h, followed by the two numeric
% distances.  Both graphics parameters are set in a single par() call:
% nesting a second par() call as an argument would set mar only as a side
% effect and pass its returned list of old settings as an unnamed argument.
% The chunk is echoed, so no R comments are added inside it.
<<errorhist,fig=TRUE,echo=TRUE,include=FALSE,width=8,height=4>>=
par(mfrow=c(1,2), mar=c(5,4,4,0)+0.1)
PlotEMDCC(h)
PlotKSDCC(h)
EMDCC(h)
KSDCC(h)
@

% Two-panel EMDCC / KSDCC figure produced by the errorhist chunk.
% [ht] is what LaTeX substitutes for a bare [h] anyway (with a warning).
\begin{figure}[ht]
% \centering instead of a center environment: it centers the graphic
% without adding the environment's extra vertical space inside the float.
\centering
\includegraphics[width=4in,height=2in]{HistogramTools-quickref-errorhist}
% There is no \caption in this float, so a reference to this label
% will not resolve to a figure number; the label is kept as an anchor.
\label{fig:errorhist}
\end{figure}

\paragraph{Serialize a Histogram}

This package includes functions for reading histograms from, and writing
them to, other tools.  Most notably, it can encode any R histogram into,
and decode it from, a portable protocol buffer format for exchange with
programs written in other languages.

% keep.source=T
% invisible(cat(paste(readLines(system.file("proto/histogram.proto",
%                                package="HistogramTools")), "\n")))
%@

% Displayed but not evaluated (eval=F): shows how a histogram is turned
% into a protocol buffer message and how long its serialized form is.
% The chunk directly below runs the same code, guarded by a check that
% RProtoBuf can be loaded, and prints the result.
<<echo=T,print=F,eval=F,keep.source=T>>=
hist.msg <- as.Message(h)
length(hist.msg$serialize(NULL))
@
<<echo=FALSE,print=TRUE,eval=TRUE>>=
# Evaluated, hidden counterpart of the displayed snippet.  The work is
# guarded by require() so it only runs when RProtoBuf can be loaded.
# Kept as one top-level expression: with print=TRUE its value (the
# serialized length) is what appears in the document.
if (require(RProtoBuf)) {
  hist.msg <- as.Message(h)
  length(hist.msg$serialize(NULL))
}
@
<<echo=FALSE,print=FALSE>>=
# Hidden teardown: put back the options and graphics parameters that
# the setup chunk saved in oldoptions and oldpar.
options(oldoptions)
par(oldpar)
@

\paragraph{Common HistogramTools Functions}

\begin{center}
\begin{small}
\begin{tabular}[t]{@{\hspace{2mm}}l@{\hspace{2mm}}p{6.5cm}}
\hline
\multicolumn{2}{l}{Bin Manipulation}\\
\hline
\texttt{TrimHistogram} & Remove empty consecutive buckets from ends\\
\texttt{AddHistograms} & Aggregate two or more histograms\\
\texttt{MergeBuckets} & Merge adjacent bucket boundaries\\
\texttt{SubsetHistogram}  & Return histogram with subset of buckets\\
\texttt{IntersectHistograms} & Return the intersection of two histograms\\
\texttt{ScaleHistogram} & Scale the counts of a histogram by a factor\\
\hline
\multicolumn{2}{l}{Quantiles and Empirical CDFs}\\
\hline
\texttt{HistToEcdf} & Return the ECDF of histogram \\
\texttt{Count}  & Return the number of data points in hist\\
\texttt{ApproxMean}  & Return an approximate mean of the binned data\\
\texttt{ApproxQuantile}  & Return an approximate quantile of the binned data\\
\hline
\multicolumn{2}{l}{Distance Measures of Two Histograms}\\
\hline
\texttt{minkowski.dist} & The Minkowski distance of order $p$.\\
\texttt{intersect.dist} & The intersection distance.\\
\texttt{kl.divergence} & Kullback-Leibler Divergence.\\
\texttt{jeffrey.divergence} & Jeffrey Divergence.\\
\hline
\multicolumn{2}{l}{Binned CDF Distance Measures}\\
\hline
\texttt{PlotKSDCC}  & Plot ECDF with annotation at point of KS
distance of the cumulative curves\\
\texttt{PlotEMDCC}  & Plot ECDF with annotation showing EMD of the cumulative curves\\
\texttt{KSDCC}  & Return the Kolmogorov-Smirnov distance of the cumulative
curves (btwn 0 and 1)\\
\texttt{EMDCC}  & Return the Earth Mover's distance of the cumulative
curves (btwn 0 and 1)\\
\end{tabular}
\begin{tabular}[t]{@{\hspace{2mm}}l@{\hspace{2mm}}p{6.5cm}}
\hline
\multicolumn{2}{l}{Misc}\\
\hline
\texttt{PlotLog2ByteEcdf} & Plot ECDF of hist with power of two bucket
boundaries\\
{\scriptsize\texttt{PlotLogTimeDurationEcdf}} & Plot ECDF of hist with log-scaled
time duration bucket boundaries\\
\texttt{as.histogram} & Parse a HistogramState protocol buffer and return an R histogram\\
\texttt{as.Message}  & Serialize an R histogram as a HistogramState protocol buffer\\
{\tiny\texttt{ReadHistogramsFromDtraceOutputFile}} & Read histograms
from DTrace output\\
\hline
\end{tabular}
\end{small}
\end{center}
\end{document}
