%\VignetteIndexEntry{Biostrings Quick Overview}
%\VignetteKeywords{DNA, RNA, Sequence, Biostrings, Sequence alignment} 
%\VignettePackage{Biostrings}

%
% NOTE -- ONLY EDIT THE .Rnw FILE!!!  The .tex file is
% likely to be overwritten.
%
\documentclass[10pt]{article}

\usepackage{times}
\usepackage{hyperref}

\usepackage[margin=0.65in]{geometry}

% Shorthands for the math style switches \scriptscriptstyle and \scriptstyle.
% NOTE(review): neither appears to be used in the body of this document.
\newcommand{\scscst}{\scriptscriptstyle}
\newcommand{\scst}{\scriptstyle}

% Semantic markup macros. The body of the document names the *kind* of thing
% being typeset (package, function, class, ...) and the font choice lives
% here, so the appearance can be changed in one place.
%   \R            the letter "R" in sans serif
%   \code{..}     typewriter
%   \term{..}     emphasized
%   \Rpackage{..} sans serif
%   \Rfunction{..}, \Robject{..}            typewriter
%   \Rclass{..}, \Rmethod{..}, \Rfunarg{..} italic
\newcommand{\R}{{\textsf{R}}}
\newcommand{\code}[1]{{\texttt{#1}}}
\newcommand{\term}[1]{{\emph{#1}}}
\newcommand{\Rpackage}[1]{\textsf{#1}}
\newcommand{\Rfunction}[1]{\texttt{#1}}
\newcommand{\Robject}[1]{\texttt{#1}}
\newcommand{\Rclass}[1]{{\textit{#1}}}
\newcommand{\Rmethod}[1]{{\textit{#1}}}
\newcommand{\Rfunarg}[1]{{\textit{#1}}}

\bibliographystyle{plainnat} 
 
\begin{document}
%\setkeys{Gin}{width=0.55\textwidth}

\title{Biostrings Quick Overview}
\author{Herv\'e Pag\`es \\
  Fred Hutchinson Cancer Research Center \\
  Seattle, WA}
\date{\today}
\maketitle

%\tableofcontents

Please note that \emph{most}, but \emph{not all}, of the functionality
provided by the \Rpackage{Biostrings} package is listed in this
document.

%-----------------------------------------------------------------------------

\begin{table}[ht]
\begin{center}
\begin{tabular}{p{2.5in}|p{4in}}
{\bf Function} & {\bf Description} \\
\hline
\Rfunction{length} & Return the number of sequences in an object. \\
\hline
\Rfunction{names} & Return the names of the sequences in an object. \\
\hline
\Rfunction{[} & Extract sequences from an object. \\
\hline
\Rfunction{head}, \Rfunction{tail} & Extract the first or last sequences
    from an object. \\
\hline
\Rfunction{rev} & Reverse the order of the sequences in an object. \\
\hline
\Rfunction{c} & Combine in a single object the sequences from 2 or more objects. \\
\hline
\Rfunction{width}, \Rfunction{nchar} & Return the sizes (i.e.\ number of
    letters) of all the sequences in an object.\\
\hline
\Rfunction{==}, \Rfunction{!=} & Element-wise comparison of the sequences
    in 2 objects. \\
\hline
\Rfunction{match}, \Rfunction{\%in\%} &
    Analog to \Rfunction{match} and \Rfunction{\%in\%} on character vectors. \\
\hline
\Rfunction{duplicated}, \Rfunction{unique} &
    Analog to \Rfunction{duplicated} and \Rfunction{unique} on character
    vectors. \\
\hline
\Rfunction{sort}, \Rfunction{order} &
    Analog to \Rfunction{sort} and \Rfunction{order} on character vectors,
    except that the ordering of DNA or Amino Acid sequences doesn't
    depend on the locale. \\
\hline
\Rfunction{relist}, \Rfunction{split}, \Rfunction{extractList} &
    Analog to \Rfunction{relist} and \Rfunction{split} on character vectors,
    except that the result is a \Rclass{DNAStringSetList} or
    \Rclass{AAStringSetList} object.
    \Rfunction{extractList} is a generalization of \Rfunction{relist} and
    \Rfunction{split} that supports \emph{arbitrary} groupings. \\
\hline
\end{tabular}
\end{center}
\caption{\bf Low-level manipulation of \Rclass{DNAStringSet} and
         \Rclass{AAStringSet} objects.}
\label{table:Low_level_manipulation}
\end{table}

%-----------------------------------------------------------------------------

\begin{table}[ht]
\begin{center}
\begin{tabular}{p{2.5in}|p{4in}}
{\bf Function} & {\bf Description} \\
\hline
\Rfunction{alphabetFrequency}\par
\Rfunction{letterFrequency} &
    Tabulate the letters (all the letters in the alphabet for
    \Rfunction{alphabetFrequency}, only the specified letters for
    \Rfunction{letterFrequency}) of a sequence or set of sequences. \\
\hline
\Rfunction{letterFrequencyInSlidingView} &
    Specialized version of \Rfunction{letterFrequency} that tallies the
    requested letter frequencies for a fixed-width view that is conceptually
    slid along the input sequence. \\
\hline
\Rfunction{consensusMatrix} &
    Computes the consensus matrix of a set of sequences. \\
\hline
\Rfunction{dinucleotideFrequency}\par
\Rfunction{trinucleotideFrequency}\par
\Rfunction{oligonucleotideFrequency} &
    Fast 2-mer, 3-mer, and k-mer counting for DNA or RNA. \\
\hline
\Rfunction{nucleotideFrequencyAt} &
    Tallies the short sequences formed by extracting the nucleotides found
    at a set of fixed positions from each sequence of a set of DNA or RNA
    sequences. \\
\hline
\end{tabular}
\end{center}
\caption{\bf Counting / tabulating.}
\label{table:Counting_tabulating}
\end{table}

%-----------------------------------------------------------------------------

\begin{table}[ht]
\begin{center}
\begin{tabular}{p{2.5in}|p{4in}}
{\bf Function} & {\bf Description} \\
\hline
\Rfunction{reverse}\par
\Rfunction{complement}\par
\Rfunction{reverseComplement} &
    Compute the reverse, complement, or reverse-complement, of a set of
    DNA sequences. \\
\hline
\Rfunction{translate} &
    Translate a set of DNA sequences into a set of Amino Acid sequences. \\
\hline
\Rfunction{chartr} & Translate the letters in a set of sequences. \\
\hline
\Rfunction{subseq}, \Rfunction{subseq<-}\par
\Rfunction{extractAt}, \Rfunction{replaceAt} &
    Extract/replace arbitrary substrings from/in a string or set of strings. \\
\hline
\Rfunction{replaceLetterAt} & Replace the letters specified by a set of
    positions by new letters. \\
\hline
\Rfunction{padAndClip}, \Rfunction{stackStrings} & Pad and clip strings. \\
\hline
\Rfunction{strsplit}, \Rfunction{unstrsplit} & \Rfunction{strsplit} splits the
    sequences in a set of sequences according to a pattern.
    \Rfunction{unstrsplit} is the reverse operation i.e.\ a fast implementation
    of \code{sapply(x, paste0, collapse=sep)} for collapsing the list elements
    of a \Rclass{DNAStringSetList} or \Rclass{AAStringSetList} object. \\
\hline
\end{tabular}
\end{center}
\caption{\bf Sequence transformation and editing.}
\label{table:Sequence_editing}
\end{table}

%-----------------------------------------------------------------------------

\begin{table}[ht]
\begin{center}
\begin{tabular}{p{2.5in}|p{4in}}
{\bf Function} & {\bf Description} \\
\hline
\Rfunction{matchPattern}\par
\Rfunction{countPattern} &
    Find/count all the occurrences of a given pattern (typically short)
    in a reference sequence (typically long).
    Support mismatches and indels. \\
\hline
\Rfunction{vmatchPattern}\par
\Rfunction{vcountPattern} &
    Find/count all the occurrences of a given pattern (typically short)
    in a set of reference sequences.
    Support mismatches and indels. \\
\hline
\Rfunction{matchPDict}\par
\Rfunction{countPDict}\par
\Rfunction{whichPDict} &
    Find/count all the occurrences of a set of patterns in a reference
    sequence. (\Rfunction{whichPDict} only identifies which patterns in
    the set have at least one match.)
    Support a small number of mismatches. \\
\hline
\Rfunction{vmatchPDict}\par
\Rfunction{vcountPDict}\par
\Rfunction{vwhichPDict} &
    [Note: \Rfunction{vmatchPDict} not implemented yet.]
    Find/count all the occurrences of a set of patterns in a set of
    reference sequences. (\Rfunction{vwhichPDict} only identifies for each
    reference sequence which patterns in the set have at least one match.)
    Support a small number of mismatches. \\
\hline
\Rfunction{pairwiseAlignment} &
    Solve (Needleman-Wunsch) global alignment, (Smith-Waterman) local
    alignment, and (ends-free) overlap alignment problems. \\
\hline
\Rfunction{matchPWM}\par
\Rfunction{countPWM} &
    Find/count all the occurrences of a Position Weight Matrix in a reference
    sequence. \\
\hline
\Rfunction{trimLRPatterns} &
    Trim left and/or right flanking patterns from sequences. \\
\hline
\Rfunction{matchLRPatterns} &
    Find all paired matches in a reference sequence i.e.\ matches specified by
    a left and a right pattern, and a maximum distance between them. \\
\hline
\Rfunction{matchProbePair} &
    Find all the amplicons that match a pair of probes in a reference
    sequence. \\
\hline
\Rfunction{findPalindromes} &
    Find palindromic regions in a sequence. \\
\hline
\end{tabular}
\end{center}
\caption{\bf String matching / alignments.}
\label{table:String_matching_alignments}
\end{table}

%-----------------------------------------------------------------------------

\begin{table}[ht]
\begin{center}
\begin{tabular}{p{2.5in}|p{4in}}
{\bf Function} & {\bf Description} \\
\hline
\Rfunction{readBStringSet}\par
\Rfunction{readDNAStringSet}\par
\Rfunction{readRNAStringSet}\par
\Rfunction{readAAStringSet} &
    Read ordinary/DNA/RNA/Amino Acid sequences from files (FASTA or FASTQ
    format). \\
\hline
\Rfunction{writeXStringSet} &
    Write sequences to a file (FASTA or FASTQ format). \\
\hline
\Rfunction{writePairwiseAlignments} &
    Write pairwise alignments (as produced by \Rfunction{pairwiseAlignment})
    to a file (``pair'' format). \\
\hline
\Rfunction{readDNAMultipleAlignment}\par
\Rfunction{readRNAMultipleAlignment}\par
\Rfunction{readAAMultipleAlignment}&
    Read multiple alignments from a file (FASTA, ``stockholm'',
    or ``clustal'' format). \\
\hline
\Rfunction{write.phylip} &
    Write multiple alignments to a file (Phylip format). \\
\hline
\end{tabular}
\end{center}
\caption{\bf I/O functions.}
\label{table:I_O_functions}
\end{table}


%-----------------------------------------------------------------------------

\begin{table}[ht]
\begin{center}
\begin{tabular}{p{2.5in}|p{4in}}
{\bf Function} & {\bf Description} \\
\hline
\Rfunction{stringDist} &
    Computes the matrix of Levenshtein edit distances, or Hamming distances,
    or pairwise alignment scores, for a set of strings. \\
\hline
\end{tabular}
\end{center}
\caption{\bf Miscellaneous.}
\label{table:Miscellaneous}
\end{table}


\end{document}