%%% ====================================================================
%%% @LaTeX-doc-source-file{
%%%   filename  = "inicap.dtx",
%%%   version   = "1.04",
%%%   date      = "2002/02/08",
%%%   time      = "12:52:30 EST",
%%%   author    = "Michael J Downes",
%%%   address   = "American Mathematical Society,
%%%                Publications Technical Group,
%%%                PO Box 6248,
%%%                Providence, RI 02940,
%%%                USA",
%%%   email     = "tech-support@ams.org",
%%%   URL       = "http://www.ams.org/",
%%%   keywords  = "capitalization, initial caps",
%%%   abstract  = "LaTeX package for applying `initial caps'
%%%                capitalization to English text.",
%%%   checksum  = "61425 464 1936 15968",
%%%   docstring = "The checksum field, produced by Robert Solovay's
%%%                checksum utility, gives CRC-16 checksum, lines,
%%%                words, and characters.",
%%%  }
%%% ====================================================================
% \iffalse
%<*driver>
% Documentation driver: running LaTeX directly on this file executes
% the lines below, which typeset the documentation with the amsdtx
% class.
\NeedsTeXFormat{LaTeX2e}
\documentclass{amsdtx}
% Fallback definitions for two shorthands used in the documentation
% text; they take effect only if the names are still undefined here.
\providecommand{\mdash}{\textemdash}
\providecommand{\qq}[1]{\textquotedblleft#1\textquotedblright}
\begin{document}
\title{The \pkg{inicap} package}
\author{Michael~J. Downes\\American Mathematical Society}
\date{Version \fileversion, \filedate}
% NOTE(review): presumably this reads the present file a second time
% as documentation source; the command is not defined in this file, so
% confirm against the amsdtx class.
\hDocInput{inicap.dtx}
\end{document}
%</driver>
% \fi
%
% \maketitle
% \section{Introduction}
%
%    The \pkg{inicap} package provides a function \cs{inicap} for
%    converting English titles from normal upper/lower case to
%    \qq{initial caps} form. Most languages other than English have
%    nothing like this, thank heavens. So we worry only about English,
%    and use this restriction to make some simplifying assumptions, such
%    as \qq{If a word doesn't begin with an explicit lowercase letter,
%    it doesn't need any change in its capitalization}. (Think about it
%    \ldots)
%
%    Here is a synopsis of the rules given in the \emph{Chicago Manual
%    of Style} [13th ed., 1982, Univ.\ Chicago Press] for
%    capitalizing titles of written works in English:
% \begin{quote}
%    Capitalize each word, including pronouns and subordinate
%    conjunctions, except for articles, coordinate conjunctions, and
%    prepositions, or the word \emph{to} in infinitives. Always
%    capitalize the first and last word of the title and the first and
%    last word of any subtitles that it may contain. Don't capitalize
%    the second or later word in a hyphenated compound unless it is a
%    noun or proper adjective, or it has equal force with the first
%    word.
% \end{quote}
%
%    The \cn{inicap} command does not presently do any special handling
%    of hyphenated compounds: they get all their words capitalized\mdash
%    i.e., we assume for simplicity that a word following a hyphen
%    \emph{is} a noun, or a proper adjective, or has equal force with
%    the first word; in practice this seems to be true nearly all the
%    time anyway.
%
%    Usage of the \cn{inicap} command is:
%\begin{verbatim}
% \inicap{some text}
%\end{verbatim}
%    This initial-capitalizes the words in the given text according to
%    the rules given by the Chicago Manual of Style (more or less). Math
%    formulas are skipped. Roughly speaking, a word to be capitalized
%    begins with a letter and ends with some kind of space, or perhaps
%    a hyphen. In order to actually implement this notion, I formalized
%    it as follows:
%
%    A \emph{word} is defined as a sequence of non-space,
%    non-punctuation tokens. If the sequence consists only of letters,
%    and if the first letter is lowercase, then the word is a candidate
%    for capitalization. In this case the first letter will always be
%    capitalized unless the entire word matches an element of the
%    \qq{pac} list (prepositions, articles, and conjunctions).
%    If you come across some unusual preposition that is getting
%    capitalized, it probably means it is not included in the default
%    value of the pac list. Never fear! You can add more words to it
%    with the \cn{pacwords} command:
%\begin{verbatim}
% \pacwords{anent,forby,re}
%\end{verbatim}
%
%    The following tokens that may begin a math formula cause
%    \cs{inicap} to scan to the matching end token and pass on unchanged
%    everything in between.
%\begin{verbatim}
% $ \( \begin
%\end{verbatim}
%
%    In addition to normal characters of category 10 or 12, the
%    following control sequences are also classified as space or
%    punctuation:
% \begin{center}
%  \verb'\slash' \verb'\\' \verb'~' \verb*'\ ' (control-space)
% \end{center}
%    \emph{All other} control sequences\mdash e.g., \verb'\"'\mdash
%    become part of the current word, and prevent the current word from
%    being capitalized. Since our scope is specifically restricted to
%    English text, this effect is normally harmless in practice (any
%    word containing an accent command is probably a proper noun anyway,
%    hence already capitalized).
%
% \StopEventually{}
%
% \section{Implementation}
%    Standard declaration of package name and date.
%    \begin{macrocode}
\NeedsTeXFormat{LaTeX2e}
\ProvidesPackage{inicap}[2002/02/08 v1.04]
%    \end{macrocode}
%
%    A couple of utility functions to start with.
%    \begin{macrocode}
% Short internal aliases for the two expansion-control primitives.
\let\@xp=\expandafter
\let\@nx=\noexpand
%    \end{macrocode}
%
%    When a word is being collected, it will not end up being
%    considered for capitalization unless it contains all letters and is
%    ended by a space-like token or a punctuation-like token. Once such
%    a word terminator has been detected, any following spacey-punctual
%    tokens are passed through until some other kind of token arrives.
%    The new token is either (a) a letter, in which case we begin again
%    to collect letters for possible capitalizing, or (b) a start-math
%    token, in which case we want to skip over the entire formula,
%    including any letters, spaces, and punctuation that it may contain,
%    or (c) none of the above, in which case we consider it to be the
%    start of a not-to-be-capitalized \qq{word} and skip onward to the
%    next word-terminating token.
%
%    To simplify the token testing, control sequences that represent
%    spaces or punctuation are temporarily reduced to a minimum number
%    of different meanings. And \cn{-} is made to test as a letter so
%    that it does not interrupt scanning of the current word.
%
%    \begin{macrocode}
% Local redefinitions made at the start of every conversion, so that
% the scanner needs only a few comparisons to classify a token.
\def\inicap@setup{%
% The tie, the double-backslash line break and the linebreak command
% all take the meaning of control space.
  \let~=\ %
  \let\\=\ %
  \let\linebreak=\ %
% The slash command takes the meaning of an ordinary slash character.
  \let\slash=/%
% The discretionary-hyphen command tests as a letter, so it does not
% interrupt the word being scanned.
  \let\-=X%
}
%    \end{macrocode}
%
%    \begin{macrocode}
% Token register that receives the converted title text.
\newtoks\inicap@toks
%    \end{macrocode}
%
%    \begin{macrocode}
% User command.  The mandatory argument is the title text.  The
% optional argument (by default the primitive that delivers the
% contents of a register) is placed directly in front of the token
% register once the conversion is finished.
\newcommand{\inicap}[2][\the]{%
% The scanner queues every output token with aftergroup, so the tokens
% reappear, in their original order, when this group is closed.
  \begingroup \inicap@setup
% The register name and an opening brace are queued first; the queued
% output therefore ends up inside an assignment to the register.
  \aftergroup\inicap@toks\aftergroup{%
    \inicap@a#2\@@end
    \endgroup
% This brace closes the register assignment begun by the queued tokens.
  }%
  #1\inicap@toks
}
%    \end{macrocode}
%
%    \begin{macrocode}
%%\begingroup \def\c#1{\catcode`#1=\active}
%%\def~{\let~\def \def\inicap@a}
%%%\c a\c f
%%%~{fnA}
%%%~A{isnlcg
%%\endgroup
%    \end{macrocode}
%    Action 1: futurelet n test
%    Action 2: afterassignment 1 let n =
%    Action 3 \#1: aftergroup \#1 futurelet n test
%    \begin{macrocode}
% Entry point of the scanner: select state 1 (ic@one) as the current
% test and look at the first token of the title.
\def\inicap@a{\let\fsa@t\ic@one \futurelet\@let@token\fsa@t}

% Scanner primitives.  A state macro inspects the upcoming token (held
% in @let@token), stores the action for that token in fsa@n, stores the
% state to enter afterwards in fsa@t where it changes, and finally runs
% fsa@n.
% FSA lookahead
\def\fsa@l{\futurelet\@let@token\fsa@t}
% FSA bypass a token
%    The space stored after the equals sign is used up as the optional
%    space of the let syntax, so the next input token itself is
%    assigned and thereby removed from the input, whatever it is.
\def\fsa@b{\afterassignment\fsa@l \let\@let@token= }
% FSA copy a token (not space, bgroup, egroup)
%    Unroll \fsa@l here for a small speed gain.
\def\fsa@c#1{\aftergroup#1\futurelet\@let@token\fsa@t}
% FSA copy a space token
%    The lowercase wrapper is only a device for making a real space
%    token follow aftergroup; the input space is then removed in the
%    same way as in the bypass action.
\def\fsa@s{%
  \lowercase{\aftergroup} \afterassignment\fsa@l \let\@let@token= }
% FSA next action
\let\fsa@n\@empty
% FSA test
\let\fsa@t\@empty
% FSA exit
%    Swallows one token (the end marker) and does not look ahead again.
\let\fsa@x\@gobble

%    Look for a leading letter at start of first word.
% State 1.  A letter is capitalized unconditionally and scanning
% continues in state 2; any other token is classified by ic@onea.
\def\ic@one{%
  \ifcat Q\@nx\@let@token \let\fsa@t\ic@two \let\fsa@n\ic@capit
  \else \ic@onea \fi
  \fsa@n
}
% Queue the uppercased form of the argument, then look ahead again.
\def\ic@capit#1{\uppercase{\aftergroup#1}\futurelet\@let@token\fsa@t}
% State 1, upcoming token is not a letter.  A space is bypassed, i.e.
% leading spaces are dropped and the state does not change.  A token
% that starts a formula is copied, the matching closing token is
% recorded in endmath, and scanning continues in state 5.  Everything
% else is left to ic@oneb.
\def\ic@onea{%
  \ifx\@sptoken\@let@token \let\fsa@n\fsa@b
  \else \let\fsa@t\ic@five \let\fsa@n\fsa@c
    \ifx $\@let@token \let\endmath $%
% The percent sign at the end of the next line is required: a line end
% directly after a control symbol becomes a space token in the macro
% body, and that space would be typeset whenever this branch is taken.
    \else\ifx\(\@let@token \let\endmath\)%
    \else\ifx\begin\@let@token \let\endmath\end
    \else \ic@oneb \fi\fi\fi\fi
}
% State 1, upcoming token is neither a letter, a space, nor the start
% of a formula.  Scanning continues in state 2.  A brace is queued
% explicitly and the input brace bypassed.  The end marker means that
% the title was empty or consisted of spaces only: it is swallowed by
% the exit action, as in states 2 and 3.  Without this test the end
% marker would be copied like an ordinary token and the scanner would
% run on into the tokens that follow the title.  Any other token is
% copied.
\def\ic@oneb{%
  \let\fsa@t\ic@two
  \ifx{\@let@token \aftergroup{\let\fsa@n\fsa@b
  \else\ifx}\@let@token \aftergroup}\let\fsa@n\fsa@b
  \else\ifx\@@end\@let@token \let\fsa@n\fsa@x
  \else \let\fsa@n\fsa@c \fi\fi\fi
}

% Copy the remainder of the current word.
% State 2.  Letters are copied and the state is kept.  A space token, a
% control space, or a character of the same category as the hyphen ends
% the word: it is copied and scanning continues in state 3.  Everything
% else is classified by ic@twoa.
\def\ic@two{%
  \ifcat Q\@nx\@let@token \let\fsa@n\fsa@c
  \else \let\fsa@t\ic@three
    \ifx\@sptoken\@let@token \let\fsa@n\fsa@s
% Control space, and everything the setup macro has made equal to it
% (such as the tie), ends the word here just as it does in states 3
% and 4; otherwise the word after it would never be examined.
    \else\ifx\ \@let@token \let\fsa@n\fsa@c
    \else\ifcat -\@nx\@let@token \let\fsa@n\fsa@c
    \else \ic@twoa \fi\fi\fi\fi
  \fsa@n
}
% State 2, upcoming token is not a letter, a space token, or a
% character of the hyphen's category.  The end marker selects the exit
% action.  Otherwise the state is set back to state 2; a brace is
% queued explicitly and the input brace bypassed; any other token is
% handed to ic@checkmath, which copies it and switches to state 5 if it
% starts a formula.
\def\ic@twoa{%
  \ifx\@@end\@let@token \let\fsa@n\fsa@x
  \else \let\fsa@t\ic@two
    \ifx{\@let@token \aftergroup{\let\fsa@n\fsa@b
    \else\ifx}\@let@token \aftergroup}\let\fsa@n\fsa@b
    \else \ic@checkmath \fi\fi\fi
}

% Look for start of another word.
% State 3.  A letter is handed to ic@check@letter, which decides from
% its case how the word is treated; any other token is classified by
% ic@threea.
\def\ic@three{%
  \ifcat Q\@nx\@let@token \let\fsa@n\ic@check@letter
  \else \ic@threea \fi
  \fsa@n
}
% First letter (#1) of a word found in state 3.  The test compares the
% lowercased letter with the letter as given; they agree exactly when
% the letter is already lowercase.
\def\ic@check@letter#1{%
  \lowercase{\if#1}#1%
%    lowercase letter: prepare to check word
%    A new group is opened in which ic@check@word and the letters of
%    the word are queued; state 4 closes that group when the word
%    ends, which releases the queued tokens.
    \begingroup
    \aftergroup\ic@check@word \aftergroup#1%
    \let\fsa@t\ic@four
  \else
%    cap letter: copy remainder of this word unchanged
    \aftergroup#1\let\fsa@t\ic@two
  \fi
  \futurelet\@let@token\fsa@t
}
% State 3, upcoming token is not a letter.  A space token, a control
% space, and any character of the same category as the hyphen are
% copied; the state is not changed in these cases.  Everything else is
% classified by ic@threeb.
\def\ic@threea{%
  \ifx\@sptoken\@let@token \let\fsa@n\fsa@s
  \else\ifx\ \@let@token \let\fsa@n\fsa@c
  \else\ifcat-\@nx\@let@token \let\fsa@n\fsa@c
  \else\ic@threeb\fi\fi\fi
}
% State 3, remaining cases.  Scanning continues in state 2 unless
% ic@checkmath selects state 5.  A brace is queued explicitly and the
% input brace bypassed; the end marker selects the exit action; any
% other token is handed to ic@checkmath.
\def\ic@threeb{%
  \let\fsa@t\ic@two
  \ifx{\@let@token \aftergroup{\let\fsa@n\fsa@b
  \else\ifx}\@let@token \aftergroup}\let\fsa@n\fsa@b
  \else\ifx\@@end\@let@token \let\fsa@n\fsa@x
  \else \ic@checkmath \fi\fi\fi
}
% Copy the upcoming token.  If it starts a formula, record the matching
% closing token in endmath and continue in state 5; otherwise continue
% in state 2.
\def\ic@checkmath{%
  \let\fsa@n\fsa@c \let\fsa@t\ic@five
  \ifx $\@let@token \let\endmath $%
% The percent sign at the end of the next line is required: a line end
% directly after a control symbol becomes a space token in the macro
% body, and that space would be typeset whenever this branch is taken.
  \else\ifx\(\@let@token \let\endmath\)%
  \else\ifx\begin\@let@token \let\endmath\end
  \else \let\fsa@t\ic@two \fi\fi\fi
}
% State 4: collecting the letters of a word that began with a lowercase
% letter.  Letters are queued inside the group opened by
% ic@check@letter.  A space token ends the word: closing the group
% releases ic@check@word followed by the letters, and the two tokens
% written after endgroup become the delimiter and the last argument of
% ic@check@word.  The space is then copied and scanning continues in
% state 3.  Other tokens are classified by ic@foura.
\def\ic@four{%
  \ifcat Q\@nx\@let@token \let\fsa@n\fsa@c
  \else\ifx\@sptoken\@let@token \endgroup\@empty\uppercase
      \let\fsa@t\ic@three \let\fsa@n\fsa@s
  \else \ic@foura \fi\fi
  \fsa@n
}
% State 4, upcoming token is neither a letter nor a space token.  In
% every branch the word group is closed, which releases ic@check@word
% with the collected letters; the token written after the empty
% delimiter is its last argument and selects the treatment of the word:
% uppercase (look the word up in the pac list) after a control space or
% a character of the hyphen's category; ic@cap@last@word at the end
% marker; aftergroup (copy unchanged) when some other token interrupts
% the word, which is then classified as in state 2.
\def\ic@foura{%
  \ifx\ \@let@token \endgroup\@empty\uppercase
    \let\fsa@t\ic@three \let\fsa@n\fsa@c
  \else\ifcat -\@nx\@let@token \endgroup\@empty\uppercase
    \let\fsa@t\ic@three \let\fsa@n\fsa@c
  \else\ifx\@@end\@let@token \endgroup\@empty\ic@cap@last@word
    \let\fsa@n\fsa@x
  \else \endgroup\@empty\aftergroup
    \let\fsa@t\ic@two \ic@twoa
  \fi\fi\fi
}
% State 5: inside a formula.  Every token is copied unchanged until one
% equal to endmath arrives; that token is copied as well and scanning
% continues in state 2.  Space tokens and braces need their dedicated
% copy actions.  Reaching the end marker here means the formula was
% never closed, which is reported by ic@badmath.
\def\ic@five{%
  \ifx\endmath\@let@token \let\fsa@t\ic@two \let\fsa@n\fsa@c
  \else\ifx\@sptoken\@let@token \let\fsa@n\fsa@s
  \else\ifx{\@let@token \aftergroup{\let\fsa@n\fsa@b
  \else\ifx}\@let@token \aftergroup}\let\fsa@n\fsa@b
  \else\ifx\@@end\@let@token \let\fsa@n\ic@badmath
  \else \let\fsa@n\fsa@c \fi\fi\fi\fi\fi
  \fsa@n
}
% Reached when the end marker turns up while a formula is still open.
% #1 is the end marker; it is swallowed here, just as the normal exit
% action does.  The group opened by the user command is deliberately
% not closed here: that command issues the closing endgroup itself, and
% a second one would be absorbed into the token register together with
% the queued output and executed when the register is delivered.
\def\ic@badmath#1{%
  \PackageError{inicap}{Math formula not closed properly}\@ehc
}
%    \end{macrocode}
%
%    There are four types of title fragments that need to be handled.
% \begin{enumerate}
%    \item Normal words, all letters
%    \item Interword spaces (including not only \verb*' ' but also
%    \verb'~', \verb'-', and \verb*'\ ')
%    \item Math formulas starting with \verb'$' \verb'\(' or
%    \verb'\begin{math}'
%    \item Abnormal `words' beginning with left brace or \cn{AA} or
%    \cn{emph} or \cn{texttt}
% \end{enumerate}
%    Consider a title such as
%\begin{verbatim}
%    \title{A duality theory for $C^*$-algebras}
%\end{verbatim}
%    The fragment types that are processed are 1 2 1 2 1 2 1 2 3 2 1.
%
%    Some problem cases:
%\begin{verbatim}
% \title{The multidimensional $p$-adic Green function}
% \title{A recurrence/transience result for circle packings}
% \title{The ``low $M^*$-estimate" for covering numbers}
% \title{$Qqpi$ groups and quasi-equivalence}
%\end{verbatim}
%    The suffix \qq{adic} should not be capitalized.
%
%    recurrence/transience $\to$ Recurrence/Transience
%
%    Leading dbl quotes should not keep `low' from being capitalized.
%
%    Capitalize quasi-equivalence as Quasi-Equivalence.
%
% \section{The state machine}
%
%    We start by looking for the beginning of the first word.
%
% \begin{enumerate}
% \item[1] (start) look for first word
%   \begin{enumerate}
%   \item[1a] (space) discard [1]
%   \item[1b] (letter) cap it [2]
%   \item[1c] (start-math) copy it [5]
%   \item[1d] (other) copy it [2]
%   \end{enumerate}
% \item[2] look for more letters in current word; braces or any control
%    sequence that is not explicitly identified with class space or
%    punct will be treated as continuing the current word.
%   \begin{enumerate}
%   \item[2a] (letter) copy it [2]
%   \item[2b] (space) copy it [3]
%   \item[2c] (punct) copy it [3]
%   \item[2d] (end-all) quit [6]
%   \item[2e] (other) copy it [2]
%   \end{enumerate}
% \item[3] look for another word
%   \begin{enumerate}
%   \item[3a] (space) copy it [3]
%   \item[3b] (punct) copy it [3]
%   \item[3c] (letter) hold to check word [4]
%   \item[3d] (start-math) copy it; set end-math token [5]
%   \item[3e] (other) copy it [2]
%   \end{enumerate}
% \item[4] gather letters for checking a word
%   \begin{enumerate}
%   \item[4a] (letter) append to current word [4]
%   \item[4b] (space) check word [3]
%   \item[4c] (punct) check word [3]
%   \item[4d] (end-all) cap that word [6]
%   \item[4e] (other) release partial word [2]
%   \end{enumerate}
% \item[5] look for end-math
%   \begin{enumerate}
%   \item[5a] (end-math) copy it [2]
%   \item[5b] (other) copy it [5]
%   \end{enumerate}
% \end{enumerate}
%
%    Define a command \cs{pacwords} for specifying additional
%    preposition\slash article\slash conjunction words that ought never
%    be capitalized. Multiple words should be separated by commas.
%    \begin{macrocode}
% Append the comma-separated words in #1 to the pac list.  The list is
% rebuilt globally and fully expanded; one comma is added in front of
% and one behind the new words.
\newcommand{\pacwords}[1]{%
  \xdef\pac@list{\pac@list,#1,}%
}
%    \end{macrocode}
%
%    \begin{macrocode}
% Default list of words that are not capitalized.  Every entry must be
% enclosed by commas, including the first and the last one, because a
% word is looked up in the form comma, word, comma.
\def\pac@list{%
  ,about,above,across,after,against,along,among,around,at,%
  before,behind,below,beneath,beside,between,beyond,by,%
  despite,down,during,except,for,from,in,inside,into,like,near,%
  nor,of,off,on,onto,or,out,outside,over,past,since,so,through,%
  throughout,till,to,toward,under,underneath,until,up,upon,with,%
  within,without,%
  a,an,the,%
  and,but,yet,%
%    \end{macrocode}
%    Here are some particles used in names.
%    \begin{macrocode}
  da,de,di,du,del,den,der,i,l,la,le,van,von,y,%
%    \end{macrocode}
%    The \qq{s} is for possessives, the \qq{th}, \qq{st}, \qq{nd},
%    \qq{rd} for numbers, \qq{d}, \qq{t}, \qq{ll}, \qq{re} for
%    contractions.
%    \begin{macrocode}
  d,ll,nd,rd,re,s,st,t,th,%
}
%    \end{macrocode}
%
%    \begin{macrocode}
% Decide how a collected all-letter word is written out.  #1 is the
% first letter, #2 the remaining letters (delimited by the empty
% macro), #3 the token supplied by the state that ended the word.
\def\ic@check@word#1#2\@empty#3{%
  \ifx\uppercase#3%
%    Look the word up in the pac list.  The helper is delimited by
%    comma, word, comma; the call appends the word, a comma and a
%    period after the list as a sentinel.  The token after the first
%    match is therefore a period exactly when the word is not in the
%    list, and only then is the first letter uppercased.
    \def\@tempd##1,#1#2,##2##3\@nil{%
      \if .##2\uppercase{\aftergroup#1}\else\aftergroup#1\fi
    }%
    \@xp\@tempd\pac@list #1#2,.\@nil
  \else
%    Any other token in #3 is applied to the first letter directly.
    #3#1%
  \fi
%    The remaining letters are queued unchanged.
  \copy@letter#2\@@end
}
% Used as #3 for the last word of the title: queue the uppercased first
% letter without consulting the pac list.
\def\ic@cap@last@word#1{\uppercase{\aftergroup#1}}
%    \end{macrocode}
%
%    \begin{macrocode}
% Queue the following tokens one at a time with aftergroup; the loop
% stops at the end marker, which is discarded.
\def\copy@letter#1{\ifx\@@end#1\else\aftergroup#1\@xp\copy@letter\fi}
%    \end{macrocode}
%
%    To do:
% \begin{itemize}
%
%    \item Define a command \cn{wordseparator} for specifying
%    additional commands like \cn{slash} that should be treated as
%    ending the current word.
%
%    \item Define a command \cn{capwords} for removing words from the
%    pacword list.
%
%    \item Document how to mark a word to be capitalized or not if the
%    automatic decision is wrong. For a capped word, adding \cs{relax}
%    at the beginning will work, but what to recommend? Perhaps
%    \verb'\@'. For a noncapped word, \cs{cappable}?? \cs{titlecap}.
%
%    \item What about \cn{linebreak}?
% \end{itemize}
%
%    The usual \cs{endinput} to ensure that random garbage at the end of
%    the file doesn't get copied by \fn{docstrip}.
%    \begin{macrocode}
\endinput
%
%    \end{macrocode}
%
% \CheckSum{532}
% \Finale
% \endinput