%%% ====================================================================
%%%  @LaTeX-doc-source-file{
%%%     filename        = "inicap.dtx",
%%%     version         = "1.04",
%%%     date            = "2002/02/08",
%%%     time            = "12:52:30 EST",
%%%     author          = "Michael J Downes",
%%%     address         = "American Mathematical Society,
%%%                        Publications Technical Group,
%%%                        PO Box 6248,
%%%                        Providence, RI 02940,
%%%                        USA",
%%%     email           = "tech-support@ams.org",
%%%     URL             = "http://www.ams.org/",
%%%     keywords        = "capitalization, initial caps",
%%%     abstract        = "LaTeX package for applying `initial caps'
%%%                        capitalization to English text.",
%%%     checksum        = "61425 464 1936 15968",
%%%     docstring       = "The checksum field, produced by Robert Solovay's
%%%                        checksum utility, gives CRC-16 checksum, lines,
%%%                        words, and characters.",
%%%  }
%%% ====================================================================
% \iffalse
% Documentation driver.  The lines between the two driver guards form a
% complete LaTeX document that reads this file through the class's
% hDocInput command.  The closing guard is required: without it docstrip
% would treat the remainder of the file as driver code.
%<*driver>
\NeedsTeXFormat{LaTeX2e}
\documentclass{amsdtx}
\providecommand{\mdash}{\textemdash}
\providecommand{\qq}[1]{\textquotedblleft#1\textquotedblright}
\begin{document}
\title{The \pkg{inicap} package}
\author{Michael~J. Downes\\American Mathematical Society}
\date{Version \fileversion, \filedate}
\hDocInput{inicap.dtx}
\end{document}
%</driver>
% \fi
%
% \maketitle
% \section{Introduction}
%
% The \pkg{inicap} package provides a function \cs{inicap} for
% converting English titles from normal upper/lower case to
% \qq{initial caps} form. Most languages other than English have
% nothing like this, thank heavens. So we worry only about English,
% and use this restriction to make some simplifying assumptions, such
% as \qq{If a word doesn't begin with an explicit lowercase letter,
% it doesn't need any change in its capitalization}.
% (Think about it \ldots)
%
% Here is a synopsis of the rules given in the \emph{Chicago Manual
% of Style} [13th ed., 1982, Univ.\ Chicago Press] for
% capitalizing titles of written works in English:
% \begin{quote}
% Capitalize each word, including pronouns and subordinate
% conjunctions, except for articles, coordinate conjunctions, and
% prepositions, or the word \emph{to} in infinitives. Always
% capitalize the first and last word of the title and the first and
% last word of any subtitles that it may contain. Don't capitalize
% the second or later word in a hyphenated compound unless it is a
% noun or proper adjective, or it has equal force with the first
% word.
% \end{quote}
%
% The \cn{inicap} command does not presently do any special handling
% of hyphenated compounds: they get all their words capitalized\mdash
% i.e., we assume for simplicity that a word following a hyphen
% \emph{is} a noun, or a proper adjective, or has equal force with
% the first word; in practice this seems to be true nearly all the
% time anyway.
%
% Usage of the \cn{inicap} command is:
%\begin{verbatim}
% \inicap{some text}
%\end{verbatim}
% This initial-capitalizes the words in the given text according to
% the rules given by the Chicago Manual of Style (more or less). Math
% formulas are skipped. Roughly speaking, a word to be capitalized
% begins with a letter and ends with some kind of space, or perhaps
% a hyphen. In order to actually implement this notion, I formalized
% it as follows:
%
% A \emph{word} is defined as a sequence of non-space,
% non-punctuation tokens. If the sequence consists only of letters,
% and if the first letter is lowercase, then the word is a candidate
% for capitalization. In this case the first letter will always be
% capitalized unless the entire word matches an element of the
% \qq{pac} list (prepositions, articles, and conjunctions).
% If you come across some unusual preposition that is getting
% capitalized, it probably means it is not included in the default
% value of the pac list. Never fear! You can add more words to it
% with the \cn{pacwords} command:
%\begin{verbatim}
% \pacwords{anent,forby,re}
%\end{verbatim}
%
% The following tokens that may begin a math formula cause
% \cs{inicap} to scan to the matching end token and pass on unchanged
% everything in between.
%\begin{verbatim}
% $ \( \begin
%\end{verbatim}
%
% In addition to normal characters of category 10 or 12, the
% following control sequences are also classified as space or
% punctuation:
% \begin{center}
% \verb'\slash' \verb'\\' \verb'~' \verb*'\ ' (control-space)
% \end{center}
% \emph{All other} control sequences\mdash e.g., \verb'\"'\mdash
% become part of the current word, and prevent the current word from
% being capitalized. Since our scope is specifically restricted to
% English text, this effect is normally harmless in practice (any
% word containing an accent command is probably a proper noun anyway,
% hence already capitalized).
%
% \StopEventually{}
%
% \section{Implementation}
% Identify the required format and announce the package name, date
% and version.
%    \begin{macrocode}
\NeedsTeXFormat{LaTeX2e}
\ProvidesPackage{inicap}[2002/02/08 v1.04]
%    \end{macrocode}
%
% Two short aliases for primitives that the code below uses.
%    \begin{macrocode}
\let\@xp=\expandafter
\let\@nx=\noexpand
%    \end{macrocode}
%
% When a word is being collected, it will not end up being
% considered for capitalization unless it contains all letters and is
% ended by a space-like token or a punctuation-like token. Once such
% a word terminator has been detected, any following spacey-punctual
% tokens are passed through until some other kind of token arrives.
% The new token is either (a) a letter, in which case we begin again to
% collect letters for possible capitalizing, or (b) a start-math
% token, in which case we want to skip over the entire formula,
% including any letters, spaces, and punctuation that it may contain,
% or (c) none of the above, in which case we consider it to be the
% start of a not-to-be-capitalized \qq{word} and skip onward to the
% next word-terminating token.
%
% To simplify the token testing, control sequences that represent
% spaces or punctuation are temporarily reduced to a minimum number
% of different meanings. And \cn{-} is made to test as a letter so
% that it does not interrupt scanning of the current word.
%
% Concretely, \verb'~', \verb'\\' and \cs{linebreak} are given the
% meaning of control-space, \cs{slash} the meaning of the character
% \verb'/', and \verb'\-' the meaning of the letter \verb'X'. The
% assignments are local; \cs{inicap} executes them inside the group
% that it opens.
%    \begin{macrocode}
\def\inicap@setup{%
  \let~=\ \let\\=\ \let\slash=/%
  \let\linebreak=\ \let\-=X%
}
%    \end{macrocode}
%
% \cs{inicap@toks} is the token register that receives the processed
% text.
%    \begin{macrocode}
\newtoks\inicap@toks
%    \end{macrocode}
%
% \cs{inicap} takes the text as its mandatory argument. It opens a
% group, applies \cs{inicap@setup}, and queues \cs{inicap@toks} and an
% opening brace with \cs{aftergroup}. The scanner started by
% \cs{inicap@a} queues the processed tokens in the same way, so when
% \cs{endgroup} is reached they reappear in front of the closing brace
% that is written literally in the definition, forming an assignment
% to \cs{inicap@toks}. \cs{@@end} marks the end of the text for the
% scanner. Finally the optional argument (default \cs{the}) is placed
% in front of \cs{inicap@toks}, which by default delivers the result.
%    \begin{macrocode}
\newcommand{\inicap}[2][\the]{%
  \begingroup
  \inicap@setup
  \aftergroup\inicap@toks\aftergroup{%
  \inicap@a#2\@@end
  \endgroup
  }%
  #1\inicap@toks
}
%    \end{macrocode}
%
%    \begin{macrocode}
%%\begingroup \def\c#1{\catcode`#1=\active}
%%\def~{\let~\def \def\inicap@a}
%%%\c a\c f
%%%~{fnA}
%%%~A{isnlcg
%%\endgroup
%    \end{macrocode}
% Action 1: futurelet n test
% Action 2: afterassignment 1 let n =
% Action 3 \#1: aftergroup \#1 futurelet n test
%
% The scanner is driven by \cs{futurelet}: \cs{fsa@t} holds the test
% for the current state, and that test selects the action
% \cs{fsa@n} for the token seen in \cs{@let@token}. In \cs{fsa@b} and
% \cs{fsa@s} the space written after the equals sign is the optional
% space allowed by \cs{let}, so the assignment removes exactly the
% next token of the input. In \cs{fsa@s} the space written after
% \verb'\lowercase{\aftergroup}' is the space token that
% \cs{aftergroup} queues. \cs{inicap@a} starts the scan in state~1
% (\cs{ic@one}).
%    \begin{macrocode}
\def\inicap@a{\let\fsa@t\ic@one \futurelet\@let@token\fsa@t}
% FSA lookahead
\def\fsa@l{\futurelet\@let@token\fsa@t}
% FSA bypass a token
\def\fsa@b{\afterassignment\fsa@l \let\@let@token= }
% FSA copy a token (not space, bgroup, egroup)
% Unroll \fsa@l here for a small speed gain.
\def\fsa@c#1{\aftergroup#1\futurelet\@let@token\fsa@t}
% FSA copy a space token
\def\fsa@s{%
  \lowercase{\aftergroup} \afterassignment\fsa@l \let\@let@token= }
% FSA next action
\let\fsa@n\@empty
% FSA test
\let\fsa@t\@empty
% FSA exit
\let\fsa@x\@gobble
% Look for a leading letter at start of first word.
% State 1.  A letter is capitalized (\ic@capit) and scanning continues
% in state 2; any other token is classified by \ic@onea.
\def\ic@one{%
  \ifcat Q\@nx\@let@token \let\fsa@t\ic@two \let\fsa@n\ic@capit
  \else \ic@onea
  \fi
  \fsa@n
}
% Queue the uppercase form of #1, then look ahead at the next token.
\def\ic@capit#1{\uppercase{\aftergroup#1}\futurelet\@let@token\fsa@t}
% State 1, not a letter.  A space is discarded and the state stays 1.
% A math-start token is copied and its matching end token is recorded
% in \endmath (state 5).  The comment character after \) matters: \)
% is a control symbol, so an end of line after it would put a space
% token into the definition, and that space would be typeset whenever
% this branch is taken in horizontal mode.
\def\ic@onea{%
  \ifx\@sptoken\@let@token \let\fsa@n\fsa@b
  \else
    \let\fsa@t\ic@five \let\fsa@n\fsa@c
    \ifx $\@let@token \let\endmath $%
    \else\ifx\(\@let@token \let\endmath\)%
    \else\ifx\begin\@let@token \let\endmath\end
    \else \ic@oneb
    \fi\fi\fi
  \fi
}
% State 1, neither letter, space nor math start.  The end marker
% \@@end is tested first, so that an empty or all-space argument
% finishes through \fsa@x instead of having its end marker copied like
% an ordinary token.  Braces are queued by hand and then bypassed; any
% other token is copied.  Scanning continues in state 2.
% NOTE(review): this adds control sequences to the code, so the
% CheckSum value at the end of the file needs to be regenerated.
\def\ic@oneb{%
  \let\fsa@t\ic@two
  \ifx\@@end\@let@token \let\fsa@n\fsa@x
  \else\ifx{\@let@token \aftergroup{\let\fsa@n\fsa@b
  \else\ifx}\@let@token \aftergroup}\let\fsa@n\fsa@b
  \else \let\fsa@n\fsa@c
  \fi\fi\fi
}
% Copy the remainder of the current word.
% State 2.  Letters are copied.  A space or a character of the same
% category as - is copied and ends the word (state 3).  Everything
% else is handled by \ic@twoa.
\def\ic@two{%
  \ifcat Q\@nx\@let@token \let\fsa@n\fsa@c
  \else
    \let\fsa@t\ic@three
    \ifx\@sptoken\@let@token \let\fsa@n\fsa@s
    \else\ifcat -\@nx\@let@token \let\fsa@n\fsa@c
    \else \ic@twoa
    \fi\fi
  \fi
  \fsa@n
}
% State 2, remaining cases.  Stop at the end marker; otherwise stay in
% state 2, queueing braces by hand and leaving all other tokens to
% \ic@checkmath.
\def\ic@twoa{%
  \ifx\@@end\@let@token \let\fsa@n\fsa@x
  \else
    \let\fsa@t\ic@two
    \ifx{\@let@token \aftergroup{\let\fsa@n\fsa@b
    \else\ifx}\@let@token \aftergroup}\let\fsa@n\fsa@b
    \else \ic@checkmath
    \fi\fi
  \fi
}
% Look for start of another word.
% State 3.  A letter starts a new word and is examined by
% \ic@check@letter; any other token is classified by \ic@threea.
\def\ic@three{%
  \ifcat Q\@nx\@let@token \let\fsa@n\ic@check@letter
  \else \ic@threea
  \fi
  \fsa@n
}
% Compare #1 with its own lowercase form.  A lowercase letter opens a
% group in which the word is collected for \ic@check@word (state 4); a
% capital letter is copied and the rest of the word follows in state 2.
\def\ic@check@letter#1{%
  \lowercase{\if#1}#1%
% lowercase letter: prepare to check word
    \begingroup \aftergroup\ic@check@word \aftergroup#1%
    \let\fsa@t\ic@four
  \else
% cap letter: copy remainder of this word unchanged
    \aftergroup#1\let\fsa@t\ic@two
  \fi
  \futurelet\@let@token\fsa@t
}
% State 3, not a letter: spaces, control-space and characters of the
% same category as - are copied without leaving state 3.
\def\ic@threea{%
  \ifx\@sptoken\@let@token \let\fsa@n\fsa@s
  \else\ifx\ \@let@token \let\fsa@n\fsa@c
  \else\ifcat-\@nx\@let@token \let\fsa@n\fsa@c
  \else\ic@threeb\fi\fi\fi
}
% State 3, remaining cases: braces are queued by hand, the end marker
% stops the scan, anything else goes to \ic@checkmath.
\def\ic@threeb{%
  \let\fsa@t\ic@two
  \ifx{\@let@token \aftergroup{\let\fsa@n\fsa@b
  \else\ifx}\@let@token \aftergroup}\let\fsa@n\fsa@b
  \else\ifx\@@end\@let@token \let\fsa@n\fsa@x
  \else \ic@checkmath
  \fi\fi\fi
}
% Copy the token.  If it starts a math formula, record the matching
% end token in \endmath and continue in state 5, otherwise in state 2.
% The comment character after \) matters: \) is a control symbol, so
% an end of line after it would put a space token into the definition,
% and that space would be typeset whenever this branch is taken in
% horizontal mode.
\def\ic@checkmath{%
  \let\fsa@n\fsa@c \let\fsa@t\ic@five
  \ifx $\@let@token \let\endmath $%
  \else\ifx\(\@let@token \let\endmath\)%
  \else\ifx\begin\@let@token \let\endmath\end
  \else \let\fsa@t\ic@two
  \fi\fi\fi
}
% State 4.  Letters are added to the word being collected.  Closing
% the group releases \ic@check@word followed by the collected letters;
% the token placed after \@empty tells it what to do with the first
% letter.
\def\ic@four{%
  \ifcat Q\@nx\@let@token \let\fsa@n\fsa@c
  \else\ifx\@sptoken\@let@token
    \endgroup\@empty\uppercase
    \let\fsa@t\ic@three \let\fsa@n\fsa@s
  \else \ic@foura
  \fi\fi
  \fsa@n
}
\def\ic@foura{%
  \ifx\ \@let@token
    \endgroup\@empty\uppercase
    \let\fsa@t\ic@three \let\fsa@n\fsa@c
  \else\ifcat -\@nx\@let@token
    \endgroup\@empty\uppercase
    \let\fsa@t\ic@three \let\fsa@n\fsa@c
  \else\ifx\@@end\@let@token
    \endgroup\@empty\ic@cap@last@word
    \let\fsa@n\fsa@x
  \else
    \endgroup\@empty\aftergroup
    \let\fsa@t\ic@two \ic@twoa
  \fi\fi\fi
}
% State 5.  Copy everything up to the token recorded in \endmath, then
% continue in state 2.  Reaching the end marker first is an error.
\def\ic@five{%
  \ifx\endmath\@let@token \let\fsa@t\ic@two \let\fsa@n\fsa@c
  \else\ifx\@sptoken\@let@token \let\fsa@n\fsa@s
  \else\ifx{\@let@token \aftergroup{\let\fsa@n\fsa@b
  \else\ifx}\@let@token \aftergroup}\let\fsa@n\fsa@b
  \else\ifx\@@end\@let@token \let\fsa@n\ic@badmath
  \else \let\fsa@n\fsa@c
  \fi\fi\fi\fi\fi
  \fsa@n
}
% Remove the end marker (#1) and report the unclosed formula.  The
% group is closed by the \endgroup that follows the end marker in
% \inicap, exactly as on the normal exit through \fsa@x.  Issuing a
% second \endgroup here would leave the one from \inicap to be
% absorbed into the token register and re-emitted with the result.
% NOTE(review): this changes the number of control sequences in the
% code, so the CheckSum value at the end of the file needs to be
% regenerated.
\def\ic@badmath#1{%
  \PackageError{inicap}{Math formula not closed properly}\@ehc
}
%    \end{macrocode}
%
% There are four types of title fragments that need to be handled.
% \begin{enumerate}
% \item Normal words, all letters
% \item Interword spaces (including not only \verb*' ' but also
% \verb'~', \verb'-', and \verb*'\ ')
% \item Math formulas starting with \verb'$' \verb'\(' or
% \verb'\begin{math}'
% \item Abnormal `words' beginning with left brace or \cn{AA} or
% \cn{emph} or \cn{texttt}
% \end{enumerate}
% Consider a title such as
%\begin{verbatim}
% \title{A duality theory for $C^*$-algebras}
%\end{verbatim}
% The fragment types that are processed are 1 2 1 2 1 2 1 2 3 2 1.
%
% Some problem cases:
%\begin{verbatim}
% \title{The multidimensional $p$-adic Green function}
% \title{A recurrence/transience result for circle packings}
% \title{The ``low $M^*$-estimate" for covering numbers}
% \title{$Qqpi$ groups and quasi-equivalence}
%\end{verbatim}
% The suffix \qq{adic} should not be capitalized.
%
% recurrence/transience $\to$ Recurrence/Transience
%
% Leading double quotes should not keep `low' from being capitalized.
%
% Capitalize quasi-equivalence as Quasi-Equivalence.
%
% \section{The state machine}
%
% We start by looking for the beginning of the first word.
%
% \begin{enumerate}
% \item[1] (start) look for first word
% \begin{enumerate}
% \item[1a] (space) discard [1]
% \item[1b] (letter) cap it [2]
% \item[1c] (start-math) copy it [5]
% \item[1d] (other) copy it [2]
% \end{enumerate}
% \item[2] look for more letters in current word; braces or any control
% sequence that is not explicitly identified with class space or
% punct will be treated as continuing the current word.
% \begin{enumerate}
% \item[2a] (letter) copy it [2]
% \item[2b] (space) copy it [3]
% \item[2c] (punct) copy it [3]
% \item[2d] (end-all) quit [6]
% \item[2e] (other) copy it [2]
% \end{enumerate}
% \item[3] look for another word
% \begin{enumerate}
% \item[3a] (space) copy it [3]
% \item[3b] (punct) copy it [3]
% \item[3c] (letter) hold to check word [4]
% \item[3d] (start-math) copy it; set end-math token [5]
% \item[3e] (other) copy it [2]
% \end{enumerate}
% \item[4] gather letters for checking a word
% \begin{enumerate}
% \item[4a] (letter) append to current word [4]
% \item[4b] (space) check word [3]
% \item[4c] (punct) check word [3]
% \item[4d] (end-all) cap that word [6]
% \item[4e] (other) release partial word [2]
% \end{enumerate}
% \item[5] look for end-math
% \begin{enumerate}
% \item[5a] (end-math) copy it [2]
% \item[5b] (other) copy it [5]
% \end{enumerate}
% \end{enumerate}
%
% Define a command \cs{pacwords} for specifying additional
% preposition\slash article\slash conjunction words that ought never
% be capitalized. Multiple words should be separated by commas.
% The list is extended globally. Entries are matched literally
% between commas, so no spaces should be written after the commas.
%    \begin{macrocode}
\newcommand{\pacwords}[1]{\xdef\pac@list{\pac@list,#1,}}
%    \end{macrocode}
%
% The default list. It begins and ends with a comma so that every
% entry is surrounded by commas.
%    \begin{macrocode}
\def\pac@list{%
  ,about,above,across,after,against,along,among,around,at,%
  before,behind,below,beneath,beside,between,beyond,by,%
  despite,down,during,except,for,from,in,inside,into,like,near,%
  nor,of,off,on,onto,or,out,outside,over,past,since,so,through,%
  throughout,till,to,toward,under,underneath,until,up,upon,with,%
  within,without,%
  a,an,the,%
  and,but,yet,%
%    \end{macrocode}
% Here are some particles used in names.
%    \begin{macrocode}
  da,de,di,du,del,den,der,i,l,la,le,van,von,y,%
%    \end{macrocode}
% The \qq{s} is for possessives, the \qq{th}, \qq{st}, \qq{nd},
% \qq{rd} for numbers, \qq{d}, \qq{t}, \qq{ll}, \qq{re} for
% contractions.
%    \begin{macrocode}
  d,ll,nd,rd,re,s,st,t,th,%
}
%    \end{macrocode}
%
% \cs{ic@check@word} receives the collected word (first letter \#1,
% remaining letters \#2, terminated by \cs{@empty}) followed by one
% token \#3 supplied by state~4. If \#3 is \cs{uppercase}, the word is
% looked up in \cs{pac@list}: a temporary macro whose first argument
% is delimited by the word between commas is applied to the list with
% the word, a comma and a period appended. The token after the first
% match is therefore a period exactly when the word is not in the
% list, and only then is the first letter uppercased. If \#3 is
% anything else it is applied directly to the first letter. In every
% case the remaining letters are passed on unchanged by
% \cs{copy@letter}. \cs{ic@cap@last@word} uppercases its argument
% unconditionally; state~4 supplies it as \#3 when the word is the
% last one of the text.
%    \begin{macrocode}
\def\ic@check@word#1#2\@empty#3{%
  \ifx\uppercase#3%
    \def\@tempd##1,#1#2,##2##3\@nil{%
      \if .##2\uppercase{\aftergroup#1}\else\aftergroup#1\fi
    }%
    \@xp\@tempd\pac@list #1#2,.\@nil
  \else
    #3#1%
  \fi
  \copy@letter#2\@@end
}
\def\ic@cap@last@word#1{\uppercase{\aftergroup#1}}
%    \end{macrocode}
%
% \cs{copy@letter} queues its arguments one token at a time with
% \cs{aftergroup} and stops when it meets \cs{@@end}.
%    \begin{macrocode}
\def\copy@letter#1{\ifx\@@end#1\else\aftergroup#1\@xp\copy@letter\fi}
%    \end{macrocode}
%
% To do:
% \begin{itemize}
%
% \item Define a command \cn{wordseparator} for specifying
% additional commands like \cn{slash} that should be treated as
% ending the current word.
%
% \item Define a command \cn{capwords} for removing words from the
% pacword list.
%
% \item Document how to mark a word to be capitalized or not if the
% automatic decision is wrong. For a capped word, adding \cs{relax}
% at the beginning will work, but what to recommend? Perhaps
% \verb'\@'. For a noncapped word, \cs{cappable}?? \cs{titlecap}.
%
% \item What about \cn{linebreak}?
% \end{itemize}
%
% The usual \cs{endinput} to ensure that random garbage at the end of
% the file doesn't get copied by \fn{docstrip}.
%    \begin{macrocode}
\endinput
%
%    \end{macrocode}
%
% \CheckSum{532}
% \Finale
% \endinput