% \iffalse % This document requires lualatex %% %% Copyright (C) 2012-2024 Javier Bezos and Johannes L. Braams. %% Copyright (C) 1989-2012 Johannes L. Braams and %% any individual authors listed elsewhere in this file. %% All rights reserved. %% %% %% This file is part of the Babel system. %% -------------------------------------- %% %% It may be distributed and/or modified under the %% conditions of the LaTeX Project Public License, either version 1.3 %% of this license or (at your option) any later version. %% The latest version of this license is in %% http://www.latex-project.org/lppl.txt %% and version 1.3 or later is part of all distributions of LaTeX %% version 2003/12/01 or later. %% %% This work has the LPPL maintenance status "maintained". %% %% The Current Maintainer of this work is Javier Bezos. %% %% The list of derived (unpacked) files belonging to the distribution %% and covered by LPPL is defined by the unpacking scripts (with %% extension |.ins|) which are part of the distribution. %% % \fi % % \CheckSum{5011} % % \iffalse %<*filedriver> \ProvidesFile{babel.dtx}[2024/08/29 v24.9 The Babel package] \documentclass{ltxdoc} \GetFileInfo{babel.dtx} \usepackage{fontspec} \setmainfont[ Scale=.88, BoldFont = NotoSerif-Bold.ttf, ItalicFont = NotoSerif-Italic.ttf, BoldItalicFont = NotoSerif-BoldItalic.ttf] {NotoSerif-Regular.ttf} \setsansfont[ Scale=.88, BoldFont = NotoSans-Bold.ttf, ItalicFont = NotoSans-Italic.ttf, BoldItalicFont = NotoSans-BoldItalic.ttf] {NotoSans-Regular.ttf} \setmonofont[Scale=.86, FakeStretch=.97]{DejaVu Sans Mono} \raggedright \frenchspacing \addtolength{\oddsidemargin}{1em} \addtolength{\textwidth}{25pt} \addtolength{\textheight}{3.5cm} \addtolength{\topmargin}{-2cm} \font\manual=logo10 % font used for the METAFONT logo, etc. 
% Driver preamble, second part: markup helpers, colours, tweaks to
% verbatim and to the table of contents, and the title page.
% Comments in this part avoid control sequences on purpose, because
% the driver is also read as skipped text when the file inputs itself.
%
% Logos and short names used throughout the guide.
\newcommand*\MF{{\manual META}\-{\manual FONT}}
\newcommand*\babel{\textsf{babel}}
\newcommand*\Babel{\textsf{Babel}}
\newcommand*\xetex{\textsf{xetex}}
\newcommand*\pdftex{\textsf{pdftex}}
\newcommand*\luatex{\textsf{luatex}}
\newcommand\largetex{T\kern -.1517em\lower .45ex\hbox {E}\kern -.09emX}
% nb swallows its argument and prints nothing.
\newcommand*\nb[1]{}
% Meta-variable: italic name between angle brackets.
\newcommand*\m[1]{\mbox{$\langle$\normalfont\itshape#1\/$\rangle$}}
\newcommand*\langlist{%
  \meta{language}\texttt{,}\meta{language}\texttt{,}...}
\newcommand*\langvar{\m{language}}
% Markup for options, environments, files, classes and packages.
\newcommand*\Lopt[1]{\textsf{#1}}
\newcommand*\Lenv[1]{\texttt{#1}}
\newcommand*\menv[1]{\char`\{#1\char`\}}
\newcommand*\Eenv[1]{%
  \quad\ldots\quad
  \texttt{\color{thered}\string\end\menv{#1}}}
\newcommand*\file[1]{\texttt{#1}}
\newcommand*\cls[1]{\texttt{#1}}
\newcommand*\pkg[1]{\texttt{#1}}
\setlength{\leftmargini}{1.5em}
% Packages for shaded boxes, columns, colours and links.
\usepackage{framed}
\usepackage{multicol}
\usepackage{color,colortbl}
\usepackage[linkcolor=blue,urlcolor=blue,colorlinks=true]{hyperref}
\hypersetup{%
  pdfsubject={LaTeX Multilingual documents and localization for pdfTeX, LuaTeX and XeTeX.},
  pdfauthor={Javier Bezos, Johannes Braams},
  pdftitle={Babel},
  pdfkeywords={TeX, LaTeX, pdfTeX, LuaTeX, XeTeX, internationalization, localization}}
% New{version}: coloured tag with the version; when the argument
% contains a number of the form major.minor, a link to the matching
% news page may be appended.
% NOTE(review): with major > 23 and minor > 34 both tests below hold
% and the link is printed twice -- confirm this is intended.
\ExplSyntaxOn
\newcommand\New[1]{%
  \regex_extract_once:nnN{(\d\d?)\.(\d\d?)}{#1}\bbltempa
  \seq_pop_right:NN\bbltempa\bbltempb
  \seq_pop_right:NN\bbltempa\bbltempa
  \colorbox[rgb]{.92, .86, .73}%
    {New~#1%
     \quark_if_no_value:NTF\bbltempb{}%
       {~
        \ifnum\bbltempa>2
          \ifnum\bbltempb>34
            \href{https://latex3.github.io/babel/news/%
              whats-new-in-babel-\bbltempa.\bbltempb.html}%
              {\raisebox{.15ex}{$\oplus$}}%
          \fi\fi
        \ifnum\bbltempa>23
          \href{https://latex3.github.io/babel/news/%
            whats-new-in-babel-\bbltempa.\bbltempb.html}%
            {\raisebox{.15ex}{$\oplus$}}%
        \fi}}\enspace\ignorespaces}
\ExplSyntaxOff
% Colours (shadecolor is set again further down; the later value wins).
\definecolor{thered}{rgb}{0.65,0.04,0.07}
\definecolor{thegrey}{gray}{0.8}
\definecolor{shadecolor}{rgb}{1,1,0.97}
\definecolor{messages}{rgb}{.66,.13,.27}
% Theorem-like environments print only their upper-cased name, in
% colour, as the label of a list item; the second argument (the
% number) is not used.
\makeatletter
\def\@begintheorem#1#2{%
  \list{}{}%
  \global\advance\@listdepth\m@ne
  \item[{\sffamily\bfseries\color{messages}\hspace*{1.3em}%
    \MakeUppercase{#1}}]}%
\makeatother
\newtheorem{warning}{Warning}
\newtheorem{note}{Note}
\newtheorem{example}{Example}
\newtheorem{troubleshooting}{Troubleshooting}
\newtheorem{more}{More}
% Keep the original verbatim environment; it is wrapped below.
\let\bblxv\verbatim
\let\bblexv\endverbatim
% setengine{name} stores the engine label printed beside the next
% verbatim block.
\newcommand\setengine{\def\engine}
\let\engine\relax
% At begin document the vertical bar is defined to start a verb span
% in which a name between angle brackets prints as a meta-variable.
\begingroup
  \catcode`\<=13 \catcode`\>=13 \catcode`\|=13
  \AtBeginDocument{%
    \gdef|{\verb|\def<##1>{$\langle${\rmfamily\itshape##1}$\rangle$}}}
\endgroup
% Verbatim blocks are shaded; a pending engine label is boxed to the
% left of the block and then cleared, so it applies to one block only.
\def\verbatim{%
  \begin{shaded*}%
  \ifx\engine\relax\else
    \vskip-1.08\baselineskip
    \leavevmode\llap{\fbox{\footnotesize\textsc{\engine}}\hskip2.8em}%
    \vskip-1.5\baselineskip
    \vskip0pt
    \global\let\engine\relax
  \fi
  \bblxv\vskip-\baselineskip\vskip2.5\parsep}
\def\endverbatim{\bblexv\vskip-2\baselineskip\end{shaded*}}
% An active underscore toggles red highlighting: the first one opens
% a group and switches the colour, the next one closes the group.
\catcode`\_=\active
\def_{\bgroup\let_\egroup\leavevmode\color{thered}}
\def\MacroFont{\fontencoding \encodingdefault
  \fontfamily\ttdefault
  \fontseries\mddefault
  \fontshape\updefault
  \small
  \catcode`\_=\active}
\definecolor{shadecolor}{rgb}{0.96,0.96,0.93}
% Heading of a command description: the name in red, then its
% arguments.
\AtBeginDocument{%
  \def\PrintDescribeMacro#1{%
    \strut\MacroFont\color{thered}\normalsize\string#1}}
\def\Describe#1{%
  \par\penalty-500\vskip3ex\noindent
  \DescribeMacro{#1}\args}
\def\DescribeOther{\vskip-4ex\Describe}
\makeatletter
% Troubleshooting entries are collected in a list of their own, kept
% in the auxiliary file with extension tsh. The heading below used to
% be misspelled with three letters o.
\def\trouble#1{\addcontentsline{tsh}{trouble}{#1}}
\def\listoftroubles{\section*{Troubleshooting}\@starttoc{tsh}}
\let\l@trouble\l@figure
\let\saved@check@percent\check@percent
\let\check@percent\relax
% Prints the argument list of a description, if not empty.
\def\args#1{%
  \def\bbl@tempa{#1}%
  \ifx\bbl@tempa\@empty\else#1\vskip1ex\fi\ignorespaces}
\begingroup % Changes to ltxdoc (add <<...>> syntax)
  \catcode`\<\active
  \catcode`\>\active
  \gdef\check@plus@etc{%
    \let\bbl@next\pm@module
    \ifx*\next
      \let\bbl@next\star@module
    \else\ifx/\next
      \let\bbl@next\slash@module
    \else\ifx<\next
      \let\bbl@next\var@module
    \fi\fi\fi
    \bbl@next}
  \gdef\var@module#1#2#3>>{%
    $\langle$\pm@module#2#3>$\rangle$%
    \ifx*#2\ $\equiv$\fi}
\endgroup
% Table of contents layout: bold section entries with a 2em label
% box, wider indents for the lower levels, wider page-number box.
\renewcommand*\l@section[2]{%
  \ifnum \c@tocdepth >\z@
    \addpenalty\@secpenalty
    \addvspace{1.0em \@plus\p@}%
    \setlength\@tempdima{2em}%
    \begingroup
      \parindent \z@ \rightskip \@pnumwidth
      \parfillskip -\@pnumwidth
      \leavevmode \bfseries
      \advance\leftskip\@tempdima
      \hskip -\leftskip
      #1\nobreak\hfil \nobreak\hb@xt@\@pnumwidth{\hss #2}\par
    \endgroup
  \fi}
\renewcommand*\l@subsection{\@dottedtocline{2}{2em}{3em}}
\renewcommand*\l@subsubsection{\@dottedtocline{3}{5em}{4em}}
\renewcommand*\l@paragraph{\@dottedtocline{4}{9em}{4.5em}}
\renewcommand\partname{Part}
\def\@pnumwidth{3em}
\makeatother
\begin{document}
\title{Babel, a multilingual package for use with \LaTeX's standard document classes.}
\author{Johannes Braams\\ Javier Bezos}
\date{Typeset \today}
% Title page: a two-column table of coloured cells, oversized on
% purpose so that the colours bleed off the page.
\begin{titlepage}
\begin{minipage}[t][0pt]{30cm}
\vspace{-3cm}\hspace{-7cm}
\sffamily
\begin{tabular}{p{8cm}p{15cm}}
\cellcolor[rgb]{.86,.73,.67}
&\cellcolor[rgb]{.95,.95,.95}
\vspace{3.6cm}%
\color[rgb]{.55,.4,.35}
\leftskip5mm
\sffamily\fontsize{72}{72}\selectfont
Babel
\vspace{1.8cm}
\\
\cellcolor[rgb]{.95,.95,.95}
\vspace{2cm}\hspace{1.5cm}
\begin{minipage}{5cm}
\Huge
\ifx\babelcode\undefined
  User guide
\else
  Code
\fi
\Large
\vspace{1.2cm}
Version \csname @gobble\expandafter\endcsname\fileversion\newline
\filedate
\LARGE
\vspace{1.2cm}
Javier Bezos\\[-.5ex]
{\large Current maintainer}
\vspace{.3cm}
Johannes L. Braams\\[-.5ex]
{\large Original author}
\end{minipage}
&\cellcolor[rgb]{.92, .86, .73}
\vspace{2cm}
\leftskip5mm
\begin{minipage}{10cm}
\fontsize{35}{45}\selectfont
\setlength\parskip{3mm}\raggedright
Localization and internationalization\\[1cm]
Unicode\\ \largetex\\ pdf\largetex\\ Lua\largetex\\ Xe\largetex
\vspace{20cm}
\end{minipage}
\end{tabular}
\end{minipage}
\end{titlepage}
\tableofcontents
% The list of troubleshooting entries and the questions that follow
% belong to the user guide only, not to the code listing.
\ifx\babelcode\undefined
  \listoftroubles
\fi
\clearpage
\ifx\babelcode\undefined
\begin{description}
\item[\sffamily\color{messages}What is this document about?]
This user guide focuses on internationalization and localization with \LaTeX{} and \pdftex, \xetex{} and \luatex{} with the \babel{} package. There are also some notes on its use with e-Plain and pdf-Plain \TeX. \item[\sffamily\color{messages}What if I’m interested only in the latest changes?] Changes and new features with relation to version 3.8 are highlighted with \New{X.XX}\hspace{-.5em} (\raisebox{.15ex}{$\oplus$} is a link to the \babel{} site), and there are some notes for the latest versions in \href{https://latex3.github.io/babel/}{the \babel{} site}. The most recent features can still be unstable. Remember version 24.1 follows 3.99, because of a new numbering scheme. \item[\sffamily\color{messages}Can I help?] Sure! If you are interested in the \TeX{} multilingual support, please join the \href{http://tug.org/mailman/listinfo/kadingira}{kadingira mail list}. You can follow the development of \babel{} in \href{https://github.com/latex3/babel}{GitHub} and make suggestions; feel free to fork it and make pull requests. If you are the author of a package, send me a few test files which I'll add to mine, so that possible issues can be caught in the development phase. \item[\sffamily\color{messages}It doesn't work for me!] You can ask for help in some forums like \textsf{tex.stackexchange}, but if you have found a bug, I strongly beg you to report it in \href{https://github.com/latex3/babel/issues}{GitHub}, which is much better than just complaining on an e-mail list or a web forum. Remember \textit{warnings are not errors} by themselves; they just warn about possible problems or incompatibilities. Hyphenation rules are maintained separately \href{https://github.com/hyphenation/tex-hyphen}{here}. \item[\sffamily\color{messages}How can I contribute a new language?] See section \ref{contribute} for contributing a language. \item[\sffamily\color{messages}I only need to learn the most basic features.] 
The first subsections (1.1-1.3) describe the traditional way of loading a language (with |ldf| files), which is usually all you need. The alternative way based on |ini| files, which complements the previous one (it does \textit{not} replace it, although it is still necessary in some languages), is described below; go to \ref{inifiles}. \item[\sffamily\color{messages}I don’t like manuals. I prefer sample files.] This manual contains lots of examples and tips, but in GitHub there are many \href{https://github.com/latex3/babel/tree/master/samples}{sample files}. \item[\sffamily\color{messages}Where is the code?] Run |lualatex --jobname=babel-code \let\babelcode\relax\input{babel.dtx}|. \end{description} \section{The user interface}\label{U-I} \subsection{Monolingual documents} In most cases, a single language is required, and then all you need in \LaTeX{} is to load the package using its standard mechanism for this purpose, namely, passing that language as an optional argument. In addition, you may want to set the font and input encodings. Another approach is making the language a global option in order to let other packages detect and use it. This is the standard way in \LaTeX{} for an option – in this case a language – to be recognized by several packages. Many languages are compatible with \textsf{xetex} and \textsf{luatex}. With them you can use \babel{} to localize the documents. When these engines are used, the Latin script is covered by default in current \LaTeX{} (provided the document encoding is UTF-8), because the font loader is preloaded and the font is switched to |lmroman|. Other scripts require loading \textsf{fontspec}. You may want to set the font attributes with \textsf{fontspec}, too. \begin{example} Here is a simple full example for “traditional” \TeX{} engines (see below for \xetex{} and \luatex{}). The packages |fontenc| and |inputenc| do not belong to \babel, but they are included in the example because typically you will need them. 
It assumes UTF-8, the default encoding: \setengine{pdftex} \begin{verbatim} \documentclass{article} \usepackage[T1]{fontenc} _\usepackage[french]{babel}_ \begin{document} Plus ça change, plus c'est la même chose! \end{document} \end{verbatim} Now consider something like: \begin{verbatim} _\documentclass[french]{article}_ \usepackage{babel} \usepackage{varioref} \end{verbatim} With this setting, the package \texttt{varioref} will also see the option |french| and will be able to use it. \end{example} \begin{example} And now a simple monolingual document in Russian (text from the Wikipedia) with \xetex{} or \luatex{}. Note neither \textsf{fontenc} nor \textsf{inputenc} are necessary, but the document should be encoded in UTF-8 and a so-called Unicode font must be loaded (in this example |\babelfont| is used, described below). \setengine{luatex/xetex} \begin{verbatim} _\documentclass[russian]{article}_ \usepackage{babel} _\babelfont{rm}{DejaVu Serif}_ \begin{document} Россия, находящаяся на пересечении множества культур, а также с учётом многонационального характера её населения, — отличается высокой степенью этнокультурного многообразия и способностью к межкультурному диалогу. \end{document} \end{verbatim} \end{example} \begin{troubleshooting} \trouble{Paragraph ended before \textbackslash UTFviii@three@octets was complete} A common source of trouble is a wrong setting of the input encoding. Depending on the \LaTeX{} version you can get the following somewhat cryptic error: \begin{verbatim} ! Paragraph ended before \UTFviii@three@octets was complete. \end{verbatim} Or the more explanatory: \begin{verbatim} ! Package inputenc Error: Invalid UTF-8 byte ... \end{verbatim} Make sure you set the encoding actually used by your editor. 
\end{troubleshooting} \begin{note} Because of the way \babel{} has evolved, ``language'' can refer to (1) a set of hyphenation patterns as preloaded into the format, (2) a package option, (3) an |ldf| file, and (4) a name used in the document to select a language or dialect. So, a package option refers to a language in a generic way -- sometimes it is the actual language name used to select it, sometimes it is a file name loading a language with a different name, sometimes it is a file name loading several languages. Please, read the documentation for specific languages for further info. \end{note} \begin{troubleshooting} The following warning is about hyphenation patterns, which are not under the direct control of \babel: \trouble{No hyphenation patterns were preloaded for (babel) the language `LANG' into the format} \begin{verbatim} Package babel Warning: No hyphenation patterns were preloaded for (babel) the language `LANG' into the format. (babel) Please, configure your TeX system to add them and (babel) rebuild the format. Now I will use the patterns (babel) preloaded for \language=0 instead on input line 57. \end{verbatim} The document will be typeset, but very likely the text will not be correctly hyphenated. Some languages in some systems may be raising this warning wrongly (because they are not hyphenated) -- just ignore it. See the manual of your distribution (Mac\TeX, MiK\TeX, \TeX Live, etc.) for further info about how to configure it. \end{troubleshooting} \begin{note} With \textsf{hyperref} you may want to set the document language with something like: \begin{verbatim} \usepackage[_pdflang=es-MX_]{hyperref} \end{verbatim} This is not currently done by \babel{} and you must set it by hand. 
The document language can also be set with |\DocumentMetadata|, before |\documentclass|; for example: \begin{verbatim} \DocumentMetadata{_lang=es-MX_} \end{verbatim} \end{note} \begin{note} Although it has been customary to recommend placing |\title|, |\author| and other elements printed by |\maketitle| after |\begin{document}|, mainly because of shorthands, it is advisable to keep them in the preamble. Currently there is no real need to use shorthands in those macros. \end{note} \begin{note} \Babel{} does not make any readjustments in font size, vertical positioning or line height by default. This is on purpose because the optimal solution depends on the document layout and the font, and very likely the most appropriate one is a combination of these settings. \end{note} \subsection{Multilingual documents} In multilingual documents, just use a list of the required languages as package or class options. The last language is considered the main one, activated by default. Sometimes, the main language changes the document layout (eg, |spanish| and |french|). \begin{example} In \LaTeX, the preamble of the document: \begin{verbatim} \documentclass{article} \usepackage[dutch,english]{babel} \end{verbatim} would tell \LaTeX\ that the document would be written in two languages, Dutch and English, and that English would be the first language in use, and the main one. \end{example} You can also set the main language explicitly, but it is discouraged except if there is a real reason to do so: \begin{verbatim} \documentclass{article} \usepackage[_main=english_,dutch]{babel} \end{verbatim} Examples of cases where |main| is useful are the following. \begin{example} Some classes load \babel{} with a hardcoded language option. 
Sometimes, the main language can be overridden with something like that before |\documentclass|: \begin{verbatim} \PassOptionsToPackage{main=english}{babel} \end{verbatim} \end{example} \begin{note} Languages may be set as global and as package option at the same time, but in such a case you should set explicitly the main language with the package option |main|: \begin{verbatim} \documentclass[_italian_]{book} \usepackage[ngerman,_main=italian_]{babel} \end{verbatim} \end{note} \begin{warning} In the preamble the main language has \textit{not} been selected, except hyphenation patterns and the name assigned to |\languagename| (in particular, shorthands, captions and date are not activated). If you need to define boxes and the like in the preamble, you might want to use some of the language selectors described below. \end{warning} To switch the language there are two basic macros, described below in detail: |\selectlanguage| is used for blocks of text, while |\foreignlanguage| is for chunks of text inside paragraphs. \begin{example} A full bilingual document with \pdftex{} follows. The main language is |french|, which is activated when the document begins. It assumes UTF-8: \setengine{pdftex} \begin{verbatim} \documentclass{article} \usepackage[T1]{fontenc} _\usepackage[english,french]{babel}_ \begin{document} Plus ça change, plus c'est la même chose! _\selectlanguage{english}_ And an English paragraph, with a short text in _\foreignlanguage{french}{français}_. \end{document} \end{verbatim} \end{example} \begin{example} With \xetex{} and \luatex, the following bilingual, single script document in UTF-8 encoding just prints a couple of ‘captions’ and |\today| in Danish and Vietnamese. No additional packages are required, because the default font supports both languages. \setengine{luatex/xetex} \begin{verbatim} \documentclass{article} _\usepackage[vietnamese,danish]{babel}_ \begin{document} \prefacename, \alsoname, \today. 
\selectlanguage{vietnamese} \prefacename, \alsoname, \today. \end{document} \end{verbatim} \end{example} \begin{note} Once a language is loaded, you can select it with the corresponding BCP47 tag. See section \ref{bcp47} for further details. \end{note} \begin{note} Documents with several input encodings are not frequent, but sometimes are useful. You can set different encodings for different languages as the following example shows: \begin{verbatim} \addto\extrasfrench{\inputencoding{latin1}} \addto\extrasrussian{\inputencoding{koi8-r}} \end{verbatim} \end{note} \subsection{Mostly monolingual documents} \label{mostlymono} \New{3.39} Very often, multilingual documents consist of a main language with small pieces of text in other languages (words, idioms, short sentences). Typically, all you need is to set the line breaking rules and, perhaps, the font. In such a case, \babel{} now does not require declaring these secondary languages explicitly, because the basic settings are loaded on the fly when the language is selected (and also when provided in the optional argument of |\babelfont|, if used). This is particularly useful, too, when there are short texts of this kind coming from an external source whose contents are not known beforehand (for example, titles in a bibliography). In this regard, it is worth remembering that |\babelfont| does \textit{not} load any font until required, so that it can be used just in case. \New{3.84} With \pdftex, when a language is loaded on the fly (actually, with |\babelprovide|, because this is the macro used internally to load it) selectors now set the font encoding based on the list provided when loading |fontenc|. Not all scripts have an associated encoding, so this feature works only with Latin, Cyrillic, Greek, Arabic, Hebrew, Cherokee, Armenian, and Georgian, provided a suitable font is found. 
\begin{example} A trivial document with the default font in English and Spanish, and FreeSerif in Russian is: \setengine{luatex/xetex} \begin{verbatim} \documentclass[english]{article} \usepackage{babel} _\babelfont[russian]{rm}{FreeSerif}_ \begin{document} English. _\foreignlanguage{russian}{Русский}_. _\foreignlanguage{spanish}{Español}_. \end{document} \end{verbatim} \end{example} \begin{note} Instead of its name, you may prefer to select the language with the corresponding BCP47 tag. This alternative, however, must be activated explicitly, because a two- or three-letter word is a valid name for a language (eg, |lu| can be the locale name with tag |khb| or the tag for |lubakatanga|). See section \ref{bcp47} for further details. \end{note} \subsection{Modifiers} \New{3.9c} The basic behavior of some languages can be modified when loading \babel{} by means of \textit{modifiers}. They are set after the language name, and are prefixed with a dot (only when the language is set as package option -- neither global options nor the |main| key accepts them). An example is (spaces are not significant and they can be added or removed):\footnote{No predefined ``axes'' for modifiers are provided because languages and their scripts have quite different needs.} \begin{verbatim} \usepackage[latin_.medieval_, spanish_.notilde.lcroman_, danish]{babel} \end{verbatim} Attributes (described below) are considered modifiers, ie, you can set an attribute by including it in the list of modifiers. \New{3.89} Alternatively, modifiers can be set with a separate option, with the keyword |modifiers| followed by a dot and the language name (note the language is not selected or loaded with this option). 
It is useful to activate some feature when the language is declared as a class option: \begin{verbatim} \documentclass[spanish]{report} \usepackage[_modifiers.spanish = notilde.lcroman_]{babel} \end{verbatim} \subsection{Troubleshooting} \begin{itemize} \item Loading directly |sty| files in \LaTeX{} (ie, |\usepackage{<language>}|) is deprecated and you will get the error:\footnote{In old versions the error read ``You have used an old interface to call babel'', not very helpful.} \trouble{You are loading directly a language style} \begin{verbatim} ! Package babel Error: You are loading directly a language style. (babel) This syntax is deprecated and you must use (babel) \usepackage[language]{babel}. \end{verbatim} \item Another typical error when using \babel{} is the following:\footnote{In old versions the error read ``You haven't loaded the language LANG yet''.} \trouble{Unknown language `LANG'} \begin{verbatim} ! Package babel Error: Unknown language `#1'. Either you have (babel) misspelled its name, it has not been installed, (babel) or you requested it in a previous run. Fix its name, (babel) install it or just rerun the file, respectively. In (babel) some cases, you may need to remove the aux file \end{verbatim} The most frequent reason is, by far, the last (for example, you included |spanish|, but you realized this language is not used after all, and therefore you removed it from the option list). In most cases, the error vanishes when the document is typeset again, but in more severe ones you will need to remove the |aux| file. \end{itemize} \subsection{Plain} In e-Plain and pdf-Plain, load language styles with |\input| and then use |\begindocument| (the latter is defined by \babel): \begin{verbatim} \input estonian.sty \begindocument \end{verbatim} \begin{warning} Not all languages provide a |sty| file and some of them are not compatible with those formats. 
Please, refer to \href{https://latex3.github.io/babel/guides/using-babel-with-plain.html}% {Using babel with Plain} for further details. \end{warning} \subsection{Basic language selectors} This section describes the commands to be used in the document to switch the language in multilingual documents. In most cases, only the two basic macros |\selectlanguage| and |\foreignlanguage| are necessary. The environments |otherlanguage|, |otherlanguage*| and |hyphenrules| are auxiliary, and described in the next section. The main language is selected automatically when the |document| environment begins. \Describe{\selectlanguage}{\marg{language}} When a user wants to switch from one language to another he can do so using the macro |\selectlanguage|. This macro takes the language, defined previously by a language definition file, as its argument. It calls several macros that should be defined in the language definition files to activate the special definitions for the language chosen: \begin{verbatim} \selectlanguage{german} \end{verbatim} This command can be used as environment, too, in case there are relatively short texts and you do not want to reset the language with a hardcode value. \begin{note} For ``historical reasons'', a macro name is converted to a language name without the leading |\|; in other words, |\selectlanguage{\german}| is equivalent to |\selectlanguage{german}|. Using a macro instead of a ``real'' name is deprecated. \New{3.43} However, if the macro name does not match any language, it will get expanded as expected. \end{note} \begin{note} Bear in mind |\selectlanguage| can be automatically executed, in some cases, in the auxiliary files, at heads and foots, and after the environment |otherlanguage*|. 
\end{note} \begin{warning} If used inside braces there might be some non-local changes, as this would be roughly equivalent to: \begin{verbatim} {\selectlanguage{<inner-language>} ...}\selectlanguage{<outer-language>} \end{verbatim} If you want a change which is really local, you must enclose this code with an additional grouping level. \end{warning} \begin{warning} There are a couple of issues related to the way the language information is written to the auxiliary files: \begin{itemize} \item |\selectlanguage| should not be used inside some boxed environments (like floats or |minipage|) to switch the language if you need the information written to the |aux| to be correctly synchronized. This rarely happens, but if it were the case, you must use |otherlanguage| instead. \item In addition, this macro inserts a |\write| in vertical mode, which may break the vertical spacing in some cases (for example, between lists or at the beginning of a table cell). \New{3.64} The behavior can be adjusted with |\babeladjust{select.write=<mode>}|, where \m{mode} is |shift| (which shifts the skips down and adds a |\penalty|); |keep| (the default -- with it the |\write| and the skips are kept in the order they are written), and |omit| (which may seem too drastic a solution, because nothing is written, but more often than not this command is applied to more or less short texts with no sectioning or similar commands, and therefore no language synchronization is necessary). In a table cell, a |\leavevmode| just before the selector may be enough. \end{itemize} \end{warning} \Describe{\foreignlanguage}{\oarg{option-list}\marg{language}\marg{text}} The command |\foreignlanguage| takes two arguments; the second argument is a phrase to be typeset according to the rules of the language named in its first one. 
This command (1) only switches the extra definitions and the hyphenation rules for the language, \emph{not} the names and dates, (2) does not send information about the language to auxiliary files (i.e., the surrounding language is still in force), and (3) it works even if the language has not been set as package option (but in such a case it only sets the hyphenation patterns and a warning is shown). With the |bidi| option, it also enters horizontal mode (this is not always done, for backwards compatibility), and since it is meant for phrases only the text direction (and not the paragraph one) is set. \New{3.44} As already said, captions and dates are not switched. However, with the optional argument you can switch them, too. So, you can write: \begin{verbatim} \foreignlanguage[date]{polish}{\today} \end{verbatim} In addition, captions can be switched with |captions| (or both, of course, with |date,| |captions|). Until 3.43 you had to write something like |{\selectlanguage{..} ..}|, which was not always the most convenient way. \begin{note} |\bibitem| is out of sync with |\selectlanguage| in the \file{.aux} file. The reason is |\bibitem| uses |\immediate| (and others, in fact), while |\selectlanguage| doesn't. There is a similar issue with floats, too. There is no known workaround. \end{note} \subsection{Auxiliary language selectors} \Describe{\begin\menv{otherlanguage}}{\marg{language}\Eenv{otherlanguage}} The environment \Lenv{otherlanguage} does basically the same as |\selectlanguage|, except that language change is (mostly) local to the environment. Actually, there might be some non-local changes, as this environment is roughly equivalent to: \begin{verbatim} \begingroup \selectlanguage{<inner-language>} ... \endgroup \selectlanguage{<outer-language>} \end{verbatim} If you want a change which is really local, you must enclose this environment with an additional grouping, like braces |{}|. 
Spaces after the environment are ignored.\footnote{Very likely, and because of the limitations of many old editors with bidi text, the idea was \cs{end}\texttt{\{otherlanguage\}} had to be a line by itself.} If this behavior is not desired, you may use the environment \texttt{selectlanguage}. \begin{warning} Being similar to |\selectlanguage|, the warning above about the internal |\write| also applies here. The current mode (vertical or horizontal) is also not changed. \end{warning} \Describe{\begin\menv{otherlanguage*}}%
{\oarg{option-list}\marg{language}\Eenv{otherlanguage*}} Same as |\foreignlanguage| but as environment. Spaces after the environment are \textit{not} ignored. This environment was originally intended for intermixing left-to-right typesetting with right-to-left typesetting in engines not supporting a change in the writing direction inside a line. However, by default it never complied with the documented behavior and it is just a version as environment of |\foreignlanguage|, except when the option |bidi| is set -- in this case, |\foreignlanguage| emits a |\leavevmode|, while |otherlanguage*| does not. \subsection{More on selection} \Describe{\babeltags}{\char`\{\m{tag1} \texttt{=} \m{language1}, \m{tag2} \texttt{=} \m{language2}, \dots\char`\}} \New{3.9i} In multilingual documents with many language-switches the commands above can be cumbersome. With this tool shorter names can be defined. It adds nothing really new -- it is just syntactical sugar. It defines |\text<tag1>{<text>}| to be |\foreignlanguage{<language1>}{<text>}|, and |\begin{<tag1>}| to be |\begin{otherlanguage*}{<language1>}|, and so on. Note |\<tag1>| is also allowed, but remember to set it locally inside a group. \begin{warning} There is a clear drawback to this feature, namely, the ‘prefix’ |\text...| is heavily overloaded in \LaTeX{} and conflicts with existing macros may arise (|\textlatin|, |\textbar|, |\textit|, |\textcolor| and many others). The same applies to environments, because |arabic| conflicts with |\arabic|. 
Furthermore, and because of this overloading, detecting the language of a chunk of text by external tools can become unfeasible (is |\textga| the locale for the African language Gã or something else?). Except if there is a reason for this ‘syntactical sugar’, the best option is to stick to the default selectors or even to define your own alternatives. \end{warning} \begin{example} With \begin{verbatim} \babeltags{de = german} \end{verbatim} you can write \begin{verbatim} text \textde{German text} text \end{verbatim} and \begin{verbatim} text \begin{de} German text \end{de} text \end{verbatim} \end{example} \begin{note} Something like \verb|\babeltags{finnish = finnish}| is legitimate -- it defines |\textfinnish| and |\finnish| (and, of course, |\begin{finnish}|). \end{note} \Describe{\babelensure}{|[include=|\m{commands}|,exclude=|\m{commands}% |,fontenc=|\m{encoding}|]|\marg{language}} \New{3.9i} Except in a few languages, like \textsf{russian}, captions and dates are just strings, and do not switch the language. That means you should set it explicitly if you want to use them, or hyphenation (and in some cases the text itself) will be wrong. For example: \begin{verbatim} \foreignlanguage{russian}{text \foreignlanguage{polish}{\seename} text} \end{verbatim} Of course, \TeX{} can do it for you. To avoid switching the language all the while, |\babelensure| redefines the captions for a given language to wrap them with a selector: \begin{verbatim} \babelensure{polish} \end{verbatim} By default only the basic captions and |\today| are redefined, but you can add further macros with the key |include| in the optional argument (without commas). Macros not to be modified are listed in |exclude|. 
You can also enforce a font encoding with the option |fontenc|.\footnote{With it, encoded strings may not work as expected.} A couple of examples: \begin{verbatim} \babelensure[include=\Today]{spanish} \babelensure[fontenc=T5]{vietnamese} \end{verbatim} They are activated when the language is selected (at the |afterextras| event), and it makes some assumptions which might not be fulfilled in some languages. Note also you should include only macros defined by the language, not global macros (eg, |\TeX| or |\dag|). With |ini| files (see below), captions are ensured by default. \subsection{Shorthands} A \textit{shorthand} is a sequence of one or two characters that expands to arbitrary \TeX{} code. Shorthands can be used for different kinds of things; for example: (1) in some languages shorthands such as |"a| are defined to be able to hyphenate the word if the encoding is |OT1|; (2) in some languages shorthands such as |!| are used to insert the right amount of white space; (3) several kinds of discretionaries and breaks can be inserted easily with |"-|, |"=|, etc. The package \textsf{inputenc} as well as \xetex{} and \luatex{} have alleviated entering non-ASCII characters, but minority languages and some kinds of text can still require characters not directly available on the keyboards (and sometimes not even as separated or precomposed Unicode characters). As to the point 2, now \textsf{pdfTeX} provides |\knbccode|, and \luatex{} can manipulate the glyph list. Tools for point 3 can be still very useful in general. There are four levels of shorthands: \textit{user}, \textit{language}, \textit{system}, and \textit{language user} (by order of precedence). In most cases, you will use only shorthands provided by languages. \begin{note} Keep in mind the following: \begin{enumerate} \item Activated chars used for two-char shorthands cannot be followed by a closing brace |}| and the spaces following are gobbled. With one-char shorthands (eg,~|:|), they are preserved.
\item If on a certain level (system, language, user, language user) there is a one-char shorthand, two-char ones starting with that char and on the same level are ignored. \item Since they are active, a shorthand cannot contain the same character in its definition (except if deactivated with, eg, |\string|). \end{enumerate} \end{note} \begin{troubleshooting} \trouble{Argument of \textbackslash language@active@arg" has an extra \textbraceright} A typical error when using shorthands is the following: \begin{verbatim} ! Argument of \language@active@arg" has an extra }. \end{verbatim} It means there is a closing brace just after a shorthand, which is not allowed (eg,~|"}|). Just add |{}| after (eg,~|"{}}|). \end{troubleshooting} \Describe{\shorthandon}{\marg{shorthands-list}} \DescribeOther{\shorthandoff}{% \colorbox{thegrey}{\ttfamily\hskip-.2em*\hskip-.2em}% \marg{shorthands-list}} It is sometimes necessary to switch a shorthand character off temporarily, because it must be used in an entirely different way. For this purpose, the user commands |\shorthandoff| and |\shorthandon| are provided. They each take a list of characters as their arguments. The command |\shorthandoff| sets the |\catcode| for each of the characters in its argument to other (12); the command |\shorthandon| sets the |\catcode| to active (13). Both commands only work on `known' shorthand characters, and an error will be raised otherwise. You can check if a character is a shorthand with |\ifbabelshorthand| (see below). \New{3.9a} However, |\shorthandoff| does not behave as you would expect with characters like |~| or |^|, because they usually are not ``other''. For them |\shorthandoff*| is provided, so that with \begin{verbatim} \shorthandoff*{~^} \end{verbatim} |~| is still active, very likely with the meaning of a non-breaking space, and |^| is the superscript character. The catcodes used are those when the shorthands are defined, usually when language files are loaded. 
If you do not need shorthands, or prefer an alternative approach of your own, you may want to switch them off with the package option |shorthands=off|, as described below. \begin{warning} It is worth emphasizing these macros are meant for temporary changes. Whenever possible and if there are not conflicts with other packages, shorthands must be always enabled (or disabled). \end{warning} \Describe{\useshorthands}{% \colorbox{thegrey}{\ttfamily\hskip-.2em*\hskip-.2em}% \marg{char}} The command |\useshorthands| initiates the definition of user-defined shorthand sequences. It has one argument, the character that starts these personal shorthands. \New{3.9a} User shorthands are not always alive, as they may be deactivated by languages (for example, if you use |"| for your user shorthands and switch from \textsf{german} to \textsf{french}, they stop working). Therefore, a starred version |\useshorthands*|\marg{char} is provided, which makes sure shorthands are always activated. If the package option |shorthands| is used, you must include any character to be activated with |\useshorthands|. \Describe\defineshorthand{\texttt{[}\langlist\texttt{]}% \marg{shorthand}\marg{code}} The command |\defineshorthand| takes two arguments: the first is a one- or two-character shorthand sequence, and the second is the code the shorthand should expand to. \New{3.9a} An optional argument allows to (re)define language and system shorthands (some languages do not activate shorthands, so you may want to add |\languageshorthands|\marg{language} to the corresponding |\extras|, as explained below). By default, user shorthands are (re)defined. User shorthands override language ones, which in turn override system shorthands. Language-dependent user shorthands (new in 3.9) take precedence over ``normal'' user shorthands. 
\begin{example} Let's assume you want a unified set of shorthands for discretionaries (languages do not define shorthands consistently, and |"-|, |\-|, |"=| have different meanings). You can start with, say: \begin{verbatim} \useshorthands*{"} \defineshorthand{"*}{\babelhyphen{soft}} \defineshorthand{"-}{\babelhyphen{hard}} \end{verbatim} However, the behavior of hyphens is language-dependent. For example, in languages like Polish and Portuguese, a hard hyphen inside compound words is repeated at the beginning of the next line. You can then set: \begin{verbatim} \defineshorthand[*polish,*portuguese]{"-}{\babelhyphen{repeat}} \end{verbatim} Here, options with |*| set a language-dependent user shorthand, which means the generic one above only applies for the rest of languages; without |*| they would (re)define the language shorthands instead, which are overridden by user ones. Now, you have a single unified shorthand (|"-|), with a content-based meaning (`compound word hyphen') whose visual behavior is that expected in each context. \end{example} \Describe{\languageshorthands}{\marg{language}} The command |\languageshorthands| can be used to switch the shorthands on the language level. It takes one argument, the name of a language or |none| (the latter does what its name suggests).\footnote{Actually, any name not corresponding to a language group does the same as \texttt{none}. However, follow this convention because it might be enforced in future releases of \babel{} to catch possible errors.} Note that for this to work the language should have been specified as an option when loading the \babel\ package. For example, you can use in \textsf{english} the shorthands defined by \textsf{ngerman} with \begin{verbatim} \addto\extrasenglish{\languageshorthands{ngerman}} \end{verbatim} (You may also need to activate them as user shorthands in the preamble with, for example, |\useshorthands| or |\useshorthands*|.)
\begin{example} Very often, this is a more convenient way to deactivate shorthands than |\shorthandoff|, for example if you want to define a macro to ease typing phonetic characters with \textsf{tipa}: \begin{verbatim} \newcommand{\myipa}[1]{{_\languageshorthands{none}_\tipaencoding#1}} \end{verbatim} \end{example} \Describe{\babelshorthand}{\marg{shorthand}} With this command you can use a shorthand even if (1) not activated in \texttt{shorthands} (in this case only shorthands for the current language are taken into account, ie, not user shorthands), (2) turned off with |\shorthandoff| or (3) deactivated with the internal |\bbl@deactivate|; for example, \verb|\babelshorthand{"u}| or \verb|\babelshorthand{:}|. (You can conveniently define your own macros, or even your own user shorthands provided they do not overlap.) \begin{example} Since by default shorthands are not activated until |\begin{document}|, you may use this macro when defining the |\title| in the preamble: \begin{verbatim} \title{Documento científico_\babelshorthand{"-}_técnico} \end{verbatim} \end{example} \bigskip For your records, here is a list of shorthands, but you must double check them, as they may change:\footnote{Thanks to Enrico Gregorio} \begin{description} \itemsep=-\parskip \item[Languages with no shorthands] Croatian, English (any variety), Indonesian, Hebrew, Interlingua, Irish, Lower Sorbian, Malaysian, North Sami, Romanian, Scottish, Welsh \item[Languages with only \texttt{"} as defined shorthand character] Albanian, Bulgarian, Danish, Dutch, Finnish, German (old and new orthography, also Austrian), Icelandic, Italian, Norwegian, Polish, Portuguese (also Brazilian), Russian, Serbian (with Latin script), Slovene, Swedish, Ukrainian, Upper Sorbian \item[Basque] |" ' ~| \item[Breton] |: ; ? !| \item[Catalan] |" ' `| \item[Czech] |" -| \item[Esperanto] |^| \item[Estonian] |" ~| \item[French] (all varieties) |: ; ? !| \item[Galician] |" . 
' ~ < >| \item[Greek] (ancient, polutoniko, only 8-bit \TeX) |~|, (optional, see the manual for Greek) |;| \item[Hungarian] |`| \item[Kurmanji] |^| \item[Latin] |" ^ =| \item[Slovak] |" ^ ' -| \item[Spanish] |" . < > ' ~| \item[Turkish] |: ! =| \end{description} In addition, the \babel{} core declares |~| as a one-char shorthand which is let, like the standard |~|, to a non breaking space.\footnote{This declaration serves no purpose, but it is preserved for backward compatibility.} \Describe\ifbabelshorthand{\marg{character}\marg{true}\marg{false}} \New{3.23} Tests if a character has been made a shorthand. \begin{note} \catcode`\|=12\relax Both \textsf{ltxdoc} and \textsf{babel} use \verb|\AtBeginDocument| to change some catcodes, and \babel{} reloads \textsf{hhline} to make sure \verb|:| has the right one, so if you want to change the catcode of \verb/|/ it has to be done using the same method at the proper place, with \begin{verbatim} \AtBeginDocument{\DeleteShortVerb{\|}} \end{verbatim} \textit{before} loading \babel. This way, when the document begins the sequence is (1) make \verb/|/ active (\textsf{ltxdoc}); (2) make it inactive (your settings); (3) make \babel{} shorthands active (\textsf{babel}); (4) reload \textsf{hhline} (\textsf{babel}, now with the correct catcodes for \verb/|/ and \verb|:|).\catcode`\|=\active \end{note} \subsection{Package options} \New{3.9a} These package options are processed before language options, so that they are taken into account irrespective of their order. The first three options have been available in previous versions. \Describe{KeepShorthandsActive}{} Tells \babel{} not to deactivate shorthands after loading a language file, so that they are also available in the preamble. \Describe{activeacute}{} For some languages \babel\ supports this option to set |'| as a shorthand in case it is not done by default. \Describe{activegrave}{} Same for |`|. \Describe{shorthands=}{\meta{char}\meta{char}... 
$\string|$ \texttt{off}} The only language shorthands activated are those given, like, eg: \begin{verbatim} \usepackage[esperanto,french,_shorthands=:;!?_]{babel} \end{verbatim} If \verb|'| is included, \texttt{activeacute} is set; if \verb|`| is included, \texttt{activegrave} is set. Active characters (like \verb|~|) should be preceded by \verb|\string| (otherwise they will be expanded by \LaTeX{} before they are passed to the package and therefore they will not be recognized); however, |t| is provided for the common case of |~| (as well as |c| for the not so common case of the comma). With |shorthands=off| no language shorthands are defined. As some languages use this mechanism for tools not available otherwise, a macro \verb|\babelshorthand| is defined, which allows using them; see above. \Describe{safe=}{\texttt{none} $\string|$ \texttt{ref} $\string|$ \texttt{bib}} Some \LaTeX{} macros are redefined so that using shorthands is safe. With \texttt{safe=bib} only |\nocite|, |\bibcite| and |\bibitem| are redefined. With |safe=ref| only |\newlabel|, |\ref| and |\pageref| are redefined (as well as a few macros from \textsf{varioref} and \textsf{ifthen}). With |safe=none| no macro is redefined. This option is strongly recommended, because a good deal of incompatibilities and errors are related to these redefinitions. As of \New{3.34}, in $\epsilon$\TeX{} based engines (ie, almost every engine except the oldest ones) shorthands can be used in these macros (formerly you could not). \Describe{math=}{\texttt{active} $\string|$ \texttt{normal}} Shorthands are mainly intended for text, not for math. By setting this option with the value |normal| they are deactivated in math mode (default is |active|) and things like |${a'}$| (a closing brace after a shorthand) are not a source of trouble anymore. \Describe{config=}{\meta{file}} Load \meta{file}\texttt{.cfg} instead of the default config file |bblopts.cfg| (the file is loaded even with |noconfigs|). 
\Describe{main=}{\meta{language}} Sets the main language, as explained above, ie, this language is always loaded last. If it is not given as package or global option, it is added to the list of requested languages. \Describe{headfoot=}{\meta{language}} By default, headlines and footlines are not touched (only marks), and if they contain language-dependent macros (which is not usual) there may be unexpected results. With this option you may set the language in heads and foots. An alternative is to set the language explicitly when heads and foots are redefined. \Describe{noconfigs}{} Global and language default config files are not loaded, so you can make sure your document is not spoilt by an unexpected \texttt{.cfg} file. However, if the key |config| is set, this file is loaded. \Describe{showlanguages}{} Prints to the log the list of languages loaded when the format was created: number (remember dialects can share it), name, hyphenation file and exceptions file. % \Describe{nocase}{} \New{3.9l} Language settings for uppercase and % lowercase mapping (as set by |\SetCase|) are ignored. Use only if there % are incompatibilities with other packages. 
\Describe{silent}{} \New{3.9l} No warnings and no \textit{infos} are written to the log file.\footnote{You can use alternatively the package \textsf{silence}.} \Describe{hyphenmap=}{\texttt{off} $\string|$ \texttt{first} $\string|$ \texttt{select} $\string|$ \texttt{other} $\string|$ \texttt{other*}} \New{3.9g} Sets the behavior of case mapping for hyphenation, provided the language defines it.\footnote{Turned off in plain.} It can take the following values: \begin{description} \renewcommand\makelabel[1]{% \hspace\labelsep\normalfont\ttfamily\color{thered}#1} \itemsep=-\parskip \item[off] deactivates this feature and no case mapping is applied; \item[first] sets it at the first switching commands in the current or parent scope (typically, when the aux file is first read and at |\begin{document}|, but also the first |\selectlanguage| in the preamble), and it's the default if a single language option has been stated;\footnote{Duplicated options count as several ones.} \item[select] sets it only at |\selectlanguage|; \item[other] also sets it at |otherlanguage|; \item[other*] also sets it at |otherlanguage*| as well as in heads and foots (if the option |headfoot| is used) and in auxiliary files (ie, at |\select@language|), and it's the default if several language options have been stated. The option |first| can be regarded as an optimized version of \texttt{other*} for monolingual documents.\footnote{Providing |foreign| is pointless, because the case mapping applied is that at the end of the paragraph, but if either \xetex{} or \luatex{} change this behavior it might be added. On the other hand, |other| is provided even if I [JBL] think it isn't really useful, but who knows.} \end{description} \Describe{bidi=}{\texttt{default} $\string|$ \texttt{basic} $\string|$ \texttt{basic-r} $\string|$ \texttt{bidi-l} $\string|$ \texttt{bidi-r}} \New{3.14} Selects the bidi algorithm to be used in \luatex{} and \xetex{}. See sec.~\ref{bidi}. 
\Describe{layout=}{} \New{3.16} Selects which layout elements are adapted in bidi documents. See sec.~\ref{bidi}. \Describe{provide=}{\texttt{*}} \New{3.49} An alternative to |\babelprovide| for languages passed as options. See section~\ref{inifiles}, which describes also the variants |provide+=| and |provide*=|. \subsection{The \texttt{base} option} With this package option \babel{} just loads some basic macros (mainly the selectors), defines |\AfterBabelLanguage| and exits. It also selects the hyphenation patterns for the last language passed as option (by its name in |language.dat|). There are two main uses: classes and packages, and as a last resort in case there are, for some reason, incompatible languages. It can be used if you just want to select the hyphenation patterns of a single language, too. \Describe\AfterBabelLanguage{\marg{option-name}\marg{code}} This command is currently the only one provided by |base|. Executes \meta{code} when the file loaded by the corresponding package option is finished (at |\ldf@finish|). The setting is global. So \begin{verbatim} \AfterBabelLanguage{french}{...} \end{verbatim} does ... at the end of |french.ldf|. It can be used in |ldf| files, too, but in such a case the code is executed only if \meta{option-name} is the same as |\CurrentOption| (which might not be the same as the option name as set in |\usepackage|!). \begin{example} Consider two languages \textsf{foo} and \textsf{bar} defining the same |\macro| with |\newcommand|. An error is raised if you attempt to load both. Here is a way to overcome this problem: \begin{verbatim} \usepackage[base]{babel} \AfterBabelLanguage{foo}{% \let\macroFoo\macro \let\macro\relax} \usepackage[foo,bar]{babel} \end{verbatim} \end{example} \begin{note} With a recent version of \LaTeX, an alternative method to execute some code just after an |ldf| file is loaded is with |\AddToHook| and the hook |file/|\m{language}|.ldf/after|. 
\Babel{} does not predeclare it, and you have to do it yourself with |\ActivateGenericHook|. \end{note} \begin{warning} Currently this option is not compatible with languages loaded on the fly. \end{warning} \subsection{\texttt{ini} files} \label{inifiles} An alternative approach to define a language (or, more precisely, a \textit{locale}) is by means of an \texttt{ini} file. Currently \babel{} provides about 380 of these files containing the basic data required for a locale, covering about 300 languages, plus basic templates for about 400 locales. |ini| files are not meant only for \babel, and they have been devised as a resource for other packages. To ease interoperability between \TeX{} and other systems, they are identified with the BCP 47 codes as preferred by the Unicode Common Locale Data Repository, which was used as source for most of the data provided by these files, too (the main exception being the |\...name| strings). Most of them set the date, and many also the captions (Unicode and LICR). They will be evolving with the time to add more features (something to keep in mind if backward compatibility is important). The following section shows how to make use of them by means of |\babelprovide|. In other words, |\babelprovide| is mainly meant as alternative when the |ldf| does not exist or does not work as expected, and for secondary tasks. \begin{example} Although Georgian has its own \texttt{ldf} file, here is how to declare this language with an |ini| file in Unicode engines. \setengine{luatex/xetex} \begin{verbatim} \documentclass{book} \usepackage{babel} _\babelprovide[import, main]{georgian}_ \babelfont{rm}[Renderer=Harfbuzz]{DejaVu Sans} \begin{document} \tableofcontents \chapter{სამზარეულო და სუფრის ტრადიციები} ქართული ტრადიციული სამზარეულო ერთ-ერთი უმდიდრესია მთელ მსოფლიოში. 
\end{document} \end{verbatim} \end{example} \begin{more} There is an example of how to use an |ini| template file \href{https://github.com/latex3/babel/issues/176#issuecomment-1080846575}{here}, for Phoenician (although currently this locale is bundled with \babel). \end{more} \New{3.49} Alternatively, you can tell \babel{} to load all or some languages passed as options with |\babelprovide| and not from the |ldf| file in a few typical cases. Thus, |provide=*| means ‘load the main language with the |\babelprovide| mechanism instead of the |ldf| file’ applying the basic features, which in this case means |import,| |main|. There are (currently) three options: \begin{itemize} \item |provide=*| is the option just explained, for the main language; \item |provide+=*| is the same for additional languages (the main language is still the |ldf| file); \item |provide*=*| is the same for all languages, ie, main and additional. \end{itemize} \begin{example} The preamble in the previous example can be more compactly written as: \begin{verbatim} \documentclass{book} \usepackage[_georgian, provide=*_]{babel} \babelfont{rm}[Renderer=Harfbuzz]{DejaVu Sans} \end{verbatim} Or also: \begin{verbatim} \documentclass[_georgian_]{book} \usepackage[_provide=*_]{babel} \babelfont{rm}[Renderer=Harfbuzz]{DejaVu Sans} \end{verbatim} \end{example} \begin{note} The \texttt{ini} files just define and set some parameters, but the corresponding behavior is not always implemented. Also, there are some limitations in the engines. A few remarks follow (which could no longer be valid when you read this manual, if the packages involved have been updated). The Harfbuzz renderer still has some issues, so as a rule of thumb prefer the default renderer, and resort to Harfbuzz only if the former does not work for you. 
Fortunately, fonts can be loaded twice with different renderers; for example: \begin{verbatim} \babelfont[spanish]{rm}{FreeSerif} \babelfont[hindi]{rm}[Renderer=Harfbuzz]{FreeSerif} \end{verbatim} \begin{description} \itemsep=-\parskip \item[Arabic] Monolingual documents mostly work in \luatex, but it must be fine tuned, particularly math and graphical elements like |picture|. In \xetex{} \babel{} resorts to the \textsf{bidi} package, which seems to work. \item[Hebrew] Niqqud marks seem to work in both engines, but depending on the font cantillation marks might be misplaced (\xetex{} or \luatex{} with Harfbuzz seems better). \item[Devanagari] In \luatex{} and the default renderer many fonts work, but some others do not, the main issue being the ‘ra’. You may need to set explicitly the script to either |deva| or |dev2|, eg: \begin{verbatim} \newfontscript{Devanagari}{deva} \end{verbatim} Other Indic scripts are still under development in the default \luatex{} renderer, but should work with |Renderer=Harfbuzz|. They also work with \xetex{}, although unlike with \luatex{} fine tuning the font behavior is not always possible. \item[Southeast scripts] Thai works in both \luatex{} and \xetex{}, but line breaking differs (rules are hard-coded in \xetex, but they can be modified in \luatex). Lao seems to work, too, but there are no patterns for the latter in \luatex{}. Khmer clusters are rendered wrongly with the default renderer. The comment about Indic scripts and \textsf{lualatex} also applies here. Some quick patterns can help, with something similar to: \begin{verbatim} \babelprovide[import, hyphenrules=+]{lao} \babelpatterns[lao]{1ດ 1ມ 1ອ 1ງ 1ກ 1າ} % Random \end{verbatim} \item[East Asia scripts] Settings for either Simplified or Traditional should work out of the box, with basic line breaking with any renderer. 
Although for a few words and short texts the |ini| files should be fine, CJK texts are best set with a dedicated framework (\textsf{CJK}, \textsf{luatexja}, \textsf{kotex}, \textsf{CTeX}, etc.). This is what the class |ltjbook| does with \luatex, which can be used in conjunction with the |ldf| for |japanese|, because the following piece of code loads \textsf{luatexja}: \begin{verbatim} \documentclass[japanese]{ltjbook} \usepackage{babel} \end{verbatim} \item[Latin, Greek, Cyrillic] Combining chars with the default \luatex{} font renderer might be wrong; on the other hand, with the Harfbuzz renderer diacritics are stacked correctly, but many hyphenation points are discarded (this bug is related to kerning, so it depends on the font). With \xetex{} both combining characters and hyphenation work as expected (not quite, but in most cases it works; the problem here are font clusters). \end{description} \end{note} \begin{note} Wikipedia defines a \textit{locale} as follows: “In computing, a locale is a set of parameters that defines the user’s language, region and any special variant preferences that the user wants to see in their user interface. Usually a locale identifier consists of at least a language code and a country/region code.” \Babel{} is moving gradually from the old and fuzzy concept of \textit{language} to the more modern of \textit{locale}. Note each locale is by itself a separate “language”, which explains why there are so many files. This is on purpose, so that possible variants can be created and/or redefined easily. \end{note} \textbf{Modifying and adding values to |ini| files} \nobreak \New{3.39} There is a way to modify the values of |ini| files when they get loaded with |\babelprovide| and |import|. To set, say, |digits.native| in the |numbers| section, use something like |numbers/digits.native=abcdefghij|. Keys may be added, too. Without |import| you may modify the identification keys. This can be used to create private variants easily. 
All you need is to import the same |ini| file with a different locale name and different parameters. % \begin{example} % Let's assume you need only the basic features of `spanish`, so that % the |ini| file is enough, and also a variant with your own % hyphenation with % \end{example} \subsection{List of locales available in \cs{babelprovide}} Here is the list of the names currently supported with |ini| locale files, with |\babelprovide| (or |provide=|). With these languages, |\babelfont| loads (if not done before) the language and script names (even if the language is defined as a package option with an \textsf{ldf} file). These are also the names recognized by |\babelprovide| with a valueless |import|, which will load the |ini| file with the tag given in parentheses. Many locales are quite usable, provided captions and dates are not required (which is a very frequent case, particularly in ancient languages). So, they are included in the default \babel{} distribution. This can serve to encourage contributions, too. A warning will remind you they are ‘bare minimum locales’. They are set in \textcolor[gray]{.4}{gray} in the following list. \begin{note} Although the names of the corresponding |ldf| files match those in this list, there are some exceptions, particularly in German and Serbian. So, |ngerman| is called here |german|, which is the name in the CLDR and, actually, the most logical. 
\end{note}
\begingroup
\bigskip\hrule\nobreak
\makeatletter
% Helper macros for the locale list that follows. They are defined after
% the \begingroup above; the matching \endgroup is not part of this
% fragment.
%
% All five entry macros take the same three arguments:
%   #1  locale name, typeset in typewriter type
%   #2  material placed right after the name (the list passes the
%       caption markers \hascapu / \hascapl here, or nothing)
%   #3  tag, typeset in parentheses at \footnotesize
% They differ only in the left offset (-2em for main entries, -1em for
% sub-entries) and in the colour of the name.
%
% Main entry, name in `thered'.
\def\tag#1#2#3{\par
  \hspace{-2em}\textcolor{thered}{\texttt{#1}}#2\enspace
  \mbox{\footnotesize(#3)}}
% Sub-entry, name in the default colour.
\def\subtag#1#2#3{\par
  \hspace{-1em}\texttt{#1}#2\enspace
  \mbox{\footnotesize(#3)}}
% Sub-entry, name in `thered'.
\def\subtagalt#1#2#3{\par
  \hspace{-1em}\textcolor{thered}{\texttt{#1}}#2\enspace
  \mbox{\footnotesize(#3)}}
% Main entry, name in gray (.4).
\def\tagmin#1#2#3{\par
  \hspace{-2em}\textcolor[gray]{.4}{\texttt{#1}}#2\enspace
  \mbox{\footnotesize(#3)}}
% Sub-entry, name in gray (.4).
\def\subtagmin#1#2#3{\par
  \hspace{-1em}\textcolor[gray]{.4}{\texttt{#1}}#2\enspace
  \mbox{\footnotesize(#3)}}
% Superscript markers explained in the legend below.
\def\hascapu{\textsuperscript{u}}
\def\hascapl{\textsuperscript{l}}
% Short remark attached to an entry: its own paragraph at \footnotesize.
% This redefines \note (used by the `note' environment closed above).
\def\note#1{\par{\footnotesize#1\par}}
\small
\bigskip
Recommended names are set in \textcolor{thered}{red}.\\
In variants with the region or the script name (which are not
highlighted), prefer the full forms.\\
Bare minimum locales are set in \textcolor[gray]{.4}{gray}.\\
Discouraged and deprecated names are not included.\\
\hascapu{} means Unicode captions; \hascapl{} means LICR captions.\\
There are some notes in a few locales. 
\bigskip\hrule\nobreak \begin{multicols}{2} \leftskip2em \tag{abkhazian}{}{ab} \tag{afar}{}{aa} \tag{afrikaans}{\hascapu\hascapl}{af} \tag{aghem}{}{agq} \tag{akan}{}{ak} \tagmin{akkadian}{}{akk} \tag{albanian}{\hascapu\hascapl}{sq} \tag{amharic}{\hascapu\hascapl}{am} \tagmin{ancientegyptian}{}{egy} \tag{ancientgreek}{\hascapu\hascapl}{grc}\note{It’s a different language from \texttt{greek}.} \tag{arabic}{\hascapu}{ar} \subtag{arabic-algeria}{\hascapu}{ar-DZ} \subtag{arabic-dz}{\hascapu}{ar-DZ} \subtag{arabic-egypt}{\hascapu}{ar-EG} \subtag{arabic-eg}{\hascapu}{ar-EG} \subtag{arabic-iraq}{\hascapu}{ar-IQ} \subtag{arabic-iq}{\hascapu}{ar-IQ} \subtag{arabic-jordan}{\hascapu}{ar-JO} \subtag{arabic-jo}{\hascapu}{ar-JO} \subtag{arabic-lebanon}{\hascapu}{ar-LB} \subtag{arabic-lb}{\hascapu}{ar-LB} \subtag{arabic-morocco}{\hascapu}{ar-MA} \subtag{arabic-ma}{\hascapu}{ar-MA} \subtag{arabic-palestinianterritories}{\hascapu}{ar-PS} \subtag{arabic-ps}{\hascapu}{ar-PS} \subtag{arabic-saudiarabia}{\hascapu}{ar-SA} \subtag{arabic-sa}{\hascapu}{ar-SA} \subtag{arabic-syria}{\hascapu}{ar-SY} \subtag{arabic-sy}{\hascapu}{ar-SY} \subtag{arabic-tunisia}{\hascapu}{ar-TN} \subtag{arabic-tn}{\hascapu}{ar-TN} \tagmin{aramaic}{}{arc} \subtagmin{aramaic-nabataean}{}{arc-nbat} \subtagmin{aramaic-nbat}{}{arc-nbat} \subtagmin{aramaic-palmyrene}{}{arc-palm} \subtagmin{aramaic-palm}{}{arc-palm} \tag{armenian}{\hascapu\hascapl}{hy} \tag{assamese}{\hascapu}{as} \tag{asturian}{\hascapu\hascapl}{ast} \tag{asu}{}{asa} \tag{atsam}{}{cch} \tagmin{avestan}{}{ae} \tag{awadhi}{}{awa} \tagmin{aymara}{}{ay} \tag{azerbaijani}{\hascapu\hascapl}{az} \subtag{azerbaijani-cyrillic}{}{az-Cyrl} \subtag{azerbaijani-cyrl}{}{az-Cyrl} \subtag{azerbaijani-latin}{}{az-Latn} \subtag{azerbaijani-latn}{}{az-Latn} \tag{bafia}{}{ksf} \tagmin{balinese}{}{ban} \tag{baluchi}{}{bal} \tag{bambara}{}{bm} \tag{bangla}{\hascapu}{bn} \tag{basaa}{}{bas} \tagmin{bashkir}{}{ba} \tag{basque}{\hascapu\hascapl}{eu} \tagmin{bataktoba}{}{bbc} 
\tagmin{bavarian}{}{bar} \tag{belarusian}{\hascapu\hascapl}{be} \tag{bemba}{}{bem} \tag{bena}{}{bez} \tag{bengali}{\hascapu}{bn} \tag{bhojpuri}{}{bho} \tag{blin}{}{byn} \tag{bodo}{}{brx} \tag{bosnian}{\hascapu\hascapl}{bs} \subtag{bosnian-cyrillic}{}{bs-Cyrl} \subtag{bosnian-cyrl}{}{bs-Cyrl} \subtag{bosnian-latin}{\hascapu\hascapl}{bs-Latn} \subtag{bosnian-latn}{\hascapu\hascapl}{bs-Latn} \tag{breton}{\hascapu\hascapl}{br} \tag{bulgarian}{\hascapu\hascapl}{bg} \tag{buriat}{\hascapu\hascapl}{bua} \tag{burmese}{}{my} \tag{cantonese}{}{yue} \tag{catalan}{\hascapu\hascapl}{ca} \tag{cebuano}{}{ceb} \tag{centralatlastamazight}{}{tzm} \tag{centralkurdish}{\hascapu}{ckb} \subtag{centralkurdish-latin}{\hascapu}{ckb-Latn} \subtag{centralkurdish-latn}{\hascapu}{ckb-Latn} \tag{chakma}{}{ccp} \tag{chechen}{}{ce} \tag{cherokee}{}{chr} \tag{chiga}{}{cgg} \tag{chinese}{\hascapu}{zh} \subtag{chinese-simplified}{\hascapu}{zh-Hans} \subtag{chinese-hans}{\hascapu}{zh-Hans} \subtag{chinese-traditional}{\hascapu}{zh-Hant} \subtag{chinese-hant}{\hascapu}{zh-Hant} \subtag{chinese-simplified-hongkongsarchina}{}{zh-Hans-HK} \subtag{chinese-hans-hk}{}{zh-Hans-HK} \subtag{chinese-simplified-macausarchina}{}{zh-Hans-MO} \subtag{chinese-hans-mo}{}{zh-Hans-MO} \subtag{chinese-simplified-singapore}{}{zh-Hans-SG} \subtag{chinese-hans-sg}{}{zh-Hans-SG} \subtag{chinese-hant-hk}{}{zh-Hant-HK} \subtag{chinese-traditional-hongkongsarchina}{}{zh-Hant-HK} \subtag{chinese-hant-mo}{}{zh-Hant-MO} \subtag{chinese-traditional-macausarchina}{}{zh-Hant-MO} \tag{churchslavic}{\hascapu}{cu} \subtag{churchslavic-cyrs}{\hascapu}{cu-Cyrs} \subtag{churchslavic-glag}{}{cu-Glag} \subtag{churchslavic-glagolitic}{}{cu-Glag} \subtag{churchslavic-oldcyrillic}{\hascapu}{cu-Cyrs} % \tag{churchslavonic}{\hascapu}{cu} \tag{chuvash}{}{cv} \tagmin{classicalmandaic}{}{myz} % \tag{classiclatin}{\hascapu\hascapl}{la-x-classic} \tag{colognian}{}{ksh} \tag{coptic}{}{cop} \tag{cornish}{}{kw} \tag{corsican}{}{co} 
\tag{croatian}{\hascapu\hascapl}{hr} \tag{czech}{\hascapu\hascapl}{cs} \tag{danish}{\hascapu\hascapl}{da} \tagmin{divehi}{}{dv} \tag{dogri}{}{doi} \tag{duala}{}{dua} \tag{dutch}{\hascapu\hascapl}{nl} \tag{dzongkha}{}{dz} % \tag{ecclesiasticlatin}{\hascapu\hascapl}{la-x-ecclesia} \tagmin{egyptianarabic}{}{arz}\note{Masri or Colloquial Egyptian, with tag \texttt{arz}, different from Standard Arabic as spoken in Egypt, with tag \texttt{ar-EG}.} \tag{embu}{}{ebu} \tag{english}{\hascapu\hascapl}{en} \subtagalt{american}{\hascapu\hascapl}{en-US} \subtag{americanenglish}{\hascapu\hascapl}{en-US} \subtagalt{australian}{\hascapu\hascapl}{en-AU} \subtag{australianenglish}{\hascapu\hascapl}{en-AU} \subtagalt{british}{\hascapu\hascapl}{en-GB} \subtag{britishenglish}{\hascapu\hascapl}{en-GB} \subtagalt{canadian}{\hascapu\hascapl}{en-CA} \subtag{canadianenglish}{\hascapu\hascapl}{en-CA} \subtag{english-australia}{\hascapu\hascapl}{en-AU} \subtag{english-au}{\hascapu\hascapl}{en-AU} \subtag{english-canada}{\hascapu\hascapl}{en-CA} \subtag{english-ca}{\hascapu\hascapl}{en-CA} \subtag{english-unitedkingdom}{\hascapu\hascapl}{en-GB} \subtag{english-gb}{\hascapu\hascapl}{en-GB} \subtag{english-newzealand}{\hascapu\hascapl}{en-NZ} \subtag{english-unitedstates}{\hascapu\hascapl}{en-US} \subtag{english-nz}{\hascapu\hascapl}{en-NZ} \subtag{english-us}{\hascapu\hascapl}{en-US} \tag{erzya}{}{myv} \tag{esperanto}{\hascapu\hascapl}{eo} \tag{estonian}{\hascapu\hascapl}{et} \tag{ewe}{}{ee} \tag{ewondo}{}{ewo} \tag{faroese}{}{fo} \tag{farsi}{\hascapu}{fa} \tag{filipino}{}{fil} \tag{finnish}{\hascapu\hascapl}{fi} \tag{french}{\hascapu\hascapl}{fr} \subtagalt{acadian}{\hascapu\hascapl}{fr-x-acadian} \subtag{canadianfrench}{\hascapu\hascapl}{fr-CA} \subtag{swissfrench}{\hascapu\hascapl}{fr-CH} \subtag{french-belgium}{\hascapu\hascapl}{fr-BE} \subtag{french-be}{\hascapu\hascapl}{fr-BE} \subtag{french-canada}{\hascapu\hascapl}{fr-CA} \subtag{french-ca}{\hascapu\hascapl}{fr-CA} 
\subtag{french-luxembourg}{\hascapu\hascapl}{fr-LU} \subtag{french-lu}{\hascapu\hascapl}{fr-LU} \subtag{french-switzerland}{\hascapu\hascapl}{fr-CH} \subtag{french-ch}{\hascapu\hascapl}{fr-CH} \tag{friulian}{\hascapu\hascapl}{fur} \tag{fulah}{}{ff} \tag{ga}{}{gaa} \tag{galician}{\hascapu\hascapl}{gl} \tag{ganda}{}{lg} \tag{geez}{}{gez} \tag{georgian}{\hascapu}{ka} \tag{german}{\hascapu\hascapl}{de} \note{Note the \texttt{ldf} names differ. See note above.} \subtag{german-traditional}{\hascapu\hascapl}{de-1901} \subtagalt{austrian}{\hascapu\hascapl}{de-AT} \subtag{german-austria}{\hascapu\hascapl}{de-AT} \subtag{german-at}{\hascapu\hascapl}{de-AT} \subtag{german-austria-traditional}{\hascapu\hascapl}{de-AT-1901} \subtagalt{swisshighgerman}{\hascapu\hascapl}{de-CH}\note{\texttt{swissgerman}, with tag \texttt{gsw} is a different language.} \subtag{german-switzerland}{\hascapu\hascapl}{de-CH} \subtag{german-ch}{\hascapu\hascapl}{de-CH} \subtag{german-switzerland-traditional}{\hascapu\hascapl}{de-CH-1901} \tagmin{gothic}{}{got} \tag{greek}{\hascapu\hascapl}{el} \subtag{monotonicgreek}{\hascapu\hascapl}{el} \subtagalt{polytonicgreek}{\hascapu\hascapl}{el-polyton} \tag{guarani}{}{gn} \tag{gujarati}{\hascapu}{gu} \tag{gusii}{}{guz} \tag{haryanvi}{}{bgc} \tag{hausa}{\hascapu\hascapl}{ha} \subtag{hausa-ghana}{}{ha-GH} \subtag{hausa-gh}{}{ha-GH} \subtag{hausa-niger}{}{ha-NE} \subtag{hausa-ne}{}{ha-NE} \tag{hawaiian}{}{haw} \tag{hebrew}{\hascapu\hascapl}{he} \tag{hindi}{\hascapu}{hi} \tag{hmongnjua}{}{hnj} \tag{hungarian}{\hascapu\hascapl\hascapl\hascapl}{hu} \tag{icelandic}{\hascapu\hascapl}{is} \tag{igbo}{}{ig} \tag{inarisami}{}{smn} \tag{indonesian}{\hascapu\hascapl}{id} \tagmin{ingush}{}{inh} \tag{interlingua}{\hascapu\hascapl}{ia} \tag{inuktitut}{}{iu} \tag{irish}{\hascapu\hascapl}{ga} \tag{italian}{\hascapu\hascapl}{it} \tag{japanese}{\hascapu}{ja} \tag{javanese}{}{jv} \tag{jju}{}{kaj} \tag{jolafonyi}{}{dyo} \tag{kabuverdianu}{}{kea} \tag{kabyle}{}{kab} 
\tag{kaingang}{}{kgp} \tag{kako}{}{kkj} \tag{kalaallisut}{}{kl} \tag{kalenjin}{}{kln} \tag{kamba}{}{kam} \tag{kannada}{\hascapu}{kn} \tag{kashmiri}{}{ks} \tag{kazakh}{}{kk} \tag{khmer}{\hascapu}{km} \tag{kikuyu}{}{ki} \tag{kinyarwanda}{}{rw} \tagmin{komi}{}{kv} \tag{konkani}{}{kok} \tag{korean}{\hascapu}{ko} \subtag{korean-han}{\hascapu}{ko-Hani} \subtag{korean-hani}{\hascapu}{ko-Hani} \tag{koyraborosenni}{}{ses} \tag{koyrachiini}{}{khq} \tag{kurmanji}{\hascapu\hascapl}{kmr} \tag{kwasio}{}{nmg} \tag{kyrgyz}{}{ky} \tagmin{ladino}{}{lad} \tag{lakota}{}{lkt} \tag{langi}{}{lag} \tag{lao}{\hascapu}{lo} \tag{latin}{\hascapu\hascapl}{la} \subtagalt{ecclesiasticallatin}{\hascapu\hascapl}{la-x-ecclesia} \subtagalt{classicallatin}{\hascapu\hascapl}{la-x-classic} \subtagalt{medievallatin}{\hascapu\hascapl}{la-x-medieval} \tag{latvian}{\hascapu\hascapl}{lv} \tagmin{lepcha}{}{lep} \tag{ligurian}{}{lij} \tagmin{limbu}{}{lif} \subtagmin{limbu-limb}{}{lif-limb} \subtagmin{limbu-limbu}{}{lif-limb} \tagmin{lineara}{}{lab} \tag{lingala}{}{ln} \tag{lithuanian}{\hascapu\hascapl\hascapl\hascapl}{lt} \tagmin{lombard}{}{lmo} \tag{lowersorbian}{\hascapu\hascapl}{dsb} \tag{lowgerman}{}{nds} % \tag{lsorbian}{\hascapu\hascapl}{dsb} \tagmin{lu}{}{khb} \tag{lubakatanga}{}{lu} \tag{luo}{}{luo} \tag{luxembourgish}{\hascapu\hascapl}{lb} \tag{luyia}{}{luy} \tag{macedonian}{\hascapu\hascapl}{mk} \tag{machame}{}{jmc} \tag{magyar}{\hascapu\hascapl\hascapl\hascapl}{hu} \tag{maithili}{}{mai} \tagmin{makasar}{}{mak} \subtagmin{makasar-bugi}{}{mak-Bugi} \subtagmin{makasar-buginese}{}{mak-Bugi} \tag{makhuwameetto}{}{mgh} \tag{makonde}{}{kde} \tag{malagasy}{}{mg} \tag{malay}{\hascapu\hascapl}{ms} \subtag{malay-brunei}{}{ms-BN} \subtag{malay-bn}{}{ms-BN} \subtag{malay-singapore}{}{ms-SG} \subtag{malay-sg}{}{ms-SG} \tag{malayalam}{\hascapu}{ml} \tag{maltese}{}{mt} \tag{manipuri}{}{mni} \tag{manx}{}{gv} \tag{maori}{}{mi} \tag{marathi}{\hascapu}{mr} \tag{masai}{}{mas} \tag{mazanderani}{}{mzn} \tag{meru}{}{mer} 
\tag{meta}{}{mgo} \tag{mongolian}{}{mn} \tag{monotonicgreek}{\hascapu\hascapl}{el} \tag{morisyen}{}{mfe} \tag{mundang}{}{mua} \tag{muscogee}{}{mus} \tag{nama}{}{naq} % \tag{naustrian}{\hascapu\hascapl}{de-AT} \tagmin{navajo}{}{nv} \tag{nepali}{}{ne} \tagmin{newari}{}{new} \tag{newzealand}{\hascapu\hascapl}{en-NZ} % \tag{ngerman}{\hascapu\hascapl}{de} \tag{ngiemboon}{}{nnh} \tag{ngomba}{}{jgo} \tag{nheengatu}{}{yrl} \tag{nigerianpidgin}{}{pcm} \tag{nko}{}{nqo} \tag{northernfrisian}{}{frr} \tag{northernkurdish}{\hascapu\hascapl}{kmr} \subtag{northernkurdish-arab}{\hascapu}{kmr-Arab} \subtag{northernkurdish-arabic}{\hascapu}{kmr-Arab} \tag{northernluri}{}{lrc} \tag{northernsami}{\hascapu\hascapl}{se} \tag{northernsotho}{}{nso} \tag{northndebele}{}{nd} \tag{norwegian}{\hascapu\hascapl}{no} \subtag{norsk}{\hascapu\hascapl}{no} \note{In the CLDR, \texttt{norwegianbokmal} (nb) just inherits from \texttt{norwegian}, so use the latter.} % \tag{norwegianbokmal}{\hascapu\hascapl}{nb} \tag{nswissgerman}{\hascapu\hascapl}{de-CH} \tag{nuer}{}{nus} \tag{nyanja}{}{ny} \tag{nyankole}{}{nyn} \tag{nynorsk}{\hascapu\hascapl}{nn} \subtag{norwegiannynorsk}{\hascapu\hascapl}{nn} \tag{occitan}{\hascapu\hascapl}{oc} \tag{odia}{\hascapu}{or} \tagmin{oldnorse}{}{non} % \tag{oriya}{\hascapu}{or} \tag{oromo}{}{om} \tag{ossetic}{}{os} \tag{papiamento}{}{pap} \tag{pashto}{}{ps} \tag{persian}{\hascapu}{fa} \subtag{farsi}{\hascapu}{fa} \tagmin{phoenician}{}{phn} \tag{piedmontese}{\hascapu\hascapl}{pms} \tag{polish}{\hascapu\hascapl}{pl} \tag{portuguese}{\hascapu\hascapl}{pt} \subtagalt{brazilian}{\hascapu\hascapl}{pt-BR} \subtag{brazilianportuguese}{\hascapu\hascapl}{pt-BR} \subtag{portuguese-brazil}{\hascapu\hascapl}{pt-BR} \subtag{portuguese-br}{\hascapu\hascapl}{pt-BR} \subtag{europeanportuguese}{\hascapu\hascapl}{pt-PT} \subtag{portuguese-portugal}{\hascapu\hascapl}{pt-PT} \subtag{portuguese-pt}{\hascapu\hascapl}{pt-PT} \tag{prussian}{}{prg} \tag{punjabi}{\hascapu}{pa} 
\subtag{punjabi-arabic}{}{pa-Arab} \subtag{punjabi-arab}{}{pa-Arab} \subtag{punjabi-gurmukhi}{\hascapu}{pa-Guru} \subtag{punjabi-guru}{\hascapu}{pa-Guru} \tag{quechua}{}{qu} \tag{rajasthani}{}{raj} \tag{romanian}{\hascapu\hascapl}{ro} \subtagalt{moldavian}{\hascapu\hascapl}{ro-MD} \subtag{romanian-moldova}{\hascapu\hascapl}{ro-MD} \subtag{romanian-md}{\hascapu\hascapl}{ro-MD} \tag{romansh}{\hascapu\hascapl}{rm} \tag{rombo}{}{rof} \tag{rundi}{}{rn} \tag{russian}{\hascapu\hascapl}{ru} \tag{rwa}{}{rwk} \tag{saho}{}{ssy} \tag{sakha}{}{sah} \tagmin{samaritan}{}{smp} \tag{samburu}{}{saq} \tag{samin}{\hascapu\hascapl}{se} \tag{sango}{}{sg} \tag{sangu}{}{sbp} \tag{sanskrit}{}{sa} \subtag{sanskrit-bangla}{}{sa-Beng} \subtag{sanskrit-beng}{}{sa-Beng} % \subtag{sanskrit-bengali}{}{sa-Beng} \subtag{sanskrit-devanagari}{}{sa-Deva} \subtag{sanskrit-deva}{}{sa-Deva} \subtag{sanskrit-gujarati}{}{sa-Gujr} \subtag{sanskrit-gujr}{}{sa-Gujr} \subtag{sanskrit-kannada}{}{sa-Knda} \subtag{sanskrit-knda}{}{sa-Knda} \subtag{sanskrit-malayalam}{}{sa-Mlym} \subtag{sanskrit-mlym}{}{sa-Mlym} \subtag{sanskrit-telugu}{}{sa-Telu} \subtag{sanskrit-telu}{}{sa-Telu} \tag{santali}{}{sat} \tag{saraiki}{}{skr} \tag{sardinian}{}{sc} \tag{scottishgaelic}{\hascapu\hascapl}{gd} \tag{sena}{}{seh} \tag{serbian}{\hascapu\hascapl}{sr} \note{Note the \texttt{ldf} names differ. 
See note above.} \subtag{serbian-cyrillic}{\hascapu\hascapl}{sr-Cyrl} \subtag{serbian-cyrl}{\hascapu\hascapl}{sr-Cyrl} \subtag{serbian-cyrillic-bosniaherzegovina}{\hascapu\hascapl}{sr-Cyrl-BA} \subtag{serbian-cyrl-ba}{\hascapu\hascapl}{sr-Cyrl-BA} \subtag{serbian-cyrillic-kosovo}{\hascapu\hascapl}{sr-Cyrl-XK} \subtag{serbian-cyrl-xk}{\hascapu\hascapl}{sr-Cyrl-XK} \subtag{serbian-cyrillic-montenegro}{\hascapu\hascapl}{sr-Cyrl-ME} \subtag{serbian-cyrl-me}{\hascapu\hascapl}{sr-Cyrl-ME} \subtag{serbian-latin}{\hascapu\hascapl}{sr-Latn} \subtag{serbian-latn}{\hascapu\hascapl}{sr-Latn} \subtag{serbian-latin-bosniaherzegovina}{\hascapu\hascapl}{sr-Latn-BA} \subtag{serbian-latn-ba}{\hascapu\hascapl}{sr-Latn-BA} \subtag{serbian-latin-kosovo}{\hascapu\hascapl}{sr-Latn-XK} \subtag{serbian-latn-xk}{\hascapu\hascapl}{sr-Latn-XK} \subtag{serbian-latin-montenegro}{\hascapu\hascapl}{sr-Latn-ME} \subtag{serbian-latn-me}{\hascapu\hascapl}{sr-Latn-ME} \subtag{serbian-ijekavsk}{\hascapu\hascapl}{sr-ijekavsk} \subtag{serbian-latn-ijekavsk}{\hascapu\hascapl}{sr-Latn-ijekavsk} % \tag{serbianc}{\hascapu\hascapl}{sr} \tag{shambala}{}{ksb} \tag{shona}{}{sn} \tag{sichuanyi}{}{ii} \tag{sicilian}{}{scn} \tag{silesian}{}{szl} \tag{sindhi}{}{sd} \subtag{sindhi-devanagari}{}{sd-deva} \subtag{sindhi-deva}{}{sd-deva} \subtag{sindhi-khojki}{}{sd-khoj} \subtag{sindhi-khoj}{}{sd-khoj} \subtag{sindhi-khudawadi}{}{sd-sind} \subtag{sindhi-sind}{}{sd-sind} \tag{sinhala}{\hascapu}{si} \tagmin{sinteromani}{}{rmo} \tag{slovak}{\hascapu\hascapl}{sk} \tag{slovene}{\hascapu\hascapl}{sl} \tag{slovenian}{\hascapu\hascapl}{sl} \tag{soga}{}{xog} \tag{somali}{}{so} \tag{sorani}{\hascapu}{ckb} \tagmin{southernaltai}{}{alt} \tag{southernsotho}{}{st} \tag{southndebele}{}{nr} \tag{spanish}{\hascapu\hascapl}{es} \subtagalt{mexican}{\hascapu\hascapl}{es-MX} \subtag{mexicanspanish}{\hascapu\hascapl}{es-MX} \subtag{spanish-mexico}{\hascapu\hascapl}{es-MX} \subtag{spanish-mx}{\hascapu\hascapl}{es-MX} 
\tag{standardmoroccantamazight}{}{zgh} \tag{sundanese}{}{su} \tag{swahili}{}{sw} \tag{swati}{}{ss} \tag{swedish}{\hascapu\hascapl}{sv} \tag{swissgerman}{}{gsw} \note{Different from \texttt{swisshighgerman} (de-CH), which is German as spoken in Switzerland.} \tag{syriac}{}{syr} \tag{tachelhit}{}{shi} \subtag{tachelhit-latin}{}{shi-Latn} \subtag{tachelhit-latn}{}{shi-Latn} \subtag{tachelhit-tifinagh}{}{shi-Tfng} \subtag{tachelhit-tfng}{}{shi-Tfng} \tagmin{tainua}{}{tdd} \tag{taita}{}{dav} \tag{tajik}{}{tg} \tag{tamil}{\hascapu}{ta} \tagmin{tangut}{}{txg} \tag{taroko}{}{trv} \tag{tasawaq}{}{twq} \tag{tatar}{}{tt} \tag{telugu}{\hascapu}{te} \tag{teso}{}{teo} \tag{thai}{\hascapu\hascapl}{th} \tag{tibetan}{\hascapu}{bo} \tag{tigre}{}{tig} \tag{tigrinya}{}{ti} \tag{tokpisin}{}{tpi} \tag{tongan}{}{to} \tag{tsonga}{}{ts} \tag{tswana}{}{tn} \tag{turkish}{\hascapu\hascapl}{tr} \tag{turkmen}{\hascapu\hascapl}{tk} \tag{tyap}{}{kcg} \tag{ukenglish}{\hascapu\hascapl}{en-GB} \tag{ukrainian}{\hascapu\hascapl}{uk} \tag{uppersorbian}{\hascapu\hascapl}{hsb} \tag{urdu}{\hascapu}{ur} \tag{usenglish}{\hascapu\hascapl}{en-US} \tag{usorbian}{\hascapu\hascapl}{hsb} \tag{uyghur}{\hascapu}{ug} \tag{uzbek}{}{uz} \subtag{uzbek-arabic}{}{uz-Arab} \subtag{uzbek-arab}{}{uz-Arab} \subtag{uzbek-cyrillic}{}{uz-Cyrl} \subtag{uzbek-cyrl}{}{uz-Cyrl} \subtag{uzbek-latin}{}{uz-Latn} \subtag{uzbek-latn}{}{uz-Latn} \tag{vai}{}{vai} \subtag{vai-latin}{}{vai-Latn} \subtag{vai-latn}{}{vai-Latn} \subtag{vai-vai}{}{vai-Vaii} \subtag{vai-vaii}{}{vai-Vaii} \tag{venda}{}{ve} \tag{vietnamese}{\hascapu\hascapl}{vi} \tag{volapuk}{}{vo} \tag{vunjo}{}{vun} \tag{walser}{}{wae} \tagmin{waray}{}{war} \tag{welsh}{\hascapu\hascapl}{cy} \tag{westernfrisian}{}{fy} \tag{wolaytta}{}{wal} \tag{wolof}{}{wo} \tag{xhosa}{}{xh} \tag{yangben}{}{yav} \tag{yiddish}{}{yi} \tag{yoruba}{}{yo} \tag{zarma}{}{dje} \tag{zulu}{}{zu} \end{multicols} \endgroup \hrule \bigskip \subsection{Selecting fonts} \New{3.15} Babel provides a high level 
interface on top of |fontspec| to select fonts. There is no need to load \textsf{fontspec} explicitly -- \babel{} does it for you with the first |\babelfont|.\footnote{See also the package \textsf{combofont} for a complementary approach.} \Describe\babelfont{\oarg{language-list}\marg{font-family}% \oarg{font-options}\marg{font-name}} \begin{note} See the note in the previous section about some issues in specific languages. \end{note} The main purpose of |\babelfont| is to define at once in a multilingual document the fonts required by the different languages, with their corresponding language systems (script and language). So, if you load, say, 4 languages, |\babelfont{rm}{FreeSerif}| defines 4 fonts (with their variants, of course), which are switched with the language by \babel. It is a tool to make things easier and transparent to the user. Here \textit{font-family} is |rm|, |sf| or |tt| (or newly defined ones, as explained below), and \textit{font-name} is the same as in \textsf{fontspec} and the like. If no language is given, then it is considered the default font for the family, activated when a language is selected. On the other hand, if there is one or more languages in the optional argument, the font will be assigned to them, overriding the default one. Alternatively, you may set a font for a script -- just precede its name (lowercase) with a star (eg, |*devanagari|). With this optional argument, the font is \textit{not} yet defined, but just predeclared. This means you may define as many fonts as you want ‘just in case’, because if the language is never selected, the corresponding |\babelfont| declaration is just ignored. \Babel{} takes care of the font language and the font script when languages are selected (as well as the writing direction); see the recognized languages above. In most cases, you will not need \textit{font-options}, which is the same as in \textsf{fontspec}, but you may add further key/value pairs if necessary. 
\begin{example} Usage in most cases is very simple. Let us assume you are setting up a document in Swedish, with some words in Hebrew, with a font suited for both languages. \begingroup % If you are looking at the code to see how it has been written, you % will be disappointed :-). The following example is built ad hoc to % emulate the final result to avoid dependencies, and therefore it's % not "real" code. \catcode`@=13 \def@#1{\ifcase#1\relax \egroup \or \bgroup\textdir TLT \else \bgroup\textdir TRT \fontspec[Scale=.87,Script=Hebrew]{Liberation Mono} \fi} \setengine{luatex/xetex} \begin{verbatim} \documentclass{article} \usepackage[swedish, bidi=default]{babel} \babelprovide[import]{hebrew} _\babelfont{rm}{FreeSerif}_ \begin{document} Svenska \foreignlanguage{hebrew}{@2עִבְרִית@0} svenska. \end{document} \end{verbatim} \endgroup If on the other hand you have to resort to different fonts, you can replace the red line above with, say: \setengine{luatex/xetex} \begin{verbatim} \babelfont{rm}{Iwona} \babelfont[hebrew]{rm}{FreeSerif} \end{verbatim} \end{example} |\babelfont| can be used to implicitly define a new font family. Just write its name instead of |rm|, |sf| or |tt|. This is the preferred way to select fonts in addition to the three basic families. \begin{example} Here is how to do it: \setengine{luatex/xetex} \begin{verbatim} \babelfont{kai}{FandolKai} \end{verbatim} Now, |\kaifamily| and |\kaidefault|, as well as |\textkai| are at your disposal. \end{example} \begin{note} You may load \textsf{fontspec} explicitly. For example: \setengine{luatex/xetex} \begin{verbatim} \usepackage{fontspec} \newfontscript{Devanagari}{deva} \babelfont[hindi]{rm}{Shobhika} \end{verbatim} This makes sure the OpenType script for Devanagari is |deva| and not |dev2|, in case it is not detected correctly. 
% You may also pass some % options to \textsf{fontspec}: with |silent|, the warnings about % unavailable scripts or languages are not shown (they are only really % useful when the document format is being set up). \end{note} \begin{note} |\fontspec| is not touched at all, only the preset font families (|rm|, |sf|, |tt|, and the like). If a language is switched when an \textit{ad hoc} font is active, or you select the font with this command, neither the script nor the language is passed. You must add them by hand. This is by design, for several reasons —for example, each font has its own set of features and a generic setting for several of them can be problematic, and also preserving a “lower-level” font selection is useful. \end{note} \begin{note} Directionality is a property affecting margins, indentation, column order, etc., not just text. Therefore, it is under the direct control of the language, which applies both the script and the direction to the text. As a consequence, there is no need to set \texttt{Script} when declaring a font with |\babelfont| (nor \texttt{Language}). In fact, it is even discouraged. \end{note} \begin{note} The keys |Language| and |Script| just pass these values to the \textit{font}, and do \textit{not} set the script for the \textit{language} (and therefore the writing direction). In other words, the |ini| file or |\babelprovide| provides default values for |\babelfont| if omitted, but the opposite is not true. See the note above for the reasons for this behavior. \end{note} \begin{warning} Using |\set|\textit{xxxx}|font| and |\babelfont| at the same time is discouraged, but very often works as expected. However, be aware that with |\set|\textit{xxxx}|font| the language system will not be set by \babel{} and should be set with |fontspec| if necessary. \end{warning} \begin{troubleshooting} \trouble{Package fontspec Info: Language '' not explicitly supported within font '' with script '