Skip to content

Commit bb48fa2

Browse files
committed
LaTeX and ConTeXt writers: support lang attribute on divs and spans
For LaTeX, also collect lang and dir attributes on spans and divs to set the lang, otherlangs and dir variables if they aren’t set already. See jgm#895.
1 parent 3f9dd65 commit bb48fa2

File tree

3 files changed

+121
-38
lines changed

3 files changed

+121
-38
lines changed

README

+11-6
Original file line numberDiff line numberDiff line change
@@ -1046,12 +1046,21 @@ Language variables
10461046
format stored in the additional variables `babel-lang`,
10471047
`polyglossia-lang` (LaTeX) and `context-lang` (ConTeXt).
10481048

1049+
Native pandoc `span`s and `div`s with the lang attribute
1050+
(value in BCP 47) can be used to switch the language in
1051+
that range.
1052+
10491053
`otherlangs`
10501054
: a list of other languages used in the document
10511055
in the YAML metadata, according to [BCP 47]. For example:
10521056
`otherlangs: [en-GB, fr]`.
1053-
Currently only used by `xelatex` through the generated
1054-
`polyglossia-otherlangs` variable.
1057+
This is automatically generated from the `lang` attributes
1058+
in all `span`s and `div`s but can be overriden.
1059+
Currently only used by LaTeX through the generated
1060+
`babel-otherlangs` and `polyglossia-otherlangs` variables.
1061+
The LaTeX writer outputs polyglossia commands in the text but
1062+
the `babel-newcommands` variable contains mappings for them
1063+
to the corresponding babel.
10551064

10561065
`dir`
10571066
: the base direction of the document, either `rtl` (right-to-left)
@@ -1064,10 +1073,6 @@ Language variables
10641073
(e.g. the browser, when generating HTML) supports the
10651074
[Unicode Bidirectional Algorithm].
10661075

1067-
LaTeX and ConTeXt assume by default that all text is left-to-right.
1068-
Setting `dir: ltr` enables bidirectional text handling in a document
1069-
whose base direction is left-to-right but contains some right-to-left script.
1070-
10711076
When using LaTeX for bidirectional documents, only the `xelatex` engine
10721077
is fully supported (use `--latex-engine=xelatex`).
10731078

src/Text/Pandoc/Writers/ConTeXt.hs

+27-16
Original file line numberDiff line numberDiff line change
@@ -157,17 +157,21 @@ blockToConTeXt (CodeBlock _ str) =
157157
blockToConTeXt (RawBlock "context" str) = return $ text str <> blankline
158158
blockToConTeXt (RawBlock _ _ ) = return empty
159159
blockToConTeXt (Div (ident,_,kvs) bs) = do
160-
contents <- blockListToConTeXt bs
161-
let contents' = if null ident
162-
then contents
163-
else ("\\reference" <> brackets (text $ toLabel ident) <>
164-
braces empty <> "%") $$ contents
165-
let align dir = blankline <> "\\startalignment[" <> dir <> "]"
166-
$$ contents' $$ "\\stopalignment" <> blankline
167-
return $ case lookup "dir" kvs of
168-
Just "rtl" -> align "righttoleft"
169-
Just "ltr" -> align "lefttoright"
170-
_ -> contents'
160+
let align dir txt = "\\startalignment[" <> dir <> "]" $$ txt $$ "\\stopalignment"
161+
let wrapRef txt = if null ident
162+
then txt
163+
else ("\\reference" <> brackets (text $ toLabel ident) <>
164+
braces empty <> "%") $$ txt
165+
wrapDir = case lookup "dir" kvs of
166+
Just "rtl" -> align "righttoleft"
167+
Just "ltr" -> align "lefttoright"
168+
_ -> id
169+
wrapLang txt = case lookup "lang" kvs of
170+
Just lng -> "\\start\\language["
171+
<> text (fromBcp47' lng) <> "]" $$ txt $$ "\\stop"
172+
Nothing -> txt
173+
wrapBlank txt = blankline <> txt <> blankline
174+
fmap (wrapBlank . wrapLang . wrapDir . wrapRef) $ blockListToConTeXt bs
171175
blockToConTeXt (BulletList lst) = do
172176
contents <- mapM listItemToConTeXt lst
173177
return $ ("\\startitemize" <> if isTightList lst
@@ -346,11 +350,15 @@ inlineToConTeXt (Note contents) = do
346350
else text "\\startbuffer " <> nest 2 contents' <>
347351
text "\\stopbuffer\\footnote{\\getbuffer}"
348352
inlineToConTeXt (Span (_,_,kvs) ils) = do
349-
contents <- inlineListToConTeXt ils
350-
return $ case lookup "dir" kvs of
351-
Just "rtl" -> braces $ "\\righttoleft " <> contents
352-
Just "ltr" -> braces $ "\\lefttoright " <> contents
353-
_ -> contents
353+
let wrapDir txt = case lookup "dir" kvs of
354+
Just "rtl" -> braces $ "\\righttoleft " <> txt
355+
Just "ltr" -> braces $ "\\lefttoright " <> txt
356+
_ -> txt
357+
wrapLang txt = case lookup "lang" kvs of
358+
Just lng -> "\\start\\language[" <> text (fromBcp47' lng)
359+
<> "]" <> txt <> "\\stop "
360+
Nothing -> txt
361+
fmap (wrapLang . wrapDir) $ inlineListToConTeXt ils
354362

355363
-- | Craft the section header, inserting the secton reference, if supplied.
356364
sectionHeader :: Attr
@@ -377,6 +385,9 @@ sectionHeader (ident,classes,_) hdrLevel lst = do
377385
then char '\\' <> chapter <> braces contents
378386
else contents <> blankline
379387

388+
fromBcp47' :: String -> String
389+
fromBcp47' = fromBcp47 . splitBy (=='-')
390+
380391
-- Takes a list of the constituents of a BCP 47 language code
381392
-- and irons out ConTeXt's exceptions
382393
-- https://tools.ietf.org/html/bcp47#section-2.1

src/Text/Pandoc/Writers/LaTeX.hs

+83-16
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ import Text.Pandoc.Templates
3939
import Text.Printf ( printf )
4040
import Network.URI ( isURI, unEscapeString )
4141
import Data.Aeson (object, (.=))
42-
import Data.List ( (\\), isInfixOf, stripPrefix, intercalate, intersperse )
42+
import Data.List ( (\\), isInfixOf, stripPrefix, intercalate, intersperse, nub, nubBy )
4343
import Data.Char ( toLower, isPunctuation, isAscii, isLetter, isDigit, ord )
4444
import Data.Maybe ( fromMaybe )
4545
import qualified Data.Text as T
@@ -145,6 +145,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do
145145
st <- get
146146
titleMeta <- stringToLaTeX TextString $ stringify $ docTitle meta
147147
authorsMeta <- mapM (stringToLaTeX TextString . stringify) $ docAuthors meta
148+
let docLangs = nub $ query (extract "lang") blocks
148149
let context = defField "toc" (writerTableOfContents options) $
149150
defField "toc-depth" (show (writerTOCDepth options -
150151
if stBook st
@@ -179,18 +180,48 @@ pandocToLaTeX options (Pandoc meta blocks) = do
179180
Biblatex -> defField "biblio-title" biblioTitle .
180181
defField "biblatex" True
181182
_ -> id) $
183+
-- set lang to something so polyglossia/babel is included
184+
defField "lang" (if null docLangs then ""::String else "en") $
185+
defField "otherlangs" docLangs $
186+
defField "dir" (if (null $ query (extract "dir") blocks)
187+
then ""::String
188+
else "ltr") $
182189
metadata
183190
let toPolyObj lang = object [ "name" .= T.pack name
184191
, "options" .= T.pack opts ]
185192
where
186193
(name, opts) = toPolyglossia lang
187194
let lang = maybe [] (splitBy (=='-')) $ getField "lang" context
195+
otherlangs = maybe [] (map $ splitBy (=='-')) $ getField "otherlangs" context
188196
let context' =
189197
defField "babel-lang" (toBabel lang)
198+
$ defField "babel-otherlangs" (map toBabel otherlangs)
199+
$ defField "babel-newcommands" (concatMap (\(poly, babel) ->
200+
-- \textspanish and \textgalician are already used by babel
201+
-- save them as \oritext... and let babel use that
202+
if poly `elem` ["spanish", "galician"]
203+
then "\\let\\oritext" ++ poly ++ "\\text" ++ poly ++ "\n" ++
204+
"\\AddBabelHook{" ++ poly ++ "}{beforeextras}" ++
205+
"{\\renewcommand{\\text" ++ poly ++ "}{\\oritext"
206+
++ poly ++ "}}\n" ++
207+
"\\AddBabelHook{" ++ poly ++ "}{afterextras}\n" ++
208+
"{\\renewcommand{\\text" ++ poly ++ "}[2][]{\\foreignlanguage{"
209+
++ poly ++ "}{##2}}}\n"
210+
else "\\newcommand{\\text" ++ poly ++ "}[2][]{\\foreignlanguage{"
211+
++ babel ++ "}{#2}}\n" ++
212+
"\\newenvironment{" ++ poly ++ "}[1]{\\begin{otherlanguage}{"
213+
++ babel ++ "}}{\\end{otherlanguage}}\n"
214+
)
215+
-- eliminate duplicates that have same polyglossia name
216+
$ nubBy (\a b -> fst a == fst b)
217+
-- find polyglossia and babel names of languages used in the document
218+
$ map (\l ->
219+
let lng = splitBy (=='-') l
220+
in (fst $ toPolyglossia lng, toBabel lng)
221+
)
222+
docLangs )
190223
$ defField "polyglossia-lang" (toPolyObj lang)
191-
$ defField "polyglossia-otherlangs"
192-
(maybe [] (map $ toPolyObj . splitBy (=='-')) $
193-
getField "otherlangs" context)
224+
$ defField "polyglossia-otherlangs" (map toPolyObj otherlangs)
194225
$ defField "latex-dir-rtl" (case (getField "dir" context)::Maybe String of
195226
Just "rtl" -> True
196227
_ -> False)
@@ -337,15 +368,24 @@ blockToLaTeX (Div (identifier,classes,kvs) bs) = do
337368
then empty
338369
else "\\hyperdef{}" <> braces (text ref) <>
339370
braces ("\\label" <> braces (text ref))
340-
contents' <- blockListToLaTeX bs
341-
let align dir = inCmd "begin" dir $$ contents' $$ inCmd "end" dir
342-
let contents = case lookup "dir" kvs of
343-
Just "rtl" -> align "RTL"
344-
Just "ltr" -> align "LTR"
345-
_ -> contents'
346-
if beamer && "notes" `elem` classes -- speaker notes
347-
then return $ "\\note" <> braces contents
348-
else return (linkAnchor $$ contents)
371+
let align dir txt = inCmd "begin" dir $$ txt $$ inCmd "end" dir
372+
let wrapDir = case lookup "dir" kvs of
373+
Just "rtl" -> align "RTL"
374+
Just "ltr" -> align "LTR"
375+
_ -> id
376+
wrapLang txt = case lookup "lang" kvs of
377+
Just lng -> let (l, o) = toPolyglossiaEnv lng
378+
ops = if null o
379+
then ""
380+
else brackets $ text o
381+
in inCmd "begin" (text l) <> ops
382+
$$ blankline <> txt <> blankline
383+
$$ inCmd "end" (text l)
384+
Nothing -> txt
385+
wrapNotes txt = if beamer && "notes" `elem` classes
386+
then "\\note" <> braces txt -- speaker notes
387+
else linkAnchor $$ txt
388+
fmap (wrapDir . wrapLang . wrapNotes) $ blockListToLaTeX bs
349389
blockToLaTeX (Plain lst) =
350390
inlineListToLaTeX $ dropWhile isLineBreakOrSpace lst
351391
-- title beginning with fig: indicates that the image is a figure
@@ -756,9 +796,12 @@ inlineToLaTeX (Span (id',classes,kvs) ils) = do
756796
(if noSmallCaps then inCmd "textnormal" else id) .
757797
(if rtl then inCmd "RL" else id) .
758798
(if ltr then inCmd "LR" else id) .
759-
(if not (noEmph || noStrong || noSmallCaps || rtl || ltr)
760-
then braces
761-
else id)) `fmap` inlineListToLaTeX ils
799+
(case lookup "lang" kvs of
800+
Just lng -> let (l, o) = toPolyglossiaEnv lng
801+
ops = if null o then "" else brackets (text o)
802+
in \c -> char '\\' <> "text" <> text l <> ops <> braces c
803+
Nothing -> id)
804+
) `fmap` inlineListToLaTeX ils
762805
inlineToLaTeX (Emph lst) =
763806
inlineListToLaTeX lst >>= return . inCmd "emph"
764807
inlineToLaTeX (Strong lst) =
@@ -999,6 +1042,30 @@ getListingsLanguage :: [String] -> Maybe String
9991042
getListingsLanguage [] = Nothing
10001043
getListingsLanguage (x:xs) = toListingsLanguage x <|> getListingsLanguage xs
10011044

1045+
-- Extract a key from divs and spans
1046+
extract :: String -> Block -> [String]
1047+
extract key (Div attr _) = lookKey key attr
1048+
extract key (Plain ils) = concatMap (extractInline key) ils
1049+
extract key (Para ils) = concatMap (extractInline key) ils
1050+
extract key (Header _ _ ils) = concatMap (extractInline key) ils
1051+
extract _ _ = []
1052+
1053+
-- Extract a key from spans
1054+
extractInline :: String -> Inline -> [String]
1055+
extractInline key (Span attr _) = lookKey key attr
1056+
extractInline _ _ = []
1057+
1058+
-- Look up a key in an attribute and give a list of its values
1059+
lookKey :: String -> Attr -> [String]
1060+
lookKey key (_,_,kvs) = maybe [] words $ lookup key kvs
1061+
1062+
-- In environments \Arabic instead of \arabic is used
1063+
toPolyglossiaEnv :: String -> (String, String)
1064+
toPolyglossiaEnv l =
1065+
case toPolyglossia $ (splitBy (=='-')) l of
1066+
("arabic", o) -> ("Arabic", o)
1067+
x -> x
1068+
10021069
-- Takes a list of the constituents of a BCP 47 language code and
10031070
-- converts it to a Polyglossia (language, options) tuple
10041071
-- http://mirrors.concertpass.com/tex-archive/macros/latex/contrib/polyglossia/polyglossia.pdf

0 commit comments

Comments
 (0)