diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 38e67e87e8ae..ded8b1c0c000 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -46,7 +46,7 @@ import Text.Pandoc.Builder import Text.Pandoc.Class.PandocMonad (PandocMonad, report) import Text.Pandoc.Options import Text.Pandoc.Logging (LogMessage(..)) -import Text.Pandoc.Shared (safeRead, extractSpaces) +import Text.Pandoc.Shared (safeRead, extractSpaces, addPandocAttributes) import Text.Pandoc.Sources (ToSources(..), sourcesToText) import Text.Pandoc.Transforms (headerShift) import Text.TeXMath (readMathML, writeTeX) @@ -724,9 +724,7 @@ blockTags = Set.fromList $ , "articleinfo" , "attribution" , "authorinitials" - , "bibliodiv" , "biblioentry" - , "bibliography" , "bibliomisc" , "bibliomixed" , "blockquote" @@ -765,31 +763,19 @@ blockTags = Set.fromList $ , "preface" , "procedure" , "programlisting" - , "qandadiv" , "question" - , "refsect1" , "refsect1info" - , "refsect2" , "refsect2info" - , "refsect3" , "refsect3info" - , "refsection" , "refsectioninfo" , "screen" - , "sect1" , "sect1info" - , "sect2" , "sect2info" - , "sect3" , "sect3info" - , "sect4" , "sect4info" - , "sect5" , "sect5info" - , "section" , "sectioninfo" , "simpara" - , "simplesect" , "substeps" , "subtitle" , "table" @@ -797,7 +783,13 @@ blockTags = Set.fromList $ , "titleabbrev" , "toc" , "variablelist" - ] ++ admonitionTags + ] ++ sectionTags ++ admonitionTags + +sectionTags :: [Text] +sectionTags = ["bibliography", "bibliodiv" + , "sect1", "sect2", "sect3", "sect4", "sect5", "section", "simplesect" + , "refsect1", "refsect2", "refsect3", "refsection", "qandadiv" + ] admonitionTags :: [Text] admonitionTags = ["caution","danger","important","note","tip","warning"] @@ -856,6 +848,10 @@ getBlocks :: PandocMonad m => Element -> DB m Blocks getBlocks e = mconcat <$> mapM parseBlock (elContent e) +getRoleAttr :: Element -> [(Text, Text)] -- extract role attribute and add it to the attribute list +getRoleAttr e = case attrValue "role" e of + "" -> [] + r -> [("role", r)] parseBlock :: PandocMonad m => Content -> DB m Blocks parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE @@ -863,8 +859,8 @@ parseBlock (Text (CData _ s _)) = if T.all isSpace s then return mempty else return $ plain $ trimInlines $ text s parseBlock (CRef x) = return $ plain $ str $ T.toUpper x -parseBlock (Elem e) = - case qName (elName e) of +parseBlock (Elem e) = do + parsedBlock <- case qName (elName e) of "toc" -> skip -- skip TOC, since in pandoc it's autogenerated "index" -> skip -- skip index, since page numbers meaningless "para" -> parseMixed para (elContent e) @@ -976,6 +972,9 @@ parseBlock (Elem e) = "title" -> return mempty -- handled in parent element "subtitle" -> return mempty -- handled in parent element _ -> skip >> getBlocks e + if qName (elName e) `elem` sectionTags + then return parsedBlock + else return $ addPandocAttributes (getRoleAttr e) parsedBlock where skip = do let qn = qName $ elName e let name = if "pi-" `T.isPrefixOf` qn @@ -1111,7 +1110,10 @@ parseBlock (Elem e) = modify $ \st -> st{ dbSectionLevel = n } b <- getBlocks e modify $ \st -> st{ dbSectionLevel = n - 1 } - return $ headerWith (elId, classes, maybeToList titleabbrevElAsAttr++attrs) n' headerText <> b + let hdr = addPandocAttributes (getRoleAttr e) + $ headerWith (elId, classes, maybeToList titleabbrevElAsAttr ++ attrs) + n' headerText + return $ hdr <> b titleabbrevElAsAttr = case filterChild (named "titleabbrev") e `mplus` (filterChild (named "info") e >>= @@ -1134,7 +1136,7 @@ parseBlock (Elem e) = b <- p case mbt of Nothing -> return b - Just t -> return $ divWith (attrValue "id" e,[],[]) + Just t -> return $ divWith (attrValue "id" e, [], getRoleAttr e) (divWith ("", ["title"], []) (plain t) <> b) -- Admonitions are parsed into a div. Following other Docbook tools that output HTML, @@ -1234,8 +1236,8 @@ parseInline (Text (CData _ s _)) = do else return $ text s parseInline (CRef ref) = return $ text $ fromMaybe (T.toUpper ref) $ lookupEntity ref -parseInline (Elem e) = - case qName (elName e) of +parseInline (Elem e) = do + parsedInline <- case qName (elName e) of "anchor" -> do return $ spanWith (attrValue "id" e, [], []) mempty "phrase" -> do @@ -1357,6 +1359,9 @@ parseInline (Elem e) = -- to in handleInstructions, above. "pi-asciidoc-br" -> return linebreak _ -> skip >> innerInlines id + return $ case qName (elName e) of + "emphasis" -> parsedInline + _ -> addPandocAttributes (getRoleAttr e) parsedInline where skip = do let qn = qName $ elName e let name = if "pi-" `T.isPrefixOf` qn diff --git a/test/docbook-reader.docbook b/test/docbook-reader.docbook index 6b7e7e1bc7af..076908583834 100644 --- a/test/docbook-reader.docbook +++ b/test/docbook-reader.docbook @@ -27,9 +27,9 @@ This is a set of tests for pandoc. Most of them are adapted from John Gruber’s markdown test suite. - + Headers - + Level 2 with an <ulink url="/url">embedded link</ulink> Level 3 with <emphasis>emphasis</emphasis> @@ -74,6 +74,9 @@ Here’s a regular paragraph. + + And here’s a regular paragraph with a role. + In Markdown 1.0.0 and earlier. Version 8. This line turns into a list item. Because a hard-wrapped line in the middle of a paragraph looked like @@ -93,6 +96,11 @@ This is a block quote. It is pretty short. +
+ + This is a block quote with a role. + +
Code in a block quote: @@ -233,6 +241,26 @@ These should not be escaped: \$ \\ \> \[ \{ + + with role: + + + + + First + + + + + Second + + + + + Third + + + and tight: @@ -702,6 +730,12 @@ These should not be escaped: \$ \\ \> \[ \{ So is this word. + + So is this word with a role. + + + So is this phrase with a role. + This is code: >, $, \, \$, @@ -1408,7 +1442,7 @@ or here: <http://example.com/> Table with attributes - +
Attribute table caption @@ -1444,7 +1478,7 @@ or here: <http://example.com/> Table with attributes, without caption - +
diff --git a/test/docbook-reader.native b/test/docbook-reader.native index 6d0f728117cc..94bca827a178 100644 --- a/test/docbook-reader.native +++ b/test/docbook-reader.native @@ -62,10 +62,16 @@ Pandoc , Space , Str "suite." ] - , Header 1 ( "headers" , [] , [] ) [ Str "Headers" ] + , Header + 1 + ( "headers" , [] , [ ( "role" , "sect1role" ) ] ) + [ Str "Headers" ] , Header 2 - ( "level-2-with-an-embedded-link" , [] , [] ) + ( "level-2-with-an-embedded-link" + , [] + , [ ( "role" , "sect2role" ) ] + ) [ Str "Level" , Space , Str "2" @@ -151,6 +157,29 @@ Pandoc , Space , Str "paragraph." ] + , Div + ( "" + , [] + , [ ( "wrapper" , "1" ) , ( "role" , "pararole" ) ] + ) + [ Para + [ Str "And" + , Space + , Str "here\8217s" + , Space + , Str "a" + , Space + , Str "regular" + , Space + , Str "paragraph" + , Space + , Str "with" + , Space + , Str "a" + , Space + , Str "role." + ] + ] , Para [ Str "In" , Space @@ -251,6 +280,31 @@ Pandoc , Str "short." ] ] + , Div + ( "" + , [] + , [ ( "wrapper" , "1" ) , ( "role" , "roleblockquote" ) ] + ) + [ BlockQuote + [ Para + [ Str "This" + , Space + , Str "is" + , Space + , Str "a" + , Space + , Str "block" + , Space + , Str "quote" + , Space + , Str "with" + , Space + , Str "a" + , Space + , Str "role." + ] + ] + ] , BlockQuote [ Para [ Str "Code" @@ -348,6 +402,19 @@ Pandoc , [ Para [ Str "Second" ] ] , [ Para [ Str "Third" ] ] ] + , Para [ Str "with" , Space , Str "role:" ] + , Div + ( "" + , [] + , [ ( "wrapper" , "1" ) , ( "role" , "listrole" ) ] + ) + [ OrderedList + ( 1 , Decimal , DefaultDelim ) + [ [ Para [ Str "First" ] ] + , [ Para [ Str "Second" ] ] + , [ Para [ Str "Third" ] ] + ] + ] , Para [ Str "and" , Space , Str "tight:" ] , OrderedList ( 1 , Decimal , DefaultDelim ) @@ -931,6 +998,38 @@ Pandoc , Space , Str "word." ] + , Para + [ Str "So" + , Space + , Str "is" + , Space + , Emph [ Emph [ Str "this" ] ] + , Space + , Str "word" + , Space + , Str "with" + , Space + , Str "a" + , Space + , Str "role." + ] + , Para + [ Str "So" + , Space + , Str "is" + , Space + , Span + ( "" , [ "phraserole" ] , [ ( "role" , "phraserole" ) ] ) + [ Str "this" ] + , Space + , Str "phrase" + , Space + , Str "with" + , Space + , Str "a" + , Space + , Str "role." + ] , Para [ Str "This" , Space @@ -2570,7 +2669,9 @@ Pandoc , Table ( "mytableid1" , [ "mytableclass1" , "mytableclass2" ] - , [ ( "custom-style" , "mytabstyle1" ) ] + , [ ( "role" , "tablerole1" ) + , ( "custom-style" , "mytabstyle1" ) + ] ) (Caption Nothing @@ -2636,7 +2737,9 @@ Pandoc , Table ( "mytableid2" , [ "mytableclass3" , "mytableclass4" ] - , [ ( "custom-style" , "mytabstyle2" ) ] + , [ ( "role" , "tablerole2" ) + , ( "custom-style" , "mytabstyle2" ) + ] ) (Caption Nothing []) [ ( AlignDefault , ColWidthDefault )