diff --git a/app/src/main/java/net/gsantner/markor/format/wikitext/WikitextTextConverter.java b/app/src/main/java/net/gsantner/markor/format/wikitext/WikitextTextConverter.java
index d9f0e345c6..11ccb3a3e7 100644
--- a/app/src/main/java/net/gsantner/markor/format/wikitext/WikitextTextConverter.java
+++ b/app/src/main/java/net/gsantner/markor/format/wikitext/WikitextTextConverter.java
@@ -23,7 +23,9 @@
 import java.io.FileReader;
 import java.io.UnsupportedEncodingException;
 import java.net.URLDecoder;
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.List;
 import java.util.Objects;
 import java.util.concurrent.atomic.AtomicReference;
 import java.util.regex.Matcher;
@@ -34,6 +36,19 @@
  */
 @SuppressWarnings("WeakerAccess")
 public class WikitextTextConverter extends TextConverterBase {
+    // Zim multiline verbatim blocks, matched in a single pass so that whichever block opens
+    // first in the document wins and delimiters inside its body are left alone. Either
+    //   Source View plugin: {{{code: lang="..." ...\n...\n}}}  (group 1 = attributes, 2 = body)
+    //   standard verbatim:  '''\n...\n'''                       (group 3 = body)
+    private static final Pattern CODE_BLOCK = Pattern.compile(
+            "(?ms)^(?:\\{\\{\\{code:([^\\r\\n]*)(?:\\r\\n?|\\n)(.*?)^\\}\\}\\}"
+                    + "|'''[ \\t]*(?:\\r\\n?|\\n)(.*?)^''')[ \\t]*$");
+    private static final Pattern CODE_BLOCK_LANG_ATTRIBUTE = Pattern.compile(
+            "lang=\"?([^\"\\s]+)\"?", Pattern.CASE_INSENSITIVE);
+    private static final Pattern BACKTICK_RUN = Pattern.compile("`+");
+    private static final String CODE_BLOCK_PLACEHOLDER_PREFIX = "\uE000WTCB";
+    private static final String CODE_BLOCK_PLACEHOLDER_SUFFIX = "\uE000";
+
     /**
      * First, convert Wikitext to regular Markor markdown. Then, calls the regular converter.
      *
@@ -47,6 +62,12 @@ public class WikitextTextConverter extends TextConverterBase {
     @Override
     public String convertMarkup(String markup, Context context, boolean lightMode, boolean lineNum, File file) {
         String contentWithoutHeader = markup.replaceFirst(WikitextSyntaxHighlighter.ZIMHEADER.toString(), "");
+
+        // Extract multiline code blocks before the per-line transformation so their content
+        // is not mangled by inline rules like IMAGE ({{X}} -> image embed).
+        List<String> savedCodeBlocks = new ArrayList<>();
+        contentWithoutHeader = preprocessCodeBlocks(contentWithoutHeader, savedCodeBlocks);
+
         StringBuilder markdownContent = new StringBuilder();
 
         for (String line : contentWithoutHeader.split("\\r\\n|\\r|\\n")) {
@@ -56,7 +77,75 @@ public String convertMarkup(String markup, Context context, boolean lightMode, b
             markdownContent.append(String.format("%n"));
         }
 
-        return FormatRegistry.CONVERTER_MARKDOWN.convertMarkup(markdownContent.toString(), context, lightMode, lineNum, file);
+        // Put the fenced code blocks back now that the per-line rules have run.
+        String markdown = markdownContent.toString();
+        for (int i = 0; i < savedCodeBlocks.size(); i++) {
+            markdown = markdown.replace(codeBlockPlaceholder(i), savedCodeBlocks.get(i));
+        }
+
+        return FormatRegistry.CONVERTER_MARKDOWN.convertMarkup(markdown, context, lightMode, lineNum, file);
+    }
+
+    /**
+     * Replaces every multiline Zim code block in {@code input} with a placeholder and appends
+     * the equivalent Markdown fenced block to {@code savedBlocks}.
+     *
+     * The block stored at index {@code i} is represented by {@link #codeBlockPlaceholder(int)}
+     * of {@code i}. Blocks are found in document order in one pass, so a block delimiter inside
+     * another block's body stays part of that body.
+     *
+     * @param input       Wikitext to scan
+     * @param savedBlocks receives one fenced Markdown block per replaced code block
+     * @return {@code input} with every code block replaced by its placeholder
+     */
+    static String preprocessCodeBlocks(String input, List<String> savedBlocks) {
+        // StringBuffer because Matcher only accepts a StringBuilder here since Java 9.
+        StringBuffer out = new StringBuffer();
+        Matcher m = CODE_BLOCK.matcher(input);
+        while (m.find()) {
+            String attributes = m.group(1); // null for a ''' block
+            String lang = attributes != null ? extractCodeBlockLang(attributes) : "";
+            String content = attributes != null ? m.group(2) : m.group(3);
+            String fence = codeFenceFor(content);
+            String placeholder = codeBlockPlaceholder(savedBlocks.size());
+            savedBlocks.add("\n" + fence + lang + "\n" + content + fence + "\n");
+            m.appendReplacement(out, Matcher.quoteReplacement(placeholder));
+        }
+        m.appendTail(out);
+        return out.toString();
+    }
+
+    /**
+     * Returns the value of the {@code lang} attribute in a Source View header line, or an empty
+     * string when there is none.
+     */
+    private static String extractCodeBlockLang(String header) {
+        Matcher m = CODE_BLOCK_LANG_ATTRIBUTE.matcher(header);
+        return m.find() ? m.group(1) : "";
+    }
+
+    /**
+     * Returns a backtick fence of at least three backticks that is longer than every backtick
+     * run in {@code content}, so the content cannot close the fence early.
+     */
+    private static String codeFenceFor(String content) {
+        int longestRun = 0;
+        Matcher m = BACKTICK_RUN.matcher(content);
+        while (m.find()) {
+            longestRun = Math.max(longestRun, m.end() - m.start());
+        }
+        StringBuilder fence = new StringBuilder("```");
+        for (int i = 3; i <= longestRun; i++) {
+            fence.append('`');
+        }
+        return fence.toString();
+    }
+
+    /**
+     * Returns the placeholder that stands in for the saved code block at index {@code idx}.
+     */
+    static String codeBlockPlaceholder(int idx) {
+        return CODE_BLOCK_PLACEHOLDER_PREFIX + idx + CODE_BLOCK_PLACEHOLDER_SUFFIX;
     }
 
     private String getMarkdownEquivalentLine(final Context context, final File file, String wikitextLine, final boolean isExportInLightMode) {
diff --git a/app/src/test/java/net/gsantner/markor/format/wikitext/WikitextFileTests.java b/app/src/test/java/net/gsantner/markor/format/wikitext/WikitextFileTests.java
index 773ba9eb0b..2f443f718b 100644
--- a/app/src/test/java/net/gsantner/markor/format/wikitext/WikitextFileTests.java
+++ b/app/src/test/java/net/gsantner/markor/format/wikitext/WikitextFileTests.java
@@ -11,8 +11,10 @@
 
 import org.junit.Test;
 
+import java.util.ArrayList;
 import java.util.Calendar;
 import java.util.Date;
+import java.util.List;
 import java.util.Locale;
 import java.util.TimeZone;
 import java.util.regex.Matcher;
@@ -63,4 +65,102 @@ public void zimHeaderNotAtBeginningOfTheFileShouldNotMatch() {
         }
     }
 
+    public static class CodeBlockPreprocessingTest {
+
+        @Test
+        public void sourceViewBlockIsReplacedByPlaceholderAndFencedWithLang() {
+            String input = "before\n"
+                    + "{{{code: id=\"Front Template\" lang=\"html\" linenumbers=\"False\"\n"
+                    + "{{#Article}}{{Article}} {{/Article}}{{Word}}\n"
+                    + "}}}\n"
+                    + "after";
+            List<String> saved = new ArrayList<>();
+            String result = WikitextTextConverter.preprocessCodeBlocks(input, saved);
+
+            assertThat(saved).hasSize(1);
+            assertThat(result).contains(WikitextTextConverter.codeBlockPlaceholder(0));
+            assertThat(result).doesNotContain("{{{code:");
+            assertThat(result).doesNotContain("{{Word}}");
+            assertThat(saved.get(0))
+                    .startsWith("\n```html\n")
+                    .contains("{{#Article}}{{Article}} {{/Article}}{{Word}}")
+                    .endsWith("```\n");
+        }
+
+        @Test
+        public void sourceViewBlockWithoutLangProducesPlainFence() {
+            String input = "{{{code: id=\"X\"\ncontent\n}}}";
+            List<String> saved = new ArrayList<>();
+            WikitextTextConverter.preprocessCodeBlocks(input, saved);
+
+            assertThat(saved).hasSize(1);
+            assertThat(saved.get(0)).startsWith("\n```\n").contains("content").endsWith("```\n");
+        }
+
+        @Test
+        public void tripleQuoteBlockIsReplacedByPlaceholderAndFenced() {
+            String input = "before\n'''\nraw {{Word}} text\n'''\nafter";
+            List<String> saved = new ArrayList<>();
+            String result = WikitextTextConverter.preprocessCodeBlocks(input, saved);
+
+            assertThat(saved).hasSize(1);
+            assertThat(result).contains(WikitextTextConverter.codeBlockPlaceholder(0));
+            assertThat(result).doesNotContain("{{Word}}");
+            assertThat(saved.get(0)).startsWith("\n```\n").contains("raw {{Word}} text").endsWith("```\n");
+        }
+
+        @Test
+        public void twoSourceViewBlocksProduceTwoPlaceholders() {
+            String input = "{{{code: lang=\"html\"\nfirst\n}}}\n\n{{{code: lang=\"js\"\nsecond\n}}}";
+            List<String> saved = new ArrayList<>();
+            String result = WikitextTextConverter.preprocessCodeBlocks(input, saved);
+
+            assertThat(saved).hasSize(2);
+            assertThat(result).contains(WikitextTextConverter.codeBlockPlaceholder(0));
+            assertThat(result).contains(WikitextTextConverter.codeBlockPlaceholder(1));
+            assertThat(saved.get(0)).contains("```html").contains("first");
+            assertThat(saved.get(1)).contains("```js").contains("second");
+        }
+
+        @Test
+        public void inputWithoutCodeBlocksIsUnchanged() {
+            String input = "just some {{Word}} text\nwith multiple lines";
+            List<String> saved = new ArrayList<>();
+            String result = WikitextTextConverter.preprocessCodeBlocks(input, saved);
+
+            assertThat(saved).isEmpty();
+            assertThat(result).isEqualTo(input);
+        }
+
+        @Test
+        public void tripleQuoteInsideSourceViewBlockIsNotExtractedSeparately() {
+            String input = "{{{code: lang=\"md\"\n'''\ninner\n'''\n}}}";
+            List<String> saved = new ArrayList<>();
+            WikitextTextConverter.preprocessCodeBlocks(input, saved);
+
+            assertThat(saved).hasSize(1);
+            assertThat(saved.get(0)).contains("'''\ninner\n'''");
+        }
+
+        @Test
+        public void sourceViewBlockInsideTripleQuoteBlockIsNotExtractedSeparately() {
+            String input = "'''\n{{{code: lang=\"md\"\ninner\n}}}\n'''";
+            List<String> saved = new ArrayList<>();
+            WikitextTextConverter.preprocessCodeBlocks(input, saved);
+
+            assertThat(saved).hasSize(1);
+            assertThat(saved.get(0)).isEqualTo("\n```\n{{{code: lang=\"md\"\ninner\n}}}\n```\n");
+        }
+
+        @Test
+        public void fenceIsLongerThanBacktickRunsInContent() {
+            String input = "'''\n```\nnested\n```\n'''";
+            List<String> saved = new ArrayList<>();
+            WikitextTextConverter.preprocessCodeBlocks(input, saved);
+
+            assertThat(saved).hasSize(1);
+            assertThat(saved.get(0)).isEqualTo("\n````\n```\nnested\n```\n````\n");
+        }
+    }
+
 }