Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@
import java.io.FileReader;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.atomic.AtomicReference;
import java.util.regex.Matcher;
Expand All @@ -34,6 +36,17 @@
*/
@SuppressWarnings("WeakerAccess")
public class WikitextTextConverter extends TextConverterBase {
// Zim Source View plugin: {{{code: lang="..." ...\n...\n}}}
// Flags (?ms): MULTILINE so ^ and $ anchor at line boundaries, DOTALL so the lazy body
// may span several lines. Group 1 is the remainder of the opening line (the attribute
// list), group 2 is everything up to the first following line that starts with }}}
// (optionally followed by spaces/tabs).
private static final Pattern CODE_BLOCK_SOURCE_VIEW = Pattern.compile(
"(?ms)^\\{\\{\\{code:([^\\r\\n]*)(?:\\r\\n?|\\n)(.*?)^\\}\\}\\}[ \\t]*$");
// Zim standard multiline preformatted: '''\n...\n'''
// The opening ''' must be alone on its line (trailing spaces/tabs allowed). Group 1 is
// everything up to the first following line that starts with ''' and has nothing but
// spaces/tabs after it.
private static final Pattern CODE_BLOCK_TRIPLE_QUOTE = Pattern.compile(
"(?ms)^'''[ \\t]*(?:\\r\\n?|\\n)(.*?)^'''[ \\t]*$");
// Finds a lang attribute (case-insensitive, quotes optional) in a Source View header;
// group 1 is the value, which ends at the first double quote or whitespace character.
private static final Pattern CODE_BLOCK_LANG_ATTRIBUTE = Pattern.compile(
"lang=\"?([^\"\\s]+)\"?", Pattern.CASE_INSENSITIVE);
// Placeholder tokens are delimited by U+E000, a Unicode private-use code point, with the
// block index in between. The closing delimiter keeps index 1 from being a prefix of
// index 10.
private static final String CODE_BLOCK_PLACEHOLDER_PREFIX = "\uE000WTCB";
private static final String CODE_BLOCK_PLACEHOLDER_SUFFIX = "\uE000";

/**
* First, convert Wikitext to regular Markor markdown. Then, calls the regular converter.
*
Expand All @@ -47,6 +60,12 @@ public class WikitextTextConverter extends TextConverterBase {
@Override
public String convertMarkup(String markup, Context context, boolean lightMode, boolean lineNum, File file) {
String contentWithoutHeader = markup.replaceFirst(WikitextSyntaxHighlighter.ZIMHEADER.toString(), "");

// Extract multiline code blocks before the per-line transformation so their content
// is not mangled by inline rules like IMAGE ({{X}} -> image embed).
List<String> savedCodeBlocks = new ArrayList<>();
contentWithoutHeader = preprocessCodeBlocks(contentWithoutHeader, savedCodeBlocks);

StringBuilder markdownContent = new StringBuilder();

for (String line : contentWithoutHeader.split("\\r\\n|\\r|\\n")) {
Expand All @@ -56,7 +75,52 @@ public String convertMarkup(String markup, Context context, boolean lightMode, b
markdownContent.append(String.format("%n"));
}

return FormatRegistry.CONVERTER_MARKDOWN.convertMarkup(markdownContent.toString(), context, lightMode, lineNum, file);
String markdown = markdownContent.toString();
for (int i = 0; i < savedCodeBlocks.size(); i++) {
markdown = markdown.replace(codeBlockPlaceholder(i), savedCodeBlocks.get(i));
}

return FormatRegistry.CONVERTER_MARKDOWN.convertMarkup(markdown, context, lightMode, lineNum, file);
}

/**
 * Replaces every multiline code block in {@code input} with a placeholder token and
 * stores the equivalent fenced Markdown block in {@code savedBlocks}.
 * <p>
 * Two block kinds are recognised: Source View blocks ({@code {{{code: ...}}}}) and
 * triple-quote verbatim blocks ({@code '''}). The input is scanned once from left to
 * right and, at each step, whichever block kind starts first is extracted as a whole.
 * Block syntax nested inside another block (in either direction) therefore stays
 * verbatim inside the outer block, and a saved block never contains a placeholder.
 * <p>
 * The placeholder for the block stored at index {@code i} is
 * {@code codeBlockPlaceholder(i)}; indices follow document order.
 *
 * @param input       wikitext to scan
 * @param savedBlocks list that receives one fenced Markdown block per extracted block;
 *                    new entries are appended after any existing ones
 * @return {@code input} with each extracted block replaced by its placeholder
 */
static String preprocessCodeBlocks(String input, List<String> savedBlocks) {
    final Matcher sourceView = CODE_BLOCK_SOURCE_VIEW.matcher(input);
    final Matcher tripleQuote = CODE_BLOCK_TRIPLE_QUOTE.matcher(input);
    boolean hasSourceView = sourceView.find();
    boolean hasTripleQuote = tripleQuote.find();

    final StringBuilder out = new StringBuilder(input.length());
    int pos = 0;
    while (hasSourceView || hasTripleQuote) {
        // The two openers differ, so the candidate starts can never be equal.
        final boolean useSourceView = hasSourceView
                && (!hasTripleQuote || sourceView.start() < tripleQuote.start());
        final Matcher block = useSourceView ? sourceView : tripleQuote;
        final String fenced = useSourceView
                ? toFencedCodeBlock(extractCodeBlockLang(block.group(1)), block.group(2))
                : toFencedCodeBlock("", block.group(1));

        out.append(input, pos, block.start());
        out.append(codeBlockPlaceholder(savedBlocks.size()));
        savedBlocks.add(fenced);
        pos = block.end();

        // Any candidate that begins before the end of the block just consumed is stale
        // (it is either that block itself or one nested in it): search again from pos.
        if (hasSourceView && sourceView.start() < pos) {
            hasSourceView = sourceView.find(pos);
        }
        if (hasTripleQuote && tripleQuote.start() < pos) {
            hasTripleQuote = tripleQuote.find(pos);
        }
    }
    out.append(input, pos, input.length());
    return out.toString();
}

/**
 * Wraps {@code content} in a Markdown code fence, preceded and followed by a newline.
 * The fence has at least three backticks and is always longer than the longest run of
 * backticks inside {@code content}, so the content cannot close the fence early.
 *
 * @param lang    info string placed after the opening fence; may be empty
 * @param content block body; per the block patterns it is empty or ends with a line break
 * @return the fenced block
 */
private static String toFencedCodeBlock(String lang, String content) {
    int longestRun = 0;
    int currentRun = 0;
    for (int i = 0; i < content.length(); i++) {
        currentRun = content.charAt(i) == '`' ? currentRun + 1 : 0;
        longestRun = Math.max(longestRun, currentRun);
    }
    final StringBuilder fence = new StringBuilder("```");
    for (int i = 3; i <= longestRun; i++) {
        fence.append('`');
    }
    return "\n" + fence + lang + "\n" + content + fence + "\n";
}

/**
 * Reads the value of the {@code lang} attribute from the header line of a Source View
 * block.
 *
 * @param header text following {@code {{{code:} on the opening line of the block
 * @return the attribute value, or an empty string when the header has no lang attribute
 */
private static String extractCodeBlockLang(String header) {
    final Matcher langAttribute = CODE_BLOCK_LANG_ATTRIBUTE.matcher(header);
    return langAttribute.find() ? langAttribute.group(1) : "";
}

/**
 * Builds the placeholder token that stands in for the extracted code block with the
 * given index: the placeholder prefix, the decimal index, then the placeholder suffix.
 *
 * @param idx position of the block in the saved-blocks list
 * @return the placeholder token for that block
 */
static String codeBlockPlaceholder(int idx) {
    return new StringBuilder(CODE_BLOCK_PLACEHOLDER_PREFIX)
            .append(idx)
            .append(CODE_BLOCK_PLACEHOLDER_SUFFIX)
            .toString();
}

private String getMarkdownEquivalentLine(final Context context, final File file, String wikitextLine, final boolean isExportInLightMode) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@

import org.junit.Test;

import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.TimeZone;
import java.util.regex.Matcher;
Expand Down Expand Up @@ -63,4 +65,82 @@ public void zimHeaderNotAtBeginningOfTheFileShouldNotMatch() {
}
}

/**
 * Tests for {@code WikitextTextConverter.preprocessCodeBlocks}: extraction of Source View
 * and triple-quote blocks into fenced Markdown blocks and their replacement by
 * placeholders.
 */
public static class CodeBlockPreprocessingTest {

    // A Source View block with a lang attribute becomes one placeholder plus a fence tagged with that language.
    @Test
    public void sourceViewBlockIsReplacedByPlaceholderAndFencedWithLang() {
        final List<String> blocks = new ArrayList<>();
        final String wikitext = "before\n"
                + "{{{code: id=\"Front Template\" lang=\"html\" linenumbers=\"False\"\n"
                + "{{#Article}}{{Article}} {{/Article}}{{Word}}\n"
                + "}}}\n"
                + "after";

        final String output = WikitextTextConverter.preprocessCodeBlocks(wikitext, blocks);

        assertThat(blocks).hasSize(1);
        assertThat(output).contains(WikitextTextConverter.codeBlockPlaceholder(0));
        assertThat(output).doesNotContain("{{{code:");
        assertThat(output).doesNotContain("{{Word}}");

        final String fenced = blocks.get(0);
        assertThat(fenced)
                .startsWith("\n```html\n")
                .contains("{{#Article}}{{Article}} {{/Article}}{{Word}}")
                .endsWith("```\n");
    }

    // Without a lang attribute the opening fence carries no info string.
    @Test
    public void sourceViewBlockWithoutLangProducesPlainFence() {
        final List<String> blocks = new ArrayList<>();
        final String wikitext = "{{{code: id=\"X\"\ncontent\n}}}";

        WikitextTextConverter.preprocessCodeBlocks(wikitext, blocks);

        assertThat(blocks).hasSize(1);
        final String fenced = blocks.get(0);
        assertThat(fenced).startsWith("\n```\n").contains("content").endsWith("```\n");
    }

    // A triple-quote verbatim block is extracted like a Source View block, with a plain fence.
    @Test
    public void tripleQuoteBlockIsReplacedByPlaceholderAndFenced() {
        final List<String> blocks = new ArrayList<>();
        final String wikitext = "before\n'''\nraw {{Word}} text\n'''\nafter";

        final String output = WikitextTextConverter.preprocessCodeBlocks(wikitext, blocks);

        assertThat(blocks).hasSize(1);
        assertThat(output).contains(WikitextTextConverter.codeBlockPlaceholder(0));
        assertThat(output).doesNotContain("{{Word}}");
        final String fenced = blocks.get(0);
        assertThat(fenced).startsWith("\n```\n").contains("raw {{Word}} text").endsWith("```\n");
    }

    // Each block gets its own placeholder, numbered in order of appearance.
    @Test
    public void twoSourceViewBlocksProduceTwoPlaceholders() {
        final List<String> blocks = new ArrayList<>();
        final String wikitext = "{{{code: lang=\"html\"\nfirst\n}}}\n\n{{{code: lang=\"js\"\nsecond\n}}}";

        final String output = WikitextTextConverter.preprocessCodeBlocks(wikitext, blocks);

        assertThat(blocks).hasSize(2);
        assertThat(output).contains(WikitextTextConverter.codeBlockPlaceholder(0));
        assertThat(output).contains(WikitextTextConverter.codeBlockPlaceholder(1));
        assertThat(blocks.get(0)).contains("```html").contains("first");
        assertThat(blocks.get(1)).contains("```js").contains("second");
    }

    // Text without any code block passes through untouched and nothing is saved.
    @Test
    public void inputWithoutCodeBlocksIsUnchanged() {
        final List<String> blocks = new ArrayList<>();
        final String wikitext = "just some {{Word}} text\nwith multiple lines";

        final String output = WikitextTextConverter.preprocessCodeBlocks(wikitext, blocks);

        assertThat(blocks).isEmpty();
        assertThat(output).isEqualTo(wikitext);
    }

    // Triple quotes inside a Source View block belong to that block's content.
    @Test
    public void tripleQuoteInsideSourceViewBlockIsNotExtractedSeparately() {
        final List<String> blocks = new ArrayList<>();
        final String wikitext = "{{{code: lang=\"md\"\n'''\ninner\n'''\n}}}";

        WikitextTextConverter.preprocessCodeBlocks(wikitext, blocks);

        assertThat(blocks).hasSize(1);
        assertThat(blocks.get(0)).contains("'''\ninner\n'''");
    }
}

}
Loading