diff --git a/common/src/main/java/com/box/l10n/mojito/okapi/filters/POFilter.java b/common/src/main/java/com/box/l10n/mojito/okapi/filters/POFilter.java
index 73f8670851..466ebc02c0 100644
--- a/common/src/main/java/com/box/l10n/mojito/okapi/filters/POFilter.java
+++ b/common/src/main/java/com/box/l10n/mojito/okapi/filters/POFilter.java
@@ -167,19 +167,16 @@ void unescpae(TextUnit textUnit) {
}
void unescapeSource(TextUnit textUnit) {
- String sourceString = textUnitUtils.getSourceAsString(textUnit);
- String unescapedSourceString = unescapeUtils.replaceEscapedQuotes(sourceString);
- textUnitUtils.replaceSourceString(textUnit, unescapedSourceString);
+ // No-op: Okapi's POFilter already fully handles all C-style escape sequences
+ // (\\, \n, \r, \t, \", etc.) via its own single-pass unescape in toAbstract().
+ // Any additional unescaping here would double-process and corrupt strings.
+ // For example, PO \\\" (escaped-backslash + escaped-quote) becomes \" (literal
+ // backslash + quote) after Okapi's unescape. Applying replaceEscapedQuotes()
+ // would then strip the literal backslash.
}
void unescapeTarget(TextUnit textUnit) {
- TextContainer target = textUnit.getTarget(targetLocale);
- if (target != null) {
- String targetString = target.toString();
- String unescapedTargetString = unescapeUtils.replaceEscapedQuotes(targetString);
- TextContainer newTarget = new TextContainer(unescapedTargetString);
- textUnit.setTarget(targetLocale, newTarget);
- }
+ // No-op: same reasoning as unescapeSource.
}
boolean isPluralGroupStarting(Event event) {
@@ -290,13 +287,13 @@ void adaptTextUnitToCLDRForm(ITextUnit textUnit, String cldrPluralForm) {
// source should always be singular form for "one" form,
// this is needed for language with 6 entry like arabic
logger.debug("Set message singular: {}", msgID);
- textUnit.setSource(new TextContainer(unescapeUtils.replaceEscapedQuotes(msgID)));
+ textUnit.setSource(new TextContainer(unescapeUtils.unescape(msgID)));
} else {
// source should always be plural form unless for "one" form,
// this is needed for language with only one entry like
// japanese: [0] --> other
logger.debug("Set message plural: {}", msgIDPlural);
- textUnit.setSource(new TextContainer(unescapeUtils.replaceEscapedQuotes(msgIDPlural)));
+ textUnit.setSource(new TextContainer(unescapeUtils.unescape(msgIDPlural)));
}
}
@@ -361,10 +358,12 @@ void renameTextUnitWithSourceAndContent(ITextUnit textUnit) {
Property property = textUnit.getProperty(POFilter.PROPERTY_CONTEXT);
- StringBuilder newName = new StringBuilder(msgID);
+ // Unescape msgID for the name (backslash, newline, carriage return, quotes)
+ StringBuilder newName = new StringBuilder(unescapeUtils.unescape(msgID));
if (property != null) {
- newName.append(" --- ").append(property.getValue());
+ // Also unescape the context value
+ newName.append(" --- ").append(unescapeUtils.unescape(property.getValue()));
}
if (poPluralForm != null) {
diff --git a/common/src/main/java/com/box/l10n/mojito/okapi/filters/SimpleEncoder.java b/common/src/main/java/com/box/l10n/mojito/okapi/filters/SimpleEncoder.java
index 0f07771c49..541c3aafbc 100644
--- a/common/src/main/java/com/box/l10n/mojito/okapi/filters/SimpleEncoder.java
+++ b/common/src/main/java/com/box/l10n/mojito/okapi/filters/SimpleEncoder.java
@@ -8,7 +8,9 @@
import net.sf.okapi.common.encoder.IEncoder;
/**
- * Encoder to handle escaping \n, \r, double-quotes.
+ * Encoder to handle escaping backslash, \n, \r, \t, double-quotes.
+ *
+ * <p>Follows C-style string escaping as required by the GNU PO file format.
*
* @author jyi
*/
@@ -50,12 +52,18 @@ public String encode(char value, EncoderContext context) {
String res;
switch (value) {
+ case '\\':
+ res = "\\\\";
+ break;
case '\n':
res = "\\n";
break;
case '\r':
res = "\\r";
break;
+ case '\t':
+ res = "\\t";
+ break;
case '"':
res = "\\\"";
break;
diff --git a/common/src/main/java/com/box/l10n/mojito/okapi/filters/UnescapeUtils.java b/common/src/main/java/com/box/l10n/mojito/okapi/filters/UnescapeUtils.java
index 02d1e73a8e..42591eaea6 100644
--- a/common/src/main/java/com/box/l10n/mojito/okapi/filters/UnescapeUtils.java
+++ b/common/src/main/java/com/box/l10n/mojito/okapi/filters/UnescapeUtils.java
@@ -1,5 +1,6 @@
package com.box.l10n.mojito.okapi.filters;
+import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -14,6 +15,7 @@ public class UnescapeUtils {
/** Logger */
static Logger logger = LoggerFactory.getLogger(UnescapeUtils.class);
+ private static final Pattern ESCAPED_BACKSLASH = Pattern.compile("\\\\\\\\");
private static final Pattern ESCAPED_CARIAGE_RETURN = Pattern.compile("\\\\r");
private static final Pattern ESCAPED_LINE_FEED = Pattern.compile("\\\\n");
private static final Pattern ESCAPED_QUOTES = Pattern.compile("\\\\(\"|')");
@@ -24,16 +26,84 @@ public class UnescapeUtils {
private static final Pattern LINE_FEED = Pattern.compile("\n");
/**
- * Unescapes line feed, cariage return, single quote and double quote
+ * Single-pass pattern for C-style escape sequences used in GNU PO files. Matches exactly
+ * two-character sequences starting with a backslash, so the Java literal "\\\\n" (3 chars:
+ * backslash, backslash, n) matches "\\\\" first (→ \), leaving "n" as a literal — not "\n" (newline).
+ *
+ * <p>Covers the same set as Okapi's {@code POFilter.unescape()}: {@code \\[abfnrtv"'\\]}.
+ */
+ private static final Pattern ESCAPE_SEQUENCE = Pattern.compile("\\\\[abfnrtv\"'\\\\]");
+
+ /**
+ * Unescapes C-style escape sequences in a single pass, following the GNU PO file format (same
+ * escaping rules as C strings).
+ *
+ * <p>Handles: {@code \\} (backslash), {@code \n} (newline), {@code \r} (CR), {@code \t} (tab),
+ * {@code \"} (quote), {@code \'} (single quote), {@code \a} (bell), {@code \b} (backspace),
+ * {@code \f} (form feed), {@code \v} (vertical tab).
+ *
+ * <p>A single-pass approach is required because sequential replacement can corrupt strings
+ * containing ambiguous sequences like "\\\\n" (escaped-backslash followed by literal 'n'). With
+ * sequential replacement, this would be incorrectly decoded as a newline character.
+ *
+ * @param text the escaped text; must not be null (it is dereferenced unconditionally)
+ * @return the unescaped text
+ */
+ public String unescape(String text) {
+ Matcher matcher = ESCAPE_SEQUENCE.matcher(text);
+ StringBuilder sb = new StringBuilder(text.length());
+ while (matcher.find()) {
+ // Each match is exactly two chars: a backslash followed by the escape selector.
+ String match = matcher.group();
+ String replacement;
+ switch (match.charAt(1)) {
+ case '\\':
+ replacement = "\\";
+ break;
+ case 'a':
+ replacement = "\u0007"; // bell
+ break;
+ case 'b':
+ replacement = "\b"; // backspace
+ break;
+ case 'f':
+ replacement = "\f"; // form feed
+ break;
+ case 'n':
+ replacement = "\n";
+ break;
+ case 'r':
+ replacement = "\r";
+ break;
+ case 't':
+ replacement = "\t";
+ break;
+ case 'v':
+ replacement = "\u000B"; // vertical tab
+ break;
+ case '"':
+ replacement = "\"";
+ break;
+ case '\'':
+ replacement = "'";
+ break;
+ default:
+ // Not reachable with the current ESCAPE_SEQUENCE character class; leaves the match as-is.
+ replacement = match;
+ break;
+ }
+ // quoteReplacement is needed because appendReplacement gives '\' and '$' special meaning,
+ // and the replacement here can be a lone backslash.
+ matcher.appendReplacement(sb, Matcher.quoteReplacement(replacement));
+ }
+ // Copy whatever follows the last escape sequence unchanged.
+ matcher.appendTail(sb);
+ return sb.toString();
+ }
+
+ /**
+ * Replaces each escaped backslash (two consecutive backslash characters) with a single backslash.
*
* @param text
* @return
*/
- public String unescape(String text) {
- String unescapedText = replaceEscapedCarriageReturn(text);
- unescapedText = replaceEscapedLineFeed(unescapedText);
- unescapedText = replaceEscapedQuotes(unescapedText);
- return unescapedText;
+ String replaceEscapedBackslash(String text) {
+ return ESCAPED_BACKSLASH.matcher(text).replaceAll("\\\\");
}
String replaceEscapedCarriageReturn(String text) {
diff --git a/common/src/test/java/com/box/l10n/mojito/okapi/extractor/AssetExtractorTest.java b/common/src/test/java/com/box/l10n/mojito/okapi/extractor/AssetExtractorTest.java
index 83c569b775..5850e597b0 100644
--- a/common/src/test/java/com/box/l10n/mojito/okapi/extractor/AssetExtractorTest.java
+++ b/common/src/test/java/com/box/l10n/mojito/okapi/extractor/AssetExtractorTest.java
@@ -7,6 +7,7 @@
import com.box.l10n.mojito.okapi.asset.AssetPathToFilterConfigMapper;
import com.box.l10n.mojito.okapi.asset.FilterConfigurationMappers;
import com.box.l10n.mojito.okapi.asset.UnsupportedAssetFilterTypeException;
+import com.box.l10n.mojito.okapi.filters.UnescapeUtils;
import java.util.Arrays;
import java.util.List;
import org.assertj.core.api.Assertions;
@@ -25,6 +26,7 @@
AssetPathToFilterConfigMapper.class,
FilterConfigurationMappers.class,
TextUnitUtils.class,
+ UnescapeUtils.class,
AssetExtractorTest.class
})
@EnableSpringConfigured
@@ -253,4 +255,133 @@ public void documentNoPartExtraction() throws UnsupportedAssetFilterTypeExceptio
"34a6a48789dd1ff7dff813a8fb627b91-8f1bdae06589d55b62184a76e0e70d0e-1",
"Image in text
."));
}
+
+ /** Verifies that an escaped backslash in a PO msgid is extracted as one literal backslash. */
+ @Test
+ public void extractPoWithBackslash() throws UnsupportedAssetFilterTypeException {
+ // PO file with backslash in msgid - should be unescaped to literal backslash
+ String poContent =
+ "msgid \"\"\n"
+ + "msgstr \"\"\n"
+ + "\"Content-Type: text/plain; charset=utf-8\\n\"\n"
+ + "\n"
+ + "#. Path with backslash\n"
+ + "msgid \"C:\\\\Users\\\\test\"\n"
+ + "msgstr \"\"\n";
+
+ // Typed list: the method references passed to extracting(...) below need the element type.
+ List<AssetExtractorTextUnit> assetExtractorTextUnitsForAsset =
+ assetExtractor.getAssetExtractorTextUnitsForAsset("messages.pot", poContent, null, null);
+
+ // The backslash should be unescaped: C:\\Users\\test -> C:\Users\test
+ Assertions.assertThat(assetExtractorTextUnitsForAsset)
+ .extracting(AssetExtractorTextUnit::getName, AssetExtractorTextUnit::getSource)
+ .containsExactly(tuple("C:\\Users\\test", "C:\\Users\\test"));
+ }
+
+ /** Verifies that an escaped newline in a PO msgid is extracted as a real line feed. */
+ @Test
+ public void extractPoWithNewlineEscape() throws UnsupportedAssetFilterTypeException {
+ // PO file with escaped newline in msgid
+ String poContent =
+ "msgid \"\"\n"
+ + "msgstr \"\"\n"
+ + "\"Content-Type: text/plain; charset=utf-8\\n\"\n"
+ + "\n"
+ + "#. Multi-line text\n"
+ + "msgid \"line1\\nline2\"\n"
+ + "msgstr \"\"\n";
+
+ // Typed list: the method references passed to extracting(...) below need the element type.
+ List<AssetExtractorTextUnit> assetExtractorTextUnitsForAsset =
+ assetExtractor.getAssetExtractorTextUnitsForAsset("messages.pot", poContent, null, null);
+
+ // The newline escape should be unescaped: line1\nline2 -> "line1", a line feed, then "line2"
+ Assertions.assertThat(assetExtractorTextUnitsForAsset)
+ .extracting(AssetExtractorTextUnit::getName, AssetExtractorTextUnit::getSource)
+ .containsExactly(tuple("line1\nline2", "line1\nline2"));
+ }
+
+ /** Verifies that escaped double quotes in a PO msgid are extracted as plain double quotes. */
+ @Test
+ public void extractPoWithQuoteEscape() throws UnsupportedAssetFilterTypeException {
+ // PO file with escaped quote in msgid
+ String poContent =
+ "msgid \"\"\n"
+ + "msgstr \"\"\n"
+ + "\"Content-Type: text/plain; charset=utf-8\\n\"\n"
+ + "\n"
+ + "#. Text with quotes\n"
+ + "msgid \"say \\\"hello\\\"\"\n"
+ + "msgstr \"\"\n";
+
+ // Typed list: the method references passed to extracting(...) below need the element type.
+ List<AssetExtractorTextUnit> assetExtractorTextUnitsForAsset =
+ assetExtractor.getAssetExtractorTextUnitsForAsset("messages.pot", poContent, null, null);
+
+ // The quote escape should be unescaped: say \"hello\" -> say "hello"
+ Assertions.assertThat(assetExtractorTextUnitsForAsset)
+ .extracting(AssetExtractorTextUnit::getName, AssetExtractorTextUnit::getSource)
+ .containsExactly(tuple("say \"hello\"", "say \"hello\""));
+ }
+
+ /** Verifies extraction of a msgid that mixes escaped backslashes, a newline and quotes. */
+ @Test
+ public void extractPoWithComplexEscapes() throws UnsupportedAssetFilterTypeException {
+ // PO file with multiple escape sequences
+ String poContent =
+ "msgid \"\"\n"
+ + "msgstr \"\"\n"
+ + "\"Content-Type: text/plain; charset=utf-8\\n\"\n"
+ + "\n"
+ + "#. Complex escapes\n"
+ + "msgid \"path\\\\to\\\\file\\nwith \\\"quotes\\\"\"\n"
+ + "msgstr \"\"\n";
+
+ // Typed list: the method references passed to extracting(...) below need the element type.
+ List<AssetExtractorTextUnit> assetExtractorTextUnitsForAsset =
+ assetExtractor.getAssetExtractorTextUnitsForAsset("messages.pot", poContent, null, null);
+
+ // All escapes should be unescaped
+ Assertions.assertThat(assetExtractorTextUnitsForAsset)
+ .extracting(AssetExtractorTextUnit::getName, AssetExtractorTextUnit::getSource)
+ .containsExactly(
+ tuple("path\\to\\file\nwith \"quotes\"", "path\\to\\file\nwith \"quotes\""));
+ }
+
+ /** Verifies that an escaped backslash in msgctxt is unescaped in the text unit name. */
+ @Test
+ public void extractPoWithContext() throws UnsupportedAssetFilterTypeException {
+ // PO file with msgctxt containing backslash
+ String poContent =
+ "msgid \"\"\n"
+ + "msgstr \"\"\n"
+ + "\"Content-Type: text/plain; charset=utf-8\\n\"\n"
+ + "\n"
+ + "#. Context test\n"
+ + "msgctxt \"menu\\\\file\"\n"
+ + "msgid \"Open\"\n"
+ + "msgstr \"\"\n";
+
+ // Typed list: the method references passed to extracting(...) below need the element type.
+ List<AssetExtractorTextUnit> assetExtractorTextUnitsForAsset =
+ assetExtractor.getAssetExtractorTextUnitsForAsset("messages.pot", poContent, null, null);
+
+ // The name should include the context with unescaped backslash
+ Assertions.assertThat(assetExtractorTextUnitsForAsset)
+ .extracting(AssetExtractorTextUnit::getName, AssetExtractorTextUnit::getSource)
+ .containsExactly(tuple("Open --- menu\\file", "Open"));
+ }
+
+ /** Verifies a realistic message that contains both escaped quotes and an escaped backslash. */
+ @Test
+ public void extractPoWithRealisticBackslashMessage() throws UnsupportedAssetFilterTypeException {
+ // Realistic PO message: You are not able to use "/" or "\" in text files
+ // PO escaping: \" for quotes, \\ for backslash
+ String poContent =
+ "msgid \"\"\n"
+ + "msgstr \"\"\n"
+ + "\"Content-Type: text/plain; charset=utf-8\\n\"\n"
+ + "\n"
+ + "#. File name validation error\n"
+ + "msgid \"You are not able to use \\\"/\\\" or \\\"\\\\\\\" in text files\"\n"
+ + "msgstr \"\"\n";
+
+ // Typed list: the method references passed to extracting(...) below need the element type.
+ List<AssetExtractorTextUnit> assetExtractorTextUnitsForAsset =
+ assetExtractor.getAssetExtractorTextUnitsForAsset("messages.pot", poContent, null, null);
+
+ String expectedString = "You are not able to use \"/\" or \"\\\" in text files";
+ Assertions.assertThat(assetExtractorTextUnitsForAsset)
+ .extracting(AssetExtractorTextUnit::getName, AssetExtractorTextUnit::getSource)
+ .containsExactly(tuple(expectedString, expectedString));
+ }
}
diff --git a/common/src/test/java/com/box/l10n/mojito/okapi/filters/POFilterTest.java b/common/src/test/java/com/box/l10n/mojito/okapi/filters/POFilterTest.java
index c72a1f13d5..adea59abb4 100644
--- a/common/src/test/java/com/box/l10n/mojito/okapi/filters/POFilterTest.java
+++ b/common/src/test/java/com/box/l10n/mojito/okapi/filters/POFilterTest.java
@@ -2,10 +2,14 @@
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Stream;
+import net.sf.okapi.common.encoder.EncoderContext;
+import net.sf.okapi.common.encoder.EncoderManager;
+import net.sf.okapi.common.encoder.IEncoder;
import org.junit.Test;
/**
@@ -173,4 +177,67 @@ public void removeUntranslatedEOL() {
Stream.of("", "\n", "#. Comments", "#. Comments\n")
.forEach(s -> assertEquals(s, POFilter.removeUntranslated(s)));
}
+
+ @Test
+ public void encoderManagerUsesPOEncoder() {
+ POFilter poFilter = new POFilter();
+ EncoderManager encoderManager = poFilter.getEncoderManager();
+ encoderManager.setDefaultOptions(null, "UTF-8", "\n");
+ encoderManager.updateEncoder("application/x-gettext");
+ IEncoder encoder = encoderManager.getEncoder();
+ assertTrue("POFilter should use POEncoder for PO MIME type", encoder instanceof POEncoder);
+ }
+
+ @Test
+ public void poEncoderEscapesBackslash() {
+ POFilter poFilter = new POFilter();
+ EncoderManager encoderManager = poFilter.getEncoderManager();
+ encoderManager.setDefaultOptions(null, "UTF-8", "\n");
+ encoderManager.updateEncoder("application/x-gettext");
+ IEncoder encoder = encoderManager.getEncoder();
+
+ // Test that backslash is properly escaped
+ assertEquals("\\\\", encoder.encode("\\", EncoderContext.TEXT));
+ assertEquals("C:\\\\Users\\\\test", encoder.encode("C:\\Users\\test", EncoderContext.TEXT));
+ }
+
+ @Test
+ public void poEncoderEscapesSpecialCharacters() {
+ POFilter poFilter = new POFilter();
+ EncoderManager encoderManager = poFilter.getEncoderManager();
+ encoderManager.setDefaultOptions(null, "UTF-8", "\n");
+ encoderManager.updateEncoder("application/x-gettext");
+ IEncoder encoder = encoderManager.getEncoder();
+
+ // Test newline, carriage return, and double quote escaping
+ assertEquals("\\n", encoder.encode("\n", EncoderContext.TEXT));
+ assertEquals("\\r", encoder.encode("\r", EncoderContext.TEXT));
+ assertEquals("\\\"", encoder.encode("\"", EncoderContext.TEXT));
+ }
+
+ @Test
+ public void poEncoderRoundtripWithUnescapeUtils() {
+ POFilter poFilter = new POFilter();
+ EncoderManager encoderManager = poFilter.getEncoderManager();
+ encoderManager.setDefaultOptions(null, "UTF-8", "\n");
+ encoderManager.updateEncoder("application/x-gettext");
+ IEncoder encoder = encoderManager.getEncoder();
+ UnescapeUtils unescapeUtils = new UnescapeUtils();
+
+ // Test roundtrip: unescape(encode(str)) == str
+ String[] testStrings = {
+ "C:\\Users\\test",
+ "line1\nline2",
+ "say \"hello\"",
+ "path\\to\\file\nwith \"quotes\"",
+ "\\\\\\",
+ "normal text without escapes"
+ };
+
+ for (String original : testStrings) {
+ String encoded = encoder.encode(original, EncoderContext.TEXT);
+ String decoded = unescapeUtils.unescape(encoded);
+ assertEquals("Roundtrip failed for: " + original, original, decoded);
+ }
+ }
}
diff --git a/common/src/test/java/com/box/l10n/mojito/okapi/filters/SimpleEncoderTest.java b/common/src/test/java/com/box/l10n/mojito/okapi/filters/SimpleEncoderTest.java
new file mode 100644
index 0000000000..d1fcc506ab
--- /dev/null
+++ b/common/src/test/java/com/box/l10n/mojito/okapi/filters/SimpleEncoderTest.java
@@ -0,0 +1,172 @@
+package com.box.l10n.mojito.okapi.filters;
+
+import static org.junit.Assert.assertEquals;
+
+import net.sf.okapi.common.encoder.EncoderContext;
+import org.junit.Before;
+import org.junit.Test;
+
+/** Tests for {@link SimpleEncoder} to verify C-style escape sequences for PO files. */
+public class SimpleEncoderTest {
+
+ private SimpleEncoder encoder;
+ private EncoderContext context;
+ private UnescapeUtils unescapeUtils;
+
+ @Before
+ public void setUp() {
+ encoder = new SimpleEncoder();
+ encoder.setOptions(null, "UTF-8", "\n");
+ context = EncoderContext.TEXT;
+ unescapeUtils = new UnescapeUtils();
+ }
+
+ @Test
+ public void testEncodeBackslash() {
+ assertEquals("\\\\", encoder.encode('\\', context));
+ }
+
+ @Test
+ public void testEncodeNewline() {
+ assertEquals("\\n", encoder.encode('\n', context));
+ }
+
+ @Test
+ public void testEncodeCarriageReturn() {
+ assertEquals("\\r", encoder.encode('\r', context));
+ }
+
+ @Test
+ public void testEncodeTab() {
+ assertEquals("\\t", encoder.encode('\t', context));
+ }
+
+ @Test
+ public void testEncodeDoubleQuote() {
+ assertEquals("\\\"", encoder.encode('"', context));
+ }
+
+ @Test
+ public void testEncodeRegularCharacter() {
+ assertEquals("a", encoder.encode('a', context));
+ assertEquals("Z", encoder.encode('Z', context));
+ assertEquals("1", encoder.encode('1', context));
+ assertEquals("/", encoder.encode('/', context));
+ }
+
+ @Test
+ public void testEncodeStringWithBackslash() {
+ String input = "C:\\Users\\test";
+ String expected = "C:\\\\Users\\\\test";
+ assertEquals(expected, encoder.encode(input, context));
+ }
+
+ @Test
+ public void testEncodeStringWithNewline() {
+ String input = "line1\nline2";
+ String expected = "line1\\nline2";
+ assertEquals(expected, encoder.encode(input, context));
+ }
+
+ @Test
+ public void testEncodeStringWithMultipleEscapes() {
+ String input = "path\\to\\file\nwith \"quotes\"";
+ String expected = "path\\\\to\\\\file\\nwith \\\"quotes\\\"";
+ assertEquals(expected, encoder.encode(input, context));
+ }
+
+ @Test
+ public void testEncodeEmptyString() {
+ assertEquals("", encoder.encode("", context));
+ }
+
+ @Test
+ public void testEncodeOnlyBackslashes() {
+ assertEquals("\\\\\\\\\\\\", encoder.encode("\\\\\\", context));
+ }
+
+ // Roundtrip tests to verify unescape(encode(str)) == str
+ @Test
+ public void testRoundtripBackslash() {
+ String original = "C:\\Users\\test";
+ String encoded = encoder.encode(original, context);
+ String decoded = unescapeUtils.unescape(encoded);
+ assertEquals(original, decoded);
+ }
+
+ @Test
+ public void testRoundtripNewline() {
+ String original = "line1\nline2";
+ String encoded = encoder.encode(original, context);
+ String decoded = unescapeUtils.unescape(encoded);
+ assertEquals(original, decoded);
+ }
+
+ @Test
+ public void testRoundtripCarriageReturn() {
+ String original = "line1\rline2";
+ String encoded = encoder.encode(original, context);
+ String decoded = unescapeUtils.unescape(encoded);
+ assertEquals(original, decoded);
+ }
+
+ @Test
+ public void testRoundtripDoubleQuote() {
+ String original = "say \"hello\"";
+ String encoded = encoder.encode(original, context);
+ String decoded = unescapeUtils.unescape(encoded);
+ assertEquals(original, decoded);
+ }
+
+ @Test
+ public void testRoundtripComplexString() {
+ String original = "path\\to\\file\nwith \"quotes\" and\rcarriage return";
+ String encoded = encoder.encode(original, context);
+ String decoded = unescapeUtils.unescape(encoded);
+ assertEquals(original, decoded);
+ }
+
+ @Test
+ public void testRoundtripTab() {
+ String original = "col1\tcol2";
+ String encoded = encoder.encode(original, context);
+ assertEquals("col1\\tcol2", encoded);
+ String decoded = unescapeUtils.unescape(encoded);
+ assertEquals(original, decoded);
+ }
+
+ @Test
+ public void testRoundtripMultipleBackslashes() {
+ String original = "\\\\\\";
+ String encoded = encoder.encode(original, context);
+ String decoded = unescapeUtils.unescape(encoded);
+ assertEquals(original, decoded);
+ }
+
+ @Test
+ public void testRoundtripBackslashN() {
+ // Edge case: backslash followed by 'n' (not newline)
+ String original = "\\n"; // backslash + n, 2 chars
+ String encoded = encoder.encode(original, context);
+ String decoded = unescapeUtils.unescape(encoded);
+ assertEquals(original, decoded);
+ }
+
+ @Test
+ public void testRoundtripBackslashR() {
+ // Edge case: backslash followed by 'r' (not carriage return)
+ String original = "\\r"; // backslash + r, 2 chars
+ String encoded = encoder.encode(original, context);
+ String decoded = unescapeUtils.unescape(encoded);
+ assertEquals(original, decoded);
+ }
+
+ @Test
+ public void testRoundtripBackslashQuote() {
+ // Edge case: backslash followed by quote
+ String original = "\\\""; // backslash + quote, 2 chars
+ String encoded = encoder.encode(original, context);
+ String decoded = unescapeUtils.unescape(encoded);
+ assertEquals(original, decoded);
+ }
+}
diff --git a/common/src/test/java/com/box/l10n/mojito/okapi/filters/UnescapeUtilsTest.java b/common/src/test/java/com/box/l10n/mojito/okapi/filters/UnescapeUtilsTest.java
index 46376e7a14..82cd6cff99 100644
--- a/common/src/test/java/com/box/l10n/mojito/okapi/filters/UnescapeUtilsTest.java
+++ b/common/src/test/java/com/box/l10n/mojito/okapi/filters/UnescapeUtilsTest.java
@@ -30,6 +30,13 @@ public void replaceEscapedQuotes() {
assertEquals("\" '", unescapeUtils.replaceEscapedQuotes("\\\" \\'"));
}
+ @Test
+ public void replaceEscapedBackslash() {
+ assertEquals("\\", unescapeUtils.replaceEscapedBackslash("\\\\"));
+ assertEquals("C:\\Users\\test", unescapeUtils.replaceEscapedBackslash("C:\\\\Users\\\\test"));
+ assertEquals("\\\\\\", unescapeUtils.replaceEscapedBackslash("\\\\\\\\\\\\"));
+ }
+
@Test
public void collapseSpaces() {
assertEquals(" a b c ", unescapeUtils.collapseSpaces(" a b c "));
@@ -44,4 +51,56 @@ public void replaceLineFeedWithSpace() {
public void unescape() {
assertEquals(" ' \" \n ", unescapeUtils.unescape(" \' \\\" \\n "));
}
+
+ @Test
+ public void unescapeWithBackslash() {
+ assertEquals("C:\\Users\\test", unescapeUtils.unescape("C:\\\\Users\\\\test"));
+ }
+
+ @Test
+ public void unescapeComplexString() {
+ assertEquals(
+ "path\\to\\file\nwith \"quotes\"",
+ unescapeUtils.unescape("path\\\\to\\\\file\\nwith \\\"quotes\\\""));
+ }
+
+ @Test
+ public void unescapeBackslashFollowedByN() {
+ // The Java literal "\\\\n" is 3 chars (\, \, n): an escaped backslash + a literal n.
+ // It should unescape to 2 chars (backslash + n, the literal "\\n"), NOT a newline character
+ assertEquals("\\n", unescapeUtils.unescape("\\\\n"));
+ }
+
+ @Test
+ public void unescapeBackslashFollowedByR() {
+ // The Java literal "\\\\r" (\, \, r) should unescape to backslash + r ("\\r"), NOT a carriage return
+ assertEquals("\\r", unescapeUtils.unescape("\\\\r"));
+ }
+
+ @Test
+ public void unescapeBackslashFollowedByQuote() {
+ // The Java literal "\\\\\\\"" (\, \, \, ": escaped backslash + escaped quote) should unescape to "\\\"" (backslash + quote)
+ assertEquals("\\\"", unescapeUtils.unescape("\\\\\\\""));
+ }
+
+ @Test
+ public void unescapeTab() {
+ assertEquals("\t", unescapeUtils.unescape("\\t"));
+ }
+
+ @Test
+ public void unescapeNoEscapeSequences() {
+ assertEquals("hello world", unescapeUtils.unescape("hello world"));
+ }
+
+ @Test
+ public void unescapeEmptyString() {
+ assertEquals("", unescapeUtils.unescape(""));
+ }
+
+ @Test
+ public void unescapeMultipleBackslashes() {
+ // 6 backslashes: three escaped pairs → 3 literal backslashes
+ assertEquals("\\\\\\", unescapeUtils.unescape("\\\\\\\\\\\\"));
+ }
}
diff --git a/webapp/src/test/java/com/box/l10n/mojito/service/tm/TMServiceTest.java b/webapp/src/test/java/com/box/l10n/mojito/service/tm/TMServiceTest.java
index 4cce8f807d..fa9a3800d7 100644
--- a/webapp/src/test/java/com/box/l10n/mojito/service/tm/TMServiceTest.java
+++ b/webapp/src/test/java/com/box/l10n/mojito/service/tm/TMServiceTest.java
@@ -3160,7 +3160,8 @@ public void testLocalizePoEscaping() throws Exception {
assertEquals(1, textUnitDTOs.size());
TextUnitDTO textUnitDTO = textUnitDTOs.get(0);
- assertEquals("repin \\\"{}\\\"", textUnitDTO.getName());
+ // Name is now unescaped (quotes decoded from raw PO msgID)
+ assertEquals("repin \"{}\"", textUnitDTO.getName());
assertEquals("repin \"{}\"", textUnitDTO.getSource());
String localizedAsset =
@@ -3226,11 +3227,155 @@ public void testLocalizePoEscaping() throws Exception {
assertEquals(1, textUnitDTOs.size());
textUnitDTO = textUnitDTOs.get(0);
- assertEquals("repin \\\"{}\\\"", textUnitDTO.getName());
+ // Name is now unescaped (quotes decoded from raw PO msgID)
+ assertEquals("repin \"{}\"", textUnitDTO.getName());
assertEquals("repin \"{}\"", textUnitDTO.getSource());
assertEquals("repin \"{}\" jp", textUnitDTO.getTarget());
}
+ /**
+ * End-to-end check of backslash handling for PO files: extraction decodes {@code \\} into a
+ * single backslash, localized generation re-escapes it, and importing a translation that
+ * contains backslashes round-trips.
+ */
+ @Test
+ public void testLocalizePoBackslashEscaping() throws Exception {
+
+ Repository repo = repositoryService.createRepository(testIdWatcher.getEntityName("repository"));
+ RepositoryLocale repoLocale;
+ try {
+ repoLocale = repositoryService.addRepositoryLocale(repo, "ja-JP");
+ } catch (RepositoryLocaleCreationException e) {
+ throw new RuntimeException(e);
+ }
+
+ // PO file with backslash escapes in msgid: C:\\Users\\test represents C:\Users\test
+ String assetContent =
+ "msgstr \"\"\n"
+ + "\"Project-Id-Version: PACKAGE VERSION\\n\"\n"
+ + "\"Report-Msgid-Bugs-To: \\n\"\n"
+ + "\"POT-Creation-Date: 2017-09-15 11:53-0500\\n\"\n"
+ + "\"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n\"\n"
+ + "\"Last-Translator: FULL NAME \\n\"\n"
+ + "\"Language-Team: LANGUAGE \\n\"\n"
+ + "\"MIME-Version: 1.0\\n\"\n"
+ + "\"Plural-Forms: nplurals=2; plural=(n != 1);\\n\"\n"
+ + "\"Content-Type: text/plain; charset=utf-8\\n\"\n"
+ + "\"Content-Transfer-Encoding: 8bit\\n\"\n"
+ + "#. Path comment\n"
+ + "#: src/config.py:10\n"
+ + "msgid \"C:\\\\Users\\\\test\"\n"
+ + "msgstr \"\"";
+
+ // USE_PARENT: msgstr inherits from source, encoder must re-escape backslashes
+ String expectedLocalizedAsset =
+ "msgstr \"\"\n"
+ + "\"Project-Id-Version: PACKAGE VERSION\\n\"\n"
+ + "\"Report-Msgid-Bugs-To: \\n\"\n"
+ + "\"POT-Creation-Date: 2017-09-15 11:53-0500\\n\"\n"
+ + "\"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n\"\n"
+ + "\"Last-Translator: FULL NAME \\n\"\n"
+ + "\"Language-Team: LANGUAGE \\n\"\n"
+ + "\"MIME-Version: 1.0\\n\"\n"
+ + "\"Plural-Forms: nplurals=1; plural=0;\\n\"\n"
+ + "\"Content-Type: text/plain; charset=utf-8\\n\"\n"
+ + "\"Content-Transfer-Encoding: 8bit\\n\"\n"
+ + "#. Path comment\n"
+ + "#: src/config.py:10\n"
+ + "msgid \"C:\\\\Users\\\\test\"\n"
+ + "msgstr \"C:\\\\Users\\\\test\"\n";
+
+ asset = assetService.createAssetWithContent(repo.getId(), "messages.pot", assetContent);
+ asset = assetRepository.findById(asset.getId()).orElse(null);
+ assetId = asset.getId();
+ tmId = repo.getTm().getId();
+
+ PollableFuture assetResult =
+ assetService.addOrUpdateAssetAndProcessIfNeeded(
+ repo.getId(), asset.getPath(), assetContent, false, null, null, null, null, null, null);
+ try {
+ pollableTaskService.waitForPollableTask(assetResult.getPollableTask().getId());
+ } catch (PollableTaskException | InterruptedException e) {
+ throw new RuntimeException(e);
+ }
+ assetResult.get();
+
+ TextUnitSearcherParameters textUnitSearcherParameters = new TextUnitSearcherParameters();
+ textUnitSearcherParameters.setRepositoryIds(repo.getId());
+ textUnitSearcherParameters.setStatusFilter(StatusFilter.FOR_TRANSLATION);
+ // Typed list: get(0) below is assigned straight to a TextUnitDTO.
+ List<TextUnitDTO> textUnitDTOs = textUnitSearcher.search(textUnitSearcherParameters);
+
+ assertEquals(1, textUnitDTOs.size());
+ TextUnitDTO textUnitDTO = textUnitDTOs.get(0);
+ // Name and source should have decoded backslashes
+ assertEquals("C:\\Users\\test", textUnitDTO.getName());
+ assertEquals("C:\\Users\\test", textUnitDTO.getSource());
+
+ // Generate localized: USE_PARENT means msgstr gets the source, re-encoded with backslashes
+ String localizedAsset =
+ tmService.generateLocalized(
+ asset,
+ assetContent,
+ repoLocale,
+ "ja-JP",
+ null,
+ null,
+ Status.ALL,
+ InheritanceMode.USE_PARENT,
+ null);
+ logger.debug("localized=\n{}", localizedAsset);
+ assertEquals(expectedLocalizedAsset, localizedAsset);
+
+ // Import a translation that also contains backslashes
+ String forImport =
+ "msgstr \"\"\n"
+ + "\"Project-Id-Version: PACKAGE VERSION\\n\"\n"
+ + "\"Report-Msgid-Bugs-To: \\n\"\n"
+ + "\"POT-Creation-Date: 2017-09-15 11:53-0500\\n\"\n"
+ + "\"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n\"\n"
+ + "\"Last-Translator: FULL NAME \\n\"\n"
+ + "\"Language-Team: LANGUAGE \\n\"\n"
+ + "\"MIME-Version: 1.0\\n\"\n"
+ + "\"Plural-Forms: nplurals=1; plural=0;\\n\"\n"
+ + "\"Content-Type: text/plain; charset=utf-8\\n\"\n"
+ + "\"Content-Transfer-Encoding: 8bit\\n\"\n"
+ + "#. Path comment\n"
+ + "#: src/config.py:10\n"
+ + "msgid \"C:\\\\Users\\\\test\"\n"
+ + "msgstr \"C:\\\\Users\\\\test jp\"\n";
+
+ tmService
+ .importLocalizedAssetAsync(
+ assetId,
+ forImport,
+ repoLocale.getLocale().getId(),
+ StatusForEqualTarget.TRANSLATION_NEEDED,
+ null,
+ null)
+ .get();
+
+ // REMOVE_UNTRANSLATED: the regenerated file should be identical to the imported one
+ localizedAsset =
+ tmService.generateLocalized(
+ asset,
+ assetContent,
+ repoLocale,
+ "ja-JP",
+ null,
+ null,
+ Status.ALL,
+ InheritanceMode.REMOVE_UNTRANSLATED,
+ null);
+ logger.debug("localized after import=\n{}", localizedAsset);
+ assertEquals(forImport, localizedAsset);
+
+ textUnitSearcherParameters = new TextUnitSearcherParameters();
+ textUnitSearcherParameters.setRepositoryIds(repo.getId());
+ textUnitSearcherParameters.setStatusFilter(StatusFilter.TRANSLATED);
+ textUnitSearcherParameters.setLocaleId(repoLocale.getLocale().getId());
+ textUnitDTOs = textUnitSearcher.search(textUnitSearcherParameters);
+
+ assertEquals(1, textUnitDTOs.size());
+ textUnitDTO = textUnitDTOs.get(0);
+ assertEquals("C:\\Users\\test", textUnitDTO.getName());
+ assertEquals("C:\\Users\\test", textUnitDTO.getSource());
+ assertEquals("C:\\Users\\test jp", textUnitDTO.getTarget());
+ }
+
@Test
public void testLocalizePoPluralRu() throws Exception {