diff --git a/common/src/main/java/com/box/l10n/mojito/okapi/filters/POFilter.java b/common/src/main/java/com/box/l10n/mojito/okapi/filters/POFilter.java index 73f8670851..466ebc02c0 100644 --- a/common/src/main/java/com/box/l10n/mojito/okapi/filters/POFilter.java +++ b/common/src/main/java/com/box/l10n/mojito/okapi/filters/POFilter.java @@ -167,19 +167,16 @@ void unescpae(TextUnit textUnit) { } void unescapeSource(TextUnit textUnit) { - String sourceString = textUnitUtils.getSourceAsString(textUnit); - String unescapedSourceString = unescapeUtils.replaceEscapedQuotes(sourceString); - textUnitUtils.replaceSourceString(textUnit, unescapedSourceString); + // No-op: Okapi's POFilter already fully handles all C-style escape sequences + // (\\, \n, \r, \t, \", etc.) via its own single-pass unescape in toAbstract(). + // Any additional unescaping here would double-process and corrupt strings. + // For example, PO \\\" (escaped-backslash + escaped-quote) becomes \" (literal + // backslash + quote) after Okapi's unescape. Applying replaceEscapedQuotes() + // would then strip the literal backslash. } void unescapeTarget(TextUnit textUnit) { - TextContainer target = textUnit.getTarget(targetLocale); - if (target != null) { - String targetString = target.toString(); - String unescapedTargetString = unescapeUtils.replaceEscapedQuotes(targetString); - TextContainer newTarget = new TextContainer(unescapedTargetString); - textUnit.setTarget(targetLocale, newTarget); - } + // No-op: same reasoning as unescapeSource. 
} boolean isPluralGroupStarting(Event event) { @@ -290,13 +287,13 @@ void adaptTextUnitToCLDRForm(ITextUnit textUnit, String cldrPluralForm) { // source should always be singular form for "one" form, // this is needed for language with 6 entry like arabic logger.debug("Set message singular: {}", msgID); - textUnit.setSource(new TextContainer(unescapeUtils.replaceEscapedQuotes(msgID))); + textUnit.setSource(new TextContainer(unescapeUtils.unescape(msgID))); } else { // source should always be plural form unless for "one" form, // this is needed for language with only one entry like // japanese: [0] --> other logger.debug("Set message plural: {}", msgIDPlural); - textUnit.setSource(new TextContainer(unescapeUtils.replaceEscapedQuotes(msgIDPlural))); + textUnit.setSource(new TextContainer(unescapeUtils.unescape(msgIDPlural))); } } @@ -361,10 +358,12 @@ void renameTextUnitWithSourceAndContent(ITextUnit textUnit) { Property property = textUnit.getProperty(POFilter.PROPERTY_CONTEXT); - StringBuilder newName = new StringBuilder(msgID); + // Unescape msgID for the name (backslash, newline, carriage return, quotes) + StringBuilder newName = new StringBuilder(unescapeUtils.unescape(msgID)); if (property != null) { - newName.append(" --- ").append(property.getValue()); + // Also unescape the context value + newName.append(" --- ").append(unescapeUtils.unescape(property.getValue())); } if (poPluralForm != null) { diff --git a/common/src/main/java/com/box/l10n/mojito/okapi/filters/SimpleEncoder.java b/common/src/main/java/com/box/l10n/mojito/okapi/filters/SimpleEncoder.java index 0f07771c49..541c3aafbc 100644 --- a/common/src/main/java/com/box/l10n/mojito/okapi/filters/SimpleEncoder.java +++ b/common/src/main/java/com/box/l10n/mojito/okapi/filters/SimpleEncoder.java @@ -8,7 +8,9 @@ import net.sf.okapi.common.encoder.IEncoder; /** - * Encoder to handle escaping \n, \r, double-quotes. + * Encoder to handle escaping backslash, \n, \r, \t, double-quotes. + * + *
<p>
Follows C-style string escaping as required by the GNU PO file format. * * @author jyi */ @@ -50,12 +52,18 @@ public String encode(char value, EncoderContext context) { String res; switch (value) { + case '\\': + res = "\\\\"; + break; case '\n': res = "\\n"; break; case '\r': res = "\\r"; break; + case '\t': + res = "\\t"; + break; case '"': res = "\\\""; break; diff --git a/common/src/main/java/com/box/l10n/mojito/okapi/filters/UnescapeUtils.java b/common/src/main/java/com/box/l10n/mojito/okapi/filters/UnescapeUtils.java index 02d1e73a8e..42591eaea6 100644 --- a/common/src/main/java/com/box/l10n/mojito/okapi/filters/UnescapeUtils.java +++ b/common/src/main/java/com/box/l10n/mojito/okapi/filters/UnescapeUtils.java @@ -1,5 +1,6 @@ package com.box.l10n.mojito.okapi.filters; +import java.util.regex.Matcher; import java.util.regex.Pattern; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -14,6 +15,7 @@ public class UnescapeUtils { /** Logger */ static Logger logger = LoggerFactory.getLogger(UnescapeUtils.class); + private static final Pattern ESCAPED_BACKSLASH = Pattern.compile("\\\\\\\\"); private static final Pattern ESCAPED_CARIAGE_RETURN = Pattern.compile("\\\\r"); private static final Pattern ESCAPED_LINE_FEED = Pattern.compile("\\\\n"); private static final Pattern ESCAPED_QUOTES = Pattern.compile("\\\\(\"|')"); @@ -24,16 +26,84 @@ public class UnescapeUtils { private static final Pattern LINE_FEED = Pattern.compile("\n"); /** - * Unescapes line feed, cariage return, single quote and double quote + * Single-pass pattern for C-style escape sequences used in GNU PO files. Matches exactly + * two-character sequences starting with a backslash, so "\\\\n" (Java literal for 3 chars: \, \, n) matches "\\\\" first (→ + * \), leaving "n" as a literal — not "\n" (newline). + * + *
<p>
Covers the same set as Okapi's {@code POFilter.unescape()}: {@code \\[abfnrtv"'\\]}. + */ + private static final Pattern ESCAPE_SEQUENCE = Pattern.compile("\\\\[abfnrtv\"'\\\\]"); + + /** + * Unescapes C-style escape sequences in a single pass, following the GNU PO file format (same + * escaping rules as C strings). + * + *
<p>
Handles: {@code \\} (backslash), {@code \n} (newline), {@code \r} (CR), {@code \t} (tab), + * {@code \"} (quote), {@code \'} (single quote), {@code \a} (bell), {@code \b} (backspace), + * {@code \f} (form feed), {@code \v} (vertical tab). + * + *
<p>
A single-pass approach is required because sequential replacement can corrupt strings + * containing ambiguous sequences like "\\\\n" (escaped-backslash followed by literal 'n'). With + * sequential replacement, this would be incorrectly decoded as a newline character. + * + * @param text the escaped text + * @return the unescaped text + */ + public String unescape(String text) { + Matcher matcher = ESCAPE_SEQUENCE.matcher(text); + StringBuilder sb = new StringBuilder(text.length()); + while (matcher.find()) { + String match = matcher.group(); + String replacement; + switch (match.charAt(1)) { + case '\\': + replacement = "\\"; + break; + case 'a': + replacement = "\u0007"; // bell + break; + case 'b': + replacement = "\b"; // backspace + break; + case 'f': + replacement = "\f"; // form feed + break; + case 'n': + replacement = "\n"; + break; + case 'r': + replacement = "\r"; + break; + case 't': + replacement = "\t"; + break; + case 'v': + replacement = "\u000B"; // vertical tab + break; + case '"': + replacement = "\""; + break; + case '\'': + replacement = "'"; + break; + default: + replacement = match; + break; + } + matcher.appendReplacement(sb, Matcher.quoteReplacement(replacement)); + } + matcher.appendTail(sb); + return sb.toString(); + } + + /** + * Replaces \\\\ with \\ * * @param text * @return */ - public String unescape(String text) { - String unescapedText = replaceEscapedCarriageReturn(text); - unescapedText = replaceEscapedLineFeed(unescapedText); - unescapedText = replaceEscapedQuotes(unescapedText); - return unescapedText; + String replaceEscapedBackslash(String text) { + return ESCAPED_BACKSLASH.matcher(text).replaceAll("\\\\"); } String replaceEscapedCarriageReturn(String text) { diff --git a/common/src/test/java/com/box/l10n/mojito/okapi/extractor/AssetExtractorTest.java b/common/src/test/java/com/box/l10n/mojito/okapi/extractor/AssetExtractorTest.java index 83c569b775..5850e597b0 100644 --- 
a/common/src/test/java/com/box/l10n/mojito/okapi/extractor/AssetExtractorTest.java +++ b/common/src/test/java/com/box/l10n/mojito/okapi/extractor/AssetExtractorTest.java @@ -7,6 +7,7 @@ import com.box.l10n.mojito.okapi.asset.AssetPathToFilterConfigMapper; import com.box.l10n.mojito.okapi.asset.FilterConfigurationMappers; import com.box.l10n.mojito.okapi.asset.UnsupportedAssetFilterTypeException; +import com.box.l10n.mojito.okapi.filters.UnescapeUtils; import java.util.Arrays; import java.util.List; import org.assertj.core.api.Assertions; @@ -25,6 +26,7 @@ AssetPathToFilterConfigMapper.class, FilterConfigurationMappers.class, TextUnitUtils.class, + UnescapeUtils.class, AssetExtractorTest.class }) @EnableSpringConfigured @@ -253,4 +255,133 @@ public void documentNoPartExtraction() throws UnsupportedAssetFilterTypeExceptio "34a6a48789dd1ff7dff813a8fb627b91-8f1bdae06589d55b62184a76e0e70d0e-1", "Image in text
.")); } + + @Test + public void extractPoWithBackslash() throws UnsupportedAssetFilterTypeException { + // PO file with backslash in msgid - should be unescaped to literal backslash + String poContent = + "msgid \"\"\n" + + "msgstr \"\"\n" + + "\"Content-Type: text/plain; charset=utf-8\\n\"\n" + + "\n" + + "#. Path with backslash\n" + + "msgid \"C:\\\\Users\\\\test\"\n" + + "msgstr \"\"\n"; + + List assetExtractorTextUnitsForAsset = + assetExtractor.getAssetExtractorTextUnitsForAsset("messages.pot", poContent, null, null); + + // The backslash should be unescaped: C:\\Users\\test -> C:\Users\test + Assertions.assertThat(assetExtractorTextUnitsForAsset) + .extracting(AssetExtractorTextUnit::getName, AssetExtractorTextUnit::getSource) + .containsExactly(tuple("C:\\Users\\test", "C:\\Users\\test")); + } + + @Test + public void extractPoWithNewlineEscape() throws UnsupportedAssetFilterTypeException { + // PO file with escaped newline in msgid + String poContent = + "msgid \"\"\n" + + "msgstr \"\"\n" + + "\"Content-Type: text/plain; charset=utf-8\\n\"\n" + + "\n" + + "#. Multi-line text\n" + + "msgid \"line1\\nline2\"\n" + + "msgstr \"\"\n"; + + List assetExtractorTextUnitsForAsset = + assetExtractor.getAssetExtractorTextUnitsForAsset("messages.pot", poContent, null, null); + + // The newline escape should be unescaped: line1\nline2 -> line1line2 + Assertions.assertThat(assetExtractorTextUnitsForAsset) + .extracting(AssetExtractorTextUnit::getName, AssetExtractorTextUnit::getSource) + .containsExactly(tuple("line1\nline2", "line1\nline2")); + } + + @Test + public void extractPoWithQuoteEscape() throws UnsupportedAssetFilterTypeException { + // PO file with escaped quote in msgid + String poContent = + "msgid \"\"\n" + + "msgstr \"\"\n" + + "\"Content-Type: text/plain; charset=utf-8\\n\"\n" + + "\n" + + "#. 
Text with quotes\n" + + "msgid \"say \\\"hello\\\"\"\n" + + "msgstr \"\"\n"; + + List assetExtractorTextUnitsForAsset = + assetExtractor.getAssetExtractorTextUnitsForAsset("messages.pot", poContent, null, null); + + // The quote escape should be unescaped: say \"hello\" -> say "hello" + Assertions.assertThat(assetExtractorTextUnitsForAsset) + .extracting(AssetExtractorTextUnit::getName, AssetExtractorTextUnit::getSource) + .containsExactly(tuple("say \"hello\"", "say \"hello\"")); + } + + @Test + public void extractPoWithComplexEscapes() throws UnsupportedAssetFilterTypeException { + // PO file with multiple escape sequences + String poContent = + "msgid \"\"\n" + + "msgstr \"\"\n" + + "\"Content-Type: text/plain; charset=utf-8\\n\"\n" + + "\n" + + "#. Complex escapes\n" + + "msgid \"path\\\\to\\\\file\\nwith \\\"quotes\\\"\"\n" + + "msgstr \"\"\n"; + + List assetExtractorTextUnitsForAsset = + assetExtractor.getAssetExtractorTextUnitsForAsset("messages.pot", poContent, null, null); + + // All escapes should be unescaped + Assertions.assertThat(assetExtractorTextUnitsForAsset) + .extracting(AssetExtractorTextUnit::getName, AssetExtractorTextUnit::getSource) + .containsExactly( + tuple("path\\to\\file\nwith \"quotes\"", "path\\to\\file\nwith \"quotes\"")); + } + + @Test + public void extractPoWithContext() throws UnsupportedAssetFilterTypeException { + // PO file with msgctxt containing backslash + String poContent = + "msgid \"\"\n" + + "msgstr \"\"\n" + + "\"Content-Type: text/plain; charset=utf-8\\n\"\n" + + "\n" + + "#. 
Context test\n" + + "msgctxt \"menu\\\\file\"\n" + + "msgid \"Open\"\n" + + "msgstr \"\"\n"; + + List assetExtractorTextUnitsForAsset = + assetExtractor.getAssetExtractorTextUnitsForAsset("messages.pot", poContent, null, null); + + // The name should include the context with unescaped backslash + Assertions.assertThat(assetExtractorTextUnitsForAsset) + .extracting(AssetExtractorTextUnit::getName, AssetExtractorTextUnit::getSource) + .containsExactly(tuple("Open --- menu\\file", "Open")); + } + + @Test + public void extractPoWithRealisticBackslashMessage() throws UnsupportedAssetFilterTypeException { + // Realistic PO message: You are not able to use "/" or "\" in text files + // PO escaping: \" for quotes, \\ for backslash + String poContent = + "msgid \"\"\n" + + "msgstr \"\"\n" + + "\"Content-Type: text/plain; charset=utf-8\\n\"\n" + + "\n" + + "#. File name validation error\n" + + "msgid \"You are not able to use \\\"/\\\" or \\\"\\\\\\\" in text files\"\n" + + "msgstr \"\"\n"; + + List assetExtractorTextUnitsForAsset = + assetExtractor.getAssetExtractorTextUnitsForAsset("messages.pot", poContent, null, null); + + String expectedString = "You are not able to use \"/\" or \"\\\" in text files"; + Assertions.assertThat(assetExtractorTextUnitsForAsset) + .extracting(AssetExtractorTextUnit::getName, AssetExtractorTextUnit::getSource) + .containsExactly(tuple(expectedString, expectedString)); + } } diff --git a/common/src/test/java/com/box/l10n/mojito/okapi/filters/POFilterTest.java b/common/src/test/java/com/box/l10n/mojito/okapi/filters/POFilterTest.java index c72a1f13d5..adea59abb4 100644 --- a/common/src/test/java/com/box/l10n/mojito/okapi/filters/POFilterTest.java +++ b/common/src/test/java/com/box/l10n/mojito/okapi/filters/POFilterTest.java @@ -2,10 +2,14 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; import java.util.ArrayList; import java.util.List; import 
java.util.stream.Stream; +import net.sf.okapi.common.encoder.EncoderContext; +import net.sf.okapi.common.encoder.EncoderManager; +import net.sf.okapi.common.encoder.IEncoder; import org.junit.Test; /** @@ -173,4 +177,67 @@ public void removeUntranslatedEOL() { Stream.of("", "\n", "#. Comments", "#. Comments\n") .forEach(s -> assertEquals(s, POFilter.removeUntranslated(s))); } + + @Test + public void encoderManagerUsesPOEncoder() { + POFilter poFilter = new POFilter(); + EncoderManager encoderManager = poFilter.getEncoderManager(); + encoderManager.setDefaultOptions(null, "UTF-8", "\n"); + encoderManager.updateEncoder("application/x-gettext"); + IEncoder encoder = encoderManager.getEncoder(); + assertTrue("POFilter should use POEncoder for PO MIME type", encoder instanceof POEncoder); + } + + @Test + public void poEncoderEscapesBackslash() { + POFilter poFilter = new POFilter(); + EncoderManager encoderManager = poFilter.getEncoderManager(); + encoderManager.setDefaultOptions(null, "UTF-8", "\n"); + encoderManager.updateEncoder("application/x-gettext"); + IEncoder encoder = encoderManager.getEncoder(); + + // Test that backslash is properly escaped + assertEquals("\\\\", encoder.encode("\\", EncoderContext.TEXT)); + assertEquals("C:\\\\Users\\\\test", encoder.encode("C:\\Users\\test", EncoderContext.TEXT)); + } + + @Test + public void poEncoderEscapesSpecialCharacters() { + POFilter poFilter = new POFilter(); + EncoderManager encoderManager = poFilter.getEncoderManager(); + encoderManager.setDefaultOptions(null, "UTF-8", "\n"); + encoderManager.updateEncoder("application/x-gettext"); + IEncoder encoder = encoderManager.getEncoder(); + + // Test newline, carriage return, and double quote escaping + assertEquals("\\n", encoder.encode("\n", EncoderContext.TEXT)); + assertEquals("\\r", encoder.encode("\r", EncoderContext.TEXT)); + assertEquals("\\\"", encoder.encode("\"", EncoderContext.TEXT)); + } + + @Test + public void poEncoderRoundtripWithUnescapeUtils() { + 
POFilter poFilter = new POFilter(); + EncoderManager encoderManager = poFilter.getEncoderManager(); + encoderManager.setDefaultOptions(null, "UTF-8", "\n"); + encoderManager.updateEncoder("application/x-gettext"); + IEncoder encoder = encoderManager.getEncoder(); + UnescapeUtils unescapeUtils = new UnescapeUtils(); + + // Test roundtrip: unescape(encode(str)) == str + String[] testStrings = { + "C:\\Users\\test", + "line1\nline2", + "say \"hello\"", + "path\\to\\file\nwith \"quotes\"", + "\\\\\\", + "normal text without escapes" + }; + + for (String original : testStrings) { + String encoded = encoder.encode(original, EncoderContext.TEXT); + String decoded = unescapeUtils.unescape(encoded); + assertEquals("Roundtrip failed for: " + original, original, decoded); + } + } } diff --git a/common/src/test/java/com/box/l10n/mojito/okapi/filters/SimpleEncoderTest.java b/common/src/test/java/com/box/l10n/mojito/okapi/filters/SimpleEncoderTest.java new file mode 100644 index 0000000000..d1fcc506ab --- /dev/null +++ b/common/src/test/java/com/box/l10n/mojito/okapi/filters/SimpleEncoderTest.java @@ -0,0 +1,172 @@ +package com.box.l10n.mojito.okapi.filters; + +import static org.junit.Assert.assertEquals; + +import net.sf.okapi.common.encoder.EncoderContext; +import org.junit.Before; +import org.junit.Test; + +/** Tests for {@link SimpleEncoder} to verify C-style escape sequences for PO files. 
*/ +public class SimpleEncoderTest { + + private SimpleEncoder encoder; + private EncoderContext context; + private UnescapeUtils unescapeUtils; + + @Before + public void setUp() { + encoder = new SimpleEncoder(); + encoder.setOptions(null, "UTF-8", "\n"); + context = EncoderContext.TEXT; + unescapeUtils = new UnescapeUtils(); + } + + @Test + public void testEncodeBackslash() { + assertEquals("\\\\", encoder.encode('\\', context)); + } + + @Test + public void testEncodeNewline() { + assertEquals("\\n", encoder.encode('\n', context)); + } + + @Test + public void testEncodeCarriageReturn() { + assertEquals("\\r", encoder.encode('\r', context)); + } + + @Test + public void testEncodeTab() { + assertEquals("\\t", encoder.encode('\t', context)); + } + + @Test + public void testEncodeDoubleQuote() { + assertEquals("\\\"", encoder.encode('"', context)); + } + + @Test + public void testEncodeRegularCharacter() { + assertEquals("a", encoder.encode('a', context)); + assertEquals("Z", encoder.encode('Z', context)); + assertEquals("1", encoder.encode('1', context)); + assertEquals("/", encoder.encode('/', context)); + } + + @Test + public void testEncodeStringWithBackslash() { + String input = "C:\\Users\\test"; + String expected = "C:\\\\Users\\\\test"; + assertEquals(expected, encoder.encode(input, context)); + } + + @Test + public void testEncodeStringWithNewline() { + String input = "line1\nline2"; + String expected = "line1\\nline2"; + assertEquals(expected, encoder.encode(input, context)); + } + + @Test + public void testEncodeStringWithMultipleEscapes() { + String input = "path\\to\\file\nwith \"quotes\""; + String expected = "path\\\\to\\\\file\\nwith \\\"quotes\\\""; + assertEquals(expected, encoder.encode(input, context)); + } + + @Test + public void testEncodeEmptyString() { + assertEquals("", encoder.encode("", context)); + } + + @Test + public void testEncodeOnlyBackslashes() { + assertEquals("\\\\\\\\\\\\", encoder.encode("\\\\\\", context)); + } + + // Roundtrip 
tests to verify unescape(encode(str)) == str + @Test + public void testRoundtripBackslash() { + String original = "C:\\Users\\test"; + String encoded = encoder.encode(original, context); + String decoded = unescapeUtils.unescape(encoded); + assertEquals(original, decoded); + } + + @Test + public void testRoundtripNewline() { + String original = "line1\nline2"; + String encoded = encoder.encode(original, context); + String decoded = unescapeUtils.unescape(encoded); + assertEquals(original, decoded); + } + + @Test + public void testRoundtripCarriageReturn() { + String original = "line1\rline2"; + String encoded = encoder.encode(original, context); + String decoded = unescapeUtils.unescape(encoded); + assertEquals(original, decoded); + } + + @Test + public void testRoundtripDoubleQuote() { + String original = "say \"hello\""; + String encoded = encoder.encode(original, context); + String decoded = unescapeUtils.unescape(encoded); + assertEquals(original, decoded); + } + + @Test + public void testRoundtripComplexString() { + String original = "path\\to\\file\nwith \"quotes\" and\rcarriage return"; + String encoded = encoder.encode(original, context); + String decoded = unescapeUtils.unescape(encoded); + assertEquals(original, decoded); + } + + @Test + public void testRoundtripTab() { + String original = "col1\tcol2"; + String encoded = encoder.encode(original, context); + assertEquals("col1\\tcol2", encoded); + String decoded = unescapeUtils.unescape(encoded); + assertEquals(original, decoded); + } + + @Test + public void testRoundtripMultipleBackslashes() { + String original = "\\\\\\"; + String encoded = encoder.encode(original, context); + String decoded = unescapeUtils.unescape(encoded); + assertEquals(original, decoded); + } + + @Test + public void testRoundtripBackslashN() { + // Edge case: backslash followed by 'n' (not newline) + String original = "\\n"; // backslash + n, 2 chars + String encoded = encoder.encode(original, context); + String decoded = 
unescapeUtils.unescape(encoded); + assertEquals(original, decoded); + } + + @Test + public void testRoundtripBackslashR() { + // Edge case: backslash followed by 'r' (not carriage return) + String original = "\\r"; // backslash + r, 2 chars + String encoded = encoder.encode(original, context); + String decoded = unescapeUtils.unescape(encoded); + assertEquals(original, decoded); + } + + @Test + public void testRoundtripBackslashQuote() { + // Edge case: backslash followed by quote + String original = "\\\""; // backslash + quote, 2 chars + String encoded = encoder.encode(original, context); + String decoded = unescapeUtils.unescape(encoded); + assertEquals(original, decoded); + } +} diff --git a/common/src/test/java/com/box/l10n/mojito/okapi/filters/UnescapeUtilsTest.java b/common/src/test/java/com/box/l10n/mojito/okapi/filters/UnescapeUtilsTest.java index 46376e7a14..82cd6cff99 100644 --- a/common/src/test/java/com/box/l10n/mojito/okapi/filters/UnescapeUtilsTest.java +++ b/common/src/test/java/com/box/l10n/mojito/okapi/filters/UnescapeUtilsTest.java @@ -30,6 +30,13 @@ public void replaceEscapedQuotes() { assertEquals("\" '", unescapeUtils.replaceEscapedQuotes("\\\" \\'")); } + @Test + public void replaceEscapedBackslash() { + assertEquals("\\", unescapeUtils.replaceEscapedBackslash("\\\\")); + assertEquals("C:\\Users\\test", unescapeUtils.replaceEscapedBackslash("C:\\\\Users\\\\test")); + assertEquals("\\\\\\", unescapeUtils.replaceEscapedBackslash("\\\\\\\\\\\\")); + } + @Test public void collapseSpaces() { assertEquals(" a b c ", unescapeUtils.collapseSpaces(" a b c ")); @@ -44,4 +51,56 @@ public void replaceLineFeedWithSpace() { public void unescape() { assertEquals(" ' \" \n ", unescapeUtils.unescape(" \' \\\" \\n ")); } + + @Test + public void unescapeWithBackslash() { + assertEquals("C:\\Users\\test", unescapeUtils.unescape("C:\\\\Users\\\\test")); + } + + @Test + public void unescapeComplexString() { + assertEquals( + "path\\to\\file\nwith \"quotes\"", + 
unescapeUtils.unescape("path\\\\to\\\\file\\nwith \\\"quotes\\\"")); + } + + @Test + public void unescapeBackslashFollowedByN() { + // "\\\\n" is the Java literal for 3 chars (\, \, n): escaped backslash + literal n; + // it should unescape to 2 chars (backslash + n), NOT a newline character + assertEquals("\\n", unescapeUtils.unescape("\\\\n")); + } + + @Test + public void unescapeBackslashFollowedByR() { + // "\\\\r" should unescape to "\r" (backslash + r), NOT a carriage return + assertEquals("\\r", unescapeUtils.unescape("\\\\r")); + } + + @Test + public void unescapeBackslashFollowedByQuote() { + // "\\\\\\\"" (raw: \\\" = escaped backslash + escaped quote) should unescape to \" (backslash + quote) + assertEquals("\\\"", unescapeUtils.unescape("\\\\\\\"")); + } + + @Test + public void unescapeTab() { + assertEquals("\t", unescapeUtils.unescape("\\t")); + } + + @Test + public void unescapeNoEscapeSequences() { + assertEquals("hello world", unescapeUtils.unescape("hello world")); + } + + @Test + public void unescapeEmptyString() { + assertEquals("", unescapeUtils.unescape("")); + } + + @Test + public void unescapeMultipleBackslashes() { + // 6 backslashes: three escaped pairs → 3 literal backslashes + assertEquals("\\\\\\", unescapeUtils.unescape("\\\\\\\\\\\\")); + } } diff --git a/webapp/src/test/java/com/box/l10n/mojito/service/tm/TMServiceTest.java b/webapp/src/test/java/com/box/l10n/mojito/service/tm/TMServiceTest.java index 4cce8f807d..fa9a3800d7 100644 --- a/webapp/src/test/java/com/box/l10n/mojito/service/tm/TMServiceTest.java +++ b/webapp/src/test/java/com/box/l10n/mojito/service/tm/TMServiceTest.java @@ -3160,7 +3160,8 @@ public void testLocalizePoEscaping() throws Exception { assertEquals(1, textUnitDTOs.size()); TextUnitDTO textUnitDTO = textUnitDTOs.get(0); - assertEquals("repin \\\"{}\\\"", textUnitDTO.getName()); + // Name is now unescaped (quotes decoded from raw PO msgID) + assertEquals("repin \"{}\"", textUnitDTO.getName()); assertEquals("repin \"{}\"", textUnitDTO.getSource()); String localizedAsset = @@ 
-3226,11 +3227,155 @@ public void testLocalizePoEscaping() throws Exception { assertEquals(1, textUnitDTOs.size()); textUnitDTO = textUnitDTOs.get(0); - assertEquals("repin \\\"{}\\\"", textUnitDTO.getName()); + // Name is now unescaped (quotes decoded from raw PO msgID) + assertEquals("repin \"{}\"", textUnitDTO.getName()); assertEquals("repin \"{}\"", textUnitDTO.getSource()); assertEquals("repin \"{}\" jp", textUnitDTO.getTarget()); } + @Test + public void testLocalizePoBackslashEscaping() throws Exception { + + Repository repo = repositoryService.createRepository(testIdWatcher.getEntityName("repository")); + RepositoryLocale repoLocale; + try { + repoLocale = repositoryService.addRepositoryLocale(repo, "ja-JP"); + } catch (RepositoryLocaleCreationException e) { + throw new RuntimeException(e); + } + + // PO file with backslash escapes in msgid: C:\\Users\\test represents C:\Users\test + String assetContent = + "msgstr \"\"\n" + + "\"Project-Id-Version: PACKAGE VERSION\\n\"\n" + + "\"Report-Msgid-Bugs-To: \\n\"\n" + + "\"POT-Creation-Date: 2017-09-15 11:53-0500\\n\"\n" + + "\"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n\"\n" + + "\"Last-Translator: FULL NAME \\n\"\n" + + "\"Language-Team: LANGUAGE \\n\"\n" + + "\"MIME-Version: 1.0\\n\"\n" + + "\"Plural-Forms: nplurals=2; plural=(n != 1);\\n\"\n" + + "\"Content-Type: text/plain; charset=utf-8\\n\"\n" + + "\"Content-Transfer-Encoding: 8bit\\n\"\n" + + "#. 
Path comment\n" + + "#: src/config.py:10\n" + + "msgid \"C:\\\\Users\\\\test\"\n" + + "msgstr \"\""; + + // USE_PARENT: msgstr inherits from source, encoder must re-escape backslashes + String expectedLocalizedAsset = + "msgstr \"\"\n" + + "\"Project-Id-Version: PACKAGE VERSION\\n\"\n" + + "\"Report-Msgid-Bugs-To: \\n\"\n" + + "\"POT-Creation-Date: 2017-09-15 11:53-0500\\n\"\n" + + "\"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n\"\n" + + "\"Last-Translator: FULL NAME \\n\"\n" + + "\"Language-Team: LANGUAGE \\n\"\n" + + "\"MIME-Version: 1.0\\n\"\n" + + "\"Plural-Forms: nplurals=1; plural=0;\\n\"\n" + + "\"Content-Type: text/plain; charset=utf-8\\n\"\n" + + "\"Content-Transfer-Encoding: 8bit\\n\"\n" + + "#. Path comment\n" + + "#: src/config.py:10\n" + + "msgid \"C:\\\\Users\\\\test\"\n" + + "msgstr \"C:\\\\Users\\\\test\"\n"; + + asset = assetService.createAssetWithContent(repo.getId(), "messages.pot", assetContent); + asset = assetRepository.findById(asset.getId()).orElse(null); + assetId = asset.getId(); + tmId = repo.getTm().getId(); + + PollableFuture assetResult = + assetService.addOrUpdateAssetAndProcessIfNeeded( + repo.getId(), asset.getPath(), assetContent, false, null, null, null, null, null, null); + try { + pollableTaskService.waitForPollableTask(assetResult.getPollableTask().getId()); + } catch (PollableTaskException | InterruptedException e) { + throw new RuntimeException(e); + } + assetResult.get(); + + TextUnitSearcherParameters textUnitSearcherParameters = new TextUnitSearcherParameters(); + textUnitSearcherParameters.setRepositoryIds(repo.getId()); + textUnitSearcherParameters.setStatusFilter(StatusFilter.FOR_TRANSLATION); + List textUnitDTOs = textUnitSearcher.search(textUnitSearcherParameters); + + assertEquals(1, textUnitDTOs.size()); + TextUnitDTO textUnitDTO = textUnitDTOs.get(0); + // Name and source should have decoded backslashes + assertEquals("C:\\Users\\test", textUnitDTO.getName()); + assertEquals("C:\\Users\\test", 
textUnitDTO.getSource()); + + // Generate localized: USE_PARENT means msgstr gets the source, re-encoded with backslashes + String localizedAsset = + tmService.generateLocalized( + asset, + assetContent, + repoLocale, + "ja-JP", + null, + null, + Status.ALL, + InheritanceMode.USE_PARENT, + null); + logger.debug("localized=\n{}", localizedAsset); + assertEquals(expectedLocalizedAsset, localizedAsset); + + // Import a translation that also contains backslashes + String forImport = + "msgstr \"\"\n" + + "\"Project-Id-Version: PACKAGE VERSION\\n\"\n" + + "\"Report-Msgid-Bugs-To: \\n\"\n" + + "\"POT-Creation-Date: 2017-09-15 11:53-0500\\n\"\n" + + "\"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n\"\n" + + "\"Last-Translator: FULL NAME \\n\"\n" + + "\"Language-Team: LANGUAGE \\n\"\n" + + "\"MIME-Version: 1.0\\n\"\n" + + "\"Plural-Forms: nplurals=1; plural=0;\\n\"\n" + + "\"Content-Type: text/plain; charset=utf-8\\n\"\n" + + "\"Content-Transfer-Encoding: 8bit\\n\"\n" + + "#. Path comment\n" + + "#: src/config.py:10\n" + + "msgid \"C:\\\\Users\\\\test\"\n" + + "msgstr \"C:\\\\Users\\\\test jp\"\n"; + + tmService + .importLocalizedAssetAsync( + assetId, + forImport, + repoLocale.getLocale().getId(), + StatusForEqualTarget.TRANSLATION_NEEDED, + null, + null) + .get(); + + localizedAsset = + tmService.generateLocalized( + asset, + assetContent, + repoLocale, + "ja-JP", + null, + null, + Status.ALL, + InheritanceMode.REMOVE_UNTRANSLATED, + null); + logger.debug("localized after import=\n{}", localizedAsset); + assertEquals(forImport, localizedAsset); + + textUnitSearcherParameters = new TextUnitSearcherParameters(); + textUnitSearcherParameters.setRepositoryIds(repo.getId()); + textUnitSearcherParameters.setStatusFilter(StatusFilter.TRANSLATED); + textUnitSearcherParameters.setLocaleId(repoLocale.getLocale().getId()); + textUnitDTOs = textUnitSearcher.search(textUnitSearcherParameters); + + assertEquals(1, textUnitDTOs.size()); + textUnitDTO = textUnitDTOs.get(0); + 
assertEquals("C:\\Users\\test", textUnitDTO.getName()); + assertEquals("C:\\Users\\test", textUnitDTO.getSource()); + assertEquals("C:\\Users\\test jp", textUnitDTO.getTarget()); + } + @Test public void testLocalizePoPluralRu() throws Exception {