Skip to content

Commit 152e06b

Browse files
committed
Optimise writing of serialised Work Unit to File system
1 parent 9eeca09 commit 152e06b

File tree

2 files changed

+5
-31
lines changed

2 files changed

+5
-31
lines changed

gobblin-api/src/main/java/org/apache/gobblin/compat/hadoop/TextSerializer.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ public class TextSerializer {
3131
*/
3232
public static void writeStringAsText(DataOutput stream, String str) throws IOException {
3333
writeVLong(stream, str.length());
34-
stream.writeBytes(str);
34+
stream.writeChars(str);
3535
}
3636

3737
/**
@@ -42,7 +42,7 @@ public static String readTextAsString(DataInput in) throws IOException {
4242
StringBuilder sb = new StringBuilder();
4343

4444
for (int i = 0; i < bufLen; i++) {
45-
sb.append((char) in.readByte());
45+
sb.append(in.readChar());
4646
}
4747
return sb.toString();
4848
}

gobblin-api/src/test/java/org/apache/gobblin/compat/TextSerializerTest.java

Lines changed: 3 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
import java.io.DataOutputStream;
2323
import java.io.IOException;
2424

25-
import org.apache.hadoop.io.Text;
2625
import org.testng.Assert;
2726
import org.testng.annotations.Test;
2827

@@ -35,9 +34,7 @@ public class TextSerializerTest {
3534
private static final String[] textsToSerialize = new String[]{"abracadabra", Strings.repeat("longString", 128000)};
3635

3736
@Test
38-
public void testSerialize()
39-
throws IOException {
40-
37+
public void testWriteAndRead() throws IOException {
4138
// Use our serializer, verify our own deserializer can read the string back
4239
for (String textToSerialize : textsToSerialize) {
4340
ByteArrayOutputStream bOs = new ByteArrayOutputStream();
@@ -49,31 +46,8 @@ public void testSerialize()
4946
ByteArrayInputStream bIn = new ByteArrayInputStream(bOs.toByteArray());
5047
DataInputStream dataInputStream = new DataInputStream(bIn);
5148

52-
Text hadoopText = new Text();
53-
hadoopText.readFields(dataInputStream);
54-
55-
Assert.assertEquals(hadoopText.toString(), textToSerialize);
56-
}
57-
}
58-
59-
@Test
60-
public void testDeserialize() throws IOException {
61-
// Use Hadoop's serializer, verify our deserializer can read the string back
62-
for (String textToSerialize : textsToSerialize) {
63-
ByteArrayOutputStream bOs = new ByteArrayOutputStream();
64-
DataOutputStream dataOutputStream = new DataOutputStream(bOs);
65-
66-
Text hadoopText = new Text();
67-
hadoopText.set(textToSerialize);
68-
hadoopText.write(dataOutputStream);
69-
dataOutputStream.close();
70-
71-
ByteArrayInputStream bIn = new ByteArrayInputStream(bOs.toByteArray());
72-
DataInputStream dataInputStream = new DataInputStream(bIn);
73-
74-
String deserializedString = TextSerializer.readTextAsString(dataInputStream);
75-
76-
Assert.assertEquals(deserializedString, textToSerialize);
49+
String deserializedText = TextSerializer.readTextAsString(dataInputStream);
50+
Assert.assertEquals(deserializedText, textToSerialize);
7751
}
7852
}
7953
}

0 commit comments

Comments
 (0)