2222import org .apache .hudi .DataSourceWriteOptions ;
2323import org .apache .hudi .avro .HoodieAvroUtils ;
2424import org .apache .hudi .common .config .TypedProperties ;
25+ import org .apache .hudi .common .util .Option ;
2526import org .apache .hudi .exception .HoodieDeltaStreamerException ;
2627import org .apache .hudi .exception .HoodieException ;
2728import org .apache .hudi .exception .HoodieNotSupportedException ;
4041import java .io .UnsupportedEncodingException ;
4142import java .net .URLEncoder ;
4243import java .nio .charset .StandardCharsets ;
43- import java .text .ParseException ;
4444import java .util .concurrent .TimeUnit ;
4545
4646import static java .util .concurrent .TimeUnit .MILLISECONDS ;
@@ -61,7 +61,8 @@ public enum TimestampType implements Serializable {
6161 private final TimeUnit timeUnit ;
6262 private final TimestampType timestampType ;
6363 private final String outputDateFormat ;
64- private DateTimeFormatter inputFormatter ;
64+ private transient Option <DateTimeFormatter > inputFormatter ;
65+ private transient DateTimeFormatter partitionFormatter ;
6566 private final HoodieDateTimeParser parser ;
6667
6768 // TimeZone detailed settings reference
@@ -108,13 +109,8 @@ public TimestampBasedKeyGenerator(TypedProperties config) throws IOException {
108109 this .parser = DataSourceUtils .createDateTimeParser (config , dateTimeParserClass );
109110 this .outputDateTimeZone = parser .getOutputDateTimeZone ();
110111 this .outputDateFormat = parser .getOutputDateFormat ();
111- this .inputFormatter = parser .getInputFormatter ();
112112 this .timestampType = TimestampType .valueOf (config .getString (Config .TIMESTAMP_TYPE_FIELD_PROP ));
113113
114- if (timestampType == TimestampType .DATE_STRING || timestampType == TimestampType .MIXED ) {
115- this .inputFormatter = parser .getInputFormatter ();
116- }
117-
118114 switch (this .timestampType ) {
119115 case EPOCHMILLISECONDS :
120116 timeUnit = MILLISECONDS ;
@@ -146,18 +142,29 @@ public String getPartitionPath(GenericRecord record) {
146142 }
147143 }
148144
145+ /**
146+ * The function takes care of lazily initialising dateTimeFormatter variables only once.
147+ */
148+ private void initIfNeeded () {
149+ if (this .inputFormatter == null ) {
150+ this .inputFormatter = parser .getInputFormatter ();
151+ }
152+ if (this .partitionFormatter == null ) {
153+ this .partitionFormatter = DateTimeFormat .forPattern (outputDateFormat );
154+ if (this .outputDateTimeZone != null ) {
155+ partitionFormatter = partitionFormatter .withZone (outputDateTimeZone );
156+ }
157+ }
158+ }
159+
149160 /**
150161 * Parse and fetch partition path based on data type.
151162 *
152163 * @param partitionVal partition path object value fetched from record/row
153164 * @return the parsed partition path based on data type
154- * @throws ParseException on any parse exception
155165 */
156- private String getPartitionPath (Object partitionVal ) throws ParseException {
157- DateTimeFormatter partitionFormatter = DateTimeFormat .forPattern (outputDateFormat );
158- if (this .outputDateTimeZone != null ) {
159- partitionFormatter = partitionFormatter .withZone (outputDateTimeZone );
160- }
166+ private String getPartitionPath (Object partitionVal ) {
167+ initIfNeeded ();
161168 long timeMs ;
162169 if (partitionVal instanceof Double ) {
163170 timeMs = convertLongTimeToMillis (((Double ) partitionVal ).longValue ());
@@ -166,13 +173,16 @@ private String getPartitionPath(Object partitionVal) throws ParseException {
166173 } else if (partitionVal instanceof Long ) {
167174 timeMs = convertLongTimeToMillis ((Long ) partitionVal );
168175 } else if (partitionVal instanceof CharSequence ) {
169- DateTime parsedDateTime = inputFormatter .parseDateTime (partitionVal .toString ());
176+ if (!inputFormatter .isPresent ()) {
177+ throw new HoodieException ("Missing inputformatter. Ensure " + Config .TIMESTAMP_INPUT_DATE_FORMAT_PROP + " config is set when timestampType is DATE_STRING or MIXED!" );
178+ }
179+ DateTime parsedDateTime = inputFormatter .get ().parseDateTime (partitionVal .toString ());
170180 if (this .outputDateTimeZone == null ) {
171181 // Use the timezone that came off the date that was passed in, if it had one
172182 partitionFormatter = partitionFormatter .withZone (parsedDateTime .getZone ());
173183 }
174184
175- timeMs = inputFormatter .parseDateTime (partitionVal .toString ()).getMillis ();
185+ timeMs = inputFormatter .get (). parseDateTime (partitionVal .toString ()).getMillis ();
176186 } else {
177187 throw new HoodieNotSupportedException (
178188 "Unexpected type for partition field: " + partitionVal .getClass ().getName ());
0 commit comments