1 | 1 | package com.snowflake.kafka.connector.internal.streaming; |
2 | 2 |
3 | | -import static org.apache.kafka.common.record.TimestampType.NO_TIMESTAMP_TYPE; |
4 | | - |
5 | | -import com.fasterxml.jackson.core.JsonProcessingException; |
6 | | -import com.google.common.collect.ImmutableMap; |
7 | 3 | import com.snowflake.kafka.connector.dlq.KafkaRecordErrorReporter; |
8 | 4 | import com.snowflake.kafka.connector.internal.KCLogger; |
9 | | -import com.snowflake.kafka.connector.internal.SnowflakeErrors; |
10 | | -import com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException; |
11 | | -import com.snowflake.kafka.connector.records.RecordService; |
12 | | -import com.snowflake.kafka.connector.records.SnowflakeJsonSchema; |
13 | | -import com.snowflake.kafka.connector.records.SnowflakeRecordContent; |
14 | | -import java.io.ByteArrayOutputStream; |
15 | | -import java.io.ObjectOutputStream; |
| 5 | +import com.snowflake.kafka.connector.records.SnowflakeMetadataConfig; |
| 6 | +import com.snowflake.kafka.connector.records.SnowflakeSinkRecord; |
16 | 7 | import java.util.Map; |
17 | | -import org.apache.kafka.connect.data.Schema; |
18 | 8 | import org.apache.kafka.connect.errors.DataException; |
19 | 9 | import org.apache.kafka.connect.sink.SinkRecord; |
20 | 10 |
21 | | -/** Service to transform data from Kafka format into a map that is accepted by ingest sdk. */ |
| 11 | +/** |
| 12 | + * Service to transform data from Kafka format into a map that is accepted by the Snowflake |
| 13 | + * Streaming Ingest SDK. |
| 14 | + */ |
22 | 15 | public class StreamingRecordService { |
| 16 | + |
23 | 17 | private static final KCLogger LOGGER = new KCLogger(StreamingRecordService.class.getName()); |
24 | 18 |
25 | | - private final RecordService recordService; |
26 | 19 | private final KafkaRecordErrorReporter kafkaRecordErrorReporter; |
| 20 | + private final SnowflakeMetadataConfig metadataConfig; |
27 | 21 |
28 | 22 | public StreamingRecordService( |
29 | | - RecordService recordService, KafkaRecordErrorReporter kafkaRecordErrorReporter) { |
30 | | - this.recordService = recordService; |
| 23 | + KafkaRecordErrorReporter kafkaRecordErrorReporter, SnowflakeMetadataConfig metadataConfig) { |
31 | 24 | this.kafkaRecordErrorReporter = kafkaRecordErrorReporter; |
| 25 | + this.metadataConfig = metadataConfig; |
32 | 26 | } |
33 | 27 |
34 | | - /** |
35 | | - * @param kafkaSinkRecord a record in Kafka format |
36 | | - * @return a map that format depends on the schematization settings |
37 | | - */ |
38 | 28 | public Map<String, Object> transformData(SinkRecord kafkaSinkRecord) { |
39 | | - SinkRecord snowflakeSinkRecord = getSnowflakeSinkRecordFromKafkaRecord(kafkaSinkRecord); |
40 | | - // broken record |
41 | | - if (isRecordBroken(snowflakeSinkRecord)) { |
42 | | - // check for error tolerance and log tolerance values |
43 | | - // errors.log.enable and errors.tolerance |
| 29 | + SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaSinkRecord, metadataConfig); |
| 30 | + |
| 31 | + if (record.isBroken()) { |
44 | 32 | LOGGER.debug( |
45 | 33 | "Broken record offset:{}, topic:{}", |
46 | 34 | kafkaSinkRecord.kafkaOffset(), |
47 | 35 | kafkaSinkRecord.topic()); |
48 | 36 | kafkaRecordErrorReporter.reportError(kafkaSinkRecord, new DataException("Broken Record")); |
49 | | - } else { |
50 | | - // lag telemetry, note that sink record timestamp might be null |
51 | | - if (kafkaSinkRecord.timestamp() != null |
52 | | - && kafkaSinkRecord.timestampType() != NO_TIMESTAMP_TYPE) { |
53 | | - // TODO:SNOW-529751 telemetry |
54 | | - } |
55 | | - |
56 | | - // Convert this records into Json Schema which has content and metadata, add it to DLQ if |
57 | | - // there is an exception |
58 | | - try { |
59 | | - return recordService.getProcessedRecordForStreamingIngest(snowflakeSinkRecord); |
60 | | - } catch (JsonProcessingException e) { |
61 | | - LOGGER.warn( |
62 | | - "Record has JsonProcessingException offset:{}, topic:{}", |
63 | | - kafkaSinkRecord.kafkaOffset(), |
64 | | - kafkaSinkRecord.topic()); |
65 | | - kafkaRecordErrorReporter.reportError(kafkaSinkRecord, e); |
66 | | - } catch (SnowflakeKafkaConnectorException e) { |
67 | | - if (e.checkErrorCode(SnowflakeErrors.ERROR_0010)) { |
68 | | - LOGGER.warn( |
69 | | - "Cannot parse record offset:{}, topic:{}. Sending to DLQ.", |
70 | | - kafkaSinkRecord.kafkaOffset(), |
71 | | - kafkaSinkRecord.topic()); |
72 | | - kafkaRecordErrorReporter.reportError(kafkaSinkRecord, e); |
73 | | - } else { |
74 | | - throw e; |
75 | | - } |
76 | | - } |
77 | | - } |
78 | | - |
79 | | - // return empty |
80 | | - return ImmutableMap.of(); |
81 | | - } |
82 | | - |
83 | | - /** |
84 | | - * Converts the original kafka sink record into a Json Record. i.e key and values are converted |
85 | | - * into Json so that it can be used to insert into variant column of Snowflake Table. |
86 | | - * |
87 | | - * <p>TODO: SNOW-630885 - When schematization is enabled, we should create the map directly from |
88 | | - * the SinkRecord instead of first turning it into json |
89 | | - */ |
90 | | - private SinkRecord getSnowflakeSinkRecordFromKafkaRecord(final SinkRecord kafkaSinkRecord) { |
91 | | - SinkRecord snowflakeRecord = kafkaSinkRecord; |
92 | | - if (shouldConvertContent(kafkaSinkRecord.value())) { |
93 | | - snowflakeRecord = handleNativeRecord(kafkaSinkRecord, false); |
94 | | - } |
95 | | - if (shouldConvertContent(kafkaSinkRecord.key())) { |
96 | | - snowflakeRecord = handleNativeRecord(snowflakeRecord, true); |
| 37 | + return Map.of(); |
97 | 38 | } |
98 | 39 |
99 | | - return snowflakeRecord; |
100 | | - } |
101 | | - |
102 | | - private boolean shouldConvertContent(final Object content) { |
103 | | - return content != null && !(content instanceof SnowflakeRecordContent); |
104 | | - } |
105 | | - |
106 | | - /** |
107 | | - * This would always return false for streaming ingest use case since isBroken field is never set. |
108 | | - * isBroken is set only when using Custom snowflake converters and the content was not json |
109 | | - * serializable. |
110 | | - * |
111 | | - * <p>For Community converters, the kafka record will not be sent to Kafka connector if the record |
112 | | - * is not serializable. |
113 | | - */ |
114 | | - private boolean isRecordBroken(final SinkRecord record) { |
115 | | - return isContentBroken(record.value()) || isContentBroken(record.key()); |
116 | | - } |
117 | | - |
118 | | - private boolean isContentBroken(final Object content) { |
119 | | - return content != null && ((SnowflakeRecordContent) content).isBroken(); |
120 | | - } |
121 | | - |
122 | | - private SinkRecord handleNativeRecord(SinkRecord record, boolean isKey) { |
123 | | - SnowflakeRecordContent newSFContent; |
124 | | - Schema schema = isKey ? record.keySchema() : record.valueSchema(); |
125 | | - Object content = isKey ? record.key() : record.value(); |
126 | | - try { |
127 | | - newSFContent = new SnowflakeRecordContent(schema, content); |
128 | | - } catch (Exception e) { |
129 | | - LOGGER.error("Native content parser error:\n{}", e.getMessage()); |
130 | | - try { |
131 | | - // try to serialize this object and send that as broken record |
132 | | - ByteArrayOutputStream out = new ByteArrayOutputStream(); |
133 | | - ObjectOutputStream os = new ObjectOutputStream(out); |
134 | | - os.writeObject(content); |
135 | | - newSFContent = new SnowflakeRecordContent(out.toByteArray()); |
136 | | - } catch (Exception serializeError) { |
137 | | - LOGGER.error( |
138 | | - "Failed to convert broken native record to byte data:\n{}", |
139 | | - serializeError.getMessage()); |
140 | | - throw e; |
141 | | - } |
142 | | - } |
143 | | - // create new sinkRecord |
144 | | - Schema keySchema = isKey ? new SnowflakeJsonSchema() : record.keySchema(); |
145 | | - Object keyContent = isKey ? newSFContent : record.key(); |
146 | | - Schema valueSchema = isKey ? record.valueSchema() : new SnowflakeJsonSchema(); |
147 | | - Object valueContent = isKey ? record.value() : newSFContent; |
148 | | - return new SinkRecord( |
149 | | - record.topic(), |
150 | | - record.kafkaPartition(), |
151 | | - keySchema, |
152 | | - keyContent, |
153 | | - valueSchema, |
154 | | - valueContent, |
155 | | - record.kafkaOffset(), |
156 | | - record.timestamp(), |
157 | | - record.timestampType(), |
158 | | - record.headers()); |
| 40 | +    // Tombstone records are handled by the caller (shouldSkipNullValue check);
| 41 | +    // if a tombstone reaches this point, it should be ingested as an empty record.
| 42 | + return record.getContentWithMetadata(metadataConfig.shouldIncludeAllMetadata()); |
159 | 43 | } |
160 | 44 | } |
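
For reference, a minimal usage sketch of the refactored service, assuming only the constructor and `transformData` signatures visible in this diff. The no-arg `SnowflakeMetadataConfig` constructor, the lambda-compatible `KafkaRecordErrorReporter`, and the record values below are illustrative assumptions, not the connector's actual wiring.

```java
import com.snowflake.kafka.connector.dlq.KafkaRecordErrorReporter;
import com.snowflake.kafka.connector.internal.streaming.StreamingRecordService;
import com.snowflake.kafka.connector.records.SnowflakeMetadataConfig;
import java.util.Map;
import org.apache.kafka.connect.sink.SinkRecord;

public class StreamingRecordServiceUsageSketch {
  public static void main(String[] args) {
    // Assumption: KafkaRecordErrorReporter has a single abstract method, so a lambda works here.
    KafkaRecordErrorReporter reporter =
        (record, error) ->
            System.err.println("DLQ offset=" + record.kafkaOffset() + " cause=" + error);

    // Assumption: SnowflakeMetadataConfig offers a no-arg constructor with default metadata settings.
    SnowflakeMetadataConfig metadataConfig = new SnowflakeMetadataConfig();

    StreamingRecordService service = new StreamingRecordService(reporter, metadataConfig);

    // A plain JSON-string record; key/value schemas are omitted for brevity.
    SinkRecord kafkaRecord =
        new SinkRecord("my-topic", 0, null, "key-1", null, "{\"a\": 1}", 42L);

    // Broken records are reported to the error reporter and an empty map comes back;
    // otherwise the returned map (content plus metadata) is what the caller hands to
    // the Streaming Ingest SDK.
    Map<String, Object> row = service.transformData(kafkaRecord);
    System.out.println(row);
  }
}
```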