Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,10 @@ private ImmutableMap<String, SourceColumnType> getTableCols(
.put("TINYTEXT", IndexType.STRING)
.put("DATETIME", IndexType.TIME_STAMP)
.put("TIMESTAMP", IndexType.TIME_STAMP)
// FLOAT is listed as a numeric type in the MySQL reference:
// https://dev.mysql.com/doc/refman/8.4/en/numeric-types.html
// However, the end goal here is to map the column to Java's Float.class,
// so a distinct source IndexType is needed to map to Float.class.
.put("FLOAT", IndexType.FLOAT)
.build();

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,13 @@ public Boundary<T> merge(Boundary<?> other) {
*/
public boolean isSplittable(@Nullable ProcessContext processContext) {
T mid = splitPoint(processContext);
// TODO: Support approximate types such as FLOAT and DOUBLE, which do not have strict equality.
// Approximate values should be considered equal when their difference is smaller than a delta,
// where delta is the minimum step between two values.
// The delta would come from either:
// - PartitionColumn.delta, filled in from the granularity of the source type
// (e.g. for FLOAT(size, d), d is the number of decimals), or
// - a fallback value defined in the configuration file.
// The boundary is splittable only when the split point differs from both of its ends.
return !(Objects.equal(end(), mid)) && !(Objects.equal(start(), mid));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ public class BoundaryExtractorFactory {
.put(
Timestamp.class,
(BoundaryExtractor<Timestamp>) BoundaryExtractorFactory::fromTimestamps)
.put(
Float.class,
(BoundaryExtractor<Float>) BoundaryExtractorFactory::fromFloats)
.build();

/**
Expand Down Expand Up @@ -178,5 +181,21 @@ private static Boundary<Timestamp> fromTimestamps(
.build();
}

/**
 * Builds a {@link Float} boundary from the next row of {@code resultSet}, reading column 1 as the
 * start and column 2 as the end.
 *
 * <p>{@link ResultSet#getFloat(int)} returns {@code 0} for SQL {@code NULL}, so {@link
 * ResultSet#wasNull()} is consulted after each read and a {@code NULL} column is propagated as a
 * {@code null} bound instead of a misleading {@code 0.0f}.
 *
 * @param partitionColumn partition column; its column class must be {@link Float}.
 * @param resultSet result set positioned before the row holding the two bounds.
 * @param boundaryTypeMapper optional mapper passed through to the boundary unchanged.
 * @return boundary with the extracted start and end.
 * @throws SQLException if reading from {@code resultSet} fails.
 * @throws IllegalArgumentException if the partition column class is not {@link Float}.
 */
private static Boundary<Float> fromFloats(
    PartitionColumn partitionColumn,
    ResultSet resultSet,
    @Nullable BoundaryTypeMapper boundaryTypeMapper)
    throws SQLException {
  Preconditions.checkArgument(partitionColumn.columnClass().equals(Float.class));
  resultSet.next();
  // wasNull() reports on the most recently read column, so check it right after each getFloat.
  float rawStart = resultSet.getFloat(1);
  Float start = resultSet.wasNull() ? null : rawStart;
  float rawEnd = resultSet.getFloat(2);
  Float end = resultSet.wasNull() ? null : rawEnd;
  return Boundary.<Float>builder()
      .setPartitionColumn(partitionColumn)
      .setStart(start)
      .setEnd(end)
      .setBoundarySplitter(BoundarySplitterFactory.create(Float.class))
      .setBoundaryTypeMapper(boundaryTypeMapper)
      .build();
}

/** Private no-op constructor; prevents instantiation from outside this class. */
private BoundaryExtractorFactory() {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@ public class BoundarySplitterFactory {
(BoundarySplitter<Timestamp>)
(start, end, partitionColumn, boundaryTypeMapper, processContext) ->
splitTimestamps(start, end))
.put(
Float.class,
(BoundarySplitter<Float>)
(start, end, partitionColumn, boundaryTypeMapper, processContext) ->
splitFloats(start, end))
.build();

/**
Expand Down Expand Up @@ -263,4 +268,29 @@ protected static Timestamp instantToTimestamp(Instant instant) {
/**
 * Computes the split point of two timestamps by converting both to instants, splitting the
 * instants, and converting the result back to a {@link Timestamp}.
 */
private static Timestamp splitTimestamps(Timestamp start, Timestamp end) {
  Instant midpoint = splitInstants(timeStampToInstant(start), timeStampToInstant(end));
  return instantToTimestamp(midpoint);
}

private static Float splitFloats(Float start, Float end) {
if (start == null && end == null) {
return null;
Copy link
Author

@NicolasRicheYopp NicolasRicheYopp Nov 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BoundarySplitter interface comments does not specify what should be the behavior when both start and end parameters are null :

  • Implementations must not assume that that start is less than end. It depends on the
    specific ordering used by the database schema.
  • Implementations must be overflow safe.
  • Implementations must guarantee that the splitter is idempotent. The same boundary must get
    same split point through the lifetime of the migration.
  • Implementations must guarantee that the database would teat the splitpoint as being
    in-between the start and end as per the ordering and collation used for the partition
    column.

For consistency, I then followed the behavior used by other split functions:
When both start and end params are null the split functions return null.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's right.

}
if (start == null) {
start = -Float.MAX_VALUE;
}
if (end == null) {
end = Float.MAX_VALUE;
}

// Calculate overflow safe mid-point

// If signs are different, simple addition is safe from overflow
// because the values cancel each other out towards zero.
if ((start < 0 && end > 0) || (start > 0 && end < 0)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For float and double we also need to enhance isSplittable to stop the splitting at appropriate precision.

return (start + end) / 2.0f;
}

// If signs are the same (both positive or both negative),
// we use the offset formula to prevent overflow (Infinity).
// This works regardless of whether start > end or start < end.
return start + (end - start) / 2.0f;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ public abstract class PartitionColumn implements Serializable {
*/
public abstract Class columnClass();

// TODO: Add an optional delta value used to set the granularity of approximate types like Float or Double.
// For example, to support MySQL primary keys of type FLOAT(size, d) and DOUBLE PRECISION(size, d), where d is the
// number of decimals and represents the minimal delta between two values.

/**
* String Collation. Must be set for if {@link PartitionColumn#columnClass()} is {@link String}
* and must not be set otherwise. Defaults to null.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ public enum IndexType {
BINARY,
STRING,
TIME_STAMP,
FLOAT,
OTHER
};

Expand All @@ -153,5 +154,6 @@ public enum IndexType {
IndexType.STRING, String.class,
IndexType.BIG_INT_UNSIGNED, BigDecimal.class,
IndexType.BINARY, BoundaryExtractorFactory.BYTE_ARRAY_CLASS,
IndexType.TIME_STAMP, Timestamp.class);
IndexType.TIME_STAMP, Timestamp.class,
IndexType.FLOAT, Float.class);
}
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ public void testDiscoverIndexesBasic() throws SQLException, RetriableSchemaDisco
.setIsPrimary(false)
.setCardinality(42L)
.setOrdinalPosition(1)
.setIndexType(IndexType.OTHER)
.setIndexType(IndexType.FLOAT)
.build(),
SourceColumnIndexInfo.builder()
.setColumnName("testCol1")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,31 @@ public void testFromTimestampsEmptyTable() throws SQLException {
assertThat(boundary.split(null).getLeft().end()).isNull();
}

/** Verifies Float boundary extraction, the resulting split point, and the column-class check. */
@Test
public void testFromFloat() throws SQLException {
  // Stub a single row: column 1 holds the start, column 2 the end.
  when(mockResultSet.next()).thenReturn(true);
  when(mockResultSet.getFloat(1)).thenReturn(-50.0f);
  when(mockResultSet.getFloat(2)).thenReturn(100.0f);

  BoundaryExtractor<Float> floatExtractor = BoundaryExtractorFactory.create(Float.class);
  PartitionColumn floatColumn =
      PartitionColumn.builder().setColumnName("col1").setColumnClass(Float.class).build();

  Boundary<Float> extracted = floatExtractor.getBoundary(floatColumn, mockResultSet, null);

  assertThat(extracted.start()).isEqualTo(-50.0f);
  assertThat(extracted.end()).isEqualTo(100.0f);
  // The midpoint of [-50, 100] is 25 and is shared by both halves of the split.
  assertThat(extracted.split(null).getLeft().end()).isEqualTo(25.0f);
  assertThat(extracted.split(null).getRight().start()).isEqualTo(25.0f);

  // A partition column of a different class must be rejected.
  PartitionColumn longColumn =
      PartitionColumn.builder().setColumnName("col1").setColumnClass(Long.class).build();
  assertThrows(
      IllegalArgumentException.class,
      () -> floatExtractor.getBoundary(longColumn, mockResultSet, null));
}

@Test
public void testFromUnsupported() {
assertThrows(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,65 @@ public void testTimeStampSplitting() {
.isEqualTo(Timestamp.valueOf("1975-01-01 00:00:00.000000000"));
}

/**
 * Exercises the Float splitter on ascending, inverted, null-bounded, extreme and degenerate
 * ranges.
 */
@Test
public void testFloatBoundarySplitter() {
  BoundarySplitter<Float> splitter = BoundarySplitterFactory.create(Float.class);

  // 1. Standard ascending ranges
  // 0 to 10 -> 5
  assertThat(splitter.getSplitPoint(0.0f, 10.0f, null, null, null)).isEqualTo(5.0f);
  // Negative range: -20 to -10 -> -15
  assertThat(splitter.getSplitPoint(-20.0f, -10.0f, null, null, null)).isEqualTo(-15.0f);
  // Crossing zero, symmetric: -100 to 100 -> 0
  assertThat(splitter.getSplitPoint(-100.0f, 100.0f, null, null, null)).isEqualTo(0.0f);
  // Crossing zero, asymmetric: -50 to 100 -> 25
  assertThat(splitter.getSplitPoint(-50.0f, 100.0f, null, null, null)).isEqualTo(25.0f);

  // 2. Inverted ranges (start > end)
  // 10 to 0 -> 5 (same as 0 to 10)
  assertThat(splitter.getSplitPoint(10.0f, 0.0f, null, null, null)).isEqualTo(5.0f);
  // -10 to -20 -> -15 (same as -20 to -10)
  assertThat(splitter.getSplitPoint(-10.0f, -20.0f, null, null, null)).isEqualTo(-15.0f);
  // 100 to -100 -> 0 (same as -100 to 100)
  assertThat(splitter.getSplitPoint(100.0f, -100.0f, null, null, null)).isEqualTo(0.0f);
  // 100 to -50 -> 25 (same as -50 to 100)
  assertThat(splitter.getSplitPoint(100.0f, -50.0f, null, null, null)).isEqualTo(25.0f);

  // 3. Null handling
  // Both null: the splitter does not process the range and returns null.
  assertThat(splitter.getSplitPoint(null, null, null, null, null)).isNull();
  // Null start (-MAX) to 0 -> half of the negative maximum
  assertThat(splitter.getSplitPoint(null, 0.0f, null, null, null))
      .isEqualTo(-Float.MAX_VALUE / 2.0f);
  // 0 to null end (+MAX) -> half of the maximum
  assertThat(splitter.getSplitPoint(0.0f, null, null, null, null))
      .isEqualTo(Float.MAX_VALUE / 2.0f);

  // 4. Overflow protection and extreme values
  // Note: Float.MAX_VALUE - 100.0f == Float.MAX_VALUE in float arithmetic, so such offsets do not
  // yield distinct endpoints. Use exactly representable powers of two whose naive sum overflows.
  // Large positives: 2^127 to 1.5 * 2^127 -> 1.25 * 2^127
  assertThat(splitter.getSplitPoint(0x1.0p127f, 0x1.8p127f, null, null, null))
      .isEqualTo(0x1.4p127f);
  // Large positives, inverted: 1.5 * 2^127 to 2^127 -> 1.25 * 2^127
  assertThat(splitter.getSplitPoint(0x1.8p127f, 0x1.0p127f, null, null, null))
      .isEqualTo(0x1.4p127f);
  // Large negatives: -1.5 * 2^127 to -2^127 -> -1.25 * 2^127
  assertThat(splitter.getSplitPoint(-0x1.8p127f, -0x1.0p127f, null, null, null))
      .isEqualTo(-0x1.4p127f);
  // Full range: MAX to -MAX (inverted extreme) -> 0
  assertThat(splitter.getSplitPoint(Float.MAX_VALUE, -Float.MAX_VALUE, null, null, null))
      .isEqualTo(0.0f);

  // 5. Identity: start and end are equal
  assertThat(splitter.getSplitPoint(5.0f, 5.0f, null, null, null)).isEqualTo(5.0f);
}

/* Not for production as it does not look at collation ordering */
private class TestBoundaryTypeMapper implements BoundaryTypeMapper {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,7 @@ private Map<String, List<Map<String, Object>>> getExpectedData() {
"2005-01-01T00:01:54.123456000Z",
"2037-12-30T23:59:59Z",
"2038-01-18T23:59:59Z"));
expectedData.put("float_pk", createRows("float_pk", "45.56", "3.4E38", "-3.4E38"));
return expectedData;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,11 @@ CREATE TABLE timestamp_pk_table (
CONSTRAINT PRIMARY KEY (id)
);

-- Table whose primary key column is of type FLOAT.
CREATE TABLE float_pk_table (
id FLOAT PRIMARY KEY,
float_pk_col FLOAT NOT NULL
);

ALTER TABLE `bigint_table` MODIFY `id` INT AUTO_INCREMENT;
ALTER TABLE `bigint_unsigned_table` MODIFY `id` INT AUTO_INCREMENT;
ALTER TABLE `binary_table` MODIFY `id` INT AUTO_INCREMENT;
Expand Down Expand Up @@ -411,6 +416,8 @@ INSERT INTO `tiny_text_pk_table` (`id`, `tiny_text_pk_col`) VALUES ('AA==', 'AA=
INSERT INTO `date_time_pk_table` (`id`, `date_time_pk_col`) VALUES ('1000-01-01 00:00:00', '1000-01-01 00:00:00'), ('1000-01-01 00:00:01', '1000-01-01 00:00:01'),
('2001-01-01 00:01:54.123456', '2001-01-01 00:01:54.123456'),
('9999-12-30 23:59:59', '9999-12-30 23:59:59'), ('9999-12-31 23:59:59', '9999-12-31 23:59:59');
-- One row per value; each tuple supplies both listed columns (id, float_pk_col).
INSERT INTO `float_pk_table` (`id`, `float_pk_col`) VALUES (45.56, 45.56), (3.4E38, 3.4E38), (-3.4E38, -3.4E38);

SET time_zone = 'Asia/Kolkata';
INSERT INTO `date_time_pk_table` (`id`, `date_time_pk_col`) VALUES ('2005-01-01 05:31:54.123456', '2005-01-01 05:31:54.123456');
SET time_zone = SYSTEM;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -300,3 +300,8 @@ CREATE TABLE IF NOT EXISTS timestamp_pk_table (
id TIMESTAMP NOT NULL,
timestamp_pk_col TIMESTAMP NOT NULL,
) PRIMARY KEY(id);

-- Spanner GoogleSQL defines FLOAT32 and FLOAT64 rather than FLOAT; FLOAT32 is the
-- single-precision type. NOTE(review): confirm FLOAT32 (vs FLOAT64) against the type mapping.
CREATE TABLE IF NOT EXISTS float_pk_table (
    id FLOAT32 NOT NULL,
    float_pk_col FLOAT32 NOT NULL,
) PRIMARY KEY(id);