From e63ef39c40772a4c59b1a77ff67de71d5c2494f1 Mon Sep 17 00:00:00 2001 From: "xiaofan.chen" Date: Wed, 24 Jul 2024 22:52:36 +0800 Subject: [PATCH 01/35] perf: auto batch flush --- .../java/io/lettuce/core/ClientOptions.java | 71 +- .../io/lettuce/core/ContextualChannel.java | 257 +++++ .../java/io/lettuce/core/RedisClient.java | 6 +- .../ClusterNodeBatchFlushEndpoint.java | 55 + .../core/cluster/RedisClusterClient.java | 11 +- .../DummyContextualChannelInstances.java | 25 + .../context/BatchFlushEndPointContext.java | 155 +++ .../core/context/ConnectionContext.java | 115 ++ .../UnboundedMpscOfferFirstQueue.java | 35 + .../impl/ConcurrentLinkedOfferFirstQueue.java | 44 + .../JcToolsUnboundedMpscOfferFirstQueue.java | 66 ++ .../unmodifiabledeque/UnmodifiableDeque.java | 211 ++++ .../lettuce/core/internal/LettuceAssert.java | 6 + .../core/protocol/BatchFlushEndpoint.java | 38 + .../lettuce/core/protocol/CommandHandler.java | 26 +- .../core/protocol/ConnectionWatchdog.java | 83 +- .../protocol/DefaultBatchFlushEndpoint.java | 1021 +++++++++++++++++ .../io/lettuce/core/utils/ExceptionUtils.java | 95 ++ 18 files changed, 2290 insertions(+), 30 deletions(-) create mode 100644 src/main/java/io/lettuce/core/ContextualChannel.java create mode 100644 src/main/java/io/lettuce/core/cluster/ClusterNodeBatchFlushEndpoint.java create mode 100644 src/main/java/io/lettuce/core/constant/DummyContextualChannelInstances.java create mode 100644 src/main/java/io/lettuce/core/context/BatchFlushEndPointContext.java create mode 100644 src/main/java/io/lettuce/core/context/ConnectionContext.java create mode 100644 src/main/java/io/lettuce/core/datastructure/queue/offerfirst/UnboundedMpscOfferFirstQueue.java create mode 100644 src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/ConcurrentLinkedOfferFirstQueue.java create mode 100644 src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/JcToolsUnboundedMpscOfferFirstQueue.java create mode 100644 
src/main/java/io/lettuce/core/datastructure/queue/unmodifiabledeque/UnmodifiableDeque.java create mode 100644 src/main/java/io/lettuce/core/protocol/BatchFlushEndpoint.java create mode 100644 src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java create mode 100644 src/main/java/io/lettuce/core/utils/ExceptionUtils.java diff --git a/src/main/java/io/lettuce/core/ClientOptions.java b/src/main/java/io/lettuce/core/ClientOptions.java index aa3d2ba188..b03fa90015 100644 --- a/src/main/java/io/lettuce/core/ClientOptions.java +++ b/src/main/java/io/lettuce/core/ClientOptions.java @@ -19,10 +19,6 @@ */ package io.lettuce.core; -import java.io.Serializable; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; - import io.lettuce.core.api.StatefulConnection; import io.lettuce.core.internal.LettuceAssert; import io.lettuce.core.protocol.DecodeBufferPolicies; @@ -31,6 +27,10 @@ import io.lettuce.core.protocol.ReadOnlyCommands; import io.lettuce.core.resource.ClientResources; +import java.io.Serializable; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; + /** * Client Options to control the behavior of {@link RedisClient}. 
* @@ -69,6 +69,12 @@ public class ClientOptions implements Serializable { public static final TimeoutOptions DEFAULT_TIMEOUT_OPTIONS = TimeoutOptions.create(); + public static final boolean DEFAULT_USE_BATCH_FLUSH = false; + + public static final int DEFAULT_WRITE_SPIN_COUNT = 16; + + public static final int DEFAULT_BATCH_SIZE = 8; + private final boolean autoReconnect; private final boolean cancelCommandsOnReconnectFailure; @@ -97,6 +103,12 @@ public class ClientOptions implements Serializable { private final TimeoutOptions timeoutOptions; + private final boolean useBatchFlush; + + private final int writeSpinCount; + + private final int batchSize; + protected ClientOptions(Builder builder) { this.autoReconnect = builder.autoReconnect; this.cancelCommandsOnReconnectFailure = builder.cancelCommandsOnReconnectFailure; @@ -112,6 +124,9 @@ protected ClientOptions(Builder builder) { this.sslOptions = builder.sslOptions; this.suspendReconnectOnProtocolFailure = builder.suspendReconnectOnProtocolFailure; this.timeoutOptions = builder.timeoutOptions; + this.useBatchFlush = builder.useBatchFlush; + this.writeSpinCount = builder.writeSpinCount; + this.batchSize = builder.batchSize; } protected ClientOptions(ClientOptions original) { @@ -129,6 +144,9 @@ protected ClientOptions(ClientOptions original) { this.sslOptions = original.getSslOptions(); this.suspendReconnectOnProtocolFailure = original.isSuspendReconnectOnProtocolFailure(); this.timeoutOptions = original.getTimeoutOptions(); + this.useBatchFlush = original.useBatchFlush; + this.writeSpinCount = original.getWriteSpinCount(); + this.batchSize = original.batchSize; } /** @@ -192,6 +210,12 @@ public static class Builder { private TimeoutOptions timeoutOptions = DEFAULT_TIMEOUT_OPTIONS; + public boolean useBatchFlush = DEFAULT_USE_BATCH_FLUSH; + + private int writeSpinCount = DEFAULT_WRITE_SPIN_COUNT; + + private int batchSize = DEFAULT_BATCH_SIZE; + protected Builder() { } @@ -247,8 +271,8 @@ public Builder 
bufferUsageRatio(int bufferUsageRatio) { * * @param policy the policy to use in {@link io.lettuce.core.protocol.CommandHandler} * @return {@code this} - * @since 6.0 * @see DecodeBufferPolicies + * @since 6.0 */ public Builder decodeBufferPolicy(DecodeBufferPolicy policy) { @@ -295,8 +319,8 @@ public Builder pingBeforeActivateConnection(boolean pingBeforeActivateConnection * * @param protocolVersion version to use. * @return {@code this} - * @since 6.0 * @see ProtocolVersion#newestSupported() + * @since 6.0 */ public Builder protocolVersion(ProtocolVersion protocolVersion) { @@ -315,9 +339,9 @@ public Builder protocolVersion(ProtocolVersion protocolVersion) { * * @param publishOnScheduler true/false * @return {@code this} - * @since 5.2 * @see org.reactivestreams.Subscriber#onNext(Object) * @see ClientResources#eventExecutorGroup() + * @since 5.2 */ public Builder publishOnScheduler(boolean publishOnScheduler) { this.publishOnScheduler = publishOnScheduler; @@ -422,6 +446,25 @@ public Builder timeoutOptions(TimeoutOptions timeoutOptions) { return this; } + public Builder useBatchFlush(boolean useBatchFlush) { + this.useBatchFlush = useBatchFlush; + return this; + } + + public Builder writeSpinCount(int writeSpinCount) { + LettuceAssert.isPositive(writeSpinCount, "writeSpinCount is not positive"); + + this.writeSpinCount = writeSpinCount; + return this; + } + + public Builder batchSize(int batchSize) { + LettuceAssert.isPositive(batchSize, "batchSize is not positive"); + + this.batchSize = batchSize; + return this; + } + /** * Create a new instance of {@link ClientOptions}. * @@ -439,7 +482,6 @@ public ClientOptions build() { * * @return a {@link ClientOptions.Builder} to create new {@link ClientOptions} whose settings are replicated from the * current {@link ClientOptions}. - * * @since 5.1 */ public ClientOptions.Builder mutate() { @@ -498,7 +540,6 @@ public DecodeBufferPolicy getDecodeBufferPolicy() { * * @return zero. 
* @since 5.2 - * * @deprecated since 6.0 in favor of {@link DecodeBufferPolicy}. */ @Deprecated @@ -637,6 +678,18 @@ public TimeoutOptions getTimeoutOptions() { return timeoutOptions; } + public int getWriteSpinCount() { + return writeSpinCount; + } + + public int getBatchSize() { + return batchSize; + } + + public boolean isUseBatchFlush() { + return useBatchFlush; + } + /** * Behavior of connections in disconnected state. */ diff --git a/src/main/java/io/lettuce/core/ContextualChannel.java b/src/main/java/io/lettuce/core/ContextualChannel.java new file mode 100644 index 0000000000..38b669f8fe --- /dev/null +++ b/src/main/java/io/lettuce/core/ContextualChannel.java @@ -0,0 +1,257 @@ +package io.lettuce.core; + +import io.lettuce.core.context.ConnectionContext; +import io.netty.buffer.ByteBufAllocator; +import io.netty.channel.Channel; +import io.netty.channel.ChannelConfig; +import io.netty.channel.ChannelFuture; +import io.netty.channel.ChannelId; +import io.netty.channel.ChannelMetadata; +import io.netty.channel.ChannelPipeline; +import io.netty.channel.ChannelProgressivePromise; +import io.netty.channel.ChannelPromise; +import io.netty.channel.EventLoop; +import io.netty.util.Attribute; +import io.netty.util.AttributeKey; +import org.jetbrains.annotations.NotNull; + +import java.net.SocketAddress; + +/** + * @author chenxiaofan + */ +public class ContextualChannel implements Channel { + + private final Channel delegate; + + private final ConnectionContext context; + + public ConnectionContext getContext() { + return context; + } + + public Channel getDelegate() { + return delegate; + } + + public ContextualChannel(Channel delegate, ConnectionContext.State initialState) { + this.delegate = delegate; + context = new ConnectionContext(initialState); + } + + @Override + public ChannelId id() { + return delegate.id(); + } + + @Override + public EventLoop eventLoop() { + return delegate.eventLoop(); + } + + @Override + public Channel parent() { + return 
delegate.parent(); + } + + @Override + public ChannelConfig config() { + return delegate.config(); + } + + @Override + public boolean isOpen() { + return delegate.isOpen(); + } + + @Override + public boolean isRegistered() { + return delegate.isRegistered(); + } + + @Override + public boolean isActive() { + return delegate.isActive(); + } + + @Override + public ChannelMetadata metadata() { + return delegate.metadata(); + } + + @Override + public SocketAddress localAddress() { + return delegate.localAddress(); + } + + @Override + public SocketAddress remoteAddress() { + return delegate.remoteAddress(); + } + + @Override + public ChannelFuture closeFuture() { + return delegate.closeFuture(); + } + + @Override + public boolean isWritable() { + return delegate.isWritable(); + } + + @Override + public long bytesBeforeUnwritable() { + return delegate.bytesBeforeUnwritable(); + } + + @Override + public long bytesBeforeWritable() { + return delegate.bytesBeforeWritable(); + } + + @Override + public Unsafe unsafe() { + return delegate.unsafe(); + } + + @Override + public ChannelPipeline pipeline() { + return delegate.pipeline(); + } + + @Override + public ByteBufAllocator alloc() { + return delegate.alloc(); + } + + @Override + public Channel read() { + return delegate.read(); + } + + @Override + public Channel flush() { + return delegate.flush(); + } + + @Override + public ChannelFuture write(Object o) { + return delegate.write(o); + } + + @Override + public ChannelFuture write(Object o, ChannelPromise channelPromise) { + return delegate.write(o, channelPromise); + } + + @Override + public ChannelFuture writeAndFlush(Object o, ChannelPromise channelPromise) { + return delegate.writeAndFlush(o, channelPromise); + } + + @Override + public ChannelFuture writeAndFlush(Object o) { + return delegate.writeAndFlush(o); + } + + @Override + public ChannelPromise newPromise() { + return delegate.newPromise(); + } + + @Override + public ChannelProgressivePromise 
newProgressivePromise() { + return delegate.newProgressivePromise(); + } + + @Override + public ChannelFuture newSucceededFuture() { + return delegate.newSucceededFuture(); + } + + @Override + public ChannelFuture newFailedFuture(Throwable throwable) { + return delegate.newFailedFuture(throwable); + } + + @Override + public ChannelPromise voidPromise() { + return delegate.voidPromise(); + } + + @Override + public ChannelFuture bind(SocketAddress socketAddress) { + return delegate.bind(socketAddress); + } + + @Override + public ChannelFuture connect(SocketAddress socketAddress) { + return delegate.connect(socketAddress); + } + + @Override + public ChannelFuture connect(SocketAddress socketAddress, SocketAddress socketAddress1) { + return delegate.connect(socketAddress, socketAddress1); + } + + @Override + public ChannelFuture disconnect() { + return delegate.disconnect(); + } + + @Override + public ChannelFuture close() { + return delegate.close(); + } + + @Override + public ChannelFuture deregister() { + return delegate.deregister(); + } + + @Override + public ChannelFuture bind(SocketAddress socketAddress, ChannelPromise channelPromise) { + return delegate.bind(socketAddress, channelPromise); + } + + @Override + public ChannelFuture connect(SocketAddress socketAddress, ChannelPromise channelPromise) { + return delegate.connect(socketAddress, channelPromise); + } + + @Override + public ChannelFuture connect(SocketAddress socketAddress, SocketAddress socketAddress1, ChannelPromise channelPromise) { + return delegate.connect(socketAddress, socketAddress1, channelPromise); + } + + @Override + public ChannelFuture disconnect(ChannelPromise channelPromise) { + return delegate.disconnect(channelPromise); + } + + @Override + public ChannelFuture close(ChannelPromise channelPromise) { + return delegate.close(channelPromise); + } + + @Override + public ChannelFuture deregister(ChannelPromise channelPromise) { + return delegate.deregister(channelPromise); + } + + @Override + 
public Attribute attr(AttributeKey attributeKey) { + return delegate.attr(attributeKey); + } + + @Override + public boolean hasAttr(AttributeKey attributeKey) { + return delegate.hasAttr(attributeKey); + } + + @Override + public int compareTo(@NotNull Channel o) { + return this == o ? 0 : this.id().compareTo(o.id()); + } + +} diff --git a/src/main/java/io/lettuce/core/RedisClient.java b/src/main/java/io/lettuce/core/RedisClient.java index 550c5bf104..3622698170 100644 --- a/src/main/java/io/lettuce/core/RedisClient.java +++ b/src/main/java/io/lettuce/core/RedisClient.java @@ -41,6 +41,7 @@ import io.lettuce.core.masterreplica.MasterReplica; import io.lettuce.core.protocol.CommandExpiryWriter; import io.lettuce.core.protocol.CommandHandler; +import io.lettuce.core.protocol.DefaultBatchFlushEndpoint; import io.lettuce.core.protocol.DefaultEndpoint; import io.lettuce.core.protocol.Endpoint; import io.lettuce.core.protocol.PushHandler; @@ -275,8 +276,9 @@ private ConnectionFuture> connectStandalone logger.debug("Trying to get a Redis connection for: {}", redisURI); - DefaultEndpoint endpoint = new DefaultEndpoint(getOptions(), getResources()); - RedisChannelWriter writer = endpoint; + Endpoint endpoint = getOptions().isUseBatchFlush() ? new DefaultBatchFlushEndpoint(getOptions(), getResources()) + : new DefaultEndpoint(getOptions(), getResources()); + RedisChannelWriter writer = (RedisChannelWriter) endpoint; if (CommandExpiryWriter.isSupported(getOptions())) { writer = new CommandExpiryWriter(writer, getOptions(), getResources()); diff --git a/src/main/java/io/lettuce/core/cluster/ClusterNodeBatchFlushEndpoint.java b/src/main/java/io/lettuce/core/cluster/ClusterNodeBatchFlushEndpoint.java new file mode 100644 index 0000000000..977f6f43f9 --- /dev/null +++ b/src/main/java/io/lettuce/core/cluster/ClusterNodeBatchFlushEndpoint.java @@ -0,0 +1,55 @@ +/* + * Copyright 2011-2024 the original author or authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.lettuce.core.cluster; + +import io.lettuce.core.ClientOptions; +import io.lettuce.core.RedisChannelWriter; +import io.lettuce.core.RedisException; +import io.lettuce.core.protocol.DefaultBatchFlushEndpoint; +import io.lettuce.core.resource.ClientResources; + +/** + * Command handler for node connections within the Redis Cluster context. This handler can requeue commands if it is + * disconnected and closed but has commands in the queue. If the handler was connected it would retry commands using the + * {@literal MOVED} or {@literal ASK} redirection. + * + * @author Mark Paluch + */ +public class ClusterNodeBatchFlushEndpoint extends DefaultBatchFlushEndpoint { + + /** + * Initialize a new instance that handles commands from the supplied queue. + * + * @param clientOptions client options for this connection. + * @param clientResources client resources for this connection. + * @param clusterChannelWriter top-most channel writer. + */ + public ClusterNodeBatchFlushEndpoint(ClientOptions clientOptions, ClientResources clientResources, + RedisChannelWriter clusterChannelWriter) { + super(clientOptions, clientResources, clusterChannelWriter != null ? 
cmd -> { + if (cmd.isDone()) { + return; + } + + try { + clusterChannelWriter.write(cmd); + } catch (RedisException e) { + cmd.completeExceptionally(e); + } + } : DefaultBatchFlushEndpoint::cancelCommandOnEndpointClose); + } + +} diff --git a/src/main/java/io/lettuce/core/cluster/RedisClusterClient.java b/src/main/java/io/lettuce/core/cluster/RedisClusterClient.java index a60dfd0d82..fc28913e30 100644 --- a/src/main/java/io/lettuce/core/cluster/RedisClusterClient.java +++ b/src/main/java/io/lettuce/core/cluster/RedisClusterClient.java @@ -63,6 +63,7 @@ import io.lettuce.core.protocol.CommandExpiryWriter; import io.lettuce.core.protocol.CommandHandler; import io.lettuce.core.protocol.DefaultEndpoint; +import io.lettuce.core.protocol.Endpoint; import io.lettuce.core.protocol.PushHandler; import io.lettuce.core.pubsub.PubSubCommandHandler; import io.lettuce.core.pubsub.PubSubEndpoint; @@ -540,9 +541,11 @@ ConnectionFuture> connectToNodeAsync(RedisC assertNotEmpty(initialUris); LettuceAssert.notNull(socketAddressSupplier, "SocketAddressSupplier must not be null"); - ClusterNodeEndpoint endpoint = new ClusterNodeEndpoint(getClusterClientOptions(), getResources(), clusterWriter); + Endpoint endpoint = getClusterClientOptions().isUseBatchFlush() + ? 
new ClusterNodeBatchFlushEndpoint(getClusterClientOptions(), getResources(), clusterWriter) + : new ClusterNodeEndpoint(getClusterClientOptions(), getResources(), clusterWriter); - RedisChannelWriter writer = endpoint; + RedisChannelWriter writer = (RedisChannelWriter) endpoint; if (CommandExpiryWriter.isSupported(getClusterClientOptions())) { writer = new CommandExpiryWriter(writer, getClusterClientOptions(), getResources()); @@ -814,7 +817,7 @@ private , S> Connection */ @SuppressWarnings("unchecked") private , S> ConnectionFuture connectStatefulAsync(T connection, - DefaultEndpoint endpoint, RedisURI connectionSettings, Mono socketAddressSupplier, + Endpoint endpoint, RedisURI connectionSettings, Mono socketAddressSupplier, Supplier commandHandlerSupplier) { ConnectionBuilder connectionBuilder = createConnectionBuilder(connection, connection.getConnectionState(), endpoint, @@ -826,7 +829,7 @@ private , S> ConnectionFuture< } private ConnectionBuilder createConnectionBuilder(RedisChannelHandler connection, ConnectionState state, - DefaultEndpoint endpoint, RedisURI connectionSettings, Mono socketAddressSupplier, + Endpoint endpoint, RedisURI connectionSettings, Mono socketAddressSupplier, Supplier commandHandlerSupplier) { ConnectionBuilder connectionBuilder; diff --git a/src/main/java/io/lettuce/core/constant/DummyContextualChannelInstances.java b/src/main/java/io/lettuce/core/constant/DummyContextualChannelInstances.java new file mode 100644 index 0000000000..c4d08bee6a --- /dev/null +++ b/src/main/java/io/lettuce/core/constant/DummyContextualChannelInstances.java @@ -0,0 +1,25 @@ +package io.lettuce.core.constant; + +import io.lettuce.core.ContextualChannel; +import io.lettuce.core.context.ConnectionContext; + +/** + * @author chenxiaofan + */ +public class DummyContextualChannelInstances { + + private DummyContextualChannelInstances() { + } + + public static final ContextualChannel CHANNEL_WILL_RECONNECT = new ContextualChannel(null, + 
ConnectionContext.State.WILL_RECONNECT); + + public static final ContextualChannel CHANNEL_CONNECTING = new ContextualChannel(null, ConnectionContext.State.CONNECTING); + + public static final ContextualChannel CHANNEL_RECONNECT_FAILED = new ContextualChannel(null, + ConnectionContext.State.RECONNECT_FAILED); + + public static final ContextualChannel CHANNEL_ENDPOINT_CLOSED = new ContextualChannel(null, + ConnectionContext.State.ENDPOINT_CLOSED); + +} diff --git a/src/main/java/io/lettuce/core/context/BatchFlushEndPointContext.java b/src/main/java/io/lettuce/core/context/BatchFlushEndPointContext.java new file mode 100644 index 0000000000..53077e3a5b --- /dev/null +++ b/src/main/java/io/lettuce/core/context/BatchFlushEndPointContext.java @@ -0,0 +1,155 @@ +package io.lettuce.core.context; + +import io.lettuce.core.datastructure.queue.unmodifiabledeque.UnmodifiableDeque; +import io.lettuce.core.protocol.RedisCommand; +import io.netty.util.internal.logging.InternalLogger; +import io.netty.util.internal.logging.InternalLoggerFactory; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import java.util.ArrayDeque; +import java.util.Deque; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * @author chenxiaofan + */ +public class BatchFlushEndPointContext { + + private static final InternalLogger logger = InternalLoggerFactory.getInstance(BatchFlushEndPointContext.class); + + public static class HasOngoingSendLoop { + + /** + * Used in multi-threaded environment, can be used to synchronize between threads. + */ + final AtomicInteger safe; + + /** + * Used in single thread. + */ + boolean unsafe; + + public HasOngoingSendLoop() { + safe = new AtomicInteger(); + unsafe = false; + } + + /** + * Try enter loop with the memory semantic getVolatile + * + * @return true if entered the loop, false if already have a running loop. 
+ */ + public boolean tryEnterSafeGetVolatile() { + while (safe.get() == 0) { + if (safe.weakCompareAndSet(0, 1) /* stale read as 0 is acceptable */) { + return true; + } + } + return false; + } + + /** + * This method is not thread safe, can only be used from single thread. + * + * @return true if the value was updated + */ + public boolean tryEnterUnsafe() { + if (unsafe) { + return false; + } + unsafe = true; + return true; + } + + public void exitSafe() { + safe.set(0); + } + + public void exitUnsafe() { + unsafe = false; + } + + } + + BatchFlushEndPointContext() { + } + + /** + * Tasks that failed to send (probably due to connection errors) + */ + @Nullable + Deque> retryableFailedToSendTasks = null; + + Throwable firstDiscontinueReason = null; + + public Throwable getFirstDiscontinueReason() { + return firstDiscontinueReason; + } + + private int flyingTaskNum; + + @SuppressWarnings("unused") + public int getFlyingTaskNum() { + return flyingTaskNum; + } + + private int total = 0; + + public int getTotal() { + return total; + } + + private final HasOngoingSendLoop hasOngoingSendLoop = new HasOngoingSendLoop(); + + public HasOngoingSendLoop getHasOngoingSendLoop() { + return hasOngoingSendLoop; + } + + public void add(int n) { + this.total += n; + this.flyingTaskNum += n; + } + + public @Nullable Deque> getAndClearRetryableFailedToSendTasks() { + final Deque> old = this.retryableFailedToSendTasks; + // don't set to null so give us a chance to expose potential bugs if there is addRetryableFailedToSendTask() afterwards + this.retryableFailedToSendTasks = UnmodifiableDeque.emptyDeque(); + return old; + } + + public void done(int n) { + this.flyingTaskNum -= n; + } + + public boolean isDone() { + if (this.flyingTaskNum < 0) { + logger.error("[unexpected] flyingTaskNum < 0, flyingTaskNum: {}, total: {}", this.flyingTaskNum, this.total); + return true; + } + return this.flyingTaskNum == 0; + } + + public boolean hasRetryableFailedToSendTasks() { + return 
retryableFailedToSendTasks != null; + } + + /** + * @param retryableTask retryable task + * @param cause fail reason + * @return true if this is the first retryable failed task + */ + public boolean addRetryableFailedToSendTask(RedisCommand retryableTask, @Nonnull Throwable cause) { + if (retryableFailedToSendTasks == null) { + retryableFailedToSendTasks = new ArrayDeque<>(); + retryableFailedToSendTasks.add(retryableTask); + + firstDiscontinueReason = cause; + return true; + } + + retryableFailedToSendTasks.add(retryableTask); + return false; + } + +} diff --git a/src/main/java/io/lettuce/core/context/ConnectionContext.java b/src/main/java/io/lettuce/core/context/ConnectionContext.java new file mode 100644 index 0000000000..25a6fac4ec --- /dev/null +++ b/src/main/java/io/lettuce/core/context/ConnectionContext.java @@ -0,0 +1,115 @@ +package io.lettuce.core.context; + +import java.util.Deque; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +import io.lettuce.core.RedisException; +import io.lettuce.core.protocol.RedisCommand; + +/** + * Should be accessed by the event loop thread only. 
+ * + * @author chenxiaofan + */ +public class ConnectionContext { + + public static class CloseStatus { + + private final boolean willReconnect; + + private Deque> retryablePendingCommands; + + private final RedisException channelClosed; + + public CloseStatus(boolean willReconnect, Deque> retryablePendingCommands, + RedisException channelClosed) { + this.willReconnect = willReconnect; + this.retryablePendingCommands = retryablePendingCommands; + this.channelClosed = channelClosed; + } + + public boolean isWillReconnect() { + return willReconnect; + } + + public @Nullable Deque> getAndClearRetryablePendingCommands() { + final Deque> old = this.retryablePendingCommands; + this.retryablePendingCommands = null; + return old; + } + + public Exception getErr() { + return channelClosed; + } + + @Override + public String toString() { + return "CloseStatus{willReconnect=" + willReconnect + ", clientCloseReason=" + channelClosed + '}'; + } + + } + + public enum State { + + WILL_RECONNECT, CONNECTING, CONNECTED, + /** + * The client is closed. NOTE: this is different from connection closed. 
+ */ + ENDPOINT_CLOSED, RECONNECT_FAILED; + + public boolean isConnected() { + return this == CONNECTED; + } + + } + + private final State initialState; + + private final BatchFlushEndPointContext batchFlushEndPointContext; + + public ConnectionContext(State initialState) { + this.initialState = initialState; + this.batchFlushEndPointContext = new BatchFlushEndPointContext(); + } + + public State getInitialState() { + return initialState; + } + + public BatchFlushEndPointContext getFairEndPointContext() { + return batchFlushEndPointContext; + } + + /* below fields must be accessed by the event loop thread only */ + @Nullable + private CloseStatus closeStatus = null; + + public void setCloseStatus(@Nonnull CloseStatus closeStatus) { + this.closeStatus = closeStatus; + } + + public @Nullable CloseStatus getCloseStatus() { + return closeStatus; + } + + public boolean isChannelInactiveEventFired() { + return closeStatus != null; + } + + private boolean channelQuiescent = false; + + public boolean isChannelQuiescent() { + return channelQuiescent; + } + + public boolean setChannelQuiescentOnce() { + if (channelQuiescent) { + return false; + } + channelQuiescent = true; + return true; + } + +} diff --git a/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/UnboundedMpscOfferFirstQueue.java b/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/UnboundedMpscOfferFirstQueue.java new file mode 100644 index 0000000000..690f7c1b4a --- /dev/null +++ b/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/UnboundedMpscOfferFirstQueue.java @@ -0,0 +1,35 @@ +package io.lettuce.core.datastructure.queue.offerfirst; + +import javax.annotation.Nullable; +import java.util.Deque; + +/** + * @author chenxiaofan + */ +public interface UnboundedMpscOfferFirstQueue { + + /** + * add element to the tail of the queue. The method is concurrent safe. + */ + void offer(E e); + + /** + * add all elements to the head of the queue. + *

+ * Should only be called from the single consumer thread. + * + * @param q a queue to add + */ + void offerFirstAll(@Nullable Deque q); + + /** + * poll the first element from the head of the queue. + *

+ * Should only be called from the single consumer thread. + * + * @return null if the queue is empty else the first element of the queue + */ + @Nullable + E poll(); + +} diff --git a/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/ConcurrentLinkedOfferFirstQueue.java b/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/ConcurrentLinkedOfferFirstQueue.java new file mode 100644 index 0000000000..f3cd2d4a35 --- /dev/null +++ b/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/ConcurrentLinkedOfferFirstQueue.java @@ -0,0 +1,44 @@ +package io.lettuce.core.datastructure.queue.offerfirst.impl; + +import io.lettuce.core.datastructure.queue.offerfirst.UnboundedMpscOfferFirstQueue; + +import javax.annotation.Nullable; +import java.util.Deque; +import java.util.concurrent.ConcurrentLinkedDeque; + +/** + * @author chenxiaofan + */ +public class ConcurrentLinkedOfferFirstQueue implements UnboundedMpscOfferFirstQueue { + + private final ConcurrentLinkedDeque delegate; + + public ConcurrentLinkedOfferFirstQueue() { + this.delegate = new ConcurrentLinkedDeque<>(); + } + + @Override + public void offer(E e) { + delegate.offer(e); + } + + @Override + public void offerFirstAll(@Nullable Deque q) { + if (q == null) { + return; + } + while (true) { + E e = q.pollLast(); + if (e == null) { + break; + } + delegate.offerFirst(e); + } + } + + @Override + public E poll() { + return delegate.poll(); + } + +} diff --git a/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/JcToolsUnboundedMpscOfferFirstQueue.java b/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/JcToolsUnboundedMpscOfferFirstQueue.java new file mode 100644 index 0000000000..1a5436425f --- /dev/null +++ b/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/JcToolsUnboundedMpscOfferFirstQueue.java @@ -0,0 +1,66 @@ +package io.lettuce.core.datastructure.queue.offerfirst.impl; + +import 
io.lettuce.core.datastructure.queue.offerfirst.UnboundedMpscOfferFirstQueue; +import io.netty.util.internal.shaded.org.jctools.queues.MessagePassingQueue; +import io.netty.util.internal.shaded.org.jctools.queues.atomic.MpscUnboundedAtomicArrayQueue; + +import javax.annotation.Nullable; +import java.util.Deque; +import java.util.LinkedList; +import java.util.Objects; +import java.util.Queue; + +/** + * @author chenxiaofan + */ +public class JcToolsUnboundedMpscOfferFirstQueue implements UnboundedMpscOfferFirstQueue { + + private static final int MPSC_CHUNK_SIZE = 1024; + + /** + * The queues can only be manipulated in a single thread env. + */ + private final LinkedList> unsafeQueues = new LinkedList<>(); + + private final MessagePassingQueue mpscQueue = new MpscUnboundedAtomicArrayQueue<>(MPSC_CHUNK_SIZE); + + @Override + public void offer(E e) { + mpscQueue.offer(e); + } + + /** + * must call from consumer thread. + * + * @param q a queue to add + */ + @Override + public void offerFirstAll(@Nullable Deque q) { + if (q != null && !q.isEmpty()) { + unsafeQueues.addFirst(q); + } + } + + /** + * Must call from the consumer thread. 
+ * + * @return first element of the queue or null if the queue is empty + */ + @Override + public E poll() { + if (!unsafeQueues.isEmpty()) { + return pollFromUnsafeQueues(); + } + return mpscQueue.poll(); + } + + private E pollFromUnsafeQueues() { + Queue first = unsafeQueues.getFirst(); + E e = first.poll(); + if (first.isEmpty()) { + unsafeQueues.removeFirst(); + } + return Objects.requireNonNull(e); + } + +} diff --git a/src/main/java/io/lettuce/core/datastructure/queue/unmodifiabledeque/UnmodifiableDeque.java b/src/main/java/io/lettuce/core/datastructure/queue/unmodifiabledeque/UnmodifiableDeque.java new file mode 100644 index 0000000000..31a0337ac2 --- /dev/null +++ b/src/main/java/io/lettuce/core/datastructure/queue/unmodifiabledeque/UnmodifiableDeque.java @@ -0,0 +1,211 @@ +package io.lettuce.core.datastructure.queue.unmodifiabledeque; + +import javax.annotation.Nonnull; +import java.util.ArrayDeque; +import java.util.Collection; +import java.util.Deque; +import java.util.Iterator; +import java.util.NoSuchElementException; + +/** + * @param + * @author chenxiaofan + */ +public class UnmodifiableDeque implements Deque { + + private static final UnmodifiableDeque EMPTY_DEQUEUE = new UnmodifiableDeque<>(new ArrayDeque<>()); + + private final Deque delegate; + + public UnmodifiableDeque(Deque delegate) { + this.delegate = delegate; + } + + @SuppressWarnings("unchecked") + public static Deque emptyDeque() { + return (Deque) EMPTY_DEQUEUE; + } + + /* + * unmodifiable throw unsupported exception for all write methods and generate delegate methods for all read methods + */ + @Override + public void addFirst(E e) { + throw new UnsupportedOperationException(); + } + + @Override + public void addLast(E e) { + throw new UnsupportedOperationException(); + } + + @Override + public boolean offerFirst(E e) { + throw new UnsupportedOperationException(); + } + + @Override + public boolean offerLast(E e) { + throw new UnsupportedOperationException(); + } + + @Override + 
public E removeFirst() { + throw new UnsupportedOperationException(); + } + + @Override + public E removeLast() { + throw new UnsupportedOperationException(); + } + + @Override + public E pollFirst() { + throw new UnsupportedOperationException(); + } + + @Override + public E pollLast() { + throw new UnsupportedOperationException(); + } + + @Override + public E getFirst() { + return delegate.getFirst(); + } + + @Override + public E getLast() { + return delegate.getLast(); + } + + @Override + public E peekFirst() { + return delegate.peekFirst(); + } + + @Override + public E peekLast() { + return delegate.peekLast(); + } + + @Override + public boolean removeFirstOccurrence(Object o) { + throw new UnsupportedOperationException(); + } + + @Override + public boolean removeLastOccurrence(Object o) { + throw new UnsupportedOperationException(); + } + + @Override + public boolean add(E e) { + throw new UnsupportedOperationException(); + } + + @Override + public boolean offer(E e) { + throw new UnsupportedOperationException(); + } + + @Override + public E remove() { + throw new UnsupportedOperationException(); + } + + @Override + public E poll() { + throw new UnsupportedOperationException(); + } + + @Override + public E element() { + final E e = peek(); + if (e == null) { + throw new NoSuchElementException(); + } + return e; + } + + @Override + public E peek() { + return delegate.peek(); + } + + @Override + public boolean addAll(Collection c) { + throw new UnsupportedOperationException(); + } + + @Override + public boolean removeAll(@Nonnull Collection ignored) { + throw new UnsupportedOperationException(); + } + + @Override + public boolean retainAll(@Nonnull Collection c) { + throw new UnsupportedOperationException(); + } + + @Override + public void clear() { + throw new UnsupportedOperationException(); + } + + @Override + public void push(E e) { + throw new UnsupportedOperationException(); + } + + @Override + public E pop() { + throw new UnsupportedOperationException(); + 
} + + @Override + public boolean remove(Object o) { + throw new UnsupportedOperationException(); + } + + @Override + public boolean containsAll(@Nonnull Collection c) { + return delegate.containsAll(c); + } + + @Override + public boolean contains(Object o) { + return delegate.contains(o); + } + + @Override + public int size() { + return delegate.size(); + } + + @Override + public boolean isEmpty() { + return delegate.isEmpty(); + } + + @Override + public Iterator iterator() { + return delegate.iterator(); + } + + @Override + public Object[] toArray() { + return delegate.toArray(); + } + + @Override + public T[] toArray(@Nonnull T[] a) { + return delegate.toArray(a); + } + + @Override + public Iterator descendingIterator() { + return delegate.descendingIterator(); + } + +} diff --git a/src/main/java/io/lettuce/core/internal/LettuceAssert.java b/src/main/java/io/lettuce/core/internal/LettuceAssert.java index e41a13df46..f468d3b0a5 100644 --- a/src/main/java/io/lettuce/core/internal/LettuceAssert.java +++ b/src/main/java/io/lettuce/core/internal/LettuceAssert.java @@ -237,4 +237,10 @@ public static void assertState(boolean condition, Supplier messageSuppli } } + public static void isPositive(int writeSpinCount, String writeSpinCountIsNotPositive) { + if (writeSpinCount <= 0) { + throw new IllegalArgumentException(writeSpinCountIsNotPositive); + } + } + } diff --git a/src/main/java/io/lettuce/core/protocol/BatchFlushEndpoint.java b/src/main/java/io/lettuce/core/protocol/BatchFlushEndpoint.java new file mode 100644 index 0000000000..78f99eddaa --- /dev/null +++ b/src/main/java/io/lettuce/core/protocol/BatchFlushEndpoint.java @@ -0,0 +1,38 @@ +package io.lettuce.core.protocol; + +import java.util.Deque; + +import io.netty.channel.Channel; + +/** + * @author chenxiaofan + */ +public interface BatchFlushEndpoint extends Endpoint { + + @Override + default void notifyChannelInactive(Channel channel) { + throw new UnsupportedOperationException(); + } + + @Override + default 
void notifyDrainQueuedCommands(HasQueuedCommands queuedCommands) { + throw new UnsupportedOperationException(); + } + + /** + * Merge Endpoint#notifyChannelInactive(Channel) and Endpoint#notifyDrainQueuedCommands(HasQueuedCommands) + * + * @param channel the channel + * @param retryableQueuedCommands retryable queued commands in command handler + */ + void notifyChannelInactive(Channel channel, Deque> retryableQueuedCommands); + + enum AcquireQuiescenceResult { + SUCCESS, FAILED, TRY_LATER + } + + AcquireQuiescenceResult tryAcquireQuiescence(); + + void notifyReconnectFailed(Throwable throwable); + +} diff --git a/src/main/java/io/lettuce/core/protocol/CommandHandler.java b/src/main/java/io/lettuce/core/protocol/CommandHandler.java index 83bf304020..16dd374bfc 100644 --- a/src/main/java/io/lettuce/core/protocol/CommandHandler.java +++ b/src/main/java/io/lettuce/core/protocol/CommandHandler.java @@ -19,8 +19,6 @@ */ package io.lettuce.core.protocol; -import static io.lettuce.core.ConnectionEvents.*; - import java.io.IOException; import java.net.SocketAddress; import java.nio.ByteBuffer; @@ -28,6 +26,7 @@ import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Collection; +import java.util.Deque; import java.util.LinkedHashSet; import java.util.List; import java.util.Queue; @@ -63,6 +62,8 @@ import io.netty.util.internal.logging.InternalLogger; import io.netty.util.internal.logging.InternalLoggerFactory; +import static io.lettuce.core.ConnectionEvents.Reset; + /** * A netty {@link ChannelHandler} responsible for writing redis commands and reading responses from the server. 
* @@ -185,6 +186,19 @@ void setBuffer(ByteBuf buffer) { return drainCommands(stack); } + private Deque> drainStack() { + final Deque> target = new ArrayDeque<>(stack.size()); + + RedisCommand cmd; + while ((cmd = stack.poll()) != null) { + if (!cmd.isDone() && !ActivationCommand.isActivationCommand(cmd)) { + target.add(cmd); + } + } + + return target; + } + protected LifecycleState getState() { return lifecycleState; } @@ -358,8 +372,12 @@ public void channelInactive(ChannelHandlerContext ctx) throws Exception { setState(LifecycleState.DISCONNECTED); setState(LifecycleState.DEACTIVATING); - endpoint.notifyChannelInactive(ctx.channel()); - endpoint.notifyDrainQueuedCommands(this); + if (endpoint instanceof BatchFlushEndpoint) { + ((BatchFlushEndpoint) endpoint).notifyChannelInactive(ctx.channel(), drainStack()); + } else { + endpoint.notifyChannelInactive(ctx.channel()); + endpoint.notifyDrainQueuedCommands(this); + } setState(LifecycleState.DEACTIVATED); diff --git a/src/main/java/io/lettuce/core/protocol/ConnectionWatchdog.java b/src/main/java/io/lettuce/core/protocol/ConnectionWatchdog.java index 84bcb41f1f..f90e4e5f47 100644 --- a/src/main/java/io/lettuce/core/protocol/ConnectionWatchdog.java +++ b/src/main/java/io/lettuce/core/protocol/ConnectionWatchdog.java @@ -19,14 +19,6 @@ */ package io.lettuce.core.protocol; -import java.net.SocketAddress; -import java.time.Duration; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; - -import reactor.core.publisher.Mono; -import reactor.util.function.Tuple2; import io.lettuce.core.ClientOptions; import io.lettuce.core.ConnectionBuilder; import io.lettuce.core.ConnectionEvents; @@ -50,6 +42,15 @@ import io.netty.util.internal.logging.InternalLogLevel; import io.netty.util.internal.logging.InternalLogger; import io.netty.util.internal.logging.InternalLoggerFactory; +import reactor.core.publisher.Mono; +import 
reactor.util.function.Tuple2; + +import java.net.SocketAddress; +import java.time.Duration; +import java.util.concurrent.CancellationException; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; /** * A netty {@link ChannelHandler} responsible for monitoring the channel and reconnecting when the connection is lost. @@ -83,6 +84,8 @@ public class ConnectionWatchdog extends ChannelInboundHandlerAdapter { private final String epid; + private final Endpoint endpoint; + private Channel channel; private SocketAddress remoteAddress; @@ -101,6 +104,8 @@ public class ConnectionWatchdog extends ChannelInboundHandlerAdapter { private volatile Timeout reconnectScheduleTimeout; + private volatile boolean willReconnect; + /** * Create a new watchdog that adds to new connections to the supplied {@link ChannelGroup} and establishes a new * {@link Channel} when disconnected, while reconnect is true. The socketAddressSupplier can supply the reconnect address. 
@@ -141,6 +146,7 @@ public ConnectionWatchdog(Delay reconnectDelay, ClientOptions clientOptions, Boo this.eventBus = eventBus; this.redisUri = (String) bootstrap.config().attrs().get(ConnectionBuilder.REDIS_URI); this.epid = endpoint.getId(); + this.endpoint = endpoint; Mono wrappedSocketAddressSupplier = socketAddressSupplier.doOnNext(addr -> remoteAddress = addr) .onErrorResume(t -> { @@ -195,6 +201,7 @@ public void channelActive(ChannelHandlerContext ctx) throws Exception { @Override public void channelInactive(ChannelHandlerContext ctx) throws Exception { + willReconnect = false; logger.debug("{} channelInactive()", logPrefix()); if (!armed) { @@ -205,7 +212,22 @@ public void channelInactive(ChannelHandlerContext ctx) throws Exception { channel = null; if (listenOnChannelInactive && !reconnectionHandler.isReconnectSuspended()) { - scheduleReconnect(); + if (!isEventLoopGroupActive()) { + logger.debug("isEventLoopGroupActive() == false"); + return; + } + + if (!isListenOnChannelInactive()) { + logger.debug("Skip reconnect scheduling, listener disabled"); + return; + } + + if (endpoint instanceof BatchFlushEndpoint) { + waitQuiescence((BatchFlushEndpoint) endpoint, this::scheduleReconnect); + } else { + scheduleReconnect(); + } + willReconnect = true; } else { logger.debug("{} Reconnect scheduling disabled", logPrefix(), ctx); } @@ -213,6 +235,22 @@ public void channelInactive(ChannelHandlerContext ctx) throws Exception { super.channelInactive(ctx); } + private void waitQuiescence(BatchFlushEndpoint batchFlushEndpoint, Runnable runnable) { + final BatchFlushEndpoint.AcquireQuiescenceResult ret = batchFlushEndpoint.tryAcquireQuiescence(); + switch (ret) { + case SUCCESS: + runnable.run(); + break; + case FAILED: + logger.error("{} Failed to acquire quiescence", logPrefix()); + break; + case TRY_LATER: + // TODO use exponential backoff + timer.newTimeout(it -> waitQuiescence(batchFlushEndpoint, runnable), 3, TimeUnit.MILLISECONDS); + break; + } + } + /** * Enable 
{@link ConnectionWatchdog} to listen for disconnected events. */ @@ -230,11 +268,13 @@ public void scheduleReconnect() { if (!isEventLoopGroupActive()) { logger.debug("isEventLoopGroupActive() == false"); + notifyEndpointFailedToConnectIfNeeded(); return; } if (!isListenOnChannelInactive()) { logger.debug("Skip reconnect scheduling, listener disabled"); + notifyEndpointFailedToConnectIfNeeded(); return; } @@ -252,6 +292,7 @@ public void scheduleReconnect() { if (!isEventLoopGroupActive()) { logger.warn("Cannot execute scheduled reconnect timer, reconnect workers are terminated"); + notifyEndpointFailedToConnectIfNeeded(); return; } @@ -267,7 +308,19 @@ public void scheduleReconnect() { } } else { logger.debug("{} Skipping scheduleReconnect() because I have an active channel", logPrefix()); + notifyEndpointFailedToConnectIfNeeded(); + } + } + + private void notifyEndpointFailedToConnectIfNeeded() { + notifyEndpointFailedToConnectIfNeeded(new CancellationException()); + } + + private void notifyEndpointFailedToConnectIfNeeded(Exception e) { + if (!(endpoint instanceof BatchFlushEndpoint)) { + return; } + ((BatchFlushEndpoint) endpoint).notifyReconnectFailed(e); } /** @@ -275,7 +328,6 @@ public void scheduleReconnect() { * the same handler instances contained in the old channel's pipeline. * * @param attempt attempt counter - * * @throws Exception when reconnection fails. */ public void run(int attempt) throws Exception { @@ -288,7 +340,6 @@ public void run(int attempt) throws Exception { * * @param attempt attempt counter. * @param delay retry delay. - * * @throws Exception when reconnection fails. 
*/ private void run(int attempt, Duration delay) throws Exception { @@ -298,16 +349,19 @@ private void run(int attempt, Duration delay) throws Exception { if (!isEventLoopGroupActive()) { logger.debug("isEventLoopGroupActive() == false"); + notifyEndpointFailedToConnectIfNeeded(); return; } if (!isListenOnChannelInactive()) { logger.debug("Skip reconnect scheduling, listener disabled"); + notifyEndpointFailedToConnectIfNeeded(); return; } if (isReconnectSuspended()) { logger.debug("Skip reconnect scheduling, reconnect is suspended"); + notifyEndpointFailedToConnectIfNeeded(); return; } @@ -363,11 +417,14 @@ private void run(int attempt, Duration delay) throws Exception { if (!isReconnectSuspended()) { scheduleReconnect(); + } else { + notifyEndpointFailedToConnectIfNeeded(); } }); } catch (Exception e) { logger.log(warnLevel, "Cannot reconnect: {}", e.toString()); eventBus.publish(new ReconnectFailedEvent(redisUri, epid, LocalAddress.ANY, remoteAddress, e, attempt)); + notifyEndpointFailedToConnectIfNeeded(e); } } @@ -436,4 +493,8 @@ private String logPrefix() { return logPrefix = buffer; } + public boolean isWillReconnect() { + return willReconnect; + } + } diff --git a/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java b/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java new file mode 100644 index 0000000000..6a37f1afd0 --- /dev/null +++ b/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java @@ -0,0 +1,1021 @@ +/* + * Copyright 2011-2024 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.lettuce.core.protocol; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Deque; +import java.util.HashSet; +import java.util.List; +import java.util.Queue; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicIntegerFieldUpdater; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReferenceFieldUpdater; +import java.util.function.Consumer; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +import io.lettuce.core.ClientOptions; +import io.lettuce.core.ConnectionEvents; +import io.lettuce.core.ContextualChannel; +import io.lettuce.core.RedisChannelWriter; +import io.lettuce.core.RedisConnectionException; +import io.lettuce.core.RedisException; +import io.lettuce.core.api.push.PushListener; +import io.lettuce.core.constant.DummyContextualChannelInstances; +import io.lettuce.core.context.BatchFlushEndPointContext; +import io.lettuce.core.context.ConnectionContext; +import io.lettuce.core.datastructure.queue.offerfirst.UnboundedMpscOfferFirstQueue; +import io.lettuce.core.datastructure.queue.offerfirst.impl.JcToolsUnboundedMpscOfferFirstQueue; +import io.lettuce.core.internal.Futures; +import io.lettuce.core.internal.LettuceAssert; +import io.lettuce.core.resource.ClientResources; +import io.lettuce.core.utils.ExceptionUtils; +import 
io.netty.channel.Channel; +import io.netty.channel.ChannelFuture; +import io.netty.channel.EventLoop; +import io.netty.handler.codec.EncoderException; +import io.netty.util.concurrent.Future; +import io.netty.util.internal.logging.InternalLogger; +import io.netty.util.internal.logging.InternalLoggerFactory; + +/** + * Default {@link Endpoint} implementation. + * + * @author Mark Paluch + */ +public class DefaultBatchFlushEndpoint implements RedisChannelWriter, BatchFlushEndpoint, PushHandler { + + private static final InternalLogger logger = InternalLoggerFactory.getInstance(BatchFlushEndpoint.class); + + private static final AtomicLong ENDPOINT_COUNTER = new AtomicLong(); + + private static final AtomicReferenceFieldUpdater CHANNEL = AtomicReferenceFieldUpdater + .newUpdater(DefaultBatchFlushEndpoint.class, ContextualChannel.class, "channel"); + + private static final AtomicIntegerFieldUpdater QUEUE_SIZE = AtomicIntegerFieldUpdater + .newUpdater(DefaultBatchFlushEndpoint.class, "queueSize"); + + private static final AtomicIntegerFieldUpdater STATUS = AtomicIntegerFieldUpdater + .newUpdater(DefaultBatchFlushEndpoint.class, "status"); + + private static final int ST_OPEN = 0; + + private static final int ST_CLOSED = 1; + + private static final Set> SHOULD_NOT_RETRY_EXCEPTION_TYPES = new HashSet<>(); + + static { + SHOULD_NOT_RETRY_EXCEPTION_TYPES.add(EncoderException.class); + SHOULD_NOT_RETRY_EXCEPTION_TYPES.add(Error.class); + } + + private static boolean isRejectCommand(ClientOptions clientOptions) { + switch (clientOptions.getDisconnectedBehavior()) { + case REJECT_COMMANDS: + return true; + case ACCEPT_COMMANDS: + throw new UnsupportedOperationException("ACCEPT_COMMANDS is not supported"); + case DEFAULT: + return !clientOptions.isAutoReconnect(); + default: + throw new IllegalStateException("Unknown disconnected behavior: " + clientOptions.getDisconnectedBehavior()); + } + } + + protected static void cancelCommandOnEndpointClose(RedisCommand cmd) { + if 
(cmd.isDone()) { + return; + } + + if (cmd.getOutput() != null) { + cmd.getOutput().setError("endpoint closed"); + } + cmd.cancel(); + } + + protected volatile @Nonnull ContextualChannel channel = DummyContextualChannelInstances.CHANNEL_CONNECTING; + + private final Reliability reliability; + + private final ClientOptions clientOptions; + + private final ClientResources clientResources; + + private final boolean boundedQueues; + + // access via QUEUE_SIZE + @SuppressWarnings("unused") + private volatile int queueSize = 0; + + // access via STATUS + @SuppressWarnings("unused") + private volatile int status = ST_OPEN; + + private final Consumer> callbackOnClose; + + private final boolean rejectCommandsWhileDisconnected; + + private final List pushListeners = new CopyOnWriteArrayList<>(); + + private final boolean debugEnabled = logger.isDebugEnabled(); + + protected final CompletableFuture closeFuture = new CompletableFuture<>(); + + private String logPrefix; + + private boolean autoFlushCommands = true; + + private boolean inActivation = false; + + protected @Nullable ConnectionWatchdog connectionWatchdog; + + private ConnectionFacade connectionFacade; + + private volatile Throwable connectionError; + + // // access via QUEUE_SIZE + // @SuppressWarnings("unused") + // private volatile int queueSize = 0; + + private final String cachedEndpointId; + + protected final UnboundedMpscOfferFirstQueue> taskQueue; + + private final AtomicBoolean quiescence; + + private final boolean canFire; + + private volatile boolean inProtectMode; + + private volatile Throwable failedToReconnectReason; + + private volatile EventLoop lastEventLoop = null; + + /** + * Create a new {@link BatchFlushEndpoint}. + * + * @param clientOptions client options for this connection, must not be {@code null}. + * @param clientResources client resources for this connection, must not be {@code null}. 
+ */ + public DefaultBatchFlushEndpoint(ClientOptions clientOptions, ClientResources clientResources) { + this(clientOptions, clientResources, DefaultBatchFlushEndpoint::cancelCommandOnEndpointClose); + } + + protected DefaultBatchFlushEndpoint(ClientOptions clientOptions, ClientResources clientResources, + Consumer> callbackOnClose) { + + LettuceAssert.notNull(clientOptions, "ClientOptions must not be null"); + LettuceAssert.notNull(clientResources, "ClientResources must not be null"); + + this.clientOptions = clientOptions; + this.clientResources = clientResources; + this.reliability = clientOptions.isAutoReconnect() ? Reliability.AT_LEAST_ONCE : Reliability.AT_MOST_ONCE; + this.boundedQueues = clientOptions.getRequestQueueSize() != Integer.MAX_VALUE; + this.rejectCommandsWhileDisconnected = isRejectCommand(clientOptions); + long endpointId = ENDPOINT_COUNTER.incrementAndGet(); + this.cachedEndpointId = "0x" + Long.toHexString(endpointId); + this.taskQueue = new JcToolsUnboundedMpscOfferFirstQueue<>(); + this.quiescence = new AtomicBoolean(); + this.canFire = false; + this.callbackOnClose = callbackOnClose; + } + + @Override + public void setConnectionFacade(ConnectionFacade connectionFacade) { + this.connectionFacade = connectionFacade; + } + + @Override + public ClientResources getClientResources() { + return clientResources; + } + + @Override + public void setAutoFlushCommands(boolean autoFlush) { + this.autoFlushCommands = autoFlush; + } + + @Override + public void addListener(PushListener listener) { + pushListeners.add(listener); + } + + @Override + public void removeListener(PushListener listener) { + pushListeners.remove(listener); + } + + @Override + public List getPushListeners() { + return pushListeners; + } + + @Override + public RedisCommand write(RedisCommand command) { + + LettuceAssert.notNull(command, "Command must not be null"); + + final Throwable validation = validateWrite(1); + if (validation != null) { + 
command.completeExceptionally(validation); + return command; + } + + try { + if (inActivation) { + command = processActivationCommand(command); + } + + QUEUE_SIZE.incrementAndGet(this); + this.taskQueue.offer(command); + + if (autoFlushCommands) { + flushCommands(); + } + + } finally { + if (debugEnabled) { + logger.debug("{} write() done", logPrefix()); + } + } + + return command; + } + + @SuppressWarnings("unchecked") + @Override + public Collection> write(Collection> commands) { + + LettuceAssert.notNull(commands, "Commands must not be null"); + + final Throwable validation = validateWrite(commands.size()); + if (validation != null) { + commands.forEach(it -> it.completeExceptionally(validation)); + return (Collection>) commands; + } + + try { + if (inActivation) { + commands = processActivationCommands(commands); + } + + for (RedisCommand command : commands) { + this.taskQueue.offer(command); + } + QUEUE_SIZE.addAndGet(this, commands.size()); + + if (autoFlushCommands) { + flushCommands(); + } + } finally { + if (debugEnabled) { + logger.debug("{} write() done", logPrefix()); + } + } + + return (Collection>) commands; + } + + @Override + public void notifyChannelActive(Channel channel) { + lastEventLoop = channel.eventLoop(); + + final ContextualChannel contextualChannel = new ContextualChannel(channel, ConnectionContext.State.CONNECTED); + + this.logPrefix = null; + this.connectionError = null; + + if (!CHANNEL.compareAndSet(this, DummyContextualChannelInstances.CHANNEL_CONNECTING, contextualChannel)) { + logger.error("[unexpected] {} failed to set to CHANNEL_CONNECTING because current state is '{}'", logPrefix(), + CHANNEL.get(this)); + channel.close(); + onUnexpectedState("notifyChannelActive", ConnectionContext.State.CONNECTING, + this.channel.getContext().getInitialState()); + return; + } + + // Created a synchronize-before with set channel to CHANNEL_CONNECTING, + if (isClosed()) { + logger.info("{} Closing channel because endpoint is already closed", 
logPrefix()); + channel.close(); + + onEndpointClosed(); + CHANNEL.set(this, DummyContextualChannelInstances.CHANNEL_ENDPOINT_CLOSED); + return; + } + + if (connectionWatchdog != null) { + connectionWatchdog.arm(); + } + + try { + if (debugEnabled) { + logger.debug("{} activating endpoint", logPrefix()); + } + + try { + inActivation = true; + connectionFacade.activated(); + } finally { + inActivation = false; + } + + scheduleSendJobOnConnected(contextualChannel); + } catch (Exception e) { + + if (debugEnabled) { + logger.debug("{} channelActive() ran into an exception", logPrefix()); + } + + if (clientOptions.isCancelCommandsOnReconnectFailure()) { + resetInternal(); + } + + throw e; + } + } + + private void onUnexpectedState(String caller, ConnectionContext.State exp, ConnectionContext.State actual) { + logger.error("[{}][unexpected] {}: unexpected state: exp '{}' got '{}'", caller, logPrefix(), exp, actual); + cancelCommands(String.format("%s: state not match: expect '%s', got '%s'", caller, exp, actual)); + } + + @Override + public void notifyReconnectFailed(Throwable t) { + this.failedToReconnectReason = t; + + if (!CHANNEL.compareAndSet(this, DummyContextualChannelInstances.CHANNEL_CONNECTING, + DummyContextualChannelInstances.CHANNEL_RECONNECT_FAILED)) { + logger.error("[unexpected] {} failed to set to CHANNEL_CONNECTING because current state is '{}'", logPrefix(), + CHANNEL.get(this)); + onUnexpectedState("notifyReconnectFailed", ConnectionContext.State.CONNECTING, + this.channel.getContext().getInitialState()); + return; + } + + syncAfterTerminated(() -> { + if (isClosed()) { + onEndpointClosed(); + } else { + cancelCommands("reconnect failed"); + } + }); + } + + @Override + public void notifyChannelInactive(Channel channel, Deque> retryableQueuedCommands) { + if (debugEnabled) { + logger.debug("{} deactivating endpoint handler", logPrefix()); + } + + connectionFacade.deactivated(); + + final ContextualChannel chan = this.channel; + if 
(!chan.getContext().getInitialState().isConnected() || chan.getDelegate() != channel) { + logger.error("[unexpected][{}] notifyChannelInactive: channel not match", logPrefix()); + return; + } + + if (chan.getContext().isChannelInactiveEventFired()) { + logger.error("[unexpected][{}] notifyChannelInactive: already fired", logPrefix()); + return; + } + + boolean willReconnect = connectionWatchdog != null && connectionWatchdog.isWillReconnect(); + RedisException exception = null; + // Unlike DefaultEndpoint, here we don't check reliability since connectionWatchdog.isWillReconnect() already does it. + if (isClosed()) { + exception = new RedisException("endpoint closed"); + willReconnect = false; + } + + if (willReconnect) { + CHANNEL.set(this, DummyContextualChannelInstances.CHANNEL_WILL_RECONNECT); + // Create a synchronize-before with this.channel = CHANNEL_WILL_RECONNECT + if (isClosed()) { + exception = new RedisException("endpoint closed"); + willReconnect = false; + } else { + exception = new RedisException("channel inactive and will reconnect"); + } + } else if (exception == null) { + exception = new RedisException("channel inactive and connectionWatchdog won't reconnect"); + } + + if (!willReconnect) { + CHANNEL.set(this, DummyContextualChannelInstances.CHANNEL_ENDPOINT_CLOSED); + } + chan.getContext().setCloseStatus(new ConnectionContext.CloseStatus(willReconnect, retryableQueuedCommands, exception)); + trySetEndpointQuiescence(chan); + } + + private boolean setEndpointQuiescenceOncePerConnection(ContextualChannel contextualChannel) { + if (contextualChannel.getContext().setChannelQuiescentOnce()) { + this.quiescence.set(true); + return true; + } + return false; + } + + @Override + public AcquireQuiescenceResult tryAcquireQuiescence() { + if (quiescence.compareAndSet(true, false)) { + if (channel.getContext().getInitialState() == ConnectionContext.State.ENDPOINT_CLOSED) { + return AcquireQuiescenceResult.FAILED; + } + if (CHANNEL.compareAndSet(this, 
DummyContextualChannelInstances.CHANNEL_WILL_RECONNECT, + DummyContextualChannelInstances.CHANNEL_CONNECTING)) { + return AcquireQuiescenceResult.SUCCESS; + } + onUnexpectedState("tryAcquireQuiescence", ConnectionContext.State.WILL_RECONNECT, + this.channel.getContext().getInitialState()); + return AcquireQuiescenceResult.FAILED; + } + return AcquireQuiescenceResult.TRY_LATER; + } + + @Override + public void notifyException(Throwable t) { + if (t instanceof RedisConnectionException && RedisConnectionException.isProtectedMode(t.getMessage())) { + connectionError = t; + inProtectMode = true; + } + + final ContextualChannel curr = this.channel; + if (!curr.getContext().getInitialState().isConnected() || !curr.isActive()) { + connectionError = t; + } + } + + @Override + public void registerConnectionWatchdog(ConnectionWatchdog connectionWatchdog) { + this.connectionWatchdog = connectionWatchdog; + } + + @Override + public void flushCommands() { + final ContextualChannel chan = this.channel; + switch (chan.getContext().getInitialState()) { + case ENDPOINT_CLOSED: + syncAfterTerminated(() -> { + if (isClosed()) { + onEndpointClosed(); + } else { + fulfillCommands("Connection is closed", + cmd -> cmd.completeExceptionally(new RedisException("Connection is closed"))); + } + }); + return; + case RECONNECT_FAILED: + syncAfterTerminated(() -> { + if (isClosed()) { + onEndpointClosed(); + } else { + fulfillCommands("Reconnect failed", + cmd -> cmd.completeExceptionally(new RedisException("Reconnect failed"))); + } + }); + return; + case WILL_RECONNECT: + case CONNECTING: + // command will be handled later either in notifyReconnectFailed or in notifyChannelActive + return; + case CONNECTED: + scheduleSendJobIfNeeded(chan); + return; + default: + throw new IllegalStateException("unexpected state: " + chan.getContext().getInitialState()); + } + } + + /** + * Close the connection. 
+ */ + @Override + public void close() { + + if (debugEnabled) { + logger.debug("{} close()", logPrefix()); + } + + closeAsync().join(); + } + + @Override + public CompletableFuture closeAsync() { + + if (debugEnabled) { + logger.debug("{} closeAsync()", logPrefix()); + } + + if (isClosed()) { + return closeFuture; + } + + if (STATUS.compareAndSet(this, ST_OPEN, ST_CLOSED)) { + if (connectionWatchdog != null) { + connectionWatchdog.prepareClose(); + } + + final Channel chan = channel; + if (channel.getContext().getInitialState().isConnected()) { + // 1. STATUS.compareAndSet(this, ST_OPEN, ST_CLOSED) synchronize-before channel == CONNECTED + // 2. channel == CONNECTED synchronize-before setting channel to WILL_RECONNECT/ENDPOINT_CLOSED + // 3. setting channel to WILL_RECONNECT synchronize-before `isClosed()`, which will cancel all the commands. + Futures.adapt(chan.close(), closeFuture); + } else { + // if is FAILED_TO_CONNECT/CLIENT_CLOSED, don't care, otherwise + // 1. STATUS.compareAndSet(this, ST_OPEN, ST_CLOSED) synchronize-before channel == WILL_RECONNECT/CONNECTING + // 2. channel == WILL_RECONNECT/CONNECTING synchronize-before setting channel to CONNECTED/RECONNECT_FAILED + // 3. setting channel to CONNECTED/RECONNECT_FAILED synchronize-before `isClosed()`, which will cancel the + // commands; + closeFuture.complete(null); + } + } + + return closeFuture; + } + + /** + * Disconnect the channel. + */ + public void disconnect() { + + ContextualChannel chan = this.channel; + + if (chan.getContext().getInitialState().isConnected() && chan.isOpen()) { + chan.disconnect(); + } + } + + /** + * Reset the writer state. Queued commands will be canceled and the internal state will be reset. This is useful when the + * internal state machine gets out of sync with the connection. 
+ */ + @Override + public void reset() { + + if (debugEnabled) { + logger.debug("{} reset()", logPrefix()); + } + + final ContextualChannel curr = channel; + if (curr.getContext().getInitialState().isConnected()) { + curr.pipeline().fireUserEventTriggered(new ConnectionEvents.Reset()); + } + // Unsafe to call cancelBufferedCommands() here. + // cancelBufferedCommands("Reset"); + } + + private void resetInternal() { + + if (debugEnabled) { + logger.debug("{} reset()", logPrefix()); + } + + ContextualChannel curr = channel; + if (curr.getContext().getInitialState().isConnected()) { + curr.pipeline().fireUserEventTriggered(new ConnectionEvents.Reset()); + } + // Unsafe to call cancelBufferedCommands() here. + cancelCommands("Reset"); + } + + /** + * Reset the command-handler to the initial not-connected state. + */ + @Override + public void initialState() { + + // Thread safe since we are not connected yet. + cancelCommands("initialState"); + + ContextualChannel currentChannel = this.channel; + if (currentChannel.getContext().getInitialState().isConnected()) { + ChannelFuture close = currentChannel.close(); + if (currentChannel.isOpen()) { + close.syncUninterruptibly(); + } + } + } + + private boolean isClosed() { + return STATUS.get(this) == ST_CLOSED; + } + + protected String logPrefix() { + + if (logPrefix != null) { + return logPrefix; + } + + String buffer = "[" + ChannelLogDescriptor.logDescriptor(channel.getDelegate()) + ", " + "epid=" + getId() + ']'; + logPrefix = buffer; + return buffer; + } + + @Override + public String getId() { + return cachedEndpointId; + } + + private void scheduleSendJobOnConnected(final ContextualChannel chan) { + LettuceAssert.assertState(chan.eventLoop().inEventLoop(), "must be called in event loop thread"); + + // Schedule directly + scheduleSendJobInEventLoopIfNeeded(chan); + } + + private void scheduleSendJobIfNeeded(final ContextualChannel chan) { + final EventLoop eventLoop = chan.eventLoop(); + if (eventLoop.inEventLoop()) { + 
scheduleSendJobInEventLoopIfNeeded(chan); + return; + } + + if (chan.getContext().getFairEndPointContext().getHasOngoingSendLoop().tryEnterSafeGetVolatile()) { + eventLoop.execute(() -> scheduleSendJobInEventLoopIfNeeded(chan)); + } + + // Otherwise: + // 1. offer() (volatile write) synchronizes-before hasOngoingSendLoop.safe.get() == 1 (volatile read) + // 2. hasOngoingSendLoop.safe.get() == 1 (volatile read) synchronizes-before + // hasOngoingSendLoop.safe.set(0) (volatile write) in first loopSend0() + // 3. hasOngoingSendLoop.safe.set(0) (volatile write) synchronizes-before + // second loopSend0(), which will call poll() + } + + private void scheduleSendJobInEventLoopIfNeeded(final ContextualChannel chan) { + // Guarantee only 1 send loop. + if (chan.getContext().getFairEndPointContext().getHasOngoingSendLoop().tryEnterUnsafe()) { + loopSend(chan); + } + } + + private void loopSend(final ContextualChannel chan) { + final ConnectionContext connectionContext = chan.getContext(); + final BatchFlushEndPointContext batchFlushEndPointContext = connectionContext.getFairEndPointContext(); + if (connectionContext.isChannelInactiveEventFired() || batchFlushEndPointContext.hasRetryableFailedToSendTasks()) { + return; + } + + LettuceAssert.assertState(channel == chan, "unexpected: channel not match but closeStatus == null"); + loopSend0(batchFlushEndPointContext, chan, clientOptions.getWriteSpinCount(), clientOptions.getBatchSize(), true); + } + + private void loopSend0(final BatchFlushEndPointContext batchFlushEndPointContext, final ContextualChannel chan, + int remainingSpinnCount, final int maxBatchSize, final boolean firstCall) { + do { + final int count = pollBatch(batchFlushEndPointContext, maxBatchSize, chan); + if (count < 0) { + return; + } + if (count == 0 || (firstCall && count < maxBatchSize)) { + // queue was empty + break; + } + } while (--remainingSpinnCount > 0); + + if (remainingSpinnCount <= 0) { + chan.eventLoop().execute(() -> loopSend(chan)); + return; 
+ } + + // QPSPattern is low and we have drained all tasks. + if (firstCall) { + // Don't setUnsafe here because loopSend0() may result in a delayed loopSend() call. + batchFlushEndPointContext.getHasOngoingSendLoop().exitSafe(); + // Guarantee thread-safety: no dangling tasks in the queue. + loopSend0(batchFlushEndPointContext, chan, remainingSpinnCount, maxBatchSize, false); + } else { + // In low qps pattern, the send job will be triggered later when a new task is added, + batchFlushEndPointContext.getHasOngoingSendLoop().exitUnsafe(); + } + } + + private int pollBatch(final BatchFlushEndPointContext batchFlushEndPointContext, final int maxBatchSize, + ContextualChannel chan) { + int count = 0; + for (; count < maxBatchSize; count++) { + final RedisCommand cmd = this.taskQueue.poll(); // relaxed poll is faster and we wil retry later anyway. + if (cmd == null) { + break; + } + channelWrite(chan, cmd).addListener(future -> { + QUEUE_SIZE.decrementAndGet(this); + batchFlushEndPointContext.done(1); + + final Throwable retryableErr = checkSendResult(future, chan, cmd); + if (retryableErr != null && batchFlushEndPointContext.addRetryableFailedToSendTask(cmd, retryableErr)) { + // Close connection on first transient write failure + internalCloseConnectionIfNeeded(chan, retryableErr); + } + + trySetEndpointQuiescence(chan); + }); + } + + if (count > 0) { + batchFlushEndPointContext.add(count); + + channelFlush(chan); + if (batchFlushEndPointContext.hasRetryableFailedToSendTasks()) { + // Wait for onConnectionClose event() + return -1; + } + } + return count; + } + + /** + * Check write result. + * + * @param sendFuture The future to check. + * @param contextualChannel The channel instance associated with the future. + * @param cmd The task. + * @return The cause of the failure if is a retryable failed task, otherwise null. 
+ */ + private Throwable checkSendResult(Future sendFuture, ContextualChannel contextualChannel, RedisCommand cmd) { + if (cmd.isDone()) { + ExceptionUtils.logUnexpectedDone(logger, logPrefix(), cmd); + return null; + } + + final ConnectionContext.CloseStatus closeStatus = contextualChannel.getContext().getCloseStatus(); + if (closeStatus != null) { + logger.warn("[checkSendResult][interesting][{}] callback called after onClose() event, close status: {}", + logPrefix(), contextualChannel.getContext().getCloseStatus()); + final Throwable err = sendFuture.isSuccess() ? closeStatus.getErr() : sendFuture.cause(); + if (!closeStatus.isWillReconnect() || shouldNotRetry(err, cmd)) { + cmd.completeExceptionally(err); + return null; + } else { + return err; + } + } + + if (sendFuture.isSuccess()) { + return null; + } + + final Throwable cause = sendFuture.cause(); + ExceptionUtils.maybeLogSendError(logger, cause); + if (shouldNotRetry(cause, cmd)) { + cmd.completeExceptionally(cause); + return null; + } + + return cause; + } + + private boolean shouldNotRetry(Throwable cause, RedisCommand cmd) { + return reliability == Reliability.AT_MOST_ONCE || ActivationCommand.isActivationCommand(cmd) + || ExceptionUtils.oneOf(cause, SHOULD_NOT_RETRY_EXCEPTION_TYPES); + } + + private void trySetEndpointQuiescence(ContextualChannel chan) { + final EventLoop chanEventLoop = chan.eventLoop(); + LettuceAssert.isTrue(chanEventLoop.inEventLoop(), "unexpected: not in event loop"); + LettuceAssert.isTrue(chanEventLoop == lastEventLoop, "unexpected: event loop not match"); + + final ConnectionContext connectionContext = chan.getContext(); + final @Nullable ConnectionContext.CloseStatus closeStatus = connectionContext.getCloseStatus(); + final BatchFlushEndPointContext batchFlushEndPointContext = connectionContext.getFairEndPointContext(); + if (batchFlushEndPointContext.isDone() && closeStatus != null) { + if (closeStatus.isWillReconnect()) { + onWillReconnect(closeStatus, 
batchFlushEndPointContext); + } else { + onWontReconnect(closeStatus, batchFlushEndPointContext); + } + if (!setEndpointQuiescenceOncePerConnection(chan)) { + ExceptionUtils.maybeFire(logger, canFire, "unexpected: setEndpointQuiescenceOncePerConnection() failed"); + } + } + } + + private void onWillReconnect(@Nonnull final ConnectionContext.CloseStatus closeStatus, + final BatchFlushEndPointContext batchFlushEndPointContext) { + final @Nullable Deque> retryableFailedToSendTasks = batchFlushEndPointContext + .getAndClearRetryableFailedToSendTasks(); + if (retryableFailedToSendTasks != null) { + // Save retryable failed tasks + logger.info( + "[onWillReconnect][{}] compensate {} retryableFailedToSendTasks (write failure) for retrying on reconnecting, first write error: {}", + retryableFailedToSendTasks.size(), batchFlushEndPointContext.getFirstDiscontinueReason().getMessage(), + logPrefix()); + offerFirstAll(retryableFailedToSendTasks); + } + + LettuceAssert.assertState(reliability != Reliability.AT_MOST_ONCE, "unexpected: reliability is AT_MOST_ONCE"); + final Deque> retryablePendingCommands = closeStatus.getAndClearRetryablePendingCommands(); + if (retryablePendingCommands != null) { + // Save uncompletedTasks for later retry. + logger.info("[onWillReconnect][{}] compensate {} pendingCommands (write success) for retrying on reconnecting", + retryablePendingCommands.size(), logPrefix()); + offerFirstAll(retryablePendingCommands); + } + + // follow the same logic as DefaultEndpoint + if (inProtectMode) { + cancelCommands("inProtectMode"); + } + } + + private void onWontReconnect(@Nonnull final ConnectionContext.CloseStatus closeStatus, + final BatchFlushEndPointContext batchFlushEndPointContext) { + // No need to use syncAfterTerminated() since we are already in the event loop. 
+ if (isClosed()) { + onEndpointClosed(closeStatus.getAndClearRetryablePendingCommands(), + batchFlushEndPointContext.getAndClearRetryableFailedToSendTasks()); + } else { + fulfillCommands("onConnectionClose called and won't reconnect", + it -> it.completeExceptionally(closeStatus.getErr()), closeStatus.getAndClearRetryablePendingCommands(), + batchFlushEndPointContext.getAndClearRetryableFailedToSendTasks()); + } + } + + private void offerFirstAll(Deque> commands) { + commands.forEach(cmd -> { + if (cmd instanceof DemandAware.Sink) { + ((DemandAware.Sink) cmd).removeSource(); + } + }); + this.taskQueue.offerFirstAll(commands); + } + + private void internalCloseConnectionIfNeeded(ContextualChannel toCloseChan, Throwable reason) { + if (toCloseChan.getContext().isChannelInactiveEventFired() || !toCloseChan.isActive()) { + return; + } + + logger.error("[internalCloseConnectionIfNeeded][attention][{}] close the connection due to write error, reason: '{}'", + logPrefix(), reason.getMessage(), reason); + toCloseChan.eventLoop().schedule(() -> { + if (toCloseChan.isActive()) { + toCloseChan.close(); + } + }, 1, TimeUnit.SECONDS); + } + + private void cancelCommands(String message) { + fulfillCommands(message, RedisCommand::cancel); + } + + @SafeVarargs + private final void onEndpointClosed(Queue>... queues) { + fulfillCommands("endpoint closed", callbackOnClose, queues); + } + + @SafeVarargs + private final void fulfillCommands(String message, Consumer> commandConsumer, + Queue>... 
queues) { + int totalCancelledTaskNum = 0; + for (Queue> queue : queues) { + while (true) { + RedisCommand cmd = queue.poll(); + if (cmd == null) { + break; + } + if (cmd.getOutput() != null) { + cmd.getOutput().setError(message); + } + commandConsumer.accept(cmd); + + totalCancelledTaskNum++; + } + } + + while (true) { + RedisCommand cmd = this.taskQueue.poll(); + if (cmd == null) { + break; + } + if (cmd.getOutput() != null) { + cmd.getOutput().setError(message); + } + commandConsumer.accept(cmd); + totalCancelledTaskNum++; + } + + if (totalCancelledTaskNum > 0) { + logger.error("cancel {} pending tasks, reason: '{}'", totalCancelledTaskNum, message); + } + } + + private RedisCommand processActivationCommand(RedisCommand command) { + + if (!ActivationCommand.isActivationCommand(command)) { + return new ActivationCommand<>(command); + } + + return command; + } + + private Collection> processActivationCommands( + Collection> commands) { + + Collection> commandsToReturn = new ArrayList<>(commands.size()); + + for (RedisCommand command : commands) { + + if (!ActivationCommand.isActivationCommand(command)) { + command = new ActivationCommand<>(command); + } + + commandsToReturn.add(command); + } + + return commandsToReturn; + } + + private Throwable validateWrite(@SuppressWarnings("unused") int commands) { + if (isClosed()) { + return new RedisException("Connection is closed"); + } + + final Throwable localConnectionErr = connectionError; + if (localConnectionErr != null /* different logic of DefaultEndpoint */) { + return localConnectionErr; + } + + if (boundedQueues && QUEUE_SIZE.get(this) + commands > clientOptions.getRequestQueueSize()) { + return new RedisException("Request queue size exceeded: " + clientOptions.getRequestQueueSize() + + ". 
Commands are not accepted until the queue size drops."); + } + + final ContextualChannel chan = this.channel; + switch (chan.getContext().getInitialState()) { + case ENDPOINT_CLOSED: + return new RedisException("Connection is closed"); + case RECONNECT_FAILED: + return failedToReconnectReason; + case WILL_RECONNECT: + case CONNECTING: + return rejectCommandsWhileDisconnected ? new RedisException("Currently not connected. Commands are rejected.") + : null; + case CONNECTED: + return !chan.isActive() && rejectCommandsWhileDisconnected ? new RedisException("Connection is closed") : null; + default: + throw new IllegalStateException("unexpected state: " + chan.getContext().getInitialState()); + } + } + + private void channelFlush(Channel channel) { + if (debugEnabled) { + logger.debug("{} write() channelFlush", logPrefix()); + } + + channel.flush(); + } + + private ChannelFuture channelWrite(Channel channel, RedisCommand command) { + + if (debugEnabled) { + logger.debug("{} write() channelWrite command {}", logPrefix(), command); + } + + return channel.write(command); + } + + /* + * Synchronize after the endpoint is terminated. 
This is to ensure only one thread can access the task queue after endpoint + * is terminated (state is RECONNECT_FAILED/ENDPOINT_CLOSED) + */ + private void syncAfterTerminated(Runnable runnable) { + final EventLoop localLastEventLoop = lastEventLoop; + LettuceAssert.notNull(localLastEventLoop, "lastEventLoop must not be null after terminated"); + if (localLastEventLoop.inEventLoop()) { + runnable.run(); + } else { + localLastEventLoop.execute(() -> { + runnable.run(); + LettuceAssert.isTrue(lastEventLoop == localLastEventLoop, "lastEventLoop must not be changed after terminated"); + }); + } + } + + private enum Reliability { + AT_MOST_ONCE, AT_LEAST_ONCE + } + +} diff --git a/src/main/java/io/lettuce/core/utils/ExceptionUtils.java b/src/main/java/io/lettuce/core/utils/ExceptionUtils.java new file mode 100644 index 0000000000..4072d81b3e --- /dev/null +++ b/src/main/java/io/lettuce/core/utils/ExceptionUtils.java @@ -0,0 +1,95 @@ +package io.lettuce.core.utils; + +import io.lettuce.core.output.CommandOutput; +import io.lettuce.core.protocol.RedisCommand; +import io.netty.channel.socket.ChannelOutputShutdownException; +import io.netty.util.internal.logging.InternalLogger; + +import java.io.IOException; +import java.nio.channels.ClosedChannelException; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; +import java.util.Set; +import java.util.function.Function; + +public class ExceptionUtils { + + private static final Set SUPPRESS_IO_EXCEPTION_MESSAGES = new HashSet<>( + Arrays.asList("Connection reset by peer", "Broken pipe", "Connection timed out")); + + private ExceptionUtils() { + } + + public static void maybeLogSendError(InternalLogger logger, Throwable cause) { + if (cause instanceof ClosedChannelException) { + return; + } + + if (cause instanceof IOException && (SUPPRESS_IO_EXCEPTION_MESSAGES.contains(cause.getMessage()) + || cause instanceof ChannelOutputShutdownException)) { + logger.debug("[maybeLogSendError] error during 
request: {}", cause.getMessage(), cause); + } else { + logger.error("[maybeLogSendError][attention] unexpected exception during request: {}", cause.getMessage(), cause); + } + } + + public static T castTo(Throwable throwable, Class clazz, Function supplier) { + if (clazz.isInstance(throwable)) { + return clazz.cast(throwable); + } + return supplier.apply(throwable); + } + + public static T clearStackTrace(T throwable) { + throwable.setStackTrace(new StackTraceElement[0]); + return throwable; + } + + /** + * Returns whether the throwable is one of the exception types or one of the cause in the cause chain is one of the + * exception types + * + * @param throwable exception to check + * @param exceptionTypes target exception types. + * @return whether the throwable is one of the exception types or one of the cause in the cause chain is one of the + * exception types + */ + public static boolean oneOf(final Throwable throwable, final Collection> exceptionTypes) { + Throwable cause = throwable; + do { + for (Class exceptionType : exceptionTypes) { + if (exceptionType.isInstance(cause)) { + return true; + } + } + cause = cause.getCause(); + } while (cause != null); + return false; + } + + public static void maybeFire(InternalLogger logger, boolean canFire, String msg) { + final IllegalStateException ex = new IllegalStateException(msg); + logger.error("[unexpected] {}", msg, ex); + if (canFire) { + throw ex; + } + } + + public static void logUnexpectedDone(InternalLogger logger, String logPrefix, RedisCommand cmd) { + if (cmd.isCancelled()) { + logger.warn("[logUnexpectedDone][{}] command is cancelled: {}", logPrefix, cmd); + return; + } + + final CommandOutput output = cmd.getOutput(); + final String err = output.getError(); + if (err != null) { + logger.warn("[logUnexpectedDone][{}] command completes with err, cmd: [{}], err: [{}]", logPrefix, cmd, err); + return; + } + + logger.warn("[logUnexpectedDone][{}] command completes normally, cmd: [{}], value: [{}]", 
logPrefix, cmd, output.get()); + } + +} From 63bb9579527d60207b2709b4df60b0c77961ff9c Mon Sep 17 00:00:00 2001 From: "xiaofan.chen" Date: Wed, 24 Jul 2024 23:40:41 +0800 Subject: [PATCH 02/35] fix: add notifyChannelInactiveAfterWatchdogDecision --- .../core/protocol/BatchFlushEndpoint.java | 7 +------ .../lettuce/core/protocol/CommandHandler.java | 17 ++++++++++++++--- .../core/protocol/ConnectionWatchdog.java | 16 ++++++++-------- .../protocol/DefaultBatchFlushEndpoint.java | 6 +++++- 4 files changed, 28 insertions(+), 18 deletions(-) diff --git a/src/main/java/io/lettuce/core/protocol/BatchFlushEndpoint.java b/src/main/java/io/lettuce/core/protocol/BatchFlushEndpoint.java index 78f99eddaa..71cd7a6cb8 100644 --- a/src/main/java/io/lettuce/core/protocol/BatchFlushEndpoint.java +++ b/src/main/java/io/lettuce/core/protocol/BatchFlushEndpoint.java @@ -9,11 +9,6 @@ */ public interface BatchFlushEndpoint extends Endpoint { - @Override - default void notifyChannelInactive(Channel channel) { - throw new UnsupportedOperationException(); - } - @Override default void notifyDrainQueuedCommands(HasQueuedCommands queuedCommands) { throw new UnsupportedOperationException(); @@ -25,7 +20,7 @@ default void notifyDrainQueuedCommands(HasQueuedCommands queuedCommands) { * @param channel the channel * @param retryableQueuedCommands retryable queued commands in command handler */ - void notifyChannelInactive(Channel channel, Deque> retryableQueuedCommands); + void notifyChannelInactiveAfterWatchdogDecision(Channel channel, Deque> retryableQueuedCommands); enum AcquireQuiescenceResult { SUCCESS, FAILED, TRY_LATER diff --git a/src/main/java/io/lettuce/core/protocol/CommandHandler.java b/src/main/java/io/lettuce/core/protocol/CommandHandler.java index 16dd374bfc..340e776148 100644 --- a/src/main/java/io/lettuce/core/protocol/CommandHandler.java +++ b/src/main/java/io/lettuce/core/protocol/CommandHandler.java @@ -38,6 +38,7 @@ import io.lettuce.core.RedisException; import 
io.lettuce.core.api.push.PushListener; import io.lettuce.core.api.push.PushMessage; +import io.lettuce.core.datastructure.queue.unmodifiabledeque.UnmodifiableDeque; import io.lettuce.core.internal.LettuceAssert; import io.lettuce.core.internal.LettuceSets; import io.lettuce.core.metrics.CommandLatencyRecorder; @@ -95,6 +96,8 @@ public class CommandHandler extends ChannelDuplexHandler implements HasQueuedCom private final Endpoint endpoint; + private final boolean supportsBatchFlush; + private final ArrayDeque> stack = new ArrayDeque<>(); private final long commandHandlerId = COMMAND_HANDLER_COUNTER.incrementAndGet(); @@ -151,6 +154,7 @@ public CommandHandler(ClientOptions clientOptions, ClientResources clientResourc this.clientOptions = clientOptions; this.clientResources = clientResources; this.endpoint = endpoint; + this.supportsBatchFlush = endpoint instanceof BatchFlushEndpoint; this.commandLatencyRecorder = clientResources.commandLatencyRecorder(); this.latencyMetricsEnabled = commandLatencyRecorder.isEnabled(); this.boundedQueues = clientOptions.getRequestQueueSize() != Integer.MAX_VALUE; @@ -372,10 +376,11 @@ public void channelInactive(ChannelHandlerContext ctx) throws Exception { setState(LifecycleState.DISCONNECTED); setState(LifecycleState.DEACTIVATING); - if (endpoint instanceof BatchFlushEndpoint) { - ((BatchFlushEndpoint) endpoint).notifyChannelInactive(ctx.channel(), drainStack()); + endpoint.notifyChannelInactive(ctx.channel()); + Deque> batchFlushRetryableDrainQueuedCommands = UnmodifiableDeque.emptyDeque(); + if (supportsBatchFlush) { + batchFlushRetryableDrainQueuedCommands = drainStack(); } else { - endpoint.notifyChannelInactive(ctx.channel()); endpoint.notifyDrainQueuedCommands(this); } @@ -391,6 +396,12 @@ public void channelInactive(ChannelHandlerContext ctx) throws Exception { } super.channelInactive(ctx); + + if (supportsBatchFlush) { + // Needs decision of watchdog + ((BatchFlushEndpoint) 
endpoint).notifyChannelInactiveAfterWatchdogDecision(ctx.channel(), + batchFlushRetryableDrainQueuedCommands); + } } /** diff --git a/src/main/java/io/lettuce/core/protocol/ConnectionWatchdog.java b/src/main/java/io/lettuce/core/protocol/ConnectionWatchdog.java index f90e4e5f47..b811c96641 100644 --- a/src/main/java/io/lettuce/core/protocol/ConnectionWatchdog.java +++ b/src/main/java/io/lettuce/core/protocol/ConnectionWatchdog.java @@ -19,6 +19,13 @@ */ package io.lettuce.core.protocol; +import java.net.SocketAddress; +import java.time.Duration; +import java.util.concurrent.CancellationException; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; + import io.lettuce.core.ClientOptions; import io.lettuce.core.ConnectionBuilder; import io.lettuce.core.ConnectionEvents; @@ -45,13 +52,6 @@ import reactor.core.publisher.Mono; import reactor.util.function.Tuple2; -import java.net.SocketAddress; -import java.time.Duration; -import java.util.concurrent.CancellationException; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; - /** * A netty {@link ChannelHandler} responsible for monitoring the channel and reconnecting when the connection is lost. 
* @@ -104,7 +104,7 @@ public class ConnectionWatchdog extends ChannelInboundHandlerAdapter { private volatile Timeout reconnectScheduleTimeout; - private volatile boolean willReconnect; + private boolean willReconnect; /** * Create a new watchdog that adds to new connections to the supplied {@link ChannelGroup} and establishes a new diff --git a/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java b/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java index 6a37f1afd0..286344d3c4 100644 --- a/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java +++ b/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java @@ -383,13 +383,17 @@ public void notifyReconnectFailed(Throwable t) { } @Override - public void notifyChannelInactive(Channel channel, Deque> retryableQueuedCommands) { + public void notifyChannelInactive(Channel channel) { if (debugEnabled) { logger.debug("{} deactivating endpoint handler", logPrefix()); } connectionFacade.deactivated(); + } + @Override + public void notifyChannelInactiveAfterWatchdogDecision(Channel channel, + Deque> retryableQueuedCommands) { final ContextualChannel chan = this.channel; if (!chan.getContext().getInitialState().isConnected() || chan.getDelegate() != channel) { logger.error("[unexpected][{}] notifyChannelInactive: channel not match", logPrefix()); From f5cc9380eb568711a10d5770162d67b7edb04015 Mon Sep 17 00:00:00 2001 From: "xiaofan.chen" Date: Thu, 25 Jul 2024 05:18:48 +0800 Subject: [PATCH 03/35] chore: watchdog pass onEndpointQuiescence callback instead --- .../core/protocol/BatchFlushEndpoint.java | 6 -- .../core/protocol/ConnectionWatchdog.java | 41 +++++------- .../protocol/DefaultBatchFlushEndpoint.java | 67 +++++++++---------- 3 files changed, 45 insertions(+), 69 deletions(-) diff --git a/src/main/java/io/lettuce/core/protocol/BatchFlushEndpoint.java b/src/main/java/io/lettuce/core/protocol/BatchFlushEndpoint.java index 71cd7a6cb8..1878bf853e 100644 --- 
a/src/main/java/io/lettuce/core/protocol/BatchFlushEndpoint.java +++ b/src/main/java/io/lettuce/core/protocol/BatchFlushEndpoint.java @@ -22,12 +22,6 @@ default void notifyDrainQueuedCommands(HasQueuedCommands queuedCommands) { */ void notifyChannelInactiveAfterWatchdogDecision(Channel channel, Deque> retryableQueuedCommands); - enum AcquireQuiescenceResult { - SUCCESS, FAILED, TRY_LATER - } - - AcquireQuiescenceResult tryAcquireQuiescence(); - void notifyReconnectFailed(Throwable throwable); } diff --git a/src/main/java/io/lettuce/core/protocol/ConnectionWatchdog.java b/src/main/java/io/lettuce/core/protocol/ConnectionWatchdog.java index b811c96641..d23c9adc73 100644 --- a/src/main/java/io/lettuce/core/protocol/ConnectionWatchdog.java +++ b/src/main/java/io/lettuce/core/protocol/ConnectionWatchdog.java @@ -84,6 +84,8 @@ public class ConnectionWatchdog extends ChannelInboundHandlerAdapter { private final String epid; + private final boolean useBatchFlushEndpoint; + private final Endpoint endpoint; private Channel channel; @@ -104,7 +106,7 @@ public class ConnectionWatchdog extends ChannelInboundHandlerAdapter { private volatile Timeout reconnectScheduleTimeout; - private boolean willReconnect; + private Runnable doReconnectOnEndpointQuiescence; /** * Create a new watchdog that adds to new connections to the supplied {@link ChannelGroup} and establishes a new @@ -147,6 +149,7 @@ public ConnectionWatchdog(Delay reconnectDelay, ClientOptions clientOptions, Boo this.redisUri = (String) bootstrap.config().attrs().get(ConnectionBuilder.REDIS_URI); this.epid = endpoint.getId(); this.endpoint = endpoint; + this.useBatchFlushEndpoint = endpoint instanceof BatchFlushEndpoint; Mono wrappedSocketAddressSupplier = socketAddressSupplier.doOnNext(addr -> remoteAddress = addr) .onErrorResume(t -> { @@ -201,7 +204,7 @@ public void channelActive(ChannelHandlerContext ctx) throws Exception { @Override public void channelInactive(ChannelHandlerContext ctx) throws Exception { - 
willReconnect = false; + doReconnectOnEndpointQuiescence = null; logger.debug("{} channelInactive()", logPrefix()); if (!armed) { @@ -222,12 +225,11 @@ public void channelInactive(ChannelHandlerContext ctx) throws Exception { return; } - if (endpoint instanceof BatchFlushEndpoint) { - waitQuiescence((BatchFlushEndpoint) endpoint, this::scheduleReconnect); - } else { - scheduleReconnect(); + doReconnectOnEndpointQuiescence = this::scheduleReconnect; + if (!useBatchFlushEndpoint) { + doReconnectOnEndpointQuiescence.run(); } - willReconnect = true; + // otherwise, will be called later by BatchFlushEndpoint#onEndpointQuiescence } else { logger.debug("{} Reconnect scheduling disabled", logPrefix(), ctx); } @@ -235,20 +237,8 @@ public void channelInactive(ChannelHandlerContext ctx) throws Exception { super.channelInactive(ctx); } - private void waitQuiescence(BatchFlushEndpoint batchFlushEndpoint, Runnable runnable) { - final BatchFlushEndpoint.AcquireQuiescenceResult ret = batchFlushEndpoint.tryAcquireQuiescence(); - switch (ret) { - case SUCCESS: - runnable.run(); - break; - case FAILED: - logger.error("{} Failed to acquire quiescence", logPrefix()); - break; - case TRY_LATER: - // TODO use exponential backoff - timer.newTimeout(it -> waitQuiescence(batchFlushEndpoint, runnable), 3, TimeUnit.MILLISECONDS); - break; - } + void reconnectOnEndpointQuiescence() { + doReconnectOnEndpointQuiescence.run(); } /** @@ -317,10 +307,9 @@ private void notifyEndpointFailedToConnectIfNeeded() { } private void notifyEndpointFailedToConnectIfNeeded(Exception e) { - if (!(endpoint instanceof BatchFlushEndpoint)) { - return; + if (useBatchFlushEndpoint) { + ((BatchFlushEndpoint) endpoint).notifyReconnectFailed(e); } - ((BatchFlushEndpoint) endpoint).notifyReconnectFailed(e); } /** @@ -493,8 +482,8 @@ private String logPrefix() { return logPrefix = buffer; } - public boolean isWillReconnect() { - return willReconnect; + public boolean willReconnect() { + return 
doReconnectOnEndpointQuiescence != null; } } diff --git a/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java b/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java index 286344d3c4..1efa6ed43b 100644 --- a/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java +++ b/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java @@ -25,7 +25,6 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicIntegerFieldUpdater; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReferenceFieldUpdater; @@ -161,8 +160,6 @@ protected static void cancelCommandOnEndpointClose(RedisCommand cmd) { protected final UnboundedMpscOfferFirstQueue> taskQueue; - private final AtomicBoolean quiescence; - private final boolean canFire; private volatile boolean inProtectMode; @@ -195,7 +192,6 @@ protected DefaultBatchFlushEndpoint(ClientOptions clientOptions, ClientResources long endpointId = ENDPOINT_COUNTER.incrementAndGet(); this.cachedEndpointId = "0x" + Long.toHexString(endpointId); this.taskQueue = new JcToolsUnboundedMpscOfferFirstQueue<>(); - this.quiescence = new AtomicBoolean(); this.canFire = false; this.callbackOnClose = callbackOnClose; } @@ -394,20 +390,20 @@ public void notifyChannelInactive(Channel channel) { @Override public void notifyChannelInactiveAfterWatchdogDecision(Channel channel, Deque> retryableQueuedCommands) { - final ContextualChannel chan = this.channel; - if (!chan.getContext().getInitialState().isConnected() || chan.getDelegate() != channel) { + final ContextualChannel prevChan = this.channel; + if (!prevChan.getContext().getInitialState().isConnected() || prevChan.getDelegate() != channel) { logger.error("[unexpected][{}] notifyChannelInactive: channel not match", logPrefix()); return; } - if 
(chan.getContext().isChannelInactiveEventFired()) { + if (prevChan.getContext().isChannelInactiveEventFired()) { logger.error("[unexpected][{}] notifyChannelInactive: already fired", logPrefix()); return; } - boolean willReconnect = connectionWatchdog != null && connectionWatchdog.isWillReconnect(); + boolean willReconnect = connectionWatchdog != null && connectionWatchdog.willReconnect(); RedisException exception = null; - // Unlike DefaultEndpoint, here we don't check reliability since connectionWatchdog.isWillReconnect() already does it. + // Unlike DefaultEndpoint, here we don't check reliability since connectionWatchdog.willReconnect() already does it. if (isClosed()) { exception = new RedisException("endpoint closed"); willReconnect = false; @@ -429,33 +425,9 @@ public void notifyChannelInactiveAfterWatchdogDecision(Channel channel, if (!willReconnect) { CHANNEL.set(this, DummyContextualChannelInstances.CHANNEL_ENDPOINT_CLOSED); } - chan.getContext().setCloseStatus(new ConnectionContext.CloseStatus(willReconnect, retryableQueuedCommands, exception)); - trySetEndpointQuiescence(chan); - } - - private boolean setEndpointQuiescenceOncePerConnection(ContextualChannel contextualChannel) { - if (contextualChannel.getContext().setChannelQuiescentOnce()) { - this.quiescence.set(true); - return true; - } - return false; - } - - @Override - public AcquireQuiescenceResult tryAcquireQuiescence() { - if (quiescence.compareAndSet(true, false)) { - if (channel.getContext().getInitialState() == ConnectionContext.State.ENDPOINT_CLOSED) { - return AcquireQuiescenceResult.FAILED; - } - if (CHANNEL.compareAndSet(this, DummyContextualChannelInstances.CHANNEL_WILL_RECONNECT, - DummyContextualChannelInstances.CHANNEL_CONNECTING)) { - return AcquireQuiescenceResult.SUCCESS; - } - onUnexpectedState("tryAcquireQuiescence", ConnectionContext.State.WILL_RECONNECT, - this.channel.getContext().getInitialState()); - return AcquireQuiescenceResult.FAILED; - } - return 
AcquireQuiescenceResult.TRY_LATER; + prevChan.getContext() + .setCloseStatus(new ConnectionContext.CloseStatus(willReconnect, retryableQueuedCommands, exception)); + trySetEndpointQuiescence(prevChan); } @Override @@ -811,12 +783,33 @@ private void trySetEndpointQuiescence(ContextualChannel chan) { } else { onWontReconnect(closeStatus, batchFlushEndPointContext); } - if (!setEndpointQuiescenceOncePerConnection(chan)) { + + if (chan.getContext().setChannelQuiescentOnce()) { + onEndpointQuiescence(); + } else { ExceptionUtils.maybeFire(logger, canFire, "unexpected: setEndpointQuiescenceOncePerConnection() failed"); } } } + private void onEndpointQuiescence() { + if (channel.getContext().getInitialState() == ConnectionContext.State.ENDPOINT_CLOSED) { + return; + } + + // Create happens-before with channelActive() + if (!CHANNEL.compareAndSet(this, DummyContextualChannelInstances.CHANNEL_WILL_RECONNECT, + DummyContextualChannelInstances.CHANNEL_CONNECTING)) { + + onUnexpectedState("onEndpointQuiescence", ConnectionContext.State.WILL_RECONNECT, + this.channel.getContext().getInitialState()); + return; + } + + // neither connectionWatchdog nor doReconnectOnEndpointQuiescence could be null + connectionWatchdog.reconnectOnEndpointQuiescence(); + } + private void onWillReconnect(@Nonnull final ConnectionContext.CloseStatus closeStatus, final BatchFlushEndPointContext batchFlushEndPointContext) { final @Nullable Deque> retryableFailedToSendTasks = batchFlushEndPointContext From 96cfe7bec163df4ae41bd0068bc8b26faa6eb673 Mon Sep 17 00:00:00 2001 From: "xiaofan.chen" Date: Thu, 25 Jul 2024 10:06:22 +0800 Subject: [PATCH 04/35] perf: use platform dependent mpsc queue, refine code style --- .../lettuce/core/AutoBatchFlushOptions.java | 129 ++++++++++++++++++ .../java/io/lettuce/core/ClientOptions.java | 70 +++------- .../java/io/lettuce/core/RedisClient.java | 9 +- .../core/cluster/RedisClusterClient.java | 19 ++- .../context/BatchFlushEndPointContext.java | 16 ++- 
.../UnboundedMpscOfferFirstQueue.java | 3 +- ... ConcurrentLinkedMpscOfferFirstQueue.java} | 11 +- .../JcToolsUnboundedMpscOfferFirstQueue.java | 14 +- .../protocol/DefaultBatchFlushEndpoint.java | 16 ++- 9 files changed, 204 insertions(+), 83 deletions(-) create mode 100644 src/main/java/io/lettuce/core/AutoBatchFlushOptions.java rename src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/{ConcurrentLinkedOfferFirstQueue.java => ConcurrentLinkedMpscOfferFirstQueue.java} (85%) diff --git a/src/main/java/io/lettuce/core/AutoBatchFlushOptions.java b/src/main/java/io/lettuce/core/AutoBatchFlushOptions.java new file mode 100644 index 0000000000..35ea9e0ed3 --- /dev/null +++ b/src/main/java/io/lettuce/core/AutoBatchFlushOptions.java @@ -0,0 +1,129 @@ +package io.lettuce.core; + +import java.io.Serializable; + +/** + * Options for command timeouts. These options configure how and whether commands time out once they were dispatched. Command + * timeout begins: + *
<ul>
+ * <li>When the command is sent successfully to the transport</li>
+ * <li>Queued while the connection was inactive</li>
+ * </ul>
+ * + * The timeout is canceled upon command completion/cancellation. Timeouts are not tied to a specific API and expire commands + * regardless of the synchronization method provided by the API that was used to enqueue the command. + * + * @author Mark Paluch + * @since 5.1 + */ +public class AutoBatchFlushOptions implements Serializable { + + public static final boolean DEFAULT_ENABLE_AUTO_BATCH_FLUSH = false; + + public static final int DEFAULT_WRITE_SPIN_COUNT = 16; + + public static final int DEFAULT_BATCH_SIZE = 8; + + private final boolean enableAutoBatchFlush; + + private final int writeSpinCount; + + private final int batchSize; + + public AutoBatchFlushOptions(AutoBatchFlushOptions.Builder builder) { + this.enableAutoBatchFlush = builder.enableAutoBatchFlush; + this.writeSpinCount = builder.writeSpinCount; + this.batchSize = builder.batchSize; + } + + /** + * Returns a new {@link AutoBatchFlushOptions.Builder} to construct {@link AutoBatchFlushOptions}. + */ + public static Builder builder() { + return new Builder(); + } + + /** + * Create a new instance of {@link AutoBatchFlushOptions} with default settings. + */ + public static AutoBatchFlushOptions create() { + return builder().build(); + } + + /** + * Builder for {@link AutoBatchFlushOptions}. + */ + public static class Builder { + + private boolean enableAutoBatchFlush = DEFAULT_ENABLE_AUTO_BATCH_FLUSH; + + private int writeSpinCount = DEFAULT_WRITE_SPIN_COUNT; + + private int batchSize = DEFAULT_BATCH_SIZE; + + /** + * Enable auto batch flush. + * + * @param enableAutoBatchFlush {@code true} to enable auto batch flush. 
+ * @return {@code this} + */ + public Builder enableAutoBatchFlush(boolean enableAutoBatchFlush) { + this.enableAutoBatchFlush = enableAutoBatchFlush; + return this; + } + + /** + * how many times to spin batchPoll() from the task queue + * + * @param writeSpinCount the write spin count + * @return {@code this} + */ + public Builder writeSpinCount(int writeSpinCount) { + this.writeSpinCount = writeSpinCount; + return this; + } + + /** + * how many commands to batch in a single flush + * + * @param batchSize the batch size + * @return {@code this} + */ + public Builder batchSize(int batchSize) { + this.batchSize = batchSize; + return this; + } + + /** + * Create a new instance of {@link AutoBatchFlushOptions}. + * + * @return new instance of {@link AutoBatchFlushOptions} + */ + public AutoBatchFlushOptions build() { + return new AutoBatchFlushOptions(this); + } + + } + + /** + * @return {@code true} if auto batch flush is enabled. + */ + public boolean isAutoBatchFlushEnabled() { + return enableAutoBatchFlush; + } + + /** + * @return the write spin count + */ + public int getWriteSpinCount() { + return writeSpinCount; + } + + /** + * @return the batch size + */ + public int getBatchSize() { + return batchSize; + } + +} diff --git a/src/main/java/io/lettuce/core/ClientOptions.java b/src/main/java/io/lettuce/core/ClientOptions.java index b03fa90015..2b45c4f239 100644 --- a/src/main/java/io/lettuce/core/ClientOptions.java +++ b/src/main/java/io/lettuce/core/ClientOptions.java @@ -19,6 +19,10 @@ */ package io.lettuce.core; +import java.io.Serializable; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; + import io.lettuce.core.api.StatefulConnection; import io.lettuce.core.internal.LettuceAssert; import io.lettuce.core.protocol.DecodeBufferPolicies; @@ -27,10 +31,6 @@ import io.lettuce.core.protocol.ReadOnlyCommands; import io.lettuce.core.resource.ClientResources; -import java.io.Serializable; -import java.nio.charset.Charset; -import 
java.nio.charset.StandardCharsets; - /** * Client Options to control the behavior of {@link RedisClient}. * @@ -69,11 +69,7 @@ public class ClientOptions implements Serializable { public static final TimeoutOptions DEFAULT_TIMEOUT_OPTIONS = TimeoutOptions.create(); - public static final boolean DEFAULT_USE_BATCH_FLUSH = false; - - public static final int DEFAULT_WRITE_SPIN_COUNT = 16; - - public static final int DEFAULT_BATCH_SIZE = 8; + public static final AutoBatchFlushOptions DEFAULT_AUTO_BATCH_FLUSH_OPTIONS = AutoBatchFlushOptions.create(); private final boolean autoReconnect; @@ -103,11 +99,7 @@ public class ClientOptions implements Serializable { private final TimeoutOptions timeoutOptions; - private final boolean useBatchFlush; - - private final int writeSpinCount; - - private final int batchSize; + private final AutoBatchFlushOptions autoBatchFlushOptions; protected ClientOptions(Builder builder) { this.autoReconnect = builder.autoReconnect; @@ -124,9 +116,7 @@ protected ClientOptions(Builder builder) { this.sslOptions = builder.sslOptions; this.suspendReconnectOnProtocolFailure = builder.suspendReconnectOnProtocolFailure; this.timeoutOptions = builder.timeoutOptions; - this.useBatchFlush = builder.useBatchFlush; - this.writeSpinCount = builder.writeSpinCount; - this.batchSize = builder.batchSize; + this.autoBatchFlushOptions = builder.autoBatchFlushOptions; } protected ClientOptions(ClientOptions original) { @@ -144,9 +134,7 @@ protected ClientOptions(ClientOptions original) { this.sslOptions = original.getSslOptions(); this.suspendReconnectOnProtocolFailure = original.isSuspendReconnectOnProtocolFailure(); this.timeoutOptions = original.getTimeoutOptions(); - this.useBatchFlush = original.useBatchFlush; - this.writeSpinCount = original.getWriteSpinCount(); - this.batchSize = original.batchSize; + this.autoBatchFlushOptions = original.getAutoBatchFlushOptions(); } /** @@ -210,11 +198,7 @@ public static class Builder { private TimeoutOptions timeoutOptions 
= DEFAULT_TIMEOUT_OPTIONS; - public boolean useBatchFlush = DEFAULT_USE_BATCH_FLUSH; - - private int writeSpinCount = DEFAULT_WRITE_SPIN_COUNT; - - private int batchSize = DEFAULT_BATCH_SIZE; + private AutoBatchFlushOptions autoBatchFlushOptions = DEFAULT_AUTO_BATCH_FLUSH_OPTIONS; protected Builder() { } @@ -446,22 +430,14 @@ public Builder timeoutOptions(TimeoutOptions timeoutOptions) { return this; } - public Builder useBatchFlush(boolean useBatchFlush) { - this.useBatchFlush = useBatchFlush; - return this; - } - - public Builder writeSpinCount(int writeSpinCount) { - LettuceAssert.isPositive(writeSpinCount, "writeSpinCount is not positive"); - - this.writeSpinCount = writeSpinCount; - return this; - } - - public Builder batchSize(int batchSize) { - LettuceAssert.isPositive(batchSize, "batchSize is not positive"); - - this.batchSize = batchSize; + /** + * Sets the {@link AutoBatchFlushOptions} + * + * @param autoBatchFlushOptions must not be {@code null}. + */ + public Builder autoBatchFlushOptions(AutoBatchFlushOptions autoBatchFlushOptions) { + LettuceAssert.notNull(autoBatchFlushOptions, "AutoBatchFlushOptions must not be null"); + this.autoBatchFlushOptions = autoBatchFlushOptions; return this; } @@ -678,16 +654,8 @@ public TimeoutOptions getTimeoutOptions() { return timeoutOptions; } - public int getWriteSpinCount() { - return writeSpinCount; - } - - public int getBatchSize() { - return batchSize; - } - - public boolean isUseBatchFlush() { - return useBatchFlush; + public AutoBatchFlushOptions getAutoBatchFlushOptions() { + return autoBatchFlushOptions; } /** diff --git a/src/main/java/io/lettuce/core/RedisClient.java b/src/main/java/io/lettuce/core/RedisClient.java index 3622698170..917e2dce91 100644 --- a/src/main/java/io/lettuce/core/RedisClient.java +++ b/src/main/java/io/lettuce/core/RedisClient.java @@ -19,8 +19,6 @@ */ package io.lettuce.core; -import static io.lettuce.core.internal.LettuceStrings.*; - import java.net.InetSocketAddress; import 
java.net.SocketAddress; import java.time.Duration; @@ -56,6 +54,9 @@ import io.netty.util.internal.logging.InternalLoggerFactory; import reactor.core.publisher.Mono; +import static io.lettuce.core.internal.LettuceStrings.isEmpty; +import static io.lettuce.core.internal.LettuceStrings.isNotEmpty; + /** * A scalable and thread-safe Redis client supporting synchronous, asynchronous and reactive * execution models. Multiple threads may share one connection if they avoid blocking and transactional operations such as BLPOP @@ -172,7 +173,6 @@ public static RedisClient create(ClientResources clientResources) { * * @param clientResources the client resources, must not be {@code null} * @param uri the Redis URI, must not be {@code null} - * * @return a new instance of {@link RedisClient} */ public static RedisClient create(ClientResources clientResources, String uri) { @@ -276,7 +276,8 @@ private ConnectionFuture> connectStandalone logger.debug("Trying to get a Redis connection for: {}", redisURI); - Endpoint endpoint = getOptions().isUseBatchFlush() ? new DefaultBatchFlushEndpoint(getOptions(), getResources()) + Endpoint endpoint = getOptions().getAutoBatchFlushOptions().isAutoBatchFlushEnabled() + ? 
new DefaultBatchFlushEndpoint(getOptions(), getResources()) : new DefaultEndpoint(getOptions(), getResources()); RedisChannelWriter writer = (RedisChannelWriter) endpoint; diff --git a/src/main/java/io/lettuce/core/cluster/RedisClusterClient.java b/src/main/java/io/lettuce/core/cluster/RedisClusterClient.java index fc28913e30..b8e244e684 100644 --- a/src/main/java/io/lettuce/core/cluster/RedisClusterClient.java +++ b/src/main/java/io/lettuce/core/cluster/RedisClusterClient.java @@ -38,7 +38,20 @@ import java.util.function.Predicate; import java.util.function.Supplier; -import io.lettuce.core.*; +import io.lettuce.core.AbstractRedisClient; +import io.lettuce.core.ClientOptions; +import io.lettuce.core.CommandListenerWriter; +import io.lettuce.core.ConnectionBuilder; +import io.lettuce.core.ConnectionFuture; +import io.lettuce.core.ConnectionState; +import io.lettuce.core.ReadFrom; +import io.lettuce.core.RedisChannelHandler; +import io.lettuce.core.RedisChannelWriter; +import io.lettuce.core.RedisConnectionException; +import io.lettuce.core.RedisException; +import io.lettuce.core.RedisURI; +import io.lettuce.core.SslConnectionBuilder; +import io.lettuce.core.StatefulRedisConnectionImpl; import io.lettuce.core.api.StatefulRedisConnection; import io.lettuce.core.cluster.api.NodeSelectionSupport; import io.lettuce.core.cluster.api.StatefulRedisClusterConnection; @@ -146,12 +159,12 @@ * possible. 
* * @author Mark Paluch - * @since 3.0 * @see RedisURI * @see StatefulRedisClusterConnection * @see RedisCodec * @see ClusterClientOptions * @see ClientResources + * @since 3.0 */ public class RedisClusterClient extends AbstractRedisClient { @@ -541,7 +554,7 @@ ConnectionFuture> connectToNodeAsync(RedisC assertNotEmpty(initialUris); LettuceAssert.notNull(socketAddressSupplier, "SocketAddressSupplier must not be null"); - Endpoint endpoint = getClusterClientOptions().isUseBatchFlush() + Endpoint endpoint = getClusterClientOptions().getAutoBatchFlushOptions().isAutoBatchFlushEnabled() ? new ClusterNodeBatchFlushEndpoint(getClusterClientOptions(), getResources(), clusterWriter) : new ClusterNodeEndpoint(getClusterClientOptions(), getResources(), clusterWriter); diff --git a/src/main/java/io/lettuce/core/context/BatchFlushEndPointContext.java b/src/main/java/io/lettuce/core/context/BatchFlushEndPointContext.java index 53077e3a5b..2730c3284b 100644 --- a/src/main/java/io/lettuce/core/context/BatchFlushEndPointContext.java +++ b/src/main/java/io/lettuce/core/context/BatchFlushEndPointContext.java @@ -1,16 +1,17 @@ package io.lettuce.core.context; +import java.util.ArrayDeque; +import java.util.Deque; +import java.util.concurrent.atomic.AtomicInteger; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + import io.lettuce.core.datastructure.queue.unmodifiabledeque.UnmodifiableDeque; import io.lettuce.core.protocol.RedisCommand; import io.netty.util.internal.logging.InternalLogger; import io.netty.util.internal.logging.InternalLoggerFactory; -import javax.annotation.Nonnull; -import javax.annotation.Nullable; -import java.util.ArrayDeque; -import java.util.Deque; -import java.util.concurrent.atomic.AtomicInteger; - /** * @author chenxiaofan */ @@ -42,6 +43,9 @@ public HasOngoingSendLoop() { */ public boolean tryEnterSafeGetVolatile() { while (safe.get() == 0) { + // Use deprecated API is okay, since: + // In java8, it is weakCompareAndSetVolatile; + // 
In java9 and afterward, it is weakCompareAndSetPlain. if (safe.weakCompareAndSet(0, 1) /* stale read as 0 is acceptable */) { return true; } diff --git a/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/UnboundedMpscOfferFirstQueue.java b/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/UnboundedMpscOfferFirstQueue.java index 690f7c1b4a..42b3e7ceeb 100644 --- a/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/UnboundedMpscOfferFirstQueue.java +++ b/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/UnboundedMpscOfferFirstQueue.java @@ -1,8 +1,9 @@ package io.lettuce.core.datastructure.queue.offerfirst; -import javax.annotation.Nullable; import java.util.Deque; +import javax.annotation.Nullable; + /** * @author chenxiaofan */ diff --git a/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/ConcurrentLinkedOfferFirstQueue.java b/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/ConcurrentLinkedMpscOfferFirstQueue.java similarity index 85% rename from src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/ConcurrentLinkedOfferFirstQueue.java rename to src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/ConcurrentLinkedMpscOfferFirstQueue.java index f3cd2d4a35..5def1b5466 100644 --- a/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/ConcurrentLinkedOfferFirstQueue.java +++ b/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/ConcurrentLinkedMpscOfferFirstQueue.java @@ -1,19 +1,20 @@ package io.lettuce.core.datastructure.queue.offerfirst.impl; -import io.lettuce.core.datastructure.queue.offerfirst.UnboundedMpscOfferFirstQueue; - -import javax.annotation.Nullable; import java.util.Deque; import java.util.concurrent.ConcurrentLinkedDeque; +import javax.annotation.Nullable; + +import io.lettuce.core.datastructure.queue.offerfirst.UnboundedMpscOfferFirstQueue; + /** * @author chenxiaofan */ -public class ConcurrentLinkedOfferFirstQueue 
implements UnboundedMpscOfferFirstQueue { +public class ConcurrentLinkedMpscOfferFirstQueue implements UnboundedMpscOfferFirstQueue { private final ConcurrentLinkedDeque delegate; - public ConcurrentLinkedOfferFirstQueue() { + public ConcurrentLinkedMpscOfferFirstQueue() { this.delegate = new ConcurrentLinkedDeque<>(); } diff --git a/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/JcToolsUnboundedMpscOfferFirstQueue.java b/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/JcToolsUnboundedMpscOfferFirstQueue.java index 1a5436425f..feaa8d2ee8 100644 --- a/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/JcToolsUnboundedMpscOfferFirstQueue.java +++ b/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/JcToolsUnboundedMpscOfferFirstQueue.java @@ -1,28 +1,26 @@ package io.lettuce.core.datastructure.queue.offerfirst.impl; -import io.lettuce.core.datastructure.queue.offerfirst.UnboundedMpscOfferFirstQueue; -import io.netty.util.internal.shaded.org.jctools.queues.MessagePassingQueue; -import io.netty.util.internal.shaded.org.jctools.queues.atomic.MpscUnboundedAtomicArrayQueue; - -import javax.annotation.Nullable; import java.util.Deque; import java.util.LinkedList; import java.util.Objects; import java.util.Queue; +import javax.annotation.Nullable; + +import io.lettuce.core.datastructure.queue.offerfirst.UnboundedMpscOfferFirstQueue; +import io.netty.util.internal.PlatformDependent; + /** * @author chenxiaofan */ public class JcToolsUnboundedMpscOfferFirstQueue implements UnboundedMpscOfferFirstQueue { - private static final int MPSC_CHUNK_SIZE = 1024; - /** * The queues can only be manipulated in a single thread env. 
*/ private final LinkedList> unsafeQueues = new LinkedList<>(); - private final MessagePassingQueue mpscQueue = new MpscUnboundedAtomicArrayQueue<>(MPSC_CHUNK_SIZE); + private final Queue mpscQueue = PlatformDependent.newMpscQueue(); @Override public void offer(E e) { diff --git a/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java b/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java index 1efa6ed43b..8a2d420e5d 100644 --- a/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java +++ b/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java @@ -168,6 +168,10 @@ protected static void cancelCommandOnEndpointClose(RedisCommand cmd) { private volatile EventLoop lastEventLoop = null; + private final int writeSpinCount; + + private final int batchSize; + /** * Create a new {@link BatchFlushEndpoint}. * @@ -194,6 +198,8 @@ protected DefaultBatchFlushEndpoint(ClientOptions clientOptions, ClientResources this.taskQueue = new JcToolsUnboundedMpscOfferFirstQueue<>(); this.canFire = false; this.callbackOnClose = callbackOnClose; + this.writeSpinCount = clientOptions.getAutoBatchFlushOptions().getWriteSpinCount(); + this.batchSize = clientOptions.getAutoBatchFlushOptions().getBatchSize(); } @Override @@ -656,17 +662,17 @@ private void loopSend(final ContextualChannel chan) { } LettuceAssert.assertState(channel == chan, "unexpected: channel not match but closeStatus == null"); - loopSend0(batchFlushEndPointContext, chan, clientOptions.getWriteSpinCount(), clientOptions.getBatchSize(), true); + loopSend0(batchFlushEndPointContext, chan, writeSpinCount, true); } private void loopSend0(final BatchFlushEndPointContext batchFlushEndPointContext, final ContextualChannel chan, - int remainingSpinnCount, final int maxBatchSize, final boolean firstCall) { + int remainingSpinnCount, final boolean firstCall) { do { - final int count = pollBatch(batchFlushEndPointContext, maxBatchSize, chan); + final int count = 
pollBatch(batchFlushEndPointContext, batchSize, chan); if (count < 0) { return; } - if (count == 0 || (firstCall && count < maxBatchSize)) { + if (count == 0 || (firstCall && count < batchSize)) { // queue was empty break; } @@ -682,7 +688,7 @@ private void loopSend0(final BatchFlushEndPointContext batchFlushEndPointContext // Don't setUnsafe here because loopSend0() may result in a delayed loopSend() call. batchFlushEndPointContext.getHasOngoingSendLoop().exitSafe(); // Guarantee thread-safety: no dangling tasks in the queue. - loopSend0(batchFlushEndPointContext, chan, remainingSpinnCount, maxBatchSize, false); + loopSend0(batchFlushEndPointContext, chan, remainingSpinnCount, false); } else { // In low qps pattern, the send job will be triggered later when a new task is added, batchFlushEndPointContext.getHasOngoingSendLoop().exitUnsafe(); From 1357f4997a2343271ed63680a7b4f7ce982fbb1f Mon Sep 17 00:00:00 2001 From: "xiaofan.chen" Date: Thu, 25 Jul 2024 12:11:18 +0800 Subject: [PATCH 05/35] chore: use compareAndSet instead of weakCompareAndSet --- .../core/context/BatchFlushEndPointContext.java | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/main/java/io/lettuce/core/context/BatchFlushEndPointContext.java b/src/main/java/io/lettuce/core/context/BatchFlushEndPointContext.java index 2730c3284b..b20555f639 100644 --- a/src/main/java/io/lettuce/core/context/BatchFlushEndPointContext.java +++ b/src/main/java/io/lettuce/core/context/BatchFlushEndPointContext.java @@ -42,15 +42,7 @@ public HasOngoingSendLoop() { * @return true if entered the loop, false if already have a running loop. */ public boolean tryEnterSafeGetVolatile() { - while (safe.get() == 0) { - // Use deprecated API is okay, since: - // In java8, it is weakCompareAndSetVolatile; - // In java9 and afterward, it is weakCompareAndSetPlain. 
- if (safe.weakCompareAndSet(0, 1) /* stale read as 0 is acceptable */) { - return true; - } - } - return false; + return safe.get() == 0 && /* rare case if QPS is high */ safe.compareAndSet(0, 1); } /** From f62758700a3bc2261af55c45b4607354b46178f0 Mon Sep 17 00:00:00 2001 From: "xiaofan.chen" Date: Thu, 25 Jul 2024 17:10:36 +0800 Subject: [PATCH 06/35] chore: remove unused log --- .../io/lettuce/core/ContextualChannel.java | 8 +- .../context/BatchFlushEndPointContext.java | 8 +- .../protocol/DefaultBatchFlushEndpoint.java | 91 ++++++++----------- 3 files changed, 50 insertions(+), 57 deletions(-) diff --git a/src/main/java/io/lettuce/core/ContextualChannel.java b/src/main/java/io/lettuce/core/ContextualChannel.java index 38b669f8fe..ee9b6ca204 100644 --- a/src/main/java/io/lettuce/core/ContextualChannel.java +++ b/src/main/java/io/lettuce/core/ContextualChannel.java @@ -1,5 +1,7 @@ package io.lettuce.core; +import java.net.SocketAddress; + import io.lettuce.core.context.ConnectionContext; import io.netty.buffer.ByteBufAllocator; import io.netty.channel.Channel; @@ -15,8 +17,6 @@ import io.netty.util.AttributeKey; import org.jetbrains.annotations.NotNull; -import java.net.SocketAddress; - /** * @author chenxiaofan */ @@ -30,6 +30,10 @@ public ConnectionContext getContext() { return context; } + public ConnectionContext.State getInitialState() { + return context.getInitialState(); + } + public Channel getDelegate() { return delegate; } diff --git a/src/main/java/io/lettuce/core/context/BatchFlushEndPointContext.java b/src/main/java/io/lettuce/core/context/BatchFlushEndPointContext.java index b20555f639..f42af5aea5 100644 --- a/src/main/java/io/lettuce/core/context/BatchFlushEndPointContext.java +++ b/src/main/java/io/lettuce/core/context/BatchFlushEndPointContext.java @@ -45,6 +45,10 @@ public boolean tryEnterSafeGetVolatile() { return safe.get() == 0 && /* rare case if QPS is high */ safe.compareAndSet(0, 1); } + public void exitSafe() { + safe.set(0); + } + /** * 
This method is not thread safe, can only be used from single thread. * @@ -58,10 +62,6 @@ public boolean tryEnterUnsafe() { return true; } - public void exitSafe() { - safe.set(0); - } - public void exitUnsafe() { unsafe = false; } diff --git a/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java b/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java index 8a2d420e5d..a1263f728f 100644 --- a/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java +++ b/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java @@ -112,8 +112,6 @@ protected static void cancelCommandOnEndpointClose(RedisCommand cmd) { cmd.cancel(); } - protected volatile @Nonnull ContextualChannel channel = DummyContextualChannelInstances.CHANNEL_CONNECTING; - private final Reliability reliability; private final ClientOptions clientOptions; @@ -123,12 +121,13 @@ protected static void cancelCommandOnEndpointClose(RedisCommand cmd) { private final boolean boundedQueues; // access via QUEUE_SIZE - @SuppressWarnings("unused") private volatile int queueSize = 0; // access via STATUS - @SuppressWarnings("unused") private volatile int status = ST_OPEN; + // access via CHANNEL + + protected volatile @Nonnull ContextualChannel channel = DummyContextualChannelInstances.CHANNEL_CONNECTING; private final Consumer> callbackOnClose; @@ -152,10 +151,6 @@ protected static void cancelCommandOnEndpointClose(RedisCommand cmd) { private volatile Throwable connectionError; - // // access via QUEUE_SIZE - // @SuppressWarnings("unused") - // private volatile int queueSize = 0; - private final String cachedEndpointId; protected final UnboundedMpscOfferFirstQueue> taskQueue; @@ -308,11 +303,8 @@ public void notifyChannelActive(Channel channel) { this.connectionError = null; if (!CHANNEL.compareAndSet(this, DummyContextualChannelInstances.CHANNEL_CONNECTING, contextualChannel)) { - logger.error("[unexpected] {} failed to set to CHANNEL_CONNECTING because current state 
is '{}'", logPrefix(), - CHANNEL.get(this)); channel.close(); - onUnexpectedState("notifyChannelActive", ConnectionContext.State.CONNECTING, - this.channel.getContext().getInitialState()); + onUnexpectedState("notifyChannelActive", ConnectionContext.State.CONNECTING); return; } @@ -357,21 +349,13 @@ public void notifyChannelActive(Channel channel) { } } - private void onUnexpectedState(String caller, ConnectionContext.State exp, ConnectionContext.State actual) { - logger.error("[{}][unexpected] {}: unexpected state: exp '{}' got '{}'", caller, logPrefix(), exp, actual); - cancelCommands(String.format("%s: state not match: expect '%s', got '%s'", caller, exp, actual)); - } - @Override public void notifyReconnectFailed(Throwable t) { this.failedToReconnectReason = t; if (!CHANNEL.compareAndSet(this, DummyContextualChannelInstances.CHANNEL_CONNECTING, DummyContextualChannelInstances.CHANNEL_RECONNECT_FAILED)) { - logger.error("[unexpected] {} failed to set to CHANNEL_CONNECTING because current state is '{}'", logPrefix(), - CHANNEL.get(this)); - onUnexpectedState("notifyReconnectFailed", ConnectionContext.State.CONNECTING, - this.channel.getContext().getInitialState()); + onUnexpectedState("notifyReconnectFailed", ConnectionContext.State.CONNECTING); return; } @@ -396,13 +380,13 @@ public void notifyChannelInactive(Channel channel) { @Override public void notifyChannelInactiveAfterWatchdogDecision(Channel channel, Deque> retryableQueuedCommands) { - final ContextualChannel prevChan = this.channel; - if (!prevChan.getContext().getInitialState().isConnected() || prevChan.getDelegate() != channel) { + final ContextualChannel inactiveChan = this.channel; + if (!inactiveChan.getInitialState().isConnected() || inactiveChan.getDelegate() != channel) { logger.error("[unexpected][{}] notifyChannelInactive: channel not match", logPrefix()); return; } - if (prevChan.getContext().isChannelInactiveEventFired()) { + if (inactiveChan.getContext().isChannelInactiveEventFired()) { 
logger.error("[unexpected][{}] notifyChannelInactive: already fired", logPrefix()); return; } @@ -431,9 +415,9 @@ public void notifyChannelInactiveAfterWatchdogDecision(Channel channel, if (!willReconnect) { CHANNEL.set(this, DummyContextualChannelInstances.CHANNEL_ENDPOINT_CLOSED); } - prevChan.getContext() + inactiveChan.getContext() .setCloseStatus(new ConnectionContext.CloseStatus(willReconnect, retryableQueuedCommands, exception)); - trySetEndpointQuiescence(prevChan); + trySetEndpointQuiescence(inactiveChan); } @Override @@ -444,7 +428,7 @@ public void notifyException(Throwable t) { } final ContextualChannel curr = this.channel; - if (!curr.getContext().getInitialState().isConnected() || !curr.isActive()) { + if (!curr.getInitialState().isConnected() || !curr.isActive()) { connectionError = t; } } @@ -457,7 +441,7 @@ public void registerConnectionWatchdog(ConnectionWatchdog connectionWatchdog) { @Override public void flushCommands() { final ContextualChannel chan = this.channel; - switch (chan.getContext().getInitialState()) { + switch (chan.getInitialState()) { case ENDPOINT_CLOSED: syncAfterTerminated(() -> { if (isClosed()) { @@ -486,7 +470,7 @@ public void flushCommands() { scheduleSendJobIfNeeded(chan); return; default: - throw new IllegalStateException("unexpected state: " + chan.getContext().getInitialState()); + throw new IllegalStateException("unexpected state: " + chan.getInitialState()); } } @@ -519,8 +503,8 @@ public CompletableFuture closeAsync() { connectionWatchdog.prepareClose(); } - final Channel chan = channel; - if (channel.getContext().getInitialState().isConnected()) { + final ContextualChannel chan = channel; + if (chan.getInitialState().isConnected()) { // 1. STATUS.compareAndSet(this, ST_OPEN, ST_CLOSED) synchronize-before channel == CONNECTED // 2. channel == CONNECTED synchronize-before setting channel to WILL_RECONNECT/ENDPOINT_CLOSED // 3. 
setting channel to WILL_RECONNECT synchronize-before `isClosed()`, which will cancel all the commands. @@ -545,7 +529,7 @@ public void disconnect() { ContextualChannel chan = this.channel; - if (chan.getContext().getInitialState().isConnected() && chan.isOpen()) { + if (chan.getInitialState().isConnected() && chan.isOpen()) { chan.disconnect(); } } @@ -561,9 +545,9 @@ public void reset() { logger.debug("{} reset()", logPrefix()); } - final ContextualChannel curr = channel; - if (curr.getContext().getInitialState().isConnected()) { - curr.pipeline().fireUserEventTriggered(new ConnectionEvents.Reset()); + final ContextualChannel chan = channel; + if (chan.getInitialState().isConnected()) { + chan.pipeline().fireUserEventTriggered(new ConnectionEvents.Reset()); } // Unsafe to call cancelBufferedCommands() here. // cancelBufferedCommands("Reset"); @@ -575,9 +559,9 @@ private void resetInternal() { logger.debug("{} reset()", logPrefix()); } - ContextualChannel curr = channel; - if (curr.getContext().getInitialState().isConnected()) { - curr.pipeline().fireUserEventTriggered(new ConnectionEvents.Reset()); + ContextualChannel chan = channel; + if (chan.getInitialState().isConnected()) { + chan.pipeline().fireUserEventTriggered(new ConnectionEvents.Reset()); } // Unsafe to call cancelBufferedCommands() here. 
cancelCommands("Reset"); @@ -593,7 +577,7 @@ public void initialState() { cancelCommands("initialState"); ContextualChannel currentChannel = this.channel; - if (currentChannel.getContext().getInitialState().isConnected()) { + if (currentChannel.getInitialState().isConnected()) { ChannelFuture close = currentChannel.close(); if (currentChannel.isOpen()) { close.syncUninterruptibly(); @@ -602,7 +586,7 @@ public void initialState() { } private boolean isClosed() { - return STATUS.get(this) == ST_CLOSED; + return status == ST_CLOSED; } protected String logPrefix() { @@ -799,16 +783,14 @@ private void trySetEndpointQuiescence(ContextualChannel chan) { } private void onEndpointQuiescence() { - if (channel.getContext().getInitialState() == ConnectionContext.State.ENDPOINT_CLOSED) { + if (channel.getInitialState() == ConnectionContext.State.ENDPOINT_CLOSED) { return; } // Create happens-before with channelActive() if (!CHANNEL.compareAndSet(this, DummyContextualChannelInstances.CHANNEL_WILL_RECONNECT, DummyContextualChannelInstances.CHANNEL_CONNECTING)) { - - onUnexpectedState("onEndpointQuiescence", ConnectionContext.State.WILL_RECONNECT, - this.channel.getContext().getInitialState()); + onUnexpectedState("onEndpointQuiescence", ConnectionContext.State.WILL_RECONNECT); return; } @@ -824,8 +806,8 @@ private void onWillReconnect(@Nonnull final ConnectionContext.CloseStatus closeS // Save retryable failed tasks logger.info( "[onWillReconnect][{}] compensate {} retryableFailedToSendTasks (write failure) for retrying on reconnecting, first write error: {}", - retryableFailedToSendTasks.size(), batchFlushEndPointContext.getFirstDiscontinueReason().getMessage(), - logPrefix()); + logPrefix(), retryableFailedToSendTasks.size(), + batchFlushEndPointContext.getFirstDiscontinueReason().getMessage()); offerFirstAll(retryableFailedToSendTasks); } @@ -833,8 +815,9 @@ private void onWillReconnect(@Nonnull final ConnectionContext.CloseStatus closeS final Deque> retryablePendingCommands = 
closeStatus.getAndClearRetryablePendingCommands(); if (retryablePendingCommands != null) { // Save uncompletedTasks for later retry. - logger.info("[onWillReconnect][{}] compensate {} pendingCommands (write success) for retrying on reconnecting", - retryablePendingCommands.size(), logPrefix()); + logger.info( + "[onWillReconnect][{}] compensate {} retryable pending commands (write success) for retrying on reconnecting", + logPrefix(), retryablePendingCommands.size()); offerFirstAll(retryablePendingCommands); } @@ -961,13 +944,13 @@ private Throwable validateWrite(@SuppressWarnings("unused") int commands) { return localConnectionErr; } - if (boundedQueues && QUEUE_SIZE.get(this) + commands > clientOptions.getRequestQueueSize()) { + if (boundedQueues && queueSize + commands > clientOptions.getRequestQueueSize()) { return new RedisException("Request queue size exceeded: " + clientOptions.getRequestQueueSize() + ". Commands are not accepted until the queue size drops."); } final ContextualChannel chan = this.channel; - switch (chan.getContext().getInitialState()) { + switch (chan.getInitialState()) { case ENDPOINT_CLOSED: return new RedisException("Connection is closed"); case RECONNECT_FAILED: @@ -979,10 +962,16 @@ private Throwable validateWrite(@SuppressWarnings("unused") int commands) { case CONNECTED: return !chan.isActive() && rejectCommandsWhileDisconnected ? 
new RedisException("Connection is closed") : null; default: - throw new IllegalStateException("unexpected state: " + chan.getContext().getInitialState()); + throw new IllegalStateException("unexpected state: " + chan.getInitialState()); } } + private void onUnexpectedState(String caller, ConnectionContext.State exp) { + final ConnectionContext.State actual = this.channel.getInitialState(); + logger.error("[{}][unexpected] {}: unexpected state: exp '{}' got '{}'", caller, logPrefix(), exp, actual); + cancelCommands(String.format("%s: state not match: expect '%s', got '%s'", caller, exp, actual)); + } + private void channelFlush(Channel channel) { if (debugEnabled) { logger.debug("{} write() channelFlush", logPrefix()); From de67728697608f0fc1a88e0ace20ccf85af0ad60 Mon Sep 17 00:00:00 2001 From: "xiaofan.chen" Date: Thu, 25 Jul 2024 21:45:18 +0800 Subject: [PATCH 07/35] perf: use RECYCLE for WrittenToChannel --- .../core/context/ConnectionContext.java | 2 +- .../protocol/DefaultBatchFlushEndpoint.java | 225 +++++++++++------- 2 files changed, 145 insertions(+), 82 deletions(-) diff --git a/src/main/java/io/lettuce/core/context/ConnectionContext.java b/src/main/java/io/lettuce/core/context/ConnectionContext.java index 25a6fac4ec..f3dfe1f02e 100644 --- a/src/main/java/io/lettuce/core/context/ConnectionContext.java +++ b/src/main/java/io/lettuce/core/context/ConnectionContext.java @@ -78,7 +78,7 @@ public State getInitialState() { return initialState; } - public BatchFlushEndPointContext getFairEndPointContext() { + public BatchFlushEndPointContext getBatchFlushEndPointContext() { return batchFlushEndPointContext; } diff --git a/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java b/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java index a1263f728f..86f4ae2f94 100644 --- a/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java +++ b/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java @@ -53,7 +53,9 @@ 
import io.netty.channel.ChannelFuture; import io.netty.channel.EventLoop; import io.netty.handler.codec.EncoderException; +import io.netty.util.Recycler; import io.netty.util.concurrent.Future; +import io.netty.util.concurrent.GenericFutureListener; import io.netty.util.internal.logging.InternalLogger; import io.netty.util.internal.logging.InternalLoggerFactory; @@ -619,7 +621,7 @@ private void scheduleSendJobIfNeeded(final ContextualChannel chan) { return; } - if (chan.getContext().getFairEndPointContext().getHasOngoingSendLoop().tryEnterSafeGetVolatile()) { + if (chan.getContext().getBatchFlushEndPointContext().getHasOngoingSendLoop().tryEnterSafeGetVolatile()) { eventLoop.execute(() -> scheduleSendJobInEventLoopIfNeeded(chan)); } @@ -633,14 +635,14 @@ private void scheduleSendJobIfNeeded(final ContextualChannel chan) { private void scheduleSendJobInEventLoopIfNeeded(final ContextualChannel chan) { // Guarantee only 1 send loop. - if (chan.getContext().getFairEndPointContext().getHasOngoingSendLoop().tryEnterUnsafe()) { + if (chan.getContext().getBatchFlushEndPointContext().getHasOngoingSendLoop().tryEnterUnsafe()) { loopSend(chan); } } private void loopSend(final ContextualChannel chan) { final ConnectionContext connectionContext = chan.getContext(); - final BatchFlushEndPointContext batchFlushEndPointContext = connectionContext.getFairEndPointContext(); + final BatchFlushEndPointContext batchFlushEndPointContext = connectionContext.getBatchFlushEndPointContext(); if (connectionContext.isChannelInactiveEventFired() || batchFlushEndPointContext.hasRetryableFailedToSendTasks()) { return; } @@ -687,18 +689,7 @@ private int pollBatch(final BatchFlushEndPointContext batchFlushEndPointContext, if (cmd == null) { break; } - channelWrite(chan, cmd).addListener(future -> { - QUEUE_SIZE.decrementAndGet(this); - batchFlushEndPointContext.done(1); - - final Throwable retryableErr = checkSendResult(future, chan, cmd); - if (retryableErr != null && 
batchFlushEndPointContext.addRetryableFailedToSendTask(cmd, retryableErr)) { - // Close connection on first transient write failure - internalCloseConnectionIfNeeded(chan, retryableErr); - } - - trySetEndpointQuiescence(chan); - }); + channelWrite(chan, cmd).addListener(WrittenToChannel.newInstance(this, chan, cmd)); } if (count > 0) { @@ -713,60 +704,12 @@ private int pollBatch(final BatchFlushEndPointContext batchFlushEndPointContext, return count; } - /** - * Check write result. - * - * @param sendFuture The future to check. - * @param contextualChannel The channel instance associated with the future. - * @param cmd The task. - * @return The cause of the failure if is a retryable failed task, otherwise null. - */ - private Throwable checkSendResult(Future sendFuture, ContextualChannel contextualChannel, RedisCommand cmd) { - if (cmd.isDone()) { - ExceptionUtils.logUnexpectedDone(logger, logPrefix(), cmd); - return null; - } - - final ConnectionContext.CloseStatus closeStatus = contextualChannel.getContext().getCloseStatus(); - if (closeStatus != null) { - logger.warn("[checkSendResult][interesting][{}] callback called after onClose() event, close status: {}", - logPrefix(), contextualChannel.getContext().getCloseStatus()); - final Throwable err = sendFuture.isSuccess() ? 
closeStatus.getErr() : sendFuture.cause(); - if (!closeStatus.isWillReconnect() || shouldNotRetry(err, cmd)) { - cmd.completeExceptionally(err); - return null; - } else { - return err; - } - } - - if (sendFuture.isSuccess()) { - return null; - } - - final Throwable cause = sendFuture.cause(); - ExceptionUtils.maybeLogSendError(logger, cause); - if (shouldNotRetry(cause, cmd)) { - cmd.completeExceptionally(cause); - return null; - } - - return cause; - } - - private boolean shouldNotRetry(Throwable cause, RedisCommand cmd) { - return reliability == Reliability.AT_MOST_ONCE || ActivationCommand.isActivationCommand(cmd) - || ExceptionUtils.oneOf(cause, SHOULD_NOT_RETRY_EXCEPTION_TYPES); - } - private void trySetEndpointQuiescence(ContextualChannel chan) { - final EventLoop chanEventLoop = chan.eventLoop(); - LettuceAssert.isTrue(chanEventLoop.inEventLoop(), "unexpected: not in event loop"); - LettuceAssert.isTrue(chanEventLoop == lastEventLoop, "unexpected: event loop not match"); + LettuceAssert.isTrue(chan.eventLoop().inEventLoop(), "unexpected: not in event loop"); final ConnectionContext connectionContext = chan.getContext(); final @Nullable ConnectionContext.CloseStatus closeStatus = connectionContext.getCloseStatus(); - final BatchFlushEndPointContext batchFlushEndPointContext = connectionContext.getFairEndPointContext(); + final BatchFlushEndPointContext batchFlushEndPointContext = connectionContext.getBatchFlushEndPointContext(); if (batchFlushEndPointContext.isDone() && closeStatus != null) { if (closeStatus.isWillReconnect()) { onWillReconnect(closeStatus, batchFlushEndPointContext); @@ -849,20 +792,6 @@ private void offerFirstAll(Deque> commands) { this.taskQueue.offerFirstAll(commands); } - private void internalCloseConnectionIfNeeded(ContextualChannel toCloseChan, Throwable reason) { - if (toCloseChan.getContext().isChannelInactiveEventFired() || !toCloseChan.isActive()) { - return; - } - - logger.error("[internalCloseConnectionIfNeeded][attention][{}] 
close the connection due to write error, reason: '{}'", - logPrefix(), reason.getMessage(), reason); - toCloseChan.eventLoop().schedule(() -> { - if (toCloseChan.isActive()) { - toCloseChan.close(); - } - }, 1, TimeUnit.SECONDS); - } - private void cancelCommands(String message) { fulfillCommands(message, RedisCommand::cancel); } @@ -904,7 +833,7 @@ private final void fulfillCommands(String message, Consumer 0) { - logger.error("cancel {} pending tasks, reason: '{}'", totalCancelledTaskNum, message); + logger.error("{} cancel {} pending tasks, reason: '{}'", logPrefix(), totalCancelledTaskNum, message); } } @@ -968,7 +897,7 @@ private Throwable validateWrite(@SuppressWarnings("unused") int commands) { private void onUnexpectedState(String caller, ConnectionContext.State exp) { final ConnectionContext.State actual = this.channel.getInitialState(); - logger.error("[{}][unexpected] {}: unexpected state: exp '{}' got '{}'", caller, logPrefix(), exp, actual); + logger.error("{}[{}][unexpected] : unexpected state: exp '{}' got '{}'", logPrefix(), caller, exp, actual); cancelCommands(String.format("%s: state not match: expect '%s', got '%s'", caller, exp, actual)); } @@ -1010,4 +939,138 @@ private enum Reliability { AT_MOST_ONCE, AT_LEAST_ONCE } + /** + * Add to stack listener. This listener is pooled and must be {@link #recycle() recycled after usage}. + */ + static class WrittenToChannel implements GenericFutureListener> { + + private static final Recycler RECYCLER = new Recycler() { + + @Override + protected WrittenToChannel newObject(Recycler.Handle handle) { + return new WrittenToChannel(handle); + } + + }; + + private final Recycler.Handle handle; + + private DefaultBatchFlushEndpoint endpoint; + + private RedisCommand command; + + private ContextualChannel chan; + + private WrittenToChannel(Recycler.Handle handle) { + this.handle = handle; + } + + /** + * Allocate a new instance. 
+ * + * @return new instance + */ + static WrittenToChannel newInstance(DefaultBatchFlushEndpoint endpoint, ContextualChannel chan, + RedisCommand command) { + + WrittenToChannel entry = RECYCLER.get(); + + entry.endpoint = endpoint; + entry.chan = chan; + entry.command = command; + + return entry; + } + + @Override + public void operationComplete(Future future) { + final BatchFlushEndPointContext batchFlushEndPointContext = chan.getContext().getBatchFlushEndPointContext(); + try { + QUEUE_SIZE.decrementAndGet(endpoint); + batchFlushEndPointContext.done(1); + + final Throwable retryableErr = checkSendResult(future, chan, command); + if (retryableErr != null && batchFlushEndPointContext.addRetryableFailedToSendTask(command, retryableErr)) { + // Close connection on first transient write failure + internalCloseConnectionIfNeeded(chan, retryableErr); + } + + endpoint.trySetEndpointQuiescence(chan); + } finally { + recycle(); + } + } + + /** + * Check write result. + * + * @param sendFuture The future to check. + * @param contextualChannel The channel instance associated with the future. + * @param cmd The task. + * @return The cause of the failure if is a retryable failed task, otherwise null. + */ + private Throwable checkSendResult(Future sendFuture, ContextualChannel contextualChannel, + RedisCommand cmd) { + if (cmd.isDone()) { + ExceptionUtils.logUnexpectedDone(logger, endpoint.logPrefix(), cmd); + return null; + } + + final ConnectionContext.CloseStatus closeStatus = contextualChannel.getContext().getCloseStatus(); + if (closeStatus != null) { + logger.warn("[checkSendResult][interesting][{}] callback called after onClose() event, close status: {}", + endpoint.logPrefix(), contextualChannel.getContext().getCloseStatus()); + final Throwable err = sendFuture.isSuccess() ? 
closeStatus.getErr() : sendFuture.cause(); + if (!closeStatus.isWillReconnect() || shouldNotRetry(err, cmd)) { + cmd.completeExceptionally(err); + return null; + } else { + return err; + } + } + + if (sendFuture.isSuccess()) { + return null; + } + + final Throwable cause = sendFuture.cause(); + ExceptionUtils.maybeLogSendError(logger, cause); + if (shouldNotRetry(cause, cmd)) { + cmd.completeExceptionally(cause); + return null; + } + + return cause; + } + + private boolean shouldNotRetry(Throwable cause, RedisCommand cmd) { + return endpoint.reliability == Reliability.AT_MOST_ONCE || ActivationCommand.isActivationCommand(cmd) + || ExceptionUtils.oneOf(cause, SHOULD_NOT_RETRY_EXCEPTION_TYPES); + } + + private void internalCloseConnectionIfNeeded(ContextualChannel toCloseChan, Throwable reason) { + if (toCloseChan.getContext().isChannelInactiveEventFired() || !toCloseChan.isActive()) { + return; + } + + logger.error( + "[internalCloseConnectionIfNeeded][interesting][{}] close the connection due to write error, reason: '{}'", + endpoint.logPrefix(), reason.getMessage(), reason); + toCloseChan.eventLoop().schedule(() -> { + if (toCloseChan.isActive()) { + toCloseChan.close(); + } + }, 1, TimeUnit.SECONDS); + } + + private void recycle() { + this.endpoint = null; + this.chan = null; + this.command = null; + + handle.recycle(this); + } + + } + } From 70c46773e9d7af4e349a014272246dfee5d7b839 Mon Sep 17 00:00:00 2001 From: "xiaofan.chen" Date: Mon, 29 Jul 2024 17:32:42 +0800 Subject: [PATCH 08/35] chore: remove getters --- .../io/lettuce/core/ContextualChannel.java | 6 +- .../context/BatchFlushEndPointContext.java | 6 +- .../core/context/ConnectionContext.java | 12 +-- .../protocol/DefaultBatchFlushEndpoint.java | 92 +++++++++---------- 4 files changed, 49 insertions(+), 67 deletions(-) diff --git a/src/main/java/io/lettuce/core/ContextualChannel.java b/src/main/java/io/lettuce/core/ContextualChannel.java index ee9b6ca204..f01c88bee1 100644 --- 
a/src/main/java/io/lettuce/core/ContextualChannel.java +++ b/src/main/java/io/lettuce/core/ContextualChannel.java @@ -24,16 +24,12 @@ public class ContextualChannel implements Channel { private final Channel delegate; - private final ConnectionContext context; + public final ConnectionContext context; public ConnectionContext getContext() { return context; } - public ConnectionContext.State getInitialState() { - return context.getInitialState(); - } - public Channel getDelegate() { return delegate; } diff --git a/src/main/java/io/lettuce/core/context/BatchFlushEndPointContext.java b/src/main/java/io/lettuce/core/context/BatchFlushEndPointContext.java index f42af5aea5..c7dff147bd 100644 --- a/src/main/java/io/lettuce/core/context/BatchFlushEndPointContext.java +++ b/src/main/java/io/lettuce/core/context/BatchFlushEndPointContext.java @@ -96,11 +96,7 @@ public int getTotal() { return total; } - private final HasOngoingSendLoop hasOngoingSendLoop = new HasOngoingSendLoop(); - - public HasOngoingSendLoop getHasOngoingSendLoop() { - return hasOngoingSendLoop; - } + public final HasOngoingSendLoop hasOngoingSendLoop = new HasOngoingSendLoop(); public void add(int n) { this.total += n; diff --git a/src/main/java/io/lettuce/core/context/ConnectionContext.java b/src/main/java/io/lettuce/core/context/ConnectionContext.java index f3dfe1f02e..796674eafe 100644 --- a/src/main/java/io/lettuce/core/context/ConnectionContext.java +++ b/src/main/java/io/lettuce/core/context/ConnectionContext.java @@ -65,23 +65,15 @@ public boolean isConnected() { } - private final State initialState; + public final State initialState; - private final BatchFlushEndPointContext batchFlushEndPointContext; + public final BatchFlushEndPointContext batchFlushEndPointContext; public ConnectionContext(State initialState) { this.initialState = initialState; this.batchFlushEndPointContext = new BatchFlushEndPointContext(); } - public State getInitialState() { - return initialState; - } - - public 
BatchFlushEndPointContext getBatchFlushEndPointContext() { - return batchFlushEndPointContext; - } - /* below fields must be accessed by the event loop thread only */ @Nullable private CloseStatus closeStatus = null; diff --git a/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java b/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java index 86f4ae2f94..8e89ef7c89 100644 --- a/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java +++ b/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java @@ -315,8 +315,7 @@ public void notifyChannelActive(Channel channel) { logger.info("{} Closing channel because endpoint is already closed", logPrefix()); channel.close(); - onEndpointClosed(); - CHANNEL.set(this, DummyContextualChannelInstances.CHANNEL_ENDPOINT_CLOSED); + // Cleaning will be done later in notifyChannelInactiveAfterWatchdogDecision, we are happy so far. return; } @@ -383,12 +382,12 @@ public void notifyChannelInactive(Channel channel) { public void notifyChannelInactiveAfterWatchdogDecision(Channel channel, Deque> retryableQueuedCommands) { final ContextualChannel inactiveChan = this.channel; - if (!inactiveChan.getInitialState().isConnected() || inactiveChan.getDelegate() != channel) { + if (!inactiveChan.context.initialState.isConnected() || inactiveChan.getDelegate() != channel) { logger.error("[unexpected][{}] notifyChannelInactive: channel not match", logPrefix()); return; } - if (inactiveChan.getContext().isChannelInactiveEventFired()) { + if (inactiveChan.context.isChannelInactiveEventFired()) { logger.error("[unexpected][{}] notifyChannelInactive: already fired", logPrefix()); return; } @@ -417,7 +416,7 @@ public void notifyChannelInactiveAfterWatchdogDecision(Channel channel, if (!willReconnect) { CHANNEL.set(this, DummyContextualChannelInstances.CHANNEL_ENDPOINT_CLOSED); } - inactiveChan.getContext() + inactiveChan.context .setCloseStatus(new ConnectionContext.CloseStatus(willReconnect, 
retryableQueuedCommands, exception)); trySetEndpointQuiescence(inactiveChan); } @@ -430,7 +429,7 @@ public void notifyException(Throwable t) { } final ContextualChannel curr = this.channel; - if (!curr.getInitialState().isConnected() || !curr.isActive()) { + if (!curr.context.initialState.isConnected() || !curr.isActive()) { connectionError = t; } } @@ -443,16 +442,9 @@ public void registerConnectionWatchdog(ConnectionWatchdog connectionWatchdog) { @Override public void flushCommands() { final ContextualChannel chan = this.channel; - switch (chan.getInitialState()) { + switch (chan.context.initialState) { case ENDPOINT_CLOSED: - syncAfterTerminated(() -> { - if (isClosed()) { - onEndpointClosed(); - } else { - fulfillCommands("Connection is closed", - cmd -> cmd.completeExceptionally(new RedisException("Connection is closed"))); - } - }); + syncAfterTerminated(this::onEndpointClosed); return; case RECONNECT_FAILED: syncAfterTerminated(() -> { @@ -472,7 +464,7 @@ public void flushCommands() { scheduleSendJobIfNeeded(chan); return; default: - throw new IllegalStateException("unexpected state: " + chan.getInitialState()); + throw new IllegalStateException("unexpected state: " + chan.context.initialState); } } @@ -506,7 +498,7 @@ public CompletableFuture closeAsync() { } final ContextualChannel chan = channel; - if (chan.getInitialState().isConnected()) { + if (chan.context.initialState.isConnected()) { // 1. STATUS.compareAndSet(this, ST_OPEN, ST_CLOSED) synchronize-before channel == CONNECTED // 2. channel == CONNECTED synchronize-before setting channel to WILL_RECONNECT/ENDPOINT_CLOSED // 3. setting channel to WILL_RECONNECT synchronize-before `isClosed()`, which will cancel all the commands. 
@@ -531,7 +523,7 @@ public void disconnect() { ContextualChannel chan = this.channel; - if (chan.getInitialState().isConnected() && chan.isOpen()) { + if (chan.context.initialState.isConnected() && chan.isOpen()) { chan.disconnect(); } } @@ -548,7 +540,7 @@ public void reset() { } final ContextualChannel chan = channel; - if (chan.getInitialState().isConnected()) { + if (chan.context.initialState.isConnected()) { chan.pipeline().fireUserEventTriggered(new ConnectionEvents.Reset()); } // Unsafe to call cancelBufferedCommands() here. @@ -562,7 +554,7 @@ private void resetInternal() { } ContextualChannel chan = channel; - if (chan.getInitialState().isConnected()) { + if (chan.context.initialState.isConnected()) { chan.pipeline().fireUserEventTriggered(new ConnectionEvents.Reset()); } // Unsafe to call cancelBufferedCommands() here. @@ -579,7 +571,7 @@ public void initialState() { cancelCommands("initialState"); ContextualChannel currentChannel = this.channel; - if (currentChannel.getInitialState().isConnected()) { + if (currentChannel.context.initialState.isConnected()) { ChannelFuture close = currentChannel.close(); if (currentChannel.isOpen()) { close.syncUninterruptibly(); @@ -621,7 +613,14 @@ private void scheduleSendJobIfNeeded(final ContextualChannel chan) { return; } - if (chan.getContext().getBatchFlushEndPointContext().getHasOngoingSendLoop().tryEnterSafeGetVolatile()) { + if (chan.context.batchFlushEndPointContext.hasOngoingSendLoop.tryEnterSafeGetVolatile()) { + // Benchmark result of using tryEnterSafeGetVolatile() or not (1 thread, async get): + // 1. uses tryEnterSafeGetVolatile() to avoid unnecessary eventLoop.execute() calls + // Avg latency: 3.2956217278663s + // Avg QPS: 495238.50056392356/s + // 2. 
uses eventLoop.execute() directly + // Avg latency: 3.2677197021496998s + // Avg QPS: 476925.0751855796/s eventLoop.execute(() -> scheduleSendJobInEventLoopIfNeeded(chan)); } @@ -635,14 +634,14 @@ private void scheduleSendJobIfNeeded(final ContextualChannel chan) { private void scheduleSendJobInEventLoopIfNeeded(final ContextualChannel chan) { // Guarantee only 1 send loop. - if (chan.getContext().getBatchFlushEndPointContext().getHasOngoingSendLoop().tryEnterUnsafe()) { + if (chan.context.batchFlushEndPointContext.hasOngoingSendLoop.tryEnterUnsafe()) { loopSend(chan); } } private void loopSend(final ContextualChannel chan) { - final ConnectionContext connectionContext = chan.getContext(); - final BatchFlushEndPointContext batchFlushEndPointContext = connectionContext.getBatchFlushEndPointContext(); + final ConnectionContext connectionContext = chan.context; + final BatchFlushEndPointContext batchFlushEndPointContext = connectionContext.batchFlushEndPointContext; if (connectionContext.isChannelInactiveEventFired() || batchFlushEndPointContext.hasRetryableFailedToSendTasks()) { return; } @@ -654,37 +653,36 @@ private void loopSend(final ContextualChannel chan) { private void loopSend0(final BatchFlushEndPointContext batchFlushEndPointContext, final ContextualChannel chan, int remainingSpinnCount, final boolean firstCall) { do { - final int count = pollBatch(batchFlushEndPointContext, batchSize, chan); + final int count = pollBatch(batchFlushEndPointContext, chan); if (count < 0) { return; } - if (count == 0 || (firstCall && count < batchSize)) { + if (count < batchSize) { // queue was empty break; } } while (--remainingSpinnCount > 0); if (remainingSpinnCount <= 0) { + // Don't need to exitUnsafe since we still have an ongoing consume tasks in this thread. chan.eventLoop().execute(() -> loopSend(chan)); return; } - // QPSPattern is low and we have drained all tasks. 
if (firstCall) { - // Don't setUnsafe here because loopSend0() may result in a delayed loopSend() call. - batchFlushEndPointContext.getHasOngoingSendLoop().exitSafe(); - // Guarantee thread-safety: no dangling tasks in the queue. + // // Don't setUnsafe here because loopSend0() may result in a delayed loopSend() call. + batchFlushEndPointContext.hasOngoingSendLoop.exitSafe(); + // // Guarantee thread-safety: no dangling tasks in the queue. loopSend0(batchFlushEndPointContext, chan, remainingSpinnCount, false); } else { - // In low qps pattern, the send job will be triggered later when a new task is added, - batchFlushEndPointContext.getHasOngoingSendLoop().exitUnsafe(); + // The send loop will be triggered later when a new task is added, + batchFlushEndPointContext.hasOngoingSendLoop.exitUnsafe(); } } - private int pollBatch(final BatchFlushEndPointContext batchFlushEndPointContext, final int maxBatchSize, - ContextualChannel chan) { + private int pollBatch(final BatchFlushEndPointContext batchFlushEndPointContext, ContextualChannel chan) { int count = 0; - for (; count < maxBatchSize; count++) { + for (; count < batchSize; count++) { final RedisCommand cmd = this.taskQueue.poll(); // relaxed poll is faster and we wil retry later anyway. 
if (cmd == null) { break; @@ -707,9 +705,9 @@ private int pollBatch(final BatchFlushEndPointContext batchFlushEndPointContext, private void trySetEndpointQuiescence(ContextualChannel chan) { LettuceAssert.isTrue(chan.eventLoop().inEventLoop(), "unexpected: not in event loop"); - final ConnectionContext connectionContext = chan.getContext(); + final ConnectionContext connectionContext = chan.context; final @Nullable ConnectionContext.CloseStatus closeStatus = connectionContext.getCloseStatus(); - final BatchFlushEndPointContext batchFlushEndPointContext = connectionContext.getBatchFlushEndPointContext(); + final BatchFlushEndPointContext batchFlushEndPointContext = connectionContext.batchFlushEndPointContext; if (batchFlushEndPointContext.isDone() && closeStatus != null) { if (closeStatus.isWillReconnect()) { onWillReconnect(closeStatus, batchFlushEndPointContext); @@ -717,7 +715,7 @@ private void trySetEndpointQuiescence(ContextualChannel chan) { onWontReconnect(closeStatus, batchFlushEndPointContext); } - if (chan.getContext().setChannelQuiescentOnce()) { + if (chan.context.setChannelQuiescentOnce()) { onEndpointQuiescence(); } else { ExceptionUtils.maybeFire(logger, canFire, "unexpected: setEndpointQuiescenceOncePerConnection() failed"); @@ -726,7 +724,7 @@ private void trySetEndpointQuiescence(ContextualChannel chan) { } private void onEndpointQuiescence() { - if (channel.getInitialState() == ConnectionContext.State.ENDPOINT_CLOSED) { + if (channel.context.initialState == ConnectionContext.State.ENDPOINT_CLOSED) { return; } @@ -879,7 +877,7 @@ private Throwable validateWrite(@SuppressWarnings("unused") int commands) { } final ContextualChannel chan = this.channel; - switch (chan.getInitialState()) { + switch (chan.context.initialState) { case ENDPOINT_CLOSED: return new RedisException("Connection is closed"); case RECONNECT_FAILED: @@ -891,12 +889,12 @@ private Throwable validateWrite(@SuppressWarnings("unused") int commands) { case CONNECTED: return 
!chan.isActive() && rejectCommandsWhileDisconnected ? new RedisException("Connection is closed") : null; default: - throw new IllegalStateException("unexpected state: " + chan.getInitialState()); + throw new IllegalStateException("unexpected state: " + chan.context.initialState); } } private void onUnexpectedState(String caller, ConnectionContext.State exp) { - final ConnectionContext.State actual = this.channel.getInitialState(); + final ConnectionContext.State actual = this.channel.context.initialState; logger.error("{}[{}][unexpected] : unexpected state: exp '{}' got '{}'", logPrefix(), caller, exp, actual); cancelCommands(String.format("%s: state not match: expect '%s', got '%s'", caller, exp, actual)); } @@ -984,7 +982,7 @@ static WrittenToChannel newInstance(DefaultBatchFlushEndpoint endpoint, Contextu @Override public void operationComplete(Future future) { - final BatchFlushEndPointContext batchFlushEndPointContext = chan.getContext().getBatchFlushEndPointContext(); + final BatchFlushEndPointContext batchFlushEndPointContext = chan.context.batchFlushEndPointContext; try { QUEUE_SIZE.decrementAndGet(endpoint); batchFlushEndPointContext.done(1); @@ -1016,10 +1014,10 @@ private Throwable checkSendResult(Future sendFuture, ContextualChannel contex return null; } - final ConnectionContext.CloseStatus closeStatus = contextualChannel.getContext().getCloseStatus(); + final ConnectionContext.CloseStatus closeStatus = contextualChannel.context.getCloseStatus(); if (closeStatus != null) { logger.warn("[checkSendResult][interesting][{}] callback called after onClose() event, close status: {}", - endpoint.logPrefix(), contextualChannel.getContext().getCloseStatus()); + endpoint.logPrefix(), contextualChannel.context.getCloseStatus()); final Throwable err = sendFuture.isSuccess() ? 
closeStatus.getErr() : sendFuture.cause(); if (!closeStatus.isWillReconnect() || shouldNotRetry(err, cmd)) { cmd.completeExceptionally(err); @@ -1049,7 +1047,7 @@ private boolean shouldNotRetry(Throwable cause, RedisCommand cmd) { } private void internalCloseConnectionIfNeeded(ContextualChannel toCloseChan, Throwable reason) { - if (toCloseChan.getContext().isChannelInactiveEventFired() || !toCloseChan.isActive()) { + if (toCloseChan.context.isChannelInactiveEventFired() || !toCloseChan.isActive()) { return; } From ae8b533197767b25a0afd922bd567cbd6314eb65 Mon Sep 17 00:00:00 2001 From: "xiaofan.chen" Date: Mon, 29 Jul 2024 22:16:59 +0800 Subject: [PATCH 09/35] fix: queue size not correct --- .../lettuce/core/protocol/DefaultBatchFlushEndpoint.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java b/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java index 8e89ef7c89..f580955766 100644 --- a/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java +++ b/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java @@ -245,8 +245,8 @@ public RedisCommand write(RedisCommand command) { command = processActivationCommand(command); } - QUEUE_SIZE.incrementAndGet(this); this.taskQueue.offer(command); + QUEUE_SIZE.incrementAndGet(this); if (autoFlushCommands) { flushCommands(); @@ -788,6 +788,7 @@ private void offerFirstAll(Deque> commands) { } }); this.taskQueue.offerFirstAll(commands); + QUEUE_SIZE.addAndGet(this, commands.size()); } private void cancelCommands(String message) { @@ -818,6 +819,7 @@ private final void fulfillCommands(String message, Consumer cmd = this.taskQueue.poll(); if (cmd == null) { @@ -827,9 +829,12 @@ private final void fulfillCommands(String message, Consumer 0) { logger.error("{} cancel {} pending tasks, reason: '{}'", logPrefix(), totalCancelledTaskNum, message); } From aa9aec36274ca88cc3db285953df7537388de0e6 Mon 
Sep 17 00:00:00 2001 From: "xiaofan.chen" Date: Thu, 1 Aug 2024 13:43:39 +0800 Subject: [PATCH 10/35] perf: add busyLoop mode --- .../lettuce/core/AutoBatchFlushOptions.java | 40 +++++++++++++++++++ .../io/lettuce/core/ContextualChannel.java | 5 ++- .../lettuce/core/internal/LettuceAssert.java | 12 ++++-- .../protocol/DefaultBatchFlushEndpoint.java | 19 ++++++++- 4 files changed, 70 insertions(+), 6 deletions(-) diff --git a/src/main/java/io/lettuce/core/AutoBatchFlushOptions.java b/src/main/java/io/lettuce/core/AutoBatchFlushOptions.java index 35ea9e0ed3..781c15ab87 100644 --- a/src/main/java/io/lettuce/core/AutoBatchFlushOptions.java +++ b/src/main/java/io/lettuce/core/AutoBatchFlushOptions.java @@ -2,6 +2,8 @@ import java.io.Serializable; +import io.lettuce.core.internal.LettuceAssert; + /** * Options for command timeouts. These options configure how and whether commands time out once they were dispatched. Command * timeout begins: @@ -24,16 +26,26 @@ public class AutoBatchFlushOptions implements Serializable { public static final int DEFAULT_BATCH_SIZE = 8; + public static final boolean DEFAULT_USE_BUSY_LOOP = false; + + public static final long DEFAULT_BUSY_LOOP_DELAY_IN_NANOS = 400; + private final boolean enableAutoBatchFlush; private final int writeSpinCount; private final int batchSize; + private final boolean busyLoop; + + private final long busyLoopDelayInNanos; + public AutoBatchFlushOptions(AutoBatchFlushOptions.Builder builder) { this.enableAutoBatchFlush = builder.enableAutoBatchFlush; this.writeSpinCount = builder.writeSpinCount; this.batchSize = builder.batchSize; + this.busyLoop = builder.busyLoop; + this.busyLoopDelayInNanos = builder.busyLoopDelayInNanos; } /** @@ -61,6 +73,10 @@ public static class Builder { private int batchSize = DEFAULT_BATCH_SIZE; + private boolean busyLoop = DEFAULT_USE_BUSY_LOOP; + + private long busyLoopDelayInNanos = DEFAULT_BUSY_LOOP_DELAY_IN_NANOS; + /** * Enable auto batch flush. 
* @@ -79,6 +95,8 @@ public Builder enableAutoBatchFlush(boolean enableAutoBatchFlush) { * @return {@code this} */ public Builder writeSpinCount(int writeSpinCount) { + LettuceAssert.isPositive(writeSpinCount, "Write spin count must be greater 0"); + this.writeSpinCount = writeSpinCount; return this; } @@ -90,10 +108,24 @@ public Builder writeSpinCount(int writeSpinCount) { * @return {@code this} */ public Builder batchSize(int batchSize) { + LettuceAssert.isPositive(batchSize, "Batch size must be greater 0"); + this.batchSize = batchSize; return this; } + public Builder busyLoop(boolean busyLoop) { + this.busyLoop = busyLoop; + return this; + } + + public Builder busyLoopDelayInNanos(long busyLoopDelayInNanos) { + LettuceAssert.isNonNegative(busyLoopDelayInNanos, "Busy loop delay must be greater 0"); + + this.busyLoopDelayInNanos = busyLoopDelayInNanos; + return this; + } + /** * Create a new instance of {@link AutoBatchFlushOptions}. * @@ -126,4 +158,12 @@ public int getBatchSize() { return batchSize; } + public boolean isBusyLoop() { + return busyLoop; + } + + public long getBusyLoopDelayInNanos() { + return busyLoopDelayInNanos; + } + } diff --git a/src/main/java/io/lettuce/core/ContextualChannel.java b/src/main/java/io/lettuce/core/ContextualChannel.java index f01c88bee1..188698e0cf 100644 --- a/src/main/java/io/lettuce/core/ContextualChannel.java +++ b/src/main/java/io/lettuce/core/ContextualChannel.java @@ -2,6 +2,8 @@ import java.net.SocketAddress; +import javax.annotation.Nonnull; + import io.lettuce.core.context.ConnectionContext; import io.netty.buffer.ByteBufAllocator; import io.netty.channel.Channel; @@ -15,7 +17,6 @@ import io.netty.channel.EventLoop; import io.netty.util.Attribute; import io.netty.util.AttributeKey; -import org.jetbrains.annotations.NotNull; /** * @author chenxiaofan @@ -250,7 +251,7 @@ public boolean hasAttr(AttributeKey attributeKey) { } @Override - public int compareTo(@NotNull Channel o) { + public int compareTo(@Nonnull Channel 
o) { return this == o ? 0 : this.id().compareTo(o.id()); } diff --git a/src/main/java/io/lettuce/core/internal/LettuceAssert.java b/src/main/java/io/lettuce/core/internal/LettuceAssert.java index f468d3b0a5..b14d230736 100644 --- a/src/main/java/io/lettuce/core/internal/LettuceAssert.java +++ b/src/main/java/io/lettuce/core/internal/LettuceAssert.java @@ -237,9 +237,15 @@ public static void assertState(boolean condition, Supplier messageSuppli } } - public static void isPositive(int writeSpinCount, String writeSpinCountIsNotPositive) { - if (writeSpinCount <= 0) { - throw new IllegalArgumentException(writeSpinCountIsNotPositive); + public static void isPositive(int digit, String message) { + if (digit <= 0) { + throw new IllegalArgumentException(message); + } + } + + public static void isNonNegative(long digit, String message) { + if (digit < 0) { + throw new IllegalArgumentException(message); } } diff --git a/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java b/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java index f580955766..34f20d84bc 100644 --- a/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java +++ b/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java @@ -169,6 +169,10 @@ protected static void cancelCommandOnEndpointClose(RedisCommand cmd) { private final int batchSize; + private final boolean busyLoop; + + private final long busyLoopDelayInNanos; + /** * Create a new {@link BatchFlushEndpoint}. 
* @@ -197,6 +201,8 @@ protected DefaultBatchFlushEndpoint(ClientOptions clientOptions, ClientResources this.callbackOnClose = callbackOnClose; this.writeSpinCount = clientOptions.getAutoBatchFlushOptions().getWriteSpinCount(); this.batchSize = clientOptions.getAutoBatchFlushOptions().getBatchSize(); + this.busyLoop = clientOptions.getAutoBatchFlushOptions().isBusyLoop(); + this.busyLoopDelayInNanos = clientOptions.getAutoBatchFlushOptions().getBusyLoopDelayInNanos(); } @Override @@ -607,6 +613,10 @@ private void scheduleSendJobOnConnected(final ContextualChannel chan) { } private void scheduleSendJobIfNeeded(final ContextualChannel chan) { + if (busyLoop) { + return; + } + final EventLoop eventLoop = chan.eventLoop(); if (eventLoop.inEventLoop()) { scheduleSendJobInEventLoopIfNeeded(chan); @@ -669,6 +679,13 @@ private void loopSend0(final BatchFlushEndPointContext batchFlushEndPointContext return; } + if (busyLoop) { + // Don't use chan.eventLoop().execute(), otherwise performance will drop, since the event loop + // thread will trap within a certain time period. + chan.eventLoop().schedule(() -> loopSend(chan), busyLoopDelayInNanos, TimeUnit.NANOSECONDS); + return; + } + if (firstCall) { // // Don't setUnsafe here because loopSend0() may result in a delayed loopSend() call. batchFlushEndPointContext.hasOngoingSendLoop.exitSafe(); @@ -683,7 +700,7 @@ private void loopSend0(final BatchFlushEndPointContext batchFlushEndPointContext private int pollBatch(final BatchFlushEndPointContext batchFlushEndPointContext, ContextualChannel chan) { int count = 0; for (; count < batchSize; count++) { - final RedisCommand cmd = this.taskQueue.poll(); // relaxed poll is faster and we wil retry later anyway. 
+ final RedisCommand cmd = this.taskQueue.poll(); if (cmd == null) { break; } From 275b7fc0f1c31c5d181a4199854c05d9ebeb48d6 Mon Sep 17 00:00:00 2001 From: "xiaofan.chen" Date: Wed, 7 Aug 2024 21:08:36 +0800 Subject: [PATCH 11/35] fix: ensure hasOngoingSendLoop.exitSafe() --- .../protocol/DefaultBatchFlushEndpoint.java | 32 +++++++++++-------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java b/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java index 34f20d84bc..8955f151da 100644 --- a/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java +++ b/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java @@ -609,7 +609,11 @@ private void scheduleSendJobOnConnected(final ContextualChannel chan) { LettuceAssert.assertState(chan.eventLoop().inEventLoop(), "must be called in event loop thread"); // Schedule directly - scheduleSendJobInEventLoopIfNeeded(chan); + if (chan.context.batchFlushEndPointContext.hasOngoingSendLoop.tryEnterSafeGetVolatile()) { + scheduleSendJobInEventLoopIfNeeded(chan); + } + // Otherwise: + // someone will do the job for us } private void scheduleSendJobIfNeeded(final ContextualChannel chan) { @@ -618,11 +622,6 @@ private void scheduleSendJobIfNeeded(final ContextualChannel chan) { } final EventLoop eventLoop = chan.eventLoop(); - if (eventLoop.inEventLoop()) { - scheduleSendJobInEventLoopIfNeeded(chan); - return; - } - if (chan.context.batchFlushEndPointContext.hasOngoingSendLoop.tryEnterSafeGetVolatile()) { // Benchmark result of using tryEnterSafeGetVolatile() or not (1 thread, async get): // 1. uses tryEnterSafeGetVolatile() to avoid unnecessary eventLoop.execute() calls @@ -644,8 +643,11 @@ private void scheduleSendJobIfNeeded(final ContextualChannel chan) { private void scheduleSendJobInEventLoopIfNeeded(final ContextualChannel chan) { // Guarantee only 1 send loop. 
- if (chan.context.batchFlushEndPointContext.hasOngoingSendLoop.tryEnterUnsafe()) { + BatchFlushEndPointContext.HasOngoingSendLoop hasOngoingSendLoop = chan.context.batchFlushEndPointContext.hasOngoingSendLoop; + if (hasOngoingSendLoop.tryEnterUnsafe()) { loopSend(chan); + } else { + hasOngoingSendLoop.exitSafe(); } } @@ -657,11 +659,11 @@ private void loopSend(final ContextualChannel chan) { } LettuceAssert.assertState(channel == chan, "unexpected: channel not match but closeStatus == null"); - loopSend0(batchFlushEndPointContext, chan, writeSpinCount, true); + loopSend0(batchFlushEndPointContext, chan, writeSpinCount, false); } private void loopSend0(final BatchFlushEndPointContext batchFlushEndPointContext, final ContextualChannel chan, - int remainingSpinnCount, final boolean firstCall) { + int remainingSpinnCount, final boolean exitedSafe) { do { final int count = pollBatch(batchFlushEndPointContext, chan); if (count < 0) { @@ -686,14 +688,16 @@ private void loopSend0(final BatchFlushEndPointContext batchFlushEndPointContext return; } - if (firstCall) { + if (exitedSafe) { + // The send loop will be triggered later when a new task is added, + batchFlushEndPointContext.hasOngoingSendLoop.exitUnsafe(); + } else { // // Don't setUnsafe here because loopSend0() may result in a delayed loopSend() call. batchFlushEndPointContext.hasOngoingSendLoop.exitSafe(); // // Guarantee thread-safety: no dangling tasks in the queue. 
- loopSend0(batchFlushEndPointContext, chan, remainingSpinnCount, false); - } else { - // The send loop will be triggered later when a new task is added, - batchFlushEndPointContext.hasOngoingSendLoop.exitUnsafe(); + loopSend0(batchFlushEndPointContext, chan, remainingSpinnCount, true); + // chan.eventLoop().schedule(() -> loopSend0(batchFlushEndPointContext, chan, writeSpinCount, false), 100, + // TimeUnit.NANOSECONDS); } } From 6897b0598d0da18787d9681d93649cf1621472a5 Mon Sep 17 00:00:00 2001 From: "xiaofan.chen" Date: Thu, 8 Aug 2024 13:31:29 +0800 Subject: [PATCH 12/35] perf: batch offer --- .../protocol/DefaultBatchFlushEndpoint.java | 56 +++++++++++++------ 1 file changed, 40 insertions(+), 16 deletions(-) diff --git a/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java b/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java index 8955f151da..27a980dd82 100644 --- a/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java +++ b/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java @@ -155,7 +155,7 @@ protected static void cancelCommandOnEndpointClose(RedisCommand cmd) { private final String cachedEndpointId; - protected final UnboundedMpscOfferFirstQueue> taskQueue; + protected final UnboundedMpscOfferFirstQueue taskQueue; private final boolean canFire; @@ -284,9 +284,7 @@ public RedisCommand write(RedisCommand command) { commands = processActivationCommands(commands); } - for (RedisCommand command : commands) { - this.taskQueue.offer(command); - } + this.taskQueue.offer(commands); QUEUE_SIZE.addAndGet(this, commands.size()); if (autoFlushCommands) { @@ -703,12 +701,24 @@ private void loopSend0(final BatchFlushEndPointContext batchFlushEndPointContext private int pollBatch(final BatchFlushEndPointContext batchFlushEndPointContext, ContextualChannel chan) { int count = 0; - for (; count < batchSize; count++) { - final RedisCommand cmd = this.taskQueue.poll(); - if (cmd == null) { + while (count < 
batchSize) { + final Object o = this.taskQueue.poll(); + if (o == null) { break; } - channelWrite(chan, cmd).addListener(WrittenToChannel.newInstance(this, chan, cmd)); + + if (o instanceof RedisCommand) { + RedisCommand cmd = (RedisCommand) o; + channelWrite(chan, cmd).addListener(WrittenToChannel.newInstance(this, chan, cmd)); + count++; + } else { + @SuppressWarnings("unchecked") + Collection> commands = (Collection>) o; + for (RedisCommand cmd : commands) { + channelWrite(chan, cmd).addListener(WrittenToChannel.newInstance(this, chan, cmd)); + } + count += commands.size(); + } } if (count > 0) { @@ -842,17 +852,31 @@ private final void fulfillCommands(String message, Consumer cmd = this.taskQueue.poll(); - if (cmd == null) { + Object o = this.taskQueue.poll(); + if (o == null) { break; } - if (cmd.getOutput() != null) { - cmd.getOutput().setError(message); - } - commandConsumer.accept(cmd); - cancelledTaskNumInTaskQueue++; - totalCancelledTaskNum++; + if (o instanceof RedisCommand) { + RedisCommand cmd = (RedisCommand) o; + if (cmd.getOutput() != null) { + cmd.getOutput().setError(message); + } + commandConsumer.accept(cmd); + cancelledTaskNumInTaskQueue++; + totalCancelledTaskNum++; + } else { + @SuppressWarnings("unchecked") + Collection> commands = (Collection>) o; + for (RedisCommand cmd : commands) { + if (cmd.getOutput() != null) { + cmd.getOutput().setError(message); + } + commandConsumer.accept(cmd); + } + cancelledTaskNumInTaskQueue += commands.size(); + totalCancelledTaskNum += commands.size(); + } } QUEUE_SIZE.addAndGet(this, -cancelledTaskNumInTaskQueue); From 0d3df3b1c394bbe84923dd88af0dbe5c3e455c81 Mon Sep 17 00:00:00 2001 From: "xiaofan.chen" Date: Thu, 8 Aug 2024 13:44:43 +0800 Subject: [PATCH 13/35] Revert "perf: add busyLoop mode" This reverts commit 9f2edeb1d302389beee1329640cbb89d6f3e45f4. 
--- .../lettuce/core/AutoBatchFlushOptions.java | 40 ------------------- .../io/lettuce/core/ContextualChannel.java | 5 +-- .../lettuce/core/internal/LettuceAssert.java | 12 ++---- .../protocol/DefaultBatchFlushEndpoint.java | 17 -------- 4 files changed, 5 insertions(+), 69 deletions(-) diff --git a/src/main/java/io/lettuce/core/AutoBatchFlushOptions.java b/src/main/java/io/lettuce/core/AutoBatchFlushOptions.java index 781c15ab87..35ea9e0ed3 100644 --- a/src/main/java/io/lettuce/core/AutoBatchFlushOptions.java +++ b/src/main/java/io/lettuce/core/AutoBatchFlushOptions.java @@ -2,8 +2,6 @@ import java.io.Serializable; -import io.lettuce.core.internal.LettuceAssert; - /** * Options for command timeouts. These options configure how and whether commands time out once they were dispatched. Command * timeout begins: @@ -26,26 +24,16 @@ public class AutoBatchFlushOptions implements Serializable { public static final int DEFAULT_BATCH_SIZE = 8; - public static final boolean DEFAULT_USE_BUSY_LOOP = false; - - public static final long DEFAULT_BUSY_LOOP_DELAY_IN_NANOS = 400; - private final boolean enableAutoBatchFlush; private final int writeSpinCount; private final int batchSize; - private final boolean busyLoop; - - private final long busyLoopDelayInNanos; - public AutoBatchFlushOptions(AutoBatchFlushOptions.Builder builder) { this.enableAutoBatchFlush = builder.enableAutoBatchFlush; this.writeSpinCount = builder.writeSpinCount; this.batchSize = builder.batchSize; - this.busyLoop = builder.busyLoop; - this.busyLoopDelayInNanos = builder.busyLoopDelayInNanos; } /** @@ -73,10 +61,6 @@ public static class Builder { private int batchSize = DEFAULT_BATCH_SIZE; - private boolean busyLoop = DEFAULT_USE_BUSY_LOOP; - - private long busyLoopDelayInNanos = DEFAULT_BUSY_LOOP_DELAY_IN_NANOS; - /** * Enable auto batch flush. 
* @@ -95,8 +79,6 @@ public Builder enableAutoBatchFlush(boolean enableAutoBatchFlush) { * @return {@code this} */ public Builder writeSpinCount(int writeSpinCount) { - LettuceAssert.isPositive(writeSpinCount, "Write spin count must be greater 0"); - this.writeSpinCount = writeSpinCount; return this; } @@ -108,24 +90,10 @@ public Builder writeSpinCount(int writeSpinCount) { * @return {@code this} */ public Builder batchSize(int batchSize) { - LettuceAssert.isPositive(batchSize, "Batch size must be greater 0"); - this.batchSize = batchSize; return this; } - public Builder busyLoop(boolean busyLoop) { - this.busyLoop = busyLoop; - return this; - } - - public Builder busyLoopDelayInNanos(long busyLoopDelayInNanos) { - LettuceAssert.isNonNegative(busyLoopDelayInNanos, "Busy loop delay must be greater 0"); - - this.busyLoopDelayInNanos = busyLoopDelayInNanos; - return this; - } - /** * Create a new instance of {@link AutoBatchFlushOptions}. * @@ -158,12 +126,4 @@ public int getBatchSize() { return batchSize; } - public boolean isBusyLoop() { - return busyLoop; - } - - public long getBusyLoopDelayInNanos() { - return busyLoopDelayInNanos; - } - } diff --git a/src/main/java/io/lettuce/core/ContextualChannel.java b/src/main/java/io/lettuce/core/ContextualChannel.java index 188698e0cf..f01c88bee1 100644 --- a/src/main/java/io/lettuce/core/ContextualChannel.java +++ b/src/main/java/io/lettuce/core/ContextualChannel.java @@ -2,8 +2,6 @@ import java.net.SocketAddress; -import javax.annotation.Nonnull; - import io.lettuce.core.context.ConnectionContext; import io.netty.buffer.ByteBufAllocator; import io.netty.channel.Channel; @@ -17,6 +15,7 @@ import io.netty.channel.EventLoop; import io.netty.util.Attribute; import io.netty.util.AttributeKey; +import org.jetbrains.annotations.NotNull; /** * @author chenxiaofan @@ -251,7 +250,7 @@ public boolean hasAttr(AttributeKey attributeKey) { } @Override - public int compareTo(@Nonnull Channel o) { + public int compareTo(@NotNull Channel 
o) { return this == o ? 0 : this.id().compareTo(o.id()); } diff --git a/src/main/java/io/lettuce/core/internal/LettuceAssert.java b/src/main/java/io/lettuce/core/internal/LettuceAssert.java index b14d230736..f468d3b0a5 100644 --- a/src/main/java/io/lettuce/core/internal/LettuceAssert.java +++ b/src/main/java/io/lettuce/core/internal/LettuceAssert.java @@ -237,15 +237,9 @@ public static void assertState(boolean condition, Supplier messageSuppli } } - public static void isPositive(int digit, String message) { - if (digit <= 0) { - throw new IllegalArgumentException(message); - } - } - - public static void isNonNegative(long digit, String message) { - if (digit < 0) { - throw new IllegalArgumentException(message); + public static void isPositive(int writeSpinCount, String writeSpinCountIsNotPositive) { + if (writeSpinCount <= 0) { + throw new IllegalArgumentException(writeSpinCountIsNotPositive); } } diff --git a/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java b/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java index 27a980dd82..5f1132d8e3 100644 --- a/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java +++ b/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java @@ -169,10 +169,6 @@ protected static void cancelCommandOnEndpointClose(RedisCommand cmd) { private final int batchSize; - private final boolean busyLoop; - - private final long busyLoopDelayInNanos; - /** * Create a new {@link BatchFlushEndpoint}. 
* @@ -201,8 +197,6 @@ protected DefaultBatchFlushEndpoint(ClientOptions clientOptions, ClientResources this.callbackOnClose = callbackOnClose; this.writeSpinCount = clientOptions.getAutoBatchFlushOptions().getWriteSpinCount(); this.batchSize = clientOptions.getAutoBatchFlushOptions().getBatchSize(); - this.busyLoop = clientOptions.getAutoBatchFlushOptions().isBusyLoop(); - this.busyLoopDelayInNanos = clientOptions.getAutoBatchFlushOptions().getBusyLoopDelayInNanos(); } @Override @@ -615,10 +609,6 @@ private void scheduleSendJobOnConnected(final ContextualChannel chan) { } private void scheduleSendJobIfNeeded(final ContextualChannel chan) { - if (busyLoop) { - return; - } - final EventLoop eventLoop = chan.eventLoop(); if (chan.context.batchFlushEndPointContext.hasOngoingSendLoop.tryEnterSafeGetVolatile()) { // Benchmark result of using tryEnterSafeGetVolatile() or not (1 thread, async get): @@ -679,13 +669,6 @@ private void loopSend0(final BatchFlushEndPointContext batchFlushEndPointContext return; } - if (busyLoop) { - // Don't use chan.eventLoop().execute(), otherwise performance will drop, since the event loop - // thread will trap within a certain time period. 
- chan.eventLoop().schedule(() -> loopSend(chan), busyLoopDelayInNanos, TimeUnit.NANOSECONDS); - return; - } - if (exitedSafe) { // The send loop will be triggered later when a new task is added, batchFlushEndPointContext.hasOngoingSendLoop.exitUnsafe(); From b4a90abb7558b50ab41642ee9d20fc4391c0fea7 Mon Sep 17 00:00:00 2001 From: "xiaofan.chen" Date: Thu, 8 Aug 2024 13:47:34 +0800 Subject: [PATCH 14/35] chore: add asserts, revert jetbrain#NotNull --- src/main/java/io/lettuce/core/AutoBatchFlushOptions.java | 6 ++++++ src/main/java/io/lettuce/core/ContextualChannel.java | 5 +++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/main/java/io/lettuce/core/AutoBatchFlushOptions.java b/src/main/java/io/lettuce/core/AutoBatchFlushOptions.java index 35ea9e0ed3..7c61c1d04a 100644 --- a/src/main/java/io/lettuce/core/AutoBatchFlushOptions.java +++ b/src/main/java/io/lettuce/core/AutoBatchFlushOptions.java @@ -2,6 +2,8 @@ import java.io.Serializable; +import io.lettuce.core.internal.LettuceAssert; + /** * Options for command timeouts. These options configure how and whether commands time out once they were dispatched. 
Command * timeout begins: @@ -79,6 +81,8 @@ public Builder enableAutoBatchFlush(boolean enableAutoBatchFlush) { * @return {@code this} */ public Builder writeSpinCount(int writeSpinCount) { + LettuceAssert.isPositive(writeSpinCount, "Batch size must be greater than 0"); + this.writeSpinCount = writeSpinCount; return this; } @@ -90,6 +94,8 @@ public Builder writeSpinCount(int writeSpinCount) { * @return {@code this} */ public Builder batchSize(int batchSize) { + LettuceAssert.isPositive(batchSize, "Batch size must be greater than 0"); + this.batchSize = batchSize; return this; } diff --git a/src/main/java/io/lettuce/core/ContextualChannel.java b/src/main/java/io/lettuce/core/ContextualChannel.java index f01c88bee1..188698e0cf 100644 --- a/src/main/java/io/lettuce/core/ContextualChannel.java +++ b/src/main/java/io/lettuce/core/ContextualChannel.java @@ -2,6 +2,8 @@ import java.net.SocketAddress; +import javax.annotation.Nonnull; + import io.lettuce.core.context.ConnectionContext; import io.netty.buffer.ByteBufAllocator; import io.netty.channel.Channel; @@ -15,7 +17,6 @@ import io.netty.channel.EventLoop; import io.netty.util.Attribute; import io.netty.util.AttributeKey; -import org.jetbrains.annotations.NotNull; /** * @author chenxiaofan @@ -250,7 +251,7 @@ public boolean hasAttr(AttributeKey attributeKey) { } @Override - public int compareTo(@NotNull Channel o) { + public int compareTo(@Nonnull Channel o) { return this == o ? 
0 : this.id().compareTo(o.id()); } From fb65b0a132c1c3bae1999cb7f49371d8ae4533ca Mon Sep 17 00:00:00 2001 From: "xiaofan.chen" Date: Thu, 8 Aug 2024 13:55:59 +0800 Subject: [PATCH 15/35] refactor: remove tryEnterUnsafe/exitUnsafe --- .../context/BatchFlushEndPointContext.java | 21 ++------------- .../protocol/DefaultBatchFlushEndpoint.java | 26 +++++-------------- 2 files changed, 9 insertions(+), 38 deletions(-) diff --git a/src/main/java/io/lettuce/core/context/BatchFlushEndPointContext.java b/src/main/java/io/lettuce/core/context/BatchFlushEndPointContext.java index c7dff147bd..1939c62a55 100644 --- a/src/main/java/io/lettuce/core/context/BatchFlushEndPointContext.java +++ b/src/main/java/io/lettuce/core/context/BatchFlushEndPointContext.java @@ -41,31 +41,14 @@ public HasOngoingSendLoop() { * * @return true if entered the loop, false if already have a running loop. */ - public boolean tryEnterSafeGetVolatile() { + public boolean tryEnter() { return safe.get() == 0 && /* rare case if QPS is high */ safe.compareAndSet(0, 1); } - public void exitSafe() { + public void exit() { safe.set(0); } - /** - * This method is not thread safe, can only be used from single thread. 
- * - * @return true if the value was updated - */ - public boolean tryEnterUnsafe() { - if (unsafe) { - return false; - } - unsafe = true; - return true; - } - - public void exitUnsafe() { - unsafe = false; - } - } BatchFlushEndPointContext() { diff --git a/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java b/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java index 5f1132d8e3..0ca5739bf6 100644 --- a/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java +++ b/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java @@ -601,8 +601,8 @@ private void scheduleSendJobOnConnected(final ContextualChannel chan) { LettuceAssert.assertState(chan.eventLoop().inEventLoop(), "must be called in event loop thread"); // Schedule directly - if (chan.context.batchFlushEndPointContext.hasOngoingSendLoop.tryEnterSafeGetVolatile()) { - scheduleSendJobInEventLoopIfNeeded(chan); + if (chan.context.batchFlushEndPointContext.hasOngoingSendLoop.tryEnter()) { + loopSend(chan); } // Otherwise: // someone will do the job for us @@ -610,7 +610,7 @@ private void scheduleSendJobOnConnected(final ContextualChannel chan) { private void scheduleSendJobIfNeeded(final ContextualChannel chan) { final EventLoop eventLoop = chan.eventLoop(); - if (chan.context.batchFlushEndPointContext.hasOngoingSendLoop.tryEnterSafeGetVolatile()) { + if (chan.context.batchFlushEndPointContext.hasOngoingSendLoop.tryEnter()) { // Benchmark result of using tryEnterSafeGetVolatile() or not (1 thread, async get): // 1. uses tryEnterSafeGetVolatile() to avoid unnecessary eventLoop.execute() calls // Avg latency: 3.2956217278663s @@ -618,7 +618,7 @@ private void scheduleSendJobIfNeeded(final ContextualChannel chan) { // 2. 
uses eventLoop.execute() directly // Avg latency: 3.2677197021496998s // Avg QPS: 476925.0751855796/s - eventLoop.execute(() -> scheduleSendJobInEventLoopIfNeeded(chan)); + eventLoop.execute(() -> loopSend(chan)); } // Otherwise: @@ -629,16 +629,6 @@ private void scheduleSendJobIfNeeded(final ContextualChannel chan) { // second loopSend0(), which will call poll() } - private void scheduleSendJobInEventLoopIfNeeded(final ContextualChannel chan) { - // Guarantee only 1 send loop. - BatchFlushEndPointContext.HasOngoingSendLoop hasOngoingSendLoop = chan.context.batchFlushEndPointContext.hasOngoingSendLoop; - if (hasOngoingSendLoop.tryEnterUnsafe()) { - loopSend(chan); - } else { - hasOngoingSendLoop.exitSafe(); - } - } - private void loopSend(final ContextualChannel chan) { final ConnectionContext connectionContext = chan.context; final BatchFlushEndPointContext batchFlushEndPointContext = connectionContext.batchFlushEndPointContext; @@ -651,7 +641,7 @@ private void loopSend(final ContextualChannel chan) { } private void loopSend0(final BatchFlushEndPointContext batchFlushEndPointContext, final ContextualChannel chan, - int remainingSpinnCount, final boolean exitedSafe) { + int remainingSpinnCount, final boolean exited) { do { final int count = pollBatch(batchFlushEndPointContext, chan); if (count < 0) { @@ -669,12 +659,10 @@ private void loopSend0(final BatchFlushEndPointContext batchFlushEndPointContext return; } - if (exitedSafe) { + if (!exited) { // The send loop will be triggered later when a new task is added, - batchFlushEndPointContext.hasOngoingSendLoop.exitUnsafe(); - } else { // // Don't setUnsafe here because loopSend0() may result in a delayed loopSend() call. - batchFlushEndPointContext.hasOngoingSendLoop.exitSafe(); + batchFlushEndPointContext.hasOngoingSendLoop.exit(); // // Guarantee thread-safety: no dangling tasks in the queue. 
loopSend0(batchFlushEndPointContext, chan, remainingSpinnCount, true); // chan.eventLoop().schedule(() -> loopSend0(batchFlushEndPointContext, chan, writeSpinCount, false), 100, From 3241d42e5474cbbc7e5a2c0fea0ee444d89dfca8 Mon Sep 17 00:00:00 2001 From: "xiaofan.chen" Date: Thu, 8 Aug 2024 16:57:34 +0800 Subject: [PATCH 16/35] chore: handle eventLoop.inEventLoop() case of scheduleSendJobIfNeeded() --- .../protocol/DefaultBatchFlushEndpoint.java | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java b/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java index 0ca5739bf6..54cbd6c511 100644 --- a/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java +++ b/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java @@ -601,15 +601,17 @@ private void scheduleSendJobOnConnected(final ContextualChannel chan) { LettuceAssert.assertState(chan.eventLoop().inEventLoop(), "must be called in event loop thread"); // Schedule directly - if (chan.context.batchFlushEndPointContext.hasOngoingSendLoop.tryEnter()) { - loopSend(chan); - } - // Otherwise: - // someone will do the job for us + loopSend(chan, false); } private void scheduleSendJobIfNeeded(final ContextualChannel chan) { final EventLoop eventLoop = chan.eventLoop(); + if (eventLoop.inEventLoop()) { + // Possible in reactive() mode. + loopSend(chan, false); + return; + } + if (chan.context.batchFlushEndPointContext.hasOngoingSendLoop.tryEnter()) { // Benchmark result of using tryEnterSafeGetVolatile() or not (1 thread, async get): // 1. uses tryEnterSafeGetVolatile() to avoid unnecessary eventLoop.execute() calls @@ -618,7 +620,7 @@ private void scheduleSendJobIfNeeded(final ContextualChannel chan) { // 2. 
uses eventLoop.execute() directly // Avg latency: 3.2677197021496998s // Avg QPS: 476925.0751855796/s - eventLoop.execute(() -> loopSend(chan)); + eventLoop.execute(() -> loopSend(chan, true)); } // Otherwise: @@ -629,7 +631,7 @@ private void scheduleSendJobIfNeeded(final ContextualChannel chan) { // second loopSend0(), which will call poll() } - private void loopSend(final ContextualChannel chan) { + private void loopSend(final ContextualChannel chan, boolean entered) { final ConnectionContext connectionContext = chan.context; final BatchFlushEndPointContext batchFlushEndPointContext = connectionContext.batchFlushEndPointContext; if (connectionContext.isChannelInactiveEventFired() || batchFlushEndPointContext.hasRetryableFailedToSendTasks()) { @@ -637,11 +639,11 @@ private void loopSend(final ContextualChannel chan) { } LettuceAssert.assertState(channel == chan, "unexpected: channel not match but closeStatus == null"); - loopSend0(batchFlushEndPointContext, chan, writeSpinCount, false); + loopSend0(batchFlushEndPointContext, chan, writeSpinCount, entered); } private void loopSend0(final BatchFlushEndPointContext batchFlushEndPointContext, final ContextualChannel chan, - int remainingSpinnCount, final boolean exited) { + int remainingSpinnCount, final boolean entered) { do { final int count = pollBatch(batchFlushEndPointContext, chan); if (count < 0) { @@ -655,16 +657,16 @@ private void loopSend0(final BatchFlushEndPointContext batchFlushEndPointContext if (remainingSpinnCount <= 0) { // Don't need to exitUnsafe since we still have an ongoing consume tasks in this thread. - chan.eventLoop().execute(() -> loopSend(chan)); + chan.eventLoop().execute(() -> loopSend(chan, entered)); return; } - if (!exited) { + if (entered) { // The send loop will be triggered later when a new task is added, // // Don't setUnsafe here because loopSend0() may result in a delayed loopSend() call. 
batchFlushEndPointContext.hasOngoingSendLoop.exit(); // // Guarantee thread-safety: no dangling tasks in the queue. - loopSend0(batchFlushEndPointContext, chan, remainingSpinnCount, true); + loopSend0(batchFlushEndPointContext, chan, remainingSpinnCount, false); // chan.eventLoop().schedule(() -> loopSend0(batchFlushEndPointContext, chan, writeSpinCount, false), 100, // TimeUnit.NANOSECONDS); } From 205813a9914e85d529a4e8b41bb23ef67719ca64 Mon Sep 17 00:00:00 2001 From: "xiaofan.chen" Date: Thu, 8 Aug 2024 22:42:47 +0800 Subject: [PATCH 17/35] refactor: rename BatchFlushEndpoint->AutoBatchFlushEndpoint --- .../java/io/lettuce/core/RedisClient.java | 4 +-- ...=> ClusterNodeAutoBatchFlushEndpoint.java} | 8 +++--- .../core/cluster/RedisClusterClient.java | 2 +- ...point.java => AutoBatchFlushEndpoint.java} | 2 +- .../lettuce/core/protocol/CommandHandler.java | 4 +-- .../core/protocol/ConnectionWatchdog.java | 4 +-- ...ava => DefaultAutoBatchFlushEndpoint.java} | 28 +++++++++---------- 7 files changed, 26 insertions(+), 26 deletions(-) rename src/main/java/io/lettuce/core/cluster/{ClusterNodeBatchFlushEndpoint.java => ClusterNodeAutoBatchFlushEndpoint.java} (84%) rename src/main/java/io/lettuce/core/protocol/{BatchFlushEndpoint.java => AutoBatchFlushEndpoint.java} (92%) rename src/main/java/io/lettuce/core/protocol/{DefaultBatchFlushEndpoint.java => DefaultAutoBatchFlushEndpoint.java} (96%) diff --git a/src/main/java/io/lettuce/core/RedisClient.java b/src/main/java/io/lettuce/core/RedisClient.java index 917e2dce91..222021fdf9 100644 --- a/src/main/java/io/lettuce/core/RedisClient.java +++ b/src/main/java/io/lettuce/core/RedisClient.java @@ -39,7 +39,7 @@ import io.lettuce.core.masterreplica.MasterReplica; import io.lettuce.core.protocol.CommandExpiryWriter; import io.lettuce.core.protocol.CommandHandler; -import io.lettuce.core.protocol.DefaultBatchFlushEndpoint; +import io.lettuce.core.protocol.DefaultAutoBatchFlushEndpoint; import 
io.lettuce.core.protocol.DefaultEndpoint; import io.lettuce.core.protocol.Endpoint; import io.lettuce.core.protocol.PushHandler; @@ -277,7 +277,7 @@ private ConnectionFuture> connectStandalone logger.debug("Trying to get a Redis connection for: {}", redisURI); Endpoint endpoint = getOptions().getAutoBatchFlushOptions().isAutoBatchFlushEnabled() - ? new DefaultBatchFlushEndpoint(getOptions(), getResources()) + ? new DefaultAutoBatchFlushEndpoint(getOptions(), getResources()) : new DefaultEndpoint(getOptions(), getResources()); RedisChannelWriter writer = (RedisChannelWriter) endpoint; diff --git a/src/main/java/io/lettuce/core/cluster/ClusterNodeBatchFlushEndpoint.java b/src/main/java/io/lettuce/core/cluster/ClusterNodeAutoBatchFlushEndpoint.java similarity index 84% rename from src/main/java/io/lettuce/core/cluster/ClusterNodeBatchFlushEndpoint.java rename to src/main/java/io/lettuce/core/cluster/ClusterNodeAutoBatchFlushEndpoint.java index 977f6f43f9..bf754cb3a9 100644 --- a/src/main/java/io/lettuce/core/cluster/ClusterNodeBatchFlushEndpoint.java +++ b/src/main/java/io/lettuce/core/cluster/ClusterNodeAutoBatchFlushEndpoint.java @@ -18,7 +18,7 @@ import io.lettuce.core.ClientOptions; import io.lettuce.core.RedisChannelWriter; import io.lettuce.core.RedisException; -import io.lettuce.core.protocol.DefaultBatchFlushEndpoint; +import io.lettuce.core.protocol.DefaultAutoBatchFlushEndpoint; import io.lettuce.core.resource.ClientResources; /** @@ -28,7 +28,7 @@ * * @author Mark Paluch */ -public class ClusterNodeBatchFlushEndpoint extends DefaultBatchFlushEndpoint { +public class ClusterNodeAutoBatchFlushEndpoint extends DefaultAutoBatchFlushEndpoint { /** * Initialize a new instance that handles commands from the supplied queue. @@ -37,7 +37,7 @@ public class ClusterNodeBatchFlushEndpoint extends DefaultBatchFlushEndpoint { * @param clientResources client resources for this connection. * @param clusterChannelWriter top-most channel writer. 
*/ - public ClusterNodeBatchFlushEndpoint(ClientOptions clientOptions, ClientResources clientResources, + public ClusterNodeAutoBatchFlushEndpoint(ClientOptions clientOptions, ClientResources clientResources, RedisChannelWriter clusterChannelWriter) { super(clientOptions, clientResources, clusterChannelWriter != null ? cmd -> { if (cmd.isDone()) { @@ -49,7 +49,7 @@ public ClusterNodeBatchFlushEndpoint(ClientOptions clientOptions, ClientResource } catch (RedisException e) { cmd.completeExceptionally(e); } - } : DefaultBatchFlushEndpoint::cancelCommandOnEndpointClose); + } : DefaultAutoBatchFlushEndpoint::cancelCommandOnEndpointClose); } } diff --git a/src/main/java/io/lettuce/core/cluster/RedisClusterClient.java b/src/main/java/io/lettuce/core/cluster/RedisClusterClient.java index b8e244e684..9b0409c06b 100644 --- a/src/main/java/io/lettuce/core/cluster/RedisClusterClient.java +++ b/src/main/java/io/lettuce/core/cluster/RedisClusterClient.java @@ -555,7 +555,7 @@ ConnectionFuture> connectToNodeAsync(RedisC LettuceAssert.notNull(socketAddressSupplier, "SocketAddressSupplier must not be null"); Endpoint endpoint = getClusterClientOptions().getAutoBatchFlushOptions().isAutoBatchFlushEnabled() - ? new ClusterNodeBatchFlushEndpoint(getClusterClientOptions(), getResources(), clusterWriter) + ? 
new ClusterNodeAutoBatchFlushEndpoint(getClusterClientOptions(), getResources(), clusterWriter) : new ClusterNodeEndpoint(getClusterClientOptions(), getResources(), clusterWriter); RedisChannelWriter writer = (RedisChannelWriter) endpoint; diff --git a/src/main/java/io/lettuce/core/protocol/BatchFlushEndpoint.java b/src/main/java/io/lettuce/core/protocol/AutoBatchFlushEndpoint.java similarity index 92% rename from src/main/java/io/lettuce/core/protocol/BatchFlushEndpoint.java rename to src/main/java/io/lettuce/core/protocol/AutoBatchFlushEndpoint.java index 1878bf853e..5ba84c9466 100644 --- a/src/main/java/io/lettuce/core/protocol/BatchFlushEndpoint.java +++ b/src/main/java/io/lettuce/core/protocol/AutoBatchFlushEndpoint.java @@ -7,7 +7,7 @@ /** * @author chenxiaofan */ -public interface BatchFlushEndpoint extends Endpoint { +public interface AutoBatchFlushEndpoint extends Endpoint { @Override default void notifyDrainQueuedCommands(HasQueuedCommands queuedCommands) { diff --git a/src/main/java/io/lettuce/core/protocol/CommandHandler.java b/src/main/java/io/lettuce/core/protocol/CommandHandler.java index 340e776148..8f5ee0f8fa 100644 --- a/src/main/java/io/lettuce/core/protocol/CommandHandler.java +++ b/src/main/java/io/lettuce/core/protocol/CommandHandler.java @@ -154,7 +154,7 @@ public CommandHandler(ClientOptions clientOptions, ClientResources clientResourc this.clientOptions = clientOptions; this.clientResources = clientResources; this.endpoint = endpoint; - this.supportsBatchFlush = endpoint instanceof BatchFlushEndpoint; + this.supportsBatchFlush = endpoint instanceof AutoBatchFlushEndpoint; this.commandLatencyRecorder = clientResources.commandLatencyRecorder(); this.latencyMetricsEnabled = commandLatencyRecorder.isEnabled(); this.boundedQueues = clientOptions.getRequestQueueSize() != Integer.MAX_VALUE; @@ -399,7 +399,7 @@ public void channelInactive(ChannelHandlerContext ctx) throws Exception { if (supportsBatchFlush) { // Needs decision of watchdog - 
((BatchFlushEndpoint) endpoint).notifyChannelInactiveAfterWatchdogDecision(ctx.channel(), + ((AutoBatchFlushEndpoint) endpoint).notifyChannelInactiveAfterWatchdogDecision(ctx.channel(), batchFlushRetryableDrainQueuedCommands); } } diff --git a/src/main/java/io/lettuce/core/protocol/ConnectionWatchdog.java b/src/main/java/io/lettuce/core/protocol/ConnectionWatchdog.java index d23c9adc73..dc0f222084 100644 --- a/src/main/java/io/lettuce/core/protocol/ConnectionWatchdog.java +++ b/src/main/java/io/lettuce/core/protocol/ConnectionWatchdog.java @@ -149,7 +149,7 @@ public ConnectionWatchdog(Delay reconnectDelay, ClientOptions clientOptions, Boo this.redisUri = (String) bootstrap.config().attrs().get(ConnectionBuilder.REDIS_URI); this.epid = endpoint.getId(); this.endpoint = endpoint; - this.useBatchFlushEndpoint = endpoint instanceof BatchFlushEndpoint; + this.useBatchFlushEndpoint = endpoint instanceof AutoBatchFlushEndpoint; Mono wrappedSocketAddressSupplier = socketAddressSupplier.doOnNext(addr -> remoteAddress = addr) .onErrorResume(t -> { @@ -308,7 +308,7 @@ private void notifyEndpointFailedToConnectIfNeeded() { private void notifyEndpointFailedToConnectIfNeeded(Exception e) { if (useBatchFlushEndpoint) { - ((BatchFlushEndpoint) endpoint).notifyReconnectFailed(e); + ((AutoBatchFlushEndpoint) endpoint).notifyReconnectFailed(e); } } diff --git a/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java b/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java similarity index 96% rename from src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java rename to src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java index 54cbd6c511..a8f2494cb6 100644 --- a/src/main/java/io/lettuce/core/protocol/DefaultBatchFlushEndpoint.java +++ b/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java @@ -64,20 +64,20 @@ * * @author Mark Paluch */ -public class DefaultBatchFlushEndpoint implements 
RedisChannelWriter, BatchFlushEndpoint, PushHandler { +public class DefaultAutoBatchFlushEndpoint implements RedisChannelWriter, AutoBatchFlushEndpoint, PushHandler { - private static final InternalLogger logger = InternalLoggerFactory.getInstance(BatchFlushEndpoint.class); + private static final InternalLogger logger = InternalLoggerFactory.getInstance(AutoBatchFlushEndpoint.class); private static final AtomicLong ENDPOINT_COUNTER = new AtomicLong(); - private static final AtomicReferenceFieldUpdater CHANNEL = AtomicReferenceFieldUpdater - .newUpdater(DefaultBatchFlushEndpoint.class, ContextualChannel.class, "channel"); + private static final AtomicReferenceFieldUpdater CHANNEL = AtomicReferenceFieldUpdater + .newUpdater(DefaultAutoBatchFlushEndpoint.class, ContextualChannel.class, "channel"); - private static final AtomicIntegerFieldUpdater QUEUE_SIZE = AtomicIntegerFieldUpdater - .newUpdater(DefaultBatchFlushEndpoint.class, "queueSize"); + private static final AtomicIntegerFieldUpdater QUEUE_SIZE = AtomicIntegerFieldUpdater + .newUpdater(DefaultAutoBatchFlushEndpoint.class, "queueSize"); - private static final AtomicIntegerFieldUpdater STATUS = AtomicIntegerFieldUpdater - .newUpdater(DefaultBatchFlushEndpoint.class, "status"); + private static final AtomicIntegerFieldUpdater STATUS = AtomicIntegerFieldUpdater + .newUpdater(DefaultAutoBatchFlushEndpoint.class, "status"); private static final int ST_OPEN = 0; @@ -170,16 +170,16 @@ protected static void cancelCommandOnEndpointClose(RedisCommand cmd) { private final int batchSize; /** - * Create a new {@link BatchFlushEndpoint}. + * Create a new {@link AutoBatchFlushEndpoint}. * * @param clientOptions client options for this connection, must not be {@code null}. * @param clientResources client resources for this connection, must not be {@code null}. 
*/ - public DefaultBatchFlushEndpoint(ClientOptions clientOptions, ClientResources clientResources) { - this(clientOptions, clientResources, DefaultBatchFlushEndpoint::cancelCommandOnEndpointClose); + public DefaultAutoBatchFlushEndpoint(ClientOptions clientOptions, ClientResources clientResources) { + this(clientOptions, clientResources, DefaultAutoBatchFlushEndpoint::cancelCommandOnEndpointClose); } - protected DefaultBatchFlushEndpoint(ClientOptions clientOptions, ClientResources clientResources, + protected DefaultAutoBatchFlushEndpoint(ClientOptions clientOptions, ClientResources clientResources, Consumer> callbackOnClose) { LettuceAssert.notNull(clientOptions, "ClientOptions must not be null"); @@ -976,7 +976,7 @@ protected WrittenToChannel newObject(Recycler.Handle handle) { private final Recycler.Handle handle; - private DefaultBatchFlushEndpoint endpoint; + private DefaultAutoBatchFlushEndpoint endpoint; private RedisCommand command; @@ -991,7 +991,7 @@ private WrittenToChannel(Recycler.Handle handle) { * * @return new instance */ - static WrittenToChannel newInstance(DefaultBatchFlushEndpoint endpoint, ContextualChannel chan, + static WrittenToChannel newInstance(DefaultAutoBatchFlushEndpoint endpoint, ContextualChannel chan, RedisCommand command) { WrittenToChannel entry = RECYCLER.get(); From 2b573341d0d0ab763ec35860eb122447442a2e31 Mon Sep 17 00:00:00 2001 From: "xiaofan.chen" Date: Thu, 8 Aug 2024 23:06:14 +0800 Subject: [PATCH 18/35] refactor: rename BatchFlushEndpointContext->AutoBatchFlushEndpointContext --- ...ava => AutoBatchFlushEndPointContext.java} | 53 +++++------ .../core/context/ConnectionContext.java | 8 +- .../lettuce/core/protocol/CommandHandler.java | 14 +-- .../core/protocol/ConnectionWatchdog.java | 8 +- .../DefaultAutoBatchFlushEndpoint.java | 88 +++++++++---------- .../io/lettuce/core/utils/ExceptionUtils.java | 12 --- 6 files changed, 83 insertions(+), 100 deletions(-) rename 
src/main/java/io/lettuce/core/context/{BatchFlushEndPointContext.java => AutoBatchFlushEndPointContext.java} (61%) diff --git a/src/main/java/io/lettuce/core/context/BatchFlushEndPointContext.java b/src/main/java/io/lettuce/core/context/AutoBatchFlushEndPointContext.java similarity index 61% rename from src/main/java/io/lettuce/core/context/BatchFlushEndPointContext.java rename to src/main/java/io/lettuce/core/context/AutoBatchFlushEndPointContext.java index 1939c62a55..32dc7fd241 100644 --- a/src/main/java/io/lettuce/core/context/BatchFlushEndPointContext.java +++ b/src/main/java/io/lettuce/core/context/AutoBatchFlushEndPointContext.java @@ -15,9 +15,9 @@ /** * @author chenxiaofan */ -public class BatchFlushEndPointContext { +public class AutoBatchFlushEndPointContext { - private static final InternalLogger logger = InternalLoggerFactory.getInstance(BatchFlushEndPointContext.class); + private static final InternalLogger logger = InternalLoggerFactory.getInstance(AutoBatchFlushEndPointContext.class); public static class HasOngoingSendLoop { @@ -51,14 +51,14 @@ public void exit() { } - BatchFlushEndPointContext() { + AutoBatchFlushEndPointContext() { } /** - * Tasks that failed to send (probably due to connection errors) + * Commands that failed to send (probably due to connection errors) */ @Nullable - Deque> retryableFailedToSendTasks = null; + Deque> retryableFailedToSendCommands = null; Throwable firstDiscontinueReason = null; @@ -66,11 +66,11 @@ public Throwable getFirstDiscontinueReason() { return firstDiscontinueReason; } - private int flyingTaskNum; + private int flyingCmdNum; @SuppressWarnings("unused") - public int getFlyingTaskNum() { - return flyingTaskNum; + public int getFlyingCmdNum() { + return flyingCmdNum; } private int total = 0; @@ -83,47 +83,48 @@ public int getTotal() { public void add(int n) { this.total += n; - this.flyingTaskNum += n; + this.flyingCmdNum += n; } - public @Nullable Deque> getAndClearRetryableFailedToSendTasks() { - final 
Deque> old = this.retryableFailedToSendTasks; - // don't set to null so give us a chance to expose potential bugs if there is addRetryableFailedToSendTask() afterwards - this.retryableFailedToSendTasks = UnmodifiableDeque.emptyDeque(); + public @Nullable Deque> getAndClearRetryableFailedToSendCommands() { + final Deque> old = this.retryableFailedToSendCommands; + // don't set to null so give us a chance to expose potential bugs if there is addRetryableFailedToSendCommand() + // afterwards + this.retryableFailedToSendCommands = UnmodifiableDeque.emptyDeque(); return old; } public void done(int n) { - this.flyingTaskNum -= n; + this.flyingCmdNum -= n; } public boolean isDone() { - if (this.flyingTaskNum < 0) { - logger.error("[unexpected] flyingTaskNum < 0, flyingTaskNum: {}, total: {}", this.flyingTaskNum, this.total); + if (this.flyingCmdNum < 0) { + logger.error("[unexpected] flyingCmdNum < 0, flyingCmdNum: {}, total: {}", this.flyingCmdNum, this.total); return true; } - return this.flyingTaskNum == 0; + return this.flyingCmdNum == 0; } - public boolean hasRetryableFailedToSendTasks() { - return retryableFailedToSendTasks != null; + public boolean hasRetryableFailedToSendCommands() { + return retryableFailedToSendCommands != null; } /** - * @param retryableTask retryable task + * @param retryableCommand retryable command * @param cause fail reason - * @return true if this is the first retryable failed task + * @return true if this is the first retryable failed command */ - public boolean addRetryableFailedToSendTask(RedisCommand retryableTask, @Nonnull Throwable cause) { - if (retryableFailedToSendTasks == null) { - retryableFailedToSendTasks = new ArrayDeque<>(); - retryableFailedToSendTasks.add(retryableTask); + public boolean addRetryableFailedToSendCommand(RedisCommand retryableCommand, @Nonnull Throwable cause) { + if (retryableFailedToSendCommands == null) { + retryableFailedToSendCommands = new ArrayDeque<>(); + 
retryableFailedToSendCommands.add(retryableCommand); firstDiscontinueReason = cause; return true; } - retryableFailedToSendTasks.add(retryableTask); + retryableFailedToSendCommands.add(retryableCommand); return false; } diff --git a/src/main/java/io/lettuce/core/context/ConnectionContext.java b/src/main/java/io/lettuce/core/context/ConnectionContext.java index 796674eafe..74ce4e9534 100644 --- a/src/main/java/io/lettuce/core/context/ConnectionContext.java +++ b/src/main/java/io/lettuce/core/context/ConnectionContext.java @@ -67,11 +67,11 @@ public boolean isConnected() { public final State initialState; - public final BatchFlushEndPointContext batchFlushEndPointContext; + public final AutoBatchFlushEndPointContext autoBatchFlushEndPointContext; public ConnectionContext(State initialState) { this.initialState = initialState; - this.batchFlushEndPointContext = new BatchFlushEndPointContext(); + this.autoBatchFlushEndPointContext = new AutoBatchFlushEndPointContext(); } /* below fields must be accessed by the event loop thread only */ @@ -92,10 +92,6 @@ public boolean isChannelInactiveEventFired() { private boolean channelQuiescent = false; - public boolean isChannelQuiescent() { - return channelQuiescent; - } - public boolean setChannelQuiescentOnce() { if (channelQuiescent) { return false; diff --git a/src/main/java/io/lettuce/core/protocol/CommandHandler.java b/src/main/java/io/lettuce/core/protocol/CommandHandler.java index 8f5ee0f8fa..de6cf0f0e1 100644 --- a/src/main/java/io/lettuce/core/protocol/CommandHandler.java +++ b/src/main/java/io/lettuce/core/protocol/CommandHandler.java @@ -96,7 +96,7 @@ public class CommandHandler extends ChannelDuplexHandler implements HasQueuedCom private final Endpoint endpoint; - private final boolean supportsBatchFlush; + private final boolean supportsAutoBatchFlush; private final ArrayDeque> stack = new ArrayDeque<>(); @@ -154,7 +154,7 @@ public CommandHandler(ClientOptions clientOptions, ClientResources clientResourc 
this.clientOptions = clientOptions; this.clientResources = clientResources; this.endpoint = endpoint; - this.supportsBatchFlush = endpoint instanceof AutoBatchFlushEndpoint; + this.supportsAutoBatchFlush = endpoint instanceof AutoBatchFlushEndpoint; this.commandLatencyRecorder = clientResources.commandLatencyRecorder(); this.latencyMetricsEnabled = commandLatencyRecorder.isEnabled(); this.boundedQueues = clientOptions.getRequestQueueSize() != Integer.MAX_VALUE; @@ -377,9 +377,9 @@ public void channelInactive(ChannelHandlerContext ctx) throws Exception { setState(LifecycleState.DEACTIVATING); endpoint.notifyChannelInactive(ctx.channel()); - Deque> batchFlushRetryableDrainQueuedCommands = UnmodifiableDeque.emptyDeque(); - if (supportsBatchFlush) { - batchFlushRetryableDrainQueuedCommands = drainStack(); + Deque> autoBatchFlushRetryableDrainQueuedCommands = UnmodifiableDeque.emptyDeque(); + if (supportsAutoBatchFlush) { + autoBatchFlushRetryableDrainQueuedCommands = drainStack(); } else { endpoint.notifyDrainQueuedCommands(this); } @@ -397,10 +397,10 @@ public void channelInactive(ChannelHandlerContext ctx) throws Exception { super.channelInactive(ctx); - if (supportsBatchFlush) { + if (supportsAutoBatchFlush) { // Needs decision of watchdog ((AutoBatchFlushEndpoint) endpoint).notifyChannelInactiveAfterWatchdogDecision(ctx.channel(), - batchFlushRetryableDrainQueuedCommands); + autoBatchFlushRetryableDrainQueuedCommands); } } diff --git a/src/main/java/io/lettuce/core/protocol/ConnectionWatchdog.java b/src/main/java/io/lettuce/core/protocol/ConnectionWatchdog.java index dc0f222084..82bb679bb6 100644 --- a/src/main/java/io/lettuce/core/protocol/ConnectionWatchdog.java +++ b/src/main/java/io/lettuce/core/protocol/ConnectionWatchdog.java @@ -84,7 +84,7 @@ public class ConnectionWatchdog extends ChannelInboundHandlerAdapter { private final String epid; - private final boolean useBatchFlushEndpoint; + private final boolean useAutoBatchFlushEndpoint; private final Endpoint 
endpoint; @@ -149,7 +149,7 @@ public ConnectionWatchdog(Delay reconnectDelay, ClientOptions clientOptions, Boo this.redisUri = (String) bootstrap.config().attrs().get(ConnectionBuilder.REDIS_URI); this.epid = endpoint.getId(); this.endpoint = endpoint; - this.useBatchFlushEndpoint = endpoint instanceof AutoBatchFlushEndpoint; + this.useAutoBatchFlushEndpoint = endpoint instanceof AutoBatchFlushEndpoint; Mono wrappedSocketAddressSupplier = socketAddressSupplier.doOnNext(addr -> remoteAddress = addr) .onErrorResume(t -> { @@ -226,7 +226,7 @@ public void channelInactive(ChannelHandlerContext ctx) throws Exception { } doReconnectOnEndpointQuiescence = this::scheduleReconnect; - if (!useBatchFlushEndpoint) { + if (!useAutoBatchFlushEndpoint) { doReconnectOnEndpointQuiescence.run(); } // otherwise, will be called later by BatchFlushEndpoint#onEndpointQuiescence @@ -307,7 +307,7 @@ private void notifyEndpointFailedToConnectIfNeeded() { } private void notifyEndpointFailedToConnectIfNeeded(Exception e) { - if (useBatchFlushEndpoint) { + if (useAutoBatchFlushEndpoint) { ((AutoBatchFlushEndpoint) endpoint).notifyReconnectFailed(e); } } diff --git a/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java b/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java index a8f2494cb6..ba21ab5f8e 100644 --- a/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java +++ b/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java @@ -41,7 +41,7 @@ import io.lettuce.core.RedisException; import io.lettuce.core.api.push.PushListener; import io.lettuce.core.constant.DummyContextualChannelInstances; -import io.lettuce.core.context.BatchFlushEndPointContext; +import io.lettuce.core.context.AutoBatchFlushEndPointContext; import io.lettuce.core.context.ConnectionContext; import io.lettuce.core.datastructure.queue.offerfirst.UnboundedMpscOfferFirstQueue; import 
io.lettuce.core.datastructure.queue.offerfirst.impl.JcToolsUnboundedMpscOfferFirstQueue; @@ -612,7 +612,7 @@ private void scheduleSendJobIfNeeded(final ContextualChannel chan) { return; } - if (chan.context.batchFlushEndPointContext.hasOngoingSendLoop.tryEnter()) { + if (chan.context.autoBatchFlushEndPointContext.hasOngoingSendLoop.tryEnter()) { // Benchmark result of using tryEnterSafeGetVolatile() or not (1 thread, async get): // 1. uses tryEnterSafeGetVolatile() to avoid unnecessary eventLoop.execute() calls // Avg latency: 3.2956217278663s @@ -633,19 +633,20 @@ private void scheduleSendJobIfNeeded(final ContextualChannel chan) { private void loopSend(final ContextualChannel chan, boolean entered) { final ConnectionContext connectionContext = chan.context; - final BatchFlushEndPointContext batchFlushEndPointContext = connectionContext.batchFlushEndPointContext; - if (connectionContext.isChannelInactiveEventFired() || batchFlushEndPointContext.hasRetryableFailedToSendTasks()) { + final AutoBatchFlushEndPointContext autoBatchFlushEndPointContext = connectionContext.autoBatchFlushEndPointContext; + if (connectionContext.isChannelInactiveEventFired() + || autoBatchFlushEndPointContext.hasRetryableFailedToSendCommands()) { return; } LettuceAssert.assertState(channel == chan, "unexpected: channel not match but closeStatus == null"); - loopSend0(batchFlushEndPointContext, chan, writeSpinCount, entered); + loopSend0(autoBatchFlushEndPointContext, chan, writeSpinCount, entered); } - private void loopSend0(final BatchFlushEndPointContext batchFlushEndPointContext, final ContextualChannel chan, + private void loopSend0(final AutoBatchFlushEndPointContext autoBatchFlushEndPointContext, final ContextualChannel chan, int remainingSpinnCount, final boolean entered) { do { - final int count = pollBatch(batchFlushEndPointContext, chan); + final int count = pollBatch(autoBatchFlushEndPointContext, chan); if (count < 0) { return; } @@ -664,15 +665,15 @@ private void 
loopSend0(final BatchFlushEndPointContext batchFlushEndPointContext if (entered) { // The send loop will be triggered later when a new task is added, // // Don't setUnsafe here because loopSend0() may result in a delayed loopSend() call. - batchFlushEndPointContext.hasOngoingSendLoop.exit(); + autoBatchFlushEndPointContext.hasOngoingSendLoop.exit(); // // Guarantee thread-safety: no dangling tasks in the queue. - loopSend0(batchFlushEndPointContext, chan, remainingSpinnCount, false); + loopSend0(autoBatchFlushEndPointContext, chan, remainingSpinnCount, false); // chan.eventLoop().schedule(() -> loopSend0(batchFlushEndPointContext, chan, writeSpinCount, false), 100, // TimeUnit.NANOSECONDS); } } - private int pollBatch(final BatchFlushEndPointContext batchFlushEndPointContext, ContextualChannel chan) { + private int pollBatch(final AutoBatchFlushEndPointContext autoBatchFlushEndPointContext, ContextualChannel chan) { int count = 0; while (count < batchSize) { final Object o = this.taskQueue.poll(); @@ -695,10 +696,10 @@ private int pollBatch(final BatchFlushEndPointContext batchFlushEndPointContext, } if (count > 0) { - batchFlushEndPointContext.add(count); + autoBatchFlushEndPointContext.add(count); channelFlush(chan); - if (batchFlushEndPointContext.hasRetryableFailedToSendTasks()) { + if (autoBatchFlushEndPointContext.hasRetryableFailedToSendCommands()) { // Wait for onConnectionClose event() return -1; } @@ -711,12 +712,12 @@ private void trySetEndpointQuiescence(ContextualChannel chan) { final ConnectionContext connectionContext = chan.context; final @Nullable ConnectionContext.CloseStatus closeStatus = connectionContext.getCloseStatus(); - final BatchFlushEndPointContext batchFlushEndPointContext = connectionContext.batchFlushEndPointContext; - if (batchFlushEndPointContext.isDone() && closeStatus != null) { + final AutoBatchFlushEndPointContext autoBatchFlushEndPointContext = connectionContext.autoBatchFlushEndPointContext; + if 
(autoBatchFlushEndPointContext.isDone() && closeStatus != null) { if (closeStatus.isWillReconnect()) { - onWillReconnect(closeStatus, batchFlushEndPointContext); + onWillReconnect(closeStatus, autoBatchFlushEndPointContext); } else { - onWontReconnect(closeStatus, batchFlushEndPointContext); + onWontReconnect(closeStatus, autoBatchFlushEndPointContext); } if (chan.context.setChannelQuiescentOnce()) { @@ -744,15 +745,15 @@ private void onEndpointQuiescence() { } private void onWillReconnect(@Nonnull final ConnectionContext.CloseStatus closeStatus, - final BatchFlushEndPointContext batchFlushEndPointContext) { - final @Nullable Deque> retryableFailedToSendTasks = batchFlushEndPointContext - .getAndClearRetryableFailedToSendTasks(); + final AutoBatchFlushEndPointContext autoBatchFlushEndPointContext) { + final @Nullable Deque> retryableFailedToSendTasks = autoBatchFlushEndPointContext + .getAndClearRetryableFailedToSendCommands(); if (retryableFailedToSendTasks != null) { // Save retryable failed tasks logger.info( "[onWillReconnect][{}] compensate {} retryableFailedToSendTasks (write failure) for retrying on reconnecting, first write error: {}", logPrefix(), retryableFailedToSendTasks.size(), - batchFlushEndPointContext.getFirstDiscontinueReason().getMessage()); + autoBatchFlushEndPointContext.getFirstDiscontinueReason().getMessage()); offerFirstAll(retryableFailedToSendTasks); } @@ -773,15 +774,15 @@ private void onWillReconnect(@Nonnull final ConnectionContext.CloseStatus closeS } private void onWontReconnect(@Nonnull final ConnectionContext.CloseStatus closeStatus, - final BatchFlushEndPointContext batchFlushEndPointContext) { + final AutoBatchFlushEndPointContext autoBatchFlushEndPointContext) { // No need to use syncAfterTerminated() since we are already in the event loop. 
if (isClosed()) { onEndpointClosed(closeStatus.getAndClearRetryablePendingCommands(), - batchFlushEndPointContext.getAndClearRetryableFailedToSendTasks()); + autoBatchFlushEndPointContext.getAndClearRetryableFailedToSendCommands()); } else { fulfillCommands("onConnectionClose called and won't reconnect", it -> it.completeExceptionally(closeStatus.getErr()), closeStatus.getAndClearRetryablePendingCommands(), - batchFlushEndPointContext.getAndClearRetryableFailedToSendTasks()); + autoBatchFlushEndPointContext.getAndClearRetryableFailedToSendCommands()); } } @@ -978,7 +979,7 @@ protected WrittenToChannel newObject(Recycler.Handle handle) { private DefaultAutoBatchFlushEndpoint endpoint; - private RedisCommand command; + private RedisCommand cmd; private ContextualChannel chan; @@ -998,22 +999,22 @@ static WrittenToChannel newInstance(DefaultAutoBatchFlushEndpoint endpoint, Cont entry.endpoint = endpoint; entry.chan = chan; - entry.command = command; + entry.cmd = command; return entry; } @Override public void operationComplete(Future future) { - final BatchFlushEndPointContext batchFlushEndPointContext = chan.context.batchFlushEndPointContext; + final AutoBatchFlushEndPointContext autoBatchFlushEndPointContext = chan.context.autoBatchFlushEndPointContext; try { QUEUE_SIZE.decrementAndGet(endpoint); - batchFlushEndPointContext.done(1); + autoBatchFlushEndPointContext.done(1); - final Throwable retryableErr = checkSendResult(future, chan, command); - if (retryableErr != null && batchFlushEndPointContext.addRetryableFailedToSendTask(command, retryableErr)) { + final Throwable retryableErr = checkSendResult(future); + if (retryableErr != null && autoBatchFlushEndPointContext.addRetryableFailedToSendCommand(cmd, retryableErr)) { // Close connection on first transient write failure - internalCloseConnectionIfNeeded(chan, retryableErr); + internalCloseConnectionIfNeeded(retryableErr); } endpoint.trySetEndpointQuiescence(chan); @@ -1026,23 +1027,20 @@ public void 
operationComplete(Future future) { * Check write result. * * @param sendFuture The future to check. - * @param contextualChannel The channel instance associated with the future. - * @param cmd The task. * @return The cause of the failure if is a retryable failed task, otherwise null. */ - private Throwable checkSendResult(Future sendFuture, ContextualChannel contextualChannel, - RedisCommand cmd) { + private Throwable checkSendResult(Future sendFuture) { if (cmd.isDone()) { ExceptionUtils.logUnexpectedDone(logger, endpoint.logPrefix(), cmd); return null; } - final ConnectionContext.CloseStatus closeStatus = contextualChannel.context.getCloseStatus(); + final ConnectionContext.CloseStatus closeStatus = chan.context.getCloseStatus(); if (closeStatus != null) { logger.warn("[checkSendResult][interesting][{}] callback called after onClose() event, close status: {}", - endpoint.logPrefix(), contextualChannel.context.getCloseStatus()); + endpoint.logPrefix(), chan.context.getCloseStatus()); final Throwable err = sendFuture.isSuccess() ? 
closeStatus.getErr() : sendFuture.cause(); - if (!closeStatus.isWillReconnect() || shouldNotRetry(err, cmd)) { + if (!closeStatus.isWillReconnect() || shouldNotRetry(err)) { cmd.completeExceptionally(err); return null; } else { @@ -1056,7 +1054,7 @@ private Throwable checkSendResult(Future sendFuture, ContextualChannel contex final Throwable cause = sendFuture.cause(); ExceptionUtils.maybeLogSendError(logger, cause); - if (shouldNotRetry(cause, cmd)) { + if (shouldNotRetry(cause)) { cmd.completeExceptionally(cause); return null; } @@ -1064,22 +1062,22 @@ private Throwable checkSendResult(Future sendFuture, ContextualChannel contex return cause; } - private boolean shouldNotRetry(Throwable cause, RedisCommand cmd) { + private boolean shouldNotRetry(Throwable cause) { return endpoint.reliability == Reliability.AT_MOST_ONCE || ActivationCommand.isActivationCommand(cmd) || ExceptionUtils.oneOf(cause, SHOULD_NOT_RETRY_EXCEPTION_TYPES); } - private void internalCloseConnectionIfNeeded(ContextualChannel toCloseChan, Throwable reason) { - if (toCloseChan.context.isChannelInactiveEventFired() || !toCloseChan.isActive()) { + private void internalCloseConnectionIfNeeded(Throwable reason) { + if (chan.context.isChannelInactiveEventFired() || !chan.isActive()) { return; } logger.error( "[internalCloseConnectionIfNeeded][interesting][{}] close the connection due to write error, reason: '{}'", endpoint.logPrefix(), reason.getMessage(), reason); - toCloseChan.eventLoop().schedule(() -> { - if (toCloseChan.isActive()) { - toCloseChan.close(); + chan.eventLoop().schedule(() -> { + if (chan.isActive()) { + chan.close(); } }, 1, TimeUnit.SECONDS); } @@ -1087,7 +1085,7 @@ private void internalCloseConnectionIfNeeded(ContextualChannel toCloseChan, Thro private void recycle() { this.endpoint = null; this.chan = null; - this.command = null; + this.cmd = null; handle.recycle(this); } diff --git a/src/main/java/io/lettuce/core/utils/ExceptionUtils.java 
b/src/main/java/io/lettuce/core/utils/ExceptionUtils.java index 4072d81b3e..49ed6e548c 100644 --- a/src/main/java/io/lettuce/core/utils/ExceptionUtils.java +++ b/src/main/java/io/lettuce/core/utils/ExceptionUtils.java @@ -34,18 +34,6 @@ public static void maybeLogSendError(InternalLogger logger, Throwable cause) { } } - public static T castTo(Throwable throwable, Class clazz, Function supplier) { - if (clazz.isInstance(throwable)) { - return clazz.cast(throwable); - } - return supplier.apply(throwable); - } - - public static T clearStackTrace(T throwable) { - throwable.setStackTrace(new StackTraceElement[0]); - return throwable; - } - /** * Returns whether the throwable is one of the exception types or one of the cause in the cause chain is one of the * exception types From 19e646349ee7016b45371c4452df330fbefad7b0 Mon Sep 17 00:00:00 2001 From: "xiaofan.chen" Date: Thu, 8 Aug 2024 23:17:01 +0800 Subject: [PATCH 19/35] refactor: better naming --- .../AutoBatchFlushEndPointContext.java | 9 +-------- .../core/protocol/ConnectionWatchdog.java | 20 +++++++++---------- .../DefaultAutoBatchFlushEndpoint.java | 4 ++-- 3 files changed, 13 insertions(+), 20 deletions(-) diff --git a/src/main/java/io/lettuce/core/context/AutoBatchFlushEndPointContext.java b/src/main/java/io/lettuce/core/context/AutoBatchFlushEndPointContext.java index 32dc7fd241..03de30152c 100644 --- a/src/main/java/io/lettuce/core/context/AutoBatchFlushEndPointContext.java +++ b/src/main/java/io/lettuce/core/context/AutoBatchFlushEndPointContext.java @@ -73,16 +73,9 @@ public int getFlyingCmdNum() { return flyingCmdNum; } - private int total = 0; - - public int getTotal() { - return total; - } - public final HasOngoingSendLoop hasOngoingSendLoop = new HasOngoingSendLoop(); public void add(int n) { - this.total += n; this.flyingCmdNum += n; } @@ -100,7 +93,7 @@ public void done(int n) { public boolean isDone() { if (this.flyingCmdNum < 0) { - logger.error("[unexpected] flyingCmdNum < 0, flyingCmdNum: {}, 
total: {}", this.flyingCmdNum, this.total); + logger.error("[unexpected] flyingCmdNum < 0, flyingCmdNum: {}", this.flyingCmdNum); return true; } return this.flyingCmdNum == 0; diff --git a/src/main/java/io/lettuce/core/protocol/ConnectionWatchdog.java b/src/main/java/io/lettuce/core/protocol/ConnectionWatchdog.java index 82bb679bb6..43005eea81 100644 --- a/src/main/java/io/lettuce/core/protocol/ConnectionWatchdog.java +++ b/src/main/java/io/lettuce/core/protocol/ConnectionWatchdog.java @@ -106,7 +106,7 @@ public class ConnectionWatchdog extends ChannelInboundHandlerAdapter { private volatile Timeout reconnectScheduleTimeout; - private Runnable doReconnectOnEndpointQuiescence; + private Runnable doReconnectOnAutoBatchFlushEndpointQuiescence; /** * Create a new watchdog that adds to new connections to the supplied {@link ChannelGroup} and establishes a new @@ -204,7 +204,7 @@ public void channelActive(ChannelHandlerContext ctx) throws Exception { @Override public void channelInactive(ChannelHandlerContext ctx) throws Exception { - doReconnectOnEndpointQuiescence = null; + doReconnectOnAutoBatchFlushEndpointQuiescence = null; logger.debug("{} channelInactive()", logPrefix()); if (!armed) { @@ -225,10 +225,10 @@ public void channelInactive(ChannelHandlerContext ctx) throws Exception { return; } - doReconnectOnEndpointQuiescence = this::scheduleReconnect; if (!useAutoBatchFlushEndpoint) { - doReconnectOnEndpointQuiescence.run(); + this.scheduleReconnect(); } + doReconnectOnAutoBatchFlushEndpointQuiescence = this::scheduleReconnect; // otherwise, will be called later by BatchFlushEndpoint#onEndpointQuiescence } else { logger.debug("{} Reconnect scheduling disabled", logPrefix(), ctx); @@ -237,8 +237,12 @@ public void channelInactive(ChannelHandlerContext ctx) throws Exception { super.channelInactive(ctx); } - void reconnectOnEndpointQuiescence() { - doReconnectOnEndpointQuiescence.run(); + boolean willReconnect() { + return doReconnectOnAutoBatchFlushEndpointQuiescence 
!= null; + } + + void reconnectOnAutoBatchFlushEndpointQuiescence() { + doReconnectOnAutoBatchFlushEndpointQuiescence.run(); } /** @@ -482,8 +486,4 @@ private String logPrefix() { return logPrefix = buffer; } - public boolean willReconnect() { - return doReconnectOnEndpointQuiescence != null; - } - } diff --git a/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java b/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java index ba21ab5f8e..6d175baa79 100644 --- a/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java +++ b/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java @@ -354,7 +354,7 @@ public void notifyReconnectFailed(Throwable t) { if (!CHANNEL.compareAndSet(this, DummyContextualChannelInstances.CHANNEL_CONNECTING, DummyContextualChannelInstances.CHANNEL_RECONNECT_FAILED)) { - onUnexpectedState("notifyReconnectFailed", ConnectionContext.State.CONNECTING); + syncAfterTerminated(() -> onUnexpectedState("notifyReconnectFailed", ConnectionContext.State.CONNECTING)); return; } @@ -741,7 +741,7 @@ private void onEndpointQuiescence() { } // neither connectionWatchdog nor doReconnectOnEndpointQuiescence could be null - connectionWatchdog.reconnectOnEndpointQuiescence(); + connectionWatchdog.reconnectOnAutoBatchFlushEndpointQuiescence(); } private void onWillReconnect(@Nonnull final ConnectionContext.CloseStatus closeStatus, From 0d94a5b94d078626bba70805f9c827d7a1b9d1d6 Mon Sep 17 00:00:00 2001 From: "xiaofan.chen" Date: Fri, 9 Aug 2024 09:33:45 +0800 Subject: [PATCH 20/35] refactor: better error msg for ConnectionWatchDog --- .../core/protocol/ConnectionWatchdog.java | 85 ++++++++++--------- .../DefaultAutoBatchFlushEndpoint.java | 3 +- 2 files changed, 45 insertions(+), 43 deletions(-) diff --git a/src/main/java/io/lettuce/core/protocol/ConnectionWatchdog.java b/src/main/java/io/lettuce/core/protocol/ConnectionWatchdog.java index 43005eea81..a9de311641 100644 --- 
a/src/main/java/io/lettuce/core/protocol/ConnectionWatchdog.java +++ b/src/main/java/io/lettuce/core/protocol/ConnectionWatchdog.java @@ -21,14 +21,16 @@ import java.net.SocketAddress; import java.time.Duration; -import java.util.concurrent.CancellationException; import java.util.concurrent.CompletableFuture; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.Consumer; +import java.util.function.Supplier; import io.lettuce.core.ClientOptions; import io.lettuce.core.ConnectionBuilder; import io.lettuce.core.ConnectionEvents; +import io.lettuce.core.RedisException; import io.lettuce.core.event.EventBus; import io.lettuce.core.event.connection.ReconnectAttemptEvent; import io.lettuce.core.event.connection.ReconnectFailedEvent; @@ -84,9 +86,9 @@ public class ConnectionWatchdog extends ChannelInboundHandlerAdapter { private final String epid; - private final boolean useAutoBatchFlushEndpoint; + private final boolean useAutoBatchFlush; - private final Endpoint endpoint; + private final Consumer> endpointFailedToReconnectNotifier; private Channel channel; @@ -148,8 +150,15 @@ public ConnectionWatchdog(Delay reconnectDelay, ClientOptions clientOptions, Boo this.eventBus = eventBus; this.redisUri = (String) bootstrap.config().attrs().get(ConnectionBuilder.REDIS_URI); this.epid = endpoint.getId(); - this.endpoint = endpoint; - this.useAutoBatchFlushEndpoint = endpoint instanceof AutoBatchFlushEndpoint; + if (endpoint instanceof AutoBatchFlushEndpoint) { + this.useAutoBatchFlush = true; + endpointFailedToReconnectNotifier = throwableSupplier -> ((AutoBatchFlushEndpoint) endpoint) + .notifyReconnectFailed(throwableSupplier.get()); + } else { + this.useAutoBatchFlush = false; + endpointFailedToReconnectNotifier = ignoredThrowableSupplier -> { + }; + } Mono wrappedSocketAddressSupplier = socketAddressSupplier.doOnNext(addr -> remoteAddress = addr) .onErrorResume(t -> { @@ -215,20 +224,11 @@ public void 
channelInactive(ChannelHandlerContext ctx) throws Exception { channel = null; if (listenOnChannelInactive && !reconnectionHandler.isReconnectSuspended()) { - if (!isEventLoopGroupActive()) { - logger.debug("isEventLoopGroupActive() == false"); - return; - } - - if (!isListenOnChannelInactive()) { - logger.debug("Skip reconnect scheduling, listener disabled"); - return; - } - - if (!useAutoBatchFlushEndpoint) { + if (!useAutoBatchFlush) { this.scheduleReconnect(); + } else { + doReconnectOnAutoBatchFlushEndpointQuiescence = this::scheduleReconnect; } - doReconnectOnAutoBatchFlushEndpointQuiescence = this::scheduleReconnect; // otherwise, will be called later by BatchFlushEndpoint#onEndpointQuiescence } else { logger.debug("{} Reconnect scheduling disabled", logPrefix(), ctx); @@ -237,7 +237,7 @@ public void channelInactive(ChannelHandlerContext ctx) throws Exception { super.channelInactive(ctx); } - boolean willReconnect() { + boolean willReconnectOnAutoBatchFlushEndpointQuiescence() { return doReconnectOnAutoBatchFlushEndpointQuiescence != null; } @@ -261,14 +261,16 @@ public void scheduleReconnect() { logger.debug("{} scheduleReconnect()", logPrefix()); if (!isEventLoopGroupActive()) { - logger.debug("isEventLoopGroupActive() == false"); - notifyEndpointFailedToConnectIfNeeded(); + final String errMsg = "isEventLoopGroupActive() == false"; + logger.debug(errMsg); + notifyEndpointFailedToReconnect(errMsg); return; } if (!isListenOnChannelInactive()) { - logger.debug("Skip reconnect scheduling, listener disabled"); - notifyEndpointFailedToConnectIfNeeded(); + final String errMsg = "Skip reconnect scheduling, listener disabled"; + logger.debug(errMsg); + notifyEndpointFailedToReconnect(errMsg); return; } @@ -285,8 +287,9 @@ public void scheduleReconnect() { reconnectScheduleTimeout = null; if (!isEventLoopGroupActive()) { - logger.warn("Cannot execute scheduled reconnect timer, reconnect workers are terminated"); - notifyEndpointFailedToConnectIfNeeded(); + final 
String errMsg = "Cannot execute scheduled reconnect timer, reconnect workers are terminated"; + logger.warn(errMsg); + notifyEndpointFailedToReconnect(errMsg); return; } @@ -302,18 +305,12 @@ public void scheduleReconnect() { } } else { logger.debug("{} Skipping scheduleReconnect() because I have an active channel", logPrefix()); - notifyEndpointFailedToConnectIfNeeded(); + notifyEndpointFailedToReconnect("Skipping scheduleReconnect() because I have an active channel"); } } - private void notifyEndpointFailedToConnectIfNeeded() { - notifyEndpointFailedToConnectIfNeeded(new CancellationException()); - } - - private void notifyEndpointFailedToConnectIfNeeded(Exception e) { - if (useAutoBatchFlushEndpoint) { - ((AutoBatchFlushEndpoint) endpoint).notifyReconnectFailed(e); - } + void notifyEndpointFailedToReconnect(String msg) { + endpointFailedToReconnectNotifier.accept(() -> new RedisException(msg)); } /** @@ -335,26 +332,29 @@ public void run(int attempt) throws Exception { * @param delay retry delay. * @throws Exception when reconnection fails. 
*/ - private void run(int attempt, Duration delay) throws Exception { + private void run(int attempt, Duration delay) { reconnectSchedulerSync.set(false); reconnectScheduleTimeout = null; if (!isEventLoopGroupActive()) { - logger.debug("isEventLoopGroupActive() == false"); - notifyEndpointFailedToConnectIfNeeded(); + final String errMsg = "isEventLoopGroupActive() == false"; + logger.debug(errMsg); + notifyEndpointFailedToReconnect(errMsg); return; } if (!isListenOnChannelInactive()) { - logger.debug("Skip reconnect scheduling, listener disabled"); - notifyEndpointFailedToConnectIfNeeded(); + final String errMsg = "Skip reconnect scheduling, listener disabled"; + logger.debug(errMsg); + notifyEndpointFailedToReconnect(errMsg); return; } if (isReconnectSuspended()) { - logger.debug("Skip reconnect scheduling, reconnect is suspended"); - notifyEndpointFailedToConnectIfNeeded(); + final String msg = "Skip reconnect scheduling, reconnect is suspended"; + logger.debug(msg); + notifyEndpointFailedToReconnect(msg); return; } @@ -411,13 +411,14 @@ private void run(int attempt, Duration delay) throws Exception { if (!isReconnectSuspended()) { scheduleReconnect(); } else { - notifyEndpointFailedToConnectIfNeeded(); + endpointFailedToReconnectNotifier + .accept(() -> new RedisException("got error and then reconnect is suspended", t)); } }); } catch (Exception e) { logger.log(warnLevel, "Cannot reconnect: {}", e.toString()); eventBus.publish(new ReconnectFailedEvent(redisUri, epid, LocalAddress.ANY, remoteAddress, e, attempt)); - notifyEndpointFailedToConnectIfNeeded(e); + endpointFailedToReconnectNotifier.accept(() -> e); } } diff --git a/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java b/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java index 6d175baa79..e17c1c9809 100644 --- a/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java +++ 
b/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java @@ -390,7 +390,8 @@ public void notifyChannelInactiveAfterWatchdogDecision(Channel channel, return; } - boolean willReconnect = connectionWatchdog != null && connectionWatchdog.willReconnect(); + boolean willReconnect = connectionWatchdog != null + && connectionWatchdog.willReconnectOnAutoBatchFlushEndpointQuiescence(); RedisException exception = null; // Unlike DefaultEndpoint, here we don't check reliability since connectionWatchdog.willReconnect() already does it. if (isClosed()) { From 4b41a12d295666b07f721ee4500bf1f17e852bf6 Mon Sep 17 00:00:00 2001 From: "xiaofan.chen" Date: Fri, 9 Aug 2024 12:01:33 +0800 Subject: [PATCH 21/35] chore: remove unused code --- .../lettuce/core/context/AutoBatchFlushEndPointContext.java | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/main/java/io/lettuce/core/context/AutoBatchFlushEndPointContext.java b/src/main/java/io/lettuce/core/context/AutoBatchFlushEndPointContext.java index 03de30152c..59d6f05f78 100644 --- a/src/main/java/io/lettuce/core/context/AutoBatchFlushEndPointContext.java +++ b/src/main/java/io/lettuce/core/context/AutoBatchFlushEndPointContext.java @@ -26,14 +26,8 @@ public static class HasOngoingSendLoop { */ final AtomicInteger safe; - /** - * Used in single thread. 
- */ - boolean unsafe; - public HasOngoingSendLoop() { safe = new AtomicInteger(); - unsafe = false; } /** From a0da050d24a89b6ee6251ecec61179eacd5b3501 Mon Sep 17 00:00:00 2001 From: "xiaofan.chen" Date: Fri, 9 Aug 2024 12:16:29 +0800 Subject: [PATCH 22/35] chore: default batch size to 20 --- src/main/java/io/lettuce/core/AutoBatchFlushOptions.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/io/lettuce/core/AutoBatchFlushOptions.java b/src/main/java/io/lettuce/core/AutoBatchFlushOptions.java index 7c61c1d04a..e71f12e561 100644 --- a/src/main/java/io/lettuce/core/AutoBatchFlushOptions.java +++ b/src/main/java/io/lettuce/core/AutoBatchFlushOptions.java @@ -24,7 +24,7 @@ public class AutoBatchFlushOptions implements Serializable { public static final int DEFAULT_WRITE_SPIN_COUNT = 16; - public static final int DEFAULT_BATCH_SIZE = 8; + public static final int DEFAULT_BATCH_SIZE = 20; private final boolean enableAutoBatchFlush; From 51c57cba791456979c3daa24a0543af3b6d19c12 Mon Sep 17 00:00:00 2001 From: "xiaofan.chen" Date: Fri, 9 Aug 2024 16:24:33 +0800 Subject: [PATCH 23/35] chore: simplify code, avoid recursion --- .../DefaultAutoBatchFlushEndpoint.java | 61 ++++++++++--------- 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java b/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java index e17c1c9809..b5e85d9bbc 100644 --- a/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java +++ b/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java @@ -625,11 +625,11 @@ private void scheduleSendJobIfNeeded(final ContextualChannel chan) { } // Otherwise: - // 1. offer() (volatile write) synchronizes-before hasOngoingSendLoop.safe.get() == 1 (volatile read) + // 1. offer() (volatile write of producerIndex) synchronizes-before hasOngoingSendLoop.safe.get() == 1 (volatile read) // 2. 
hasOngoingSendLoop.safe.get() == 1 (volatile read) synchronizes-before // hasOngoingSendLoop.safe.set(0) (volatile write) in first loopSend0() // 3. hasOngoingSendLoop.safe.set(0) (volatile write) synchronizes-before - // second loopSend0(), which will call poll() + // second loopSend0(), which will call poll() (volatile read of producerIndex) } private void loopSend(final ContextualChannel chan, boolean entered) { @@ -645,33 +645,28 @@ private void loopSend(final ContextualChannel chan, boolean entered) { } private void loopSend0(final AutoBatchFlushEndPointContext autoBatchFlushEndPointContext, final ContextualChannel chan, - int remainingSpinnCount, final boolean entered) { + int remainingSpinnCount, boolean entered) { do { final int count = pollBatch(autoBatchFlushEndPointContext, chan); if (count < 0) { return; } if (count < batchSize) { + if (!entered) { + return; + } // queue was empty - break; + // The send loop will be triggered later when a new task is added, + // // Don't setUnsafe here because loopSend0() may result in a delayed loopSend() call. + autoBatchFlushEndPointContext.hasOngoingSendLoop.exit(); + entered = false; + // // Guarantee thread-safety: no dangling tasks in the queue. } } while (--remainingSpinnCount > 0); - if (remainingSpinnCount <= 0) { - // Don't need to exitUnsafe since we still have an ongoing consume tasks in this thread. - chan.eventLoop().execute(() -> loopSend(chan, entered)); - return; - } - - if (entered) { - // The send loop will be triggered later when a new task is added, - // // Don't setUnsafe here because loopSend0() may result in a delayed loopSend() call. - autoBatchFlushEndPointContext.hasOngoingSendLoop.exit(); - // // Guarantee thread-safety: no dangling tasks in the queue. 
- loopSend0(autoBatchFlushEndPointContext, chan, remainingSpinnCount, false); - // chan.eventLoop().schedule(() -> loopSend0(batchFlushEndPointContext, chan, writeSpinCount, false), 100, - // TimeUnit.NANOSECONDS); - } + final boolean finalEntered = entered; + // Don't need to exitUnsafe since we still have an ongoing consume tasks in this thread. + chan.eventLoop().execute(() -> loopSend(chan, finalEntered)); } private int pollBatch(final AutoBatchFlushEndPointContext autoBatchFlushEndPointContext, ContextualChannel chan) { @@ -713,19 +708,25 @@ private void trySetEndpointQuiescence(ContextualChannel chan) { final ConnectionContext connectionContext = chan.context; final @Nullable ConnectionContext.CloseStatus closeStatus = connectionContext.getCloseStatus(); + if (closeStatus == null) { + return; + } + final AutoBatchFlushEndPointContext autoBatchFlushEndPointContext = connectionContext.autoBatchFlushEndPointContext; - if (autoBatchFlushEndPointContext.isDone() && closeStatus != null) { - if (closeStatus.isWillReconnect()) { - onWillReconnect(closeStatus, autoBatchFlushEndPointContext); - } else { - onWontReconnect(closeStatus, autoBatchFlushEndPointContext); - } + if (!autoBatchFlushEndPointContext.isDone()) { + return; + } - if (chan.context.setChannelQuiescentOnce()) { - onEndpointQuiescence(); - } else { - ExceptionUtils.maybeFire(logger, canFire, "unexpected: setEndpointQuiescenceOncePerConnection() failed"); - } + if (closeStatus.isWillReconnect()) { + onWillReconnect(closeStatus, autoBatchFlushEndPointContext); + } else { + onWontReconnect(closeStatus, autoBatchFlushEndPointContext); + } + + if (chan.context.setChannelQuiescentOnce()) { + onEndpointQuiescence(); + } else { + ExceptionUtils.maybeFire(logger, canFire, "unexpected: setEndpointQuiescenceOncePerConnection() failed"); } } From 799a3b54d0d47542eaa684156a9f739da0420cf9 Mon Sep 17 00:00:00 2001 From: "xiaofan.chen" Date: Fri, 9 Aug 2024 16:41:35 +0800 Subject: [PATCH 24/35] chore: add isEmpty(), 
fix logPrefix --- .../UnboundedMpscOfferFirstQueue.java | 2 ++ .../ConcurrentLinkedMpscOfferFirstQueue.java | 5 +++++ .../JcToolsUnboundedMpscOfferFirstQueue.java | 5 +++++ .../DefaultAutoBatchFlushEndpoint.java | 18 ++++++++++++++++-- 4 files changed, 28 insertions(+), 2 deletions(-) diff --git a/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/UnboundedMpscOfferFirstQueue.java b/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/UnboundedMpscOfferFirstQueue.java index 42b3e7ceeb..834aecb436 100644 --- a/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/UnboundedMpscOfferFirstQueue.java +++ b/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/UnboundedMpscOfferFirstQueue.java @@ -33,4 +33,6 @@ public interface UnboundedMpscOfferFirstQueue { @Nullable E poll(); + boolean isEmpty(); + } diff --git a/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/ConcurrentLinkedMpscOfferFirstQueue.java b/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/ConcurrentLinkedMpscOfferFirstQueue.java index 5def1b5466..c6022f56d2 100644 --- a/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/ConcurrentLinkedMpscOfferFirstQueue.java +++ b/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/ConcurrentLinkedMpscOfferFirstQueue.java @@ -42,4 +42,9 @@ public E poll() { return delegate.poll(); } + @Override + public boolean isEmpty() { + return delegate.isEmpty(); + } + } diff --git a/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/JcToolsUnboundedMpscOfferFirstQueue.java b/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/JcToolsUnboundedMpscOfferFirstQueue.java index feaa8d2ee8..17ed4de647 100644 --- a/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/JcToolsUnboundedMpscOfferFirstQueue.java +++ b/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/JcToolsUnboundedMpscOfferFirstQueue.java @@ -52,6 +52,11 @@ public E poll() { 
return mpscQueue.poll(); } + @Override + public boolean isEmpty() { + return mpscQueue.isEmpty() && unsafeQueues.isEmpty(); + } + private E pollFromUnsafeQueues() { Queue first = unsafeQueues.getFirst(); E e = first.poll(); diff --git a/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java b/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java index b5e85d9bbc..b06a012a3e 100644 --- a/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java +++ b/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java @@ -351,6 +351,7 @@ public void notifyChannelActive(Channel channel) { @Override public void notifyReconnectFailed(Throwable t) { this.failedToReconnectReason = t; + this.logPrefix = null; if (!CHANNEL.compareAndSet(this, DummyContextualChannelInstances.CHANNEL_CONNECTING, DummyContextualChannelInstances.CHANNEL_RECONNECT_FAILED)) { @@ -400,6 +401,7 @@ public void notifyChannelInactiveAfterWatchdogDecision(Channel channel, } if (willReconnect) { + this.logPrefix = null; CHANNEL.set(this, DummyContextualChannelInstances.CHANNEL_WILL_RECONNECT); // Create a synchronize-before with this.channel = CHANNEL_WILL_RECONNECT if (isClosed()) { @@ -413,6 +415,7 @@ public void notifyChannelInactiveAfterWatchdogDecision(Channel channel, } if (!willReconnect) { + this.logPrefix = null; CHANNEL.set(this, DummyContextualChannelInstances.CHANNEL_ENDPOINT_CLOSED); } inactiveChan.context @@ -583,12 +586,19 @@ private boolean isClosed() { } protected String logPrefix() { - if (logPrefix != null) { return logPrefix; } - String buffer = "[" + ChannelLogDescriptor.logDescriptor(channel.getDelegate()) + ", " + "epid=" + getId() + ']'; + final ContextualChannel chan = this.channel; + if (!chan.context.initialState.isConnected()) { + final String buffer = "[" + chan.context.initialState + ", " + "epid=" + getId() + ']'; + logPrefix = buffer; + return buffer; + } + + final String buffer = "[CONNECTED, " + 
ChannelLogDescriptor.logDescriptor(chan.getDelegate()) + ", " + "epid=" + getId() + + ']'; logPrefix = buffer; return buffer; } @@ -659,6 +669,9 @@ private void loopSend0(final AutoBatchFlushEndPointContext autoBatchFlushEndPoin // The send loop will be triggered later when a new task is added, // // Don't setUnsafe here because loopSend0() may result in a delayed loopSend() call. autoBatchFlushEndPointContext.hasOngoingSendLoop.exit(); + if (taskQueue.isEmpty()) { + return; + } entered = false; // // Guarantee thread-safety: no dangling tasks in the queue. } @@ -735,6 +748,7 @@ private void onEndpointQuiescence() { return; } + this.logPrefix = null; // Create happens-before with channelActive() if (!CHANNEL.compareAndSet(this, DummyContextualChannelInstances.CHANNEL_WILL_RECONNECT, DummyContextualChannelInstances.CHANNEL_CONNECTING)) { From 63ec4f95246a50ae147de4a1457b00712d01639d Mon Sep 17 00:00:00 2001 From: "xiaofan.chen" Date: Wed, 14 Aug 2024 15:34:49 +0800 Subject: [PATCH 25/35] chore: ensure thread safety for taskQueue consuming --- .../DefaultAutoBatchFlushEndpoint.java | 53 ++++++++++++------- 1 file changed, 35 insertions(+), 18 deletions(-) diff --git a/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java b/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java index b06a012a3e..dd9600d187 100644 --- a/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java +++ b/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java @@ -295,8 +295,6 @@ public RedisCommand write(RedisCommand command) { @Override public void notifyChannelActive(Channel channel) { - lastEventLoop = channel.eventLoop(); - final ContextualChannel contextualChannel = new ContextualChannel(channel, ConnectionContext.State.CONNECTED); this.logPrefix = null; @@ -308,11 +306,12 @@ public void notifyChannelActive(Channel channel) { return; } + lastEventLoop = channel.eventLoop(); + // Created a synchronize-before with 
set channel to CHANNEL_CONNECTING, if (isClosed()) { logger.info("{} Closing channel because endpoint is already closed", logPrefix()); channel.close(); - // Cleaning will be done later in notifyChannelInactiveAfterWatchdogDecision, we are happy so far. return; } @@ -355,7 +354,7 @@ public void notifyReconnectFailed(Throwable t) { if (!CHANNEL.compareAndSet(this, DummyContextualChannelInstances.CHANNEL_CONNECTING, DummyContextualChannelInstances.CHANNEL_RECONNECT_FAILED)) { - syncAfterTerminated(() -> onUnexpectedState("notifyReconnectFailed", ConnectionContext.State.CONNECTING)); + onUnexpectedState("notifyReconnectFailed", ConnectionContext.State.CONNECTING); return; } @@ -363,7 +362,7 @@ public void notifyReconnectFailed(Throwable t) { if (isClosed()) { onEndpointClosed(); } else { - cancelCommands("reconnect failed"); + onReconnectFailed(); } }); } @@ -381,8 +380,16 @@ public void notifyChannelInactive(Channel channel) { public void notifyChannelInactiveAfterWatchdogDecision(Channel channel, Deque> retryableQueuedCommands) { final ContextualChannel inactiveChan = this.channel; - if (!inactiveChan.context.initialState.isConnected() || inactiveChan.getDelegate() != channel) { + if (!inactiveChan.context.initialState.isConnected()) { + logger.error("[unexpected][{}] notifyChannelInactive: channel initial state not connected", logPrefix()); + onUnexpectedState("notifyChannelInactiveAfterWatchdogDecision", ConnectionContext.State.CONNECTED); + return; + } + + if (inactiveChan.getDelegate() != channel) { logger.error("[unexpected][{}] notifyChannelInactive: channel not match", logPrefix()); + onUnexpectedState("notifyChannelInactiveAfterWatchdogDecision: channel not match", + ConnectionContext.State.CONNECTED); return; } @@ -453,8 +460,7 @@ public void flushCommands() { if (isClosed()) { onEndpointClosed(); } else { - fulfillCommands("Reconnect failed", - cmd -> cmd.completeExceptionally(new RedisException("Reconnect failed"))); + onReconnectFailed(); } }); return; 
@@ -546,11 +552,9 @@ public void reset() { chan.pipeline().fireUserEventTriggered(new ConnectionEvents.Reset()); } // Unsafe to call cancelBufferedCommands() here. - // cancelBufferedCommands("Reset"); } private void resetInternal() { - if (debugEnabled) { logger.debug("{} reset()", logPrefix()); } @@ -559,8 +563,8 @@ private void resetInternal() { if (chan.context.initialState.isConnected()) { chan.pipeline().fireUserEventTriggered(new ConnectionEvents.Reset()); } - // Unsafe to call cancelBufferedCommands() here. - cancelCommands("Reset"); + LettuceAssert.assertState(lastEventLoop.inEventLoop(), "must be called in lastEventLoop thread"); + cancelCommands("resetInternal"); } /** @@ -568,10 +572,8 @@ private void resetInternal() { */ @Override public void initialState() { - - // Thread safe since we are not connected yet. - cancelCommands("initialState"); - + // Unsafe to call cancelCommands() here. + // No need to cancel. ContextualChannel currentChannel = this.channel; if (currentChannel.context.initialState.isConnected()) { ChannelFuture close = currentChannel.close(); @@ -717,7 +719,9 @@ private int pollBatch(final AutoBatchFlushEndPointContext autoBatchFlushEndPoint } private void trySetEndpointQuiescence(ContextualChannel chan) { - LettuceAssert.isTrue(chan.eventLoop().inEventLoop(), "unexpected: not in event loop"); + final EventLoop eventLoop = chan.eventLoop(); + LettuceAssert.isTrue(eventLoop.inEventLoop(), "unexpected: not in event loop"); + LettuceAssert.isTrue(eventLoop == lastEventLoop, "unexpected: lastEventLoop not match"); final ConnectionContext connectionContext = chan.context; final @Nullable ConnectionContext.CloseStatus closeStatus = connectionContext.getCloseStatus(); @@ -821,6 +825,10 @@ private final void onEndpointClosed(Queue>... 
queues) { fulfillCommands("endpoint closed", callbackOnClose, queues); } + private final void onReconnectFailed() { + fulfillCommands("reconnect failed", cmd -> cmd.completeExceptionally(getFailedToReconnectReason())); + } + @SafeVarargs private final void fulfillCommands(String message, Consumer> commandConsumer, Queue>... queues) { @@ -875,6 +883,14 @@ private final void fulfillCommands(String message, Consumer RedisCommand processActivationCommand(RedisCommand command) { if (!ActivationCommand.isActivationCommand(command)) { @@ -936,7 +952,8 @@ private Throwable validateWrite(@SuppressWarnings("unused") int commands) { private void onUnexpectedState(String caller, ConnectionContext.State exp) { final ConnectionContext.State actual = this.channel.context.initialState; logger.error("{}[{}][unexpected] : unexpected state: exp '{}' got '{}'", logPrefix(), caller, exp, actual); - cancelCommands(String.format("%s: state not match: expect '%s', got '%s'", caller, exp, actual)); + syncAfterTerminated( + () -> cancelCommands(String.format("%s: state not match: expect '%s', got '%s'", caller, exp, actual))); } private void channelFlush(Channel channel) { From c88b52f30f3471bc5da9ff9e78c2f36a7b6c2b4f Mon Sep 17 00:00:00 2001 From: "xiaofan.chen" Date: Wed, 14 Aug 2024 15:42:50 +0800 Subject: [PATCH 26/35] chore: add useMpscQueue to AutoBatchFlushOptions, default to true --- .../lettuce/core/AutoBatchFlushOptions.java | 25 +++++++++++++++++++ ...eue.java => UnboundedOfferFirstQueue.java} | 2 +- ...a => ConcurrentLinkedOfferFirstQueue.java} | 6 ++--- .../JcToolsUnboundedMpscOfferFirstQueue.java | 4 +-- .../DefaultAutoBatchFlushEndpoint.java | 8 +++--- 5 files changed, 36 insertions(+), 9 deletions(-) rename src/main/java/io/lettuce/core/datastructure/queue/offerfirst/{UnboundedMpscOfferFirstQueue.java => UnboundedOfferFirstQueue.java} (93%) rename src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/{ConcurrentLinkedMpscOfferFirstQueue.java => 
ConcurrentLinkedOfferFirstQueue.java} (78%) diff --git a/src/main/java/io/lettuce/core/AutoBatchFlushOptions.java b/src/main/java/io/lettuce/core/AutoBatchFlushOptions.java index e71f12e561..09a754a8c8 100644 --- a/src/main/java/io/lettuce/core/AutoBatchFlushOptions.java +++ b/src/main/java/io/lettuce/core/AutoBatchFlushOptions.java @@ -26,16 +26,21 @@ public class AutoBatchFlushOptions implements Serializable { public static final int DEFAULT_BATCH_SIZE = 20; + public static final boolean DEFAULT_USE_MPSC_QUEUE = true; + private final boolean enableAutoBatchFlush; private final int writeSpinCount; private final int batchSize; + private final boolean useMpscQueue; + public AutoBatchFlushOptions(AutoBatchFlushOptions.Builder builder) { this.enableAutoBatchFlush = builder.enableAutoBatchFlush; this.writeSpinCount = builder.writeSpinCount; this.batchSize = builder.batchSize; + this.useMpscQueue = builder.useMpscQueue; } /** @@ -63,6 +68,8 @@ public static class Builder { private int batchSize = DEFAULT_BATCH_SIZE; + private boolean useMpscQueue = DEFAULT_USE_MPSC_QUEUE; + /** * Enable auto batch flush. * @@ -100,6 +107,17 @@ public Builder batchSize(int batchSize) { return this; } + /** + * @param useMpscQueue use MPSC queue. If {@code false}, a {@link java.util.concurrent.ConcurrentLinkedQueue} is used, + * which has lower performance but is safer to consume across multiple threads, the option may be removed in the + * future if the mpsc queue is proven to be safe. + * @return {@code this} + */ + public Builder useMpscQueue(boolean useMpscQueue) { + this.useMpscQueue = useMpscQueue; + return this; + } + /** * Create a new instance of {@link AutoBatchFlushOptions}. 
* @@ -132,4 +150,11 @@ public int getBatchSize() { return batchSize; } + /** + * @return {@code true} if the queue is a MPSC queue + */ + public boolean usesMpscQueue() { + return useMpscQueue; + } + } diff --git a/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/UnboundedMpscOfferFirstQueue.java b/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/UnboundedOfferFirstQueue.java similarity index 93% rename from src/main/java/io/lettuce/core/datastructure/queue/offerfirst/UnboundedMpscOfferFirstQueue.java rename to src/main/java/io/lettuce/core/datastructure/queue/offerfirst/UnboundedOfferFirstQueue.java index 834aecb436..84e75a2cc0 100644 --- a/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/UnboundedMpscOfferFirstQueue.java +++ b/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/UnboundedOfferFirstQueue.java @@ -7,7 +7,7 @@ /** * @author chenxiaofan */ -public interface UnboundedMpscOfferFirstQueue { +public interface UnboundedOfferFirstQueue { /** * add element to the tail of the queue. The method is concurrent safe. 
diff --git a/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/ConcurrentLinkedMpscOfferFirstQueue.java b/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/ConcurrentLinkedOfferFirstQueue.java similarity index 78% rename from src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/ConcurrentLinkedMpscOfferFirstQueue.java rename to src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/ConcurrentLinkedOfferFirstQueue.java index c6022f56d2..f97a4668bf 100644 --- a/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/ConcurrentLinkedMpscOfferFirstQueue.java +++ b/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/ConcurrentLinkedOfferFirstQueue.java @@ -5,16 +5,16 @@ import javax.annotation.Nullable; -import io.lettuce.core.datastructure.queue.offerfirst.UnboundedMpscOfferFirstQueue; +import io.lettuce.core.datastructure.queue.offerfirst.UnboundedOfferFirstQueue; /** * @author chenxiaofan */ -public class ConcurrentLinkedMpscOfferFirstQueue implements UnboundedMpscOfferFirstQueue { +public class ConcurrentLinkedOfferFirstQueue implements UnboundedOfferFirstQueue { private final ConcurrentLinkedDeque delegate; - public ConcurrentLinkedMpscOfferFirstQueue() { + public ConcurrentLinkedOfferFirstQueue() { this.delegate = new ConcurrentLinkedDeque<>(); } diff --git a/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/JcToolsUnboundedMpscOfferFirstQueue.java b/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/JcToolsUnboundedMpscOfferFirstQueue.java index 17ed4de647..dde8b997ce 100644 --- a/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/JcToolsUnboundedMpscOfferFirstQueue.java +++ b/src/main/java/io/lettuce/core/datastructure/queue/offerfirst/impl/JcToolsUnboundedMpscOfferFirstQueue.java @@ -7,13 +7,13 @@ import javax.annotation.Nullable; -import io.lettuce.core.datastructure.queue.offerfirst.UnboundedMpscOfferFirstQueue; +import 
io.lettuce.core.datastructure.queue.offerfirst.UnboundedOfferFirstQueue; import io.netty.util.internal.PlatformDependent; /** * @author chenxiaofan */ -public class JcToolsUnboundedMpscOfferFirstQueue implements UnboundedMpscOfferFirstQueue { +public class JcToolsUnboundedMpscOfferFirstQueue implements UnboundedOfferFirstQueue { /** * The queues can only be manipulated in a single thread env. diff --git a/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java b/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java index dd9600d187..f9a83cb98b 100644 --- a/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java +++ b/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java @@ -43,7 +43,8 @@ import io.lettuce.core.constant.DummyContextualChannelInstances; import io.lettuce.core.context.AutoBatchFlushEndPointContext; import io.lettuce.core.context.ConnectionContext; -import io.lettuce.core.datastructure.queue.offerfirst.UnboundedMpscOfferFirstQueue; +import io.lettuce.core.datastructure.queue.offerfirst.UnboundedOfferFirstQueue; +import io.lettuce.core.datastructure.queue.offerfirst.impl.ConcurrentLinkedOfferFirstQueue; import io.lettuce.core.datastructure.queue.offerfirst.impl.JcToolsUnboundedMpscOfferFirstQueue; import io.lettuce.core.internal.Futures; import io.lettuce.core.internal.LettuceAssert; @@ -155,7 +156,7 @@ protected static void cancelCommandOnEndpointClose(RedisCommand cmd) { private final String cachedEndpointId; - protected final UnboundedMpscOfferFirstQueue taskQueue; + protected final UnboundedOfferFirstQueue taskQueue; private final boolean canFire; @@ -192,7 +193,8 @@ protected DefaultAutoBatchFlushEndpoint(ClientOptions clientOptions, ClientResou this.rejectCommandsWhileDisconnected = isRejectCommand(clientOptions); long endpointId = ENDPOINT_COUNTER.incrementAndGet(); this.cachedEndpointId = "0x" + Long.toHexString(endpointId); - this.taskQueue = new 
JcToolsUnboundedMpscOfferFirstQueue<>(); + this.taskQueue = clientOptions.getAutoBatchFlushOptions().usesMpscQueue() ? new JcToolsUnboundedMpscOfferFirstQueue<>() + : new ConcurrentLinkedOfferFirstQueue<>(); this.canFire = false; this.callbackOnClose = callbackOnClose; this.writeSpinCount = clientOptions.getAutoBatchFlushOptions().getWriteSpinCount(); From cc01006fe8fe7082398c5ff54ee17866158103af Mon Sep 17 00:00:00 2001 From: "xiaofan.chen" Date: Wed, 14 Aug 2024 15:52:11 +0800 Subject: [PATCH 27/35] chore: remove unused code --- .../lettuce/core/context/AutoBatchFlushEndPointContext.java | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/main/java/io/lettuce/core/context/AutoBatchFlushEndPointContext.java b/src/main/java/io/lettuce/core/context/AutoBatchFlushEndPointContext.java index 59d6f05f78..144e14108c 100644 --- a/src/main/java/io/lettuce/core/context/AutoBatchFlushEndPointContext.java +++ b/src/main/java/io/lettuce/core/context/AutoBatchFlushEndPointContext.java @@ -62,11 +62,6 @@ public Throwable getFirstDiscontinueReason() { private int flyingCmdNum; - @SuppressWarnings("unused") - public int getFlyingCmdNum() { - return flyingCmdNum; - } - public final HasOngoingSendLoop hasOngoingSendLoop = new HasOngoingSendLoop(); public void add(int n) { From e8d515abdc8744f5203caf07bcdc8404c1423bab Mon Sep 17 00:00:00 2001 From: "xiaofan.chen" Date: Wed, 14 Aug 2024 16:06:47 +0800 Subject: [PATCH 28/35] chore: cancel commands in initialState()/reset() if not using mpsc queue --- .../protocol/DefaultAutoBatchFlushEndpoint.java | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java b/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java index f9a83cb98b..8adf3d1851 100644 --- a/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java +++ b/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java @@ -170,6 
+170,8 @@ protected static void cancelCommandOnEndpointClose(RedisCommand cmd) { private final int batchSize; + private final boolean usesMpscQueue; + /** * Create a new {@link AutoBatchFlushEndpoint}. * @@ -193,8 +195,8 @@ protected DefaultAutoBatchFlushEndpoint(ClientOptions clientOptions, ClientResou this.rejectCommandsWhileDisconnected = isRejectCommand(clientOptions); long endpointId = ENDPOINT_COUNTER.incrementAndGet(); this.cachedEndpointId = "0x" + Long.toHexString(endpointId); - this.taskQueue = clientOptions.getAutoBatchFlushOptions().usesMpscQueue() ? new JcToolsUnboundedMpscOfferFirstQueue<>() - : new ConcurrentLinkedOfferFirstQueue<>(); + this.usesMpscQueue = clientOptions.getAutoBatchFlushOptions().usesMpscQueue(); + this.taskQueue = usesMpscQueue ? new JcToolsUnboundedMpscOfferFirstQueue<>() : new ConcurrentLinkedOfferFirstQueue<>(); this.canFire = false; this.callbackOnClose = callbackOnClose; this.writeSpinCount = clientOptions.getAutoBatchFlushOptions().getWriteSpinCount(); @@ -553,7 +555,10 @@ public void reset() { if (chan.context.initialState.isConnected()) { chan.pipeline().fireUserEventTriggered(new ConnectionEvents.Reset()); } - // Unsafe to call cancelBufferedCommands() here. + if (!usesMpscQueue) { + cancelCommands("reset"); + } + // Otherwise, unsafe to call cancelBufferedCommands() here. } private void resetInternal() { @@ -574,8 +579,10 @@ private void resetInternal() { */ @Override public void initialState() { - // Unsafe to call cancelCommands() here. - // No need to cancel. + if (!usesMpscQueue) { + cancelCommands("initialState"); + } + // Otherwise, unsafe to call cancelBufferedCommands() here. 
ContextualChannel currentChannel = this.channel; if (currentChannel.context.initialState.isConnected()) { ChannelFuture close = currentChannel.close(); From 8ed038c0ed81e1b5aaa928e6202103661a462024 Mon Sep 17 00:00:00 2001 From: "xiaofan.chen" Date: Fri, 16 Aug 2024 12:58:21 +0800 Subject: [PATCH 29/35] fix: closing unexpected channel in internalCloseConnectionIfNeeded --- .../core/protocol/DefaultAutoBatchFlushEndpoint.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java b/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java index 8adf3d1851..b8bb3b6f62 100644 --- a/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java +++ b/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java @@ -1110,16 +1110,17 @@ private boolean shouldNotRetry(Throwable cause) { } private void internalCloseConnectionIfNeeded(Throwable reason) { - if (chan.context.isChannelInactiveEventFired() || !chan.isActive()) { + final ContextualChannel chanLocal = this.chan; // the value may be changed in the future, so save it on stack. 
+ if (chanLocal.context.isChannelInactiveEventFired() || !chanLocal.isActive()) { return; } logger.error( "[internalCloseConnectionIfNeeded][interesting][{}] close the connection due to write error, reason: '{}'", endpoint.logPrefix(), reason.getMessage(), reason); - chan.eventLoop().schedule(() -> { - if (chan.isActive()) { - chan.close(); + chanLocal.eventLoop().schedule(() -> { + if (chanLocal.isActive()) { + chanLocal.close(); } }, 1, TimeUnit.SECONDS); } From 7a54c8bc2d55a07ecfa0b8336d14fe3f65fd2d02 Mon Sep 17 00:00:00 2001 From: "xiaofan.chen" Date: Fri, 16 Aug 2024 12:58:55 +0800 Subject: [PATCH 30/35] fix: activation command should be sent immediately upon channelActive events --- .../io/lettuce/core/ContextualChannel.java | 4 - .../StatefulRedisClusterConnectionImpl.java | 18 +++- .../DefaultAutoBatchFlushEndpoint.java | 101 ++++++++++++------ .../io/lettuce/core/utils/ExceptionUtils.java | 20 ++-- 4 files changed, 95 insertions(+), 48 deletions(-) diff --git a/src/main/java/io/lettuce/core/ContextualChannel.java b/src/main/java/io/lettuce/core/ContextualChannel.java index 188698e0cf..384fd043ca 100644 --- a/src/main/java/io/lettuce/core/ContextualChannel.java +++ b/src/main/java/io/lettuce/core/ContextualChannel.java @@ -27,10 +27,6 @@ public class ContextualChannel implements Channel { public final ConnectionContext context; - public ConnectionContext getContext() { - return context; - } - public Channel getDelegate() { return delegate; } diff --git a/src/main/java/io/lettuce/core/cluster/StatefulRedisClusterConnectionImpl.java b/src/main/java/io/lettuce/core/cluster/StatefulRedisClusterConnectionImpl.java index 3e89689016..640321d10d 100644 --- a/src/main/java/io/lettuce/core/cluster/StatefulRedisClusterConnectionImpl.java +++ b/src/main/java/io/lettuce/core/cluster/StatefulRedisClusterConnectionImpl.java @@ -19,8 +19,6 @@ */ package io.lettuce.core.cluster; -import static io.lettuce.core.protocol.CommandType.*; - import
java.lang.reflect.InvocationHandler; import java.lang.reflect.Proxy; import java.time.Duration; @@ -57,6 +55,12 @@ import io.lettuce.core.protocol.ConnectionIntent; import io.lettuce.core.protocol.ConnectionWatchdog; import io.lettuce.core.protocol.RedisCommand; +import io.netty.util.internal.logging.InternalLogger; +import io.netty.util.internal.logging.InternalLoggerFactory; + +import static io.lettuce.core.protocol.CommandType.AUTH; +import static io.lettuce.core.protocol.CommandType.READONLY; +import static io.lettuce.core.protocol.CommandType.READWRITE; /** * A thread-safe connection to a Redis Cluster. Multiple threads may share one {@link StatefulRedisClusterConnectionImpl} @@ -70,6 +74,8 @@ public class StatefulRedisClusterConnectionImpl extends RedisChannelHandler implements StatefulRedisClusterConnection { + private static final InternalLogger logger = InternalLoggerFactory.getInstance(StatefulRedisClusterConnectionImpl.class); + private final ClusterPushHandler pushHandler; protected final RedisCodec codec; @@ -208,7 +214,13 @@ public CompletableFuture> getConnectionAsync(Strin public void activated() { super.activated(); - async.clusterMyId().thenAccept(connectionState::setNodeId); + async.clusterMyId().whenComplete((nodeId, throwable) -> { + if (throwable != null) { + logger.warn("Failed to retrieve current cluster node ID: {}", throwable); + } else { + connectionState.setNodeId(nodeId); + } + }); } ClusterDistributionChannelWriter getClusterDistributionChannelWriter() { diff --git a/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java b/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java index b8bb3b6f62..2bb6d1d8ac 100644 --- a/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java +++ b/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java @@ -65,6 +65,7 @@ * * @author Mark Paluch */ +@SuppressWarnings("DuplicatedCode") public class DefaultAutoBatchFlushEndpoint 
implements RedisChannelWriter, AutoBatchFlushEndpoint, PushHandler { private static final InternalLogger logger = InternalLoggerFactory.getInstance(AutoBatchFlushEndpoint.class); @@ -235,10 +236,9 @@ public List getPushListeners() { @Override public RedisCommand write(RedisCommand command) { - LettuceAssert.notNull(command, "Command must not be null"); - - final Throwable validation = validateWrite(1); + final ContextualChannel chan = this.channel; + final Throwable validation = validateWrite(chan, 1, inActivation); if (validation != null) { command.completeExceptionally(validation); return command; @@ -246,16 +246,17 @@ public RedisCommand write(RedisCommand command) { try { if (inActivation) { + // needs write and flush activation command immediately, cannot queue it. command = processActivationCommand(command); - } - - this.taskQueue.offer(command); - QUEUE_SIZE.incrementAndGet(this); + writeAndFlushActivationCommand(chan, command); + } else { + this.taskQueue.offer(command); + QUEUE_SIZE.incrementAndGet(this); - if (autoFlushCommands) { - flushCommands(); + if (autoFlushCommands) { + flushCommands(); + } } - } finally { if (debugEnabled) { logger.debug("{} write() done", logPrefix()); @@ -268,10 +269,10 @@ public RedisCommand write(RedisCommand command) { @SuppressWarnings("unchecked") @Override public Collection> write(Collection> commands) { - LettuceAssert.notNull(commands, "Commands must not be null"); - final Throwable validation = validateWrite(commands.size()); + final ContextualChannel chan = this.channel; + final Throwable validation = validateWrite(chan, commands.size(), inActivation); if (validation != null) { commands.forEach(it -> it.completeExceptionally(validation)); return (Collection>) commands; @@ -279,14 +280,16 @@ public RedisCommand write(RedisCommand command) { try { if (inActivation) { + // needs write and flush activation commands immediately, cannot queue it. 
commands = processActivationCommands(commands); - } - - this.taskQueue.offer(commands); - QUEUE_SIZE.addAndGet(this, commands.size()); + writeAndFlushActivationCommands(chan, commands); + } else { + this.taskQueue.offer(commands); + QUEUE_SIZE.addAndGet(this, commands.size()); - if (autoFlushCommands) { - flushCommands(); + if (autoFlushCommands) { + flushCommands(); + } } } finally { if (debugEnabled) { @@ -297,6 +300,19 @@ public RedisCommand write(RedisCommand command) { return (Collection>) commands; } + private void writeAndFlushActivationCommand(ContextualChannel chan, RedisCommand command) { + channelWrite(chan, command).addListener(WrittenToChannel.newInstance(this, chan, command, true)); + channelFlush(chan); + } + + private void writeAndFlushActivationCommands(ContextualChannel chan, + Collection> commands) { + for (RedisCommand command : commands) { + channelWrite(chan, command).addListener(WrittenToChannel.newInstance(this, chan, command, true)); + } + channelFlush(chan); + } + @Override public void notifyChannelActive(Channel channel) { final ContextualChannel contextualChannel = new ContextualChannel(channel, ConnectionContext.State.CONNECTED); @@ -494,8 +510,8 @@ public void close() { } @Override + @SuppressWarnings("java:S125" /* The comments are necessary to prove the correctness code */) public CompletableFuture closeAsync() { - if (debugEnabled) { logger.debug("{} closeAsync()", logPrefix()); } @@ -650,7 +666,7 @@ private void scheduleSendJobIfNeeded(final ContextualChannel chan) { // 2. hasOngoingSendLoop.safe.get() == 1 (volatile read) synchronizes-before // hasOngoingSendLoop.safe.set(0) (volatile write) in first loopSend0() // 3. hasOngoingSendLoop.safe.set(0) (volatile write) synchronizes-before - // second loopSend0(), which will call poll() (volatile read of producerIndex) + // taskQueue.isEmpty() (volatile read of producerIndex), which guarantees to see the offered task. 
} private void loopSend(final ContextualChannel chan, boolean entered) { @@ -703,13 +719,13 @@ private int pollBatch(final AutoBatchFlushEndPointContext autoBatchFlushEndPoint if (o instanceof RedisCommand) { RedisCommand cmd = (RedisCommand) o; - channelWrite(chan, cmd).addListener(WrittenToChannel.newInstance(this, chan, cmd)); + channelWrite(chan, cmd).addListener(WrittenToChannel.newInstance(this, chan, cmd, false)); count++; } else { @SuppressWarnings("unchecked") Collection> commands = (Collection>) o; for (RedisCommand cmd : commands) { - channelWrite(chan, cmd).addListener(WrittenToChannel.newInstance(this, chan, cmd)); + channelWrite(chan, cmd).addListener(WrittenToChannel.newInstance(this, chan, cmd, false)); } count += commands.size(); } @@ -770,6 +786,7 @@ private void onEndpointQuiescence() { } // neither connectionWatchdog nor doReconnectOnEndpointQuiescence could be null + // noinspection DataFlowIssue connectionWatchdog.reconnectOnAutoBatchFlushEndpointQuiescence(); } @@ -839,6 +856,7 @@ private final void onReconnectFailed() { } @SafeVarargs + @SuppressWarnings("java:S3776" /* Suppress cognitive complexity warning */) private final void fulfillCommands(String message, Consumer> commandConsumer, Queue>... queues) { int totalCancelledTaskNum = 0; @@ -901,7 +919,6 @@ private Throwable getFailedToReconnectReason() { } private RedisCommand processActivationCommand(RedisCommand command) { - if (!ActivationCommand.isActivationCommand(command)) { return new ActivationCommand<>(command); } @@ -926,7 +943,7 @@ private RedisCommand processActivationCommand(RedisCommand clientOptions.getRequestQueueSize()) { + if (!isActivationCommand /* activation command should never be excluded due to queue full */ && boundedQueues + && queueSize + commands > clientOptions.getRequestQueueSize()) { return new RedisException("Request queue size exceeded: " + clientOptions.getRequestQueueSize() + ". 
Commands are not accepted until the queue size drops."); } - final ContextualChannel chan = this.channel; - switch (chan.context.initialState) { + final ConnectionContext.State initialState = chan.context.initialState; + final boolean rejectCommandsWhileDisconnectedLocal = this.rejectCommandsWhileDisconnected || isActivationCommand; + switch (initialState) { case ENDPOINT_CLOSED: return new RedisException("Connection is closed"); case RECONNECT_FAILED: return failedToReconnectReason; case WILL_RECONNECT: case CONNECTING: - return rejectCommandsWhileDisconnected ? new RedisException("Currently not connected. Commands are rejected.") + return rejectCommandsWhileDisconnectedLocal + ? new RedisException("Currently not connected. Commands are rejected.") : null; case CONNECTED: - return !chan.isActive() && rejectCommandsWhileDisconnected ? new RedisException("Connection is closed") : null; + return !chan.isActive() && rejectCommandsWhileDisconnectedLocal ? new RedisException("Channel is closed") + : null; default: - throw new IllegalStateException("unexpected state: " + chan.context.initialState); + throw new IllegalStateException("unexpected state: " + initialState); } } @@ -1023,6 +1044,8 @@ protected WrittenToChannel newObject(Recycler.Handle handle) { private RedisCommand cmd; + private boolean isActivationCommand; + private ContextualChannel chan; private WrittenToChannel(Recycler.Handle handle) { @@ -1035,21 +1058,32 @@ private WrittenToChannel(Recycler.Handle handle) { * @return new instance */ static WrittenToChannel newInstance(DefaultAutoBatchFlushEndpoint endpoint, ContextualChannel chan, - RedisCommand command) { + RedisCommand command, boolean isActivationCommand) { WrittenToChannel entry = RECYCLER.get(); entry.endpoint = endpoint; entry.chan = chan; entry.cmd = command; + entry.isActivationCommand = isActivationCommand; + + LettuceAssert.assertState(isActivationCommand == ActivationCommand.isActivationCommand(command), + "unexpected: isActivationCommand 
not match"); return entry; } @Override public void operationComplete(Future future) { - final AutoBatchFlushEndPointContext autoBatchFlushEndPointContext = chan.context.autoBatchFlushEndPointContext; try { + if (isActivationCommand) { + if (!future.isSuccess()) { + cmd.completeExceptionally(future.cause()); + } + return; + } + + final AutoBatchFlushEndPointContext autoBatchFlushEndPointContext = chan.context.autoBatchFlushEndPointContext; QUEUE_SIZE.decrementAndGet(endpoint); autoBatchFlushEndPointContext.done(1); @@ -1105,7 +1139,7 @@ private Throwable checkSendResult(Future sendFuture) { } private boolean shouldNotRetry(Throwable cause) { - return endpoint.reliability == Reliability.AT_MOST_ONCE || ActivationCommand.isActivationCommand(cmd) + return endpoint.reliability == Reliability.AT_MOST_ONCE || ExceptionUtils.oneOf(cause, SHOULD_NOT_RETRY_EXCEPTION_TYPES); } @@ -1129,6 +1163,7 @@ private void recycle() { this.endpoint = null; this.chan = null; this.cmd = null; + this.isActivationCommand = false; handle.recycle(this); } diff --git a/src/main/java/io/lettuce/core/utils/ExceptionUtils.java b/src/main/java/io/lettuce/core/utils/ExceptionUtils.java index 49ed6e548c..50220abc08 100644 --- a/src/main/java/io/lettuce/core/utils/ExceptionUtils.java +++ b/src/main/java/io/lettuce/core/utils/ExceptionUtils.java @@ -1,17 +1,17 @@ package io.lettuce.core.utils; -import io.lettuce.core.output.CommandOutput; -import io.lettuce.core.protocol.RedisCommand; -import io.netty.channel.socket.ChannelOutputShutdownException; -import io.netty.util.internal.logging.InternalLogger; - import java.io.IOException; import java.nio.channels.ClosedChannelException; import java.util.Arrays; import java.util.Collection; import java.util.HashSet; import java.util.Set; -import java.util.function.Function; + +import io.lettuce.core.output.CommandOutput; +import io.lettuce.core.protocol.RedisCommand; +import io.netty.channel.socket.ChannelOutputShutdownException; +import 
io.netty.util.internal.logging.InternalLogLevel; +import io.netty.util.internal.logging.InternalLogger; public class ExceptionUtils { @@ -26,12 +26,16 @@ public static void maybeLogSendError(InternalLogger logger, Throwable cause) { return; } + final String message = "Unexpected exception during request: {}"; + final InternalLogLevel logLevel; + if (cause instanceof IOException && (SUPPRESS_IO_EXCEPTION_MESSAGES.contains(cause.getMessage()) || cause instanceof ChannelOutputShutdownException)) { - logger.debug("[maybeLogSendError] error during request: {}", cause.getMessage(), cause); + logLevel = InternalLogLevel.DEBUG; } else { - logger.error("[maybeLogSendError][attention] unexpected exception during request: {}", cause.getMessage(), cause); + logLevel = InternalLogLevel.WARN; } + logger.log(logLevel, message, cause.toString(), cause); } /** From e328aa0c4b7dd0fca79ab2b46bc7376175d8beb3 Mon Sep 17 00:00:00 2001 From: "xiaofan.chen" Date: Sat, 17 Aug 2024 10:19:49 +0800 Subject: [PATCH 31/35] fix: revert protectMode upon channelActive event, complete non-retryable activation command in drainStackUponChannelInactive() --- .../lettuce/core/protocol/CommandHandler.java | 13 ++++--- .../DefaultAutoBatchFlushEndpoint.java | 36 +++++++++---------- 2 files changed, 27 insertions(+), 22 deletions(-) diff --git a/src/main/java/io/lettuce/core/protocol/CommandHandler.java b/src/main/java/io/lettuce/core/protocol/CommandHandler.java index de6cf0f0e1..e19021b96e 100644 --- a/src/main/java/io/lettuce/core/protocol/CommandHandler.java +++ b/src/main/java/io/lettuce/core/protocol/CommandHandler.java @@ -190,13 +190,18 @@ void setBuffer(ByteBuf buffer) { return drainCommands(stack); } - private Deque> drainStack() { + private Deque> drainStackUponChannelInactive() { final Deque> target = new ArrayDeque<>(stack.size()); RedisCommand cmd; while ((cmd = stack.poll()) != null) { - if (!cmd.isDone() && !ActivationCommand.isActivationCommand(cmd)) { - target.add(cmd); + if 
(!cmd.isDone()) { + if (!ActivationCommand.isActivationCommand(cmd)) { + target.add(cmd); + } else { + cmd.completeExceptionally( + new RedisConnectionException("activation command won't be retried upon channel inactive")); + } } } @@ -379,7 +384,7 @@ public void channelInactive(ChannelHandlerContext ctx) throws Exception { endpoint.notifyChannelInactive(ctx.channel()); Deque> autoBatchFlushRetryableDrainQueuedCommands = UnmodifiableDeque.emptyDeque(); if (supportsAutoBatchFlush) { - autoBatchFlushRetryableDrainQueuedCommands = drainStack(); + autoBatchFlushRetryableDrainQueuedCommands = drainStackUponChannelInactive(); } else { endpoint.notifyDrainQueuedCommands(this); } diff --git a/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java b/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java index 2bb6d1d8ac..f433b21329 100644 --- a/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java +++ b/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java @@ -133,6 +133,8 @@ protected static void cancelCommandOnEndpointClose(RedisCommand cmd) { protected volatile @Nonnull ContextualChannel channel = DummyContextualChannelInstances.CHANNEL_CONNECTING; + private volatile Throwable failedToReconnectReason; + private final Consumer> callbackOnClose; private final boolean rejectCommandsWhileDisconnected; @@ -153,19 +155,17 @@ protected static void cancelCommandOnEndpointClose(RedisCommand cmd) { private ConnectionFacade connectionFacade; - private volatile Throwable connectionError; - private final String cachedEndpointId; protected final UnboundedOfferFirstQueue taskQueue; private final boolean canFire; - private volatile boolean inProtectMode; + private volatile EventLoop lastEventLoop = null; - private volatile Throwable failedToReconnectReason; + private volatile Throwable connectionError; - private volatile EventLoop lastEventLoop = null; + private volatile boolean inProtectMode; private final 
int writeSpinCount; @@ -316,17 +316,16 @@ private void writeAndFlushActivationCommands(ContextualChannel chan, @Override public void notifyChannelActive(Channel channel) { final ContextualChannel contextualChannel = new ContextualChannel(channel, ConnectionContext.State.CONNECTED); - - this.logPrefix = null; - this.connectionError = null; - if (!CHANNEL.compareAndSet(this, DummyContextualChannelInstances.CHANNEL_CONNECTING, contextualChannel)) { channel.close(); onUnexpectedState("notifyChannelActive", ConnectionContext.State.CONNECTING); return; } - lastEventLoop = channel.eventLoop(); + this.lastEventLoop = channel.eventLoop(); + this.connectionError = null; + this.inProtectMode = false; + this.logPrefix = null; // Created a synchronize-before with set channel to CHANNEL_CONNECTING, if (isClosed()) { @@ -398,7 +397,7 @@ public void notifyChannelInactive(Channel channel) { @Override public void notifyChannelInactiveAfterWatchdogDecision(Channel channel, - Deque> retryableQueuedCommands) { + Deque> retryablePendingCommands) { final ContextualChannel inactiveChan = this.channel; if (!inactiveChan.context.initialState.isConnected()) { logger.error("[unexpected][{}] notifyChannelInactive: channel initial state not connected", logPrefix()); @@ -446,7 +445,7 @@ public void notifyChannelInactiveAfterWatchdogDecision(Channel channel, CHANNEL.set(this, DummyContextualChannelInstances.CHANNEL_ENDPOINT_CLOSED); } inactiveChan.context - .setCloseStatus(new ConnectionContext.CloseStatus(willReconnect, retryableQueuedCommands, exception)); + .setCloseStatus(new ConnectionContext.CloseStatus(willReconnect, retryablePendingCommands, exception)); trySetEndpointQuiescence(inactiveChan); } @@ -945,11 +944,11 @@ private RedisCommand processActivationCommand(RedisCommand Date: Tue, 20 Aug 2024 15:06:28 +0800 Subject: [PATCH 32/35] chore: default batch size to 32, refine code style --- .../lettuce/core/AutoBatchFlushOptions.java | 2 +- .../DefaultAutoBatchFlushEndpoint.java | 99 
++++++++++--------- 2 files changed, 56 insertions(+), 45 deletions(-) diff --git a/src/main/java/io/lettuce/core/AutoBatchFlushOptions.java b/src/main/java/io/lettuce/core/AutoBatchFlushOptions.java index 09a754a8c8..1166f4c4d1 100644 --- a/src/main/java/io/lettuce/core/AutoBatchFlushOptions.java +++ b/src/main/java/io/lettuce/core/AutoBatchFlushOptions.java @@ -24,7 +24,7 @@ public class AutoBatchFlushOptions implements Serializable { public static final int DEFAULT_WRITE_SPIN_COUNT = 16; - public static final int DEFAULT_BATCH_SIZE = 20; + public static final int DEFAULT_BATCH_SIZE = 32; public static final boolean DEFAULT_USE_MPSC_QUEUE = true; diff --git a/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java b/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java index f433b21329..7326aff060 100644 --- a/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java +++ b/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java @@ -650,13 +650,22 @@ private void scheduleSendJobIfNeeded(final ContextualChannel chan) { } if (chan.context.autoBatchFlushEndPointContext.hasOngoingSendLoop.tryEnter()) { - // Benchmark result of using tryEnterSafeGetVolatile() or not (1 thread, async get): - // 1. uses tryEnterSafeGetVolatile() to avoid unnecessary eventLoop.execute() calls - // Avg latency: 3.2956217278663s - // Avg QPS: 495238.50056392356/s - // 2. 
uses eventLoop.execute() directly - // Avg latency: 3.2677197021496998s - // Avg QPS: 476925.0751855796/s + // Benchmark result: + // Redis: + // engine: 7.1.0 + // server: AWS elasticcache cache.r7g.large + // Client: EC2-c5n.2xlarge + // Test Model: + // multi-thread sync exists (./bench-multi-thread-exists.sh -b 32 -s 10 -n 80000 -t 64) + // Test Parameter: + // thread num: 64, loop num: 80000, batch size: 32, write spin count: 10 + // + // With tryEnter(): + // Avg latency: 0.64917373203125ms + // Avg QPS: 196037.67991971457/s + // Without tryEnter(): + // Avg latency: 0.6618976359375001ms + // Avg QPS: 192240.1301551348/s eventLoop.execute(() -> loopSend(chan, true)); } @@ -681,31 +690,32 @@ private void loopSend(final ContextualChannel chan, boolean entered) { } private void loopSend0(final AutoBatchFlushEndPointContext autoBatchFlushEndPointContext, final ContextualChannel chan, - int remainingSpinnCount, boolean entered) { + int remainingSpinnCount, final boolean entered) { do { - final int count = pollBatch(autoBatchFlushEndPointContext, chan); + final int count = DefaultAutoBatchFlushEndpoint.this.pollBatch(autoBatchFlushEndPointContext, chan); + if (count == 0) { + break; + } if (count < 0) { return; } - if (count < batchSize) { - if (!entered) { - return; - } - // queue was empty - // The send loop will be triggered later when a new task is added, - // // Don't setUnsafe here because loopSend0() may result in a delayed loopSend() call. - autoBatchFlushEndPointContext.hasOngoingSendLoop.exit(); - if (taskQueue.isEmpty()) { - return; - } - entered = false; - // // Guarantee thread-safety: no dangling tasks in the queue. - } } while (--remainingSpinnCount > 0); - final boolean finalEntered = entered; - // Don't need to exitUnsafe since we still have an ongoing consume tasks in this thread. 
- chan.eventLoop().execute(() -> loopSend(chan, finalEntered)); + if (remainingSpinnCount <= 0) { + // Don't need to exitUnsafe since we still have an ongoing consume tasks in this thread. + chan.eventLoop().execute(() -> loopSend(chan, entered)); + return; + } + + if (entered) { + // queue was empty + // The send loop will be triggered later when a new task is added, + autoBatchFlushEndPointContext.hasOngoingSendLoop.exit(); + // Guarantee thread-safety: no dangling tasks in the queue, see scheduleSendJobIfNeeded() + if (!taskQueue.isEmpty()) { + loopSend0(autoBatchFlushEndPointContext, chan, remainingSpinnCount, false); + } + } } private int pollBatch(final AutoBatchFlushEndPointContext autoBatchFlushEndPointContext, ContextualChannel chan) { @@ -771,24 +781,6 @@ private void trySetEndpointQuiescence(ContextualChannel chan) { } } - private void onEndpointQuiescence() { - if (channel.context.initialState == ConnectionContext.State.ENDPOINT_CLOSED) { - return; - } - - this.logPrefix = null; - // Create happens-before with channelActive() - if (!CHANNEL.compareAndSet(this, DummyContextualChannelInstances.CHANNEL_WILL_RECONNECT, - DummyContextualChannelInstances.CHANNEL_CONNECTING)) { - onUnexpectedState("onEndpointQuiescence", ConnectionContext.State.WILL_RECONNECT); - return; - } - - // neither connectionWatchdog nor doReconnectOnEndpointQuiescence could be null - // noinspection DataFlowIssue - connectionWatchdog.reconnectOnAutoBatchFlushEndpointQuiescence(); - } - private void onWillReconnect(@Nonnull final ConnectionContext.CloseStatus closeStatus, final AutoBatchFlushEndPointContext autoBatchFlushEndPointContext) { final @Nullable Deque> retryableFailedToSendTasks = autoBatchFlushEndPointContext @@ -831,6 +823,25 @@ private void onWontReconnect(@Nonnull final ConnectionContext.CloseStatus closeS } } + private void onEndpointQuiescence() { + if (channel.context.initialState == ConnectionContext.State.ENDPOINT_CLOSED) { + return; + } + + this.logPrefix = null; + 
// Create happens-before with channelActive() + if (!CHANNEL.compareAndSet(this, DummyContextualChannelInstances.CHANNEL_WILL_RECONNECT, + DummyContextualChannelInstances.CHANNEL_CONNECTING)) { + onUnexpectedState("onEndpointQuiescence", ConnectionContext.State.WILL_RECONNECT); + return; + } + + // notify connectionWatchDog that it is safe to reconnect now. + // neither connectionWatchdog nor doReconnectOnEndpointQuiescence could be null + // noinspection DataFlowIssue + connectionWatchdog.reconnectOnAutoBatchFlushEndpointQuiescence(); + } + private void offerFirstAll(Deque> commands) { commands.forEach(cmd -> { if (cmd instanceof DemandAware.Sink) { From 5ad30d0c49c1a2d0814906c29fd706f01bd2b9d5 Mon Sep 17 00:00:00 2001 From: "xiaofan.chen" Date: Thu, 29 Aug 2024 16:50:10 +0800 Subject: [PATCH 33/35] fix: two bugs: 1, autoBatchFlushEndPointContext.add() should always be before autoBatchFlushEndPointContext.done(1) otherwise the flyingTaskNum could be negative; 2, make sure lastEventLoop is never null --- .../DefaultAutoBatchFlushEndpoint.java | 62 ++++++++++--------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java b/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java index 7326aff060..c5c84e30f2 100644 --- a/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java +++ b/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java @@ -15,24 +15,6 @@ */ package io.lettuce.core.protocol; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Deque; -import java.util.HashSet; -import java.util.List; -import java.util.Queue; -import java.util.Set; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.CopyOnWriteArrayList; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicIntegerFieldUpdater; -import java.util.concurrent.atomic.AtomicLong; -import
java.util.concurrent.atomic.AtomicReferenceFieldUpdater; -import java.util.function.Consumer; - -import javax.annotation.Nonnull; -import javax.annotation.Nullable; - import io.lettuce.core.ClientOptions; import io.lettuce.core.ConnectionEvents; import io.lettuce.core.ContextualChannel; @@ -55,11 +37,29 @@ import io.netty.channel.EventLoop; import io.netty.handler.codec.EncoderException; import io.netty.util.Recycler; +import io.netty.util.concurrent.EventExecutor; import io.netty.util.concurrent.Future; import io.netty.util.concurrent.GenericFutureListener; import io.netty.util.internal.logging.InternalLogger; import io.netty.util.internal.logging.InternalLoggerFactory; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Deque; +import java.util.HashSet; +import java.util.List; +import java.util.Queue; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicIntegerFieldUpdater; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReferenceFieldUpdater; +import java.util.function.Consumer; + /** * Default {@link Endpoint} implementation. 
* @@ -161,7 +161,7 @@ protected static void cancelCommandOnEndpointClose(RedisCommand cmd) { private final boolean canFire; - private volatile EventLoop lastEventLoop = null; + private volatile EventExecutor lastEventExecutor; private volatile Throwable connectionError; @@ -202,6 +202,7 @@ protected DefaultAutoBatchFlushEndpoint(ClientOptions clientOptions, ClientResou this.callbackOnClose = callbackOnClose; this.writeSpinCount = clientOptions.getAutoBatchFlushOptions().getWriteSpinCount(); this.batchSize = clientOptions.getAutoBatchFlushOptions().getBatchSize(); + this.lastEventExecutor = clientResources.eventExecutorGroup().next(); } @Override @@ -322,7 +323,7 @@ public void notifyChannelActive(Channel channel) { return; } - this.lastEventLoop = channel.eventLoop(); + this.lastEventExecutor = channel.eventLoop(); this.connectionError = null; this.inProtectMode = false; this.logPrefix = null; @@ -585,7 +586,7 @@ private void resetInternal() { if (chan.context.initialState.isConnected()) { chan.pipeline().fireUserEventTriggered(new ConnectionEvents.Reset()); } - LettuceAssert.assertState(lastEventLoop.inEventLoop(), "must be called in lastEventLoop thread"); + LettuceAssert.assertState(lastEventExecutor.inEventLoop(), "must be called in lastEventLoop thread"); cancelCommands("resetInternal"); } @@ -727,22 +728,23 @@ private int pollBatch(final AutoBatchFlushEndPointContext autoBatchFlushEndPoint } if (o instanceof RedisCommand) { + autoBatchFlushEndPointContext.add(1); RedisCommand cmd = (RedisCommand) o; channelWrite(chan, cmd).addListener(WrittenToChannel.newInstance(this, chan, cmd, false)); count++; } else { @SuppressWarnings("unchecked") Collection> commands = (Collection>) o; + final int commandsSize = commands.size(); // size() could be expensive for some collections so cache it! 
+ autoBatchFlushEndPointContext.add(commandsSize); for (RedisCommand cmd : commands) { channelWrite(chan, cmd).addListener(WrittenToChannel.newInstance(this, chan, cmd, false)); } - count += commands.size(); + count += commandsSize; } } if (count > 0) { - autoBatchFlushEndPointContext.add(count); - channelFlush(chan); if (autoBatchFlushEndPointContext.hasRetryableFailedToSendCommands()) { // Wait for onConnectionClose event() @@ -755,7 +757,7 @@ private int pollBatch(final AutoBatchFlushEndPointContext autoBatchFlushEndPoint private void trySetEndpointQuiescence(ContextualChannel chan) { final EventLoop eventLoop = chan.eventLoop(); LettuceAssert.isTrue(eventLoop.inEventLoop(), "unexpected: not in event loop"); - LettuceAssert.isTrue(eventLoop == lastEventLoop, "unexpected: lastEventLoop not match"); + LettuceAssert.isTrue(eventLoop == lastEventExecutor, "unexpected: lastEventLoop not match"); final ConnectionContext connectionContext = chan.context; final @Nullable ConnectionContext.CloseStatus closeStatus = connectionContext.getCloseStatus(); @@ -1019,14 +1021,14 @@ private ChannelFuture channelWrite(Channel channel, RedisCommand comman * is terminated (state is RECONNECT_FAILED/ENDPOINT_CLOSED) */ private void syncAfterTerminated(Runnable runnable) { - final EventLoop localLastEventLoop = lastEventLoop; - LettuceAssert.notNull(localLastEventLoop, "lastEventLoop must not be null after terminated"); - if (localLastEventLoop.inEventLoop()) { + final EventExecutor localLastEventExecutor = lastEventExecutor; + if (localLastEventExecutor.inEventLoop()) { runnable.run(); } else { - localLastEventLoop.execute(() -> { + localLastEventExecutor.execute(() -> { runnable.run(); - LettuceAssert.isTrue(lastEventLoop == localLastEventLoop, "lastEventLoop must not be changed after terminated"); + LettuceAssert.isTrue(lastEventExecutor == localLastEventExecutor, + "lastEventLoop must not be changed after terminated"); }); } } From 82ca33f24de3b3a3e60512f10b97a851a4f815da Mon Sep 
17 00:00:00 2001 From: "xiaofan.chen" Date: Thu, 29 Aug 2024 19:08:51 +0800 Subject: [PATCH 34/35] chore: refine logging --- .../AutoBatchFlushEndPointContext.java | 7 +++- .../DefaultAutoBatchFlushEndpoint.java | 37 ++++++++++--------- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/src/main/java/io/lettuce/core/context/AutoBatchFlushEndPointContext.java b/src/main/java/io/lettuce/core/context/AutoBatchFlushEndPointContext.java index 144e14108c..ac89b4a675 100644 --- a/src/main/java/io/lettuce/core/context/AutoBatchFlushEndPointContext.java +++ b/src/main/java/io/lettuce/core/context/AutoBatchFlushEndPointContext.java @@ -106,7 +106,12 @@ public boolean addRetryableFailedToSendCommand(RedisCommand retryableCo return true; } - retryableFailedToSendCommands.add(retryableCommand); + try { + retryableFailedToSendCommands.add(retryableCommand); + } catch (Exception e) { + logger.error("[unexpected] retryableFailedToSendCommands is empty, but we are adding a command: {}", + retryableCommand); + } return false; } diff --git a/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java b/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java index c5c84e30f2..8053cf3eb3 100644 --- a/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java +++ b/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java @@ -15,6 +15,24 @@ */ package io.lettuce.core.protocol; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Deque; +import java.util.HashSet; +import java.util.List; +import java.util.Queue; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicIntegerFieldUpdater; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReferenceFieldUpdater; +import java.util.function.Consumer; + +import 
javax.annotation.Nonnull; +import javax.annotation.Nullable; + import io.lettuce.core.ClientOptions; import io.lettuce.core.ConnectionEvents; import io.lettuce.core.ContextualChannel; @@ -43,23 +61,6 @@ import io.netty.util.internal.logging.InternalLogger; import io.netty.util.internal.logging.InternalLoggerFactory; -import javax.annotation.Nonnull; -import javax.annotation.Nullable; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Deque; -import java.util.HashSet; -import java.util.List; -import java.util.Queue; -import java.util.Set; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.CopyOnWriteArrayList; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicIntegerFieldUpdater; -import java.util.concurrent.atomic.AtomicLong; -import java.util.concurrent.atomic.AtomicReferenceFieldUpdater; -import java.util.function.Consumer; - /** * Default {@link Endpoint} implementation. * @@ -779,7 +780,7 @@ private void trySetEndpointQuiescence(ContextualChannel chan) { if (chan.context.setChannelQuiescentOnce()) { onEndpointQuiescence(); } else { - ExceptionUtils.maybeFire(logger, canFire, "unexpected: setEndpointQuiescenceOncePerConnection() failed"); + ExceptionUtils.maybeFire(logger, canFire, "unexpected: quiescence already acquired"); } } From 5c50660c3cb46fd37f8981b89618fb2db8cbe607 Mon Sep 17 00:00:00 2001 From: "xiaofan.chen" Date: Fri, 21 Feb 2025 10:33:14 +0800 Subject: [PATCH 35/35] feat: add OwnershipSynchronizer to abstract consumer migration --- .../io/lettuce/core/concurrency/Owner.java | 45 +++++ .../concurrency/OwnershipSynchronizer.java | 176 ++++++++++++++++++ .../DefaultAutoBatchFlushEndpoint.java | 167 ++++++++--------- .../OwnershipSynchronizerTest.java | 119 ++++++++++++ 4 files changed, 415 insertions(+), 92 deletions(-) create mode 100644 src/main/java/io/lettuce/core/concurrency/Owner.java create mode 100644 
src/main/java/io/lettuce/core/concurrency/OwnershipSynchronizer.java create mode 100644 src/test/java/io/lettuce/core/concurrency/OwnershipSynchronizerTest.java diff --git a/src/main/java/io/lettuce/core/concurrency/Owner.java b/src/main/java/io/lettuce/core/concurrency/Owner.java new file mode 100644 index 0000000000..7615217f31 --- /dev/null +++ b/src/main/java/io/lettuce/core/concurrency/Owner.java @@ -0,0 +1,45 @@ +package io.lettuce.core.concurrency; + +import io.lettuce.core.internal.LettuceAssert; +import io.netty.util.concurrent.EventExecutor; +import io.netty.util.concurrent.SingleThreadEventExecutor; + +/** + * @author chenxiaofan + */ +class Owner { + + final SingleThreadEventExecutor eventExecutor; + + // if positive, no other thread can preempt the ownership. + private final int runningTaskNum; + + public Owner(EventExecutor eventExecutor, int runningTaskNum) { + if (runningTaskNum < 0) { + throw new IllegalArgumentException(String.format("negative runningTaskNum: %d", runningTaskNum)); + } + LettuceAssert.assertState(eventExecutor instanceof SingleThreadEventExecutor, + () -> String.format("unexpected event executor, expect %s got %s", SingleThreadEventExecutor.class.getName(), + eventExecutor.getClass().getName())); + this.eventExecutor = (SingleThreadEventExecutor) eventExecutor; + this.runningTaskNum = runningTaskNum; + } + + public boolean inEventLoop() { + return eventExecutor.inEventLoop(); + } + + public Owner toAdd(int n) { + return new Owner(eventExecutor, runningTaskNum + n); + } + + @SuppressWarnings("BooleanMethodIsAlwaysInverted") + public boolean isDone() { + return runningTaskNum == 0; + } + + public String getThreadName() { + return eventExecutor.threadProperties().name(); + } + +} diff --git a/src/main/java/io/lettuce/core/concurrency/OwnershipSynchronizer.java b/src/main/java/io/lettuce/core/concurrency/OwnershipSynchronizer.java new file mode 100644 index 0000000000..0f6607c179 --- /dev/null +++ 
b/src/main/java/io/lettuce/core/concurrency/OwnershipSynchronizer.java @@ -0,0 +1,176 @@ +package io.lettuce.core.concurrency; + +import java.time.Duration; +import java.util.concurrent.atomic.AtomicReferenceFieldUpdater; +import java.util.function.Consumer; + +import io.lettuce.core.internal.LettuceAssert; +import io.netty.util.concurrent.EventExecutor; +import io.netty.util.internal.logging.InternalLogger; + +/** + * @author chenxiaofan + */ +public class OwnershipSynchronizer { + + public static final int LOOP_CHECK_PERIOD = 100_000; + + public static class FailedToPreemptOwnershipException extends Exception { + + public FailedToPreemptOwnershipException() { + super("failed to preempt ownership"); + } + + } + + private static final AtomicReferenceFieldUpdater OWNER = AtomicReferenceFieldUpdater + .newUpdater(OwnershipSynchronizer.class, Owner.class, "owner"); + + @SuppressWarnings("java:S3077") + private volatile Owner owner; + + final InternalLogger logger; + + final R protectedResource; + + /** + * Create OwnershipSynchronizer instance. + * + * @param protectedResource protected resource, which can only be accessed by the owner thread, e.g. mpsc queue. + * @param initialOwnerEventExecutor initial owner thread. + * @param initialRunningTaskNum initial running task number. + * @param logger logger. + */ + public OwnershipSynchronizer(R protectedResource, EventExecutor initialOwnerEventExecutor, int initialRunningTaskNum, + InternalLogger logger) { + this.protectedResource = protectedResource; + this.owner = new Owner(initialOwnerEventExecutor, initialRunningTaskNum); + this.logger = logger; + } + + /** + * Safely run a task in current owner thread and release its memory effect to next owner thread. 
+ * + * @param task task to run + */ + public void execute(Consumer task) { + Owner cur; + do { + cur = this.owner; + if (isOwnerCurrentThreadAndPreemptPrevented(cur)) { + // already prevented preemption, safe to skip expensive add/done calls + task.accept(protectedResource); + return; + } + } while (!OWNER.compareAndSet(this, cur, cur.toAdd(1))); + + if (cur.inEventLoop()) { + try { + task.accept(protectedResource); + } finally { + done(1); + } + } else { + try { + cur.eventExecutor.execute(() -> { + try { + task.accept(protectedResource); + } finally { + done(1); + } + }); + } catch (Exception e) { + logger.error("failed to execute task in owner thread", e); + done(1); + throw e; + } + } + } + + /** + * Preempt ownership only when there is no running tasks in current owner + * + * @param eventExecutor new thread + * @param runningTaskNumber running task number to add + */ + @SuppressWarnings("unused") + public void preempt(EventExecutor eventExecutor, int runningTaskNumber) throws FailedToPreemptOwnershipException { + preempt(eventExecutor, runningTaskNumber, Long.MAX_VALUE); + } + + /** + * Preempt ownership only when there is no running tasks in current owner + * + * @param eventExecutor new thread + * @param runningTaskNumber running task number to add + * @param timeout timeout + */ + public void preempt(EventExecutor eventExecutor, int runningTaskNumber, Duration timeout) + throws FailedToPreemptOwnershipException { + preempt(eventExecutor, runningTaskNumber, System.nanoTime() + timeout.toNanos()); + } + + @SuppressWarnings("java:S3776" /* complexity */) + private void preempt(EventExecutor eventExecutor, int runningTaskNumber, long deadline) + throws FailedToPreemptOwnershipException { + Owner newOwner = null; + int i = 0; + while (true) { + final Owner cur = this.owner; + + if (cur.eventExecutor == eventExecutor) { + if (runningTaskNumber == 0 || OWNER.compareAndSet(this, cur, cur.toAdd(runningTaskNumber))) { // prevent preempt + return; + } + } else if 
(cur.isDone()) { + if (newOwner == null) { + newOwner = new Owner(eventExecutor, runningTaskNumber); + } + + if (OWNER.compareAndSet(this, cur, newOwner)) { + logger.debug("ownership preempted by a new thread [{}]", newOwner.getThreadName()); + // established happens-before with done() + return; + } + } + + // 1. unsafe to preempt, wait for the owner to finish + // 2. CAS failed + if (deadline < Long.MAX_VALUE && ++i > LOOP_CHECK_PERIOD) { + if (System.nanoTime() > deadline) { + throw new FailedToPreemptOwnershipException(); + } + i = 0; + } + } + } + + /** + * done n tasks in current owner. + * + * @param n number of tasks to be done. + */ + public void done(int n) { + Owner cur; + do { + cur = this.owner; + assertIsOwnerThreadAndPreemptPrevented(cur); + } while (!OWNER.compareAndSet(this, cur, cur.toAdd(-n))); + // create happens-before with preempt() + } + + public void assertIsOwnerThreadAndPreemptPrevented() { + assertIsOwnerThreadAndPreemptPrevented(this.owner); + } + + private void assertIsOwnerThreadAndPreemptPrevented(Owner cur) { + LettuceAssert.assertState(isOwnerCurrentThreadAndPreemptPrevented(cur), + () -> "[executeInOwnerWithPreemptPrevention] unexpected: " + + (cur.inEventLoop() ? 
"preemption not prevented" : "owner is not this thread")); + } + + private boolean isOwnerCurrentThreadAndPreemptPrevented(Owner owner) { + return owner.inEventLoop() && !owner.isDone(); + } + +} diff --git a/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java b/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java index 8053cf3eb3..d114e66a72 100644 --- a/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java +++ b/src/main/java/io/lettuce/core/protocol/DefaultAutoBatchFlushEndpoint.java @@ -15,6 +15,7 @@ */ package io.lettuce.core.protocol; +import java.time.Duration; import java.util.ArrayList; import java.util.Collection; import java.util.Deque; @@ -40,6 +41,7 @@ import io.lettuce.core.RedisConnectionException; import io.lettuce.core.RedisException; import io.lettuce.core.api.push.PushListener; +import io.lettuce.core.concurrency.OwnershipSynchronizer; import io.lettuce.core.constant.DummyContextualChannelInstances; import io.lettuce.core.context.AutoBatchFlushEndPointContext; import io.lettuce.core.context.ConnectionContext; @@ -55,7 +57,6 @@ import io.netty.channel.EventLoop; import io.netty.handler.codec.EncoderException; import io.netty.util.Recycler; -import io.netty.util.concurrent.EventExecutor; import io.netty.util.concurrent.Future; import io.netty.util.concurrent.GenericFutureListener; import io.netty.util.internal.logging.InternalLogger; @@ -144,7 +145,7 @@ protected static void cancelCommandOnEndpointClose(RedisCommand cmd) { private final boolean debugEnabled = logger.isDebugEnabled(); - protected final CompletableFuture closeFuture = new CompletableFuture<>(); + private final CompletableFuture closeFuture = new CompletableFuture<>(); private String logPrefix; @@ -152,17 +153,18 @@ protected static void cancelCommandOnEndpointClose(RedisCommand cmd) { private boolean inActivation = false; - protected @Nullable ConnectionWatchdog connectionWatchdog; + private @Nullable ConnectionWatchdog 
connectionWatchdog; private ConnectionFacade connectionFacade; private final String cachedEndpointId; - protected final UnboundedOfferFirstQueue taskQueue; + private final UnboundedOfferFirstQueue taskQueue; - private final boolean canFire; + private final OwnershipSynchronizer> taskQueueConsumeSync; // make sure only one consumer + // exists at any given time - private volatile EventExecutor lastEventExecutor; + private final boolean canFire; private volatile Throwable connectionError; @@ -172,8 +174,6 @@ protected static void cancelCommandOnEndpointClose(RedisCommand cmd) { private final int batchSize; - private final boolean usesMpscQueue; - /** * Create a new {@link AutoBatchFlushEndpoint}. * @@ -197,13 +197,14 @@ protected DefaultAutoBatchFlushEndpoint(ClientOptions clientOptions, ClientResou this.rejectCommandsWhileDisconnected = isRejectCommand(clientOptions); long endpointId = ENDPOINT_COUNTER.incrementAndGet(); this.cachedEndpointId = "0x" + Long.toHexString(endpointId); - this.usesMpscQueue = clientOptions.getAutoBatchFlushOptions().usesMpscQueue(); - this.taskQueue = usesMpscQueue ? new JcToolsUnboundedMpscOfferFirstQueue<>() : new ConcurrentLinkedOfferFirstQueue<>(); + this.taskQueue = clientOptions.getAutoBatchFlushOptions().usesMpscQueue() ? 
new JcToolsUnboundedMpscOfferFirstQueue<>() + : new ConcurrentLinkedOfferFirstQueue<>(); this.canFire = false; this.callbackOnClose = callbackOnClose; this.writeSpinCount = clientOptions.getAutoBatchFlushOptions().getWriteSpinCount(); this.batchSize = clientOptions.getAutoBatchFlushOptions().getBatchSize(); - this.lastEventExecutor = clientResources.eventExecutorGroup().next(); + this.taskQueueConsumeSync = new OwnershipSynchronizer<>(taskQueue, clientResources.eventExecutorGroup().next(), + 0 /* allows to be preempted by first event loop thread */, logger); } @Override @@ -324,7 +325,16 @@ public void notifyChannelActive(Channel channel) { return; } - this.lastEventExecutor = channel.eventLoop(); + try { + this.taskQueueConsumeSync.preempt(channel.eventLoop(), + 1 /* disallow preempt until reached quiescent point, see onEndpointQuiescence() */, Duration.ofSeconds(5)); + } catch (OwnershipSynchronizer.FailedToPreemptOwnershipException e) { + logger.error("notifyChannelActive failed to preemt", e); + channel.close(); + onUnexpectedState("notifyChannelActive", ConnectionContext.State.CONNECTED); + return; + } + this.connectionError = null; this.inProtectMode = false; this.logPrefix = null; @@ -361,7 +371,7 @@ public void notifyChannelActive(Channel channel) { } if (clientOptions.isCancelCommandsOnReconnectFailure()) { - resetInternal(); + reset(); } throw e; @@ -379,11 +389,11 @@ public void notifyReconnectFailed(Throwable t) { return; } - syncAfterTerminated(() -> { + taskQueueConsumeSync.execute(tq -> { if (isClosed()) { - onEndpointClosed(); + onEndpointClosed(tq); } else { - onReconnectFailed(); + onReconnectFailed(tq); } }); } @@ -474,14 +484,14 @@ public void flushCommands() { final ContextualChannel chan = this.channel; switch (chan.context.initialState) { case ENDPOINT_CLOSED: - syncAfterTerminated(this::onEndpointClosed); + taskQueueConsumeSync.execute(this::onEndpointClosed); return; case RECONNECT_FAILED: - syncAfterTerminated(() -> { + 
taskQueueConsumeSync.execute(tq -> { if (isClosed()) { - onEndpointClosed(); + onEndpointClosed(tq); } else { - onReconnectFailed(); + onReconnectFailed(tq); } }); return; @@ -563,7 +573,6 @@ public void disconnect() { */ @Override public void reset() { - if (debugEnabled) { logger.debug("{} reset()", logPrefix()); } @@ -572,23 +581,7 @@ public void reset() { if (chan.context.initialState.isConnected()) { chan.pipeline().fireUserEventTriggered(new ConnectionEvents.Reset()); } - if (!usesMpscQueue) { - cancelCommands("reset"); - } - // Otherwise, unsafe to call cancelBufferedCommands() here. - } - - private void resetInternal() { - if (debugEnabled) { - logger.debug("{} reset()", logPrefix()); - } - - ContextualChannel chan = channel; - if (chan.context.initialState.isConnected()) { - chan.pipeline().fireUserEventTriggered(new ConnectionEvents.Reset()); - } - LettuceAssert.assertState(lastEventExecutor.inEventLoop(), "must be called in lastEventLoop thread"); - cancelCommands("resetInternal"); + taskQueueConsumeSync.execute(tq -> cancelCommands(tq, "reset")); } /** @@ -596,10 +589,8 @@ private void resetInternal() { */ @Override public void initialState() { - if (!usesMpscQueue) { - cancelCommands("initialState"); - } - // Otherwise, unsafe to call cancelBufferedCommands() here. 
+ taskQueueConsumeSync.execute(tq -> cancelCommands(tq, "initialState")); + ContextualChannel currentChannel = this.channel; if (currentChannel.context.initialState.isConnected()) { ChannelFuture close = currentChannel.close(); @@ -637,17 +628,15 @@ public String getId() { } private void scheduleSendJobOnConnected(final ContextualChannel chan) { - LettuceAssert.assertState(chan.eventLoop().inEventLoop(), "must be called in event loop thread"); - // Schedule directly - loopSend(chan, false); + loopDrain(chan, false); } private void scheduleSendJobIfNeeded(final ContextualChannel chan) { final EventLoop eventLoop = chan.eventLoop(); if (eventLoop.inEventLoop()) { // Possible in reactive() mode. - loopSend(chan, false); + loopDrain(chan, false); return; } @@ -668,18 +657,24 @@ private void scheduleSendJobIfNeeded(final ContextualChannel chan) { // Without tryEnter(): // Avg latency: 0.6618976359375001ms // Avg QPS: 192240.1301551348/s - eventLoop.execute(() -> loopSend(chan, true)); + try { + eventLoop.execute(() -> loopDrain(chan, true)); + } catch (Exception e) { + logger.error("scheduleSendJobIfNeeded failed", e); + chan.context.autoBatchFlushEndPointContext.hasOngoingSendLoop.exit(); + throw e; + } } // Otherwise: // 1. offer() (volatile write of producerIndex) synchronizes-before hasOngoingSendLoop.safe.get() == 1 (volatile read) // 2. hasOngoingSendLoop.safe.get() == 1 (volatile read) synchronizes-before - // hasOngoingSendLoop.safe.set(0) (volatile write) in first loopSend0() + // hasOngoingSendLoop.safe.set(0) (volatile write) in first loopDrain0() // 3. hasOngoingSendLoop.safe.set(0) (volatile write) synchronizes-before // taskQueue.isEmpty() (volatile read of producerIndex), which guarantees to see the offered task. 
} - private void loopSend(final ContextualChannel chan, boolean entered) { + private void loopDrain(final ContextualChannel chan, boolean entered) { final ConnectionContext connectionContext = chan.context; final AutoBatchFlushEndPointContext autoBatchFlushEndPointContext = connectionContext.autoBatchFlushEndPointContext; if (connectionContext.isChannelInactiveEventFired() @@ -688,13 +683,13 @@ private void loopSend(final ContextualChannel chan, boolean entered) { } LettuceAssert.assertState(channel == chan, "unexpected: channel not match but closeStatus == null"); - loopSend0(autoBatchFlushEndPointContext, chan, writeSpinCount, entered); + loopDrain0(autoBatchFlushEndPointContext, chan, writeSpinCount, entered); } - private void loopSend0(final AutoBatchFlushEndPointContext autoBatchFlushEndPointContext, final ContextualChannel chan, + private void loopDrain0(final AutoBatchFlushEndPointContext autoBatchFlushEndPointContext, final ContextualChannel chan, int remainingSpinnCount, final boolean entered) { do { - final int count = DefaultAutoBatchFlushEndpoint.this.pollBatch(autoBatchFlushEndPointContext, chan); + final int count = pollAndFlushInBatch(autoBatchFlushEndPointContext, chan); if (count == 0) { break; } @@ -705,7 +700,7 @@ private void loopSend0(final AutoBatchFlushEndPointContext autoBatchFlushEndPoin if (remainingSpinnCount <= 0) { // Don't need to exitUnsafe since we still have an ongoing consume tasks in this thread. 
- chan.eventLoop().execute(() -> loopSend(chan, entered)); + chan.eventLoop().execute(() -> loopDrain(chan, entered)); return; } @@ -715,14 +710,17 @@ private void loopSend0(final AutoBatchFlushEndPointContext autoBatchFlushEndPoin autoBatchFlushEndPointContext.hasOngoingSendLoop.exit(); // Guarantee thread-safety: no dangling tasks in the queue, see scheduleSendJobIfNeeded() if (!taskQueue.isEmpty()) { - loopSend0(autoBatchFlushEndPointContext, chan, remainingSpinnCount, false); + loopDrain0(autoBatchFlushEndPointContext, chan, remainingSpinnCount, false); } } } - private int pollBatch(final AutoBatchFlushEndPointContext autoBatchFlushEndPointContext, ContextualChannel chan) { + private int pollAndFlushInBatch(final AutoBatchFlushEndPointContext autoBatchFlushEndPointContext, ContextualChannel chan) { int count = 0; while (count < batchSize) { + if (debugEnabled) { + taskQueueConsumeSync.assertIsOwnerThreadAndPreemptPrevented(); + } final Object o = this.taskQueue.poll(); if (o == null) { break; @@ -758,7 +756,6 @@ private int pollBatch(final AutoBatchFlushEndPointContext autoBatchFlushEndPoint private void trySetEndpointQuiescence(ContextualChannel chan) { final EventLoop eventLoop = chan.eventLoop(); LettuceAssert.isTrue(eventLoop.inEventLoop(), "unexpected: not in event loop"); - LettuceAssert.isTrue(eventLoop == lastEventExecutor, "unexpected: lastEventLoop not match"); final ConnectionContext connectionContext = chan.context; final @Nullable ConnectionContext.CloseStatus closeStatus = connectionContext.getCloseStatus(); @@ -809,7 +806,7 @@ private void onWillReconnect(@Nonnull final ConnectionContext.CloseStatus closeS // follow the same logic as DefaultEndpoint if (inProtectMode) { - cancelCommands("inProtectMode"); + taskQueueConsumeSync.execute(tq -> cancelCommands(tq, "inProtectMode")); } } @@ -817,16 +814,18 @@ private void onWontReconnect(@Nonnull final ConnectionContext.CloseStatus closeS final AutoBatchFlushEndPointContext 
autoBatchFlushEndPointContext) { // No need to use syncAfterTerminated() since we are already in the event loop. if (isClosed()) { - onEndpointClosed(closeStatus.getAndClearRetryablePendingCommands(), - autoBatchFlushEndPointContext.getAndClearRetryableFailedToSendCommands()); + taskQueueConsumeSync.execute(tq -> onEndpointClosed(tq, closeStatus.getAndClearRetryablePendingCommands(), + autoBatchFlushEndPointContext.getAndClearRetryableFailedToSendCommands())); } else { - fulfillCommands("onConnectionClose called and won't reconnect", - it -> it.completeExceptionally(closeStatus.getErr()), closeStatus.getAndClearRetryablePendingCommands(), - autoBatchFlushEndPointContext.getAndClearRetryableFailedToSendCommands()); + taskQueueConsumeSync.execute(tq -> fulfillCommands("onConnectionClose called and won't reconnect", + it -> it.completeExceptionally(closeStatus.getErr()), tq, closeStatus.getAndClearRetryablePendingCommands(), + autoBatchFlushEndPointContext.getAndClearRetryableFailedToSendCommands())); } } private void onEndpointQuiescence() { + taskQueueConsumeSync.done(1); // allows preemption + if (channel.context.initialState == ConnectionContext.State.ENDPOINT_CLOSED) { return; } @@ -851,29 +850,29 @@ private void offerFirstAll(Deque> commands) { ((DemandAware.Sink) cmd).removeSource(); } }); - this.taskQueue.offerFirstAll(commands); + taskQueueConsumeSync.execute(tq -> tq.offerFirstAll(commands)); QUEUE_SIZE.addAndGet(this, commands.size()); } - private void cancelCommands(String message) { - fulfillCommands(message, RedisCommand::cancel); + private void cancelCommands(UnboundedOfferFirstQueue tq, String message) { + fulfillCommands(message, RedisCommand::cancel, tq); } @SafeVarargs - private final void onEndpointClosed(Queue>... queues) { - fulfillCommands("endpoint closed", callbackOnClose, queues); + private final void onEndpointClosed(UnboundedOfferFirstQueue tq, Queue>... 
queues) { + fulfillCommands("endpoint closed", callbackOnClose, tq, queues); } - private final void onReconnectFailed() { - fulfillCommands("reconnect failed", cmd -> cmd.completeExceptionally(getFailedToReconnectReason())); + private void onReconnectFailed(UnboundedOfferFirstQueue tq) { + fulfillCommands("reconnect failed", cmd -> cmd.completeExceptionally(getFailedToReconnectReason()), tq); } @SafeVarargs @SuppressWarnings("java:S3776" /* Suppress cognitive complexity warning */) private final void fulfillCommands(String message, Consumer> commandConsumer, - Queue>... queues) { + UnboundedOfferFirstQueue taskQueue, Queue>... internalQueues) { int totalCancelledTaskNum = 0; - for (Queue> queue : queues) { + for (Queue> queue : internalQueues) { while (true) { RedisCommand cmd = queue.poll(); if (cmd == null) { @@ -890,7 +889,7 @@ private final void fulfillCommands(String message, Consumer cancelCommands(String.format("%s: state not match: expect '%s', got '%s'", caller, exp, actual))); + taskQueueConsumeSync.execute( + tq -> cancelCommands(tq, String.format("%s: state not match: expect '%s', got '%s'", caller, exp, actual))); } private void channelFlush(Channel channel) { @@ -1017,23 +1016,6 @@ private ChannelFuture channelWrite(Channel channel, RedisCommand comman return channel.write(command); } - /* - * Synchronize after the endpoint is terminated. 
This is to ensure only one thread can access the task queue after endpoint - * is terminated (state is RECONNECT_FAILED/ENDPOINT_CLOSED) - */ - private void syncAfterTerminated(Runnable runnable) { - final EventExecutor localLastEventExecutor = lastEventExecutor; - if (localLastEventExecutor.inEventLoop()) { - runnable.run(); - } else { - localLastEventExecutor.execute(() -> { - runnable.run(); - LettuceAssert.isTrue(lastEventExecutor == localLastEventExecutor, - "lastEventLoop must not be changed after terminated"); - }); - } - } - private enum Reliability { AT_MOST_ONCE, AT_LEAST_ONCE } @@ -1103,7 +1085,7 @@ public void operationComplete(Future future) { final Throwable retryableErr = checkSendResult(future); if (retryableErr != null && autoBatchFlushEndPointContext.addRetryableFailedToSendCommand(cmd, retryableErr)) { - // Close connection on first transient write failure + // Close connection on first transient write failure. internalCloseConnectionIfNeeded(retryableErr); } @@ -1163,6 +1145,7 @@ private void internalCloseConnectionIfNeeded(Throwable reason) { return; } + // It is really rare (maybe impossible?) that the connection is still active. 
logger.error( "[internalCloseConnectionIfNeeded][interesting][{}] close the connection due to write error, reason: '{}'", endpoint.logPrefix(), reason.getMessage(), reason); diff --git a/src/test/java/io/lettuce/core/concurrency/OwnershipSynchronizerTest.java b/src/test/java/io/lettuce/core/concurrency/OwnershipSynchronizerTest.java new file mode 100644 index 0000000000..b0a6894bb4 --- /dev/null +++ b/src/test/java/io/lettuce/core/concurrency/OwnershipSynchronizerTest.java @@ -0,0 +1,119 @@ +package io.lettuce.core.concurrency; + +import io.lettuce.core.resource.ClientResources; +import io.lettuce.core.resource.DefaultClientResources; +import io.netty.util.concurrent.EventExecutor; +import io.netty.util.internal.logging.InternalLogger; +import io.netty.util.internal.logging.InternalLoggerFactory; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.junit.jupiter.MockitoExtension; +import org.mockito.junit.jupiter.MockitoSettings; +import org.mockito.quality.Strictness; + +import static org.assertj.core.api.Assertions.assertThat; + +@ExtendWith(MockitoExtension.class) +@MockitoSettings(strictness = Strictness.LENIENT) +class OwnershipSynchronizerTest { + + private static final InternalLogger logger = InternalLoggerFactory.getInstance(OwnershipSynchronizerTest.class); + + private static final int THREADS = 30; + + private static final int ITERATIONS = 10000; + + private static final int RUNS = 40; + + private static final int EXPECT_RESULT = THREADS * ITERATIONS * RUNS; + + private static final int NUM_PREEMPTS = 1000; + + @Test + void testOwnershipSynchronizer() { + final ClientResources clientResources = DefaultClientResources.builder().build(); + for (int i = 0; i < 10; i++) { + test(clientResources); + } + clientResources.shutdown(); + } + + private void test(ClientResources clientResources) { + final OwnershipSynchronizer ownershipSynchronizer = new OwnershipSynchronizer<>(new IntWrapper(0), + 
clientResources.eventExecutorGroup().next(), 0, logger); + + Thread[] threads = new Thread[THREADS]; + for (int i = 0; i < threads.length; i++) { + threads[i] = new Thread(() -> { + for (int j = 0; j < ITERATIONS; j++) { + ownershipSynchronizer.execute(counter -> { + for (int k = 0; k < RUNS; k++) { + counter.increment(); + } + }); + } + }); + threads[i].start(); + } + + for (int i = 0; i < NUM_PREEMPTS; i++) { + final EventExecutor eventExecutor = clientResources.eventExecutorGroup().next(); + eventExecutor.execute(() -> { + try { + ownershipSynchronizer.preempt(eventExecutor, 1); + } catch (OwnershipSynchronizer.FailedToPreemptOwnershipException e) { + throw new RuntimeException(e); + } + eventExecutor.schedule(() -> ownershipSynchronizer.done(1), 3, java.util.concurrent.TimeUnit.MILLISECONDS); + }); + try { + Thread.sleep(1); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + + for (Thread thread : threads) { + try { + thread.join(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + + for (int i = 0; i < Runtime.getRuntime().availableProcessors(); i++) { + final EventExecutor eventExecutor = clientResources.eventExecutorGroup().next(); + try { + eventExecutor.submit(() -> { + }).get(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + assertThat(ownershipSynchronizer.protectedResource.getValue()).isEqualTo(EXPECT_RESULT); + } + + public static class IntWrapper { + + private int value; + + public IntWrapper(int value) { + this.value = value; + } + + public int getValue() { + return value; + } + + public void setValue(int value) { + this.value = value; + } + + public void increment() { + value++; + } + + } + +}