diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 00000000..4a6a1abd --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,2 @@ +[resolver] +incompatible-rust-versions = "fallback" diff --git a/proto/apipb.proto b/proto/apipb.proto new file mode 100644 index 00000000..0f7f6aa4 --- /dev/null +++ b/proto/apipb.proto @@ -0,0 +1,24 @@ +syntax = "proto3"; +package apipb; + +import "gogoproto/gogo.proto"; +import "rustproto.proto"; + +option java_package = "org.tikv.kvproto"; +option (gogoproto.goproto_sizecache_all) = false; +option (gogoproto.goproto_unkeyed_all) = false; +option (gogoproto.goproto_unrecognized_all) = false; +option (gogoproto.marshaler_all) = true; +option (gogoproto.sizer_all) = true; +option (gogoproto.unmarshaler_all) = true; +option (rustproto.lite_runtime_all) = true; + +// KeyspaceIdentity identifies a V3 tenant keyspace. +// +// For V3 tenant-scoped APIs, namespace_id must be non-zero and keyspace_id must +// satisfy 0 < keyspace_id < 2^24. Namespace 0 and keyspace 0 mean +// missing/invalid identity and must be rejected by V3 validators. 
+message KeyspaceIdentity { + uint32 namespace_id = 1; + uint32 keyspace_id = 2; +} diff --git a/proto/autoid.proto b/proto/autoid.proto index 252ae58e..1f37aba3 100644 --- a/proto/autoid.proto +++ b/proto/autoid.proto @@ -1,47 +1,52 @@ syntax = "proto3"; package autoid; +import "apipb.proto"; import "gogoproto/gogo.proto"; import "rustproto.proto"; -option (gogoproto.sizer_all) = true; +option java_package = "org.tikv.kvproto"; +option (gogoproto.goproto_sizecache_all) = false; +option (gogoproto.goproto_unkeyed_all) = false; +option (gogoproto.goproto_unrecognized_all) = false; option (gogoproto.marshaler_all) = true; +option (gogoproto.sizer_all) = true; option (gogoproto.unmarshaler_all) = true; option (rustproto.lite_runtime_all) = true; -option java_package = "org.tikv.kvproto"; - message AutoIDRequest { - int64 dbID = 1; - int64 tblID = 2; - bool isUnsigned = 3; - uint64 n = 4; - int64 increment = 5; - int64 offset = 6; - uint32 keyspaceID = 7; + int64 dbID = 1; + int64 tblID = 2; + bool isUnsigned = 3; + uint64 n = 4; + int64 increment = 5; + int64 offset = 6; + // V1/V2 compatibility keyspace id. V3 should use identity. + uint32 keyspaceID = 7; + // V3 keyspace identity. 
+ apipb.KeyspaceIdentity identity = 8; } message AutoIDResponse { - int64 min = 1; - int64 max = 2; + int64 min = 1; + int64 max = 2; - bytes errmsg = 3; + bytes errmsg = 3; } message RebaseRequest { - int64 dbID = 1; - int64 tblID = 2; - bool isUnsigned = 3; - int64 base = 4; - bool force = 5; + int64 dbID = 1; + int64 tblID = 2; + bool isUnsigned = 3; + int64 base = 4; + bool force = 5; } message RebaseResponse { - bytes errmsg = 1; + bytes errmsg = 1; } service AutoIDAlloc { - rpc AllocAutoID(AutoIDRequest) returns (AutoIDResponse) {} - rpc Rebase(RebaseRequest) returns (RebaseResponse) {} + rpc AllocAutoID(AutoIDRequest) returns (AutoIDResponse) {} + rpc Rebase(RebaseRequest) returns (RebaseResponse) {} } - diff --git a/proto/coprocessor.proto b/proto/coprocessor.proto index d0f83a0d..43ada4c6 100644 --- a/proto/coprocessor.proto +++ b/proto/coprocessor.proto @@ -2,121 +2,202 @@ syntax = "proto3"; package coprocessor; import "errorpb.proto"; -import "kvrpcpb.proto"; import "gogoproto/gogo.proto"; -import "rustproto.proto"; +import "kvrpcpb.proto"; import "metapb.proto"; +import "rustproto.proto"; +option java_package = "org.tikv.kvproto"; +option (gogoproto.goproto_sizecache_all) = false; +option (gogoproto.goproto_unkeyed_all) = false; +option (gogoproto.goproto_unrecognized_all) = false; option (gogoproto.marshaler_all) = true; option (gogoproto.sizer_all) = true; option (gogoproto.unmarshaler_all) = true; option (rustproto.lite_runtime_all) = true; -option java_package = "org.tikv.kvproto"; - - // [start, end) message KeyRange { - bytes start = 1; - bytes end = 2; + bytes start = 1; + bytes end = 2; +} + +// KeyRange with an attached read_ts (version). +// It is used by TiCI versioned lookup. Callers must ensure `range` is a point range. 
+message VersionedKeyRange { + KeyRange range = 1; + uint64 read_ts = 2; } message Request { - kvrpcpb.Context context = 1; - int64 tp = 2; - bytes data = 3; - uint64 start_ts = 7; - repeated KeyRange ranges = 4; - - // If cache is enabled, TiKV returns cache hit instead of data if - // its last version matches this `cache_if_match_version`. - bool is_cache_enabled = 5; - uint64 cache_if_match_version = 6; - // Any schema-ful storage to validate schema correctness if necessary. - int64 schema_ver = 8; - bool is_trace_enabled = 9; - // paging_size is 0 when it's disabled, otherwise, it should be a positive number. - uint64 paging_size = 10; - // tasks stores the batched coprocessor tasks sent to the same tikv store. - repeated StoreBatchTask tasks = 11; - uint64 connection_id = 12; // This is the session id between a client and tidb - string connection_alias = 13; // This is the session alias between a client and tidb + kvrpcpb.Context context = 1; + int64 tp = 2; + bytes data = 3; + uint64 start_ts = 7; + repeated KeyRange ranges = 4; + + // If cache is enabled, TiKV returns cache hit instead of data if + // its last version matches this `cache_if_match_version`. + bool is_cache_enabled = 5; + uint64 cache_if_match_version = 6; + // Any schema-ful storage to validate schema correctness if necessary. + int64 schema_ver = 8; + bool is_trace_enabled = 9; + // paging_size is 0 when it's disabled, otherwise, it should be a positive number. + uint64 paging_size = 10; + // tasks stores the batched coprocessor tasks sent to the same tikv store. + repeated StoreBatchTask tasks = 11; + uint64 connection_id = 12; // This is the session id between a client and tidb + string connection_alias = 13; // This is the session alias between a client and tidb + + repeated TableShardInfos table_shard_infos = 14; // Shard infos for FTS index, used by TiFlash reading TiCI. + // Versioned point ranges for TiCI lookup. 
+ // When `versioned_ranges` is non-empty, all `versioned_ranges[i].range` must be point range. + repeated VersionedKeyRange versioned_ranges = 15; + // max_keys_read is 0 when disabled, otherwise limits storage engine keys read per coprocessor task. + uint64 max_keys_read = 16; } message Response { - bytes data = 1 [(gogoproto.customtype) = "github.com/pingcap/kvproto/pkg/sharedbytes.SharedBytes", (gogoproto.nullable) = false]; - errorpb.Error region_error = 2; - kvrpcpb.LockInfo locked = 3; - string other_error = 4; - KeyRange range = 5; - - // This field is always filled for compatibility consideration. However - // newer TiDB should respect `exec_details_v2` field instead. - kvrpcpb.ExecDetails exec_details = 6; - // This field is provided in later versions, containing more detailed - // information. - kvrpcpb.ExecDetailsV2 exec_details_v2 = 11; + bytes data = 1 [ + (gogoproto.customtype) = "github.com/pingcap/kvproto/pkg/sharedbytes.SharedBytes", + (gogoproto.nullable) = false + ]; + errorpb.Error region_error = 2; + kvrpcpb.LockInfo locked = 3; + string other_error = 4; + KeyRange range = 5; + + // This field is always filled for compatibility consideration. However + // newer TiDB should respect `exec_details_v2` field instead. + kvrpcpb.ExecDetails exec_details = 6; + // This field is provided in later versions, containing more detailed + // information. + kvrpcpb.ExecDetailsV2 exec_details_v2 = 11; + + bool is_cache_hit = 7; + uint64 cache_last_version = 8; + bool can_be_cached = 9; + + reserved 10; + + // Contains the latest buckets version of the region. + // Clients should query PD to update buckets in cache if its is stale. + uint64 latest_buckets_version = 12; + + // StoreBatchTaskResponse is the collection of batch task responses. 
+ repeated StoreBatchTaskResponse batch_responses = 13; +} - bool is_cache_hit = 7; - uint64 cache_last_version = 8; - bool can_be_cached = 9; +message RegionInfo { + uint64 region_id = 1; + metapb.RegionEpoch region_epoch = 2; + repeated KeyRange ranges = 3; +} - reserved 10; +message ShardInfo { + uint64 shard_id = 1; + uint64 shard_epoch = 2; + repeated KeyRange ranges = 3; +} - // Contains the latest buckets version of the region. - // Clients should query PD to update buckets in cache if its is stale. - uint64 latest_buckets_version = 12; +message TableShardInfos { + // The executor ID is used to identify the tici executor. + string executor_id = 1; + // The shard_infos contains the shard information for each tici executor. + repeated ShardInfo shard_infos = 2; +} - // StoreBatchTaskResponse is the collection of batch task responses. - repeated StoreBatchTaskResponse batch_responses = 13; +message TiCIEstimateCountRequest { + kvrpcpb.Context context = 1; + uint64 start_ts = 2; + int64 table_id = 3; + int64 index_id = 4; + bytes fts_query_info = 5; + string time_zone_name = 6; + int64 time_zone_offset = 7; + repeated ShardInfo shard_infos = 8; } -message RegionInfo { - uint64 region_id = 1; - metapb.RegionEpoch region_epoch = 2; - repeated KeyRange ranges = 3; +message TiCIEstimateCountResponse { + uint64 est_count = 1; + string other_error = 2; } message TableRegions { - int64 physical_table_id = 1; - repeated RegionInfo regions = 2; + int64 physical_table_id = 1; + repeated RegionInfo regions = 2; } message BatchRequest { - kvrpcpb.Context context = 1; - int64 tp = 2; - bytes data = 3; - repeated RegionInfo regions = 4; - uint64 start_ts = 5; - // Any schema-ful storage to validate schema correctness if necessary. 
- int64 schema_ver = 6; - // Used for partition table scan - repeated TableRegions table_regions = 7; - string log_id = 8; - uint64 connection_id = 9; // This is the session id between a client and tidb - string connection_alias = 10; // This is the session alias between a client and tidb + kvrpcpb.Context context = 1; + int64 tp = 2; + bytes data = 3; + repeated RegionInfo regions = 4; + uint64 start_ts = 5; + // Any schema-ful storage to validate schema correctness if necessary. + int64 schema_ver = 6; + // Used for partition table scan + repeated TableRegions table_regions = 7; + string log_id = 8; + uint64 connection_id = 9; // This is the session id between a client and tidb + string connection_alias = 10; // This is the session alias between a client and tidb + repeated TableShardInfos table_shard_infos = 11; // Shard infos for FTS index, used by TiFlash reading TiCI. } message BatchResponse { - bytes data = 1 [(gogoproto.customtype) = "github.com/pingcap/kvproto/pkg/sharedbytes.SharedBytes", (gogoproto.nullable) = false]; - string other_error = 2; - kvrpcpb.ExecDetails exec_details = 3; - repeated metapb.Region retry_regions = 4; + bytes data = 1 [ + (gogoproto.customtype) = "github.com/pingcap/kvproto/pkg/sharedbytes.SharedBytes", + (gogoproto.nullable) = false + ]; + string other_error = 2; + kvrpcpb.ExecDetails exec_details = 3; + repeated metapb.Region retry_regions = 4; + repeated ShardInfo retry_shards = 5; } message StoreBatchTask { - uint64 region_id = 1; - metapb.RegionEpoch region_epoch = 2; - metapb.Peer peer = 3; - repeated KeyRange ranges = 4; - uint64 task_id = 5; + uint64 region_id = 1; + metapb.RegionEpoch region_epoch = 2; + metapb.Peer peer = 3; + repeated KeyRange ranges = 4; + uint64 task_id = 5; + // Versioned point ranges for TiCI lookup. + // When `versioned_ranges` is non-empty, all `versioned_ranges[i].range` must be point range. 
+ repeated VersionedKeyRange versioned_ranges = 6; } message StoreBatchTaskResponse { - bytes data = 1 [(gogoproto.customtype) = "github.com/pingcap/kvproto/pkg/sharedbytes.SharedBytes", (gogoproto.nullable) = false]; - errorpb.Error region_error = 2; - kvrpcpb.LockInfo locked = 3; - string other_error = 4; - uint64 task_id = 5; - kvrpcpb.ExecDetailsV2 exec_details_v2 = 6; + bytes data = 1 [ + (gogoproto.customtype) = "github.com/pingcap/kvproto/pkg/sharedbytes.SharedBytes", + (gogoproto.nullable) = false + ]; + errorpb.Error region_error = 2; + kvrpcpb.LockInfo locked = 3; + string other_error = 4; + uint64 task_id = 5; + kvrpcpb.ExecDetailsV2 exec_details_v2 = 6; +} + +message DelegateRequest { + kvrpcpb.Context context = 1; + uint64 start_ts = 2; + repeated KeyRange ranges = 3; + // Used for avoid redundant mem-table copying. + // If the sequence is the same, tikv-server will not return the mem-table. + uint64 mem_table_sequence = 4; + + // Used for avoid redundant snapshot copying. + // If the sequence is the same, tikv-server will not return the snapshot. + uint64 snapshot_sequence = 5; +} + +message DelegateResponse { + bytes mem_table_data = 1; + bytes snapshot = 2; + errorpb.Error region_error = 3; + kvrpcpb.LockInfo locked = 4; + string other_error = 5; + // Used for avoid redundant mem-table copying. 
+ uint64 mem_table_sequence = 6; } diff --git a/proto/disaggregated.proto b/proto/disaggregated.proto index adaa8f61..bbe5dc24 100644 --- a/proto/disaggregated.proto +++ b/proto/disaggregated.proto @@ -1,169 +1,181 @@ syntax = "proto3"; package disaggregated; +import "apipb.proto"; import "coprocessor.proto"; +import "gogoproto/gogo.proto"; import "kvrpcpb.proto"; +import "rustproto.proto"; option java_package = "org.tikv.kvproto"; +option (gogoproto.goproto_sizecache_all) = false; +option (gogoproto.goproto_unkeyed_all) = false; +option (gogoproto.goproto_unrecognized_all) = false; +option (gogoproto.marshaler_all) = true; +option (gogoproto.sizer_all) = true; +option (gogoproto.unmarshaler_all) = true; +option (rustproto.lite_runtime_all) = true; /// For S3 lock service /// message S3LockResult { - oneof error { - Success success = 1; - NotOwner not_owner = 2; - Conflict conflict = 3; - } + oneof error { + Success success = 1; + NotOwner not_owner = 2; + Conflict conflict = 3; + } } message Success {} + // Error caused by S3GC owner changed // client should retry -message NotOwner{ -} +message NotOwner {} + // Error caused by concurrency conflict, // request cancel message Conflict { - string reason = 1; + string reason = 1; } message TryAddLockRequest { - // The data file key to add lock - bytes data_file_key = 1; - // The lock store id - uint64 lock_store_id = 3; - // The upload sequence number of lock store - uint64 lock_seq = 4; + // The data file key to add lock + bytes data_file_key = 1; + // The lock store id + uint64 lock_store_id = 3; + // The upload sequence number of lock store + uint64 lock_seq = 4; } message TryAddLockResponse { - S3LockResult result = 1; + S3LockResult result = 1; } message TryMarkDeleteRequest { - // The data file key to be marked as deleted - bytes data_file_key = 1; + // The data file key to be marked as deleted + bytes data_file_key = 1; } message TryMarkDeleteResponse { - S3LockResult result = 1; + S3LockResult result = 1; } 
/// For disagg compute node init its disagg configuration /// -message GetDisaggConfigRequest { -} +message GetDisaggConfigRequest {} message DisaggS3Config { - string bucket = 1; - string root = 2; - string endpoint = 3; + string bucket = 1; + string root = 2; + string endpoint = 3; } message GetDisaggConfigResponse { - DisaggS3Config s3_config = 1; + DisaggS3Config s3_config = 1; } /// For compute task dispatch and data exchange /// message DisaggTaskMeta { - uint64 start_ts = 1; // start ts of a query - // gather_id + query_ts + server_id + local_query_id to represent a global unique query. - int64 gather_id = 9; // used to distinguish different gathers in the mpp query - uint64 query_ts = 2; // timestamp when start to execute query, used for TiFlash miniTSO schedule. - uint64 server_id = 3; // TiDB server id - uint64 local_query_id = 4; // unique local query_id if tidb don't restart. - int64 task_id = 5; // if task id is -1 , it indicates a tidb task. - string executor_id = 6; // the exectuor id - uint32 keyspace_id = 7; // keyspace id of the request - kvrpcpb.APIVersion api_version = 8; // API version of the request - uint64 connection_id = 10; // This is the session id between a client and tidb - string connection_alias = 11; // This is the session alias between a client and tidb + uint64 start_ts = 1; // start ts of a query + // gather_id + query_ts + server_id + local_query_id to represent a global unique query. + int64 gather_id = 9; // used to distinguish different gathers in the mpp query + uint64 query_ts = 2; // timestamp when start to execute query, used for TiFlash miniTSO schedule. + uint64 server_id = 3; // TiDB server id + uint64 local_query_id = 4; // unique local query_id if tidb don't restart. + int64 task_id = 5; // if task id is -1 , it indicates a tidb task. + string executor_id = 6; // the exectuor id + uint32 keyspace_id = 7; // V1/V2 compatibility keyspace id of the request. V3 should use keyspace_identity. 
+ kvrpcpb.APIVersion api_version = 8; // API version of the request + uint64 connection_id = 10; // This is the session id between a client and tidb + string connection_alias = 11; // This is the session alias between a client and tidb + // V3 keyspace identity of the request. + apipb.KeyspaceIdentity keyspace_identity = 12; } message DisaggReadError { - int32 code = 1; - string msg = 2; + int32 code = 1; + string msg = 2; } message EstablishDisaggTaskError { - oneof errors { - ErrorRegion error_region = 1; - ErrorLocked error_locked = 2; + oneof errors { + ErrorRegion error_region = 1; + ErrorLocked error_locked = 2; - ErrorOther error_other = 99; - } + ErrorOther error_other = 99; + } } message ErrorRegion { - string msg = 1; - // The read node needs to update its region cache about these regions. - repeated uint64 region_ids = 2; + string msg = 1; + // The read node needs to update its region cache about these regions. + repeated uint64 region_ids = 2; } message ErrorLocked { - string msg = 1; - // The read node needs to resolve these locks. - repeated kvrpcpb.LockInfo locked = 2; + string msg = 1; + // The read node needs to resolve these locks. + repeated kvrpcpb.LockInfo locked = 2; } message ErrorOther { - int32 code = 1; - string msg = 2; + int32 code = 1; + string msg = 2; } message EstablishDisaggTaskRequest { - DisaggTaskMeta meta = 1; - string address = 2; // target address of this task. - // The write node needs to ensure that subsequent - // FetchDisaggPagesRequest can be processed within timeout_s. - // unit: seconds - int64 timeout_s = 3; - // The key ranges, Region meta that read node need to execute TableScan - repeated coprocessor.RegionInfo regions = 4; - int64 schema_ver = 5; - // Used for PartitionTableScan - repeated coprocessor.TableRegions table_regions = 6; - // The encoded TableScan/PartitionTableScan + Selection. - bytes encoded_plan = 7; + DisaggTaskMeta meta = 1; + string address = 2; // target address of this task. 
+ // The write node needs to ensure that subsequent + // FetchDisaggPagesRequest can be processed within timeout_s. + // unit: seconds + int64 timeout_s = 3; + // The key ranges, Region meta that read node need to execute TableScan + repeated coprocessor.RegionInfo regions = 4; + int64 schema_ver = 5; + // Used for PartitionTableScan + repeated coprocessor.TableRegions table_regions = 6; + // The encoded TableScan/PartitionTableScan + Selection. + bytes encoded_plan = 7; } message EstablishDisaggTaskResponse { - EstablishDisaggTaskError error = 1; + EstablishDisaggTaskError error = 1; - // Write node maintains a snapshot with a lease time. - // Read node should read the delta pages - // (ColumnFileInMemory and ColumnFileTiny) - // along with this store_id and snapshot_id. - uint64 store_id = 3; // metapb.Store.id - DisaggTaskMeta snapshot_id = 4; - // Serialized disaggregated tasks (per physical table) - repeated bytes tables = 5; + // Write node maintains a snapshot with a lease time. + // Read node should read the delta pages + // (ColumnFileInMemory and ColumnFileTiny) + // along with this store_id and snapshot_id. 
+ uint64 store_id = 3; // metapb.Store.id + DisaggTaskMeta snapshot_id = 4; + // Serialized disaggregated tasks (per physical table) + repeated bytes tables = 5; } message CancelDisaggTaskRequest { - DisaggTaskMeta meta = 1; + DisaggTaskMeta meta = 1; } message CancelDisaggTaskResponse {} message FetchDisaggPagesRequest { - // The snapshot id to fetch pages - DisaggTaskMeta snapshot_id = 1; - int64 table_id = 2; - uint64 segment_id = 3; - // It must be a subset of the delta pages ids returned - // in EstablishDisaggTaskResponse.segments - repeated uint64 page_ids = 4; + // The snapshot id to fetch pages + DisaggTaskMeta snapshot_id = 1; + int64 table_id = 2; + uint64 segment_id = 3; + // It must be a subset of the delta pages ids returned + // in EstablishDisaggTaskResponse.segments + repeated uint64 page_ids = 4; } message PagesPacket { - DisaggReadError error = 1; + DisaggReadError error = 1; - // Serialized column file data - // * ColumnFilePersisted alone with its schema, page data, field offsets - repeated bytes pages = 2; - // * ColumnFileInMemory alone with its serialized block - repeated bytes chunks = 3; + // Serialized column file data + // * ColumnFilePersisted alone with its schema, page data, field offsets + repeated bytes pages = 2; + // * ColumnFileInMemory alone with its serialized block + repeated bytes chunks = 3; - // Return tipb.SelectResponse.execution_summaries in the - // last packet - repeated bytes summaries = 4; + // Return tipb.SelectResponse.execution_summaries in the + // last packet + repeated bytes summaries = 4; } diff --git a/proto/include/gogoproto/gogo.proto b/proto/include/gogoproto/gogo.proto index bc8d889f..b80c8565 100644 --- a/proto/include/gogoproto/gogo.proto +++ b/proto/include/gogoproto/gogo.proto @@ -84,6 +84,9 @@ extend google.protobuf.FileOptions { optional bool goproto_registration = 63032; optional bool messagename_all = 63033; + + optional bool goproto_sizecache_all = 63034; + optional bool goproto_unkeyed_all = 
63035; } extend google.protobuf.MessageOptions { @@ -118,6 +121,9 @@ extend google.protobuf.MessageOptions { optional bool typedecl = 64030; optional bool messagename = 64033; + + optional bool goproto_sizecache = 64034; + optional bool goproto_unkeyed = 64035; } extend google.protobuf.FieldOptions { @@ -133,4 +139,6 @@ extend google.protobuf.FieldOptions { optional bool stdtime = 65010; optional bool stdduration = 65011; + optional bool wktpointer = 65012; + } diff --git a/proto/include/rustproto.proto b/proto/include/rustproto.proto index 83e76fdf..1bc53345 100644 --- a/proto/include/rustproto.proto +++ b/proto/include/rustproto.proto @@ -1,47 +1,47 @@ syntax = "proto2"; -import "google/protobuf/descriptor.proto"; - // see https://github.com/gogo/protobuf/blob/master/gogoproto/gogo.proto // for the original idea package rustproto; +import "google/protobuf/descriptor.proto"; + extend google.protobuf.FileOptions { - // When true, oneof field is generated public - optional bool expose_oneof_all = 17001; - // When true all fields are public, and not accessors generated - optional bool expose_fields_all = 17003; - // When false, `get_`, `set_`, `mut_` etc. accessors are not generated - optional bool generate_accessors_all = 17004; - // Use `bytes::Bytes` for `bytes` fields - optional bool carllerche_bytes_for_bytes_all = 17011; - // Use `bytes::Bytes` for `string` fields - optional bool carllerche_bytes_for_string_all = 17012; - // When true, will only generate codes that works with lite runtime. - optional bool lite_runtime_all = 17035; + // When true, oneof field is generated public + optional bool expose_oneof_all = 17001; + // When true all fields are public, and not accessors generated + optional bool expose_fields_all = 17003; + // When false, `get_`, `set_`, `mut_` etc. 
accessors are not generated + optional bool generate_accessors_all = 17004; + // Use `bytes::Bytes` for `bytes` fields + optional bool carllerche_bytes_for_bytes_all = 17011; + // Use `bytes::Bytes` for `string` fields + optional bool carllerche_bytes_for_string_all = 17012; + // When true, will only generate codes that works with lite runtime. + optional bool lite_runtime_all = 17035; } extend google.protobuf.MessageOptions { - // When true, oneof field is generated public - optional bool expose_oneof = 17001; - // When true all fields are public, and not accessors generated - optional bool expose_fields = 17003; - // When false, `get_`, `set_`, `mut_` etc. accessors are not generated - optional bool generate_accessors = 17004; - // Use `bytes::Bytes` for `bytes` fields - optional bool carllerche_bytes_for_bytes = 17011; - // Use `bytes::Bytes` for `string` fields - optional bool carllerche_bytes_for_string = 17012; + // When true, oneof field is generated public + optional bool expose_oneof = 17001; + // When true all fields are public, and not accessors generated + optional bool expose_fields = 17003; + // When false, `get_`, `set_`, `mut_` etc. accessors are not generated + optional bool generate_accessors = 17004; + // Use `bytes::Bytes` for `bytes` fields + optional bool carllerche_bytes_for_bytes = 17011; + // Use `bytes::Bytes` for `string` fields + optional bool carllerche_bytes_for_string = 17012; } extend google.protobuf.FieldOptions { - // When true all fields are public, and not accessors generated - optional bool expose_fields_field = 17003; - // When false, `get_`, `set_`, `mut_` etc. 
accessors are not generated - optional bool generate_accessors_field = 17004; - // Use `bytes::Bytes` for `bytes` fields - optional bool carllerche_bytes_for_bytes_field = 17011; - // Use `bytes::Bytes` for `string` fields - optional bool carllerche_bytes_for_string_field = 17012; -} \ No newline at end of file + // When true all fields are public, and not accessors generated + optional bool expose_fields_field = 17003; + // When false, `get_`, `set_`, `mut_` etc. accessors are not generated + optional bool generate_accessors_field = 17004; + // Use `bytes::Bytes` for `bytes` fields + optional bool carllerche_bytes_for_bytes_field = 17011; + // Use `bytes::Bytes` for `string` fields + optional bool carllerche_bytes_for_string_field = 17012; +} diff --git a/proto/keyspacepb.proto b/proto/keyspacepb.proto index e817e980..c941426a 100644 --- a/proto/keyspacepb.proto +++ b/proto/keyspacepb.proto @@ -1,35 +1,42 @@ syntax = "proto3"; package keyspacepb; -import "pdpb.proto"; - +import "apipb.proto"; import "gogoproto/gogo.proto"; +import "pdpb.proto"; import "rustproto.proto"; -option (gogoproto.sizer_all) = true; +option java_package = "org.tikv.kvproto"; +option (gogoproto.goproto_sizecache_all) = false; +option (gogoproto.goproto_unkeyed_all) = false; +option (gogoproto.goproto_unrecognized_all) = false; option (gogoproto.marshaler_all) = true; +option (gogoproto.sizer_all) = true; option (gogoproto.unmarshaler_all) = true; option (rustproto.lite_runtime_all) = true; -option java_package = "org.tikv.kvproto"; - // Keyspace provides services to manage keyspaces. 
service Keyspace { - rpc LoadKeyspace (LoadKeyspaceRequest) returns (LoadKeyspaceResponse) {} + rpc LoadKeyspace(LoadKeyspaceRequest) returns (LoadKeyspaceResponse) {} + rpc LookupKeyspace(LookupKeyspaceRequest) returns (LookupKeyspaceResponse) {} + rpc AllocateNamespace(AllocateNamespaceRequest) returns (AllocateNamespaceResponse) {} // WatchKeyspaces first return all current keyspaces' metadata as its first response. // Then, it returns responses containing keyspaces that had their metadata changed. - rpc WatchKeyspaces (WatchKeyspacesRequest) returns (stream WatchKeyspacesResponse) {} + rpc WatchKeyspaces(WatchKeyspacesRequest) returns (stream WatchKeyspacesResponse) {} rpc UpdateKeyspaceState(UpdateKeyspaceStateRequest) returns (UpdateKeyspaceStateResponse) {} rpc GetAllKeyspaces(GetAllKeyspacesRequest) returns (GetAllKeyspacesResponse) {} } message KeyspaceMeta { + // V1/V2 compatibility keyspace id. V3 should read identity instead. uint32 id = 1; string name = 2; KeyspaceState state = 3; int64 created_at = 4; int64 state_changed_at = 5; map config = 7; + // Canonical V3 keyspace identity. + apipb.KeyspaceIdentity identity = 8; } enum KeyspaceState { @@ -42,6 +49,9 @@ enum KeyspaceState { message LoadKeyspaceRequest { pdpb.RequestHeader header = 1; string name = 2; + // Optional namespace for namespace-scoped name lookup in V3. + // If unset, V3 name-only lookup should use LookupKeyspace and may return multiple keyspaces. + uint32 namespace_id = 3; } message LoadKeyspaceResponse { @@ -49,6 +59,27 @@ message LoadKeyspaceResponse { KeyspaceMeta keyspace = 2; } +message LookupKeyspaceRequest { + pdpb.RequestHeader header = 1; + string name = 2; + // Optional namespace for namespace-scoped lookup. If unset, lookup by name is global and may return multiple keyspaces. 
+ uint32 namespace_id = 3; +} + +message LookupKeyspaceResponse { + pdpb.ResponseHeader header = 1; + repeated KeyspaceMeta keyspaces = 2; +} + +message AllocateNamespaceRequest { + pdpb.RequestHeader header = 1; +} + +message AllocateNamespaceResponse { + pdpb.ResponseHeader header = 1; + uint32 namespace_id = 2; +} + message WatchKeyspacesRequest { pdpb.RequestHeader header = 1; } @@ -58,24 +89,32 @@ message WatchKeyspacesResponse { repeated KeyspaceMeta keyspaces = 2; } -message UpdateKeyspaceStateRequest{ +message UpdateKeyspaceStateRequest { pdpb.RequestHeader header = 1; + // V1/V2 compatibility keyspace id. V3 should use identity. uint32 id = 2; KeyspaceState state = 3; + // V3 keyspace identity. + apipb.KeyspaceIdentity identity = 4; } -message UpdateKeyspaceStateResponse{ +message UpdateKeyspaceStateResponse { pdpb.ResponseHeader header = 1; KeyspaceMeta keyspace = 2; } -message GetAllKeyspacesRequest{ +message GetAllKeyspacesRequest { pdpb.RequestHeader header = 1; + // V1/V2 compatibility pagination cursor. V3 should use namespace_id and start_identity. uint32 start_id = 2; uint32 limit = 3; + // V3 namespace-limited pagination. Must be non-zero in V3. + uint32 namespace_id = 4; + // V3 pagination cursor within namespace_id. 
+ apipb.KeyspaceIdentity start_identity = 5; } -message GetAllKeyspacesResponse{ +message GetAllKeyspacesResponse { pdpb.ResponseHeader header = 1; repeated KeyspaceMeta keyspaces = 2; } diff --git a/proto/kvrpcpb.proto b/proto/kvrpcpb.proto index 288ff39e..a905b538 100644 --- a/proto/kvrpcpb.proto +++ b/proto/kvrpcpb.proto @@ -1,21 +1,24 @@ syntax = "proto3"; package kvrpcpb; -import "metapb.proto"; +import "apipb.proto"; +import "deadlock.proto"; import "errorpb.proto"; import "gogoproto/gogo.proto"; +import "metapb.proto"; +import "resource_manager.proto"; import "rustproto.proto"; -import "deadlock.proto"; import "tracepb.proto"; -import "resource_manager.proto"; +option java_package = "org.tikv.kvproto"; +option (gogoproto.goproto_sizecache_all) = false; +option (gogoproto.goproto_unkeyed_all) = false; +option (gogoproto.goproto_unrecognized_all) = false; option (gogoproto.marshaler_all) = true; option (gogoproto.sizer_all) = true; option (gogoproto.unmarshaler_all) = true; option (rustproto.lite_runtime_all) = true; -option java_package = "org.tikv.kvproto"; - // This proto file defines requests, responses, and helper messages for KV and raw // APIs of TiKV (see tikvpb.proto). @@ -24,268 +27,280 @@ option java_package = "org.tikv.kvproto"; // A transactional get command. Lookup a value for `key` in the transaction with // starting timestamp = `version`. message GetRequest { - Context context = 1; - bytes key = 2; - uint64 version = 3; + Context context = 1; + bytes key = 2; + uint64 version = 3; + // If true, the response will include the commit ts of the key. + bool need_commit_ts = 4; } message GetResponse { - // A region error indicates that the request was sent to the wrong TiKV node - // (or other, similar errors). - errorpb.Error region_error = 1; - // A value could not be retrieved due to the state of the database for the requested key. - KeyError error = 2; - // A successful result. 
- bytes value = 3; - // True if the key does not exist in the database. - bool not_found = 4; - reserved 5; - // Time and scan details when processing the request. - ExecDetailsV2 exec_details_v2 = 6; + // A region error indicates that the request was sent to the wrong TiKV node + // (or other, similar errors). + errorpb.Error region_error = 1; + // A value could not be retrieved due to the state of the database for the requested key. + KeyError error = 2; + // A successful result. + bytes value = 3; + // True if the key does not exist in the database. + bool not_found = 4; + reserved 5; + // Time and scan details when processing the request. + ExecDetailsV2 exec_details_v2 = 6; + // The commit timestamp of the key. + // If it is zero, it means the commit timestamp is unknown. + uint64 commit_ts = 7; } // Scan fetches values for a range of keys; it is part of the transaction with // starting timestamp = `version`. message ScanRequest { - Context context = 1; - bytes start_key = 2; - // The maximum number of results to return. - uint32 limit = 3; - uint64 version = 4; - // Return only the keys found by scanning, not their values. - bool key_only = 5; - bool reverse = 6; - // For compatibility, when scanning forward, the range to scan is [start_key, end_key), where start_key < end_key; - // and when scanning backward, it scans [end_key, start_key) in descending order, where end_key < start_key. - bytes end_key = 7; - // If sample_step > 0, skips 'sample_step - 1' number of keys after each returned key. - // locks are not checked. - uint32 sample_step = 8; + Context context = 1; + bytes start_key = 2; + // The maximum number of results to return. + uint32 limit = 3; + uint64 version = 4; + // Return only the keys found by scanning, not their values. 
+ bool key_only = 5; + bool reverse = 6; + // For compatibility, when scanning forward, the range to scan is [start_key, end_key), where start_key < end_key; + // and when scanning backward, it scans [end_key, start_key) in descending order, where end_key < start_key. + bytes end_key = 7; + // If sample_step > 0, skips 'sample_step - 1' number of keys after each returned key. + // locks are not checked. + uint32 sample_step = 8; } message ScanResponse { - errorpb.Error region_error = 1; - // Each KvPair may contain a key error. - repeated KvPair pairs = 2; - // This KeyError exists when some key is locked but we cannot check locks of all keys. - // In this case, `pairs` should be empty and the client should redo scanning all the keys - // after resolving the lock. - KeyError error = 3; + errorpb.Error region_error = 1; + // Each KvPair may contain a key error. + repeated KvPair pairs = 2; + // This KeyError exists when some key is locked but we cannot check locks of all keys. + // In this case, `pairs` should be empty and the client should redo scanning all the keys + // after resolving the lock. + KeyError error = 3; } // A prewrite is the first phase of writing to TiKV. It contains all data to be written in a transaction. // TiKV will write the data in a preliminary state. Data cannot be read until it has been committed. // The client should only commit a transaction once all prewrites succeed. message PrewriteRequest { - // What kind of checks need to be performed for keys in a pessimistic transaction. - enum PessimisticAction { - // The key needn't be locked and no extra write conflict checks are needed. - SKIP_PESSIMISTIC_CHECK = 0; - // The key should have been locked at the time of prewrite. - DO_PESSIMISTIC_CHECK = 1; - // The key doesn't need a pessimistic lock. But we need to do data constraint checks. - DO_CONSTRAINT_CHECK = 2; - } - - // for_update_ts constriants that should be checked when prewriting a pessimistic transaction. 
- message ForUpdateTSConstraint { - // The index of key in the prewrite request that should be checked. - uint32 index = 1; - // The expected for_update_ts of the pessimistic lock of the key. - uint64 expected_for_update_ts = 2; - } - - Context context = 1; - // The data to be written to the database. - repeated Mutation mutations = 2; - // The client picks one key to be primary (unrelated to the primary key concept in SQL). This - // key's lock is the source of truth for the state of a transaction. All other locks due to a - // transaction will point to the primary lock. - bytes primary_lock = 3; - // Identifies the transaction being written. - uint64 start_version = 4; - uint64 lock_ttl = 5; - // TiKV can skip some checks, used for speeding up data migration. - bool skip_constraint_check = 6; - // For pessimistic transaction, some mutations don't need to be locked, for example, non-unique index key. - // Keys with deferred constraint checks are not locked. - repeated PessimisticAction pessimistic_actions = 7; - // How many keys this transaction involves in this region. - uint64 txn_size = 8; - // For pessimistic transactions only; used to check if a conflict lock is already committed. - uint64 for_update_ts = 9; - // If min_commit_ts > 0, this is a large transaction request, the final commit_ts - // will be inferred from `min_commit_ts`. - uint64 min_commit_ts = 10; - // When async commit is enabled, `secondaries` should be set as the key list of all secondary - // locks if the request prewrites the primary lock. - bool use_async_commit = 11; - repeated bytes secondaries = 12; - // When the transaction involves only one region, it's possible to commit the transaction - // directly with 1PC protocol. - bool try_one_pc = 13; - // The max commit ts is reserved for limiting the commit ts of 1PC or async commit, which can be used to avoid - // inconsistency with schema change. - uint64 max_commit_ts = 14; - // The level of assertion to use on this prewrte request. 
- AssertionLevel assertion_level = 15;
- // for_update_ts constriants that should be checked when prewriting a pessimistic transaction.
- // See https://github.com/tikv/tikv/issues/14311
- repeated ForUpdateTSConstraint for_update_ts_constraints = 16;
-
- // Reserved for file based transaction.
- repeated uint64 txn_file_chunks = 100;
+ // What kind of checks need to be performed for keys in a pessimistic transaction.
+ enum PessimisticAction {
+ // The key needn't be locked and no extra write conflict checks are needed.
+ // Deprecated in next-gen (cloud-storage-engine).
+ SKIP_PESSIMISTIC_CHECK = 0;
+ // The key should have been locked at the time of prewrite. If the lock is missing,
+ // the lock will be amended. This is the normal case for pessimistic transactions.
+ DO_PESSIMISTIC_CHECK = 1;
+ // The key does not acquire a pessimistic lock for performance optimization.
+ // Constraint checking (write conflicts + data constraints) is deferred to prewrite.
+ DO_CONSTRAINT_CHECK = 2;
+ }
+
+ // for_update_ts constraints that should be checked when prewriting a pessimistic transaction.
+ message ForUpdateTSConstraint {
+ // The index of key in the prewrite request that should be checked.
+ uint32 index = 1;
+ // The expected for_update_ts of the pessimistic lock of the key.
+ uint64 expected_for_update_ts = 2;
+ }
+
+ Context context = 1;
+ // The data to be written to the database.
+ repeated Mutation mutations = 2;
+ // The client picks one key to be primary (unrelated to the primary key concept in SQL). This
+ // key's lock is the source of truth for the state of a transaction. All other locks due to a
+ // transaction will point to the primary lock.
+ bytes primary_lock = 3;
+ // Identifies the transaction being written.
+ uint64 start_version = 4;
+ uint64 lock_ttl = 5;
+ // TiKV can skip some checks, used for speeding up data migration. 
+ bool skip_constraint_check = 6;
+ // For pessimistic transaction, some mutations don't need to be locked, for example, non-unique index key.
+ // Keys with deferred constraint checks are not locked.
+ repeated PessimisticAction pessimistic_actions = 7;
+ // How many keys this transaction involves in this region.
+ uint64 txn_size = 8;
+ // For pessimistic transactions only; used to check if a conflict lock is already committed.
+ uint64 for_update_ts = 9;
+ // If min_commit_ts > 0, this is a large transaction request, the final commit_ts
+ // will be inferred from `min_commit_ts`.
+ uint64 min_commit_ts = 10;
+ // When async commit is enabled, `secondaries` should be set as the key list of all secondary
+ // locks if the request prewrites the primary lock.
+ bool use_async_commit = 11;
+ repeated bytes secondaries = 12;
+ // When the transaction involves only one region, it's possible to commit the transaction
+ // directly with 1PC protocol.
+ bool try_one_pc = 13;
+ // The max commit ts is reserved for limiting the commit ts of 1PC or async commit, which can be used to avoid
+ // inconsistency with schema change.
+ uint64 max_commit_ts = 14;
+ // The level of assertion to use on this prewrite request.
+ AssertionLevel assertion_level = 15;
+ // for_update_ts constraints that should be checked when prewriting a pessimistic transaction.
+ // See https://github.com/tikv/tikv/issues/14311
+ repeated ForUpdateTSConstraint for_update_ts_constraints = 16;
+
+ // Reserved for file based transaction.
+ repeated uint64 txn_file_chunks = 100;
 }
 
 message PrewriteResponse {
- errorpb.Error region_error = 1;
- repeated KeyError errors = 2;
- // 0 if the min_commit_ts is not ready or any other reason that async
- // commit cannot proceed. The client can then fallback to normal way to
- // continue committing the transaction if prewrite are all finished. 
- uint64 min_commit_ts = 3; - // When the transaction is successfully committed with 1PC protocol, this field will be set to - // the commit ts of the transaction. Otherwise, if TiKV failed to commit it with 1PC or the - // transaction is not 1PC, the value will be 0. - uint64 one_pc_commit_ts = 4; - // Execution details about the request processing. - ExecDetailsV2 exec_details_v2 = 5; + errorpb.Error region_error = 1; + repeated KeyError errors = 2; + // 0 if the min_commit_ts is not ready or any other reason that async + // commit cannot proceed. The client can then fallback to normal way to + // continue committing the transaction if prewrite are all finished. + uint64 min_commit_ts = 3; + // When the transaction is successfully committed with 1PC protocol, this field will be set to + // the commit ts of the transaction. Otherwise, if TiKV failed to commit it with 1PC or the + // transaction is not 1PC, the value will be 0. + uint64 one_pc_commit_ts = 4; + // Execution details about the request processing. + ExecDetailsV2 exec_details_v2 = 5; } // Used to specify the behavior when a pessimistic lock request is woken up after waiting for another // lock. enum PessimisticLockWakeUpMode { - // When woken up, returns WriteConflict error to the client and the client should retry if necessary. - // In this mode, results of `return_values` or `check_existence` will be set to `values` and `not_founds` - // fields of the PessimisticLockResponse, which is compatible with old versions. - WakeUpModeNormal = 0; - // When woken up, continue trying to lock the key. This implicitly enables the `allow_lock_with_conflict` - // behavior, which means, allow acquiring the lock even if there is WriteConflict on the key. - // In this mode, `return_values` or `check_existence` fields of PessimisticLockResponse won't be used, and - // all results are carried in the `results` field. 
- WakeUpModeForceLock = 1; + // When woken up, returns WriteConflict error to the client and the client should retry if necessary. + // In this mode, results of `return_values` or `check_existence` will be set to `values` and `not_founds` + // fields of the PessimisticLockResponse, which is compatible with old versions. + WakeUpModeNormal = 0; + // When woken up, continue trying to lock the key. This implicitly enables the `allow_lock_with_conflict` + // behavior, which means, allow acquiring the lock even if there is WriteConflict on the key. + // In this mode, `return_values` or `check_existence` fields of PessimisticLockResponse won't be used, and + // all results are carried in the `results` field. + WakeUpModeForceLock = 1; } // Lock a set of keys to prepare to write to them. message PessimisticLockRequest { - Context context = 1; - // In this case every `Op` of the mutations must be `PessimisticLock`. - repeated Mutation mutations = 2; - bytes primary_lock = 3; - uint64 start_version = 4; - uint64 lock_ttl = 5; - // Each locking command in a pessimistic transaction has its own timestamp. If locking fails, then - // the corresponding SQL statement can be retried with a later timestamp, TiDB does not need to - // retry the whole transaction. The name comes from the `SELECT ... FOR UPDATE` SQL statement which - // is a locking read. Each `SELECT ... FOR UPDATE` in a transaction will be assigned its own - // timestamp. - uint64 for_update_ts = 6; - // If the request is the first lock request, we don't need to detect deadlock. - bool is_first_lock = 7; - // Time to wait for lock released in milliseconds when encountering locks. - // 0 means using default timeout in TiKV. Negative means no wait. - int64 wait_timeout = 8; - // If it is true, TiKV will acquire the pessimistic lock regardless of write conflict - // and return the latest value. It's only supported for single mutation. 
- bool force = 9 [deprecated = true]; - // If it is true, TiKV will return values of the keys if no error, so TiDB can cache the values for - // later read in the same transaction. - // When 'force' is set to true, this field is ignored. - bool return_values = 10; - // If min_commit_ts > 0, this is large transaction proto, the final commit_ts - // would be infered from min_commit_ts. - uint64 min_commit_ts = 11; - // If set to true, it means TiKV need to check if the key exists, and return the result in - // the `not_founds` feild in the response. This works no matter if `return_values` is set. If - // `return_values` is set, it simply makes no difference; otherwise, the `value` field of the - // repsonse will be empty while the `not_founds` field still indicates the keys' existence. - bool check_existence = 12; - // TiKV lock the record only when it exists - bool lock_only_if_exists = 13; - // Specifies the behavior when the request is woken up after wating for lock of another transaction. - PessimisticLockWakeUpMode wake_up_mode = 14; + Context context = 1; + // In this case every `Op` of the mutations must be `PessimisticLock`. + repeated Mutation mutations = 2; + bytes primary_lock = 3; + uint64 start_version = 4; + uint64 lock_ttl = 5; + // Each locking command in a pessimistic transaction has its own timestamp. If locking fails, then + // the corresponding SQL statement can be retried with a later timestamp, TiDB does not need to + // retry the whole transaction. The name comes from the `SELECT ... FOR UPDATE` SQL statement which + // is a locking read. Each `SELECT ... FOR UPDATE` in a transaction will be assigned its own + // timestamp. + uint64 for_update_ts = 6; + // If the request is the first lock request, we don't need to detect deadlock. + bool is_first_lock = 7; + // Time to wait for lock released in milliseconds when encountering locks. + // 0 means using default timeout in TiKV. Negative means no wait. 
+ int64 wait_timeout = 8;
+ // If it is true, TiKV will acquire the pessimistic lock regardless of write conflict
+ // and return the latest value. It's only supported for single mutation.
+ bool force = 9 [deprecated = true];
+ // If it is true, TiKV will return values of the keys if no error, so TiDB can cache the values for
+ // later read in the same transaction.
+ // When 'force' is set to true, this field is ignored.
+ bool return_values = 10;
+ // If min_commit_ts > 0, this is large transaction proto, the final commit_ts
+ // would be inferred from min_commit_ts.
+ uint64 min_commit_ts = 11;
+ // If set to true, it means TiKV need to check if the key exists, and return the result in
+ // the `not_founds` field in the response. This works no matter if `return_values` is set. If
+ // `return_values` is set, it simply makes no difference; otherwise, the `value` field of the
+ // response will be empty while the `not_founds` field still indicates the keys' existence.
+ bool check_existence = 12;
+ // TiKV lock the record only when it exists
+ bool lock_only_if_exists = 13;
+ // Specifies the behavior when the request is woken up after waiting for lock of another transaction.
+ PessimisticLockWakeUpMode wake_up_mode = 14;
 }
 
 enum PessimisticLockKeyResultType {
- LockResultNormal = 0;
- LockResultLockedWithConflict = 1;
- LockResultFailed = 2;
+ LockResultNormal = 0;
+ LockResultLockedWithConflict = 1;
+ LockResultFailed = 2;
 }
 
 message PessimisticLockKeyResult {
- PessimisticLockKeyResultType type = 1;
- bytes value = 2;
- bool existence = 3;
- // We allow a key be locked when there is write conflict (latest commit_ts > for_update_ts).
- // In this case, the key is semantically locked by a newer for_update_ts.
- // For each requested key, the field is non-zero if the key is locked with write conflict, and it
- // equals to the commit_ts of the latest version of the specified key. 
The for_update_ts field - // of the lock that's actually written to TiKV will also be this value. At the same time, - // `value` and `existence` will be returned regardless to how `return_values` and - // `check_existence` are set. - uint64 locked_with_conflict_ts = 4; - // Hint the client that resolving lock is not needed for this lock. For `PessimisticLock` - // requests only. - bool skip_resolving_lock = 11; + PessimisticLockKeyResultType type = 1; + bytes value = 2; + bool existence = 3; + // We allow a key be locked when there is write conflict (latest commit_ts > for_update_ts). + // In this case, the key is semantically locked by a newer for_update_ts. + // For each requested key, the field is non-zero if the key is locked with write conflict, and it + // equals to the commit_ts of the latest version of the specified key. The for_update_ts field + // of the lock that's actually written to TiKV will also be this value. At the same time, + // `value` and `existence` will be returned regardless to how `return_values` and + // `check_existence` are set. + uint64 locked_with_conflict_ts = 4; + // Hint the client that resolving lock is not needed for this lock. For `PessimisticLock` + // requests only. + bool skip_resolving_lock = 11; } message PessimisticLockResponse { - errorpb.Error region_error = 1; - repeated KeyError errors = 2; - // It carries the latest value and its commit ts if force in PessimisticLockRequest is true. - uint64 commit_ts = 3 [deprecated = true]; - bytes value = 4 [deprecated = true]; - // The values is set if 'return_values' is true in the request and no error. - // If 'force' is true, this field is not used. - // Only used when `wake_up_mode` is `WakeUpModeNormal`. - repeated bytes values = 5; - // Indicates whether the values at the same index is correspond to an existing key. - // In legacy TiKV, this field is not used even 'force' is false. 
In that case, an empty value indicates - // two possible situations: (1) the key does not exist. (2) the key exists but the value is empty. - // Only used when `wake_up_mode` is `WakeUpModeNormal`. - repeated bool not_founds = 6; - // Execution details about the request processing. - ExecDetailsV2 exec_details_v2 = 7; - // Results of the request. Only used when `wake_up_mode` is `WakeUpModeForceLock`. - repeated PessimisticLockKeyResult results = 8; + errorpb.Error region_error = 1; + repeated KeyError errors = 2; + // It carries the latest value and its commit ts if force in PessimisticLockRequest is true. + uint64 commit_ts = 3 [deprecated = true]; + bytes value = 4 [deprecated = true]; + // The values is set if 'return_values' is true in the request and no error. + // If 'force' is true, this field is not used. + // Only used when `wake_up_mode` is `WakeUpModeNormal`. + repeated bytes values = 5; + // Indicates whether the values at the same index is correspond to an existing key. + // In legacy TiKV, this field is not used even 'force' is false. In that case, an empty value indicates + // two possible situations: (1) the key does not exist. (2) the key exists but the value is empty. + // Only used when `wake_up_mode` is `WakeUpModeNormal`. + repeated bool not_founds = 6; + // Execution details about the request processing. + ExecDetailsV2 exec_details_v2 = 7; + // Results of the request. Only used when `wake_up_mode` is `WakeUpModeForceLock`. + repeated PessimisticLockKeyResult results = 8; } // Unlock keys locked using `PessimisticLockRequest`. message PessimisticRollbackRequest { - Context context = 1; - uint64 start_version = 2; - uint64 for_update_ts = 3; - repeated bytes keys = 4; + Context context = 1; + uint64 start_version = 2; + uint64 for_update_ts = 3; + repeated bytes keys = 4; } message PessimisticRollbackResponse { - errorpb.Error region_error = 1; - repeated KeyError errors = 2; - // Execution details about the request processing. 
- ExecDetailsV2 exec_details_v2 = 3;
+ errorpb.Error region_error = 1;
+ repeated KeyError errors = 2;
+ // Execution details about the request processing.
+ ExecDetailsV2 exec_details_v2 = 3;
 }
 
 // Used to update the lock_ttl of a psessimistic and/or large transaction to prevent it from been killed.
 message TxnHeartBeatRequest {
- Context context = 1;
- // The key of the lock to update.
- bytes primary_lock = 2;
- // Start timestamp of the large transaction.
- uint64 start_version = 3;
- // The new TTL the sender would like.
- uint64 advise_lock_ttl = 4;
-
- // Reserved for file based transaction.
- bool is_txn_file = 100;
+ Context context = 1;
+ // The key of the lock to update.
+ bytes primary_lock = 2;
+ // Start timestamp of the large transaction.
+ uint64 start_version = 3;
+ // The new TTL the sender would like.
+ uint64 advise_lock_ttl = 4;
+ // Optionally update PK's min_commit_ts.
+ // Only for non-async-commit and non-1PC transactions.
+ // If it is 0, ignore this field.
+ uint64 min_commit_ts = 5;
+
+ // Reserved for file based transaction.
+ bool is_txn_file = 100;
 }
 
 message TxnHeartBeatResponse {
- errorpb.Error region_error = 1;
- KeyError error = 2;
- // The TTL actually set on the requested lock.
- uint64 lock_ttl = 3;
- // Execution details about the request processing.
- ExecDetailsV2 exec_details_v2 = 4;
+ errorpb.Error region_error = 1;
+ KeyError error = 2;
+ // The TTL actually set on the requested lock.
+ uint64 lock_ttl = 3;
+ // Execution details about the request processing.
+ ExecDetailsV2 exec_details_v2 = 4;
 }
 
 // CheckTxnStatusRequest checks the status of a transaction.
@@ -294,1211 +309,1363 @@ message TxnHeartBeatResponse {
 // Otherwise, returns the TTL information for the transaction.
 // CheckTxnStatusRequest may also push forward the minCommitTS of a large transaction.
 message CheckTxnStatusRequest {
- Context context = 1;
- // Primary key and lock ts together to locate the primary lock of a transaction. 
- bytes primary_key = 2; - // Starting timestamp of the transaction being checked. - uint64 lock_ts = 3; - // The start timestamp of the transaction which this request is part of. - uint64 caller_start_ts = 4; - // The client must specify the current time to TiKV using this timestamp. It is used to check TTL - // timeouts. It may be inaccurate. - uint64 current_ts = 5; - // If true, then TiKV will leave a rollback tombstone in the write CF for `primary_key`, even if - // that key is not locked. - bool rollback_if_not_exist = 6; - // This field is set to true only if the transaction is known to fall back from async commit. - // Then, CheckTxnStatus treats the transaction as non-async-commit even if the use_async_commit - // field in the primary lock is true. - bool force_sync_commit = 7; - // If the check request is used to resolve or decide the transaction status for a input pessimistic - // lock, the transaction status could not be decided if the primary lock is pessimistic too and - // it's still uncertain. - bool resolving_pessimistic_lock = 8; - // Whether it's needed to check if the lock on the key (if any) is the primary lock. - // This is for handling some corner cases when a pessimistic transaction changes its primary - // (see https://github.com/pingcap/tidb/issues/42937 for details). This field is necessary - // because the old versions of clients cannot handle some results returned from TiKV correctly. - // For new versions, this field should always be set to true. - bool verify_is_primary = 9; - - // Reserved for file based transaction. - bool is_txn_file = 100; + Context context = 1; + // Primary key and lock ts together to locate the primary lock of a transaction. + bytes primary_key = 2; + // Starting timestamp of the transaction being checked. + uint64 lock_ts = 3; + // The start timestamp of the transaction which this request is part of. + uint64 caller_start_ts = 4; + // The client must specify the current time to TiKV using this timestamp. 
It is used to check TTL + // timeouts. It may be inaccurate. + uint64 current_ts = 5; + // If true, then TiKV will leave a rollback tombstone in the write CF for `primary_key`, even if + // that key is not locked. + bool rollback_if_not_exist = 6; + // This field is set to true only if the transaction is known to fall back from async commit. + // Then, CheckTxnStatus treats the transaction as non-async-commit even if the use_async_commit + // field in the primary lock is true. + bool force_sync_commit = 7; + // If the check request is used to resolve or decide the transaction status for a input pessimistic + // lock, the transaction status could not be decided if the primary lock is pessimistic too and + // it's still uncertain. + bool resolving_pessimistic_lock = 8; + // Whether it's needed to check if the lock on the key (if any) is the primary lock. + // This is for handling some corner cases when a pessimistic transaction changes its primary + // (see https://github.com/pingcap/tidb/issues/42937 for details). This field is necessary + // because the old versions of clients cannot handle some results returned from TiKV correctly. + // For new versions, this field should always be set to true. + bool verify_is_primary = 9; + + // Reserved for file based transaction. + bool is_txn_file = 100; } message CheckTxnStatusResponse { - errorpb.Error region_error = 1; - KeyError error = 2; - // Three kinds of transaction status: - // locked: lock_ttl > 0 - // committed: commit_version > 0 - // rollbacked: lock_ttl = 0 && commit_version = 0 - uint64 lock_ttl = 3; - uint64 commit_version = 4; - // The action performed by TiKV (and why if the action is to rollback). - Action action = 5; - LockInfo lock_info = 6; - // Execution details about the request processing. 
- ExecDetailsV2 exec_details_v2 = 7; + errorpb.Error region_error = 1; + KeyError error = 2; + // Three kinds of transaction status: + // locked: lock_ttl > 0 + // committed: commit_version > 0 + // rollbacked: lock_ttl = 0 && commit_version = 0 + uint64 lock_ttl = 3; + uint64 commit_version = 4; + // The action performed by TiKV (and why if the action is to rollback). + Action action = 5; + LockInfo lock_info = 6; + // Execution details about the request processing. + ExecDetailsV2 exec_details_v2 = 7; } // Part of the async commit protocol, checks for locks on all supplied keys. If a lock is missing, // does not have a successful status, or belongs to another transaction, TiKV will leave a rollback // tombstone for that key. message CheckSecondaryLocksRequest { - Context context = 1; - repeated bytes keys = 2; - // Identifies the transaction we are investigating. - uint64 start_version = 3; + Context context = 1; + repeated bytes keys = 2; + // Identifies the transaction we are investigating. + uint64 start_version = 3; } message CheckSecondaryLocksResponse { - errorpb.Error region_error = 1; - KeyError error = 2; - // For each key in `keys` in `CheckSecondaryLocks`, there will be a lock in - // this list if there is a lock present and belonging to the correct transaction, - // nil otherwise. - repeated LockInfo locks = 3; - // If any of the locks have been committed, this is the commit ts used. If no - // locks have been committed, it will be zero. - uint64 commit_ts = 4; - // Execution details about the request processing. - ExecDetailsV2 exec_details_v2 = 5; + errorpb.Error region_error = 1; + KeyError error = 2; + // For each key in `keys` in `CheckSecondaryLocks`, there will be a lock in + // this list if there is a lock present and belonging to the correct transaction, + // nil otherwise. + repeated LockInfo locks = 3; + // If any of the locks have been committed, this is the commit ts used. If no + // locks have been committed, it will be zero. 
+ uint64 commit_ts = 4;
+ // Execution details about the request processing.
+ ExecDetailsV2 exec_details_v2 = 5;
+}
+
+// CommitRole indicates whether the current commit request is a primary commit or a secondary commit.
+enum CommitRole {
+ // It is unknown whether the current request is a primary commit or a secondary commit;
+ Unknown = 0;
+ // Current request is a primary commit;
+ Primary = 1;
+ // Current request is a secondary commit;
+ Secondary = 2;
+}
 
 // The second phase of writing to TiKV. If there are no errors or conflicts, then this request
 // commits a transaction so that its data can be read by other transactions.
 message CommitRequest {
- reserved 5;
- reserved "binlog";
- Context context = 1;
- // Identifies the transaction.
- uint64 start_version = 2;
- // All keys in the transaction (to be committed).
- repeated bytes keys = 3;
- // Timestamp for the end of the transaction. Must be greater than `start_version`.
- uint64 commit_version = 4;
-
- // Reserved for file based transaction.
- bool is_txn_file = 100;
+ reserved 5;
+ reserved "binlog";
+ Context context = 1;
+ // Identifies the transaction.
+ uint64 start_version = 2;
+ // All keys in the transaction (to be committed).
+ repeated bytes keys = 3;
+ // Timestamp for the end of the transaction. Must be greater than `start_version`.
+ uint64 commit_version = 4;
+ // commit_role indicates whether the current commit request is a primary commit or a secondary commit.
+ // Its value may be `Unknown` when using a client with an old version.
+ CommitRole commit_role = 6;
+ // primary_key indicates the primary key of the transaction.
+ // Its value may be empty when using an old version client.
+ bytes primary_key = 7;
+ // Whether committing a `use_async_commit` (i.e. should be treated as committed) prewrite.
+ bool use_async_commit = 8;
+
+ // Reserved for file based transaction. 
+ bool is_txn_file = 100; } message CommitResponse { - errorpb.Error region_error = 1; - KeyError error = 2; - // If the commit ts is derived from min_commit_ts, this field should be set. - uint64 commit_version = 3; - // Execution details about the request processing. - ExecDetailsV2 exec_details_v2 = 4; + errorpb.Error region_error = 1; + KeyError error = 2; + // If the commit ts is derived from min_commit_ts, this field should be set. + uint64 commit_version = 3; + // Execution details about the request processing. + ExecDetailsV2 exec_details_v2 = 4; } // Not yet implemented. message ImportRequest { - repeated Mutation mutations = 1; - uint64 commit_version = 2; + repeated Mutation mutations = 1; + uint64 commit_version = 2; } message ImportResponse { - errorpb.Error region_error = 1; - string error = 2; + errorpb.Error region_error = 1; + string error = 2; } // Cleanup a key by possibly unlocking it. // From 4.0 onwards, this message is no longer used. message CleanupRequest { - Context context = 1; - bytes key = 2; - uint64 start_version = 3; - // The current timestamp, used in combination with a lock's TTL to determine - // if the lock has expired. If `current_ts == 0`, then the key will be unlocked - // irrespective of its TTL. - uint64 current_ts = 4; + Context context = 1; + bytes key = 2; + uint64 start_version = 3; + // The current timestamp, used in combination with a lock's TTL to determine + // if the lock has expired. If `current_ts == 0`, then the key will be unlocked + // irrespective of its TTL. + uint64 current_ts = 4; } message CleanupResponse { - errorpb.Error region_error = 1; - KeyError error = 2; - // Set if the key is already committed. - uint64 commit_version = 3; + errorpb.Error region_error = 1; + KeyError error = 2; + // Set if the key is already committed. + uint64 commit_version = 3; } // Similar to a `Get` request. 
message BatchGetRequest { - Context context = 1; - repeated bytes keys = 2; - uint64 version = 3; + Context context = 1; + repeated bytes keys = 2; + uint64 version = 3; + // If true, the response will include the commit ts of the key. + bool need_commit_ts = 4; } message BatchGetResponse { - errorpb.Error region_error = 1; - repeated KvPair pairs = 2; - reserved 3; - // Time and scan details when processing the request. - ExecDetailsV2 exec_details_v2 = 4; - // This KeyError exists when some key is locked but we cannot check locks of all keys. - // In this case, `pairs` should be empty and the client should redo batch get all the keys - // after resolving the lock. - KeyError error = 5; + errorpb.Error region_error = 1; + repeated KvPair pairs = 2; + reserved 3; + // Time and scan details when processing the request. + ExecDetailsV2 exec_details_v2 = 4; + // This KeyError exists when some key is locked but we cannot check locks of all keys. + // In this case, `pairs` should be empty and the client should redo batch get all the keys + // after resolving the lock. + KeyError error = 5; } // Rollback a prewritten transaction. This will remove the preliminary data from the database, // unlock locks, and leave a rollback tombstone. message BatchRollbackRequest { - Context context = 1; - // Identify the transaction to be rolled back. - uint64 start_version = 2; - // The keys to rollback. - repeated bytes keys = 3; + Context context = 1; + // Identify the transaction to be rolled back. + uint64 start_version = 2; + // The keys to rollback. + repeated bytes keys = 3; - // Reserved for file based transaction. - bool is_txn_file = 100; + // Reserved for file based transaction. + bool is_txn_file = 100; } message BatchRollbackResponse { - errorpb.Error region_error = 1; - KeyError error = 2; - // Execution details about the request processing. 
- ExecDetailsV2 exec_details_v2 = 3; + errorpb.Error region_error = 1; + KeyError error = 2; + // Execution details about the request processing. + ExecDetailsV2 exec_details_v2 = 3; } // Scan the database for locks. Used at the start of the GC process to find all // old locks. message ScanLockRequest { - Context context = 1; - // Returns all locks with a start timestamp before `max_version`. - uint64 max_version = 2; - // Start scanning from this key. - bytes start_key = 3; - // The maximum number of locks to return. - uint32 limit = 4; - // The exclusive upperbound for scanning. - bytes end_key = 5; + Context context = 1; + // Returns all locks with a start timestamp before `max_version`. + uint64 max_version = 2; + // Start scanning from this key. + bytes start_key = 3; + // The maximum number of locks to return. + uint32 limit = 4; + // The exclusive upperbound for scanning. + bytes end_key = 5; } message ScanLockResponse { - errorpb.Error region_error = 1; - KeyError error = 2; - // Info on all locks found by the scan. - repeated LockInfo locks = 3; - // Execution details about the request processing. - ExecDetailsV2 exec_details_v2 = 4; + errorpb.Error region_error = 1; + KeyError error = 2; + // Info on all locks found by the scan. + repeated LockInfo locks = 3; + // Execution details about the request processing. + ExecDetailsV2 exec_details_v2 = 4; } // For all keys locked by the transaction identified by `start_version`, either // commit or rollback the transaction and unlock the key. message ResolveLockRequest { - Context context = 1; - uint64 start_version = 2; - // `commit_version == 0` means the transaction was rolled back. - // `commit_version > 0` means the transaction was committed at the given timestamp. - uint64 commit_version = 3; - repeated TxnInfo txn_infos = 4; - // Only resolve specified keys. - repeated bytes keys = 5; + Context context = 1; + uint64 start_version = 2; + // `commit_version == 0` means the transaction was rolled back. 
+ // `commit_version > 0` means the transaction was committed at the given timestamp. + uint64 commit_version = 3; + repeated TxnInfo txn_infos = 4; + // Only resolve specified keys. + repeated bytes keys = 5; - // Reserved for file based transaction. - bool is_txn_file = 100; + // Reserved for file based transaction. + bool is_txn_file = 100; } message ResolveLockResponse { - errorpb.Error region_error = 1; - KeyError error = 2; - // Execution details about the request processing. - ExecDetailsV2 exec_details_v2 = 3; + errorpb.Error region_error = 1; + KeyError error = 2; + // Execution details about the request processing. + ExecDetailsV2 exec_details_v2 = 3; } // Request TiKV to garbage collect all non-current data older than `safe_point`. message GCRequest { - Context context = 1; - uint64 safe_point = 2; + Context context = 1; + uint64 safe_point = 2; } message GCResponse { - errorpb.Error region_error = 1; - KeyError error = 2; + errorpb.Error region_error = 1; + KeyError error = 2; } // Delete a range of data from TiKV. message DeleteRangeRequest { - Context context = 1; - bytes start_key = 2; - bytes end_key = 3; - // If true, the data will not be immediately deleted, but the operation will - // still be replicated via Raft. This is used to notify TiKV that the data - // will be deleted using `unsafe_destroy_range` soon. - bool notify_only = 4; + Context context = 1; + bytes start_key = 2; + bytes end_key = 3; + // If true, the data will not be immediately deleted, but the operation will + // still be replicated via Raft. This is used to notify TiKV that the data + // will be deleted using `unsafe_destroy_range` soon. 
+ bool notify_only = 4; } message DeleteRangeResponse { - errorpb.Error region_error = 1; - string error = 2; + errorpb.Error region_error = 1; + string error = 2; } // Preparing the flashback for a region/key range will "lock" the region // so that there is no any read, write or schedule operation could be proposed before // the actual flashback operation. message PrepareFlashbackToVersionRequest { - Context context = 1; - bytes start_key = 2; - bytes end_key = 3; - // The `start_ts` which we will use to write a lock to prevent - // the `resolved_ts` from advancing during the whole process. - uint64 start_ts = 4; - // The TS version which the data will flashback to later. - uint64 version = 5; + Context context = 1; + bytes start_key = 2; + bytes end_key = 3; + // The `start_ts` which we will use to write a lock to prevent + // the `resolved_ts` from advancing during the whole process. + uint64 start_ts = 4; + // The TS version which the data will flashback to later. + uint64 version = 5; } message PrepareFlashbackToVersionResponse { - errorpb.Error region_error = 1; - string error = 2; + errorpb.Error region_error = 1; + string error = 2; } // Flashback the region to a specific point with the given `version`, please // make sure the region is "locked" by `PrepareFlashbackToVersionRequest` first, // otherwise this request will fail. message FlashbackToVersionRequest { - Context context = 1; - // The TS version which the data should flashback to. - uint64 version = 2; - bytes start_key = 3; - bytes end_key = 4; - // The `start_ts` and `commit_ts` which the newly written MVCC version will use. - // Please make sure the `start_ts` is the same one in `PrepareFlashbackToVersionRequest`. - uint64 start_ts = 5; - uint64 commit_ts = 6; + Context context = 1; + // The TS version which the data should flashback to. + uint64 version = 2; + bytes start_key = 3; + bytes end_key = 4; + // The `start_ts` and `commit_ts` which the newly written MVCC version will use. 
+ // Please make sure the `start_ts` is the same one in `PrepareFlashbackToVersionRequest`. + uint64 start_ts = 5; + uint64 commit_ts = 6; } message FlashbackToVersionResponse { - errorpb.Error region_error = 1; - string error = 2; + errorpb.Error region_error = 1; + string error = 2; } // Raw commands. message RawGetRequest { - Context context = 1; - bytes key = 2; - string cf = 3; + Context context = 1; + bytes key = 2; + string cf = 3; } message RawGetResponse { - errorpb.Error region_error = 1; - string error = 2; - bytes value = 3; - bool not_found = 4; + errorpb.Error region_error = 1; + string error = 2; + bytes value = 3; + bool not_found = 4; } message RawBatchGetRequest { - Context context = 1; - repeated bytes keys = 2; - string cf = 3; + Context context = 1; + repeated bytes keys = 2; + string cf = 3; } message RawBatchGetResponse { - errorpb.Error region_error = 1; - repeated KvPair pairs = 2; + errorpb.Error region_error = 1; + repeated KvPair pairs = 2; } message RawPutRequest { - Context context = 1; - bytes key = 2; - bytes value = 3; - string cf = 4; - uint64 ttl = 5; - bool for_cas = 6; + Context context = 1; + bytes key = 2; + bytes value = 3; + string cf = 4; + uint64 ttl = 5; + bool for_cas = 6; } message RawPutResponse { - errorpb.Error region_error = 1; - string error = 2; + errorpb.Error region_error = 1; + string error = 2; } message RawBatchPutRequest { - Context context = 1; - repeated KvPair pairs = 2; - string cf = 3; - uint64 ttl = 4 [deprecated=true]; - bool for_cas = 5; + Context context = 1; + repeated KvPair pairs = 2; + string cf = 3; + uint64 ttl = 4 [deprecated = true]; + bool for_cas = 5; - // The time-to-live for each keys in seconds, and if the length of `ttls` - // is exactly one, the ttl will be applied to all keys. Otherwise, the length - // mismatch between `ttls` and `pairs` will return an error. 
- repeated uint64 ttls = 6; + // The time-to-live for each keys in seconds, and if the length of `ttls` + // is exactly one, the ttl will be applied to all keys. Otherwise, the length + // mismatch between `ttls` and `pairs` will return an error. + repeated uint64 ttls = 6; } message RawBatchPutResponse { - errorpb.Error region_error = 1; - string error = 2; + errorpb.Error region_error = 1; + string error = 2; } message RawDeleteRequest { - Context context = 1; - bytes key = 2; - string cf = 3; - bool for_cas = 4; + Context context = 1; + bytes key = 2; + string cf = 3; + bool for_cas = 4; } message RawDeleteResponse { - errorpb.Error region_error = 1; - string error = 2; + errorpb.Error region_error = 1; + string error = 2; } message RawBatchDeleteRequest { - Context context = 1; - repeated bytes keys = 2; - string cf = 3; - bool for_cas = 4; + Context context = 1; + repeated bytes keys = 2; + string cf = 3; + bool for_cas = 4; } message RawBatchDeleteResponse { - errorpb.Error region_error = 1; - string error = 2; + errorpb.Error region_error = 1; + string error = 2; } message RawScanRequest { - Context context = 1; - bytes start_key = 2; - uint32 limit = 3; - bool key_only = 4; - string cf = 5; - bool reverse = 6; - // For compatibility, when scanning forward, the range to scan is [start_key, end_key), where start_key < end_key; - // and when scanning backward, it scans [end_key, start_key) in descending order, where end_key < start_key. - bytes end_key = 7; + Context context = 1; + bytes start_key = 2; + uint32 limit = 3; + bool key_only = 4; + string cf = 5; + bool reverse = 6; + // For compatibility, when scanning forward, the range to scan is [start_key, end_key), where start_key < end_key; + // and when scanning backward, it scans [end_key, start_key) in descending order, where end_key < start_key. 
+ bytes end_key = 7; } message RawScanResponse { - errorpb.Error region_error = 1; - repeated KvPair kvs = 2; + errorpb.Error region_error = 1; + repeated KvPair kvs = 2; } message RawDeleteRangeRequest { - Context context = 1; - bytes start_key = 2; - bytes end_key = 3; - string cf = 4; + Context context = 1; + bytes start_key = 2; + bytes end_key = 3; + string cf = 4; } message RawDeleteRangeResponse { - errorpb.Error region_error = 1; - string error = 2; + errorpb.Error region_error = 1; + string error = 2; } message RawBatchScanRequest { - Context context = 1; - repeated KeyRange ranges = 2; // scanning range - uint32 each_limit = 3; // max number of returning kv pairs for each scanning range - bool key_only = 4; - string cf = 5; - bool reverse = 6; + Context context = 1; + repeated KeyRange ranges = 2; // scanning range + uint32 each_limit = 3; // max number of returning kv pairs for each scanning range + bool key_only = 4; + string cf = 5; + bool reverse = 6; } message RawBatchScanResponse { - errorpb.Error region_error = 1; - repeated KvPair kvs = 2; + errorpb.Error region_error = 1; + repeated KvPair kvs = 2; } // Store commands (sent to a whole TiKV cluster, rather than a certain region). 
message UnsafeDestroyRangeRequest { - Context context = 1; - bytes start_key = 2; - bytes end_key = 3; + Context context = 1; + bytes start_key = 2; + bytes end_key = 3; } message UnsafeDestroyRangeResponse { - errorpb.Error region_error = 1; - string error = 2; + errorpb.Error region_error = 1; + string error = 2; } message RegisterLockObserverRequest { - Context context = 1; - uint64 max_ts = 2; + Context context = 1; + uint64 max_ts = 2; } message RegisterLockObserverResponse { - string error = 1; + string error = 1; } message CheckLockObserverRequest { - Context context = 1; - uint64 max_ts = 2; + Context context = 1; + uint64 max_ts = 2; } message CheckLockObserverResponse { - string error = 1; - bool is_clean = 2; - repeated LockInfo locks = 3; + string error = 1; + bool is_clean = 2; + repeated LockInfo locks = 3; } message RemoveLockObserverRequest { - Context context = 1; - uint64 max_ts = 2; + Context context = 1; + uint64 max_ts = 2; } message RemoveLockObserverResponse { - string error = 1; + string error = 1; } message PhysicalScanLockRequest { - Context context = 1; - uint64 max_ts = 2; - bytes start_key = 3; - uint32 limit = 4; + Context context = 1; + uint64 max_ts = 2; + bytes start_key = 3; + uint32 limit = 4; } message PhysicalScanLockResponse { - string error = 1; - repeated LockInfo locks = 2; + string error = 1; + repeated LockInfo locks = 2; } // Sent from PD to a TiKV node. message SplitRegionRequest { - Context context = 1; - bytes split_key = 2 [deprecated=true]; - repeated bytes split_keys = 3; // when use it to do batch split, `split_key` should be empty. - // Once enabled, the split_key will not be encoded. - bool is_raw_kv = 4; + Context context = 1; + bytes split_key = 2 [deprecated = true]; + repeated bytes split_keys = 3; // when use it to do batch split, `split_key` should be empty. + // Once enabled, the split_key will not be encoded. 
+ bool is_raw_kv = 4; } message SplitRegionResponse { - errorpb.Error region_error = 1; - metapb.Region left = 2 [deprecated=true]; // set when there are only 2 result regions. - metapb.Region right = 3 [deprecated=true]; // set when there are only 2 result regions. - repeated metapb.Region regions = 4; // include all result regions. + errorpb.Error region_error = 1; + metapb.Region left = 2 [deprecated = true]; // set when there are only 2 result regions. + metapb.Region right = 3 [deprecated = true]; // set when there are only 2 result regions. + repeated metapb.Region regions = 4; // include all result regions. + + // Reserved for file based transaction. + repeated KeyError errors = 100; } // Sent from TiFlash to a TiKV node. -message ReadIndexRequest{ - Context context = 1; +message ReadIndexRequest { + Context context = 1; - // TiKV checks the given range if there is any unapplied lock - // blocking the read request. - uint64 start_ts = 2; - repeated KeyRange ranges = 3; + // TiKV checks the given range if there is any unapplied lock + // blocking the read request. + uint64 start_ts = 2; + repeated KeyRange ranges = 3; } -message ReadIndexResponse{ - errorpb.Error region_error = 1; - uint64 read_index = 2; - // If `locked` is set, this read request is blocked by a lock. - // The lock should be returned to the client. - kvrpcpb.LockInfo locked = 3; +message ReadIndexResponse { + errorpb.Error region_error = 1; + uint64 read_index = 2; + // If `locked` is set, this read request is blocked by a lock. + // The lock should be returned to the client. + kvrpcpb.LockInfo locked = 3; } // Commands for debugging transactions. 
message MvccGetByKeyRequest { - Context context = 1; - bytes key = 2; + Context context = 1; + bytes key = 2; } message MvccGetByKeyResponse { - errorpb.Error region_error = 1; - string error = 2; - MvccInfo info = 3; + errorpb.Error region_error = 1; + string error = 2; + MvccInfo info = 3; } message MvccGetByStartTsRequest { - Context context = 1; - uint64 start_ts = 2; + Context context = 1; + uint64 start_ts = 2; } message MvccGetByStartTsResponse { - errorpb.Error region_error = 1; - string error = 2; - bytes key = 3; - MvccInfo info = 4; + errorpb.Error region_error = 1; + string error = 2; + bytes key = 3; + MvccInfo info = 4; } // Helper messages. // Miscellaneous metadata attached to most requests. message Context { - reserved 4; - reserved "read_quorum"; - uint64 region_id = 1; - metapb.RegionEpoch region_epoch = 2; - metapb.Peer peer = 3; - uint64 term = 5; - CommandPri priority = 6; - IsolationLevel isolation_level = 7; - bool not_fill_cache = 8; - bool sync_log = 9; - - // True means execution time statistics should be recorded and returned. - bool record_time_stat = 10; - // True means RocksDB scan statistics should be recorded and returned. - bool record_scan_stat = 11; - - bool replica_read = 12; - // Read requests can ignore locks belonging to these transactions because either - // these transactions are rolled back or theirs commit_ts > read request's start_ts. - repeated uint64 resolved_locks = 13; - uint64 max_execution_duration_ms = 14; - - // After a region applies to `applied_index`, we can get a - // snapshot for the region even if the peer is a follower. - uint64 applied_index = 15; - // A hint for TiKV to schedule tasks more fairly. Query with same task ID - // may share same priority and resource quota. 
- uint64 task_id = 16; - - // Not required to read the most up-to-date data, replicas with `safe_ts` >= `start_ts` - // can handle read request directly - bool stale_read = 17; - - // Any additional serialized information about the request. - bytes resource_group_tag = 18; - - // Used to tell TiKV whether operations are allowed or not on different disk usages. - DiskFullOpt disk_full_opt = 19; - - // Indicates the request is a retry request and the same request may have been sent before. - bool is_retry_request = 20; - - // API version implies the encode of the key and value. - APIVersion api_version = 21; - - // Read request should read through locks belonging to these transactions because these - // transactions are committed and theirs commit_ts <= read request's start_ts. - repeated uint64 committed_locks = 22; - - // The informantion to trace a request sent to TiKV. - tracepb.TraceContext trace_context = 23; - - // The source of the request, will be used as the tag of the metrics reporting. - // This field can be set for any requests that require to report metrics with any extra labels. - string request_source = 24; - - // The source of the current transaction. - uint64 txn_source = 25; - - reserved 26; // Used for resource_group_name, now it's moved into ResourceControlContext. - - // If `busy_threshold_ms` is given, TiKV can reject the request and return a `ServerIsBusy` - // error before processing if the estimated waiting duration exceeds the threshold. - uint32 busy_threshold_ms = 27; - - // Some information used for resource control. - ResourceControlContext resource_control_context = 28; - - // The keyspace that the request is sent to. - // NOTE: This field is only meaningful while the api_version is V2. - uint32 keyspace_id = 32; - - // The buckets version that the request is sent to. - // NOTE: This field is only meaningful while enable buckets. - uint64 buckets_version = 33; - - // It tells us where the request comes from in TiDB. 
If it isn't from TiDB, leave it blank. - // This is for tests only and thus can be safely changed/removed without affecting compatibility. - SourceStmt source_stmt = 34; + reserved 4; + reserved "read_quorum"; + uint64 region_id = 1; + metapb.RegionEpoch region_epoch = 2; + metapb.Peer peer = 3; + uint64 term = 5; + CommandPri priority = 6; + IsolationLevel isolation_level = 7; + bool not_fill_cache = 8; + bool sync_log = 9; + + // True means execution time statistics should be recorded and returned. + bool record_time_stat = 10; + // True means RocksDB scan statistics should be recorded and returned. + bool record_scan_stat = 11; + + bool replica_read = 12; + // Read requests can ignore locks belonging to these transactions because either + // these transactions are rolled back or theirs commit_ts > read request's start_ts. + repeated uint64 resolved_locks = 13; + uint64 max_execution_duration_ms = 14; + + // After a region applies to `applied_index`, we can get a + // snapshot for the region even if the peer is a follower. + uint64 applied_index = 15; + // A hint for TiKV to schedule tasks more fairly. Query with same task ID + // may share same priority and resource quota. + uint64 task_id = 16; + + // Not required to read the most up-to-date data, replicas with `safe_ts` >= `start_ts` + // can handle read request directly + bool stale_read = 17; + + // Any additional serialized information about the request. + bytes resource_group_tag = 18; + + // Used to tell TiKV whether operations are allowed or not on different disk usages. + DiskFullOpt disk_full_opt = 19; + + // Indicates the request is a retry request and the same request may have been sent before. + bool is_retry_request = 20; + + // API version implies the encode of the key and value. + APIVersion api_version = 21; + + // Read request should read through locks belonging to these transactions because these + // transactions are committed and theirs commit_ts <= read request's start_ts. 
+ repeated uint64 committed_locks = 22; + + // The informantion to trace a request sent to TiKV. + tracepb.TraceContext trace_context = 23; + + // The source of the request, will be used as the tag of the metrics reporting. + // This field can be set for any requests that require to report metrics with any extra labels. + string request_source = 24; + + // The source of the current transaction. + uint64 txn_source = 25; + + reserved 26; // Used for resource_group_name, now it's moved into ResourceControlContext. + + // If `busy_threshold_ms` is given, TiKV can reject the request and return a `ServerIsBusy` + // error before processing if the estimated waiting duration exceeds the threshold. + uint32 busy_threshold_ms = 27; + + // Some information used for resource control. + ResourceControlContext resource_control_context = 28; + + // The keyspace that the request is sent to. + // NOTE: This field is only meaningful while the api_version is V2. + string keyspace_name = 31; + + // The keyspace that the request is sent to. + // NOTE: This field is only meaningful while the api_version is V2. + // V3 uses keyspace_identity and must not read this legacy field as the full identity. + uint32 keyspace_id = 32; + + // The buckets version that the request is sent to. + // NOTE: This field is only meaningful while enable buckets. + uint64 buckets_version = 33; + + // It tells us where the request comes from in TiDB. If it isn't from TiDB, leave it blank. + // This is for tests only and thus can be safely changed/removed without affecting compatibility. + SourceStmt source_stmt = 34; + + // The cluster id of the request + uint64 cluster_id = 35; + + // The trace id of the request, will be used for tracing the request's execution's inner steps. + bytes trace_id = 36; + + // Control flags for trace logging behavior. 
+ // Bit 0: immediate_log - Force immediate logging without buffering + // Bit 1: category_req_resp - Enable request/response tracing + // Bit 2: category_write_details - Enable detailed write tracing + // Bit 3: category_read_details - Enable detailed read tracing + // Bits 4-63: Reserved for future use + // This field is set by client-go based on an extractor function provided by TiDB. + uint64 trace_control_flags = 37; + + // The V3 keyspace identity that the request is sent to. + // V3 RPC key fields carry user key bytes; TiKV encodes the physical + // mode + namespace + keyspace prefix at the serving boundary. + apipb.KeyspaceIdentity keyspace_identity = 38; + + // TODO(slock): tidb should send requests with a feature flag to indicate whether it + // supports shared locks, so that tikv can fail the requests from old tidb versions + // when needed. } message ResourceControlContext { - // It's used to identify which resource group the request belongs to. - string resource_group_name = 1; + // It's used to identify which resource group the request belongs to. + string resource_group_name = 1; - // The resource consumption of the resource group that have completed at all TiKVs between the previous request to this TiKV and current request. - // It's used as penalty to make the local resource scheduling on one TiKV takes the gloabl resource consumption into consideration. - resource_manager.Consumption penalty = 2; + // The resource consumption of the resource group that have completed at all TiKVs between the previous request to this TiKV and current request. + // It's used as penalty to make the local resource scheduling on one TiKV takes the gloabl resource consumption into consideration. + resource_manager.Consumption penalty = 2; - // This priority would override the original priority of the resource group for the request. - // Used to deprioritize the runaway queries. 
- uint64 override_priority = 3; + // This priority would override the original priority of the resource group for the request. + // Used to deprioritize the runaway queries. + uint64 override_priority = 3; } // The API version the server and the client is using. // See more details in https://github.com/tikv/rfcs/blob/master/text/0069-api-v2.md. enum APIVersion { - // `V1` is mainly for TiDB & TxnKV, and is not safe to use RawKV along with the others. - // V1 server only accepts V1 requests. V1 raw requests with TTL will be rejected. - V1 = 0; - // - // `V1TTL` is only available to RawKV, and 8 bytes representing the unix timestamp in - // seconds for expiring time will be append to the value of all RawKV entries. For example: - // ------------------------------------------------------------ - // | User value | Expire Ts | - // ------------------------------------------------------------ - // | 0x12 0x34 0x56 | 0x00 0x00 0x00 0x00 0x00 0x00 0xff 0xff | - // ------------------------------------------------------------ - // V1TTL server only accepts V1 raw requests. - // V1 client should not use `V1TTL` in request. V1 client should always send `V1`. - V1TTL = 1; - // - // `V2` use new encoding for RawKV & TxnKV to support more features. - // - // Key Encoding: - // TiDB: start with `m` or `t`, the same as `V1`. - // TxnKV: prefix with `x`, encoded as `MCE( x{keyspace id} + {user key} ) + timestamp`. - // RawKV: prefix with `r`, encoded as `MCE( r{keyspace id} + {user key} ) + timestamp`. - // Where the `{keyspace id}` is fixed-length of 3 bytes in network byte order. - // Besides, RawKV entires must be in `default` CF. - // - // Value Encoding: - // TiDB & TxnKV: the same as `V1`. - // RawKV: `{user value} + {optional fields} + {meta flag}`. The last byte in the - // raw value must be meta flags. 
For example: - // -------------------------------------- - // | User value | Meta flags | - // -------------------------------------- - // | 0x12 0x34 0x56 | 0x00 (0b00000000) | - // -------------------------------------- - // Bit 0 of meta flags is for TTL. If set, the value contains 8 bytes expiring time as - // unix timestamp in seconds at the very left to the meta flags. - // -------------------------------------------------------------------------------- - // | User value | Expiring time | Meta flags | - // -------------------------------------------------------------------------------- - // | 0x12 0x34 0x56 | 0x00 0x00 0x00 0x00 0x00 0x00 0xff 0xff | 0x01 (0b00000001) | - // -------------------------------------------------------------------------------- - // Bit 1 is for deletion. If set, the entry is logical deleted. - // --------------------- - // | Meta flags | - // --------------------- - // | 0x02 (0b00000010) | - // --------------------- - // - // V2 server accpets V2 requests and V1 transactional requests that statrts with TiDB key - // prefix (`m` and `t`). - V2 = 2; + // `V1` is mainly for TiDB & TxnKV, and is not safe to use RawKV along with the others. + // V1 server only accepts V1 requests. V1 raw requests with TTL will be rejected. + V1 = 0; + // + // `V1TTL` is only available to RawKV, and 8 bytes representing the unix timestamp in + // seconds for expiring time will be append to the value of all RawKV entries. For example: + // ------------------------------------------------------------ + // | User value | Expire Ts | + // ------------------------------------------------------------ + // | 0x12 0x34 0x56 | 0x00 0x00 0x00 0x00 0x00 0x00 0xff 0xff | + // ------------------------------------------------------------ + // V1TTL server only accepts V1 raw requests. + // V1 client should not use `V1TTL` in request. V1 client should always send `V1`. + V1TTL = 1; + // + // `V2` use new encoding for RawKV & TxnKV to support more features. 
+ // + // Key Encoding: + // TiDB: start with `m` or `t`, the same as `V1`. + // TxnKV: prefix with `x`, encoded as `MCE( x{keyspace id} + {user key} ) + timestamp`. + // RawKV: prefix with `r`, encoded as `MCE( r{keyspace id} + {user key} ) + timestamp`. + // Where the `{keyspace id}` is fixed-length of 3 bytes in network byte order. + // Besides, RawKV entires must be in `default` CF. + // + // Value Encoding: + // TiDB & TxnKV: the same as `V1`. + // RawKV: `{user value} + {optional fields} + {meta flag}`. The last byte in the + // raw value must be meta flags. For example: + // -------------------------------------- + // | User value | Meta flags | + // -------------------------------------- + // | 0x12 0x34 0x56 | 0x00 (0b00000000) | + // -------------------------------------- + // Bit 0 of meta flags is for TTL. If set, the value contains 8 bytes expiring time as + // unix timestamp in seconds at the very left to the meta flags. + // -------------------------------------------------------------------------------- + // | User value | Expiring time | Meta flags | + // -------------------------------------------------------------------------------- + // | 0x12 0x34 0x56 | 0x00 0x00 0x00 0x00 0x00 0x00 0xff 0xff | 0x01 (0b00000001) | + // -------------------------------------------------------------------------------- + // Bit 1 is for deletion. If set, the entry is logical deleted. + // --------------------- + // | Meta flags | + // --------------------- + // | 0x02 (0b00000010) | + // --------------------- + // + // V2 server accpets V2 requests and V1 transactional requests that statrts with TiDB key + // prefix (`m` and `t`). + V2 = 2; + // + // `V3` uses user-key wire semantics for normal KV RPCs. Servers encode an + // 8-byte physical prefix at the serving boundary: + // mode(1) + namespace_id(4 bytes, big endian) + keyspace_id(3 bytes, big endian). + // The keyspace identity is carried by Context.keyspace_identity. 
+ V3 = 3; } message SourceStmt { - uint64 start_ts = 1; - uint64 connection_id = 2; - uint64 stmt_id = 3; - // session alias set by user - string session_alias = 4; + uint64 start_ts = 1; + uint64 connection_id = 2; + uint64 stmt_id = 3; + // session alias set by user + string session_alias = 4; } message LockInfo { - bytes primary_lock = 1; - uint64 lock_version = 2; - bytes key = 3; - uint64 lock_ttl = 4; - // How many keys this transaction involves in this region. - uint64 txn_size = 5; - Op lock_type = 6; - uint64 lock_for_update_ts = 7; - // Fields for transactions that are using Async Commit. - bool use_async_commit = 8; - uint64 min_commit_ts = 9; - repeated bytes secondaries = 10; - // The time elapsed since last update of lock wait info when waiting. - // It's used in timeout errors. 0 means unknown or not applicable. - // It can be used to help the client decide whether to try resolving the lock. - uint64 duration_to_last_update_ms = 11; - - // Reserved for file based transaction. - bool is_txn_file = 100; + bytes primary_lock = 1; + uint64 lock_version = 2; + bytes key = 3; + uint64 lock_ttl = 4; + // How many keys this transaction involves in this region. + uint64 txn_size = 5; + Op lock_type = 6; + uint64 lock_for_update_ts = 7; + // Fields for transactions that are using Async Commit. + bool use_async_commit = 8; + uint64 min_commit_ts = 9; + repeated bytes secondaries = 10; + // The time elapsed since last update of lock wait info when waiting. + // It's used in timeout errors. 0 means unknown or not applicable. + // It can be used to help the client decide whether to try resolving the lock. + uint64 duration_to_last_update_ms = 11; + // When lock_type is SharedLock, this describes transactions holding the shared lock. + // Important: when lock_type is SharedLock, all shared locks must use shared_lock_infos; + // DO NOT read from the wrapper LockInfo. 
+ // TODO(slock): tidb should send requests with a feature flag to indicate whether it + // supports shared locks, so that tikv can fail the requests from old tidb versions + // when needed. + repeated LockInfo shared_lock_infos = 12; + + // Reserved for file based transaction. + bool is_txn_file = 100; } message KeyError { - LockInfo locked = 1; // Client should backoff or cleanup the lock then retry. - string retryable = 2; // Client may restart the txn. e.g write conflict. - string abort = 3; // Client should abort the txn. - WriteConflict conflict = 4; // Write conflict is moved from retryable to here. - AlreadyExist already_exist = 5; // Key already exists - Deadlock deadlock = 6; // Deadlock is used in pessimistic transaction for single statement rollback. - CommitTsExpired commit_ts_expired = 7; // Commit ts is earlier than min commit ts of a transaction. - TxnNotFound txn_not_found = 8; // Txn not found when checking txn status. - CommitTsTooLarge commit_ts_too_large = 9; // Calculated commit TS exceeds the limit given by the user. - AssertionFailed assertion_failed = 10; // Assertion of a `Mutation` is evaluated as a failure. - PrimaryMismatch primary_mismatch = 11; // CheckTxnStatus is sent to a lock that's not the primary. + LockInfo locked = 1; // Client should backoff or cleanup the lock then retry. + string retryable = 2; // Client may restart the txn. e.g write conflict. + string abort = 3; // Client should abort the txn. + WriteConflict conflict = 4; // Write conflict is moved from retryable to here. + AlreadyExist already_exist = 5; // Key already exists + Deadlock deadlock = 6; // Deadlock is used in pessimistic transaction for single statement rollback. + CommitTsExpired commit_ts_expired = 7; // Commit ts is earlier than min commit ts of a transaction. + TxnNotFound txn_not_found = 8; // Txn not found when checking txn status. + CommitTsTooLarge commit_ts_too_large = 9; // Calculated commit TS exceeds the limit given by the user. 
+ AssertionFailed assertion_failed = 10; // Assertion of a `Mutation` is evaluated as a failure. + PrimaryMismatch primary_mismatch = 11; // CheckTxnStatus is sent to a lock that's not the primary. + TxnLockNotFound txn_lock_not_found = 12; // TxnLockNotFound indicates the txn lock is not found. + + // Extra information for error debugging + DebugInfo debug_info = 100; } message WriteConflict { - enum Reason { - Unknown = 0; - Optimistic = 1; // in optimistic transactions. - PessimisticRetry = 2; // a lock acquisition request waits for a lock and awakes, or meets a newer version of data, let TiDB retry. - SelfRolledBack = 3; // the transaction itself has been rolled back when it tries to prewrite. - RcCheckTs = 4; // RcCheckTs failure by meeting a newer version, let TiDB retry. - LazyUniquenessCheck = 5; // write conflict found in lazy uniqueness check in pessimistic transactions. - } - - uint64 start_ts = 1; - uint64 conflict_ts = 2; - bytes key = 3; - bytes primary = 4; - uint64 conflict_commit_ts = 5; - Reason reason = 6; + enum Reason { + Unknown = 0; + Optimistic = 1; // in optimistic transactions. + PessimisticRetry = 2; // a lock acquisition request waits for a lock and awakes, or meets a newer version of data, let TiDB retry. + SelfRolledBack = 3; // the transaction itself has been rolled back when it tries to prewrite. + RcCheckTs = 4; // RcCheckTs failure by meeting a newer version, let TiDB retry. + LazyUniquenessCheck = 5; // write conflict found when deferring constraint checks in pessimistic transactions. Deprecated in next-gen (cloud-storage-engine). + NotLockedKeyConflict = 6; // write conflict found on keys that do not acquire pessimistic locks in pessimistic transactions. 
+ } + + uint64 start_ts = 1; + uint64 conflict_ts = 2; + bytes key = 3; + bytes primary = 4; + uint64 conflict_commit_ts = 5; + Reason reason = 6; } message AlreadyExist { - bytes key = 1; + bytes key = 1; } message Deadlock { - uint64 lock_ts = 1; - bytes lock_key = 2; - uint64 deadlock_key_hash = 3; - repeated deadlock.WaitForEntry wait_chain = 4; + uint64 lock_ts = 1; + bytes lock_key = 2; + + // The hash of `deadlock_key` field. + uint64 deadlock_key_hash = 3; + repeated deadlock.WaitForEntry wait_chain = 4; + + // The key that the current transaction has already acquired and blocks another transaction to form the deadlock. + bytes deadlock_key = 5; } message CommitTsExpired { - uint64 start_ts = 1; - uint64 attempted_commit_ts = 2; - bytes key = 3; - uint64 min_commit_ts = 4; + uint64 start_ts = 1; + uint64 attempted_commit_ts = 2; + bytes key = 3; + uint64 min_commit_ts = 4; } message TxnNotFound { - uint64 start_ts = 1; - bytes primary_key = 2; + uint64 start_ts = 1; + bytes primary_key = 2; } message CommitTsTooLarge { - uint64 commit_ts = 1; // The calculated commit TS. + uint64 commit_ts = 1; // The calculated commit TS. } message AssertionFailed { - uint64 start_ts = 1; - bytes key = 2; - Assertion assertion = 3; - uint64 existing_start_ts = 4; - uint64 existing_commit_ts = 5; + uint64 start_ts = 1; + bytes key = 2; + Assertion assertion = 3; + uint64 existing_start_ts = 4; + uint64 existing_commit_ts = 5; } message PrimaryMismatch { - LockInfo lock_info = 1; + LockInfo lock_info = 1; +} + +message TxnLockNotFound { + bytes key = 1; +} + +message MvccDebugInfo { + bytes key = 1; + MvccInfo mvcc = 2; +} + +message DebugInfo { + repeated MvccDebugInfo mvcc_info = 1; } enum CommandPri { - Normal = 0; // Normal is the default value. - Low = 1; - High = 2; + Normal = 0; // Normal is the default value. 
+ Low = 1; + High = 2; } enum IsolationLevel { - SI = 0; // SI = snapshot isolation - RC = 1; // RC = read committed - RCCheckTS = 2; // RC read and it's needed to check if there exists more recent versions. + SI = 0; // SI = snapshot isolation + RC = 1; // RC = read committed + RCCheckTS = 2; // RC read and it's needed to check if there exists more recent versions. } // Operation allowed info during each TiKV storage threshold. enum DiskFullOpt { - NotAllowedOnFull = 0; // The default value, means operations are not allowed either under almost full or already full. - AllowedOnAlmostFull = 1; // Means operations will be allowed when disk is almost full. - AllowedOnAlreadyFull = 2; // Means operations will be allowed when disk is already full. + NotAllowedOnFull = 0; // The default value, means operations are not allowed either under almost full or already full. + AllowedOnAlmostFull = 1; // Means operations will be allowed when disk is almost full. + AllowedOnAlreadyFull = 2; // Means operations will be allowed when disk is already full. } message TimeDetail { - // Off-cpu wall time elapsed in TiKV side. Usually this includes queue waiting time and - // other kind of waitings in series. (Wait time in the raftstore is not included.) - uint64 wait_wall_time_ms = 1; - // Off-cpu and on-cpu wall time elapsed to actually process the request payload. It does not - // include `wait_wall_time`. - // This field is very close to the CPU time in most cases. Some wait time spend in RocksDB - // cannot be excluded for now, like Mutex wait time, which is included in this field, so that - // this field is called wall time instead of CPU time. - uint64 process_wall_time_ms = 2; - // KV read wall Time means the time used in key/value scan and get. - uint64 kv_read_wall_time_ms = 3; - // Total wall clock time spent on this RPC in TiKV . - uint64 total_rpc_wall_time_ns = 4; + // Off-cpu wall time elapsed in TiKV side. 
Usually this includes queue waiting time and + // other kind of waitings in series. (Wait time in the raftstore is not included.) + uint64 wait_wall_time_ms = 1; + // Off-cpu and on-cpu wall time elapsed to actually process the request payload. It does not + // include `wait_wall_time`. + // This field is very close to the CPU time in most cases. Some wait time spend in RocksDB + // cannot be excluded for now, like Mutex wait time, which is included in this field, so that + // this field is called wall time instead of CPU time. + uint64 process_wall_time_ms = 2; + // KV read wall Time means the time used in key/value scan and get. + uint64 kv_read_wall_time_ms = 3; + // Total wall clock time spent on this RPC in TiKV . + uint64 total_rpc_wall_time_ns = 4; } message TimeDetailV2 { - // Off-cpu wall time elapsed in TiKV side. Usually this includes queue waiting time and - // other kind of waitings in series. (Wait time in the raftstore is not included.) - uint64 wait_wall_time_ns = 1; - // Off-cpu and on-cpu wall time elapsed to actually process the request payload. It does not - // include `wait_wall_time` and `suspend_wall_time`. - // This field is very close to the CPU time in most cases. Some wait time spend in RocksDB - // cannot be excluded for now, like Mutex wait time, which is included in this field, so that - // this field is called wall time instead of CPU time. - uint64 process_wall_time_ns = 2; - // Cpu wall time elapsed that task is waiting in queue. - uint64 process_suspend_wall_time_ns = 3; - // KV read wall Time means the time used in key/value scan and get. - uint64 kv_read_wall_time_ns = 4; - // Total wall clock time spent on this RPC in TiKV . - uint64 total_rpc_wall_time_ns = 5; + // Off-cpu wall time elapsed in TiKV side. Usually this includes queue waiting time and + // other kind of waitings in series. (Wait time in the raftstore is not included.) 
+ uint64 wait_wall_time_ns = 1; + // Off-cpu and on-cpu wall time elapsed to actually process the request payload. It does not + // include `wait_wall_time` and `suspend_wall_time`. + // This field is very close to the CPU time in most cases. Some wait time spend in RocksDB + // cannot be excluded for now, like Mutex wait time, which is included in this field, so that + // this field is called wall time instead of CPU time. + uint64 process_wall_time_ns = 2; + // Cpu wall time elapsed that task is waiting in queue. + uint64 process_suspend_wall_time_ns = 3; + // KV read wall Time means the time used in key/value scan and get. + uint64 kv_read_wall_time_ns = 4; + // Total wall clock time spent on this RPC in TiKV . + uint64 total_rpc_wall_time_ns = 5; + // Time spent on the gRPC layer. + uint64 kv_grpc_process_time_ns = 6; + // Time spent on waiting for run again in grpc pool from other executor pool. + uint64 kv_grpc_wait_time_ns = 7; } message ScanInfo { - int64 total = 1; - int64 processed = 2; - int64 read_bytes = 3; + int64 total = 1; + int64 processed = 2; + int64 read_bytes = 3; } // Only reserved for compatibility. message ScanDetail { - ScanInfo write = 1; - ScanInfo lock = 2; - ScanInfo data = 3; + ScanInfo write = 1; + ScanInfo lock = 2; + ScanInfo data = 3; } message ScanDetailV2 { - // Number of user keys scanned from the storage. - // It does not include deleted version or RocksDB tombstone keys. - // For Coprocessor requests, it includes keys that has been filtered out by - // Selection. - uint64 processed_versions = 1; + // Number of user keys scanned from the storage. + // It does not include deleted version or RocksDB tombstone keys. + // For Coprocessor requests, it includes keys that has been filtered out by + // Selection. + uint64 processed_versions = 1; + + // Number of bytes of user key-value pairs scanned from the storage, i.e. + // total size of data returned from MVCC layer. 
+ uint64 processed_versions_size = 8; - // Number of bytes of user key-value pairs scanned from the storage, i.e. - // total size of data returned from MVCC layer. - uint64 processed_versions_size = 8; + // Approximate number of MVCC keys meet during scanning. It includes + // deleted versions, but does not include RocksDB tombstone keys. + // + // When this field is notably larger than `processed_versions`, it means + // there are a lot of deleted MVCC keys. + uint64 total_versions = 2; - // Approximate number of MVCC keys meet during scanning. It includes - // deleted versions, but does not include RocksDB tombstone keys. - // - // When this field is notably larger than `processed_versions`, it means - // there are a lot of deleted MVCC keys. - uint64 total_versions = 2; + // Total number of deletes and single deletes skipped over during + // iteration, i.e. how many RocksDB tombstones are skipped. + uint64 rocksdb_delete_skipped_count = 3; - // Total number of deletes and single deletes skipped over during - // iteration, i.e. how many RocksDB tombstones are skipped. - uint64 rocksdb_delete_skipped_count = 3; + // Total number of internal keys skipped over during iteration. + // See https://github.com/facebook/rocksdb/blob/9f1c84ca471d8b1ad7be9f3eebfc2c7e07dfd7a7/include/rocksdb/perf_context.h#L84 for details. + uint64 rocksdb_key_skipped_count = 4; - // Total number of internal keys skipped over during iteration. - // See https://github.com/facebook/rocksdb/blob/9f1c84ca471d8b1ad7be9f3eebfc2c7e07dfd7a7/include/rocksdb/perf_context.h#L84 for details. - uint64 rocksdb_key_skipped_count = 4; + // Total number of RocksDB block cache hits. + uint64 rocksdb_block_cache_hit_count = 5; - // Total number of RocksDB block cache hits. - uint64 rocksdb_block_cache_hit_count = 5; + // Total number of block reads (with IO). + uint64 rocksdb_block_read_count = 6; - // Total number of block reads (with IO). 
- uint64 rocksdb_block_read_count = 6; + // Total number of bytes from block reads. + uint64 rocksdb_block_read_byte = 7; - // Total number of bytes from block reads. - uint64 rocksdb_block_read_byte = 7; + // Total time used for block reads. + uint64 rocksdb_block_read_nanos = 9; - // Total time used for block reads. - uint64 rocksdb_block_read_nanos = 9; + // Time used for getting a raftstore snapshot (including proposing read index, leader confirmation and getting the RocksDB snapshot). + uint64 get_snapshot_nanos = 10; - // Time used for getting a raftstore snapshot (including proposing read index, leader confirmation and getting the RocksDB snapshot). - uint64 get_snapshot_nanos = 10; + // Time used for proposing read index from read pool to store pool, equals 0 when performing lease read. + uint64 read_index_propose_wait_nanos = 11; - // Time used for proposing read index from read pool to store pool, equals 0 when performing lease read. - uint64 read_index_propose_wait_nanos = 11; + // Time used for leader confirmation, equals 0 when performing lease read. + uint64 read_index_confirm_wait_nanos = 12; - // Time used for leader confirmation, equals 0 when performing lease read. - uint64 read_index_confirm_wait_nanos = 12; + // Time used for read pool scheduling. + uint64 read_pool_schedule_wait_nanos = 13; - // Time used for read pool scheduling. - uint64 read_pool_schedule_wait_nanos = 13; + // The total MVCC key-value size meet during scanning. It includes + // deleted versions, but does not include RocksDB tombstone keys. + // + // When this field is notably larger than `processed_versions_size`, it means + // there are a lot of deleted MVCC keys. + uint64 total_versions_size = 14; + + // Total number of IA segment cache hits for this request. + uint64 ia_cache_hit_count = 15; + + // Total number of IA remote segment reads for this request. 
+ // + // IA segments are only read on cache misses, so this is also the IA segment + // cache miss count for the request. + uint64 ia_remote_read_segment_count = 16; + + // Total number of logical bytes returned from IA remote segment reads for this + // request. + uint64 ia_remote_read_segment_bytes = 17; + + // Total time spent serving IA remote segment reads for this request. + uint64 ia_remote_read_segment_nanos = 18; } message ExecDetails { - // Available when ctx.record_time_stat = true or meet slow query. - TimeDetail time_detail = 1; + // Available when ctx.record_time_stat = true or meet slow query. + TimeDetail time_detail = 1; - // Available when ctx.record_scan_stat = true or meet slow query. - ScanDetail scan_detail = 2; + // Available when ctx.record_scan_stat = true or meet slow query. + ScanDetail scan_detail = 2; - // See https://github.com/pingcap/kvproto/pull/689 - reserved 3; - reserved 4; + // See https://github.com/pingcap/kvproto/pull/689 + reserved 3; + reserved 4; } message ExecDetailsV2 { - // Available when ctx.record_time_stat = true or meet slow query. - // deprecated. Should use `time_detail_v2` instead. - TimeDetail time_detail = 1; - - // Available when ctx.record_scan_stat = true or meet slow query. - ScanDetailV2 scan_detail_v2 = 2; - - // Raftstore writing durations of the request. Only available for some write requests. - WriteDetail write_detail = 3; - // Available when ctx.record_time_stat = true or meet slow query. - TimeDetailV2 time_detail_v2 = 4; + // Available when ctx.record_time_stat = true or meet slow query. + // deprecated. Should use `time_detail_v2` instead. + TimeDetail time_detail = 1; + + // Available when ctx.record_scan_stat = true or meet slow query. + ScanDetailV2 scan_detail_v2 = 2; + + // Raftstore writing durations of the request. Only available for some write requests. + WriteDetail write_detail = 3; + // Available when ctx.record_time_stat = true or meet slow query. 
+ TimeDetailV2 time_detail_v2 = 4; + + // RU (Request Unit) consumption details. + RUV2 ru_v2 = 5; +} + +message RUV2 { + uint64 kv_engine_cache_miss = 1; + ExecutorInputs executor_inputs = 2; + uint64 coprocessor_executor_iterations = 3; + uint64 coprocessor_response_bytes = 4; + uint64 raftstore_store_write_trigger_wb_bytes = 5; + uint64 storage_processed_keys_batch_get = 6; + uint64 storage_processed_keys_get = 7; + // Filled by client-go. + uint64 read_rpc_count = 8; + // Filled by client-go. + uint64 write_rpc_count = 9; +} + +message ExecutorInputs { + uint64 tikv_coprocessor_executor_work_total_batch_index_scan = 1; + uint64 tikv_coprocessor_executor_work_total_batch_table_scan = 2; + uint64 tikv_coprocessor_executor_work_total_batch_selection = 3; + uint64 tikv_coprocessor_executor_work_total_batch_top_n = 4; + uint64 tikv_coprocessor_executor_work_total_batch_limit = 5; + uint64 tikv_coprocessor_executor_work_total_batch_simple_aggr = 6; + uint64 tikv_coprocessor_executor_work_total_batch_fast_hash_aggr = 7; } message WriteDetail { - // Wait duration in the store loop. - uint64 store_batch_wait_nanos = 1; + // Wait duration in the store loop. + uint64 store_batch_wait_nanos = 1; - // Wait duration before sending proposal to peers. - uint64 propose_send_wait_nanos = 2; + // Wait duration before sending proposal to peers. + uint64 propose_send_wait_nanos = 2; - // Total time spent on persisting the log. - uint64 persist_log_nanos = 3; + // Total time spent on persisting the log. + uint64 persist_log_nanos = 3; - // Wait time until the Raft log write leader begins to write. - uint64 raft_db_write_leader_wait_nanos = 4; + // Wait time until the Raft log write leader begins to write. + uint64 raft_db_write_leader_wait_nanos = 4; - // Time spent on synchronizing the Raft log to the disk. - uint64 raft_db_sync_log_nanos = 5; + // Time spent on synchronizing the Raft log to the disk. 
+ uint64 raft_db_sync_log_nanos = 5; - // Time spent on writing the Raft log to the Raft memtable. - uint64 raft_db_write_memtable_nanos = 6; + // Time spent on writing the Raft log to the Raft memtable. + uint64 raft_db_write_memtable_nanos = 6; - // Time waiting for peers to confirm the proposal (counting from the instant when the leader sends the proposal message). - uint64 commit_log_nanos = 7; + // Time waiting for peers to confirm the proposal (counting from the instant when the leader sends the proposal message). + uint64 commit_log_nanos = 7; - // Wait duration in the apply loop. - uint64 apply_batch_wait_nanos = 8; + // Wait duration in the apply loop. + uint64 apply_batch_wait_nanos = 8; - // Total time spend to applying the log. - uint64 apply_log_nanos = 9; + // Total time spend to applying the log. + uint64 apply_log_nanos = 9; - // Wait time until the KV RocksDB lock is acquired. - uint64 apply_mutex_lock_nanos = 10; + // Wait time until the KV RocksDB lock is acquired. + uint64 apply_mutex_lock_nanos = 10; - // Wait time until becoming the KV RocksDB write leader. - uint64 apply_write_leader_wait_nanos = 11; + // Wait time until becoming the KV RocksDB write leader. + uint64 apply_write_leader_wait_nanos = 11; - // Time spent on writing the KV DB WAL to the disk. - uint64 apply_write_wal_nanos = 12; + // Time spent on writing the KV DB WAL to the disk. + uint64 apply_write_wal_nanos = 12; - // Time spent on writing to the memtable of the KV RocksDB. - uint64 apply_write_memtable_nanos = 13; + // Time spent on writing to the memtable of the KV RocksDB. + uint64 apply_write_memtable_nanos = 13; - // Time spent on waiting in the latch. - uint64 latch_wait_nanos = 14; + // Time spent on waiting in the latch. + uint64 latch_wait_nanos = 14; - // Processing time in the transaction layer. - uint64 process_nanos = 15; + // Processing time in the transaction layer. 
+ uint64 process_nanos = 15; - // Wait time because of the scheduler flow control or quota limiter throttling. - uint64 throttle_nanos = 16; + // Wait time because of the scheduler flow control or quota limiter throttling. + uint64 throttle_nanos = 16; - // Wait time in the waiter manager for pessimistic locking. - uint64 pessimistic_lock_wait_nanos = 17; + // Wait time in the waiter manager for pessimistic locking. + uint64 pessimistic_lock_wait_nanos = 17; } message KvPair { - KeyError error = 1; - bytes key = 2; - bytes value = 3; + KeyError error = 1; + bytes key = 2; + bytes value = 3; + // The commit timestamp of the key. + // If it is zero, it means the commit timestamp is unknown. + uint64 commit_ts = 4; } enum Op { - Put = 0; - Del = 1; - Lock = 2; - Rollback = 3; - // insert operation has a constraint that key should not exist before. - Insert = 4; - PessimisticLock = 5; - CheckNotExists = 6; + Put = 0; + Del = 1; + Lock = 2; + Rollback = 3; + // insert operation has a constraint that key should not exist before. + Insert = 4; + // PessimisticLock is exclusive lock acquired in pessimistic transaction. + PessimisticLock = 5; + CheckNotExists = 6; + // SharedLock likes Lock but in shared mode. + SharedLock = 7; + // SharedPessimisticLock is shared lock acquired in pessimistic transaction. + SharedPessimisticLock = 8; } enum Assertion { - None = 0; - Exist = 1; - NotExist = 2; + None = 0; + Exist = 1; + NotExist = 2; } enum AssertionLevel { - // No assertion. - Off = 0; - // Assertion is enabled, but not enforced when it might affect performance. - Fast = 1; - // Assertion is enabled and enforced. - Strict = 2; + // No assertion. + Off = 0; + // Assertion is enabled, but not enforced when it might affect performance. + Fast = 1; + // Assertion is enabled and enforced. 
+ Strict = 2; } message Mutation { - Op op = 1; - bytes key = 2; - bytes value = 3; - Assertion assertion = 4; + Op op = 1; + bytes key = 2; + bytes value = 3; + Assertion assertion = 4; } message MvccWrite { - Op type = 1; - uint64 start_ts = 2; - uint64 commit_ts = 3; - bytes short_value = 4; - bool has_overlapped_rollback = 5; - bool has_gc_fence = 6; - uint64 gc_fence = 7; - uint64 last_change_ts = 8; - uint64 versions_to_last_change = 9; + Op type = 1; + uint64 start_ts = 2; + uint64 commit_ts = 3; + bytes short_value = 4; + bool has_overlapped_rollback = 5; + bool has_gc_fence = 6; + uint64 gc_fence = 7; + uint64 last_change_ts = 8; + uint64 versions_to_last_change = 9; } message MvccValue { - uint64 start_ts = 1; - bytes value = 2; + uint64 start_ts = 1; + bytes value = 2; } message MvccLock { - Op type = 1; - uint64 start_ts = 2; - bytes primary = 3; - bytes short_value = 4; - uint64 ttl = 5; - uint64 for_update_ts = 6; - uint64 txn_size = 7; - bool use_async_commit = 8; - repeated bytes secondaries = 9; - repeated uint64 rollback_ts = 10; - uint64 last_change_ts = 11; - uint64 versions_to_last_change = 12; + Op type = 1; + uint64 start_ts = 2; + bytes primary = 3; + bytes short_value = 4; + uint64 ttl = 5; + uint64 for_update_ts = 6; + uint64 txn_size = 7; + bool use_async_commit = 8; + repeated bytes secondaries = 9; + repeated uint64 rollback_ts = 10; + uint64 last_change_ts = 11; + uint64 versions_to_last_change = 12; } message MvccInfo { - MvccLock lock = 1; - repeated MvccWrite writes = 2; - repeated MvccValue values = 3; + MvccLock lock = 1; + repeated MvccWrite writes = 2; + repeated MvccValue values = 3; } message TxnInfo { - uint64 txn = 1; - uint64 status = 2; + uint64 txn = 1; + uint64 status = 2; - // Reserved for file based transaction. - bool is_txn_file = 100; + // Reserved for file based transaction. 
+ bool is_txn_file = 100; } enum Action { - NoAction = 0; - TTLExpireRollback = 1; - LockNotExistRollback = 2; - MinCommitTSPushed = 3; - TTLExpirePessimisticRollback = 4; - LockNotExistDoNothing = 5; + NoAction = 0; + TTLExpireRollback = 1; + LockNotExistRollback = 2; + MinCommitTSPushed = 3; + TTLExpirePessimisticRollback = 4; + LockNotExistDoNothing = 5; } message KeyRange { - bytes start_key = 1; - bytes end_key = 2; + bytes start_key = 1; + bytes end_key = 2; } enum ExtraOp { - Noop = 0; - // ReadOldValue represents to output the previous value for delete/update operations. - ReadOldValue = 1; + Noop = 0; + // ReadOldValue represents to output the previous value for delete/update operations. + ReadOldValue = 1; } message LeaderInfo { - uint64 region_id = 1; - uint64 peer_id = 2; - uint64 term = 3; - metapb.RegionEpoch region_epoch = 4; - ReadState read_state = 5; + uint64 region_id = 1; + uint64 peer_id = 2; + uint64 term = 3; + metapb.RegionEpoch region_epoch = 4; + ReadState read_state = 5; } message ReadState { - uint64 applied_index = 1; - uint64 safe_ts = 2; + uint64 applied_index = 1; + uint64 safe_ts = 2; } message CheckLeaderRequest { - repeated LeaderInfo regions = 1; - uint64 ts = 2; + repeated LeaderInfo regions = 1; + uint64 ts = 2; } message CheckLeaderResponse { - repeated uint64 regions = 1; - uint64 ts = 2; + repeated uint64 regions = 1; + uint64 ts = 2; } message StoreSafeTSRequest { - // Get the minimal `safe_ts` from regions that overlap with the key range [`start_key`, `end_key`) - // An empty key range means all regions in the store - KeyRange key_range = 1; + // Get the minimal `safe_ts` from regions that overlap with the key range [`start_key`, `end_key`) + // An empty key range means all regions in the store + KeyRange key_range = 1; } message StoreSafeTSResponse { - uint64 safe_ts = 1; + uint64 safe_ts = 1; } message RawGetKeyTTLRequest { - Context context = 1; - bytes key = 2; - string cf = 3; + Context context = 1; + bytes key = 2; + 
string cf = 3; } message RawGetKeyTTLResponse { - errorpb.Error region_error = 1; - string error = 2; - uint64 ttl = 3; - bool not_found = 4; + errorpb.Error region_error = 1; + string error = 2; + uint64 ttl = 3; + bool not_found = 4; } message RawCASRequest { - Context context = 1; - bytes key = 2; - bytes value = 3; - bool previous_not_exist = 4; - bytes previous_value = 5; - string cf = 6; - uint64 ttl = 7; + Context context = 1; + bytes key = 2; + bytes value = 3; + bool previous_not_exist = 4; + bytes previous_value = 5; + string cf = 6; + uint64 ttl = 7; + // If `delete` is true, the request is to delete the key when the comparison succeeds + bool delete = 8; } message RawCASResponse { - errorpb.Error region_error = 1; - string error = 2; - bool succeed = 3; - // The previous value regardless of whether the comparison is succeed. - bool previous_not_exist = 4; - bytes previous_value = 5; + errorpb.Error region_error = 1; + string error = 2; + bool succeed = 3; + // The previous value regardless of whether the comparison is succeed. + bool previous_not_exist = 4; + bytes previous_value = 5; } message GetLockWaitInfoRequest { - Context context = 1; - // TODO: There may need some filter options to be used on conditional querying, e.g., finding - // the lock waiting status for some specified transaction. + Context context = 1; + // TODO: There may need some filter options to be used on conditional querying, e.g., finding + // the lock waiting status for some specified transaction. } message GetLockWaitInfoResponse { - errorpb.Error region_error = 1; - string error = 2; - repeated deadlock.WaitForEntry entries = 3; + errorpb.Error region_error = 1; + string error = 2; + repeated deadlock.WaitForEntry entries = 3; } message GetLockWaitHistoryRequest { - Context context = 1; - // TODO: There may need some filter options to be used on conditional querying, e.g., finding - // the lock waiting status for some specified transaction. 
+ Context context = 1; + // TODO: There may need some filter options to be used on conditional querying, e.g., finding + // the lock waiting status for some specified transaction. } message GetLockWaitHistoryResponse { - errorpb.Error region_error = 1; - string error = 2; - repeated deadlock.WaitForEntry entries = 3; + errorpb.Error region_error = 1; + string error = 2; + repeated deadlock.WaitForEntry entries = 3; } message RawCoprocessorRequest { - kvrpcpb.Context context = 1; + kvrpcpb.Context context = 1; - string copr_name = 2; - // Coprorcessor version constraint following SEMVER definition. - string copr_version_req = 3; + string copr_name = 2; + // Coprorcessor version constraint following SEMVER definition. + string copr_version_req = 3; - repeated KeyRange ranges = 4; - bytes data = 5; + repeated KeyRange ranges = 4; + bytes data = 5; } message RawCoprocessorResponse { - errorpb.Error region_error = 1; - // Error message for cases like if no coprocessor with a matching name is found - // or on a version mismatch between plugin_api and the coprocessor. - string error = 2; - bytes data = 3; + errorpb.Error region_error = 1; + // Error message for cases like if no coprocessor with a matching name is found + // or on a version mismatch between plugin_api and the coprocessor. 
+ string error = 2; + bytes data = 3; } enum ChecksumAlgorithm { - Crc64_Xor = 0; + Crc64_Xor = 0; } message RawChecksumRequest { - Context context = 1; - ChecksumAlgorithm algorithm = 2; - repeated KeyRange ranges = 3; + Context context = 1; + ChecksumAlgorithm algorithm = 2; + repeated KeyRange ranges = 3; } message RawChecksumResponse { - errorpb.Error region_error = 1; - string error = 2; - uint64 checksum = 3; - uint64 total_kvs = 4; - uint64 total_bytes = 5; + errorpb.Error region_error = 1; + string error = 2; + uint64 checksum = 3; + uint64 total_kvs = 4; + uint64 total_bytes = 5; } message CompactError { - oneof error { - CompactErrorInvalidStartKey err_invalid_start_key = 1; - CompactErrorPhysicalTableNotExist err_physical_table_not_exist = 2; - CompactErrorCompactInProgress err_compact_in_progress = 3; - CompactErrorTooManyPendingTasks err_too_many_pending_tasks = 4; - } + oneof error { + CompactErrorInvalidStartKey err_invalid_start_key = 1; + CompactErrorPhysicalTableNotExist err_physical_table_not_exist = 2; + CompactErrorCompactInProgress err_compact_in_progress = 3; + CompactErrorTooManyPendingTasks err_too_many_pending_tasks = 4; + } } message CompactErrorInvalidStartKey {} @@ -1510,47 +1677,136 @@ message CompactErrorCompactInProgress {} message CompactErrorTooManyPendingTasks {} message CompactRequest { - // If specified, the compaction will start from this start key. - // If unspecified, the compaction will start from beginning. - // NOTE 1: The start key should be never manually constructed. You should always use a key - // returned in CompactResponse. - // NOTE 2: the compaction range will be always restricted by physical_table_id. - bytes start_key = 1; - - // The physical table that will be compacted. - // - // TODO: this is information that TiKV doesn't need to know. - // See https://github.com/pingcap/kvproto/issues/912 - int64 physical_table_id = 2; - - // The logical table id of the compaction. 
When receiving parallel requests with the same - // logical table id, err_compact_in_progress will be returned. - // - // TODO: this is information that TiKV doesn't need to know. - // See https://github.com/pingcap/kvproto/issues/912 - int64 logical_table_id = 3; - - // API version of the request - APIVersion api_version = 7; - // Keyspace of the table located in. - uint32 keyspace_id = 8; + // If specified, the compaction will start from this start key. + // If unspecified, the compaction will start from beginning. + // NOTE 1: The start key should be never manually constructed. You should always use a key + // returned in CompactResponse. + // NOTE 2: the compaction range will be always restricted by physical_table_id. + bytes start_key = 1; + + // The physical table that will be compacted. + // + // TODO: this is information that TiKV doesn't need to know. + // See https://github.com/pingcap/kvproto/issues/912 + int64 physical_table_id = 2; + + // The logical table id of the compaction. When receiving parallel requests with the same + // logical table id, err_compact_in_progress will be returned. + // + // TODO: this is information that TiKV doesn't need to know. + // See https://github.com/pingcap/kvproto/issues/912 + int64 logical_table_id = 3; + + // API version of the request + APIVersion api_version = 7; + // Keyspace of the table located in. + // NOTE: This field is only meaningful for V1/V2 compatibility. V3 uses + // keyspace_identity and must not read this field as the full identity. + uint32 keyspace_id = 8; + // V3 keyspace identity of the table located in. + apipb.KeyspaceIdentity keyspace_identity = 9; } message CompactResponse { - CompactError error = 1; + CompactError error = 1; - // The compaction is done incrementally. If there are more data to compact, this field - // will be set. The client can request to compact more data according to the `compacted_end_key`. - bool has_remaining = 2; + // The compaction is done incrementally. 
If there are more data to compact, this field + // will be set. The client can request to compact more data according to the `compacted_end_key`. + bool has_remaining = 2; - bytes compacted_start_key = 3; - bytes compacted_end_key = 4; + bytes compacted_start_key = 3; + bytes compacted_end_key = 4; } message TiFlashSystemTableRequest { - string sql = 1; + string sql = 1; } message TiFlashSystemTableResponse { - bytes data = 1 [(gogoproto.customtype) = "github.com/pingcap/kvproto/pkg/sharedbytes.SharedBytes", (gogoproto.nullable) = false]; -} + bytes data = 1 [ + (gogoproto.customtype) = "github.com/pingcap/kvproto/pkg/sharedbytes.SharedBytes", + (gogoproto.nullable) = false + ]; +} + +// Flush is introduced from the pipelined DML protocol. +// A Flush request writes some keys and values to TiKV, storing in LOCK and DEFAULT CF, just like a Prewrite request. +message FlushRequest { + Context context = 1; + repeated Mutation mutations = 2; + bytes primary_key = 3; + uint64 start_ts = 4; + uint64 min_commit_ts = 5; + // generation of the flush request. It is a monotonically increasing number in each transaction. + uint64 generation = 6; + uint64 lock_ttl = 7; + AssertionLevel assertion_level = 8; +} + +message FlushResponse { + errorpb.Error region_error = 1; + repeated KeyError errors = 2; + ExecDetailsV2 exec_details_v2 = 3; +} + +// BufferBatchGet is introduced from the pipelined DML protocol. +// It is similar to a BatchGet request, except that it can only read the data that has been flushed by itself. +message BufferBatchGetRequest { + Context context = 1; + repeated bytes keys = 2; + uint64 version = 3; +} + +message BufferBatchGetResponse { + errorpb.Error region_error = 1; + KeyError error = 2; + repeated KvPair pairs = 3; + // Time and scan details when processing the request. + ExecDetailsV2 exec_details_v2 = 4; +} + +// Actively request TiKV to report health feedback information. 
TiKV won't omit the health feedback information when sending the +// `BatchCommandsResponse` that contains this response. +// The health feedback information won't be replied in the response, but will be attached to `BatchCommandsResponse.health_feedback` field as usual. +// Only works when batch RPC is enabled. +message GetHealthFeedbackRequest { + Context context = 1; +} + +message GetHealthFeedbackResponse { + // The error field is added for keeping consistent. This request won't meet any region error as it's store level rather than region level. + errorpb.Error region_error = 1; + HealthFeedback health_feedback = 2; +} + +message HealthFeedback { + uint64 store_id = 1; + // The sequence number of the feedback message. + // It's defined as an incrementing integer, starting from the unix timestamp (milliseconds) at + // the time that the TiKV node is started. + // This can be useful for filtering out out-of-order feedback messages. + // Note that considering the possibility of system clock changing, this field doesn't guarantee + // uniqueness and monotonic if the TiKV node is restarted. + uint64 feedback_seq_no = 2; + // The slow_score calculated in raftstore module. Due to some limitations of slow score, this would + // be replaced by `SlowTrend` in the future. + int32 slow_score = 3; +} + +message BroadcastTxnStatusRequest { + Context context = 1; + repeated TxnStatus txn_status = 2; +} + +message TxnStatus { + uint64 start_ts = 1; + // a non-zero min_commit_ts indicates the transaction is ongoing + uint64 min_commit_ts = 2; + // a non-zero commit_ts indicates the transaction is committed + uint64 commit_ts = 3; + bool rolled_back = 4; + // The txn has unlocked all keys, implying that it can be removed from txn_status_cache. 
+ bool is_completed = 5; +} + +message BroadcastTxnStatusResponse {} diff --git a/proto/metapb.proto b/proto/metapb.proto index ef3c8d45..4e38559e 100644 --- a/proto/metapb.proto +++ b/proto/metapb.proto @@ -5,25 +5,27 @@ import "encryptionpb.proto"; import "gogoproto/gogo.proto"; import "rustproto.proto"; +option java_package = "org.tikv.kvproto"; +option (gogoproto.goproto_sizecache_all) = false; +option (gogoproto.goproto_unkeyed_all) = false; +option (gogoproto.goproto_unrecognized_all) = false; option (gogoproto.marshaler_all) = true; option (gogoproto.sizer_all) = true; option (gogoproto.unmarshaler_all) = true; option (rustproto.lite_runtime_all) = true; -option java_package = "org.tikv.kvproto"; - message Cluster { - uint64 id = 1; - // max peer count for a region. - // pd will do the auto-balance if region peer count mismatches. - uint32 max_peer_count = 2; - // more attributes...... + uint64 id = 1; + // max peer count for a region. + // pd will do the auto-balance if region peer count mismatches. + uint32 max_peer_count = 2; + // more attributes...... } enum StoreState { - Up = 0; - Offline = 1; - Tombstone = 2; + Up = 0; + Offline = 1; + Tombstone = 2; } // NodeState is going to replace StoreState to make the state concept more clear. @@ -31,122 +33,129 @@ enum StoreState { // "Removing" is just like previous `Offline` which is more accurate. // "Removed" has the same meaning with `Tombstone`. enum NodeState { - Preparing = 0; - Serving = 1; - Removing = 2; - Removed = 3; + Preparing = 0; + Serving = 1; + Removing = 2; + Removed = 3; } // Case insensitive key/value for replica constraints. message StoreLabel { - string key = 1; - string value = 2; + string key = 1; + string value = 2; } message Store { - uint64 id = 1; - // Address to handle client requests (kv, cop, etc.) - string address = 2; - StoreState state = 3; - repeated StoreLabel labels = 4; - string version = 5; - // Address to handle peer requests (raft messages from other store). 
- // Empty means same as address. - string peer_address = 6; - // Status address provides the HTTP service for external components - string status_address = 7; - string git_hash = 8; - // The start timestamp of the current store - int64 start_timestamp = 9; - string deploy_path = 10; - // The last heartbeat timestamp of the store. - int64 last_heartbeat = 11; - // If the store is physically destroyed, which means it can never up again. - bool physically_destroyed = 12; - // NodeState is used to replace StoreState which will be deprecated in the future. - NodeState node_state = 13; + uint64 id = 1; + // Address to handle client requests (kv, cop, etc.) + string address = 2; + StoreState state = 3; + repeated StoreLabel labels = 4; + string version = 5; + // Address to handle peer requests (raft messages from other store). + // Empty means same as address. + string peer_address = 6; + // Status address provides the HTTP service for external components + string status_address = 7; + string git_hash = 8; + // The start timestamp of the current store + int64 start_timestamp = 9; + string deploy_path = 10; + // The last heartbeat timestamp of the store. + int64 last_heartbeat = 11; + // If the store is physically destroyed, which means it can never up again. + bool physically_destroyed = 12; + // NodeState is used to replace StoreState which will be deprecated in the future. 
+ NodeState node_state = 13; } message RegionEpoch { - // Conf change version, auto increment when add or remove peer - uint64 conf_ver = 1; - // Region version, auto increment when split or merge - uint64 version = 2; + // Conf change version, auto increment when add or remove peer + uint64 conf_ver = 1; + // Region version, auto increment when split or merge + uint64 version = 2; } message BucketStats { - // total read in bytes of each bucket - repeated uint64 read_bytes = 1; + // total read in bytes of each bucket + repeated uint64 read_bytes = 1; - // total write in bytes of each bucket - repeated uint64 write_bytes = 2; + // total write in bytes of each bucket + repeated uint64 write_bytes = 2; - // total read qps of each bucket - repeated uint64 read_qps = 3; + // total read qps of each bucket + repeated uint64 read_qps = 3; - // total write qps of each bucket - repeated uint64 write_qps = 4; + // total write qps of each bucket + repeated uint64 write_qps = 4; - // total read keys of each bucket - repeated uint64 read_keys = 5; + // total read keys of each bucket + repeated uint64 read_keys = 5; - // total write keys of each bucket - repeated uint64 write_keys = 6; + // total write keys of each bucket + repeated uint64 write_keys = 6; } message Buckets { - uint64 region_id = 1; + uint64 region_id = 1; - // A hint indicate if keys have changed. - uint64 version = 2; + // A hint indicate if keys have changed. + uint64 version = 2; - // keys of buckets, include start/end key of region - repeated bytes keys = 3; + // keys of buckets, include start/end key of region + repeated bytes keys = 3; - // bucket stats - BucketStats stats = 4; + // bucket stats + BucketStats stats = 4; + + // The period in milliseconds that stats are collected with in + uint64 period_in_ms = 5; +} - // The period in milliseconds that stats are collected with in - uint64 period_in_ms = 5; +message BucketMeta { + // A hint indicate if keys have changed. 
+ uint64 version = 1; + // keys of buckets, include start/end key of region + repeated bytes keys = 2; } message Region { - uint64 id = 1; - // Region key range [start_key, end_key). - bytes start_key = 2; - bytes end_key = 3; - RegionEpoch region_epoch = 4; - repeated Peer peers = 5; - // Encryption metadata for start_key and end_key. encryption_meta.iv is IV for start_key. - // IV for end_key is calculated from (encryption_meta.iv + len(start_key)). - // The field is only used by PD and should be ignored otherwise. - // If encryption_meta is empty (i.e. nil), it means start_key and end_key are unencrypted. - encryptionpb.EncryptionMeta encryption_meta = 6; - // The flashback state indicates whether this region is in the flashback state. - // TODO: only check by `flashback_start_ts` in the future. Keep for compatibility now. - bool is_in_flashback = 7; - // The start_ts that the current flashback progress is using. - uint64 flashback_start_ts = 8; + uint64 id = 1; + // Region key range [start_key, end_key). + bytes start_key = 2; + bytes end_key = 3; + RegionEpoch region_epoch = 4; + repeated Peer peers = 5; + // Encryption metadata for start_key and end_key. encryption_meta.iv is IV for start_key. + // IV for end_key is calculated from (encryption_meta.iv + len(start_key)). + // The field is only used by PD and should be ignored otherwise. + // If encryption_meta is empty (i.e. nil), it means start_key and end_key are unencrypted. + encryptionpb.EncryptionMeta encryption_meta = 6; + // The flashback state indicates whether this region is in the flashback state. + // TODO: only check by `flashback_start_ts` in the future. Keep for compatibility now. + bool is_in_flashback = 7; + // The start_ts that the current flashback progress is using. 
+ uint64 flashback_start_ts = 8; } enum PeerRole { - // Voter -> Voter - Voter = 0; - // Learner/None -> Learner - Learner = 1; - // Learner/None -> Voter - IncomingVoter = 2; - // Voter -> Learner - DemotingVoter = 3; - // We forbid Voter -> None, it can introduce unavailability as discussed in - // etcd-io/etcd#7625 - // Learner -> None can be apply directly, doesn't need to be stored as - // joint state. + // Voter -> Voter + Voter = 0; + // Learner/None -> Learner + Learner = 1; + // Learner/None -> Voter + IncomingVoter = 2; + // Voter -> Learner + DemotingVoter = 3; + // We forbid Voter -> None, it can introduce unavailability as discussed in + // etcd-io/etcd#7625 + // Learner -> None can be apply directly, doesn't need to be stored as + // joint state. } message Peer { - uint64 id = 1; - uint64 store_id = 2; - PeerRole role = 3; - bool is_witness = 4; + uint64 id = 1; + uint64 store_id = 2; + PeerRole role = 3; + bool is_witness = 4; } diff --git a/proto/mpp.proto b/proto/mpp.proto index 268e218d..08c35ea5 100644 --- a/proto/mpp.proto +++ b/proto/mpp.proto @@ -1,103 +1,113 @@ syntax = "proto3"; package mpp; -import "gogoproto/gogo.proto"; +import "apipb.proto"; import "coprocessor.proto"; -import "metapb.proto"; +import "gogoproto/gogo.proto"; import "kvrpcpb.proto"; +import "metapb.proto"; +import "rustproto.proto"; +option java_package = "org.tikv.kvproto"; +option (gogoproto.goproto_sizecache_all) = false; +option (gogoproto.goproto_unkeyed_all) = false; +option (gogoproto.goproto_unrecognized_all) = false; option (gogoproto.marshaler_all) = true; option (gogoproto.sizer_all) = true; option (gogoproto.unmarshaler_all) = true; - -option java_package = "org.tikv.kvproto"; +option (rustproto.lite_runtime_all) = true; // TaskMeta contains meta of a mpp plan, including query's ts and task address. message TaskMeta { - uint64 start_ts = 1; // start ts of a query - int64 task_id = 2; // if task id is -1 , it indicates a tidb task. 
- int64 partition_id = 3; // Only used for hash partition - string address = 4; // target address of this task. - uint64 gather_id = 5; // used to distinguish different gathers in the mpp query. - uint64 query_ts = 6; // timestamp when start to execute query, used for TiFlash miniTSO schedule. - uint64 local_query_id = 7; // unique local query_id if tidb don't restart. So we can use gather_id + query_ts + local_query_id + server_id to represent a global unique query. - uint64 server_id = 8; // TiDB server id - int64 mpp_version = 9; // mpp version - uint32 keyspace_id = 10; // keyspace id of the request - string coordinator_address = 11; // coordinator_address of this query - bool report_execution_summary = 12; // Only when coordinator_address is not empty, this flag can be true. When set to true, TiFlash only report execution summary through ReportMPPTaskStatus service, don't include summaries in MppDataPacket - kvrpcpb.APIVersion api_version = 16; // API version of the request - string resource_group_name = 17; - uint64 connection_id = 18; // This is the session id between a client and tidb - string connection_alias = 19; // This is the session alias between a client and tidb + uint64 start_ts = 1; // start ts of a query + int64 task_id = 2; // if task id is -1 , it indicates a tidb task. + int64 partition_id = 3; // Only used for hash partition + string address = 4; // target address of this task. + uint64 gather_id = 5; // used to distinguish different gathers in the mpp query. + uint64 query_ts = 6; // timestamp when start to execute query, used for TiFlash miniTSO schedule. + uint64 local_query_id = 7; // unique local query_id if tidb don't restart. So we can use gather_id + query_ts + local_query_id + server_id to represent a global unique query. + uint64 server_id = 8; // TiDB server id + int64 mpp_version = 9; // mpp version + uint32 keyspace_id = 10; // V1/V2 compatibility keyspace id of the request. V3 should use keyspace_identity. 
+ string coordinator_address = 11; // coordinator_address of this query + bool report_execution_summary = 12; // Only when coordinator_address is not empty, this flag can be true. When set to true, TiFlash only report execution summary through ReportMPPTaskStatus service, don't include summaries in MppDataPacket + kvrpcpb.APIVersion api_version = 16; // API version of the request + string resource_group_name = 17; + uint64 connection_id = 18; // This is the session id between a client and tidb + string connection_alias = 19; // This is the session alias between a client and tidb + string sql_digest = 20; + string plan_digest = 21; + // V3 keyspace identity of the request. + apipb.KeyspaceIdentity keyspace_identity = 22; } -message IsAliveRequest { -} +message IsAliveRequest {} message IsAliveResponse { - bool available = 1; - int64 mpp_version = 2; + bool available = 1; + int64 mpp_version = 2; } // Dipsatch the task request to different tiflash servers. message DispatchTaskRequest { - TaskMeta meta = 1; - bytes encoded_plan = 2; - int64 timeout = 3; - repeated coprocessor.RegionInfo regions = 4; - // If this task contains table scan, we still need their region info. - int64 schema_ver = 5; - // Used for partition table scan - repeated coprocessor.TableRegions table_regions = 6; + TaskMeta meta = 1; + bytes encoded_plan = 2; + int64 timeout = 3; + repeated coprocessor.RegionInfo regions = 4; + // If this task contains table scan, we still need their region info. + int64 schema_ver = 5; + // Used for partition table scan + repeated coprocessor.TableRegions table_regions = 6; + // Shard infos for TiCI/FTS routing in MPP dispatch path. + repeated coprocessor.TableShardInfos table_shard_infos = 7; } // Get response of DispatchTaskRequest. message DispatchTaskResponse { - Error error = 1; - repeated metapb.Region retry_regions = 2; + Error error = 1; + repeated metapb.Region retry_regions = 2; } // CancelTaskRequest closes the execution of a task. 
message CancelTaskRequest { - TaskMeta meta = 1; - Error error = 2; + TaskMeta meta = 1; + Error error = 2; } message CancelTaskResponse { - Error error = 1; + Error error = 1; } // ReportTaskStatus reports the execution status of a task. // when TiFlash reports status to TiDB, ReportTaskStatusRequest serialize tipb.TiFlashExecutionInfo into data; message ReportTaskStatusRequest { - TaskMeta meta = 1; - bytes data = 2; - Error error = 3; + TaskMeta meta = 1; + bytes data = 2; + Error error = 3; } message ReportTaskStatusResponse { - Error error = 1; + Error error = 1; } // build connection between different tasks. Data is sent by the tasks that are closer to the data sources. message EstablishMPPConnectionRequest { - TaskMeta sender_meta = 1; // node closer to the source - TaskMeta receiver_meta = 2; // node closer to the tidb mpp gather. + TaskMeta sender_meta = 1; // node closer to the source + TaskMeta receiver_meta = 2; // node closer to the tidb mpp gather. } // when TiFlash sends data to TiDB, Data packets wrap tipb.SelectResponse, i.e., serialize tipb.SelectResponse into data; // when TiFlash sends data to TiFlash, data blocks are serialized into chunks, and the execution_summaries in tipb.SelectResponse are serialized into data only for the last packet. 
message MPPDataPacket { - bytes data = 1; - Error error = 2; - repeated bytes chunks = 3; - repeated uint64 stream_ids = 4; - int64 version = 5; // version of data packet format + bytes data = 1; + Error error = 2; + repeated bytes chunks = 3; + repeated uint64 stream_ids = 4; + int64 version = 5; // version of data packet format } message Error { - int32 code = 1; - string msg = 2; - int64 mpp_version = 3; + int32 code = 1; + string msg = 2; + int64 mpp_version = 3; } diff --git a/proto/pdpb.proto b/proto/pdpb.proto index c8fec849..49a9bf29 100644 --- a/proto/pdpb.proto +++ b/proto/pdpb.proto @@ -1,1127 +1,1455 @@ syntax = "proto3"; package pdpb; -import "metapb.proto"; +import "apipb.proto"; import "eraftpb.proto"; +import "gogoproto/gogo.proto"; +import "metapb.proto"; import "raft_serverpb.proto"; import "replication_modepb.proto"; - -import "gogoproto/gogo.proto"; import "rustproto.proto"; -option (gogoproto.sizer_all) = true; +option java_package = "org.tikv.kvproto"; +option (gogoproto.goproto_sizecache_all) = false; +option (gogoproto.goproto_unkeyed_all) = false; +option (gogoproto.goproto_unrecognized_all) = false; option (gogoproto.marshaler_all) = true; +option (gogoproto.sizer_all) = true; option (gogoproto.unmarshaler_all) = true; option (rustproto.lite_runtime_all) = true; -option java_package = "org.tikv.kvproto"; - service PD { - // GetClusterInfo get the information of this cluster. It does not require - // the cluster_id in request matchs the id of this cluster. - rpc GetClusterInfo(GetClusterInfoRequest) returns (GetClusterInfoResponse) {} + // GetClusterInfo get the information of this cluster. It does not require + // the cluster_id in request matchs the id of this cluster. + rpc GetClusterInfo(GetClusterInfoRequest) returns (GetClusterInfoResponse) {} + + // GetMembers get the member list of this cluster. It does not require + // the cluster_id in request matchs the id of this cluster. 
+ rpc GetMembers(GetMembersRequest) returns (GetMembersResponse) {} + + rpc Tso(stream TsoRequest) returns (stream TsoResponse) {} + + rpc Bootstrap(BootstrapRequest) returns (BootstrapResponse) {} - // GetMembers get the member list of this cluster. It does not require - // the cluster_id in request matchs the id of this cluster. - rpc GetMembers(GetMembersRequest) returns (GetMembersResponse) {} + rpc IsBootstrapped(IsBootstrappedRequest) returns (IsBootstrappedResponse) {} - rpc Tso(stream TsoRequest) returns (stream TsoResponse) {} + rpc AllocID(AllocIDRequest) returns (AllocIDResponse) {} - rpc Bootstrap(BootstrapRequest) returns (BootstrapResponse) {} + rpc IsSnapshotRecovering(IsSnapshotRecoveringRequest) returns (IsSnapshotRecoveringResponse) {} - rpc IsBootstrapped(IsBootstrappedRequest) returns (IsBootstrappedResponse) {} + rpc GetStore(GetStoreRequest) returns (GetStoreResponse) {} - rpc AllocID(AllocIDRequest) returns (AllocIDResponse) {} + rpc PutStore(PutStoreRequest) returns (PutStoreResponse) {} - rpc IsSnapshotRecovering(IsSnapshotRecoveringRequest) returns (IsSnapshotRecoveringResponse) {} + rpc GetAllStores(GetAllStoresRequest) returns (GetAllStoresResponse) {} - rpc GetStore(GetStoreRequest) returns (GetStoreResponse) {} + rpc StoreHeartbeat(StoreHeartbeatRequest) returns (StoreHeartbeatResponse) {} - rpc PutStore(PutStoreRequest) returns (PutStoreResponse) {} + rpc RegionHeartbeat(stream RegionHeartbeatRequest) returns (stream RegionHeartbeatResponse) {} - rpc GetAllStores(GetAllStoresRequest) returns (GetAllStoresResponse) {} + rpc GetRegion(GetRegionRequest) returns (GetRegionResponse) {} - rpc StoreHeartbeat(StoreHeartbeatRequest) returns (StoreHeartbeatResponse) {} + rpc GetPrevRegion(GetRegionRequest) returns (GetRegionResponse) {} - rpc RegionHeartbeat(stream RegionHeartbeatRequest) returns (stream RegionHeartbeatResponse) {} + rpc GetRegionByID(GetRegionByIDRequest) returns (GetRegionResponse) {} - rpc GetRegion(GetRegionRequest) returns 
(GetRegionResponse) {} + rpc QueryRegion(stream QueryRegionRequest) returns (stream QueryRegionResponse) {} - rpc GetPrevRegion(GetRegionRequest) returns (GetRegionResponse) {} + // Deprecated: use BatchScanRegions instead. + rpc ScanRegions(ScanRegionsRequest) returns (ScanRegionsResponse) {} - rpc GetRegionByID(GetRegionByIDRequest) returns (GetRegionResponse) {} + rpc BatchScanRegions(BatchScanRegionsRequest) returns (BatchScanRegionsResponse) {} - rpc ScanRegions(ScanRegionsRequest) returns (ScanRegionsResponse) {} + rpc AskSplit(AskSplitRequest) returns (AskSplitResponse) { + // Use AskBatchSplit instead. + option deprecated = true; + } - rpc AskSplit(AskSplitRequest) returns (AskSplitResponse) { - // Use AskBatchSplit instead. - option deprecated = true; - } + rpc ReportSplit(ReportSplitRequest) returns (ReportSplitResponse) { + // Use ResportBatchSplit instead. + option deprecated = true; + } - rpc ReportSplit(ReportSplitRequest) returns (ReportSplitResponse) { - // Use ResportBatchSplit instead. 
- option deprecated = true; - } + rpc AskBatchSplit(AskBatchSplitRequest) returns (AskBatchSplitResponse) {} - rpc AskBatchSplit(AskBatchSplitRequest) returns (AskBatchSplitResponse) {} + rpc ReportBatchSplit(ReportBatchSplitRequest) returns (ReportBatchSplitResponse) {} - rpc ReportBatchSplit(ReportBatchSplitRequest) returns (ReportBatchSplitResponse) {} + rpc GetClusterConfig(GetClusterConfigRequest) returns (GetClusterConfigResponse) {} - rpc GetClusterConfig(GetClusterConfigRequest) returns (GetClusterConfigResponse) {} + rpc PutClusterConfig(PutClusterConfigRequest) returns (PutClusterConfigResponse) {} - rpc PutClusterConfig(PutClusterConfigRequest) returns (PutClusterConfigResponse) {} + rpc ScatterRegion(ScatterRegionRequest) returns (ScatterRegionResponse) {} - rpc ScatterRegion(ScatterRegionRequest) returns (ScatterRegionResponse) {} + rpc GetGCSafePoint(GetGCSafePointRequest) returns (GetGCSafePointResponse) {} - rpc GetGCSafePoint(GetGCSafePointRequest) returns (GetGCSafePointResponse) {} + rpc UpdateGCSafePoint(UpdateGCSafePointRequest) returns (UpdateGCSafePointResponse) {} - rpc UpdateGCSafePoint(UpdateGCSafePointRequest) returns (UpdateGCSafePointResponse) {} + rpc UpdateServiceGCSafePoint(UpdateServiceGCSafePointRequest) returns (UpdateServiceGCSafePointResponse) {} - rpc UpdateServiceGCSafePoint(UpdateServiceGCSafePointRequest) returns (UpdateServiceGCSafePointResponse) {} + rpc GetGCSafePointV2(GetGCSafePointV2Request) returns (GetGCSafePointV2Response) {} - rpc GetGCSafePointV2(GetGCSafePointV2Request) returns (GetGCSafePointV2Response) {} + rpc WatchGCSafePointV2(WatchGCSafePointV2Request) returns (stream WatchGCSafePointV2Response) {} - rpc WatchGCSafePointV2(WatchGCSafePointV2Request) returns (stream WatchGCSafePointV2Response) {} + rpc UpdateGCSafePointV2(UpdateGCSafePointV2Request) returns (UpdateGCSafePointV2Response) {} - rpc UpdateGCSafePointV2(UpdateGCSafePointV2Request) returns (UpdateGCSafePointV2Response) {} + rpc 
UpdateServiceSafePointV2(UpdateServiceSafePointV2Request) returns (UpdateServiceSafePointV2Response) {} - rpc UpdateServiceSafePointV2(UpdateServiceSafePointV2Request) returns (UpdateServiceSafePointV2Response) {} + rpc GetAllGCSafePointV2(GetAllGCSafePointV2Request) returns (GetAllGCSafePointV2Response) {} - rpc GetAllGCSafePointV2(GetAllGCSafePointV2Request) returns (GetAllGCSafePointV2Response) {} + rpc AdvanceGCSafePoint(AdvanceGCSafePointRequest) returns (AdvanceGCSafePointResponse) {} - rpc SyncRegions(stream SyncRegionRequest) returns (stream SyncRegionResponse) {} + rpc AdvanceTxnSafePoint(AdvanceTxnSafePointRequest) returns (AdvanceTxnSafePointResponse) {} - rpc GetOperator(GetOperatorRequest) returns (GetOperatorResponse) {} + rpc SetGCBarrier(SetGCBarrierRequest) returns (SetGCBarrierResponse) {} - rpc SyncMaxTS(SyncMaxTSRequest) returns (SyncMaxTSResponse) {} + rpc DeleteGCBarrier(DeleteGCBarrierRequest) returns (DeleteGCBarrierResponse) {} - rpc SplitRegions(SplitRegionsRequest) returns (SplitRegionsResponse) {} + rpc SetGlobalGCBarrier(SetGlobalGCBarrierRequest) returns (SetGlobalGCBarrierResponse) {} - rpc SplitAndScatterRegions(SplitAndScatterRegionsRequest) returns (SplitAndScatterRegionsResponse) {} + rpc DeleteGlobalGCBarrier(DeleteGlobalGCBarrierRequest) returns (DeleteGlobalGCBarrierResponse) {} - rpc GetDCLocationInfo(GetDCLocationInfoRequest) returns (GetDCLocationInfoResponse) {} + rpc GetGCState(GetGCStateRequest) returns (GetGCStateResponse) {} - rpc StoreGlobalConfig(StoreGlobalConfigRequest) returns (StoreGlobalConfigResponse) {} + rpc GetAllKeyspacesGCStates(GetAllKeyspacesGCStatesRequest) returns (GetAllKeyspacesGCStatesResponse) {} - rpc LoadGlobalConfig(LoadGlobalConfigRequest) returns (LoadGlobalConfigResponse) {} + rpc SyncRegions(stream SyncRegionRequest) returns (stream SyncRegionResponse) {} - rpc WatchGlobalConfig(WatchGlobalConfigRequest) returns (stream WatchGlobalConfigResponse) {} + rpc GetOperator(GetOperatorRequest) 
returns (GetOperatorResponse) {} - rpc ReportBuckets(stream ReportBucketsRequest) returns (ReportBucketsResponse) {} + rpc SyncMaxTS(SyncMaxTSRequest) returns (SyncMaxTSResponse) {} - rpc ReportMinResolvedTS(ReportMinResolvedTsRequest) returns (ReportMinResolvedTsResponse) {} + rpc SplitRegions(SplitRegionsRequest) returns (SplitRegionsResponse) {} - rpc SetExternalTimestamp(SetExternalTimestampRequest) returns (SetExternalTimestampResponse) {} + rpc SplitAndScatterRegions(SplitAndScatterRegionsRequest) returns (SplitAndScatterRegionsResponse) {} - rpc GetExternalTimestamp(GetExternalTimestampRequest) returns (GetExternalTimestampResponse) {} + rpc GetDCLocationInfo(GetDCLocationInfoRequest) returns (GetDCLocationInfoResponse) {} - // Get the minimum timestamp across all keyspace groups from API server - // TODO: Currently, we need to ask API server to get the minimum timestamp. - // Once we support service discovery, we can remove it. - rpc GetMinTS (GetMinTSRequest) returns (GetMinTSResponse) {} + rpc StoreGlobalConfig(StoreGlobalConfigRequest) returns (StoreGlobalConfigResponse) {} + + rpc LoadGlobalConfig(LoadGlobalConfigRequest) returns (LoadGlobalConfigResponse) {} + + rpc WatchGlobalConfig(WatchGlobalConfigRequest) returns (stream WatchGlobalConfigResponse) {} + + rpc ReportBuckets(stream ReportBucketsRequest) returns (ReportBucketsResponse) {} + + rpc ReportMinResolvedTS(ReportMinResolvedTsRequest) returns (ReportMinResolvedTsResponse) {} + + rpc SetExternalTimestamp(SetExternalTimestampRequest) returns (SetExternalTimestampResponse) {} + + rpc GetExternalTimestamp(GetExternalTimestampRequest) returns (GetExternalTimestampResponse) {} + + // Get the minimum timestamp across all keyspace groups from API server + // TODO: Currently, we need to ask API server to get the minimum timestamp. + // Once we support service discovery, we can remove it. 
+ rpc GetMinTS(GetMinTSRequest) returns (GetMinTSResponse) {} } message WatchGlobalConfigRequest { - string config_path = 1; - int64 revision = 2; + string config_path = 1; + int64 revision = 2; } message WatchGlobalConfigResponse { - repeated GlobalConfigItem changes = 1; - int64 revision = 2; - ResponseHeader header = 3; + repeated GlobalConfigItem changes = 1; + int64 revision = 2; + ResponseHeader header = 3; } message StoreGlobalConfigRequest { - repeated GlobalConfigItem changes = 1; - string config_path = 2; + repeated GlobalConfigItem changes = 1; + string config_path = 2; } message StoreGlobalConfigResponse { - Error error = 1; + Error error = 1; } message LoadGlobalConfigRequest { - repeated string names = 1; - string config_path = 2; + repeated string names = 1; + string config_path = 2; } message LoadGlobalConfigResponse { - repeated GlobalConfigItem items = 1; - int64 revision = 2; + repeated GlobalConfigItem items = 1; + int64 revision = 2; } enum EventType { - PUT = 0; - DELETE = 1; + PUT = 0; + DELETE = 1; } message GlobalConfigItem { - string name = 1; - // this field 'value' is replaced by the field 'payload'. - string value = 2; - Error error = 3; - EventType kind = 4; - // Since item value needs to support marshal of different struct types, - // it should be set to bytes instead of string. - bytes payload = 5; + string name = 1; + // this field 'value' is replaced by the field 'payload'. + string value = 2; + Error error = 3; + EventType kind = 4; + // Since item value needs to support marshal of different struct types, + // it should be set to bytes instead of string. + bytes payload = 5; } message RequestHeader { - // cluster_id is the ID of the cluster which be sent to. - uint64 cluster_id = 1; - // sender_id is the ID of the sender server, also member ID or etcd ID. - uint64 sender_id = 2; + // cluster_id is the ID of the cluster which be sent to. 
+ uint64 cluster_id = 1; + // sender_id is the ID of the sender server, also member ID or etcd ID. + // sender_id is used in PD internal communication. + uint64 sender_id = 2; + // caller_id is the ID of the client which sends the request, such as tikv, + // tidb, cdc, etc. + string caller_id = 3; + // caller_component is the component of the client which sends the request, + // such as ddl, optimizer, etc. + string caller_component = 4; } message ResponseHeader { - // cluster_id is the ID of the cluster which sent the response. - uint64 cluster_id = 1; - Error error = 2; + // cluster_id is the ID of the cluster which sent the response. + uint64 cluster_id = 1; + Error error = 2; } enum ErrorType { - OK = 0; - UNKNOWN = 1; - NOT_BOOTSTRAPPED = 2; - STORE_TOMBSTONE = 3; - ALREADY_BOOTSTRAPPED = 4; - INCOMPATIBLE_VERSION = 5; - REGION_NOT_FOUND = 6; - GLOBAL_CONFIG_NOT_FOUND = 7; - DUPLICATED_ENTRY = 8; - ENTRY_NOT_FOUND = 9; - INVALID_VALUE = 10; - // required watch revision is smaller than current compact/min revision. - DATA_COMPACTED = 11; + OK = 0; + UNKNOWN = 1; + NOT_BOOTSTRAPPED = 2; + STORE_TOMBSTONE = 3; + ALREADY_BOOTSTRAPPED = 4; + INCOMPATIBLE_VERSION = 5; + REGION_NOT_FOUND = 6; + GLOBAL_CONFIG_NOT_FOUND = 7; + DUPLICATED_ENTRY = 8; + ENTRY_NOT_FOUND = 9; + INVALID_VALUE = 10; + // required watch revision is smaller than current compact/min revision. + DATA_COMPACTED = 11; + REGIONS_NOT_CONTAIN_ALL_KEY_RANGE = 12; } message Error { - ErrorType type = 1; - string message = 2; + ErrorType type = 1; + string message = 2; } message TsoRequest { - RequestHeader header = 1; + RequestHeader header = 1; - uint32 count = 2; - string dc_location = 3; + uint32 count = 2; + string dc_location = 3; + // V3 keyspace identity for tenant-scoped TSO requests. 
+ apipb.KeyspaceIdentity identity = 4; } message Timestamp { - int64 physical = 1; - int64 logical = 2; - // Number of suffix bits used for global distinction, - // PD client will use this to compute a TSO's logical part. - uint32 suffix_bits = 3; + int64 physical = 1; + int64 logical = 2; + // Number of suffix bits used for global distinction, + // PD client will use this to compute a TSO's logical part. + uint32 suffix_bits = 3; } message TsoResponse { - ResponseHeader header = 1; + ResponseHeader header = 1; - uint32 count = 2; - Timestamp timestamp = 3; + uint32 count = 2; + Timestamp timestamp = 3; } message BootstrapRequest { - RequestHeader header = 1; + RequestHeader header = 1; - metapb.Store store = 2; - metapb.Region region = 3; + metapb.Store store = 2; + metapb.Region region = 3; } message BootstrapResponse { - ResponseHeader header = 1; - replication_modepb.ReplicationStatus replication_status = 2; + ResponseHeader header = 1; + replication_modepb.ReplicationStatus replication_status = 2; } message IsBootstrappedRequest { - RequestHeader header = 1; + RequestHeader header = 1; } message IsBootstrappedResponse { - ResponseHeader header = 1; + ResponseHeader header = 1; - bool bootstrapped = 2; + bool bootstrapped = 2; } message AllocIDRequest { - RequestHeader header = 1; + RequestHeader header = 1; + + uint32 count = 2; } message AllocIDResponse { - ResponseHeader header = 1; + ResponseHeader header = 1; - uint64 id = 2; + uint64 id = 2; + uint32 count = 3; } message IsSnapshotRecoveringRequest { - RequestHeader header = 1; + RequestHeader header = 1; } message IsSnapshotRecoveringResponse { - ResponseHeader header = 1; - bool marked = 2; + ResponseHeader header = 1; + bool marked = 2; } message GetStoreRequest { - RequestHeader header = 1; + RequestHeader header = 1; - uint64 store_id = 2; + uint64 store_id = 2; } message GetStoreResponse { - ResponseHeader header = 1; + ResponseHeader header = 1; - metapb.Store store = 2; - StoreStats stats = 3; + 
metapb.Store store = 2; + StoreStats stats = 3; } message PutStoreRequest { - RequestHeader header = 1; + RequestHeader header = 1; - metapb.Store store = 2; + metapb.Store store = 2; } message PutStoreResponse { - ResponseHeader header = 1; - replication_modepb.ReplicationStatus replication_status = 2; + ResponseHeader header = 1; + replication_modepb.ReplicationStatus replication_status = 2; } message GetAllStoresRequest { - RequestHeader header = 1; - // Do NOT return tombstone stores if set to true. - bool exclude_tombstone_stores = 2; + RequestHeader header = 1; + // Do NOT return tombstone stores if set to true. + bool exclude_tombstone_stores = 2; } message GetAllStoresResponse { - ResponseHeader header = 1; + ResponseHeader header = 1; - repeated metapb.Store stores = 2; + repeated metapb.Store stores = 2; } message GetRegionRequest { - RequestHeader header = 1; + RequestHeader header = 1; - bytes region_key = 2; - bool need_buckets = 3; + // Physical key bytes used for Region lookup. + bytes region_key = 2; + bool need_buckets = 3; } message GetRegionResponse { - reserved 4; + reserved 4; - ResponseHeader header = 1; + ResponseHeader header = 1; - metapb.Region region = 2; - metapb.Peer leader = 3; - // Leader considers that these peers are down. - repeated PeerStats down_peers = 5; - // Pending peers are the peers that the leader can't consider as - // working followers. - repeated metapb.Peer pending_peers = 6; - // buckets isn't nil if GetRegion.* requests set need_buckets. - metapb.Buckets buckets = 7; + metapb.Region region = 2; + metapb.Peer leader = 3; + // Leader considers that these peers are down. + repeated PeerStats down_peers = 5; + // Pending peers are the peers that the leader can't consider as + // working followers. + repeated metapb.Peer pending_peers = 6; + // buckets isn't nil if GetRegion.* requests set need_buckets. 
+ metapb.Buckets buckets = 7; } message GetRegionByIDRequest { - RequestHeader header = 1; + RequestHeader header = 1; - uint64 region_id = 2; - bool need_buckets = 3; + uint64 region_id = 2; + bool need_buckets = 3; } -// Use GetRegionResponse as the response of GetRegionByIDRequest. +message QueryRegionRequest { + RequestHeader header = 1; + + // Whether to include the buckets info within the response. + bool need_buckets = 2; + // The region IDs to query. + repeated uint64 ids = 3; + // Physical key bytes to query. + repeated bytes keys = 4; + // Previous physical key bytes to query. + repeated bytes prev_keys = 5; +} + +message QueryRegionResponse { + ResponseHeader header = 1; + // This array functions as a map corresponding to the region IDs, + // preserving the order of the input region keys, if they are present. + repeated uint64 key_id_map = 2; + // This array functions as a map corresponding to the previous region IDs, + // preserving the order of the input previous region keys, if they are present. + repeated uint64 prev_key_id_map = 3; + // RegionID -> RegionResponse + map regions_by_id = 4; +} + +message RegionResponse { + metapb.Region region = 1; + metapb.Peer leader = 2; + repeated PeerStats down_peers = 3; + repeated metapb.Peer pending_peers = 4; + metapb.Buckets buckets = 5; +} + +// Use GetRegionResponse as the response of GetRegionByIDRequest. +// Deprecated: use BatchScanRegionsRequest instead. message ScanRegionsRequest { - RequestHeader header = 1; + RequestHeader header = 1; - bytes start_key = 2; - int32 limit = 3; // no limit when limit <= 0. - bytes end_key = 4; // end_key is +inf when it is empty. + // Physical start key bytes. + bytes start_key = 2; + int32 limit = 3; // no limit when limit <= 0. + bytes end_key = 4; // Physical end key bytes. end_key is +inf when it is empty. } message Region { - metapb.Region region = 1; - metapb.Peer leader = 2; - // Leader considers that these peers are down. 
- repeated PeerStats down_peers = 3; - // Pending peers are the peers that the leader can't consider as - // working followers. - repeated metapb.Peer pending_peers = 4; + metapb.Region region = 1; + metapb.Peer leader = 2; + // Leader considers that these peers are down. + repeated PeerStats down_peers = 3; + // Pending peers are the peers that the leader can't consider as + // working followers. + repeated metapb.Peer pending_peers = 4; + // buckets isn't nil only when need_buckets is true. + metapb.Buckets buckets = 5; } message ScanRegionsResponse { - ResponseHeader header = 1; + ResponseHeader header = 1; + + // Keep for backword compatibability. + repeated metapb.Region region_metas = 2; + repeated metapb.Peer leaders = 3; + + // Extended region info with down/pending peers. + repeated Region regions = 4; +} + +message KeyRange { + // Physical start key bytes. + bytes start_key = 1; + bytes end_key = 2; // Physical end key bytes. end_key is +inf when it is empty. +} + +message BatchScanRegionsRequest { + RequestHeader header = 1; + bool need_buckets = 2; + + repeated KeyRange ranges = 3; // Physical key ranges. The given ranges must be in order. + int32 limit = 4; // limit the total number of regions to scan. + // If contain_all_key_range is true, the output must contain all + // key ranges in the request. + // If the output does not contain all key ranges, the request is considered + // failed and returns an error(REGIONS_NOT_CONTAIN_ALL_KEY_RANGE). + bool contain_all_key_range = 5; +} - // Keep for backword compatibability. - repeated metapb.Region region_metas = 2; - repeated metapb.Peer leaders = 3; +message BatchScanRegionsResponse { + ResponseHeader header = 1; - // Extended region info with down/pending peers. - repeated Region regions = 4; + // the returned regions are flattened into a list, because the given ranges can located in the same range, we do not return duplicated regions then. 
+ repeated Region regions = 2; } message GetClusterConfigRequest { - RequestHeader header = 1; + RequestHeader header = 1; } message GetClusterConfigResponse { - ResponseHeader header = 1; + ResponseHeader header = 1; - metapb.Cluster cluster = 2; + metapb.Cluster cluster = 2; } message PutClusterConfigRequest { - RequestHeader header = 1; + RequestHeader header = 1; - metapb.Cluster cluster = 2; + metapb.Cluster cluster = 2; } message PutClusterConfigResponse { - ResponseHeader header = 1; + ResponseHeader header = 1; } message Member { - // name is the name of the PD member. - string name = 1; - // member_id is the unique id of the PD member. - uint64 member_id = 2; - repeated string peer_urls = 3; - repeated string client_urls = 4; - int32 leader_priority = 5; - string deploy_path = 6; - string binary_version = 7; - string git_hash = 8; - string dc_location = 9; + // name is the name of the PD member. + string name = 1; + // member_id is the unique id of the PD member. + uint64 member_id = 2; + repeated string peer_urls = 3; + repeated string client_urls = 4; + int32 leader_priority = 5; + string deploy_path = 6; + string binary_version = 7; + string git_hash = 8; + string dc_location = 9; } message GetMembersRequest { - RequestHeader header = 1; + RequestHeader header = 1; } message GetMembersResponse { - ResponseHeader header = 1; + ResponseHeader header = 1; - repeated Member members = 2; - Member leader = 3; - Member etcd_leader = 4; - map tso_allocator_leaders = 5; + repeated Member members = 2; + Member leader = 3; + Member etcd_leader = 4; + map tso_allocator_leaders = 5; } message GetClusterInfoRequest { - ResponseHeader header = 1; + ResponseHeader header = 1; } enum ServiceMode { - UNKNOWN_SVC_MODE = 0; - PD_SVC_MODE = 1; - API_SVC_MODE = 2; + UNKNOWN_SVC_MODE = 0; + PD_SVC_MODE = 1; + API_SVC_MODE = 2; } message GetClusterInfoResponse { - ResponseHeader header = 1; + ResponseHeader header = 1; - repeated ServiceMode serviceModes = 2; - // If service 
mode is API_SVC_MODE, this field will be set to the - // registered tso service addresses. - repeated string tso_urls = 3; + repeated ServiceMode serviceModes = 2; + // If service mode is API_SVC_MODE, this field will be set to the + // registered tso service addresses. + repeated string tso_urls = 3; } message PeerStats { - metapb.Peer peer = 1; - uint64 down_seconds = 2; + metapb.Peer peer = 1; + uint64 down_seconds = 2; } message RegionHeartbeatRequest { - RequestHeader header = 1; - - metapb.Region region = 2; - // Leader Peer sending the heartbeat. - metapb.Peer leader = 3; - // Leader considers that these peers are down. - repeated PeerStats down_peers = 4; - // Pending peers are the peers that the leader can't consider as - // working followers. - repeated metapb.Peer pending_peers = 5; - // Bytes read/written during this period. - uint64 bytes_written = 6; - uint64 bytes_read = 7; - // Keys read/written during this period. - uint64 keys_written = 8; - uint64 keys_read = 9; - // Approximate region size. - uint64 approximate_size = 10; - reserved 11; - // Actually reported time interval - TimeInterval interval = 12; - // Approximate number of keys. - uint64 approximate_keys = 13; - // Term is the term of raft group. - uint64 term = 14; - replication_modepb.RegionReplicationStatus replication_status = 15; - // QueryStats reported write query stats, and there are read query stats in store heartbeat - QueryStats query_stats = 16; - // cpu_usage is the CPU time usage of the leader region since the last heartbeat, - // which is calculated by cpu_time_delta/heartbeat_reported_interval. - uint64 cpu_usage = 17; - // (Serverless) Approximate size of key-value pairs for billing. - // It's counted on size of user key & value (excluding metadata fields), before compression, and latest versions only. - uint64 approximate_kv_size = 18; + RequestHeader header = 1; + + metapb.Region region = 2; + // Leader Peer sending the heartbeat. 
+ metapb.Peer leader = 3; + // Leader considers that these peers are down. + repeated PeerStats down_peers = 4; + // Pending peers are the peers that the leader can't consider as + // working followers. + repeated metapb.Peer pending_peers = 5; + // Bytes read/written during this period. + uint64 bytes_written = 6; + uint64 bytes_read = 7; + // Keys read/written during this period. + uint64 keys_written = 8; + uint64 keys_read = 9; + // Approximate region size. + uint64 approximate_size = 10; + reserved 11; + // Actually reported time interval + TimeInterval interval = 12; + // Approximate number of keys. + uint64 approximate_keys = 13; + // Term is the term of raft group. + uint64 term = 14; + replication_modepb.RegionReplicationStatus replication_status = 15; + // QueryStats reported write query stats, and there are read query stats in store heartbeat + QueryStats query_stats = 16; + // cpu_usage is the total CPU time usage of the leader region since the last heartbeat, + // which is calculated by cpu_time_delta/heartbeat_reported_interval. + // Deprecated: use cpu_stats instead. + uint64 cpu_usage = 17 [deprecated = true]; + // cpu_stats reports CPU usage breakdown for the leader region by kind + // (e.g. unified read). + CPUStats cpu_stats = 21; + // Approximate size of row-based key-value pairs for billing. + // It's counted on size of user key & value (excluding metadata fields), before compression, and latest versions only. + uint64 approximate_kv_size = 18; + // Approximate size of column-based key-value pairs for billing. + // It's counted on size of user key & value (excluding metadata fields), before compression, and latest versions only. 
+ uint64 approximate_columnar_kv_size = 19; + + // BucketMeta is the bucket version and keys of this region if TiKV enabled the bucket feature + metapb.BucketMeta bucket_meta = 20; } message ChangePeer { - metapb.Peer peer = 1; - eraftpb.ConfChangeType change_type = 2; + metapb.Peer peer = 1; + eraftpb.ConfChangeType change_type = 2; } message ChangePeerV2 { - // If changes is empty, it means that to exit joint state. - repeated ChangePeer changes = 1; + // If changes is empty, it means that to exit joint state. + repeated ChangePeer changes = 1; } message TransferLeader { - metapb.Peer peer = 1; - repeated metapb.Peer peers = 2; + metapb.Peer peer = 1; + repeated metapb.Peer peers = 2; } message Merge { - metapb.Region target = 1; + metapb.Region target = 1; } message SplitRegion { - CheckPolicy policy = 1; - repeated bytes keys = 2; + CheckPolicy policy = 1; + // Physical split key bytes. + repeated bytes keys = 2; } message SwitchWitness { - uint64 peer_id = 1; - bool is_witness = 2; + uint64 peer_id = 1; + bool is_witness = 2; } message BatchSwitchWitness { - repeated SwitchWitness switch_witnesses = 1; + repeated SwitchWitness switch_witnesses = 1; } enum CheckPolicy { - SCAN = 0; - APPROXIMATE = 1; - USEKEY = 2; + SCAN = 0; + APPROXIMATE = 1; + USEKEY = 2; +} + +message ChangeSplit { + // auto_split_enabled configures whether the corresponding Region is allowed to be auto split by size or load. + bool auto_split_enabled = 1; } message RegionHeartbeatResponse { - ResponseHeader header = 1; - - // Notice, Pd only allows handling reported epoch >= current pd's. - // Leader peer reports region status with RegionHeartbeatRequest - // to pd regularly, pd will determine whether this region - // should do ChangePeer or not. - // E,g, max peer number is 3, region A, first only peer 1 in A. - // 1. Pd region state -> Peers (1), ConfVer (1). - // 2. 
Leader peer 1 reports region state to pd, pd finds the - // peer number is < 3, so first changes its current region - // state -> Peers (1, 2), ConfVer (1), and returns ChangePeer Adding 2. - // 3. Leader does ChangePeer, then reports Peers (1, 2), ConfVer (2), - // pd updates its state -> Peers (1, 2), ConfVer (2). - // 4. Leader may report old Peers (1), ConfVer (1) to pd before ConfChange - // finished, pd stills responses ChangePeer Adding 2, of course, we must - // guarantee the second ChangePeer can't be applied in TiKV. - ChangePeer change_peer = 2; - // Pd can return transfer_leader to let TiKV does leader transfer itself. - TransferLeader transfer_leader = 3; - // ID of the region - uint64 region_id = 4; - metapb.RegionEpoch region_epoch = 5; - // Leader of the region at the moment of the corresponding request was made. - metapb.Peer target_peer = 6; - Merge merge = 7; - // PD sends split_region to let TiKV split a region into two regions. - SplitRegion split_region = 8; - // Multiple change peer operations atomically. - // Note: PD can use both ChangePeer and ChangePeerV2 at the same time - // (not in the same RegionHeartbeatResponse). - // Now, PD use ChangePeerV2 in following scenarios: - // 1. replacing peers - // 2. demoting voter directly - ChangePeerV2 change_peer_v2 = 9; - BatchSwitchWitness switch_witnesses = 10; + ResponseHeader header = 1; + + // Notice, Pd only allows handling reported epoch >= current pd's. + // Leader peer reports region status with RegionHeartbeatRequest + // to pd regularly, pd will determine whether this region + // should do ChangePeer or not. + // E,g, max peer number is 3, region A, first only peer 1 in A. + // 1. Pd region state -> Peers (1), ConfVer (1). + // 2. Leader peer 1 reports region state to pd, pd finds the + // peer number is < 3, so first changes its current region + // state -> Peers (1, 2), ConfVer (1), and returns ChangePeer Adding 2. + // 3. 
Leader does ChangePeer, then reports Peers (1, 2), ConfVer (2), + // pd updates its state -> Peers (1, 2), ConfVer (2). + // 4. Leader may report old Peers (1), ConfVer (1) to pd before ConfChange + // finished, pd stills responses ChangePeer Adding 2, of course, we must + // guarantee the second ChangePeer can't be applied in TiKV. + ChangePeer change_peer = 2; + // Pd can return transfer_leader to let TiKV does leader transfer itself. + TransferLeader transfer_leader = 3; + // ID of the region + uint64 region_id = 4; + metapb.RegionEpoch region_epoch = 5; + // Leader of the region at the moment of the corresponding request was made. + metapb.Peer target_peer = 6; + Merge merge = 7; + // PD sends split_region to let TiKV split a region into two regions. + SplitRegion split_region = 8; + // Multiple change peer operations atomically. + // Note: PD can use both ChangePeer and ChangePeerV2 at the same time + // (not in the same RegionHeartbeatResponse). + // Now, PD use ChangePeerV2 in following scenarios: + // 1. replacing peers + // 2. demoting voter directly + ChangePeerV2 change_peer_v2 = 9; + BatchSwitchWitness switch_witnesses = 10; + ChangeSplit change_split = 11; } message AskSplitRequest { - RequestHeader header = 1; + RequestHeader header = 1; - metapb.Region region = 2; + metapb.Region region = 2; } message AskSplitResponse { - ResponseHeader header = 1; + ResponseHeader header = 1; - // We split the region into two, first uses the origin - // parent region id, and the second uses the new_region_id. - // We must guarantee that the new_region_id is global unique. - uint64 new_region_id = 2; - // The peer ids for the new split region. - repeated uint64 new_peer_ids = 3; + // We split the region into two, first uses the origin + // parent region id, and the second uses the new_region_id. + // We must guarantee that the new_region_id is global unique. + uint64 new_region_id = 2; + // The peer ids for the new split region. 
+ repeated uint64 new_peer_ids = 3; } message ReportSplitRequest { - RequestHeader header = 1; + RequestHeader header = 1; - metapb.Region left = 2; - metapb.Region right = 3; + metapb.Region left = 2; + metapb.Region right = 3; } message ReportSplitResponse { - ResponseHeader header = 1; + ResponseHeader header = 1; +} + +enum SplitReason { + ADMIN = 0; + SIZE = 1; + LOAD = 2; } message AskBatchSplitRequest { - RequestHeader header = 1; + RequestHeader header = 1; - metapb.Region region = 2; - uint32 split_count = 3; + metapb.Region region = 2; + uint32 split_count = 3; + SplitReason reason = 4; } message SplitID { - uint64 new_region_id = 1; - repeated uint64 new_peer_ids = 2; + uint64 new_region_id = 1; + repeated uint64 new_peer_ids = 2; } message AskBatchSplitResponse { - ResponseHeader header = 1; + ResponseHeader header = 1; - repeated SplitID ids = 2; + repeated SplitID ids = 2; } message ReportBatchSplitRequest { - RequestHeader header = 1; + RequestHeader header = 1; - repeated metapb.Region regions = 2; + repeated metapb.Region regions = 2; } message ReportBatchSplitResponse { - ResponseHeader header = 1; + ResponseHeader header = 1; } message TimeInterval { - // The unix timestamp in seconds of the start of this period. - uint64 start_timestamp = 1; - // The unix timestamp in seconds of the end of this period. - uint64 end_timestamp = 2; + // The unix timestamp in seconds of the start of this period. + uint64 start_timestamp = 1; + // The unix timestamp in seconds of the end of this period. 
+ uint64 end_timestamp = 2; } message RecordPair { - string key = 1; - uint64 value = 2; + string key = 1; + uint64 value = 2; } message PeerStat { - uint64 region_id = 1; - uint64 read_keys = 2; - uint64 read_bytes = 3; - QueryStats query_stats = 4; - uint64 written_keys = 5; - uint64 written_bytes = 6; + uint64 region_id = 1; + uint64 read_keys = 2; + uint64 read_bytes = 3; + QueryStats query_stats = 4; + uint64 written_keys = 5; + uint64 written_bytes = 6; + // cpu_stats is the CPU usage of the region's unified read pool since the last heartbeat, + // which is calculated by cpu_time_delta/heartbeat_reported_interval. + CPUStats cpu_stats = 7; } message StoreStats { - uint64 store_id = 1; - // Capacity for the store. - uint64 capacity = 2; - // Available size for the store. - uint64 available = 3; - // Total region count in this store. - uint32 region_count = 4; - // Current sending snapshot count. - uint32 sending_snap_count = 5; - // Current receiving snapshot count. - uint32 receiving_snap_count = 6; - // When the store is started (unix timestamp in seconds). - uint32 start_time = 7; - // How many region is applying snapshot. - uint32 applying_snap_count = 8; - // If the store is busy - bool is_busy = 9; - // Actually used space by db - uint64 used_size = 10; - // Bytes written for the store during this period. - uint64 bytes_written = 11; - // Keys written for the store during this period. - uint64 keys_written = 12; - // Bytes read for the store during this period. - uint64 bytes_read = 13; - // Keys read for the store during this period. 
- uint64 keys_read = 14; - // Actually reported time interval - TimeInterval interval = 15; - // Threads' CPU usages in the store - repeated RecordPair cpu_usages = 16; - // Threads' read disk I/O rates in the store - repeated RecordPair read_io_rates = 17; - // Threads' write disk I/O rates in the store - repeated RecordPair write_io_rates = 18; - // Operations' latencies in the store - repeated RecordPair op_latencies = 19; - // Hot peer stat in the store - repeated PeerStat peer_stats = 20; - // Store query stats - QueryStats query_stats = 21; - // Score that represents the speed of the store, ranges in [1, 100], lower is better. - uint64 slow_score = 22; - // Damaged regions on the store that need to be removed by PD. - repeated uint64 damaged_regions_id = 23; - // If the apply worker is busy, namely high apply wait duration - bool is_apply_busy = 24; - // Snapshot stats in the store - repeated SnapshotStat snapshot_stats = 25; - SlowTrend slow_trend = 26; - // If the grpc server is paused. - bool is_grpc_paused = 27; - // Total memory of the store in bytes. - uint64 total_memory = 28; - // Used memory of the store in bytes. - uint64 used_memory = 29; -} - -message SlowTrend{ - double cause_value = 1; - double cause_rate = 2; - double result_value = 3; - double result_rate = 4; -} - -message SnapshotStat{ - uint64 region_id = 1; - // Generate snapshot duration - uint64 generate_duration_sec = 2; - // Send snapshot duration - uint64 send_duration_sec = 3; - // |-- waiting --|-- generate --|-- send --| - // |-----------total duration---------------| - // Total duration include waiting and executing duration - uint64 total_duration_sec = 4; - // Size is the transport data size - uint64 transport_size = 5; + uint64 store_id = 1; + // Capacity for the store. + uint64 capacity = 2; + // Available size for the store. + uint64 available = 3; + // Total region count in this store. + uint32 region_count = 4; + // Current sending snapshot count. 
+ uint32 sending_snap_count = 5; + // Current receiving snapshot count. + uint32 receiving_snap_count = 6; + // When the store is started (unix timestamp in seconds). + uint32 start_time = 7; + // How many region is applying snapshot. + uint32 applying_snap_count = 8; + // If the store is busy + bool is_busy = 9; + // Actually used space by db + uint64 used_size = 10; + // Bytes written for the store during this period. + uint64 bytes_written = 11; + // Keys written for the store during this period. + uint64 keys_written = 12; + // Bytes read for the store during this period. + uint64 bytes_read = 13; + // Keys read for the store during this period. + uint64 keys_read = 14; + // Actually reported time interval + TimeInterval interval = 15; + // Threads' CPU usages in the store + repeated RecordPair cpu_usages = 16; + // Threads' read disk I/O rates in the store + repeated RecordPair read_io_rates = 17; + // Threads' write disk I/O rates in the store + repeated RecordPair write_io_rates = 18; + // Operations' latencies in the store + repeated RecordPair op_latencies = 19; + // Hot peer stat in the store + repeated PeerStat peer_stats = 20; + // Store query stats + QueryStats query_stats = 21; + // Score that represents the speed of the store, ranges in [1, 100], lower is better. + uint64 slow_score = 22; + // Damaged regions on the store that need to be removed by PD. + repeated uint64 damaged_regions_id = 23; + // If the apply worker is busy, namely high apply wait duration + bool is_apply_busy = 24; + // Snapshot stats in the store + repeated SnapshotStat snapshot_stats = 25; + SlowTrend slow_trend = 26; + // If the grpc server is paused. + bool is_grpc_paused = 27; + // Total memory of the store in bytes. + uint64 total_memory = 28; + // Used memory of the store in bytes. + uint64 used_memory = 29; + // Network_slow_scores indicate the network status between TiKV nodes, ranging from 1 to 100 (lower is better). 
+ // StoreID -> score + map network_slow_scores = 30; + // The statistics about DFS uploads. + repeated DfsStatItem dfs = 31; + // True if the store is undergoing graceful shutdown. + bool is_stopping = 32; + + // Reserved for a downstream fork + reserved 1000 to 1199; +} + +message DfsStatScope { + // When true, the statistic is not tied to any keyspace. + bool is_global = 1; + // The keyspace of this statistic. Ignore when is_global is true. + // NOTE: This field is only meaningful for V1/V2 compatibility. V3 should use identity. + uint32 keyspace_id = 2; + // The component that provides the statistic. + string component = 3; + // V3 keyspace identity of this statistic. Ignore when is_global is true or identities is set. + apipb.KeyspaceIdentity identity = 4; + // V3 multi-keyspace statistic scope. Ignore when is_global is true. + repeated apipb.KeyspaceIdentity identities = 5; +} + +message DfsStatItem { + DfsStatScope scope = 1; + // Number of bytes written to DFS. + uint64 written_bytes = 2; + // Number of write requests sent to DFS. + uint64 write_requests = 3; +} + +message SlowTrend { + double cause_value = 1; + double cause_rate = 2; + double result_value = 3; + double result_rate = 4; +} + +message SnapshotStat { + uint64 region_id = 1; + // Generate snapshot duration + uint64 generate_duration_sec = 2; + // Send snapshot duration + uint64 send_duration_sec = 3; + // |-- waiting --|-- generate --|-- send --| + // |-----------total duration---------------| + // Total duration include waiting and executing duration + uint64 total_duration_sec = 4; + // Size is the transport data size + uint64 transport_size = 5; } message PeerReport { - raft_serverpb.RaftLocalState raft_state = 1; - raft_serverpb.RegionLocalState region_state = 2; - bool is_force_leader = 3; - // The peer has proposed but uncommitted commit merge. 
- bool has_commit_merge = 4; + raft_serverpb.RaftLocalState raft_state = 1; + raft_serverpb.RegionLocalState region_state = 2; + bool is_force_leader = 3; + // The peer has proposed but uncommitted commit merge. + bool has_commit_merge = 4; + // raft applied index + uint64 applied_index = 5; } message StoreReport { - repeated PeerReport peer_reports = 1; - uint64 step = 2; + repeated PeerReport peer_reports = 1; + uint64 step = 2; } message StoreHeartbeatRequest { - RequestHeader header = 1; + RequestHeader header = 1; - StoreStats stats = 2; - // Detailed store report that is only filled up on PD's demand for online unsafe recovery. - StoreReport store_report = 3; - replication_modepb.StoreDRAutoSyncStatus dr_autosync_status = 4; + StoreStats stats = 2; + // Detailed store report that is only filled up on PD's demand for online unsafe recovery. + StoreReport store_report = 3; + replication_modepb.StoreDRAutoSyncStatus dr_autosync_status = 4; } message DemoteFailedVoters { - uint64 region_id = 1; - repeated metapb.Peer failed_voters = 2; + uint64 region_id = 1; + repeated metapb.Peer failed_voters = 2; } message ForceLeader { - // The store ids of the failed stores, TiKV uses it to decide if a peer is alive. - repeated uint64 failed_stores = 1; - // The region ids of the peer which is to be force leader. - repeated uint64 enter_force_leaders = 2; + // The store ids of the failed stores, TiKV uses it to decide if a peer is alive. + repeated uint64 failed_stores = 1; + // The region ids of the peer which is to be force leader. + repeated uint64 enter_force_leaders = 2; } message RecoveryPlan { - // Create empty regions to fill the key range hole. - repeated metapb.Region creates = 1; - // Update the meta of the regions, including peer lists, epoch and key range. - repeated metapb.Region updates = 2 [deprecated=true]; - // Tombstone the peers on the store locally. 
- repeated uint64 tombstones = 3; - // Issue conf change that demote voters on failed stores to learners on the regions. - repeated DemoteFailedVoters demotes = 4; - // Make the peers to be force leaders. - ForceLeader force_leader = 5; - // Step is an increasing number to note the round of recovery, - // It should be filled in the corresponding store report. - uint64 step = 6; + // Create empty regions to fill the key range hole. + repeated metapb.Region creates = 1; + // Update the meta of the regions, including peer lists, epoch and key range. + repeated metapb.Region updates = 2 [deprecated = true]; + // Tombstone the peers on the store locally. + repeated uint64 tombstones = 3; + // Issue conf change that demote voters on failed stores to learners on the regions. + repeated DemoteFailedVoters demotes = 4; + // Make the peers to be force leaders. + ForceLeader force_leader = 5; + // Step is an increasing number to note the round of recovery, + // It should be filled in the corresponding store report. + uint64 step = 6; } message AwakenRegions { - // Awake all regions if abnormal_stores is empty. - repeated uint64 abnormal_stores = 1; + // Awake all regions if abnormal_stores is empty. + repeated uint64 abnormal_stores = 1; } enum ControlGrpcEvent { - // Pause TiKV grpc server. - PAUSE = 0; - // Resume TiKV grpc server. - RESUME = 1; + // Pause TiKV grpc server. + PAUSE = 0; + // Resume TiKV grpc server. + RESUME = 1; } message ControlGrpc { - ControlGrpcEvent ctrl_event = 1; + ControlGrpcEvent ctrl_event = 1; } message StoreHeartbeatResponse { - ResponseHeader header = 1; - replication_modepb.ReplicationStatus replication_status = 2; - string cluster_version = 3; - - // Used by online unsafe recovery to request store report. - // Now it's substituted by reusing recovery_plan field. PD will send a empty - // recovery plan instead to request store report. - bool require_detailed_report = 4 [deprecated=true]; - // Operations of recovery. 
After the plan is executed, TiKV should attach the - // store report in store heartbeat. - RecoveryPlan recovery_plan = 5; - // Pd can return awaken_regions to let TiKV awaken hibernated regions itself. - AwakenRegions awaken_regions = 6; - // Pd can return operations to let TiKV forcely PAUSE | RESUME grpc server. - ControlGrpc control_grpc = 7; + ResponseHeader header = 1; + replication_modepb.ReplicationStatus replication_status = 2; + string cluster_version = 3; + + // Used by online unsafe recovery to request store report. + // Now it's substituted by reusing recovery_plan field. PD will send a empty + // recovery plan instead to request store report. + bool require_detailed_report = 4 [deprecated = true]; + // Operations of recovery. After the plan is executed, TiKV should attach the + // store report in store heartbeat. + RecoveryPlan recovery_plan = 5; + // Pd can return awaken_regions to let TiKV awaken hibernated regions itself. + AwakenRegions awaken_regions = 6; + // Pd can return operations to let TiKV forcely PAUSE | RESUME grpc server. + ControlGrpc control_grpc = 7; + // NodeState is going to mark the state of the store. + metapb.NodeState state = 8; } message ScatterRegionRequest { - RequestHeader header = 1; + RequestHeader header = 1; - uint64 region_id = 2 [deprecated=true]; + uint64 region_id = 2 [deprecated = true]; - // PD will use these region information if it can't find the region. - // For example, the region is just split and hasn't report to PD yet. - metapb.Region region = 3; - metapb.Peer leader = 4; + // PD will use these region information if it can't find the region. + // For example, the region is just split and hasn't report to PD yet. + metapb.Region region = 3; + metapb.Peer leader = 4; - // If group is defined, the regions with the same group would be scattered as a whole group. - // If not defined, the regions would be scattered in a cluster level. 
- string group = 5; + // If group is defined, the regions with the same group would be scattered as a whole group. + // If not defined, the regions would be scattered in a cluster level. + string group = 5; - // If regions_id is defined, the region_id would be ignored. - repeated uint64 regions_id = 6; - uint64 retry_limit = 7; - bool skip_store_limit = 8; + // If regions_id is defined, the region_id would be ignored. + repeated uint64 regions_id = 6; + uint64 retry_limit = 7; + bool skip_store_limit = 8; } message ScatterRegionResponse { - ResponseHeader header = 1; - uint64 finished_percentage = 2; + ResponseHeader header = 1; + uint64 finished_percentage = 2; + repeated uint64 failed_regions_id = 3; } message GetGCSafePointRequest { - RequestHeader header = 1; + RequestHeader header = 1; } message GetGCSafePointResponse { - ResponseHeader header = 1; + ResponseHeader header = 1; - uint64 safe_point = 2; + uint64 safe_point = 2; } message UpdateGCSafePointRequest { - RequestHeader header = 1; + RequestHeader header = 1; - uint64 safe_point = 2; + uint64 safe_point = 2; } message UpdateGCSafePointResponse { - ResponseHeader header = 1; + ResponseHeader header = 1; - uint64 new_safe_point = 2; + uint64 new_safe_point = 2; } message UpdateServiceGCSafePointRequest { - RequestHeader header = 1; + RequestHeader header = 1; - bytes service_id = 2; - int64 TTL = 3; - uint64 safe_point = 4; + bytes service_id = 2; + int64 TTL = 3; + uint64 safe_point = 4; } message UpdateServiceGCSafePointResponse { - ResponseHeader header = 1; + ResponseHeader header = 1; - bytes service_id = 2; - int64 TTL = 3; - uint64 min_safe_point = 4; + bytes service_id = 2; + int64 TTL = 3; + uint64 min_safe_point = 4; } message GetGCSafePointV2Request { - RequestHeader header = 1; + RequestHeader header = 1; - uint32 keyspace_id = 2; + // V1/V2 compatibility keyspace id. V3 should use identity. + uint32 keyspace_id = 2; + // V3 keyspace identity. 
+ apipb.KeyspaceIdentity identity = 3; } message GetGCSafePointV2Response { - ResponseHeader header = 1; + ResponseHeader header = 1; - uint64 safe_point = 2; + uint64 safe_point = 2; + // V3 keyspace identity served by this response. + apipb.KeyspaceIdentity identity = 3; } message WatchGCSafePointV2Request { - RequestHeader header = 1; - int64 revision = 2; - + RequestHeader header = 1; + int64 revision = 2; } // SafePointEvent is for the rpc WatchGCSafePointV2. message SafePointEvent { - uint32 keyspace_id = 1; - uint64 safe_point = 2; - EventType type = 3; + // V1/V2 compatibility keyspace id. V3 should use identity. + uint32 keyspace_id = 1; + uint64 safe_point = 2; + EventType type = 3; + // V3 keyspace identity served by this event. + apipb.KeyspaceIdentity identity = 4; } message WatchGCSafePointV2Response { - ResponseHeader header = 1; - repeated SafePointEvent events = 2; - int64 revision = 3; + ResponseHeader header = 1; + repeated SafePointEvent events = 2; + int64 revision = 3; } message UpdateGCSafePointV2Request { - RequestHeader header = 1; + RequestHeader header = 1; - uint32 keyspace_id = 2; - uint64 safe_point = 3; + // V1/V2 compatibility keyspace id. V3 should use identity. + uint32 keyspace_id = 2; + uint64 safe_point = 3; + // V3 keyspace identity. + apipb.KeyspaceIdentity identity = 4; } message UpdateGCSafePointV2Response { - ResponseHeader header = 1; + ResponseHeader header = 1; - uint64 new_safe_point = 2; + uint64 new_safe_point = 2; + // V3 keyspace identity served by this response. + apipb.KeyspaceIdentity identity = 3; } message UpdateServiceSafePointV2Request { - RequestHeader header = 1; - - uint32 keyspace_id = 2; - bytes service_id = 3; - uint64 safe_point = 4; - // Safe point will be set to expire on (PD Server time + TTL), - // pass in a ttl < 0 to remove target safe point; - // pass in MAX_INT64 to set a safe point that never expire. 
- // This should be set by component that may crash unexpectedly so that it doesn't block - // cluster garbage collection. - int64 ttl = 5; + RequestHeader header = 1; + + // V1/V2 compatibility keyspace id. V3 should use identity. + uint32 keyspace_id = 2; + bytes service_id = 3; + uint64 safe_point = 4; + // Safe point will be set to expire on (PD Server time + TTL), + // pass in a ttl < 0 to remove target safe point; + // pass in MAX_INT64 to set a safe point that never expire. + // This should be set by component that may crash unexpectedly so that it doesn't block + // cluster garbage collection. + int64 ttl = 5; + // V3 keyspace identity. + apipb.KeyspaceIdentity identity = 6; } message UpdateServiceSafePointV2Response { - ResponseHeader header = 1; + ResponseHeader header = 1; - bytes service_id = 2; - int64 ttl = 3; - uint64 min_safe_point = 4; + bytes service_id = 2; + int64 ttl = 3; + uint64 min_safe_point = 4; + // V3 keyspace identity served by this response. + apipb.KeyspaceIdentity identity = 5; } message GetAllGCSafePointV2Request { - RequestHeader header = 1; + RequestHeader header = 1; } message GCSafePointV2 { - uint32 keyspace_id = 1; - uint64 gc_safe_point = 2; + // V1/V2 compatibility keyspace id. V3 should use identity. + uint32 keyspace_id = 1; + uint64 gc_safe_point = 2; + // V3 keyspace identity. + apipb.KeyspaceIdentity identity = 3; } message GetAllGCSafePointV2Response { - ResponseHeader header = 1; - repeated GCSafePointV2 gc_safe_points = 2; - int64 revision = 3; + ResponseHeader header = 1; + repeated GCSafePointV2 gc_safe_points = 2; + int64 revision = 3; +} + +// A wrapper over keyspace scope. +// keyspace_id is kept for V1/V2 compatibility. V3 should use identity and reject +// missing/invalid namespace or keyspace IDs in tenant-scoped requests. +message KeyspaceScope { + uint32 keyspace_id = 1; + // V3 keyspace identity. 
+ apipb.KeyspaceIdentity identity = 2; +} + +message AdvanceGCSafePointRequest { + RequestHeader header = 1; + + KeyspaceScope keyspace_scope = 2; + uint64 target = 3; +} + +message AdvanceGCSafePointResponse { + ResponseHeader header = 1; + + uint64 old_gc_safe_point = 2; + uint64 new_gc_safe_point = 3; +} + +message AdvanceTxnSafePointRequest { + RequestHeader header = 1; + + KeyspaceScope keyspace_scope = 2; + uint64 target = 3; +} + +message AdvanceTxnSafePointResponse { + ResponseHeader header = 1; + + uint64 old_txn_safe_point = 2; + uint64 new_txn_safe_point = 3; + string blocker_description = 4; +} + +message SetGCBarrierRequest { + RequestHeader header = 1; + + KeyspaceScope keyspace_scope = 2; + + string barrier_id = 3; + uint64 barrier_ts = 4; + int64 ttl_seconds = 5; +} + +message GCBarrierInfo { + string barrier_id = 1; + uint64 barrier_ts = 2; + int64 ttl_seconds = 3; +} + +message SetGCBarrierResponse { + ResponseHeader header = 1; + + GCBarrierInfo new_barrier_info = 2; +} + +message DeleteGCBarrierRequest { + RequestHeader header = 1; + + KeyspaceScope keyspace_scope = 2; + + string barrier_id = 3; +} + +message DeleteGCBarrierResponse { + ResponseHeader header = 1; + + GCBarrierInfo deleted_barrier_info = 2; +} + +message SetGlobalGCBarrierRequest { + RequestHeader header = 1; + string barrier_id = 2; + uint64 barrier_ts = 3; + int64 ttl_seconds = 4; +} + +message SetGlobalGCBarrierResponse { + ResponseHeader header = 1; + GlobalGCBarrierInfo new_barrier_info = 2; +} + +message DeleteGlobalGCBarrierRequest { + RequestHeader header = 1; + string barrier_id = 2; +} + +message DeleteGlobalGCBarrierResponse { + ResponseHeader header = 1; + GlobalGCBarrierInfo deleted_barrier_info = 2; +} + +message GlobalGCBarrierInfo { + string barrier_id = 1; + uint64 barrier_ts = 2; + int64 ttl_seconds = 3; +} + +message GetGCStateRequest { + RequestHeader header = 1; + + KeyspaceScope keyspace_scope = 2; +} + +message GCState { + KeyspaceScope keyspace_scope = 1; 
+ + bool is_keyspace_level_gc = 2; + uint64 txn_safe_point = 3; + uint64 gc_safe_point = 4; + repeated GCBarrierInfo gc_barriers = 5; +} + +message GetGCStateResponse { + ResponseHeader header = 1; + + GCState gc_state = 2; +} + +message GetAllKeyspacesGCStatesRequest { + RequestHeader header = 1; +} + +message GetAllKeyspacesGCStatesResponse { + ResponseHeader header = 1; + + repeated GCState gc_states = 2; + repeated GlobalGCBarrierInfo global_gc_barriers = 3; } message RegionStat { - // Bytes read/written during this period. - uint64 bytes_written = 1; - uint64 bytes_read = 2; - // Keys read/written during this period. - uint64 keys_written = 3; - uint64 keys_read = 4; + // Bytes read/written during this period. + uint64 bytes_written = 1; + uint64 bytes_read = 2; + // Keys read/written during this period. + uint64 keys_written = 3; + uint64 keys_read = 4; } -message SyncRegionRequest{ - RequestHeader header = 1; - Member member = 2; - // the follower PD will use the start index to locate historical changes - // that require synchronization. - uint64 start_index = 3; +message SyncRegionRequest { + RequestHeader header = 1; + Member member = 2; + // the follower PD will use the start index to locate historical changes + // that require synchronization. + uint64 start_index = 3; } message PeersStats { - repeated PeerStats peers = 1; + repeated PeerStats peers = 1; } message Peers { - repeated metapb.Peer peers = 1; + repeated metapb.Peer peers = 1; } -message SyncRegionResponse{ - ResponseHeader header = 1; - // the leader PD will send the repsonds include - // changed regions records and the index of the first record. - repeated metapb.Region regions = 2; - uint64 start_index = 3; - repeated RegionStat region_stats = 4; - repeated metapb.Peer region_leaders = 5; - // the buckets informations without stats. 
- repeated metapb.Buckets buckets = 6; - repeated PeersStats down_peers = 16; - repeated Peers pending_peers = 17; +message SyncRegionResponse { + ResponseHeader header = 1; + // the leader PD will send the responses include + // changed regions records and the index of the first record. + repeated metapb.Region regions = 2; + uint64 start_index = 3; + repeated RegionStat region_stats = 4; + repeated metapb.Peer region_leaders = 5; + // the buckets information without stats. + repeated metapb.Buckets buckets = 6; + repeated PeersStats down_peers = 16; + repeated Peers pending_peers = 17; } message GetOperatorRequest { - RequestHeader header = 1; - uint64 region_id = 2; + RequestHeader header = 1; + uint64 region_id = 2; } enum OperatorStatus { - SUCCESS = 0; - TIMEOUT = 1; - CANCEL = 2; - REPLACE = 3; - RUNNING = 4; + SUCCESS = 0; + TIMEOUT = 1; + CANCEL = 2; + REPLACE = 3; + RUNNING = 4; } message GetOperatorResponse { - ResponseHeader header = 1; - uint64 region_id = 2; - bytes desc = 3; - OperatorStatus status = 4; - bytes kind = 5; + ResponseHeader header = 1; + uint64 region_id = 2; + bytes desc = 3; + OperatorStatus status = 4; + bytes kind = 5; } message SyncMaxTSRequest { - RequestHeader header = 1; - Timestamp max_ts = 2; - // If skip_check is true, the sync will try to write the max_ts without checking whether it's bigger. - bool skip_check = 3; + RequestHeader header = 1; + Timestamp max_ts = 2; + // If skip_check is true, the sync will try to write the max_ts without checking whether it's bigger. + bool skip_check = 3; } message SyncMaxTSResponse { - ResponseHeader header = 1; - Timestamp max_local_ts = 2; - repeated string synced_dcs = 3; + ResponseHeader header = 1; + Timestamp max_local_ts = 2; + repeated string synced_dcs = 3; } message SplitRegionsRequest { - RequestHeader header = 1; - repeated bytes split_keys = 2; - uint64 retry_limit = 3; + RequestHeader header = 1; + // Physical split key bytes. 
+ repeated bytes split_keys = 2; + uint64 retry_limit = 3; } message SplitRegionsResponse { - ResponseHeader header = 1; - uint64 finished_percentage = 2; - repeated uint64 regions_id = 3; + ResponseHeader header = 1; + uint64 finished_percentage = 2; + repeated uint64 regions_id = 3; } message SplitAndScatterRegionsRequest { - RequestHeader header = 1; - repeated bytes split_keys = 2; - string group = 3; - uint64 retry_limit = 4; + RequestHeader header = 1; + // Physical split key bytes. + repeated bytes split_keys = 2; + string group = 3; + uint64 retry_limit = 4; } message SplitAndScatterRegionsResponse { - ResponseHeader header = 1; - uint64 split_finished_percentage = 2; - uint64 scatter_finished_percentage = 3; - repeated uint64 regions_id = 4; + ResponseHeader header = 1; + uint64 split_finished_percentage = 2; + uint64 scatter_finished_percentage = 3; + repeated uint64 regions_id = 4; } message GetDCLocationInfoRequest { - RequestHeader header = 1; - string dc_location = 2; + RequestHeader header = 1; + string dc_location = 2; } message GetDCLocationInfoResponse { - ResponseHeader header = 1; - // suffix sign - int32 suffix = 2; - // max_ts will be included into this response if PD leader think the receiver needs, - // which it's set when the number of the max suffix bits changes. - Timestamp max_ts = 3; + ResponseHeader header = 1; + // suffix sign + int32 suffix = 2; + // max_ts will be included into this response if PD leader think the receiver needs, + // which it's set when the number of the max suffix bits changes. 
+ Timestamp max_ts = 3; } message QueryStats { - uint64 GC = 1; - uint64 Get = 2; - uint64 Scan = 3; - uint64 Coprocessor = 4; - uint64 Delete = 5; - uint64 DeleteRange = 6; - uint64 Put = 7; - uint64 Prewrite = 8; - uint64 AcquirePessimisticLock = 9; - uint64 Commit = 10; - uint64 Rollback = 11; + uint64 GC = 1; + uint64 Get = 2; + uint64 Scan = 3; + uint64 Coprocessor = 4; + uint64 Delete = 5; + uint64 DeleteRange = 6; + uint64 Put = 7; + uint64 Prewrite = 8; + uint64 AcquirePessimisticLock = 9; + uint64 Commit = 10; + uint64 Rollback = 11; +} + +// CPU usage breakdown by kind. New kinds may be added in the future. +message CPUStats { + // UnifiedRead is the CPU usage of the unified read pool. + uint64 UnifiedRead = 1; + // Scheduler is the CPU usage of the scheduler pool, it contains `sched-pool`, `sched-high`, and `sched-pri`. + uint64 Scheduler = 2; } enum QueryKind { - Others = 0; - GC = 1; - Get = 2; - Scan = 3; - Coprocessor = 4; - Delete = 5; - DeleteRange = 6; - Put = 7; - Prewrite = 8; - AcquirePessimisticLock = 9; - Commit = 10; - Rollback = 11; + Others = 0; + GC = 1; + Get = 2; + Scan = 3; + Coprocessor = 4; + Delete = 5; + DeleteRange = 6; + Put = 7; + Prewrite = 8; + AcquirePessimisticLock = 9; + Commit = 10; + Rollback = 11; } message ReportBucketsRequest { - RequestHeader header = 1; + RequestHeader header = 1; - metapb.RegionEpoch region_epoch = 2; - metapb.Buckets buckets = 3; + metapb.RegionEpoch region_epoch = 2; + metapb.Buckets buckets = 3; } message ReportBucketsResponse { - ResponseHeader header = 1; + ResponseHeader header = 1; } message ReportMinResolvedTsRequest { - RequestHeader header = 1; + RequestHeader header = 1; - uint64 store_id = 2; + uint64 store_id = 2; - uint64 min_resolved_ts = 3; + uint64 min_resolved_ts = 3; } message ReportMinResolvedTsResponse { - ResponseHeader header = 1; + ResponseHeader header = 1; } message SetExternalTimestampRequest { - RequestHeader header = 1; + RequestHeader header = 1; - uint64 timestamp = 2; 
+ uint64 timestamp = 2; } message SetExternalTimestampResponse { - ResponseHeader header = 1; + ResponseHeader header = 1; } message GetExternalTimestampRequest { - RequestHeader header = 1; + RequestHeader header = 1; } message GetExternalTimestampResponse { - ResponseHeader header = 1; + ResponseHeader header = 1; - uint64 timestamp = 2; + uint64 timestamp = 2; } message GetMinTSRequest { - RequestHeader header = 1; + RequestHeader header = 1; } message GetMinTSResponse { - ResponseHeader header = 1; + ResponseHeader header = 1; - Timestamp timestamp = 2; + Timestamp timestamp = 2; } diff --git a/proto/resource_manager.proto b/proto/resource_manager.proto index aa7de748..f9912f94 100644 --- a/proto/resource_manager.proto +++ b/proto/resource_manager.proto @@ -1,11 +1,15 @@ syntax = "proto3"; package resource_manager; +import "apipb.proto"; import "gogoproto/gogo.proto"; import "rustproto.proto"; -option (gogoproto.sizer_all) = true; +option (gogoproto.goproto_sizecache_all) = false; +option (gogoproto.goproto_unkeyed_all) = false; +option (gogoproto.goproto_unrecognized_all) = false; option (gogoproto.marshaler_all) = true; +option (gogoproto.sizer_all) = true; option (gogoproto.unmarshaler_all) = true; option (rustproto.lite_runtime_all) = true; @@ -23,27 +27,61 @@ service ResourceManager { rpc AcquireTokenBuckets(stream TokenBucketsRequest) returns (stream TokenBucketsResponse) {} } -message ListResourceGroupsRequest{} +// KeyspaceIDValue is a wrapper for the value of keyspace ID. +// Because the 0 value is a valid keyspace ID in V1/V2, we need to use a wrapper to distinguish it from the null keyspace ID. +// V3 tenant-scoped resource filters should use apipb.KeyspaceIdentity fields instead. +message KeyspaceIDValue { + uint32 value = 1; +} + +message ListResourceGroupsRequest { + bool with_ru_stats = 1; + // There're two cases for this field: + // - If the keyspace ID is not set, it means this may be a message from an older version. 
+ // To maintain compatibility, we will treat it as a null keyspace ID, which is uint32.Max. + // - If the keyspace ID is set to a valid value, the listed resource groups will be filtered + // by the given keyspace ID. + KeyspaceIDValue keyspace_id = 2; + // V3 keyspace identity filter. + apipb.KeyspaceIdentity keyspace_identity = 3; +} -message ListResourceGroupsResponse{ +message ListResourceGroupsResponse { Error error = 1; repeated ResourceGroup groups = 2; } message GetResourceGroupRequest { string resource_group_name = 1; -} - -message GetResourceGroupResponse{ + bool with_ru_stats = 2; + // There're two cases for this field: + // - If the keyspace ID is not set, it means this may be a message from an older version. + // To maintain compatibility, we will treat it as a null keyspace ID, which is uint32.Max. + // - If the keyspace ID is set to a valid value, it will try to get the resource group within + // the given keyspace ID. + KeyspaceIDValue keyspace_id = 3; + // V3 keyspace identity filter. + apipb.KeyspaceIdentity keyspace_identity = 4; +} + +message GetResourceGroupResponse { Error error = 1; ResourceGroup group = 2; } message DeleteResourceGroupRequest { string resource_group_name = 1; + // There're two cases for this field: + // - If the keyspace ID is not set, it means this may be a message from an older version. + // To maintain compatibility, we will treat it as a null keyspace ID, which is uint32.Max. + // - If the keyspace ID is set to a valid value, it will try to delete the resource group within + // the given keyspace ID. + KeyspaceIDValue keyspace_id = 2; + // V3 keyspace identity filter. 
+ apipb.KeyspaceIdentity keyspace_identity = 3; } -message DeleteResourceGroupResponse{ +message DeleteResourceGroupResponse { Error error = 1; string body = 2; } @@ -64,25 +102,33 @@ message TokenBucketsRequest { } message TokenBucketRequest { - message RequestRU { - repeated RequestUnitItem request_r_u = 1; - } - message RequestRawResource { - repeated RawResourceItem request_raw_resource = 1; - } - - string resource_group_name = 1; - oneof request { - // RU mode, group settings with WRU/RRU etc resource abstract unit. - RequestRU ru_items = 2; - // Raw mode, group settings with CPU/IO etc resource unit. - RequestRawResource raw_resource_items = 3; - } - // Aggregate statistics in group level. - Consumption consumption_since_last_request = 4; - // label background request. - bool is_background = 5; - bool is_tiflash = 6; + message RequestRU { + repeated RequestUnitItem request_r_u = 1; + } + message RequestRawResource { + repeated RawResourceItem request_raw_resource = 1; + } + + string resource_group_name = 1; + oneof request { + // RU mode, group settings with WRU/RRU etc resource abstract unit. + RequestRU ru_items = 2; + // Raw mode, group settings with CPU/IO etc resource unit. + RequestRawResource raw_resource_items = 3; + } + // Aggregate statistics in group level. + Consumption consumption_since_last_request = 4; + // label background request. + bool is_background = 5; + bool is_tiflash = 6; + // There're two cases for this field: + // - If the keyspace ID is not set, it means this may be a message from an older version. + // To maintain compatibility, we will treat it as a null keyspace ID, which is uint32.Max. + // - If the keyspace ID is set to a valid value, it will try to request the token bucket from + // the resource group within the given keyspace ID. + KeyspaceIDValue keyspace_id = 7; + // V3 keyspace identity filter. 
+ apipb.KeyspaceIdentity keyspace_identity = 8; } message TokenBucketsResponse { @@ -96,6 +142,14 @@ message TokenBucketResponse { repeated GrantedRUTokenBucket granted_r_u_tokens = 2; // Raw mode repeated GrantedRawResourceTokenBucket granted_resource_tokens = 3; + // There're two cases for this field: + // - If the keyspace ID is not set, it means this may be a message from an older version, + // which can be safely ignored to keep compatibility. + // - If the keyspace ID is set to a valid value, it means this response is from the resource + // group within this keyspace ID. + KeyspaceIDValue keyspace_id = 4; + // V3 keyspace identity served by this response. + apipb.KeyspaceIdentity keyspace_identity = 5; } message GrantedRUTokenBucket { @@ -116,7 +170,7 @@ enum RequestUnitType { enum RawResourceType { CPU = 0; - IOReadFlow = 1; + IOReadFlow = 1; IOWriteFlow = 2; } @@ -129,6 +183,14 @@ message Consumption { double sql_layer_cpu_time_ms = 6; double kv_read_rpc_count = 7; double kv_write_rpc_count = 8; + uint64 read_cross_az_traffic_bytes = 9; + uint64 write_cross_az_traffic_bytes = 10; + // RUv2 is an experimental v2 RU calculation. + // For now it only records the consumption without actual token deduction. + double tikv_r_u_v2 = 11; + double tidb_r_u_v2 = 12; + // tiflash_r_u_v2 equals tiflash RRU + tiflash WRU. + double tiflash_r_u_v2 = 13; } message RequestUnitItem { @@ -142,12 +204,12 @@ message RawResourceItem { } enum GroupMode { - Unknown = 0; - RUMode = 1; - RawMode = 2; + Unknown = 0; + RUMode = 1; + RawMode = 2; } -// ResourceGroup the settings definitions. +// ResourceGroup the settings definitions. message ResourceGroup { string name = 1; GroupMode mode = 2; @@ -159,7 +221,18 @@ message ResourceGroup { uint32 priority = 5; // Runaway queries settings RunawaySettings runaway_settings = 6; + // Background task control settings. BackgroundSettings background_settings = 7; + // RU consumption statistics. 
+ Consumption RUStats = 8; + // The keyspace ID that the resource group belongs to. + // There're two cases for this field: + // - If the keyspace ID is not set, it means this may be a message from an older version. + // To maintain compatibility, we will treat it as a null keyspace ID, which is uint32.Max. + // - If the keyspace ID is set to a valid value, it will directly be used. + KeyspaceIDValue keyspace_id = 9; + // V3 keyspace identity that the resource group belongs to. + apipb.KeyspaceIdentity keyspace_identity = 10; } message GroupRequestUnitSettings { @@ -176,7 +249,7 @@ message TokenBucket { TokenLimitSettings settings = 1; // Once used to reconfigure, the tokens is delta tokens. double tokens = 2; -} +} message TokenLimitSettings { uint64 fill_rate = 1; @@ -185,14 +258,15 @@ message TokenLimitSettings { } message Error { - string message = 1; + string message = 1; } enum RunawayAction { NoneAction = 0; - DryRun = 1; // do nothing - CoolDown = 2; // deprioritize the task - Kill = 3; // kill the task + DryRun = 1; // do nothing + CoolDown = 2; // deprioritize the task + Kill = 3; // kill the task + SwitchGroup = 4; // switch the task to another group } enum RunawayWatchType { @@ -204,30 +278,38 @@ enum RunawayWatchType { message RunawayRule { uint64 exec_elapsed_time_ms = 1; + int64 processed_keys = 2; + int64 request_unit = 3; } message RunawayWatch { // how long would the watch last int64 lasting_duration_ms = 1; - RunawayWatchType type = 2; + RunawayWatchType type = 2; } message RunawaySettings { - RunawayRule rule = 1; + RunawayRule rule = 1; RunawayAction action = 2; RunawayWatch watch = 3; + // When the runaway action is `SwitchGroup`, + // this field will be used to indicate which group to switch. + string switch_group_name = 4; } message BackgroundSettings { - repeated string job_types = 1; + // background task types. + repeated string job_types = 1; + // the percentage limit of total resource(cpu/io) that background tasks can use. 
+ uint64 utilization_limit = 2; } message Participant { - // name is the unique name of the resource manager participant. - string name = 1; - // id is the unique id of the resource manager participant. - uint64 id = 2; - // listen_urls is the serivce endpoint list in the url format. - // listen_urls[0] is primary service endpoint. - repeated string listen_urls = 3; + // name is the unique name of the resource manager participant. + string name = 1; + // id is the unique id of the resource manager participant. + uint64 id = 2; + // listen_urls is the service endpoint list in the url format. + // listen_urls[0] is primary service endpoint. + repeated string listen_urls = 3; } diff --git a/proto/schedulingpb.proto b/proto/schedulingpb.proto index 83b7de46..96e4003f 100644 --- a/proto/schedulingpb.proto +++ b/proto/schedulingpb.proto @@ -1,197 +1,217 @@ syntax = "proto3"; package schedulingpb; -import "pdpb.proto"; import "gogoproto/gogo.proto"; -import "rustproto.proto"; import "metapb.proto"; +import "pdpb.proto"; +import "rustproto.proto"; -option (gogoproto.sizer_all) = true; +option java_package = "org.tikv.kvproto"; +option (gogoproto.goproto_sizecache_all) = false; +option (gogoproto.goproto_unkeyed_all) = false; +option (gogoproto.goproto_unrecognized_all) = false; option (gogoproto.marshaler_all) = true; +option (gogoproto.sizer_all) = true; option (gogoproto.unmarshaler_all) = true; option (rustproto.lite_runtime_all) = true; -option java_package = "org.tikv.kvproto"; - service Scheduling { - rpc StoreHeartbeat(StoreHeartbeatRequest) returns (StoreHeartbeatResponse) {} + rpc StoreHeartbeat(StoreHeartbeatRequest) returns (StoreHeartbeatResponse) {} + + rpc RegionHeartbeat(stream RegionHeartbeatRequest) returns (stream RegionHeartbeatResponse) {} - rpc RegionHeartbeat(stream RegionHeartbeatRequest) returns (stream RegionHeartbeatResponse) {} + rpc SplitRegions(SplitRegionsRequest) returns (SplitRegionsResponse) {} - rpc SplitRegions(SplitRegionsRequest) 
returns (SplitRegionsResponse) {} + rpc ScatterRegions(ScatterRegionsRequest) returns (ScatterRegionsResponse) {} - rpc ScatterRegions(ScatterRegionsRequest) returns (ScatterRegionsResponse) {} + rpc GetOperator(GetOperatorRequest) returns (GetOperatorResponse) {} - rpc GetOperator(GetOperatorRequest) returns (GetOperatorResponse) {} - - rpc AskBatchSplit(AskBatchSplitRequest) returns (AskBatchSplitResponse) {} + rpc AskBatchSplit(AskBatchSplitRequest) returns (AskBatchSplitResponse) {} + + rpc RegionBuckets(stream RegionBucketsRequest) returns (stream RegionBucketsResponse) {} } message RequestHeader { - // cluster_id is the ID of the cluster which be sent to. - uint64 cluster_id = 1; - // sender_id is the ID of the sender server. - uint64 sender_id = 2; + // cluster_id is the ID of the cluster which be sent to. + uint64 cluster_id = 1; + // sender_id is the ID of the sender server. + uint64 sender_id = 2; } message ResponseHeader { - // cluster_id is the ID of the cluster which sent the response. - uint64 cluster_id = 1; - Error error = 2; + // cluster_id is the ID of the cluster which sent the response. + uint64 cluster_id = 1; + Error error = 2; } enum ErrorType { - OK = 0; - UNKNOWN = 1; - NOT_BOOTSTRAPPED = 2; - ALREADY_BOOTSTRAPPED = 3; - INVALID_VALUE = 4; - CLUSTER_MISMATCHED = 5; + OK = 0; + UNKNOWN = 1; + NOT_BOOTSTRAPPED = 2; + ALREADY_BOOTSTRAPPED = 3; + INVALID_VALUE = 4; + CLUSTER_MISMATCHED = 5; } message Error { - ErrorType type = 1; - string message = 2; + ErrorType type = 1; + string message = 2; } message Participant { - // name is the unique name of the scheduling participant. - string name = 1; - // id is the unique id of the scheduling participant. - uint64 id = 2; - // listen_urls is the serivce endpoint list in the url format. - // listen_urls[0] is primary service endpoint. - repeated string listen_urls = 3; + // name is the unique name of the scheduling participant. 
+ string name = 1; + // id is the unique id of the scheduling participant. + uint64 id = 2; + // listen_urls is the serivce endpoint list in the url format. + // listen_urls[0] is primary service endpoint. + repeated string listen_urls = 3; } message StoreHeartbeatRequest { - RequestHeader header = 1; - pdpb.StoreStats stats = 2; + RequestHeader header = 1; + pdpb.StoreStats stats = 2; } message StoreHeartbeatResponse { - ResponseHeader header = 1; - string cluster_version = 2; + ResponseHeader header = 1; + string cluster_version = 2; } message RegionHeartbeatRequest { - RequestHeader header = 1; - - metapb.Region region = 2; - // Leader Peer sending the heartbeat. - metapb.Peer leader = 3; - // Term is the term of raft group. - uint64 term = 4; - // Leader considers that these peers are down. - repeated pdpb.PeerStats down_peers = 5; - // Pending peers are the peers that the leader can't consider as - // working followers. - repeated metapb.Peer pending_peers = 6; - // Bytes read/written during this period. - uint64 bytes_written = 7; - uint64 bytes_read = 8; - // Keys read/written during this period. - uint64 keys_written = 9; - uint64 keys_read = 10; - // Approximate region size. - uint64 approximate_size = 11; - // Approximate number of keys. - uint64 approximate_keys = 12; - // QueryStats reported write query stats, and there are read query stats in store heartbeat - pdpb.QueryStats query_stats = 13; - // Actually reported time interval - pdpb.TimeInterval interval = 14; + RequestHeader header = 1; + + metapb.Region region = 2; + // Leader Peer sending the heartbeat. + metapb.Peer leader = 3; + // Term is the term of raft group. + uint64 term = 4; + // Leader considers that these peers are down. + repeated pdpb.PeerStats down_peers = 5; + // Pending peers are the peers that the leader can't consider as + // working followers. + repeated metapb.Peer pending_peers = 6; + // Bytes read/written during this period. 
+ uint64 bytes_written = 7; + uint64 bytes_read = 8; + // Keys read/written during this period. + uint64 keys_written = 9; + uint64 keys_read = 10; + // Approximate region size. + uint64 approximate_size = 11; + // Approximate number of keys. + uint64 approximate_keys = 12; + // QueryStats reported write query stats, and there are read query stats in store heartbeat + pdpb.QueryStats query_stats = 13; + // Actually reported time interval + pdpb.TimeInterval interval = 14; + // BucketMeta is the bucket version and keys of this region if TiKV enabled the bucket feature + metapb.BucketMeta bucket_meta = 15; } message RegionHeartbeatResponse { - ResponseHeader header = 1; - - // ID of the region - uint64 region_id = 2; - metapb.RegionEpoch region_epoch = 3; - // Leader of the region at the moment of the corresponding request was made. - metapb.Peer target_peer = 4; - // Notice, Pd only allows handling reported epoch >= current pd's. - // Leader peer reports region status with RegionHeartbeatRequest - // to pd regularly, pd will determine whether this region - // should do ChangePeer or not. - // E,g, max peer number is 3, region A, first only peer 1 in A. - // 1. Pd region state -> Peers (1), ConfVer (1). - // 2. Leader peer 1 reports region state to pd, pd finds the - // peer number is < 3, so first changes its current region - // state -> Peers (1, 2), ConfVer (1), and returns ChangePeer Adding 2. - // 3. Leader does ChangePeer, then reports Peers (1, 2), ConfVer (2), - // pd updates its state -> Peers (1, 2), ConfVer (2). - // 4. Leader may report old Peers (1), ConfVer (1) to pd before ConfChange - // finished, pd stills responses ChangePeer Adding 2, of course, we must - // guarantee the second ChangePeer can't be applied in TiKV. - pdpb.ChangePeer change_peer = 5; - // Pd can return transfer_leader to let TiKV does leader transfer itself. 
- pdpb.TransferLeader transfer_leader = 6; - pdpb.Merge merge = 7; - // PD sends split_region to let TiKV split a region into two regions. - pdpb.SplitRegion split_region = 8; - // Multiple change peer operations atomically. - // Note: PD can use both ChangePeer and ChangePeerV2 at the same time - // (not in the same RegionHeartbeatResponse). - // Now, PD use ChangePeerV2 in following scenarios: - // 1. replacing peers - // 2. demoting voter directly - pdpb.ChangePeerV2 change_peer_v2 = 9; - pdpb.BatchSwitchWitness switch_witnesses = 10; + ResponseHeader header = 1; + + // ID of the region + uint64 region_id = 2; + metapb.RegionEpoch region_epoch = 3; + // Leader of the region at the moment of the corresponding request was made. + metapb.Peer target_peer = 4; + // Notice, Pd only allows handling reported epoch >= current pd's. + // Leader peer reports region status with RegionHeartbeatRequest + // to pd regularly, pd will determine whether this region + // should do ChangePeer or not. + // E,g, max peer number is 3, region A, first only peer 1 in A. + // 1. Pd region state -> Peers (1), ConfVer (1). + // 2. Leader peer 1 reports region state to pd, pd finds the + // peer number is < 3, so first changes its current region + // state -> Peers (1, 2), ConfVer (1), and returns ChangePeer Adding 2. + // 3. Leader does ChangePeer, then reports Peers (1, 2), ConfVer (2), + // pd updates its state -> Peers (1, 2), ConfVer (2). + // 4. Leader may report old Peers (1), ConfVer (1) to pd before ConfChange + // finished, pd stills responses ChangePeer Adding 2, of course, we must + // guarantee the second ChangePeer can't be applied in TiKV. + pdpb.ChangePeer change_peer = 5; + // Pd can return transfer_leader to let TiKV does leader transfer itself. + pdpb.TransferLeader transfer_leader = 6; + pdpb.Merge merge = 7; + // PD sends split_region to let TiKV split a region into two regions. + pdpb.SplitRegion split_region = 8; + // Multiple change peer operations atomically. 
+ // Note: PD can use both ChangePeer and ChangePeerV2 at the same time + // (not in the same RegionHeartbeatResponse). + // Now, PD use ChangePeerV2 in following scenarios: + // 1. replacing peers + // 2. demoting voter directly + pdpb.ChangePeerV2 change_peer_v2 = 9; + pdpb.BatchSwitchWitness switch_witnesses = 10; + pdpb.ChangeSplit change_split = 11; } message ScatterRegionsRequest { - RequestHeader header = 1; - // If group is defined, the regions with the same group would be scattered as a whole group. - // If not defined, the regions would be scattered in a cluster level. - string group = 2; + RequestHeader header = 1; + // If group is defined, the regions with the same group would be scattered as a whole group. + // If not defined, the regions would be scattered in a cluster level. + string group = 2; - // If regions_id is defined, the region_id would be ignored. - repeated uint64 regions_id = 3; - uint64 retry_limit = 4; - bool skip_store_limit = 5; + // If regions_id is defined, the region_id would be ignored. + repeated uint64 regions_id = 3; + uint64 retry_limit = 4; + bool skip_store_limit = 5; } message ScatterRegionsResponse { - ResponseHeader header = 1; - uint64 finished_percentage = 2; + ResponseHeader header = 1; + uint64 finished_percentage = 2; + repeated uint64 failed_regions_id = 3; } message SplitRegionsRequest { - RequestHeader header = 1; - repeated bytes split_keys = 2; - uint64 retry_limit = 3; + RequestHeader header = 1; + // Physical split key bytes. 
+ repeated bytes split_keys = 2; + uint64 retry_limit = 3; } message SplitRegionsResponse { - ResponseHeader header = 1; - uint64 finished_percentage = 2; - repeated uint64 regions_id = 3; + ResponseHeader header = 1; + uint64 finished_percentage = 2; + repeated uint64 regions_id = 3; } message GetOperatorRequest { - RequestHeader header = 1; - uint64 region_id = 2; + RequestHeader header = 1; + uint64 region_id = 2; } message GetOperatorResponse { - ResponseHeader header = 1; - uint64 region_id = 2; - bytes desc = 3; - pdpb.OperatorStatus status = 4; - bytes kind = 5; + ResponseHeader header = 1; + uint64 region_id = 2; + bytes desc = 3; + pdpb.OperatorStatus status = 4; + bytes kind = 5; } message AskBatchSplitRequest { - RequestHeader header = 1; + RequestHeader header = 1; - metapb.Region region = 2; - uint32 split_count = 3; + metapb.Region region = 2; + uint32 split_count = 3; } message AskBatchSplitResponse { - ResponseHeader header = 1; + ResponseHeader header = 1; + + repeated pdpb.SplitID ids = 2; +} + +message RegionBucketsRequest { + RequestHeader header = 1; + + metapb.RegionEpoch region_epoch = 2; + metapb.Buckets buckets = 3; +} - repeated pdpb.SplitID ids = 2; +message RegionBucketsResponse { + ResponseHeader header = 1; } diff --git a/proto/tsopb.proto b/proto/tsopb.proto index 061bcadc..3c23886f 100644 --- a/proto/tsopb.proto +++ b/proto/tsopb.proto @@ -1,126 +1,159 @@ syntax = "proto3"; package tsopb; -import "pdpb.proto"; - +import "apipb.proto"; import "gogoproto/gogo.proto"; +import "pdpb.proto"; import "rustproto.proto"; -option (gogoproto.sizer_all) = true; +option java_package = "org.tikv.kvproto"; +option (gogoproto.goproto_sizecache_all) = false; +option (gogoproto.goproto_unkeyed_all) = false; +option (gogoproto.goproto_unrecognized_all) = false; option (gogoproto.marshaler_all) = true; +option (gogoproto.sizer_all) = true; option (gogoproto.unmarshaler_all) = true; option (rustproto.lite_runtime_all) = true; -option java_package = 
"org.tikv.kvproto"; - service TSO { - rpc Tso(stream TsoRequest) returns (stream TsoResponse) {} - // Find the keyspace group that the keyspace belongs to by keyspace id. - rpc FindGroupByKeyspaceID (FindGroupByKeyspaceIDRequest) returns (FindGroupByKeyspaceIDResponse) {} - // Get the minimum timestamp across all keyspace groups served by the TSO server who receives - // and handle the request. If the TSO server/pod is not serving any keyspace group, return - // an empty timestamp, and the client needs to skip the empty timestamps when collecting - // the min timestamp from all TSO servers/pods. - rpc GetMinTS (GetMinTSRequest) returns (GetMinTSResponse) {} + rpc Tso(stream TsoRequest) returns (stream TsoResponse) {} + // Find the keyspace group that the keyspace belongs to by keyspace id. + rpc FindGroupByKeyspaceID(FindGroupByKeyspaceIDRequest) returns (FindGroupByKeyspaceIDResponse) {} + // Find the keyspace group that the keyspace belongs to by V3 keyspace identity. + rpc FindGroupByKeyspace(FindGroupByKeyspaceRequest) returns (FindGroupByKeyspaceResponse) {} + // Get the minimum timestamp across all keyspace groups served by the TSO server who receives + // and handle the request. If the TSO server/pod is not serving any keyspace group, return + // an empty timestamp, and the client needs to skip the empty timestamps when collecting + // the min timestamp from all TSO servers/pods. + rpc GetMinTS(GetMinTSRequest) returns (GetMinTSResponse) {} } message RequestHeader { - // cluster_id is the ID of the cluster which be sent to. - uint64 cluster_id = 1; - // sender_id is the ID of the sender server. - uint64 sender_id = 2; - - // keyspace_id is the unique id of the tenant/keyspace. - uint32 keyspace_id = 3; - // keyspace_group_id is the unique id of the keyspace group to which the tenant/keyspace belongs. - uint32 keyspace_group_id = 4; + // cluster_id is the ID of the cluster which be sent to. 
+ uint64 cluster_id = 1; + // sender_id is the ID of the sender server. + uint64 sender_id = 2; + + // keyspace_id is the unique id of the tenant/keyspace in V1/V2. + // V3 should use identity and must not read this legacy field as the full identity. + uint32 keyspace_id = 3; + // keyspace_group_id is the unique id of the keyspace group to which the tenant/keyspace belongs. + uint32 keyspace_group_id = 4; + // callee_id is the ID of the server which the client expects to receive the request. + // such as tso-0, tso-1, pd-0, pd-1 etc. + // This field is used to check if the request is sent to the expected server. + // If it is not matched, the server will return an error. + string callee_id = 5; + // V3 keyspace identity of the request. + apipb.KeyspaceIdentity identity = 6; } message ResponseHeader { - // cluster_id is the ID of the cluster which sent the response. - uint64 cluster_id = 1; - Error error = 2; - - // keyspace_id is the unique id of the tenant/keyspace as the response receiver. - uint32 keyspace_id = 3; - // keyspace_group_id is the unique id of the keyspace group to which the tenant/keyspace belongs. - uint32 keyspace_group_id = 4; + // cluster_id is the ID of the cluster which sent the response. + uint64 cluster_id = 1; + Error error = 2; + + // keyspace_id is the unique id of the tenant/keyspace as the response receiver in V1/V2. + // V3 should use identity and must not read this legacy field as the full identity. + uint32 keyspace_id = 3; + // keyspace_group_id is the unique id of the keyspace group to which the tenant/keyspace belongs. + uint32 keyspace_group_id = 4; + // V3 keyspace identity served by this response. 
+ apipb.KeyspaceIdentity identity = 5; } enum ErrorType { - OK = 0; - UNKNOWN = 1; - NOT_BOOTSTRAPPED = 2; - ALREADY_BOOTSTRAPPED = 3; - INVALID_VALUE = 4; - CLUSTER_MISMATCHED = 5; + OK = 0; + UNKNOWN = 1; + NOT_BOOTSTRAPPED = 2; + ALREADY_BOOTSTRAPPED = 3; + INVALID_VALUE = 4; + CLUSTER_MISMATCHED = 5; } message Error { - ErrorType type = 1; - string message = 2; + ErrorType type = 1; + string message = 2; } message TsoRequest { - RequestHeader header = 1; + RequestHeader header = 1; - uint32 count = 2; - string dc_location = 3; + uint32 count = 2; + string dc_location = 3; } message TsoResponse { - ResponseHeader header = 1; + ResponseHeader header = 1; - uint32 count = 2; - pdpb.Timestamp timestamp = 3; + uint32 count = 2; + pdpb.Timestamp timestamp = 3; } message Participant { - // name is the unique name of the TSO participant. - string name = 1; - // id is the unique id of the TSO participant. - uint64 id = 2; - // listen_urls is the serivce endpoint list in the url format. - // listen_urls[0] is primary service endpoint. - repeated string listen_urls = 3; + // name is the unique name of the TSO participant. + string name = 1; + // id is the unique id of the TSO participant. + uint64 id = 2; + // listen_urls is the serivce endpoint list in the url format. + // listen_urls[0] is primary service endpoint. + repeated string listen_urls = 3; } message KeyspaceGroupMember { - string address = 1; - bool is_primary = 2; + string address = 1; + bool is_primary = 2; } message SplitState { - uint32 split_source = 1; + uint32 split_source = 1; } message KeyspaceGroup { - uint32 id = 1; - string user_kind = 2; - SplitState split_state = 3; - repeated KeyspaceGroupMember members = 4; + uint32 id = 1; + string user_kind = 2; + SplitState split_state = 3; + repeated KeyspaceGroupMember members = 4; + // V3 keyspace identities that belong to this group. Membership may cross namespaces. 
+ repeated apipb.KeyspaceIdentity keyspace_identities = 5; } - + message FindGroupByKeyspaceIDRequest { - RequestHeader header = 1; - uint32 keyspace_id = 2; + RequestHeader header = 1; + // V1/V2 compatibility keyspace id. V3 should use FindGroupByKeyspaceRequest.identity. + uint32 keyspace_id = 2; + uint64 mod_revision = 3; } message FindGroupByKeyspaceIDResponse { - ResponseHeader header = 1; - KeyspaceGroup keyspace_group = 2; + ResponseHeader header = 1; + KeyspaceGroup keyspace_group = 2; + uint64 mod_revision = 3; +} + +message FindGroupByKeyspaceRequest { + RequestHeader header = 1; + // V3 keyspace identity. + apipb.KeyspaceIdentity identity = 2; + uint64 mod_revision = 3; +} + +message FindGroupByKeyspaceResponse { + ResponseHeader header = 1; + KeyspaceGroup keyspace_group = 2; + uint64 mod_revision = 3; } message GetMinTSRequest { - RequestHeader header = 1; - string dc_location = 2; + RequestHeader header = 1; + string dc_location = 2; } message GetMinTSResponse { - ResponseHeader header = 1; - pdpb.Timestamp timestamp = 2; - // the count of keyspace group primaries that the TSO server/pod is serving - uint32 keyspace_groups_serving = 3; - // the total count of keyspace groups - uint32 keyspace_groups_total = 4; + ResponseHeader header = 1; + pdpb.Timestamp timestamp = 2; + // the count of keyspace group primaries that the TSO server/pod is serving + uint32 keyspace_groups_serving = 3; + // the total count of keyspace groups + uint32 keyspace_groups_total = 4; } diff --git a/src/config.rs b/src/config.rs index 92a67e65..b6dee363 100644 --- a/src/config.rs +++ b/src/config.rs @@ -6,6 +6,14 @@ use std::time::Duration; use serde_derive::Deserialize; use serde_derive::Serialize; +/// A V3 keyspace identity. 
+#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "kebab-case")] +pub struct KeyspaceIdentity { + pub namespace_id: u32, + pub keyspace_id: u32, +} + /// The configuration for either a [`RawClient`](crate::RawClient) or a /// [`TransactionClient`](crate::TransactionClient). /// @@ -26,6 +34,9 @@ pub struct Config { pub timeout: Duration, pub grpc_max_decoding_message_size: usize, pub keyspace: Option, + pub keyspace_identity: Option, + pub keyspace_namespace_id: Option, + pub keyspace_global_name_lookup: bool, } const DEFAULT_REQUEST_TIMEOUT: Duration = Duration::from_secs(2); @@ -40,6 +51,9 @@ impl Default for Config { timeout: DEFAULT_REQUEST_TIMEOUT, grpc_max_decoding_message_size: DEFAULT_GRPC_MAX_DECODING_MESSAGE_SIZE, keyspace: None, + keyspace_identity: None, + keyspace_namespace_id: None, + keyspace_global_name_lookup: false, } } } @@ -115,6 +129,47 @@ impl Config { #[must_use] pub fn with_keyspace(mut self, keyspace: &str) -> Self { self.keyspace = Some(keyspace.to_owned()); + self.keyspace_identity = None; + self.keyspace_namespace_id = None; + self.keyspace_global_name_lookup = false; + self + } + + /// Set namespace-scoped API V3 keyspace-name lookup for the client. + #[must_use] + pub fn with_keyspace_namespace_id(mut self, namespace_id: u32) -> Self { + self.keyspace_identity = None; + self.keyspace_namespace_id = Some(namespace_id); + self.keyspace_global_name_lookup = false; + self + } + + /// Resolve the keyspace by globally unique name and use API V3 identity when PD returns one. + /// + /// This is intended for DB9-style deployments where keyspace names are globally unique even + /// though PD's native API V3 uniqueness rule is namespace-scoped. 
+ #[must_use] + pub fn with_keyspace_global_name_lookup(mut self, keyspace: &str) -> Self { + self.keyspace = Some(keyspace.to_owned()); + self.keyspace_identity = None; + self.keyspace_namespace_id = None; + self.keyspace_global_name_lookup = true; + self + } + + /// Set the API V3 keyspace identity for the client. + /// + /// API V3 has no default keyspace. Both namespace id and keyspace id must be non-zero, and + /// keyspace id must be less than `2^24`. + #[must_use] + pub fn with_keyspace_identity(mut self, namespace_id: u32, keyspace_id: u32) -> Self { + self.keyspace = None; + self.keyspace_namespace_id = None; + self.keyspace_global_name_lookup = false; + self.keyspace_identity = Some(KeyspaceIdentity { + namespace_id, + keyspace_id, + }); self } } diff --git a/src/generated/apipb.rs b/src/generated/apipb.rs new file mode 100644 index 00000000..e24432bf --- /dev/null +++ b/src/generated/apipb.rs @@ -0,0 +1,14 @@ +// This file is @generated by prost-build. +/// KeyspaceIdentity identifies a V3 tenant keyspace. +/// +/// For V3 tenant-scoped APIs, namespace_id must be non-zero and keyspace_id must +/// satisfy 0 \< keyspace_id \< 2^24. Namespace 0 and keyspace 0 mean +/// missing/invalid identity and must be rejected by V3 validators. +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct KeyspaceIdentity { + #[prost(uint32, tag = "1")] + pub namespace_id: u32, + #[prost(uint32, tag = "2")] + pub keyspace_id: u32, +} diff --git a/src/generated/autoid.rs b/src/generated/autoid.rs index 503024ab..eacca1ec 100644 --- a/src/generated/autoid.rs +++ b/src/generated/autoid.rs @@ -14,8 +14,12 @@ pub struct AutoIdRequest { pub increment: i64, #[prost(int64, tag = "6")] pub offset: i64, + /// V1/V2 compatibility keyspace id. V3 should use identity. #[prost(uint32, tag = "7")] pub keyspace_id: u32, + /// V3 keyspace identity. 
+ #[prost(message, optional, tag = "8")] + pub identity: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] diff --git a/src/generated/coprocessor.rs b/src/generated/coprocessor.rs index 907f9697..89f65c11 100644 --- a/src/generated/coprocessor.rs +++ b/src/generated/coprocessor.rs @@ -8,6 +8,16 @@ pub struct KeyRange { #[prost(bytes = "vec", tag = "2")] pub end: ::prost::alloc::vec::Vec, } +/// KeyRange with an attached read_ts (version). +/// It is used by TiCI versioned lookup. Callers must ensure `range` is a point range. +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct VersionedKeyRange { + #[prost(message, optional, tag = "1")] + pub range: ::core::option::Option, + #[prost(uint64, tag = "2")] + pub read_ts: u64, +} #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct Request { @@ -44,6 +54,16 @@ pub struct Request { /// This is the session alias between a client and tidb #[prost(string, tag = "13")] pub connection_alias: ::prost::alloc::string::String, + /// Shard infos for FTS index, used by TiFlash reading TiCI. + #[prost(message, repeated, tag = "14")] + pub table_shard_infos: ::prost::alloc::vec::Vec, + /// Versioned point ranges for TiCI lookup. + /// When `versioned_ranges` is non-empty, all `versioned_ranges\[i\].range` must be point range. + #[prost(message, repeated, tag = "15")] + pub versioned_ranges: ::prost::alloc::vec::Vec, + /// max_keys_read is 0 when disabled, otherwise limits storage engine keys read per coprocessor task. 
+ #[prost(uint64, tag = "16")] + pub max_keys_read: u64, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -92,6 +112,54 @@ pub struct RegionInfo { } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] +pub struct ShardInfo { + #[prost(uint64, tag = "1")] + pub shard_id: u64, + #[prost(uint64, tag = "2")] + pub shard_epoch: u64, + #[prost(message, repeated, tag = "3")] + pub ranges: ::prost::alloc::vec::Vec, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct TableShardInfos { + /// The executor ID is used to identify the tici executor. + #[prost(string, tag = "1")] + pub executor_id: ::prost::alloc::string::String, + /// The shard_infos contains the shard information for each tici executor. + #[prost(message, repeated, tag = "2")] + pub shard_infos: ::prost::alloc::vec::Vec, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct TiCiEstimateCountRequest { + #[prost(message, optional, tag = "1")] + pub context: ::core::option::Option, + #[prost(uint64, tag = "2")] + pub start_ts: u64, + #[prost(int64, tag = "3")] + pub table_id: i64, + #[prost(int64, tag = "4")] + pub index_id: i64, + #[prost(bytes = "vec", tag = "5")] + pub fts_query_info: ::prost::alloc::vec::Vec, + #[prost(string, tag = "6")] + pub time_zone_name: ::prost::alloc::string::String, + #[prost(int64, tag = "7")] + pub time_zone_offset: i64, + #[prost(message, repeated, tag = "8")] + pub shard_infos: ::prost::alloc::vec::Vec, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct TiCiEstimateCountResponse { + #[prost(uint64, tag = "1")] + pub est_count: u64, + #[prost(string, tag = "2")] + pub other_error: ::prost::alloc::string::String, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] pub struct 
TableRegions { #[prost(int64, tag = "1")] pub physical_table_id: i64, @@ -125,6 +193,9 @@ pub struct BatchRequest { /// This is the session alias between a client and tidb #[prost(string, tag = "10")] pub connection_alias: ::prost::alloc::string::String, + /// Shard infos for FTS index, used by TiFlash reading TiCI. + #[prost(message, repeated, tag = "11")] + pub table_shard_infos: ::prost::alloc::vec::Vec, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -137,6 +208,8 @@ pub struct BatchResponse { pub exec_details: ::core::option::Option, #[prost(message, repeated, tag = "4")] pub retry_regions: ::prost::alloc::vec::Vec, + #[prost(message, repeated, tag = "5")] + pub retry_shards: ::prost::alloc::vec::Vec, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -151,6 +224,10 @@ pub struct StoreBatchTask { pub ranges: ::prost::alloc::vec::Vec, #[prost(uint64, tag = "5")] pub task_id: u64, + /// Versioned point ranges for TiCI lookup. + /// When `versioned_ranges` is non-empty, all `versioned_ranges\[i\].range` must be point range. + #[prost(message, repeated, tag = "6")] + pub versioned_ranges: ::prost::alloc::vec::Vec, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -168,3 +245,38 @@ pub struct StoreBatchTaskResponse { #[prost(message, optional, tag = "6")] pub exec_details_v2: ::core::option::Option, } +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct DelegateRequest { + #[prost(message, optional, tag = "1")] + pub context: ::core::option::Option, + #[prost(uint64, tag = "2")] + pub start_ts: u64, + #[prost(message, repeated, tag = "3")] + pub ranges: ::prost::alloc::vec::Vec, + /// Used for avoid redundant mem-table copying. + /// If the sequence is the same, tikv-server will not return the mem-table. 
+ #[prost(uint64, tag = "4")] + pub mem_table_sequence: u64, + /// Used for avoid redundant snapshot copying. + /// If the sequence is the same, tikv-server will not return the snapshot. + #[prost(uint64, tag = "5")] + pub snapshot_sequence: u64, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct DelegateResponse { + #[prost(bytes = "vec", tag = "1")] + pub mem_table_data: ::prost::alloc::vec::Vec, + #[prost(bytes = "vec", tag = "2")] + pub snapshot: ::prost::alloc::vec::Vec, + #[prost(message, optional, tag = "3")] + pub region_error: ::core::option::Option, + #[prost(message, optional, tag = "4")] + pub locked: ::core::option::Option, + #[prost(string, tag = "5")] + pub other_error: ::prost::alloc::string::String, + /// Used for avoid redundant mem-table copying. + #[prost(uint64, tag = "6")] + pub mem_table_sequence: u64, +} diff --git a/src/generated/disaggregated.rs b/src/generated/disaggregated.rs index 0b24ba7a..dd44f117 100644 --- a/src/generated/disaggregated.rs +++ b/src/generated/disaggregated.rs @@ -111,7 +111,7 @@ pub struct DisaggTaskMeta { /// the exectuor id #[prost(string, tag = "6")] pub executor_id: ::prost::alloc::string::String, - /// keyspace id of the request + /// V1/V2 compatibility keyspace id of the request. V3 should use keyspace_identity. #[prost(uint32, tag = "7")] pub keyspace_id: u32, /// API version of the request @@ -123,6 +123,9 @@ pub struct DisaggTaskMeta { /// This is the session alias between a client and tidb #[prost(string, tag = "11")] pub connection_alias: ::prost::alloc::string::String, + /// V3 keyspace identity of the request. 
+ #[prost(message, optional, tag = "12")] + pub keyspace_identity: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] diff --git a/src/generated/keyspacepb.rs b/src/generated/keyspacepb.rs index 3cf798d9..2b31b4b1 100644 --- a/src/generated/keyspacepb.rs +++ b/src/generated/keyspacepb.rs @@ -2,6 +2,7 @@ #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct KeyspaceMeta { + /// V1/V2 compatibility keyspace id. V3 should read identity instead. #[prost(uint32, tag = "1")] pub id: u32, #[prost(string, tag = "2")] @@ -17,6 +18,9 @@ pub struct KeyspaceMeta { ::prost::alloc::string::String, ::prost::alloc::string::String, >, + /// Canonical V3 keyspace identity. + #[prost(message, optional, tag = "8")] + pub identity: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -25,6 +29,10 @@ pub struct LoadKeyspaceRequest { pub header: ::core::option::Option, #[prost(string, tag = "2")] pub name: ::prost::alloc::string::String, + /// Optional namespace for namespace-scoped name lookup in V3. + /// If unset, V3 name-only lookup should use LookupKeyspace and may return multiple keyspaces. + #[prost(uint32, tag = "3")] + pub namespace_id: u32, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -36,6 +44,39 @@ pub struct LoadKeyspaceResponse { } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] +pub struct LookupKeyspaceRequest { + #[prost(message, optional, tag = "1")] + pub header: ::core::option::Option, + #[prost(string, tag = "2")] + pub name: ::prost::alloc::string::String, + /// Optional namespace for namespace-scoped lookup. If unset, lookup by name is global and may return multiple keyspaces. 
+ #[prost(uint32, tag = "3")] + pub namespace_id: u32, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct LookupKeyspaceResponse { + #[prost(message, optional, tag = "1")] + pub header: ::core::option::Option, + #[prost(message, repeated, tag = "2")] + pub keyspaces: ::prost::alloc::vec::Vec, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct AllocateNamespaceRequest { + #[prost(message, optional, tag = "1")] + pub header: ::core::option::Option, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct AllocateNamespaceResponse { + #[prost(message, optional, tag = "1")] + pub header: ::core::option::Option, + #[prost(uint32, tag = "2")] + pub namespace_id: u32, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] pub struct WatchKeyspacesRequest { #[prost(message, optional, tag = "1")] pub header: ::core::option::Option, @@ -53,10 +94,14 @@ pub struct WatchKeyspacesResponse { pub struct UpdateKeyspaceStateRequest { #[prost(message, optional, tag = "1")] pub header: ::core::option::Option, + /// V1/V2 compatibility keyspace id. V3 should use identity. #[prost(uint32, tag = "2")] pub id: u32, #[prost(enumeration = "KeyspaceState", tag = "3")] pub state: i32, + /// V3 keyspace identity. + #[prost(message, optional, tag = "4")] + pub identity: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -71,10 +116,17 @@ pub struct UpdateKeyspaceStateResponse { pub struct GetAllKeyspacesRequest { #[prost(message, optional, tag = "1")] pub header: ::core::option::Option, + /// V1/V2 compatibility pagination cursor. V3 should use namespace_id and start_identity. #[prost(uint32, tag = "2")] pub start_id: u32, #[prost(uint32, tag = "3")] pub limit: u32, + /// V3 namespace-limited pagination. 
Must be non-zero in V3. + #[prost(uint32, tag = "4")] + pub namespace_id: u32, + /// V3 pagination cursor within namespace_id. + #[prost(message, optional, tag = "5")] + pub start_identity: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -227,6 +279,56 @@ pub mod keyspace_client { .insert(GrpcMethod::new("keyspacepb.Keyspace", "LoadKeyspace")); self.inner.unary(req, path, codec).await } + pub async fn lookup_keyspace( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + > { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/keyspacepb.Keyspace/LookupKeyspace", + ); + let mut req = request.into_request(); + req.extensions_mut() + .insert(GrpcMethod::new("keyspacepb.Keyspace", "LookupKeyspace")); + self.inner.unary(req, path, codec).await + } + pub async fn allocate_namespace( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + > { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/keyspacepb.Keyspace/AllocateNamespace", + ); + let mut req = request.into_request(); + req.extensions_mut() + .insert(GrpcMethod::new("keyspacepb.Keyspace", "AllocateNamespace")); + self.inner.unary(req, path, codec).await + } /// WatchKeyspaces first return all current keyspaces' metadata as its first response. /// Then, it returns responses containing keyspaces that had their metadata changed. 
pub async fn watch_keyspaces( diff --git a/src/generated/kvrpcpb.rs b/src/generated/kvrpcpb.rs index 4bf66314..2af9dd32 100644 --- a/src/generated/kvrpcpb.rs +++ b/src/generated/kvrpcpb.rs @@ -10,6 +10,9 @@ pub struct GetRequest { pub key: ::prost::alloc::vec::Vec, #[prost(uint64, tag = "3")] pub version: u64, + /// If true, the response will include the commit ts of the key. + #[prost(bool, tag = "4")] + pub need_commit_ts: bool, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -30,6 +33,10 @@ pub struct GetResponse { /// Time and scan details when processing the request. #[prost(message, optional, tag = "6")] pub exec_details_v2: ::core::option::Option, + /// The commit timestamp of the key. + /// If it is zero, it means the commit timestamp is unknown. + #[prost(uint64, tag = "7")] + pub commit_ts: u64, } /// Scan fetches values for a range of keys; it is part of the transaction with /// starting timestamp = `version`. @@ -166,10 +173,13 @@ pub mod prewrite_request { #[repr(i32)] pub enum PessimisticAction { /// The key needn't be locked and no extra write conflict checks are needed. + /// Deprecated in next-gen (cloud-storage-engine). SkipPessimisticCheck = 0, - /// The key should have been locked at the time of prewrite. + /// The key should have been locked at the time of prewrite. If the lock is missing, + /// the lock will be amended. This is the normal case for pessimistic transactions. DoPessimisticCheck = 1, - /// The key doesn't need a pessimistic lock. But we need to do data constraint checks. + /// The key does not acquire a pessimistic lock for performance optimization. + /// Constraint checking (write conflicts + data constraints) is deferred to prewrite. DoConstraintCheck = 2, } impl PessimisticAction { @@ -366,6 +376,11 @@ pub struct TxnHeartBeatRequest { /// The new TTL the sender would like. #[prost(uint64, tag = "4")] pub advise_lock_ttl: u64, + /// Optionally update PK's min_commit_ts. 
+ /// Only for non-async-commmit and non-1PC transactions. + /// If it is 0, ignore this field. + #[prost(uint64, tag = "5")] + pub min_commit_ts: u64, /// Reserved for file based transaction. #[prost(bool, tag = "100")] pub is_txn_file: bool, @@ -506,6 +521,17 @@ pub struct CommitRequest { /// Timestamp for the end of the transaction. Must be greater than `start_version`. #[prost(uint64, tag = "4")] pub commit_version: u64, + /// commit_role indicates the current commit request is a primary commit or a secondary commit. + /// It's value maybe `Unknown` when using a client with an old version. + #[prost(enumeration = "CommitRole", tag = "6")] + pub commit_role: i32, + /// primary_key indicates the primary key of the transaction. + /// Its value may be empty when using an old version client. + #[prost(bytes = "vec", tag = "7")] + pub primary_key: ::prost::alloc::vec::Vec, + /// Whether committing an `use_async_commit` (i.e. should be treated as committed) prewrite. + #[prost(bool, tag = "8")] + pub use_async_commit: bool, /// Reserved for file based transaction. #[prost(bool, tag = "100")] pub is_txn_file: bool, @@ -579,6 +605,9 @@ pub struct BatchGetRequest { pub keys: ::prost::alloc::vec::Vec<::prost::alloc::vec::Vec>, #[prost(uint64, tag = "3")] pub version: u64, + /// If true, the response will include the commit ts of the key. + #[prost(bool, tag = "4")] + pub need_commit_ts: bool, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -1112,6 +1141,9 @@ pub struct SplitRegionResponse { /// include all result regions. #[prost(message, repeated, tag = "4")] pub regions: ::prost::alloc::vec::Vec, + /// Reserved for file based transaction. + #[prost(message, repeated, tag = "100")] + pub errors: ::prost::alloc::vec::Vec, } /// Sent from TiFlash to a TiKV node. 
#[allow(clippy::derive_partial_eq_without_eq)] @@ -1257,6 +1289,11 @@ pub struct Context { pub resource_control_context: ::core::option::Option, /// The keyspace that the request is sent to. /// NOTE: This field is only meaningful while the api_version is V2. + #[prost(string, tag = "31")] + pub keyspace_name: ::prost::alloc::string::String, + /// The keyspace that the request is sent to. + /// NOTE: This field is only meaningful while the api_version is V2. + /// V3 uses keyspace_identity and must not read this legacy field as the full identity. #[prost(uint32, tag = "32")] pub keyspace_id: u32, /// The buckets version that the request is sent to. @@ -1267,6 +1304,26 @@ pub struct Context { /// This is for tests only and thus can be safely changed/removed without affecting compatibility. #[prost(message, optional, tag = "34")] pub source_stmt: ::core::option::Option, + /// The cluster id of the request + #[prost(uint64, tag = "35")] + pub cluster_id: u64, + /// The trace id of the request, will be used for tracing the request's execution's inner steps. + #[prost(bytes = "vec", tag = "36")] + pub trace_id: ::prost::alloc::vec::Vec, + /// Control flags for trace logging behavior. + /// Bit 0: immediate_log - Force immediate logging without buffering + /// Bit 1: category_req_resp - Enable request/response tracing + /// Bit 2: category_write_details - Enable detailed write tracing + /// Bit 3: category_read_details - Enable detailed read tracing + /// Bits 4-63: Reserved for future use + /// This field is set by client-go based on an extractor function provided by TiDB. + #[prost(uint64, tag = "37")] + pub trace_control_flags: u64, + /// The V3 keyspace identity that the request is sent to. + /// V3 RPC key fields carry user key bytes; TiKV encodes the physical + /// mode + namespace + keyspace prefix at the serving boundary. 
+ #[prost(message, optional, tag = "38")] + pub keyspace_identity: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -1326,6 +1383,14 @@ pub struct LockInfo { /// It can be used to help the client decide whether to try resolving the lock. #[prost(uint64, tag = "11")] pub duration_to_last_update_ms: u64, + /// When lock_type is SharedLock, this describes transactions holding the shared lock. + /// Important: when lock_type is SharedLock, all shared locks must use shared_lock_infos; + /// DO NOT read from the wrapper LockInfo. + /// TODO(slock): tidb should send requests with a feature flag to indicate whether it + /// supports shared locks, so that tikv can fail the requests from old tidb versions + /// when needed. + #[prost(message, repeated, tag = "12")] + pub shared_lock_infos: ::prost::alloc::vec::Vec, /// Reserved for file based transaction. #[prost(bool, tag = "100")] pub is_txn_file: bool, @@ -1366,6 +1431,12 @@ pub struct KeyError { /// CheckTxnStatus is sent to a lock that's not the primary. #[prost(message, optional, tag = "11")] pub primary_mismatch: ::core::option::Option, + /// TxnLockNotFound indicates the txn lock is not found. + #[prost(message, optional, tag = "12")] + pub txn_lock_not_found: ::core::option::Option, + /// Extra information for error debugging + #[prost(message, optional, tag = "100")] + pub debug_info: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -1407,8 +1478,10 @@ pub mod write_conflict { SelfRolledBack = 3, /// RcCheckTs failure by meeting a newer version, let TiDB retry. RcCheckTs = 4, - /// write conflict found in lazy uniqueness check in pessimistic transactions. + /// write conflict found when deferring constraint checks in pessimistic transactions. Deprecated in next-gen (cloud-storage-engine). 
LazyUniquenessCheck = 5, + /// write conflict found on keys that do not acquire pessimistic locks in pessimistic transactions. + NotLockedKeyConflict = 6, } impl Reason { /// String value of the enum field names used in the ProtoBuf definition. @@ -1423,6 +1496,7 @@ pub mod write_conflict { Reason::SelfRolledBack => "SelfRolledBack", Reason::RcCheckTs => "RcCheckTs", Reason::LazyUniquenessCheck => "LazyUniquenessCheck", + Reason::NotLockedKeyConflict => "NotLockedKeyConflict", } } /// Creates an enum from field names used in the ProtoBuf definition. @@ -1434,6 +1508,7 @@ pub mod write_conflict { "SelfRolledBack" => Some(Self::SelfRolledBack), "RcCheckTs" => Some(Self::RcCheckTs), "LazyUniquenessCheck" => Some(Self::LazyUniquenessCheck), + "NotLockedKeyConflict" => Some(Self::NotLockedKeyConflict), _ => None, } } @@ -1452,10 +1527,14 @@ pub struct Deadlock { pub lock_ts: u64, #[prost(bytes = "vec", tag = "2")] pub lock_key: ::prost::alloc::vec::Vec, + /// The hash of `deadlock_key` field. #[prost(uint64, tag = "3")] pub deadlock_key_hash: u64, #[prost(message, repeated, tag = "4")] pub wait_chain: ::prost::alloc::vec::Vec, + /// The key that the current transaction has already acquired and blocks another transaction to form the deadlock. 
+ #[prost(bytes = "vec", tag = "5")] + pub deadlock_key: ::prost::alloc::vec::Vec, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -1506,6 +1585,26 @@ pub struct PrimaryMismatch { } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] +pub struct TxnLockNotFound { + #[prost(bytes = "vec", tag = "1")] + pub key: ::prost::alloc::vec::Vec, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct MvccDebugInfo { + #[prost(bytes = "vec", tag = "1")] + pub key: ::prost::alloc::vec::Vec, + #[prost(message, optional, tag = "2")] + pub mvcc: ::core::option::Option, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct DebugInfo { + #[prost(message, repeated, tag = "1")] + pub mvcc_info: ::prost::alloc::vec::Vec, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] pub struct TimeDetail { /// Off-cpu wall time elapsed in TiKV side. Usually this includes queue waiting time and /// other kind of waitings in series. (Wait time in the raftstore is not included.) @@ -1548,6 +1647,12 @@ pub struct TimeDetailV2 { /// Total wall clock time spent on this RPC in TiKV . #[prost(uint64, tag = "5")] pub total_rpc_wall_time_ns: u64, + /// Time spent on the gRPC layer. + #[prost(uint64, tag = "6")] + pub kv_grpc_process_time_ns: u64, + /// Time spent on waiting for run again in grpc pool from other executor pool. + #[prost(uint64, tag = "7")] + pub kv_grpc_wait_time_ns: u64, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -1622,6 +1727,29 @@ pub struct ScanDetailV2 { /// Time used for read pool scheduling. #[prost(uint64, tag = "13")] pub read_pool_schedule_wait_nanos: u64, + /// The total MVCC key-value size meet during scanning. 
It includes + /// deleted versions, but does not include RocksDB tombstone keys. + /// + /// When this field is notably larger than `processed_versions_size`, it means + /// there are a lot of deleted MVCC keys. + #[prost(uint64, tag = "14")] + pub total_versions_size: u64, + /// Total number of IA segment cache hits for this request. + #[prost(uint64, tag = "15")] + pub ia_cache_hit_count: u64, + /// Total number of IA remote segment reads for this request. + /// + /// IA segments are only read on cache misses, so this is also the IA segment + /// cache miss count for the request. + #[prost(uint64, tag = "16")] + pub ia_remote_read_segment_count: u64, + /// Total number of logical bytes returned from IA remote segment reads for this + /// request. + #[prost(uint64, tag = "17")] + pub ia_remote_read_segment_bytes: u64, + /// Total time spent serving IA remote segment reads for this request. + #[prost(uint64, tag = "18")] + pub ia_remote_read_segment_nanos: u64, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -1649,6 +1777,51 @@ pub struct ExecDetailsV2 { /// Available when ctx.record_time_stat = true or meet slow query. #[prost(message, optional, tag = "4")] pub time_detail_v2: ::core::option::Option, + /// RU (Request Unit) consumption details. 
+ #[prost(message, optional, tag = "5")] + pub ru_v2: ::core::option::Option, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct Ruv2 { + #[prost(uint64, tag = "1")] + pub kv_engine_cache_miss: u64, + #[prost(message, optional, tag = "2")] + pub executor_inputs: ::core::option::Option, + #[prost(uint64, tag = "3")] + pub coprocessor_executor_iterations: u64, + #[prost(uint64, tag = "4")] + pub coprocessor_response_bytes: u64, + #[prost(uint64, tag = "5")] + pub raftstore_store_write_trigger_wb_bytes: u64, + #[prost(uint64, tag = "6")] + pub storage_processed_keys_batch_get: u64, + #[prost(uint64, tag = "7")] + pub storage_processed_keys_get: u64, + /// Filled by client-go. + #[prost(uint64, tag = "8")] + pub read_rpc_count: u64, + /// Filled by client-go. + #[prost(uint64, tag = "9")] + pub write_rpc_count: u64, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ExecutorInputs { + #[prost(uint64, tag = "1")] + pub tikv_coprocessor_executor_work_total_batch_index_scan: u64, + #[prost(uint64, tag = "2")] + pub tikv_coprocessor_executor_work_total_batch_table_scan: u64, + #[prost(uint64, tag = "3")] + pub tikv_coprocessor_executor_work_total_batch_selection: u64, + #[prost(uint64, tag = "4")] + pub tikv_coprocessor_executor_work_total_batch_top_n: u64, + #[prost(uint64, tag = "5")] + pub tikv_coprocessor_executor_work_total_batch_limit: u64, + #[prost(uint64, tag = "6")] + pub tikv_coprocessor_executor_work_total_batch_simple_aggr: u64, + #[prost(uint64, tag = "7")] + pub tikv_coprocessor_executor_work_total_batch_fast_hash_aggr: u64, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -1714,6 +1887,10 @@ pub struct KvPair { pub key: ::prost::alloc::vec::Vec, #[prost(bytes = "vec", tag = "3")] pub value: ::prost::alloc::vec::Vec, + /// The commit timestamp of the key. 
+ /// If it is zero, it means the commit timestamp is unknown. + #[prost(uint64, tag = "4")] + pub commit_ts: u64, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -1905,6 +2082,9 @@ pub struct RawCasRequest { pub cf: ::prost::alloc::string::String, #[prost(uint64, tag = "7")] pub ttl: u64, + /// If `delete` is true, the request is to delete the key when the comparison succeeds + #[prost(bool, tag = "8")] + pub delete: bool, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -2068,8 +2248,13 @@ pub struct CompactRequest { #[prost(enumeration = "ApiVersion", tag = "7")] pub api_version: i32, /// Keyspace of the table located in. + /// NOTE: This field is only meaningful for V1/V2 compatibility. V3 uses + /// keyspace_identity and must not read this field as the full identity. #[prost(uint32, tag = "8")] pub keyspace_id: u32, + /// V3 keyspace identity of the table located in. + #[prost(message, optional, tag = "9")] + pub keyspace_identity: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -2097,6 +2282,129 @@ pub struct TiFlashSystemTableResponse { #[prost(bytes = "vec", tag = "1")] pub data: ::prost::alloc::vec::Vec, } +/// Flush is introduced from the pipelined DML protocol. +/// A Flush request writes some keys and values to TiKV, storing in LOCK and DEFAULT CF, just like a Prewrite request. +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct FlushRequest { + #[prost(message, optional, tag = "1")] + pub context: ::core::option::Option, + #[prost(message, repeated, tag = "2")] + pub mutations: ::prost::alloc::vec::Vec, + #[prost(bytes = "vec", tag = "3")] + pub primary_key: ::prost::alloc::vec::Vec, + #[prost(uint64, tag = "4")] + pub start_ts: u64, + #[prost(uint64, tag = "5")] + pub min_commit_ts: u64, + /// generation of the flush request. 
It is a monotonically increasing number in each transaction. + #[prost(uint64, tag = "6")] + pub generation: u64, + #[prost(uint64, tag = "7")] + pub lock_ttl: u64, + #[prost(enumeration = "AssertionLevel", tag = "8")] + pub assertion_level: i32, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct FlushResponse { + #[prost(message, optional, tag = "1")] + pub region_error: ::core::option::Option, + #[prost(message, repeated, tag = "2")] + pub errors: ::prost::alloc::vec::Vec, + #[prost(message, optional, tag = "3")] + pub exec_details_v2: ::core::option::Option, +} +/// BufferBatchGet is introduced from the pipelined DML protocol. +/// It is similar to a BatchGet request, except that it can only read the data that has been flushed by itself. +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct BufferBatchGetRequest { + #[prost(message, optional, tag = "1")] + pub context: ::core::option::Option, + #[prost(bytes = "vec", repeated, tag = "2")] + pub keys: ::prost::alloc::vec::Vec<::prost::alloc::vec::Vec>, + #[prost(uint64, tag = "3")] + pub version: u64, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct BufferBatchGetResponse { + #[prost(message, optional, tag = "1")] + pub region_error: ::core::option::Option, + #[prost(message, optional, tag = "2")] + pub error: ::core::option::Option, + #[prost(message, repeated, tag = "3")] + pub pairs: ::prost::alloc::vec::Vec, + /// Time and scan details when processing the request. + #[prost(message, optional, tag = "4")] + pub exec_details_v2: ::core::option::Option, +} +/// Actively request TiKV to report health feedback information. TiKV won't omit the health feedback information when sending the +/// `BatchCommandsResponse` that contains this response. 
+/// The health feedback information won't be replied in the response, but will be attached to `BatchCommandsResponse.health_feedback` field as usual. +/// Only works when batch RPC is enabled. +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct GetHealthFeedbackRequest { + #[prost(message, optional, tag = "1")] + pub context: ::core::option::Option, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct GetHealthFeedbackResponse { + /// The error field is added for keeping consistent. This request won't meet any region error as it's store level rather than region level. + #[prost(message, optional, tag = "1")] + pub region_error: ::core::option::Option, + #[prost(message, optional, tag = "2")] + pub health_feedback: ::core::option::Option, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct HealthFeedback { + #[prost(uint64, tag = "1")] + pub store_id: u64, + /// The sequence number of the feedback message. + /// It's defined as an incrementing integer, starting from the unix timestamp (milliseconds) at + /// the time that the TiKV node is started. + /// This can be useful for filtering out out-of-order feedback messages. + /// Note that considering the possibility of system clock changing, this field doesn't guarantee + /// uniqueness and monotonic if the TiKV node is restarted. + #[prost(uint64, tag = "2")] + pub feedback_seq_no: u64, + /// The slow_score calculated in raftstore module. Due to some limitations of slow score, this would + /// be replaced by `SlowTrend` in the future. 
+ #[prost(int32, tag = "3")] + pub slow_score: i32, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct BroadcastTxnStatusRequest { + #[prost(message, optional, tag = "1")] + pub context: ::core::option::Option, + #[prost(message, repeated, tag = "2")] + pub txn_status: ::prost::alloc::vec::Vec, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct TxnStatus { + #[prost(uint64, tag = "1")] + pub start_ts: u64, + /// a non-zero min_commit_ts indicates the transaction is ongoing + #[prost(uint64, tag = "2")] + pub min_commit_ts: u64, + /// a non-zero commit_ts indicates the transaction is committed + #[prost(uint64, tag = "3")] + pub commit_ts: u64, + #[prost(bool, tag = "4")] + pub rolled_back: bool, + /// The txn has unlocked all keys, implying that it can be removed from txn_status_cache. + #[prost(bool, tag = "5")] + pub is_completed: bool, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct BroadcastTxnStatusResponse {} /// Used to specify the behavior when a pessimistic lock request is woken up after waiting for another /// lock. #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] @@ -2163,6 +2471,39 @@ impl PessimisticLockKeyResultType { } } } +/// CommitRole indicates the current commit request is a primary commit or a secondary commit. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] +pub enum CommitRole { + /// Unknown about the current request is a primary commit or a secondary commit; + Unknown = 0, + /// Current request is a primary commit; + Primary = 1, + /// Current request is a secondary commit; + Secondary = 2, +} +impl CommitRole { + /// String value of the enum field names used in the ProtoBuf definition. 
+ /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + CommitRole::Unknown => "Unknown", + CommitRole::Primary => "Primary", + CommitRole::Secondary => "Secondary", + } + } + /// Creates an enum from field names used in the ProtoBuf definition. + pub fn from_str_name(value: &str) -> ::core::option::Option { + match value { + "Unknown" => Some(Self::Unknown), + "Primary" => Some(Self::Primary), + "Secondary" => Some(Self::Secondary), + _ => None, + } + } +} /// The API version the server and the client is using. /// See more details in #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] @@ -2217,6 +2558,11 @@ pub enum ApiVersion { /// V2 server accpets V2 requests and V1 transactional requests that statrts with TiDB key /// prefix (`m` and `t`). V2 = 2, + /// `V3` uses user-key wire semantics for normal KV RPCs. Servers encode an + /// 8-byte physical prefix at the serving boundary: + /// mode(1) + namespace_id(4 bytes, big endian) + keyspace_id(3 bytes, big endian). + /// The keyspace identity is carried by Context.keyspace_identity. + V3 = 3, } impl ApiVersion { /// String value of the enum field names used in the ProtoBuf definition. @@ -2228,6 +2574,7 @@ impl ApiVersion { ApiVersion::V1 => "V1", ApiVersion::V1ttl => "V1TTL", ApiVersion::V2 => "V2", + ApiVersion::V3 => "V3", } } /// Creates an enum from field names used in the ProtoBuf definition. @@ -2236,6 +2583,7 @@ impl ApiVersion { "V1" => Some(Self::V1), "V1TTL" => Some(Self::V1ttl), "V2" => Some(Self::V2), + "V3" => Some(Self::V3), _ => None, } } @@ -2344,8 +2692,13 @@ pub enum Op { Rollback = 3, /// insert operation has a constraint that key should not exist before. Insert = 4, + /// PessimisticLock is exclusive lock acquired in pessimistic transaction. 
PessimisticLock = 5, CheckNotExists = 6, + /// SharedLock likes Lock but in shared mode. + SharedLock = 7, + /// SharedPessimisticLock is shared lock acquired in pessimistic transaction. + SharedPessimisticLock = 8, } impl Op { /// String value of the enum field names used in the ProtoBuf definition. @@ -2361,6 +2714,8 @@ impl Op { Op::Insert => "Insert", Op::PessimisticLock => "PessimisticLock", Op::CheckNotExists => "CheckNotExists", + Op::SharedLock => "SharedLock", + Op::SharedPessimisticLock => "SharedPessimisticLock", } } /// Creates an enum from field names used in the ProtoBuf definition. @@ -2373,6 +2728,8 @@ impl Op { "Insert" => Some(Self::Insert), "PessimisticLock" => Some(Self::PessimisticLock), "CheckNotExists" => Some(Self::CheckNotExists), + "SharedLock" => Some(Self::SharedLock), + "SharedPessimisticLock" => Some(Self::SharedPessimisticLock), _ => None, } } diff --git a/src/generated/metapb.rs b/src/generated/metapb.rs index 8839fb0f..7ee897eb 100644 --- a/src/generated/metapb.rs +++ b/src/generated/metapb.rs @@ -110,6 +110,16 @@ pub struct Buckets { } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] +pub struct BucketMeta { + /// A hint indicate if keys have changed. + #[prost(uint64, tag = "1")] + pub version: u64, + /// keys of buckets, include start/end key of region + #[prost(bytes = "vec", repeated, tag = "2")] + pub keys: ::prost::alloc::vec::Vec<::prost::alloc::vec::Vec>, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] pub struct Region { #[prost(uint64, tag = "1")] pub id: u64, diff --git a/src/generated/mod.rs b/src/generated/mod.rs index da3c98ea..8478a732 100644 --- a/src/generated/mod.rs +++ b/src/generated/mod.rs @@ -1,4 +1,7 @@ // This file is @generated by prost-build. 
+pub mod apipb { + include!("apipb.rs"); +} pub mod autoid { include!("autoid.rs"); } diff --git a/src/generated/mpp.rs b/src/generated/mpp.rs index e68ab94b..d200535d 100644 --- a/src/generated/mpp.rs +++ b/src/generated/mpp.rs @@ -30,7 +30,7 @@ pub struct TaskMeta { /// mpp version #[prost(int64, tag = "9")] pub mpp_version: i64, - /// keyspace id of the request + /// V1/V2 compatibility keyspace id of the request. V3 should use keyspace_identity. #[prost(uint32, tag = "10")] pub keyspace_id: u32, /// coordinator_address of this query @@ -50,6 +50,13 @@ pub struct TaskMeta { /// This is the session alias between a client and tidb #[prost(string, tag = "19")] pub connection_alias: ::prost::alloc::string::String, + #[prost(string, tag = "20")] + pub sql_digest: ::prost::alloc::string::String, + #[prost(string, tag = "21")] + pub plan_digest: ::prost::alloc::string::String, + /// V3 keyspace identity of the request. + #[prost(message, optional, tag = "22")] + pub keyspace_identity: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -80,6 +87,9 @@ pub struct DispatchTaskRequest { /// Used for partition table scan #[prost(message, repeated, tag = "6")] pub table_regions: ::prost::alloc::vec::Vec, + /// Shard infos for TiCI/FTS routing in MPP dispatch path. + #[prost(message, repeated, tag = "7")] + pub table_shard_infos: ::prost::alloc::vec::Vec, } /// Get response of DispatchTaskRequest. #[allow(clippy::derive_partial_eq_without_eq)] diff --git a/src/generated/pdpb.rs b/src/generated/pdpb.rs index 548b333c..6cda6509 100644 --- a/src/generated/pdpb.rs +++ b/src/generated/pdpb.rs @@ -71,8 +71,17 @@ pub struct RequestHeader { #[prost(uint64, tag = "1")] pub cluster_id: u64, /// sender_id is the ID of the sender server, also member ID or etcd ID. + /// sender_id is used in PD internal communication. 
#[prost(uint64, tag = "2")] pub sender_id: u64, + /// caller_id is the ID of the client which sends the request, such as tikv, + /// tidb, cdc, etc. + #[prost(string, tag = "3")] + pub caller_id: ::prost::alloc::string::String, + /// caller_component is the component of the client which sends the request, + /// such as ddl, optimizer, etc. + #[prost(string, tag = "4")] + pub caller_component: ::prost::alloc::string::String, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -100,6 +109,9 @@ pub struct TsoRequest { pub count: u32, #[prost(string, tag = "3")] pub dc_location: ::prost::alloc::string::String, + /// V3 keyspace identity for tenant-scoped TSO requests. + #[prost(message, optional, tag = "4")] + pub identity: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -162,6 +174,8 @@ pub struct IsBootstrappedResponse { pub struct AllocIdRequest { #[prost(message, optional, tag = "1")] pub header: ::core::option::Option, + #[prost(uint32, tag = "2")] + pub count: u32, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -170,6 +184,8 @@ pub struct AllocIdResponse { pub header: ::core::option::Option, #[prost(uint64, tag = "2")] pub id: u64, + #[prost(uint32, tag = "3")] + pub count: u32, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -243,6 +259,7 @@ pub struct GetAllStoresResponse { pub struct GetRegionRequest { #[prost(message, optional, tag = "1")] pub header: ::core::option::Option, + /// Physical key bytes used for Region lookup. 
#[prost(bytes = "vec", tag = "2")] pub region_key: ::prost::alloc::vec::Vec, #[prost(bool, tag = "3")] @@ -280,15 +297,67 @@ pub struct GetRegionByIdRequest { } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] +pub struct QueryRegionRequest { + #[prost(message, optional, tag = "1")] + pub header: ::core::option::Option, + /// Whether to include the buckets info within the response. + #[prost(bool, tag = "2")] + pub need_buckets: bool, + /// The region IDs to query. + #[prost(uint64, repeated, tag = "3")] + pub ids: ::prost::alloc::vec::Vec, + /// Physical key bytes to query. + #[prost(bytes = "vec", repeated, tag = "4")] + pub keys: ::prost::alloc::vec::Vec<::prost::alloc::vec::Vec>, + /// Previous physical key bytes to query. + #[prost(bytes = "vec", repeated, tag = "5")] + pub prev_keys: ::prost::alloc::vec::Vec<::prost::alloc::vec::Vec>, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct QueryRegionResponse { + #[prost(message, optional, tag = "1")] + pub header: ::core::option::Option, + /// This array functions as a map corresponding to the region IDs, + /// preserving the order of the input region keys, if they are present. + #[prost(uint64, repeated, tag = "2")] + pub key_id_map: ::prost::alloc::vec::Vec, + /// This array functions as a map corresponding to the previous region IDs, + /// preserving the order of the input previous region keys, if they are present. 
+ #[prost(uint64, repeated, tag = "3")] + pub prev_key_id_map: ::prost::alloc::vec::Vec, + /// RegionID -> RegionResponse + #[prost(map = "uint64, message", tag = "4")] + pub regions_by_id: ::std::collections::HashMap, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct RegionResponse { + #[prost(message, optional, tag = "1")] + pub region: ::core::option::Option, + #[prost(message, optional, tag = "2")] + pub leader: ::core::option::Option, + #[prost(message, repeated, tag = "3")] + pub down_peers: ::prost::alloc::vec::Vec, + #[prost(message, repeated, tag = "4")] + pub pending_peers: ::prost::alloc::vec::Vec, + #[prost(message, optional, tag = "5")] + pub buckets: ::core::option::Option, +} +/// Use GetRegionResponse as the response of GetRegionByIDRequest. +/// Deprecated: use BatchScanRegionsRequest instead. +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] pub struct ScanRegionsRequest { #[prost(message, optional, tag = "1")] pub header: ::core::option::Option, + /// Physical start key bytes. #[prost(bytes = "vec", tag = "2")] pub start_key: ::prost::alloc::vec::Vec, /// no limit when limit \<= 0. #[prost(int32, tag = "3")] pub limit: i32, - /// end_key is +inf when it is empty. + /// Physical end key bytes. end_key is +inf when it is empty. #[prost(bytes = "vec", tag = "4")] pub end_key: ::prost::alloc::vec::Vec, } @@ -306,6 +375,9 @@ pub struct Region { /// working followers. #[prost(message, repeated, tag = "4")] pub pending_peers: ::prost::alloc::vec::Vec, + /// buckets isn't nil only when need_buckets is true. 
+ #[prost(message, optional, tag = "5")] + pub buckets: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -323,6 +395,45 @@ pub struct ScanRegionsResponse { } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] +pub struct KeyRange { + /// Physical start key bytes. + #[prost(bytes = "vec", tag = "1")] + pub start_key: ::prost::alloc::vec::Vec, + /// Physical end key bytes. end_key is +inf when it is empty. + #[prost(bytes = "vec", tag = "2")] + pub end_key: ::prost::alloc::vec::Vec, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct BatchScanRegionsRequest { + #[prost(message, optional, tag = "1")] + pub header: ::core::option::Option, + #[prost(bool, tag = "2")] + pub need_buckets: bool, + /// Physical key ranges. The given ranges must be in order. + #[prost(message, repeated, tag = "3")] + pub ranges: ::prost::alloc::vec::Vec, + /// limit the total number of regions to scan. + #[prost(int32, tag = "4")] + pub limit: i32, + /// If contain_all_key_range is true, the output must contain all + /// key ranges in the request. + /// If the output does not contain all key ranges, the request is considered + /// failed and returns an error(REGIONS_NOT_CONTAIN_ALL_KEY_RANGE). + #[prost(bool, tag = "5")] + pub contain_all_key_range: bool, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct BatchScanRegionsResponse { + #[prost(message, optional, tag = "1")] + pub header: ::core::option::Option, + /// the returned regions are flattened into a list, because the given ranges can located in the same range, we do not return duplicated regions then. 
+ #[prost(message, repeated, tag = "2")] + pub regions: ::prost::alloc::vec::Vec, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] pub struct GetClusterConfigRequest { #[prost(message, optional, tag = "1")] pub header: ::core::option::Option, @@ -468,14 +579,27 @@ pub struct RegionHeartbeatRequest { /// QueryStats reported write query stats, and there are read query stats in store heartbeat #[prost(message, optional, tag = "16")] pub query_stats: ::core::option::Option, - /// cpu_usage is the CPU time usage of the leader region since the last heartbeat, + /// cpu_usage is the total CPU time usage of the leader region since the last heartbeat, /// which is calculated by cpu_time_delta/heartbeat_reported_interval. + /// Deprecated: use cpu_stats instead. + #[deprecated] #[prost(uint64, tag = "17")] pub cpu_usage: u64, - /// (Serverless) Approximate size of key-value pairs for billing. + /// cpu_stats reports CPU usage breakdown for the leader region by kind + /// (e.g. unified read). + #[prost(message, optional, tag = "21")] + pub cpu_stats: ::core::option::Option, + /// Approximate size of row-based key-value pairs for billing. /// It's counted on size of user key & value (excluding metadata fields), before compression, and latest versions only. #[prost(uint64, tag = "18")] pub approximate_kv_size: u64, + /// Approximate size of column-based key-value pairs for billing. + /// It's counted on size of user key & value (excluding metadata fields), before compression, and latest versions only. 
+ #[prost(uint64, tag = "19")] + pub approximate_columnar_kv_size: u64, + /// BucketMeta is the bucket version and keys of this region if TiKV enabled the bucket feature + #[prost(message, optional, tag = "20")] + pub bucket_meta: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -511,6 +635,7 @@ pub struct Merge { pub struct SplitRegion { #[prost(enumeration = "CheckPolicy", tag = "1")] pub policy: i32, + /// Physical split key bytes. #[prost(bytes = "vec", repeated, tag = "2")] pub keys: ::prost::alloc::vec::Vec<::prost::alloc::vec::Vec>, } @@ -530,6 +655,13 @@ pub struct BatchSwitchWitness { } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] +pub struct ChangeSplit { + /// auto_split_enabled configures whether the corresponding Region is allowed to be auto split by size or load. + #[prost(bool, tag = "1")] + pub auto_split_enabled: bool, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] pub struct RegionHeartbeatResponse { #[prost(message, optional, tag = "1")] pub header: ::core::option::Option, @@ -576,6 +708,8 @@ pub struct RegionHeartbeatResponse { pub change_peer_v2: ::core::option::Option, #[prost(message, optional, tag = "10")] pub switch_witnesses: ::core::option::Option, + #[prost(message, optional, tag = "11")] + pub change_split: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -624,6 +758,8 @@ pub struct AskBatchSplitRequest { pub region: ::core::option::Option, #[prost(uint32, tag = "3")] pub split_count: u32, + #[prost(enumeration = "SplitReason", tag = "4")] + pub reason: i32, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -688,6 +824,10 @@ pub struct PeerStat { pub written_keys: u64, #[prost(uint64, tag = "6")] pub written_bytes: u64, + /// cpu_stats is the CPU 
usage of the region's unified read pool since the last heartbeat, + /// which is calculated by cpu_time_delta/heartbeat_reported_interval. + #[prost(message, optional, tag = "7")] + pub cpu_stats: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -777,6 +917,48 @@ pub struct StoreStats { /// Used memory of the store in bytes. #[prost(uint64, tag = "29")] pub used_memory: u64, + /// Network_slow_scores indicate the network status between TiKV nodes, ranging from 1 to 100 (lower is better). + /// StoreID -> score + #[prost(map = "uint64, uint64", tag = "30")] + pub network_slow_scores: ::std::collections::HashMap, + /// The statistics about DFS uploads. + #[prost(message, repeated, tag = "31")] + pub dfs: ::prost::alloc::vec::Vec, + /// True if the store is undergoing graceful shutdown. + #[prost(bool, tag = "32")] + pub is_stopping: bool, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct DfsStatScope { + /// When true, the statistic is not tied to any keyspace. + #[prost(bool, tag = "1")] + pub is_global: bool, + /// The keyspace of this statistic. Ignore when is_global is true. + /// NOTE: This field is only meaningful for V1/V2 compatibility. V3 should use identity. + #[prost(uint32, tag = "2")] + pub keyspace_id: u32, + /// The component that provides the statistic. + #[prost(string, tag = "3")] + pub component: ::prost::alloc::string::String, + /// V3 keyspace identity of this statistic. Ignore when is_global is true or identities is set. + #[prost(message, optional, tag = "4")] + pub identity: ::core::option::Option, + /// V3 multi-keyspace statistic scope. Ignore when is_global is true. 
+ #[prost(message, repeated, tag = "5")] + pub identities: ::prost::alloc::vec::Vec, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct DfsStatItem { + #[prost(message, optional, tag = "1")] + pub scope: ::core::option::Option, + /// Number of bytes written to DFS. + #[prost(uint64, tag = "2")] + pub written_bytes: u64, + /// Number of write requests sent to DFS. + #[prost(uint64, tag = "3")] + pub write_requests: u64, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -822,6 +1004,9 @@ pub struct PeerReport { /// The peer has proposed but uncommitted commit merge. #[prost(bool, tag = "4")] pub has_commit_merge: bool, + /// raft applied index + #[prost(uint64, tag = "5")] + pub applied_index: u64, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -928,6 +1113,9 @@ pub struct StoreHeartbeatResponse { /// Pd can return operations to let TiKV forcely PAUSE | RESUME grpc server. #[prost(message, optional, tag = "7")] pub control_grpc: ::core::option::Option, + /// NodeState is going to mark the state of the store. + #[prost(enumeration = "super::metapb::NodeState", tag = "8")] + pub state: i32, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -962,6 +1150,8 @@ pub struct ScatterRegionResponse { pub header: ::core::option::Option, #[prost(uint64, tag = "2")] pub finished_percentage: u64, + #[prost(uint64, repeated, tag = "3")] + pub failed_regions_id: ::prost::alloc::vec::Vec, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -1022,8 +1212,12 @@ pub struct UpdateServiceGcSafePointResponse { pub struct GetGcSafePointV2Request { #[prost(message, optional, tag = "1")] pub header: ::core::option::Option, + /// V1/V2 compatibility keyspace id. V3 should use identity. 
#[prost(uint32, tag = "2")] pub keyspace_id: u32, + /// V3 keyspace identity. + #[prost(message, optional, tag = "3")] + pub identity: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -1032,6 +1226,9 @@ pub struct GetGcSafePointV2Response { pub header: ::core::option::Option, #[prost(uint64, tag = "2")] pub safe_point: u64, + /// V3 keyspace identity served by this response. + #[prost(message, optional, tag = "3")] + pub identity: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -1045,12 +1242,16 @@ pub struct WatchGcSafePointV2Request { #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct SafePointEvent { + /// V1/V2 compatibility keyspace id. V3 should use identity. #[prost(uint32, tag = "1")] pub keyspace_id: u32, #[prost(uint64, tag = "2")] pub safe_point: u64, #[prost(enumeration = "EventType", tag = "3")] pub r#type: i32, + /// V3 keyspace identity served by this event. + #[prost(message, optional, tag = "4")] + pub identity: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -1067,10 +1268,14 @@ pub struct WatchGcSafePointV2Response { pub struct UpdateGcSafePointV2Request { #[prost(message, optional, tag = "1")] pub header: ::core::option::Option, + /// V1/V2 compatibility keyspace id. V3 should use identity. #[prost(uint32, tag = "2")] pub keyspace_id: u32, #[prost(uint64, tag = "3")] pub safe_point: u64, + /// V3 keyspace identity. 
+ #[prost(message, optional, tag = "4")] + pub identity: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -1079,12 +1284,16 @@ pub struct UpdateGcSafePointV2Response { pub header: ::core::option::Option, #[prost(uint64, tag = "2")] pub new_safe_point: u64, + /// V3 keyspace identity served by this response. + #[prost(message, optional, tag = "3")] + pub identity: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct UpdateServiceSafePointV2Request { #[prost(message, optional, tag = "1")] pub header: ::core::option::Option, + /// V1/V2 compatibility keyspace id. V3 should use identity. #[prost(uint32, tag = "2")] pub keyspace_id: u32, #[prost(bytes = "vec", tag = "3")] @@ -1098,6 +1307,9 @@ pub struct UpdateServiceSafePointV2Request { /// cluster garbage collection. #[prost(int64, tag = "5")] pub ttl: i64, + /// V3 keyspace identity. + #[prost(message, optional, tag = "6")] + pub identity: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -1110,6 +1322,9 @@ pub struct UpdateServiceSafePointV2Response { pub ttl: i64, #[prost(uint64, tag = "4")] pub min_safe_point: u64, + /// V3 keyspace identity served by this response. + #[prost(message, optional, tag = "5")] + pub identity: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -1120,10 +1335,14 @@ pub struct GetAllGcSafePointV2Request { #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct GcSafePointV2 { + /// V1/V2 compatibility keyspace id. V3 should use identity. #[prost(uint32, tag = "1")] pub keyspace_id: u32, #[prost(uint64, tag = "2")] pub gc_safe_point: u64, + /// V3 keyspace identity. 
+ #[prost(message, optional, tag = "3")] + pub identity: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -1135,6 +1354,202 @@ pub struct GetAllGcSafePointV2Response { #[prost(int64, tag = "3")] pub revision: i64, } +/// A wrapper over keyspace scope. +/// keyspace_id is kept for V1/V2 compatibility. V3 should use identity and reject +/// missing/invalid namespace or keyspace IDs in tenant-scoped requests. +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct KeyspaceScope { + #[prost(uint32, tag = "1")] + pub keyspace_id: u32, + /// V3 keyspace identity. + #[prost(message, optional, tag = "2")] + pub identity: ::core::option::Option, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct AdvanceGcSafePointRequest { + #[prost(message, optional, tag = "1")] + pub header: ::core::option::Option, + #[prost(message, optional, tag = "2")] + pub keyspace_scope: ::core::option::Option, + #[prost(uint64, tag = "3")] + pub target: u64, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct AdvanceGcSafePointResponse { + #[prost(message, optional, tag = "1")] + pub header: ::core::option::Option, + #[prost(uint64, tag = "2")] + pub old_gc_safe_point: u64, + #[prost(uint64, tag = "3")] + pub new_gc_safe_point: u64, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct AdvanceTxnSafePointRequest { + #[prost(message, optional, tag = "1")] + pub header: ::core::option::Option, + #[prost(message, optional, tag = "2")] + pub keyspace_scope: ::core::option::Option, + #[prost(uint64, tag = "3")] + pub target: u64, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct AdvanceTxnSafePointResponse { + #[prost(message, optional, tag = "1")] + 
pub header: ::core::option::Option, + #[prost(uint64, tag = "2")] + pub old_txn_safe_point: u64, + #[prost(uint64, tag = "3")] + pub new_txn_safe_point: u64, + #[prost(string, tag = "4")] + pub blocker_description: ::prost::alloc::string::String, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SetGcBarrierRequest { + #[prost(message, optional, tag = "1")] + pub header: ::core::option::Option, + #[prost(message, optional, tag = "2")] + pub keyspace_scope: ::core::option::Option, + #[prost(string, tag = "3")] + pub barrier_id: ::prost::alloc::string::String, + #[prost(uint64, tag = "4")] + pub barrier_ts: u64, + #[prost(int64, tag = "5")] + pub ttl_seconds: i64, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct GcBarrierInfo { + #[prost(string, tag = "1")] + pub barrier_id: ::prost::alloc::string::String, + #[prost(uint64, tag = "2")] + pub barrier_ts: u64, + #[prost(int64, tag = "3")] + pub ttl_seconds: i64, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SetGcBarrierResponse { + #[prost(message, optional, tag = "1")] + pub header: ::core::option::Option, + #[prost(message, optional, tag = "2")] + pub new_barrier_info: ::core::option::Option, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct DeleteGcBarrierRequest { + #[prost(message, optional, tag = "1")] + pub header: ::core::option::Option, + #[prost(message, optional, tag = "2")] + pub keyspace_scope: ::core::option::Option, + #[prost(string, tag = "3")] + pub barrier_id: ::prost::alloc::string::String, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct DeleteGcBarrierResponse { + #[prost(message, optional, tag = "1")] + pub header: ::core::option::Option, + #[prost(message, optional, tag = "2")] + pub 
deleted_barrier_info: ::core::option::Option, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SetGlobalGcBarrierRequest { + #[prost(message, optional, tag = "1")] + pub header: ::core::option::Option, + #[prost(string, tag = "2")] + pub barrier_id: ::prost::alloc::string::String, + #[prost(uint64, tag = "3")] + pub barrier_ts: u64, + #[prost(int64, tag = "4")] + pub ttl_seconds: i64, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct SetGlobalGcBarrierResponse { + #[prost(message, optional, tag = "1")] + pub header: ::core::option::Option, + #[prost(message, optional, tag = "2")] + pub new_barrier_info: ::core::option::Option, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct DeleteGlobalGcBarrierRequest { + #[prost(message, optional, tag = "1")] + pub header: ::core::option::Option, + #[prost(string, tag = "2")] + pub barrier_id: ::prost::alloc::string::String, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct DeleteGlobalGcBarrierResponse { + #[prost(message, optional, tag = "1")] + pub header: ::core::option::Option, + #[prost(message, optional, tag = "2")] + pub deleted_barrier_info: ::core::option::Option, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct GlobalGcBarrierInfo { + #[prost(string, tag = "1")] + pub barrier_id: ::prost::alloc::string::String, + #[prost(uint64, tag = "2")] + pub barrier_ts: u64, + #[prost(int64, tag = "3")] + pub ttl_seconds: i64, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct GetGcStateRequest { + #[prost(message, optional, tag = "1")] + pub header: ::core::option::Option, + #[prost(message, optional, tag = "2")] + pub keyspace_scope: ::core::option::Option, +} 
+#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct GcState { + #[prost(message, optional, tag = "1")] + pub keyspace_scope: ::core::option::Option, + #[prost(bool, tag = "2")] + pub is_keyspace_level_gc: bool, + #[prost(uint64, tag = "3")] + pub txn_safe_point: u64, + #[prost(uint64, tag = "4")] + pub gc_safe_point: u64, + #[prost(message, repeated, tag = "5")] + pub gc_barriers: ::prost::alloc::vec::Vec, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct GetGcStateResponse { + #[prost(message, optional, tag = "1")] + pub header: ::core::option::Option, + #[prost(message, optional, tag = "2")] + pub gc_state: ::core::option::Option, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct GetAllKeyspacesGcStatesRequest { + #[prost(message, optional, tag = "1")] + pub header: ::core::option::Option, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct GetAllKeyspacesGcStatesResponse { + #[prost(message, optional, tag = "1")] + pub header: ::core::option::Option, + #[prost(message, repeated, tag = "2")] + pub gc_states: ::prost::alloc::vec::Vec, + #[prost(message, repeated, tag = "3")] + pub global_gc_barriers: ::prost::alloc::vec::Vec, +} #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct RegionStat { @@ -1244,6 +1659,7 @@ pub struct SyncMaxTsResponse { pub struct SplitRegionsRequest { #[prost(message, optional, tag = "1")] pub header: ::core::option::Option, + /// Physical split key bytes. 
#[prost(bytes = "vec", repeated, tag = "2")] pub split_keys: ::prost::alloc::vec::Vec<::prost::alloc::vec::Vec>, #[prost(uint64, tag = "3")] @@ -1264,6 +1680,7 @@ pub struct SplitRegionsResponse { pub struct SplitAndScatterRegionsRequest { #[prost(message, optional, tag = "1")] pub header: ::core::option::Option, + /// Physical split key bytes. #[prost(bytes = "vec", repeated, tag = "2")] pub split_keys: ::prost::alloc::vec::Vec<::prost::alloc::vec::Vec>, #[prost(string, tag = "3")] @@ -1330,6 +1747,17 @@ pub struct QueryStats { #[prost(uint64, tag = "11")] pub rollback: u64, } +/// CPU usage breakdown by kind. New kinds may be added in the future. +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct CpuStats { + /// UnifiedRead is the CPU usage of the unified read pool. + #[prost(uint64, tag = "1")] + pub unified_read: u64, + /// Scheduler is the CPU usage of the scheduler pool, it contains `sched-pool`, `sched-high`, and `sched-pri`. + #[prost(uint64, tag = "2")] + pub scheduler: u64, +} #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct ReportBucketsRequest { @@ -1446,6 +1874,7 @@ pub enum ErrorType { InvalidValue = 10, /// required watch revision is smaller than current compact/min revision. DataCompacted = 11, + RegionsNotContainAllKeyRange = 12, } impl ErrorType { /// String value of the enum field names used in the ProtoBuf definition. @@ -1466,6 +1895,9 @@ impl ErrorType { ErrorType::EntryNotFound => "ENTRY_NOT_FOUND", ErrorType::InvalidValue => "INVALID_VALUE", ErrorType::DataCompacted => "DATA_COMPACTED", + ErrorType::RegionsNotContainAllKeyRange => { + "REGIONS_NOT_CONTAIN_ALL_KEY_RANGE" + } } } /// Creates an enum from field names used in the ProtoBuf definition. 
@@ -1483,6 +1915,9 @@ impl ErrorType { "ENTRY_NOT_FOUND" => Some(Self::EntryNotFound), "INVALID_VALUE" => Some(Self::InvalidValue), "DATA_COMPACTED" => Some(Self::DataCompacted), + "REGIONS_NOT_CONTAIN_ALL_KEY_RANGE" => { + Some(Self::RegionsNotContainAllKeyRange) + } _ => None, } } @@ -1547,6 +1982,35 @@ impl CheckPolicy { } #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] #[repr(i32)] +pub enum SplitReason { + Admin = 0, + Size = 1, + Load = 2, +} +impl SplitReason { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + SplitReason::Admin => "ADMIN", + SplitReason::Size => "SIZE", + SplitReason::Load => "LOAD", + } + } + /// Creates an enum from field names used in the ProtoBuf definition. + pub fn from_str_name(value: &str) -> ::core::option::Option { + match value { + "ADMIN" => Some(Self::Admin), + "SIZE" => Some(Self::Size), + "LOAD" => Some(Self::Load), + _ => None, + } + } +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] pub enum ControlGrpcEvent { /// Pause TiKV grpc server. 
Pause = 0, @@ -2088,6 +2552,31 @@ pub mod pd_client { req.extensions_mut().insert(GrpcMethod::new("pdpb.PD", "GetRegionByID")); self.inner.unary(req, path, codec).await } + pub async fn query_region( + &mut self, + request: impl tonic::IntoStreamingRequest< + Message = super::QueryRegionRequest, + >, + ) -> std::result::Result< + tonic::Response>, + tonic::Status, + > { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static("/pdpb.PD/QueryRegion"); + let mut req = request.into_streaming_request(); + req.extensions_mut().insert(GrpcMethod::new("pdpb.PD", "QueryRegion")); + self.inner.streaming(req, path, codec).await + } + /// Deprecated: use BatchScanRegions instead. pub async fn scan_regions( &mut self, request: impl tonic::IntoRequest, @@ -2110,6 +2599,28 @@ pub mod pd_client { req.extensions_mut().insert(GrpcMethod::new("pdpb.PD", "ScanRegions")); self.inner.unary(req, path, codec).await } + pub async fn batch_scan_regions( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + > { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static("/pdpb.PD/BatchScanRegions"); + let mut req = request.into_request(); + req.extensions_mut().insert(GrpcMethod::new("pdpb.PD", "BatchScanRegions")); + self.inner.unary(req, path, codec).await + } pub async fn ask_split( &mut self, request: impl tonic::IntoRequest, @@ -2457,6 +2968,197 @@ pub mod pd_client { .insert(GrpcMethod::new("pdpb.PD", "GetAllGCSafePointV2")); self.inner.unary(req, path, codec).await } + pub async fn advance_gc_safe_point( + &mut self, + request: impl 
tonic::IntoRequest, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + > { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/pdpb.PD/AdvanceGCSafePoint", + ); + let mut req = request.into_request(); + req.extensions_mut() + .insert(GrpcMethod::new("pdpb.PD", "AdvanceGCSafePoint")); + self.inner.unary(req, path, codec).await + } + pub async fn advance_txn_safe_point( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + > { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/pdpb.PD/AdvanceTxnSafePoint", + ); + let mut req = request.into_request(); + req.extensions_mut() + .insert(GrpcMethod::new("pdpb.PD", "AdvanceTxnSafePoint")); + self.inner.unary(req, path, codec).await + } + pub async fn set_gc_barrier( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + > { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static("/pdpb.PD/SetGCBarrier"); + let mut req = request.into_request(); + req.extensions_mut().insert(GrpcMethod::new("pdpb.PD", "SetGCBarrier")); + self.inner.unary(req, path, codec).await + } + pub async fn delete_gc_barrier( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + > { + self.inner + .ready() + .await + .map_err(|e| { + 
tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static("/pdpb.PD/DeleteGCBarrier"); + let mut req = request.into_request(); + req.extensions_mut().insert(GrpcMethod::new("pdpb.PD", "DeleteGCBarrier")); + self.inner.unary(req, path, codec).await + } + pub async fn set_global_gc_barrier( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + > { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/pdpb.PD/SetGlobalGCBarrier", + ); + let mut req = request.into_request(); + req.extensions_mut() + .insert(GrpcMethod::new("pdpb.PD", "SetGlobalGCBarrier")); + self.inner.unary(req, path, codec).await + } + pub async fn delete_global_gc_barrier( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + > { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/pdpb.PD/DeleteGlobalGCBarrier", + ); + let mut req = request.into_request(); + req.extensions_mut() + .insert(GrpcMethod::new("pdpb.PD", "DeleteGlobalGCBarrier")); + self.inner.unary(req, path, codec).await + } + pub async fn get_gc_state( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + > { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = 
tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static("/pdpb.PD/GetGCState"); + let mut req = request.into_request(); + req.extensions_mut().insert(GrpcMethod::new("pdpb.PD", "GetGCState")); + self.inner.unary(req, path, codec).await + } + pub async fn get_all_keyspaces_gc_states( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + > { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/pdpb.PD/GetAllKeyspacesGCStates", + ); + let mut req = request.into_request(); + req.extensions_mut() + .insert(GrpcMethod::new("pdpb.PD", "GetAllKeyspacesGCStates")); + self.inner.unary(req, path, codec).await + } pub async fn sync_regions( &mut self, request: impl tonic::IntoStreamingRequest, diff --git a/src/generated/resource_manager.rs b/src/generated/resource_manager.rs index caf4b116..9b7d81a1 100644 --- a/src/generated/resource_manager.rs +++ b/src/generated/resource_manager.rs @@ -1,7 +1,30 @@ // This file is @generated by prost-build. +/// KeyspaceIDValue is a wrapper for the value of keyspace ID. +/// Because the 0 value is a valid keyspace ID in V1/V2, we need to use a wrapper to distinguish it from the null keyspace ID. +/// V3 tenant-scoped resource filters should use apipb.KeyspaceIdentity fields instead. 
#[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] -pub struct ListResourceGroupsRequest {} +pub struct KeyspaceIdValue { + #[prost(uint32, tag = "1")] + pub value: u32, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ListResourceGroupsRequest { + #[prost(bool, tag = "1")] + pub with_ru_stats: bool, + /// There're two cases for this field: + /// + /// * If the keyspace ID is not set, it means this may be a message from an older version. + /// To maintain compatibility, we will treat it as a null keyspace ID, which is uint32.Max. + /// * If the keyspace ID is set to a valid value, the listed resource groups will be filtered + /// by the given keyspace ID. + #[prost(message, optional, tag = "2")] + pub keyspace_id: ::core::option::Option, + /// V3 keyspace identity filter. + #[prost(message, optional, tag = "3")] + pub keyspace_identity: ::core::option::Option, +} #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct ListResourceGroupsResponse { @@ -15,6 +38,19 @@ pub struct ListResourceGroupsResponse { pub struct GetResourceGroupRequest { #[prost(string, tag = "1")] pub resource_group_name: ::prost::alloc::string::String, + #[prost(bool, tag = "2")] + pub with_ru_stats: bool, + /// There're two cases for this field: + /// + /// * If the keyspace ID is not set, it means this may be a message from an older version. + /// To maintain compatibility, we will treat it as a null keyspace ID, which is uint32.Max. + /// * If the keyspace ID is set to a valid value, it will try to get the resource group within + /// the given keyspace ID. + #[prost(message, optional, tag = "3")] + pub keyspace_id: ::core::option::Option, + /// V3 keyspace identity filter. 
+ #[prost(message, optional, tag = "4")] + pub keyspace_identity: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -29,6 +65,17 @@ pub struct GetResourceGroupResponse { pub struct DeleteResourceGroupRequest { #[prost(string, tag = "1")] pub resource_group_name: ::prost::alloc::string::String, + /// There're two cases for this field: + /// + /// * If the keyspace ID is not set, it means this may be a message from an older version. + /// To maintain compatibility, we will treat it as a null keyspace ID, which is uint32.Max. + /// * If the keyspace ID is set to a valid value, it will try to delete the resource group within + /// the given keyspace ID. + #[prost(message, optional, tag = "2")] + pub keyspace_id: ::core::option::Option, + /// V3 keyspace identity filter. + #[prost(message, optional, tag = "3")] + pub keyspace_identity: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -75,6 +122,17 @@ pub struct TokenBucketRequest { pub is_background: bool, #[prost(bool, tag = "6")] pub is_tiflash: bool, + /// There're two cases for this field: + /// + /// * If the keyspace ID is not set, it means this may be a message from an older version. + /// To maintain compatibility, we will treat it as a null keyspace ID, which is uint32.Max. + /// * If the keyspace ID is set to a valid value, it will try to request the token bucket from + /// the resource group within the given keyspace ID. + #[prost(message, optional, tag = "7")] + pub keyspace_id: ::core::option::Option, + /// V3 keyspace identity filter. 
+ #[prost(message, optional, tag = "8")] + pub keyspace_identity: ::core::option::Option, #[prost(oneof = "token_bucket_request::Request", tags = "2, 3")] pub request: ::core::option::Option, } @@ -122,6 +180,17 @@ pub struct TokenBucketResponse { /// Raw mode #[prost(message, repeated, tag = "3")] pub granted_resource_tokens: ::prost::alloc::vec::Vec, + /// There're two cases for this field: + /// + /// * If the keyspace ID is not set, it means this may be a message from an older version, + /// which can be safely ignored to keep compatibility. + /// * If the keyspace ID is set to a valid value, it means this response is from the resource + /// group within this keyspace ID. + #[prost(message, optional, tag = "4")] + pub keyspace_id: ::core::option::Option, + /// V3 keyspace identity served by this response. + #[prost(message, optional, tag = "5")] + pub keyspace_identity: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -162,6 +231,19 @@ pub struct Consumption { pub kv_read_rpc_count: f64, #[prost(double, tag = "8")] pub kv_write_rpc_count: f64, + #[prost(uint64, tag = "9")] + pub read_cross_az_traffic_bytes: u64, + #[prost(uint64, tag = "10")] + pub write_cross_az_traffic_bytes: u64, + /// RUv2 is an experimental v2 RU calculation. + /// For now it only records the consumption without actual token deduction. + #[prost(double, tag = "11")] + pub tikv_r_u_v2: f64, + #[prost(double, tag = "12")] + pub tidb_r_u_v2: f64, + /// tiflash_r_u_v2 equals tiflash RRU + tiflash WRU. + #[prost(double, tag = "13")] + pub tiflash_r_u_v2: f64, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -199,8 +281,23 @@ pub struct ResourceGroup { /// Runaway queries settings #[prost(message, optional, tag = "6")] pub runaway_settings: ::core::option::Option, + /// Background task control settings. 
#[prost(message, optional, tag = "7")] pub background_settings: ::core::option::Option, + /// RU consumption statistics. + #[prost(message, optional, tag = "8")] + pub ru_stats: ::core::option::Option, + /// The keyspace ID that the resource group belongs to. + /// There're two cases for this field: + /// + /// * If the keyspace ID is not set, it means this may be a message from an older version. + /// To maintain compatibility, we will treat it as a null keyspace ID, which is uint32.Max. + /// * If the keyspace ID is set to a valid value, it will directly be used. + #[prost(message, optional, tag = "9")] + pub keyspace_id: ::core::option::Option, + /// V3 keyspace identity that the resource group belongs to. + #[prost(message, optional, tag = "10")] + pub keyspace_identity: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -248,6 +345,10 @@ pub struct Error { pub struct RunawayRule { #[prost(uint64, tag = "1")] pub exec_elapsed_time_ms: u64, + #[prost(int64, tag = "2")] + pub processed_keys: i64, + #[prost(int64, tag = "3")] + pub request_unit: i64, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -267,12 +368,20 @@ pub struct RunawaySettings { pub action: i32, #[prost(message, optional, tag = "3")] pub watch: ::core::option::Option, + /// When the runaway action is `SwitchGroup`, + /// this field will be used to indicate which group to switch. + #[prost(string, tag = "4")] + pub switch_group_name: ::prost::alloc::string::String, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct BackgroundSettings { + /// background task types. #[prost(string, repeated, tag = "1")] pub job_types: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + /// the percentage limit of total resource(cpu/io) that background tasks can use. 
+ #[prost(uint64, tag = "2")] + pub utilization_limit: u64, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -379,6 +488,8 @@ pub enum RunawayAction { CoolDown = 2, /// kill the task Kill = 3, + /// switch the task to another group + SwitchGroup = 4, } impl RunawayAction { /// String value of the enum field names used in the ProtoBuf definition. @@ -391,6 +502,7 @@ impl RunawayAction { RunawayAction::DryRun => "DryRun", RunawayAction::CoolDown => "CoolDown", RunawayAction::Kill => "Kill", + RunawayAction::SwitchGroup => "SwitchGroup", } } /// Creates an enum from field names used in the ProtoBuf definition. @@ -400,6 +512,7 @@ impl RunawayAction { "DryRun" => Some(Self::DryRun), "CoolDown" => Some(Self::CoolDown), "Kill" => Some(Self::Kill), + "SwitchGroup" => Some(Self::SwitchGroup), _ => None, } } diff --git a/src/generated/schedulingpb.rs b/src/generated/schedulingpb.rs index 5c6d5de3..d51df0e8 100644 --- a/src/generated/schedulingpb.rs +++ b/src/generated/schedulingpb.rs @@ -98,6 +98,9 @@ pub struct RegionHeartbeatRequest { /// Actually reported time interval #[prost(message, optional, tag = "14")] pub interval: ::core::option::Option, + /// BucketMeta is the bucket version and keys of this region if TiKV enabled the bucket feature + #[prost(message, optional, tag = "15")] + pub bucket_meta: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -147,6 +150,8 @@ pub struct RegionHeartbeatResponse { pub change_peer_v2: ::core::option::Option, #[prost(message, optional, tag = "10")] pub switch_witnesses: ::core::option::Option, + #[prost(message, optional, tag = "11")] + pub change_split: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -172,12 +177,15 @@ pub struct ScatterRegionsResponse { pub header: ::core::option::Option, #[prost(uint64, tag = "2")] pub finished_percentage: 
u64, + #[prost(uint64, repeated, tag = "3")] + pub failed_regions_id: ::prost::alloc::vec::Vec, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct SplitRegionsRequest { #[prost(message, optional, tag = "1")] pub header: ::core::option::Option, + /// Physical split key bytes. #[prost(bytes = "vec", repeated, tag = "2")] pub split_keys: ::prost::alloc::vec::Vec<::prost::alloc::vec::Vec>, #[prost(uint64, tag = "3")] @@ -233,6 +241,22 @@ pub struct AskBatchSplitResponse { #[prost(message, repeated, tag = "2")] pub ids: ::prost::alloc::vec::Vec, } +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct RegionBucketsRequest { + #[prost(message, optional, tag = "1")] + pub header: ::core::option::Option, + #[prost(message, optional, tag = "2")] + pub region_epoch: ::core::option::Option, + #[prost(message, optional, tag = "3")] + pub buckets: ::core::option::Option, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct RegionBucketsResponse { + #[prost(message, optional, tag = "1")] + pub header: ::core::option::Option, +} #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] #[repr(i32)] pub enum ErrorType { @@ -508,5 +532,32 @@ pub mod scheduling_client { .insert(GrpcMethod::new("schedulingpb.Scheduling", "AskBatchSplit")); self.inner.unary(req, path, codec).await } + pub async fn region_buckets( + &mut self, + request: impl tonic::IntoStreamingRequest< + Message = super::RegionBucketsRequest, + >, + ) -> std::result::Result< + tonic::Response>, + tonic::Status, + > { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/schedulingpb.Scheduling/RegionBuckets", + ); + let 
mut req = request.into_streaming_request(); + req.extensions_mut() + .insert(GrpcMethod::new("schedulingpb.Scheduling", "RegionBuckets")); + self.inner.streaming(req, path, codec).await + } } } diff --git a/src/generated/tsopb.rs b/src/generated/tsopb.rs index 61a3c6f5..47c8dcfb 100644 --- a/src/generated/tsopb.rs +++ b/src/generated/tsopb.rs @@ -8,12 +8,22 @@ pub struct RequestHeader { /// sender_id is the ID of the sender server. #[prost(uint64, tag = "2")] pub sender_id: u64, - /// keyspace_id is the unique id of the tenant/keyspace. + /// keyspace_id is the unique id of the tenant/keyspace in V1/V2. + /// V3 should use identity and must not read this legacy field as the full identity. #[prost(uint32, tag = "3")] pub keyspace_id: u32, /// keyspace_group_id is the unique id of the keyspace group to which the tenant/keyspace belongs. #[prost(uint32, tag = "4")] pub keyspace_group_id: u32, + /// callee_id is the ID of the server which the client expects to receive the request. + /// such as tso-0, tso-1, pd-0, pd-1 etc. + /// This field is used to check if the request is sent to the expected server. + /// If it is not matched, the server will return an error. + #[prost(string, tag = "5")] + pub callee_id: ::prost::alloc::string::String, + /// V3 keyspace identity of the request. + #[prost(message, optional, tag = "6")] + pub identity: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -23,12 +33,16 @@ pub struct ResponseHeader { pub cluster_id: u64, #[prost(message, optional, tag = "2")] pub error: ::core::option::Option, - /// keyspace_id is the unique id of the tenant/keyspace as the response receiver. + /// keyspace_id is the unique id of the tenant/keyspace as the response receiver in V1/V2. + /// V3 should use identity and must not read this legacy field as the full identity. 
#[prost(uint32, tag = "3")] pub keyspace_id: u32, /// keyspace_group_id is the unique id of the keyspace group to which the tenant/keyspace belongs. #[prost(uint32, tag = "4")] pub keyspace_group_id: u32, + /// V3 keyspace identity served by this response. + #[prost(message, optional, tag = "5")] + pub identity: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -97,14 +111,20 @@ pub struct KeyspaceGroup { pub split_state: ::core::option::Option, #[prost(message, repeated, tag = "4")] pub members: ::prost::alloc::vec::Vec, + /// V3 keyspace identities that belong to this group. Membership may cross namespaces. + #[prost(message, repeated, tag = "5")] + pub keyspace_identities: ::prost::alloc::vec::Vec, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct FindGroupByKeyspaceIdRequest { #[prost(message, optional, tag = "1")] pub header: ::core::option::Option, + /// V1/V2 compatibility keyspace id. V3 should use FindGroupByKeyspaceRequest.identity. #[prost(uint32, tag = "2")] pub keyspace_id: u32, + #[prost(uint64, tag = "3")] + pub mod_revision: u64, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -113,6 +133,29 @@ pub struct FindGroupByKeyspaceIdResponse { pub header: ::core::option::Option, #[prost(message, optional, tag = "2")] pub keyspace_group: ::core::option::Option, + #[prost(uint64, tag = "3")] + pub mod_revision: u64, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct FindGroupByKeyspaceRequest { + #[prost(message, optional, tag = "1")] + pub header: ::core::option::Option, + /// V3 keyspace identity. 
+ #[prost(message, optional, tag = "2")] + pub identity: ::core::option::Option, + #[prost(uint64, tag = "3")] + pub mod_revision: u64, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct FindGroupByKeyspaceResponse { + #[prost(message, optional, tag = "1")] + pub header: ::core::option::Option, + #[prost(message, optional, tag = "2")] + pub keyspace_group: ::core::option::Option, + #[prost(uint64, tag = "3")] + pub mod_revision: u64, } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -307,6 +350,32 @@ pub mod tso_client { .insert(GrpcMethod::new("tsopb.TSO", "FindGroupByKeyspaceID")); self.inner.unary(req, path, codec).await } + /// Find the keyspace group that the keyspace belongs to by V3 keyspace identity. + pub async fn find_group_by_keyspace( + &mut self, + request: impl tonic::IntoRequest, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + > { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::new( + tonic::Code::Unknown, + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic::codec::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/tsopb.TSO/FindGroupByKeyspace", + ); + let mut req = request.into_request(); + req.extensions_mut() + .insert(GrpcMethod::new("tsopb.TSO", "FindGroupByKeyspace")); + self.inner.unary(req, path, codec).await + } /// Get the minimum timestamp across all keyspace groups served by the TSO server who receives /// and handle the request. 
If the TSO server/pod is not serving any keyspace group, return /// an empty timestamp, and the client needs to skip the empty timestamps when collecting diff --git a/src/lib.rs b/src/lib.rs index 6def1adc..2d9f5377 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -129,6 +129,8 @@ pub use common::ProtoRegionError; pub use common::Result; #[doc(inline)] pub use config::Config; +#[doc(inline)] +pub use config::KeyspaceIdentity; #[doc(inline)] pub use crate::backoff::Backoff; diff --git a/src/mock.rs b/src/mock.rs index 43953ab2..c7acd5cc 100644 --- a/src/mock.rs +++ b/src/mock.rs @@ -221,4 +221,16 @@ impl PdClient for MockPdClient { async fn load_keyspace(&self, _keyspace: &str) -> Result { unimplemented!() } + + async fn lookup_keyspaces(&self, _keyspace: &str) -> Result> { + unimplemented!() + } + + async fn lookup_keyspace( + &self, + _keyspace: &str, + _namespace_id: u32, + ) -> Result { + unimplemented!() + } } diff --git a/src/pd/client.rs b/src/pd/client.rs index 05b9c07c..521d99b3 100644 --- a/src/pd/client.rs +++ b/src/pd/client.rs @@ -14,6 +14,7 @@ use crate::kv::codec; use crate::pd::retry::RetryClientTrait; use crate::pd::Cluster; use crate::pd::RetryClient; +use crate::proto::apipb; use crate::proto::keyspacepb; use crate::proto::kvrpcpb; use crate::proto::metapb; @@ -65,10 +66,26 @@ pub trait PdClient: Send + Sync + 'static { async fn get_timestamp(self: Arc) -> Result; + async fn get_timestamp_with_identity( + self: Arc, + identity: Option, + ) -> Result { + let _ = identity; + self.get_timestamp().await + } + async fn update_safepoint(self: Arc, safepoint: u64) -> Result; async fn load_keyspace(&self, keyspace: &str) -> Result; + async fn lookup_keyspaces(&self, keyspace: &str) -> Result>; + + async fn lookup_keyspace( + &self, + keyspace: &str, + namespace_id: u32, + ) -> Result; + /// In transactional API, `key` is in raw format async fn store_for_key(self: Arc, key: &Key) -> Result { let region = self.region_for_key(key).await?; @@ -261,6 +278,13 @@ 
impl PdClient for PdRpcClient { self.pd.clone().get_timestamp().await } + async fn get_timestamp_with_identity( + self: Arc, + identity: Option, + ) -> Result { + self.pd.clone().get_timestamp_with_identity(identity).await + } + async fn update_safepoint(self: Arc, safepoint: u64) -> Result { self.pd.clone().update_safepoint(safepoint).await } @@ -280,6 +304,18 @@ impl PdClient for PdRpcClient { async fn load_keyspace(&self, keyspace: &str) -> Result { self.pd.load_keyspace(keyspace).await } + + async fn lookup_keyspaces(&self, keyspace: &str) -> Result> { + self.pd.lookup_keyspaces(keyspace).await + } + + async fn lookup_keyspace( + &self, + keyspace: &str, + namespace_id: u32, + ) -> Result { + self.pd.lookup_keyspace(keyspace, namespace_id).await + } } impl PdRpcClient { diff --git a/src/pd/cluster.rs b/src/pd/cluster.rs index ef6cce2b..7a26f556 100644 --- a/src/pd/cluster.rs +++ b/src/pd/cluster.rs @@ -15,6 +15,7 @@ use tonic::Request; use super::timestamp::TimestampOracle; use crate::internal_err; +use crate::proto::apipb; use crate::proto::keyspacepb; use crate::proto::pdpb; use crate::Error; @@ -85,6 +86,13 @@ impl Cluster { self.tso.clone().get_timestamp().await } + pub async fn get_timestamp_with_identity( + &self, + identity: Option, + ) -> Result { + self.tso.clone().get_timestamp_with_identity(identity).await + } + pub async fn update_safepoint( &mut self, safepoint: u64, @@ -108,6 +116,45 @@ impl Cluster { .ok_or_else(|| Error::KeyspaceNotFound(keyspace.to_owned()))?; Ok(keyspace) } + + pub async fn lookup_keyspace( + &mut self, + keyspace: &str, + namespace_id: u32, + timeout: Duration, + ) -> Result { + let mut req = pd_request!(self.id, keyspacepb::LookupKeyspaceRequest); + req.name = keyspace.to_string(); + req.namespace_id = namespace_id; + let mut resp = req.send(&mut self.keyspace_client, timeout).await?; + match resp.keyspaces.len() { + 1 => Ok(resp.keyspaces.remove(0)), + 0 => Err(Error::KeyspaceNotFound(keyspace.to_owned())), + _ => 
Err(Error::StringError(format!( + "multiple keyspaces named '{}' found in namespace {}", + keyspace, namespace_id + ))), + } + } + + pub async fn lookup_keyspaces( + &mut self, + keyspace: &str, + timeout: Duration, + ) -> Result> { + let mut req = pd_request!(self.id, keyspacepb::LookupKeyspaceRequest); + req.name = keyspace.to_string(); + let resp = req.send(&mut self.keyspace_client, timeout).await?; + if resp + .header + .as_ref() + .and_then(|h| h.error.as_ref()) + .is_some() + { + return Err(Error::KeyspaceNotFound(keyspace.to_owned())); + } + Ok(resp.keyspaces) + } } /// An object for connecting and reconnecting to a PD cluster. @@ -419,6 +466,16 @@ impl PdMessage for keyspacepb::LoadKeyspaceRequest { } } +#[async_trait] +impl PdMessage for keyspacepb::LookupKeyspaceRequest { + type Client = keyspacepb::keyspace_client::KeyspaceClient; + type Response = keyspacepb::LookupKeyspaceResponse; + + async fn rpc(req: Request, client: &mut Self::Client) -> GrpcResult { + Ok(client.lookup_keyspace(req).await?.into_inner()) + } +} + trait PdResponse { fn header(&self) -> &pdpb::ResponseHeader; } @@ -452,3 +509,9 @@ impl PdResponse for keyspacepb::LoadKeyspaceResponse { self.header.as_ref().unwrap() } } + +impl PdResponse for keyspacepb::LookupKeyspaceResponse { + fn header(&self) -> &pdpb::ResponseHeader { + self.header.as_ref().unwrap() + } +} diff --git a/src/pd/retry.rs b/src/pd/retry.rs index c9ccf1e1..63abf4eb 100644 --- a/src/pd/retry.rs +++ b/src/pd/retry.rs @@ -13,6 +13,7 @@ use tokio::time::sleep; use crate::pd::Cluster; use crate::pd::Connection; +use crate::proto::apipb; use crate::proto::keyspacepb; use crate::proto::metapb; use crate::proto::pdpb::Timestamp; @@ -45,9 +46,22 @@ pub trait RetryClientTrait { async fn get_timestamp(self: Arc) -> Result; + async fn get_timestamp_with_identity( + self: Arc, + identity: Option, + ) -> Result; + async fn update_safepoint(self: Arc, safepoint: u64) -> Result; async fn load_keyspace(&self, keyspace: &str) -> 
Result; + + async fn lookup_keyspaces(&self, keyspace: &str) -> Result>; + + async fn lookup_keyspace( + &self, + keyspace: &str, + namespace_id: u32, + ) -> Result; } /// Client for communication with a PD cluster. Has the facility to reconnect to the cluster. pub struct RetryClient { @@ -192,6 +206,16 @@ impl RetryClientTrait for RetryClient { retry!(self, "get_timestamp", |cluster| cluster.get_timestamp()) } + async fn get_timestamp_with_identity( + self: Arc, + identity: Option, + ) -> Result { + retry!(self, "get_timestamp_with_identity", |cluster| { + let identity = identity.clone(); + async move { cluster.get_timestamp_with_identity(identity).await } + }) + } + async fn update_safepoint(self: Arc, safepoint: u64) -> Result { retry_mut!(self, "update_gc_safepoint", |cluster| async { cluster @@ -206,6 +230,24 @@ impl RetryClientTrait for RetryClient { cluster.load_keyspace(keyspace, self.timeout).await }) } + + async fn lookup_keyspaces(&self, keyspace: &str) -> Result> { + retry_mut!(self, "lookup_keyspaces", |cluster| async { + cluster.lookup_keyspaces(keyspace, self.timeout).await + }) + } + + async fn lookup_keyspace( + &self, + keyspace: &str, + namespace_id: u32, + ) -> Result { + retry_mut!(self, "lookup_keyspace", |cluster| async { + cluster + .lookup_keyspace(keyspace, namespace_id, self.timeout) + .await + }) + } } impl fmt::Debug for RetryClient { diff --git a/src/pd/timestamp.rs b/src/pd/timestamp.rs index a1cc7fbd..0fee4953 100644 --- a/src/pd/timestamp.rs +++ b/src/pd/timestamp.rs @@ -29,6 +29,7 @@ use tokio::sync::Mutex; use tonic::transport::Channel; use crate::internal_err; +use crate::proto::apipb; use crate::proto::pdpb::pd_client::PdClient; use crate::proto::pdpb::*; use crate::Result; @@ -39,7 +40,10 @@ const MAX_BATCH_SIZE: usize = 64; /// TODO: This value should be adjustable. 
const MAX_PENDING_COUNT: usize = 1 << 16; -type TimestampRequest = oneshot::Sender; +struct TimestampRequest { + sender: oneshot::Sender, + identity: Option, +} /// The timestamp oracle (TSO) which provides monotonically increasing timestamps. #[derive(Clone)] @@ -64,10 +68,17 @@ impl TimestampOracle { } pub(crate) async fn get_timestamp(self) -> Result { + self.get_timestamp_with_identity(None).await + } + + pub(crate) async fn get_timestamp_with_identity( + self, + identity: Option, + ) -> Result { debug!("getting current timestamp"); - let (request, response) = oneshot::channel(); + let (sender, response) = oneshot::channel(); self.request_tx - .send(request) + .send(TimestampRequest { sender, identity }) .await .map_err(|_| internal_err!("TimestampRequest channel is closed"))?; Ok(response.await?) @@ -90,6 +101,7 @@ async fn run_tso( let request_stream = TsoRequestStream { cluster_id, request_rx, + pending_request: None, pending_requests: pending_requests.clone(), self_waker: sending_future_waker.clone(), }; @@ -113,14 +125,15 @@ async fn run_tso( struct RequestGroup { tso_request: TsoRequest, - requests: Vec, + requests: Vec>, } #[pin_project] struct TsoRequestStream { cluster_id: u64, #[pin] - request_rx: mpsc::Receiver>, + request_rx: mpsc::Receiver, + pending_request: Option, pending_requests: Arc>>, self_waker: Arc, } @@ -140,26 +153,41 @@ impl Stream for TsoRequestStream { this.self_waker.register(cx.waker()); return Poll::Pending; }; - let mut requests = Vec::new(); - - while requests.len() < MAX_BATCH_SIZE && pending_requests.len() < MAX_PENDING_COUNT { - match this.request_rx.poll_recv(cx) { - Poll::Ready(Some(sender)) => { - requests.push(sender); + if pending_requests.len() < MAX_PENDING_COUNT { + let timestamp_request = match this.pending_request.take() { + Some(request) => request, + None => match this.request_rx.poll_recv(cx) { + Poll::Ready(Some(request)) => request, + Poll::Ready(None) => return Poll::Ready(None), + Poll::Pending => { + 
this.self_waker.register(cx.waker()); + return Poll::Pending; + } + }, + }; + let identity = timestamp_request.identity.clone(); + let mut requests = vec![timestamp_request.sender]; + while requests.len() < MAX_BATCH_SIZE { + match this.request_rx.poll_recv(cx) { + Poll::Ready(Some(request)) if request.identity == identity => { + requests.push(request.sender); + } + Poll::Ready(Some(request)) => { + *this.pending_request = Some(request); + break; + } + Poll::Ready(None) | Poll::Pending => break, } - Poll::Ready(None) if requests.is_empty() => return Poll::Ready(None), - _ => break, } - } - - if !requests.is_empty() { let req = TsoRequest { header: Some(RequestHeader { cluster_id: *this.cluster_id, sender_id: 0, + ..Default::default() }), count: requests.len() as u32, dc_location: String::new(), + identity, }; let request_group = RequestGroup { @@ -216,3 +244,79 @@ fn allocate_timestamps( }; Ok(()) } + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn tso_request_stream_batches_same_identity() { + let (request_tx, request_rx) = mpsc::channel(2); + let (sender1, _response1) = oneshot::channel(); + let (sender2, _response2) = oneshot::channel(); + let identity = apipb::KeyspaceIdentity { + namespace_id: 3, + keyspace_id: 7, + }; + request_tx + .send(TimestampRequest { + sender: sender1, + identity: Some(identity.clone()), + }) + .await + .expect("test setup should enqueue first timestamp request"); + request_tx + .send(TimestampRequest { + sender: sender2, + identity: Some(identity.clone()), + }) + .await + .expect("test setup should enqueue second timestamp request"); + + let pending_requests = Arc::new(Mutex::new(VecDeque::with_capacity(MAX_PENDING_COUNT))); + let self_waker = Arc::new(AtomicWaker::new()); + let mut stream = TsoRequestStream { + cluster_id: 42, + request_rx, + pending_request: None, + pending_requests, + self_waker, + }; + + let req = stream.next().await.expect("request stream should yield"); + 
assert_eq!(req.header.unwrap().cluster_id, 42); + assert_eq!(req.count, 2); + assert_eq!(req.identity, Some(identity)); + } + + #[tokio::test] + async fn tso_request_stream_includes_identity() { + let (request_tx, request_rx) = mpsc::channel(1); + let (sender, _response) = oneshot::channel(); + let identity = apipb::KeyspaceIdentity { + namespace_id: 3, + keyspace_id: 7, + }; + request_tx + .send(TimestampRequest { + sender, + identity: Some(identity.clone()), + }) + .await + .expect("test setup should enqueue timestamp request"); + + let pending_requests = Arc::new(Mutex::new(VecDeque::with_capacity(MAX_PENDING_COUNT))); + let self_waker = Arc::new(AtomicWaker::new()); + let mut stream = TsoRequestStream { + cluster_id: 42, + request_rx, + pending_request: None, + pending_requests, + self_waker, + }; + + let req = stream.next().await.expect("request stream should yield"); + assert_eq!(req.header.unwrap().cluster_id, 42); + assert_eq!(req.identity, Some(identity)); + } +} diff --git a/src/raw/client.rs b/src/raw/client.rs index f8991e48..f8f27bcf 100644 --- a/src/raw/client.rs +++ b/src/raw/client.rs @@ -107,18 +107,97 @@ impl Client { pd_endpoints: Vec, config: Config, ) -> Result { - let enable_codec = config.keyspace.is_some(); + if matches!(config.keyspace_namespace_id, Some(0)) { + return Err(crate::Error::StringError( + "config.keyspace_namespace_id must be non-zero".to_owned(), + )); + } + if config.keyspace_global_name_lookup + && (config.keyspace_identity.is_some() || config.keyspace_namespace_id.is_some()) + { + return Err(crate::Error::StringError( + "config.keyspace_global_name_lookup cannot be combined with config.keyspace_identity or config.keyspace_namespace_id".to_owned(), + )); + } + if config.keyspace_global_name_lookup && config.keyspace.is_none() { + return Err(crate::Error::StringError( + "config.keyspace must be set when config.keyspace_global_name_lookup is set" + .to_owned(), + )); + } + let configured_keyspace_identity = config + 
.keyspace_identity + .map(|identity| Keyspace::api_v3(identity.namespace_id, identity.keyspace_id)) + .transpose()?; + if configured_keyspace_identity.is_none() + && config.keyspace_namespace_id.is_some() + && config.keyspace.is_none() + { + return Err(crate::Error::StringError( + "config.keyspace must be set when config.keyspace_namespace_id is set".to_owned(), + )); + } + let enable_codec = config.keyspace.is_some() + || config.keyspace_identity.is_some() + || config.keyspace_namespace_id.is_some() + || config.keyspace_global_name_lookup; let pd_endpoints: Vec = pd_endpoints.into_iter().map(Into::into).collect(); let rpc = Arc::new(PdRpcClient::connect(&pd_endpoints, config.clone(), enable_codec).await?); - let keyspace = match config.keyspace { - Some(name) => { - let keyspace = rpc.load_keyspace(&name).await?; - Keyspace::Enable { - keyspace_id: keyspace.id, + let keyspace = if let Some(keyspace) = configured_keyspace_identity { + keyspace + } else if let Some(namespace_id) = config.keyspace_namespace_id { + let name = config.keyspace.clone().ok_or_else(|| { + crate::Error::StringError( + "config.keyspace must be set when config.keyspace_namespace_id is set" + .to_owned(), + ) + })?; + let keyspace = rpc.lookup_keyspace(&name, namespace_id).await?; + let identity = keyspace.identity.ok_or_else(|| { + crate::Error::StringError(format!( + "keyspace '{}' in namespace {} does not have V3 identity", + name, namespace_id + )) + })?; + Keyspace::api_v3(identity.namespace_id, identity.keyspace_id)? + } else if config.keyspace_global_name_lookup { + let name = config.keyspace.clone().ok_or_else(|| { + crate::Error::StringError( + "config.keyspace must be set when config.keyspace_global_name_lookup is set" + .to_owned(), + ) + })?; + let mut keyspaces = rpc.lookup_keyspaces(&name).await?; + match keyspaces.len() { + 1 => { + let keyspace = keyspaces.remove(0); + if let Some(identity) = keyspace.identity { + Keyspace::api_v3(identity.namespace_id, identity.keyspace_id)? 
+ } else { + Keyspace::Enable { + keyspace_id: keyspace.id, + } + } + } + 0 => return Err(crate::Error::KeyspaceNotFound(name)), + _ => { + return Err(crate::Error::StringError(format!( + "multiple keyspaces named '{}' found; DB9 global-name lookup requires unique names", + name + ))); + } + } + } else { + match config.keyspace.clone() { + Some(name) => { + let keyspace = rpc.load_keyspace(&name).await?; + Keyspace::Enable { + keyspace_id: keyspace.id, + } } + None => Keyspace::Disable, } - None => Keyspace::Disable, }; Ok(Client { rpc, @@ -784,7 +863,7 @@ impl Client { current_limit -= kvs.len() as u32; result.append(&mut kvs); } - if end_key.clone().is_some_and(|ek| ek <= next_key) { + if next_key.is_empty() || end_key.clone().is_some_and(|ek| ek <= next_key) { break; } else { current_key = next_key; @@ -808,7 +887,8 @@ impl Client { let start_key = scan_args.start_key; let end_key = scan_args.end_key; loop { - let region = self.rpc.clone().region_for_key(&start_key).await?; + let route_start_key = self.keyspace.encode_route_key(&start_key, KeyMode::Raw); + let region = self.rpc.clone().region_for_key(&route_start_key).await?; let store = self.rpc.clone().store_for_id(region.id()).await?; let request = new_raw_scan_request( (start_key.clone(), end_key.clone()).into(), @@ -833,7 +913,12 @@ impl Client { return Err(RegionError(Box::new(err))); } } - Ok((Some(r), region.end_key())) + let (next_key, _) = self.keyspace.decode_route_range( + region.end_key(), + Key::EMPTY, + KeyMode::Raw, + ); + Ok((Some(r), next_key)) } Err(err) => Err(err), }; @@ -912,7 +997,7 @@ struct ScanInnerArgs { #[cfg(test)] mod tests { use std::any::Any; - use std::sync::Arc; + use std::sync::{Arc, Mutex}; use super::*; use crate::mock::MockKvClient; @@ -950,6 +1035,143 @@ mod tests { Ok(()) } + #[tokio::test] + async fn test_api_v3_raw_client_rejects_invalid_identity_before_pd_connect() { + let result = Client::new_with_config( + Vec::::new(), + Config::default().with_keyspace_identity(0, 
1), + ) + .await; + let err = match result { + Ok(_) => panic!("invalid API V3 identity should be rejected before PD connect"), + Err(err) => err, + }; + assert!(err.to_string().contains("namespace_id must be non-zero")); + } + + #[tokio::test] + async fn test_api_v3_raw_client_rejects_namespace_without_keyspace_before_pd_connect() { + let result = Client::new_with_config( + Vec::::new(), + Config::default().with_keyspace_namespace_id(1), + ) + .await; + let err = match result { + Ok(_) => panic!("API V3 namespace lookup should require a keyspace name"), + Err(err) => err, + }; + assert!(err.to_string().contains("config.keyspace must be set")); + } + + #[tokio::test] + async fn test_api_v3_retryable_scan_routes_with_physical_key_but_sends_user_keys() -> Result<()> + { + let pd_client = Arc::new(MockPdClient::new(MockKvClient::with_dispatch_hook( + move |req: &dyn Any| { + if let Some(req) = req.downcast_ref::() { + assert_eq!(req.start_key, vec![1]); + assert_eq!(req.end_key, vec![2]); + let ctx = req.context.as_ref().unwrap(); + assert_eq!(ctx.region_id, 2); + assert_eq!(ctx.api_version, kvrpcpb::ApiVersion::V3 as i32); + let identity = ctx.keyspace_identity.as_ref().unwrap(); + assert_eq!(identity.namespace_id, 1); + assert_eq!(identity.keyspace_id, 7); + Ok(Box::::default() as Box) + } else { + unreachable!() + } + }, + ))); + let client = Client { + rpc: pd_client, + cf: Some(ColumnFamily::Default), + backoff: DEFAULT_REGION_BACKOFF, + atomic: false, + keyspace: Keyspace::api_v3(1, 7).unwrap(), + }; + + let (resp, next_key) = client + .retryable_scan(ScanInnerArgs { + start_key: vec![1].into(), + end_key: Some(vec![2].into()), + limit: 16, + key_only: false, + reverse: false, + backoff: DEFAULT_STORE_BACKOFF, + }) + .await?; + + assert!(resp.is_some()); + assert!(next_key.is_empty()); + Ok(()) + } + + #[tokio::test] + async fn test_api_v3_raw_coprocessor_routes_with_physical_key_but_sends_user_ranges( + ) -> Result<()> { + let seen_dispatch = 
Arc::new(Mutex::new(false)); + let seen_dispatch_in_hook = seen_dispatch.clone(); + let pd_client = Arc::new(MockPdClient::new(MockKvClient::with_dispatch_hook( + move |req: &dyn Any| { + if let Some(req) = req.downcast_ref::() { + assert_eq!(req.copr_name, "example"); + assert_eq!(req.ranges.len(), 1); + assert_eq!(req.ranges[0].start_key, vec![1]); + assert_eq!(req.ranges[0].end_key, vec![2]); + assert_eq!(req.data, b"builder-data".to_vec()); + let ctx = req.context.as_ref().unwrap(); + assert_eq!(ctx.api_version, kvrpcpb::ApiVersion::V3 as i32); + assert_eq!(ctx.keyspace_id, 0); + assert_eq!(ctx.keyspace_identity.as_ref().unwrap().namespace_id, 1); + assert_eq!(ctx.keyspace_identity.as_ref().unwrap().keyspace_id, 7); + assert_eq!(ctx.region_id, 2); + assert_eq!(ctx.peer.as_ref().unwrap().store_id, 42); + *seen_dispatch_in_hook.lock().unwrap() = true; + Ok(Box::new(kvrpcpb::RawCoprocessorResponse { + data: req.data.clone(), + ..Default::default() + }) as Box) + } else { + unreachable!() + } + }, + ))); + let client = Client { + rpc: pd_client, + cf: Some(ColumnFamily::Default), + backoff: DEFAULT_REGION_BACKOFF, + atomic: false, + keyspace: Keyspace::api_v3(1, 7).unwrap(), + }; + let seen_builder = Arc::new(Mutex::new(false)); + let seen_builder_in_closure = seen_builder.clone(); + let resps = client + .coprocessor( + "example", + "0.1.0", + vec![vec![1]..vec![2]], + move |region, ranges| { + assert_eq!(region.id, 2); + assert_eq!(ranges, vec![Key::from(vec![1])..Key::from(vec![2])]); + *seen_builder_in_closure.lock().unwrap() = true; + b"builder-data".to_vec() + }, + ) + .await?; + + assert_eq!( + resps, + vec![( + vec![Key::from(vec![1])..Key::from(vec![2])], + b"builder-data".to_vec() + )] + ); + assert!(*seen_builder.lock().unwrap()); + assert!(*seen_dispatch.lock().unwrap()); + Ok(()) + } + #[tokio::test] async fn test_raw_coprocessor() -> Result<()> { let pd_client = Arc::new(MockPdClient::new(MockKvClient::with_dispatch_hook( diff --git 
a/src/raw/requests.rs b/src/raw/requests.rs index 1f018184..919f5801 100644 --- a/src/raw/requests.rs +++ b/src/raw/requests.rs @@ -18,12 +18,12 @@ use crate::request::Process; use crate::request::RangeRequest; use crate::request::Shardable; use crate::request::SingleKey; -use crate::request::{Batchable, Collect}; +use crate::request::{Batchable, Collect, KeyMode, Keyspace}; use crate::shardable_key; use crate::shardable_keys; use crate::shardable_range; -use crate::store::region_stream_for_keys; -use crate::store::region_stream_for_ranges; +use crate::store::region_stream_for_keys_with_keyspace; +use crate::store::region_stream_for_ranges_with_keyspace; use crate::store::RegionStore; use crate::store::Request; use crate::transaction::HasLocks; @@ -56,7 +56,7 @@ impl KvRequest for kvrpcpb::RawGetRequest { type Response = kvrpcpb::RawGetResponse; } -shardable_key!(kvrpcpb::RawGetRequest); +shardable_key!(kvrpcpb::RawGetRequest, KeyMode::Raw); collect_single!(kvrpcpb::RawGetResponse); impl SingleKey for kvrpcpb::RawGetRequest { @@ -93,7 +93,7 @@ impl KvRequest for kvrpcpb::RawBatchGetRequest { type Response = kvrpcpb::RawBatchGetResponse; } -shardable_keys!(kvrpcpb::RawBatchGetRequest); +shardable_keys!(kvrpcpb::RawBatchGetRequest, KeyMode::Raw); impl Merge for Collect { type Out = Vec; @@ -121,7 +121,7 @@ impl KvRequest for kvrpcpb::RawGetKeyTtlRequest { type Response = kvrpcpb::RawGetKeyTtlResponse; } -shardable_key!(kvrpcpb::RawGetKeyTtlRequest); +shardable_key!(kvrpcpb::RawGetKeyTtlRequest, KeyMode::Raw); collect_single!(kvrpcpb::RawGetKeyTtlResponse); impl SingleKey for kvrpcpb::RawGetKeyTtlRequest { @@ -164,7 +164,7 @@ impl KvRequest for kvrpcpb::RawPutRequest { type Response = kvrpcpb::RawPutResponse; } -shardable_key!(kvrpcpb::RawPutRequest); +shardable_key!(kvrpcpb::RawPutRequest, KeyMode::Raw); collect_single!(kvrpcpb::RawPutResponse); impl SingleKey for kvrpcpb::RawPutRequest { fn key(&self) -> &Vec { @@ -214,17 +214,22 @@ impl Shardable for 
kvrpcpb::RawBatchPutRequest { .map(|(kv, ttl)| KvPairTTL(kv, ttl)) .collect(); kv_ttl.sort_by(|a, b| a.0.key.cmp(&b.0.key)); - region_stream_for_keys(kv_ttl.into_iter(), pd_client.clone()) - .flat_map(|result| match result { - Ok((keys, region)) => stream::iter(kvrpcpb::RawBatchPutRequest::batches( - keys, - RAW_KV_REQUEST_BATCH_SIZE, - )) - .map(move |batch| Ok((batch, region.clone()))) - .boxed(), - Err(e) => stream::iter(Err(e)).boxed(), - }) - .boxed() + region_stream_for_keys_with_keyspace( + kv_ttl.into_iter(), + pd_client.clone(), + Keyspace::from_context(&self.context), + KeyMode::Raw, + ) + .flat_map(|result| match result { + Ok((keys, region)) => stream::iter(kvrpcpb::RawBatchPutRequest::batches( + keys, + RAW_KV_REQUEST_BATCH_SIZE, + )) + .map(move |batch| Ok((batch, region.clone()))) + .boxed(), + Err(e) => stream::iter(Err(e)).boxed(), + }) + .boxed() } fn apply_shard(&mut self, shard: Self::Shard) { @@ -267,7 +272,7 @@ impl KvRequest for kvrpcpb::RawDeleteRequest { type Response = kvrpcpb::RawDeleteResponse; } -shardable_key!(kvrpcpb::RawDeleteRequest); +shardable_key!(kvrpcpb::RawDeleteRequest, KeyMode::Raw); collect_single!(kvrpcpb::RawDeleteResponse); impl SingleKey for kvrpcpb::RawDeleteRequest { fn key(&self) -> &Vec { @@ -307,17 +312,22 @@ impl Shardable for kvrpcpb::RawBatchDeleteRequest { ) -> BoxStream<'static, Result<(Self::Shard, RegionWithLeader)>> { let mut keys = self.keys.clone(); keys.sort(); - region_stream_for_keys(keys.into_iter(), pd_client.clone()) - .flat_map(|result| match result { - Ok((keys, region)) => stream::iter(kvrpcpb::RawBatchDeleteRequest::batches( - keys, - RAW_KV_REQUEST_BATCH_SIZE, - )) - .map(move |batch| Ok((batch, region.clone()))) - .boxed(), - Err(e) => stream::iter(Err(e)).boxed(), - }) - .boxed() + region_stream_for_keys_with_keyspace( + keys.into_iter(), + pd_client.clone(), + Keyspace::from_context(&self.context), + KeyMode::Raw, + ) + .flat_map(|result| match result { + Ok((keys, region)) => 
stream::iter(kvrpcpb::RawBatchDeleteRequest::batches( + keys, + RAW_KV_REQUEST_BATCH_SIZE, + )) + .map(move |batch| Ok((batch, region.clone()))) + .boxed(), + Err(e) => stream::iter(Err(e)).boxed(), + }) + .boxed() } fn apply_shard(&mut self, shard: Self::Shard) { @@ -359,7 +369,7 @@ impl KvRequest for kvrpcpb::RawDeleteRangeRequest { } range_request!(kvrpcpb::RawDeleteRangeRequest); -shardable_range!(kvrpcpb::RawDeleteRangeRequest); +shardable_range!(kvrpcpb::RawDeleteRangeRequest, KeyMode::Raw); pub fn new_raw_scan_request( start_key: Vec, @@ -390,7 +400,7 @@ impl KvRequest for kvrpcpb::RawScanRequest { } range_request!(kvrpcpb::RawScanRequest); -shardable_range!(kvrpcpb::RawScanRequest); +shardable_range!(kvrpcpb::RawScanRequest, KeyMode::Raw); impl Merge for Collect { type Out = Vec; @@ -429,7 +439,12 @@ impl Shardable for kvrpcpb::RawBatchScanRequest { &self, pd_client: &Arc, ) -> BoxStream<'static, Result<(Self::Shard, RegionWithLeader)>> { - region_stream_for_ranges(self.ranges.clone(), pd_client.clone()) + region_stream_for_ranges_with_keyspace( + self.ranges.clone(), + pd_client.clone(), + Keyspace::from_context(&self.context), + KeyMode::Raw, + ) } fn apply_shard(&mut self, shard: Self::Shard) { @@ -473,7 +488,7 @@ impl KvRequest for kvrpcpb::RawCasRequest { type Response = kvrpcpb::RawCasResponse; } -shardable_key!(kvrpcpb::RawCasRequest); +shardable_key!(kvrpcpb::RawCasRequest, KeyMode::Raw); collect_single!(kvrpcpb::RawCasResponse); impl SingleKey for kvrpcpb::RawCasRequest { fn key(&self) -> &Vec { @@ -544,6 +559,10 @@ impl Request for RawCoprocessorRequest { fn set_api_version(&mut self, api_version: kvrpcpb::ApiVersion) { self.inner.set_api_version(api_version); } + + fn set_keyspace(&mut self, keyspace: Keyspace) { + self.inner.set_keyspace(keyspace); + } } impl KvRequest for RawCoprocessorRequest { @@ -557,7 +576,12 @@ impl Shardable for RawCoprocessorRequest { &self, pd_client: &Arc, ) -> BoxStream<'static, Result<(Self::Shard, 
RegionWithLeader)>> { - region_stream_for_ranges(self.inner.ranges.clone(), pd_client.clone()) + region_stream_for_ranges_with_keyspace( + self.inner.ranges.clone(), + pd_client.clone(), + Keyspace::from_context(&self.inner.context), + KeyMode::Raw, + ) } fn apply_shard(&mut self, shard: Self::Shard) { @@ -662,8 +686,337 @@ mod test { use crate::mock::MockKvClient; use crate::mock::MockPdClient; use crate::proto::kvrpcpb; - use crate::request::Keyspace; use crate::request::Plan; + use crate::request::{CollectError, CollectSingle, Keyspace}; + + fn assert_api_v3_test_context(ctx: &kvrpcpb::Context) { + assert_eq!(ctx.api_version, kvrpcpb::ApiVersion::V3 as i32); + assert_eq!(ctx.keyspace_id, 0); + assert_eq!(ctx.keyspace_identity.as_ref().unwrap().namespace_id, 1); + assert_eq!(ctx.keyspace_identity.as_ref().unwrap().keyspace_id, 7); + assert_eq!(ctx.region_id, 2); + assert_eq!(ctx.peer.as_ref().unwrap().store_id, 42); + } + + #[tokio::test] + async fn test_api_v3_raw_get_routes_with_physical_key_but_sends_user_key() -> Result<()> { + let seen = Arc::new(Mutex::new(false)); + let seen_in_hook = seen.clone(); + let client = Arc::new(MockPdClient::new(MockKvClient::with_dispatch_hook( + move |req: &dyn Any| { + let req: &kvrpcpb::RawGetRequest = req.downcast_ref().unwrap(); + assert_eq!(req.key, vec![1]); + let ctx = req.context.as_ref().unwrap(); + assert_eq!(ctx.api_version, kvrpcpb::ApiVersion::V3 as i32); + assert_eq!(ctx.keyspace_id, 0); + assert_eq!(ctx.keyspace_identity.as_ref().unwrap().namespace_id, 1); + assert_eq!(ctx.keyspace_identity.as_ref().unwrap().keyspace_id, 7); + assert_eq!(ctx.region_id, 2); + assert_eq!(ctx.peer.as_ref().unwrap().store_id, 42); + *seen_in_hook.lock().unwrap() = true; + Ok(Box::new(kvrpcpb::RawGetResponse { + value: vec![9], + ..Default::default() + }) as Box) + }, + ))); + + let req = new_raw_get_request(vec![1], None); + let plan = crate::request::PlanBuilder::new(client, Keyspace::api_v3(1, 7).unwrap(), req) + 
.retry_multi_region(DEFAULT_REGION_BACKOFF) + .merge(CollectSingle) + .plan(); + let resp = plan.execute().await?; + + assert_eq!(resp.value, vec![9]); + assert!(*seen.lock().unwrap()); + Ok(()) + } + + #[tokio::test] + async fn test_api_v3_raw_get_key_ttl_routes_with_physical_key_but_sends_user_key() -> Result<()> + { + let seen = Arc::new(Mutex::new(false)); + let seen_in_hook = seen.clone(); + let client = Arc::new(MockPdClient::new(MockKvClient::with_dispatch_hook( + move |req: &dyn Any| { + let req: &kvrpcpb::RawGetKeyTtlRequest = req.downcast_ref().unwrap(); + assert_eq!(req.key, vec![1]); + assert_api_v3_test_context(req.context.as_ref().unwrap()); + *seen_in_hook.lock().unwrap() = true; + Ok(Box::new(kvrpcpb::RawGetKeyTtlResponse { + ttl: 3000, + ..Default::default() + }) as Box) + }, + ))); + + let req = new_raw_get_key_ttl_request(vec![1], None); + let plan = crate::request::PlanBuilder::new(client, Keyspace::api_v3(1, 7).unwrap(), req) + .retry_multi_region(DEFAULT_REGION_BACKOFF) + .merge(CollectSingle) + .plan(); + let resp = plan.execute().await?; + + assert_eq!(resp.ttl, 3000); + assert!(*seen.lock().unwrap()); + Ok(()) + } + + #[tokio::test] + async fn test_api_v3_raw_put_routes_with_physical_key_but_sends_user_key() -> Result<()> { + let seen = Arc::new(Mutex::new(false)); + let seen_in_hook = seen.clone(); + let client = Arc::new(MockPdClient::new(MockKvClient::with_dispatch_hook( + move |req: &dyn Any| { + let req: &kvrpcpb::RawPutRequest = req.downcast_ref().unwrap(); + assert_eq!(req.key, vec![1]); + assert_eq!(req.value, vec![9]); + assert_api_v3_test_context(req.context.as_ref().unwrap()); + *seen_in_hook.lock().unwrap() = true; + Ok(Box::new(kvrpcpb::RawPutResponse::default()) as Box) + }, + ))); + + let req = new_raw_put_request(vec![1], vec![9], 3000, None, false); + let plan = crate::request::PlanBuilder::new(client, Keyspace::api_v3(1, 7).unwrap(), req) + .retry_multi_region(DEFAULT_REGION_BACKOFF) + .merge(CollectSingle) + .plan(); + 
let _ = plan.execute().await?; + + assert!(*seen.lock().unwrap()); + Ok(()) + } + + #[tokio::test] + async fn test_api_v3_raw_batch_put_routes_with_physical_key_but_sends_user_keys() -> Result<()> + { + let seen = Arc::new(Mutex::new(false)); + let seen_in_hook = seen.clone(); + let client = Arc::new(MockPdClient::new(MockKvClient::with_dispatch_hook( + move |req: &dyn Any| { + let req: &kvrpcpb::RawBatchPutRequest = req.downcast_ref().unwrap(); + assert_eq!(req.pairs.len(), 1); + assert_eq!(req.pairs[0].key, vec![1]); + assert_eq!(req.pairs[0].value, vec![9]); + assert_eq!(req.ttls, vec![3000]); + assert_api_v3_test_context(req.context.as_ref().unwrap()); + *seen_in_hook.lock().unwrap() = true; + Ok(Box::new(kvrpcpb::RawBatchPutResponse::default()) as Box) + }, + ))); + + let req = new_raw_batch_put_request( + vec![kvrpcpb::KvPair { + key: vec![1], + value: vec![9], + ..Default::default() + }], + vec![3000], + None, + false, + ); + let plan = crate::request::PlanBuilder::new(client, Keyspace::api_v3(1, 7).unwrap(), req) + .retry_multi_region(DEFAULT_REGION_BACKOFF) + .merge(CollectError) + .plan(); + let resp = plan.execute().await?; + + assert_eq!(resp.len(), 1); + assert!(*seen.lock().unwrap()); + Ok(()) + } + + #[tokio::test] + async fn test_api_v3_raw_delete_routes_with_physical_key_but_sends_user_key() -> Result<()> { + let seen = Arc::new(Mutex::new(false)); + let seen_in_hook = seen.clone(); + let client = Arc::new(MockPdClient::new(MockKvClient::with_dispatch_hook( + move |req: &dyn Any| { + let req: &kvrpcpb::RawDeleteRequest = req.downcast_ref().unwrap(); + assert_eq!(req.key, vec![1]); + assert_api_v3_test_context(req.context.as_ref().unwrap()); + *seen_in_hook.lock().unwrap() = true; + Ok(Box::new(kvrpcpb::RawDeleteResponse::default()) as Box) + }, + ))); + + let req = new_raw_delete_request(vec![1], None, false); + let plan = crate::request::PlanBuilder::new(client, Keyspace::api_v3(1, 7).unwrap(), req) + .retry_multi_region(DEFAULT_REGION_BACKOFF) + 
.merge(CollectSingle) + .plan(); + let _ = plan.execute().await?; + + assert!(*seen.lock().unwrap()); + Ok(()) + } + + #[tokio::test] + async fn test_api_v3_raw_batch_delete_routes_with_physical_key_but_sends_user_keys( + ) -> Result<()> { + let seen = Arc::new(Mutex::new(false)); + let seen_in_hook = seen.clone(); + let client = Arc::new(MockPdClient::new(MockKvClient::with_dispatch_hook( + move |req: &dyn Any| { + let req: &kvrpcpb::RawBatchDeleteRequest = req.downcast_ref().unwrap(); + assert_eq!(req.keys, vec![vec![1]]); + assert_api_v3_test_context(req.context.as_ref().unwrap()); + *seen_in_hook.lock().unwrap() = true; + Ok(Box::new(kvrpcpb::RawBatchDeleteResponse::default()) as Box) + }, + ))); + + let req = new_raw_batch_delete_request(vec![vec![1]], None); + let plan = crate::request::PlanBuilder::new(client, Keyspace::api_v3(1, 7).unwrap(), req) + .retry_multi_region(DEFAULT_REGION_BACKOFF) + .merge(CollectError) + .plan(); + let resp = plan.execute().await?; + + assert_eq!(resp.len(), 1); + assert!(*seen.lock().unwrap()); + Ok(()) + } + + #[tokio::test] + async fn test_api_v3_raw_delete_range_routes_with_physical_key_but_sends_user_range( + ) -> Result<()> { + let seen = Arc::new(Mutex::new(false)); + let seen_in_hook = seen.clone(); + let client = Arc::new(MockPdClient::new(MockKvClient::with_dispatch_hook( + move |req: &dyn Any| { + let req: &kvrpcpb::RawDeleteRangeRequest = req.downcast_ref().unwrap(); + assert_eq!(req.start_key, vec![1]); + assert_eq!(req.end_key, vec![2]); + assert_api_v3_test_context(req.context.as_ref().unwrap()); + *seen_in_hook.lock().unwrap() = true; + Ok(Box::new(kvrpcpb::RawDeleteRangeResponse::default()) as Box) + }, + ))); + + let req = new_raw_delete_range_request(vec![1], vec![2], None); + let plan = crate::request::PlanBuilder::new(client, Keyspace::api_v3(1, 7).unwrap(), req) + .retry_multi_region(DEFAULT_REGION_BACKOFF) + .merge(CollectError) + .plan(); + let resp = plan.execute().await?; + + assert_eq!(resp.len(), 1); 
+ assert!(*seen.lock().unwrap()); + Ok(()) + } + + #[tokio::test] + async fn test_api_v3_raw_scan_routes_with_physical_key_but_sends_user_range() -> Result<()> { + let seen = Arc::new(Mutex::new(false)); + let seen_in_hook = seen.clone(); + let client = Arc::new(MockPdClient::new(MockKvClient::with_dispatch_hook( + move |req: &dyn Any| { + let req: &kvrpcpb::RawScanRequest = req.downcast_ref().unwrap(); + assert_eq!(req.start_key, vec![1]); + assert_eq!(req.end_key, vec![2]); + assert_api_v3_test_context(req.context.as_ref().unwrap()); + *seen_in_hook.lock().unwrap() = true; + Ok(Box::new(kvrpcpb::RawScanResponse { + kvs: vec![kvrpcpb::KvPair { + key: vec![1], + value: vec![9], + ..Default::default() + }], + ..Default::default() + }) as Box) + }, + ))); + + let req = new_raw_scan_request(vec![1], vec![2], 10, false, false, None); + let plan = crate::request::PlanBuilder::new(client, Keyspace::api_v3(1, 7).unwrap(), req) + .retry_multi_region(DEFAULT_REGION_BACKOFF) + .merge(Collect) + .plan(); + let resp = plan.execute().await?; + + assert_eq!(resp, vec![KvPair::new(vec![1], vec![9])]); + assert!(*seen.lock().unwrap()); + Ok(()) + } + + #[tokio::test] + async fn test_api_v3_raw_batch_scan_routes_with_physical_key_but_sends_user_ranges( + ) -> Result<()> { + let seen = Arc::new(Mutex::new(false)); + let seen_in_hook = seen.clone(); + let client = Arc::new(MockPdClient::new(MockKvClient::with_dispatch_hook( + move |req: &dyn Any| { + let req: &kvrpcpb::RawBatchScanRequest = req.downcast_ref().unwrap(); + assert_eq!(req.ranges.len(), 1); + assert_eq!(req.ranges[0].start_key, vec![1]); + assert_eq!(req.ranges[0].end_key, vec![2]); + assert_api_v3_test_context(req.context.as_ref().unwrap()); + *seen_in_hook.lock().unwrap() = true; + Ok(Box::new(kvrpcpb::RawBatchScanResponse { + kvs: vec![kvrpcpb::KvPair { + key: vec![1], + value: vec![9], + ..Default::default() + }], + ..Default::default() + }) as Box) + }, + ))); + + let req = new_raw_batch_scan_request( + 
vec![kvrpcpb::KeyRange { + start_key: vec![1], + end_key: vec![2], + }], + 10, + false, + None, + ); + let plan = crate::request::PlanBuilder::new(client, Keyspace::api_v3(1, 7).unwrap(), req) + .retry_multi_region(DEFAULT_REGION_BACKOFF) + .merge(Collect) + .plan(); + let resp = plan.execute().await?; + + assert_eq!(resp, vec![KvPair::new(vec![1], vec![9])]); + assert!(*seen.lock().unwrap()); + Ok(()) + } + + #[tokio::test] + async fn test_api_v3_raw_cas_routes_with_physical_key_but_sends_user_key() -> Result<()> { + let seen = Arc::new(Mutex::new(false)); + let seen_in_hook = seen.clone(); + let client = Arc::new(MockPdClient::new(MockKvClient::with_dispatch_hook( + move |req: &dyn Any| { + let req: &kvrpcpb::RawCasRequest = req.downcast_ref().unwrap(); + assert_eq!(req.key, vec![1]); + assert_eq!(req.value, vec![9]); + assert_eq!(req.previous_value, vec![8]); + assert_api_v3_test_context(req.context.as_ref().unwrap()); + *seen_in_hook.lock().unwrap() = true; + Ok(Box::new(kvrpcpb::RawCasResponse { + succeed: true, + previous_value: vec![8], + ..Default::default() + }) as Box) + }, + ))); + + let req = new_cas_request(vec![1], vec![9], Some(vec![8]), None); + let plan = crate::request::PlanBuilder::new(client, Keyspace::api_v3(1, 7).unwrap(), req) + .retry_multi_region(DEFAULT_REGION_BACKOFF) + .merge(CollectSingle) + .plan(); + let resp = plan.execute().await?; + + assert!(resp.succeed); + assert!(*seen.lock().unwrap()); + Ok(()) + } #[rstest::rstest] #[case(Keyspace::Disable)] diff --git a/src/region_cache.rs b/src/region_cache.rs index 8837de38..42d71b3a 100644 --- a/src/region_cache.rs +++ b/src/region_cache.rs @@ -349,6 +349,13 @@ mod test { todo!() } + async fn get_timestamp_with_identity( + self: Arc, + _identity: Option, + ) -> Result { + todo!() + } + async fn update_safepoint(self: Arc, _safepoint: u64) -> Result { todo!() } @@ -356,6 +363,18 @@ mod test { async fn load_keyspace(&self, _keyspace: &str) -> Result { unimplemented!() } + + async fn 
lookup_keyspaces(&self, _keyspace: &str) -> Result> { + unimplemented!() + } + + async fn lookup_keyspace( + &self, + _keyspace: &str, + _namespace_id: u32, + ) -> Result { + unimplemented!() + } } #[tokio::test] diff --git a/src/request/keyspace.rs b/src/request/keyspace.rs index 79f7225d..ba873a60 100644 --- a/src/request/keyspace.rs +++ b/src/request/keyspace.rs @@ -5,12 +5,14 @@ use std::ops::{Bound, Range}; use serde_derive::{Deserialize, Serialize}; use crate::transaction::Mutation; -use crate::{proto::kvrpcpb, Key}; +use crate::{proto::apipb, proto::kvrpcpb, Key}; use crate::{BoundRange, KvPair}; pub const RAW_KEY_PREFIX: u8 = b'r'; pub const TXN_KEY_PREFIX: u8 = b'x'; pub const KEYSPACE_PREFIX_LEN: usize = 4; +pub const API_V3_PREFIX_LEN: usize = 8; +pub const API_V3_MAX_KEYSPACE_ID: u32 = 0xFF_FFFF; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] #[non_exhaustive] @@ -19,6 +21,11 @@ pub enum Keyspace { Enable { keyspace_id: u32, }, + /// Use API V3 with user keys on the TiKV KV RPC wire format. + ApiV3 { + namespace_id: u32, + keyspace_id: u32, + }, /// Use API V2 without adding or removing the API V2 keyspace/key-mode prefix. /// /// This mode is intended for **server-side embedding** use cases (e.g. 
embedding this client in @@ -34,13 +41,157 @@ pub enum KeyMode { } impl Keyspace { + pub fn api_v3(namespace_id: u32, keyspace_id: u32) -> crate::Result { + if namespace_id == 0 { + return Err(crate::Error::StringError( + "V3 keyspace identity namespace_id must be non-zero".to_owned(), + )); + } + if keyspace_id == 0 { + return Err(crate::Error::StringError( + "V3 keyspace identity keyspace_id must be non-zero".to_owned(), + )); + } + if keyspace_id > API_V3_MAX_KEYSPACE_ID { + return Err(crate::Error::StringError( + "V3 keyspace identity keyspace_id must be less than 2^24".to_owned(), + )); + } + Ok(Keyspace::ApiV3 { + namespace_id, + keyspace_id, + }) + } + pub fn api_version(&self) -> kvrpcpb::ApiVersion { match self { Keyspace::Disable => kvrpcpb::ApiVersion::V1, Keyspace::Enable { .. } => kvrpcpb::ApiVersion::V2, + Keyspace::ApiV3 { .. } => kvrpcpb::ApiVersion::V3, Keyspace::ApiV2NoPrefix => kvrpcpb::ApiVersion::V2, } } + + pub fn v3_identity(&self) -> Option { + match self { + Keyspace::ApiV3 { + namespace_id, + keyspace_id, + } => Some(apipb::KeyspaceIdentity { + namespace_id: *namespace_id, + keyspace_id: *keyspace_id, + }), + _ => None, + } + } + + pub fn from_context(context: &Option) -> Self { + let Some(ctx) = context else { + return Keyspace::Disable; + }; + if kvrpcpb::ApiVersion::try_from(ctx.api_version) != Ok(kvrpcpb::ApiVersion::V3) { + return Keyspace::Disable; + } + let Some(identity) = ctx.keyspace_identity.as_ref() else { + return Keyspace::Disable; + }; + Keyspace::ApiV3 { + namespace_id: identity.namespace_id, + keyspace_id: identity.keyspace_id, + } + } + + pub fn v3_route_prefix(&self, key_mode: KeyMode) -> Option<[u8; API_V3_PREFIX_LEN]> { + let Keyspace::ApiV3 { + namespace_id, + keyspace_id, + } = *self + else { + return None; + }; + Some(api_v3_keyspace_prefix(namespace_id, keyspace_id, key_mode)) + } + + pub fn encode_route_key(&self, key: &Key, key_mode: KeyMode) -> Key { + let Some(prefix) = self.v3_route_prefix(key_mode) else { + 
return key.clone(); + }; + let mut route_key = key.clone(); + prepend_bytes(&mut route_key.0, &prefix); + route_key + } + + pub fn encode_route_range( + &self, + start_key: Key, + end_key: Key, + key_mode: KeyMode, + ) -> (Key, Key) { + let Some(prefix) = self.v3_route_prefix(key_mode) else { + return (start_key, end_key); + }; + let mut start = start_key; + prepend_bytes(&mut start.0, &prefix); + let end = if end_key.is_empty() { + self.v3_route_range_end(key_mode) + .map(Key::from) + .unwrap_or(Key::EMPTY) + } else { + let mut end = end_key; + prepend_bytes(&mut end.0, &prefix); + end + }; + (start, end) + } + + pub fn decode_route_range( + &self, + start_key: Key, + end_key: Key, + key_mode: KeyMode, + ) -> (Key, Key) { + if self.v3_route_prefix(key_mode).is_none() { + return (start_key, end_key); + } + ( + self.decode_route_bound(start_key, key_mode), + self.decode_route_bound(end_key, key_mode), + ) + } + + fn decode_route_bound(&self, key: Key, key_mode: KeyMode) -> Key { + let Some(prefix) = self.v3_route_prefix(key_mode) else { + return key; + }; + if key.is_empty() { + return key; + } + if key.0 == prefix { + return Key::EMPTY; + } + if key.0.starts_with(&prefix) { + return Key::from(key.0[API_V3_PREFIX_LEN..].to_vec()); + } + if self + .v3_route_range_end(key_mode) + .is_some_and(|end_prefix| key.0 >= end_prefix) + { + return Key::EMPTY; + } + Key::EMPTY + } + + fn v3_route_range_end(&self, key_mode: KeyMode) -> Option> { + let Keyspace::ApiV3 { + namespace_id, + keyspace_id, + } = *self + else { + return None; + }; + let start = u64::from_be_bytes(api_v3_keyspace_prefix(namespace_id, keyspace_id, key_mode)); + Some(start.wrapping_add(1).to_be_bytes().to_vec()) + } } pub trait EncodeKeyspace { @@ -217,6 +368,29 @@ fn keyspace_prefix(keyspace_id: u32, key_mode: KeyMode) -> [u8; KEYSPACE_PREFIX_ prefix } +fn api_v3_keyspace_prefix( + namespace_id: u32, + keyspace_id: u32, + key_mode: KeyMode, +) -> [u8; API_V3_PREFIX_LEN] { + debug_assert!(keyspace_id <= 
API_V3_MAX_KEYSPACE_ID); + let namespace_bytes = namespace_id.to_be_bytes(); + let keyspace_bytes = keyspace_id.to_be_bytes(); + [ + match key_mode { + KeyMode::Raw => RAW_KEY_PREFIX, + KeyMode::Txn => TXN_KEY_PREFIX, + }, + namespace_bytes[0], + namespace_bytes[1], + namespace_bytes[2], + namespace_bytes[3], + keyspace_bytes[1], + keyspace_bytes[2], + keyspace_bytes[3], + ] +} + fn prepend_bytes(vec: &mut Vec, prefix: &[u8; N]) { unsafe { vec.reserve_exact(N); @@ -406,6 +580,93 @@ mod tests { ); } + #[test] + fn test_api_v3_keyspace() { + let keyspace = Keyspace::api_v3(7, 0xDEAD).unwrap(); + assert_eq!(keyspace.api_version(), kvrpcpb::ApiVersion::V3); + assert_eq!( + keyspace.v3_identity(), + Some(apipb::KeyspaceIdentity { + namespace_id: 7, + keyspace_id: 0xDEAD, + }) + ); + assert!(Keyspace::api_v3(0, 1).is_err()); + assert!(Keyspace::api_v3(1, 0).is_err()); + assert!(Keyspace::api_v3(1, API_V3_MAX_KEYSPACE_ID + 1).is_err()); + } + + #[test] + fn test_api_v3_route_key_and_range() { + let keyspace = Keyspace::api_v3(0x0102_0304, 0x05_0607).unwrap(); + + assert_eq!( + keyspace.encode_route_key(&Key::from(vec![b'k']), KeyMode::Raw), + Key::from(vec![b'r', 1, 2, 3, 4, 5, 6, 7, b'k']) + ); + assert_eq!( + keyspace.encode_route_key(&Key::from(vec![b'k']), KeyMode::Txn), + Key::from(vec![b'x', 1, 2, 3, 4, 5, 6, 7, b'k']) + ); + + let route_range = + keyspace.encode_route_range(Key::from(vec![b'a']), Key::from(vec![b'z']), KeyMode::Txn); + assert_eq!( + route_range.0, + Key::from(vec![b'x', 1, 2, 3, 4, 5, 6, 7, b'a']) + ); + assert_eq!( + route_range.1, + Key::from(vec![b'x', 1, 2, 3, 4, 5, 6, 7, b'z']) + ); + assert_eq!( + keyspace.decode_route_range(route_range.0, route_range.1, KeyMode::Txn), + (Key::from(vec![b'a']), Key::from(vec![b'z'])) + ); + + let whole_range = keyspace.encode_route_range(Key::EMPTY, Key::EMPTY, KeyMode::Txn); + assert_eq!(whole_range.0, Key::from(vec![b'x', 1, 2, 3, 4, 5, 6, 7])); + assert_eq!(whole_range.1, Key::from(vec![b'x', 1, 2, 3, 4, 
5, 6, 8])); + assert_eq!( + keyspace.decode_route_range(whole_range.0, whole_range.1, KeyMode::Txn), + (Key::EMPTY, Key::EMPTY) + ); + + let last_keyspace = Keyspace::api_v3(u32::MAX, API_V3_MAX_KEYSPACE_ID).unwrap(); + let last_whole_range = + last_keyspace.encode_route_range(Key::EMPTY, Key::EMPTY, KeyMode::Txn); + assert_eq!( + last_whole_range.0, + Key::from(vec![b'x', 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff]) + ); + assert_eq!( + last_whole_range.1, + Key::from(vec![b'y', 0, 0, 0, 0, 0, 0, 0]) + ); + assert_eq!( + last_keyspace.decode_route_range(last_whole_range.0, last_whole_range.1, KeyMode::Txn), + (Key::EMPTY, Key::EMPTY) + ); + } + + #[test] + fn test_api_v3_wire_key_is_noop() { + let keyspace = Keyspace::api_v3(7, 0xDEAD).unwrap(); + let key_mode = KeyMode::Txn; + + let key = Key::from(vec![b'k']); + assert_eq!(key.clone().encode_keyspace(keyspace, key_mode), key); + + let pair = KvPair(Key::from(vec![b'k']), vec![b'v']); + assert_eq!(pair.clone().encode_keyspace(keyspace, key_mode), pair); + + let range = Range { + start: Key::from(vec![b'a']), + end: Key::from(vec![b'b']), + }; + assert_eq!(range.clone().truncate_keyspace(keyspace), range); + } + #[test] fn test_apiv2_no_prefix_encode_is_noop() { let keyspace = Keyspace::ApiV2NoPrefix; diff --git a/src/request/plan_builder.rs b/src/request/plan_builder.rs index 9c4fcdbe..f408df6e 100644 --- a/src/request/plan_builder.rs +++ b/src/request/plan_builder.rs @@ -49,7 +49,7 @@ impl PlanBuilderPhase for Targetted {} impl PlanBuilder, NoTarget> { pub fn new(pd_client: Arc, keyspace: Keyspace, mut request: Req) -> Self { - request.set_api_version(keyspace.api_version()); + request.set_keyspace(keyspace); PlanBuilder { pd_client, plan: Dispatch { diff --git a/src/request/shard.rs b/src/request/shard.rs index 1bac69e4..8df5595e 100644 --- a/src/request/shard.rs +++ b/src/request/shard.rs @@ -183,6 +183,9 @@ impl Shardable for CleanupLocks { #[macro_export] macro_rules! 
shardable_key { ($type_: ty) => { + $crate::shardable_key!($type_, $crate::request::KeyMode::Txn); + }; + ($type_: ty, $key_mode: expr) => { impl Shardable for $type_ { type Shard = Vec>; @@ -193,9 +196,11 @@ macro_rules! shardable_key { 'static, $crate::Result<(Self::Shard, $crate::region::RegionWithLeader)>, > { - $crate::store::region_stream_for_keys( + $crate::store::region_stream_for_keys_with_keyspace( std::iter::once(self.key.clone()), pd_client.clone(), + $crate::request::Keyspace::from_context(&self.context), + $key_mode, ) } @@ -215,6 +220,9 @@ macro_rules! shardable_key { #[macro_export] macro_rules! shardable_keys { ($type_: ty) => { + $crate::shardable_keys!($type_, $crate::request::KeyMode::Txn); + }; + ($type_: ty, $key_mode: expr) => { impl Shardable for $type_ { type Shard = Vec>; @@ -227,7 +235,12 @@ macro_rules! shardable_keys { > { let mut keys = self.keys.clone(); keys.sort(); - $crate::store::region_stream_for_keys(keys.into_iter(), pd_client.clone()) + $crate::store::region_stream_for_keys_with_keyspace( + keys.into_iter(), + pd_client.clone(), + $crate::request::Keyspace::from_context(&self.context), + $key_mode, + ) } fn apply_shard(&mut self, shard: Self::Shard) { @@ -271,6 +284,9 @@ macro_rules! reversible_range_request { #[macro_export] macro_rules! shardable_range { ($type_: ty) => { + $crate::shardable_range!($type_, $crate::request::KeyMode::Txn); + }; + ($type_: ty, $key_mode: expr) => { impl Shardable for $type_ { type Shard = (Vec, Vec); @@ -286,7 +302,12 @@ macro_rules! 
shardable_range { if self.is_reverse() { std::mem::swap(&mut start_key, &mut end_key); } - $crate::store::region_stream_for_range((start_key, end_key), pd_client.clone()) + $crate::store::region_stream_for_range_with_keyspace( + (start_key, end_key), + pd_client.clone(), + $crate::request::Keyspace::from_context(&self.context), + $key_mode, + ) } fn apply_shard(&mut self, shard: Self::Shard) { diff --git a/src/store/mod.rs b/src/store/mod.rs index fb6ed709..87071b13 100644 --- a/src/store/mod.rs +++ b/src/store/mod.rs @@ -19,9 +19,11 @@ pub use self::errors::HasKeyErrors; pub use self::errors::HasRegionError; pub use self::errors::HasRegionErrors; pub use self::request::Request; +use crate::compat::stream_fn; use crate::pd::PdClient; use crate::proto::kvrpcpb; use crate::region::RegionWithLeader; +use crate::request::{KeyMode, Keyspace}; use crate::BoundRange; use crate::Key; use crate::Result; @@ -50,6 +52,51 @@ where pd_client.clone().group_keys_by_region(key_data) } +pub fn region_stream_for_keys_with_keyspace( + key_data: impl Iterator + Send + Sync + 'static, + pd_client: Arc, + keyspace: Keyspace, + key_mode: KeyMode, +) -> BoxStream<'static, Result<(Vec, RegionWithLeader)>> +where + PdC: PdClient, + K: AsRef + Into + Send + Sync + 'static, + KOut: Send + Sync + 'static, +{ + if !matches!(keyspace, Keyspace::ApiV3 { .. 
}) { + return region_stream_for_keys(key_data, pd_client); + } + + let mut route_pairs: Vec<(Key, K)> = key_data + .map(|key| (keyspace.encode_route_key(key.as_ref(), key_mode), key)) + .collect(); + route_pairs.sort_by(|left, right| left.0.cmp(&right.0)); + + let route_pairs = route_pairs.into_iter().peekable(); + stream_fn(Some(route_pairs), move |route_pairs| { + let this = pd_client.clone(); + async move { + let mut route_pairs = match route_pairs { + None => return Ok(None), + Some(route_pairs) => route_pairs, + }; + let Some((route_key, key)) = route_pairs.next() else { + return Ok(None); + }; + let region = this.region_for_key(&route_key).await?; + let mut grouped = vec![key.into()]; + while let Some((next_route_key, _)) = route_pairs.peek() { + if !region.contains(next_route_key) { + break; + } + grouped.push(route_pairs.next().unwrap().1.into()); + } + Ok(Some((Some(route_pairs), (grouped, region)))) + } + }) + .boxed() +} + #[allow(clippy::type_complexity)] pub fn region_stream_for_range( range: (Vec, Vec), @@ -73,6 +120,47 @@ pub fn region_stream_for_range( .boxed() } +#[allow(clippy::type_complexity)] +pub fn region_stream_for_range_with_keyspace( + range: (Vec, Vec), + pd_client: Arc, + keyspace: Keyspace, + key_mode: KeyMode, +) -> BoxStream<'static, Result<((Vec, Vec), RegionWithLeader)>> { + if !matches!(keyspace, Keyspace::ApiV3 { .. 
}) { + return region_stream_for_range(range, pd_client); + } + + let user_range = (Key::from(range.0.clone()), Key::from(range.1.clone())); + let route_range = keyspace.encode_route_range(user_range.0, user_range.1, key_mode); + let route_range_vec: (Vec, Vec) = + (route_range.0.clone().into(), route_range.1.clone().into()); + let bnd_range = if route_range.1.is_empty() { + BoundRange::range_from(route_range.0.clone()) + } else { + BoundRange::from(route_range_vec.clone()) + }; + pd_client + .regions_for_range(bnd_range) + .map_ok(move |region| { + let region_range = region.range(); + let route_result_range = range_intersection( + region_range, + ( + route_range_vec.0.clone().into(), + route_range_vec.1.clone().into(), + ), + ); + let user_result_range = + keyspace.decode_route_range(route_result_range.0, route_result_range.1, key_mode); + ( + (user_result_range.0.into(), user_result_range.1.into()), + region, + ) + }) + .boxed() +} + /// The range used for request should be the intersection of `region_range` and `range`. fn range_intersection(region_range: (Key, Key), range: (Key, Key)) -> (Key, Key) { let (lower, upper) = region_range; @@ -92,3 +180,165 @@ pub fn region_stream_for_ranges( ) -> BoxStream<'static, Result<(Vec, RegionWithLeader)>> { pd_client.clone().group_ranges_by_region(ranges) } + +pub fn region_stream_for_ranges_with_keyspace( + ranges: Vec, + pd_client: Arc, + keyspace: Keyspace, + key_mode: KeyMode, +) -> BoxStream<'static, Result<(Vec, RegionWithLeader)>> { + if !matches!(keyspace, Keyspace::ApiV3 { .. 
}) { + return region_stream_for_ranges(ranges, pd_client); + } + + let mut route_ranges: Vec = ranges + .into_iter() + .map(|range| { + let (start_key, end_key) = keyspace.encode_route_range( + Key::from(range.start_key), + Key::from(range.end_key), + key_mode, + ); + make_key_range(start_key.into(), end_key.into()) + }) + .collect(); + route_ranges.reverse(); + stream_fn(Some(route_ranges), move |ranges| { + let this = pd_client.clone(); + async move { + let mut ranges = match ranges { + None => return Ok(None), + Some(r) => r, + }; + + if let Some(route_range) = ranges.pop() { + let start_key: Key = route_range.start_key.clone().into(); + let end_key: Key = route_range.end_key.clone().into(); + let region = this.region_for_key(&start_key).await?; + let region_start = region.start_key(); + let region_end = region.end_key(); + let mut grouped = vec![]; + if !region_end.is_empty() && (end_key > region_end || end_key.is_empty()) { + grouped.push(make_user_key_range( + keyspace, + key_mode, + start_key, + region_end.clone(), + )); + ranges.push(make_key_range(region_end.into(), end_key.into())); + return Ok(Some((Some(ranges), (grouped, region)))); + } + grouped.push(make_user_key_range(keyspace, key_mode, start_key, end_key)); + + while let Some(route_range) = ranges.pop() { + let start_key: Key = route_range.start_key.clone().into(); + let end_key: Key = route_range.end_key.clone().into(); + if start_key < region_start + || (!region_end.is_empty() && start_key >= region_end) + { + ranges.push(route_range); + break; + } + if !region_end.is_empty() && (end_key > region_end || end_key.is_empty()) { + grouped.push(make_user_key_range( + keyspace, + key_mode, + start_key, + region_end.clone(), + )); + ranges.push(make_key_range(region_end.into(), end_key.into())); + return Ok(Some((Some(ranges), (grouped, region)))); + } + grouped.push(make_user_key_range(keyspace, key_mode, start_key, end_key)); + } + Ok(Some((Some(ranges), (grouped, region)))) + } else { + Ok(None) + } 
+ } + }) + .boxed() +} + +fn make_user_key_range( + keyspace: Keyspace, + key_mode: KeyMode, + start: Key, + end: Key, +) -> kvrpcpb::KeyRange { + let (start, end) = keyspace.decode_route_range(start, end, key_mode); + make_key_range(start.into(), end.into()) +} + +fn make_key_range(start_key: Vec, end_key: Vec) -> kvrpcpb::KeyRange { + kvrpcpb::KeyRange { start_key, end_key } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use futures::executor; + + use super::*; + use crate::mock::MockPdClient; + + #[test] + fn test_api_v3_key_routing_uses_physical_key_and_keeps_user_keys() { + let keyspace = Keyspace::api_v3(1, 7).unwrap(); + let user_keys: Vec = vec![vec![1].into(), vec![2].into()]; + let pd_client = Arc::new(MockPdClient::default()); + + let mut stream = + executor::block_on_stream( + region_stream_for_keys_with_keyspace::( + user_keys.clone().into_iter(), + pd_client, + keyspace, + KeyMode::Txn, + ), + ); + + let (keys, region) = stream.next().unwrap().unwrap(); + assert_eq!(region.id(), 2); + assert_eq!(keys, user_keys); + assert!(stream.next().is_none()); + } + + #[test] + fn test_api_v3_range_routing_uses_physical_key_and_returns_user_range() { + let keyspace = Keyspace::api_v3(1, 7).unwrap(); + let pd_client = Arc::new(MockPdClient::default()); + + let mut stream = executor::block_on_stream(region_stream_for_range_with_keyspace( + (vec![1], vec![2]), + pd_client, + keyspace, + KeyMode::Txn, + )); + + let (range, region) = stream.next().unwrap().unwrap(); + assert_eq!(region.id(), 2); + assert_eq!(range, (vec![1], vec![2])); + assert!(stream.next().is_none()); + } + + #[test] + fn test_api_v3_multi_range_routing_returns_user_ranges() { + let keyspace = Keyspace::api_v3(1, 7).unwrap(); + let pd_client = Arc::new(MockPdClient::default()); + let user_range = make_key_range(vec![1], vec![2]); + + let mut stream = executor::block_on_stream(region_stream_for_ranges_with_keyspace( + vec![user_range.clone()], + pd_client, + keyspace, + KeyMode::Txn, + 
)); + + let (ranges, region) = stream.next().unwrap().unwrap(); + assert_eq!(region.id(), 2); + assert_eq!(ranges, vec![user_range]); + assert!(stream.next().is_none()); + } +} diff --git a/src/store/request.rs b/src/store/request.rs index 65911dcc..810238fb 100644 --- a/src/store/request.rs +++ b/src/store/request.rs @@ -9,6 +9,7 @@ use tonic::IntoRequest; use crate::proto::kvrpcpb; use crate::proto::tikvpb::tikv_client::TikvClient; +use crate::request::Keyspace; use crate::store::RegionWithLeader; use crate::Error; use crate::Result; @@ -24,6 +25,9 @@ pub trait Request: Any + Sync + Send + 'static { fn as_any(&self) -> &dyn Any; fn set_leader(&mut self, leader: &RegionWithLeader) -> Result<()>; fn set_api_version(&mut self, api_version: kvrpcpb::ApiVersion); + fn set_keyspace(&mut self, keyspace: Keyspace) { + self.set_api_version(keyspace.api_version()); + } } macro_rules! impl_request { @@ -68,6 +72,31 @@ macro_rules! impl_request { let ctx = self.context.get_or_insert(kvrpcpb::Context::default()); ctx.api_version = api_version.into(); } + + fn set_keyspace(&mut self, keyspace: Keyspace) { + let ctx = self.context.get_or_insert(kvrpcpb::Context::default()); + ctx.api_version = keyspace.api_version().into(); + match keyspace { + Keyspace::Enable { keyspace_id } => { + ctx.keyspace_id = keyspace_id; + ctx.keyspace_identity = None; + } + Keyspace::ApiV3 { + namespace_id, + keyspace_id, + } => { + ctx.keyspace_id = 0; + ctx.keyspace_identity = Some(crate::proto::apipb::KeyspaceIdentity { + namespace_id, + keyspace_id, + }); + } + _ => { + ctx.keyspace_id = 0; + ctx.keyspace_identity = None; + } + } + } } }; } diff --git a/src/transaction/client.rs b/src/transaction/client.rs index a40044b7..b075118e 100644 --- a/src/transaction/client.rs +++ b/src/transaction/client.rs @@ -115,16 +115,92 @@ impl Client { config: Config, ) -> Result { debug!("creating new transactional client"); + if matches!(config.keyspace_namespace_id, Some(0)) { + return 
Err(crate::Error::StringError( + "config.keyspace_namespace_id must be non-zero".to_owned(), + )); + } + if config.keyspace_global_name_lookup + && (config.keyspace_identity.is_some() || config.keyspace_namespace_id.is_some()) + { + return Err(crate::Error::StringError( + "config.keyspace_global_name_lookup cannot be combined with config.keyspace_identity or config.keyspace_namespace_id".to_owned(), + )); + } + if config.keyspace_global_name_lookup && config.keyspace.is_none() { + return Err(crate::Error::StringError( + "config.keyspace must be set when config.keyspace_global_name_lookup is set" + .to_owned(), + )); + } + let configured_keyspace_identity = config + .keyspace_identity + .map(|identity| Keyspace::api_v3(identity.namespace_id, identity.keyspace_id)) + .transpose()?; + if configured_keyspace_identity.is_none() + && config.keyspace_namespace_id.is_some() + && config.keyspace.is_none() + { + return Err(crate::Error::StringError( + "config.keyspace must be set when config.keyspace_namespace_id is set".to_owned(), + )); + } let pd_endpoints: Vec = pd_endpoints.into_iter().map(Into::into).collect(); let pd = Arc::new(PdRpcClient::connect(&pd_endpoints, config.clone(), true).await?); - let keyspace = match config.keyspace { - Some(name) => { - let keyspace = pd.load_keyspace(&name).await?; - Keyspace::Enable { - keyspace_id: keyspace.id, + let keyspace = if let Some(keyspace) = configured_keyspace_identity { + keyspace + } else if let Some(namespace_id) = config.keyspace_namespace_id { + let name = config.keyspace.clone().ok_or_else(|| { + crate::Error::StringError( + "config.keyspace must be set when config.keyspace_namespace_id is set" + .to_owned(), + ) + })?; + let keyspace = pd.lookup_keyspace(&name, namespace_id).await?; + let identity = keyspace.identity.ok_or_else(|| { + crate::Error::StringError(format!( + "keyspace '{}' in namespace {} does not have V3 identity", + name, namespace_id + )) + })?; + Keyspace::api_v3(identity.namespace_id, 
identity.keyspace_id)? + } else if config.keyspace_global_name_lookup { + let name = config.keyspace.clone().ok_or_else(|| { + crate::Error::StringError( + "config.keyspace must be set when config.keyspace_global_name_lookup is set" + .to_owned(), + ) + })?; + let mut keyspaces = pd.lookup_keyspaces(&name).await?; + match keyspaces.len() { + 1 => { + let keyspace = keyspaces.remove(0); + if let Some(identity) = keyspace.identity { + Keyspace::api_v3(identity.namespace_id, identity.keyspace_id)? + } else { + Keyspace::Enable { + keyspace_id: keyspace.id, + } + } } + 0 => return Err(crate::Error::KeyspaceNotFound(name)), + _ => { + return Err(crate::Error::StringError(format!( + "multiple keyspaces named '{}' found; DB9 global-name lookup requires unique names", + name + ))); + } + } + } else { + match config.keyspace.clone() { + Some(name) => { + let keyspace = pd.load_keyspace(&name).await?; + Keyspace::Enable { + keyspace_id: keyspace.id, + } + } + None => Keyspace::Disable, } - None => Keyspace::Disable, }; Ok(Client { pd, keyspace }) } @@ -137,9 +213,13 @@ impl Client { pd_endpoints: Vec, config: Config, ) -> Result { - if config.keyspace.is_some() { + if config.keyspace.is_some() + || config.keyspace_identity.is_some() + || config.keyspace_namespace_id.is_some() + || config.keyspace_global_name_lookup + { return Err(crate::Error::StringError( - "config.keyspace must be unset when using api-v2-no-prefix mode".to_owned(), + "config.keyspace, config.keyspace_identity, config.keyspace_namespace_id and config.keyspace_global_name_lookup must be unset when using api-v2-no-prefix mode".to_owned(), )); } @@ -243,7 +323,10 @@ impl Client { /// # }); /// ``` pub async fn current_timestamp(&self) -> Result { - self.pd.clone().get_timestamp().await + self.pd + .clone() + .get_timestamp_with_identity(self.keyspace.v3_identity()) + .await } /// Request garbage collection (GC) of the TiKV cluster. 
@@ -377,3 +460,36 @@ impl Client { Transaction::new(timestamp, self.pd.clone(), options, self.keyspace) } } + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_api_v3_transaction_client_rejects_invalid_identity_before_pd_connect() { + let result = Client::new_with_config( + Vec::::new(), + Config::default().with_keyspace_identity(0, 1), + ) + .await; + let err = match result { + Ok(_) => panic!("invalid API V3 identity should be rejected before PD connect"), + Err(err) => err, + }; + assert!(err.to_string().contains("namespace_id must be non-zero")); + } + + #[tokio::test] + async fn test_api_v3_transaction_client_rejects_namespace_without_keyspace_before_pd_connect() { + let result = Client::new_with_config( + Vec::::new(), + Config::default().with_keyspace_namespace_id(1), + ) + .await; + let err = match result { + Ok(_) => panic!("API V3 namespace lookup should require a keyspace name"), + Err(err) => err, + }; + assert!(err.to_string().contains("config.keyspace must be set")); + } +} diff --git a/src/transaction/lock.rs b/src/transaction/lock.rs index 5c5d2513..cb4f6b00 100644 --- a/src/transaction/lock.rs +++ b/src/transaction/lock.rs @@ -24,6 +24,7 @@ use crate::request::plan::handle_region_error; use crate::request::plan::is_grpc_error; use crate::request::Collect; use crate::request::CollectSingle; +use crate::request::KeyMode; use crate::request::Keyspace; use crate::request::Plan; use crate::store::RegionStore; @@ -42,6 +43,11 @@ fn format_key_for_log(key: &[u8]) -> String { format!("len={}, prefix={:?}", key.len(), &key[..prefix_len]) } +fn encode_lock_route_key(keyspace: Keyspace, key: &[u8]) -> crate::Key { + let user_key = crate::Key::from(key.to_vec()); + keyspace.encode_route_key(&user_key, KeyMode::Txn) +} + /// _Resolves_ the given locks. Returns locks still live. When there is no live locks, all the given locks are resolved. /// /// If a key has a lock, the latest status of the key is unknown. 
We need to "resolve" the lock, @@ -55,7 +61,10 @@ pub async fn resolve_locks( keyspace: Keyspace, ) -> Result /* live_locks */> { debug!("resolving locks"); - let ts = pd_client.clone().get_timestamp().await?; + let ts = pd_client + .clone() + .get_timestamp_with_identity(keyspace.v3_identity()) + .await?; let caller_start_ts = timestamp.version(); let current_ts = ts.version(); @@ -76,10 +85,8 @@ pub async fn resolve_locks( // This matches the client-go `LockResolver.ResolveLocksWithOpts` flow: query txn status for // each encountered lock, then resolve immediately when the status is final. for lock in locks { - let region_ver_id = pd_client - .region_for_key(&lock.key.clone().into()) - .await? - .ver_id(); + let route_key = encode_lock_route_key(keyspace, &lock.key); + let region_ver_id = pd_client.region_for_key(&route_key).await?.ver_id(); // skip if the region is cleaned if clean_regions .get(&lock.lock_version) @@ -156,7 +163,8 @@ async fn resolve_lock_with_retry( loop { attempt += 1; debug!("resolving locks: attempt {}", attempt); - let store = pd_client.clone().store_for_key(key.into()).await?; + let route_key = encode_lock_route_key(keyspace, key); + let store = pd_client.clone().store_for_key(&route_key).await?; let ver_id = store.region_with_leader.ver_id(); let request = requests::new_resolve_lock_request(start_version, commit_version, is_txn_file); @@ -480,7 +488,10 @@ impl LockResolver { Err(err) => return Err(err), }; - let current = pd_client.clone().get_timestamp().await?; + let current = pd_client + .clone() + .get_timestamp_with_identity(keyspace.v3_identity()) + .await?; status.check_ttl(current); let res = Arc::new(status); if res.is_cacheable() { @@ -563,7 +574,10 @@ impl LockResolver { { Ok(status) => return Ok(status), Err(Error::TxnNotFound(txn_not_found)) => { - let current = pd_client.clone().get_timestamp().await?; + let current = pd_client + .clone() + .get_timestamp_with_identity(keyspace.v3_identity()) + .await?; if 
lock_until_expired_ms(lock.lock_version, lock.lock_ttl, current) <= 0 { warn!( "lock txn not found, lock has expired, lock {:?}, caller_start_ts {}, current_ts {}", @@ -621,6 +635,7 @@ mod tests { #[rstest::rstest] #[case(Keyspace::Disable)] #[case(Keyspace::Enable { keyspace_id: 0 })] + #[case(Keyspace::api_v3(1, 7).unwrap())] #[tokio::test] #[serial] async fn test_resolve_lock_with_retry(#[case] keyspace: Keyspace) { @@ -650,12 +665,16 @@ mod tests { ))); let key = vec![1]; - let region1 = MockPdClient::region1(); + let expected_region = if matches!(keyspace, Keyspace::ApiV3 { .. }) { + MockPdClient::region2() + } else { + MockPdClient::region1() + }; let resolved_region = resolve_lock_with_retry(&key, 1, 2, false, client.clone(), keyspace, backoff.clone()) .await .unwrap(); - assert_eq!(region1.ver_id(), resolved_region); + assert_eq!(expected_region.ver_id(), resolved_region); // Test resolve lock over retry limit fail::cfg( diff --git a/src/transaction/requests.rs b/src/transaction/requests.rs index f5859ba0..32c804f8 100644 --- a/src/transaction/requests.rs +++ b/src/transaction/requests.rs @@ -33,7 +33,7 @@ use crate::request::RangeRequest; use crate::request::ResponseWithShard; use crate::request::Shardable; use crate::request::SingleKey; -use crate::request::{Batchable, StoreRequest}; +use crate::request::{Batchable, KeyMode, Keyspace, StoreRequest}; use crate::reversible_range_request; use crate::shardable_key; use crate::shardable_keys; @@ -41,7 +41,7 @@ use crate::shardable_range; use crate::store::RegionStore; use crate::store::Request; use crate::store::Store; -use crate::store::{region_stream_for_keys, region_stream_for_range}; +use crate::store::{region_stream_for_keys_with_keyspace, region_stream_for_range_with_keyspace}; use crate::timestamp::TimestampExt; use crate::transaction::requests::kvrpcpb::prewrite_request::PessimisticAction; use crate::transaction::HasLocks; @@ -261,17 +261,22 @@ impl Shardable for kvrpcpb::PrewriteRequest { let mut 
mutations = self.mutations.clone(); mutations.sort_by(|a, b| a.key.cmp(&b.key)); - region_stream_for_keys(mutations.into_iter(), pd_client.clone()) - .flat_map(|result| match result { - Ok((mutations, region)) => stream::iter(kvrpcpb::PrewriteRequest::batches( - mutations, - TXN_COMMIT_BATCH_SIZE, - )) - .map(move |batch| Ok((batch, region.clone()))) - .boxed(), - Err(e) => stream::iter(Err(e)).boxed(), - }) - .boxed() + region_stream_for_keys_with_keyspace( + mutations.into_iter(), + pd_client.clone(), + Keyspace::from_context(&self.context), + KeyMode::Txn, + ) + .flat_map(|result| match result { + Ok((mutations, region)) => stream::iter(kvrpcpb::PrewriteRequest::batches( + mutations, + TXN_COMMIT_BATCH_SIZE, + )) + .map(move |batch| Ok((batch, region.clone()))) + .boxed(), + Err(e) => stream::iter(Err(e)).boxed(), + }) + .boxed() } fn apply_shard(&mut self, shard: Self::Shard) { @@ -330,16 +335,21 @@ impl Shardable for kvrpcpb::CommitRequest { let mut keys = self.keys.clone(); keys.sort(); - region_stream_for_keys(keys.into_iter(), pd_client.clone()) - .flat_map(|result| match result { - Ok((keys, region)) => { - stream::iter(kvrpcpb::CommitRequest::batches(keys, TXN_COMMIT_BATCH_SIZE)) - .map(move |batch| Ok((batch, region.clone()))) - .boxed() - } - Err(e) => stream::iter(Err(e)).boxed(), - }) - .boxed() + region_stream_for_keys_with_keyspace( + keys.into_iter(), + pd_client.clone(), + Keyspace::from_context(&self.context), + KeyMode::Txn, + ) + .flat_map(|result| match result { + Ok((keys, region)) => { + stream::iter(kvrpcpb::CommitRequest::batches(keys, TXN_COMMIT_BATCH_SIZE)) + .map(move |batch| Ok((batch, region.clone()))) + .boxed() + } + Err(e) => stream::iter(Err(e)).boxed(), + }) + .boxed() } fn apply_shard(&mut self, shard: Self::Shard) { @@ -432,7 +442,12 @@ impl Shardable for kvrpcpb::PessimisticLockRequest { ) -> BoxStream<'static, Result<(Self::Shard, RegionWithLeader)>> { let mut mutations = self.mutations.clone(); mutations.sort_by(|a, b| 
a.key.cmp(&b.key)); - region_stream_for_keys(mutations.into_iter(), pd_client.clone()) + region_stream_for_keys_with_keyspace( + mutations.into_iter(), + pd_client.clone(), + Keyspace::from_context(&self.context), + KeyMode::Txn, + ) } fn apply_shard(&mut self, shard: Self::Shard) { @@ -532,9 +547,11 @@ impl Shardable for kvrpcpb::ScanLockRequest { &self, pd_client: &Arc, ) -> BoxStream<'static, Result<(Self::Shard, RegionWithLeader)>> { - region_stream_for_range( + region_stream_for_range_with_keyspace( (self.start_key.clone(), self.end_key.clone()), pd_client.clone(), + Keyspace::from_context(&self.context), + KeyMode::Txn, ) } @@ -598,7 +615,12 @@ impl Shardable for kvrpcpb::TxnHeartBeatRequest { &self, pd_client: &Arc, ) -> BoxStream<'static, Result<(Self::Shard, RegionWithLeader)>> { - region_stream_for_keys(std::iter::once(self.key().clone()), pd_client.clone()) + region_stream_for_keys_with_keyspace( + std::iter::once(self.key().clone()), + pd_client.clone(), + Keyspace::from_context(&self.context), + KeyMode::Txn, + ) } fn apply_shard(&mut self, mut shard: Self::Shard) { @@ -662,7 +684,12 @@ impl Shardable for kvrpcpb::CheckTxnStatusRequest { &self, pd_client: &Arc, ) -> BoxStream<'static, Result<(Self::Shard, RegionWithLeader)>> { - region_stream_for_keys(std::iter::once(self.key().clone()), pd_client.clone()) + region_stream_for_keys_with_keyspace( + std::iter::once(self.key().clone()), + pd_client.clone(), + Keyspace::from_context(&self.context), + KeyMode::Txn, + ) } fn apply_shard(&mut self, mut shard: Self::Shard) { @@ -892,14 +919,428 @@ impl Merge for Collect { #[cfg(test)] mod tests { + use std::any::Any; + use std::sync::{Arc, Mutex}; + + use crate::backoff::DEFAULT_REGION_BACKOFF; use crate::common::Error::PessimisticLockError; use crate::common::Error::ResolveLockError; + use crate::mock::{MockKvClient, MockPdClient}; use crate::proto::kvrpcpb; use crate::request::plan::Merge; - use crate::request::CollectWithShard; - use 
crate::request::ResponseWithShard; + use crate::request::{Collect, CollectError, CollectSingle, Keyspace, Plan}; + use crate::request::{CollectWithShard, ResponseWithShard}; use crate::KvPair; + fn assert_api_v3_test_context(ctx: &kvrpcpb::Context) { + assert_eq!(ctx.api_version, kvrpcpb::ApiVersion::V3 as i32); + assert_eq!(ctx.keyspace_id, 0); + assert_eq!(ctx.keyspace_identity.as_ref().unwrap().namespace_id, 1); + assert_eq!(ctx.keyspace_identity.as_ref().unwrap().keyspace_id, 7); + assert_eq!(ctx.region_id, 2); + assert_eq!(ctx.peer.as_ref().unwrap().store_id, 42); + } + + #[tokio::test] + async fn test_api_v3_get_routes_with_physical_key_but_sends_user_key() -> crate::Result<()> { + let seen = Arc::new(Mutex::new(false)); + let seen_in_hook = seen.clone(); + let client = Arc::new(MockPdClient::new(MockKvClient::with_dispatch_hook( + move |req: &dyn Any| { + let req: &kvrpcpb::GetRequest = req.downcast_ref().unwrap(); + assert_eq!(req.key, vec![1]); + let ctx = req.context.as_ref().unwrap(); + assert_eq!(ctx.api_version, kvrpcpb::ApiVersion::V3 as i32); + assert_eq!(ctx.keyspace_id, 0); + assert_eq!(ctx.keyspace_identity.as_ref().unwrap().namespace_id, 1); + assert_eq!(ctx.keyspace_identity.as_ref().unwrap().keyspace_id, 7); + assert_eq!(ctx.region_id, 2); + assert_eq!(ctx.peer.as_ref().unwrap().store_id, 42); + *seen_in_hook.lock().unwrap() = true; + Ok(Box::new(kvrpcpb::GetResponse { + value: vec![9], + ..Default::default() + }) as Box) + }, + ))); + + let req = super::new_get_request(vec![1], 42); + let plan = crate::request::PlanBuilder::new(client, Keyspace::api_v3(1, 7).unwrap(), req) + .retry_multi_region(DEFAULT_REGION_BACKOFF) + .merge(CollectSingle) + .plan(); + let resp = plan.execute().await?; + + assert_eq!(resp.value, vec![9]); + assert!(*seen.lock().unwrap()); + Ok(()) + } + + #[tokio::test] + async fn test_api_v3_batch_get_routes_with_physical_key_but_sends_user_keys( + ) -> crate::Result<()> { + let seen = Arc::new(Mutex::new(false)); + let 
seen_in_hook = seen.clone(); + let client = Arc::new(MockPdClient::new(MockKvClient::with_dispatch_hook( + move |req: &dyn Any| { + let req: &kvrpcpb::BatchGetRequest = req.downcast_ref().unwrap(); + assert_eq!(req.keys, vec![vec![1]]); + let ctx = req.context.as_ref().unwrap(); + assert_eq!(ctx.api_version, kvrpcpb::ApiVersion::V3 as i32); + assert_eq!(ctx.keyspace_id, 0); + assert_eq!(ctx.keyspace_identity.as_ref().unwrap().namespace_id, 1); + assert_eq!(ctx.keyspace_identity.as_ref().unwrap().keyspace_id, 7); + assert_eq!(ctx.region_id, 2); + assert_eq!(ctx.peer.as_ref().unwrap().store_id, 42); + *seen_in_hook.lock().unwrap() = true; + Ok(Box::new(kvrpcpb::BatchGetResponse { + pairs: vec![kvrpcpb::KvPair { + key: vec![1], + value: vec![9], + ..Default::default() + }], + ..Default::default() + }) as Box) + }, + ))); + + let req = super::new_batch_get_request(vec![vec![1]], 42); + let plan = crate::request::PlanBuilder::new(client, Keyspace::api_v3(1, 7).unwrap(), req) + .retry_multi_region(DEFAULT_REGION_BACKOFF) + .merge(Collect) + .plan(); + let resp = plan.execute().await?; + + assert_eq!(resp, vec![KvPair::new(vec![1], vec![9])]); + assert!(*seen.lock().unwrap()); + Ok(()) + } + + #[tokio::test] + async fn test_api_v3_scan_routes_with_physical_key_but_sends_user_range() -> crate::Result<()> { + let seen = Arc::new(Mutex::new(false)); + let seen_in_hook = seen.clone(); + let client = Arc::new(MockPdClient::new(MockKvClient::with_dispatch_hook( + move |req: &dyn Any| { + let req: &kvrpcpb::ScanRequest = req.downcast_ref().unwrap(); + assert_eq!(req.start_key, vec![1]); + assert_eq!(req.end_key, vec![2]); + let ctx = req.context.as_ref().unwrap(); + assert_eq!(ctx.api_version, kvrpcpb::ApiVersion::V3 as i32); + assert_eq!(ctx.keyspace_id, 0); + assert_eq!(ctx.keyspace_identity.as_ref().unwrap().namespace_id, 1); + assert_eq!(ctx.keyspace_identity.as_ref().unwrap().keyspace_id, 7); + assert_eq!(ctx.region_id, 2); + assert_eq!(ctx.peer.as_ref().unwrap().store_id, 
42); + *seen_in_hook.lock().unwrap() = true; + Ok(Box::new(kvrpcpb::ScanResponse { + pairs: vec![kvrpcpb::KvPair { + key: vec![1], + value: vec![9], + ..Default::default() + }], + ..Default::default() + }) as Box) + }, + ))); + + let req = super::new_scan_request(vec![1], vec![2], 42, 10, false, false); + let plan = crate::request::PlanBuilder::new(client, Keyspace::api_v3(1, 7).unwrap(), req) + .retry_multi_region(DEFAULT_REGION_BACKOFF) + .merge(Collect) + .plan(); + let resp = plan.execute().await?; + + assert_eq!(resp, vec![KvPair::new(vec![1], vec![9])]); + assert!(*seen.lock().unwrap()); + Ok(()) + } + + #[tokio::test] + async fn test_api_v3_prewrite_routes_with_physical_key_but_sends_user_keys() -> crate::Result<()> + { + let seen = Arc::new(Mutex::new(false)); + let seen_in_hook = seen.clone(); + let client = Arc::new(MockPdClient::new(MockKvClient::with_dispatch_hook( + move |req: &dyn Any| { + let req: &kvrpcpb::PrewriteRequest = req.downcast_ref().unwrap(); + assert_eq!(req.mutations.len(), 1); + assert_eq!(req.mutations[0].key, vec![1]); + assert_eq!(req.primary_lock, vec![9]); + let ctx = req.context.as_ref().unwrap(); + assert_eq!(ctx.api_version, kvrpcpb::ApiVersion::V3 as i32); + assert_eq!(ctx.keyspace_id, 0); + assert_eq!(ctx.keyspace_identity.as_ref().unwrap().namespace_id, 1); + assert_eq!(ctx.keyspace_identity.as_ref().unwrap().keyspace_id, 7); + assert_eq!(ctx.region_id, 2); + assert_eq!(ctx.peer.as_ref().unwrap().store_id, 42); + *seen_in_hook.lock().unwrap() = true; + Ok(Box::new(kvrpcpb::PrewriteResponse::default()) as Box) + }, + ))); + + let req = super::new_prewrite_request( + vec![kvrpcpb::Mutation { + op: kvrpcpb::Op::Put.into(), + key: vec![1], + value: vec![8], + ..Default::default() + }], + vec![9], + 42, + 3000, + ); + let plan = crate::request::PlanBuilder::new(client, Keyspace::api_v3(1, 7).unwrap(), req) + .retry_multi_region(DEFAULT_REGION_BACKOFF) + .merge(CollectError) + .plan(); + let resp = plan.execute().await?; + + 
assert_eq!(resp.len(), 1); + assert!(*seen.lock().unwrap()); + Ok(()) + } + + #[tokio::test] + async fn test_api_v3_commit_routes_with_physical_key_but_sends_user_keys() -> crate::Result<()> + { + let seen = Arc::new(Mutex::new(false)); + let seen_in_hook = seen.clone(); + let client = Arc::new(MockPdClient::new(MockKvClient::with_dispatch_hook( + move |req: &dyn Any| { + let req: &kvrpcpb::CommitRequest = req.downcast_ref().unwrap(); + assert_eq!(req.keys, vec![vec![1]]); + let ctx = req.context.as_ref().unwrap(); + assert_eq!(ctx.api_version, kvrpcpb::ApiVersion::V3 as i32); + assert_eq!(ctx.keyspace_id, 0); + assert_eq!(ctx.keyspace_identity.as_ref().unwrap().namespace_id, 1); + assert_eq!(ctx.keyspace_identity.as_ref().unwrap().keyspace_id, 7); + assert_eq!(ctx.region_id, 2); + assert_eq!(ctx.peer.as_ref().unwrap().store_id, 42); + *seen_in_hook.lock().unwrap() = true; + Ok(Box::new(kvrpcpb::CommitResponse::default()) as Box) + }, + ))); + + let req = super::new_commit_request(vec![vec![1]], 42, 43); + let plan = crate::request::PlanBuilder::new(client, Keyspace::api_v3(1, 7).unwrap(), req) + .retry_multi_region(DEFAULT_REGION_BACKOFF) + .merge(CollectError) + .plan(); + let resp = plan.execute().await?; + + assert_eq!(resp.len(), 1); + assert!(*seen.lock().unwrap()); + Ok(()) + } + + #[tokio::test] + async fn test_api_v3_batch_rollback_routes_with_physical_key_but_sends_user_keys( + ) -> crate::Result<()> { + let seen = Arc::new(Mutex::new(false)); + let seen_in_hook = seen.clone(); + let client = Arc::new(MockPdClient::new(MockKvClient::with_dispatch_hook( + move |req: &dyn Any| { + let req: &kvrpcpb::BatchRollbackRequest = req.downcast_ref().unwrap(); + assert_eq!(req.keys, vec![vec![1]]); + assert_api_v3_test_context(req.context.as_ref().unwrap()); + *seen_in_hook.lock().unwrap() = true; + Ok(Box::new(kvrpcpb::BatchRollbackResponse::default()) as Box) + }, + ))); + + let req = super::new_batch_rollback_request(vec![vec![1]], 42); + let plan = 
crate::request::PlanBuilder::new(client, Keyspace::api_v3(1, 7).unwrap(), req) + .retry_multi_region(DEFAULT_REGION_BACKOFF) + .merge(CollectError) + .plan(); + let resp = plan.execute().await?; + + assert_eq!(resp.len(), 1); + assert!(*seen.lock().unwrap()); + Ok(()) + } + + #[tokio::test] + async fn test_api_v3_pessimistic_rollback_routes_with_physical_key_but_sends_user_keys( + ) -> crate::Result<()> { + let seen = Arc::new(Mutex::new(false)); + let seen_in_hook = seen.clone(); + let client = Arc::new(MockPdClient::new(MockKvClient::with_dispatch_hook( + move |req: &dyn Any| { + let req: &kvrpcpb::PessimisticRollbackRequest = req.downcast_ref().unwrap(); + assert_eq!(req.keys, vec![vec![1]]); + assert_api_v3_test_context(req.context.as_ref().unwrap()); + *seen_in_hook.lock().unwrap() = true; + Ok(Box::new(kvrpcpb::PessimisticRollbackResponse::default()) as Box) + }, + ))); + + let req = super::new_pessimistic_rollback_request(vec![vec![1]], 42, 43); + let plan = crate::request::PlanBuilder::new(client, Keyspace::api_v3(1, 7).unwrap(), req) + .retry_multi_region(DEFAULT_REGION_BACKOFF) + .merge(CollectError) + .plan(); + let resp = plan.execute().await?; + + assert_eq!(resp.len(), 1); + assert!(*seen.lock().unwrap()); + Ok(()) + } + + #[tokio::test] + async fn test_api_v3_pessimistic_lock_routes_with_physical_key_but_sends_user_keys( + ) -> crate::Result<()> { + let seen = Arc::new(Mutex::new(false)); + let seen_in_hook = seen.clone(); + let client = Arc::new(MockPdClient::new(MockKvClient::with_dispatch_hook( + move |req: &dyn Any| { + let req: &kvrpcpb::PessimisticLockRequest = req.downcast_ref().unwrap(); + assert_eq!(req.mutations.len(), 1); + assert_eq!(req.mutations[0].key, vec![1]); + assert_eq!(req.primary_lock, vec![9]); + assert_api_v3_test_context(req.context.as_ref().unwrap()); + *seen_in_hook.lock().unwrap() = true; + Ok(Box::new(kvrpcpb::PessimisticLockResponse::default()) as Box) + }, + ))); + + let req = super::new_pessimistic_lock_request( + 
vec![kvrpcpb::Mutation { + op: kvrpcpb::Op::PessimisticLock.into(), + key: vec![1], + ..Default::default() + }], + vec![9], + 42, + 3000, + 43, + false, + ); + let plan = crate::request::PlanBuilder::new(client, Keyspace::api_v3(1, 7).unwrap(), req) + .retry_multi_region(DEFAULT_REGION_BACKOFF) + .merge(CollectError) + .plan(); + let resp = plan.execute().await?; + + assert_eq!(resp.len(), 1); + assert!(*seen.lock().unwrap()); + Ok(()) + } + + #[tokio::test] + async fn test_api_v3_scan_lock_routes_with_physical_key_but_sends_user_range( + ) -> crate::Result<()> { + let seen = Arc::new(Mutex::new(false)); + let seen_in_hook = seen.clone(); + let client = Arc::new(MockPdClient::new(MockKvClient::with_dispatch_hook( + move |req: &dyn Any| { + let req: &kvrpcpb::ScanLockRequest = req.downcast_ref().unwrap(); + assert_eq!(req.start_key, vec![1]); + assert_eq!(req.end_key, vec![2]); + assert_api_v3_test_context(req.context.as_ref().unwrap()); + *seen_in_hook.lock().unwrap() = true; + Ok(Box::new(kvrpcpb::ScanLockResponse::default()) as Box) + }, + ))); + + let req = super::new_scan_lock_request(vec![1], vec![2], 42, 10); + let plan = crate::request::PlanBuilder::new(client, Keyspace::api_v3(1, 7).unwrap(), req) + .retry_multi_region(DEFAULT_REGION_BACKOFF) + .merge(Collect) + .plan(); + let resp = plan.execute().await?; + + assert!(resp.is_empty()); + assert!(*seen.lock().unwrap()); + Ok(()) + } + + #[tokio::test] + async fn test_api_v3_txn_heart_beat_routes_with_physical_key_but_sends_user_key( + ) -> crate::Result<()> { + let seen = Arc::new(Mutex::new(false)); + let seen_in_hook = seen.clone(); + let client = Arc::new(MockPdClient::new(MockKvClient::with_dispatch_hook( + move |req: &dyn Any| { + let req: &kvrpcpb::TxnHeartBeatRequest = req.downcast_ref().unwrap(); + assert_eq!(req.primary_lock, vec![1]); + assert_api_v3_test_context(req.context.as_ref().unwrap()); + *seen_in_hook.lock().unwrap() = true; + Ok(Box::new(kvrpcpb::TxnHeartBeatResponse { + lock_ttl: 3000, + 
..Default::default() + }) as Box) + }, + ))); + + let req = super::new_heart_beat_request(42, vec![1], 3000); + let plan = crate::request::PlanBuilder::new(client, Keyspace::api_v3(1, 7).unwrap(), req) + .retry_multi_region(DEFAULT_REGION_BACKOFF) + .merge(CollectSingle) + .plan(); + let resp = plan.execute().await?; + + assert_eq!(resp.lock_ttl, 3000); + assert!(*seen.lock().unwrap()); + Ok(()) + } + + #[tokio::test] + async fn test_api_v3_check_txn_status_routes_with_physical_key_but_sends_user_key( + ) -> crate::Result<()> { + let seen = Arc::new(Mutex::new(false)); + let seen_in_hook = seen.clone(); + let client = Arc::new(MockPdClient::new(MockKvClient::with_dispatch_hook( + move |req: &dyn Any| { + let req: &kvrpcpb::CheckTxnStatusRequest = req.downcast_ref().unwrap(); + assert_eq!(req.primary_key, vec![1]); + assert_api_v3_test_context(req.context.as_ref().unwrap()); + *seen_in_hook.lock().unwrap() = true; + Ok(Box::new(kvrpcpb::CheckTxnStatusResponse::default()) as Box) + }, + ))); + + let req = + super::new_check_txn_status_request(vec![1], 42, 0, 43, false, false, false, false); + let plan = crate::request::PlanBuilder::new(client, Keyspace::api_v3(1, 7).unwrap(), req) + .retry_multi_region(DEFAULT_REGION_BACKOFF) + .merge(CollectSingle) + .plan(); + let resp = plan.execute().await?; + + assert_eq!(resp.commit_version, 0); + assert!(*seen.lock().unwrap()); + Ok(()) + } + + #[tokio::test] + async fn test_api_v3_check_secondary_locks_routes_with_physical_key_but_sends_user_keys( + ) -> crate::Result<()> { + let seen = Arc::new(Mutex::new(false)); + let seen_in_hook = seen.clone(); + let client = Arc::new(MockPdClient::new(MockKvClient::with_dispatch_hook( + move |req: &dyn Any| { + let req: &kvrpcpb::CheckSecondaryLocksRequest = req.downcast_ref().unwrap(); + assert_eq!(req.keys, vec![vec![1]]); + assert_api_v3_test_context(req.context.as_ref().unwrap()); + *seen_in_hook.lock().unwrap() = true; + Ok(Box::new(kvrpcpb::CheckSecondaryLocksResponse::default()) 
as Box) + }, + ))); + + let req = super::new_check_secondary_locks_request(vec![vec![1]], 42); + let plan = crate::request::PlanBuilder::new(client, Keyspace::api_v3(1, 7).unwrap(), req) + .retry_multi_region(DEFAULT_REGION_BACKOFF) + .merge(Collect) + .plan(); + let resp = plan.execute().await?; + + assert!(resp.commit_ts.is_none()); + assert!(*seen.lock().unwrap()); + Ok(()) + } + #[tokio::test] async fn test_merge_pessimistic_lock_response() { let (key1, key2, key3, key4) = (b"key1", b"key2", b"key3", b"key4"); diff --git a/src/transaction/transaction.rs b/src/transaction/transaction.rs index ed8eb911..63f5c507 100644 --- a/src/transaction/transaction.rs +++ b/src/transaction/transaction.rs @@ -847,7 +847,11 @@ impl Transaction { .buffer .get_primary_key() .unwrap_or_else(|| first_key.clone()); - let for_update_ts = self.rpc.clone().get_timestamp().await?; + let for_update_ts = self + .rpc + .clone() + .get_timestamp_with_identity(self.keyspace.v3_identity()) + .await?; self.options.push_for_update_ts(for_update_ts.clone()); let request = new_pessimistic_lock_request( keys.clone().into_iter(), @@ -1381,7 +1385,11 @@ impl Committer { async fn commit_primary(&mut self) -> Result { debug!("committing primary"); let primary_key = self.primary_key.clone().into_iter(); - let commit_version = self.rpc.clone().get_timestamp().await?; + let commit_version = self + .rpc + .clone() + .get_timestamp_with_identity(self.keyspace.v3_identity()) + .await?; let req = new_commit_request( primary_key, self.start_version.clone(),