-
Notifications
You must be signed in to change notification settings - Fork 134
/
Copy pathraft_serverpb.proto
296 lines (261 loc) · 8.57 KB
/
raft_serverpb.proto
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
// Schema header: proto3 syntax, package raft_serverpb.
syntax = "proto3";
package raft_serverpb;
// Imported schemas. Each note lists the types referenced from that package
// in the definitions below.
// eraftpb: Message, HardState.
import "eraftpb.proto";
// metapb: Peer, Region, RegionEpoch.
import "metapb.proto";
// kvrpcpb: APIVersion.
import "kvrpcpb.proto";
// disk_usage: DiskUsage.
import "disk_usage.proto";
// encryptionpb: DataKey.
import "encryptionpb.proto";
// rustproto: presumably declares the (rustproto.lite_runtime_all) option set
// below -- confirm against rustproto.proto.
import "rustproto.proto";
// File-level custom option in the rustproto namespace.
option (rustproto.lite_runtime_all) = true;
// Java package used for the generated classes.
option java_package = "org.tikv.kvproto";
// Envelope that carries an eraftpb.Message together with the region, peer
// and epoch metadata that accompanies it.
message RaftMessage {
  uint64 region_id = 1;
  metapb.Peer from_peer = 2;
  metapb.Peer to_peer = 3;
  eraftpb.Message message = 4;
  metapb.RegionEpoch region_epoch = 5;

  // When true, to_peer is a tombstone peer and should remove itself.
  bool is_tombstone = 6;

  // Region key range, as the half-open interval [start_key, end_key).
  bytes start_key = 7;
  bytes end_key = 8;

  // When set, to_peer should be removed if the merge is never going to
  // complete.
  metapb.Region merge_target = 9;

  ExtraMessage extra_msg = 10;
  bytes extra_ctx = 11;
  disk_usage.DiskUsage disk_usage = 12;
}
// An (index, term) pair. Embedded in RaftApplyState as `truncated_state`.
message RaftTruncatedState {
  uint64 index = 1;
  uint64 term = 2;
}
// One entry of SnapshotMeta.cf_files: a name, a size and a checksum.
// NOTE(review): `cf` presumably means column family, and the unit of `size`
// is not stated here -- confirm both.
message SnapshotCFFile {
  string cf = 1;
  uint64 size = 2;
  uint32 checksum = 3;
}
// Metadata describing a snapshot and the files it contains.
message SnapshotMeta {
  repeated SnapshotCFFile cf_files = 1;

  // True when the snapshot was triggered for load balancing.
  bool for_balance = 2;

  // True when this is an empty snapshot intended for a witness.
  bool for_witness = 3;

  // Timestamp, in seconds, of snapshot generation.
  uint64 start = 4;

  // How long generating the snapshot took.
  uint64 generate_duration_sec = 5;

  // Path of the tablet snapshot. Only meant for the case where v1 receives
  // a snapshot from v2.
  string tablet_snap_path = 6;

  // Hint of the latest commit index on the leader at the time the snapshot
  // is sent. Only meant for the case where v2 sends a snapshot to v1.
  // See https://github.com/pingcap/tiflash/issues/7568
  uint64 commit_index_hint = 7;
}
// A RaftMessage paired with a block of raw bytes.
message SnapshotChunk {
  RaftMessage message = 1;
  bytes data = 2;
}
// Empty message; it declares no fields.
message Done {
}
// Description of a single tablet snapshot file. Listed in
// TabletSnapshotPreview.metas.
message TabletSnapshotFileMeta {
  uint64 file_size = 1;
  string file_name = 2;

  // Some block data from the file. Not encrypted.
  bytes head_chunk = 3;

  // Trailing data of the file, checksum included. Not encrypted.
  bytes trailing_chunk = 4;
}
// Preview of a snapshot that lets the server decide whether some files can
// be skipped. The server should reply with an `AcceptedSnapshotFiles` so the
// client keeps sending the listed files. Only SST files may be skipped;
// every other file must always be sent.
message TabletSnapshotPreview {
  repeated TabletSnapshotFileMeta metas = 1;

  // There may be too many metas for one message; this flag indicates that
  // all metas have been sent.
  bool end = 2;
}
// A chunk of file content belonging to a tablet snapshot.
message TabletSnapshotFileChunk {
  uint64 file_size = 1;
  string file_name = 2;

  // Chunk payload. Encrypted.
  bytes data = 3;

  // Initialization vector, when encryption is enabled.
  bytes iv = 4;

  encryptionpb.DataKey key = 5;
}
// The `head` variant of TabletSnapshotRequest.payload: a RaftMessage plus a
// cache flag.
message TabletSnapshotHead {
  RaftMessage message = 1;
  bool use_cache = 2;
}
// The `end` variant of TabletSnapshotRequest.payload.
message TabletSnapshotEnd {
  // Checksum over everything sent in `TabletSnapshotFileChunk.data` and
  // `TabletSnapshotFileChunk.file_name`.
  uint64 checksum = 1;
}
// Request wrapper for tablet snapshot transfer. Because the variants live
// in a oneof, at most one of them is set on any given request.
message TabletSnapshotRequest {
  oneof payload {
    TabletSnapshotHead head = 1;
    TabletSnapshotPreview preview = 2;
    TabletSnapshotFileChunk chunk = 3;
    TabletSnapshotEnd end = 4;
  }
}
// A list of file names. Carried in TabletSnapshotResponse.files.
message AcceptedSnapshotFiles {
  repeated string file_name = 1;
}
// Response wrapper that carries an AcceptedSnapshotFiles list.
message TabletSnapshotResponse {
  AcceptedSnapshotFiles files = 1;
}
// A key/value pair of raw bytes. Element type of RaftSnapshotData.data.
message KeyValue {
  bytes key = 1;
  bytes value = 2;
}
// Snapshot payload: the region, key/value data, snapshot metadata, and the
// removed/merged peer records.
message RaftSnapshotData {
  metapb.Region region = 1;
  uint64 file_size = 2;
  repeated KeyValue data = 3;
  uint64 version = 4;
  SnapshotMeta meta = 5;
  repeated metapb.Peer removed_records = 6;
  repeated MergedRecord merged_records = 7;
}
// Identity of a store: its cluster ID, store ID and API version.
message StoreIdent {
  uint64 cluster_id = 1;
  uint64 store_id = 2;
  kvrpcpb.APIVersion api_version = 3;
}
// State consulted when TiKV recovers on startup.
message StoreRecoverState {
  // Used by TiKV startup recovery when the WAL of KVDB is disabled.
  // TiKV may read all relations between seqno and raft log index, and replay
  // every raft log whose corresponding seqno is smaller than this value.
  // Once TiKV has replayed all raft logs and flushed the KV data, this seqno
  // must be updated.
  uint64 seqno = 1;
}
// An eraftpb.HardState paired with a last index.
message RaftLocalState {
  eraftpb.HardState hard_state = 1;
  uint64 last_index = 2;
}
// Apply progress: the applied index, commit information and the truncated
// state.
//
// Fields are declared out of numeric order (2 comes last). The numbers, not
// the declaration order, identify fields on the wire, so they are kept as is.
message RaftApplyState {
  uint64 applied_index = 1;
  uint64 last_commit_index = 3;
  uint64 commit_index = 4;
  uint64 commit_term = 5;
  RaftTruncatedState truncated_state = 2;
}
// State of a peer. Stored in RegionLocalState.state.
enum PeerState {
  Normal = 0;
  Applying = 1;
  Tombstone = 2;
  Merging = 3;

  // Currently used for the witness to non-witness conversion. When a witness
  // has just become a non-witness, this state must be set and persisted so
  // that, if the service restarts before the snapshot is applied, a snapshot
  // can be actively requested while this peer is being initialized.
  Unavailable = 4;
}
// Merge bookkeeping: a minimum index, the target region and a commit value.
// Embedded in RegionLocalState as `merge_state`.
message MergeState {
  uint64 min_index = 1;
  metapb.Region target = 2;
  uint64 commit = 3;
}
// Record of a committed merge, covering both the source and target regions.
//
// Field 9 is declared between fields 3 and 4. The numbers, not the
// declaration order, identify fields on the wire, so they are kept as is.
message MergedRecord {
  uint64 source_region_id = 1;
  metapb.RegionEpoch source_epoch = 2;

  // Peers of the source region at the time the merge is committed.
  repeated metapb.Peer source_peers = 3;

  // Peers of the source region that had been removed (by confchange) at the
  // time the merge is committed.
  repeated metapb.Peer source_removed_records = 9;

  uint64 target_region_id = 4;
  metapb.RegionEpoch target_epoch = 5;
  repeated metapb.Peer target_peers = 6;

  // Index of the commit merge.
  uint64 index = 7;

  // Index of the prepare merge.
  uint64 source_index = 8;
}
// Locally stored state of a region: peer state, region metadata, merge
// state and peer clean-up records.
message RegionLocalState {
  PeerState state = 1;
  metapb.Region region = 2;
  MergeState merge_state = 3;

  // Apply index that corresponds to the storage at the time it was
  // initialized.
  uint64 tablet_index = 4;

  // Raft does not guarantee that a peer is eventually removed. In v1 a peer
  // learns its fate from logs or broadcast; in v2 the leader is responsible
  // for making sure removed peers are destroyed.
  // Note: only peers that have been part of this region may appear here.
  repeated metapb.Peer removed_records = 5;

  // A merged peer cannot be deleted the way gc peers are. Instead the leader
  // has to query the target peer to decide whether the source peer can be
  // destroyed.
  repeated MergedRecord merged_records = 6;
}
// Associates a region ID and a sequence number with that region's apply
// state and local state.
message RegionSequenceNumberRelation {
  uint64 region_id = 1;
  uint64 sequence_number = 2;
  RaftApplyState apply_state = 3;
  RegionLocalState region_state = 4;
}
// Payload of ExtraMessage.availability_context, which is used by
// `MsgAvailabilityRequest` and `MsgAvailabilityResponse` in v2.
message AvailabilityContext {
  uint64 from_region_id = 1;
  metapb.RegionEpoch from_region_epoch = 2;
  bool unavailable = 3;
  bool trimmed = 4;
}
// Discriminator for ExtraMessage; stored in ExtraMessage.type.
enum ExtraMessageType {
  MsgRegionWakeUp = 0;
  MsgWantRollbackMerge = 1;
  MsgCheckStalePeer = 2;
  MsgCheckStalePeerResponse = 3;

  // Before a leader goes to sleep it sends this request to all of its
  // followers to make sure they all agree to sleep.
  MsgHibernateRequest = 4;
  MsgHibernateResponse = 5;

  MsgRejectRaftLogCausedByMemoryUsage = 6;
  MsgAvailabilityRequest = 7;
  MsgAvailabilityResponse = 8;
  MsgVoterReplicatedIndexRequest = 9;
  MsgVoterReplicatedIndexResponse = 10;

  // The message means that `from` is tombstone; the leader can then update
  // removed_records.
  // NOTE(review): this wording reads as if it describes the response rather
  // than the request -- confirm which value it is meant to document.
  MsgGcPeerRequest = 11;
  MsgGcPeerResponse = 12;

  MsgFlushMemtable = 13;
  MsgRefreshBuckets = 14;
}
// Payload of ExtraMessage.flush_memtable; it names a region by ID.
message FlushMemtable {
  uint64 region_id = 1;
}
// Payload of ExtraMessage.refresh_buckets.
// NOTE(review): `keys` and `sizes` look like parallel lists; how they pair
// up is not stated here -- confirm against the producer.
message RefreshBuckets {
  uint64 version = 1;
  repeated bytes keys = 2;
  repeated uint64 sizes = 3;
}
// Payload of ExtraMessage.check_gc_peer: identifies the peer to be checked
// and the region that is asking.
message CheckGcPeer {
  // ID of the region that triggers the check and waits for the report. It
  // should be the ID of RaftMessage.from.
  // NOTE(review): RaftMessage declares `region_id` and `from_peer`, not
  // `from` -- confirm which one is meant.
  uint64 from_region_id = 1;

  // ID of the region being checked for whether it should be destroyed.
  uint64 check_region_id = 2;

  // Epoch of the region being checked.
  metapb.RegionEpoch check_region_epoch = 3;

  // The peer being checked.
  metapb.Peer check_peer = 4;
}
// Auxiliary message carried in RaftMessage.extra_msg. `type` selects the
// kind of message; the field comments say which kinds use which fields.
message ExtraMessage {
  ExtraMessageType type = 1;

  // A merge-related index. For `MsgWantRollbackMerge` it is the prepare
  // merge index. For `MsgGcPeerRequest` it is the commit merge index. For
  // `MsgVoterReplicatedIndexRequest` it is the voter_replicated_index.
  uint64 index = 2;

  // For `MsgCheckStalePeerResponse`: the peers that the receiver can
  // continue to query.
  repeated metapb.Peer check_peers = 3;

  bool wait_data = 4;

  // When true, forcibly wakes up hibernated regions.
  bool forcely_awaken = 5;

  CheckGcPeer check_gc_peer = 6;
  FlushMemtable flush_memtable = 7;

  // Used by `MsgAvailabilityRequest` and `MsgAvailabilityResponse` in v2.
  AvailabilityContext availability_context = 8;

  // Notifies the peer to refresh its buckets version.
  RefreshBuckets refresh_buckets = 9;
}