Merge branch 'feature-snapshot' into feature

Change-Id: I0dcecb99820cf1e83e248ef8204d19817d2476df
opencurveadmin · Apr 16, 2020 · e6af7c5 · e6af7c5
2 parents cf66618 + 76e1658
commit e6af7c5
Show file tree

Hide file tree

Showing 322 changed files with 54,140 additions and 8,205 deletions.
diff --git a/.gitignore b/.gitignore
@@ -74,3 +74,4 @@ bazel-testlogs
 *.log
 
 .clang-format
+!curve-snapshotcloneserver-nginx/app/lib
diff --git a/WORKSPACE b/WORKSPACE
@@ -3,7 +3,7 @@ workspace(name = "curve")
 git_repository(
     name = "com_netease_storage_gerrit_curve_curve_braft",
     remote = "http://gerrit.storage.netease.com/curve/curve-braft",
-    commit = "27dedf9e2e0d26f99af4e7de35ced5c2106cb711",
+    commit = "b17ebad68d1d1b84440f7bce984755ff47095137",
 )
 
 bind(

diff --git a/conf/chunkserver.conf.example b/conf/chunkserver.conf.example
@@ -9,6 +9,8 @@ global.port=8200
 global.chunk_size=16777216
 # chunk 元数据页大小，一般4KB
 global.meta_page_size=4096
+# clone chunk允许的最长location长度
+global.location_limit=3000
 
 #
 # MDS settings
@@ -92,21 +94,24 @@ copyset.check_loadmargin_interval_ms=1000
 # Clone settings
 #
 # 禁止使用curveclient
-clone.disable_curve_client=true
+clone.disable_curve_client=false
 # 禁止使用s3adapter
-clone.disable_s3_adapter=true
+clone.disable_s3_adapter=false
 # 克隆的分片大小，一般1MB
 clone.slice_size=1048576
+# 读clone chunk时是否需要paste到本地
+# 该配置对recover chunk请求类型无效
+clone.enable_paste=false
 # 克隆的线程数量
 clone.thread_num=10
 # 克隆的队列深度
-clone.queue_depth=100
+clone.queue_depth=6000
 # curve用户名
 curve.root_username=root
 # curve密码
-curve.root_password=
+curve.root_password=root_password
 # client配置文件
-curve.config_path=conf/client.conf
+curve.config_path=conf/cs_client.conf
 # s3配置文件
 s3.config_path=conf/s3.conf
 

diff --git a/conf/client.conf b/conf/client.conf
@@ -33,12 +33,6 @@ mds.rpcRetryIntervalUS=100000
 # 获取leader的rpc超时时间
 metacache.getLeaderTimeOutMS=500
 
-# 获取leader的backup request超时时间
-metacache.getLeaderBackupRequestMS=100
-
-# getleaer backup request使用的load balancer方法
-metacache.getLeaderBackupRequestLbName=rr
-
 # 获取leader的重试次数
 metacache.getLeaderRetry=5
 

diff --git a/conf/cs_client.conf b/conf/cs_client.conf
@@ -0,0 +1,144 @@
+#
+################### mds一侧配置信息 ##################
+#
+
+# mds的地址信息，对于mds集群，地址以逗号隔开
+mds.listen.addr=127.0.0.1:6666
+
+# 初始化阶段向mds注册开关，默认为开（此处显式关闭）
+mds.registerToMDS=false
+
+# 与mds通信的rpc超时时间
+mds.rpcTimeoutMS=500
+
+# 与mds通信rpc最大的超时时间, 指数退避的超时时间不能超过这个值
+mds.maxRPCTimeoutMS=2000
+
+# 与mds通信重试总时间
+mds.maxRetryMS=8000
+
+# 在当前mds上连续重试次数超过该限制就切换, 这个失败次数包含超时重试次数
+mds.maxFailedTimesBeforeChangeMDS=2
+
+# 与MDS一侧保持一个lease时间内多少次续约
+mds.refreshTimesPerLease=4
+
+# mds RPC接口每次重试之前需要先睡眠一段时间
+mds.rpcRetryIntervalUS=100000
+
+#
+################# metacache配置信息 ################
+#
+
+# 获取leader的rpc超时时间
+metacache.getLeaderTimeOutMS=500
+
+# 获取leader的backup request超时时间
+metacache.getLeaderBackupRequestMS=100
+
+# getLeader backup request使用的load balancer方法
+metacache.getLeaderBackupRequestLbName=rr
+
+# 获取leader的重试次数
+metacache.getLeaderRetry=5
+
+# 获取leader接口每次重试之前需要先睡眠一段时间
+metacache.rpcRetryIntervalUS=100000
+
+#
+############### 调度层的配置信息 #############
+#
+
+# 调度层队列大小，每个文件对应一个队列
+# 调度队列的深度会影响client端整体吞吐，这个队列存放的是异步IO任务。
+schedule.queueCapacity=1000000
+
+# 队列的执行线程数量
+# 执行线程所要做的事情就是将IO取出，然后发到网络就返回取下一个网络任务。一个任务从
+# 队列取出到发送完rpc请求大概在(20us-100us)，20us是正常情况下不需要获取leader的时候
+# 如果在发送的时候需要获取leader，时间会在100us左右，一个线程的吞吐在10w-50w
+# 性能已经满足需求
+schedule.threadpoolSize=1
+
+# 为隔离qemu侧线程引入的任务队列，因为qemu一侧只有一个IO线程
+# 当qemu一侧调用aio接口的时候直接将调用push到任务队列就返回，
+# 这样libcurve不占用qemu的线程，不阻塞其异步调用
+isolation.taskQueueCapacity=1000000
+
+# 隔离qemu线程的任务队列线程池大小, 默认值为1个线程
+isolation.taskThreadPoolSize=1
+
+
+#
+################ 与chunkserver通信相关配置 #############
+#
+# 读写接口失败的OP之间重试睡眠
+chunkserver.opRetryIntervalUS=100000
+
+# 失败的OP重试次数
+chunkserver.opMaxRetry=3
+
+# 与chunkserver通信的rpc超时时间
+chunkserver.rpcTimeoutMS=1000
+
+# 开启基于appliedindex的读，用于性能优化
+chunkserver.enableAppliedIndexRead=1
+
+# 重试请求之间睡眠最长时间
+# 因为当网络拥塞的时候或者chunkserver出现过载的时候，需要增加睡眠时间
+# 这个时间最大为maxRetrySleepIntervalUS
+chunkserver.maxRetrySleepIntervalUS=8000000
+
+# 重试请求的超时rpc时间最大值，超时时间会遵循指数退避策略
+# 因为当网络拥塞的时候出现超时，需要增加RPC超时时间
+# 这个时间最大为maxRPCTimeoutMS
+chunkserver.maxRPCTimeoutMS=8000
+
+# 同一个chunkserver连续超时上限次数
+# 如果超过这个值，就会进行健康检查，健康检查失败后，会标记为unstable
+chunkserver.maxStableTimeoutTimes=64
+# chunkserver上rpc连续超时后，健康检查请求的超时时间
+chunkserver.checkHealthTimeoutMs=100
+# 同一个server上unstable的chunkserver数量超过这个值之后
+# 所有的chunkserver都会标记为unstable
+chunkserver.serverStableThreshold=3
+
+# 当底层chunkserver压力大时，可能也会触发unstable
+# 由于copyset leader可能发生变更，会导致请求超时时间设置为默认值，从而导致IO hang
+# 真正宕机的情况下，请求重试一定次数后会处理完成
+# 如果一直重试，则不是宕机情况，这时候超时时间还是要进入指数退避逻辑
+# 当一个请求重试次数超过这个值时，其超时时间一定进入指数退避
+chunkserver.minRetryTimesForceTimeoutBackoff=5
+
+# 当一个rpc重试次数超过maxRetryTimesBeforeConsiderSuspend的时候
+# 记为悬挂IO，metric会报警
+chunkserver.maxRetryTimesBeforeConsiderSuspend=20
+
+#
+################# 文件级别配置项 #############
+#
+# libcurve底层rpc调度允许最大的未返回rpc数量，每个文件的inflight RPC独立
+global.fileMaxInFlightRPCNum=64
+
+# 文件IO下发到底层chunkserver最大的分片KB
+global.fileIOSplitMaxSizeKB=64
+
+#
+################# log相关配置 ###############
+#
+# log等级 INFO=0/WARNING=1/ERROR=2/FATAL=3
+global.logLevel=0
+# 设置log的路径
+global.logPath=/data/log/curve/
+# 单元测试情况下可改用如下路径
+# global.logPath=./runlog/
+
+#
+############### metric 配置信息 #############
+#
+global.metricDummyServerStartPort=9000
+
+#
+# session map文件，存储打开文件的filename到path的映射
+#
+global.sessionMapPath=./session_map.json
diff --git a/conf/mds.conf b/conf/mds.conf
@@ -148,7 +148,7 @@ mds.topology.CreateCopysetRpcRetrySleepTimeMs=1000
 # Topology模块刷新metric时间间隔
 mds.topology.UpdateMetricIntervalSec=60
 # 物理池使用百分比，即使用量超过这个值即不再往这个池分配
-mds.topology.PoolUsagePercentLimit=90
+mds.topology.PoolUsagePercentLimit=85
 # 多pool选pool策略 0:Random, 1:Weight
 mds.topology.choosePoolPolicy=0
 

diff --git a/conf/py_client.conf b/conf/py_client.conf
@@ -0,0 +1,138 @@
+#
+################### mds一侧配置信息 ##################
+#
+
+# mds的地址信息，对于mds集群，地址以逗号隔开
+mds.listen.addr=127.0.0.1:6666
+
+# 初始化阶段向mds注册开关，默认为开（此处显式关闭）
+mds.registerToMDS=false
+
+# 与mds通信的rpc超时时间
+mds.rpcTimeoutMS=500
+
+# 与mds通信rpc最大的超时时间, 指数退避的超时时间不能超过这个值
+mds.maxRPCTimeoutMS=2000
+
+# 与mds通信重试总时间
+mds.maxRetryMS=8000
+
+# 在当前mds上连续重试次数超过该限制就切换, 这个失败次数包含超时重试次数
+mds.maxFailedTimesBeforeChangeMDS=2
+
+# 与MDS一侧保持一个lease时间内多少次续约
+mds.refreshTimesPerLease=4
+
+# mds RPC接口每次重试之前需要先睡眠一段时间
+mds.rpcRetryIntervalUS=100000
+
+#
+################# metacache配置信息 ################
+#
+
+# 获取leader的rpc超时时间
+metacache.getLeaderTimeOutMS=500
+
+# 获取leader的重试次数
+metacache.getLeaderRetry=5
+
+# 获取leader接口每次重试之前需要先睡眠一段时间
+metacache.rpcRetryIntervalUS=100000
+
+#
+############### 调度层的配置信息 #############
+#
+
+# 调度层队列大小，每个文件对应一个队列
+# 调度队列的深度会影响client端整体吞吐，这个队列存放的是异步IO任务。
+schedule.queueCapacity=1000000
+
+# 队列的执行线程数量
+# 执行线程所要做的事情就是将IO取出，然后发到网络就返回取下一个网络任务。一个任务从
+# 队列取出到发送完rpc请求大概在(20us-100us)，20us是正常情况下不需要获取leader的时候
+# 如果在发送的时候需要获取leader，时间会在100us左右，一个线程的吞吐在10w-50w
+# 性能已经满足需求
+schedule.threadpoolSize=1
+
+# 为隔离qemu侧线程引入的任务队列，因为qemu一侧只有一个IO线程
+# 当qemu一侧调用aio接口的时候直接将调用push到任务队列就返回，
+# 这样libcurve不占用qemu的线程，不阻塞其异步调用
+isolation.taskQueueCapacity=1000000
+
+# 隔离qemu线程的任务队列线程池大小, 默认值为1个线程
+isolation.taskThreadPoolSize=1
+
+
+#
+################ 与chunkserver通信相关配置 #############
+#
+# 读写接口失败的OP之间重试睡眠
+chunkserver.opRetryIntervalUS=100000
+
+# 失败的OP重试次数
+chunkserver.opMaxRetry=2500000
+
+# 与chunkserver通信的rpc超时时间
+chunkserver.rpcTimeoutMS=1000
+
+# 开启基于appliedindex的读，用于性能优化
+chunkserver.enableAppliedIndexRead=1
+
+# 重试请求之间睡眠最长时间
+# 因为当网络拥塞的时候或者chunkserver出现过载的时候，需要增加睡眠时间
+# 这个时间最大为maxRetrySleepIntervalUS
+chunkserver.maxRetrySleepIntervalUS=8000000
+
+# 重试请求的超时rpc时间最大值，超时时间会遵循指数退避策略
+# 因为当网络拥塞的时候出现超时，需要增加RPC超时时间
+# 这个时间最大为maxRPCTimeoutMS
+chunkserver.maxRPCTimeoutMS=8000
+
+# 同一个chunkserver连续超时上限次数
+# 如果超过这个值，就会进行健康检查，健康检查失败后，会标记为unstable
+chunkserver.maxStableTimeoutTimes=10
+# chunkserver上rpc连续超时后，健康检查请求的超时时间
+chunkserver.checkHealthTimeoutMs=100
+# 同一个server上unstable的chunkserver数量超过这个值之后
+# 所有的chunkserver都会标记为unstable
+chunkserver.serverStableThreshold=3
+
+# 当底层chunkserver压力大时，可能也会触发unstable
+# 由于copyset leader可能发生变更，会导致请求超时时间设置为默认值，从而导致IO hang
+# 真正宕机的情况下，请求重试一定次数后会处理完成
+# 如果一直重试，则不是宕机情况，这时候超时时间还是要进入指数退避逻辑
+# 当一个请求重试次数超过这个值时，其超时时间一定进入指数退避
+chunkserver.minRetryTimesForceTimeoutBackoff=5
+
+# 当一个rpc重试次数超过maxRetryTimesBeforeConsiderSuspend的时候
+# 记为悬挂IO，metric会报警
+chunkserver.maxRetryTimesBeforeConsiderSuspend=20
+
+#
+################# 文件级别配置项 #############
+#
+# libcurve底层rpc调度允许最大的未返回rpc数量，每个文件的inflight RPC独立
+global.fileMaxInFlightRPCNum=64
+
+# 文件IO下发到底层chunkserver最大的分片KB
+global.fileIOSplitMaxSizeKB=64
+
+#
+################# log相关配置 ###############
+#
+# log等级 INFO=0/WARNING=1/ERROR=2/FATAL=3
+global.logLevel=0
+# 设置log的路径
+global.logPath=/data/log/curve/
+# 单元测试情况下可改用如下路径
+# global.logPath=./runlog/
+
+#
+############### metric 配置信息 #############
+#
+global.metricDummyServerStartPort=10000
+
+#
+# session map文件，存储打开文件的filename到path的映射
+#
+global.sessionMapPath=./session_map.json
diff --git a/conf/s3_test.conf b/conf/s3_test.conf
Original file line number	Diff line number	Diff line change
Expand Up		@@ -74,3 +74,4 @@ bazel-testlogs
		*.log

		.clang-format
		!curve-snapshotcloneserver-nginx/app/lib