@@ -2132,6 +2132,172 @@ jobs:
2132
2132
path : /tmp/priority_scheduler
2133
2133
destination : devcluster-priority_scheduler-logs
2134
2134
2135
# Runs the k6 performance suite (see PERF_K6_FLAGS below) against a devcluster
# master whose Postgres is reached through an SSH tunnel via the perf bastion.
#
# Parameters:
#   snapshot-after-migrations: when true, take an RDS snapshot of
#     ${PERF_DB_AWS_NAME} after the migration step, unless
#     /tmp/no-migrations-needed exists.
#   deploy-db: when true, restore a per-build RDS instance
#     (ci-perf-db-${CIRCLE_BUILD_NUM}) from a snapshot, point the tunnel at it,
#     and delete it at the end of the job even if earlier steps failed.
test-perf:
  parameters:
    snapshot-after-migrations:
      type: boolean
      default: false
    deploy-db:
      type: boolean
      default: false
  machine:
    image: <<pipeline.parameters.machine-image>>
  resource_class: xlarge
  steps:
    - queue/until_front_of_line:
        only-on-branch: main
        time: "120"  # Wait two hours at most. Adjust this over time.
    - checkout
    - attach_workspace:
        at: .
    - setup-python-venv:
        executor: <<pipeline.parameters.machine-image>>
    - install-devcluster
    - run:
        name: Install upload deps
        command: tools/scripts/retry.sh pip install requests determined psycopg2-binary

    - when:
        condition: <<parameters.deploy-db>>
        steps:
          - run:
              name: Select snapshot to use
              command: |
                # Default; overridden below if a per-commit snapshot is found
                # (the later export in $BASH_ENV wins).
                echo 'export PERF_SNAPSHOT_TO_USE="perf-test-base-snapshot"' >> "$BASH_ENV"

                # Snapshot ids look like ci-snapshot-commit-<sha>; field 3 is the sha.
                SNAPSHOT_COMMITS=$(aws rds describe-db-snapshots \
                  --region="us-west-2" \
                  --query "DBSnapshots[?TagList[?Key=='ci-snapshot']].DBSnapshotIdentifier" \
                  --output json | jq -r '.[] | split("-")[3]')
                echo "Snapshot commits (${SNAPSHOT_COMMITS})"

                # Walk the newest 1001 commits and pick the first one that has a
                # snapshot. Exact whole-line matching means an exhausted history
                # or an empty snapshot list can never produce a false match.
                while read -r COMMIT; do
                  if grep -Fxq "${COMMIT}" <<< "${SNAPSHOT_COMMITS}"; then
                    echo "export PERF_SNAPSHOT_TO_USE=\"ci-snapshot-commit-${COMMIT}\"" >> "$BASH_ENV"
                    break
                  fi
                done < <(git log --format="%H" -n 1001)

                source "$BASH_ENV"
                echo "Deciding to use $PERF_SNAPSHOT_TO_USE"
          - run:
              name: Wait for snapshot to be available
              command: |
                aws rds wait db-snapshot-available \
                  --region "us-west-2" \
                  --db-snapshot-identifier "${PERF_SNAPSHOT_TO_USE}"
          - run:
              name: Deploy database
              command: |
                aws rds restore-db-instance-from-db-snapshot \
                  --region="us-west-2" \
                  --db-snapshot-identifier="${PERF_SNAPSHOT_TO_USE}" \
                  --db-instance-identifier="ci-perf-db-${CIRCLE_BUILD_NUM}" \
                  --no-multi-az \
                  --no-publicly-accessible \
                  --no-auto-minor-version-upgrade \
                  --db-parameter-group-name="logquerieslong" \
                  --tags "Key=ci-snapshot" \
                  --vpc-security-group-ids="${PERF_DB_SECURITY_GROUP_ID}"
              no_output_timeout: 30m
          # Wait before reading the endpoint: the address is only populated
          # once the instance exists and is available.
          - run:
              name: Wait for database to be ready
              command: |
                aws rds wait db-instance-available \
                  --region="us-west-2" \
                  --db-instance-identifier="ci-perf-db-${CIRCLE_BUILD_NUM}"
              # The waiter prints nothing while polling.
              no_output_timeout: 30m
          - run:
              name: Get db instance host
              command: |
                DB_HOST=$(aws rds describe-db-instances \
                  --region us-west-2 \
                  --db-instance-identifier "ci-perf-db-${CIRCLE_BUILD_NUM}" \
                  --query "DBInstances[0].Endpoint.Address" \
                  --output text)
                # PERF_DB_HOST is what the port-forward step reads; RDS_HOST is
                # kept for anything that still relies on the old name.
                echo "export PERF_DB_HOST=\"${DB_HOST}\"" >> "$BASH_ENV"
                echo "export RDS_HOST=\"${DB_HOST}\"" >> "$BASH_ENV"
                source "$BASH_ENV"
                echo "perf db host ${PERF_DB_HOST}"

    - run:
        name: Add SSH key
        command: echo "${PERF_DB_BASTION_SSH_KEY}" | base64 --decode | ssh-add -
    - run:
        name: Port forward to bastion instance
        command: ssh -L "5432:${PERF_DB_HOST}:5432" -N -f "ubuntu@${PERF_DB_BASTION_HOST}"
    - start-devcluster:
        target-stage: master
        devcluster-config: perftest.devcluster.yaml
    - run:
        name: Wait and record any migrations ran
        command: python .circleci/scripts/wait_for_perf_migration_upload_results.py

    - when:
        condition: <<parameters.snapshot-after-migrations>>
        steps:
          - run:
              name: Take and wait for RDS snapshot, only on main and when migrations were applied
              command: |
                # Marker file presumably written by the migration-wait script
                # above when nothing was applied -- TODO confirm.
                if [ -f /tmp/no-migrations-needed ]; then
                  echo "/tmp/no-migrations-needed exists, no need to take a snapshot"
                  exit 0
                fi

                COMMIT=$(git log -1 --pretty=format:%H)
                echo "Taking snapshot"
                aws rds create-db-snapshot \
                  --region="us-west-2" \
                  --db-instance-identifier="${PERF_DB_AWS_NAME}" \
                  --db-snapshot-identifier="ci-snapshot-commit-${COMMIT}" \
                  --tags "Key=ci-snapshot"

                echo "Snapshot taken now waiting for it to become completed"
                aws rds wait db-snapshot-completed \
                  --region="us-west-2" \
                  --db-snapshot-identifier="ci-snapshot-commit-${COMMIT}"
                echo "Snapshot completed"
              # The waiter prints nothing while polling.
              no_output_timeout: 30m
    - run:
        name: Build performance test Docker image
        command: make -C performance build
    - run:
        name: Run performance test
        command: |
          export PERF_DOCKER_FLAGS="--network=host"
          export PERF_K6_FLAGS='-e DET_ADMIN_USERNAME="admin" \
            -e DET_ADMIN_PASSWORD="" \
            -e model_name="tnjpuojqzbluqiyyqilftulsw" \
            -e model_version_number="1" \
            -e trial_id="8282" \
            -e experiment_id="100" \
            -e task_id="backported.8282" \
            -e metric_name="85c9" \
            -e metric_type="METRIC_TYPE_TRAINING" \
            -e batches="1800" \
            -e batches_margin="99" \
            -e resource_pool="default"'
          make -C performance run
    - run:
        name: Upload result of performance test to Postgres result db
        command: python .circleci/scripts/upload_perf_results.py ./performance/reports/latest.results.json

    - when:
        condition: <<parameters.deploy-db>>
        steps:
          - run:
              name: Delete RDS instance
              # `when: always` belongs on the run step so the per-build
              # instance is removed even when an earlier step failed.
              when: always
              command: |
                aws rds delete-db-instance \
                  --region="us-west-2" \
                  --db-instance-identifier="ci-perf-db-${CIRCLE_BUILD_NUM}" \
                  --skip-final-snapshot

    - slack/status:
        fail_only: false
        only_for_branches: main
        failure_message: ':thisisfine: A \`${CIRCLE_JOB}\` job on branch \`${CIRCLE_BRANCH}\` has failed!'
        mentions: "U03CP4ZKY2D"  # Ping Nick Blaskey for now. Eventually switch this to perf team.
2300
+
2135
2301
deploy :
2136
2302
parameters :
2137
2303
compute-agent-instance-type :
@@ -2752,6 +2918,20 @@ workflows:
2752
2918
target-stage : agent
2753
2919
wait-for-master : false
2754
2920
2921
# Perf run for main: no per-build database is deployed (deploy-db: false) and
# a snapshot is requested after migrations. Restricted to the main branch.
- test-perf:
    name: test-perf
    snapshot-after-migrations: true
    deploy-db: false
    requires:
      - build-go
    context:
      - perf-tests
      - aws
    filters:
      branches:
        only:
          - main
2934
+
2755
2935
- deploy :
2756
2936
name : deploy-latest-master-cluster
2757
2937
enable-cors : true
@@ -3125,6 +3305,27 @@ workflows:
3125
3305
aux-agent-instance-type : ["m5.large"]
3126
3306
max-dynamic-agents : [2]
3127
3307
3308
+ # Perf tests.
3309
# Manual approval gate; the perf jobs below list it in `requires`.
- request-perf-tests:
    type: approval
    filters: *upstream-feature-branch
3312
+
3313
# Only build once perf tests have been requested.
- build-go:
    requires:
      - request-perf-tests
3316
+
3317
# Perf run for feature branches: deploys its own database (deploy-db: true)
# and does not request a snapshot after migrations.
- test-perf:
    name: test-perf-feature-branch
    snapshot-after-migrations: false
    deploy-db: true
    requires:
      - build-go
      - request-perf-tests
    context:
      - perf-tests
      - aws
    filters: *upstream-feature-branch
3328
+
3128
3329
# Nightly tests
3129
3330
- request-gpu-nightly :
3130
3331
type : approval
0 commit comments