#!/usr/bin/env bash
# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.
# Stop on the first failing command, on any reference to an unset variable,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Helpers and configuration used below live in these two files
# (presumably the ci_*_heading / read_list helpers and K8S_CONTEXT — confirm).
source misc/shlib/shlib.bash
source test/cloudtest/config.bash

# Arguments passed to bin/pytest; the JUnit report is named after the
# Buildkite job id.
run_args=("--junitxml=junit_cloudtest_$BUILDKITE_JOB_ID.xml")
# Run kubectl via `bin/ci-builder run stable`, always selecting the cluster
# context named by $K8S_CONTEXT. All arguments are forwarded unchanged.
kubectl() {
  local -a wrapper=(bin/ci-builder run stable kubectl)
  "${wrapper[@]}" --context="$K8S_CONTEXT" "$@"
}
# Export coverage for one binary in lcov format.
# Arguments: $1 - path to the binary to export coverage for
# Reads:     coverage/$BUILDKITE_JOB_ID.profdata
# Writes:    coverage/$BUILDKITE_JOB_ID-<basename of $1>.lcov
export_cov() {
  local binary=$1
  local pattern
  local -a ignore_flags=()

  # Source paths that are excluded from the exported report.
  for pattern in .cargo/ target/release/ /cargo/ /mnt/build/ /rustc/; do
    ignore_flags+=("--ignore-filename-regex=$pattern")
  done

  bin/ci-builder run stable rust-cov export \
    "${ignore_flags[@]}" \
    --format=lcov "$binary" --instr-profile=coverage/"$BUILDKITE_JOB_ID".profdata src/ \
    > coverage/"$BUILDKITE_JOB_ID"-"$(basename "$binary")".lcov
}
# Tests are split across parallel Buildkite jobs unless the plugin arguments
# contain --no-test-parallelism. Every other plugin argument is passed on to
# pytest. read_list is defined outside this file; it presumably fills the
# `result` array from BUILDKITE_PLUGIN_CLOUDTEST_ARGS — confirm in shlib.
test_parallelism=true
if read_list BUILDKITE_PLUGIN_CLOUDTEST_ARGS; then
  for arg in "${result[@]}"; do
    case "$arg" in
      --no-test-parallelism) test_parallelism=false ;;
      *) run_args+=("$arg") ;;
    esac
  done
fi

if [[ "$test_parallelism" == true ]]; then
  # Without Buildkite parallelism this degenerates to a single split, group 1.
  run_args+=("--splits=${BUILDKITE_PARALLEL_JOB_COUNT:-1}")
  # The job index is shifted by one before being used as the group number.
  run_args+=("--group=$((${BUILDKITE_PARALLEL_JOB:-0} + 1))")
fi
# Start time of this step; cleanup uses it to detect timeouts and as the
# --since bound for journalctl.
STEP_START_TIMESTAMP="$(date +'%Y-%m-%d %H:%M:%S')"

ci_collapsed_heading "kind: Increase system limits..."
for limit in fs.inotify.max_user_watches=524288 fs.inotify.max_user_instances=512; do
  sudo sysctl "$limit"
done

ci_collapsed_heading "kind: Make sure kind is running..."
bin/ci-builder run stable test/cloudtest/setup

# A cancelled build may not have cleaned up after the previous cloudtest run,
# so always force a reset before starting.
ci_collapsed_heading "kind: Purging state from previous builds..."
bin/ci-builder run stable test/cloudtest/reset
rm -f kubectl-*.log

ci_collapsed_heading "kail: Start a new instance"
NO_COLOR=1 bin/ci-builder run stable --detach --name "kail" kail --context "$K8S_CONTEXT" --log-level info

# Human-readable form of the test command, used in the heading and passed to
# ci-annotate-errors during cleanup.
TEST_CMD="bin/pytest ${run_args[*]}"
ci_uncollapsed_heading "cloudtest: Running \`$TEST_CMD\`"

# Overwritten with pytest's exit status if the test run fails.
TEST_RESULT=0
# Post-command steps, meant to be installed as the EXIT/SIGTERM/SIGINT trap.
# Records a timeout marker in run.log, optionally collects coverage, gathers
# and uploads logs, runs ci-annotate-errors, then resets cloudtest and purges
# docker containers.
# Globals read:  STEP_START_TIMESTAMP, BUILDKITE_TIMEOUT, BUILDKITE_LABEL,
#                BUILDKITE_JOB_ID, CI_COVERAGE_ENABLED, TEST_CMD, TEST_RESULT
# Globals unset: CI_EXTRA_ARGS
# Exits with the status of bin/ci-annotate-errors (0 if it succeeded).
cleanup() {
  # This function ends in `exit`. When it was entered through the SIGTERM or
  # SIGINT trap, that `exit` would fire the EXIT trap and run every step below
  # a second time, so disarm the EXIT trap first.
  trap - EXIT

  local start_time end_time elapsed pod program artifacts_str
  local -a lcov_args=() artifacts=()
  local annotate_result=0

  echo "--- Post command steps"

  # Buildkite exposes no way to check if a test timed out (and wasn't cancelled
  # manually), so we have to calculate it ourselves.
  start_time=$(date -d "$STEP_START_TIMESTAMP" +%s)
  end_time=$(date +%s)
  elapsed=$((end_time - start_time))
  # BUILDKITE_TIMEOUT is `false` when the step has no timeout; evaluating that
  # arithmetically under `set -u` would abort the whole cleanup, so only
  # compare when it is a plain number (10# guards against leading zeros).
  if [[ "${BUILDKITE_TIMEOUT:-}" =~ ^[0-9]+$ ]] && ((elapsed >= 10#$BUILDKITE_TIMEOUT * 60)); then
    printf "\n%s" "$BUILDKITE_LABEL: test timed out" >> run.log
  fi

  if [[ -n "${CI_COVERAGE_ENABLED:-}" ]]; then
    ci_uncollapsed_heading "cloudtest: Fetching binaries for coverage"
    mkdir -p coverage/
    chmod 777 coverage/
    kubectl cp environmentd-0:/usr/local/bin/environmentd coverage/environmentd
    kubectl cp environmentd-0:/coverage coverage/
    # Word-splitting the pod list is intentional: one name per word.
    for pod in $(kubectl get pods -o name | grep -E 'cluster-'); do
      kubectl cp "$pod":/coverage coverage/ || true # Could get deleted
      kubectl cp "$pod":/usr/local/bin/clusterd coverage/clusterd || true
    done

    ci_unimportant_heading "cloudtest: Generate coverage information"
    if [[ -n "$(find . -name '*.profraw')" ]]; then
      find . -name '*.profraw' -exec bin/ci-builder run stable rust-profdata merge -sparse -o coverage/"$BUILDKITE_JOB_ID".profdata {} +
      find . -name '*.profraw' -delete
      # Export one lcov file per binary that was actually fetched, then merge.
      for program in clusterd environmentd; do
        if [[ -f coverage/"$program" ]]; then
          export_cov coverage/"$program"
          lcov_args+=("-a" coverage/"$BUILDKITE_JOB_ID"-"$program".lcov)
        fi
      done
      rm coverage/"$BUILDKITE_JOB_ID".profdata
      if ((${#lcov_args[@]} != 0)); then
        bin/ci-builder run stable lcov "${lcov_args[@]}" -o coverage/"$BUILDKITE_JOB_ID".lcov
        rm coverage/"$BUILDKITE_JOB_ID"-*.lcov
        bin/ci-builder run stable zstd coverage/"$BUILDKITE_JOB_ID".lcov
        buildkite-agent artifact upload coverage/"$BUILDKITE_JOB_ID".lcov.zst
        rm -rf coverage
      fi
    fi
  fi

  ci_unimportant_heading "cloudtest: Cleaning up mz_debug files from test/cloudtest/test_mz_debug_tool.py"
  # -prune keeps find from listing directories nested inside a match; without
  # it rm is also handed paths it has already removed and fails.
  find . -type d -name 'mz_debug*' -prune -exec rm -r {} +

  ci_unimportant_heading "kail: Stopping instance..."
  docker logs kail > kail-output.log 2>&1
  docker stop kail

  ci_unimportant_heading "cloudtest: Uploading logs..."
  for pod in $(kubectl get pods -o name | grep -v -E 'kubernetes|minio|cockroach|redpanda'); do
    kubectl logs --prefix=true "$pod" &>> kubectl-get-logs.log || true
    kubectl logs --previous --prefix=true "$pod" &>> kubectl-get-logs-previous.log || true
  done
  kubectl get events > kubectl-get-events.log || true
  kubectl get all > kubectl-get-all.log || true
  # Replace each "Environment:" section (the header line and everything
  # indented deeper than it) with a single "[REDACTED]" line.
  kubectl describe all | awk '
BEGIN { redact=0 }
/^[[:space:]]*Environment:/ {
indent = match($0, /[^ ]/) - 1
print substr($0, 1, indent) "Environment: [REDACTED]"
redact = 1
next
}
redact {
current_indent = match($0, /[^ ]/) - 1
if (current_indent <= indent || NF == 0) {
redact = 0
} else {
next
}
}
{ print }
' > kubectl-describe-all.log || true
  kubectl get pods -o wide > kubectl-pods-with-nodes.log || true
  kubectl -n kube-system get events > kubectl-get-events-kube-system.log || true
  kubectl -n kube-system get all > kubectl-get-all-kube-system.log || true
  kubectl -n kube-system describe all > kubectl-describe-all-kube-system.log || true
  # shellcheck disable=SC2024
  sudo journalctl --merge --since "$STEP_START_TIMESTAMP" > journalctl-merge.log

  mapfile -t artifacts < <(printf "run.log\nkubectl-get-logs.log\nkubectl-get-logs-previous.log\nkubectl-get-events.log\nkubectl-get-all.log\nkubectl-describe-all.log\nkubectl-pods-with-nodes.log\nkubectl-get-events-kube-system.log\nkubectl-get-all-kube-system.log\nkubectl-describe-all-kube-system.log\njournalctl-merge.log\nkail-output.log\n"; find . -name 'junit_*.xml')

  # The secret scan and the artifact upload run concurrently; `wait` below is
  # the barrier for both.
  {
    bin/ci-builder run stable trufflehog --no-update --no-verification --json --exclude-detectors=coda,dockerhub,box,npmtoken,github,snykkey,eightxeight,sumologickey,miro,fmfw,logzio,qase filesystem "${artifacts[@]}" | trufflehog_jq_filter_logs > trufflehog.log
  } &

  artifacts_str=$(IFS=";"; echo "${artifacts[*]}")
  unset CI_EXTRA_ARGS # We don't want extra args for the annotation

  # We have to upload artifacts before ci-annotate-errors, so that the annotations can link to the artifacts
  buildkite-agent artifact upload "$artifacts_str" &
  wait

  # Continue even if ci-annotate-errors fails; its status becomes our exit code.
  bin/ci-builder run stable bin/ci-annotate-errors --test-cmd="$TEST_CMD" --test-result="$TEST_RESULT" "${artifacts[@]}" trufflehog.log > ci-annotate-errors.log || annotate_result=$?
  buildkite-agent artifact upload "ci-annotate-errors.log"

  # File should not be empty, see database-issues#7569
  test -s kubectl-get-logs-previous.log

  ci_unimportant_heading "cloudtest: Resetting..."
  bin/ci-builder run stable test/cloudtest/reset

  ci_collapsed_heading ":docker: Purging all existing docker containers and volumes, regardless of origin"
  sudo systemctl restart docker
  docker ps --all --quiet | xargs --no-run-if-empty docker rm --force --volumes

  exit "$annotate_result"
}
# Run the post-command steps on normal exit as well as on SIGTERM/SIGINT.
trap cleanup EXIT SIGTERM SIGINT

# Buildkite's view of the output keeps the ANSI escape codes; sed strips them
# from the copy that tee writes to run.log. A failing run only records its
# status so that the trap above still gets to do its work.
{
  stdbuf --output=L --error=L bin/ci-builder run stable bin/pytest "${run_args[@]}" \
    |& tee >(sed -r "s/\x1B\[[0-9;]*[A-Za-z]//g" > run.log)
} || TEST_RESULT=$?

if [[ "$TEST_RESULT" -ne 0 ]]; then
  # Give the logs some time to log panics, otherwise they might be missing later
  sleep 10
fi