From 5b5cc666c0bb4d104ea67bbdfe603f0d8b74298c Mon Sep 17 00:00:00 2001 From: Sarah Chen Date: Fri, 6 Feb 2026 15:27:36 -0500 Subject: [PATCH 1/3] Remove local benchmarks --- .gitlab-ci.yml | 17 +++ .gitlab/benchmarks.yml | 109 +----------------- .gitlab/macrobenchmarks.yml | 6 - .gitlab/thresholds/java-spring-petclinic.yml | 36 ++++++ benchmark/Dockerfile | 103 ----------------- benchmark/README.MD | 29 ----- benchmark/benchmarks.sh | 42 ------- benchmark/dacapo/benchmark.json | 45 -------- benchmark/dacapo/run.sh | 41 ------- benchmark/load/insecure-bank/k6.js | 76 ------------ benchmark/load/insecure-bank/start-servers.sh | 28 ----- benchmark/load/petclinic/k6.js | 61 ---------- benchmark/load/petclinic/start-servers.sh | 28 ----- benchmark/load/run.sh | 77 ------------- benchmark/run.sh | 43 ------- .../startup/insecure-bank/benchmark.json | 22 ---- benchmark/startup/petclinic/benchmark.json | 34 ------ benchmark/startup/run.sh | 5 - benchmark/utils/k6.js | 21 ---- benchmark/utils/run-k6-load-test.sh | 21 ---- benchmark/utils/run-on-server-ready.sh | 11 -- benchmark/utils/run-sirun-benchmarks.sh | 41 ------- benchmark/utils/update-java-version.sh | 5 - 23 files changed, 54 insertions(+), 847 deletions(-) create mode 100644 .gitlab/thresholds/java-spring-petclinic.yml delete mode 100644 benchmark/Dockerfile delete mode 100644 benchmark/README.MD delete mode 100755 benchmark/benchmarks.sh delete mode 100644 benchmark/dacapo/benchmark.json delete mode 100755 benchmark/dacapo/run.sh delete mode 100644 benchmark/load/insecure-bank/k6.js delete mode 100755 benchmark/load/insecure-bank/start-servers.sh delete mode 100644 benchmark/load/petclinic/k6.js delete mode 100755 benchmark/load/petclinic/start-servers.sh delete mode 100755 benchmark/load/run.sh delete mode 100755 benchmark/run.sh delete mode 100644 benchmark/startup/insecure-bank/benchmark.json delete mode 100644 benchmark/startup/petclinic/benchmark.json delete mode 100755 benchmark/startup/run.sh delete mode 100644 benchmark/utils/k6.js delete mode 100755 benchmark/utils/run-k6-load-test.sh delete mode 100755 benchmark/utils/run-on-server-ready.sh delete mode 100755 benchmark/utils/run-sirun-benchmarks.sh delete mode 100755 benchmark/utils/update-java-version.sh diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 064b031355d..9bb4b6a7bab 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -4,12 +4,29 @@ include: - local: ".gitlab/macrobenchmarks.yml" - local: ".gitlab/exploration-tests.yml" - local: ".gitlab/ci-visibility-tests.yml" + - project: 'DataDog/apm-reliability/apm-sdks-benchmarks' + file: '.gitlab/ci-java-spring-petclinic.yml' + ref: 'main' + - project: 'DataDog/apm-reliability/apm-sdks-benchmarks' + file: '.gitlab/ci-java-insecure-bank.yml' + ref: 'main' + - project: 'DataDog/apm-reliability/apm-sdks-benchmarks' + file: '.gitlab/ci-java-dacapo.yml' + ref: 'main' stages: - build - publish - shared-pipeline - benchmarks + - infrastructure + - java-spring-petclinic-tests + - java-spring-petclinic-macrobenchmarks + - java-startup-microbenchmarks + - java-load-microbenchmarks + - java-dacapo-microbenchmarks + - generate-slos + - upload-to-bp-api - macrobenchmarks - tests - exploration-tests diff --git a/.gitlab/benchmarks.yml b/.gitlab/benchmarks.yml index 28aef7ad58b..e4e6e1c029c 100644 --- a/.gitlab/benchmarks.yml +++ b/.gitlab/benchmarks.yml @@ -1,111 +1,4 @@ -.benchmarks: - stage: benchmarks - timeout: 1h - tags: ["runner:apm-k8s-tweaked-metal"] - image: 
486234852809.dkr.ecr.us-east-1.amazonaws.com/ci/benchmarking-platform:dd-trace-java-benchmarks - needs: [ "build", "publish-artifacts-to-s3" ] - rules: - - if: '$POPULATE_CACHE' - when: never - - if: '$CI_COMMIT_TAG =~ /^v?[0-9]+\.[0-9]+\.[0-9]+$/' - when: manual - allow_failure: true - - if: '$CI_COMMIT_BRANCH == "master"' - when: on_success - interruptible: false - - when: on_success - interruptible: true - script: - - export ARTIFACTS_DIR="$(pwd)/reports" && mkdir -p "${ARTIFACTS_DIR}" - - git config --global url."https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.ddbuild.io/DataDog/".insteadOf "https://github.com/DataDog/" - - git clone --branch dd-trace-java/tracer-benchmarks-parallel https://github.com/DataDog/benchmarking-platform.git /platform && cd /platform - artifacts: - name: "reports" - paths: - - reports/ - expire_in: 3 months - variables: - UPSTREAM_PROJECT_ID: $CI_PROJECT_ID # The ID of the current project. This ID is unique across all projects on the GitLab instance. - UPSTREAM_PROJECT_NAME: $CI_PROJECT_NAME # "dd-trace-java" - UPSTREAM_BRANCH: $CI_COMMIT_REF_NAME # The branch or tag name for which project is built. - UPSTREAM_COMMIT_SHA: $CI_COMMIT_SHA # The commit revision the project is built for. - -benchmarks-startup: - extends: .benchmarks - script: - - !reference [ .benchmarks, script ] - - ./steps/capture-hardware-software-info.sh - - ./steps/run-benchmarks.sh startup - - ./steps/analyze-results.sh startup - -benchmarks-load: - extends: .benchmarks - script: - - !reference [ .benchmarks, script ] - - ./steps/capture-hardware-software-info.sh - - ./steps/run-benchmarks.sh load - - ./steps/analyze-results.sh load - -benchmarks-dacapo: - extends: .benchmarks - script: - - !reference [ .benchmarks, script ] - - ./steps/capture-hardware-software-info.sh - - ./steps/run-benchmarks.sh dacapo - - ./steps/analyze-results.sh dacapo - -benchmarks-post-results: - extends: .benchmarks - tags: ["arch:amd64"] - script: - - !reference [ .benchmarks, script ] - - ./steps/upload-results-to-s3.sh - - ./steps/post-pr-comment.sh - needs: - - job: benchmarks-startup - artifacts: true - - job: benchmarks-load - artifacts: true - - job: benchmarks-dacapo - artifacts: true - -check-big-regressions: - extends: .benchmarks - needs: - - job: benchmarks-startup - artifacts: true - - job: benchmarks-dacapo - artifacts: true - when: on_success - tags: ["arch:amd64"] - rules: - - if: '$POPULATE_CACHE' - when: never - - if: '$CI_COMMIT_BRANCH =~ /backport-pr-/' - when: never - - if: '$CI_COMMIT_BRANCH !~ /^(master|release\/)/' - when: on_success - - when: never - # ARTIFACTS_DIR /go/src/github.com/DataDog/apm-reliability/dd-trace-java/reports/ - # need to convert them - script: - - !reference [ .benchmarks, script ] - - | - for benchmarkType in startup dacapo; do - find "$ARTIFACTS_DIR/$benchmarkType" -name "benchmark-baseline.json" -o -name "benchmark-candidate.json" | while read file; do - relpath="${file#$ARTIFACTS_DIR/$benchmarkType/}" - prefix="${relpath%/benchmark-*}" # Remove the trailing /benchmark-(baseline|candidate).json - prefix="${prefix#./}" # Remove any leading ./ - prefix="${prefix//\//-}" # Replace / with - - case "$file" in - *benchmark-baseline.json) type="baseline" ;; - *benchmark-candidate.json) type="candidate" ;; - esac - echo "Moving $file to $ARTIFACTS_DIR/${type}-${prefix}.converted.json" - cp "$file" "$ARTIFACTS_DIR/${type}-${prefix}.converted.json" - done - done - - bp-runner $CI_PROJECT_DIR/.gitlab/benchmarks/bp-runner.fail-on-regression.yml --debug +# Insert more 
benchmark logic here .dsm-kafka-benchmarks: stage: benchmarks diff --git a/.gitlab/macrobenchmarks.yml b/.gitlab/macrobenchmarks.yml index b1c5681fb5b..36f0222a9f4 100644 --- a/.gitlab/macrobenchmarks.yml +++ b/.gitlab/macrobenchmarks.yml @@ -92,12 +92,6 @@ check-slo-breaches: artifacts: true - job: otel-latest artifacts: true - - job: benchmarks-startup - artifacts: true - - job: benchmarks-load - artifacts: true - - job: benchmarks-dacapo - artifacts: true script: # macrobenchmarks are located here, files are already in "converted" format - export ARTIFACTS_DIR="$(pwd)/platform/artifacts/" && mkdir -p "${ARTIFACTS_DIR}" diff --git a/.gitlab/thresholds/java-spring-petclinic.yml b/.gitlab/thresholds/java-spring-petclinic.yml new file mode 100644 index 00000000000..9064e7bd352 --- /dev/null +++ b/.gitlab/thresholds/java-spring-petclinic.yml @@ -0,0 +1,36 @@ +# Thresholds set based on guidance in https://datadoghq.atlassian.net/wiki/x/LgI1LgE#How-to-choose-thresholds-for-pre-release-gates%3F + +experiments: + - name: Run SLO breach check + steps: + - name: SLO breach check + run: fail_on_breach + # https://datadoghq.atlassian.net/wiki/x/LgI1LgE#How-to-choose-a-warning-range-for-pre-release-gates%3F + warning_range: 10 + # File spec + # https://datadoghq.atlassian.net/wiki/x/LgI1LgE#Specification + # Measurements + # https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario + scenarios: + # Note that thresholds there are chosen based the confidence interval with a 10% adjustment. + + # Standard macrobenchmarks + # https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario&scenario=normal_operation%2Fonly-tracing&trendsType=scenario + - name: normal_operation/only-tracing + thresholds: + - agg_http_req_duration_p50 < 2.6 ms + - agg_http_req_duration_p99 < 8.5 ms + # https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario&scenario=normal_operation%2Fotel-latest&trendsType=scenario + - name: normal_operation/otel-latest + thresholds: + - agg_http_req_duration_p50 < 2.5 ms + - agg_http_req_duration_p99 < 10 ms + + # https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario&scenario=high_load%2Fonly-tracing&trendsType=scenario + - name: high_load/only-tracing + thresholds: + - throughput > 1100.0 op/s + # https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario&scenario=high_load%2Fotel-latest&trendsType=scenario + - name: high_load/otel-latest + thresholds: + - throughput > 1100.0 op/s diff --git a/benchmark/Dockerfile b/benchmark/Dockerfile deleted file mode 100644 index 0279186478a..00000000000 --- a/benchmark/Dockerfile +++ /dev/null @@ -1,103 +0,0 @@ -# Petclinic download and compilation stage -FROM eclipse-temurin:17-jammy as petclinic - -ARG SPRING_PETCLINIC_COMMIT=cefaf55dd124d0635abfe857c3c99a3d3ea62017 - -RUN apt-get update \ - && apt-get -y install git \ - && apt-get -y clean \ - && rm -rf /var/lib/apt/lists/* - -RUN set -eux;\ - git init spring-petclinic;\ - cd spring-petclinic;\ - git remote add origin https://github.com/spring-projects/spring-petclinic.git;\ - git fetch --depth 1 origin ${SPRING_PETCLINIC_COMMIT};\ - git checkout ${SPRING_PETCLINIC_COMMIT};\ - ./mvnw dependency:go-offline - -RUN cd spring-petclinic \ - && ./mvnw package -Dmaven.test.skip=true \ - && cp target/*.jar /spring-petclinic.jar - - -# Insecure bank download and compilation stage -FROM eclipse-temurin:17-jammy as insecure-bank - -RUN apt-get update \ 
- && apt-get -y install git \ - && apt-get -y clean \ - && rm -rf /var/lib/apt/lists/* - -RUN git clone --depth 1 --branch malvarez/spring-boot --single-branch https://github.com/hdiv/insecure-bank.git \ - && cd insecure-bank \ - && ./gradlew -q dependencies - -RUN cd insecure-bank \ - && ./gradlew bootWar \ - && cp build/libs/*.war /insecure-bank.war - -# Dacapo download -FROM debian:bookworm-slim as dacapo -RUN apt-get update \ - && apt-get -y install wget unzip \ - && apt-get -y clean \ - && rm -rf /var/lib/apt/lists/* - -ARG DACAPO_VERSION=23.11-chopin -# The data for the big benchmarks is removed too ensure the final docker image is not too big -RUN wget -nv -O dacapo.zip https://download.dacapobench.org/chopin/dacapo-$DACAPO_VERSION.zip \ - && mkdir /dacapo \ - && unzip dacapo.zip -d /dacapo/ \ - && rm -rf /dacapo/dacapo-$DACAPO_VERSION/dat/luindex \ - && rm -rf /dacapo/dacapo-$DACAPO_VERSION/dat/lusearch \ - && rm -rf /dacapo/dacapo-$DACAPO_VERSION/dat/graphchi \ - && rm dacapo.zip - -FROM debian:bookworm-slim - -RUN apt-get update \ - && apt-get -y install git curl wget procps gettext-base \ - && apt-get -y clean \ - && rm -rf /var/lib/apt/lists/* - -COPY --from=eclipse-temurin:8-jammy /opt/java/openjdk /usr/lib/jvm/8 -COPY --from=eclipse-temurin:11-jammy /opt/java/openjdk /usr/lib/jvm/11 -COPY --from=eclipse-temurin:17-jammy /opt/java/openjdk /usr/lib/jvm/17 - -RUN rm -rf \ - /usr/lib/jvm/*/man \ - /usr/lib/jvm/*/src.zip \ - /usr/lib/jvm/*/lib/src.zip \ - /usr/lib/jvm/*/demo \ - /usr/lib/jvm/*/sample - -ENV JAVA_8_HOME=/usr/lib/jvm/8 -ENV JAVA_11_HOME=/usr/lib/jvm/11 -ENV JAVA_17_HOME=/usr/lib/jvm/17 -ENV JAVA_HOME=${JAVA_8_HOME} -ENV PATH=${PATH}:${JAVA_HOME}/bin - -ARG SIRUN_VERSION=0.1.11 -RUN wget -O sirun.tar.gz https://github.com/DataDog/sirun/releases/download/v$SIRUN_VERSION/sirun-v$SIRUN_VERSION-x86_64-unknown-linux-musl.tar.gz \ - && tar -xzf sirun.tar.gz \ - && rm sirun.tar.gz \ - && mv sirun /usr/bin/sirun - -ARG K6_VERSION=0.45.1 -RUN wget -O k6.tar.gz https://github.com/grafana/k6/releases/download/v$K6_VERSION/k6-v$K6_VERSION-linux-amd64.tar.gz \ - && tar --strip-components=1 -xzf k6.tar.gz \ - && rm k6.tar.gz \ - && mv k6 /usr/bin/k6 - -RUN mkdir -p /app - -COPY --from=petclinic /spring-petclinic.jar /app/spring-petclinic.jar -ENV PETCLINIC=/app/spring-petclinic.jar - -COPY --from=insecure-bank /insecure-bank.war /app/insecure-bank.war -ENV INSECURE_BANK=/app/insecure-bank.war - -COPY --from=dacapo /dacapo/ /app/ -ARG DACAPO_VERSION=23.11-chopin -ENV DACAPO=/app/dacapo-$DACAPO_VERSION.jar diff --git a/benchmark/README.MD b/benchmark/README.MD deleted file mode 100644 index 30f3bbcf864..00000000000 --- a/benchmark/README.MD +++ /dev/null @@ -1,29 +0,0 @@ -# Benchmarks - -This directory contains different types of benchmarks. - -## Running Benchmarks via Docker - -Docker allows the execution of benchmarks without needing to install and configure your development environment. For example, package installation and installation of sirun are performed automatically. - -In order to run benchmarks using Docker, issue the following command from the `benchmark/` folder of this project: - -```sh -./run.sh -``` - -If you run into storage errors (e.g. running out of disk space), try removing all unused Docker containers, networks, and images with `docker system prune -af` before running the script again. Once finished, the reports will be available in the `benchmark/reports/` folder. Note that the script can take ~40 minutes to run. 
- -### Running specific benchmarks - -If you want to run only a specific category of benchmarks, you can do so via arguments: - -1. Run startup benchmarks -```sh -./run.sh startup [application]? -``` - -2. Run load benchmarks -```sh -./run.sh load [application]? -``` diff --git a/benchmark/benchmarks.sh b/benchmark/benchmarks.sh deleted file mode 100755 index 0b245038afa..00000000000 --- a/benchmark/benchmarks.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env bash -set -eu - -readonly SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) -export TRACER_DIR="${SCRIPT_DIR}/.." -export REPORTS_DIR="${SCRIPT_DIR}/reports" -export UTILS_DIR="${SCRIPT_DIR}/utils" -export SHELL_UTILS_DIR="${UTILS_DIR}/shell" -export K6_UTILS_DIR="${UTILS_DIR}/k6" -export TRACER="${SCRIPT_DIR}/tracer/dd-java-agent.jar" -export NO_AGENT_VARIANT="no_agent" - -run_benchmarks() { - local type=$1 - if [[ -d "${type}" ]] && [[ -f "${type}/run.sh" ]]; then - cd "${type}" - ./run.sh "$@" - cd "${SCRIPT_DIR}" - fi -} - -# Find or rebuild tracer to be used in the benchmarks -if [[ ! -f "${TRACER}" ]]; then - mkdir -p "${SCRIPT_DIR}/tracer" - cd "${TRACER_DIR}" - readonly TRACER_VERSION=$(./gradlew properties -q | grep "version:" | awk '{print $2}') - readonly TRACER_COMPILED="${SCRIPT_DIR}/../dd-java-agent/build/libs/dd-java-agent-${TRACER_VERSION}.jar" - if [[ ! -f "${TRACER_COMPILED}" ]]; then - echo "Tracer not found, starting gradle compile ..." - ./gradlew assemble - fi - cp "${TRACER_COMPILED}" "${TRACER}" - cd "${SCRIPT_DIR}" -fi - -if [[ "$#" == '0' ]]; then - for type in 'startup' 'load' 'dacapo'; do - run_benchmarks "$type" - done -else - run_benchmarks "$@" -fi diff --git a/benchmark/dacapo/benchmark.json b/benchmark/dacapo/benchmark.json deleted file mode 100644 index ec0ca767f43..00000000000 --- a/benchmark/dacapo/benchmark.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "name": "dacapo_${BENCHMARK}", - "setup": "bash -c \"mkdir -p ${OUTPUT_DIR}/${VARIANT}\"", - "run": "bash -c \"java ${JAVA_OPTS} -jar ${DACAPO} --converge --scratch-directory=${OUTPUT_DIR}/${VARIANT}/scratch --latency-csv ${BENCHMARK} &> ${OUTPUT_DIR}/${VARIANT}/dacapo.log\"", - "timeout": 150, - "iterations": 1, - "variants": { - "${NO_AGENT_VARIANT}": { - "env": { - "VARIANT": "${NO_AGENT_VARIANT}", - "JAVA_OPTS": "" - } - }, - "tracing": { - "env": { - "VARIANT": "tracing", - "JAVA_OPTS": "-javaagent:${TRACER}" - } - }, - "profiling": { - "env": { - "VARIANT": "profiling", - "JAVA_OPTS": "-javaagent:${TRACER} -Ddd.profiling.enabled=true" - } - }, - "appsec": { - "env": { - "VARIANT": "appsec", - "JAVA_OPTS": "-javaagent:${TRACER} -Ddd.appsec.enabled=true -Ddd.iast.enabled=false" - } - }, - "iast": { - "env": { - "VARIANT": "iast", - "JAVA_OPTS": "-javaagent:${TRACER} -Ddd.iast.enabled=true" - } - }, - "iast_GLOBAL": { - "env": { - "VARIANT": "iast_GLOBAL", - "JAVA_OPTS": "-javaagent:${TRACER} -Ddd.iast.enabled=true -Ddd.iast.context.mode=GLOBAL" - } - } - } -} diff --git a/benchmark/dacapo/run.sh b/benchmark/dacapo/run.sh deleted file mode 100755 index ece44f9e5f0..00000000000 --- a/benchmark/dacapo/run.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/env bash -set -eu - -source "${UTILS_DIR}/update-java-version.sh" 11 - -function message() { - echo "$(date +"%T"): $1" -} - -run_benchmark() { - local type=$1 - - message "dacapo benchmark: ${type} started" - - # export the benchmark - export BENCHMARK="${type}" - - # create output folder for the test - export OUTPUT_DIR="${REPORTS_DIR}/dacapo/${type}" - mkdir -p "${OUTPUT_DIR}" - - 
# substitute environment variables in the json file - benchmark=$(mktemp) - # shellcheck disable=SC2046 - # shellcheck disable=SC2016 - envsubst "$(printf '${%s} ' $(env | cut -d'=' -f1))" "${benchmark}" - - # run the sirun test - sirun "${benchmark}" &>"${OUTPUT_DIR}/${type}.json" - - message "dacapo benchmark: ${type} finished" -} - -if [ "$#" == '2' ]; then - run_benchmark "$2" -else - for benchmark in biojava tomcat ; do - run_benchmark "${benchmark}" - done -fi - diff --git a/benchmark/load/insecure-bank/k6.js b/benchmark/load/insecure-bank/k6.js deleted file mode 100644 index 2dd800fa7e5..00000000000 --- a/benchmark/load/insecure-bank/k6.js +++ /dev/null @@ -1,76 +0,0 @@ -import http from 'k6/http'; -import {checkResponse, isOk, isRedirect} from "../../utils/k6.js"; - -const variants = { - "no_agent": { - "APP_URL": 'http://localhost:8080', - }, - "tracing": { - "APP_URL": 'http://localhost:8081', - }, - "profiling": { - "APP_URL": 'http://localhost:8082', - }, - "iast": { - "APP_URL": 'http://localhost:8083', - }, - "iast_GLOBAL": { - "APP_URL": 'http://localhost:8084', - }, - "iast_FULL": { - "APP_URL": 'http://localhost:8085', - }, -} - -export const options = function (variants) { - let scenarios = {}; - for (const variant of Object.keys(variants)) { - scenarios[`load--insecure-bank--${variant}--warmup`] = { - executor: 'constant-vus', // https://grafana.com/docs/k6/latest/using-k6/scenarios/executors/#all-executors - vus: 5, - duration: '165s', - gracefulStop: '2s', - env: { - "APP_URL": variants[variant]["APP_URL"] - } - }; - - scenarios[`load--insecure-bank--${variant}--high_load`] = { - executor: 'constant-vus', - vus: 5, - startTime: '167s', - duration: '15s', - gracefulStop: '2s', - env: { - "APP_URL": variants[variant]["APP_URL"] - } - }; - } - - return { - discardResponseBodies: true, - scenarios, - } -}(variants); - -export default function () { - - // login form - const loginResponse = http.post(`${__ENV.APP_URL}/login`, { - username: 'john', - password: 'test' - }, { - redirects: 0 - }); - checkResponse(loginResponse, isRedirect); - - // dashboard - const dashboard = http.get(`${__ENV.APP_URL}/dashboard`); - checkResponse(dashboard, isOk); - - // logout - const logout = http.get(`${__ENV.APP_URL}/j_spring_security_logout`, { - redirects: 0 - }); - checkResponse(logout, isRedirect); -} diff --git a/benchmark/load/insecure-bank/start-servers.sh b/benchmark/load/insecure-bank/start-servers.sh deleted file mode 100755 index 4cae95567f2..00000000000 --- a/benchmark/load/insecure-bank/start-servers.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -set -e - -start_server() { - local VARIANT=$1 - local JAVA_OPTS=$2 - - if [ -n "$CI_JOB_TOKEN" ]; then - # Inside BP, so we can assume 24 CPU cores available and set CPU affinity - CPU_AFFINITY_APP=$3 - else - CPU_AFFINITY_APP="" - fi - - mkdir -p "${LOGS_DIR}/${VARIANT}" - ${CPU_AFFINITY_APP}java ${JAVA_OPTS} -Xms3G -Xmx3G -jar ${INSECURE_BANK} &> ${LOGS_DIR}/${VARIANT}/insecure-bank.log &PID=$! 
- echo "${CPU_AFFINITY_APP}java ${JAVA_OPTS} -Xms3G -Xmx3G -jar ${INSECURE_BANK} &> ${LOGS_DIR}/${VARIANT}/insecure-bank.log [PID=$PID]" -} - -start_server "no_agent" "-Dserver.port=8080" "taskset -c 47 " & -start_server "tracing" "-javaagent:${TRACER} -Dserver.port=8081" "taskset -c 46 " & -start_server "profiling" "-javaagent:${TRACER} -Ddd.profiling.enabled=true -Dserver.port=8082" "taskset -c 45 " & -start_server "iast" "-javaagent:${TRACER} -Ddd.iast.enabled=true -Dserver.port=8083" "taskset -c 44 " & -start_server "iast_GLOBAL" "-javaagent:${TRACER} -Ddd.iast.enabled=true -Ddd.iast.context.mode=GLOBAL -Dserver.port=8084" "taskset -c 43 " & -start_server "iast_FULL" "-javaagent:${TRACER} -Ddd.iast.enabled=true -Ddd.iast.detection.mode=FULL -Dserver.port=8085" "taskset -c 42 " & - -wait diff --git a/benchmark/load/petclinic/k6.js b/benchmark/load/petclinic/k6.js deleted file mode 100644 index debeab10a8e..00000000000 --- a/benchmark/load/petclinic/k6.js +++ /dev/null @@ -1,61 +0,0 @@ -import http from 'k6/http'; -import {checkResponse, isOk} from "../../utils/k6.js"; - -const variants = { - "no_agent": { - "APP_URL": 'http://localhost:8080', - }, - "tracing": { - "APP_URL": 'http://localhost:8081', - }, - "profiling": { - "APP_URL": 'http://localhost:8082', - }, - "appsec": { - "APP_URL": 'http://localhost:8083', - }, - "iast": { - "APP_URL": 'http://localhost:8084', - }, - "code_origins": { - "APP_URL": 'http://localhost:8085', - } -} - -export const options = function (variants) { - const scenarios = {}; - for (const variant of Object.keys(variants)) { - scenarios[`load--petclinic--${variant}--warmup`] = { - executor: 'constant-vus', // https://grafana.com/docs/k6/latest/using-k6/scenarios/executors/#all-executors - vus: 5, - duration: '165s', - gracefulStop: '2s', - env: { - "APP_URL": variants[variant]["APP_URL"] - } - }; - - scenarios[`load--petclinic--${variant}--high_load`] = { - executor: 'constant-vus', - vus: 5, - startTime: '167s', - duration: '15s', - gracefulStop: '2s', - env: { - "APP_URL": variants[variant]["APP_URL"] - } - }; - } - - return { - discardResponseBodies: true, - scenarios, - } -}(variants); - -export default function () { - - // find owner - const ownersList = http.get(`${__ENV.APP_URL}/owners?lastName=`); - checkResponse(ownersList, isOk); -} diff --git a/benchmark/load/petclinic/start-servers.sh b/benchmark/load/petclinic/start-servers.sh deleted file mode 100755 index 1ebbb4e0418..00000000000 --- a/benchmark/load/petclinic/start-servers.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -set -e - -start_server() { - local VARIANT=$1 - local JAVA_OPTS=$2 - - if [ -n "$CI_JOB_TOKEN" ]; then - # Inside BP, so we can assume 24 CPU cores available and set CPU affinity - CPU_AFFINITY_APP=$3 - else - CPU_AFFINITY_APP="" - fi - - mkdir -p "${LOGS_DIR}/${VARIANT}" - ${CPU_AFFINITY_APP}java ${JAVA_OPTS} -Xms2G -Xmx2G -jar ${PETCLINIC} &> ${LOGS_DIR}/${VARIANT}/petclinic.log &PID=$! 
- echo "${CPU_AFFINITY_APP}java ${JAVA_OPTS} -Xms2G -Xmx2G -jar ${PETCLINIC} &> ${LOGS_DIR}/${VARIANT}/petclinic.log [PID=$!]" -} - -start_server "no_agent" "-Dserver.port=8080" "taskset -c 31-32 " & -start_server "tracing" "-javaagent:${TRACER} -Dserver.port=8081" "taskset -c 33-34 " & -start_server "profiling" "-javaagent:${TRACER} -Ddd.profiling.enabled=true -Dserver.port=8082" "taskset -c 35-36 " & -start_server "appsec" "-javaagent:${TRACER} -Ddd.appsec.enabled=true -Dserver.port=8083" "taskset -c 37-38 " & -start_server "iast" "-javaagent:${TRACER} -Ddd.iast.enabled=true -Dserver.port=8084" "taskset -c 39-40 " & -start_server "code_origins" "-javaagent:${TRACER} -Ddd.code.origin.for.spans.enabled=true -Dserver.port=8085" "taskset -c 41-42 " & - -wait diff --git a/benchmark/load/run.sh b/benchmark/load/run.sh deleted file mode 100755 index 5f2f265b045..00000000000 --- a/benchmark/load/run.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env bash - -set -e - -function message() { - echo "$(date +"%T"): $1" -} - -function healthcheck() { - local url=$1 - - while true; do - if [[ $(curl -fso /dev/null -w "%{http_code}" "${url}") = 200 ]]; then - break - fi - done -} - -type=$1 - -if [ -n "$CI_JOB_TOKEN" ]; then - # Inside BP, so we can assume 24 CPU cores on the second socket available and set CPU affinity - export CPU_AFFINITY_K6="taskset -c 24-27 " -else - export CPU_AFFINITY_K6="" -fi - -source "${UTILS_DIR}/update-java-version.sh" 17 - -for app in *; do - if [[ ! -d "${app}" ]]; then - continue - fi - - message "${type} benchmark: ${app} started" - - export OUTPUT_DIR="${REPORTS_DIR}/${type}/${app}" - mkdir -p ${OUTPUT_DIR} - - export LOGS_DIR="${ARTIFACTS_DIR}/${type}/${app}" - mkdir -p ${LOGS_DIR} - - # Using profiler variants for healthcheck as they are the slowest - if [ "${app}" == "petclinic" ]; then - HEALTHCHECK_URL=http://localhost:8082 - REPETITIONS_COUNT=2 - elif [ "${app}" == "insecure-bank" ]; then - HEALTHCHECK_URL=http://localhost:8082/login - REPETITIONS_COUNT=2 - else - echo "Unknown app ${app}" - exit 1 - fi - - for i in $(seq 1 $REPETITIONS_COUNT); do - bash -c "${UTILS_DIR}/../${type}/${app}/start-servers.sh" & - - echo "Waiting for serves to start..." - if [ "${app}" == "petclinic" ]; then - for port in $(seq 8080 8085); do - healthcheck http://localhost:$port - done - elif [ "${app}" == "insecure-bank" ]; then - for port in $(seq 8080 8085); do - healthcheck http://localhost:$port/login - done - fi - echo "Servers are up!" - - ( - cd ${app} && - bash -c "${CPU_AFFINITY_K6}${UTILS_DIR}/run-k6-load-test.sh 'pkill java'" - ) - done - - message "${type} benchmark: ${app} finished" -done diff --git a/benchmark/run.sh b/benchmark/run.sh deleted file mode 100755 index bcd3649e9a0..00000000000 --- a/benchmark/run.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env bash -set -eu - -readonly SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" -readonly INITIAL_DIR="$(pwd)" -readonly TRACER="${SCRIPT_DIR}/tracer/dd-java-agent.jar" - -cd "${SCRIPT_DIR}" - -# Build container image -echo "Building base image ..." -docker build \ - -t dd-trace-java/benchmark \ - . - -# Find or rebuild tracer to be used in the benchmarks -if [[ ! -f "${TRACER}" ]]; then - mkdir -p "${SCRIPT_DIR}/tracer" - cd "${SCRIPT_DIR}/.." - readonly TRACER_VERSION=$(./gradlew properties -q | grep "version:" | awk '{print $2}') - readonly TRACER_COMPILED="${SCRIPT_DIR}/../dd-java-agent/build/libs/dd-java-agent-${TRACER_VERSION}.jar" - if [ ! 
-f "${TRACER_COMPILED}" ]; then - echo "Tracer not found, starting gradle compile ..." - ./gradlew assemble - fi - cp "${TRACER_COMPILED}" "${TRACER}" - cd "${SCRIPT_DIR}" -fi - -# Trigger benchmarks -echo "Running benchmarks ..." -docker run --rm \ - -v "${HOME}/.gradle":/home/benchmark/.gradle:delegated \ - -v "${PWD}/..":/tracer:delegated \ - -w /tracer/benchmark \ - -e GRADLE_OPTS="-Dorg.gradle.daemon=false" \ - --entrypoint=./benchmarks.sh \ - --name dd-trace-java-benchmark \ - --cap-add SYS_ADMIN \ - dd-trace-java/benchmark \ - "$@" - -cd "${INITIAL_DIR}" diff --git a/benchmark/startup/insecure-bank/benchmark.json b/benchmark/startup/insecure-bank/benchmark.json deleted file mode 100644 index 17c69a50847..00000000000 --- a/benchmark/startup/insecure-bank/benchmark.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "name": "startup_insecure-bank", - "setup": "bash -c \"mkdir -p ${OUTPUT_DIR}/${VARIANT}\"", - "service": "bash -c \"${UTILS_DIR}/run-on-server-ready.sh http://localhost:8080/login 'pkill java'\"", - "run": "bash -c \"java -javaagent:${TRACER} -Ddd.benchmark.enabled=true -Ddd.benchmark.output.dir=${OUTPUT_DIR}/${VARIANT} ${JAVA_OPTS} -jar ${INSECURE_BANK} &> ${OUTPUT_DIR}/${VARIANT}/insecure-bank.log\"", - "iterations": 10, - "timeout": 60, - "variants": { - "tracing": { - "env": { - "VARIANT": "tracing", - "JAVA_OPTS": "" - } - }, - "iast": { - "env": { - "VARIANT": "iast", - "JAVA_OPTS": "-Ddd.iast.enabled=true" - } - } - } -} diff --git a/benchmark/startup/petclinic/benchmark.json b/benchmark/startup/petclinic/benchmark.json deleted file mode 100644 index 23713c38469..00000000000 --- a/benchmark/startup/petclinic/benchmark.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "name": "startup_petclinic", - "setup": "bash -c \"mkdir -p ${OUTPUT_DIR}/${VARIANT}\"", - "service": "bash -c \"${UTILS_DIR}/run-on-server-ready.sh http://localhost:8080 'pkill java'\"", - "run": "bash -c \"java -javaagent:${TRACER} -Ddd.benchmark.enabled=true -Ddd.benchmark.output.dir=${OUTPUT_DIR}/${VARIANT} ${JAVA_OPTS} -jar ${PETCLINIC} &> ${OUTPUT_DIR}/${VARIANT}/petclinic.log\"", - "iterations": 10, - "timeout": 60, - "variants": { - "tracing": { - "env": { - "VARIANT": "tracing", - "JAVA_OPTS": "" - } - }, - "profiling": { - "env": { - "VARIANT": "profiling", - "JAVA_OPTS": "-Ddd.profiling.enabled=true" - } - }, - "appsec": { - "env": { - "VARIANT": "appsec", - "JAVA_OPTS": "-Ddd.appsec.enabled=true" - } - }, - "iast": { - "env": { - "VARIANT": "iast", - "JAVA_OPTS": "-Ddd.iast.enabled=true" - } - } - } -} diff --git a/benchmark/startup/run.sh b/benchmark/startup/run.sh deleted file mode 100755 index 432c65d3fd5..00000000000 --- a/benchmark/startup/run.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env bash -set -eu - -source "${UTILS_DIR}/update-java-version.sh" 17 -"${UTILS_DIR}/run-sirun-benchmarks.sh" "$@" diff --git a/benchmark/utils/k6.js b/benchmark/utils/k6.js deleted file mode 100644 index aa5147ae3c8..00000000000 --- a/benchmark/utils/k6.js +++ /dev/null @@ -1,21 +0,0 @@ -import {check} from 'k6'; - -export function checkResponse(response) { - const checks = Array.prototype.slice.call(arguments, 1); - const reduced = checks.reduce((result, current) => Object.assign(result, current), {}); - check(response, reduced); -} - -export const isOk = { - 'is OK': r => r.status === 200 -}; - -export const isRedirect = { - 'is redirect': r => r.status >= 300 && r.status < 400 -}; - -export function bodyContains(text) { - return { - 'body contains': r => r.body.includes(text) - } -} diff --git 
a/benchmark/utils/run-k6-load-test.sh b/benchmark/utils/run-k6-load-test.sh deleted file mode 100755 index d3415f54eef..00000000000 --- a/benchmark/utils/run-k6-load-test.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env bash -set -eu - -command=$1 -exit_code=0 - -cleanup() { - # run the exit command - bash -c "${command}" - exit $exit_code -} - -trap cleanup EXIT ERR INT TERM - -echo "Starting k6 load test, logs are recorded into ${LOGS_DIR}/k6.log..." - -# run the k6 benchmark and store the result as JSON -k6 run k6.js --out "json=${OUTPUT_DIR}/k6_$(date +%s).json" > "${LOGS_DIR}/k6.log" 2>&1 -exit_code=$? - -echo "k6 load test done !!!" diff --git a/benchmark/utils/run-on-server-ready.sh b/benchmark/utils/run-on-server-ready.sh deleted file mode 100755 index 2aad5aa9f70..00000000000 --- a/benchmark/utils/run-on-server-ready.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash -set -eu - -url=$1 -command=$2 -# wait for an HTTP server to come up and runs the selected command -while true; do - if [[ $(curl -fso /dev/null -w "%{http_code}" "${url}") = 200 ]]; then - bash -c "${command}" - fi -done diff --git a/benchmark/utils/run-sirun-benchmarks.sh b/benchmark/utils/run-sirun-benchmarks.sh deleted file mode 100755 index c0bc732dcfa..00000000000 --- a/benchmark/utils/run-sirun-benchmarks.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/env bash -set -eu - -function message() { - echo "$(date +"%T"): $1" -} - -run_benchmark() { - local type=$1 - local app=$2 - if [[ -d "${app}" ]] && [[ -f "${app}/benchmark.json" ]]; then - - message "${type} benchmark: ${app} started" - cd "${app}" - - # create output folder for the test - export OUTPUT_DIR="${REPORTS_DIR}/${type}/${app}" - mkdir -p "${OUTPUT_DIR}" - - # substitute environment variables in the json file - benchmark=$(mktemp) - # shellcheck disable=SC2046 - # shellcheck disable=SC2016 - envsubst "$(printf '${%s} ' $(env | cut -d'=' -f1))" "${benchmark}" - - # run the sirun test - sirun "${benchmark}" &>"${OUTPUT_DIR}/${app}.json" - - message "${type} benchmark: ${app} finished" - - cd .. 
- fi -} - -if [ "$#" == '2' ]; then - run_benchmark "$@" -else - for folder in *; do - run_benchmark "$1" "${folder}" - done -fi diff --git a/benchmark/utils/update-java-version.sh b/benchmark/utils/update-java-version.sh deleted file mode 100755 index 3d76603e0ef..00000000000 --- a/benchmark/utils/update-java-version.sh +++ /dev/null @@ -1,5 +0,0 @@ -readonly target=$1 -readonly NEW_PATH=$(echo "${PATH}" | sed -e "s@/usr/lib/jvm/[[:digit:]]\+@/usr/lib/jvm/${target}@g") -export PATH="${NEW_PATH}" - -java --version From 8a3964d0e7bfebcdc0e470102b8cf65dcb2a82c7 Mon Sep 17 00:00:00 2001 From: Sarah Chen Date: Tue, 10 Feb 2026 17:04:14 -0500 Subject: [PATCH 2/3] Attempt bringing benchmarking-platform scripts over --- .gitlab-ci.yml | 1 + .gitlab/benchmarks.yml | 68 ++++ .gitlab/scripts/append-dacapo-report | 96 +++++ .gitlab/scripts/append-load-report | 106 ++++++ .gitlab/scripts/append-startup-report | 153 ++++++++ .gitlab/scripts/benchmark-compare.sh | 351 +++++++++++++++++++ .gitlab/scripts/get-baseline-commit-info.sh | 81 +++++ .gitlab/scripts/post-benchmark-pr-comment.sh | 72 ++++ .gitlab/scripts/python_utils.py | 29 ++ 9 files changed, 957 insertions(+) create mode 100755 .gitlab/scripts/append-dacapo-report create mode 100755 .gitlab/scripts/append-load-report create mode 100755 .gitlab/scripts/append-startup-report create mode 100755 .gitlab/scripts/benchmark-compare.sh create mode 100755 .gitlab/scripts/get-baseline-commit-info.sh create mode 100755 .gitlab/scripts/post-benchmark-pr-comment.sh create mode 100644 .gitlab/scripts/python_utils.py diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 9bb4b6a7bab..0400a193624 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -25,6 +25,7 @@ stages: - java-startup-microbenchmarks - java-load-microbenchmarks - java-dacapo-microbenchmarks + - benchmark-comparison - generate-slos - upload-to-bp-api - macrobenchmarks diff --git a/.gitlab/benchmarks.yml b/.gitlab/benchmarks.yml index e4e6e1c029c..3942b6f4cc2 100644 --- a/.gitlab/benchmarks.yml +++ b/.gitlab/benchmarks.yml @@ -80,3 +80,71 @@ debugger-benchmarks: UPSTREAM_BRANCH: $CI_COMMIT_REF_NAME # The branch or tag name for which project is built. UPSTREAM_COMMIT_SHA: $CI_COMMIT_SHA # The commit revision the project is built for. 
FF_USE_LEGACY_KUBERNETES_EXECUTION_STRATEGY: "true" + +.benchmark-compare-template: + stage: benchmark-comparison + image: registry.ddbuild.io/images/benchmarking-platform-tools-ubuntu:6845f3c7 + tags: ["arch:amd64"] + interruptible: true + rules: + - if: '$POPULATE_CACHE' + when: never + - when: on_success + script: + - mkdir -p "$(pwd)/reports/${BENCHMARK_TYPE}" + - .gitlab/scripts/get-baseline-commit-info.sh "$(pwd)/reports/baseline-info.env" "$(pwd)/reports/${BENCHMARK_TYPE}/fallback_to_master.txt" + - .gitlab/scripts/benchmark-compare.sh "${BENCHMARK_TYPE}" + artifacts: + name: "benchmark-compare-${BENCHMARK_TYPE}" + when: always + paths: + - reports/${BENCHMARK_TYPE}/ + expire_in: 3 months + variables: + TARGET_BRANCH: "master" + +benchmarks-compare-startup: + extends: .benchmark-compare-template + variables: + BENCHMARK_TYPE: startup + +benchmarks-compare-load: + extends: .benchmark-compare-template + variables: + BENCHMARK_TYPE: load + +benchmarks-compare-dacapo: + extends: .benchmark-compare-template + variables: + BENCHMARK_TYPE: dacapo + +benchmarks-post-pr-comment: + stage: benchmark-comparison + image: registry.ddbuild.io/images/dd-octo-sts-ci-base:2025.06-1 + tags: ["arch:amd64"] + interruptible: true + needs: + - job: benchmarks-compare-startup + artifacts: true + - job: benchmarks-compare-load + artifacts: true + - job: benchmarks-compare-dacapo + artifacts: true + rules: + - if: '$POPULATE_CACHE' + when: never + - when: always + id_tokens: + DDOCTOSTS_ID_TOKEN: + aud: dd-octo-sts + before_script: + - dd-octo-sts token --scope DataDog/${UPSTREAM_PROJECT_NAME} --policy "${DDOCTOSTS_POLICY}" > github-token.txt + - export GITHUB_TOKEN="$(cat github-token.txt)" + script: + - .gitlab/scripts/post-benchmark-pr-comment.sh + after_script: + - dd-octo-sts revoke -t "$(cat github-token.txt)" || true + variables: + DDOCTOSTS_POLICY: "self.gitlab.github-access" + UPSTREAM_PROJECT_NAME: $CI_PROJECT_NAME + UPSTREAM_BRANCH: $CI_COMMIT_REF_NAME diff --git a/.gitlab/scripts/append-dacapo-report b/.gitlab/scripts/append-dacapo-report new file mode 100755 index 00000000000..4c9137f6ed1 --- /dev/null +++ b/.gitlab/scripts/append-dacapo-report @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 + +import json +import os +import sys +from io import StringIO + +import numpy as np + +from python_utils import compute_confidence_interval, import_benchmark_values, round_value + +NO_AGENT_VARIANT = os.getenv("NO_AGENT_VARIANT") +CI_INTERVAL = 0.99 +UOM = "µs" +CANDIDATE = os.getenv("CANDIDATE_VERSION") +BASELINE = os.getenv("BASELINE_VERSION") + +input_folder = sys.argv[1] + + +def _parse_benchmarks(folder: str, build: str) -> list[float]: + with open(f"{folder}/benchmark-{build}.json", "r", encoding="utf-8") as reader: + benchmark_json = json.loads(reader.read()) + return import_benchmark_values(list(benchmark_json["benchmarks"])[0], "execution_time", UOM) + + +def _sort_variants(item: str) -> str: + if item == NO_AGENT_VARIANT: + return "" + return item + + +def _build_application_results(folder: str) -> str: + application = os.path.basename(folder) + chart = StringIO() + tables = { + "baseline": StringIO(), + "candidate": StringIO(), + } + values = { + "baseline": dict(), + "candidate": dict(), + } + for variant_folder in [f.path for f in os.scandir(application_folder) if f.is_dir()]: + variant = os.path.basename(variant_folder) + values["candidate"][variant] = _parse_benchmarks(variant_folder, "candidate") + values["baseline"][variant] = _parse_benchmarks(variant_folder, "baseline") + no_agent_means = { + 
"baseline": float(np.mean(values["baseline"][NO_AGENT_VARIANT])), + "candidate": float(np.mean(values["candidate"][NO_AGENT_VARIANT])), + } + variants = sorted(values["candidate"].keys(), key=_sort_variants) + chart.write( + f""" +```mermaid +gantt + title {application} - execution time [CI {CI_INTERVAL}] : candidate={CANDIDATE}, baseline={BASELINE} + dateFormat X + axisFormat %s +""" + ) + + for build in ("baseline", "candidate"): + table = tables[build] + table.write(f"\n|Variant|Execution Time [CI {CI_INTERVAL}]|Δ {NO_AGENT_VARIANT}|\n") + table.write("|---|---|---|\n") + build_values = values[build] + no_agent_mean = no_agent_means[build] + chart.write(f"section {build}\n") + for variant in variants: + variant_values = build_values[variant] + mean = float(np.mean(variant_values)) + lower, upper = compute_confidence_interval(variant_values, CI_INTERVAL) + overhead = mean - no_agent_mean + overhead_pct = overhead * 100 / no_agent_mean + chart.write(f"{variant} ({round_value(mean, UOM)}) : {round(lower)}, {round(upper)}\n") + chart.write(f". : milestone, {round(mean)},\n") + table.write( + f"|{variant}|{round_value(mean, UOM)} [{round_value(lower, UOM)}, {round_value(upper, UOM)}]" + f"|{'-' if variant == NO_AGENT_VARIANT else f'{round_value(overhead, UOM)} ({round(overhead_pct, 1)}%)'}|\n" + ) + + chart.write("```\n") + + result = StringIO() + result.write(f"\n\n
<details>\n<summary>Execution time for {application}</summary>\n")
+    result.write(chart.getvalue())
+    for build, table in tables.items():
+        result.write(f"\n* **{build}** results\n")
+        result.write(table.getvalue())
+    result.write("\n</details>
\n") + return result.getvalue() + + +for application_folder in [f.path for f in os.scandir(input_folder) if f.is_dir()]: + print(_build_application_results(application_folder)) diff --git a/.gitlab/scripts/append-load-report b/.gitlab/scripts/append-load-report new file mode 100755 index 00000000000..7c4814faaf7 --- /dev/null +++ b/.gitlab/scripts/append-load-report @@ -0,0 +1,106 @@ +#!/usr/bin/env python3 + +import json +import os +import sys +from io import StringIO + +import numpy as np + +from python_utils import compute_confidence_interval, import_benchmark_values, round_value + +NO_AGENT_VARIANT = os.getenv("NO_AGENT_VARIANT") +CI_INTERVAL = 0.99 +UOM = "µs" +CANDIDATE = os.getenv("CANDIDATE_VERSION") +BASELINE = os.getenv("BASELINE_VERSION") + +input_folder = sys.argv[1] + + +def _get_variant_from_scenario(scenario: str) -> str: + return scenario.split(":")[2] + + +def _parse_benchmarks(folder: str, build: str) -> dict[str, list[float]]: + with open(f"{folder}/benchmark-{build}.json", "r", encoding="utf-8") as reader: + benchmark_json = json.loads(reader.read()) + + return { + _get_variant_from_scenario(b["parameters"]["scenario"]): import_benchmark_values( + b, "http_req_duration", UOM + ) + for b in benchmark_json["benchmarks"] + } + + +def _sort_variants(item: str) -> str: + if item == NO_AGENT_VARIANT: + return "" + return item + + +def _build_application_results(folder: str) -> str: + application = os.path.basename(folder) + chart = StringIO() + tables = { + "baseline": StringIO(), + "candidate": StringIO(), + } + values = { + "baseline": dict(), + "candidate": dict(), + } + + values["candidate"] = _parse_benchmarks(os.path.join(application_folder, "_merged_k6_results"), "candidate") + values["baseline"] = _parse_benchmarks(os.path.join(application_folder, "_merged_k6_results"), "baseline") + + no_agent_means = { + "baseline": float(np.mean(values["baseline"][NO_AGENT_VARIANT])), + "candidate": float(np.mean(values["candidate"][NO_AGENT_VARIANT])), + } + variants = sorted(values["candidate"].keys(), key=_sort_variants) + chart.write( + f""" +```mermaid +gantt + title {application} - request duration [CI {CI_INTERVAL}] : candidate={CANDIDATE}, baseline={BASELINE} + dateFormat X + axisFormat %s +""" + ) + + for build in ("baseline", "candidate"): + table = tables[build] + table.write(f"\n|Variant|Request duration [CI {CI_INTERVAL}]|Δ {NO_AGENT_VARIANT}|\n") + table.write("|---|---|---|\n") + build_values = values[build] + no_agent_mean = no_agent_means[build] + chart.write(f"section {build}\n") + for variant in variants: + variant_values = build_values[variant] + mean = float(np.mean(variant_values)) + lower, upper = compute_confidence_interval(variant_values, CI_INTERVAL) + overhead = mean - no_agent_mean + overhead_pct = overhead * 100 / no_agent_mean + chart.write(f"{variant} ({round_value(mean, UOM)}) : {round(lower)}, {round(upper)}\n") + chart.write(f". : milestone, {round(mean)},\n") + table.write( + f"|{variant}|{round_value(mean, UOM)} [{round_value(lower, UOM)}, {round_value(upper, UOM)}]" + f"|{'-' if variant == NO_AGENT_VARIANT else f'{round_value(overhead, UOM)} ({round(overhead_pct, 1)}%)'}|\n" + ) + + chart.write("```\n") + + result = StringIO() + result.write(f"\n\n
<details>\n<summary>Request duration reports for {application}</summary>\n")
+    result.write(chart.getvalue())
+    for build, table in tables.items():
+        result.write(f"\n* **{build}** results\n")
+        result.write(table.getvalue())
+    result.write("\n</details>
\n") + return result.getvalue() + + +for application_folder in [f.path for f in os.scandir(input_folder) if f.is_dir()]: + print(_build_application_results(application_folder)) diff --git a/.gitlab/scripts/append-startup-report b/.gitlab/scripts/append-startup-report new file mode 100755 index 00000000000..233b9206897 --- /dev/null +++ b/.gitlab/scripts/append-startup-report @@ -0,0 +1,153 @@ +#!/usr/bin/env python3 + +import json +import os +import sys +from io import StringIO + +import numpy as np + +from python_utils import import_benchmark_values, round_value + +AGENT_MODULE = "Agent" +AGENT_START_MODULE = "Agent.start" +UOM = "µs" +CANDIDATE = os.getenv("CANDIDATE_VERSION") +BASELINE = os.getenv("BASELINE_VERSION") +TRACING_VARIANT = "tracing" + +input_folder = sys.argv[1] + + +def _parse_benchmarks(folder: str, build: str, modules: list[str]) -> dict: + benchmarks = dict() + with open(f"{folder}/benchmark-{build}.json", "r", encoding="utf-8") as reader: + benchmark_json = json.loads(reader.read()) + for benchmark in benchmark_json["benchmarks"]: + parameters = benchmark["parameters"] + module = parameters["module"] + if module not in modules: + modules.append(module) + benchmarks[module] = import_benchmark_values(benchmark, "execution_time", UOM) + return benchmarks + + +def _sort_variants(item: str) -> str: + if item == TRACING_VARIANT: + return "" + return item + + +def _build_global_results(application: str, variants: list[str], values: dict) -> str: + chart = StringIO() + tables = { + "baseline": StringIO(), + "candidate": StringIO(), + } + tracing_means = { + "baseline": { + AGENT_MODULE: float(np.mean(values["baseline"][TRACING_VARIANT][AGENT_MODULE])), + AGENT_START_MODULE: float( + np.mean(values["baseline"][TRACING_VARIANT][AGENT_START_MODULE]) + ), + }, + "candidate": { + AGENT_MODULE: float(np.mean(values["candidate"][TRACING_VARIANT][AGENT_MODULE])), + AGENT_START_MODULE: float( + np.mean(values["candidate"][TRACING_VARIANT][AGENT_START_MODULE]) + ), + }, + } + + chart.write( + f""" +```mermaid +gantt + title {application} - global startup overhead: candidate={CANDIDATE}, baseline={BASELINE} + + dateFormat X + axisFormat %s +""" + ) + for variant in variants: + chart.write(f"section {variant}\n") + for build in ("baseline", "candidate"): + for module in (AGENT_START_MODULE, AGENT_MODULE): + module_name = "Total" if module == AGENT_MODULE else "Agent" + mean = float(np.mean(values[build][variant][module])) + chart.write(f"{module_name} [{build}] ({round_value(mean, UOM)}) : 0, {round(mean)}\n") + chart.write("```\n") + + for build in ("baseline", "candidate"): + table = tables[build] + table.write(f"\n|Module|Variant|Duration|Δ {TRACING_VARIANT}|\n") + table.write("|---|---|---|---|\n") + for module in (AGENT_START_MODULE, AGENT_MODULE): + for variant in variants: + tracing_mean = tracing_means[build][module] + module_name = "Total" if module == AGENT_MODULE else "Agent" + mean = float(np.mean(values[build][variant][module])) + overhead = mean - tracing_mean + overhead_pct = overhead * 100 / tracing_mean + table.write( + f"|{module_name}|{variant}|{round_value(mean, UOM)}" + f"|{'-' if variant == TRACING_VARIANT else f'{round_value(overhead, UOM)} ({round(overhead_pct, 1)}%)'}|\n" + ) + + result = StringIO() + result.write(chart.getvalue()) + for build, table in tables.items(): + result.write(f"\n* **{build}** results\n") + result.write(table.getvalue()) + return result.getvalue() + + +def _build_module_results(application: str, module_names: list[str], variants: 
list[str], values: dict) -> str: + result = StringIO() + result.write( + f""" +```mermaid +gantt + title {application} - break down per module: candidate={CANDIDATE}, baseline={BASELINE} + + dateFormat X + axisFormat %s +""" + ) + for variant in variants: + result.write(f"section {variant}\n") + for module in module_names: + if module not in (AGENT_START_MODULE, AGENT_MODULE): + for build in ("baseline", "candidate"): + variant_values = values[build][variant] + if module in variant_values: + module_values = variant_values[module] + mean = float(np.mean(module_values)) + result.write(f"{module} [{build}] ({round_value(mean, UOM)}) : 0, {round(mean)}\n") + + result.write("```\n") + return result.getvalue() + + +def _build_application_results(folder: str) -> str: + application = os.path.basename(folder) + module_names = list() + values = { + "baseline": dict(), + "candidate": dict(), + } + for variant_folder in [f.path for f in os.scandir(folder) if f.is_dir()]: + variant = os.path.basename(variant_folder) + values["candidate"][variant] = _parse_benchmarks(variant_folder, "candidate", module_names) + values["baseline"][variant] = _parse_benchmarks(variant_folder, "baseline", module_names) + variants = sorted(values["candidate"].keys(), key=_sort_variants) + result = StringIO() + result.write(f"\n\n
<details>\n<summary>Startup time reports for {application}</summary>\n")
+    result.write(_build_global_results(application, variants, values))
+    result.write(_build_module_results(application, module_names, variants, values))
+    result.write("\n</details>
\n") + return result.getvalue() + + +for application_folder in [f.path for f in os.scandir(input_folder) if f.is_dir()]: + print(_build_application_results(application_folder)) diff --git a/.gitlab/scripts/benchmark-compare.sh b/.gitlab/scripts/benchmark-compare.sh new file mode 100755 index 00000000000..4bc84649424 --- /dev/null +++ b/.gitlab/scripts/benchmark-compare.sh @@ -0,0 +1,351 @@ +#!/usr/bin/env bash + +set -euo pipefail + +if [[ $# -ne 1 ]]; then + echo "Usage: $0 " >&2 + exit 1 +fi + +readonly BENCHMARK_TYPE="$1" +readonly SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]:-$0}")" &>/dev/null && pwd 2>/dev/null)" +readonly REPORTS_DIR="${CI_PROJECT_DIR}/reports" +readonly TYPE_DIR="${REPORTS_DIR}/${BENCHMARK_TYPE}" +readonly CANDIDATE_RAW_DIR="${TYPE_DIR}/candidate-raw" +readonly BASELINE_RAW_DIR="${TYPE_DIR}/baseline-raw" + +mkdir -p "${TYPE_DIR}" "${CANDIDATE_RAW_DIR}" "${BASELINE_RAW_DIR}" + +source "${REPORTS_DIR}/baseline-info.env" + +export MD_REPORT_ONLY_CHANGES=1 +export MD_REPORT_SAMPLE_METRICS=1 +export FAIL_ON_REGRESSION_THRESHOLD=20.0 +export NO_AGENT_VARIANT="no_agent" +export BASELINE_VERSION="${BASELINE_SHA:0:10}" +export CANDIDATE_VERSION="${CI_COMMIT_SHA:0:10}" + +readonly JOBS_API_URL="${CI_API_V4_URL}/projects/${CI_PROJECT_ID}" + +job_pattern_for_type() { + case "${BENCHMARK_TYPE}" in + startup) + echo '^linux-java-(spring-petclinic|insecure-bank)-microbenchmark-startup-' + ;; + load) + echo '^linux-java-(spring-petclinic|insecure-bank)-microbenchmark-load-' + ;; + dacapo) + echo '^linux-java-dacapo-microbenchmark-' + ;; + *) + echo "Unknown benchmark type '${BENCHMARK_TYPE}'" >&2 + exit 1 + ;; + esac +} + +extract_job_meta() { + local job_name="$1" + python3 - "${job_name}" <<'PY' +import re +import sys + +name = sys.argv[1] +for pattern in ( + r"^linux-java-(?Pspring-petclinic|insecure-bank)-microbenchmark-(?Pstartup|load)-(?P.+)$", + r"^linux-java-dacapo-microbenchmark-(?P.+)$", +): + m = re.match(pattern, name) + if m: + app = m.groupdict().get("app", "dacapo") + variant = m.group("variant") + print(f"{app}|{variant}") + sys.exit(0) +print("||") +PY +} + +list_matching_jobs() { + local pipeline_id="$1" + local pattern + pattern="$(job_pattern_for_type)" + + curl --silent --show-error --fail \ + --header "JOB-TOKEN: ${CI_JOB_TOKEN}" \ + "${JOBS_API_URL}/pipelines/${pipeline_id}/jobs?scope[]=success&per_page=100" \ + | python3 - "${pattern}" <<'PY' +import json +import re +import sys + +pattern = re.compile(sys.argv[1]) +jobs = json.load(sys.stdin) +for job in jobs: + if not pattern.match(job["name"]): + continue + if not job.get("artifacts_file") or not job["artifacts_file"].get("filename"): + continue + print(f'{job["id"]}|{job["name"]}') +PY +} + +download_job_artifacts() { + local job_id="$1" + local output_dir="$2" + local archive_path="${output_dir}/artifacts.zip" + + mkdir -p "${output_dir}" + curl --silent --show-error --fail --location \ + --header "JOB-TOKEN: ${CI_JOB_TOKEN}" \ + "${JOBS_API_URL}/jobs/${job_id}/artifacts" \ + --output "${archive_path}" + + rm -rf "${output_dir}/unzipped" + mkdir -p "${output_dir}/unzipped" + unzip -qq "${archive_path}" -d "${output_dir}/unzipped" +} + +post_process_startup() { + local report_path="$1" + local application="$2" + local variant="$3" + local side="$4" + python3 - "${report_path}" "${application}" "${variant}" "${side}" <<'PY' +import json +import sys + +path, app, variant, side = sys.argv[1:] +with open(path, "r", encoding="utf-8") as fin: + report = json.load(fin) + +for benchmark in 
report.get("benchmarks", []): + params = benchmark.setdefault("parameters", {}) + module = params.get("module", "unknown") + params["scenario"] = f"startup:{app}:{variant}:{module}" + params["application"] = app + params["variant"] = variant + params["baseline_or_candidate"] = side + +with open(path, "w", encoding="utf-8") as fout: + json.dump(report, fout, indent=2) +PY +} + +post_process_dacapo() { + local report_path="$1" + local benchmark_name="$2" + local variant="$3" + local side="$4" + python3 - "${report_path}" "${benchmark_name}" "${variant}" "${side}" <<'PY' +import json +import sys + +path, benchmark_name, variant, side = sys.argv[1:] +with open(path, "r", encoding="utf-8") as fin: + report = json.load(fin) + +for benchmark in report.get("benchmarks", []): + params = benchmark.setdefault("parameters", {}) + params["scenario"] = f"dacapo:{benchmark_name}:{variant}" + params["application"] = benchmark_name + params["variant"] = variant + params["baseline_or_candidate"] = side + +with open(path, "w", encoding="utf-8") as fout: + json.dump(report, fout, indent=2) +PY +} + +post_process_load() { + local report_path="$1" + local application="$2" + local variant="$3" + local side="$4" + python3 - "${report_path}" "${application}" "${variant}" "${side}" <<'PY' +import json +import sys + +path, app, variant, side = sys.argv[1:] +with open(path, "r", encoding="utf-8") as fin: + report = json.load(fin) + +benchmarks = [] +for benchmark in report.get("benchmarks", []): + params = benchmark.setdefault("parameters", {}) + raw_scenario = str(params.get("scenario", "")) + stage = raw_scenario.split("--")[0] if "--" in raw_scenario else raw_scenario + if stage.endswith("warmup"): + continue + params["scenario"] = f"{stage}:{app}:{variant}" + params["application"] = app + params["variant"] = variant + params["baseline_or_candidate"] = side + benchmarks.append(benchmark) + +report["benchmarks"] = benchmarks + +with open(path, "w", encoding="utf-8") as fout: + json.dump(report, fout, indent=2) +PY +} + +convert_startup_job() { + local source_dir="$1" + local side="$2" + local application="$3" + local variant="$4" + + local startup_dir + startup_dir="$(find "${source_dir}" -type f -name 'startup_*.csv' -print -quit | xargs dirname 2>/dev/null || true)" + if [[ -z "${startup_dir}" || ! 
-d "${startup_dir}" ]]; then + return + fi + + local target_dir="${TYPE_DIR}/${application}/${variant}" + local out_file="${target_dir}/benchmark-${side}.json" + mkdir -p "${target_dir}" + + benchmark_analyzer convert \ + --framework=JavaStartup \ + --extra-params="{\"application\":\"${application}\",\"variant\":\"${variant}\",\"baseline_or_candidate\":\"${side}\"}" \ + --outpath="${out_file}" \ + "${startup_dir}" + + post_process_startup "${out_file}" "${application}" "${variant}" "${side}" +} + +convert_dacapo_job() { + local source_dir="$1" + local side="$2" + local variant="$3" + + while IFS= read -r benchmark_dir; do + local benchmark_name + benchmark_name="$(basename "${benchmark_dir}")" + local target_dir="${TYPE_DIR}/${benchmark_name}/${variant}" + local out_file="${target_dir}/benchmark-${side}.json" + mkdir -p "${target_dir}" + + benchmark_analyzer convert \ + --framework=JavaDacapo \ + --extra-params="{\"application\":\"${benchmark_name}\",\"variant\":\"${variant}\",\"baseline_or_candidate\":\"${side}\"}" \ + --outpath="${out_file}" \ + "${benchmark_dir}" + + post_process_dacapo "${out_file}" "${benchmark_name}" "${variant}" "${side}" + done < <(find "${source_dir}" -type d -path '*/dacapo/*/*') +} + +convert_load_job() { + local source_dir="$1" + local side="$2" + local application="$3" + local variant="$4" + + local k6_files=() + while IFS= read -r file; do + k6_files+=("${file}") + done < <(find "${source_dir}" -type f -name 'candidate-*.converted.json' ! -name '*resource*') + + if [[ "${#k6_files[@]}" -eq 0 ]]; then + return + fi + + local target_dir="${TYPE_DIR}/${application}/_merged_k6_results" + local out_file="${target_dir}/benchmark-${side}.json" + mkdir -p "${target_dir}" + + benchmark_analyzer merge \ + --mergeby="['scenario', 'application', 'variant', 'baseline_or_candidate', 'git_branch', 'git_commit_sha', 'git_commit_date', 'cpu_model', 'kernel_version', 'ci_job_date', 'ci_job_id', 'ci_pipeline_id']" \ + --outpath="${out_file}" \ + "${k6_files[@]}" + + post_process_load "${out_file}" "${application}" "${variant}" "${side}" +} + +convert_job() { + local source_dir="$1" + local side="$2" + local application="$3" + local variant="$4" + + case "${BENCHMARK_TYPE}" in + startup) + convert_startup_job "${source_dir}" "${side}" "${application}" "${variant}" + ;; + load) + convert_load_job "${source_dir}" "${side}" "${application}" "${variant}" + ;; + dacapo) + convert_dacapo_job "${source_dir}" "${side}" "${variant}" + ;; + esac +} + +process_pipeline() { + local pipeline_id="$1" + local side="$2" + local raw_dir="$3" + local git_sha="$4" + + local jobs_found=0 + while IFS='|' read -r job_id job_name; do + [[ -z "${job_id}" ]] && continue + jobs_found=$((jobs_found + 1)) + + local meta application variant + meta="$(extract_job_meta "${job_name}")" + application="${meta%%|*}" + variant="${meta##*|}" + if [[ -z "${application}" || -z "${variant}" ]]; then + continue + fi + + local job_dir="${raw_dir}/${job_id}" + echo "Processing ${side} job ${job_name} (#${job_id})" + download_job_artifacts "${job_id}" "${job_dir}" + convert_job "${job_dir}/unzipped" "${side}" "${application}" "${variant}" + done < <(list_matching_jobs "${pipeline_id}") + + if [[ "${jobs_found}" -eq 0 ]]; then + echo "No matching ${BENCHMARK_TYPE} jobs found in pipeline ${pipeline_id} for ${side}." 
>&2 + exit 1 + fi + + echo "${git_sha}" > "${TYPE_DIR}/${side}.sha" +} + +build_report() { + local metrics + case "${BENCHMARK_TYPE}" in + startup|dacapo) + metrics="['execution_time']" + ;; + load) + metrics="['agg_http_req_duration_p50','agg_http_req_duration_p95','throughput']" + ;; + esac + + benchmark_analyzer compare pairwise \ + --baseline='{"baseline_or_candidate":"baseline"}' \ + --candidate='{"baseline_or_candidate":"candidate"}' \ + --format=md \ + --metrics="${metrics}" \ + --outpath="${TYPE_DIR}/comparison-baseline-vs-candidate.md" \ + "${TYPE_DIR}"/*/*/benchmark-{baseline,candidate}.json + + if [[ "${BENCHMARK_TYPE}" == "startup" ]]; then + "${SCRIPT_DIR}/append-startup-report" "${TYPE_DIR}" >> "${TYPE_DIR}/comparison-baseline-vs-candidate.md" || true + elif [[ "${BENCHMARK_TYPE}" == "load" ]]; then + "${SCRIPT_DIR}/append-load-report" "${TYPE_DIR}" >> "${TYPE_DIR}/comparison-baseline-vs-candidate.md" || true + elif [[ "${BENCHMARK_TYPE}" == "dacapo" ]]; then + "${SCRIPT_DIR}/append-dacapo-report" "${TYPE_DIR}" >> "${TYPE_DIR}/comparison-baseline-vs-candidate.md" || true + fi +} + +process_pipeline "${CI_PIPELINE_ID}" "candidate" "${CANDIDATE_RAW_DIR}" "${CI_COMMIT_SHA}" +process_pipeline "${BASELINE_PIPELINE_ID}" "baseline" "${BASELINE_RAW_DIR}" "${BASELINE_SHA}" +build_report + +echo "Comparison report generated at ${TYPE_DIR}/comparison-baseline-vs-candidate.md" diff --git a/.gitlab/scripts/get-baseline-commit-info.sh b/.gitlab/scripts/get-baseline-commit-info.sh new file mode 100755 index 00000000000..01b578c14e7 --- /dev/null +++ b/.gitlab/scripts/get-baseline-commit-info.sh @@ -0,0 +1,81 @@ +#!/usr/bin/env bash + +set -euo pipefail + +readonly SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]:-$0}")" &>/dev/null && pwd 2>/dev/null)" +readonly OUT_ENV_FILE="${1:-${SCRIPT_DIR}/../reports/baseline-info.env}" +readonly FALLBACK_MARKER_FILE="${2:-${SCRIPT_DIR}/../reports/fallback_to_master.txt}" +readonly TARGET_BRANCH="${TARGET_BRANCH:-master}" + +mkdir -p "$(dirname "${OUT_ENV_FILE}")" +rm -f "${FALLBACK_MARKER_FILE}" + +if [[ -z "${CI_PROJECT_ID:-}" || -z "${CI_API_V4_URL:-}" || -z "${CI_JOB_TOKEN:-}" ]]; then + echo "Missing CI_PROJECT_ID/CI_API_V4_URL/CI_JOB_TOKEN environment variables." 
>&2 + exit 1 +fi + +readonly PROJECT_API_URL="${CI_API_V4_URL}/projects/${CI_PROJECT_ID}" + +get_pipeline_id_for_sha() { + local sha="$1" + + curl --silent --show-error --fail \ + --header "JOB-TOKEN: ${CI_JOB_TOKEN}" \ + "${PROJECT_API_URL}/pipelines?sha=${sha}&status=success&order_by=updated_at&sort=desc&per_page=1" \ + | python3 -c 'import json,sys; data=json.load(sys.stdin); print(data[0]["id"] if data else "")' +} + +get_latest_master_pipeline_id() { + curl --silent --show-error --fail \ + --header "JOB-TOKEN: ${CI_JOB_TOKEN}" \ + "${PROJECT_API_URL}/pipelines?ref=${TARGET_BRANCH}&status=success&order_by=updated_at&sort=desc&per_page=1" \ + | python3 -c 'import json,sys; data=json.load(sys.stdin); print(data[0]["id"] if data else "")' +} + +resolve_merge_base_sha() { + if [[ -n "${CI_MERGE_REQUEST_DIFF_BASE_SHA:-}" ]]; then + echo "${CI_MERGE_REQUEST_DIFF_BASE_SHA}" + return + fi + + git fetch origin "${TARGET_BRANCH}" --depth=200 >/dev/null 2>&1 || true + git merge-base "${CI_COMMIT_SHA}" "origin/${TARGET_BRANCH}" 2>/dev/null || true +} + +MERGE_BASE_SHA="$(resolve_merge_base_sha)" +BASELINE_SHA="${MERGE_BASE_SHA}" +BASELINE_PIPELINE_ID="" +BASELINE_SOURCE="merge_base" +FALLBACK_TO_MASTER="false" + +if [[ -n "${MERGE_BASE_SHA}" ]]; then + BASELINE_PIPELINE_ID="$(get_pipeline_id_for_sha "${MERGE_BASE_SHA}")" +fi + +if [[ -z "${BASELINE_PIPELINE_ID}" ]]; then + FALLBACK_TO_MASTER="true" + BASELINE_SOURCE="master" + BASELINE_SHA="$(git rev-parse "origin/${TARGET_BRANCH}" 2>/dev/null || true)" + BASELINE_PIPELINE_ID="$(get_latest_master_pipeline_id)" +fi + +if [[ -z "${BASELINE_PIPELINE_ID}" ]]; then + echo "Failed to resolve a baseline pipeline id." >&2 + exit 1 +fi + +if [[ "${FALLBACK_TO_MASTER}" == "true" ]]; then + echo "fallback_to_master=true" > "${FALLBACK_MARKER_FILE}" +fi + +cat > "${OUT_ENV_FILE}" </dev/null; then + echo "⚠️ **Warning**: Baseline build not found for merge-base commit. Comparing against the latest commit on master instead." + echo + fi + echo "# Startup" + cat "${REPORTS_DIR}/startup/comparison-baseline-vs-candidate.md" + echo + echo "# Load" + cat "${REPORTS_DIR}/load/comparison-baseline-vs-candidate.md" + echo + echo "# Dacapo" + cat "${REPORTS_DIR}/dacapo/comparison-baseline-vs-candidate.md" +} > "${BODY_FILE}" + +if [[ -z "${UPSTREAM_BRANCH:-}" ]]; then + cat "${BODY_FILE}" + exit 0 +fi + +if [[ -z "${GITHUB_REPOSITORY:-}" ]]; then + export GITHUB_REPOSITORY="DataDog/${UPSTREAM_PROJECT_NAME:-dd-trace-java}" +fi + +if [[ -z "${GITHUB_TOKEN:-}" ]]; then + echo "GITHUB_TOKEN is required to post benchmark comments." >&2 + exit 1 +fi + +gh auth login --with-token < <(printf '%s' "${GITHUB_TOKEN}") >/dev/null 2>&1 || true +COMMENT_BODY="$(cat "${BODY_FILE}")" + +readonly PR_NUMBER="$( + gh api "repos/${GITHUB_REPOSITORY}/pulls" \ + -f state=open \ + -f head="DataDog:${UPSTREAM_BRANCH}" \ + --jq '.[0].number // ""' +)" + +if [[ -z "${PR_NUMBER}" ]]; then + echo "No open PR found for branch '${UPSTREAM_BRANCH}'. Printing report instead." + cat "${BODY_FILE}" + exit 0 +fi + +readonly COMMENT_ID="$( + gh api "repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments" --paginate \ + --jq ".[] | select(.body | contains(\"${MARKER}\")) | .id" \ + | head -n 1 +)" + +if [[ -n "${COMMENT_ID}" ]]; then + gh api --method PATCH "repos/${GITHUB_REPOSITORY}/issues/comments/${COMMENT_ID}" \ + -f body="${COMMENT_BODY}" >/dev/null + echo "Updated existing benchmark comment (${COMMENT_ID}) on PR #${PR_NUMBER}." 
+else + gh api --method POST "repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments" \ + -f body="${COMMENT_BODY}" >/dev/null + echo "Posted benchmark comment on PR #${PR_NUMBER}." +fi diff --git a/.gitlab/scripts/python_utils.py b/.gitlab/scripts/python_utils.py new file mode 100644 index 00000000000..b721c060932 --- /dev/null +++ b/.gitlab/scripts/python_utils.py @@ -0,0 +1,29 @@ +import math + + +def round_value(value, uom): + for unit in ("s", "ms", "µs", "ns"): + if value >= 100.0 or unit == uom: + return f"{round(value, 1)} {unit}" + value *= 1_000 + return f"{value} ns" + + +def compute_confidence_interval(values, confidence): + n = len(values) + values = sorted(values) + lower = int(math.floor((1.0 - confidence) / 2.0 * n)) + upper = int(math.ceil((1.0 + confidence) / 2.0 * n)) + return values[lower], values[upper - 1] + + +def import_benchmark_values(benchmark, metric, output_uom): + values = list() + for run in benchmark["runs"].values(): + values += run[metric]["values"] + input_uom = run[metric]["uom"] + if input_uom == "ns" and output_uom == "µs": + values = [v / 1_000 for v in values] + if input_uom == "µs" and output_uom == "ms": + values = [v / 1_000 for v in values] + return values From 7309e39e52ec1e7da6500ce9786f5abe275129dd Mon Sep 17 00:00:00 2001 From: Sarah Chen Date: Thu, 12 Feb 2026 15:34:09 -0500 Subject: [PATCH 3/3] Simplify report formatting and PR comment posting --- .gitlab/benchmarks.yml | 13 +- .gitlab/scripts/append-dacapo-report | 96 ------------ .gitlab/scripts/append-load-report | 106 ------------- .gitlab/scripts/append-startup-report | 153 ------------------- .gitlab/scripts/benchmark-compare.sh | 12 -- .gitlab/scripts/post-benchmark-pr-comment.sh | 50 +----- .gitlab/scripts/python_utils.py | 29 ---- 7 files changed, 10 insertions(+), 449 deletions(-) delete mode 100755 .gitlab/scripts/append-dacapo-report delete mode 100755 .gitlab/scripts/append-load-report delete mode 100755 .gitlab/scripts/append-startup-report delete mode 100644 .gitlab/scripts/python_utils.py diff --git a/.gitlab/benchmarks.yml b/.gitlab/benchmarks.yml index 3942b6f4cc2..fd3946071e4 100644 --- a/.gitlab/benchmarks.yml +++ b/.gitlab/benchmarks.yml @@ -120,7 +120,7 @@ benchmarks-compare-dacapo: benchmarks-post-pr-comment: stage: benchmark-comparison - image: registry.ddbuild.io/images/dd-octo-sts-ci-base:2025.06-1 + image: registry.ddbuild.io/images/benchmarking-platform-tools-ubuntu:6845f3c7 tags: ["arch:amd64"] interruptible: true needs: @@ -134,17 +134,10 @@ benchmarks-post-pr-comment: - if: '$POPULATE_CACHE' when: never - when: always - id_tokens: - DDOCTOSTS_ID_TOKEN: - aud: dd-octo-sts - before_script: - - dd-octo-sts token --scope DataDog/${UPSTREAM_PROJECT_NAME} --policy "${DDOCTOSTS_POLICY}" > github-token.txt - - export GITHUB_TOKEN="$(cat github-token.txt)" script: - .gitlab/scripts/post-benchmark-pr-comment.sh - after_script: - - dd-octo-sts revoke -t "$(cat github-token.txt)" || true variables: - DDOCTOSTS_POLICY: "self.gitlab.github-access" UPSTREAM_PROJECT_NAME: $CI_PROJECT_NAME UPSTREAM_BRANCH: $CI_COMMIT_REF_NAME + BP_HEADER: "Benchmarks" + BP_ON_DUPLICATE: "replace" diff --git a/.gitlab/scripts/append-dacapo-report b/.gitlab/scripts/append-dacapo-report deleted file mode 100755 index 4c9137f6ed1..00000000000 --- a/.gitlab/scripts/append-dacapo-report +++ /dev/null @@ -1,96 +0,0 @@ -#!/usr/bin/env python3 - -import json -import os -import sys -from io import StringIO - -import numpy as np - -from python_utils import compute_confidence_interval, 
import_benchmark_values, round_value - -NO_AGENT_VARIANT = os.getenv("NO_AGENT_VARIANT") -CI_INTERVAL = 0.99 -UOM = "µs" -CANDIDATE = os.getenv("CANDIDATE_VERSION") -BASELINE = os.getenv("BASELINE_VERSION") - -input_folder = sys.argv[1] - - -def _parse_benchmarks(folder: str, build: str) -> list[float]: - with open(f"{folder}/benchmark-{build}.json", "r", encoding="utf-8") as reader: - benchmark_json = json.loads(reader.read()) - return import_benchmark_values(list(benchmark_json["benchmarks"])[0], "execution_time", UOM) - - -def _sort_variants(item: str) -> str: - if item == NO_AGENT_VARIANT: - return "" - return item - - -def _build_application_results(folder: str) -> str: - application = os.path.basename(folder) - chart = StringIO() - tables = { - "baseline": StringIO(), - "candidate": StringIO(), - } - values = { - "baseline": dict(), - "candidate": dict(), - } - for variant_folder in [f.path for f in os.scandir(application_folder) if f.is_dir()]: - variant = os.path.basename(variant_folder) - values["candidate"][variant] = _parse_benchmarks(variant_folder, "candidate") - values["baseline"][variant] = _parse_benchmarks(variant_folder, "baseline") - no_agent_means = { - "baseline": float(np.mean(values["baseline"][NO_AGENT_VARIANT])), - "candidate": float(np.mean(values["candidate"][NO_AGENT_VARIANT])), - } - variants = sorted(values["candidate"].keys(), key=_sort_variants) - chart.write( - f""" -```mermaid -gantt - title {application} - execution time [CI {CI_INTERVAL}] : candidate={CANDIDATE}, baseline={BASELINE} - dateFormat X - axisFormat %s -""" - ) - - for build in ("baseline", "candidate"): - table = tables[build] - table.write(f"\n|Variant|Execution Time [CI {CI_INTERVAL}]|Δ {NO_AGENT_VARIANT}|\n") - table.write("|---|---|---|\n") - build_values = values[build] - no_agent_mean = no_agent_means[build] - chart.write(f"section {build}\n") - for variant in variants: - variant_values = build_values[variant] - mean = float(np.mean(variant_values)) - lower, upper = compute_confidence_interval(variant_values, CI_INTERVAL) - overhead = mean - no_agent_mean - overhead_pct = overhead * 100 / no_agent_mean - chart.write(f"{variant} ({round_value(mean, UOM)}) : {round(lower)}, {round(upper)}\n") - chart.write(f". : milestone, {round(mean)},\n") - table.write( - f"|{variant}|{round_value(mean, UOM)} [{round_value(lower, UOM)}, {round_value(upper, UOM)}]" - f"|{'-' if variant == NO_AGENT_VARIANT else f'{round_value(overhead, UOM)} ({round(overhead_pct, 1)}%)'}|\n" - ) - - chart.write("```\n") - - result = StringIO() - result.write(f"\n\n
Execution time for {application}\n") - result.write(chart.getvalue()) - for build, table in tables.items(): - result.write(f"\n* **{build}** results\n") - result.write(table.getvalue()) - result.write("\n
\n") - return result.getvalue() - - -for application_folder in [f.path for f in os.scandir(input_folder) if f.is_dir()]: - print(_build_application_results(application_folder)) diff --git a/.gitlab/scripts/append-load-report b/.gitlab/scripts/append-load-report deleted file mode 100755 index 7c4814faaf7..00000000000 --- a/.gitlab/scripts/append-load-report +++ /dev/null @@ -1,106 +0,0 @@ -#!/usr/bin/env python3 - -import json -import os -import sys -from io import StringIO - -import numpy as np - -from python_utils import compute_confidence_interval, import_benchmark_values, round_value - -NO_AGENT_VARIANT = os.getenv("NO_AGENT_VARIANT") -CI_INTERVAL = 0.99 -UOM = "µs" -CANDIDATE = os.getenv("CANDIDATE_VERSION") -BASELINE = os.getenv("BASELINE_VERSION") - -input_folder = sys.argv[1] - - -def _get_variant_from_scenario(scenario: str) -> str: - return scenario.split(":")[2] - - -def _parse_benchmarks(folder: str, build: str) -> dict[str, list[float]]: - with open(f"{folder}/benchmark-{build}.json", "r", encoding="utf-8") as reader: - benchmark_json = json.loads(reader.read()) - - return { - _get_variant_from_scenario(b["parameters"]["scenario"]): import_benchmark_values( - b, "http_req_duration", UOM - ) - for b in benchmark_json["benchmarks"] - } - - -def _sort_variants(item: str) -> str: - if item == NO_AGENT_VARIANT: - return "" - return item - - -def _build_application_results(folder: str) -> str: - application = os.path.basename(folder) - chart = StringIO() - tables = { - "baseline": StringIO(), - "candidate": StringIO(), - } - values = { - "baseline": dict(), - "candidate": dict(), - } - - values["candidate"] = _parse_benchmarks(os.path.join(application_folder, "_merged_k6_results"), "candidate") - values["baseline"] = _parse_benchmarks(os.path.join(application_folder, "_merged_k6_results"), "baseline") - - no_agent_means = { - "baseline": float(np.mean(values["baseline"][NO_AGENT_VARIANT])), - "candidate": float(np.mean(values["candidate"][NO_AGENT_VARIANT])), - } - variants = sorted(values["candidate"].keys(), key=_sort_variants) - chart.write( - f""" -```mermaid -gantt - title {application} - request duration [CI {CI_INTERVAL}] : candidate={CANDIDATE}, baseline={BASELINE} - dateFormat X - axisFormat %s -""" - ) - - for build in ("baseline", "candidate"): - table = tables[build] - table.write(f"\n|Variant|Request duration [CI {CI_INTERVAL}]|Δ {NO_AGENT_VARIANT}|\n") - table.write("|---|---|---|\n") - build_values = values[build] - no_agent_mean = no_agent_means[build] - chart.write(f"section {build}\n") - for variant in variants: - variant_values = build_values[variant] - mean = float(np.mean(variant_values)) - lower, upper = compute_confidence_interval(variant_values, CI_INTERVAL) - overhead = mean - no_agent_mean - overhead_pct = overhead * 100 / no_agent_mean - chart.write(f"{variant} ({round_value(mean, UOM)}) : {round(lower)}, {round(upper)}\n") - chart.write(f". : milestone, {round(mean)},\n") - table.write( - f"|{variant}|{round_value(mean, UOM)} [{round_value(lower, UOM)}, {round_value(upper, UOM)}]" - f"|{'-' if variant == NO_AGENT_VARIANT else f'{round_value(overhead, UOM)} ({round(overhead_pct, 1)}%)'}|\n" - ) - - chart.write("```\n") - - result = StringIO() - result.write(f"\n\n
Request duration reports for {application}\n") - result.write(chart.getvalue()) - for build, table in tables.items(): - result.write(f"\n* **{build}** results\n") - result.write(table.getvalue()) - result.write("\n
\n") - return result.getvalue() - - -for application_folder in [f.path for f in os.scandir(input_folder) if f.is_dir()]: - print(_build_application_results(application_folder)) diff --git a/.gitlab/scripts/append-startup-report b/.gitlab/scripts/append-startup-report deleted file mode 100755 index 233b9206897..00000000000 --- a/.gitlab/scripts/append-startup-report +++ /dev/null @@ -1,153 +0,0 @@ -#!/usr/bin/env python3 - -import json -import os -import sys -from io import StringIO - -import numpy as np - -from python_utils import import_benchmark_values, round_value - -AGENT_MODULE = "Agent" -AGENT_START_MODULE = "Agent.start" -UOM = "µs" -CANDIDATE = os.getenv("CANDIDATE_VERSION") -BASELINE = os.getenv("BASELINE_VERSION") -TRACING_VARIANT = "tracing" - -input_folder = sys.argv[1] - - -def _parse_benchmarks(folder: str, build: str, modules: list[str]) -> dict: - benchmarks = dict() - with open(f"{folder}/benchmark-{build}.json", "r", encoding="utf-8") as reader: - benchmark_json = json.loads(reader.read()) - for benchmark in benchmark_json["benchmarks"]: - parameters = benchmark["parameters"] - module = parameters["module"] - if module not in modules: - modules.append(module) - benchmarks[module] = import_benchmark_values(benchmark, "execution_time", UOM) - return benchmarks - - -def _sort_variants(item: str) -> str: - if item == TRACING_VARIANT: - return "" - return item - - -def _build_global_results(application: str, variants: list[str], values: dict) -> str: - chart = StringIO() - tables = { - "baseline": StringIO(), - "candidate": StringIO(), - } - tracing_means = { - "baseline": { - AGENT_MODULE: float(np.mean(values["baseline"][TRACING_VARIANT][AGENT_MODULE])), - AGENT_START_MODULE: float( - np.mean(values["baseline"][TRACING_VARIANT][AGENT_START_MODULE]) - ), - }, - "candidate": { - AGENT_MODULE: float(np.mean(values["candidate"][TRACING_VARIANT][AGENT_MODULE])), - AGENT_START_MODULE: float( - np.mean(values["candidate"][TRACING_VARIANT][AGENT_START_MODULE]) - ), - }, - } - - chart.write( - f""" -```mermaid -gantt - title {application} - global startup overhead: candidate={CANDIDATE}, baseline={BASELINE} - - dateFormat X - axisFormat %s -""" - ) - for variant in variants: - chart.write(f"section {variant}\n") - for build in ("baseline", "candidate"): - for module in (AGENT_START_MODULE, AGENT_MODULE): - module_name = "Total" if module == AGENT_MODULE else "Agent" - mean = float(np.mean(values[build][variant][module])) - chart.write(f"{module_name} [{build}] ({round_value(mean, UOM)}) : 0, {round(mean)}\n") - chart.write("```\n") - - for build in ("baseline", "candidate"): - table = tables[build] - table.write(f"\n|Module|Variant|Duration|Δ {TRACING_VARIANT}|\n") - table.write("|---|---|---|---|\n") - for module in (AGENT_START_MODULE, AGENT_MODULE): - for variant in variants: - tracing_mean = tracing_means[build][module] - module_name = "Total" if module == AGENT_MODULE else "Agent" - mean = float(np.mean(values[build][variant][module])) - overhead = mean - tracing_mean - overhead_pct = overhead * 100 / tracing_mean - table.write( - f"|{module_name}|{variant}|{round_value(mean, UOM)}" - f"|{'-' if variant == TRACING_VARIANT else f'{round_value(overhead, UOM)} ({round(overhead_pct, 1)}%)'}|\n" - ) - - result = StringIO() - result.write(chart.getvalue()) - for build, table in tables.items(): - result.write(f"\n* **{build}** results\n") - result.write(table.getvalue()) - return result.getvalue() - - -def _build_module_results(application: str, module_names: list[str], variants: 
list[str], values: dict) -> str: - result = StringIO() - result.write( - f""" -```mermaid -gantt - title {application} - break down per module: candidate={CANDIDATE}, baseline={BASELINE} - - dateFormat X - axisFormat %s -""" - ) - for variant in variants: - result.write(f"section {variant}\n") - for module in module_names: - if module not in (AGENT_START_MODULE, AGENT_MODULE): - for build in ("baseline", "candidate"): - variant_values = values[build][variant] - if module in variant_values: - module_values = variant_values[module] - mean = float(np.mean(module_values)) - result.write(f"{module} [{build}] ({round_value(mean, UOM)}) : 0, {round(mean)}\n") - - result.write("```\n") - return result.getvalue() - - -def _build_application_results(folder: str) -> str: - application = os.path.basename(folder) - module_names = list() - values = { - "baseline": dict(), - "candidate": dict(), - } - for variant_folder in [f.path for f in os.scandir(folder) if f.is_dir()]: - variant = os.path.basename(variant_folder) - values["candidate"][variant] = _parse_benchmarks(variant_folder, "candidate", module_names) - values["baseline"][variant] = _parse_benchmarks(variant_folder, "baseline", module_names) - variants = sorted(values["candidate"].keys(), key=_sort_variants) - result = StringIO() - result.write(f"\n\n
Startup time reports for {application}\n") - result.write(_build_global_results(application, variants, values)) - result.write(_build_module_results(application, module_names, variants, values)) - result.write("\n
\n") - return result.getvalue() - - -for application_folder in [f.path for f in os.scandir(input_folder) if f.is_dir()]: - print(_build_application_results(application_folder)) diff --git a/.gitlab/scripts/benchmark-compare.sh b/.gitlab/scripts/benchmark-compare.sh index 4bc84649424..d5d14d650d8 100755 --- a/.gitlab/scripts/benchmark-compare.sh +++ b/.gitlab/scripts/benchmark-compare.sh @@ -8,7 +8,6 @@ if [[ $# -ne 1 ]]; then fi readonly BENCHMARK_TYPE="$1" -readonly SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]:-$0}")" &>/dev/null && pwd 2>/dev/null)" readonly REPORTS_DIR="${CI_PROJECT_DIR}/reports" readonly TYPE_DIR="${REPORTS_DIR}/${BENCHMARK_TYPE}" readonly CANDIDATE_RAW_DIR="${TYPE_DIR}/candidate-raw" @@ -21,9 +20,6 @@ source "${REPORTS_DIR}/baseline-info.env" export MD_REPORT_ONLY_CHANGES=1 export MD_REPORT_SAMPLE_METRICS=1 export FAIL_ON_REGRESSION_THRESHOLD=20.0 -export NO_AGENT_VARIANT="no_agent" -export BASELINE_VERSION="${BASELINE_SHA:0:10}" -export CANDIDATE_VERSION="${CI_COMMIT_SHA:0:10}" readonly JOBS_API_URL="${CI_API_V4_URL}/projects/${CI_PROJECT_ID}" @@ -334,14 +330,6 @@ build_report() { --metrics="${metrics}" \ --outpath="${TYPE_DIR}/comparison-baseline-vs-candidate.md" \ "${TYPE_DIR}"/*/*/benchmark-{baseline,candidate}.json - - if [[ "${BENCHMARK_TYPE}" == "startup" ]]; then - "${SCRIPT_DIR}/append-startup-report" "${TYPE_DIR}" >> "${TYPE_DIR}/comparison-baseline-vs-candidate.md" || true - elif [[ "${BENCHMARK_TYPE}" == "load" ]]; then - "${SCRIPT_DIR}/append-load-report" "${TYPE_DIR}" >> "${TYPE_DIR}/comparison-baseline-vs-candidate.md" || true - elif [[ "${BENCHMARK_TYPE}" == "dacapo" ]]; then - "${SCRIPT_DIR}/append-dacapo-report" "${TYPE_DIR}" >> "${TYPE_DIR}/comparison-baseline-vs-candidate.md" || true - fi } process_pipeline "${CI_PIPELINE_ID}" "candidate" "${CANDIDATE_RAW_DIR}" "${CI_COMMIT_SHA}" diff --git a/.gitlab/scripts/post-benchmark-pr-comment.sh b/.gitlab/scripts/post-benchmark-pr-comment.sh index c46850f59dd..4ca2245da87 100755 --- a/.gitlab/scripts/post-benchmark-pr-comment.sh +++ b/.gitlab/scripts/post-benchmark-pr-comment.sh @@ -3,14 +3,13 @@ set -euo pipefail readonly REPORTS_DIR="${CI_PROJECT_DIR}/reports" -readonly MARKER="" readonly BODY_FILE="${REPORTS_DIR}/benchmark-comment.md" +readonly BP_HEADER="${BP_HEADER:-Benchmarks}" +readonly BP_ON_DUPLICATE="${BP_ON_DUPLICATE:-replace}" mkdir -p "${REPORTS_DIR}" { - echo "${MARKER}" - echo if compgen -G "${REPORTS_DIR}/*/fallback_to_master.txt" >/dev/null; then echo "⚠️ **Warning**: Baseline build not found for merge-base commit. Comparing against the latest commit on master instead." echo @@ -30,43 +29,8 @@ if [[ -z "${UPSTREAM_BRANCH:-}" ]]; then exit 0 fi -if [[ -z "${GITHUB_REPOSITORY:-}" ]]; then - export GITHUB_REPOSITORY="DataDog/${UPSTREAM_PROJECT_NAME:-dd-trace-java}" -fi - -if [[ -z "${GITHUB_TOKEN:-}" ]]; then - echo "GITHUB_TOKEN is required to post benchmark comments." >&2 - exit 1 -fi - -gh auth login --with-token < <(printf '%s' "${GITHUB_TOKEN}") >/dev/null 2>&1 || true -COMMENT_BODY="$(cat "${BODY_FILE}")" - -readonly PR_NUMBER="$( - gh api "repos/${GITHUB_REPOSITORY}/pulls" \ - -f state=open \ - -f head="DataDog:${UPSTREAM_BRANCH}" \ - --jq '.[0].number // ""' -)" - -if [[ -z "${PR_NUMBER}" ]]; then - echo "No open PR found for branch '${UPSTREAM_BRANCH}'. Printing report instead." 
- cat "${BODY_FILE}" - exit 0 -fi - -readonly COMMENT_ID="$( - gh api "repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments" --paginate \ - --jq ".[] | select(.body | contains(\"${MARKER}\")) | .id" \ - | head -n 1 -)" - -if [[ -n "${COMMENT_ID}" ]]; then - gh api --method PATCH "repos/${GITHUB_REPOSITORY}/issues/comments/${COMMENT_ID}" \ - -f body="${COMMENT_BODY}" >/dev/null - echo "Updated existing benchmark comment (${COMMENT_ID}) on PR #${PR_NUMBER}." -else - gh api --method POST "repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments" \ - -f body="${COMMENT_BODY}" >/dev/null - echo "Posted benchmark comment on PR #${PR_NUMBER}." -fi +cat "${BODY_FILE}" | pr-commenter \ + --for-repo="${UPSTREAM_PROJECT_NAME:-$CI_PROJECT_NAME}" \ + --for-pr="${UPSTREAM_BRANCH}" \ + --header="${BP_HEADER}" \ + --on-duplicate="${BP_ON_DUPLICATE}" diff --git a/.gitlab/scripts/python_utils.py b/.gitlab/scripts/python_utils.py deleted file mode 100644 index b721c060932..00000000000 --- a/.gitlab/scripts/python_utils.py +++ /dev/null @@ -1,29 +0,0 @@ -import math - - -def round_value(value, uom): - for unit in ("s", "ms", "µs", "ns"): - if value >= 100.0 or unit == uom: - return f"{round(value, 1)} {unit}" - value *= 1_000 - return f"{value} ns" - - -def compute_confidence_interval(values, confidence): - n = len(values) - values = sorted(values) - lower = int(math.floor((1.0 - confidence) / 2.0 * n)) - upper = int(math.ceil((1.0 + confidence) / 2.0 * n)) - return values[lower], values[upper - 1] - - -def import_benchmark_values(benchmark, metric, output_uom): - values = list() - for run in benchmark["runs"].values(): - values += run[metric]["values"] - input_uom = run[metric]["uom"] - if input_uom == "ns" and output_uom == "µs": - values = [v / 1_000 for v in values] - if input_uom == "µs" and output_uom == "ms": - values = [v / 1_000 for v in values] - return values