contentauth · tmathern · Jun 9, 2026 · Jun 9, 2026 · Jun 9, 2026 · Jun 10, 2026
diff --git a/.github/workflows/memory-benchmark.yml b/.github/workflows/memory-benchmark.yml
@@ -0,0 +1,44 @@
+name: Python SDK memray memory benchmark
+
+on:
+  pull_request:
+    types:
+      - opened
+      - reopened
+      - synchronize
+      - labeled
+
+permissions:
+  contents: read
+
+jobs:
+  memory-benchmark:
+    name: Python SDK memray memory benchmark
+    # Needs to match the arch the baseline was generated on.
+    runs-on: ubuntu-24.04-arm
+    if: |
+      contains(github.event.pull_request.labels.*.name, 'check-memory-benchmark') &&
+      (
+        github.event.pull_request.author_association == 'COLLABORATOR' ||
+        github.event.pull_request.author_association == 'MEMBER' ||
+        github.event.pull_request.author_association == 'OWNER'
+      )
+    steps:
+      - uses: actions/checkout@v4
+
+      # Build the perf image.
+      - name: Build memray perf image
+        run: make perf-image-rebuild
+
+      # Uses the Dockerfile environment for repeatable runs.
+      - name: Run memray memory benchmark
+        run: make memory-use-bench
+
+      # Upload all three flamegraph views per scenario (peak/leaks/temporary).
+      - name: Upload flamegraph reports
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: memray-flamegraphs
+          path: tests/perf/reports/*.html
+          if-no-files-found: warn
diff --git a/Makefile b/Makefile
@@ -148,10 +148,26 @@ MEMRAY_ITERATIONS ?= 100
 MEMRAY_THRESHOLD ?= 1.1
 SCENARIO ?=
 SCENARIO_ARG := $(if $(SCENARIO),--scenario $(SCENARIO),)
+# In CI, bind-mount the GITHUB_STEP_SUMMARY file (when set) so the report can be written to the job's step summary
+GH_SUMMARY_MOUNT := $(if $(GITHUB_STEP_SUMMARY),-v $(GITHUB_STEP_SUMMARY):$(GITHUB_STEP_SUMMARY),)
+# Build the perf Docker image only if it is missing: `docker image inspect` fails
+# when the tag is absent, which triggers the build. The repo is bind-mounted at run
+# time and the Dockerfile only COPYs requirements*.txt, so the latest Python code is
+# picked up without a rebuild; use perf-image-rebuild when deps/Dockerfile change.
+.PHONY: perf-image
+perf-image:
+	@docker image inspect c2pa-memray-$(PERF_ENV) >/dev/null 2>&1 || \
+		docker build -f tests/perf/Dockerfiles/$(PERF_ENV)-perf-Dockerfile -t c2pa-memray-$(PERF_ENV) .
+
+# Force a clean rebuild of the memray perf Docker image: skips the layer cache (--no-cache) and re-pulls the base image (--pull).
+.PHONY: perf-image-rebuild
+perf-image-rebuild:
+	docker build --no-cache --pull -f tests/perf/Dockerfiles/$(PERF_ENV)-perf-Dockerfile -t c2pa-memray-$(PERF_ENV) .
+
+# Runs memory benchmarks. Builds the perf Docker image first if it is missing (perf-image prerequisite); use `make perf-image-rebuild` to force a fresh image. Mounts $(CURDIR) so the repo is found even under `make -C`.
 .PHONY: memory-use-bench
-memory-use-bench:
-	docker build -f tests/perf/Dockerfiles/$(PERF_ENV)-perf-Dockerfile -t c2pa-memray-$(PERF_ENV) .
-	docker run --rm -v $(PWD):/workspace -e PYTHONPATH=/workspace/src -e PERF_ENV=$(PERF_ENV) -e MEMRAY_ITERATIONS=$(MEMRAY_ITERATIONS) -e MEMRAY_THRESHOLD=$(MEMRAY_THRESHOLD) c2pa-memray-$(PERF_ENV) python -m tests.perf.run_profile $(SCENARIO_ARG) $(PERF_ARGS)
+memory-use-bench: perf-image
+	docker run --rm -v $(CURDIR):/workspace $(GH_SUMMARY_MOUNT) -e PYTHONPATH=/workspace/src -e PERF_ENV=$(PERF_ENV) -e MEMRAY_ITERATIONS=$(MEMRAY_ITERATIONS) -e MEMRAY_THRESHOLD=$(MEMRAY_THRESHOLD) -e GITHUB_TOKEN -e GITHUB_STEP_SUMMARY c2pa-memray-$(PERF_ENV) python -m tests.perf.run_profile $(SCENARIO_ARG) $(PERF_ARGS)
 	@echo ""
 	@echo "Reports written to tests/perf/reports/"
 	@echo "Open tests/perf/reports/<scenario>-{peak,leaks,temporary}.html in a browser"

diff --git a/tests/perf/Dockerfiles/python-3.10-slim-perf-Dockerfile b/tests/perf/Dockerfiles/python-3.10-slim-perf-Dockerfile
@@ -2,7 +2,7 @@ FROM python:3.10.20-slim-bookworm
 
 WORKDIR /workspace
 
-# libunwind for memray native stack unwinding
+# libunwind-dev for memray native stack unwinding.
 RUN apt-get update && apt-get install -y --no-install-recommends \
     libunwind-dev \
     ca-certificates \

diff --git a/tests/perf/Dockerfiles/python-3.12-slim-perf-Dockerfile b/tests/perf/Dockerfiles/python-3.12-slim-perf-Dockerfile
@@ -2,7 +2,7 @@ FROM python:3.12.13-slim-bookworm
 
 WORKDIR /workspace
 
-# libunwind for memray native stack unwinding
+# libunwind-dev for memray native stack unwinding.
 RUN apt-get update && apt-get install -y --no-install-recommends \
     libunwind-dev \
     ca-certificates \

diff --git a/tests/perf/README.md b/tests/perf/README.md
@@ -67,6 +67,16 @@ The trailing `VAR=value` arguments (e.g. `PERF_ENV=ubuntu-24.04`, `PERF_ARGS=--u
 
 Reports are written to `tests/perf/reports/` on the local machine. Three HTML files per scenario, one per suffix (described below). Open any in a browser. After a run, the run also reports if the scenarios were or were not all within baseline threshold (baseline +10% memory use tolerance).
 
+## Running in CI
+
+The `.github/workflows/memory-benchmark.yml` workflow runs the Docker-based benchmarks on a PR, but only when the PR has the `check-memory-benchmark` label and its author is a repository collaborator, member, or owner. The workflow runs `make memory-use-bench`, so:
+
+- A regression (peak or leaked > baseline +10%) makes the benchmark job exit non-zero.
+- A report table of the measured values is written to the job's Step Summary.
+- All three flamegraph HTML views per scenario are uploaded as the `memray-flamegraphs` artifact.
+
+The gate only acts as a regression test once a `tests/perf/baseline.json` is committed on the branch. Without one, `run_profile.py` treats the run as baseline creation (exits 0, no gating).
+
 ## Report views
 
 Each scenario produces three [memray flamegraphs](https://bloomberg.github.io/memray/flamegraph.html). All three are flamegraphs of the same run. They differ only in which allocations they count.