diff --git a/.github/workflows/pg-ci.yml b/.github/workflows/pg-ci.yml index 8560e9389f6..5bc5292d2a5 100644 --- a/.github/workflows/pg-ci.yml +++ b/.github/workflows/pg-ci.yml @@ -130,6 +130,22 @@ env: # commit-message directive parsed in the `setup` job below. CI_OS_ONLY_JOBS: "linux macos windows mingw compilerwarnings sanitycheck" + ### + # A few variables to make expressions later on shorter + ### + + ON_DEFAULT_BRANCH: ${{github.event.repository.default_branch == github.ref_name }} + + # Note that we need to be careful to use a separator that can't be in branch + # names, otherwise e.g. caches for 'master' might be restored on the + # 'master-pending' branch. + CACHE_PREFIX_DEFAULT: >- + :${{ github.job }}:${{ github.event.repository.default_branch }}: + CACHE_PREFIX_BRANCH: >- + :${{ github.job }}:${{ github.ref_name }}: + CACHE_SUFFIX: >- + ${{ github.run_id }}:${{ github.run_attempt }} + jobs: @@ -277,16 +293,30 @@ jobs: with: fetch-depth: ${{ env.CLONE_DEPTH }} - - &ccache_restore_step - name: Restore ccache - id: ccache_restore + # We restore both the ccache from the default branch (typically master), + # and from the current branch. This will often allow feature branches to + # start out with a high cache hit ratio. + # + # With ccache it turns out to work to just restore two caches into the + # same directory, as it's basically a content addressed store. Stats + # could be corrupted, but we zero them out anyway. 
+ - &ccache_restore_default_step + name: "ccache: Restore for default branch ${{github.event.repository.default_branch}}" + if: ${{ env.ON_DEFAULT_BRANCH == 'false' }} uses: actions/cache/restore@v5 with: path: ${{ env.CCACHE_DIR }} - key: ccache-${{ github.job }}-${{ github.ref_name }}-${{ github.run_id }}-${{ github.run_attempt }} - restore-keys: | - ccache-${{ github.job }}-${{ github.ref_name }}- - ccache-${{ github.job }}- + key: ccache${{env.CACHE_PREFIX_DEFAULT}}${{env.CACHE_SUFFIX}} + restore-keys: ccache${{env.CACHE_PREFIX_DEFAULT}} + + - &ccache_restore_branch_step + name: "ccache: Restore for branch ${{ github.ref_name }}" + id: ccache-restore-branch + uses: actions/cache/restore@v5 + with: + path: ${{ env.CCACHE_DIR }} + key: ccache${{env.CACHE_PREFIX_BRANCH}}${{env.CACHE_SUFFIX}} + restore-keys: ccache${{env.CACHE_PREFIX_BRANCH}} - &linux_prepare_workspace_step name: Prepare workspace @@ -325,15 +355,30 @@ jobs: ninja -C build -j${{env.BUILD_JOBS}} ${{env.MBUILD_TARGET}} ninja -C build -t missingdeps - # TODO: As long as we use per-run ccache caches, we should probably add - # a step that checks if there is sufficient new content to warrant - # saving the new cache. + # Decide if it's worth uploading a new version of the ccache cache. If + # we always do so unconditionally, we'd very quickly go through the + # allowed cache space. Instead we check if the hit rate is high enough + # already for that not to be worth it. + - &ccache_decide_save_step + name: "ccache: Decide whether to upload" + id: ccache-decide + # Make the decision whether to upload whenever the cache has been set + # up, so that incrementally addressing compiler errors/warnings + # doesn't have to start from scratch. 
+ if: | + always() && + steps.ccache-restore-branch.conclusion == 'success' + run: python3 src/tools/ci/gha_ccache_decide.py + - &ccache_save_step - name: Save ccache + name: "ccache: Upload cache" uses: actions/cache/save@v5 + if: | + always() && + steps.ccache-decide.outputs.should_save == 'true' with: path: ${{ env.CCACHE_DIR }} - key: ${{ steps.ccache_restore.outputs.cache-primary-key }} + key: ${{ steps.ccache-restore-branch.outputs.cache-primary-key }} # Run a minimal set of tests. The main regression tests take too long # for this purpose. For now this is a random quick pg_regress style @@ -448,7 +493,8 @@ jobs: - *nix_sysinfo_step - *checkout_step - - *ccache_restore_step + - *ccache_restore_default_step + - *ccache_restore_branch_step - *linux_prepare_workspace_step - name: Configure @@ -467,6 +513,7 @@ jobs: run: | make -s -j${BUILD_JOBS} world-bin + - *ccache_decide_save_step - *ccache_save_step - name: Test world @@ -508,7 +555,8 @@ jobs: - *nix_sysinfo_step - *checkout_step - - *ccache_restore_step + - *ccache_restore_default_step + - *ccache_restore_branch_step - *linux_prepare_workspace_step - name: Configure @@ -527,6 +575,7 @@ jobs: shell: *su_postgres_shell run: *ninja_build_cmd + - *ccache_decide_save_step - *ccache_save_step - name: Test world @@ -596,7 +645,8 @@ jobs: - *nix_sysinfo_step - *checkout_step - - *ccache_restore_step + - *ccache_restore_default_step + - *ccache_restore_branch_step - *linux_prepare_workspace_step - name: Configure @@ -613,6 +663,7 @@ jobs: shell: *su_postgres_shell run: *ninja_build_cmd + - *ccache_decide_save_step - *ccache_save_step - name: Test world @@ -682,7 +733,6 @@ jobs: steps: - *nix_sysinfo_step - *checkout_step - - *ccache_restore_step - name: Setup core files run: | @@ -745,6 +795,9 @@ jobs: path: ${{ env.MACPORTS_CACHE }} key: ${{ steps.mp-key.outputs.key }} + - *ccache_restore_default_step + - *ccache_restore_branch_step + - name: Configure env: PKG_CONFIG_PATH: /opt/local/lib/pkgconfig/ @@ -762,6 
+815,7 @@ jobs: - name: Build run: *ninja_build_cmd + - *ccache_decide_save_step - *ccache_save_step - name: Test world @@ -1062,7 +1116,8 @@ jobs: shell: cmd run: mkdir ${{env.PG_REGRESS_SOCK_DIR}} - - *ccache_restore_step + - *ccache_restore_default_step + - *ccache_restore_branch_step - name: Configure run: | @@ -1077,6 +1132,7 @@ jobs: - name: Build run: *ninja_build_cmd + - *ccache_decide_save_step - *ccache_save_step - name: Test world @@ -1118,7 +1174,8 @@ jobs: steps: - *nix_sysinfo_step - *checkout_step - - *ccache_restore_step + - *ccache_restore_default_step + - *ccache_restore_branch_step - name: Setup workspace run: | @@ -1213,5 +1270,6 @@ jobs: headerscheck cpluspluscheck \ EXTRAFLAGS='-fmax-errors=10' + - *ccache_decide_save_step - *ccache_save_step - *upload_logs_step
diff --git a/src/tools/ci/gha_ccache_decide.py b/src/tools/ci/gha_ccache_decide.py
new file mode 100644
index 00000000000..a8e32310d0f
--- /dev/null
+++ b/src/tools/ci/gha_ccache_decide.py
@@ -0,0 +1,127 @@
+#!/usr/bin/env python3
+
+"""Decide whether a CI job should upload a new ccache cache.
+
+Reads the local storage hit/miss counters from ccache, publishes the decision
+as the 'should_save' output via $GITHUB_OUTPUT and, if a new cache is going to
+be uploaded, trims the cache first.
+"""
+
+import os
+import re
+import shutil
+import subprocess
+import sys
+
+def run(cmd, check=True):
+    """Run cmd and return its combined stdout and stderr as text.
+
+    If check is true and the command fails, the captured output is printed
+    before subprocess.CalledProcessError is raised, as it otherwise would not
+    show up in the CI log.
+    """
+    proc = subprocess.run(
+        cmd,
+        check=False,
+        text=True,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+    )
+    if check and proc.returncode != 0:
+        print(proc.stdout, end="")
+        proc.check_returncode()
+    return proc.stdout
+
+def parse_ccache_stats():
+    """Return (hits, misses) of the local storage, per 'ccache --print-stats'.
+
+    A counter that is not present in the output is reported as 0.
+    """
+    counters = {"local_storage_hit": 0, "local_storage_miss": 0}
+
+    for line in run(["ccache", "--print-stats"]).splitlines():
+        m = re.fullmatch(r"(local_storage_hit|local_storage_miss)\s+(\d+)",
+                         line.strip())
+        if m:
+            counters[m.group(1)] = int(m.group(2))
+
+    return counters["local_storage_hit"], counters["local_storage_miss"]
+
+def append_github_output(key, value):
+    """Append key=value to the file named by $GITHUB_OUTPUT."""
+    output_path = os.environ["GITHUB_OUTPUT"]
+    with open(output_path, "a", encoding="utf-8") as f:
+        f.write(f"{key}={value}\n")
+
+def main():
+    """Make the upload decision and return the exit code of the script."""
+    on_default_branch = os.environ["ON_DEFAULT_BRANCH"] == "true"
+
+    # Decide the target hit percentage below which we decide to upload a new
+    # cache. On non-default branches a few misses aren't that bad. But, as the
+    # caches of the default branch are shared with all branches, it's worth
+    # aiming for a higher ratio there.
+    target_rate = 95 if on_default_branch else 80
+
+    # Log ccache stats, useful for more in-depth understanding. To avoid
+    # swamping the output, collapse it in a group. Close the group even if
+    # ccache fails, so that what follows isn't folded into it.
+    print("::group::ccache_stats")
+    try:
+        print(run(["ccache", "-s", "-vv"]))
+    finally:
+        print("::endgroup::")
+
+    # Compute the cache hit ratio. Use integer arithmetic, going via floating
+    # point can be off by one (e.g. 57 hits out of 100 would yield 56).
+    hits, misses = parse_ccache_stats()
+    total = hits + misses
+    hit_pct = hits * 100 // total if total > 0 else 100
+
+    print(f"hits: {hits}, misses: {misses}, hit_pct: {hit_pct}, target rate: {target_rate}")
+
+    # If the cache hit ratio was high, or the absolute number of misses
+    # (e.g. in case of a failed build) was low, there is no point in
+    # generating a new cache entry. We have limited cache space.
+    if hit_pct >= target_rate:
+        print(f"hit rate {hit_pct} is above target of {target_rate}, skip creating new cache entry")
+        should_save = False
+    elif misses <= 10:
+        print(f"only {misses} misses, skip creating new cache entry")
+        should_save = False
+    else:
+        print(f"hit rate {hit_pct} is below target of {target_rate}, create new cache entry")
+        should_save = True
+
+    append_github_output("should_save", str(should_save).lower())
+
+    if not should_save:
+        return 0
+
+    # It's not worth persisting old cache entries (e.g. from before a
+    # change to a central header, or from the default branch if this
+    # branch differs a lot). Therefore evict ccache entries that are a
+    # bit older. The cutoff here is fairly arbitrary, it could
+    # probably be improved.
+    print("::group::ccache_shrink")
+    try:
+        print(run(["ccache", "--evict-older-than", f"{45*60}s"]))
+        print(run(["ccache", "-X", "10"]))
+
+        # Don't store ccache stats, otherwise we'd need to reset the cache
+        # access data after restoring the cache in the next run, to be able
+        # to get the hit ratio of the CI run.
+        print(run(["ccache", "-z"]))
+    finally:
+        print("::endgroup::")
+
+    # Before continuing, try to kill all ccache instances, otherwise
+    # it's possible that on cancellations there are still ccache
+    # processes running that cause the upload to fail.
+    if shutil.which("killall"):
+        print(run(["killall", "ccache"], check=False))
+
+    return 0
+
+if __name__ == "__main__":
+    sys.exit(main())