diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 75a8ff62d..53b9b43be 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,7 +7,7 @@ jobs: test-ubuntu-latest: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: make # Fail build if there are warnings # build with TLS just for compilation coverage @@ -26,7 +26,7 @@ jobs: test-sanitizer-address: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: make # build with TLS module just for compilation coverage run: make SANITIZER=address REDIS_CFLAGS='-Werror -DDEBUG_ASSERTIONS -DREDIS_TEST' BUILD_TLS=module @@ -39,7 +39,7 @@ jobs: runs-on: ubuntu-latest container: debian:buster steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: make run: | sed -i 's|http://deb.debian.org/debian|http://archive.debian.org/debian|g' /etc/apt/sources.list @@ -50,7 +50,7 @@ jobs: build-macos-latest: runs-on: macos-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: make # Fail build if there are warnings # build with TLS just for compilation coverage @@ -59,7 +59,7 @@ jobs: build-32bit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: make run: | sudo apt-get update && sudo apt-get install libc6-dev-i386 gcc-multilib @@ -68,7 +68,7 @@ jobs: build-libc-malloc: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: make run: make REDIS_CFLAGS='-Werror' MALLOC=libc @@ -76,7 +76,7 @@ jobs: runs-on: ubuntu-latest container: quay.io/centos/centos:stream9 steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: make run: | dnf -y install which gcc make @@ -86,7 +86,7 @@ jobs: runs-on: ubuntu-latest container: ubuntu:20.04 steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: make run: | apt-get update diff --git 
a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml index 5108ec907..a5e3ebd01 100644 --- a/.github/workflows/codecov.yml +++ b/.github/workflows/codecov.yml @@ -4,13 +4,17 @@ name: "Codecov" # where each PR needs to be compared against the coverage of the head commit on: [push, pull_request] +permissions: + contents: read + jobs: code-coverage: - runs-on: ubuntu-22.04 + if: ${{ github.repository == 'redis/redis' }} + runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Install lcov and run test run: | @@ -19,6 +23,10 @@ jobs: - name: Upload coverage reports to Codecov uses: codecov/codecov-action@57e3a136b779b570ffcdbf80b3bdc90e7fab3de2 # v6 + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} with: - token: ${{ secrets.CODECOV_TOKEN }} - file: ./src/redis.info + files: ./src/redis.info + disable_search: true + fail_ci_if_error: true + diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index c5411b96c..117161a9c 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -19,15 +19,15 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Initialize CodeQL - uses: github/codeql-action/init@v3 + uses: github/codeql-action/init@v4 with: languages: ${{ matrix.language }} - name: Autobuild - uses: github/codeql-action/autobuild@v3 + uses: github/codeql-action/autobuild@v4 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v3 + uses: github/codeql-action/analyze@v4 diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index f5d37ae5c..4c99adb92 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -11,7 +11,7 @@ jobs: if: github.repository == 'redis/redis' runs-on: ubuntu-latest steps: - - uses: actions/checkout@main + - uses: actions/checkout@v6 - name: Download and extract the Coverity 
Build Tool run: | wget -q https://scan.coverity.com/download/cxx/linux64 --post-data "token=${COVERITY_SCAN_TOKEN}&project=redis-unstable" -O cov-analysis-linux64.tar.gz diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml index 36edb7529..029ec4530 100644 --- a/.github/workflows/daily.yml +++ b/.github/workflows/daily.yml @@ -47,7 +47,7 @@ jobs: echo "skiptests: ${{github.event.inputs.skiptests}}" echo "test_args: ${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} @@ -84,7 +84,7 @@ jobs: echo "skiptests: ${{github.event.inputs.skiptests}}" echo "test_args: ${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} @@ -123,7 +123,7 @@ jobs: echo "skiptests: ${{github.event.inputs.skiptests}}" echo "test_args: ${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} @@ -159,7 +159,7 @@ jobs: echo "skiptests: ${{github.event.inputs.skiptests}}" echo "test_args: ${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} @@ -201,7 +201,7 @@ jobs: echo "skiptests: ${{github.event.inputs.skiptests}}" echo "test_args: ${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ 
env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} @@ -234,14 +234,15 @@ jobs: echo "skiptests: ${{github.event.inputs.skiptests}}" echo "test_args: ${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} - name: make run: | apt-get update && apt-get install -y make gcc - make CC=gcc REDIS_CFLAGS='-Werror -DREDIS_TEST -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3' + # Also enables jemalloc's sized deallocation checks to catch sdallocx()/zfree_with_size() misuse. + make CC=gcc REDIS_CFLAGS='-Werror -DREDIS_TEST -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3' JEMALLOC_CONFIGURE_OPTS='--enable-opt-size-checks' - name: testprep run: sudo apt-get install -y tcl8.6 tclx procps - name: test @@ -273,7 +274,7 @@ jobs: echo "skiptests: ${{github.event.inputs.skiptests}}" echo "test_args: ${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} @@ -307,7 +308,7 @@ jobs: echo "skiptests: ${{github.event.inputs.skiptests}}" echo "test_args: ${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} @@ -341,7 +342,7 @@ jobs: echo "skiptests: ${{github.event.inputs.skiptests}}" echo "test_args: ${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} @@ -381,7 +382,7 @@ jobs: echo "skiptests: ${{github.event.inputs.skiptests}}" echo "test_args: 
${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} @@ -421,7 +422,7 @@ jobs: echo "skiptests: ${{github.event.inputs.skiptests}}" echo "test_args: ${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} @@ -461,7 +462,7 @@ jobs: echo "skiptests: ${{github.event.inputs.skiptests}}" echo "test_args: ${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} @@ -493,7 +494,7 @@ jobs: echo "skiptests: ${{github.event.inputs.skiptests}}" echo "test_args: ${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} @@ -571,7 +572,7 @@ jobs: echo "skiptests: ${{github.event.inputs.skiptests}}" echo "test_args: ${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} @@ -604,7 +605,7 @@ jobs: echo "skiptests: ${{github.event.inputs.skiptests}}" echo "test_args: ${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} @@ -636,7 +637,7 @@ jobs: echo "skiptests: 
${{github.event.inputs.skiptests}}" echo "test_args: ${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} @@ -666,7 +667,7 @@ jobs: echo "skiptests: ${{github.event.inputs.skiptests}}" echo "test_args: ${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} @@ -703,7 +704,7 @@ jobs: echo "skiptests: ${{github.event.inputs.skiptests}}" echo "test_args: ${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} @@ -744,7 +745,7 @@ jobs: echo "skiptests: ${{github.event.inputs.skiptests}}" echo "test_args: ${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} @@ -788,7 +789,7 @@ jobs: echo "skiptests: ${{github.event.inputs.skiptests}}" echo "test_args: ${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} @@ -833,7 +834,7 @@ jobs: echo "skiptests: ${{github.event.inputs.skiptests}}" echo "test_args: ${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} @@ 
-872,7 +873,7 @@ jobs: echo "skiptests: ${{github.event.inputs.skiptests}}" echo "test_args: ${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} @@ -911,7 +912,7 @@ jobs: echo "skiptests: ${{github.event.inputs.skiptests}}" echo "test_args: ${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} @@ -954,7 +955,7 @@ jobs: echo "skiptests: ${{github.event.inputs.skiptests}}" echo "test_args: ${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} @@ -996,7 +997,7 @@ jobs: echo "skiptests: ${{github.event.inputs.skiptests}}" echo "test_args: ${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} @@ -1022,7 +1023,7 @@ jobs: echo "skiptests: ${{github.event.inputs.skiptests}}" echo "test_args: ${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} @@ -1048,7 +1049,7 @@ jobs: echo "skiptests: ${{github.event.inputs.skiptests}}" echo "test_args: ${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ 
env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} @@ -1080,7 +1081,7 @@ jobs: echo "skiptests: ${{github.event.inputs.skiptests}}" echo "test_args: ${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} @@ -1099,7 +1100,7 @@ jobs: run: | echo "GITHUB_REPOSITORY=${{github.event.inputs.use_repo}}" >> $GITHUB_ENV echo "GITHUB_HEAD_REF=${{github.event.inputs.use_git_ref}}" >> $GITHUB_ENV - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} @@ -1131,7 +1132,7 @@ jobs: echo "skiptests: ${{github.event.inputs.skiptests}}" echo "test_args: ${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} @@ -1167,7 +1168,7 @@ jobs: echo "skiptests: ${{github.event.inputs.skiptests}}" echo "test_args: ${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} @@ -1203,7 +1204,7 @@ jobs: echo "skiptests: ${{github.event.inputs.skiptests}}" echo "test_args: ${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} @@ -1244,7 +1245,7 @@ jobs: echo "skiptests: ${{github.event.inputs.skiptests}}" echo "test_args: ${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: 
actions/checkout@v6 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} @@ -1289,7 +1290,7 @@ jobs: echo "skiptests: ${{github.event.inputs.skiptests}}" echo "test_args: ${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} @@ -1339,7 +1340,7 @@ jobs: echo "skiptests: ${{github.event.inputs.skiptests}}" echo "test_args: ${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} @@ -1388,7 +1389,7 @@ jobs: echo "skiptests: ${{github.event.inputs.skiptests}}" echo "test_args: ${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} @@ -1416,7 +1417,7 @@ jobs: echo "skiptests: ${{github.event.inputs.skiptests}}" echo "test_args: ${{github.event.inputs.test_args}}" echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} diff --git a/.github/workflows/external.yml b/.github/workflows/external.yml index 40c83e293..9dd3340aa 100644 --- a/.github/workflows/external.yml +++ b/.github/workflows/external.yml @@ -12,7 +12,7 @@ jobs: if: github.event_name != 'schedule' || github.repository == 'redis/redis' timeout-minutes: 360 steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Build run: make REDIS_CFLAGS=-Werror - name: Start redis-server @@ -27,7 +27,7 @@ jobs: --tags -slow - name: Archive redis log if: ${{ failure() }} - 
uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: test-external-redis-log path: external-redis.log @@ -37,7 +37,7 @@ jobs: if: github.event_name != 'schedule' || github.repository == 'redis/redis' timeout-minutes: 360 steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Build run: make REDIS_CFLAGS=-Werror - name: Start redis-server @@ -55,7 +55,7 @@ jobs: --tags -slow - name: Archive redis log if: ${{ failure() }} - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: test-external-cluster-log path: external-redis-cluster.log @@ -65,7 +65,7 @@ jobs: if: github.event_name != 'schedule' || github.repository == 'redis/redis' timeout-minutes: 360 steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Build run: make REDIS_CFLAGS=-Werror - name: Start redis-server @@ -79,7 +79,7 @@ jobs: --tags "-slow -needs:debug" - name: Archive redis log if: ${{ failure() }} - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: test-external-redis-nodebug-log path: external-redis-nodebug.log diff --git a/.github/workflows/post-release-automation.yml b/.github/workflows/post-release-automation.yml deleted file mode 100644 index 94d9cc52a..000000000 --- a/.github/workflows/post-release-automation.yml +++ /dev/null @@ -1,165 +0,0 @@ -name: Post-Release Automation - -on: - release: - types: [published] - -jobs: - extract-release-info: - if: github.repository == 'redis/redis' - runs-on: ubuntu-latest - outputs: - tag_name: ${{ steps.release-info.outputs.tag_name }} - release_type: ${{ steps.release-info.outputs.release_type }} - steps: - - name: Checkout repository - uses: actions/checkout@v5 - - - name: Extract and validate release information - id: release-info - env: - TAG_NAME: ${{ github.event.release.tag_name }} - GH_TOKEN: ${{ github.token }} - run: | - echo "tag_name=${TAG_NAME}" >> $GITHUB_OUTPUT - echo "Release tag: ${TAG_NAME}" - - LATEST_TAG=$(gh 
release view --json tagName --jq '.tagName') - echo "Latest release tag(from gh release): ${LATEST_TAG}" - - if [[ "${TAG_NAME}" == "${LATEST_TAG}" ]]; then - echo "release_type=latest" >> $GITHUB_OUTPUT - echo "Detected latest release: ${TAG_NAME}" - else - echo "release_type=non-latest" >> $GITHUB_OUTPUT - echo "Detected non-latest release: ${TAG_NAME} (latest is ${LATEST_TAG})" - fi - - create-tarball: - needs: extract-release-info - runs-on: ubuntu-latest - env: - TAG_NAME: ${{ needs.extract-release-info.outputs.tag_name }} - outputs: - sha256: ${{ steps.checksum.outputs.sha256 }} - size_mb: ${{ steps.size.outputs.size_mb }} - size_warning: ${{ steps.size.outputs.size_warning }} - steps: - - name: Checkout repository - uses: actions/checkout@v5 - with: - ref: ${{ env.TAG_NAME }} - fetch-depth: 0 - - - name: Create tarball - run: ./utils/releasetools/01_create_tarball.sh "$TAG_NAME" - - - name: Verify tarball size - id: size - run: | - TARBALL="/tmp/redis-${TAG_NAME}.tar.gz" - SIZE_MB=$(du -m "$TARBALL" | cut -f1) - echo "Tarball size: ${SIZE_MB} MB" - echo "size_mb=${SIZE_MB}" >> $GITHUB_OUTPUT - if [ "$SIZE_MB" -lt 3 ] || [ "$SIZE_MB" -gt 5 ]; then - echo "::warning::Tarball size ${SIZE_MB} MB is outside expected range (3-5 MB)" - echo "size_warning=true" >> $GITHUB_OUTPUT - else - echo "size_warning=false" >> $GITHUB_OUTPUT - fi - - - name: Calculate SHA256 checksum - id: checksum - run: | - TARBALL="/tmp/redis-${TAG_NAME}.tar.gz" - SHA256=$(shasum -a 256 "$TARBALL" | cut -d' ' -f1) - echo "SHA256: $SHA256" - echo "sha256=$SHA256" >> $GITHUB_OUTPUT - - - name: Upload tarball as artifact - uses: actions/upload-artifact@v6 - with: - name: redis-${{ env.TAG_NAME }}-tarball - path: /tmp/redis-${{ env.TAG_NAME }}.tar.gz - compression-level: 0 - - # approval-gate: - # needs: [extract-release-info, create-tarball] - # if: needs.extract-release-info.outputs.release_type == 'latest' - # runs-on: ubuntu-latest - # steps: - # - name: Approval gate - # run: | - # echo 
"Latest release detected. Manual approval required for production deployment." - # # TODO: Implement approval workflow - # # This could use GitHub Environments with required reviewers - # # or a manual approval step - - # upload-tarball: - # needs: [extract-release-info, create-tarball, approval-gate] - # if: always() && !cancelled() && needs.create-tarball.result == 'success' && (needs.approval-gate.result == 'success' || needs.approval-gate.result == 'skipped') - # runs-on: ubuntu-latest - # steps: - # - name: Upload tarball - # run: | - # echo "TODO: Implement tarball upload" - # # This will require: - # # - SSH credentials/keys for upload to download.redis.io - # # - Adaptation of utils/releasetools/02_upload_tarball.sh for CI environment - - # test-release-tarball: - # needs: upload-tarball - # runs-on: ubuntu-latest - # steps: - # - name: Test release tarball - # run: | - # echo "TODO: Implement release testing using utils/releasetools/03_test_release.sh" - # # This will: - # # - Download the uploaded tarball - # # - Extract and build Redis - - # update-release-hashes: - # needs: test-release-tarball - # runs-on: ubuntu-latest - # steps: - # - name: Update release hashes - # run: | - # echo "TODO: Implement hash update using utils/releasetools/04_release_hash.sh" - # # This will require: - # # - Access to redis-hashes repository - # # - Git credentials for committing and pushing - - summary-and-notify: - needs: [extract-release-info, create-tarball] # update-release-hashes - if: always() && github.repository == 'redis/redis' - runs-on: ubuntu-latest - env: - TAG_NAME: ${{ needs.extract-release-info.outputs.tag_name }} - RELEASE_TYPE: ${{ needs.extract-release-info.outputs.release_type }} - SHA256: ${{ needs.create-tarball.outputs.sha256 }} - SIZE_MB: ${{ needs.create-tarball.outputs.size_mb }} - SIZE_WARNING: ${{ needs.create-tarball.outputs.size_warning }} - steps: - - name: Summary - run: | - { - echo "## Post-Release Automation Summary" - echo "" - echo "- 
**Release Tag:** ${TAG_NAME}" - echo "- **Release Type:** ${RELEASE_TYPE}" - echo "- **Tarball SHA256:** ${SHA256}" - echo "- **Tarball Size:** ${SIZE_MB} MB" - if [ "${SIZE_WARNING}" == "true" ]; then - echo "" - echo "> [!WARNING]" - echo "> Tarball size is outside expected range, check the logs for details." - fi - } >> $GITHUB_STEP_SUMMARY - - # - name: Send Slack notification - # run: | - # echo "TODO: Implement Slack notification" - # # This will require: - # # - Slack webhook URL or bot token (stored in secrets) - # # - Determine appropriate channel (e.g., #releases, #redis-releases) - # # - Craft message with release information and workflow status diff --git a/.github/workflows/redis_docs_sync.yaml b/.github/workflows/redis_docs_sync.yaml index 508b8839d..154e69530 100644 --- a/.github/workflows/redis_docs_sync.yaml +++ b/.github/workflows/redis_docs_sync.yaml @@ -11,7 +11,7 @@ jobs: steps: - name: Generate a token id: generate-token - uses: actions/create-github-app-token@v1 + uses: actions/create-github-app-token@v3 with: app-id: ${{ secrets.DOCS_APP_ID }} private-key: ${{ secrets.DOCS_APP_PRIVATE_KEY }} diff --git a/.github/workflows/reply-schemas-linter.yml b/.github/workflows/reply-schemas-linter.yml index a57a97ab3..539e739f3 100644 --- a/.github/workflows/reply-schemas-linter.yml +++ b/.github/workflows/reply-schemas-linter.yml @@ -12,9 +12,9 @@ jobs: reply-schemas-linter: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Setup nodejs - uses: actions/setup-node@v4 + uses: actions/setup-node@v6 - name: Install packages run: npm install ajv - name: linter diff --git a/.github/workflows/spell-check.yml b/.github/workflows/spell-check.yml index 2f9cf9cce..48b949b05 100644 --- a/.github/workflows/spell-check.yml +++ b/.github/workflows/spell-check.yml @@ -16,10 +16,10 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: pip cache - uses: 
actions/cache@v4 + uses: actions/cache@v5 with: path: ~/.cache/pip key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} diff --git a/.gitignore b/.gitignore index 5ed94f1da..63968fb29 100644 --- a/.gitignore +++ b/.gitignore @@ -30,6 +30,7 @@ deps/lua/src/luac deps/lua/src/liblua.a deps/hdr_histogram/libhdrhistogram.a deps/fpconv/libfpconv.a +deps/tre/libtre.a tests/tls/* .make-* .prerequisites diff --git a/README.md b/README.md index 5ea9241ad..21de64642 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 @@ Redis excels in various applications, including: - **Distributed Session Store:** Offers flexible session data modeling (string, JSON, hash). - **Data Structure Server:** Provides low-level data structures (strings, lists, sets, hashes, sorted sets, JSON, etc.) with high-level semantics (counters, queues, leaderboards, rate limiters) and supports transactions & scripting. - **NoSQL Data Store:** Key-value, document, and time series data storage. -- **Search and Query Engine:** Indexing for hash/JSON documents, supporting vector search, full-text search, geospatial queries, ranking, and aggregations via Redis Query Engine. +- **Search and Query Engine:** Indexing for hash/JSON documents, supporting vector search, full-text search, geospatial queries, ranking, and aggregations via Redis Search. - **Event Store & Message Broker:** Implements queues (lists), priority queues (sorted sets), event deduplication (sets), streams, and pub/sub with probabilistic stream processing capabilities. - **Vector Store for GenAI:** Integrates with AI applications (e.g. LangGraph, mem0) for short-term memory, long-term memory, LLM response caching (semantic caching), and retrieval augmented generation (RAG). - **Real-Time Analytics:** Powers personalization, recommendations, fraud detection, and risk assessment. 
@@ -172,9 +172,10 @@ Redis provides a variety of data types, processing engines, and capabilities to **Important:** Features marked with an asterisk (\*) require Redis to be compiled with the `BUILD_WITH_MODULES=yes` flag when [building Redis from source](#build-redis-from-source) - [**String:**](https://redis.io/docs/latest/develop/data-types/strings) Sequences of bytes, including text, serialized objects, and binary arrays used for caching, counters, and bitwise operations. -- [**JSON:**](https://redis.io/docs/latest/develop/data-types/json/) Nested JSON documents that are indexed and searchable using JSONPath expressions and with [Redis Query Engine](https://redis.io/docs/latest/develop/interact/search-and-query/) +- [**JSON:**](https://redis.io/docs/latest/develop/data-types/json/) Nested JSON documents that are indexed and searchable using JSONPath expressions and with [Redis Search](https://redis.io/docs/latest/develop/ai/search-and-query/) +- [**Array:**](https://redis.io/docs/latest/develop/data-types/arrays/) Sparse, index-addressable collection of string values - [**Hash:**](https://redis.io/docs/latest/develop/data-types/hashes/) Field-value maps used to represent basic objects and store groupings of key-value pairs with support for [hash field expiration (TTL)](https://redis.io/docs/latest/develop/data-types/hashes/#field-expiration) -- [**Redis Query Engine:**](https://redis.io/docs/latest/develop/interact/search-and-query/) Use Redis as a document database, a vector database, a secondary index, and a search engine. Define indexes for hash and JSON documents and then use a rich query language for vector search, full-text search, geospatial queries, and aggregations. +- [**Redis Search:**](https://redis.io/docs/latest/develop/ai/search-and-query/) Use Redis as a document database, a vector database, a secondary index, and a search engine. 
Define indexes for hash and JSON documents and then use a rich query language for vector search, full-text search, geospatial queries, and aggregations. - [**List:**](https://redis.io/docs/latest/develop/data-types/lists/) Linked lists of string values used as stacks, queues, and for queue management. - [**Set:**](https://redis.io/docs/latest/develop/data-types/sets/) Unordered collection of unique strings used for tracking unique items, relations, and common set operations (intersections, unions, differences). - [**Sorted set:**](https://redis.io/docs/latest/develop/data-types/sorted-sets/) Collection of unique strings ordered by an associated score used for leaderboards and rate limiters. diff --git a/SECURITY.md b/SECURITY.md index 8cc4dde0f..367efe9bc 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -23,10 +23,20 @@ unless this is not possible or feasible with a reasonable effort. ## Reporting a Vulnerability -If you believe you've discovered a serious vulnerability, please contact the -Redis core team at redis@redis.io. We will evaluate your report and if -necessary issue a fix and an advisory. If the issue was previously undisclosed, -we'll also mention your name in the credits. +If you believe you have found a security vulnerability, to ensure proper review +and assessment, we kindly ask that vulnerability reports be submitted through +our [Redis Vulnerability Disclosure Program](https://redis.io/redis-responsible-vulnerability-disclosure/). + +We have found this path to be beneficial for both researchers and us for +a number of reasons, including fast response times for researchers and +opportunities for us to invite those with exceptional reports into closed paid +engagements. + +For those averse to using our chosen platform, we will also accept reports directly +via GitHub's "Report a Vulnerability". 
+ +To contact the security team directly with questions use: [security@redis.com](mailto:security@redis.com) + ## Responsible Disclosure diff --git a/deps/Makefile b/deps/Makefile index 60e0e569e..7ca6de4c2 100644 --- a/deps/Makefile +++ b/deps/Makefile @@ -59,6 +59,7 @@ distclean: -(cd jemalloc && [ -f Makefile ] && $(MAKE) distclean) > /dev/null || true -(cd hdr_histogram && $(MAKE) clean) > /dev/null || true -(cd fpconv && $(MAKE) clean) > /dev/null || true + -(cd tre && $(MAKE) clean) > /dev/null || true -(cd xxhash && $(MAKE) clean) > /dev/null || true -(rm -f .make-*) @@ -94,6 +95,13 @@ fpconv: .make-prerequisites .PHONY: fpconv +tre: .make-prerequisites + @printf '%b %b\n' $(MAKECOLOR)MAKE$(ENDCOLOR) $(BINCOLOR)$@$(ENDCOLOR) + cd tre && $(MAKE) CFLAGS="$(DEPS_CFLAGS)" LDFLAGS="$(DEPS_LDFLAGS)" + +.PHONY: tre + + XXHASH_CFLAGS = -fPIC $(DEPS_CFLAGS) xxhash: .make-prerequisites @printf '%b %b\n' $(MAKECOLOR)MAKE$(ENDCOLOR) $(BINCOLOR)$@$(ENDCOLOR) @@ -129,8 +137,8 @@ lua: .make-prerequisites .PHONY: lua -JEMALLOC_CFLAGS=$(CFLAGS) -JEMALLOC_LDFLAGS=$(LDFLAGS) +JEMALLOC_CFLAGS=$(ENABLE_LTO) $(CFLAGS) +JEMALLOC_LDFLAGS=$(ENABLE_LTO) $(LDFLAGS) ifneq ($(DEB_HOST_GNU_TYPE),) JEMALLOC_CONFIGURE_OPTS += --host=$(DEB_HOST_GNU_TYPE) diff --git a/deps/tre/LICENSE b/deps/tre/LICENSE new file mode 100644 index 000000000..76ea75f40 --- /dev/null +++ b/deps/tre/LICENSE @@ -0,0 +1,29 @@ +This is the license, copyright notice, and disclaimer for TRE, a regex +matching package (library and tools) with support for approximate +matching. + +Copyright (c) 2001-2009 Ville Laurikari +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. 
Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/deps/tre/Makefile b/deps/tre/Makefile new file mode 100644 index 000000000..507487749 --- /dev/null +++ b/deps/tre/Makefile @@ -0,0 +1,79 @@ +STD= -std=c99 +WARN= -Wall +OPT= -Os + +ifeq ($(SANITIZER),address) + CFLAGS+=-fsanitize=address -fno-sanitize-recover=all -fno-omit-frame-pointer + LDFLAGS+=-fsanitize=address +else +ifeq ($(SANITIZER),undefined) + CFLAGS+=-fsanitize=undefined -fno-sanitize-recover=all -fno-omit-frame-pointer + LDFLAGS+=-fsanitize=undefined +else +ifeq ($(SANITIZER),thread) + CFLAGS+=-fsanitize=thread -fno-sanitize-recover=all -fno-omit-frame-pointer + LDFLAGS+=-fsanitize=thread +else +ifeq ($(SANITIZER),memory) + CFLAGS+=-fsanitize=memory -fsanitize-memory-track-origins=2 -fno-sanitize-recover=all -fno-omit-frame-pointer + LDFLAGS+=-fsanitize=memory +endif +endif +endif +endif + +R_CFLAGS= $(STD) $(WARN) $(OPT) $(DEBUG) $(CFLAGS) -DTRE_REGEX_T_FIELD=value -Ilocal_includes -Ilib +R_LDFLAGS= $(LDFLAGS) +DEBUG= -g + +R_CC=$(CC) $(R_CFLAGS) +R_LD=$(CC) $(R_LDFLAGS) + +AR= 
ar +ARFLAGS= rcs + +TRE_OBJ=lib/regcomp.o lib/regerror.o lib/regexec.o lib/tre-ast.o lib/tre-compile.o \ + lib/tre-filter.o lib/tre-match-backtrack.o lib/tre-match-parallel.o \ + lib/tre-mem.o lib/tre-parse.o lib/tre-stack.o lib/xmalloc.o +TRE_TESTS=tests/retest tests/test-str-source tests/test-literal-opt tests/test-malformed-regn + +libtre.a: $(TRE_OBJ) + $(AR) $(ARFLAGS) $@ $+ + +check: $(TRE_TESTS) + @set -e; \ + for test in $(TRE_TESTS); do \ + echo "TEST $$test"; \ + ./$$test; \ + done + +tests/retest: tests/retest.c libtre.a + $(R_LD) $(R_CFLAGS) -DHAVE_REGNEXEC -DHAVE_REGNCOMP -o $@ $< libtre.a + +tests/test-str-source: tests/test-str-source.c libtre.a + $(R_LD) $(R_CFLAGS) -o $@ $< libtre.a + +tests/test-literal-opt: tests/test-literal-opt.c libtre.a + $(R_LD) $(R_CFLAGS) -o $@ $< libtre.a + +tests/test-malformed-regn: tests/test-malformed-regn.c libtre.a + $(R_LD) $(R_CFLAGS) -o $@ $< libtre.a + +lib/regcomp.o: lib/regcomp.c local_includes/tre.h local_includes/tre-config.h lib/tre-internal.h lib/xmalloc.h +lib/regerror.o: lib/regerror.c local_includes/tre.h +lib/regexec.o: lib/regexec.c local_includes/tre.h lib/tre-internal.h lib/xmalloc.h +lib/tre-ast.o: lib/tre-ast.c lib/tre-ast.h lib/tre-internal.h +lib/tre-compile.o: lib/tre-compile.c lib/tre-compile.h lib/tre-internal.h lib/tre-mem.h lib/tre-parse.h lib/tre-stack.h lib/xmalloc.h +lib/tre-filter.o: lib/tre-filter.c lib/tre-filter.h lib/tre-internal.h +lib/tre-match-backtrack.o: lib/tre-match-backtrack.c lib/tre-internal.h lib/tre-match-utils.h lib/tre-mem.h lib/tre-stack.h +lib/tre-match-parallel.o: lib/tre-match-parallel.c lib/tre-internal.h lib/tre-match-utils.h lib/tre-mem.h +lib/tre-mem.o: lib/tre-mem.c lib/tre-mem.h +lib/tre-parse.o: lib/tre-parse.c lib/tre-ast.h lib/tre-compile.h lib/tre-filter.h lib/tre-internal.h lib/tre-mem.h lib/tre-parse.h lib/tre-stack.h lib/xmalloc.h +lib/tre-stack.o: lib/tre-stack.c lib/tre-internal.h lib/tre-stack.h +lib/xmalloc.o: lib/xmalloc.c lib/xmalloc.h + +.c.o: + 
$(R_CC) -c -o $@ $< + +clean: + rm -f $(TRE_OBJ) libtre.a $(TRE_TESTS) diff --git a/deps/tre/README.md b/deps/tre/README.md new file mode 100644 index 000000000..b2e09bbcb --- /dev/null +++ b/deps/tre/README.md @@ -0,0 +1,276 @@ +Introduction +============ + +TRE is a lightweight, robust, and efficient POSIX compliant regexp +matching library with some exciting features such as approximate +(fuzzy) matching. + +The matching algorithm used in TRE uses linear worst-case time in +the length of the text being searched, and quadratic worst-case +time in the length of the used regular expression. + +In other words, the time complexity of the algorithm is O(M^2N), where +M is the length of the regular expression and N is the length of the +text. The used space is also quadratic on the length of the regex, +but does not depend on the searched string. This quadratic behaviour +occurs only on pathological cases which are probably very rare in +practice. + + +Hacking +======= + +Here's how to work with this code. + +Prerequisites +------------- + +You will need the following tools installed on your system: + + - autoconf + - automake + - gettext (including autopoint) + - libtool + - zip (optional) + + +Building +-------- + +First, prepare the tree. Change to the root of the source directory +and run + + ./utils/autogen.sh + +This will regenerate various things using the prerequisite tools so +that you end up with a buildable tree. + +After this, you can run the configure script and build TRE as usual: + + ./configure + make + make check + make install + + +Building a source code package +------------------------------ + +In a prepared tree, this command creates a source code tarball: + + ./configure && make dist + +Alternatively, you can run + + ./utils/build-sources.sh + +which builds the source code packages and puts them in the `dist` +subdirectory. This script needs a working `zip` command. + + +Features +======== + +TRE is not just yet another regexp matcher. 
TRE has some features +which are not there in most free POSIX compatible implementations. +Most of these features are not present in non-free implementations +either, for that matter. + +Approximate matching +-------------------- + +Approximate pattern matching allows matches to be approximate, that +is, allows the matches to be close to the searched pattern under some +measure of closeness. TRE uses the edit-distance measure (also known +as the Levenshtein distance) where characters can be inserted, +deleted, or substituted in the searched text in order to get an exact +match. + +Each insertion, deletion, or substitution adds the distance, or cost, +of the match. TRE can report the matches which have a cost lower than +some given threshold value. TRE can also be used to search for +matches with the lowest cost. + +TRE includes a version of the agrep (approximate grep) command line +tool for approximate regexp matching in the style of grep. Unlike +other agrep implementations (like the one by Sun Wu and Udi Manber +from University of Arizona) TRE agrep allows full regexps of any +length, any number of errors, and non-uniform costs for insertion, +deletion and substitution. + +Strict standard conformance +--------------------------- + +POSIX defines the behaviour of regexp functions precisely. TRE +attempts to conform to these specifications as strictly as possible. +TRE always returns the correct matches for subpatterns, for example. +Very few other implementations do this correctly. In fact, the only +other implementations besides TRE that I am aware of (free or not) +that get it right are Rx by Tom Lord, Regex++ by John Maddock, and the +AT&T ast regex by Glenn Fowler and Doug McIlroy. + +The standard TRE tries to conform to is the IEEE Std 1003.1-2001, or +Open Group Base Specifications Issue 6, commonly referred to as +“POSIX”. The relevant parts are the base specifications on regular +expressions (and the rationale) and the description of the `regcomp()` +API. 
+ +For an excellent survey on POSIX regexp matchers, see the testregex +pages by Glenn Fowler of AT&T Labs Research. + +Predictable matching speed +-------------------------- + +Because of the matching algorithm used in TRE, the maximum time +consumed by any `regexec()` call is always directly proportional to +the length of the searched string. There is one exception: if back +references are used, the matching may take time that grows +exponentially with the length of the string. This is because matching +back references is an NP complete problem, and almost certainly +requires exponential time to match in the worst case. + +Predictable and modest memory consumption +----------------------------------------- + +A `regexec()` call never allocates memory from the heap. TRE allocates +all the memory it needs during a `regcomp()` call, and some temporary +working space from the stack frame for the duration of the `regexec()` +call. The amount of temporary space needed is constant during +matching and does not depend on the searched string. For regexps of +reasonable size TRE needs less than 50K of dynamically allocated +memory during the `regcomp()` call, less than 20K for the compiled +pattern buffer, and less than two kilobytes of temporary working space +from the stack frame during a `regexec()` call. There is no time / +memory tradeoff. TRE is also small in code size; statically linking +with TRE increases the executable size less than 30K (gcc-3.2, x86, +GNU/Linux). + +Wide character and multibyte character set support +-------------------------------------------------- + +TRE supports multibyte character sets. This makes it possible to use +regexps seamlessly with, for example, Japanese locales. TRE also +provides a wide character API. + +Binary pattern and data support +------------------------------- + +TRE provides APIs which allow binary zero characters both in regexps +and searched strings. 
The standard API cannot be easily used to, for +example, search for printable words from binary data (although it is +possible with some hacking). Searching for patterns which contain +binary zeroes embedded is not possible at all with the standard API. + +Completely thread safe +---------------------- + +TRE is completely thread safe. All the exported functions are +re-entrant, and a single compiled regexp object can be used +simultaneously in multiple contexts; e.g. in `main()` and a signal +handler, or in many threads of a multithreaded application. + +Portable +-------- + +TRE is portable across multiple platforms. Below is a table of +platforms and compilers used to develop and test TRE: + + + + + + + +
| Platform | Compiler |
|---|---|
| FreeBSD 14.1 | Clang 18 |
| Ubuntu 22.04 | GCC 11 |
| macOS 14.6 | Clang 14 |
| Windows 11 | Microsoft Visual Studio 2022 |
+ +TRE should compile without changes on most modern POSIX-like +platforms, and be easily portable to any platform with a hosted C +implementation. + +Depending on the platform, you may need to install libutf8 to get +wide character and multibyte character set support. + +Free +---- + +TRE is released under a license which is essentially the same as the +“2 clause” BSD-style license used in NetBSD. See the file LICENSE for +details. + +Roadmap +------- + +There are currently two features, both related to collating elements, +missing from 100% POSIX compliance. These are: + +* Support for collating elements (e.g. `[[.\.]]`, where `\` is a + collating element). It is not possible to support multi-character + collating elements portably, since POSIX does not define a way to + determine whether a character sequence is a multi-character + collating element or not. + +* Support for equivalence classes, for example `[[=\=]]`, where + `\` is a collating element. An equivalence class matches any + character which has the same primary collation weight as `\`. + Again, POSIX provides no portable mechanism for determining the + primary collation weight of a collating element. + +Note that other portable regexp implementations don't support +collating elements either. The single exception is Regex++, which +comes with its own database for collating elements for different +locales. Support for collating elements and equivalence classes has +not been widely requested and is not very high on the TODO list at the +moment. + +These are other features I'm planning to implement real soon now: + +* All the missing GNU extensions enabled in GNU regex, such as + `[[:<:]]` and `[[:>:]]`. + +* A `REG_SHORTEST` `regexec()` flag for returning the shortest match + instead of the longest match. + +* Perl-compatible syntax: + * `[:^class:]` + Matches anything but the characters in class. Note that + `[^[:class:]]` works already, this would be just a convenience + shorthand. 
+ + * `\A` + Match only at beginning of string. + + * `\Z` + Match only at end of string, or before newline at the end. + + * `\z` + Match only at end of string. + + * `\l` + Lowercase next char (think vi). + + * `\u` + Uppercase next char (think vi). + + * `\L` + Lowercase till `\E` (think vi). + + * `\U` + Uppercase till `\E` (think vi). + + * `(?=pattern)` + Zero-width positive look-ahead assertions. + + * `(?!pattern)` + Zero-width negative look-ahead assertions. + + * `(?<=pattern)` + Zero-width positive look-behind assertions. + + * `(? +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include + +#include "tre-internal.h" +#include "xmalloc.h" + +int +tre_regncomp(regex_t *preg, const char *regex, size_t n, int cflags) +{ + int ret; + if (n > TRE_MAX_RE) + return REG_ESPACE; +#if TRE_WCHAR + tre_char_t *wregex; + size_t wlen; + + wregex = xmalloc(sizeof(tre_char_t) * (n + 1)); + if (wregex == NULL) + return REG_ESPACE; + + /* If the current locale uses the standard single byte encoding of + characters, we don't do a multibyte string conversion. If we did, + many applications which use the default locale would break since + the default "C" locale uses the 7-bit ASCII character set, and + all characters with the eighth bit set would be considered invalid. 
*/ +#if TRE_MULTIBYTE + if (TRE_MB_CUR_MAX == 1) +#endif /* TRE_MULTIBYTE */ + { + size_t i; + const unsigned char *str = (const unsigned char *)regex; + tre_char_t *wstr = wregex; + + for (i = 0; i < n; i++) + *(wstr++) = *(str++); + wlen = n; + } +#if TRE_MULTIBYTE + else + { + size_t consumed; + tre_char_t *wcptr = wregex; +#ifdef HAVE_MBSTATE_T + mbstate_t state; + memset(&state, '\0', sizeof(state)); +#endif /* HAVE_MBSTATE_T */ + while (n > 0) + { + consumed = tre_mbrtowc(wcptr, regex, n, &state); + + switch (consumed) + { + case 0: + if (*regex == '\0') + consumed = 1; + else + { + xfree(wregex); + return REG_BADPAT; + } + break; + case -1: + DPRINT(("mbrtowc: error %d: %s.\n", errno, strerror(errno))); + xfree(wregex); + return REG_BADPAT; + case -2: + /* The last character wasn't complete. Let's not call it a + fatal error. */ + consumed = n; + break; + } + regex += consumed; + n -= consumed; + wcptr++; + } + wlen = wcptr - wregex; + } +#endif /* TRE_MULTIBYTE */ + + wregex[wlen] = L'\0'; + ret = tre_compile(preg, wregex, wlen, cflags); + xfree(wregex); +#else /* !TRE_WCHAR */ + ret = tre_compile(preg, (const tre_char_t *)regex, n, cflags); +#endif /* !TRE_WCHAR */ + + return ret; +} + +/* this version takes bytes literally, to be used with raw vectors */ +int +tre_regncompb(regex_t *preg, const char *regex, size_t n, int cflags) +{ + int ret; + if (n > TRE_MAX_RE) + return REG_ESPACE; +#if TRE_WCHAR /* wide chars = we need to convert it all to the wide format */ + tre_char_t *wregex; + size_t i; + + wregex = xmalloc(sizeof(tre_char_t) * n); + if (wregex == NULL) + return REG_ESPACE; + + for (i = 0; i < n; i++) + wregex[i] = (tre_char_t) ((unsigned char) regex[i]); + + ret = tre_compile(preg, wregex, n, cflags | REG_USEBYTES); + xfree(wregex); +#else /* !TRE_WCHAR */ + ret = tre_compile(preg, (const tre_char_t *)regex, n, cflags | REG_USEBYTES); +#endif /* !TRE_WCHAR */ + + return ret; +} + +int +tre_regcomp(regex_t *preg, const char *regex, int cflags) +{ 
+ size_t n = regex ? strlen(regex) : 0; + if (n > TRE_MAX_RE) + return REG_ESPACE; + return tre_regncomp(preg, regex, n, cflags); +} + +int +tre_regcompb(regex_t *preg, const char *regex, int cflags) +{ + int ret; + tre_char_t *wregex; + size_t i, n = regex ? strlen(regex) : 0; + const unsigned char *str = (const unsigned char *)regex; + tre_char_t *wstr; + + if (n > TRE_MAX_RE) + return REG_ESPACE; + wregex = xmalloc(sizeof(tre_char_t) * (n + 1)); + if (wregex == NULL) return REG_ESPACE; + wstr = wregex; + + for (i = 0; i < n; i++) + *(wstr++) = *(str++); + wregex[n] = L'\0'; + ret = tre_compile(preg, wregex, n, cflags | REG_USEBYTES); + xfree(wregex); + return ret; +} + + +#ifdef TRE_WCHAR +int +tre_regwncomp(regex_t *preg, const wchar_t *regex, size_t n, int cflags) +{ + if (n > TRE_MAX_RE) + return REG_ESPACE; + return tre_compile(preg, regex, n, cflags); +} + +int +tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags) +{ + size_t n = regex ? wcslen(regex) : 0; + if (n > TRE_MAX_RE) + return REG_ESPACE; + return tre_compile(preg, regex, n, cflags); +} +#endif /* TRE_WCHAR */ + +void +tre_regfree(regex_t *preg) +{ + tre_free(preg); +} + +/* EOF */ diff --git a/deps/tre/lib/regerror.c b/deps/tre/lib/regerror.c new file mode 100644 index 000000000..2f8326ce7 --- /dev/null +++ b/deps/tre/lib/regerror.c @@ -0,0 +1,86 @@ +/* + tre_regerror.c - POSIX tre_regerror() implementation for TRE. + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. 
+ +*/ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <string.h> +#ifdef HAVE_WCHAR_H +#include <wchar.h> +#endif /* HAVE_WCHAR_H */ +#ifdef HAVE_WCTYPE_H +#include <wctype.h> +#endif /* HAVE_WCTYPE_H */ + +#include "tre-internal.h" + +#ifdef HAVE_GETTEXT +#include <libintl.h> +#else +#define dgettext(p, s) s +#define gettext(s) s +#endif + +#define _(String) dgettext(PACKAGE, String) +#define gettext_noop(String) String + +#define xstr(s) str(s) +#define str(s) #s + +/* Error message strings for error codes listed in `tre.h'. This list + needs to be in sync with the codes listed there, naturally. */ +static const char *tre_error_messages[] = + { gettext_noop("No error"), /* REG_OK */ + gettext_noop("No match"), /* REG_NOMATCH */ + gettext_noop("Invalid regexp"), /* REG_BADPAT */ + gettext_noop("Unknown collating element"), /* REG_ECOLLATE */ + gettext_noop("Unknown character class name"), /* REG_ECTYPE */ + gettext_noop("Trailing backslash"), /* REG_EESCAPE */ + gettext_noop("Invalid back reference"), /* REG_ESUBREG */ + gettext_noop("Missing ']'"), /* REG_EBRACK */ + gettext_noop("Missing ')'"), /* REG_EPAREN */ + gettext_noop("Missing '}'"), /* REG_EBRACE */ + gettext_noop("Invalid contents of {}"), /* REG_BADBR */ + gettext_noop("Invalid character range"), /* REG_ERANGE */ + gettext_noop("Out of memory"), /* REG_ESPACE */ + gettext_noop("Invalid use of repetition operators"), /* REG_BADRPT */ + gettext_noop("Maximum repetition in {} larger than " xstr(RE_DUP_MAX)), /* REG_BADMAX */ + }; + +size_t +tre_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size) +{ + const char *err; + size_t err_len; + + /*LINTED*/(void)&preg; + if (errcode >= 0 + && errcode < (int)(sizeof(tre_error_messages) + / sizeof(*tre_error_messages))) + err = gettext(tre_error_messages[errcode]); + else + err = gettext("Unknown error"); + + err_len = strlen(err) + 1; + if (errbuf_size > 0 && errbuf != NULL) + { + if (err_len > errbuf_size) + { + strncpy(errbuf, err, errbuf_size - 1); + 
errbuf[errbuf_size - 1] = '\0'; + } + else + { + strcpy(errbuf, err); + } + } + return err_len; +} + +/* EOF */ diff --git a/deps/tre/lib/regexec.c b/deps/tre/lib/regexec.c new file mode 100644 index 000000000..c70eb70a4 --- /dev/null +++ b/deps/tre/lib/regexec.c @@ -0,0 +1,584 @@ +/* + tre_regexec.c - TRE POSIX compatible matching functions (and more). + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + +*/ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif /* HAVE_CONFIG_H */ + +#ifdef TRE_USE_ALLOCA +/* AIX requires this to be the first thing in the file. */ +#ifndef __GNUC__ +# if HAVE_ALLOCA_H +# include <alloca.h> +# else +# ifdef _AIX + #pragma alloca +# else +# ifndef alloca /* predefined by HP cc +Olibcalls */ +char *alloca (); +# endif +# endif +# endif +#endif +#endif /* TRE_USE_ALLOCA */ + +#include <assert.h> +#include <stdlib.h> +#include <string.h> +#ifdef HAVE_WCHAR_H +#include <wchar.h> +#endif /* HAVE_WCHAR_H */ +#ifdef HAVE_WCTYPE_H +#include <wctype.h> +#endif /* HAVE_WCTYPE_H */ +#ifndef TRE_WCHAR +#include <ctype.h> +#endif /* !TRE_WCHAR */ +#ifdef HAVE_MALLOC_H +#include <malloc.h> +#endif /* HAVE_MALLOC_H */ +#include <limits.h> + +#include "tre-internal.h" +#include "xmalloc.h" + +/* Literal alternatives are grouped by the first byte so the matcher can + * reach the relevant candidates in O(1). In nocase mode the lookup uses the + * same folded byte mapping that was applied at compile time. */ +static void +tre_litopt_candidate_range(const tre_literal_opt_t *opt, unsigned char first_byte, + size_t *start, size_t *end) +{ + unsigned char key = opt->nocase ? 
opt->fold_map[first_byte] : first_byte; + *start = opt->start_offsets[key]; + *end = opt->start_offsets[key + 1]; +} + +static int +tre_litopt_bytes_equal(const unsigned char *haystack, + const unsigned char *needle, size_t len, + const unsigned char *fold_map) +{ + size_t i; + + if (fold_map == NULL) + return memcmp(haystack, needle, len) == 0; + + for (i = 0; i < len; i++) + if (fold_map[haystack[i]] != needle[i]) + return 0; + return 1; +} + +static int +tre_litopt_contains_case(const unsigned char *haystack, size_t hay_len, + const unsigned char *needle, size_t needle_len, + int *match_end_ofs) +{ + const unsigned char *p; + size_t remaining; + + if (needle_len > hay_len) + return 0; + + p = haystack; + remaining = hay_len; + while (remaining >= needle_len) + { + p = memchr(p, needle[0], remaining - needle_len + 1); + if (p == NULL) + return 0; + if (memcmp(p, needle, needle_len) == 0) + { + if (match_end_ofs != NULL) + *match_end_ofs = (int)(p - haystack + needle_len); + return 1; + } + remaining = hay_len - (size_t)(p - haystack) - 1; + p++; + } + return 0; +} + +/* Nocase substring matching is still byte-oriented, but scanning once and + * only checking literals that share the same folded first byte avoids the + * old O(haystack * literals) restart pattern. 
*/ +static int +tre_litopt_contains_nocase(const tre_literal_opt_t *opt, + const unsigned char *haystack, size_t hay_len, + int *match_end_ofs) +{ + size_t i, start, end, j; + + for (i = 0; i < hay_len; i++) + { + tre_litopt_candidate_range(opt, haystack[i], &start, &end); + for (j = start; j < end; j++) + { + const tre_literal_opt_literal_t *lit = &opt->literals[j]; + if (lit->len <= hay_len - i + && tre_litopt_bytes_equal(haystack + i, lit->data, lit->len, + opt->fold_map)) + { + if (match_end_ofs != NULL) + *match_end_ofs = (int)(i + lit->len); + return 1; + } + } + } + return 0; +} + +static reg_errcode_t +tre_match_literal_opt(const tre_tnfa_t *tnfa, const char *string, size_t len, + int eflags, int *match_end_ofs) +{ + const tre_literal_opt_t *opt = &tnfa->literal_opt; + const unsigned char *haystack = (const unsigned char *)string; + size_t start = 0, end = opt->num_literals, i; + const unsigned char *fold_map = opt->nocase ? opt->fold_map : NULL; + + if ((opt->mode == TRE_LITERAL_OPT_PREFIX + || opt->mode == TRE_LITERAL_OPT_EXACT) + && (eflags & REG_NOTBOL)) + return REG_NOMATCH; + if ((opt->mode == TRE_LITERAL_OPT_SUFFIX + || opt->mode == TRE_LITERAL_OPT_EXACT) + && (eflags & REG_NOTEOL)) + return REG_NOMATCH; + + if ((opt->mode == TRE_LITERAL_OPT_EXACT + || opt->mode == TRE_LITERAL_OPT_PREFIX) + && len > 0) + tre_litopt_candidate_range(opt, haystack[0], &start, &end); + + if (opt->mode == TRE_LITERAL_OPT_CONTAINS) + { + if (opt->nocase) + return tre_litopt_contains_nocase(opt, haystack, len, match_end_ofs) + ? 
REG_OK : REG_NOMATCH; + + for (i = 0; i < opt->num_literals; i++) + { + const tre_literal_opt_literal_t *lit = &opt->literals[i]; + if (tre_litopt_contains_case(haystack, len, lit->data, lit->len, + match_end_ofs)) + return REG_OK; + } + return REG_NOMATCH; + } + + for (i = start; i < end; i++) + { + const tre_literal_opt_literal_t *lit = &opt->literals[i]; + + switch (opt->mode) + { + case TRE_LITERAL_OPT_EXACT: + if (len == lit->len + && tre_litopt_bytes_equal(haystack, lit->data, len, fold_map)) + { + if (match_end_ofs != NULL) + *match_end_ofs = (int)len; + return REG_OK; + } + break; + + case TRE_LITERAL_OPT_PREFIX: + if (len >= lit->len + && tre_litopt_bytes_equal(haystack, lit->data, lit->len, + fold_map)) + { + if (match_end_ofs != NULL) + *match_end_ofs = (int)lit->len; + return REG_OK; + } + break; + + case TRE_LITERAL_OPT_SUFFIX: + if (len >= lit->len + && tre_litopt_bytes_equal(haystack + len - lit->len, lit->data, + lit->len, fold_map)) + { + if (match_end_ofs != NULL) + *match_end_ofs = (int)len; + return REG_OK; + } + break; + + case TRE_LITERAL_OPT_CONTAINS: + case TRE_LITERAL_OPT_NONE: + break; + } + } + + return REG_NOMATCH; +} + + +/* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match + endpoint values. */ +void +tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags, + const tre_tnfa_t *tnfa, int *tags, int match_eo) +{ + tre_submatch_data_t *submatch_data; + unsigned int i, j; + int *parents; + + i = 0; + if (match_eo >= 0 && !(cflags & REG_NOSUB)) + { + /* Construct submatch offsets from the tags. 
*/ + DPRINT(("end tag = t%d = %d\n", tnfa->end_tag, match_eo)); + submatch_data = tnfa->submatch_data; + while (i < tnfa->num_submatches && i < nmatch) + { + if (submatch_data[i].so_tag == tnfa->end_tag) + pmatch[i].rm_so = match_eo; + else + pmatch[i].rm_so = tags[submatch_data[i].so_tag]; + + if (submatch_data[i].eo_tag == tnfa->end_tag) + pmatch[i].rm_eo = match_eo; + else + pmatch[i].rm_eo = tags[submatch_data[i].eo_tag]; + + /* If either of the endpoints were not used, this submatch + was not part of the match. */ + if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo == -1) + pmatch[i].rm_so = pmatch[i].rm_eo = -1; + + DPRINT(("pmatch[%d] = {t%d = %d, t%d = %d}\n", i, + submatch_data[i].so_tag, pmatch[i].rm_so, + submatch_data[i].eo_tag, pmatch[i].rm_eo)); + i++; + } + /* Reset all submatches that are not within all of their parent + submatches. */ + i = 0; + while (i < tnfa->num_submatches && i < nmatch) + { + if (pmatch[i].rm_eo == -1) + assert(pmatch[i].rm_so == -1); + assert(pmatch[i].rm_so <= pmatch[i].rm_eo); + + parents = submatch_data[i].parents; + if (parents != NULL) + for (j = 0; parents[j] >= 0; j++) + { + DPRINT(("pmatch[%d] parent %d\n", i, parents[j])); + if (pmatch[i].rm_so < pmatch[parents[j]].rm_so + || pmatch[i].rm_eo > pmatch[parents[j]].rm_eo) + pmatch[i].rm_so = pmatch[i].rm_eo = -1; + } + i++; + } + } + + while (i < nmatch) + { + pmatch[i].rm_so = -1; + pmatch[i].rm_eo = -1; + i++; + } +} + + +/* + Wrapper functions for POSIX compatible regexp matching. 
+*/ + +int +tre_have_backrefs(const regex_t *preg) +{ + tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; + return tnfa->have_backrefs; +} + +int +tre_have_approx(const regex_t *preg) +{ + tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; + return tnfa->have_approx; +} + +static int +tre_match(const tre_tnfa_t *tnfa, const void *string, ssize_t len, + tre_str_type_t type, size_t nmatch, regmatch_t pmatch[], + int eflags) +{ + reg_errcode_t status; + int *tags = NULL, eo; + if (tnfa->num_tags > 0 && nmatch > 0) + { +#ifdef TRE_USE_ALLOCA + tags = alloca(sizeof(*tags) * tnfa->num_tags); +#else /* !TRE_USE_ALLOCA */ + tags = xmalloc(sizeof(*tags) * tnfa->num_tags); +#endif /* !TRE_USE_ALLOCA */ + if (tags == NULL) + return REG_ESPACE; + } + + if (type == STR_BYTE + && tnfa->literal_opt.mode != TRE_LITERAL_OPT_NONE + && (nmatch == 0 || (tnfa->cflags & REG_NOSUB)) +#ifdef TRE_APPROX + && !(eflags & REG_APPROX_MATCHER) +#endif /* TRE_APPROX */ + && !(eflags & REG_BACKTRACKING_MATCHER)) + { + size_t byte_len = (len >= 0) ? (size_t)len : strlen((const char *)string); + status = tre_match_literal_opt(tnfa, string, byte_len, eflags, &eo); + + /* Even when the caller asked for no submatches, regexec() still has to + * clear any pmatch entries it was handed. The normal matcher path does + * this through tre_fill_pmatch(), so mirror that behavior here. */ + if (status == REG_OK && nmatch > 0) + tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, NULL, eo); + +#ifndef TRE_USE_ALLOCA + if (tags) + xfree(tags); +#endif /* !TRE_USE_ALLOCA */ + return status; + } + + /* Dispatch to the appropriate matcher. */ + if (tnfa->have_backrefs || eflags & REG_BACKTRACKING_MATCHER) + { + /* The regex has back references, use the backtracking matcher. */ + if (type == STR_USER) + { + const tre_str_source *source = string; + if (source->rewind == NULL || source->compare == NULL) + { + /* The backtracking matcher requires rewind and compare + capabilities from the input stream. 
*/ +#ifndef TRE_USE_ALLOCA + if (tags) + xfree(tags); +#endif /* !TRE_USE_ALLOCA */ + return REG_BADPAT; + } + } + status = tre_tnfa_run_backtrack(tnfa, string, len, type, + tags, eflags, &eo); + } +#ifdef TRE_APPROX + else if (tnfa->have_approx || eflags & REG_APPROX_MATCHER) + { + /* The regex uses approximate matching, use the approximate matcher. */ + regamatch_t match; + regaparams_t params; + tre_regaparams_default(¶ms); + params.max_err = 0; + params.max_cost = 0; + status = tre_tnfa_run_approx(tnfa, string, len, type, tags, + &match, params, eflags, &eo); + } +#endif /* TRE_APPROX */ + else + { + /* Exact matching, no back references, use the parallel matcher. */ + status = tre_tnfa_run_parallel(tnfa, string, len, type, + tags, eflags, &eo); + } + + if (status == REG_OK) + /* A match was found, so fill the submatch registers. */ + tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo); +#ifndef TRE_USE_ALLOCA + if (tags) + xfree(tags); +#endif /* !TRE_USE_ALLOCA */ + return status; +} + +int +tre_regnexec(const regex_t *preg, const char *str, size_t len, + size_t nmatch, regmatch_t pmatch[], int eflags) +{ + tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; + tre_str_type_t type = (TRE_MB_CUR_MAX == 1) ? 
STR_BYTE : STR_MBS; + + return tre_match(tnfa, str, len, type, nmatch, pmatch, eflags); +} + +#ifdef TRE_USE_GNUC_REGEXEC_FPL +int +tre_regexec(const regex_t *preg, const char *str, + size_t nmatch, regmatch_t pmatch[_Restrict_arr_ _REGEX_NELTS (nmatch)], + int eflags) +#else +int +tre_regexec(const regex_t *preg, const char *str, + size_t nmatch, regmatch_t pmatch[], int eflags) +#endif +{ + return tre_regnexec(preg, str, -1, nmatch, pmatch, eflags); +} + +int +tre_regexecb(const regex_t *preg, const char *str, + size_t nmatch, regmatch_t pmatch[], int eflags) +{ + tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; + + return tre_match(tnfa, str, -1, STR_BYTE, nmatch, pmatch, eflags); +} + +int +tre_regnexecb(const regex_t *preg, const char *str, size_t len, + size_t nmatch, regmatch_t pmatch[], int eflags) +{ + tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; + + return tre_match(tnfa, str, len, STR_BYTE, nmatch, pmatch, eflags); +} + + +#ifdef TRE_WCHAR + +int +tre_regwnexec(const regex_t *preg, const wchar_t *str, size_t len, + size_t nmatch, regmatch_t pmatch[], int eflags) +{ + tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; + return tre_match(tnfa, str, len, STR_WIDE, nmatch, pmatch, eflags); +} + +int +tre_regwexec(const regex_t *preg, const wchar_t *str, + size_t nmatch, regmatch_t pmatch[], int eflags) +{ + return tre_regwnexec(preg, str, -1, nmatch, pmatch, eflags); +} + +#endif /* TRE_WCHAR */ + +int +tre_reguexec(const regex_t *preg, const tre_str_source *str, + size_t nmatch, regmatch_t pmatch[], int eflags) +{ + tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; + return tre_match(tnfa, str, -1, STR_USER, nmatch, pmatch, eflags); +} + + +#ifdef TRE_APPROX + +/* + Wrapper functions for approximate regexp matching. 
+*/ + +static int +tre_match_approx(const tre_tnfa_t *tnfa, const void *string, ssize_t len, + tre_str_type_t type, regamatch_t *match, regaparams_t params, + int eflags) +{ + reg_errcode_t status; + int *tags = NULL, eo; + + /* If the regexp does not use approximate matching features, the + maximum cost is zero, and the approximate matcher isn't forced, + use the exact matcher instead. */ + if (params.max_cost == 0 && !tnfa->have_approx + && !(eflags & REG_APPROX_MATCHER)) + return tre_match(tnfa, string, len, type, match->nmatch, match->pmatch, + eflags); + + /* Back references are not supported by the approximate matcher. */ + if (tnfa->have_backrefs) + return REG_BADPAT; + + if (tnfa->num_tags > 0 && match->nmatch > 0) + { +#if TRE_USE_ALLOCA + tags = alloca(sizeof(*tags) * tnfa->num_tags); +#else /* !TRE_USE_ALLOCA */ + tags = xmalloc(sizeof(*tags) * tnfa->num_tags); +#endif /* !TRE_USE_ALLOCA */ + if (tags == NULL) + return REG_ESPACE; + } + status = tre_tnfa_run_approx(tnfa, string, len, type, tags, + match, params, eflags, &eo); + if (status == REG_OK) + tre_fill_pmatch(match->nmatch, match->pmatch, tnfa->cflags, tnfa, tags, eo); +#ifndef TRE_USE_ALLOCA + if (tags) + xfree(tags); +#endif /* !TRE_USE_ALLOCA */ + return status; +} + +int +tre_reganexec(const regex_t *preg, const char *str, size_t len, + regamatch_t *match, regaparams_t params, int eflags) +{ + tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; + tre_str_type_t type = (TRE_MB_CUR_MAX == 1) ? 
STR_BYTE : STR_MBS; + + return tre_match_approx(tnfa, str, len, type, match, params, eflags); +} + +int +tre_regaexec(const regex_t *preg, const char *str, + regamatch_t *match, regaparams_t params, int eflags) +{ + return tre_reganexec(preg, str, -1, match, params, eflags); +} + +int +tre_regaexecb(const regex_t *preg, const char *str, + regamatch_t *match, regaparams_t params, int eflags) +{ + tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; + + return tre_match_approx(tnfa, str, -1, STR_BYTE, match, params, eflags); +} + +#ifdef TRE_WCHAR + +int +tre_regawnexec(const regex_t *preg, const wchar_t *str, size_t len, + regamatch_t *match, regaparams_t params, int eflags) +{ + tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; + return tre_match_approx(tnfa, str, len, STR_WIDE, + match, params, eflags); +} + +int +tre_regawexec(const regex_t *preg, const wchar_t *str, + regamatch_t *match, regaparams_t params, int eflags) +{ + return tre_regawnexec(preg, str, -1, match, params, eflags); +} + +#endif /* TRE_WCHAR */ + +void +tre_regaparams_default(regaparams_t *params) +{ + memset(params, 0, sizeof(*params)); + params->cost_ins = 1; + params->cost_del = 1; + params->cost_subst = 1; + params->max_cost = INT_MAX; + params->max_ins = INT_MAX; + params->max_del = INT_MAX; + params->max_subst = INT_MAX; + params->max_err = INT_MAX; +} + +#endif /* TRE_APPROX */ + +/* EOF */ diff --git a/deps/tre/lib/tre-ast.c b/deps/tre/lib/tre-ast.c new file mode 100644 index 000000000..5a4bb1940 --- /dev/null +++ b/deps/tre/lib/tre-ast.c @@ -0,0 +1,226 @@ +/* + tre-ast.c - Abstract syntax tree (AST) routines + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. 
+ +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif /* HAVE_CONFIG_H */ +#include + +#include "tre-ast.h" +#include "tre-mem.h" + +tre_ast_node_t * +tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size) +{ + tre_ast_node_t *node; + + node = tre_mem_calloc(mem, sizeof(*node)); + if (!node) + return NULL; + node->obj = tre_mem_calloc(mem, size); + if (!node->obj) + return NULL; + node->type = type; + node->nullable = -1; + node->submatch_id = -1; + + return node; +} + +tre_ast_node_t * +tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max) +{ + tre_ast_node_t *node; + tre_literal_t *lit; + + node = tre_ast_new_node(mem, LITERAL, sizeof(tre_literal_t)); + if (!node) + return NULL; + lit = node->obj; + lit->code_min = code_min; + lit->code_max = code_max; + lit->position = -1; + + return node; +} + +tre_ast_node_t * +tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max, + int minimal) +{ + tre_ast_node_t *node; + tre_iteration_t *iter; + + node = tre_ast_new_node(mem, ITERATION, sizeof(tre_iteration_t)); + if (!node) + return NULL; + iter = node->obj; + iter->arg = arg; + iter->min = min; + iter->max = max; + iter->minimal = minimal; + node->num_submatches = arg->num_submatches; + + return node; +} + +tre_ast_node_t * +tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right) +{ + tre_ast_node_t *node; + + node = tre_ast_new_node(mem, UNION, sizeof(tre_union_t)); + if (node == NULL) + return NULL; + ((tre_union_t *)node->obj)->left = left; + ((tre_union_t *)node->obj)->right = right; + node->num_submatches = left->num_submatches + right->num_submatches; + + return node; +} + +tre_ast_node_t * +tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left, + tre_ast_node_t *right) +{ + tre_ast_node_t *node; + + node = tre_ast_new_node(mem, CATENATION, sizeof(tre_catenation_t)); + if (node == NULL) + return NULL; + ((tre_catenation_t *)node->obj)->left = left; + ((tre_catenation_t *)node->obj)->right = right; + 
node->num_submatches = left->num_submatches + right->num_submatches; + + return node; +} + +#ifdef TRE_DEBUG + +static void +tre_findent(FILE *stream, int i) +{ + while (i-- > 0) + fputc(' ', stream); +} + +void +tre_print_params(int *params) +{ + int i; + if (params) + { + DPRINT(("params [")); + for (i = 0; i < TRE_PARAM_LAST; i++) + { + if (params[i] == TRE_PARAM_UNSET) + DPRINT(("unset")); + else if (params[i] == TRE_PARAM_DEFAULT) + DPRINT(("default")); + else + DPRINT(("%d", params[i])); + if (i < TRE_PARAM_LAST - 1) + DPRINT((", ")); + } + DPRINT(("]")); + } +} + +static void +tre_do_print(FILE *stream, tre_ast_node_t *ast, int indent) +{ + int code_min, code_max, pos; + int num_tags = ast->num_tags; + tre_literal_t *lit; + tre_iteration_t *iter; + + tre_findent(stream, indent); + switch (ast->type) + { + case LITERAL: + lit = ast->obj; + code_min = lit->code_min; + code_max = lit->code_max; + pos = lit->position; + if (IS_EMPTY(lit)) + { + fprintf(stream, "literal empty\n"); + } + else if (IS_ASSERTION(lit)) + { + int i; + char *assertions[] = { "bol", "eol", "ctype", "!ctype", + "bow", "eow", "wb", "!wb" }; + if (code_max >= ASSERT_LAST << 1) + assert(0); + fprintf(stream, "assertions: "); + for (i = 0; (1 << i) <= ASSERT_LAST; i++) + if (code_max & (1 << i)) + fprintf(stream, "%s ", assertions[i]); + fprintf(stream, "\n"); + } + else if (IS_TAG(lit)) + { + fprintf(stream, "tag %d\n", code_max); + } + else if (IS_BACKREF(lit)) + { + fprintf(stream, "backref %d, pos %d\n", code_max, pos); + } + else if (IS_PARAMETER(lit)) + { + tre_print_params(lit->u.params); + fprintf(stream, "\n"); + } + else + { + fprintf(stream, "literal (%c, %c) (%d, %d), pos %d, sub %d, " + "%d tags\n", code_min, code_max, code_min, code_max, pos, + ast->submatch_id, num_tags); + } + break; + case ITERATION: + iter = ast->obj; + fprintf(stream, "iteration {%d, %d}, sub %d, %d tags, %s\n", + iter->min, iter->max, ast->submatch_id, num_tags, + iter->minimal ? 
"minimal" : "greedy"); + tre_do_print(stream, iter->arg, indent + 2); + break; + case UNION: + fprintf(stream, "union, sub %d, %d tags\n", ast->submatch_id, num_tags); + tre_do_print(stream, ((tre_union_t *)ast->obj)->left, indent + 2); + tre_do_print(stream, ((tre_union_t *)ast->obj)->right, indent + 2); + break; + case CATENATION: + fprintf(stream, "catenation, sub %d, %d tags\n", ast->submatch_id, + num_tags); + tre_do_print(stream, ((tre_catenation_t *)ast->obj)->left, indent + 2); + tre_do_print(stream, ((tre_catenation_t *)ast->obj)->right, indent + 2); + break; + default: + assert(0); + break; + } +} + +static void +tre_ast_fprint(FILE *stream, tre_ast_node_t *ast) +{ + tre_do_print(stream, ast, 0); +} + +void +tre_ast_print(tre_ast_node_t *tree) +{ + printf("AST:\n"); + tre_ast_fprint(stdout, tree); +} + +#endif /* TRE_DEBUG */ + +/* EOF */ diff --git a/deps/tre/lib/tre-ast.h b/deps/tre/lib/tre-ast.h new file mode 100644 index 000000000..190c4b033 --- /dev/null +++ b/deps/tre/lib/tre-ast.h @@ -0,0 +1,128 @@ +/* + tre-ast.h - Abstract syntax tree (AST) definitions + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + +*/ + + +#ifndef TRE_AST_H +#define TRE_AST_H 1 + +#include "tre-mem.h" +#include "tre-internal.h" +#include "tre-compile.h" + +/* The different AST node types. */ +typedef enum { + LITERAL, + CATENATION, + ITERATION, + UNION +} tre_ast_type_t; + +/* Special subtypes of TRE_LITERAL. */ +#define EMPTY -1 /* Empty leaf (denotes empty string). */ +#define ASSERTION -2 /* Assertion leaf. */ +#define TAG -3 /* Tag leaf. */ +#define BACKREF -4 /* Back reference leaf. */ +#define PARAMETER -5 /* Parameter. 
*/ + +#define IS_SPECIAL(x) ((x)->code_min < 0) +#define IS_EMPTY(x) ((x)->code_min == EMPTY) +#define IS_ASSERTION(x) ((x)->code_min == ASSERTION) +#define IS_TAG(x) ((x)->code_min == TAG) +#define IS_BACKREF(x) ((x)->code_min == BACKREF) +#define IS_PARAMETER(x) ((x)->code_min == PARAMETER) + + +/* A generic AST node. All AST nodes consist of this node on the top + level with `obj' pointing to the actual content. */ +typedef struct { + tre_ast_type_t type; /* Type of the node. */ + void *obj; /* Pointer to actual node. */ + int nullable; + int submatch_id; + unsigned int num_submatches; + unsigned int num_tags; + tre_pos_and_tags_t *firstpos; + tre_pos_and_tags_t *lastpos; +} tre_ast_node_t; + + +/* A "literal" node. These are created for assertions, back references, + tags, matching parameter settings, and all expressions that match one + character. */ +typedef struct { + long code_min; + long code_max; + int position; + union { + tre_ctype_t class; + int *params; + } u; + tre_ctype_t *neg_classes; +} tre_literal_t; + +/* A "catenation" node. These are created when two regexps are concatenated. + If there are more than one subexpressions in sequence, the `left' part + holds all but the last, and `right' part holds the last subexpression + (catenation is left associative). */ +typedef struct { + tre_ast_node_t *left; + tre_ast_node_t *right; +} tre_catenation_t; + +/* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}" + operators. */ +typedef struct { + /* Subexpression to match. */ + tre_ast_node_t *arg; + /* Minimum number of consecutive matches. */ + int min; + /* Maximum number of consecutive matches. */ + int max; + /* If 0, match as many characters as possible, if 1 match as few as + possible. Note that this does not always mean the same thing as + matching as many/few repetitions as possible. */ + unsigned int minimal:1; + /* Approximate matching parameters (or NULL). */ + int *params; +} tre_iteration_t; + +/* An "union" node. 
These are created for the "|" operator. */ +typedef struct { + tre_ast_node_t *left; + tre_ast_node_t *right; +} tre_union_t; + +tre_ast_node_t * +tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size); + +tre_ast_node_t * +tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max); + +tre_ast_node_t * +tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max, + int minimal); + +tre_ast_node_t * +tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right); + +tre_ast_node_t * +tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left, + tre_ast_node_t *right); + +#ifdef TRE_DEBUG +void +tre_ast_print(tre_ast_node_t *tree); + +/* XXX - rethink AST printing API */ +void +tre_print_params(int *params); +#endif /* TRE_DEBUG */ + +#endif /* TRE_AST_H */ + +/* EOF */ diff --git a/deps/tre/lib/tre-compile.c b/deps/tre/lib/tre-compile.c new file mode 100644 index 000000000..a3573df5a --- /dev/null +++ b/deps/tre/lib/tre-compile.c @@ -0,0 +1,2673 @@ +/* + tre-compile.c - TRE regex compiler + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + +*/ + +/* + TODO: + - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive + function calls. 
+*/ + + +#ifdef HAVE_CONFIG_H +#include +#endif /* HAVE_CONFIG_H */ +#include +#include +#include + +#include "tre-internal.h" +#include "tre-mem.h" +#include "tre-stack.h" +#include "tre-ast.h" +#include "tre-parse.h" +#include "tre-compile.h" +#include "xmalloc.h" + +typedef struct { + const tre_ast_node_t **nodes; + size_t len; + size_t cap; +} tre_ast_node_vec_t; + +typedef struct { + unsigned char *bytes; + size_t len; + size_t cap; +} tre_literal_byte_buf_t; + +static unsigned char +tre_litopt_fold_byte(unsigned char c) +{ + return (unsigned char)tre_tolower((tre_cint_t)c); +} + +static void +tre_litopt_free_literal_list(tre_literal_opt_literal_t *literals, size_t count) +{ + size_t i; + + if (literals == NULL) + return; + for (i = 0; i < count; i++) + if (literals[i].data != NULL) + xfree(literals[i].data); + xfree(literals); +} + +static void +tre_litopt_reset_byte_buf(tre_literal_byte_buf_t *buf) +{ + if (buf->bytes != NULL) + xfree(buf->bytes); + buf->bytes = NULL; + buf->len = 0; + buf->cap = 0; +} + +static int +tre_litopt_append_ast_node(tre_ast_node_vec_t *vec, const tre_ast_node_t *node) +{ + const tre_ast_node_t **new_nodes; + size_t new_cap; + + if (vec->len == vec->cap) + { + new_cap = vec->cap ? vec->cap * 2 : 8; + new_nodes = xrealloc(vec->nodes, sizeof(*new_nodes) * new_cap); + if (new_nodes == NULL) + return REG_ESPACE; + vec->nodes = new_nodes; + vec->cap = new_cap; + } + + vec->nodes[vec->len++] = node; + return REG_OK; +} + +static int +tre_litopt_append_byte(tre_literal_byte_buf_t *buf, unsigned char byte) +{ + unsigned char *new_bytes; + size_t new_cap; + + if (buf->len == buf->cap) + { + new_cap = buf->cap ? 
buf->cap * 2 : 8; + new_bytes = xrealloc(buf->bytes, new_cap); + if (new_bytes == NULL) + return REG_ESPACE; + buf->bytes = new_bytes; + buf->cap = new_cap; + } + + buf->bytes[buf->len++] = byte; + return REG_OK; +} + +static int +tre_litopt_append_literal(tre_literal_opt_t *opt, + const tre_literal_byte_buf_t *buf) +{ + tre_literal_opt_literal_t *new_literals; + unsigned char *copy; + size_t new_count; + + new_count = opt->num_literals + 1; + new_literals = xrealloc(opt->literals, sizeof(*new_literals) * new_count); + if (new_literals == NULL) + return REG_ESPACE; + opt->literals = new_literals; + + copy = xmalloc(buf->len); + if (copy == NULL) + return REG_ESPACE; + memcpy(copy, buf->bytes, buf->len); + + opt->literals[opt->num_literals].data = copy; + opt->literals[opt->num_literals].len = buf->len; + opt->num_literals = new_count; + return REG_OK; +} + +/* Fill the fold table once and group literals by the first byte so the + * matcher can jump straight to the small set of candidates that can match + * at a given position. 
*/ +static reg_errcode_t +tre_litopt_prepare(tre_literal_opt_t *opt) +{ + size_t counts[256] = { 0 }; + size_t next[256]; + tre_literal_opt_literal_t *grouped; + size_t i; + + for (i = 0; i < 256; i++) + opt->fold_map[i] = tre_litopt_fold_byte((unsigned char)i); + + memset(opt->start_offsets, 0, sizeof(opt->start_offsets)); + if (opt->num_literals == 0) + return REG_OK; + + for (i = 0; i < opt->num_literals; i++) + counts[opt->literals[i].data[0]]++; + + for (i = 0; i < 256; i++) + opt->start_offsets[i + 1] = opt->start_offsets[i] + counts[i]; + + grouped = xmalloc(sizeof(*grouped) * opt->num_literals); + if (grouped == NULL) + return REG_ESPACE; + + memcpy(next, opt->start_offsets, sizeof(next)); + for (i = 0; i < opt->num_literals; i++) + { + unsigned char first = opt->literals[i].data[0]; + grouped[next[first]++] = opt->literals[i]; + } + + xfree(opt->literals); + opt->literals = grouped; + return REG_OK; +} + +static int +tre_litopt_is_simple_literal(const tre_ast_node_t *node, unsigned char *byte) +{ + tre_literal_t *lit; + + if (node == NULL || node->type != LITERAL) + return 0; + lit = node->obj; + if (IS_SPECIAL(lit) || lit->code_min != lit->code_max) + return 0; + if (lit->code_min < 0 || lit->code_min > UCHAR_MAX) + return 0; + *byte = (unsigned char)lit->code_min; + return 1; +} + +static int +tre_litopt_is_icase_char_union(const tre_ast_node_t *node, int cflags, + unsigned char *byte) +{ + tre_union_t *uni; + unsigned char left, right; + + if (!(cflags & REG_ICASE) || node == NULL || node->type != UNION) + return 0; + + uni = node->obj; + if (!tre_litopt_is_simple_literal(uni->left, &left) + || !tre_litopt_is_simple_literal(uni->right, &right)) + return 0; + + if (tre_litopt_fold_byte(left) != tre_litopt_fold_byte(right)) + return 0; + + *byte = tre_litopt_fold_byte(left); + return 1; +} + +static int +tre_litopt_is_assertion(const tre_ast_node_t *node, int assertion) +{ + tre_literal_t *lit; + + if (node == NULL || node->type != LITERAL) + return 0; + 
lit = node->obj; + return IS_ASSERTION(lit) && lit->code_max == assertion; +} + +static int +tre_litopt_collect_cat_nodes(const tre_ast_node_t *node, tre_ast_node_vec_t *vec) +{ + tre_catenation_t *cat; + int err; + + if (node->type != CATENATION) + return tre_litopt_append_ast_node(vec, node); + + cat = node->obj; + err = tre_litopt_collect_cat_nodes(cat->left, vec); + if (err != REG_OK) + return err; + return tre_litopt_collect_cat_nodes(cat->right, vec); +} + +static int +tre_litopt_collect_alt_nodes(const tre_ast_node_t *node, int cflags, + tre_ast_node_vec_t *vec) +{ + tre_union_t *uni; + unsigned char byte; + int err; + + if (node->type != UNION || tre_litopt_is_icase_char_union(node, cflags, &byte)) + return tre_litopt_append_ast_node(vec, node); + + uni = node->obj; + err = tre_litopt_collect_alt_nodes(uni->left, cflags, vec); + if (err != REG_OK) + return err; + return tre_litopt_collect_alt_nodes(uni->right, cflags, vec); +} + +static int +tre_litopt_collect_literal_string(const tre_ast_node_t *node, int cflags, + tre_literal_byte_buf_t *buf) +{ + tre_catenation_t *cat; + unsigned char byte; + int err; + + switch (node->type) + { + case CATENATION: + cat = node->obj; + err = tre_litopt_collect_literal_string(cat->left, cflags, buf); + if (err != 1) + return err; + return tre_litopt_collect_literal_string(cat->right, cflags, buf); + + case LITERAL: + if (!tre_litopt_is_simple_literal(node, &byte)) + return 0; + if (cflags & REG_ICASE) + byte = tre_litopt_fold_byte(byte); + return tre_litopt_append_byte(buf, byte) == REG_OK ? 1 : -1; + + case UNION: + if (!tre_litopt_is_icase_char_union(node, cflags, &byte)) + return 0; + return tre_litopt_append_byte(buf, byte) == REG_OK ? 
1 : -1; + + default: + return 0; + } +} + +static reg_errcode_t +tre_litopt_try_compile(tre_tnfa_t *tnfa, const tre_ast_node_t *tree, + int cflags, int mb_cur_max) +{ + tre_ast_node_vec_t pieces = { 0 }, alts = { 0 }; + tre_literal_byte_buf_t buf = { 0 }; + tre_literal_opt_t opt = { 0 }; + size_t first, last, i; + int err; + + if (mb_cur_max != 1 || (cflags & REG_NEWLINE)) + return REG_OK; + + err = tre_litopt_collect_cat_nodes(tree, &pieces); + if (err != REG_OK) + goto error; + + first = 0; + last = pieces.len; + + if (first < last && tre_litopt_is_assertion(pieces.nodes[first], ASSERT_AT_BOL)) + first++; + if (first < last && tre_litopt_is_assertion(pieces.nodes[last - 1], ASSERT_AT_EOL)) + last--; + + if (first == last) + goto out; + + if (last - first == 1) + { + err = tre_litopt_collect_alt_nodes(pieces.nodes[first], cflags, &alts); + if (err != REG_OK) + goto error; + + for (i = 0; i < alts.len; i++) + { + err = tre_litopt_collect_literal_string(alts.nodes[i], cflags, &buf); + if (err < 0) + goto error; + if (err == 0 || buf.len == 0) + goto out; + err = tre_litopt_append_literal(&opt, &buf); + if (err != REG_OK) + goto error; + buf.len = 0; + } + } + else + { + for (i = first; i < last; i++) + { + err = tre_litopt_collect_literal_string(pieces.nodes[i], cflags, &buf); + if (err < 0) + goto error; + if (err == 0) + goto out; + } + if (buf.len == 0) + goto out; + err = tre_litopt_append_literal(&opt, &buf); + if (err != REG_OK) + goto error; + buf.len = 0; + } + + if (opt.num_literals == 0) + goto out; + + if (first > 0 && last < pieces.len) + opt.mode = TRE_LITERAL_OPT_EXACT; + else if (first > 0) + opt.mode = TRE_LITERAL_OPT_PREFIX; + else if (last < pieces.len) + opt.mode = TRE_LITERAL_OPT_SUFFIX; + else + opt.mode = TRE_LITERAL_OPT_CONTAINS; + opt.nocase = !!(cflags & REG_ICASE); + err = tre_litopt_prepare(&opt); + if (err != REG_OK) + goto error; + + tnfa->literal_opt = opt; + opt.literals = NULL; + opt.num_literals = 0; + + out: + if (pieces.nodes != 
NULL) + xfree(pieces.nodes); + if (alts.nodes != NULL) + xfree(alts.nodes); + tre_litopt_reset_byte_buf(&buf); + tre_litopt_free_literal_list(opt.literals, opt.num_literals); + return REG_OK; + + error: + if (pieces.nodes != NULL) + xfree(pieces.nodes); + if (alts.nodes != NULL) + xfree(alts.nodes); + tre_litopt_reset_byte_buf(&buf); + tre_litopt_free_literal_list(opt.literals, opt.num_literals); + return REG_ESPACE; +} + +/* + Algorithms to setup tags so that submatch addressing can be done. +*/ + + +/* Inserts a catenation node to the root of the tree given in `node'. + As the left child a new tag with number `tag_id' to `node' is added, + and the right child is the old root. */ +static reg_errcode_t +tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id) +{ + tre_catenation_t *c; + + DPRINT(("add_tag_left: tag %d\n", tag_id)); + + c = tre_mem_alloc(mem, sizeof(*c)); + if (c == NULL) + return REG_ESPACE; + c->left = tre_ast_new_literal(mem, TAG, tag_id); + if (c->left == NULL) + return REG_ESPACE; + c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t)); + if (c->right == NULL) + return REG_ESPACE; + + c->right->obj = node->obj; + c->right->type = node->type; + c->right->nullable = -1; + c->right->submatch_id = -1; + c->right->firstpos = NULL; + c->right->lastpos = NULL; + c->right->num_tags = 0; + node->obj = c; + node->type = CATENATION; + return REG_OK; +} + +/* Inserts a catenation node to the root of the tree given in `node'. + As the right child a new tag with number `tag_id' to `node' is added, + and the left child is the old root. 
*/ +static reg_errcode_t +tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id) +{ + tre_catenation_t *c; + + DPRINT(("tre_add_tag_right: tag %d\n", tag_id)); + + c = tre_mem_alloc(mem, sizeof(*c)); + if (c == NULL) + return REG_ESPACE; + c->right = tre_ast_new_literal(mem, TAG, tag_id); + if (c->right == NULL) + return REG_ESPACE; + c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t)); + if (c->left == NULL) + return REG_ESPACE; + + c->left->obj = node->obj; + c->left->type = node->type; + c->left->nullable = -1; + c->left->submatch_id = -1; + c->left->firstpos = NULL; + c->left->lastpos = NULL; + c->left->num_tags = 0; + node->obj = c; + node->type = CATENATION; + return REG_OK; +} + +typedef enum { + ADDTAGS_RECURSE, + ADDTAGS_AFTER_ITERATION, + ADDTAGS_AFTER_UNION_LEFT, + ADDTAGS_AFTER_UNION_RIGHT, + ADDTAGS_AFTER_CAT_LEFT, + ADDTAGS_AFTER_CAT_RIGHT, + ADDTAGS_SET_SUBMATCH_END +} tre_addtags_symbol_t; + + +typedef struct { + int tag; + int next_tag; +} tre_tag_states_t; + + +/* Go through `regset' and set submatch data for submatches that are + using this tag. */ +static void +tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag) +{ + int i; + + for (i = 0; regset[i] >= 0; i++) + { + int id = regset[i] / 2; + int start = !(regset[i] % 2); + DPRINT((" Using tag %d for %s offset of " + "submatch %d\n", tag, + start ? "start" : "end", id)); + if (start) + tnfa->submatch_data[id].so_tag = tag; + else + tnfa->submatch_data[id].eo_tag = tag; + } + regset[0] = -1; +} + + +/* Adds tags to appropriate locations in the parse tree in `tree', so that + subexpressions marked for submatch addressing can be traced. */ +static reg_errcode_t +tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree, + tre_tnfa_t *tnfa) +{ + reg_errcode_t status = REG_OK; + tre_addtags_symbol_t symbol; + tre_ast_node_t *node = tree; /* Tree node we are currently looking at. 
*/ + size_t bottom = tre_stack_num_items(stack); + /* True for first pass (counting number of needed tags) */ + int first_pass = (mem == NULL || tnfa == NULL); + int *regset, *orig_regset; + unsigned int num_tags = 0; /* Total number of tags. */ + unsigned int num_minimals = 0; /* Number of special minimal tags. */ + unsigned int tag = 0; /* The tag that is to be added next. */ + unsigned int next_tag = 1; /* Next tag to use after this one. */ + int *parents; /* Stack of submatches the current submatch is + contained in. */ + int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */ + tre_tag_states_t *saved_states; + + tre_tag_direction_t direction = TRE_TAG_MINIMIZE; + if (!first_pass) + { + tnfa->end_tag = 0; + tnfa->minimal_tags[0] = -1; + } + + regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2)); + if (regset == NULL) + return REG_ESPACE; + regset[0] = -1; + orig_regset = regset; + + parents = xmalloc(sizeof(*parents) * (tnfa->num_submatches + 1)); + if (parents == NULL) + { + xfree(regset); + return REG_ESPACE; + } + parents[0] = -1; + + saved_states = xmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1)); + if (saved_states == NULL) + { + xfree(regset); + xfree(parents); + return REG_ESPACE; + } + else + { + unsigned int i; + for (i = 0; i <= tnfa->num_submatches; i++) + saved_states[i].tag = -1; + } + + STACK_PUSH(stack, voidptr, node); + STACK_PUSH(stack, int, ADDTAGS_RECURSE); + + while (status == REG_OK && tre_stack_num_items(stack) > bottom) + { + symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack); + switch (symbol) + { + + case ADDTAGS_SET_SUBMATCH_END: + { + int id = tre_stack_pop_int(stack); + int i; + + /* Add end of this submatch to regset. */ + for (i = 0; regset[i] >= 0; i++); + regset[i] = id * 2 + 1; + regset[i + 1] = -1; + + /* Pop this submatch from the parents stack. 
*/ + for (i = 0; parents[i] >= 0; i++); + parents[i - 1] = -1; + break; + } + + case ADDTAGS_RECURSE: + node = tre_stack_pop_voidptr(stack); + + if (node->submatch_id >= 0) + { + int id = node->submatch_id; + int i; + + + /* Add start of this submatch to regset. */ + for (i = 0; regset[i] >= 0; i++); + regset[i] = id * 2; + regset[i + 1] = -1; + + if (!first_pass) + { + for (i = 0; parents[i] >= 0; i++); + tnfa->submatch_data[id].parents = NULL; + if (i > 0) + { + int *p = xmalloc(sizeof(*p) * (i + 1)); + if (p == NULL) + { + status = REG_ESPACE; + break; + } + assert(tnfa->submatch_data[id].parents == NULL); + tnfa->submatch_data[id].parents = p; + for (i = 0; parents[i] >= 0; i++) + p[i] = parents[i]; + p[i] = -1; + } + } + + /* Add end of this submatch to regset after processing this + node. */ + STACK_PUSHX(stack, int, node->submatch_id); + STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END); + } + + switch (node->type) + { + case LITERAL: + { + tre_literal_t *lit = node->obj; + + if (!IS_SPECIAL(lit) || IS_BACKREF(lit)) + { + int i; + DPRINT(("Literal %d-%d\n", + (int)lit->code_min, (int)lit->code_max)); + if (regset[0] >= 0) + { + /* Regset is not empty, so add a tag before the + literal or backref. 
*/ + if (!first_pass) + { + status = tre_add_tag_left(mem, node, tag); + tnfa->tag_directions[tag] = direction; + if (minimal_tag >= 0) + { + DPRINT(("Minimal %d, %d\n", minimal_tag, tag)); + for (i = 0; tnfa->minimal_tags[i] >= 0; i++); + tnfa->minimal_tags[i] = tag; + tnfa->minimal_tags[i + 1] = minimal_tag; + tnfa->minimal_tags[i + 2] = -1; + minimal_tag = -1; + num_minimals++; + } + tre_purge_regset(regset, tnfa, tag); + } + else + { + DPRINT((" num_tags = 1\n")); + node->num_tags = 1; + } + + DPRINT((" num_tags++\n")); + regset[0] = -1; + tag = next_tag; + num_tags++; + next_tag++; + } + } + else + { + assert(!IS_TAG(lit)); + } + break; + } + case CATENATION: + { + tre_catenation_t *cat = node->obj; + tre_ast_node_t *left = cat->left; + tre_ast_node_t *right = cat->right; + int reserved_tag = -1; + DPRINT(("Catenation, next_tag = %d\n", next_tag)); + + + /* After processing right child. */ + STACK_PUSHX(stack, voidptr, node); + STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT); + + /* Process right child. */ + STACK_PUSHX(stack, voidptr, right); + STACK_PUSHX(stack, int, ADDTAGS_RECURSE); + + /* After processing left child. */ + STACK_PUSHX(stack, int, next_tag + left->num_tags); + DPRINT((" Pushing %d for after left\n", + next_tag + left->num_tags)); + if (left->num_tags > 0 && right->num_tags > 0) + { + /* Reserve the next tag to the right child. */ + DPRINT((" Reserving next_tag %d to right child\n", + next_tag)); + reserved_tag = next_tag; + next_tag++; + } + STACK_PUSHX(stack, int, reserved_tag); + STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT); + + /* Process left child. 
*/ + STACK_PUSHX(stack, voidptr, left); + STACK_PUSHX(stack, int, ADDTAGS_RECURSE); + + } + break; + case ITERATION: + { + tre_iteration_t *iter = node->obj; + DPRINT(("Iteration\n")); + + if (first_pass) + { + STACK_PUSHX(stack, int, regset[0] >= 0 || iter->minimal); + } + else + { + STACK_PUSHX(stack, int, tag); + STACK_PUSHX(stack, int, iter->minimal); + } + STACK_PUSHX(stack, voidptr, node); + STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION); + + STACK_PUSHX(stack, voidptr, iter->arg); + STACK_PUSHX(stack, int, ADDTAGS_RECURSE); + + /* Regset is not empty, so add a tag here. */ + if (regset[0] >= 0 || iter->minimal) + { + if (!first_pass) + { + int i; + status = tre_add_tag_left(mem, node, tag); + if (iter->minimal) + tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE; + else + tnfa->tag_directions[tag] = direction; + if (minimal_tag >= 0) + { + DPRINT(("Minimal %d, %d\n", minimal_tag, tag)); + for (i = 0; tnfa->minimal_tags[i] >= 0; i++); + tnfa->minimal_tags[i] = tag; + tnfa->minimal_tags[i + 1] = minimal_tag; + tnfa->minimal_tags[i + 2] = -1; + minimal_tag = -1; + num_minimals++; + } + tre_purge_regset(regset, tnfa, tag); + } + + DPRINT((" num_tags++\n")); + regset[0] = -1; + tag = next_tag; + num_tags++; + next_tag++; + } + direction = TRE_TAG_MINIMIZE; + } + break; + case UNION: + { + tre_union_t *uni = node->obj; + tre_ast_node_t *left = uni->left; + tre_ast_node_t *right = uni->right; + int left_tag; + int right_tag; + + if (regset[0] >= 0) + { + left_tag = next_tag; + right_tag = next_tag + 1; + } + else + { + left_tag = tag; + right_tag = next_tag; + } + + DPRINT(("Union\n")); + + /* After processing right child. 
*/ + STACK_PUSHX(stack, int, right_tag); + STACK_PUSHX(stack, int, left_tag); + STACK_PUSHX(stack, voidptr, regset); + STACK_PUSHX(stack, int, regset[0] >= 0); + STACK_PUSHX(stack, voidptr, node); + STACK_PUSHX(stack, voidptr, right); + STACK_PUSHX(stack, voidptr, left); + STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT); + + /* Process right child. */ + STACK_PUSHX(stack, voidptr, right); + STACK_PUSHX(stack, int, ADDTAGS_RECURSE); + + /* After processing left child. */ + STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT); + + /* Process left child. */ + STACK_PUSHX(stack, voidptr, left); + STACK_PUSHX(stack, int, ADDTAGS_RECURSE); + + /* Regset is not empty, so add a tag here. */ + if (regset[0] >= 0) + { + if (!first_pass) + { + int i; + status = tre_add_tag_left(mem, node, tag); + tnfa->tag_directions[tag] = direction; + if (minimal_tag >= 0) + { + DPRINT(("Minimal %d, %d\n", minimal_tag, tag)); + for (i = 0; tnfa->minimal_tags[i] >= 0; i++); + tnfa->minimal_tags[i] = tag; + tnfa->minimal_tags[i + 1] = minimal_tag; + tnfa->minimal_tags[i + 2] = -1; + minimal_tag = -1; + num_minimals++; + } + tre_purge_regset(regset, tnfa, tag); + } + + DPRINT((" num_tags++\n")); + regset[0] = -1; + tag = next_tag; + num_tags++; + next_tag++; + } + + if (node->num_submatches > 0) + { + /* The next two tags are reserved for markers. */ + next_tag++; + tag = next_tag; + next_tag++; + } + + break; + } + } + + if (node->submatch_id >= 0) + { + int i; + /* Push this submatch on the parents stack. 
*/ + for (i = 0; parents[i] >= 0; i++); + parents[i] = node->submatch_id; + parents[i + 1] = -1; + } + + break; /* end case: ADDTAGS_RECURSE */ + + case ADDTAGS_AFTER_ITERATION: + { + int minimal = 0; + int enter_tag; + node = tre_stack_pop_voidptr(stack); + if (first_pass) + { + node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags + + tre_stack_pop_int(stack); + minimal_tag = -1; + } + else + { + minimal = tre_stack_pop_int(stack); + enter_tag = tre_stack_pop_int(stack); + if (minimal) + minimal_tag = enter_tag; + } + + DPRINT(("After iteration\n")); + if (!first_pass) + { + DPRINT((" Setting direction to %s\n", + minimal ? "minimize" : "maximize")); + if (minimal) + direction = TRE_TAG_MINIMIZE; + else + direction = TRE_TAG_MAXIMIZE; + } + break; + } + + case ADDTAGS_AFTER_CAT_LEFT: + { + int new_tag = tre_stack_pop_int(stack); + next_tag = tre_stack_pop_int(stack); + DPRINT(("After cat left, tag = %d, next_tag = %d\n", + tag, next_tag)); + if (new_tag >= 0) + { + DPRINT((" Setting tag to %d\n", new_tag)); + tag = new_tag; + } + break; + } + + case ADDTAGS_AFTER_CAT_RIGHT: + DPRINT(("After cat right\n")); + node = tre_stack_pop_voidptr(stack); + if (first_pass) + node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags + + ((tre_catenation_t *)node->obj)->right->num_tags; + break; + + case ADDTAGS_AFTER_UNION_LEFT: + DPRINT(("After union left\n")); + /* Lift the bottom of the `regset' array so that when processing + the right operand the items currently in the array are + invisible. The original bottom was saved at ADDTAGS_UNION and + will be restored at ADDTAGS_AFTER_UNION_RIGHT below. 
*/ + while (*regset >= 0) + regset++; + break; + + case ADDTAGS_AFTER_UNION_RIGHT: + { + int added_tags, tag_left, tag_right; + tre_ast_node_t *left = tre_stack_pop_voidptr(stack); + tre_ast_node_t *right = tre_stack_pop_voidptr(stack); + DPRINT(("After union right\n")); + node = tre_stack_pop_voidptr(stack); + added_tags = tre_stack_pop_int(stack); + if (first_pass) + { + node->num_tags = ((tre_union_t *)node->obj)->left->num_tags + + ((tre_union_t *)node->obj)->right->num_tags + added_tags + + ((node->num_submatches > 0) ? 2 : 0); + } + regset = tre_stack_pop_voidptr(stack); + tag_left = tre_stack_pop_int(stack); + tag_right = tre_stack_pop_int(stack); + + /* Add tags after both children, the left child gets a smaller + tag than the right child. This guarantees that we prefer + the left child over the right child. */ + /* XXX - This is not always necessary (if the children have + tags which must be seen for every match of that child). */ + /* XXX - Check if this is the only place where tre_add_tag_right + is used. If so, use tre_add_tag_left (putting the tag before + the child as opposed after the child) and throw away + tre_add_tag_right. 
*/ + if (node->num_submatches > 0) + { + if (!first_pass) + { + status = tre_add_tag_right(mem, left, tag_left); + tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE; + status = tre_add_tag_right(mem, right, tag_right); + tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE; + } + DPRINT((" num_tags += 2\n")); + num_tags += 2; + } + direction = TRE_TAG_MAXIMIZE; + break; + } + + default: + assert(0); + break; + + } /* end switch(symbol) */ + } /* end while(tre_stack_num_items(stack) > bottom) */ + + if (!first_pass) + tre_purge_regset(regset, tnfa, tag); + + if (!first_pass && minimal_tag >= 0) + { + int i; + DPRINT(("Minimal %d, %d\n", minimal_tag, tag)); + for (i = 0; tnfa->minimal_tags[i] >= 0; i++); + tnfa->minimal_tags[i] = tag; + tnfa->minimal_tags[i + 1] = minimal_tag; + tnfa->minimal_tags[i + 2] = -1; + minimal_tag = -1; + num_minimals++; + } + + DPRINT(("tre_add_tags: %s complete. Number of tags %d.\n", + first_pass? "First pass" : "Second pass", num_tags)); + + assert(tree->num_tags == num_tags); + tnfa->end_tag = num_tags; + tnfa->num_tags = num_tags; + tnfa->num_minimals = num_minimals; + xfree(orig_regset); + xfree(parents); + xfree(saved_states); + return status; +} + + + +/* + AST to TNFA compilation routines. +*/ + +typedef enum { + COPY_RECURSE, + COPY_SET_RESULT_PTR +} tre_copyast_symbol_t; + +/* Flags for tre_copy_ast(). 
*/ +#define COPY_REMOVE_TAGS 1 +#define COPY_MAXIMIZE_FIRST_TAG 2 + +static reg_errcode_t +tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast, + int flags, int *pos_add, tre_tag_direction_t *tag_directions, + tre_ast_node_t **copy, int *max_pos) +{ + reg_errcode_t status = REG_OK; + size_t bottom = tre_stack_num_items(stack); + int num_copied = 0; + int first_tag = 1; + tre_ast_node_t **result = copy; + tre_copyast_symbol_t symbol; + + STACK_PUSH(stack, voidptr, ast); + STACK_PUSH(stack, int, COPY_RECURSE); + + while (status == REG_OK && tre_stack_num_items(stack) > bottom) + { + tre_ast_node_t *node; + if (status != REG_OK) + break; + + symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack); + switch (symbol) + { + case COPY_SET_RESULT_PTR: + result = tre_stack_pop_voidptr(stack); + break; + case COPY_RECURSE: + node = tre_stack_pop_voidptr(stack); + switch (node->type) + { + case LITERAL: + { + tre_literal_t *lit = node->obj; + int pos = lit->position; + long min = lit->code_min; + long max = lit->code_max; + if (!IS_SPECIAL(lit) || IS_BACKREF(lit)) + { + /* XXX - e.g. [ab] has only one position but two + nodes, so we are creating holes in the state space + here. Not fatal, just wastes memory. */ + pos += *pos_add; + num_copied++; + } + else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS)) + { + /* Change this tag to empty. */ + min = EMPTY; + max = pos = -1; + } + else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG) + && first_tag) + { + /* Maximize the first tag. 
*/ + tag_directions[max] = TRE_TAG_MAXIMIZE; + first_tag = 0; + } + *result = tre_ast_new_literal(mem, min, max); + if (*result == NULL) { + status = REG_ESPACE; + break; + } + if (!IS_SPECIAL(lit)) { + ((tre_literal_t *)(*result)->obj)->u.class = lit->u.class; + ((tre_literal_t *)(*result)->obj)->neg_classes = lit->neg_classes; + } else if (IS_PARAMETER(lit)) { + ((tre_literal_t *)(*result)->obj)->u.params = lit->u.params; + } + + if (pos > *max_pos) + *max_pos = pos; + break; + } + case UNION: + { + tre_union_t *uni = node->obj; + tre_union_t *tmp; + *result = tre_ast_new_union(mem, uni->left, uni->right); + if (*result == NULL) + { + status = REG_ESPACE; + break; + } + tmp = (*result)->obj; + result = &tmp->left; + STACK_PUSHX(stack, voidptr, uni->right); + STACK_PUSHX(stack, int, COPY_RECURSE); + STACK_PUSHX(stack, voidptr, &tmp->right); + STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR); + STACK_PUSHX(stack, voidptr, uni->left); + STACK_PUSHX(stack, int, COPY_RECURSE); + break; + } + case CATENATION: + { + tre_catenation_t *cat = node->obj; + tre_catenation_t *tmp; + *result = tre_ast_new_catenation(mem, cat->left, cat->right); + if (*result == NULL) + { + status = REG_ESPACE; + break; + } + tmp = (*result)->obj; + tmp->left = NULL; + tmp->right = NULL; + result = &tmp->left; + + STACK_PUSHX(stack, voidptr, cat->right); + STACK_PUSHX(stack, int, COPY_RECURSE); + STACK_PUSHX(stack, voidptr, &tmp->right); + STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR); + STACK_PUSHX(stack, voidptr, cat->left); + STACK_PUSHX(stack, int, COPY_RECURSE); + break; + } + case ITERATION: + { + tre_iteration_t *iter = node->obj; + STACK_PUSHX(stack, voidptr, iter->arg); + STACK_PUSHX(stack, int, COPY_RECURSE); + *result = tre_ast_new_iter(mem, iter->arg, iter->min, + iter->max, iter->minimal); + if (*result == NULL) + { + status = REG_ESPACE; + break; + } + iter = (*result)->obj; + result = &iter->arg; + break; + } + default: + assert(0); + break; + } + break; + } + } + *pos_add += 
num_copied; + return status; +} + +typedef enum { + EXPAND_RECURSE, + EXPAND_AFTER_ITER +} tre_expand_ast_symbol_t; + +/* Expands each iteration node that has a finite nonzero minimum or maximum + iteration count to a catenated sequence of copies of the node. */ +static reg_errcode_t +tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast, + tre_tag_direction_t *tag_directions, int *max_depth) +{ + reg_errcode_t status = REG_OK; + size_t bottom = tre_stack_num_items(stack); + int pos_add = 0; + int pos_add_total = 0; + int max_pos = 0; + /* Current approximate matching parameters. */ + int params[TRE_PARAM_LAST]; + /* Approximate parameter nesting level. */ + int params_depth = 0; + int iter_depth = 0; + int i; + + for (i = 0; i < TRE_PARAM_LAST; i++) + params[i] = TRE_PARAM_DEFAULT; + + STACK_PUSHR(stack, voidptr, ast); + STACK_PUSHR(stack, int, EXPAND_RECURSE); + while (status == REG_OK && tre_stack_num_items(stack) > bottom) + { + tre_ast_node_t *node; + tre_expand_ast_symbol_t symbol; + + if (status != REG_OK) + break; + + DPRINT(("pos_add %d\n", pos_add)); + + symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack); + node = tre_stack_pop_voidptr(stack); + switch (symbol) + { + case EXPAND_RECURSE: + switch (node->type) + { + case LITERAL: + { + tre_literal_t *lit= node->obj; + if (!IS_SPECIAL(lit) || IS_BACKREF(lit)) + { + lit->position += pos_add; + if (lit->position > max_pos) + max_pos = lit->position; + } + break; + } + case UNION: + { + tre_union_t *uni = node->obj; + STACK_PUSHX(stack, voidptr, uni->right); + STACK_PUSHX(stack, int, EXPAND_RECURSE); + STACK_PUSHX(stack, voidptr, uni->left); + STACK_PUSHX(stack, int, EXPAND_RECURSE); + break; + } + case CATENATION: + { + tre_catenation_t *cat = node->obj; + STACK_PUSHX(stack, voidptr, cat->right); + STACK_PUSHX(stack, int, EXPAND_RECURSE); + STACK_PUSHX(stack, voidptr, cat->left); + STACK_PUSHX(stack, int, EXPAND_RECURSE); + break; + } + case ITERATION: + { + tre_iteration_t *iter = 
node->obj; + STACK_PUSHX(stack, int, pos_add); + STACK_PUSHX(stack, voidptr, node); + STACK_PUSHX(stack, int, EXPAND_AFTER_ITER); + STACK_PUSHX(stack, voidptr, iter->arg); + STACK_PUSHX(stack, int, EXPAND_RECURSE); + /* If we are going to expand this node at EXPAND_AFTER_ITER then don't increase the `pos' fields of the nodes now, it will get done when expanding. */ + if (iter->min > 1 || iter->max > 1) + pos_add = 0; + iter_depth++; + DPRINT(("iter\n")); + break; + } + default: + assert(0); + break; + } + break; + case EXPAND_AFTER_ITER: + { + tre_iteration_t *iter = node->obj; + int pos_add_last; + pos_add = tre_stack_pop_int(stack); + pos_add_last = pos_add; + if (iter->min > 1 || iter->max > 1) + { + tre_ast_node_t *seq1 = NULL, *seq2 = NULL; + int j; + int pos_add_save = pos_add; + + /* Create a catenated sequence of copies of the node. */ + for (j = 0; j < iter->min; j++) + { + tre_ast_node_t *copy; + /* Remove tags from all but the last copy. */ + int flags = ((j + 1 < iter->min) + ? COPY_REMOVE_TAGS + : COPY_MAXIMIZE_FIRST_TAG); + DPRINT((" pos_add %d\n", pos_add)); + pos_add_save = pos_add; + /* `copy' is passed by address so tre_copy_ast() can store the root of the duplicated subtree in it. */ + status = tre_copy_ast(mem, stack, iter->arg, flags, + &pos_add, tag_directions, &copy, + &max_pos); + if (status != REG_OK) + return status; + if (seq1 != NULL) + seq1 = tre_ast_new_catenation(mem, seq1, copy); + else + seq1 = copy; + if (seq1 == NULL) + return REG_ESPACE; + } + + if (iter->max == -1) + { + /* No upper limit. 
*/ + pos_add_save = pos_add; + status = tre_copy_ast(mem, stack, iter->arg, 0, + &pos_add, NULL, &seq2, &max_pos); + if (status != REG_OK) + return status; + seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0); + if (seq2 == NULL) + return REG_ESPACE; + } + else + { + /* Each optional repetition is wrapped in a {0,1} iteration around the copies built so far. */ + for (j = iter->min; j < iter->max; j++) + { + tre_ast_node_t *copy; + pos_add_save = pos_add; + status = tre_copy_ast(mem, stack, iter->arg, 0, + &pos_add, NULL, &copy, &max_pos); + if (status != REG_OK) + return status; + if (seq2 != NULL) + seq2 = tre_ast_new_catenation(mem, copy, seq2); + else + seq2 = copy; + if (seq2 == NULL) + return REG_ESPACE; + seq2 = tre_ast_new_iter(mem, seq2, 0, 1, 0); + if (seq2 == NULL) + return REG_ESPACE; + } + } + + pos_add = pos_add_save; + if (seq1 == NULL) + seq1 = seq2; + else if (seq2 != NULL) + seq1 = tre_ast_new_catenation(mem, seq1, seq2); + if (seq1 == NULL) + return REG_ESPACE; + node->obj = seq1->obj; + node->type = seq1->type; + } + + iter_depth--; + pos_add_total += pos_add - pos_add_last; + if (iter_depth == 0) + pos_add = pos_add_total; + + /* If approximate parameters are specified, surround the result with two parameter setting nodes. The one on the left sets the specified parameters, and the one on the right restores the old parameters. 
*/ + if (iter->params) + { + tre_ast_node_t *tmp_l, *tmp_r, *tmp_node, *node_copy; + int *old_params; + + tmp_l = tre_ast_new_literal(mem, PARAMETER, 0); + if (!tmp_l) + return REG_ESPACE; + ((tre_literal_t *)tmp_l->obj)->u.params = iter->params; + iter->params[TRE_PARAM_DEPTH] = params_depth + 1; + tmp_r = tre_ast_new_literal(mem, PARAMETER, 0); + if (!tmp_r) + return REG_ESPACE; + old_params = tre_mem_alloc(mem, sizeof(*old_params) + * TRE_PARAM_LAST); + if (!old_params) + return REG_ESPACE; + for (i = 0; i < TRE_PARAM_LAST; i++) + old_params[i] = params[i]; + ((tre_literal_t *)tmp_r->obj)->u.params = old_params; + old_params[TRE_PARAM_DEPTH] = params_depth; + /* XXX - this is the only place where ast_new_node is + needed -- should be moved inside AST module. */ + node_copy = tre_ast_new_node(mem, ITERATION, + sizeof(tre_iteration_t)); + if (!node_copy) + return REG_ESPACE; + node_copy->obj = node->obj; + tmp_node = tre_ast_new_catenation(mem, tmp_l, node_copy); + if (!tmp_node) + return REG_ESPACE; + tmp_node = tre_ast_new_catenation(mem, tmp_node, tmp_r); + if (!tmp_node) + return REG_ESPACE; + /* Replace the contents of `node' with `tmp_node'. 
*/ + memcpy(node, tmp_node, sizeof(*node)); + node->obj = tmp_node->obj; + node->type = tmp_node->type; + params_depth++; + if (params_depth > *max_depth) + *max_depth = params_depth; + } + break; + } + default: + assert(0); + break; + } + } + +#ifdef TRE_DEBUG + DPRINT(("Expanded AST:\n")); + tre_ast_print(ast); +#endif + + return status; +} + +static tre_pos_and_tags_t * +tre_set_empty(tre_mem_t mem) +{ + tre_pos_and_tags_t *new_set; + + new_set = tre_mem_calloc(mem, sizeof(*new_set)); + if (new_set == NULL) + return NULL; + + new_set[0].position = -1; + new_set[0].code_min = -1; + new_set[0].code_max = -1; + + return new_set; +} + +static tre_pos_and_tags_t * +tre_set_one(tre_mem_t mem, int position, long code_min, long code_max, + tre_ctype_t class, tre_ctype_t *neg_classes, int backref) +{ + tre_pos_and_tags_t *new_set; + + new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2); + if (new_set == NULL) + return NULL; + + new_set[0].position = position; + new_set[0].code_min = code_min; + new_set[0].code_max = code_max; + new_set[0].class = class; + new_set[0].neg_classes = neg_classes; + new_set[0].backref = backref; + new_set[1].position = -1; + new_set[1].code_min = -1; + new_set[1].code_max = -1; + + return new_set; +} + +static tre_pos_and_tags_t * +tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2, + int *tags, int assertions, int *params) +{ + int s1, s2, i, j; + tre_pos_and_tags_t *new_set; + int *new_tags; + int num_tags; + + for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++); + for (s1 = 0; set1[s1].position >= 0; s1++); + for (s2 = 0; set2[s2].position >= 0; s2++); + new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1)); + if (!new_set ) + return NULL; + + for (s1 = 0; set1[s1].position >= 0; s1++) + { + new_set[s1].position = set1[s1].position; + new_set[s1].code_min = set1[s1].code_min; + new_set[s1].code_max = set1[s1].code_max; + new_set[s1].assertions = set1[s1].assertions | assertions; + 
new_set[s1].class = set1[s1].class; + new_set[s1].neg_classes = set1[s1].neg_classes; + new_set[s1].backref = set1[s1].backref; + if (set1[s1].tags == NULL && tags == NULL) + new_set[s1].tags = NULL; + else + { + for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++); + new_tags = tre_mem_alloc(mem, (sizeof(*new_tags) + * (i + num_tags + 1))); + if (new_tags == NULL) + return NULL; + for (j = 0; j < i; j++) + new_tags[j] = set1[s1].tags[j]; + for (i = 0; i < num_tags; i++) + new_tags[j + i] = tags[i]; + new_tags[j + i] = -1; + new_set[s1].tags = new_tags; + } + if (set1[s1].params) + new_set[s1].params = set1[s1].params; + if (params) + { + if (!new_set[s1].params) + new_set[s1].params = params; + else + { + /* The position already carries parameters and the caller supplies more: build a merged array in which every entry set in `params' overrides the position's own value. Entries that `params' leaves unset fall back to the position's value, so no slot of the new array is left unwritten. */ + new_set[s1].params = tre_mem_alloc(mem, sizeof(*params) * + TRE_PARAM_LAST); + if (!new_set[s1].params) + return NULL; + for (i = 0; i < TRE_PARAM_LAST; i++) + new_set[s1].params[i] = (params[i] != TRE_PARAM_UNSET + ? params[i] + : set1[s1].params[i]); + } + } + } + + for (s2 = 0; set2[s2].position >= 0; s2++) + { + new_set[s1 + s2].position = set2[s2].position; + new_set[s1 + s2].code_min = set2[s2].code_min; + new_set[s1 + s2].code_max = set2[s2].code_max; + /* XXX - why not | assertions here as well? 
*/ + new_set[s1 + s2].assertions = set2[s2].assertions; + new_set[s1 + s2].class = set2[s2].class; + new_set[s1 + s2].neg_classes = set2[s2].neg_classes; + new_set[s1 + s2].backref = set2[s2].backref; + if (set2[s2].tags == NULL) + new_set[s1 + s2].tags = NULL; + else + { + for (i = 0; set2[s2].tags[i] >= 0; i++); + new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1)); + if (new_tags == NULL) + return NULL; + for (j = 0; j < i; j++) + new_tags[j] = set2[s2].tags[j]; + new_tags[j] = -1; + new_set[s1 + s2].tags = new_tags; + } + if (set2[s2].params) + new_set[s1 + s2].params = set2[s2].params; + if (params) + { + if (!new_set[s1 + s2].params) + new_set[s1 + s2].params = params; + else + { + /* Same merge as for `set1': entries set in `params' win, all others keep the value the position already had. */ + new_set[s1 + s2].params = tre_mem_alloc(mem, sizeof(*params) * + TRE_PARAM_LAST); + if (!new_set[s1 + s2].params) + return NULL; + for (i = 0; i < TRE_PARAM_LAST; i++) + new_set[s1 + s2].params[i] = (params[i] != TRE_PARAM_UNSET + ? params[i] + : set2[s2].params[i]); + } + } + } + new_set[s1 + s2].position = -1; + return new_set; +} + +/* Finds the empty path through `node' which is the one that should be taken according to POSIX.2 rules, and adds the tags on that path to `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is set to the number of tags seen on the path. If `params_seen' is not NULL, it is set to 1 when a parameter node lies on the path and to 0 otherwise. */ +static reg_errcode_t +tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags, + int *assertions, int *params, int *num_tags_seen, + int *params_seen) +{ + tre_literal_t *lit; + tre_union_t *uni; + tre_catenation_t *cat; + tre_iteration_t *iter; + int i; + size_t bottom = tre_stack_num_items(stack); + reg_errcode_t status = REG_OK; + if (num_tags_seen) + *num_tags_seen = 0; + if (params_seen) + *params_seen = 0; + + status = tre_stack_push_voidptr(stack, node); + + /* Walk through the tree recursively. 
*/ + while (status == REG_OK && tre_stack_num_items(stack) > bottom) + { + node = tre_stack_pop_voidptr(stack); + + switch (node->type) + { + case LITERAL: + lit = (tre_literal_t *)node->obj; + switch (lit->code_min) + { + case TAG: + if (lit->code_max >= 0) + { + if (tags != NULL) + { + /* Add the tag to `tags'. */ + for (i = 0; tags[i] >= 0; i++) + if (tags[i] == lit->code_max) + break; + if (tags[i] < 0) + { + tags[i] = lit->code_max; + tags[i + 1] = -1; + } + } + if (num_tags_seen) + (*num_tags_seen)++; + } + break; + case ASSERTION: + assert(lit->code_max >= 1 && lit->code_max <= ASSERT_LAST); + if (assertions != NULL) + *assertions |= lit->code_max; + break; + case PARAMETER: + if (params != NULL) + for (i = 0; i < TRE_PARAM_LAST; i++) + params[i] = lit->u.params[i]; + if (params_seen != NULL) + *params_seen = 1; + break; + case EMPTY: + break; + default: + assert(0); + break; + } + break; + + case UNION: + /* Subexpressions starting earlier take priority over ones + starting later, so we prefer the left subexpression over the + right subexpression. */ + uni = (tre_union_t *)node->obj; + if (uni->left->nullable) + STACK_PUSHX(stack, voidptr, uni->left) + else if (uni->right->nullable) + STACK_PUSHX(stack, voidptr, uni->right) + else + assert(0); + break; + + case CATENATION: + /* The path must go through both children. */ + cat = (tre_catenation_t *)node->obj; + assert(cat->left->nullable); + assert(cat->right->nullable); + STACK_PUSHX(stack, voidptr, cat->left); + STACK_PUSHX(stack, voidptr, cat->right); + break; + + case ITERATION: + /* A match with an empty string is preferred over no match at + all, so we go through the argument if possible. 
*/ + iter = (tre_iteration_t *)node->obj; + if (iter->arg->nullable) + STACK_PUSHX(stack, voidptr, iter->arg); + break; + + default: + assert(0); + break; + } + } + + return status; +} + + +typedef enum { + NPFL_RECURSE, + NPFL_POST_UNION, + NPFL_POST_CATENATION, + NPFL_POST_ITERATION +} tre_npfl_stack_symbol_t; + + +/* Computes and fills in the fields `nullable', `position`, `firstpos', + and `lastpos' for the nodes of the AST `tree'; `nextpos' points to an + integer indicating the next available position, and will be updated on + return to reflect the number of additional positions assigned. */ +static reg_errcode_t +tre_compute_npfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree, + int *nextpos) +{ + size_t bottom = tre_stack_num_items(stack); + + STACK_PUSHR(stack, voidptr, tree); + STACK_PUSHR(stack, int, NPFL_RECURSE); + + while (tre_stack_num_items(stack) > bottom) + { + tre_npfl_stack_symbol_t symbol; + tre_ast_node_t *node; + + symbol = (tre_npfl_stack_symbol_t)tre_stack_pop_int(stack); + node = tre_stack_pop_voidptr(stack); + switch (symbol) + { + case NPFL_RECURSE: + switch (node->type) + { + case LITERAL: + { + tre_literal_t *lit = (tre_literal_t *)node->obj; + if (IS_BACKREF(lit)) + { + /* Back references: nullable = false, firstpos = {i}, + lastpos = {i}. */ + node->nullable = 0; + lit->position = (*nextpos)++; + node->firstpos = tre_set_one(mem, lit->position, 0, + TRE_CHAR_MAX, 0, NULL, -1); + if (!node->firstpos) + return REG_ESPACE; + node->lastpos = tre_set_one(mem, lit->position, 0, + TRE_CHAR_MAX, 0, NULL, + lit->code_max); + if (!node->lastpos) + return REG_ESPACE; + } + else if (lit->code_min < 0) + { + /* Tags, empty strings, params, and zero width assertions: + nullable = true, firstpos = {}, and lastpos = {}. 
*/ + node->nullable = 1; + node->firstpos = tre_set_empty(mem); + if (!node->firstpos) + return REG_ESPACE; + node->lastpos = tre_set_empty(mem); + if (!node->lastpos) + return REG_ESPACE; + } + else + { + /* Literal at position i: nullable = false, firstpos = {i}, + lastpos = {i}. */ + node->nullable = 0; + lit->position = (*nextpos)++; + node->firstpos = + tre_set_one(mem, lit->position, lit->code_min, + lit->code_max, 0, NULL, -1); + if (!node->firstpos) + return REG_ESPACE; + node->lastpos = tre_set_one(mem, lit->position, + lit->code_min, + lit->code_max, + lit->u.class, lit->neg_classes, + -1); + if (!node->lastpos) + return REG_ESPACE; + } + break; + } + + case UNION: + /* Compute the attributes for the two subtrees, and after that + for this node. */ + STACK_PUSHR(stack, voidptr, node); + STACK_PUSHR(stack, int, NPFL_POST_UNION); + STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right); + STACK_PUSHR(stack, int, NPFL_RECURSE); + STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left); + STACK_PUSHR(stack, int, NPFL_RECURSE); + break; + + case CATENATION: + /* Compute the attributes for the two subtrees, and after that + for this node. */ + STACK_PUSHR(stack, voidptr, node); + STACK_PUSHR(stack, int, NPFL_POST_CATENATION); + STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right); + STACK_PUSHR(stack, int, NPFL_RECURSE); + STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left); + STACK_PUSHR(stack, int, NPFL_RECURSE); + break; + + case ITERATION: + /* Compute the attributes for the subtree, and after that for + this node. 
*/ + STACK_PUSHR(stack, voidptr, node); + STACK_PUSHR(stack, int, NPFL_POST_ITERATION); + STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg); + STACK_PUSHR(stack, int, NPFL_RECURSE); + break; + } + break; /* end case: NPFL_RECURSE */ + + case NPFL_POST_UNION: + { + tre_union_t *uni = (tre_union_t *)node->obj; + node->nullable = uni->left->nullable || uni->right->nullable; + node->firstpos = tre_set_union(mem, uni->left->firstpos, + uni->right->firstpos, NULL, 0, NULL); + if (!node->firstpos) + return REG_ESPACE; + node->lastpos = tre_set_union(mem, uni->left->lastpos, + uni->right->lastpos, NULL, 0, NULL); + if (!node->lastpos) + return REG_ESPACE; + break; + } + + case NPFL_POST_ITERATION: + { + tre_iteration_t *iter = (tre_iteration_t *)node->obj; + + if (iter->min == 0 || iter->arg->nullable) + node->nullable = 1; + else + node->nullable = 0; + node->firstpos = iter->arg->firstpos; + node->lastpos = iter->arg->lastpos; + break; + } + + case NPFL_POST_CATENATION: + { + int num_tags, *tags, assertions, params_seen; + int *params; + reg_errcode_t status; + tre_catenation_t *cat = node->obj; + node->nullable = cat->left->nullable && cat->right->nullable; + + /* Compute firstpos. */ + if (cat->left->nullable) + { + /* The left side matches the empty string. Make a first pass with tre_match_empty() to get the number of tags and parameters. */ + status = tre_match_empty(stack, cat->left, + NULL, NULL, NULL, &num_tags, + &params_seen); + if (status != REG_OK) + return status; + /* Allocate arrays for the tags and parameters. */ + tags = xmalloc(sizeof(*tags) * (num_tags + 1)); + if (!tags) + return REG_ESPACE; + tags[0] = -1; + assertions = 0; + params = NULL; + if (params_seen) + { + params = tre_mem_alloc(mem, sizeof(*params) + * TRE_PARAM_LAST); + if (!params) + { + xfree(tags); + return REG_ESPACE; + } + } + /* Second pass with tre_match_empty() to get the list of tags and parameters. 
*/ + status = tre_match_empty(stack, cat->left, tags, + &assertions, params, NULL, NULL); + if (status != REG_OK) + { + xfree(tags); + return status; + } + node->firstpos = + tre_set_union(mem, cat->right->firstpos, cat->left->firstpos, + tags, assertions, params); + /* `tags' is only scratch space; it is released as soon as tre_set_union() returns. */ + xfree(tags); + if (!node->firstpos) + return REG_ESPACE; + } + else + { + node->firstpos = cat->left->firstpos; + } + + /* Compute lastpos. */ + if (cat->right->nullable) + { + /* The right side matches the empty string. Make a first pass with tre_match_empty() to get the number of tags and parameters. */ + status = tre_match_empty(stack, cat->right, + NULL, NULL, NULL, &num_tags, + &params_seen); + if (status != REG_OK) + return status; + /* Allocate arrays for the tags and parameters. */ + tags = xmalloc(sizeof(*tags) * (num_tags + 1)); + if (!tags) + return REG_ESPACE; + tags[0] = -1; + assertions = 0; + params = NULL; + if (params_seen) + { + params = tre_mem_alloc(mem, sizeof(*params) + * TRE_PARAM_LAST); + if (!params) + { + xfree(tags); + return REG_ESPACE; + } + } + /* Second pass with tre_match_empty() to get the list of tags and parameters. */ + status = tre_match_empty(stack, cat->right, tags, + &assertions, params, NULL, NULL); + if (status != REG_OK) + { + xfree(tags); + return status; + } + node->lastpos = + tre_set_union(mem, cat->left->lastpos, cat->right->lastpos, + tags, assertions, params); + xfree(tags); + if (!node->lastpos) + return REG_ESPACE; + } + else + { + node->lastpos = cat->right->lastpos; + } + break; + } + + default: + assert(0); + break; + } + } + + return REG_OK; +} + + +/* Adds a transition from each position in `p1' to each position in `p2'. 
*/
+static reg_errcode_t
+tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
+               tre_tnfa_transition_t *transitions,
+               int *counts, int *offs)
+{
+  tre_pos_and_tags_t *orig_p2 = p2;
+  tre_tnfa_transition_t *trans;
+  int i, j, k, l, dup, prev_p2_pos;
+  /* `p1' and `p2' are arrays ended by an entry with a negative `position'.  With `transitions' set, transitions are written into that table at `offs[position]'; with `transitions' NULL (the `else' branch at the bottom) they are only counted into `counts[]'.  Returns REG_OK, or REG_ESPACE when an allocation fails. */
+  if (transitions != NULL)
+    while (p1->position >= 0)
+      {
+        p2 = orig_p2;
+        prev_p2_pos = -1;  /* no `p2' position handled yet for this `p1' */
+        while (p2->position >= 0)
+          {
+            /* Optimization: if this position was already handled, skip it.  Only a repeat of the immediately preceding `p2' position is detected. */
+            if (p2->position == prev_p2_pos)
+              {
+                p2++;
+                continue;
+              }
+            prev_p2_pos = p2->position;
+            /* Set `trans' to point to the next unused transition from
+               position `p1->position'. */
+            trans = transitions + offs[p1->position];
+            while (trans->state != NULL)
+              {
+#if 0
+                /* If we find a previous transition from `p1->position' to
+                   `p2->position', it is overwritten.  This can happen only
+                   if there are nested loops in the regexp, like in "((a)*)*".
+                   In POSIX.2 repetition using the outer loop is always
+                   preferred over using the inner loop.  Therefore the
+                   transition for the inner loop is useless and can be thrown
+                   away. */
+                /* XXX - The same position is used for all nodes in a bracket
+                   expression, so this optimization cannot be used (it will
+                   break bracket expressions) unless I figure out a way to
+                   detect it here. */
+                if (trans->state_id == p2->position)
+                  {
+                    DPRINT(("*"));
+                    break;
+                  }
+#endif
+                trans++;
+              }
+            /* With the `#if 0' block disabled the loop above only stops on a free slot (state == NULL); mark the slot after it free too so the list stays terminated once this one is filled. */
+            if (trans->state == NULL)
+              (trans + 1)->state = NULL;
+            /* Use the character ranges, assertions, etc. from `p1' for
+               the transition from `p1' to `p2'. */
+            trans->code_min = (tre_cint_t) p1->code_min;
+            trans->code_max = (tre_cint_t) p1->code_max;
+            trans->state = transitions + offs[p2->position];
+            trans->state_id = p2->position;
+            trans->assertions = p1->assertions | p2->assertions
+              | (p1->class ? ASSERT_CHAR_CLASS : 0)
+              | (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0);
+            if (p1->backref >= 0)
+              {
+                assert((trans->assertions & ASSERT_CHAR_CLASS) == 0);
+                assert(p2->backref < 0);
+                trans->u.backref = p1->backref;
+                trans->assertions |= ASSERT_BACKREF;
+              }
+            else
+              trans->u.class = p1->class;
+            if (p1->neg_classes != NULL)
+              {
+                for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++);  /* count the entries before the 0 terminator */
+                trans->neg_classes =
+                  xmalloc(sizeof(*trans->neg_classes) * (i + 1));
+                if (trans->neg_classes == NULL)
+                  return REG_ESPACE;
+                for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)
+                  trans->neg_classes[i] = p1->neg_classes[i];
+                trans->neg_classes[i] = (tre_ctype_t)0;
+              }
+            else
+              trans->neg_classes = NULL;
+
+            /* Find out how many tags this transition has. */
+            i = 0;
+            if (p1->tags != NULL)
+              while(p1->tags[i] >= 0)
+                i++;
+            j = 0;
+            if (p2->tags != NULL)
+              while(p2->tags[j] >= 0)
+                j++;
+
+            /* If we are overwriting a transition, free the old tag array. */
+            if (trans->tags != NULL)
+              xfree(trans->tags);
+            trans->tags = NULL;
+
+            /* If there were any tags, allocate an array and fill it. */
+            if (i + j > 0)
+              {
+                trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
+                if (!trans->tags)
+                  return REG_ESPACE;
+                i = 0;
+                if (p1->tags != NULL)
+                  while(p1->tags[i] >= 0)
+                    {
+                      trans->tags[i] = p1->tags[i];
+                      i++;
+                    }
+                l = i;  /* `l' is the write index from here on; `i' stays at the number of tags copied from `p1' */
+                j = 0;
+                if (p2->tags != NULL)
+                  while (p2->tags[j] >= 0)
+                    {
+                      /* Don't add duplicates: skip a `p2' tag that was already copied from `p1'. */
+                      dup = 0;
+                      for (k = 0; k < i; k++)
+                        if (trans->tags[k] == p2->tags[j])
+                          {
+                            dup = 1;
+                            break;
+                          }
+                      if (!dup)
+                        trans->tags[l++] = p2->tags[j];
+                      j++;
+                    }
+                trans->tags[l] = -1;
+              }
+
+            /* Set the parameter array.  If both `p2' and `p1' have same
+               parameters, the values in `p2' override those in `p1'. */
+            if (p1->params || p2->params)
+              {
+                if (!trans->params)
+                  trans->params = xmalloc(sizeof(*trans->params)
+                                          * TRE_PARAM_LAST);
+                if (!trans->params)
+                  return REG_ESPACE;
+                for (i = 0; i < TRE_PARAM_LAST; i++)
+                  {
+                    trans->params[i] = TRE_PARAM_UNSET;
+                    if (p1->params && p1->params[i] != TRE_PARAM_UNSET)
+                      trans->params[i] = p1->params[i];
+                    if (p2->params && p2->params[i] != TRE_PARAM_UNSET)
+                      trans->params[i] = p2->params[i];
+                  }
+              }
+            else
+              {
+                if (trans->params)
+                  xfree(trans->params);
+                trans->params = NULL;
+              }
+
+
+#ifdef TRE_DEBUG
+            {
+              int *tags;
+
+              DPRINT((" %2d -> %2d on %3d", p1->position, p2->position,
+                      p1->code_min));
+              if (p1->code_max != p1->code_min)
+                DPRINT(("-%3d", p1->code_max));
+              tags = trans->tags;
+              if (tags)
+                {
+                  DPRINT((", tags ["));
+                  while (*tags >= 0)
+                    {
+                      DPRINT(("%d", *tags));
+                      tags++;
+                      if (*tags >= 0)
+                        DPRINT((","));
+                    }
+                  DPRINT(("]"));
+                }
+              if (trans->assertions)
+                DPRINT((", assert %d", trans->assertions));
+              if (trans->assertions & ASSERT_BACKREF)
+                DPRINT((", backref %d", trans->u.backref));
+              else if (trans->u.class)
+                DPRINT((", class %ld", (long)trans->u.class));
+              if (trans->neg_classes)
+                DPRINT((", neg_classes %p", trans->neg_classes));
+              if (trans->params)
+                {
+                  DPRINT((", "));
+                  tre_print_params(trans->params);
+                }
+              DPRINT(("\n"));
+            }
+#endif /* TRE_DEBUG */
+            p2++;
+          }
+        p1++;
+      }
+  else
+    /* Compute a maximum limit for the number of transitions leaving
+       from each state. */
+    while (p1->position >= 0)
+      {
+        p2 = orig_p2;
+        while (p2->position >= 0)
+          {
+            counts[p1->position]++;  /* one per (p1, p2) pair; repeated `p2' positions are not skipped here, hence "maximum" */
+            p2++;
+          }
+        p1++;
+      }
+  return REG_OK;
+}
+
+/* Converts the syntax tree to a TNFA.  All the transitions in the TNFA are
+   labelled with one character range (there are no transitions on empty
+   strings).  The TNFA takes O(n^2) space in the worst case, `n' is size of
+   the regexp.
*/
+static reg_errcode_t
+tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
+                int *counts, int *offs)
+{
+  tre_union_t *uni;
+  tre_catenation_t *cat;
+  tre_iteration_t *iter;
+  reg_errcode_t errcode = REG_OK;
+  /* `transitions', `counts' and `offs' are handed unchanged to tre_make_trans(); the first error from it or from a recursive call is returned, otherwise REG_OK. */
+  /* XXX - recurse using a stack!. */
+  switch (node->type)
+    {
+    case LITERAL:
+      break;  /* a leaf adds no transitions of its own */
+    case UNION:
+      uni = (tre_union_t *)node->obj;
+      errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
+      if (errcode != REG_OK)
+        return errcode;
+      errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
+      break;
+
+    case CATENATION:
+      cat = (tre_catenation_t *)node->obj;
+      /* Add a transition from each position in cat->left->lastpos
+         to each position in cat->right->firstpos. */
+      errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
+                               transitions, counts, offs);
+      if (errcode != REG_OK)
+        return errcode;
+      errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
+      if (errcode != REG_OK)
+        return errcode;
+      errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
+      break;
+
+    case ITERATION:
+      iter = (tre_iteration_t *)node->obj;
+      assert(iter->max == -1 || iter->max == 1);  /* only unbounded (max == -1) or at-most-once (max == 1) iterations are accepted here */
+
+      if (iter->max == -1)
+        {
+          assert(iter->min == 0 || iter->min == 1);
+          /* Add a transition from each last position in the iterated
+             expression to each first position. */
+          errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
+                                   transitions, counts, offs);
+          if (errcode != REG_OK)
+            return errcode;
+        }
+      errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
+      break;
+    }
+  return errcode;  /* node types without a case above leave this at REG_OK */
+}
+/* ERROR_EXIT(err): store `err' in the local `errcode' and jump to the enclosing function's `error_exit' label. */
+#define ERROR_EXIT(err)           \
+  do                              \
+    {                             \
+      errcode = err;              \
+      if (/*CONSTCOND*/(void)1,1) \
+        goto error_exit;          \
+    }                             \
+  while (/*CONSTCOND*/(void)0,0)
+/* Compiles the `n'-character regexp `regex' under `cflags' into a TNFA that is stored in `preg->TRE_REGEX_T_FIELD'.
+   Returns REG_OK on success, otherwise an error code; failures after the two initial allocations go through the `error_exit' label at the end of the function. */
+int
+tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
+{
+  tre_stack_t *stack;
+  tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
+  tre_pos_and_tags_t *p;
+  int *counts = NULL, *offs = NULL;
+  int i, add = 0;
+  tre_tnfa_transition_t *transitions, *initial;
+  tre_tnfa_t *tnfa = NULL;  /* stays NULL until allocated, so `error_exit' can run before that point */
+  tre_submatch_data_t *submatch_data;
+  tre_tag_direction_t *tag_directions = NULL;
+  reg_errcode_t errcode;
+  tre_mem_t mem;
+  int numpos = 0;
+
+  /* Parse context. */
+  tre_parse_ctx_t parse_ctx;
+
+  /* Allocate a stack used throughout the compilation process for various
+     purposes. */
+  stack = tre_stack_new(512, TRE_MAX_STACK);
+  if (!stack)
+    return REG_ESPACE;
+  /* Allocate a fast memory allocator. */
+  mem = tre_mem_new();
+  if (!mem)
+    {
+      tre_stack_destroy(stack);
+      return REG_ESPACE;
+    }
+
+  /* Parse the regexp. */
+  memset(&parse_ctx, 0, sizeof(parse_ctx));
+  parse_ctx.mem = mem;
+  parse_ctx.stack = stack;
+  parse_ctx.re = regex;
+  parse_ctx.len = n;
+  parse_ctx.cflags = cflags;
+  parse_ctx.max_backref = -1;  /* -1 means "no back reference seen"; tested with >= 0 further down */
+  /* Use 8-bit optimizations in 8-bit mode */
+  parse_ctx.mb_cur_max = (cflags & REG_USEBYTES) ? 1 : TRE_MB_CUR_MAX;
+  DPRINT(("tre_compile: parsing '%.*" STRF "'\n", (int)n, regex));
+  errcode = tre_parse(&parse_ctx);
+  if (errcode != REG_OK)
+    ERROR_EXIT(errcode);
+  preg->re_nsub = parse_ctx.submatch_id - 1;
+  tree = parse_ctx.result;
+
+  /* Back references and approximate matching cannot currently be used
+     in the same regexp.
*/
+  if (parse_ctx.max_backref >= 0 && parse_ctx.have_approx)
+    ERROR_EXIT(REG_BADPAT);
+
+#ifdef TRE_DEBUG
+  tre_ast_print(tree);
+#endif /* TRE_DEBUG */
+
+  /* Referring to nonexistent subexpressions is illegal. */
+  if (parse_ctx.max_backref > (int)preg->re_nsub)
+    ERROR_EXIT(REG_ESUBREG);
+
+  /* Allocate the TNFA struct. */
+  tnfa = xcalloc(1, sizeof(tre_tnfa_t));  /* presumably zero-filled (calloc-style), which the NULL checks in tre_free() on the error path depend on -- confirm in xmalloc.h */
+  if (tnfa == NULL)
+    ERROR_EXIT(REG_ESPACE);
+  tnfa->have_backrefs = parse_ctx.max_backref >= 0;
+  tnfa->have_approx = parse_ctx.have_approx;
+  tnfa->num_submatches = parse_ctx.submatch_id;
+
+  /* The literal optimizer only looks at the final tree plus the outer
+   * compile flags.  If the regexp changes flags inline with (?i:...) or
+   * (?-i:...), those scopes are no longer explicit in the optimized form,
+   * so keep using the full matcher. */
+  if (!parse_ctx.have_inline_cflags)
+    {
+      errcode = tre_litopt_try_compile(tnfa, tree, cflags,
+                                       parse_ctx.mb_cur_max);
+      if (errcode != REG_OK)
+        ERROR_EXIT(errcode);
+    }
+
+  /* Set up tags for submatch addressing.  If REG_NOSUB is set and the
+     regexp does not have back references, this can be skipped. */
+  if (tnfa->have_backrefs || !(cflags & REG_NOSUB))
+    {
+      DPRINT(("tre_compile: setting up tags\n"));
+
+      /* Figure out how many tags we will need. */
+      errcode = tre_add_tags(NULL, stack, tree, tnfa);
+      if (errcode != REG_OK)
+        ERROR_EXIT(errcode);
+#ifdef TRE_DEBUG
+      tre_ast_print(tree);
+#endif /* TRE_DEBUG */
+
+      if (tnfa->num_tags > 0)
+        {
+          tag_directions = xmalloc(sizeof(*tag_directions)
+                                   * (tnfa->num_tags + 1));
+          if (tag_directions == NULL)
+            ERROR_EXIT(REG_ESPACE);
+          tnfa->tag_directions = tag_directions;  /* owned by `tnfa' from here on, so tre_free() releases it */
+          memset(tag_directions, -1,
+                 sizeof(*tag_directions) * (tnfa->num_tags + 1));
+        }
+      tnfa->minimal_tags = xcalloc(tnfa->num_tags * 2 + 1,
+                                   sizeof(*tnfa->minimal_tags));
+      if (tnfa->minimal_tags == NULL)
+        ERROR_EXIT(REG_ESPACE);
+
+      submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
+                              sizeof(*submatch_data));
+      if (submatch_data == NULL)
+        ERROR_EXIT(REG_ESPACE);
+      tnfa->submatch_data = submatch_data;
+
+      errcode = tre_add_tags(mem, stack, tree, tnfa);  /* second call, this time with the allocator instead of NULL */
+      if (errcode != REG_OK)
+        ERROR_EXIT(errcode);
+
+#ifdef TRE_DEBUG
+      for (i = 0; i < parse_ctx.submatch_id; i++)
+        DPRINT(("pmatch[%d] = {t%d, t%d}\n",
+                i, submatch_data[i].so_tag, submatch_data[i].eo_tag));
+      for (i = 0; i < tnfa->num_tags; i++)
+        DPRINT(("t%d is %s\n", i,
+                tag_directions[i] == TRE_TAG_MINIMIZE ?
+                "minimized" : "maximized"));
+#endif /* TRE_DEBUG */
+    }
+
+  /* Expand iteration nodes. */
+  errcode = tre_expand_ast(mem, stack, tree, tag_directions,
+                           &tnfa->params_depth);
+  if (errcode != REG_OK)
+    ERROR_EXIT(errcode);
+
+  /* Add a dummy node for the final state.
+     XXX - For certain patterns this dummy node can be optimized away,
+           for example "a*" or "ab*".  Figure out a simple way to detect
+           this possibility. */
+  tmp_ast_l = tree;  /* the tree without the dummy node; its `nullable' flag is read further down */
+  tmp_ast_r = tre_ast_new_literal(mem, 0, 0);
+  if (tmp_ast_r == NULL)
+    ERROR_EXIT(REG_ESPACE);
+
+  tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
+  if (tree == NULL)
+    ERROR_EXIT(REG_ESPACE);
+
+  errcode = tre_compute_npfl(mem, stack, tree, &numpos);
+  if (errcode != REG_OK)
+    ERROR_EXIT(errcode);
+
+#ifdef TRE_DEBUG
+  tre_ast_print(tree);
+  DPRINT(("Number of states: %d\n", numpos));
+#endif /* TRE_DEBUG */
+
+  counts = xmalloc(sizeof(int) * numpos);
+  if (counts == NULL)
+    ERROR_EXIT(REG_ESPACE);
+
+  offs = xmalloc(sizeof(int) * numpos);
+  if (offs == NULL)
+    ERROR_EXIT(REG_ESPACE);
+
+  for (i = 0; i < numpos; i++)
+    counts[i] = 0;
+  tre_ast_to_tnfa(tree, NULL, counts, NULL);  /* counting pass: `transitions' is NULL, so only counts[] is filled; the return value is not checked */
+
+  add = 0;
+  for (i = 0; i < numpos; i++)
+    {
+      offs[i] = add;  /* index of state i's first transition slot */
+      add += counts[i] + 1;  /* one extra slot per state for the NULL-`state' terminator */
+      counts[i] = 0;
+    }
+  transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
+  if (transitions == NULL)
+    ERROR_EXIT(REG_ESPACE);
+  tnfa->transitions = transitions;
+  tnfa->num_transitions = add;
+
+  DPRINT(("Converting to TNFA:\n"));
+  errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
+  if (errcode != REG_OK)
+    ERROR_EXIT(errcode);
+
+  /* If in eight bit mode, compute a table of characters that can be the
+     first character of a match. */
+  tnfa->first_char = -1;
+  if (parse_ctx.mb_cur_max == 1 && !tmp_ast_l->nullable)
+    {
+      int count = 0;
+      tre_cint_t k;
+      DPRINT(("Characters that can start a match:"));
+      tnfa->firstpos_chars = xcalloc(256, sizeof(char));
+      if (tnfa->firstpos_chars == NULL)
+        ERROR_EXIT(REG_ESPACE);
+      for (p = tree->firstpos; p->position >= 0; p++)
+        {
+          tre_tnfa_transition_t *j = transitions + offs[p->position];
+          while (j->state != NULL)
+            {
+              for (k = j->code_min; k <= j->code_max && k < 256; k++)
+                {
+                  DPRINT((" %d", k));
+                  tnfa->firstpos_chars[k] = 1;
+                  count++;  /* incremented on every visit, even if this character was already marked */
+                }
+              j++;
+            }
+        }
+      DPRINT(("\n"));
+#define TRE_OPTIMIZE_FIRST_CHAR 1
+#if TRE_OPTIMIZE_FIRST_CHAR
+      if (count == 1)
+        {
+          for (k = 0; k < 256; k++)
+            if (tnfa->firstpos_chars[k])
+              {
+                DPRINT(("first char must be %d\n", k));
+                tnfa->first_char = k;
+                xfree(tnfa->firstpos_chars);
+                tnfa->firstpos_chars = NULL;
+                break;
+              }
+        }
+#endif
+
+    }
+  else
+    tnfa->firstpos_chars = NULL;
+
+
+  p = tree->firstpos;
+  i = 0;
+  while (p->position >= 0)
+    {
+      i++;  /* count the initial positions to size `initial' below */
+
+#ifdef TRE_DEBUG
+      {
+        int *tags;
+        DPRINT(("initial: %d", p->position));
+        tags = p->tags;
+        if (tags != NULL)
+          {
+            if (*tags >= 0)
+              DPRINT(("/"));
+            while (*tags >= 0)
+              {
+                DPRINT(("%d", *tags));
+                tags++;
+                if (*tags >= 0)
+                  DPRINT((","));
+              }
+          }
+        DPRINT((", assert %d", p->assertions));
+        if (p->params)
+          {
+            DPRINT((", "));
+            tre_print_params(p->params);
+          }
+        DPRINT(("\n"));
+      }
+#endif /* TRE_DEBUG */
+
+      p++;
+    }
+
+  initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
+  if (initial == NULL)
+    ERROR_EXIT(REG_ESPACE);
+  tnfa->initial = initial;
+
+  i = 0;
+  for (p = tree->firstpos; p->position >= 0; p++)
+    {
+      initial[i].state = transitions + offs[p->position];
+      initial[i].state_id = p->position;
+      initial[i].tags = NULL;
+      /* Copy the arrays p->tags, and p->params, they are allocated
+         from a tre_mem object. */
+      if (p->tags)
+        {
+          int j;
+          for (j = 0; p->tags[j] >= 0; j++);  /* length up to the negative terminator */
+          initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
+          if (!initial[i].tags)
+            ERROR_EXIT(REG_ESPACE);
+          memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
+        }
+      initial[i].params = NULL;
+      if (p->params)
+        {
+          initial[i].params = xmalloc(sizeof(*p->params) * TRE_PARAM_LAST);
+          if (!initial[i].params)
+            ERROR_EXIT(REG_ESPACE);
+          memcpy(initial[i].params, p->params,
+                 sizeof(*p->params) * TRE_PARAM_LAST);
+        }
+      initial[i].assertions = p->assertions;
+      i++;
+    }
+  initial[i].state = NULL;  /* terminator entry; tre_free() walks `initial' until it sees a NULL `state' */
+
+  tnfa->num_transitions = add;  /* same value as assigned right after `transitions' was allocated */
+  tnfa->final = transitions + offs[tree->lastpos[0].position];
+  tnfa->num_states = numpos;
+  tnfa->cflags = cflags;
+
+  DPRINT(("final state %p\n", (void *)tnfa->final));
+
+  tre_mem_destroy(mem);
+  tre_stack_destroy(stack);
+  xfree(counts);
+  xfree(offs);
+
+  preg->TRE_REGEX_T_FIELD = (void *)tnfa;
+  return REG_OK;
+
+ error_exit:
+  /* Free everything that was allocated and return the error code.
*/ + tre_mem_destroy(mem); + if (stack != NULL) + tre_stack_destroy(stack); + if (counts != NULL) + xfree(counts); + if (offs != NULL) + xfree(offs); + preg->TRE_REGEX_T_FIELD = (void *)tnfa; + tre_free(preg); + return errcode; +} + + + + +void +tre_free(regex_t *preg) +{ + tre_tnfa_t *tnfa; + unsigned int i; + tre_tnfa_transition_t *trans; + + tnfa = (void *)preg->TRE_REGEX_T_FIELD; + if (!tnfa) + return; + + for (i = 0; i < tnfa->num_transitions; i++) + if (tnfa->transitions[i].state) + { + if (tnfa->transitions[i].tags) + xfree(tnfa->transitions[i].tags); + if (tnfa->transitions[i].neg_classes) + xfree(tnfa->transitions[i].neg_classes); + if (tnfa->transitions[i].params) + xfree(tnfa->transitions[i].params); + } + if (tnfa->transitions) + xfree(tnfa->transitions); + + if (tnfa->initial) + { + for (trans = tnfa->initial; trans->state; trans++) + { + if (trans->tags) + xfree(trans->tags); + if (trans->params) + xfree(trans->params); + } + xfree(tnfa->initial); + } + + if (tnfa->submatch_data) + { + for (i = 0; i < tnfa->num_submatches; i++) + if (tnfa->submatch_data[i].parents) + xfree(tnfa->submatch_data[i].parents); + xfree(tnfa->submatch_data); + } + + if (tnfa->tag_directions) + xfree(tnfa->tag_directions); + if (tnfa->firstpos_chars) + xfree(tnfa->firstpos_chars); + if (tnfa->minimal_tags) + xfree(tnfa->minimal_tags); + tre_litopt_free_literal_list(tnfa->literal_opt.literals, + tnfa->literal_opt.num_literals); + xfree(tnfa); +} + +char * +tre_version(void) +{ + static char str[256]; + char *version; + + if (str[0] == 0) + { + (void) tre_config(TRE_CONFIG_VERSION, &version); + (void) snprintf(str, sizeof(str), "TRE %s (BSD)", version); + } + return str; +} + +int +tre_config(int query, void *result) +{ + int *int_result = result; + const char **string_result = result; + + switch (query) + { + case TRE_CONFIG_APPROX: +#ifdef TRE_APPROX + *int_result = 1; +#else /* !TRE_APPROX */ + *int_result = 0; +#endif /* !TRE_APPROX */ + return REG_OK; + + case 
TRE_CONFIG_WCHAR: +#ifdef TRE_WCHAR + *int_result = 1; +#else /* !TRE_WCHAR */ + *int_result = 0; +#endif /* !TRE_WCHAR */ + return REG_OK; + + case TRE_CONFIG_MULTIBYTE: +#ifdef TRE_MULTIBYTE + *int_result = 1; +#else /* !TRE_MULTIBYTE */ + *int_result = 0; +#endif /* !TRE_MULTIBYTE */ + return REG_OK; + + case TRE_CONFIG_SYSTEM_ABI: +#ifdef TRE_CONFIG_SYSTEM_ABI + *int_result = 1; +#else /* !TRE_CONFIG_SYSTEM_ABI */ + *int_result = 0; +#endif /* !TRE_CONFIG_SYSTEM_ABI */ + return REG_OK; + + case TRE_CONFIG_VERSION: + *string_result = TRE_VERSION; + return REG_OK; + } + + return REG_NOMATCH; +} + + +/* EOF */ diff --git a/deps/tre/lib/tre-compile.h b/deps/tre/lib/tre-compile.h new file mode 100644 index 000000000..51d5ac94a --- /dev/null +++ b/deps/tre/lib/tre-compile.h @@ -0,0 +1,27 @@ +/* + tre-compile.h: Regex compilation definitions + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + +*/ + + +#ifndef TRE_COMPILE_H +#define TRE_COMPILE_H 1 + +typedef struct { + int position; + int code_min; + int code_max; + int *tags; + int assertions; + tre_ctype_t class; + tre_ctype_t *neg_classes; + int backref; + int *params; +} tre_pos_and_tags_t; + +#endif /* TRE_COMPILE_H */ + +/* EOF */ diff --git a/deps/tre/lib/tre-filter.c b/deps/tre/lib/tre-filter.c new file mode 100644 index 000000000..194e188ba --- /dev/null +++ b/deps/tre/lib/tre-filter.c @@ -0,0 +1,73 @@ +/* + tre-filter.c: Histogram filter to quickly find regexp match candidates + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + +*/ + +/* The idea of this filter is quite simple. First, let's assume the + search pattern is a simple string. In order for a substring of a + longer string to match the search pattern, it must have the same + numbers of different characters as the pattern, and those + characters must occur in the same order as they occur in pattern. 
*/ + +#ifdef HAVE_CONFIG_H +#include +#endif /* HAVE_CONFIG_H */ +#include +#include "tre-internal.h" +#include "tre-filter.h" + +int +tre_filter_find(const unsigned char *str, size_t len, tre_filter_t *filter) +{ + unsigned short counts[256]; + unsigned int i; + unsigned int window_len = filter->window_len; + tre_filter_profile_t *profile = filter->profile; + const unsigned char *str_orig = str; + + DPRINT(("tre_filter_find: %.*s\n", len, str)); + + for (i = 0; i < elementsof(counts); i++) + counts[i] = 0; + + i = 0; + while (*str && i < window_len && i < len) + { + counts[*str]++; + i++; + str++; + len--; + } + + while (len > 0) + { + tre_filter_profile_t *p; + counts[*str]++; + counts[*(str - window_len)]--; + + p = profile; + while (p->ch) + { + if (counts[p->ch] < p->count) + break; + p++; + } + if (!p->ch) + { + DPRINT(("Found possible match at %d\n", + str - str_orig)); + return str - str_orig; + } + else + { + DPRINT(("No match so far...\n")); + } + len--; + str++; + } + DPRINT(("This string cannot match.\n")); + return -1; +} diff --git a/deps/tre/lib/tre-filter.h b/deps/tre/lib/tre-filter.h new file mode 100644 index 000000000..31d0b8263 --- /dev/null +++ b/deps/tre/lib/tre-filter.h @@ -0,0 +1,19 @@ + + + + +typedef struct { + unsigned char ch; + unsigned char count; +} tre_filter_profile_t; + +typedef struct { + /* Length of the window where the character counts are kept. */ + int window_len; + /* Required character counts table. */ + tre_filter_profile_t *profile; +} tre_filter_t; + + +int +tre_filter_find(const unsigned char *str, size_t len, tre_filter_t *filter); diff --git a/deps/tre/lib/tre-internal.h b/deps/tre/lib/tre-internal.h new file mode 100644 index 000000000..40081f0c0 --- /dev/null +++ b/deps/tre/lib/tre-internal.h @@ -0,0 +1,319 @@ +/* + tre-internal.h - TRE internal definitions + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. 
+ +*/ + +#ifndef TRE_INTERNAL_H +#define TRE_INTERNAL_H 1 + +#ifdef HAVE_WCHAR_H +#include <wchar.h> +#endif /* HAVE_WCHAR_H */ + +#ifdef HAVE_WCTYPE_H +#include <wctype.h> +#endif /* HAVE_WCTYPE_H */ + +#ifdef HAVE_SYS_TYPES_H +#include <sys/types.h> +#endif /* HAVE_SYS_TYPES_H */ + +#include <ctype.h> +#include <limits.h> +#include "../local_includes/tre.h" + +#define TRE_MAX_RE 65536 +#define TRE_MAX_STRING INT_MAX +#define TRE_MAX_STACK 1048576 + +#ifdef TRE_DEBUG +#include <stdio.h> +#define DPRINT(msg) do {printf msg; fflush(stdout);} while(/*CONSTCOND*/(void)0,0) +#else /* !TRE_DEBUG */ +#define DPRINT(msg) do { } while(/*CONSTCOND*/(void)0,0) +#endif /* !TRE_DEBUG */ + +#define elementsof(x) ( sizeof(x) / sizeof(x[0]) ) + +#ifdef HAVE_MBRTOWC +#define tre_mbrtowc(pwc, s, n, ps) (mbrtowc((pwc), (s), (n), (ps))) +#else /* !HAVE_MBRTOWC */ +#ifdef HAVE_MBTOWC +#define tre_mbrtowc(pwc, s, n, ps) (mbtowc((pwc), (s), (n))) +#endif /* HAVE_MBTOWC */ +#endif /* !HAVE_MBRTOWC */ + +#ifdef TRE_MULTIBYTE +#ifdef HAVE_MBSTATE_T +#define TRE_MBSTATE +#endif /* HAVE_MBSTATE_T */ +#endif /* TRE_MULTIBYTE */ + +/* Define the character types and functions. */ +#ifdef TRE_WCHAR + +/* Wide characters. */ +typedef wint_t tre_cint_t; +#if WCHAR_MAX <= INT_MAX +#define TRE_CHAR_MAX WCHAR_MAX +#else /* WCHAR_MAX > INT_MAX */ +#define TRE_CHAR_MAX INT_MAX +#endif + +#ifdef TRE_MULTIBYTE +#define TRE_MB_CUR_MAX MB_CUR_MAX +#else /* !TRE_MULTIBYTE */ +#define TRE_MB_CUR_MAX 1 +#endif /* !TRE_MULTIBYTE */ + +#define tre_isalnum iswalnum +#define tre_isalpha iswalpha +#ifdef HAVE_ISWBLANK +#define tre_isblank iswblank +#endif /* HAVE_ISWBLANK */ +#define tre_iscntrl iswcntrl +#define tre_isdigit iswdigit +#define tre_isgraph iswgraph +#define tre_islower iswlower +#define tre_isprint iswprint +#define tre_ispunct iswpunct +#define tre_isspace iswspace +#define tre_isupper iswupper +#define tre_isxdigit iswxdigit + +#define tre_tolower towlower +#define tre_toupper towupper +#define tre_strlen wcslen + +#else /* !TRE_WCHAR */ + +/* 8 bit characters.
*/ +typedef short tre_cint_t; +#define TRE_CHAR_MAX 255 +#define TRE_MB_CUR_MAX 1 + +#define tre_isalnum isalnum +#define tre_isalpha isalpha +#ifdef HAVE_ISASCII +#define tre_isascii isascii +#endif /* HAVE_ISASCII */ +#ifdef HAVE_ISBLANK +#define tre_isblank isblank +#endif /* HAVE_ISBLANK */ +#define tre_iscntrl iscntrl +#define tre_isdigit isdigit +#define tre_isgraph isgraph +#define tre_islower islower +#define tre_isprint isprint +#define tre_ispunct ispunct +#define tre_isspace isspace +#define tre_isupper isupper +#define tre_isxdigit isxdigit + +#define tre_tolower(c) (tre_cint_t)(tolower(c)) +#define tre_toupper(c) (tre_cint_t)(toupper(c)) +#define tre_strlen(s) (strlen((const char*)s)) + +#endif /* !TRE_WCHAR */ + +#if defined(TRE_WCHAR) && defined(HAVE_ISWCTYPE) && defined(HAVE_WCTYPE) +#define TRE_USE_SYSTEM_WCTYPE 1 +#endif + +#ifdef TRE_USE_SYSTEM_WCTYPE +/* Use system provided iswctype() and wctype(). */ +typedef wctype_t tre_ctype_t; +#define tre_isctype iswctype +#define tre_ctype wctype +#else /* !TRE_USE_SYSTEM_WCTYPE */ +/* Define our own versions of iswctype() and wctype(). */ +typedef int (*tre_ctype_t)(tre_cint_t); +#define tre_isctype(c, type) ( (type)(c) ) +tre_ctype_t tre_ctype(const char *name); +#endif /* !TRE_USE_SYSTEM_WCTYPE */ + +typedef enum { STR_WIDE, STR_BYTE, STR_MBS, STR_USER } tre_str_type_t; + +/* Returns number of bytes to add to (char *)ptr to make it + properly aligned for the type. */ +#define ALIGN(ptr, type) \ + ((((long)ptr) % sizeof(type)) \ + ? (sizeof(type) - (((long)ptr) % sizeof(type))) \ + : 0) + +#undef MAX +#undef MIN +#define MAX(a, b) (((a) >= (b)) ? (a) : (b)) +#define MIN(a, b) (((a) <= (b)) ? (a) : (b)) + +/* Define STRF to the correct printf formatter for strings. */ +#ifdef TRE_WCHAR +#define STRF "ls" +#else /* !TRE_WCHAR */ +#define STRF "s" +#endif /* !TRE_WCHAR */ + +/* TNFA transition type. A TNFA state is an array of transitions, + the terminator is a transition with NULL `state'. 
*/ +typedef struct tnfa_transition tre_tnfa_transition_t; + +struct tnfa_transition { + /* Range of accepted characters. */ + tre_cint_t code_min; + tre_cint_t code_max; + /* Pointer to the destination state. */ + tre_tnfa_transition_t *state; + /* ID number of the destination state. */ + int state_id; + /* -1 terminated array of tags (or NULL). */ + int *tags; + /* Matching parameters settings (or NULL). */ + int *params; + /* Assertion bitmap. */ + int assertions; + /* Assertion parameters. */ + union { + /* Character class assertion. */ + tre_ctype_t class; + /* Back reference assertion. */ + int backref; + } u; + /* Negative character class assertions. */ + tre_ctype_t *neg_classes; +}; + + +/* Assertions. */ +#define ASSERT_AT_BOL 1 /* Beginning of line. */ +#define ASSERT_AT_EOL 2 /* End of line. */ +#define ASSERT_CHAR_CLASS 4 /* Character class in `class'. */ +#define ASSERT_CHAR_CLASS_NEG 8 /* Character classes in `neg_classes'. */ +#define ASSERT_AT_BOW 16 /* Beginning of word. */ +#define ASSERT_AT_EOW 32 /* End of word. */ +#define ASSERT_AT_WB 64 /* Word boundary. */ +#define ASSERT_AT_WB_NEG 128 /* Not a word boundary. */ +#define ASSERT_BACKREF 256 /* A back reference in `backref'. */ +#define ASSERT_LAST 256 + +/* Tag directions. */ +typedef enum { + TRE_TAG_MINIMIZE = 0, + TRE_TAG_MAXIMIZE = 1 +} tre_tag_direction_t; + +/* Parameters that can be changed dynamically while matching. */ +typedef enum { + TRE_PARAM_COST_INS = 0, + TRE_PARAM_COST_DEL = 1, + TRE_PARAM_COST_SUBST = 2, + TRE_PARAM_COST_MAX = 3, + TRE_PARAM_MAX_INS = 4, + TRE_PARAM_MAX_DEL = 5, + TRE_PARAM_MAX_SUBST = 6, + TRE_PARAM_MAX_ERR = 7, + TRE_PARAM_DEPTH = 8, + TRE_PARAM_LAST = 9 +} tre_param_t; + +/* Unset matching parameter */ +#define TRE_PARAM_UNSET -1 + +/* Signifies the default matching parameter value. */ +#define TRE_PARAM_DEFAULT -2 + +/* Instructions to compute submatch register values from tag values + after a successful match. 
*/ +struct tre_submatch_data { + /* Tag that gives the value for rm_so (submatch start offset). */ + int so_tag; + /* Tag that gives the value for rm_eo (submatch end offset). */ + int eo_tag; + /* List of submatches this submatch is contained in. */ + int *parents; +}; + +typedef struct tre_submatch_data tre_submatch_data_t; + +typedef enum { + TRE_LITERAL_OPT_NONE = 0, + TRE_LITERAL_OPT_CONTAINS, + TRE_LITERAL_OPT_PREFIX, + TRE_LITERAL_OPT_SUFFIX, + TRE_LITERAL_OPT_EXACT +} tre_literal_opt_mode_t; + +typedef struct { + unsigned char *data; + size_t len; +} tre_literal_opt_literal_t; + +typedef struct { + tre_literal_opt_mode_t mode; + int nocase; + size_t num_literals; + /* Folded byte mapping used by the nocase fast path. */ + unsigned char fold_map[256]; + /* Literal index ranges grouped by the first literal byte. */ + size_t start_offsets[257]; + tre_literal_opt_literal_t *literals; +} tre_literal_opt_t; + + +/* TNFA definition. */ +typedef struct tnfa tre_tnfa_t; + +struct tnfa { + tre_tnfa_transition_t *transitions; + unsigned int num_transitions; + tre_tnfa_transition_t *initial; + tre_tnfa_transition_t *final; + tre_submatch_data_t *submatch_data; + char *firstpos_chars; + int first_char; + unsigned int num_submatches; + tre_tag_direction_t *tag_directions; + int *minimal_tags; + int num_tags; + int num_minimals; + int end_tag; + int num_states; + int cflags; + int have_backrefs; + int have_approx; + int params_depth; + tre_literal_opt_t literal_opt; +}; + +int +tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags); + +void +tre_free(regex_t *preg); + +void +tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags, + const tre_tnfa_t *tnfa, int *tags, int match_eo); + +reg_errcode_t +tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, ssize_t len, + tre_str_type_t type, int *match_tags, int eflags, + int *match_end_ofs); + +reg_errcode_t +tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string, ssize_t len, 
+ tre_str_type_t type, int *match_tags, int eflags, + int *match_end_ofs); + +#ifdef TRE_APPROX +reg_errcode_t +tre_tnfa_run_approx(const tre_tnfa_t *tnfa, const void *string, ssize_t len, + tre_str_type_t type, int *match_tags, regamatch_t *match, + regaparams_t params, int eflags, int *match_end_ofs); +#endif /* TRE_APPROX */ + +#endif /* TRE_INTERNAL_H */ + +/* EOF */ diff --git a/deps/tre/lib/tre-match-backtrack.c b/deps/tre/lib/tre-match-backtrack.c new file mode 100644 index 000000000..7e184929e --- /dev/null +++ b/deps/tre/lib/tre-match-backtrack.c @@ -0,0 +1,676 @@ +/* + tre-match-backtrack.c - TRE backtracking regex matching engine + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + +*/ + +/* + This matcher is for regexps that use back referencing. Regexp matching + with back referencing is an NP-complete problem on the number of back + references. The easiest way to match them is to use a backtracking + routine which basically goes through all possible paths in the TNFA + and chooses the one which results in the best (leftmost and longest) + match. This can be spectacularly expensive and may run out of stack + space, but there really is no better known generic algorithm. Quoting + Henry Spencer from comp.compilers: + + + POSIX.2 REs require longest match, which is really exciting to + implement since the obsolete ("basic") variant also includes + \<digit>. I haven't found a better way of tackling this than doing + a preliminary match using a DFA (or simulation) on a modified RE + that just replicates subREs for \<digit>, and then doing a + backtracking match to determine whether the subRE matches were + right. This can be rather slow, but I console myself with the + thought that people who use \<digit> deserve very slow execution. + (Pun unintentional but very appropriate.)
+ +*/ + + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif /* HAVE_CONFIG_H */ + +#ifdef TRE_USE_ALLOCA +/* AIX requires this to be the first thing in the file. */ +#ifndef __GNUC__ +# if HAVE_ALLOCA_H +# include <alloca.h> +# else +# ifdef _AIX + #pragma alloca +# else +# ifndef alloca /* predefined by HP cc +Olibcalls */ +char *alloca (); +# endif +# endif +# endif +#endif +#endif /* TRE_USE_ALLOCA */ + +#include <assert.h> +#include <stdlib.h> +#include <string.h> +#ifdef HAVE_WCHAR_H +#include <wchar.h> +#endif /* HAVE_WCHAR_H */ +#ifdef HAVE_WCTYPE_H +#include <wctype.h> +#endif /* HAVE_WCTYPE_H */ +#ifndef TRE_WCHAR +#include <ctype.h> +#endif /* !TRE_WCHAR */ +#ifdef HAVE_MALLOC_H +#include <malloc.h> +#endif /* HAVE_MALLOC_H */ + +#include "tre-internal.h" +#include "tre-mem.h" +#include "tre-match-utils.h" +#include "xmalloc.h" + +typedef struct { + int pos; + const char *str_byte; +#ifdef TRE_WCHAR + const wchar_t *str_wide; +#endif /* TRE_WCHAR */ + tre_tnfa_transition_t *state; + int state_id; + int next_c; + int *tags; +#ifdef TRE_MBSTATE + mbstate_t mbstate; +#endif /* TRE_MBSTATE */ +} tre_backtrack_item_t; + +typedef struct tre_backtrack_struct { + tre_backtrack_item_t item; + struct tre_backtrack_struct *prev; + struct tre_backtrack_struct *next; +} *tre_backtrack_t; + +#ifdef TRE_WCHAR +#define BT_STACK_WIDE_IN(_str_wide) stack->item.str_wide = (_str_wide) +#define BT_STACK_WIDE_OUT (str_wide) = stack->item.str_wide +#else /* !TRE_WCHAR */ +#define BT_STACK_WIDE_IN(_str_wide) +#define BT_STACK_WIDE_OUT +#endif /* !TRE_WCHAR */ + +#ifdef TRE_MBSTATE +#define BT_STACK_MBSTATE_IN stack->item.mbstate = (mbstate) +#define BT_STACK_MBSTATE_OUT (mbstate) = stack->item.mbstate +#else /* !TRE_MBSTATE */ +#define BT_STACK_MBSTATE_IN +#define BT_STACK_MBSTATE_OUT +#endif /* !TRE_MBSTATE */ + + +#ifdef TRE_USE_ALLOCA +#define tre_bt_mem_new tre_mem_newa +#define tre_bt_mem_alloc tre_mem_alloca +#define tre_bt_mem_destroy(obj) do { } while (0) +#define xafree(obj) do { } while (0) /* do nothing, obj was obtained with alloca() */ +#else /* 
!TRE_USE_ALLOCA */ +#define tre_bt_mem_new tre_mem_new +#define tre_bt_mem_alloc tre_mem_alloc +#define tre_bt_mem_destroy tre_mem_destroy +#define xafree(obj) xfree(obj) +#endif /* !TRE_USE_ALLOCA */ + + +#define BT_STACK_PUSH(_pos, _str_byte, _str_wide, _state, _state_id, _next_c, _tags, _mbstate) \ + do \ + { \ + int i; \ + if (!stack->next) \ + { \ + tre_backtrack_t s; \ + s = tre_bt_mem_alloc(mem, sizeof(*s)); \ + if (!s) \ + { \ + tre_bt_mem_destroy(mem); \ + if (tags) \ + xafree(tags); \ + if (pmatch) \ + xafree(pmatch); \ + if (states_seen) \ + xafree(states_seen); \ + return REG_ESPACE; \ + } \ + s->prev = stack; \ + s->next = NULL; \ + s->item.tags = tre_bt_mem_alloc(mem, \ + sizeof(*tags) * tnfa->num_tags); \ + if (!s->item.tags) \ + { \ + tre_bt_mem_destroy(mem); \ + if (tags) \ + xafree(tags); \ + if (pmatch) \ + xafree(pmatch); \ + if (states_seen) \ + xafree(states_seen); \ + return REG_ESPACE; \ + } \ + stack->next = s; \ + stack = s; \ + } \ + else \ + stack = stack->next; \ + stack->item.pos = (_pos); \ + stack->item.str_byte = (_str_byte); \ + BT_STACK_WIDE_IN(_str_wide); \ + stack->item.state = (_state); \ + stack->item.state_id = (_state_id); \ + stack->item.next_c = (_next_c); \ + for (i = 0; i < tnfa->num_tags; i++) \ + stack->item.tags[i] = (_tags)[i]; \ + BT_STACK_MBSTATE_IN; \ + } \ + while (/*CONSTCOND*/(void)0,0) + +#define BT_STACK_POP() \ + do \ + { \ + int i; \ + assert(stack->prev); \ + pos = stack->item.pos; \ + if (type == STR_USER) \ + str_source->rewind(pos + pos_add_next, str_source->context); \ + str_byte = stack->item.str_byte; \ + BT_STACK_WIDE_OUT; \ + state = stack->item.state; \ + next_c = (tre_char_t) stack->item.next_c; \ + for (i = 0; i < tnfa->num_tags; i++) \ + tags[i] = stack->item.tags[i]; \ + BT_STACK_MBSTATE_OUT; \ + stack = stack->prev; \ + } \ + while (/*CONSTCOND*/(void)0,0) + +#undef MIN +#define MIN(a, b) ((a) <= (b) ? 
(a) : (b)) + +reg_errcode_t +tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string, + ssize_t len, tre_str_type_t type, int *match_tags, + int eflags, int *match_end_ofs) +{ + /* State variables required by GET_NEXT_WCHAR. */ + tre_char_t prev_c = 0, next_c = 0; + const char *str_byte = string; + ssize_t pos = 0; + unsigned int pos_add_next = 1; +#ifdef TRE_WCHAR + const wchar_t *str_wide = string; +#ifdef TRE_MBSTATE + mbstate_t mbstate; +#endif /* TRE_MBSTATE */ +#endif /* TRE_WCHAR */ + int reg_notbol = eflags & REG_NOTBOL; + int reg_noteol = eflags & REG_NOTEOL; + int reg_newline = tnfa->cflags & REG_NEWLINE; + int str_user_end = 0; + + /* These are used to remember the necessary values of the above + variables to return to the position where the current search + started from. */ + int next_c_start; + const char *str_byte_start; + int pos_start = -1; +#ifdef TRE_WCHAR + const wchar_t *str_wide_start; +#endif /* TRE_WCHAR */ +#ifdef TRE_MBSTATE + mbstate_t mbstate_start; +#endif /* TRE_MBSTATE */ + reg_errcode_t ret; + + /* End offset of best match so far, or -1 if no match found yet. */ + int match_eo = -1; + /* Tag arrays. */ + int *next_tags, *tags = NULL; + /* Current TNFA state. */ + tre_tnfa_transition_t *state; + int *states_seen = NULL; + + /* Memory allocator to for allocating the backtracking stack. */ + tre_mem_t mem = tre_bt_mem_new(); + + /* The backtracking stack. */ + tre_backtrack_t stack; + + tre_tnfa_transition_t *trans_i; + regmatch_t *pmatch = NULL; + + /* + * TRE internals tend to use int instead of size_t for positions or + * lengths and don't check for overflow. This will take time to fix + * properly. In the meantime, simply limit the input to what we can + * handle. 
+ */ + if (len > TRE_MAX_STRING) + len = TRE_MAX_STRING; + +#ifdef TRE_MBSTATE + memset(&mbstate, '\0', sizeof(mbstate)); +#endif /* TRE_MBSTATE */ + + if (!mem) + return REG_ESPACE; + stack = tre_bt_mem_alloc(mem, sizeof(*stack)); + if (!stack) + { + ret = REG_ESPACE; + goto error_exit; + } + stack->prev = NULL; + stack->next = NULL; + + DPRINT(("tnfa_execute_backtrack, input type %d\n", type)); + DPRINT(("len = %zd\n", len)); + +#ifdef TRE_USE_ALLOCA + tags = alloca(sizeof(*tags) * tnfa->num_tags); + pmatch = alloca(sizeof(*pmatch) * tnfa->num_submatches); + states_seen = alloca(sizeof(*states_seen) * tnfa->num_states); +#else /* !TRE_USE_ALLOCA */ + if (tnfa->num_tags) + { + tags = xmalloc(sizeof(*tags) * tnfa->num_tags); + if (!tags) + { + ret = REG_ESPACE; + goto error_exit; + } + } + if (tnfa->num_submatches) + { + pmatch = xmalloc(sizeof(*pmatch) * tnfa->num_submatches); + if (!pmatch) + { + ret = REG_ESPACE; + goto error_exit; + } + } + if (tnfa->num_states) + { + states_seen = xmalloc(sizeof(*states_seen) * tnfa->num_states); + if (!states_seen) + { + ret = REG_ESPACE; + goto error_exit; + } + } +#endif /* !TRE_USE_ALLOCA */ + + retry: + { + int i; + for (i = 0; i < tnfa->num_tags; i++) + { + tags[i] = -1; + if (match_tags) + match_tags[i] = -1; + } + for (i = 0; i < tnfa->num_states; i++) + states_seen[i] = 0; + } + + state = NULL; + pos = pos_start; + if (type == STR_USER) + str_source->rewind(pos + pos_add_next, str_source->context); + GET_NEXT_WCHAR(); + pos_start = pos; + next_c_start = next_c; + str_byte_start = str_byte; +#ifdef TRE_WCHAR + str_wide_start = str_wide; +#endif /* TRE_WCHAR */ +#ifdef TRE_MBSTATE + mbstate_start = mbstate; +#endif /* TRE_MBSTATE */ + + /* Handle initial states. 
*/ + next_tags = NULL; + for (trans_i = tnfa->initial; trans_i->state; trans_i++) + { + DPRINT(("> init %p, prev_c %lc\n", trans_i->state, (tre_cint_t)prev_c)); + if (trans_i->assertions && CHECK_ASSERTIONS(trans_i->assertions)) + { + DPRINT(("assert failed\n")); + continue; + } + if (state == NULL) + { + /* Start from this state. */ + state = trans_i->state; + next_tags = trans_i->tags; + } + else + { + /* Backtrack to this state. */ + DPRINT(("saving state %d for backtracking\n", trans_i->state_id)); + BT_STACK_PUSH(pos, str_byte, str_wide, trans_i->state, + trans_i->state_id, next_c, tags, mbstate); + { + int *tmp = trans_i->tags; + if (tmp) + while (*tmp >= 0) + stack->item.tags[*tmp++] = pos; + } + } + } + + if (next_tags) + for (; *next_tags >= 0; next_tags++) + tags[*next_tags] = pos; + + + DPRINT(("entering match loop, pos %zd, str_byte %p\n", pos, str_byte)); + DPRINT(("pos:chr/code | state and tags\n")); + DPRINT(("-------------+------------------------------------------------\n")); + + if (state == NULL) + goto backtrack; + + while (/*CONSTCOND*/(void)1,1) + { + tre_tnfa_transition_t *next_state; + int empty_br_match; + + DPRINT(("start loop\n")); + if (state == tnfa->final) + { + DPRINT((" match found, %d %zd\n", match_eo, pos)); + if (match_eo < pos + || (match_eo == pos + && match_tags + && tre_tag_order(tnfa->num_tags, tnfa->tag_directions, + tags, match_tags))) + { + int i; + /* This match wins the previous match. */ + DPRINT((" win previous\n")); + match_eo = pos; + if (match_tags) + for (i = 0; i < tnfa->num_tags; i++) + match_tags[i] = tags[i]; + } + /* Our TNFAs never have transitions leaving from the final state, + so we jump right to backtracking. */ + goto backtrack; + } + +#ifdef TRE_DEBUG + DPRINT(("%3zd:%2lc/%05d | %p ", pos, (tre_cint_t)next_c, (int)next_c, + state)); + { + int i; + for (i = 0; i < tnfa->num_tags; i++) + DPRINT(("%d%s", tags[i], i < tnfa->num_tags - 1 ? 
", " : "")); + DPRINT(("\n")); + } +#endif /* TRE_DEBUG */ + + /* Go to the next character in the input string. */ + empty_br_match = 0; + trans_i = state; + if (trans_i->state && trans_i->assertions & ASSERT_BACKREF) + { + /* This is a back reference state. All transitions leaving from + this state have the same back reference "assertion". Instead + of reading the next character, we match the back reference. */ + int so, eo, bt = trans_i->u.backref; + int bt_len; + int result; + + DPRINT((" should match back reference %d\n", bt)); + /* Get the substring we need to match against. Remember to + turn off REG_NOSUB temporarily. */ + tre_fill_pmatch(bt + 1, pmatch, tnfa->cflags & ~REG_NOSUB, + tnfa, tags, pos); + so = pmatch[bt].rm_so; + eo = pmatch[bt].rm_eo; + bt_len = eo - so; + +#ifdef TRE_DEBUG + { + int slen; + if (len < 0) + slen = bt_len; + else + slen = MIN(bt_len, len - pos); + + if (type == STR_BYTE) + { + DPRINT((" substring (len %d) is [%d, %d[: '%.*s'\n", + bt_len, so, eo, bt_len, (char*)string + so)); + DPRINT((" current string is '%.*s'\n", slen, str_byte - 1)); + } +#ifdef TRE_WCHAR + else if (type == STR_WIDE) + { + DPRINT((" substring (len %d) is [%d, %d[: '%.*" STRF "'\n", + bt_len, so, eo, bt_len, (wchar_t*)string + so)); + DPRINT((" current string is '%.*" STRF "'\n", + slen, str_wide - 1)); + } +#endif /* TRE_WCHAR */ + } +#endif + + if (len < 0) + { + if (type == STR_USER) + result = str_source->compare((unsigned)so, (unsigned)pos, + (unsigned)bt_len, + str_source->context); +#ifdef TRE_WCHAR + else if (type == STR_WIDE) + result = wcsncmp((const wchar_t*)string + so, str_wide - 1, + (size_t)bt_len); +#endif /* TRE_WCHAR */ + else + result = strncmp((const char*)string + so, str_byte - 1, + (size_t)bt_len); + } + else if (len - pos < bt_len) + result = 1; +#ifdef TRE_WCHAR + else if (type == STR_WIDE) + result = wmemcmp((const wchar_t*)string + so, str_wide - 1, + (size_t)bt_len); +#endif /* TRE_WCHAR */ + else + result = memcmp((const 
char*)string + so, str_byte - 1, + (size_t)bt_len); + + if (result == 0) + { + /* Back reference matched. Check for infinite loop. */ + if (bt_len == 0) + empty_br_match = 1; + if (empty_br_match && states_seen[trans_i->state_id]) + { + DPRINT((" avoid loop\n")); + goto backtrack; + } + + states_seen[trans_i->state_id] = empty_br_match; + + /* Advance in input string and resync `prev_c', `next_c' + and pos. */ + DPRINT((" back reference matched\n")); + str_byte += bt_len - 1; +#ifdef TRE_WCHAR + str_wide += bt_len - 1; +#endif /* TRE_WCHAR */ + pos += bt_len - 1; + GET_NEXT_WCHAR(); + DPRINT((" pos now %zd\n", pos)); + } + else + { + DPRINT((" back reference did not match\n")); + goto backtrack; + } + } + else + { + /* Check for end of string. */ + if (len < 0) + { + if (type == STR_USER) + { + if (str_user_end) + goto backtrack; + } + else if (next_c == L'\0' || pos >= TRE_MAX_STRING) + goto backtrack; + } + else + { + if (pos >= len) + goto backtrack; + } + + /* Read the next character. */ + GET_NEXT_WCHAR(); + } + + next_state = NULL; + for (trans_i = state; trans_i->state; trans_i++) + { + DPRINT((" transition %d-%d (%c-%c) %d to %d\n", + trans_i->code_min, trans_i->code_max, + trans_i->code_min, trans_i->code_max, + trans_i->assertions, trans_i->state_id)); + if (trans_i->code_min <= (tre_cint_t)prev_c + && trans_i->code_max >= (tre_cint_t)prev_c) + { + if (trans_i->assertions + && (CHECK_ASSERTIONS(trans_i->assertions) + || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags))) + { + DPRINT((" assertion failed\n")); + continue; + } + + if (next_state == NULL) + { + /* First matching transition. */ + DPRINT((" Next state is %d\n", trans_i->state_id)); + next_state = trans_i->state; + next_tags = trans_i->tags; + } + else + { + /* Second matching transition. We may need to backtrack here + to take this transition instead of the first one, so we + push this transition in the backtracking stack so we can + jump back here if needed. 
*/ + DPRINT((" saving state %d for backtracking\n", + trans_i->state_id)); + BT_STACK_PUSH(pos, str_byte, str_wide, trans_i->state, + trans_i->state_id, next_c, tags, mbstate); + { + int *tmp; + for (tmp = trans_i->tags; tmp && *tmp >= 0; tmp++) + stack->item.tags[*tmp] = pos; + } +#if 0 /* XXX - it's important not to look at all transitions here to keep + the stack small! */ + break; +#endif + } + } + } + + if (next_state != NULL) + { + /* Matching transitions were found. Take the first one. */ + state = next_state; + + /* Update the tag values. */ + if (next_tags) + while (*next_tags >= 0) + tags[*next_tags++] = pos; + } + else + { + backtrack: + /* A matching transition was not found. Try to backtrack. */ + if (stack->prev) + { + DPRINT((" backtracking\n")); + if (stack->item.state->assertions & ASSERT_BACKREF) + { + DPRINT((" states_seen[%d] = 0\n", + stack->item.state_id)); + states_seen[stack->item.state_id] = 0; + } + + BT_STACK_POP(); + } + else if (match_eo < 0) + { + /* Try starting from a later position in the input string. */ + /* Check for end of string. */ + if (len < 0) + { + if (next_c_start == L'\0' || pos_start >= TRE_MAX_STRING) + { + DPRINT(("end of string.\n")); + break; + } + } + else + { + if (pos_start >= len) + { + DPRINT(("end of string.\n")); + break; + } + } + DPRINT(("restarting from next start position\n")); + next_c = (tre_char_t) next_c_start; +#ifdef TRE_MBSTATE + mbstate = mbstate_start; +#endif /* TRE_MBSTATE */ + str_byte = str_byte_start; +#ifdef TRE_WCHAR + str_wide = str_wide_start; +#endif /* TRE_WCHAR */ + goto retry; + } + else + { + DPRINT(("finished\n")); + break; + } + } + } + + ret = match_eo >= 0 ? 
REG_OK : REG_NOMATCH; + *match_end_ofs = match_eo; + + error_exit: + tre_bt_mem_destroy(mem); +#ifndef TRE_USE_ALLOCA + if (tags) + xafree(tags); + if (pmatch) + xafree(pmatch); + if (states_seen) + xafree(states_seen); +#endif /* !TRE_USE_ALLOCA */ + + return ret; +} diff --git a/deps/tre/lib/tre-match-parallel.c b/deps/tre/lib/tre-match-parallel.c new file mode 100644 index 000000000..151083746 --- /dev/null +++ b/deps/tre/lib/tre-match-parallel.c @@ -0,0 +1,538 @@ +/* + tre-match-parallel.c - TRE parallel regex matching engine + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + +*/ + +/* + This algorithm searches for matches basically by reading characters + in the searched string one by one, starting at the beginning. All + matching paths in the TNFA are traversed in parallel. When two or + more paths reach the same state, exactly one is chosen according to + tag ordering rules; if returning submatches is not required it does + not matter which path is chosen. + + The worst case time required for finding the leftmost and longest + match, or determining that there is no match, is always linearly + dependent on the length of the text being searched. + + This algorithm cannot handle TNFAs with back referencing nodes. + See `tre-match-backtrack.c'. +*/ + + +#ifdef HAVE_CONFIG_H +#include +#endif /* HAVE_CONFIG_H */ + +#ifdef TRE_USE_ALLOCA +/* AIX requires this to be the first thing in the file. 
*/ +#ifndef __GNUC__ +# if HAVE_ALLOCA_H +# include +# else +# ifdef _AIX + #pragma alloca +# else +# ifndef alloca /* predefined by HP cc +Olibcalls */ +char *alloca (); +# endif +# endif +# endif +#endif +#endif /* TRE_USE_ALLOCA */ + +#include +#include +#include +#include +#ifdef HAVE_WCHAR_H +#include +#endif /* HAVE_WCHAR_H */ +#ifdef HAVE_WCTYPE_H +#include +#endif /* HAVE_WCTYPE_H */ +#ifndef TRE_WCHAR +#include +#endif /* !TRE_WCHAR */ +#ifdef HAVE_MALLOC_H +#include +#endif /* HAVE_MALLOC_H */ + +#include "tre-internal.h" +#include "tre-match-utils.h" +#include "xmalloc.h" + + + +typedef struct { + tre_tnfa_transition_t *state; + int *tags; +} tre_tnfa_reach_t; + +typedef struct { + int pos; + int **tags; +} tre_reach_pos_t; + + +#ifdef TRE_DEBUG +static void +tre_print_reach(const tre_tnfa_reach_t *reach, int num_tags) +{ + int i; + + while (reach->state != NULL) + { + DPRINT((" %p", (void *)reach->state)); + if (num_tags > 0) + { + DPRINT(("/")); + for (i = 0; i < num_tags; i++) + { + DPRINT(("%d:%d", i, reach->tags[i])); + if (i < (num_tags-1)) + DPRINT((",")); + } + } + reach++; + } + DPRINT(("\n")); + +} +#endif /* TRE_DEBUG */ + +reg_errcode_t +tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, ssize_t len, + tre_str_type_t type, int *match_tags, int eflags, + int *match_end_ofs) +{ + /* State variables required by GET_NEXT_WCHAR. 
*/ + tre_char_t prev_c = 0, next_c = 0; + const char *str_byte = string; + ssize_t pos = -1; + unsigned int pos_add_next = 1; +#ifdef TRE_WCHAR + const wchar_t *str_wide = string; +#ifdef TRE_MBSTATE + mbstate_t mbstate; +#endif /* TRE_MBSTATE */ +#endif /* TRE_WCHAR */ + reg_errcode_t ret; + int reg_notbol = eflags & REG_NOTBOL; + int reg_noteol = eflags & REG_NOTEOL; + int reg_newline = tnfa->cflags & REG_NEWLINE; + int str_user_end = 0; + + char *buf; + tre_tnfa_transition_t *trans_i; + tre_tnfa_reach_t *reach, *reach_next, *reach_i, *reach_next_i; + tre_reach_pos_t *reach_pos; + int *tag_i; + int num_tags, i; + + int match_eo = -1; /* end offset of match (-1 if no match found yet) */ + int new_match = 0; + int *tmp_tags = NULL; + int *tmp_iptr; + + /* + * TRE internals tend to use int instead of size_t for positions or + * lengths and don't check for overflow. This will take time to fix + * properly. In the meantime, simply limit the input to what we can + * handle. + */ + if (len > TRE_MAX_STRING) + len = TRE_MAX_STRING; + +#ifdef TRE_MBSTATE + memset(&mbstate, '\0', sizeof(mbstate)); +#endif /* TRE_MBSTATE */ + + DPRINT(("tre_tnfa_run_parallel, input type %d\n", type)); + + if (!match_tags) + num_tags = 0; + else + num_tags = tnfa->num_tags; + + /* Allocate memory for temporary data required for matching. This needs to + be done for every matching operation to be thread safe. This allocates + everything in a single large block from the stack frame using alloca() + or with malloc() if alloca is unavailable. 
*/ + { + size_t tbytes, rbytes, pbytes, xbytes, total_bytes; + size_t num_states = (size_t)tnfa->num_states; + size_t state_tag_bytes, reach_bytes; + size_t padding = (sizeof(long) - 1) * 4; + char *tmp_buf; + + if (num_states > SIZE_MAX / sizeof(*reach_pos)) + return REG_ESPACE; + pbytes = sizeof(*reach_pos) * num_states; + + if (num_states + 1 > SIZE_MAX / sizeof(*reach_next)) + return REG_ESPACE; + rbytes = sizeof(*reach_next) * (num_states + 1); + + if ((size_t)num_tags > SIZE_MAX / sizeof(*tmp_tags)) + return REG_ESPACE; + tbytes = sizeof(*tmp_tags) * (size_t)num_tags; + + if ((size_t)num_tags > SIZE_MAX / sizeof(int)) + return REG_ESPACE; + xbytes = sizeof(int) * (size_t)num_tags; + + if (num_states > 0 && xbytes > SIZE_MAX / num_states) + return REG_ESPACE; + state_tag_bytes = xbytes * num_states; + + if (rbytes > SIZE_MAX - state_tag_bytes) + return REG_ESPACE; + reach_bytes = rbytes + state_tag_bytes; + + if (reach_bytes > (SIZE_MAX - padding - tbytes - pbytes) / 2) + return REG_ESPACE; + + /* Compute the length of the block we need. */ + total_bytes = + padding + reach_bytes * 2 + tbytes + pbytes; + + /* Allocate the memory. */ +#ifdef TRE_USE_ALLOCA + buf = alloca(total_bytes); +#else /* !TRE_USE_ALLOCA */ + buf = xmalloc(total_bytes); +#endif /* !TRE_USE_ALLOCA */ + if (buf == NULL) + return REG_ESPACE; + memset(buf, 0, total_bytes); + + /* Get the various pointers within tmp_buf (properly aligned). 
*/ + tmp_tags = (void *)buf; + tmp_buf = buf + tbytes; + tmp_buf += ALIGN(tmp_buf, long); + reach_next = (void *)tmp_buf; + tmp_buf += rbytes; + tmp_buf += ALIGN(tmp_buf, long); + reach = (void *)tmp_buf; + tmp_buf += rbytes; + tmp_buf += ALIGN(tmp_buf, long); + reach_pos = (void *)tmp_buf; + tmp_buf += pbytes; + tmp_buf += ALIGN(tmp_buf, long); + for (i = 0; i < tnfa->num_states; i++) + { + reach[i].tags = (void *)tmp_buf; + tmp_buf += xbytes; + reach_next[i].tags = (void *)tmp_buf; + tmp_buf += xbytes; + } + } + + for (i = 0; i < tnfa->num_states; i++) + reach_pos[i].pos = -1; + + /* If only one character can start a match, find it first. */ + if (tnfa->first_char >= 0 && type == STR_BYTE && str_byte) + { + const char *orig_str = str_byte; + int first = tnfa->first_char; + + if (len >= 0) + str_byte = memchr(orig_str, first, (size_t)len); + else + str_byte = strchr(orig_str, first); + if (str_byte == NULL) + { +#ifndef TRE_USE_ALLOCA + if (buf) + xfree(buf); +#endif /* !TRE_USE_ALLOCA */ + return REG_NOMATCH; + } + DPRINT(("skipped %lu chars\n", (unsigned long)(str_byte - orig_str))); + if (str_byte >= orig_str + 1) + prev_c = (unsigned char)*(str_byte - 1); + next_c = (unsigned char)*str_byte; + pos = str_byte - orig_str; + if (len < 0 || pos < len) + str_byte++; + } + else + { + GET_NEXT_WCHAR(); + pos = 0; + } + +#if 0 + /* Skip over characters that cannot possibly be the first character + of a match. */ + if (tnfa->firstpos_chars != NULL) + { + char *chars = tnfa->firstpos_chars; + + if (len < 0) + { + const char *orig_str = str_byte; + /* XXX - use strpbrk() and wcspbrk() because they might be + optimized for the target architecture. Try also strcspn() + and wcscspn() and compare the speeds. 
*/ + while (next_c != L'\0' && !chars[next_c]) + { + next_c = *str_byte++; + } + prev_c = *(str_byte - 2); + pos += str_byte - orig_str; + DPRINT(("skipped %d chars\n", str_byte - orig_str)); + } + else + { + while (pos <= len && !chars[next_c]) + { + prev_c = next_c; + next_c = (unsigned char)(*str_byte++); + pos++; + } + } + } +#endif + + DPRINT(("length: %zd\n", len)); + DPRINT(("pos:chr/code | states and tags\n")); + DPRINT(("-------------+------------------------------------------------\n")); + + reach_next_i = reach_next; + while (/*CONSTCOND*/(void)1,1) + { + /* If no match found yet, add the initial states to `reach_next'. */ + if (match_eo < 0) + { + DPRINT((" init >")); + trans_i = tnfa->initial; + while (trans_i->state != NULL) + { + if (reach_pos[trans_i->state_id].pos < pos) + { + if (trans_i->assertions + && CHECK_ASSERTIONS(trans_i->assertions)) + { + DPRINT(("assertion failed\n")); + trans_i++; + continue; + } + + DPRINT((" %p", (void *)trans_i->state)); + reach_next_i->state = trans_i->state; + for (i = 0; i < num_tags; i++) + reach_next_i->tags[i] = -1; + tag_i = trans_i->tags; + if (tag_i) + while (*tag_i >= 0) + { + if (*tag_i < num_tags) + reach_next_i->tags[*tag_i] = pos; + tag_i++; + } + if (reach_next_i->state == tnfa->final) + { + DPRINT((" found empty match\n")); + match_eo = pos; + new_match = 1; + for (i = 0; i < num_tags; i++) + match_tags[i] = reach_next_i->tags[i]; + } + reach_pos[trans_i->state_id].pos = pos; + reach_pos[trans_i->state_id].tags = &reach_next_i->tags; + reach_next_i++; + } + trans_i++; + } + DPRINT(("\n")); + reach_next_i->state = NULL; + } + else + { + if (num_tags == 0 || reach_next_i == reach_next) + /* We have found a match. */ + break; + } + + /* Check for end of string. 
*/ + if (len < 0) + { + if (type == STR_USER) + { + if (str_user_end) + break; + } + else if (next_c == L'\0' || pos >= TRE_MAX_STRING) + break; + } + else + { + if (pos >= len) + break; + } + + GET_NEXT_WCHAR(); + +#ifdef TRE_DEBUG + DPRINT(("%3zd:%2lc/%05d |", pos - 1, (tre_cint_t)prev_c, (int)prev_c)); + tre_print_reach(reach_next, num_tags); + DPRINT(("%3zd:%2lc/%05d |", pos, (tre_cint_t)next_c, (int)next_c)); + tre_print_reach(reach_next, num_tags); +#endif /* TRE_DEBUG */ + + /* Swap `reach' and `reach_next'. */ + reach_i = reach; + reach = reach_next; + reach_next = reach_i; + + /* For each state in `reach', weed out states that don't fulfill the + minimal matching conditions. */ + if (tnfa->num_minimals && new_match) + { + new_match = 0; + reach_next_i = reach_next; + for (reach_i = reach; reach_i->state; reach_i++) + { + int skip = 0; + for (i = 0; tnfa->minimal_tags[i] >= 0; i += 2) + { + int end = tnfa->minimal_tags[i]; + int start = tnfa->minimal_tags[i + 1]; + DPRINT((" Minimal start %d, end %d\n", start, end)); + if (end >= num_tags) + { + DPRINT((" Throwing %p out.\n", reach_i->state)); + skip = 1; + break; + } + else if (reach_i->tags[start] == match_tags[start] + && reach_i->tags[end] < match_tags[end]) + { + DPRINT((" Throwing %p out because t%d < %d\n", + reach_i->state, end, match_tags[end])); + skip = 1; + break; + } + } + if (!skip) + { + reach_next_i->state = reach_i->state; + tmp_iptr = reach_next_i->tags; + reach_next_i->tags = reach_i->tags; + reach_i->tags = tmp_iptr; + reach_next_i++; + } + } + reach_next_i->state = NULL; + + /* Swap `reach' and `reach_next'. */ + reach_i = reach; + reach = reach_next; + reach_next = reach_i; + } + + /* For each state in `reach' see if there is a transition leaving with + the current input symbol to a state not yet in `reach_next', and + add the destination states to `reach_next'. 
*/ + reach_next_i = reach_next; + for (reach_i = reach; reach_i->state; reach_i++) + { + for (trans_i = reach_i->state; trans_i->state; trans_i++) + { + /* Does this transition match the input symbol? */ + if (trans_i->code_min <= (tre_cint_t)prev_c && + trans_i->code_max >= (tre_cint_t)prev_c) + { + if (trans_i->assertions + && (CHECK_ASSERTIONS(trans_i->assertions) + || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags))) + { + DPRINT(("assertion failed\n")); + continue; + } + + /* Compute the tags after this transition. */ + for (i = 0; i < num_tags; i++) + tmp_tags[i] = reach_i->tags[i]; + tag_i = trans_i->tags; + if (tag_i != NULL) + while (*tag_i >= 0) + { + if (*tag_i < num_tags) + tmp_tags[*tag_i] = pos; + tag_i++; + } + + if (reach_pos[trans_i->state_id].pos < pos) + { + /* Found an unvisited node. */ + reach_next_i->state = trans_i->state; + tmp_iptr = reach_next_i->tags; + reach_next_i->tags = tmp_tags; + tmp_tags = tmp_iptr; + reach_pos[trans_i->state_id].pos = pos; + reach_pos[trans_i->state_id].tags = &reach_next_i->tags; + + if (reach_next_i->state == tnfa->final + && (match_eo == -1 + || (num_tags > 0 + && reach_next_i->tags[0] <= match_tags[0]))) + { + DPRINT((" found match %p\n", trans_i->state)); + match_eo = pos; + new_match = 1; + for (i = 0; i < num_tags; i++) + match_tags[i] = reach_next_i->tags[i]; + } + reach_next_i++; + + } + else + { + assert(reach_pos[trans_i->state_id].pos == pos); + /* Another path has also reached this state. We choose + the winner by examining the tag values for both + paths. */ + if (tre_tag_order(num_tags, tnfa->tag_directions, + tmp_tags, + *reach_pos[trans_i->state_id].tags)) + { + /* The new path wins. 
*/ + tmp_iptr = *reach_pos[trans_i->state_id].tags; + *reach_pos[trans_i->state_id].tags = tmp_tags; + if (trans_i->state == tnfa->final) + { + DPRINT((" found better match\n")); + match_eo = pos; + new_match = 1; + for (i = 0; i < num_tags; i++) + match_tags[i] = tmp_tags[i]; + } + tmp_tags = tmp_iptr; + } + } + } + } + } + reach_next_i->state = NULL; + } + + DPRINT(("match end offset = %d\n", match_eo)); + + *match_end_ofs = match_eo; + ret = match_eo >= 0 ? REG_OK : REG_NOMATCH; + +#ifndef TRE_USE_ALLOCA + if (buf) + xfree(buf); +#endif /* !TRE_USE_ALLOCA */ + return ret; +} + +/* EOF */ diff --git a/deps/tre/lib/tre-match-utils.h b/deps/tre/lib/tre-match-utils.h new file mode 100644 index 000000000..76e8b1972 --- /dev/null +++ b/deps/tre/lib/tre-match-utils.h @@ -0,0 +1,215 @@ +/* + tre-match-utils.h - TRE matcher helper definitions + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + +*/ + +#define str_source ((const tre_str_source*)string) + +#ifdef TRE_WCHAR + +#ifdef TRE_MULTIBYTE + +/* Wide character and multibyte support. 
*/ + +#define GET_NEXT_WCHAR() \ + do { \ + prev_c = next_c; \ + if (type == STR_BYTE) \ + { \ + pos++; \ + if (len >= 0 && pos >= len) \ + next_c = '\0'; \ + else \ + next_c = (unsigned char)(*str_byte++); \ + } \ + else if (type == STR_WIDE) \ + { \ + pos++; \ + if (len >= 0 && pos >= len) \ + next_c = L'\0'; \ + else \ + next_c = *str_wide++; \ + } \ + else if (type == STR_MBS) \ + { \ + pos += pos_add_next; \ + if (str_byte == NULL) \ + next_c = L'\0'; \ + else \ + { \ + size_t w; \ + size_t max; \ + if (len >= 0) \ + max = len - pos; \ + else \ + max = 32; \ + if (max <= 0) \ + { \ + next_c = L'\0'; \ + pos_add_next = 1; \ + } \ + else \ + { \ + w = tre_mbrtowc(&next_c, str_byte, (size_t)max, &mbstate); \ + if (w == (size_t)-1 || w == (size_t)-2) \ + return REG_NOMATCH; \ + if (w == 0 && len >= 0) \ + { \ + pos_add_next = 1; \ + next_c = 0; \ + str_byte++; \ + } \ + else \ + { \ + pos_add_next = w; \ + str_byte += w; \ + } \ + } \ + } \ + } \ + else if (type == STR_USER) \ + { \ + pos += pos_add_next; \ + str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \ + str_source->context); \ + } \ + } while(/*CONSTCOND*/(void)0,0) + +#else /* !TRE_MULTIBYTE */ + +/* Wide character support, no multibyte support. */ + +#define GET_NEXT_WCHAR() \ + do { \ + prev_c = next_c; \ + if (type == STR_BYTE) \ + { \ + pos++; \ + if (len >= 0 && pos >= len) \ + next_c = '\0'; \ + else \ + next_c = (unsigned char)(*str_byte++); \ + } \ + else if (type == STR_WIDE) \ + { \ + pos++; \ + if (len >= 0 && pos >= len) \ + next_c = L'\0'; \ + else \ + next_c = *str_wide++; \ + } \ + else if (type == STR_USER) \ + { \ + pos += pos_add_next; \ + str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \ + str_source->context); \ + } \ + } while(/*CONSTCOND*/(void)0,0) + +#endif /* !TRE_MULTIBYTE */ + +#else /* !TRE_WCHAR */ + +/* No wide character or multibyte support. 
*/ + +#define GET_NEXT_WCHAR() \ + do { \ + prev_c = next_c; \ + if (type == STR_BYTE) \ + { \ + pos++; \ + if (len >= 0 && pos >= len) \ + next_c = '\0'; \ + else \ + next_c = (unsigned char)(*str_byte++); \ + } \ + else if (type == STR_USER) \ + { \ + pos += pos_add_next; \ + str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \ + str_source->context); \ + } \ + } while(/*CONSTCOND*/(void)0,0) + +#endif /* !TRE_WCHAR */ + + + +#define IS_WORD_CHAR(c) ((c) == L'_' || tre_isalnum(c)) + +#define CHECK_ASSERTIONS(assertions) \ + (((assertions & ASSERT_AT_BOL) \ + && (pos > 0 || reg_notbol) \ + && (prev_c != L'\n' || !reg_newline)) \ + || ((assertions & ASSERT_AT_EOL) \ + && (next_c != L'\0' || reg_noteol) \ + && (next_c != L'\n' || !reg_newline)) \ + || ((assertions & ASSERT_AT_BOW) \ + && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c))) \ + || ((assertions & ASSERT_AT_EOW) \ + && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c))) \ + || ((assertions & ASSERT_AT_WB) \ + && (pos != 0 && next_c != L'\0' \ + && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c))) \ + || ((assertions & ASSERT_AT_WB_NEG) \ + && (pos == 0 || next_c == L'\0' \ + || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c)))) + +#define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags) \ + (((trans_i->assertions & ASSERT_CHAR_CLASS) \ + && !(tnfa->cflags & REG_ICASE) \ + && !tre_isctype((tre_cint_t)prev_c, trans_i->u.class)) \ + || ((trans_i->assertions & ASSERT_CHAR_CLASS) \ + && (tnfa->cflags & REG_ICASE) \ + && !tre_isctype(tre_tolower((tre_cint_t)prev_c),trans_i->u.class) \ + && !tre_isctype(tre_toupper((tre_cint_t)prev_c),trans_i->u.class)) \ + || ((trans_i->assertions & ASSERT_CHAR_CLASS_NEG) \ + && tre_neg_char_classes_match(trans_i->neg_classes,(tre_cint_t)prev_c,\ + tnfa->cflags & REG_ICASE))) + + + + +/* Returns 1 if `t1' wins `t2', 0 otherwise. 
*/ +inline static int +tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions, + int *t1, int *t2) +{ + int i; + for (i = 0; i < num_tags; i++) + { + if (tag_directions[i] == TRE_TAG_MINIMIZE) + { + if (t1[i] < t2[i]) + return 1; + if (t1[i] > t2[i]) + return 0; + } + else + { + if (t1[i] > t2[i]) + return 1; + if (t1[i] < t2[i]) + return 0; + } + } + /* assert(0);*/ + return 0; +} + +inline static int +tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase) +{ + DPRINT(("neg_char_classes_test: %p, %d, %d\n", classes, wc, icase)); + while (*classes != (tre_ctype_t)0) + if ((!icase && tre_isctype(wc, *classes)) + || (icase && (tre_isctype(tre_toupper(wc), *classes) + || tre_isctype(tre_tolower(wc), *classes)))) + return 1; /* Match. */ + else + classes++; + return 0; /* No match. */ +} diff --git a/deps/tre/lib/tre-mem.c b/deps/tre/lib/tre-mem.c new file mode 100644 index 000000000..ca56d2b7e --- /dev/null +++ b/deps/tre/lib/tre-mem.c @@ -0,0 +1,155 @@ +/* + tre-mem.c - TRE memory allocator + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + +*/ + +/* + This memory allocator is for allocating small memory blocks efficiently + in terms of memory overhead and execution speed. The allocated blocks + cannot be freed individually, only all at once. There can be multiple + allocators, though. +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif /* HAVE_CONFIG_H */ +#include +#include + +#include "tre-internal.h" +#include "tre-mem.h" +#include "xmalloc.h" + + +/* Returns a new memory allocator or NULL if out of memory. */ +tre_mem_t +tre_mem_new_impl(int provided, void *provided_block) +{ + tre_mem_t mem; + if (provided) + { + mem = provided_block; + memset(mem, 0, sizeof(*mem)); + } + else + mem = xcalloc(1, sizeof(*mem)); + if (mem == NULL) + return NULL; + return mem; +} + + +/* Frees the memory allocator and all memory allocated with it. 
*/ +void +tre_mem_destroy(tre_mem_t mem) +{ + tre_list_t *tmp, *l = mem->blocks; + + while (l != NULL) + { + xfree(l->data); + tmp = l->next; + xfree(l); + l = tmp; + } + xfree(mem); +} + + +/* Allocates a block of `size' bytes from `mem'. Returns a pointer to the + allocated block or NULL if an underlying malloc() failed. */ +void * +tre_mem_alloc_impl(tre_mem_t mem, int provided, void *provided_block, + int zero, size_t size) +{ + void *ptr; + + if (mem->failed) + { + DPRINT(("tre_mem_alloc: oops, called after failure?!\n")); + return NULL; + } + +#ifdef MALLOC_DEBUGGING + if (!provided) + { + ptr = xmalloc(1); + if (ptr == NULL) + { + DPRINT(("tre_mem_alloc: xmalloc forced failure\n")); + mem->failed = 1; + return NULL; + } + xfree(ptr); + } +#endif /* MALLOC_DEBUGGING */ + + if (mem->n < size) + { + /* We need more memory than is available in the current block. + Allocate a new block. */ + tre_list_t *l; + if (provided) + { + DPRINT(("tre_mem_alloc: using provided block\n")); + if (provided_block == NULL) + { + DPRINT(("tre_mem_alloc: provided block was NULL\n")); + mem->failed = 1; + return NULL; + } + mem->ptr = provided_block; + mem->n = TRE_MEM_BLOCK_SIZE; + } + else + { + size_t block_size; + if (size * 8 > TRE_MEM_BLOCK_SIZE) + block_size = size * 8; + else + block_size = TRE_MEM_BLOCK_SIZE; + DPRINT(("tre_mem_alloc: allocating new %zu byte block\n", + block_size)); + l = xmalloc(sizeof(*l)); + if (l == NULL) + { + mem->failed = 1; + return NULL; + } + l->data = xmalloc(block_size); + if (l->data == NULL) + { + xfree(l); + mem->failed = 1; + return NULL; + } + l->next = NULL; + if (mem->current != NULL) + mem->current->next = l; + if (mem->blocks == NULL) + mem->blocks = l; + mem->current = l; + mem->ptr = l->data; + mem->n = block_size; + } + } + + /* Make sure the next pointer will be aligned. */ + size += ALIGN(mem->ptr + size, long); + + /* Allocate from current block. 
*/ + ptr = mem->ptr; + mem->ptr += size; + mem->n -= size; + + /* Set to zero if needed. */ + if (zero) + memset(ptr, 0, size); + + return ptr; +} + +/* EOF */ diff --git a/deps/tre/lib/tre-mem.h b/deps/tre/lib/tre-mem.h new file mode 100644 index 000000000..285940457 --- /dev/null +++ b/deps/tre/lib/tre-mem.h @@ -0,0 +1,66 @@ +/* + tre-mem.h - TRE memory allocator interface + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + +*/ + +#ifndef TRE_MEM_H +#define TRE_MEM_H 1 + +#include + +#define TRE_MEM_BLOCK_SIZE 1024 + +typedef struct tre_list { + void *data; + struct tre_list *next; +} tre_list_t; + +typedef struct tre_mem_struct { + tre_list_t *blocks; + tre_list_t *current; + char *ptr; + size_t n; + int failed; + void **provided; +} *tre_mem_t; + + +tre_mem_t tre_mem_new_impl(int provided, void *provided_block); +void *tre_mem_alloc_impl(tre_mem_t mem, int provided, void *provided_block, + int zero, size_t size); + +/* Returns a new memory allocator or NULL if out of memory. */ +#define tre_mem_new() tre_mem_new_impl(0, NULL) + +/* Allocates a block of `size' bytes from `mem'. Returns a pointer to the + allocated block or NULL if an underlying malloc() failed. */ +#define tre_mem_alloc(mem, size) tre_mem_alloc_impl(mem, 0, NULL, 0, size) + +/* Allocates a block of `size' bytes from `mem'. Returns a pointer to the + allocated block or NULL if an underlying malloc() failed. The memory + is set to zero. */ +#define tre_mem_calloc(mem, size) tre_mem_alloc_impl(mem, 0, NULL, 1, size) + +#ifdef TRE_USE_ALLOCA +/* alloca() versions. Like above, but memory is allocated with alloca() + instead of malloc(). */ + +#define tre_mem_newa() \ + tre_mem_new_impl(1, alloca(sizeof(struct tre_mem_struct))) + +#define tre_mem_alloca(mem, size) \ + ((mem)->n >= (size) \ + ? 
tre_mem_alloc_impl((mem), 1, NULL, 0, (size)) \ + : tre_mem_alloc_impl((mem), 1, alloca(TRE_MEM_BLOCK_SIZE), 0, (size))) +#endif /* TRE_USE_ALLOCA */ + + +/* Frees the memory allocator and all memory allocated with it. */ +void tre_mem_destroy(tre_mem_t mem); + +#endif /* TRE_MEM_H */ + +/* EOF */ diff --git a/deps/tre/lib/tre-parse.c b/deps/tre/lib/tre-parse.c new file mode 100644 index 000000000..64ab6aca8 --- /dev/null +++ b/deps/tre/lib/tre-parse.c @@ -0,0 +1,1758 @@ +/* + tre-parse.c - Regexp parser + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + +*/ + +/* + This parser is just a simple recursive descent parser for POSIX.2 + regexps. The parser supports both the obsolete default syntax and + the "extended" syntax, and some nonstandard extensions. +*/ + + +#ifdef HAVE_CONFIG_H +#include +#endif /* HAVE_CONFIG_H */ +#include +#include +#include + +#include "xmalloc.h" +#include "tre-mem.h" +#include "tre-ast.h" +#include "tre-stack.h" +#include "tre-parse.h" + + +/* Characters with special meanings in regexp syntax. */ +#define CHAR_PIPE L'|' +#define CHAR_LPAREN L'(' +#define CHAR_RPAREN L')' +#define CHAR_LBRACE L'{' +#define CHAR_RBRACE L'}' +#define CHAR_LBRACKET L'[' +#define CHAR_RBRACKET L']' +#define CHAR_MINUS L'-' +#define CHAR_STAR L'*' +#define CHAR_QUESTIONMARK L'?' +#define CHAR_PLUS L'+' +#define CHAR_PERIOD L'.' +#define CHAR_COLON L':' +#define CHAR_EQUAL L'=' +#define CHAR_COMMA L',' +#define CHAR_CARET L'^' +#define CHAR_DOLLAR L'$' +#define CHAR_BACKSLASH L'\\' +#define CHAR_HASH L'#' +#define CHAR_TILDE L'~' + + +/* Some macros for expanding \w, \s, etc. 
*/ +static const struct tre_macro_struct { + const char c; + const char *expansion; +} tre_macros[] = + { {'t', "\t"}, {'n', "\n"}, {'r', "\r"}, + {'f', "\f"}, {'a', "\a"}, {'e', "\033"}, + {'w', "[[:alnum:]_]"}, {'W', "[^[:alnum:]_]"}, {'s', "[[:space:]]"}, + {'S', "[^[:space:]]"}, {'d', "[[:digit:]]"}, {'D', "[^[:digit:]]"}, + { 0, NULL } + }; + + +/* Expands a macro delimited by `regex' and `regex_end' to `buf', which + must have at least `len' items. Sets buf[0] to zero if the there + is no match in `tre_macros'. */ +static void +tre_expand_macro(const tre_char_t *regex, const tre_char_t *regex_end, + tre_char_t *buf, size_t buf_len) +{ + int i; + + buf[0] = 0; + if (regex >= regex_end) + return; + + for (i = 0; tre_macros[i].expansion; i++) + { + if (tre_macros[i].c == *regex) + { + unsigned int j; + DPRINT(("Expanding macro '%c' => '%s'\n", + tre_macros[i].c, tre_macros[i].expansion)); + for (j = 0; tre_macros[i].expansion[j] && j < buf_len - 1; j++) + buf[j] = tre_macros[i].expansion[j]; + buf[j] = 0; + break; + } + } +} + +static reg_errcode_t +tre_new_item(tre_mem_t mem, int min, int max, int *i, int *max_i, + tre_ast_node_t ***items) +{ + reg_errcode_t status; + tre_ast_node_t **array = *items; + /* Allocate more space if necessary. */ + if (*i >= *max_i) + { + tre_ast_node_t **new_items; + DPRINT(("out of array space, i = %d\n", *i)); + /* If the array is already 1024 items large, give up -- there's + probably an error in the regexp (e.g. not a '\0' terminated + string and missing ']') */ + if (*max_i > 1024) + return REG_ESPACE; + *max_i *= 2; + new_items = xrealloc(array, sizeof(*items) * *max_i); + if (new_items == NULL) + return REG_ESPACE; + *items = array = new_items; + } + array[*i] = tre_ast_new_literal(mem, min, max); + status = array[*i] == NULL ? REG_ESPACE : REG_OK; + (*i)++; + return status; +} + + +/* Expands a character class to character ranges. 
*/ +static reg_errcode_t +tre_expand_ctype(tre_mem_t mem, tre_ctype_t class, tre_ast_node_t ***items, + int *i, int *max_i, int cflags) +{ + reg_errcode_t status = REG_OK; + tre_cint_t c; + int j, min = -1, max = 0; + + DPRINT((" expanding class to character ranges\n")); + for (j = 0; (j < 256) && (status == REG_OK); j++) + { + c = (tre_cint_t) j; + if (tre_isctype(c, class) + || ((cflags & REG_ICASE) + && (tre_isctype(tre_tolower(c), class) + || tre_isctype(tre_toupper(c), class)))) +{ + if (min < 0) + min = c; + max = c; + } + else if (min >= 0) + { + DPRINT((" range %c (%d) to %c (%d)\n", min, min, max, max)); + status = tre_new_item(mem, min, max, i, max_i, items); + min = -1; + } + } + if (min >= 0 && status == REG_OK) + status = tre_new_item(mem, min, max, i, max_i, items); + return status; +} + + +static int +tre_compare_items(const void *a, const void *b) +{ + const tre_ast_node_t *node_a = *(tre_ast_node_t * const *)a; + const tre_ast_node_t *node_b = *(tre_ast_node_t * const *)b; + tre_literal_t *l_a = node_a->obj, *l_b = node_b->obj; + long a_min = l_a->code_min, b_min = l_b->code_min; + + if (a_min < b_min) + return -1; + else if (a_min > b_min) + return 1; + else + return 0; +} + +#ifndef TRE_USE_SYSTEM_WCTYPE + +/* isalnum() and the rest may be macros, so wrap them to functions. 
*/ +int tre_isalnum_func(tre_cint_t c) { return tre_isalnum(c); } +int tre_isalpha_func(tre_cint_t c) { return tre_isalpha(c); } + +#ifdef tre_isascii +int tre_isascii_func(tre_cint_t c) { return tre_isascii(c); } +#else /* !tre_isascii */ +int tre_isascii_func(tre_cint_t c) { return !(c >> 7); } +#endif /* !tre_isascii */ + +#ifdef tre_isblank +int tre_isblank_func(tre_cint_t c) { return tre_isblank(c); } +#else /* !tre_isblank */ +int tre_isblank_func(tre_cint_t c) { return ((c == ' ') || (c == '\t')); } +#endif /* !tre_isblank */ + +int tre_iscntrl_func(tre_cint_t c) { return tre_iscntrl(c); } +int tre_isdigit_func(tre_cint_t c) { return tre_isdigit(c); } +int tre_isgraph_func(tre_cint_t c) { return tre_isgraph(c); } +int tre_islower_func(tre_cint_t c) { return tre_islower(c); } +int tre_isprint_func(tre_cint_t c) +{ + return +#if defined(WIN32) && TRE_WCHAR + /* On Windows, iswprint(L'\t') incorrectly returns true. */ + c != L'\t' && +#endif + tre_isprint(c); +} +int tre_ispunct_func(tre_cint_t c) { return tre_ispunct(c); } +int tre_isspace_func(tre_cint_t c) { return tre_isspace(c); } +int tre_isupper_func(tre_cint_t c) { return tre_isupper(c); } +int tre_isxdigit_func(tre_cint_t c) { return tre_isxdigit(c); } + +struct { + char *name; + int (*func)(tre_cint_t); +} tre_ctype_map[] = { + { "alnum", &tre_isalnum_func }, + { "alpha", &tre_isalpha_func }, +#ifdef tre_isascii + { "ascii", &tre_isascii_func }, +#endif /* tre_isascii */ +#ifdef tre_isblank + { "blank", &tre_isblank_func }, +#endif /* tre_isblank */ + { "cntrl", &tre_iscntrl_func }, + { "digit", &tre_isdigit_func }, + { "graph", &tre_isgraph_func }, + { "lower", &tre_islower_func }, + { "print", &tre_isprint_func }, + { "punct", &tre_ispunct_func }, + { "space", &tre_isspace_func }, + { "upper", &tre_isupper_func }, + { "xdigit", &tre_isxdigit_func }, + { NULL, NULL} +}; + +tre_ctype_t tre_ctype(const char *name) +{ + int i; + for (i = 0; tre_ctype_map[i].name != NULL; i++) + { + if (strcmp(name, 
tre_ctype_map[i].name) == 0) + return tre_ctype_map[i].func; + } + return (tre_ctype_t)0; +} +#endif /* !TRE_USE_SYSTEM_WCTYPE */ + +/* Maximum number of character classes that can occur in a negated bracket + expression. */ +#define MAX_NEG_CLASSES 64 + +/* Maximum length of character class names. */ +#define MAX_CLASS_NAME + +#define REST(re) (int)(ctx->re_end - (re)), (re) + +static reg_errcode_t +tre_parse_bracket_items(tre_parse_ctx_t *ctx, int negate, + tre_ctype_t neg_classes[], int *num_neg_classes, + tre_ast_node_t ***items, int *num_items, + int *items_size) +{ + const tre_char_t *re = ctx->re; + reg_errcode_t status; + tre_ctype_t class = (tre_ctype_t)0; + tre_cint_t min = 0, max = 0; + int i = *num_items; + int max_i = *items_size; + int skip; + + /* Build an array of the items in the bracket expression. */ + for (;;) + { + skip = 0; + if (re == ctx->re_end) + { + return REG_EBRACK; + } + if (*re == CHAR_RBRACKET && re > ctx->re) + { + DPRINT(("tre_parse_bracket: done: '%.*" STRF "'\n", REST(re))); + re++; + break; + } + class = (tre_ctype_t)0; + if (re + 2 < ctx->re_end + && *(re + 1) == CHAR_MINUS && *(re + 2) != CHAR_RBRACKET) + { + DPRINT(("tre_parse_bracket: range: '%.*" STRF "'\n", REST(re))); + min = *re; + max = *(re + 2); + re += 3; + /* XXX - Should use collation order instead of encoding values + in character ranges. 
*/ + if (min > max) + return REG_ERANGE; + } + else if (re + 1 < ctx->re_end + && *re == CHAR_LBRACKET && *(re + 1) == CHAR_PERIOD) + return REG_ECOLLATE; + else if (re + 1 < ctx->re_end + && *re == CHAR_LBRACKET && *(re + 1) == CHAR_EQUAL) + return REG_ECOLLATE; + else if (re + 1 < ctx->re_end + && *re == CHAR_LBRACKET && *(re + 1) == CHAR_COLON) + { + char tmp_str[64]; + const tre_char_t *endptr = re + 2; + size_t len; + DPRINT(("tre_parse_bracket: class: '%.*" STRF "'\n", REST(re))); + while (endptr < ctx->re_end && *endptr != CHAR_COLON) + endptr++; + if (endptr != ctx->re_end) + { + len = MIN(endptr - re - 2, 63); +#ifdef TRE_WCHAR + { + tre_char_t tmp_wcs[64]; + wcsncpy(tmp_wcs, re + 2, len); + tmp_wcs[len] = L'\0'; +#if defined HAVE_WCSRTOMBS + { + mbstate_t state; + const tre_char_t *src = tmp_wcs; + memset(&state, '\0', sizeof(state)); + len = wcsrtombs(tmp_str, &src, sizeof(tmp_str), &state); + } +#elif defined HAVE_WCSTOMBS + len = wcstombs(tmp_str, tmp_wcs, 63); +#endif /* defined HAVE_WCSTOMBS */ + if (len == (size_t)-1) + return REG_ECTYPE; + } +#else /* !TRE_WCHAR */ + strncpy(tmp_str, (const char*)re + 2, len); +#endif /* !TRE_WCHAR */ + tmp_str[len] = '\0'; + DPRINT((" class name: %s\n", tmp_str)); + class = tre_ctype(tmp_str); + if (!class) + return REG_ECTYPE; + /* Optimize character classes for 8 bit character sets. */ + if (ctx->mb_cur_max == 1) + { + status = tre_expand_ctype(ctx->mem, class, items, + &i, &max_i, ctx->cflags); + if (status != REG_OK) + return status; + class = (tre_ctype_t)0; + skip = 1; + } + re = endptr + 2; + } + else + return REG_ECTYPE; + min = 0; + max = TRE_CHAR_MAX; + } + else + { + DPRINT(("tre_parse_bracket: char: '%.*" STRF "'\n", REST(re))); + if (*re == CHAR_MINUS && re + 1 < ctx->re_end + && *(re + 1) != CHAR_RBRACKET + && ctx->re != re) + /* Two ranges are not allowed to share and endpoint. 
*/ + return REG_ERANGE; + min = max = *re++; + } + + if (class && negate) + if (*num_neg_classes >= MAX_NEG_CLASSES) + return REG_ESPACE; + else + neg_classes[(*num_neg_classes)++] = class; + else if (!skip) + { + status = tre_new_item(ctx->mem, min, max, &i, &max_i, items); + if (status != REG_OK) + return status; + ((tre_literal_t*)((*items)[i-1])->obj)->u.class = class; + } + + /* Add opposite-case counterpoints if REG_ICASE is present. + This is broken if there are more than two "same" characters. */ + if (ctx->cflags & REG_ICASE && !class && !skip) + { + tre_cint_t cmin, ccurr; + + DPRINT(("adding opposite-case counterpoints\n")); + while (min <= max) + { + if (tre_islower(min)) + { + cmin = ccurr = tre_toupper(min++); + while (tre_islower(min) && tre_toupper(min) == ccurr + 1 + && min <= max) + ccurr = tre_toupper(min++); + status = tre_new_item(ctx->mem, cmin, ccurr, + &i, &max_i, items); + if (status != REG_OK) + return status; + } + else if (tre_isupper(min)) + { + cmin = ccurr = tre_tolower(min++); + while (tre_isupper(min) && tre_tolower(min) == ccurr + 1 + && min <= max) + ccurr = tre_tolower(min++); + status = tre_new_item(ctx->mem, cmin, ccurr, + &i, &max_i, items); + if (status != REG_OK) + return status; + } + else + min++; + } + } + } + *num_items = i; + *items_size = max_i; + ctx->re = re; + return REG_OK; +} + +static reg_errcode_t +tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result) +{ + tre_ast_node_t *node = NULL; + int negate = 0; + reg_errcode_t status = REG_OK; + tre_ast_node_t **items, *u, *n; + int i = 0, j, max_i = 32; + long curr_max, curr_min; + tre_ctype_t neg_classes[MAX_NEG_CLASSES]; + int num_neg_classes = 0; + + /* Start off with an array of `max_i' elements. 
*/ + items = xmalloc(sizeof(*items) * max_i); + if (items == NULL) + return REG_ESPACE; + + if (ctx->re < ctx->re_end && *ctx->re == CHAR_CARET) + { + DPRINT(("tre_parse_bracket: negate: '%.*" STRF "'\n", REST(ctx->re))); + negate = 1; + ctx->re++; + } + + status = tre_parse_bracket_items(ctx, negate, neg_classes, &num_neg_classes, + &items, &i, &max_i); + + if (status != REG_OK) + goto parse_bracket_done; + + /* Sort the array if we need to negate it. */ + if (negate) + qsort(items, (unsigned)i, sizeof(*items), tre_compare_items); + + curr_max = curr_min = 0; + /* Build a union of the items in the array, negated if necessary. */ + for (j = 0; j < i && status == REG_OK; j++) + { + long min, max; + tre_literal_t *l = items[j]->obj; + min = l->code_min; + max = l->code_max; + + DPRINT(("item: %ld - %ld, class %ld, curr_max = %ld\n", + l->code_min, l->code_max, (long)l->u.class, curr_max)); + + if (negate) + { + if (min < curr_max) + { + /* Overlap. */ + curr_max = MAX(max + 1, curr_max); + DPRINT(("overlap, curr_max = %ld\n", curr_max)); + l = NULL; + } + else + { + /* No overlap. 
*/ + curr_max = min - 1; + if (curr_max >= curr_min) + { + DPRINT(("no overlap\n")); + l->code_min = curr_min; + l->code_max = curr_max; + } + else + { + DPRINT(("no overlap, zero room\n")); + l = NULL; + } + curr_min = curr_max = max + 1; + } + } + + if (l != NULL) + { + int k; + DPRINT(("creating %ld - %ld\n", l->code_min, l->code_max)); + if (num_neg_classes > 0) + { + l->neg_classes = tre_mem_alloc(ctx->mem, + (sizeof(l->neg_classes) + * (num_neg_classes + 1))); + if (l->neg_classes == NULL) + { + status = REG_ESPACE; + break; + } + for (k = 0; k < num_neg_classes; k++) + l->neg_classes[k] = neg_classes[k]; + l->neg_classes[k] = (tre_ctype_t)0; + } + else + l->neg_classes = NULL; + if (node == NULL) + node = items[j]; + else + { + u = tre_ast_new_union(ctx->mem, node, items[j]); + if (u == NULL) + status = REG_ESPACE; + node = u; + } + } + } + + if (status != REG_OK) + goto parse_bracket_done; + + if (negate) + { + int k; + DPRINT(("final: creating %ld - %ld\n", curr_min, (long)TRE_CHAR_MAX)); + n = tre_ast_new_literal(ctx->mem, curr_min, TRE_CHAR_MAX); + if (n == NULL) + status = REG_ESPACE; + else + { + tre_literal_t *l = n->obj; + if (num_neg_classes > 0) + { + l->neg_classes = tre_mem_alloc(ctx->mem, + (sizeof(l->neg_classes) + * (num_neg_classes + 1))); + if (l->neg_classes == NULL) + { + status = REG_ESPACE; + goto parse_bracket_done; + } + for (k = 0; k < num_neg_classes; k++) + l->neg_classes[k] = neg_classes[k]; + l->neg_classes[k] = (tre_ctype_t)0; + } + else + l->neg_classes = NULL; + if (node == NULL) + node = n; + else + { + u = tre_ast_new_union(ctx->mem, node, n); + if (u == NULL) + status = REG_ESPACE; + node = u; + } + } + } + + if (status != REG_OK) + goto parse_bracket_done; + +#ifdef TRE_DEBUG + tre_ast_print(node); +#endif /* TRE_DEBUG */ + + parse_bracket_done: + xfree(items); + *result = node; + return status; +} + + +/* Parses a positive decimal integer capped at INT_MAX. Returns -1 if the + string does not contain a valid number. 
*/
+/* `*regex' is the start of the digits and `regex_end' the end of the
+   pattern.  On success `*regex' is advanced past every digit that was
+   read (including digits beyond the point where the value saturated);
+   on a -1 return `*regex' is left unchanged. */
+static int
+tre_parse_int(const tre_char_t **regex, const tre_char_t *regex_end)
+{
+  int num = 0;
+  int overflow = 0;
+  const tre_char_t *r = *regex;
+  while (r < regex_end && *r >= L'0' && *r <= L'9')
+    {
+      int digit = (int)(*r - L'0');
+      if (!overflow)
+        {
+          /* Test before multiplying: num * 10 + digit <= INT_MAX exactly
+             when num <= (INT_MAX - digit) / 10.  Detecting the overflow
+             after the fact by looking for a wrapped result is unreliable
+             where `long' is no wider than `int'. */
+          if (num > (INT_MAX - digit) / 10)
+            overflow = 1;
+          else
+            num = num * 10 + digit;
+        }
+      r++;
+    }
+  if (r == *regex)
+    return -1;
+  *regex = r;
+  return overflow ? INT_MAX : num;
+}
+
+
+static reg_errcode_t
+tre_parse_bound(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
+{
+  int min, max, i;
+  int cost_ins, cost_del, cost_subst, cost_max;
+  int limit_ins, limit_del, limit_subst, limit_err;
+  const tre_char_t *r = ctx->re;
+  const tre_char_t *start;
+  int minimal = (ctx->cflags & REG_UNGREEDY) ? 1 : 0;
+  int approx = 0;
+  int costs_set = 0;
+  int counts_set = 0;
+
+  cost_ins = cost_del = cost_subst = cost_max = TRE_PARAM_UNSET;
+  limit_ins = limit_del = limit_subst = limit_err = TRE_PARAM_UNSET;
+
+  /* Parse number (minimum repetition count). */
+  min = -1;
+  if (r < ctx->re_end && *r >= L'0' && *r <= L'9') {
+    DPRINT(("tre_parse: min count: '%.*" STRF "'\n", REST(r)));
+    min = tre_parse_int(&r, ctx->re_end);
+  }
+
+  /* Parse comma and second number (maximum repetition count). */
+  max = min;
+  if (r < ctx->re_end && *r == CHAR_COMMA)
+    {
+      if (min < 0)
+        min = 0;
+      r++;
+      DPRINT(("tre_parse: max count: '%.*" STRF "'\n", REST(r)));
+      max = tre_parse_int(&r, ctx->re_end);
+    }
+
+  /* Check that the repeat counts are sane.
*/ + if (max >= 0 && min > max) + return REG_BADBR; + if (min > RE_DUP_MAX || max > RE_DUP_MAX) + return REG_BADMAX; + + + /* + '{' + optionally followed immediately by a number == minimum repcount + optionally followed by , then a number == maximum repcount + + then a number == maximum insertion count + - then a number == maximum deletion count + # then a number == maximum substitution count + ~ then a number == maximum number of errors + Any of +, -, # or ~ without followed by a number means that + the maximum count/number of errors is infinite. + + An equation of the form + Xi + Yd + Zs < C + can be specified to set costs and the cost limit to a value + different from the default value: + - X is the cost of an insertion + - Y is the cost of a deletion + - Z is the cost of a substitution + - C is the maximum cost + + If no count limit or cost is set for an operation, the operation + is not allowed at all. + */ + + + do { + int done; + start = r; + + /* Parse count limit settings */ + done = 0; + if (!counts_set) + while (r + 1 < ctx->re_end && !done) + { + switch (*r) + { + case CHAR_PLUS: /* Insert limit */ + DPRINT(("tre_parse: ins limit: '%.*" STRF "'\n", REST(r))); + r++; + limit_ins = tre_parse_int(&r, ctx->re_end); + if (limit_ins < 0) + limit_ins = INT_MAX; + counts_set = 1; + break; + case CHAR_MINUS: /* Delete limit */ + DPRINT(("tre_parse: del limit: '%.*" STRF "'\n", REST(r))); + r++; + limit_del = tre_parse_int(&r, ctx->re_end); + if (limit_del < 0) + limit_del = INT_MAX; + counts_set = 1; + break; + case CHAR_HASH: /* Substitute limit */ + DPRINT(("tre_parse: subst limit: '%.*" STRF "'\n", REST(r))); + r++; + limit_subst = tre_parse_int(&r, ctx->re_end); + if (limit_subst < 0) + limit_subst = INT_MAX; + counts_set = 1; + break; + case CHAR_TILDE: /* Maximum number of changes */ + DPRINT(("tre_parse: count limit: '%.*" STRF "'\n", REST(r))); + r++; + limit_err = tre_parse_int(&r, ctx->re_end); + if (limit_err < 0) + limit_err = INT_MAX; + approx = 1; 
+ break; + case CHAR_COMMA: + r++; + break; + case L' ': + r++; + break; + case L'}': + done = 1; + break; + default: + done = 1; + break; + } + } + + /* Parse cost restriction equation. */ + done = 0; + if (!costs_set) + while (r + 1 < ctx->re_end && !done) + { + switch (*r) + { + case CHAR_PLUS: + case L' ': + r++; + break; + case L'<': + DPRINT(("tre_parse: max cost: '%.*" STRF "'\n", REST(r))); + r++; + while (*r == L' ') + r++; + cost_max = tre_parse_int(&r, ctx->re_end); + if (cost_max < 0) + cost_max = INT_MAX; + else + cost_max--; + approx = 1; + break; + case CHAR_COMMA: + r++; + done = 1; + break; + default: + if (*r >= L'0' && *r <= L'9') + { +#ifdef TRE_DEBUG + const tre_char_t *sr = r; +#endif /* TRE_DEBUG */ + int cost = tre_parse_int(&r, ctx->re_end); + /* XXX - make sure r is not past end. */ + switch (*r) + { + case L'i': /* Insert cost */ + DPRINT(("tre_parse: ins cost: '%.*" STRF "'\n", + REST(sr))); + r++; + cost_ins = cost; + costs_set = 1; + break; + case L'd': /* Delete cost */ + DPRINT(("tre_parse: del cost: '%.*" STRF "'\n", + REST(sr))); + r++; + cost_del = cost; + costs_set = 1; + break; + case L's': /* Substitute cost */ + DPRINT(("tre_parse: subst cost: '%.*" STRF "'\n", + REST(sr))); + r++; + cost_subst = cost; + costs_set = 1; + break; + default: + return REG_BADBR; + } + } + else + { + done = 1; + break; + } + } + } + } while (start != r); + + /* Missing }. */ + if (r >= ctx->re_end) + return REG_EBRACE; + + /* Empty contents of {}. */ + if (r == ctx->re) + return REG_BADBR; + + /* Parse the ending '}' or '\}'.*/ + if (ctx->cflags & REG_EXTENDED) + { + if (r >= ctx->re_end || *r != CHAR_RBRACE) + return REG_BADBR; + r++; + } + else + { + if (r + 1 >= ctx->re_end + || *r != CHAR_BACKSLASH + || *(r + 1) != CHAR_RBRACE) + return REG_BADBR; + r += 2; + } + + + /* Parse trailing '?' marking minimal repetition. 
*/ + if (r < ctx->re_end) + { + if (*r == CHAR_QUESTIONMARK) + { + minimal = !(ctx->cflags & REG_UNGREEDY); + r++; + } + else if (*r == CHAR_STAR || *r == CHAR_PLUS) + { + /* These are reserved for future extensions. */ + return REG_BADRPT; + } + } + + /* Create the AST node(s). */ + if (min == 0 && max == 0) + { + *result = tre_ast_new_literal(ctx->mem, EMPTY, -1); + if (*result == NULL) + return REG_ESPACE; + } + else + { + if (min < 0 && max < 0) + /* Only approximate parameters set, no repetitions. */ + min = max = 1; + + *result = tre_ast_new_iter(ctx->mem, *result, min, max, minimal); + if (!*result) + return REG_ESPACE; + + /* If approximate matching parameters are set, add them to the + iteration node. */ + if (approx || costs_set || counts_set) + { + int *params; + tre_iteration_t *iter = (*result)->obj; + + if (costs_set || counts_set) + { + if (limit_ins == TRE_PARAM_UNSET) + { + if (cost_ins == TRE_PARAM_UNSET) + limit_ins = 0; + else + limit_ins = INT_MAX; + } + + if (limit_del == TRE_PARAM_UNSET) + { + if (cost_del == TRE_PARAM_UNSET) + limit_del = 0; + else + limit_del = INT_MAX; + } + + if (limit_subst == TRE_PARAM_UNSET) + { + if (cost_subst == TRE_PARAM_UNSET) + limit_subst = 0; + else + limit_subst = INT_MAX; + } + } + + if (cost_max == TRE_PARAM_UNSET) + cost_max = INT_MAX; + if (limit_err == TRE_PARAM_UNSET) + limit_err = INT_MAX; + + ctx->have_approx = 1; + params = tre_mem_alloc(ctx->mem, sizeof(*params) * TRE_PARAM_LAST); + if (!params) + return REG_ESPACE; + for (i = 0; i < TRE_PARAM_LAST; i++) + params[i] = TRE_PARAM_UNSET; + params[TRE_PARAM_COST_INS] = cost_ins; + params[TRE_PARAM_COST_DEL] = cost_del; + params[TRE_PARAM_COST_SUBST] = cost_subst; + params[TRE_PARAM_COST_MAX] = cost_max; + params[TRE_PARAM_MAX_INS] = limit_ins; + params[TRE_PARAM_MAX_DEL] = limit_del; + params[TRE_PARAM_MAX_SUBST] = limit_subst; + params[TRE_PARAM_MAX_ERR] = limit_err; + iter->params = params; + } + } + + DPRINT(("tre_parse_bound: min %d, max %d, costs 
[%d,%d,%d, total %d], " + "limits [%d,%d,%d, total %d]\n", + min, max, cost_ins, cost_del, cost_subst, cost_max, + limit_ins, limit_del, limit_subst, limit_err)); + + + ctx->re = r; + return REG_OK; +} + +typedef enum { + PARSE_RE = 0, + PARSE_ATOM, + PARSE_MARK_FOR_SUBMATCH, + PARSE_BRANCH, + PARSE_PIECE, + PARSE_CATENATION, + PARSE_POST_CATENATION, + PARSE_UNION, + PARSE_POST_UNION, + PARSE_POSTFIX, + PARSE_RESTORE_CFLAGS +} tre_parse_re_stack_symbol_t; + + +reg_errcode_t +tre_parse(tre_parse_ctx_t *ctx) +{ + tre_ast_node_t *result = NULL; + tre_parse_re_stack_symbol_t symbol; + reg_errcode_t status = REG_OK; + tre_stack_t *stack = ctx->stack; + size_t bottom = tre_stack_num_items(stack); + int depth = 0; + int temporary_cflags = 0; + + DPRINT(("tre_parse: parsing '%.*" STRF "', len = %zu\n", + (int)ctx->len, ctx->re, ctx->len)); + + if (!ctx->nofirstsub) + { + STACK_PUSH(stack, int, ctx->submatch_id); + STACK_PUSH(stack, int, PARSE_MARK_FOR_SUBMATCH); + ctx->submatch_id++; + } + STACK_PUSH(stack, int, PARSE_RE); + ctx->re_start = ctx->re; + ctx->re_end = ctx->re + ctx->len; + + + /* The following is basically just a recursive descent parser. I use + an explicit stack instead of recursive functions mostly because of + two reasons: compatibility with systems which have an overflowable + call stack, and efficiency (both in lines of code and speed). */ + while (status == REG_OK && tre_stack_num_items(stack) > bottom) + { + symbol = tre_stack_pop_int(stack); + switch (symbol) + { + case PARSE_RE: + /* Parse a full regexp. A regexp is one or more branches, + separated by the union operator `|'. */ +#ifdef REG_LITERAL + if (!(ctx->cflags & REG_LITERAL) + && ctx->cflags & REG_EXTENDED) +#endif /* REG_LITERAL */ + STACK_PUSHX(stack, int, PARSE_UNION); + STACK_PUSHX(stack, int, PARSE_BRANCH); + break; + + case PARSE_BRANCH: + /* Parse a branch. A branch is one or more pieces, concatenated. + A piece is an atom possibly followed by a postfix operator. 
*/ + STACK_PUSHX(stack, int, PARSE_CATENATION); + STACK_PUSHX(stack, int, PARSE_PIECE); + break; + + case PARSE_PIECE: + /* Parse a piece. A piece is an atom possibly followed by one + or more postfix operators. */ +#ifdef REG_LITERAL + if (!(ctx->cflags & REG_LITERAL)) +#endif /* REG_LITERAL */ + STACK_PUSHX(stack, int, PARSE_POSTFIX); + STACK_PUSHX(stack, int, PARSE_ATOM); + break; + + case PARSE_CATENATION: + /* If the expression has not ended, parse another piece. */ + { + tre_char_t c; + if (ctx->re >= ctx->re_end) + break; + c = *ctx->re; +#ifdef REG_LITERAL + if (!(ctx->cflags & REG_LITERAL)) + { +#endif /* REG_LITERAL */ + if (ctx->cflags & REG_EXTENDED && c == CHAR_PIPE) + break; + if ((ctx->cflags & REG_EXTENDED + && c == CHAR_RPAREN && depth > 0) + || (!(ctx->cflags & REG_EXTENDED) + && (c == CHAR_BACKSLASH + && *(ctx->re + 1) == CHAR_RPAREN))) + { + if (!(ctx->cflags & REG_EXTENDED) && depth == 0) + status = REG_EPAREN; + DPRINT(("tre_parse: group end: '%.*" STRF "'\n", + REST(ctx->re))); + depth--; + if (!(ctx->cflags & REG_EXTENDED)) + ctx->re += 2; + break; + } +#ifdef REG_LITERAL + } +#endif /* REG_LITERAL */ + +#ifdef REG_RIGHT_ASSOC + if (ctx->cflags & REG_RIGHT_ASSOC) + { + /* Right associative concatenation. */ + STACK_PUSHX(stack, voidptr, result); + STACK_PUSHX(stack, int, PARSE_POST_CATENATION); + STACK_PUSHX(stack, int, PARSE_CATENATION); + STACK_PUSHX(stack, int, PARSE_PIECE); + } + else +#endif /* REG_RIGHT_ASSOC */ + { + /* Default case, left associative concatenation. 
*/ + STACK_PUSHX(stack, int, PARSE_CATENATION); + STACK_PUSHX(stack, voidptr, result); + STACK_PUSHX(stack, int, PARSE_POST_CATENATION); + STACK_PUSHX(stack, int, PARSE_PIECE); + } + break; + } + + case PARSE_POST_CATENATION: + { + tre_ast_node_t *tree = tre_stack_pop_voidptr(stack); + tre_ast_node_t *tmp_node; + tmp_node = tre_ast_new_catenation(ctx->mem, tree, result); + if (!tmp_node) + return REG_ESPACE; + result = tmp_node; + break; + } + + case PARSE_UNION: + if (ctx->re >= ctx->re_end) + break; +#ifdef REG_LITERAL + if (ctx->cflags & REG_LITERAL) + break; +#endif /* REG_LITERAL */ + switch (*ctx->re) + { + case CHAR_PIPE: + DPRINT(("tre_parse: union: '%.*" STRF "'\n", + REST(ctx->re))); + STACK_PUSHX(stack, int, PARSE_UNION); + STACK_PUSHX(stack, voidptr, result); + STACK_PUSHX(stack, int, PARSE_POST_UNION); + STACK_PUSHX(stack, int, PARSE_BRANCH); + ctx->re++; + break; + + case CHAR_RPAREN: + ctx->re++; + break; + + default: + break; + } + break; + + case PARSE_POST_UNION: + { + tre_ast_node_t *tmp_node; + tre_ast_node_t *tree = tre_stack_pop_voidptr(stack); + tmp_node = tre_ast_new_union(ctx->mem, tree, result); + if (!tmp_node) + return REG_ESPACE; + result = tmp_node; + break; + } + + case PARSE_POSTFIX: + /* Parse postfix operators. */ + if (ctx->re >= ctx->re_end) + break; +#ifdef REG_LITERAL + if (ctx->cflags & REG_LITERAL) + break; +#endif /* REG_LITERAL */ + switch (*ctx->re) + { + case CHAR_PLUS: + case CHAR_QUESTIONMARK: + if (!(ctx->cflags & REG_EXTENDED)) + break; + /*FALLTHROUGH*/ + case CHAR_STAR: + { + tre_ast_node_t *tmp_node; + int minimal = (ctx->cflags & REG_UNGREEDY) ? 
1 : 0; + int rep_min = 0; + int rep_max = -1; +#ifdef TRE_DEBUG + const tre_char_t *tmp_re; +#endif + + if (*ctx->re == CHAR_PLUS) + rep_min = 1; + if (*ctx->re == CHAR_QUESTIONMARK) + rep_max = 1; +#ifdef TRE_DEBUG + tmp_re = ctx->re; +#endif + + if (ctx->re + 1 < ctx->re_end) + { + if (*(ctx->re + 1) == CHAR_QUESTIONMARK) + { + minimal = !(ctx->cflags & REG_UNGREEDY); + ctx->re++; + } + else if (*(ctx->re + 1) == CHAR_STAR + || *(ctx->re + 1) == CHAR_PLUS) + { + /* These are reserved for future extensions. */ + return REG_BADRPT; + } + } + + DPRINT(("tre_parse: %s star: '%.*" STRF "'\n", + minimal ? " minimal" : "greedy", REST(tmp_re))); + ctx->re++; + tmp_node = tre_ast_new_iter(ctx->mem, result, rep_min, rep_max, + minimal); + if (tmp_node == NULL) + return REG_ESPACE; + result = tmp_node; + STACK_PUSHX(stack, int, PARSE_POSTFIX); + } + break; + + case CHAR_BACKSLASH: + /* "\{" is special without REG_EXTENDED */ + if (!(ctx->cflags & REG_EXTENDED) + && ctx->re + 1 < ctx->re_end + && *(ctx->re + 1) == CHAR_LBRACE) + { + ctx->re++; + goto parse_brace; + } + else + break; + + case CHAR_LBRACE: + /* "{" is literal without REG_EXTENDED */ + if (!(ctx->cflags & REG_EXTENDED)) + break; + + parse_brace: + DPRINT(("tre_parse: bound: '%.*" STRF "'\n", + REST(ctx->re))); + ctx->re++; + + status = tre_parse_bound(ctx, &result); + if (status != REG_OK) + return status; + STACK_PUSHX(stack, int, PARSE_POSTFIX); + break; + } + break; + + case PARSE_ATOM: + /* Parse an atom. An atom is a regular expression enclosed in `()', + an empty set of `()', a bracket expression, `.', `^', `$', + a `\' followed by a character, or a single character. */ + + /* End of regexp? (empty string). */ + if (ctx->re >= ctx->re_end) + goto parse_literal; + +#ifdef REG_LITERAL + if (ctx->cflags & REG_LITERAL) + goto parse_literal; +#endif /* REG_LITERAL */ + + switch (*ctx->re) + { + case CHAR_LPAREN: /* parenthesized subexpression */ + + /* Handle "(?...)" extensions. 
They work in a way similar + to Perls corresponding extensions. */ + if (ctx->cflags & REG_EXTENDED + && ctx->re + 1 < ctx->re_end + && *(ctx->re + 1) == CHAR_QUESTIONMARK) + { + int new_cflags = ctx->cflags; + int bit = 1; + DPRINT(("tre_parse: extension: '%.*" STRF "\n", + REST(ctx->re))); + ctx->re += 2; + while (/*CONSTCOND*/(void)1,1) + { + if (ctx->re >= ctx->re_end) + return REG_BADPAT; + if (*ctx->re == L'i') + { + DPRINT(("tre_parse: icase: '%.*" STRF "\n", + REST(ctx->re))); + if (bit) + new_cflags |= REG_ICASE; + else + new_cflags &= ~REG_ICASE; + ctx->re++; + } + else if (*ctx->re == L'n') + { + DPRINT(("tre_parse: newline: '%.*" STRF "\n", + REST(ctx->re))); + if (bit) + new_cflags |= REG_NEWLINE; + else + new_cflags &= ~REG_NEWLINE; + ctx->re++; + } +#ifdef REG_RIGHT_ASSOC + else if (*ctx->re == L'r') + { + DPRINT(("tre_parse: right assoc: '%.*" STRF "\n", + REST(ctx->re))); + if (bit) + new_cflags |= REG_RIGHT_ASSOC; + else + new_cflags &= ~REG_RIGHT_ASSOC; + ctx->re++; + } +#endif /* REG_RIGHT_ASSOC */ +#ifdef REG_UNGREEDY + else if (*ctx->re == L'U') + { + DPRINT(("tre_parse: ungreedy: '%.*" STRF "\n", + REST(ctx->re))); + if (bit) + new_cflags |= REG_UNGREEDY; + else + new_cflags &= ~REG_UNGREEDY; + ctx->re++; + } +#endif /* REG_UNGREEDY */ + else if (*ctx->re == CHAR_MINUS) + { + DPRINT(("tre_parse: turn off: '%.*" STRF "\n", + REST(ctx->re))); + ctx->re++; + bit = 0; + } + else if (*ctx->re == CHAR_COLON) + { + DPRINT(("tre_parse: no group: '%.*" STRF "\n", + REST(ctx->re))); + ctx->re++; + depth++; + break; + } + else if (*ctx->re == CHAR_HASH) + { + DPRINT(("tre_parse: comment: '%.*" STRF "\n", + REST(ctx->re))); + /* A comment can contain any character except a + right parenthesis */ + while (ctx->re < ctx->re_end + && *ctx->re != CHAR_RPAREN) + ctx->re++; + if (ctx->re < ctx->re_end + && *ctx->re == CHAR_RPAREN) + { + ctx->re++; + break; + } + else + return REG_BADPAT; + } + else if (*ctx->re == CHAR_RPAREN) + { + ctx->re++; + break; + } + 
else + return REG_BADPAT; + } + + /* Turn on the cflags changes for the rest of the + enclosing group. */ + if (new_cflags != ctx->cflags) + ctx->have_inline_cflags = 1; + STACK_PUSHX(stack, int, ctx->cflags); + STACK_PUSHX(stack, int, PARSE_RESTORE_CFLAGS); + STACK_PUSHX(stack, int, PARSE_RE); + ctx->cflags = new_cflags; + break; + } + + if (ctx->cflags & REG_EXTENDED + || (ctx->re > ctx->re_start + && *(ctx->re - 1) == CHAR_BACKSLASH)) + { + depth++; + if (ctx->re + 2 < ctx->re_end + && *(ctx->re + 1) == CHAR_QUESTIONMARK + && *(ctx->re + 2) == CHAR_COLON) + { + DPRINT(("tre_parse: group begin: '%.*" STRF + "', no submatch\n", REST(ctx->re))); + /* Don't mark for submatching. */ + ctx->re += 3; + STACK_PUSHX(stack, int, PARSE_RE); + } + else + { + DPRINT(("tre_parse: group begin: '%.*" STRF + "', submatch %d\n", REST(ctx->re), + ctx->submatch_id)); + ctx->re++; + /* First parse a whole RE, then mark the resulting tree + for submatching. */ + STACK_PUSHX(stack, int, ctx->submatch_id); + STACK_PUSHX(stack, int, PARSE_MARK_FOR_SUBMATCH); + STACK_PUSHX(stack, int, PARSE_RE); + ctx->submatch_id++; + } + } + else + goto parse_literal; + break; + + case CHAR_RPAREN: /* end of current subexpression */ + if ((ctx->cflags & REG_EXTENDED && depth > 0) + || (!(ctx->cflags & REG_EXTENDED) && ctx->re > ctx->re_start + && *(ctx->re - 1) == CHAR_BACKSLASH)) + { + DPRINT(("tre_parse: empty: '%.*" STRF "'\n", + REST(ctx->re))); + /* We were expecting an atom, but instead the current + subexpression was closed. POSIX leaves the meaning of + this to be implementation-defined. We interpret this as + an empty expression (which matches an empty string). 
*/ + result = tre_ast_new_literal(ctx->mem, EMPTY, -1); + if (result == NULL) + return REG_ESPACE; + if (!(ctx->cflags & REG_EXTENDED)) + ctx->re--; + } + else + goto parse_literal; + break; + + case CHAR_LBRACKET: /* bracket expression */ + DPRINT(("tre_parse: bracket: '%.*" STRF "'\n", + REST(ctx->re))); + ctx->re++; + status = tre_parse_bracket(ctx, &result); + if (status != REG_OK) + return status; + break; + + case CHAR_BACKSLASH: + /* If this is "\(" or "\)" chew off the backslash and + try again. */ + if (!(ctx->cflags & REG_EXTENDED) + && ctx->re + 1 < ctx->re_end + && (*(ctx->re + 1) == CHAR_LPAREN + || *(ctx->re + 1) == CHAR_RPAREN)) + { + ctx->re++; + STACK_PUSHX(stack, int, PARSE_ATOM); + break; + } + + /* If a macro is used, parse the expanded macro recursively. */ + { + tre_char_t buf[64]; + tre_expand_macro(ctx->re + 1, ctx->re_end, + buf, elementsof(buf)); + if (buf[0] != 0) + { + tre_parse_ctx_t subctx; + memcpy(&subctx, ctx, sizeof(subctx)); + subctx.re = buf; + subctx.len = tre_strlen(buf); + subctx.nofirstsub = 1; + status = tre_parse(&subctx); + if (status != REG_OK) + return status; + ctx->re += 2; + result = subctx.result; + break; + } + } + + if (ctx->re + 1 >= ctx->re_end) + /* Trailing backslash. 
*/ + return REG_EESCAPE; + +#ifdef REG_LITERAL + if (*(ctx->re + 1) == L'Q') + { + DPRINT(("tre_parse: tmp literal: '%.*" STRF "'\n", + REST(ctx->re))); + ctx->cflags |= REG_LITERAL; + temporary_cflags |= REG_LITERAL; + ctx->re += 2; + STACK_PUSHX(stack, int, PARSE_ATOM); + break; + } +#endif /* REG_LITERAL */ + + DPRINT(("tre_parse: bleep: '%.*" STRF "'\n", REST(ctx->re))); + ctx->re++; + switch (*ctx->re) + { + case L'b': + result = tre_ast_new_literal(ctx->mem, ASSERTION, + ASSERT_AT_WB); + ctx->re++; + break; + case L'B': + result = tre_ast_new_literal(ctx->mem, ASSERTION, + ASSERT_AT_WB_NEG); + ctx->re++; + break; + case L'<': + result = tre_ast_new_literal(ctx->mem, ASSERTION, + ASSERT_AT_BOW); + ctx->re++; + break; + case L'>': + result = tre_ast_new_literal(ctx->mem, ASSERTION, + ASSERT_AT_EOW); + ctx->re++; + break; + case L'x': + ctx->re++; + if (ctx->re >= ctx->re_end) + { + result = tre_ast_new_literal(ctx->mem, 0, 0); + if (result == NULL) + return REG_ESPACE; + break; + } + if (ctx->re[0] != CHAR_LBRACE) + { + /* 8 bit hex char. */ + char tmp[3] = {0, 0, 0}; + long val; + DPRINT(("tre_parse: 8 bit hex: '%.*" STRF "'\n", + REST(ctx->re - 2))); + + if (ctx->re < ctx->re_end && tre_isxdigit(ctx->re[0])) + { + tmp[0] = (char)ctx->re[0]; + ctx->re++; + } + if (ctx->re < ctx->re_end && tre_isxdigit(ctx->re[0])) + { + tmp[1] = (char)ctx->re[0]; + ctx->re++; + } + val = strtol(tmp, NULL, 16); + result = tre_ast_new_literal(ctx->mem, (int)val, (int)val); + break; + } + else + { + /* Wide char. 
*/ + char tmp[9]; /* max 8 hex digits + terminator */ + long val; + size_t i = 0; + ctx->re++; + while (ctx->re < ctx->re_end) + { + if (ctx->re[0] == CHAR_RBRACE) + break; + if (tre_isxdigit(ctx->re[0]) && i < sizeof(tmp) - 1) + { + tmp[i] = (char)ctx->re[0]; + i++; + ctx->re++; + continue; + } + return REG_EBRACE; + } + if (ctx->re >= ctx->re_end) + return REG_EBRACE; + ctx->re++; + tmp[i] = 0; + val = strtol(tmp, NULL, 16); + result = tre_ast_new_literal(ctx->mem, (int)val, (int)val); + break; + } + /*FALLTHROUGH*/ + + default: + if (tre_isdigit(*ctx->re)) + { + /* Back reference. */ + int val = *ctx->re - L'0'; + DPRINT(("tre_parse: backref: '%.*" STRF "'\n", + REST(ctx->re - 1))); + result = tre_ast_new_literal(ctx->mem, BACKREF, val); + if (result == NULL) + return REG_ESPACE; + ctx->max_backref = MAX(val, ctx->max_backref); + ctx->re++; + } + else + { + /* Escaped character. */ + DPRINT(("tre_parse: escaped: '%.*" STRF "'\n", + REST(ctx->re - 1))); + result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re); + ctx->re++; + } + break; + } + if (result == NULL) + return REG_ESPACE; + break; + + case CHAR_PERIOD: /* the any-symbol */ + DPRINT(("tre_parse: any: '%.*" STRF "'\n", + REST(ctx->re))); + if (ctx->cflags & REG_NEWLINE) + { + tre_ast_node_t *tmp1; + tre_ast_node_t *tmp2; + tmp1 = tre_ast_new_literal(ctx->mem, 0, L'\n' - 1); + if (!tmp1) + return REG_ESPACE; + tmp2 = tre_ast_new_literal(ctx->mem, L'\n' + 1, TRE_CHAR_MAX); + if (!tmp2) + return REG_ESPACE; + result = tre_ast_new_union(ctx->mem, tmp1, tmp2); + if (!result) + return REG_ESPACE; + } + else + { + result = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX); + if (!result) + return REG_ESPACE; + } + ctx->re++; + break; + + case CHAR_CARET: /* beginning of line assertion */ + /* '^' has a special meaning everywhere in EREs, and in the + beginning of the RE and after \( is BREs. 
*/ + if (ctx->cflags & REG_EXTENDED + || (ctx->re - 2 >= ctx->re_start + && *(ctx->re - 2) == CHAR_BACKSLASH + && *(ctx->re - 1) == CHAR_LPAREN) + || ctx->re == ctx->re_start) + { + DPRINT(("tre_parse: BOL: '%.*" STRF "'\n", + REST(ctx->re))); + result = tre_ast_new_literal(ctx->mem, ASSERTION, + ASSERT_AT_BOL); + if (result == NULL) + return REG_ESPACE; + ctx->re++; + } + else + goto parse_literal; + break; + + case CHAR_DOLLAR: /* end of line assertion. */ + /* '$' is special everywhere in EREs, and in the end of the + string and before \) is BREs. */ + if (ctx->cflags & REG_EXTENDED + || (ctx->re + 2 < ctx->re_end + && *(ctx->re + 1) == CHAR_BACKSLASH + && *(ctx->re + 2) == CHAR_RPAREN) + || ctx->re + 1 == ctx->re_end) + { + DPRINT(("tre_parse: EOL: '%.*" STRF "'\n", + REST(ctx->re))); + result = tre_ast_new_literal(ctx->mem, ASSERTION, + ASSERT_AT_EOL); + if (result == NULL) + return REG_ESPACE; + ctx->re++; + } + else + goto parse_literal; + break; + + default: + parse_literal: + + if (temporary_cflags && ctx->re + 1 < ctx->re_end + && *ctx->re == CHAR_BACKSLASH && *(ctx->re + 1) == L'E') + { + DPRINT(("tre_parse: end tmps: '%.*" STRF "'\n", + REST(ctx->re))); + ctx->cflags &= ~temporary_cflags; + temporary_cflags = 0; + ctx->re += 2; + STACK_PUSHX(stack, int, PARSE_PIECE); + break; + } + + + /* We are expecting an atom. If the subexpression (or the whole + regexp) ends here, we interpret it as an empty expression + (which matches an empty string). */ + if ( +#ifdef REG_LITERAL + !(ctx->cflags & REG_LITERAL) && +#endif /* REG_LITERAL */ + (ctx->re >= ctx->re_end + || *ctx->re == CHAR_STAR + || (ctx->cflags & REG_EXTENDED + && (*ctx->re == CHAR_PIPE + || *ctx->re == CHAR_LBRACE + || *ctx->re == CHAR_PLUS + || *ctx->re == CHAR_QUESTIONMARK)) + /* Test for "\)" in BRE mode. 
*/ + || (!(ctx->cflags & REG_EXTENDED) + && ctx->re + 1 < ctx->re_end + && *ctx->re == CHAR_BACKSLASH + && *(ctx->re + 1) == CHAR_LBRACE))) + { + DPRINT(("tre_parse: empty: '%.*" STRF "'\n", + REST(ctx->re))); + result = tre_ast_new_literal(ctx->mem, EMPTY, -1); + if (!result) + return REG_ESPACE; + break; + } + + DPRINT(("tre_parse: literal: '%.*" STRF "'\n", + REST(ctx->re))); + /* Note that we can't use an tre_isalpha() test here, since there + may be characters which are alphabetic but neither upper or + lower case. */ + if (ctx->cflags & REG_ICASE + && (tre_isupper(*ctx->re) || tre_islower(*ctx->re))) + { + tre_ast_node_t *tmp1; + tre_ast_node_t *tmp2; + + /* XXX - Can there be more than one opposite-case + counterpoints for some character in some locale? Or + more than two characters which all should be regarded + the same character if case is ignored? If yes, there + does not seem to be a portable way to detect it. I guess + that at least for multi-character collating elements there + could be several opposite-case counterpoints, but they + cannot be supported portably anyway. 
*/ + tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(*ctx->re), + tre_toupper(*ctx->re)); + if (!tmp1) + return REG_ESPACE; + tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(*ctx->re), + tre_tolower(*ctx->re)); + if (!tmp2) + return REG_ESPACE; + result = tre_ast_new_union(ctx->mem, tmp1, tmp2); + if (!result) + return REG_ESPACE; + } + else + { + result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re); + if (!result) + return REG_ESPACE; + } + ctx->re++; + break; + } + break; + + case PARSE_MARK_FOR_SUBMATCH: + { + int submatch_id = tre_stack_pop_int(stack); + + assert(result); + if (result->submatch_id >= 0) + { + tre_ast_node_t *n, *tmp_node; + n = tre_ast_new_literal(ctx->mem, EMPTY, -1); + if (n == NULL) + return REG_ESPACE; + tmp_node = tre_ast_new_catenation(ctx->mem, n, result); + if (tmp_node == NULL) + return REG_ESPACE; + tmp_node->num_submatches = result->num_submatches; + result = tmp_node; + } + result->submatch_id = submatch_id; + result->num_submatches++; + break; + } + + case PARSE_RESTORE_CFLAGS: + ctx->cflags = tre_stack_pop_int(stack); + break; + + default: + assert(0); + break; + } + } + + if (status != REG_OK) + return status; + + /* Check for missing closing parentheses. */ + if (depth > 0) + return REG_EPAREN; + + ctx->result = result; + return REG_OK; +} + +/* EOF */ diff --git a/deps/tre/lib/tre-parse.h b/deps/tre/lib/tre-parse.h new file mode 100644 index 000000000..39260ea7f --- /dev/null +++ b/deps/tre/lib/tre-parse.h @@ -0,0 +1,52 @@ +/* + tre-parse.c - Regexp parser definitions + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + +*/ + +#ifndef TRE_PARSE_H +#define TRE_PARSE_H 1 + +/* Parse context. */ +typedef struct { + /* Memory allocator. The AST is allocated using this. */ + tre_mem_t mem; + /* Stack used for keeping track of regexp syntax. */ + tre_stack_t *stack; + /* The parse result. */ + tre_ast_node_t *result; + /* The regexp to parse and its length. 
*/ + const tre_char_t *re; + /* The first character of the entire regexp. */ + const tre_char_t *re_start; + /* The first character after the end of the regexp. */ + const tre_char_t *re_end; + size_t len; + /* Current submatch ID. */ + int submatch_id; + /* The highest back reference or -1 if none seen so far. */ + int max_backref; + /* This flag is set if the regexp uses approximate matching. */ + int have_approx; + /* This flag is set if the regexp changes cflags inline using (?...) */ + int have_inline_cflags; + /* Compilation flags. */ + int cflags; + /* If this flag is set the top-level submatch is not captured. */ + int nofirstsub; + /* The currently set approximate matching parameters. */ + int params[TRE_PARAM_LAST]; + /* the MB_CUR_MAX in use */ + int mb_cur_max; +} tre_parse_ctx_t; + +/* Parses a wide character regexp pattern into a syntax tree. This parser + handles both syntaxes (BRE and ERE), including the TRE extensions. */ +reg_errcode_t +tre_parse(tre_parse_ctx_t *ctx); + +#endif /* TRE_PARSE_H */ + +/* EOF */ diff --git a/deps/tre/lib/tre-stack.c b/deps/tre/lib/tre-stack.c new file mode 100644 index 000000000..199aaf1b7 --- /dev/null +++ b/deps/tre/lib/tre-stack.c @@ -0,0 +1,123 @@ +/* + tre-stack.c - Simple stack implementation + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. 
+ +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif /* HAVE_CONFIG_H */ +#include +#include + +#include "tre-internal.h" +#include "tre-stack.h" +#include "xmalloc.h" + +union tre_stack_item { + void *voidptr_value; + int int_value; +}; + +struct tre_stack_rec { + size_t size; + size_t max_size; + size_t ptr; + union tre_stack_item *stack; +}; + + +tre_stack_t * +tre_stack_new(size_t size, size_t max_size) +{ + tre_stack_t *s; + + s = xmalloc(sizeof(*s)); + if (s != NULL) + { + s->stack = xmalloc(sizeof(*s->stack) * size); + if (s->stack == NULL) + { + xfree(s); + return NULL; + } + s->size = size; + s->max_size = max_size; + s->ptr = 0; + } + return s; +} + +void +tre_stack_destroy(tre_stack_t *s) +{ + xfree(s->stack); + xfree(s); +} + +size_t +tre_stack_num_items(tre_stack_t *s) +{ + return s->ptr; +} + +static reg_errcode_t +tre_stack_push(tre_stack_t *s, union tre_stack_item value) +{ + if (s->ptr < s->size) + { + s->stack[s->ptr] = value; + s->ptr++; + } + else + { + if (s->size >= s->max_size) + { + DPRINT(("tre_stack_push: stack full\n")); + return REG_ESPACE; + } + else + { + union tre_stack_item *new_buffer; + size_t new_size; + DPRINT(("tre_stack_push: trying to realloc more space\n")); + new_size = s->size + s->size; + if (new_size > s->max_size) + new_size = s->max_size; + new_buffer = xrealloc(s->stack, sizeof(*new_buffer) * new_size); + if (new_buffer == NULL) + { + DPRINT(("tre_stack_push: realloc failed.\n")); + return REG_ESPACE; + } + DPRINT(("tre_stack_push: realloc succeeded.\n")); + assert(new_size > s->size); + s->size = new_size; + s->stack = new_buffer; + tre_stack_push(s, value); + } + } + return REG_OK; +} + +#define define_pushf(typetag, type) \ + declare_pushf(typetag, type) { \ + union tre_stack_item item; \ + item.typetag ## _value = value; \ + return tre_stack_push(s, item); \ +} + +define_pushf(int, int) +define_pushf(voidptr, void *) + +#define define_popf(typetag, type) \ + declare_popf(typetag, type) { \ + return 
s->stack[--s->ptr].typetag ## _value; \ + } + +define_popf(int, int) +define_popf(voidptr, void *) + +/* EOF */ diff --git a/deps/tre/lib/tre-stack.h b/deps/tre/lib/tre-stack.h new file mode 100644 index 000000000..1408f322a --- /dev/null +++ b/deps/tre/lib/tre-stack.h @@ -0,0 +1,76 @@ +/* + tre-stack.h: Stack definitions + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + +*/ + + +#ifndef TRE_STACK_H +#define TRE_STACK_H 1 + +#include "../local_includes/tre.h" + +typedef struct tre_stack_rec tre_stack_t; + +/* Creates a new stack object with initial size `size' and maximum size + `max_size'. Pushing an additional item onto a full stack will resize + the stack to double its capacity until the maximum is reached. Returns + the stack object or NULL if out of memory. */ +tre_stack_t * +tre_stack_new(size_t size, size_t max_size); + +/* Frees the stack object. */ +void +tre_stack_destroy(tre_stack_t *s); + +/* Returns the current number of items on the stack. */ +size_t +tre_stack_num_items(tre_stack_t *s); + +/* Each tre_stack_push_*(tre_stack_t *s, value) function pushes + `value' on top of stack `s'. Returns REG_ESPACE if out of memory. + This tries to realloc() more space before failing if maximum size + has not yet been reached. Returns REG_OK if successful. */ +#define declare_pushf(typetag, type) \ + reg_errcode_t tre_stack_push_ ## typetag(tre_stack_t *s, type value) + +declare_pushf(voidptr, void *); +declare_pushf(int, int); + +/* Each tre_stack_pop_*(tre_stack_t *s) function pops the topmost + element off of stack `s' and returns it. The stack must not be + empty. */ +#define declare_popf(typetag, type) \ + type tre_stack_pop_ ## typetag(tre_stack_t *s) + +declare_popf(voidptr, void *); +declare_popf(int, int); + +/* Just to save some typing. 
*/ +#define STACK_PUSH(s, typetag, value) \ + do \ + { \ + status = tre_stack_push_ ## typetag(s, value); \ + } \ + while (/*CONSTCOND*/(void)0,0) + +#define STACK_PUSHX(s, typetag, value) \ + { \ + status = tre_stack_push_ ## typetag(s, value); \ + if (status != REG_OK) \ + break; \ + } + +#define STACK_PUSHR(s, typetag, value) \ + { \ + reg_errcode_t _status; \ + _status = tre_stack_push_ ## typetag(s, value); \ + if (_status != REG_OK) \ + return _status; \ + } + +#endif /* TRE_STACK_H */ + +/* EOF */ diff --git a/deps/tre/lib/xmalloc.c b/deps/tre/lib/xmalloc.c new file mode 100644 index 000000000..4179d7c03 --- /dev/null +++ b/deps/tre/lib/xmalloc.c @@ -0,0 +1,362 @@ +/* + xmalloc.c - Simple malloc debugging library implementation + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + +*/ + +/* + TODO: + - red zones + - group dumps by source location +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#define XMALLOC_INTERNAL 1 +#include "xmalloc.h" + + +/* + Internal stuff. 
+*/ + +typedef struct hashTableItemRec { + void *ptr; + size_t bytes; + const char *file; + int line; + const char *func; + struct hashTableItemRec *next; +} hashTableItem; + +typedef struct { + hashTableItem **table; +} hashTable; + +static int xmalloc_peak; +int xmalloc_current; +static int xmalloc_peak_blocks; +int xmalloc_current_blocks; +static int xmalloc_fail_after; + +#define TABLE_BITS 8 +#define TABLE_MASK ((1 << TABLE_BITS) - 1) +#define TABLE_SIZE (1 << TABLE_BITS) + +static hashTable * +hash_table_new(void) +{ + hashTable *tbl; + + tbl = malloc(sizeof(*tbl)); + + if (tbl != NULL) + { + tbl->table = calloc(TABLE_SIZE, sizeof(*tbl->table)); + + if (tbl->table == NULL) + { + free(tbl); + return NULL; + } + } + + return tbl; +} + +static unsigned int +hash_void_ptr(void *ptr) +{ + unsigned int hash; + unsigned int i; + + /* I took this hash function just off the top of my head, I have + no idea whether it is bad or very bad. */ + hash = 0; + for (i = 0; i < sizeof(ptr) * 8 / TABLE_BITS; i++) + { + hash ^= (uintptr_t)ptr >> i * 8; + hash += i * 17; + hash &= TABLE_MASK; + } + return hash; +} + +static void +hash_table_add(hashTable *tbl, void *ptr, size_t bytes, + const char *file, int line, const char *func) +{ + unsigned int i; + hashTableItem *item, *new; + + i = hash_void_ptr(ptr); + + item = tbl->table[i]; + if (item != NULL) + while (item->next != NULL) + item = item->next; + + new = malloc(sizeof(*new)); + assert(new != NULL); + new->ptr = ptr; + new->bytes = bytes; + new->file = file; + new->line = line; + new->func = func; + new->next = NULL; + if (item != NULL) + item->next = new; + else + tbl->table[i] = new; + + xmalloc_current += bytes; + if (xmalloc_current > xmalloc_peak) + xmalloc_peak = xmalloc_current; + xmalloc_current_blocks++; + if (xmalloc_current_blocks > xmalloc_peak_blocks) + xmalloc_peak_blocks = xmalloc_current_blocks; +} + +static void +#if defined(__GNUC__) && __GNUC__ >= 11 +__attribute__((access(none, 2))) +#endif 
+hash_table_del(hashTable *tbl, void *ptr) /* Untrack ptr; prints a diagnostic and aborts if ptr was never recorded. */ +{ + int i; + hashTableItem *item, *prev; + + i = hash_void_ptr(ptr); + + item = tbl->table[i]; + if (item == NULL) + { + printf("xfree: invalid ptr %p\n", ptr); + abort(); + } + prev = NULL; + while (item != NULL && item->ptr != ptr) /* stop at the end of the chain instead of dereferencing NULL */ + { + prev = item; + item = item->next; + } + if (item == NULL) /* bucket is non-empty but ptr is not in it */ + { + printf("xfree: invalid ptr %p\n", ptr); + abort(); + } + + xmalloc_current -= item->bytes; + xmalloc_current_blocks--; + + if (prev != NULL) + { + prev->next = item->next; + free(item); + } + else + { + tbl->table[i] = item->next; + free(item); + } +} + +static hashTable *xmalloc_table = NULL; + +static void +xmalloc_init(void) +{ + if (xmalloc_table == NULL) + { + xmalloc_table = hash_table_new(); + xmalloc_peak = 0; + xmalloc_peak_blocks = 0; + xmalloc_current = 0; + xmalloc_current_blocks = 0; + xmalloc_fail_after = -1; + } + assert(xmalloc_table != NULL); + assert(xmalloc_table->table != NULL); +} + + + +/* + Public API. +*/ + +void +xmalloc_configure(int fail_after) +{ + xmalloc_init(); + xmalloc_fail_after = fail_after; +} + +int +xmalloc_dump_leaks(void) +{ + unsigned int i; + unsigned int num_leaks = 0; + size_t leaked_bytes = 0; + hashTableItem *item; + + xmalloc_init(); + + for (i = 0; i < TABLE_SIZE; i++) + { + item = xmalloc_table->table[i]; + while (item != NULL) + { + printf("%s:%d: %s: %zu bytes at %p not freed\n", + item->file, item->line, item->func, item->bytes, item->ptr); + num_leaks++; + leaked_bytes += item->bytes; + item = item->next; + } + } + if (num_leaks == 0) + printf("No memory leaks.\n"); + else + printf("%u unfreed memory chuncks, total %zu unfreed bytes.\n", + num_leaks, leaked_bytes); + printf("Peak memory consumption %d bytes (%.1f kB, %.1f MB) in %d blocks ", + xmalloc_peak, (double)xmalloc_peak / 1024, + (double)xmalloc_peak / (1024*1024), xmalloc_peak_blocks); + printf("(average "); + if (xmalloc_peak_blocks) + printf("%d", ((xmalloc_peak + xmalloc_peak_blocks / 2) + / xmalloc_peak_blocks)); + else
+ printf("N/A"); + printf(" bytes per block).\n"); + + return num_leaks; +} + +void * +xmalloc_impl(size_t size, const char *file, int line, const char *func) +{ + void *ptr; + + xmalloc_init(); + assert(size > 0); + + if (xmalloc_fail_after == 0) + { + xmalloc_fail_after = -2; +#if 0 + printf("xmalloc: forced failure %s:%d: %s\n", file, line, func); +#endif + return NULL; + } + else if (xmalloc_fail_after == -2) + { + printf("xmalloc: called after failure from %s:%d: %s\n", + file, line, func); + assert(0); + } + else if (xmalloc_fail_after > 0) + xmalloc_fail_after--; + + ptr = malloc(size); + if (ptr != NULL) + hash_table_add(xmalloc_table, ptr, (int)size, file, line, func); + return ptr; +} + +void * +xcalloc_impl(size_t nmemb, size_t size, const char *file, int line, + const char *func) +{ + void *ptr; + + xmalloc_init(); + assert(size > 0); + + if (xmalloc_fail_after == 0) + { + xmalloc_fail_after = -2; +#if 0 + printf("xcalloc: forced failure %s:%d: %s\n", file, line, func); +#endif + return NULL; + } + else if (xmalloc_fail_after == -2) + { + printf("xcalloc: called after failure from %s:%d: %s\n", + file, line, func); + assert(0); + } + else if (xmalloc_fail_after > 0) + xmalloc_fail_after--; + + ptr = calloc(nmemb, size); + if (ptr != NULL) + hash_table_add(xmalloc_table, ptr, (int)(nmemb * size), file, line, func); + return ptr; +} + +void +xfree_impl(void *ptr, const char *file, int line, const char *func) +{ + /*LINTED*/(void)&file; + /*LINTED*/(void)&line; + /*LINTED*/(void)&func; + xmalloc_init(); + + if (ptr != NULL) + hash_table_del(xmalloc_table, ptr); + free(ptr); +} + +void * +xrealloc_impl(void *ptr, size_t new_size, const char *file, int line, + const char *func) +{ + void *new_ptr; + + xmalloc_init(); + assert(ptr != NULL); + assert(new_size > 0); + + if (xmalloc_fail_after == 0) + { + xmalloc_fail_after = -2; + return NULL; + } + else if (xmalloc_fail_after == -2) + { + printf("xrealloc: called after failure from %s:%d: %s\n", + file, line, 
func); + assert(0); + } + else if (xmalloc_fail_after > 0) + xmalloc_fail_after--; + + new_ptr = realloc(ptr, new_size); + if (new_ptr != NULL) /* re-register even when resized in place, so the recorded byte count tracks new_size */ + { +#if defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 12 +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wuse-after-free" +#endif + hash_table_del(xmalloc_table, ptr); +#if defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 12 +#pragma GCC diagnostic pop +#endif + hash_table_add(xmalloc_table, new_ptr, (int)new_size, file, line, func); + } + return new_ptr; +} + + + +/* EOF */ diff --git a/deps/tre/lib/xmalloc.h b/deps/tre/lib/xmalloc.h new file mode 100644 index 000000000..ce310af52 --- /dev/null +++ b/deps/tre/lib/xmalloc.h @@ -0,0 +1,77 @@ +/* + xmalloc.h - Simple malloc debugging library API + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + +*/ + +#ifndef _XMALLOC_H +#define _XMALLOC_H 1 + +void *xmalloc_impl(size_t size, const char *file, int line, const char *func); +void *xcalloc_impl(size_t nmemb, size_t size, const char *file, int line, + const char *func); +void xfree_impl(void *ptr, const char *file, int line, const char *func); +void *xrealloc_impl(void *ptr, size_t new_size, const char *file, int line, + const char *func); +int xmalloc_dump_leaks(void); +void xmalloc_configure(int fail_after); + + +#ifndef XMALLOC_INTERNAL +#ifdef MALLOC_DEBUGGING + +/* Version 2.4 and later of GCC define a magical variable `__PRETTY_FUNCTION__' + which contains the name of the function currently being defined. +# define __XMALLOC_FUNCTION __PRETTY_FUNCTION__ + This is broken in G++ before version 2.6. + C9x has a similar variable called __func__, but prefer the GCC one since + it demangles C++ function names. */ +# ifdef __GNUC__ +# if __GNUC__ > 2 || (__GNUC__ == 2 \ + && __GNUC_MINOR__ >= (defined __cplusplus ?
6 : 4)) +# define __XMALLOC_FUNCTION __PRETTY_FUNCTION__ +# else +# define __XMALLOC_FUNCTION ((const char *) 0) +# endif +# else +# if defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L +# define __XMALLOC_FUNCTION __func__ +# else +# define __XMALLOC_FUNCTION ((const char *) 0) +# endif +# endif + +#define xmalloc(size) xmalloc_impl(size, __FILE__, __LINE__, \ + __XMALLOC_FUNCTION) +#define xcalloc(nmemb, size) xcalloc_impl(nmemb, size, __FILE__, __LINE__, \ + __XMALLOC_FUNCTION) +#define xfree(ptr) xfree_impl(ptr, __FILE__, __LINE__, __XMALLOC_FUNCTION) +#define xrealloc(ptr, new_size) xrealloc_impl(ptr, new_size, __FILE__, \ + __LINE__, __XMALLOC_FUNCTION) +#undef malloc +#undef calloc +#undef free +#undef realloc + +#define malloc USE_XMALLOC_INSTEAD_OF_MALLOC +#define calloc USE_XCALLOC_INSTEAD_OF_CALLOC +#define free USE_XFREE_INSTEAD_OF_FREE +#define realloc USE_XREALLOC_INSTEAD_OF_REALLOC + +#else /* !MALLOC_DEBUGGING */ + +#include + +#define xmalloc(size) malloc(size) +#define xcalloc(nmemb, size) calloc(nmemb, size) +#define xfree(ptr) free(ptr) +#define xrealloc(ptr, new_size) realloc(ptr, new_size) + +#endif /* !MALLOC_DEBUGGING */ +#endif /* !XMALLOC_INTERNAL */ + +#endif /* _XMALLOC_H */ + +/* EOF */ diff --git a/deps/tre/local_includes/regex.h b/deps/tre/local_includes/regex.h new file mode 100644 index 000000000..daa15a741 --- /dev/null +++ b/deps/tre/local_includes/regex.h @@ -0,0 +1,48 @@ +/* + regex.h - TRE legacy API + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + + This header is for source level compatibility with old code using + the header which defined the TRE API functions without + a prefix. New code should include instead. + +*/ + +#ifndef TRE_REGEX_H /* include guard: must name the same macro that is defined on the next line */ +#define TRE_REGEX_H 1 + +#ifdef USE_LOCAL_TRE_H +/* Use the header(s) from the TRE package that this file is part of.
+ (Yes, this file is in local_includes too, but the explicit path + means there is no way to get a system tre.h by accident.) */ +#include "../local_includes/tre.h" +#else +/* Use the header(s) from an installed version of the TRE package + (so that this application matches the installed libtre), + not the one(s) in the local_includes directory. */ +#include +#endif + +#ifndef TRE_USE_SYSTEM_REGEX_H +#define regcomp tre_regcomp +#define regerror tre_regerror +#define regexec tre_regexec +#define regfree tre_regfree +#endif /* TRE_USE_SYSTEM_REGEX_H */ + +#define regacomp tre_regacomp +#define regaexec tre_regaexec +#define regancomp tre_regancomp +#define reganexec tre_reganexec +#define regawncomp tre_regawncomp +#define regawnexec tre_regawnexec +#define regncomp tre_regncomp +#define regnexec tre_regnexec +#define regwcomp tre_regwcomp +#define regwexec tre_regwexec +#define regwncomp tre_regwncomp +#define regwnexec tre_regwnexec + +#endif /* TRE_REGEX_H */ diff --git a/deps/tre/local_includes/tre-config.h b/deps/tre/local_includes/tre-config.h new file mode 100644 index 000000000..4b73c1289 --- /dev/null +++ b/deps/tre/local_includes/tre-config.h @@ -0,0 +1,14 @@ +/* Minimal TRE configuration for Redis. + * + * We use TRE as a byte-oriented regex matcher for ARGREP. Redis SDS values are + * binary-safe byte strings, so we intentionally keep the dependency build + * simple: no wide-char path, no multibyte locale handling, and no approximate + * matching engine. + */ + +#define HAVE_SYS_TYPES_H 1 + +#define TRE_VERSION "redis-vendored" +#define TRE_VERSION_1 0 +#define TRE_VERSION_2 0 +#define TRE_VERSION_3 0 diff --git a/deps/tre/local_includes/tre.h b/deps/tre/local_includes/tre.h new file mode 100644 index 000000000..675153990 --- /dev/null +++ b/deps/tre/local_includes/tre.h @@ -0,0 +1,344 @@ +/* + tre.h - TRE public API definitions + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright.
+ +*/ + +#ifndef TRE_H +#define TRE_H 1 + +#ifdef USE_LOCAL_TRE_H +/* Make certain to use the header(s) from the TRE package that this + file is part of by giving the full path to the header from this directory. */ +#include "../local_includes/tre-config.h" +#else +/* Use the header in the same directory as this file if there is one. */ +#include "tre-config.h" +#endif + +#ifdef HAVE_SYS_TYPES_H +#include +#endif /* HAVE_SYS_TYPES_H */ + +#ifdef HAVE_LIBUTF8_H +#include +#endif /* HAVE_LIBUTF8_H */ + +#ifdef TRE_USE_SYSTEM_REGEX_H +/* Include the system regex.h to make TRE ABI compatible with the + system regex. */ +#include TRE_SYSTEM_REGEX_H_PATH +#define tre_regcomp regcomp +#define tre_regexec regexec +#define tre_regerror regerror +#define tre_regfree regfree +/* The GNU C regex has a number of refinements to the POSIX standard for the + formal parameter list of the regexec() function, and some of those fail to + compile when using LLVM. The refinements seem to be opt-out rather than + opt-in when using a recent gcc, and they produce a warning when TRE tries + to mimic the API without the refinements. The TRE code still works but + the warnings are distracting, so try to #define a flag to indicate when to + add the refinements to TRE's parameter list too. */ +#ifdef __GNUC__ +/* Try to test something that looks pretty REGEX specific and hope we don't + need a zillion different platform+compiler specific tests to deal with this. */ +#ifdef _REGEX_NELTS +/* Define a TRE specific flag here so that: + 1) there is only one place where code has to be changed if the test above is not adequate, and + 2) the flag can be used in any other parts of the TRE source that might be affected by the + GNUC refinements. + Note that this flag is only defined when all of TRE_USE_SYSTEM_REGEX_H, __GNUC__, and _REGEX_NELTS are defined. 
*/ +#define TRE_USE_GNUC_REGEXEC_FPL 1 +#endif +#endif +#endif /* TRE_USE_SYSTEM_REGEX_H */ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef TRE_USE_SYSTEM_REGEX_H + +#ifndef REG_OK +#define REG_OK 0 +#endif /* !REG_OK */ + +#ifndef HAVE_REG_ERRCODE_T +typedef int reg_errcode_t; +#endif /* !HAVE_REG_ERRCODE_T */ + +#if !defined(REG_NOSPEC) && !defined(REG_LITERAL) +#define REG_LITERAL 0x1000 +#endif + +/* Extra tre_regcomp() return error codes. */ +#define REG_BADMAX REG_BADBR + +/* Extra tre_regcomp() flags. */ +#ifndef REG_BASIC +#define REG_BASIC 0 +#endif /* !REG_BASIC */ +#define REG_RIGHT_ASSOC (REG_LITERAL << 1) +#ifdef REG_UNGREEDY +/* We're going to use TRE code, so we need the TRE define (dodge problem in MacOS). */ +#undef REG_UNGREEDY +#endif +#define REG_UNGREEDY (REG_RIGHT_ASSOC << 1) + +#define REG_USEBYTES (REG_UNGREEDY << 1) + +/* Extra tre_regexec() flags. */ +#define REG_APPROX_MATCHER 0x1000 +#ifdef REG_BACKTRACKING_MATCHER +/* We're going to use TRE code, so we need the TRE define (dodge problem in MacOS). */ +#undef REG_BACKTRACKING_MATCHER +#endif +#define REG_BACKTRACKING_MATCHER (REG_APPROX_MATCHER << 1) + +#else /* !TRE_USE_SYSTEM_REGEX_H */ + +/* If the we're not using system regex.h, we need to define the + structs and enums ourselves. */ + +typedef int regoff_t; +typedef struct { + size_t re_nsub; /* Number of parenthesized subexpressions. */ + void *value; /* For internal use only. */ +} regex_t; + +typedef struct { + regoff_t rm_so; + regoff_t rm_eo; +} regmatch_t; + + +typedef enum { + REG_OK = 0, /* No error. */ + /* POSIX tre_regcomp() return error codes. (In the order listed in the + standard.) */ + REG_NOMATCH, /* No match. */ + REG_BADPAT, /* Invalid regexp. */ + REG_ECOLLATE, /* Unknown collating element. */ + REG_ECTYPE, /* Unknown character class name. */ + REG_EESCAPE, /* Trailing backslash. */ + REG_ESUBREG, /* Invalid back reference. 
*/ + REG_EBRACK, /* "[]" imbalance */ + REG_EPAREN, /* "\(\)" or "()" imbalance */ + REG_EBRACE, /* "\{\}" or "{}" imbalance */ + REG_BADBR, /* Invalid content of {} */ + REG_ERANGE, /* Invalid use of range operator */ + REG_ESPACE, /* Out of memory. */ + REG_BADRPT, /* Invalid use of repetition operators. */ + REG_BADMAX, /* Maximum repetition in {} too large */ +} reg_errcode_t; + +/* POSIX tre_regcomp() flags. */ +#define REG_EXTENDED 1 +#define REG_ICASE (REG_EXTENDED << 1) +#define REG_NEWLINE (REG_ICASE << 1) +#define REG_NOSUB (REG_NEWLINE << 1) + +/* Extra tre_regcomp() flags. */ +#define REG_BASIC 0 +#define REG_LITERAL (REG_NOSUB << 1) +#define REG_RIGHT_ASSOC (REG_LITERAL << 1) +#define REG_UNGREEDY (REG_RIGHT_ASSOC << 1) +#define REG_USEBYTES (REG_UNGREEDY << 1) + +/* POSIX tre_regexec() flags. */ +#define REG_NOTBOL 1 +#define REG_NOTEOL (REG_NOTBOL << 1) + +/* Extra tre_regexec() flags. */ +#define REG_APPROX_MATCHER (REG_NOTEOL << 1) +#define REG_BACKTRACKING_MATCHER (REG_APPROX_MATCHER << 1) + +#endif /* !TRE_USE_SYSTEM_REGEX_H */ + +/* REG_NOSPEC and REG_LITERAL mean the same thing. */ +#if defined(REG_LITERAL) && !defined(REG_NOSPEC) +#define REG_NOSPEC REG_LITERAL +#elif defined(REG_NOSPEC) && !defined(REG_LITERAL) +#define REG_LITERAL REG_NOSPEC +#endif /* defined(REG_NOSPEC) */ + +/* The maximum number of iterations in a bound expression. 
*/ +#undef RE_DUP_MAX +#define RE_DUP_MAX 255 + +/* The POSIX.2 regexp functions */ +extern int +tre_regcomp(regex_t *preg, const char *regex, int cflags); + +#ifdef TRE_USE_GNUC_REGEXEC_FPL +extern int +tre_regexec(const regex_t *preg, const char *string, + size_t nmatch, regmatch_t pmatch[_Restrict_arr_ _REGEX_NELTS (nmatch)], + int eflags); +#else +extern int +tre_regexec(const regex_t *preg, const char *string, size_t nmatch, + regmatch_t pmatch[], int eflags); +#endif + +extern int +tre_regcompb(regex_t *preg, const char *regex, int cflags); + +extern int +tre_regexecb(const regex_t *preg, const char *string, size_t nmatch, + regmatch_t pmatch[], int eflags); + +extern size_t +tre_regerror(int errcode, const regex_t *preg, char *errbuf, + size_t errbuf_size); + +extern void +tre_regfree(regex_t *preg); + +#ifdef TRE_WCHAR +#ifdef HAVE_WCHAR_H +#include <wchar.h> +#endif /* HAVE_WCHAR_H */ + +/* Wide character versions (not in POSIX.2). */ +extern int +tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags); + +extern int +tre_regwexec(const regex_t *preg, const wchar_t *string, + size_t nmatch, regmatch_t pmatch[], int eflags); +#endif /* TRE_WCHAR */ + +/* Versions with a maximum length argument and therefore the capability to + handle null characters in the middle of the strings (not in POSIX.2). 
*/ +extern int +tre_regncomp(regex_t *preg, const char *regex, size_t len, int cflags); + +extern int +tre_regnexec(const regex_t *preg, const char *string, size_t len, + size_t nmatch, regmatch_t pmatch[], int eflags); + +/* regn*b versions take byte literally as 8-bit values */ +extern int +tre_regncompb(regex_t *preg, const char *regex, size_t n, int cflags); + +extern int +tre_regnexecb(const regex_t *preg, const char *str, size_t len, + size_t nmatch, regmatch_t pmatch[], int eflags); + +#ifdef TRE_WCHAR +extern int +tre_regwncomp(regex_t *preg, const wchar_t *regex, size_t len, int cflags); + +extern int +tre_regwnexec(const regex_t *preg, const wchar_t *string, size_t len, + size_t nmatch, regmatch_t pmatch[], int eflags); +#endif /* TRE_WCHAR */ + +#ifdef TRE_APPROX + +/* Approximate matching parameter struct. */ +typedef struct { + int cost_ins; /* Default cost of an inserted character. */ + int cost_del; /* Default cost of a deleted character. */ + int cost_subst; /* Default cost of a substituted character. */ + int max_cost; /* Maximum allowed cost of a match. */ + + int max_ins; /* Maximum allowed number of inserts. */ + int max_del; /* Maximum allowed number of deletes. */ + int max_subst; /* Maximum allowed number of substitutes. */ + int max_err; /* Maximum allowed number of errors total. */ +} regaparams_t; + +/* Approximate matching result struct. */ +typedef struct { + size_t nmatch; /* Length of pmatch[] array. */ + regmatch_t *pmatch; /* Submatch data. */ + int cost; /* Cost of the match. */ + int num_ins; /* Number of inserts in the match. */ + int num_del; /* Number of deletes in the match. */ + int num_subst; /* Number of substitutes in the match. */ +} regamatch_t; + + +/* Approximate matching functions. 
*/ +extern int +tre_regaexec(const regex_t *preg, const char *string, + regamatch_t *match, regaparams_t params, int eflags); + +extern int +tre_reganexec(const regex_t *preg, const char *string, size_t len, + regamatch_t *match, regaparams_t params, int eflags); + +extern int +tre_regaexecb(const regex_t *preg, const char *string, + regamatch_t *match, regaparams_t params, int eflags); + +#ifdef TRE_WCHAR +/* Wide character approximate matching. */ +extern int +tre_regawexec(const regex_t *preg, const wchar_t *string, + regamatch_t *match, regaparams_t params, int eflags); + +extern int +tre_regawnexec(const regex_t *preg, const wchar_t *string, size_t len, + regamatch_t *match, regaparams_t params, int eflags); +#endif /* TRE_WCHAR */ + +/* Sets the parameters to default values. */ +extern void +tre_regaparams_default(regaparams_t *params); +#endif /* TRE_APPROX */ + +#ifdef TRE_WCHAR +typedef wchar_t tre_char_t; +#else /* !TRE_WCHAR */ +typedef unsigned char tre_char_t; +#endif /* !TRE_WCHAR */ + +typedef struct { + int (*get_next_char)(tre_char_t *c, unsigned int *pos_add, void *context); + void (*rewind)(size_t pos, void *context); + int (*compare)(size_t pos1, size_t pos2, size_t len, void *context); + void *context; +} tre_str_source; + +extern int +tre_reguexec(const regex_t *preg, const tre_str_source *string, + size_t nmatch, regmatch_t pmatch[], int eflags); + +/* Returns the version string. The returned string is static. */ +extern char * +tre_version(void); + +/* Returns the value for a config parameter. The type to which `result' + must point depends on the value of `query'; see the documentation for + more details. */ +extern int +tre_config(int query, void *result); + +enum { + TRE_CONFIG_APPROX, + TRE_CONFIG_WCHAR, + TRE_CONFIG_MULTIBYTE, + TRE_CONFIG_SYSTEM_ABI, + TRE_CONFIG_VERSION +}; + +/* Returns 1 if the compiled pattern has back references, 0 if not. 
*/ +extern int +tre_have_backrefs(const regex_t *preg); + +/* Returns 1 if the compiled pattern uses approximate matching features, + 0 if not. */ +extern int +tre_have_approx(const regex_t *preg); + +#ifdef __cplusplus +} +#endif +#endif /* TRE_H */ + +/* EOF */ diff --git a/deps/tre/tests/retest.c b/deps/tre/tests/retest.c new file mode 100644 index 000000000..c486a819c --- /dev/null +++ b/deps/tre/tests/retest.c @@ -0,0 +1,1871 @@ +/* + retest.c - TRE regression test program + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + +*/ + +/* + This is just a simple test application containing various hand-written + tests for regression testing TRE. I've tried to surround TRE specific + tests inside ifdefs, so this can be used to test any POSIX compatible + regexp implementation. +*/ + +/* + 2023/06 - Compilers now sometimes require the input string constants to be + properly encoded, but how they decide on which encoding (if any) + is poorly documented and different for different platforms. + The non-ASCII encoded strings are now guarded by #ifdefs with one + of the following values. Define/undef whichever one(s) you need. + #define SRC_IN_ISO_8859_1 + #define SRC_IN_UTF_8 + #define SRC_IN_EUC_JP + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +/* look for getopt in order to use a -o option for output. 
*/ +#if defined(HAVE_UNISTD_H) +#include +#elif defined(HAVE_GETOPT_H) +#include +#endif +#ifdef HAVE_MALLOC_H +#include +#endif /* HAVE_MALLOC_H */ + +#ifdef TRE_VERSION +#define HAVE_REGNEXEC 1 +#define HAVE_REGNCOMP 1 +#include "xmalloc.h" +#else /* !TRE_VERSION */ +#define xmalloc malloc +#define xfree free +#endif /* !TRE_VERSION */ + +#include "tre-internal.h" + +#ifdef WRETEST +#include +#define CHAR_T wchar_t +#define L(x) (L ## x) + +#define MAXSTRSIZE 8192 +static wchar_t wstr[MAXSTRSIZE]; +static wchar_t wregex[MAXSTRSIZE]; +static int woffs[MAXSTRSIZE]; + +#ifdef TRE_USE_SYSTEM_REGEX_H +/* Avoid some redefinition warnings from including tre.h. */ +#ifdef tre_regexec +#undef tre_regexec +/* No need for the *n* fn, it isn't in the system abi. */ +#endif +#endif +#define tre_regexec tre_regwexec +#define tre_regnexec tre_regwnexec +#ifdef TRE_USE_SYSTEM_REGEX_H +/* Avoid some redefinition warnings from including tre.h. */ +#ifdef tre_regcomp +#undef tre_regcomp +#endif +/* No need for the *n* fn, it isn't in the system abi. */ +#endif +#define tre_regcomp tre_regwcomp +#define tre_regncomp tre_regwncomp + +/* Iterate mbrtowc over the multi-byte sequence STR of length LEN, + store the result in BUF and memoize the successive byte offsets + in OFF. */ + +static int +mbntowc (wchar_t *buf, const char *str, size_t len, int *off) +{ + int n, wlen; +#ifdef HAVE_MBSTATE_T + mbstate_t cst; + memset(&cst, 0, sizeof(cst)); +#endif + + if (len >= MAXSTRSIZE) + { + fprintf(stderr, "Increase MAXSTRSIZE to %ld or more and recompile!\n", + (long)len + 1); + exit(EXIT_FAILURE); + } + + if (off) + { + memset(off + 1, -1, len * sizeof(int)); + *off = 0; + } + + wlen = 0; + while (len > 0) + { + n = tre_mbrtowc(buf ? 
buf++ : NULL, str, len, &cst); + if (n < 0) + return n; + if (n == 0) + n = 1; + str += n; + len -= n; + wlen += 1; + if (off) + *(off += n) = wlen; + } + + return(wlen); +} + +#else /* !WRETEST */ +#define CHAR_T char +#define L(x) (x) +#endif /* !WRETEST */ + +static FILE *outf = NULL; + +static int valid_reobj = 0; +static regex_t reobj; +static regmatch_t pmatch_global[32]; +static const CHAR_T *regex_pattern; +static int cflags_global; +static int use_regnexec = 0; +static int use_regncomp = 0; +static int avoid_eflags = 0; + +static int comp_tests = 0; +static int exec_tests = 0; +static int comp_errors = 0; +static int exec_errors = 0; + +#ifndef REG_OK +#define REG_OK 0 +#endif /* REG_OK */ + +#define END -2 + +static void +test_status(char c) +{ + static int k = 0; + fprintf(outf, "%c", c); + if (++k % 79 == 0) + fprintf(outf, "\n"); + fflush(outf); +} + + +static int +wrap_regexec(const CHAR_T *data, size_t len, + size_t pmatch_len, regmatch_t *pmatch, int eflags) +{ + CHAR_T *buf = NULL; + int result; + + if (len == 0 && use_regnexec) + { + /* Zero length string and using tre_regnexec(), the pointer we give + should not be dereferenced at all. */ + buf = NULL; + } + else + { + /* Copy the data to a separate buffer to make a better test for + tre_regexec() and tre_regnexec(). 
*/ + buf = xmalloc((len + !use_regnexec) * sizeof(CHAR_T)); + if (!buf) + return REG_ESPACE; + memcpy(buf, data, len * sizeof(CHAR_T)); + test_status('#'); + } + +#ifdef HAVE_REGNEXEC + if (use_regnexec) + { + if (len == 0) + result = tre_regnexec(&reobj, NULL, len, pmatch_len, pmatch, eflags); + else + result = tre_regnexec(&reobj, buf, len, pmatch_len, pmatch, eflags); + } + else +#endif /* HAVE_REGNEXEC */ + { + buf[len] = L('\0'); + result = tre_regexec(&reobj, buf, pmatch_len, pmatch, eflags); + } + + xfree(buf); + return result; +} + +static int +wrap_regcomp(regex_t *preg, const CHAR_T *data, size_t len, int cflags) +{ +#ifdef HAVE_REGNCOMP + if (use_regncomp) + return tre_regncomp(preg, data, len, cflags); + else + return tre_regcomp(preg, data, cflags); +#else /* !HAVE_REGNCOMP */ + fprintf(stderr, "%s\n", data); + return tre_regcomp(preg, data, cflags); +#endif /* !HAVE_REGNCOMP */ +} + +static int +execute(const CHAR_T *data, int len, size_t pmatch_len, regmatch_t *pmatch, + int eflags) +{ +#ifdef MALLOC_DEBUGGING + int i = 0; + int ret; + + while (1) + { + xmalloc_configure(i); + comp_tests++; + ret = wrap_regexec(data, len, pmatch_len, pmatch, eflags); + if (ret != REG_ESPACE) + { + break; + } +#ifdef REGEX_DEBUG + xmalloc_dump_leaks(); +#endif /* REGEX_DEBUG */ + i++; + } + return ret; +#else /* !MALLOC_DEBUGGING */ + return wrap_regexec(data, len, pmatch_len, pmatch, eflags); +#endif /* !MALLOC_DEBUGGING */ +} + +static int +check(va_list ap, int ret, const CHAR_T *str, + size_t pmatch_len, regmatch_t *pmatch, int eflags) +{ + int fail = 0; + + if (ret != va_arg(ap, int)) + { +#ifndef WRETEST + fprintf(outf, "Exec error, regex: \"%s\", cflags %d, " + "string: \"%s\", eflags %d\n", regex_pattern, cflags_global, + str, eflags); +#else /* WRETEST */ + fprintf(outf, "Exec error, regex: \"%ls\", cflags %d, " + "string: \"%ls\", eflags %d\n", regex_pattern, cflags_global, + str, eflags); +#endif /* WRETEST */ + fprintf(outf, " got %smatch (tre_regexec 
returned %d)\n", ret ? "no " : "", ret); + return 1; + } + + if (ret == 0) + { + unsigned int i; + + for (i = 0; i < pmatch_len; i++) + { + int rm_so, rm_eo; + rm_so = va_arg(ap, int); + if (rm_so == END) + break; + rm_eo = va_arg(ap, int); +#ifdef WRETEST + if (rm_so >= 0) + { + int n = rm_so; + + if ((rm_so = woffs[rm_so]) < 0 || + (n = rm_eo, rm_eo = woffs[rm_eo]) < 0) + { + fprintf(outf, "Invalid or incomplete multi-byte sequence " + "in string %ls before byte offset %d\n", str, n); + return 1; + } + } +#endif /* WRETEST */ + if (pmatch[i].rm_so != rm_so + || pmatch[i].rm_eo != rm_eo) + { +#ifndef WRETEST + fprintf(outf, "Exec error, regex: \"%s\", string: \"%s\"\n", + regex_pattern, str); + fprintf(outf, " group %d: expected (%d, %d) \"%.*s\", " + "got (%d, %d) \"%.*s\"\n", +#else /* WRETEST */ + fprintf(outf, "Exec error, regex: \"%ls\", string: \"%ls\"\n", + regex_pattern, str); + fprintf(outf, " group %d: expected (%d, %d) \"%.*ls\", " + "got (%d, %d) \"%.*ls\"\n", +#endif /* WRETEST */ + i, rm_so, rm_eo, rm_eo - rm_so, str + rm_so, + (int)pmatch[i].rm_so, (int)pmatch[i].rm_eo, + (int)(pmatch[i].rm_eo - pmatch[i].rm_so), + str + pmatch[i].rm_so); + fail = 1; + } + } + + if (!(cflags_global & REG_NOSUB) && reobj.re_nsub != i - 1 + && reobj.re_nsub <= pmatch_len && pmatch) + { +#ifndef WRETEST + fprintf(outf, "Comp error, regex: \"%s\"\n", regex_pattern); +#else /* WRETEST */ + fprintf(outf, "Comp error, regex: \"%ls\"\n", regex_pattern); +#endif /* WRETEST */ + fprintf(outf, " re_nsub is %d, should be %d\n", (int)reobj.re_nsub, i - 1); + fail = 1; + } + + + for (; i < pmatch_len; i++) + if (pmatch[i].rm_so != -1 || pmatch[i].rm_eo != -1) + { + if (!fail) +#ifndef WRETEST + fprintf(outf, "Exec error, regex: \"%s\", string: \"%s\"\n", + regex_pattern, str); +#else /* WRETEST */ + fprintf(outf, "Exec error, regex: \"%ls\", string: \"%ls\"\n", + regex_pattern, str); +#endif /* WRETEST */ + fprintf(outf, " group %d: expected (-1, -1), got (%d, %d)\n", + i, 
(int)pmatch[i].rm_so, (int)pmatch[i].rm_eo); + fail = 1; + } + } + + return fail; +} + + +static void +test_nexec(const char *data, size_t len, int eflags, ...) +{ + int m; + int fail = 0; + int extra_flags[] = {0, REG_BACKTRACKING_MATCHER, REG_APPROX_MATCHER}; + size_t i; + va_list ap; + + if (!valid_reobj) + { + exec_errors++; + return; + } + +#ifdef WRETEST + { + int wlen = mbntowc(wstr, data, len, woffs); + if (wlen < 0) + { + exec_errors++; + fprintf(outf, "Invalid or incomplete multi-byte sequence in %s\n", data); + return; + } + wstr[wlen] = L'\0'; + len = wlen; + } +#define data wstr +#endif /* WRETEST */ + + use_regnexec = 1; + + for (i = 0; i < elementsof(extra_flags); i++) + { + int final_flags = eflags | extra_flags[i]; + + if ((final_flags & REG_BACKTRACKING_MATCHER + && tre_have_approx(&reobj)) + || (final_flags & REG_APPROX_MATCHER + && tre_have_backrefs(&reobj)) + || (final_flags & avoid_eflags)) + continue; + + /* Test with a pmatch array. */ + exec_tests++; + m = execute(data, len, elementsof(pmatch_global), pmatch_global, + final_flags); + va_start(ap, eflags); + fail |= check(ap, m, data, elementsof(pmatch_global), pmatch_global, + final_flags); + va_end(ap); + + /* Same test with a NULL pmatch. */ + exec_tests++; + m = execute(data, len, 0, NULL, final_flags); + va_start(ap, eflags); + fail |= check(ap, m, data, 0, NULL, final_flags); + va_end(ap); + } + +#ifdef WRETEST +#undef data +#endif /* WRETEST */ + + if (fail) + exec_errors++; +} + + + +static void +test_exec(const char *str, int eflags, ...) 
+{ + int m; + int fail = 0; + size_t len = strlen(str); + int extra_flags[] = {0, + REG_BACKTRACKING_MATCHER, + REG_APPROX_MATCHER, + REG_BACKTRACKING_MATCHER | REG_APPROX_MATCHER}; + size_t i; + va_list ap; + + if (!valid_reobj) + { + exec_errors++; + return; + } + +#ifdef WRETEST + { + int wlen = mbntowc(wstr, str, len, woffs); + if (wlen < 0) + { + exec_errors++; + fprintf(outf, "Invalid or incomplete multi-byte sequence in %s\n", str); + return; + } + wstr[wlen] = L'\0'; + len = wlen; + } +#define str wstr +#endif /* WRETEST */ + + for (use_regnexec = 0; use_regnexec < 2; use_regnexec++) + { + for (i = 0; i < elementsof(extra_flags); i++) + { + int final_flags = eflags | extra_flags[i]; + + if ((final_flags & REG_BACKTRACKING_MATCHER + && tre_have_approx(&reobj)) + || (final_flags & REG_APPROX_MATCHER + && tre_have_backrefs(&reobj)) + || (final_flags & avoid_eflags)) + continue; + + /* Test with a pmatch array. */ + exec_tests++; + m = execute(str, len, elementsof(pmatch_global), pmatch_global, + final_flags); + va_start(ap, eflags); + fail |= check(ap, m, str, elementsof(pmatch_global), pmatch_global, + final_flags); + va_end(ap); + + /* Same test with a NULL pmatch. */ + exec_tests++; + m = execute(str, len, 0, NULL, final_flags); + va_start(ap, eflags); + fail |= check(ap, m, str, 0, NULL, final_flags); + va_end(ap); + } + } + +#ifdef WRETEST +#undef str +#endif /* WRETEST */ + + if (fail) + exec_errors++; +} + + +static void +test_comp(const char *re, int flags, int ret) +{ + int errcode = 0; + int len = re ? 
strlen(re) : 0; + + if (valid_reobj) + { + tre_regfree(&reobj); + valid_reobj = 0; + } + + comp_tests++; + +#ifdef WRETEST + { + int wlen = mbntowc(wregex, re, len, NULL); + + if (wlen < 0) + { + comp_errors++; + fprintf(outf, "Invalid or incomplete multi-byte sequence in %s\n", re); + return; + } + wregex[wlen] = L'\0'; + len = wlen; + } +#define re wregex +#endif /* WRETEST */ + regex_pattern = re; + cflags_global = flags; + +#ifdef MALLOC_DEBUGGING + xmalloc_configure(-1); + if (ret != REG_ESPACE) { + static int j = 0; + int i = 0; + while (1) + { + xmalloc_configure(i); + comp_tests++; + if (j++ % 20 == 0) + test_status('.'); + errcode = wrap_regcomp(&reobj, re, len, flags); + if (errcode != REG_ESPACE) + { + test_status('*'); + break; + } +#ifdef REGEX_DEBUG + xmalloc_dump_leaks(); +#endif /* REGEX_DEBUG */ + i++; + } + } else +#endif /* !MALLOC_DEBUGGING */ + errcode = wrap_regcomp(&reobj, re, len, flags); + +#ifdef WRETEST +#undef re +#endif /* WRETEST */ + + if (errcode != ret) + { +#ifndef WRETEST + fprintf(outf, "Comp error, regex: \"%s\"\n", regex_pattern); +#else /* WRETEST */ + fprintf(outf, "Comp error, regex: \"%ls\"\n", regex_pattern); +#endif /* WRETEST */ + fprintf(outf, " expected return code %d, got %d.\n", + ret, errcode); + comp_errors++; + } + + if (errcode == 0) + valid_reobj = 1; +} + + + +/* To enable tests for known bugs, set this to 1. */ +#define KNOWN_BUG 0 + +int +main(int argc, char **argv) +{ + outf = stdout; +#if defined(HAVE_UNISTD_H) || defined(HAVE_GETOPT_H) + int opt; + while ((opt = getopt(argc, argv, "o:")) != EOF) + { + switch (opt) + { + case 'o': + if ((outf = fopen(optarg, "w")) == NULL) + { + perror(optarg); + exit(1); + } + break; + default: + /* getopt() will have printed an error message already */ + exit(1); + } + } +#endif /* HAVE_UNISTD_H */ + +#ifdef WRETEST + /* Need an 8-bit locale. Or move the two tests with non-ascii + characters to the localized internationalization tests. 
*/ + if (setlocale(LC_CTYPE, "en_US.ISO-8859-1") == NULL && + setlocale(LC_CTYPE, "en_US.ISO8859-1") == NULL) + fprintf(stderr, "Could not set locale en_US.ISO-8859-1. Expect some\n" + "`Invalid or incomplete multi-byte sequence' errors.\n"); +#endif /* WRETEST */ + /* Large number of macros in one regexp. */ + test_comp("[A-Z]\\d\\s?\\d[A-Z]{2}|[A-Z]\\d{2}\\s?\\d[A-Z]{2}|[A-Z]{2}\\d" + "\\s?\\d[A-Z]{2}|[A-Z]{2}\\d{2}\\s?\\d[A-Z]{2}|[A-Z]\\d[A-Z]\\s?" + "\\d[A-Z]{2}|[A-Z]{2}\\d[A-Z]\\s?\\d[A-Z]{2}|[A-Z]{3}\\s?\\d[A-Z]" + "{2}", REG_EXTENDED, 0); + + test_comp("a{11}(b{2}c){2}", REG_EXTENDED, 0); + test_comp("a{2}{2}xb+xc*xd?x", REG_EXTENDED, 0); + test_comp("^!packet [0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3} [0-9]+", + REG_EXTENDED, 0); + test_comp("^!pfast [0-9]{1,15} ([0-9]{1,3}\\.){3}[0-9]{1,3}[0-9]{1,5}$", + REG_EXTENDED, 0); + +#if KNOWN_BUG + /* Should these match or not? */ + test_comp("(a)*-\\1b", REG_EXTENDED, 0); + test_exec("aaa-b", 0, REG_NOMATCH); + test_comp("((.*)\\1)+", REG_EXTENDED, 0); + test_exec("xxxxxx", 0, REG_NOMATCH); +#endif + +#ifdef TRE_APPROX + /* + * Approximate matching tests. + * + * The approximate matcher always searches for the best match, and returns + * the leftmost and longest one if there are several best matches. 
+ */ + + test_comp("(fou){# ~1}", REG_EXTENDED, 0); + test_comp("(fuu){#}", REG_EXTENDED, 0); + test_comp("(fuu){# ~}", REG_EXTENDED, 0); + test_comp("(anaconda){ 1i + 1d < 1, #1}", REG_EXTENDED, 0); + test_comp("(anaconda){ 1i + 1d < 1 #1 ~10 }", REG_EXTENDED, 0); + test_comp("(anaconda){ #1, ~1, 1i + 1d < 1 }", REG_EXTENDED, 0); + + test_comp("(znacnda){ #1 ~3 1i + 1d < 1 }", REG_EXTENDED, 0); + test_exec("molasses anaconda foo bar baz smith anderson ", + 0, REG_NOMATCH); + test_comp("(znacnda){ #1 ~3 1i + 1d < 2 }", REG_EXTENDED, 0); + test_exec("molasses anaconda foo bar baz smith anderson ", + 0, REG_OK, 9, 17, 9, 17, END); + test_comp("(ananda){ 1i + 1d < 2 }", REG_EXTENDED, 0); + test_exec("molasses anaconda foo bar baz smith anderson ", + 0, REG_NOMATCH); + + test_comp("(fuu){ +3 -3 ~5}", REG_EXTENDED, 0); + test_exec("anaconda foo bar baz smith anderson", + 0, REG_OK, 9, 10, 9, 10, END); + test_comp("(fuu){ +2 -2 ~5}", REG_EXTENDED, 0); + test_exec("anaconda foo bar baz smith anderson", + 0, REG_OK, 9, 10, 9, 10, END); + test_comp("(fuu){ +3 -3 ~}", REG_EXTENDED, 0); + test_exec("anaconda foo bar baz smith anderson", + 0, REG_OK, 9, 10, 9, 10, END); + + test_comp("(laurikari){ #3, 1i + 1d < 3 }", REG_EXTENDED, 0); + + /* No cost limit. */ + test_comp("(foobar){~}", REG_EXTENDED, 0); + test_exec("xirefoabralfobarxie", 0, REG_OK, 11, 16, 11, 16, END); + + /* At most two errors. */ + test_comp("(foobar){~2}", REG_EXTENDED, 0); + test_exec("xirefoabrzlfd", 0, REG_OK, 4, 9, 4, 9, END); + test_exec("xirefoabzlfd", 0, REG_NOMATCH); + + /* At most two inserts or substitutions and max two errors total. */ + test_comp("(foobar){+2#2~2}", REG_EXTENDED, 0); + test_exec("oobargoobaploowap", 0, REG_OK, 5, 11, 5, 11, END); + + /* Find best whole word match for "foobar". 
*/ + test_comp("\\<(foobar){~}\\>", REG_EXTENDED, 0); + test_exec("zfoobarz", 0, REG_OK, 0, 8, 0, 8, END); + test_exec("boing zfoobarz goobar woop", 0, REG_OK, 15, 21, 15, 21, END); + + /* Match whole string, allow only 1 error. */ + test_comp("^(foobar){~1}$", REG_EXTENDED, 0); + test_exec("foobar", 0, REG_OK, 0, 6, 0, 6, END); + test_exec("xfoobar", 0, REG_OK, 0, 7, 0, 7, END); + /* + This currently fails. + test_exec("foobarx", 0, REG_OK, 0, 7, 0, 7, END); + */ + test_exec("fooxbar", 0, REG_OK, 0, 7, 0, 7, END); + test_exec("foxbar", 0, REG_OK, 0, 6, 0, 6, END); + test_exec("xoobar", 0, REG_OK, 0, 6, 0, 6, END); + test_exec("foobax", 0, REG_OK, 0, 6, 0, 6, END); + test_exec("oobar", 0, REG_OK, 0, 5, 0, 5, END); + test_exec("fobar", 0, REG_OK, 0, 5, 0, 5, END); + test_exec("fooba", 0, REG_OK, 0, 5, 0, 5, END); + test_exec("xfoobarx", 0, REG_NOMATCH); + test_exec("foobarxx", 0, REG_NOMATCH); + test_exec("xxfoobar", 0, REG_NOMATCH); + test_exec("xfoxbar", 0, REG_NOMATCH); + test_exec("foxbarx", 0, REG_NOMATCH); + + /* At most one insert, two deletes, and three substitutions. + Additionally, deletes cost two and substitutes one, and total + cost must be less than 4. */ + test_comp("(foobar){+1 -2 #3, 2d + 1s < 4}", REG_EXTENDED, 0); + test_exec("3oifaowefbaoraofuiebofasebfaobfaorfeoaro", + 0, REG_OK, 26, 33, 26, 33, END); + + /* Partially approximate matches. */ + test_comp("foo(bar){~1}zap", REG_EXTENDED, 0); + test_exec("foobarzap", 0, REG_OK, 0, 9, 3, 6, END); + test_exec("fobarzap", 0, REG_NOMATCH); + test_exec("foobrzap", 0, REG_OK, 0, 8, 3, 5, END); + test_comp("^.*(dot.org){~}.*$", REG_EXTENDED, 0); + test_exec("www.cnn.com 64.236.16.20\n" + "www.slashdot.org 66.35.250.150\n" + "For useful information, use www.slashdot.org\n" + "this is demo data!\n", + 0, REG_OK, 0, 120, 93, 100, END); + + /* Approximate matching and back referencing cannot be used together. 
*/ + test_comp("(foo{~})\\1", REG_EXTENDED, REG_BADPAT); + +#endif /* TRE_APPROX */ + + /* + * Basic tests with pure regular expressions + */ + + /* Basic string matching. */ + test_comp("foobar", REG_EXTENDED, 0); + test_exec("foobar", 0, REG_OK, 0, 6, END); + test_exec("xxxfoobarzapzot", 0, REG_OK, 3, 9, END); + test_comp("foobar", REG_EXTENDED | REG_NOSUB, 0); + test_exec("foobar", 0, REG_OK, END); + test_comp("aaaa", REG_EXTENDED, 0); + test_exec("xxaaaaaaaaaaaaaaaaa", 0, REG_OK, 2, 6, END); + + /* Test zero length matches. */ + test_comp("(a*)", REG_EXTENDED, 0); + test_exec("", 0, REG_OK, 0, 0, 0, 0, END); + + test_comp("(a*)*", REG_EXTENDED, 0); + test_exec("", 0, REG_OK, 0, 0, 0, 0, END); + + test_comp("((a*)*)*", REG_EXTENDED, 0); + test_exec("", 0, REG_OK, 0, 0, 0, 0, 0, 0, END); + test_comp("(a*bcd)*", REG_EXTENDED, 0); + test_exec("aaaaaaaaaaaabcxbcxbcxaabcxaabcx", 0, REG_OK, 0, 0, -1, -1, END); + test_exec("aaaaaaaaaaaabcxbcxbcxaabcxaabc", 0, REG_OK, 0, 0, -1, -1, END); + test_exec("aaaaaaaaaaaabcxbcdbcxaabcxaabc", 0, REG_OK, 0, 0, -1, -1, END); + test_exec("aaaaaaaaaaaabcdbcdbcxaabcxaabc", 0, REG_OK, 0, 18, 15, 18, END); + + test_comp("(a*)+", REG_EXTENDED, 0); + test_exec("-", 0, REG_OK, 0, 0, 0, 0, END); + + /* This test blows up the backtracking matcher. */ + avoid_eflags = REG_BACKTRACKING_MATCHER; + test_comp("((a*)*b)*b", REG_EXTENDED, 0); + test_exec("aaaaaaaaaaaaaaaaaaaaaaaaab", 0, REG_OK, + 25, 26, -1, -1, -1, -1, END); + avoid_eflags = 0; + + test_comp("", 0, 0); + test_exec("", 0, REG_OK, 0, 0, END); + test_exec("foo", 0, REG_OK, 0, 0, END); + + /* Test for submatch addressing which requires arbitrary lookahead. */ + test_comp("(a*)aaaaaa", REG_EXTENDED, 0); + test_exec("aaaaaaaaaaaaaaax", 0, REG_OK, 0, 15, 0, 9, END); + + /* Test leftmost and longest matching and some tricky submatches. 
*/ + test_comp("(a*)(a*)", REG_EXTENDED, 0); + test_exec("aaaa", 0, REG_OK, 0, 4, 0, 4, 4, 4, END); + test_comp("(abcd|abc)(d?)", REG_EXTENDED, 0); + test_exec("abcd", 0, REG_OK, 0, 4, 0, 4, 4, 4, END); + test_comp("(abc|abcd)(d?)", REG_EXTENDED, 0); + test_exec("abcd", 0, REG_OK, 0, 4, 0, 4, 4, 4, END); + test_comp("(abc|abcd)(d?)e", REG_EXTENDED, 0); + test_exec("abcde", 0, REG_OK, 0, 5, 0, 4, 4, 4, END); + test_comp("(abcd|abc)(d?)e", REG_EXTENDED, 0); + test_exec("abcde", 0, REG_OK, 0, 5, 0, 4, 4, 4, END); + test_comp("a(bc|bcd)(d?)", REG_EXTENDED, 0); + test_exec("abcd", 0, REG_OK, 0, 4, 1, 4, 4, 4, END); + test_comp("a(bcd|bc)(d?)", REG_EXTENDED, 0); + test_exec("abcd", 0, REG_OK, 0, 4, 1, 4, 4, 4, END); + test_comp("a*(a?bc|bcd)(d?)", REG_EXTENDED, 0); + test_exec("aaabcd", 0, REG_OK, 0, 6, 3, 6, 6, 6, END); + test_comp("a*(bcd|a?bc)(d?)", REG_EXTENDED, 0); + test_exec("aaabcd", 0, REG_OK, 0, 6, 3, 6, 6, 6, END); + test_comp("(a|(a*b*))*", REG_EXTENDED, 0); + test_exec("", 0, REG_OK, 0, 0, 0, 0, 0, 0, END); + test_exec("a", 0, REG_OK, 0, 1, 0, 1, -1, -1, END); + test_exec("aa", 0, REG_OK, 0, 2, 0, 2, 0, 2, END); + test_exec("aaa", 0, REG_OK, 0, 3, 0, 3, 0, 3, END); + test_exec("bbb", 0, REG_OK, 0, 3, 0, 3, 0, 3, END); + test_exec("aaabbb", 0, REG_OK, 0, 6, 0, 6, 0, 6, END); + test_exec("bbbaaa", 0, REG_OK, 0, 6, 3, 6, 3, 6, END); + test_comp("((a*b*)|a)*", REG_EXTENDED, 0); + test_exec("", 0, REG_OK, 0, 0, 0, 0, 0, 0, END); + test_exec("a", 0, REG_OK, 0, 1, 0, 1, 0, 1, END); + test_exec("aa", 0, REG_OK, 0, 2, 0, 2, 0, 2, END); + test_exec("aaa", 0, REG_OK, 0, 3, 0, 3, 0, 3, END); + test_exec("bbb", 0, REG_OK, 0, 3, 0, 3, 0, 3, END); + test_exec("aaabbb", 0, REG_OK, 0, 6, 0, 6, 0, 6, END); + test_exec("bbbaaa", 0, REG_OK, 0, 6, 3, 6, 3, 6, END); + test_comp("a.*(.*b.*(.*c.*).*d.*).*e.*(.*f.*).*g", REG_EXTENDED, 0); + test_exec("aabbccddeeffgg", 0, REG_OK, 0, 14, 3, 9, 5, 7, 11, 13, END); + test_comp("(wee|week)(night|knights)s*", REG_EXTENDED, 0); + 
test_exec("weeknights", 0, REG_OK, 0, 10, 0, 3, 3, 10, END); + test_exec("weeknightss", 0, REG_OK, 0, 11, 0, 3, 3, 10, END); + test_comp("a*", REG_EXTENDED, 0); + test_exec("aaaaaaaaaa", 0, REG_OK, 0, 10, END); + test_comp("aa*", REG_EXTENDED, 0); + test_exec("aaaaaaaaaa", 0, REG_OK, 0, 10, END); + test_comp("aaa*", REG_EXTENDED, 0); + test_exec("aaaaaaaaaa", 0, REG_OK, 0, 10, END); + test_comp("aaaa*", REG_EXTENDED, 0); + test_exec("aaaaaaaaaa", 0, REG_OK, 0, 10, END); + + /* Test clearing old submatch data with nesting parentheses + and iteration. */ + test_comp("((a)|(b))*c", REG_EXTENDED, 0); + test_exec("aaabc", 0, REG_OK, 0, 5, 3, 4, -1, -1, 3, 4, END); + test_exec("aaaac", 0, REG_OK, 0, 5, 3, 4, 3, 4, -1, -1, END); + test_comp("foo((bar)*)*zot", REG_EXTENDED, 0); + test_exec("foozot", 0, REG_OK, 0, 6, 3, 3, -1, -1, END); + test_exec("foobarzot", 0, REG_OK, 0, 9, 3, 6, 3, 6, END); + test_exec("foobarbarzot", 0, REG_OK, 0, 12, 3, 9, 6, 9, END); + + test_comp("foo((zup)*|(bar)*|(zap)*)*zot", REG_EXTENDED, 0); + test_exec("foobarzapzot", 0, REG_OK, + 0, 12, 6, 9, -1, -1, -1, -1, 6, 9, END); + test_exec("foobarbarzapzot", 0, REG_OK, + 0, 15, 9, 12, -1, -1, -1, -1, 9, 12, END); + test_exec("foozupzot", 0, REG_OK, + 0, 9, 3, 6, 3, 6, -1, -1, -1, -1, END); + test_exec("foobarzot", 0, REG_OK, + 0, 9, 3, 6, -1, -1, 3, 6, -1, -1, END); + test_exec("foozapzot", 0, REG_OK, + 0, 9, 3, 6, -1, -1, -1, -1, 3, 6, END); + test_exec("foozot", 0, REG_OK, + 0, 6, 3, 3, -1, -1, -1, -1, -1, -1, END); + + + /* Test case where, e.g., Perl and Python regexp functions, and many + other backtracking matchers, fail to produce the longest match. + It is not exactly a bug since Perl does not claim to find the + longest match, but a confusing feature and, in my opinion, a bad + design choice because the union operator is traditionally defined + to be commutative (with respect to the language denoted by the RE). 
*/ + test_comp("(a|ab)(blip)?", REG_EXTENDED, 0); + test_exec("ablip", 0, REG_OK, 0, 5, 0, 1, 1, 5, END); + test_exec("ab", 0, REG_OK, 0, 2, 0, 2, -1, -1, END); + test_comp("(ab|a)(blip)?", REG_EXTENDED, 0); + test_exec("ablip", 0, REG_OK, 0, 5, 0, 1, 1, 5, END); + test_exec("ab", 0, REG_OK, 0, 2, 0, 2, -1, -1, END); + + /* Test more submatch addressing. */ + test_comp("((a|b)*)a(a|b)*", REG_EXTENDED, 0); + test_exec("aaaaabaaaba", 0, REG_OK, 0, 11, 0, 10, 9, 10, -1, -1, END); + test_exec("aaaaabaaab", 0, REG_OK, 0, 10, 0, 8, 7, 8, 9, 10, END); + test_exec("caa", 0, REG_OK, 1, 3, 1, 2, 1, 2, -1, -1, END); + test_comp("((a|aba)*)(ababbaba)((a|b)*)", REG_EXTENDED, 0); + test_exec("aabaababbabaaababbab", 0, REG_OK, + 0, 20, 0, 4, 1, 4, 4, 12, 12, 20, 19, 20, END); + test_exec("aaaaababbaba", 0, REG_OK, + 0, 12, 0, 4, 3, 4, 4, 12, 12, 12, -1, -1, END); + test_comp("((a|aba|abb|bba|bab)*)(ababbababbabbbabbbbbbabbaba)((a|b)*)", + REG_EXTENDED, 0); + test_exec("aabaabbbbabababaababbababbabbbabbbbbbabbabababbababababbabababa", + 0, REG_OK, 0, 63, 0, 16, 13, 16, 16, 43, 43, 63, 62, 63, END); + + /* Test for empty subexpressions. */ + test_comp("", 0, 0); + test_exec("", 0, REG_OK, 0, 0, END); + test_exec("foo", 0, REG_OK, 0, 0, END); + test_comp("(a|)", REG_EXTENDED, 0); + test_exec("a", 0, REG_OK, 0, 1, 0, 1, END); + test_exec("b", 0, REG_OK, 0, 0, 0, 0, END); + test_exec("", 0, REG_OK, 0, 0, 0, 0, END); + test_comp("a|", REG_EXTENDED, 0); + test_exec("a", 0, REG_OK, 0, 1, END); + test_exec("b", 0, REG_OK, 0, 0, END); + test_exec("", 0, REG_OK, 0, 0, END); + test_comp("|a", REG_EXTENDED, 0); + test_exec("a", 0, REG_OK, 0, 1, END); + test_exec("b", 0, REG_OK, 0, 0, END); + test_exec("", 0, REG_OK, 0, 0, END); + + /* Miscellaneous tests. 
*/ + test_comp("(a*)b(c*)", REG_EXTENDED, 0); + test_exec("abc", 0, REG_OK, 0, 3, 0, 1, 2, 3, END); + test_exec("***abc***", 0, REG_OK, 3, 6, 3, 4, 5, 6, END); + test_comp("(a)", REG_EXTENDED, 0); + test_exec("a", 0, REG_OK, 0, 1, 0, 1, END); + test_comp("((a))", REG_EXTENDED, 0); + test_exec("a", 0, REG_OK, 0, 1, 0, 1, 0, 1, END); + test_comp("(((a)))", REG_EXTENDED, 0); + test_exec("a", 0, REG_OK, 0, 1, 0, 1, 0, 1, 0, 1, END); + test_comp("((((((((((((((((((((a))))))))))))))))))))", REG_EXTENDED, 0); + test_exec("a", 0, REG_OK, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, END); + + test_comp("ksntoeaiksntoeaikstneoaiksnteoaiksntoeaiskntoeaiskntoekainstoei" + "askntoeakisntoeksaitnokesantiksoentaikosentaiksoentaiksnoeaiskn" + "teoaksintoekasitnoeksaitkosetniaksoetnaisknoetakistoeksintokesa" + "nitksoentaisknoetaisknoetiaksotneaikstoekasitoeskatioksentaikso" + "enatiksoetnaiksonateiksoteaeskanotisknetaiskntoeasknitoskenatis" + "konetaisknoteai", 0, 0); + + test_comp("((aab)|(aac)|(aa*))c", REG_EXTENDED, 0); + test_exec("aabc", 0, REG_OK, 0, 4, 0, 3, 0, 3, -1, -1, -1, -1, END); + test_exec("aacc", 0, REG_OK, 0, 4, 0, 3, -1, -1, 0, 3, -1, -1, END); + test_exec("aaac", 0, REG_OK, 0, 4, 0, 3, -1, -1, -1, -1, 0, 3, END); + + test_comp("^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", + REG_EXTENDED, 0); + test_exec("foo!bar!bas", 0, REG_OK, + 0, 11, 0, 11, -1, -1, -1, -1, 4, 8, 8, 11, END); + test_comp("^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", + REG_EXTENDED, 0); + test_exec("foo!bar!bas", 0, REG_OK, + 0, 11, -1, -1, -1, -1, 4, 8, 8, 11, END); + test_comp("^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", + REG_EXTENDED, 0); + test_exec("foo!bar!bas", 0, REG_OK, + 0, 11, 0, 11, -1, -1, -1, -1, 4, 8, 8, 11, END); + + test_comp("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", + REG_EXTENDED, 0); + test_exec("Muammar Quathafi", 0, REG_OK, 0, 16, -1, -1, 11, 13, END); + + 
test_comp("(Ab|cD)*", REG_EXTENDED | REG_ICASE, 0); + test_exec("aBcD", 0, REG_OK, 0, 4, 2, 4, END); + + test_comp("a**", REG_EXTENDED, REG_BADRPT); + test_comp("a*+", REG_EXTENDED, REG_BADRPT); + test_comp("a+*", REG_EXTENDED, REG_BADRPT); + test_comp("a++", REG_EXTENDED, REG_BADRPT); + test_comp("a?+", REG_EXTENDED, REG_BADRPT); + test_comp("a?*", REG_EXTENDED, REG_BADRPT); + test_comp("a{1,2}*", REG_EXTENDED, REG_BADRPT); + test_comp("a{1,2}+", REG_EXTENDED, REG_BADRPT); + + /* + * Many of the following tests were mostly inspired by (or copied from) the + * libhackerlab posix test suite by Tom Lord. + */ + + test_comp("a", 0, 0); + test_exec("a", 0, REG_OK, 0, 1, END); + test_comp("\\.", 0, 0); + test_exec(".", 0, REG_OK, 0, 1, END); + test_comp("\\[", 0, 0); + test_exec("[", 0, REG_OK, 0, 1, END); + test_comp("\\\\", 0, 0); + test_exec("\\", 0, REG_OK, 0, 1, END); + test_comp("\\*", 0, 0); + test_exec("*", 0, REG_OK, 0, 1, END); + test_comp("\\^", 0, 0); + test_exec("^", 0, REG_OK, 0, 1, END); + test_comp("\\$", 0, 0); + test_exec("$", 0, REG_OK, 0, 1, END); + + test_comp("\\", 0, REG_EESCAPE); + + test_comp("x\\.", 0, 0); + test_exec("x.", 0, REG_OK, 0, 2, END); + test_comp("x\\[", 0, 0); + test_exec("x[", 0, REG_OK, 0, 2, END); + test_comp("x\\\\", 0, 0); + test_exec("x\\", 0, REG_OK, 0, 2, END); + test_comp("x\\*", 0, 0); + test_exec("x*", 0, REG_OK, 0, 2, END); + test_comp("x\\^", 0, 0); + test_exec("x^", 0, REG_OK, 0, 2, END); + test_comp("x\\$", 0, 0); + test_exec("x$", 0, REG_OK, 0, 2, END); + + test_comp("x\\", 0, REG_EESCAPE); + + test_comp(".", 0, 0); + test_exec("a", 0, REG_OK, 0, 1, END); + test_exec("\n", 0, REG_OK, 0, 1, END); + + test_comp("(+|?)", 0, 0); + test_exec("(+|?)", 0, REG_OK, 0, 5, END); + test_exec("+|?", 0, REG_NOMATCH); + test_exec("(+)", 0, REG_NOMATCH); + test_exec("+", 0, REG_NOMATCH); + + + /* + * Test bracket expressions. 
+ */ + + test_comp("[", 0, REG_EBRACK); + test_comp("[]", 0, REG_EBRACK); + test_comp("[^]", 0, REG_EBRACK); + + test_comp("[]x]", 0, 0); + test_exec("]", 0, REG_OK, 0, 1, END); + test_exec("x", 0, REG_OK, 0, 1, END); + + test_comp("[.]", 0, 0); + test_exec(".", 0, REG_OK, 0, 1, END); + test_exec("a", 0, REG_NOMATCH); + + test_comp("[*]", 0, 0); + test_exec("*", 0, REG_OK, 0, 1, END); + + test_comp("[[]", 0, 0); + test_exec("[", 0, REG_OK, 0, 1, END); + + test_comp("[\\]", 0, 0); + test_exec("\\", 0, REG_OK, 0, 1, END); + + test_comp("[-x]", 0, 0); + test_exec("-", 0, REG_OK, 0, 1, END); + test_exec("x", 0, REG_OK, 0, 1, END); + test_comp("[x-]", 0, 0); + test_exec("-", 0, REG_OK, 0, 1, END); + test_exec("x", 0, REG_OK, 0, 1, END); + test_comp("[-]", 0, 0); + test_exec("-", 0, REG_OK, 0, 1, END); + + test_comp("[abc]", 0, 0); + test_exec("a", 0, REG_OK, 0, 1, END); + test_exec("b", 0, REG_OK, 0, 1, END); + test_exec("c", 0, REG_OK, 0, 1, END); + test_exec("d", 0, REG_NOMATCH); + test_exec("xa", 0, REG_OK, 1, 2, END); + test_exec("xb", 0, REG_OK, 1, 2, END); + test_exec("xc", 0, REG_OK, 1, 2, END); + test_exec("xd", 0, REG_NOMATCH); + test_comp("x[abc]", 0, 0); + test_exec("xa", 0, REG_OK, 0, 2, END); + test_exec("xb", 0, REG_OK, 0, 2, END); + test_exec("xc", 0, REG_OK, 0, 2, END); + test_exec("xd", 0, REG_NOMATCH); + test_comp("[^abc]", 0, 0); + test_exec("a", 0, REG_NOMATCH); + test_exec("b", 0, REG_NOMATCH); + test_exec("c", 0, REG_NOMATCH); + test_exec("d", 0, REG_OK, 0, 1, END); + test_exec("xa", 0, REG_OK, 0, 1, END); + test_exec("xb", 0, REG_OK, 0, 1, END); + test_exec("xc", 0, REG_OK, 0, 1, END); + test_exec("xd", 0, REG_OK, 0, 1, END); + test_comp("x[^abc]", 0, 0); + test_exec("xa", 0, REG_NOMATCH); + test_exec("xb", 0, REG_NOMATCH); + test_exec("xc", 0, REG_NOMATCH); + test_exec("xd", 0, REG_OK, 0, 2, END); + + test_comp("[()+?*\\]+", REG_EXTENDED, 0); + test_exec("x\\*?+()x", 0, REG_OK, 1, 7, END); + + /* Standard character classes. 
*/ + test_comp("[[:alnum:]]+", REG_EXTENDED, 0); + test_exec("%abc123890XYZ=", 0, REG_OK, 1, 13, END); + test_comp("[[:cntrl:]]+", REG_EXTENDED, 0); + test_exec("%\n\t\015\f ", 0, REG_OK, 1, 5, END); + test_comp("[[:lower:]]+", REG_EXTENDED, 0); + test_exec("AbcdE", 0, REG_OK, 1, 4, END); + test_comp("[[:lower:]]+", REG_EXTENDED | REG_ICASE, 0); + test_exec("AbcdE", 0, REG_OK, 0, 5, END); + test_comp("[[:space:]]+", REG_EXTENDED, 0); + test_exec("x \t\f\nx", 0, REG_OK, 1, 5, END); + test_comp("[[:alpha:]]+", REG_EXTENDED, 0); + test_exec("%abC123890xyz=", 0, REG_OK, 1, 4, END); + test_comp("[[:digit:]]+", REG_EXTENDED, 0); + test_exec("%abC123890xyz=", 0, REG_OK, 4, 10, END); + test_comp("[^[:digit:]]+", REG_EXTENDED, 0); + test_exec("%abC123890xyz=", 0, REG_OK, 0, 4, END); + test_comp("[[:print:]]+", REG_EXTENDED, 0); + test_exec("\n\t %abC12\f", 0, REG_OK, 2, 9, END); + test_comp("[[:upper:]]+", REG_EXTENDED, 0); + test_exec("\n aBCDEFGHIJKLMNOPQRSTUVWXYz", 0, REG_OK, 3, 27, END); + test_comp("[[:upper:]]+", REG_EXTENDED | REG_ICASE, 0); + test_exec("\n aBCDEFGHIJKLMNOPQRSTUVWXYz", 0, REG_OK, 2, 28, END); +#ifdef HAVE_ISWBLANK +#ifdef HAVE_ISBLANK + test_comp("[[:blank:]]+", REG_EXTENDED, 0); + test_exec("\na \t b", 0, REG_OK, 2, 5, END); +#endif /* HAVE_ISBLANK */ +#endif /* HAVE_ISWBLANK */ + test_comp("[[:graph:]]+", REG_EXTENDED, 0); + test_exec("\n %abC12\f", 0, REG_OK, 2, 8, END); + test_comp("[[:punct:]]+", REG_EXTENDED, 0); + test_exec("a~!@#$%^&*()_+=-`[]{};':\"|\\,./?>< ", + 0, REG_OK, 1, 33, END); + test_comp("[[:xdigit:]]+", REG_EXTENDED, 0); + test_exec("-0123456789ABCDEFabcdef", 0, REG_OK, 1, 23, END); + test_comp("[[:bogus-character-class-name:]", REG_EXTENDED, REG_ECTYPE); + test_comp("[[:\xff:", REG_EXTENDED, REG_ECTYPE); + + + /* Range expressions (assuming that the C locale is being used). 
*/ + test_comp("[a-z]+", REG_EXTENDED, 0); + test_exec("ABCabcxyzABC", 0, REG_OK, 3, 9, END); + test_comp("[z-a]+", REG_EXTENDED, REG_ERANGE); + test_comp("[a-b-c]", 0, REG_ERANGE); + test_comp("[a-a]+", REG_EXTENDED, 0); + test_exec("zaaaaab", 0, REG_OK, 1, 6, END); + test_comp("[--Z]+", REG_EXTENDED, 0); + test_exec("!ABC-./XYZ~", 0, REG_OK, 1, 10, END); + test_comp("[*--]", 0, 0); + test_exec("-", 0, REG_OK, 0, 1, END); + test_exec("*", 0, REG_OK, 0, 1, END); + test_comp("[*--Z]+", REG_EXTENDED, 0); + test_exec("!+*,---ABC", 0, REG_OK, 1, 7, END); + test_comp("[a-]+", REG_EXTENDED, 0); + test_exec("xa-a--a-ay", 0, REG_OK, 1, 9, END); + + /* REG_ICASE and character sets. */ + test_comp("[a-c]*", REG_ICASE | REG_EXTENDED, 0); + test_exec("cABbage", 0, REG_OK, 0, 5, END); + test_comp("[^a-c]*", REG_ICASE | REG_EXTENDED, 0); + test_exec("tObAcCo", 0, REG_OK, 0, 2, END); + test_comp("[A-C]*", REG_ICASE | REG_EXTENDED, 0); + test_exec("cABbage", 0, REG_OK, 0, 5, END); + test_comp("[^A-C]*", REG_ICASE | REG_EXTENDED, 0); + test_exec("tObAcCo", 0, REG_OK, 0, 2, END); + + /* Complex character sets. 
*/ + test_comp("[[:digit:]a-z#$%]+", REG_EXTENDED, 0); + test_exec("__abc#lmn012$x%yz789*", 0, REG_OK, 2, 20, END); + test_comp("[[:digit:]a-z#$%]+", REG_ICASE | REG_EXTENDED, 0); + test_exec("__abcLMN012x%#$yz789*", 0, REG_OK, 2, 20, END); + test_comp("[^[:digit:]a-z#$%]+", REG_EXTENDED, 0); + test_exec("abc#lmn012$x%yz789--@*,abc", 0, REG_OK, 18, 23, END); + test_comp("[^[:digit:]a-z#$%]+", REG_ICASE | REG_EXTENDED, 0); + test_exec("abc#lmn012$x%yz789--@*,abc", 0, REG_OK, 18, 23, END); + test_comp("[^[:digit:]#$%[:xdigit:]]+", REG_ICASE | REG_EXTENDED, 0); + test_exec("abc#lmn012$x%yz789--@*,abc", 0, REG_OK, 4, 7, END); + test_comp("[^-]+", REG_EXTENDED, 0); + test_exec("---afd*(&,ml---", 0, REG_OK, 3, 12, END); + test_comp("[^--Z]+", REG_EXTENDED, 0); + test_exec("---AFD*(&,ml---", 0, REG_OK, 6, 12, END); + test_comp("[^--Z]+", REG_ICASE | REG_EXTENDED, 0); + test_exec("---AFD*(&,ml---", 0, REG_OK, 6, 10, END); + + /* Unsupported things (equivalence classes and multicharacter collating + elements) */ + test_comp("[[.foo.]]", 0, REG_ECOLLATE); + test_comp("[[=foo=]]", 0, REG_ECOLLATE); + test_comp("[[..]]", 0, REG_ECOLLATE); + test_comp("[[==]]", 0, REG_ECOLLATE); + test_comp("[[.]]", 0, REG_ECOLLATE); + test_comp("[[=]]", 0, REG_ECOLLATE); + test_comp("[[.]", 0, REG_ECOLLATE); + test_comp("[[=]", 0, REG_ECOLLATE); + test_comp("[[.", 0, REG_ECOLLATE); + test_comp("[[=", 0, REG_ECOLLATE); + + + + /* Miscellaneous tests. 
*/ + test_comp("abc\\(\\(de\\)\\(fg\\)\\)hi", 0, 0); + test_exec("xabcdefghiy", 0, REG_OK, 1, 10, 4, 8, 4, 6, 6, 8, END); + + test_comp("abc*def", 0, 0); + test_exec("xabdefy", 0, REG_OK, 1, 6, END); + test_exec("xabcdefy", 0, REG_OK, 1, 7, END); + test_exec("xabcccccccdefy", 0, REG_OK, 1, 13, END); + + test_comp("abc\\(def\\)*ghi", 0, 0); + test_exec("xabcghiy", 0, REG_OK, 1, 7, -1, -1, END); + test_exec("xabcdefghi", 0, REG_OK, 1, 10, 4, 7, END); + test_exec("xabcdefdefdefghi", 0, REG_OK, 1, 16, 10, 13, END); + + test_comp("a?", REG_EXTENDED, REG_OK); + test_exec("aaaaa", 0, REG_OK, 0, 1, END); + test_exec("xaaaaa", 0, REG_OK, 0, 0, END); + test_comp("a+", REG_EXTENDED, REG_OK); + test_exec("aaaaa", 0, REG_OK, 0, 5, END); + test_exec("xaaaaa", 0, REG_OK, 1, 6, END); + + + /* + * Test anchors and their behaviour with the REG_NEWLINE compilation + * flag and the REG_NOTBOL, REG_NOTEOL execution flags. + */ + + /* Normally, `^' matches the empty string at beginning of input. + If REG_NOTBOL is used, `^' won't match the zero length string. */ + test_comp("^abc", 0, 0); + test_exec("abcdef", 0, REG_OK, 0, 3, END); + test_exec("abcdef", REG_NOTBOL, REG_NOMATCH); + test_exec("xyzabcdef", 0, REG_NOMATCH); + test_exec("xyzabcdef", REG_NOTBOL, REG_NOMATCH); + test_exec("\nabcdef", 0, REG_NOMATCH); + test_exec("\nabcdef", REG_NOTBOL, REG_NOMATCH); + + /* Normally, `$' matches the empty string at end of input. + If REG_NOTEOL is used, `$' won't match the zero length string. 
*/ + test_comp("abc$", 0, 0); + test_exec("defabc", 0, REG_OK, 3, 6, END); + test_exec("defabc", REG_NOTEOL, REG_NOMATCH); + test_exec("defabcxyz", 0, REG_NOMATCH); + test_exec("defabcxyz", REG_NOTEOL, REG_NOMATCH); + test_exec("defabc\n", 0, REG_NOMATCH); + test_exec("defabc\n", REG_NOTEOL, REG_NOMATCH); + + test_comp("^abc$", 0, 0); + test_exec("abc", 0, REG_OK, 0, 3, END); + test_exec("abc", REG_NOTBOL, REG_NOMATCH); + test_exec("abc", REG_NOTEOL, REG_NOMATCH); + test_exec("abc", REG_NOTBOL | REG_NOTEOL, REG_NOMATCH); + test_exec("\nabc\n", 0, REG_NOMATCH); + test_exec("defabc\n", 0, REG_NOMATCH); + test_exec("\nabcdef", 0, REG_NOMATCH); + test_exec("abcdef", 0, REG_NOMATCH); + test_exec("defabc", 0, REG_NOMATCH); + test_exec("abc\ndef", 0, REG_NOMATCH); + test_exec("def\nabc", 0, REG_NOMATCH); + + /* If REG_NEWLINE is used, `^' matches the empty string immediately after + a newline, regardless of whether execution flags contain REG_NOTBOL. + Similarly, if REG_NEWLINE is used, `$' matches the empty string + immediately before a newline, regardless of execution flags. 
*/ + test_comp("^abc", REG_NEWLINE, 0); + test_exec("abcdef", 0, REG_OK, 0, 3, END); + test_exec("abcdef", REG_NOTBOL, REG_NOMATCH); + test_exec("xyzabcdef", 0, REG_NOMATCH); + test_exec("xyzabcdef", REG_NOTBOL, REG_NOMATCH); + test_exec("\nabcdef", 0, REG_OK, 1, 4, END); + test_exec("\nabcdef", REG_NOTBOL, 0, 1, 4, END); + test_comp("abc$", REG_NEWLINE, 0); + test_exec("defabc", 0, REG_OK, 3, 6, END); + test_exec("defabc", REG_NOTEOL, REG_NOMATCH); + test_exec("defabcxyz", 0, REG_NOMATCH); + test_exec("defabcxyz", REG_NOTEOL, REG_NOMATCH); + test_exec("defabc\n", 0, REG_OK, 3, 6, END); + test_exec("defabc\n", REG_NOTEOL, 0, 3, 6, END); + test_comp("^abc$", REG_NEWLINE, 0); + test_exec("abc", 0, REG_OK, 0, 3, END); + test_exec("abc", REG_NOTBOL, REG_NOMATCH); + test_exec("abc", REG_NOTEOL, REG_NOMATCH); + test_exec("abc", REG_NOTBOL | REG_NOTEOL, REG_NOMATCH); + test_exec("\nabc\n", 0, REG_OK, 1, 4, END); + test_exec("defabc\n", 0, REG_NOMATCH); + test_exec("\nabcdef", 0, REG_NOMATCH); + test_exec("abcdef", 0, REG_NOMATCH); + test_exec("abcdef", REG_NOTBOL, REG_NOMATCH); + test_exec("defabc", 0, REG_NOMATCH); + test_exec("defabc", REG_NOTEOL, REG_NOMATCH); + test_exec("abc\ndef", 0, REG_OK, 0, 3, END); + test_exec("abc\ndef", REG_NOTBOL, REG_NOMATCH); + test_exec("abc\ndef", REG_NOTEOL, 0, 0, 3, END); + test_exec("abc\ndef", REG_NOTBOL | REG_NOTEOL, REG_NOMATCH); + test_exec("def\nabc", 0, REG_OK, 4, 7, END); + test_exec("def\nabc", REG_NOTBOL, 0, 4, 7, END); + test_exec("def\nabc", REG_NOTEOL, REG_NOMATCH); + test_exec("def\nabc", REG_NOTBOL | REG_NOTEOL, REG_NOMATCH); + + /* With BRE syntax, `^' has a special meaning only at the beginning of the + RE or the beginning of a parenthesized subexpression. 
*/ + test_comp("a\\{0,1\\}^bc", 0, 0); + test_exec("bc", 0, REG_NOMATCH); + test_exec("^bc", 0, REG_OK, 0, 3, END); + test_exec("abc", 0, REG_NOMATCH); + test_exec("a^bc", 0, REG_OK, 0, 4, END); + test_comp("a\\{0,1\\}\\(^bc\\)", 0, 0); + test_exec("bc", 0, REG_OK, 0, 2, 0, 2, END); + test_exec("^bc", 0, REG_NOMATCH); + test_exec("abc", 0, REG_NOMATCH); + test_exec("a^bc", 0, REG_NOMATCH); + test_comp("(^a", 0, 0); + test_exec("(^a", 0, REG_OK, 0, 3, END); + + /* With BRE syntax, `$' has a special meaning only at the end of the + RE or the end of a parenthesized subexpression. */ + test_comp("ab$c\\{0,1\\}", 0, 0); + test_exec("ab", 0, REG_NOMATCH); + test_exec("ab$", 0, REG_OK, 0, 3, END); + test_exec("abc", 0, REG_NOMATCH); + test_exec("ab$c", 0, REG_OK, 0, 4, END); + test_comp("\\(ab$\\)c\\{0,1\\}", 0, 0); + test_exec("ab", 0, REG_OK, 0, 2, 0, 2, END); + test_exec("ab$", 0, REG_NOMATCH); + test_exec("abc", 0, REG_NOMATCH); + test_exec("ab$c", 0, REG_NOMATCH); + test_comp("a$)", 0, 0); + test_exec("a$)", 0, REG_OK, 0, 3, END); + + /* Miscellaneous tests for `^' and `$'. */ + test_comp("foo^$", REG_EXTENDED, 0); + test_exec("foo", 0, REG_NOMATCH); + test_comp("x$\n^y", REG_EXTENDED | REG_NEWLINE, 0); + test_exec("foo\nybarx\nyes\n", 0, REG_OK, 8, 11, END); + test_comp("^$", 0, 0); + test_exec("x", 0, REG_NOMATCH); + test_exec("", 0, REG_OK, 0, 0, END); + test_exec("\n", 0, REG_NOMATCH); + test_comp("^$", REG_NEWLINE, 0); + test_exec("x", 0, REG_NOMATCH); + test_exec("", 0, REG_OK, 0, 0, END); + test_exec("\n", 0, REG_OK, 0, 0, END); + + /* REG_NEWLINE causes `.' not to match newlines. */ + test_comp(".*", 0, 0); + test_exec("ab\ncd", 0, REG_OK, 0, 5, END); + test_comp(".*", REG_NEWLINE, 0); + test_exec("ab\ncd", 0, REG_OK, 0, 2, END); + + /* + * Tests for nonstandard syntax extensions. + */ + + /* Zero width assertions. 
*/ + test_comp("\\", REG_EXTENDED, 0); + test_exec("axx xaa", 0, REG_OK, 2, 3, END); + test_exec("aax", 0, REG_OK, 2, 3, END); + test_comp("\\bx", REG_EXTENDED, 0); + test_exec("axx xaa", 0, REG_OK, 4, 5, END); + test_exec("aax", 0, REG_NOMATCH); + test_exec("xax", 0, REG_OK, 0, 1, END); + test_comp("x\\b", REG_EXTENDED, 0); + test_exec("axx xaa", 0, REG_OK, 2, 3, END); + test_exec("aax", 0, REG_OK, 2, 3, END); + test_exec("xaa", 0, REG_NOMATCH); + test_comp("\\Bx", REG_EXTENDED, 0); + test_exec("aax xxa", 0, REG_OK, 2, 3, END); + test_comp("\\Bx\\b", REG_EXTENDED, 0); + test_exec("aax xxx", 0, REG_OK, 2, 3, END); + test_comp("\\<.", REG_EXTENDED, 0); + test_exec(";xaa", 0, REG_OK, 1, 2, END); + + /* Shorthands for character classes. */ + test_comp("\\w+", REG_EXTENDED, 0); +#ifdef SRC_IN_ISO_8859_1 + test_exec(",.(a23_Nt-o)", 0, REG_OK, 3, 9, END); +#else +#ifdef SRC_IN_UTF_8 + /* iconv -f ISO-8859-1 -t UTF-8 file_with_lines_above > www_utf_8 */ + test_exec(",.(a23_Nt-öo)", 0, REG_OK, 3, 9, END); +#else + unsigned char str_000[] = { + ',','.','(','a','2','3','_','N','t','-',0xF6,'o',0x00 + }; + test_exec((char const *)str_000, 0, REG_OK, 3, 9, END); +#endif +#endif + test_comp("\\d+", REG_EXTENDED, 0); + test_exec("uR120_4=v4", 0, REG_OK, 2, 5, END); + test_comp("\\D+", REG_EXTENDED, 0); + test_exec("120d_=vA4s", 0, REG_OK, 3, 8, END); + + /* Quoted special characters. */ + test_comp("\\t", REG_EXTENDED, 0); + test_comp("\\e", REG_EXTENDED, 0); + + /* Test the \x1B and \x{263a} extensions for specifying 8 bit and wide + characters in hexadecimal. 
*/ + test_comp("\\x41", REG_EXTENDED, 0); + test_exec("ABC", 0, REG_OK, 0, 1, END); + test_comp("\\x5", REG_EXTENDED, 0); + test_exec("\005", 0, REG_OK, 0, 1, END); + test_comp("\\x5r", REG_EXTENDED, 0); + test_exec("\005r", 0, REG_OK, 0, 2, END); + test_comp("\\x", REG_EXTENDED, 0); + test_nexec("\000", 1, 0, REG_OK, 0, 1, END); + test_comp("\\xr", REG_EXTENDED, 0); + test_nexec("\000r", 2, 0, REG_OK, 0, 2, END); + test_comp("\\x{41}", REG_EXTENDED, 0); + test_exec("ABC", 0, REG_OK, 0, 1, END); + test_comp("\\x{5}", REG_EXTENDED, 0); + test_exec("\005", 0, REG_OK, 0, 1, END); + test_comp("\\x{5}r", REG_EXTENDED, 0); + test_exec("\005r", 0, REG_OK, 0, 2, END); + test_comp("\\x{}", REG_EXTENDED, 0); + test_nexec("\000", 1, 0, REG_OK, 0, 1, END); + test_comp("\\x{}r", REG_EXTENDED, 0); + test_nexec("\000r", 2, 0, REG_OK, 0, 2, END); + test_comp("\\x{00000000}", REG_EXTENDED, 0); + test_comp("\\x{000000000}", REG_EXTENDED, REG_EBRACE); + + /* Tests for (?inrU-inrU) and (?inrU-inrU:) */ + test_comp("foo(?i)bar", REG_EXTENDED, 0); + test_exec("fooBaR", 0, REG_OK, 0, 6, END); + test_comp("foo(?i)bar|zap", REG_EXTENDED, 0); + test_exec("fooBaR", 0, REG_OK, 0, 6, END); + test_exec("foozap", 0, REG_OK, 0, 6, END); + test_exec("foozAp", 0, REG_OK, 0, 6, END); + test_exec("zap", 0, REG_NOMATCH); + test_comp("foo(?-i:zap)zot", REG_EXTENDED | REG_ICASE, 0); + test_exec("FoOzapZOt", 0, REG_OK, 0, 9, END); + test_exec("FoOzApZOt", 0, REG_NOMATCH); + test_comp("foo(?i:bar|zap)", REG_EXTENDED, 0); + test_exec("foozap", 0, REG_OK, 0, 6, END); + test_exec("foobar", 0, REG_OK, 0, 6, END); + test_exec("foobAr", 0, REG_OK, 0, 6, END); + test_exec("fooZaP", 0, REG_OK, 0, 6, END); + test_comp("foo(?U:o*)(o*)", REG_EXTENDED, 0); + test_exec("foooo", 0, REG_OK, 0, 5, 3, 5, END); + + /* Test comment syntax. */ + test_comp("foo(?# This here is a comment. )bar", REG_EXTENDED, 0); + test_exec("foobar", 0, REG_OK, 0, 6, END); + + /* Tests for \Q and \E. 
*/ + test_comp("\\((\\Q)?:\\<[^$\\E)", REG_EXTENDED, 0); + test_exec("()?:\\<[^$", 0, REG_OK, 0, 9, 1, 9, END); + test_comp("\\Qabc\\E.*", REG_EXTENDED, 0); + test_exec("abcdef", 0, REG_OK, 0, 6, END); + test_comp("\\Qabc\\E.*|foo", REG_EXTENDED, 0); + test_exec("parabc123wxyz", 0, REG_OK, 3, 13, END); + test_exec("fooabc123wxyz", 0, REG_OK, 0, 3, END); + + /* + * Test integer parser used for bounded repetitions. + */ + + test_comp("a{9223372036854775808,}", REG_EXTENDED, REG_BADMAX); + test_comp("a{9223372036854775808}", REG_EXTENDED, REG_BADMAX); + test_comp("a{9223372036854775807,}", REG_EXTENDED, REG_BADMAX); + test_comp("a{9223372036854775807}", REG_EXTENDED, REG_BADMAX); + test_comp("a{2147483648,}", REG_EXTENDED, REG_BADMAX); + test_comp("a{2147483648}", REG_EXTENDED, REG_BADMAX); + test_comp("a{2147483647,}", REG_EXTENDED, REG_BADMAX); + test_comp("a{2147483647}", REG_EXTENDED, REG_BADMAX); + test_comp("a{32768,}", REG_EXTENDED, REG_BADMAX); + test_comp("a{32768}", REG_EXTENDED, REG_BADMAX); + test_comp("a{32767,}", REG_EXTENDED, REG_BADMAX); + test_comp("a{32767}", REG_EXTENDED, REG_BADMAX); + test_comp("a{256,}", REG_EXTENDED, REG_BADMAX); + test_comp("a{256}", REG_EXTENDED, REG_BADMAX); + test_comp("a{255,}", REG_EXTENDED, REG_OK); + test_comp("a{255}", REG_EXTENDED, REG_OK); + + /* + * Test bounded repetitions. 
+ */ + + test_comp("a{0,0}", REG_EXTENDED, REG_OK); + test_exec("aaa", 0, REG_OK, 0, 0, END); + test_comp("a{0,1}", REG_EXTENDED, REG_OK); + test_exec("aaa", 0, REG_OK, 0, 1, END); + test_comp("a{1,1}", REG_EXTENDED, REG_OK); + test_exec("aaa", 0, REG_OK, 0, 1, END); + test_comp("a{1,3}", REG_EXTENDED, REG_OK); + test_exec("xaaaaa", 0, REG_OK, 1, 4, END); + test_comp("a{0,3}", REG_EXTENDED, REG_OK); + test_exec("aaaaa", 0, REG_OK, 0, 3, END); + test_comp("a{0,}", REG_EXTENDED, REG_OK); + test_exec("", 0, REG_OK, 0, 0, END); + test_exec("a", 0, REG_OK, 0, 1, END); + test_exec("aa", 0, REG_OK, 0, 2, END); + test_exec("aaa", 0, REG_OK, 0, 3, END); + test_comp("a{1,}", REG_EXTENDED, REG_OK); + test_exec("", 0, REG_NOMATCH); + test_exec("a", 0, REG_OK, 0, 1, END); + test_exec("aa", 0, REG_OK, 0, 2, END); + test_exec("aaa", 0, REG_OK, 0, 3, END); + test_comp("a{2,}", REG_EXTENDED, REG_OK); + test_exec("", 0, REG_NOMATCH); + test_exec("a", 0, REG_NOMATCH); + test_exec("aa", 0, REG_OK, 0, 2, END); + test_exec("aaa", 0, REG_OK, 0, 3, END); + test_comp("a{3,}", REG_EXTENDED, REG_OK); + test_exec("", 0, REG_NOMATCH); + test_exec("a", 0, REG_NOMATCH); + test_exec("aa", 0, REG_NOMATCH); + test_exec("aaa", 0, REG_OK, 0, 3, END); + test_exec("aaaa", 0, REG_OK, 0, 4, END); + test_exec("aaaaa", 0, REG_OK, 0, 5, END); + test_exec("aaaaaa", 0, REG_OK, 0, 6, END); + test_exec("aaaaaaa", 0, REG_OK, 0, 7, END); + test_comp("a{,}", REG_EXTENDED, REG_OK); + test_exec("", 0, REG_OK, 0, 0, END); + test_exec("aaa", 0, REG_OK, 0, 3, END); + test_comp("a{,0}", REG_EXTENDED, REG_OK); + test_exec("", 0, REG_OK, 0, 0, END); + test_exec("aaa", 0, REG_OK, 0, 0, END); + test_comp("a{,1}", REG_EXTENDED, REG_OK); + test_exec("", 0, REG_OK, 0, 0, END); + test_exec("a", 0, REG_OK, 0, 1, END); + test_exec("aa", 0, REG_OK, 0, 1, END); + test_comp("a{,2}", REG_EXTENDED, REG_OK); + test_exec("", 0, REG_OK, 0, 0, END); + test_exec("a", 0, REG_OK, 0, 1, END); + test_exec("aa", 0, REG_OK, 0, 2, END); + 
test_exec("aaa", 0, REG_OK, 0, 2, END); + + test_comp("a{5,10}", REG_EXTENDED, REG_OK); + test_comp("a{6,6}", REG_EXTENDED, REG_OK); + test_exec("aaaaaaaaaaaa", 0, REG_OK, 0, 6, END); + test_exec("xxaaaaaaaaaaaa", 0, REG_OK, 2, 8, END); + test_exec("xxaaaaa", 0, REG_NOMATCH); + test_comp("a{5,6}", REG_EXTENDED, REG_OK); + test_exec("aaaaaaaaaaaa", 0, REG_OK, 0, 6, END); + test_exec("xxaaaaaaaaaaaa", 0, REG_OK, 2, 8, END); + test_exec("xxaaaaa", 0, REG_OK, 2, 7, END); + test_exec("xxaaaa", 0, REG_NOMATCH); + + /* Trickier ones... */ + test_comp("([ab]{5,10})*b", REG_EXTENDED, REG_OK); + test_exec("bbbbbabaaaaab", 0, REG_OK, 0, 13, 5, 12, END); + test_exec("bbbbbbaaaaab", 0, REG_OK, 0, 12, 5, 11, END); + test_exec("bbbbbbaaaab", 0, REG_OK, 0, 11, 0, 10, END); + test_exec("bbbbbbaaab", 0, REG_OK, 0, 10, 0, 9, END); + test_exec("bbbbbbaab", 0, REG_OK, 0, 9, 0, 8, END); + test_exec("bbbbbbab", 0, REG_OK, 0, 8, 0, 7, END); + + test_comp("([ab]*)(ab[ab]{5,10})ba", REG_EXTENDED, REG_OK); + test_exec("abbabbbabaabbbbbbbbbbbbbabaaaabab", 0, REG_OK, + 0, 10, 0, 0, 0, 8, END); + test_exec("abbabbbabaabbbbbbbbbbbbabaaaaabab", 0, REG_OK, + 0, 32, 0, 23, 23, 30, END); + test_exec("abbabbbabaabbbbbbbbbbbbabaaaabab", 0, REG_OK, + 0, 24, 0, 10, 10, 22, END); + test_exec("abbabbbabaabbbbbbbbbbbba", 0, REG_OK, + 0, 24, 0, 10, 10, 22, END); + + test_comp("^((a{1,2})?x)*y", REG_EXTENDED | REG_NOSUB, REG_OK); + test_exec("y", 0, REG_OK, END); + test_exec("xy", 0, REG_OK, END); + test_exec("axy", 0, REG_OK, END); + test_exec("aaxy", 0, REG_OK, END); + test_exec("aaaxy", 0, REG_NOMATCH, END); + + /* Test repeating something that has submatches inside. 
*/ + test_comp("(a){0,5}", REG_EXTENDED, 0); + test_exec("", 0, REG_OK, 0, 0, -1, -1, END); + test_exec("a", 0, REG_OK, 0, 1, 0, 1, END); + test_exec("aa", 0, REG_OK, 0, 2, 1, 2, END); + test_exec("aaa", 0, REG_OK, 0, 3, 2, 3, END); + test_exec("aaaa", 0, REG_OK, 0, 4, 3, 4, END); + test_exec("aaaaa", 0, REG_OK, 0, 5, 4, 5, END); + test_exec("aaaaaa", 0, REG_OK, 0, 5, 4, 5, END); + + test_comp("(a){2,3}", REG_EXTENDED, 0); + test_exec("", 0, REG_NOMATCH); + test_exec("a", 0, REG_NOMATCH); + test_exec("aa", 0, REG_OK, 0, 2, 1, 2, END); + test_exec("aaa", 0, REG_OK, 0, 3, 2, 3, END); + test_exec("aaaa", 0, REG_OK, 0, 3, 2, 3, END); + + test_comp("\\(a\\)\\{4\\}", 0, 0); + test_exec("aaaa", 0, REG_OK, 0, 4, 3, 4, END); + + test_comp("\\(a*\\)\\{2\\}", 0, 0); + test_exec("a", 0, REG_OK, 0, 1, 1, 1, END); + + test_comp("((..)|(.)){2}", REG_EXTENDED, 0); + test_exec("aa", 0, REG_OK, 0, 2, 1, 2, -1, -1, 1, 2, END); + + /* Nested repeats. */ + test_comp("(.){2}{3}", REG_EXTENDED, 0); + test_exec("xxxxx", 0, REG_NOMATCH); + test_exec("xxxxxx", 0, REG_OK, 0, 6, 5, 6, END); + test_comp("(..){2}{3}", REG_EXTENDED, 0); + test_exec("xxxxxxxxxxx", 0, REG_NOMATCH); + test_exec("xxxxxxxxxxxx", 0, REG_OK, 0, 12, 10, 12, END); + test_comp("((..){2}.){3}", REG_EXTENDED, 0); + test_exec("xxxxxxxxxxxxxx", 0, REG_NOMATCH); + test_exec("xxxxxxxxxxxxxxx", 0, REG_OK, 0, 15, 10, 15, 12, 14, END); + test_comp("((..){1,2}.){3}", REG_EXTENDED, 0); + test_exec("xxxxxxxx", 0, REG_NOMATCH); + test_exec("xxxxxxxxx", 0, REG_OK, 0, 9, 6, 9, 6, 8, END); + test_exec("xxxxxxxxxx", 0, REG_OK, 0, 9, 6, 9, 6, 8, END); + test_exec("xxxxxxxxxxx", 0, REG_OK, 0, 11, 8, 11, 8, 10, END); + test_comp("a{2}{2}x", REG_EXTENDED, 0); + test_exec("", 0, REG_NOMATCH); + test_exec("x", 0, REG_NOMATCH); + test_exec("ax", 0, REG_NOMATCH); + test_exec("aax", 0, REG_NOMATCH); + test_exec("aaax", 0, REG_NOMATCH); + test_exec("aaaax", 0, REG_OK, 0, 5, END); + test_exec("aaaaax", 0, REG_OK, 1, 6, END); + test_exec("aaaaaax", 
0, REG_OK, 2, 7, END); + test_exec("aaaaaaax", 0, REG_OK, 3, 8, END); + test_exec("aaaaaaaax", 0, REG_OK, 4, 9, END); + + /* Repeats with iterations inside. */ + test_comp("([a-z]+){2,5}", REG_EXTENDED, 0); + test_exec("a\n", 0, REG_NOMATCH); + test_exec("aa\n", 0, REG_OK, 0, 2, 1, 2, END); + + /* Multiple repeats in one regexp. */ + test_comp("a{3}b{3}", REG_EXTENDED, 0); + test_exec("aaabbb", 0, REG_OK, 0, 6, END); + test_exec("aaabbbb", 0, REG_OK, 0, 6, END); + test_exec("aaaabbb", 0, REG_OK, 1, 7, END); + test_exec("aabbb", 0, REG_NOMATCH); + test_exec("aaabb", 0, REG_NOMATCH); + + /* Test that different types of repetitions work correctly when used + in the same regexp. */ + test_comp("a{2}{2}xb+xc*xd?x", REG_EXTENDED, 0); + test_exec("aaaaxbxcxdx", 0, REG_OK, 0, 11, END); + test_exec("aaaxbxcxdx", 0, REG_NOMATCH); + test_exec("aabxcxdx", 0, REG_NOMATCH); + test_exec("aaaacxdx", 0, REG_NOMATCH); + test_exec("aaaaxbdx", 0, REG_NOMATCH); + test_comp("^!packet [0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3} [0-9]+", + REG_EXTENDED, 0); + test_exec("!packet 10.0.2.4 12765 ei voittoa", 0, REG_OK, 0, 22, END); + + /* + * Back referencing tests. 
+ */ + test_comp("([a-z]*) \\1", REG_EXTENDED, 0); + test_exec("foobar foobar", 0, REG_OK, 0, 13, 0, 6, END); + + /* Searching for a leftmost longest square (repeated string) */ + test_comp("(.*)\\1", REG_EXTENDED, 0); + test_exec("foobarfoobar", 0, REG_OK, 0, 12, 0, 6, END); + + test_comp("a(b)*c\\1", REG_EXTENDED, 0); + test_exec("acb", 0, REG_OK, 0, 2, -1, -1, END); + test_exec("abbcbbb", 0, REG_OK, 0, 5, 2, 3, END); + test_exec("abbdbd", 0, REG_NOMATCH); + + test_comp("([a-c]*)\\1", REG_EXTENDED, 0); + test_exec("abcacdef", 0, REG_OK, 0, 0, 0, 0, END); + test_exec("abcabcabcd", 0, REG_OK, 0, 6, 0, 3, END); + test_comp("(.{1,3})\\1", REG_EXTENDED, 0); + test_exec("foo", 0, REG_OK, 1, 3, 1, 2, END); + + test_comp("\\(a*\\)*\\(x\\)\\(\\1\\)", 0, 0); + test_exec("x", 0, REG_OK, 0, 1, 0, 0, 0, 1, 1, 1, END); +#if KNOWN_BUG + test_exec("ax", 0, REG_OK, 0, 2, 1, 1, 1, 2, 2, 2, END); +#endif + + test_comp("(a)\\1{1,2}", REG_EXTENDED, 0); + test_exec("aabc", 0, REG_OK, 0, 2, 0, 1, END); + + test_comp("((.*)\\1)+", REG_EXTENDED, 0); + test_exec("aa", 0, REG_OK, 0, 2, 0, 2, 0, 1, END); + +#if KNOWN_BUG + test_comp("()(\\1\\1)*", REG_EXTENDED, 0); + test_exec("", 0, REG_OK, 0, 0, 0, 0, 0, 0, END); +#endif + + /* Check that back references work with REG_NOSUB. */ + test_comp("(o)\\1", REG_EXTENDED | REG_NOSUB, 0); + test_exec("foobar", 0, REG_OK, END); + test_comp("(o)\\1", REG_EXTENDED, 0); + test_exec("foobar", 0, REG_OK, 1, 3, 1, 2, END); + test_comp("(o)\\1", REG_EXTENDED, 0); + test_exec("fobar", 0, REG_NOMATCH); + + test_comp("\\1foo", REG_EXTENDED, REG_ESUBREG); + test_comp("\\1foo(bar)", REG_EXTENDED, 0); + + /* Back reference with zero-width assertion. */ + test_comp("(.)\\1$", REG_EXTENDED, 0); + test_exec("foox", 0, REG_NOMATCH); + test_exec("foo", 0, REG_OK, 1, 3, 1, 2, END); + + /* Back references together with {}. 
*/ + test_comp("([0-9]{5})\\1", REG_EXTENDED, 0); + test_exec("12345", 0, REG_NOMATCH); + test_exec("1234512345", 0, REG_OK, 0, 10, 0, 5, END); + test_comp("([0-9]{4})\\1", REG_EXTENDED, 0); + test_exec("1234", 0, REG_NOMATCH); + test_exec("12341234", 0, REG_OK, 0, 8, 0, 4, END); + + /* + * Test minimal repetitions (non-greedy repetitions) + */ + avoid_eflags = REG_BACKTRACKING_MATCHER | REG_APPROX_MATCHER; + + /* Basic .*/ + test_comp(".*?", REG_EXTENDED, 0); + test_exec("abcd", 0, REG_OK, 0, 0, END); + test_comp(".+?", REG_EXTENDED, 0); + test_exec("abcd", 0, REG_OK, 0, 1, END); + test_comp(".??", REG_EXTENDED, 0); + test_exec("abcd", 0, REG_OK, 0, 0, END); + test_comp(".{2,5}?", REG_EXTENDED, 0); + test_exec("abcd", 0, REG_OK, 0, 2, END); + + /* More complicated. */ + test_comp("(.*?)", REG_EXTENDED, 0); + test_exec("text1text2", 0, REG_OK, 0, 12, 3, 8, END); + test_comp("a(.*?)(foo|bar|zap)", REG_EXTENDED, 0); + test_exec("hubba wooga-booga zabar gafoo wazap", 0, REG_OK, + 4, 23, 5, 20, 20, 23, END); + + /* Test REG_UNGREEDY. */ + test_comp(".*", REG_EXTENDED | REG_UNGREEDY, 0); + test_exec("abcd", 0, REG_OK, 0, 0, END); + test_comp(".*?", REG_EXTENDED | REG_UNGREEDY, 0); + test_exec("abcd", 0, REG_OK, 0, 4, END); + + avoid_eflags = 0; + + + /* + * Error reporting tests. 
+ */ + + test_comp("\\", REG_EXTENDED, REG_EESCAPE); + test_comp("\\\\", REG_EXTENDED, REG_OK); + test_exec("\\", 0, REG_OK, 0, 1, END); + test_comp("(", REG_EXTENDED, REG_EPAREN); + test_comp("(aaa", REG_EXTENDED, REG_EPAREN); + test_comp(")", REG_EXTENDED, REG_OK); + test_exec(")", 0, REG_OK, 0, 1, END); + test_comp("a{1", REG_EXTENDED, REG_EBRACE); + test_comp("a{1,x}", REG_EXTENDED, REG_BADBR); + test_comp("a{1x}", REG_EXTENDED, REG_BADBR); + test_comp("a{1,0}", REG_EXTENDED, REG_BADBR); + test_comp("a{x}", REG_EXTENDED, REG_BADBR); + test_comp("a{}", REG_EXTENDED, REG_BADBR); + + + test_comp("\\", 0, REG_EESCAPE); + test_comp("\\(", 0, REG_EPAREN); + test_comp("\\)", 0, REG_EPAREN); + test_comp("a\\{1", 0, REG_EBRACE); + test_comp("a\\{1,x\\}", 0, REG_BADBR); + test_comp("a\\{1x\\}", 0, REG_BADBR); + test_comp("a\\{1,0\\}", 0, REG_BADBR); + test_comp("a\\{x\\}", 0, REG_BADBR); + test_comp("a\\{\\}", 0, REG_BADBR); + test_comp("a\\{1,256\\}", 0, REG_BADMAX); + + + test_comp(NULL, REG_BASIC, REG_OK); + test_comp(NULL, REG_EXTENDED, REG_OK); + + + /* + * Internationalization tests. + */ + + /* This same test with the correct locale is below. + TBR: This is a guess for the source encoding, see comments below after the locale is set to a Japanese locale. */ +#ifdef SRC_IN_EUC_JP + test_comp("+", REG_EXTENDED, 0); + test_exec("ξޤϡ", + 0, REG_OK, 10, 13, END); +#else +#ifdef SRC_IN_UTF_8 + /* iconv -f EUC_JP -t UTF-8 file_with_lines_above > zzz_utf_8 + This may be incorrect because the match results might be incorrect for UTF-8, I (TBR) just don't know enough to be certain. + It compiles and runs successfully on my desktop with the C.UTF-8 locale. */ + test_comp("機+", REG_EXTENDED, 0); + test_exec("この賞は、機・利便性・セキ", + 0, REG_OK, 15, 18, END); +#else + /* Represent the test strings as a sequence of bytes so we don't run afoul of the compiler's expected source-charset. 
*/ + unsigned char str_001[] = { + 0xB5,0xA1,'+',0x00 + }; + unsigned char str_002[] = { + 0xA4,0xB3,0xA4,0xCE,0xBE,0xDE,0xA4,0xCF,0xA1,0xA2,0xB5,0xA1,0xA1,0xA6,0xCD,0xF8,0xCA,0xD8,0xC0,0xAD,0xA1,0xA6,0xA5,0xBB,0xA5,0xAD,0x00 + }; + test_comp((char const *)str_001, REG_EXTENDED, 0); + test_exec((char const *)str_002, 0, REG_OK, 10, 13, END); +#endif +#endif + +#if !defined(WIN32) && !defined(__OpenBSD__) + if (setlocale(LC_CTYPE, "en_US.ISO-8859-1") != NULL || + setlocale(LC_CTYPE, "en_US.ISO8859-1") != NULL) + { + fprintf(outf, "\nTesting LC_CTYPE en_US.ISO-8859-1\n"); +#ifdef SRC_IN_ISO_8859_1 + test_comp("aBCdeFghiJKlmnoPQRstuvWXyZ", REG_ICASE, 0); + test_exec("abCDefGhiJKlmNoPqRStuVwXyz", 0, REG_OK, 0, 29, END); +#else +#ifdef SRC_IN_UTF_8 + /* iconv -f ISO-8859-1 -t UTF-8 file_with_lines_above > yyy_utf_8 */ + /* This fails with no match on freebsd, but succeeds in linux. */ + test_comp("aBCdeFghiJKlmnoPQRstuvWXyZåäö", REG_ICASE, 0); + test_exec("abCDefGhiJKlmNoPqRStuVwXyzÅÄÖ", 0, REG_OK, 0, 29, END); +#else + /* Represent the test strings as a sequence of bytes so we don't run afoul of the compiler's expected source-charset. */ + unsigned char str_003[] = { + 'a','B','C','d','e','F','g','h','i','J','K','l','m','n','o','P','Q','R','s','t','u','v','W','X','y','Z',0xE5,0xE4,0xF6,0x00 + }; + unsigned char str_004[] = { + 'a','b','C','D','e','f','G','h','i','J','K','l','m','N','o','P','q','R','S','t','u','V','w','X','y','z',0xC5,0xC4,0xD6,0x00 + }; + test_comp((char const *)str_003, REG_ICASE, 0); + test_exec((char const *)str_004, 0, REG_OK, 0, 29, END); +#endif +#endif + } + +#ifdef TRE_MULTIBYTE + if (setlocale(LC_CTYPE, "ja_JP.eucjp") != NULL || + setlocale(LC_CTYPE, "ja_JP.eucJP") != NULL) + { + fprintf(outf, "\nTesting LC_CTYPE ja_JP.eucjp\n"); + /* I tried to make a test where implementations not aware of multibyte + character sets will fail. I have no idea what the japanese text here + means, I took it from http://www.ipsec.co.jp/. 
*/ + /* TBR 2023/03/22: iconv has (at least) the following encoding names for Japanese: + EUC-JIS-2004 EUC-JISX0213 + EUC-JP-MS EUCJP-MS EUCJP-OPEN EUCJP-WIN EUCJPMS + EUC-JP CSEUCPKDFMTJAPANESE EUCJP IBM-EUCJP + ISO-2022-JP-1 ISO2022-JP1 + ISO-2022-JP-2 CSISO2022JP2 ISO2022-JP2 ISO-2022-JP-2004 ISO-2022-JP-3 ISO2022-JP2004 ISO2022-JP3 + ISO-2022-JP CSISO2022JP ISO2022-JP + Both iconv arguments of EUC-JP and EUC-JP-MS produced the converted strings below, + all the others I tried resulted in invalid characters. So guess at EUC-JP. + If anyone knows what the encoding actually was, feel free to let me know at tbr at acm dot org :). */ +#ifdef SRC_IN_EUC_JP + test_comp("+", REG_EXTENDED, 0); + test_exec("ξޤϡ", 0, REG_OK, 10, 12, END); +#else +#ifdef SRC_IN_UTF_8 + /* iconv -f EUC_JP -t UTF-8 file_with_lines_above > zzz_utf_8 + This may fail because the match results might be incorrect for UTF-8, I (TBR) just don't know enough to be certain. + It compiles and runs successfully on my desktop with the C.UTF-8 locale. */ + test_comp("機+", REG_EXTENDED, 0); + test_exec("この賞は、機・利便性・セキ", 0, REG_OK, 10, 12, END); +#else + /* Represent the test strings as a sequence of bytes so we don't run afoul of the compiler's expected source-charset. */ + /* This test uses the same strings (str_001 and str_002) as above, now with a Japanese locale. + NOTE THE DIFFERENCE IN MATCH RESULTS - (10,13) earlier with the default locale, and (10,12) here with the Japanese locale. 
*/ + test_comp((char const *)str_001, REG_EXTENDED, 0); + test_exec((char const *)str_002, 0, REG_OK, 10, 12, END); +#endif +#endif + test_comp("a", REG_EXTENDED, 0); + test_nexec("foo\000bar", 7, 0, REG_OK, 5, 6, END); + test_comp("c$", REG_EXTENDED, 0); + test_exec("abc", 0, REG_OK, 2, 3, END); + } + else + { + fprintf(outf, "\nTRE_MULTIBYTE enabled, but skipping LC_CTYPE ja_JP.eucJP (locale unavailable)\n"); + } +#endif /* TRE_MULTIBYTE */ +#endif + + tre_regfree(&reobj); + + fprintf(outf, "\n"); + if (comp_errors || exec_errors) + fprintf(outf, "%d (%d + %d) out of %d tests FAILED!\n", + comp_errors + exec_errors, comp_errors, exec_errors, + comp_tests + exec_tests); + else + fprintf(outf, "All %d tests passed.\n", comp_tests + exec_tests); + + +#ifdef MALLOC_DEBUGGING + if (xmalloc_dump_leaks()) + return 1; +#endif /* MALLOC_DEBUGGING */ + + return comp_errors || exec_errors; +} + +/* EOF */ diff --git a/deps/tre/tests/test-literal-opt.c b/deps/tre/tests/test-literal-opt.c new file mode 100644 index 000000000..62853e07d --- /dev/null +++ b/deps/tre/tests/test-literal-opt.c @@ -0,0 +1,303 @@ +/* + test-literal-opt.c - Validate TRE literal optimization against the + generic matcher. + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. 
+ +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include + +#include "tre-internal.h" + +#define PMATCH_SLOTS 4 +#define RC_ANY -9999 + +typedef struct { + const char *name; + const char *pattern; + size_t pattern_len; + int cflags; + const char *string; + size_t string_len; + int eflags; + int expected_rc; + tre_literal_opt_mode_t expected_mode; +} litopt_case_t; + +static void +init_pmatch(regmatch_t pmatch[], size_t count) +{ + size_t i; + + for (i = 0; i < count; i++) + { + pmatch[i].rm_so = 111; + pmatch[i].rm_eo = 222; + } +} + +static int +same_pmatch(const regmatch_t a[], const regmatch_t b[], size_t count) +{ + size_t i; + + for (i = 0; i < count; i++) + if (a[i].rm_so != b[i].rm_so || a[i].rm_eo != b[i].rm_eo) + return 0; + return 1; +} + +static int +pmatch_cleared(const regmatch_t pmatch[], size_t count) +{ + size_t i; + + for (i = 0; i < count; i++) + if (pmatch[i].rm_so != -1 || pmatch[i].rm_eo != -1) + return 0; + return 1; +} + +static int +run_case(const litopt_case_t *tc) +{ + regex_t preg; + tre_tnfa_t *tnfa; + regmatch_t fast[PMATCH_SLOTS], slow[PMATCH_SLOTS]; + tre_literal_opt_mode_t saved_mode; + char errbuf[256]; + int errcode, fast_rc, slow_rc; + + memset(&preg, 0, sizeof(preg)); + errcode = tre_regncompb(&preg, tc->pattern, tc->pattern_len, tc->cflags); + if (errcode != REG_OK) + { + tre_regerror(errcode, &preg, errbuf, sizeof(errbuf)); + fprintf(stderr, "%s: compile failed: %s\n", tc->name, errbuf); + return 1; + } + + tnfa = (tre_tnfa_t *)preg.value; + if (tnfa->literal_opt.mode != tc->expected_mode) + { + fprintf(stderr, "%s: optimizer mode %d, expected %d\n", + tc->name, (int)tnfa->literal_opt.mode, (int)tc->expected_mode); + tre_regfree(&preg); + return 1; + } + + init_pmatch(fast, PMATCH_SLOTS); + init_pmatch(slow, PMATCH_SLOTS); + + fast_rc = tre_regnexecb(&preg, tc->string, tc->string_len, + PMATCH_SLOTS, fast, tc->eflags); + + saved_mode = tnfa->literal_opt.mode; + tnfa->literal_opt.mode = 
TRE_LITERAL_OPT_NONE; + slow_rc = tre_regnexecb(&preg, tc->string, tc->string_len, + PMATCH_SLOTS, slow, tc->eflags); + tnfa->literal_opt.mode = saved_mode; + + if (fast_rc != slow_rc) + { + fprintf(stderr, "%s: fast rc %d, slow rc %d\n", + tc->name, fast_rc, slow_rc); + tre_regfree(&preg); + return 1; + } + + if (tc->expected_rc != RC_ANY && fast_rc != tc->expected_rc) + { + fprintf(stderr, "%s: rc %d, expected %d\n", + tc->name, fast_rc, tc->expected_rc); + tre_regfree(&preg); + return 1; + } + + if (!same_pmatch(fast, slow, PMATCH_SLOTS)) + { + fprintf(stderr, "%s: fast and slow pmatch differ\n", tc->name); + tre_regfree(&preg); + return 1; + } + + if ((tc->cflags & REG_NOSUB) && fast_rc == REG_OK + && !pmatch_cleared(fast, PMATCH_SLOTS)) + { + fprintf(stderr, "%s: REG_NOSUB match did not clear pmatch\n", tc->name); + tre_regfree(&preg); + return 1; + } + + tre_regfree(&preg); + return 0; +} + +int +main(void) +{ + static const char nonascii_pattern[] = { (char)0xc0, '|', (char)0xe0 }; + static const char nonascii_haystack[] = { 'x', (char)0xe0, 'y' }; + static const litopt_case_t cases[] = { + { + "contains basic", + "foo|bar|baz", + sizeof("foo|bar|baz") - 1, + REG_EXTENDED | REG_NOSUB, + "xxbaryy", + sizeof("xxbaryy") - 1, + 0, + REG_OK, + TRE_LITERAL_OPT_CONTAINS + }, + { + "contains ignores bol/eol flags", + "foo|bar|baz", + sizeof("foo|bar|baz") - 1, + REG_EXTENDED | REG_NOSUB, + "xxbaryy", + sizeof("xxbaryy") - 1, + REG_NOTBOL | REG_NOTEOL, + REG_OK, + TRE_LITERAL_OPT_CONTAINS + }, + { + "prefix basic", + "^(foo|bar|baz)", + sizeof("^(foo|bar|baz)") - 1, + REG_EXTENDED | REG_NOSUB, + "barrier", + sizeof("barrier") - 1, + 0, + REG_OK, + TRE_LITERAL_OPT_PREFIX + }, + { + "prefix respects REG_NOTBOL", + "^(foo|bar|baz)", + sizeof("^(foo|bar|baz)") - 1, + REG_EXTENDED | REG_NOSUB, + "barrier", + sizeof("barrier") - 1, + REG_NOTBOL, + REG_NOMATCH, + TRE_LITERAL_OPT_PREFIX + }, + { + "suffix basic", + "(foo|bar|baz)$", + sizeof("(foo|bar|baz)$") - 1, + 
REG_EXTENDED | REG_NOSUB, + "crowbar", + sizeof("crowbar") - 1, + 0, + REG_OK, + TRE_LITERAL_OPT_SUFFIX + }, + { + "suffix respects REG_NOTEOL", + "(foo|bar|baz)$", + sizeof("(foo|bar|baz)$") - 1, + REG_EXTENDED | REG_NOSUB, + "crowbar", + sizeof("crowbar") - 1, + REG_NOTEOL, + REG_NOMATCH, + TRE_LITERAL_OPT_SUFFIX + }, + { + "exact basic", + "^(foo|bar|baz)$", + sizeof("^(foo|bar|baz)$") - 1, + REG_EXTENDED | REG_NOSUB, + "bar", + sizeof("bar") - 1, + 0, + REG_OK, + TRE_LITERAL_OPT_EXACT + }, + { + "exact respects REG_NOTBOL", + "^(foo|bar|baz)$", + sizeof("^(foo|bar|baz)$") - 1, + REG_EXTENDED | REG_NOSUB, + "bar", + sizeof("bar") - 1, + REG_NOTBOL, + REG_NOMATCH, + TRE_LITERAL_OPT_EXACT + }, + { + "exact respects REG_NOTEOL", + "^(foo|bar|baz)$", + sizeof("^(foo|bar|baz)$") - 1, + REG_EXTENDED | REG_NOSUB, + "bar", + sizeof("bar") - 1, + REG_NOTEOL, + REG_NOMATCH, + TRE_LITERAL_OPT_EXACT + }, + { + "empty alternation disables optimization", + "(|foo|bar)", + sizeof("(|foo|bar)") - 1, + REG_EXTENDED | REG_NOSUB, + "", + 0, + 0, + REG_OK, + TRE_LITERAL_OPT_NONE + }, + { + "inline flag disable stays generic", + "foo(?-i:zap)zot", + sizeof("foo(?-i:zap)zot") - 1, + REG_EXTENDED | REG_ICASE | REG_NOSUB, + "FoOzApZOt", + sizeof("FoOzApZOt") - 1, + 0, + REG_NOMATCH, + TRE_LITERAL_OPT_NONE + }, + { + "inline flag disable still matches exact scoped bytes", + "foo(?-i:zap)zot", + sizeof("foo(?-i:zap)zot") - 1, + REG_EXTENDED | REG_ICASE | REG_NOSUB, + "FoOzapZOt", + sizeof("FoOzapZOt") - 1, + 0, + REG_OK, + TRE_LITERAL_OPT_NONE + }, + { + "nocase non-ascii bytes stay in sync", + nonascii_pattern, + sizeof(nonascii_pattern), + REG_EXTENDED | REG_ICASE | REG_NOSUB, + nonascii_haystack, + sizeof(nonascii_haystack), + 0, + RC_ANY, + TRE_LITERAL_OPT_CONTAINS + } + }; + size_t i; + int failures = 0; + + setlocale(LC_CTYPE, "en_US.ISO-8859-1"); + + for (i = 0; i < elementsof(cases); i++) + failures += run_case(&cases[i]); + + return failures; +} diff --git 
a/deps/tre/tests/test-malformed-regn.c b/deps/tre/tests/test-malformed-regn.c new file mode 100644 index 000000000..7d3074a1e --- /dev/null +++ b/deps/tre/tests/test-malformed-regn.c @@ -0,0 +1,85 @@ +/* + test-malformed-regn.c - Verify exact-length edge-case regexps compile or fail + cleanly both with and without a trailing NUL byte. + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. +*/ + +#include +#include +#include + +#include "tre.h" + +typedef struct { + const char *name; + const char *pattern; + int expected_err; +} malformed_case_t; + +static int +run_case(const malformed_case_t *tc, int nul_terminated) +{ + regex_t preg; + size_t len = strlen(tc->pattern); + size_t alloc_len = len + (nul_terminated ? 1 : 0); + char *pattern = malloc(alloc_len ? alloc_len : 1); + int errcode; + + if (pattern == NULL) + { + fprintf(stderr, "%s: out of memory\n", tc->name); + return 1; + } + + if (len > 0) + memcpy(pattern, tc->pattern, len); + if (nul_terminated) + pattern[len] = '\0'; + + memset(&preg, 0, sizeof(preg)); + errcode = tre_regncompb(&preg, pattern, len, REG_EXTENDED | REG_NOSUB); + if (errcode == REG_OK) + tre_regfree(&preg); + + free(pattern); + + if (errcode != tc->expected_err) + { + char errbuf[128]; + memset(&preg, 0, sizeof(preg)); + tre_regerror(errcode, &preg, errbuf, sizeof(errbuf)); + fprintf(stderr, "%s (%s): got %d (%s), expected %d\n", + tc->name, nul_terminated ? 
"nul" : "exact", + errcode, errbuf, tc->expected_err); + return 1; + } + + return 0; +} + +int +main(void) +{ + static const malformed_case_t cases[] = { + { "open paren", "(", REG_EPAREN }, + { "open bracket", "[", REG_EBRACK }, + { "unterminated comment", "(?#", REG_BADPAT }, + { "unterminated inline flags", "(?i", REG_BADPAT }, + { "short hex escape", "\\x", REG_OK }, + { "unterminated wide hex", "\\x{", REG_EBRACE }, + { "empty wide hex", "\\x{}", REG_OK } + }; + size_t i; + + for (i = 0; i < sizeof(cases) / sizeof(*cases); i++) + { + if (run_case(&cases[i], 0)) + return 1; + if (run_case(&cases[i], 1)) + return 1; + } + + return 0; +} diff --git a/deps/tre/tests/test-str-source.c b/deps/tre/tests/test-str-source.c new file mode 100644 index 000000000..985f5b247 --- /dev/null +++ b/deps/tre/tests/test-str-source.c @@ -0,0 +1,192 @@ +/* + test-str-source.c - Sample program for using tre_reguexec() + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +/* look for getopt in order to use a -o option for output. */ +#if defined(HAVE_UNISTD_H) +#include +#elif defined(HAVE_GETOPT_H) +#include +#endif + +#include "tre-internal.h" + +static FILE *outf = NULL; + +/* Context structure for the tre_str_source wrappers. */ +typedef struct { + /* Our string. */ + const char *str; + /* Current position in the string. */ + size_t pos; +} str_handler_ctx; + +/* The get_next_char() handler. Sets `c' to the value of the next character, + and increases `pos_add' by the number of bytes read. Returns 1 if the + string has ended, 0 if there are more characters. 
*/ +static int +str_handler_get_next(tre_char_t *c, unsigned int *pos_add, void *context) +{ + str_handler_ctx *ctx = context; + unsigned char ch = ctx->str[ctx->pos]; + +#ifdef TRE_DEBUG + fprintf(outf, "str[%lu] = %d\n", (unsigned long)ctx->pos, ch); +#endif /* TRE_DEBUG */ + *c = ch; + if (ch) + ctx->pos++; + *pos_add = 1; + + return ch == '\0'; +} + +/* The rewind() handler. Resets the current position in the input string. */ +static void +str_handler_rewind(size_t pos, void *context) +{ + str_handler_ctx *ctx = context; + +#ifdef TRE_DEBUG + fprintf(outf, "rewind to %lu\n", (unsigned long)pos); +#endif /* TRE_DEBUG */ + ctx->pos = pos; +} + +/* The compare() handler. Compares two substrings in the input and returns + 0 if the substrings are equal, and a nonzero value if not. */ +static int +str_handler_compare(size_t pos1, size_t pos2, size_t len, void *context) +{ + str_handler_ctx *ctx = context; +#ifdef TRE_DEBUG + fprintf(outf, "comparing %lu-%lu and %lu-%lu\n", + (unsigned long)pos1, (unsigned long)pos1 + len, + (unsigned long)pos2, (unsigned long)pos2 + len); +#endif /* TRE_DEBUG */ + return strncmp(ctx->str + pos1, ctx->str + pos2, len); +} + +/* Creates a tre_str_source wrapper around the string `str'. Returns the + tre_str_source object or NULL if out of memory. */ +static tre_str_source * +make_str_source(const char *str) +{ + tre_str_source *s; + str_handler_ctx *ctx; + + s = calloc(1, sizeof(*s)); + if (!s) + return NULL; + + ctx = malloc(sizeof(str_handler_ctx)); + if (!ctx) + { + free(s); + return NULL; + } + + ctx->str = str; + ctx->pos = 0; + s->context = ctx; + s->get_next_char = str_handler_get_next; + s->rewind = str_handler_rewind; + s->compare = str_handler_compare; + + return s; +} + +/* Frees the memory allocated for `s'. */ +static void +free_str_source(tre_str_source *s) +{ + free(s->context); + free(s); +} + +/* Run one test with tre_reguexec. Returns 1 if the regex matches, 0 if + it doesn't, and -1 if an error occurs. 
*/ +static int +test_reguexec(const char *str, const char *regex) +{ + regex_t preg; + tre_str_source *source; + regmatch_t pmatch[5]; + int ret; + + if ((source = make_str_source(str)) == NULL) + { + fprintf(stderr, "Out of memory\n"); + ret = -1; + } + else + { + if (tre_regcomp(&preg, regex, REG_EXTENDED) != REG_OK) + { + fprintf(stderr, "Failed to compile /%s/\n", regex); + ret = -1; + } + else + { + if (tre_reguexec(&preg, source, elementsof(pmatch), pmatch, 0) == 0) + { + fprintf(outf, "Match: /%s/ matches \"%.*s\" in \"%s\"\n", regex, + (int)(pmatch[0].rm_eo - pmatch[0].rm_so), + str + pmatch[0].rm_so, str); + ret = 1; + } + else + { + fprintf(outf, "No match: /%s/ in \"%s\"\n", regex, str); + ret = 0; + } + tre_regfree(&preg); + } + free_str_source(source); + } + return ret; +} + +int +main(int argc, char **argv) +{ + int ret = 0; + outf = stdout; +#if defined(HAVE_UNISTD_H) || defined(HAVE_GETOPT_H) + int opt; + while ((opt = getopt(argc, argv, "o:")) != -1) /* POSIX: getopt() returns -1 (not EOF) once all options are parsed */ + { + switch (opt) + { + case 'o': + if ((outf = fopen(optarg, "w")) == NULL) + { + perror(optarg); + exit(1); + } + break; + default: + /* getopt() will have printed an error message already */ + exit(1); + } + } +#endif + ret += test_reguexec("xfoofofoofoo", "(foo)\\1") != 1; + ret += test_reguexec("catcat", "(cat|dog)\\1") != 1; + ret += test_reguexec("catdog", "(cat|dog)\\1") != 0; + ret += test_reguexec("dogdog", "(cat|dog)\\1") != 1; + ret += test_reguexec("dogcat", "(cat|dog)\\1") != 0; + + return ret; +} diff --git a/modules/redisbloom/Makefile b/modules/redisbloom/Makefile index f40cc7c1f..2fa608a0e 100644 --- a/modules/redisbloom/Makefile +++ b/modules/redisbloom/Makefile @@ -1,5 +1,5 @@ SRC_DIR = src -MODULE_VERSION = v8.7.90 +MODULE_VERSION = v8.7.91 MODULE_REPO = https://github.com/redisbloom/redisbloom TARGET_MODULE = $(SRC_DIR)/bin/$(FULL_VARIANT)/redisbloom.so diff --git a/modules/redisearch/Makefile b/modules/redisearch/Makefile index 1672d7454..3f84bb98e 100644 --- 
a/modules/redisearch/Makefile +++ b/modules/redisearch/Makefile @@ -1,7 +1,20 @@ SRC_DIR = src -MODULE_VERSION = v8.5.90 +MODULE_VERSION = v8.7.91 MODULE_REPO = https://github.com/redisearch/redisearch TARGET_MODULE = $(SRC_DIR)/bin/$(FULL_VARIANT)/search-community/redisearch.so -include ../common.mk +# Enable link-time optimization for RediSearch by default. Override with LTO=0. +LTO ?= 1 +export LTO +# Use the committed C headers for Rust modules, rather than regenerating them +# from Rust source. Override with REDISEARCH_GENERATE_HEADERS=1. +REDISEARCH_GENERATE_HEADERS ?= 0 +export REDISEARCH_GENERATE_HEADERS + + # Set INLINE_LSE_ATOMICS=1 for perf improvement on common ARM CPUs (i.e. Graviton2/3/4); no effect on x86 or macOS. + # Default 0 keeps the binary runnable on pre-Armv8.1-a cores (Cortex-A72, Graviton1, RPi4) that would otherwise SIGILL at module load. +INLINE_LSE_ATOMICS ?= 0 +export INLINE_LSE_ATOMICS + +include ../common.mk diff --git a/modules/redisjson/Makefile b/modules/redisjson/Makefile index 4d13ed7bc..e85e5297d 100644 --- a/modules/redisjson/Makefile +++ b/modules/redisjson/Makefile @@ -1,5 +1,5 @@ SRC_DIR = src -MODULE_VERSION = v8.7.90 +MODULE_VERSION = v8.7.91 MODULE_REPO = https://github.com/redisjson/redisjson TARGET_MODULE = $(SRC_DIR)/bin/$(FULL_VARIANT)/rejson.so diff --git a/modules/redistimeseries/Makefile b/modules/redistimeseries/Makefile index 1bd8b46ca..b5da541dd 100644 --- a/modules/redistimeseries/Makefile +++ b/modules/redistimeseries/Makefile @@ -1,5 +1,5 @@ SRC_DIR = src -MODULE_VERSION = v8.7.90 +MODULE_VERSION = v8.7.91 MODULE_REPO = https://github.com/redistimeseries/redistimeseries TARGET_MODULE = $(SRC_DIR)/bin/$(FULL_VARIANT)/redistimeseries.so diff --git a/modules/vector-sets/tests/dimension_max_limit.py b/modules/vector-sets/tests/dimension_max_limit.py new file mode 100644 index 000000000..5a142d441 --- /dev/null +++ b/modules/vector-sets/tests/dimension_max_limit.py @@ -0,0 +1,129 @@ +from test import TestCase, 
generate_random_vector +import struct +import redis.exceptions + +MAX_DIM = 65536 + + +class DimensionMaxLimitVaddAtLimit(TestCase): + def getname(self): + return "[regression] VADD VALUES dim == MAX_DIM accepted" + + def estimated_runtime(self): + return 0.5 + + def test(self): + dim = MAX_DIM + vec = generate_random_vector(dim) + + result = self.redis.execute_command( + 'VADD', self.test_key, + 'VALUES', dim, + *[str(x) for x in vec], + f"{self.test_key}:item:maxdim") + assert result == 1, "VADD with dimension at the limit should succeed" + + +class DimensionMaxLimitVaddAboveLimit(TestCase): + def getname(self): + return "[regression] VADD VALUES dim > MAX_DIM rejected" + + def estimated_runtime(self): + return 0.1 + + def test(self): + too_big_dim = MAX_DIM + 1 + too_big_vec = generate_random_vector(16) + try: + self.redis.execute_command( + 'VADD', self.test_key, + 'VALUES', too_big_dim, + *[str(x) for x in too_big_vec], + f"{self.test_key}:item:toolarge") + assert False, "VADD with dimension above the limit should fail" + except redis.exceptions.ResponseError as e: + # parseVector returns NULL so caller uses the generic invalid spec error + assert "invalid vector specification" in str(e), ( + f"Expected invalid vector specification error, got: {e}") + + +class DimensionMaxLimitVsimAtLimit(TestCase): + def getname(self): + return "[regression] VSIM VALUES dim == MAX_DIM accepted" + + def estimated_runtime(self): + return 0.5 + + def test(self): + # Insert a vector at the maximum allowed dimension, then query at the same dimension. 
+ dim = MAX_DIM + base_vec = generate_random_vector(dim) + + result = self.redis.execute_command( + 'VADD', self.test_key, + 'VALUES', dim, + *[str(x) for x in base_vec], + f"{self.test_key}:item:1") + assert result == 1, "VADD with dimension at the limit should succeed" + + query = generate_random_vector(dim) + res = self.redis.execute_command( + 'VSIM', self.test_key, + 'VALUES', dim, + *[str(x) for x in query], + 'COUNT', 1) + assert isinstance(res, list), "VSIM with dimension at the limit should return a list" + + +class DimensionMaxLimitVsimAboveLimit(TestCase): + def getname(self): + return "[regression] VSIM VALUES dim > MAX_DIM rejected" + + def estimated_runtime(self): + return 0.1 + + def test(self): + # Create a small index, then issue a VSIM with an over-limit dimension. + base_dim = 16 + base_vec = generate_random_vector(base_dim) + result = self.redis.execute_command( + 'VADD', self.test_key, + 'VALUES', base_dim, + *[str(x) for x in base_vec], + f"{self.test_key}:item:1") + assert result == 1, "VADD with base_dim should succeed" + + too_big_dim = MAX_DIM + 1 + too_big_vec = generate_random_vector(16) + try: + self.redis.execute_command( + 'VSIM', self.test_key, + 'VALUES', too_big_dim, + *[str(x) for x in too_big_vec], + 'COUNT', 1) + assert False, "VSIM with dimension above the limit should fail" + except redis.exceptions.ResponseError as e: + assert "invalid vector specification" in str(e), ( + f"Expected invalid vector specification error in VSIM, got: {e}") + + +class DimensionMaxLimitHugeDimension(TestCase): + def getname(self): + return "[regression] VADD VALUES absurdly large dim rejected" + + def estimated_runtime(self): + return 0.1 + + def test(self): + # Extremely large dimension close to LLONG_MAX should also be rejected safely. 
+ huge_dim = 9223372036854775807 # LLONG_MAX from the original report + try: + self.redis.execute_command( + 'VADD', self.test_key, + 'VALUES', huge_dim, + '0') # Just a dummy value; parseVector should reject based on dimension alone + assert False, "VADD with absurdly large dimension should fail" + except redis.exceptions.ResponseError as e: + assert "invalid vector specification" in str(e), ( + f"Expected invalid vector specification error for huge dim, got: {e}") + diff --git a/modules/vector-sets/tests/dimension_validation.py b/modules/vector-sets/tests/dimension_validation.py index f0811529a..7e13f57cf 100644 --- a/modules/vector-sets/tests/dimension_validation.py +++ b/modules/vector-sets/tests/dimension_validation.py @@ -65,3 +65,33 @@ class DimensionValidation(TestCase): assert False, "VSIM with wrong dimension should fail" except redis.exceptions.ResponseError as e: assert "Input dimension mismatch for projection" in str(e), f"Expected dimension mismatch error in VSIM, got: {e}" + +class ReduceDimConstraintValidation(TestCase): + def getname(self): + return "[regression] VADD enforces reduce_dim <= dim" + + def estimated_runtime(self): + return 0.1 + + def test(self): + import struct + + dim = 16 + reduce_dim = dim + 1 # Intentionally larger than dim + + # Build a simple FP32 vector of the given dimension. + vec = [0.0] * dim + vec_bytes = struct.pack(f'{dim}f', *vec) + + try: + self.redis.execute_command( + 'VADD', self.test_key, + 'REDUCE', reduce_dim, + 'FP32', vec_bytes, + f'{self.test_key}:item:reducemismatch') + assert False, "VADD with reduce_dim > dim should fail" + except redis.exceptions.ResponseError as e: + # Same generic validation error path as other vector spec problems. 
+ assert "invalid vector specification" in str(e), ( + f"Expected invalid vector error, got: {e}") + diff --git a/modules/vector-sets/vset.c b/modules/vector-sets/vset.c index 618723e91..b3b47871b 100644 --- a/modules/vector-sets/vset.c +++ b/modules/vector-sets/vset.c @@ -134,6 +134,9 @@ static uint64_t VectorSetTypeNextId = 0; // Default num elements returned by VSIM. #define VSET_DEFAULT_COUNT 10 +// Maximum allowed vector dimension for input vectors and sets. +#define VSET_MAX_VECTOR_DIM (1<<16) + /* ========================== Internal data structure ====================== */ /* Our abstract data type needs a dual representation similar to Redis @@ -408,6 +411,7 @@ float *parseVector(RedisModuleString **argv, int argc, int start_idx, // Must be 4 bytes per component. if (vec_raw_len % 4 || vec_raw_len < 4) return NULL; *dim = vec_raw_len/4; + if (*dim > VSET_MAX_VECTOR_DIM) return NULL; vec = RedisModule_Alloc(vec_raw_len); if (!vec) return NULL; @@ -417,7 +421,7 @@ float *parseVector(RedisModuleString **argv, int argc, int start_idx, if (argc < start_idx + 2) return NULL; // Need at least the dimension. long long vdim; // Vector dimension passed by the user. if (RedisModule_StringToLongLong(argv[start_idx+1],&vdim) - != REDISMODULE_OK || vdim < 1) return NULL; + != REDISMODULE_OK || vdim < 1 || vdim > VSET_MAX_VECTOR_DIM) return NULL; // Check that all the arguments are available. if (argc < start_idx + 2 + vdim) return NULL; @@ -441,6 +445,12 @@ float *parseVector(RedisModuleString **argv, int argc, int start_idx, return NULL; // Unknown format. 
} + // reduce_dim must be <= dim + if (reduce_dim && *reduce_dim && *reduce_dim > *dim) { + if (vec) RedisModule_Free(vec); + return NULL; + } + if (consumed_args) *consumed_args = consumed; return vec; } @@ -1966,6 +1976,15 @@ void *VectorSetRdbLoad(RedisModuleIO *rdb, int encver) { uint32_t quant_type = hnsw_config & 0xff; uint32_t hnsw_m = (hnsw_config >> 8) & 0xffff; + /* Validate dimension loaded from RDB to enforce invariants and + * avoid absurd allocations or inconsistent state. */ + if (dim == 0 || dim > VSET_MAX_VECTOR_DIM) { + RedisModule_LogIOError(rdb, "warning", + "Invalid vector dimension in RDB: dim=%u (max allowed %u)", + (unsigned)dim, (unsigned)VSET_MAX_VECTOR_DIM); + return NULL; + } + /* Check that the quantization type is correct. Otherwise * return ASAP signaling the error. */ if (quant_type != HNSW_QUANT_NONE && @@ -1987,14 +2006,44 @@ void *VectorSetRdbLoad(RedisModuleIO *rdb, int encver) { uint32_t input_dim = RedisModule_LoadUnsigned(rdb); if (RedisModule_IsIOError(rdb)) goto ioerr; uint32_t output_dim = dim; - size_t matrix_size = sizeof(float) * input_dim * output_dim; + + /* Sanity check projection dimensions. */ + if (input_dim == 0 || output_dim == 0 || input_dim > VSET_MAX_VECTOR_DIM || output_dim > input_dim) { + RedisModule_LogIOError(rdb, "warning", + "Invalid projection matrix dimensions: input_dim=%u, output_dim=%u (max allowed %u)", + (unsigned)input_dim, (unsigned)output_dim, + (unsigned)VSET_MAX_VECTOR_DIM); + goto ioerr; + } + + /* Check for overflow in matrix_size = sizeof(float) * input_dim * output_dim. 
*/ + #if SIZE_MAX == UINT32_MAX + uint64_t product = (uint64_t) output_dim * (uint64_t) input_dim * sizeof(float); + if (product > SIZE_MAX) { + RedisModule_LogIOError(rdb, "warning", + "Projection matrix size overflow (output_dim too large): input_dim=%u, output_dim=%u", + (unsigned)input_dim, (unsigned)output_dim); + goto ioerr; + } + #endif + + size_t matrix_size = sizeof(float) * (size_t)input_dim * (size_t)output_dim; + + /* Load projection matrix as a binary blob and validate length. */ + size_t blob_len = 0; + char *matrix_blob = RedisModule_LoadStringBuffer(rdb, &blob_len); + if (matrix_blob == NULL) goto ioerr; + + if (blob_len != matrix_size) { + RedisModule_LogIOError(rdb, "warning", + "Mismatching projection matrix length: expected=%zu, got=%zu", + matrix_size, blob_len); + RedisModule_Free(matrix_blob); + goto ioerr; + } vset->proj_matrix = RedisModule_Alloc(matrix_size); vset->proj_input_size = input_dim; - - // Load projection matrix as a binary blob - char *matrix_blob = RedisModule_LoadStringBuffer(rdb, NULL); - if (matrix_blob == NULL) goto ioerr; memcpy(vset->proj_matrix, matrix_blob, matrix_size); RedisModule_Free(matrix_blob); } diff --git a/redis.conf b/redis.conf index 845be292f..9151c8fc8 100644 --- a/redis.conf +++ b/redis.conf @@ -1971,6 +1971,17 @@ slowlog-log-slower-than 10000 # You can reclaim memory used by the slow log with SLOWLOG RESET. slowlog-max-len 128 +# When a command is written to the slowlog we check how many arguments it has +# and if it has more than slowlog-entry-max-argc we trim the excess ones. The +# last of the non-trimmed arguments is overwritten with an info string about +# how many args were trimmed. That's why slowlog-entry-max-argc has minimum +# value of 2, so we can always preserve the command name. +# Moreover, each individual argument string is also trimmed depending on +# slowlog-entry-max-string-len. 
Default values: +# +# slowlog-entry-max-argc 32 +# slowlog-entry-max-string-len 128 + ################################ LATENCY MONITOR ############################## # The Redis latency monitoring subsystem samples different operations @@ -2033,15 +2044,21 @@ latency-monitor-threshold 0 # e Evicted events (events generated when a key is evicted for maxmemory) # n New key events (Note: not included in the 'A' class) # t Stream commands +# a Array commands # d Module key type events # m Key-miss events (Note: It is not included in the 'A' class) # o Overwritten events generated every time a key is overwritten. # (Note: not included in the 'A' class) # c Type-changed events generated every time a key's type changes # (Note: not included in the 'A' class) -# r rate limit event -# A Alias for g$lshzxetd, so that the "AKE" string means all the events -# except key-miss, new key, overwritten, type-changed and rate-limit. +# S Subkeyspace events, published with __subkeyspace@__: prefix. +# T Subkeyevent events, published with __subkeyevent@__: prefix. +# I Subkeyspaceitem events, published per subkey with +# __subkeyspaceitem@__:\n prefix. +# V Subkeyspaceevent events, published with +# __subkeyspaceevent@__:| prefix. +# A Alias for g$lshzxetad, so that the "AKE" string means all the events +# except key-miss, new key, overwritten and type-changed. # # The "notify-keyspace-events" takes as argument a string that is composed # of zero or multiple characters. The empty string means that notifications @@ -2170,6 +2187,37 @@ stream-node-max-entries 100 # stream-idmp-duration 100 # stream-idmp-maxsize 100 +# Arrays use a sliced directory structure for O(1) access. The slice size +# controls the granularity of memory allocation - each slice covers a range +# of indices. Must be a power of two between 256 and 65536. +# +# Smaller slices (1024-2048): Better for sparse data with large gaps between +# indices, or many small arrays. 
Uses less memory per slice but more directory +# entries. +# +# Larger slices (8192-16384): Better for dense/contiguous data. Fewer directory +# entries but may waste memory if data is sparse within slices. +# +# Default 4096 works well for mixed workloads. If you change this setting via +# CONFIG SET, existing arrays retain their original slice size. +# +# IMPORTANT CONSIDERATION: Redis arrays, for slices with very few elements, are +# able to use a sparse representation, where the slice is not really +# materialized into an actual contiguous allocation. See the next configuration +# parameters for more information. +array-slice-size 4096 + +# Arrays start with sparse slices (sorted key-value pairs) for memory efficiency +# when elements are scattered. When a sparse slice exceeds array-sparse-kmax +# entries, it promotes to a dense slice (direct array). When a dense slice's +# element count drops below array-sparse-kmin and demotion would save memory, +# it demotes back to sparse. Set kmax to 0 to disable sparse encoding entirely. +# Set kmin to 0 if you never want dense slices to be demoted to sparse (useful +# when, in your workload, arrays repeatedly become almost empty and are then +# filled again). +array-sparse-kmax 10 +array-sparse-kmin 5 + # Active rehashing uses 1 millisecond every 100 milliseconds of CPU time in # order to help rehashing the main Redis hash table (the one mapping top-level # keys to values). The hash table implementation Redis uses (see dict.c) diff --git a/src/Makefile b/src/Makefile index 046aaa5aa..86c40b722 100644 --- a/src/Makefile +++ b/src/Makefile @@ -25,17 +25,19 @@ CLANG := $(findstring clang,$(shell sh -c '$(CC) --version | head -1')) # some automatic defaults are added to it. To specify optimization flags # explicitly without any defaults added, pass the OPT variable instead. 
OPTIMIZATION?=-O3 +ENABLE_LTO?= ifeq ($(OPTIMIZATION),-O3) ifeq (clang,$(CLANG)) - OPTIMIZATION+=-flto + ENABLE_LTO=-flto else - OPTIMIZATION+=-flto=auto + ENABLE_LTO=-flto=auto -ffat-lto-objects endif + OPTIMIZATION+=$(ENABLE_LTO) endif ifneq ($(OPTIMIZATION),-O0) OPTIMIZATION+=-fno-omit-frame-pointer endif -DEPENDENCY_TARGETS=hiredis linenoise lua hdr_histogram fpconv xxhash +DEPENDENCY_TARGETS=hiredis linenoise lua hdr_histogram fpconv xxhash tre NODEPS:=clean distclean # Default settings @@ -382,7 +384,7 @@ endif REDIS_SERVER_NAME=redis-server$(PROG_SUFFIX) REDIS_SENTINEL_NAME=redis-sentinel$(PROG_SUFFIX) -REDIS_SERVER_OBJ=threads_mngr.o memory_prefetch.o adlist.o quicklist.o ae.o anet.o dict.o ebuckets.o eventnotifier.o iothread.o mstr.o entry.o kvstore.o fwtree.o estore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_asm.o cluster_legacy.o cluster_slot_stats.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o flax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o lolwut8.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o keymeta.o chk.o hotkeys.o gcra.o vector.o fast_float_strtod.o +REDIS_SERVER_OBJ=threads_mngr.o memory_prefetch.o adlist.o quicklist.o ae.o anet.o dict.o ebuckets.o eventnotifier.o iothread.o mstr.o entry.o kvstore.o fwtree.o estore.o server.o sds.o 
zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o t_array.o sparsearray.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_asm.o cluster_legacy.o cluster_slot_stats.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o flax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o lolwut8.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o keymeta.o chk.o hotkeys.o gcra.o vector.o fast_float_strtod.o REDIS_CLI_NAME=redis-cli$(PROG_SUFFIX) REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o ae.o redisassert.o crcspeed.o crccombine.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o cli_commands.o REDIS_BENCHMARK_NAME=redis-benchmark$(PROG_SUFFIX) @@ -423,7 +425,7 @@ persist-settings: distclean echo REDIS_LDFLAGS=$(REDIS_LDFLAGS) >> .make-settings echo PREV_FINAL_CFLAGS=$(FINAL_CFLAGS) >> .make-settings echo PREV_FINAL_LDFLAGS=$(FINAL_LDFLAGS) >> .make-settings - -(cd ../deps && $(MAKE) $(DEPENDENCY_TARGETS)) + -(cd ../deps && $(MAKE) $(DEPENDENCY_TARGETS) ENABLE_LTO="$(ENABLE_LTO)") .PHONY: persist-settings @@ -442,7 +444,7 @@ endif # redis-server $(REDIS_SERVER_NAME): $(REDIS_SERVER_OBJ) $(REDIS_VEC_SETS_OBJ) - $(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a ../deps/lua/src/liblua.a ../deps/hdr_histogram/libhdrhistogram.a ../deps/fpconv/libfpconv.a ../deps/xxhash/libxxhash.a $(FINAL_LIBS) + $(REDIS_LD) -o $@ $^ 
../deps/hiredis/libhiredis.a ../deps/lua/src/liblua.a ../deps/hdr_histogram/libhdrhistogram.a ../deps/fpconv/libfpconv.a ../deps/xxhash/libxxhash.a ../deps/tre/libtre.a $(FINAL_LIBS) # redis-sentinel $(REDIS_SENTINEL_NAME): $(REDIS_SERVER_NAME) diff --git a/src/acl.c b/src/acl.c index 79a900200..177077d45 100644 --- a/src/acl.c +++ b/src/acl.c @@ -57,6 +57,7 @@ struct ACLCategoryItem { {"list", ACL_CATEGORY_LIST}, {"hash", ACL_CATEGORY_HASH}, {"string", ACL_CATEGORY_STRING}, + {"array", ACL_CATEGORY_ARRAY}, {"bitmap", ACL_CATEGORY_BITMAP}, {"hyperloglog", ACL_CATEGORY_HYPERLOGLOG}, {"geo", ACL_CATEGORY_GEO}, @@ -70,7 +71,9 @@ struct ACLCategoryItem { {"connection", ACL_CATEGORY_CONNECTION}, {"transaction", ACL_CATEGORY_TRANSACTION}, {"scripting", ACL_CATEGORY_SCRIPTING}, +#ifdef ENABLE_GCRA {"ratelimit", ACL_CATEGORY_RATE_LIMIT}, +#endif {NULL,0} /* Terminator. */ }; diff --git a/src/aof.c b/src/aof.c index d58996877..16c169597 100644 --- a/src/aof.c +++ b/src/aof.c @@ -2467,6 +2467,7 @@ int rewriteStreamObject(rio *r, robj *key, robj *o) { return 1; } +#ifdef ENABLE_GCRA int rewriteGCRAObject(rio *r, robj *key, robj *o) { long long val; getLongLongFromGCRAObject(o, &val); @@ -2478,6 +2479,7 @@ int rewriteGCRAObject(rio *r, robj *key, robj *o) { if (rioWriteBulkLongLong(r,val) == 0) return 0; return 1; } +#endif /* Call the module type callback in order to rewrite a data type * that is exported by a module and is not handled by Redis itself. @@ -2515,6 +2517,116 @@ werr: return 0; } +/* Write unsigned 64-bit integer as bulk string. + * Unlike rioWriteBulkLongLong which uses signed representation, + * this correctly handles values >= 2^63 (e.g., array indices). */ +static int rioWriteBulkUnsignedLongLong(rio *r, uint64_t value) { + char buf[24]; + int len = ull2string(buf, sizeof(buf), value); + return rioWriteBulkString(r, buf, len); +} + +/* Helper to emit a single array element for AOF rewrite. + * Returns 0 on error, 1 on success. Updates count and items. 
*/ +static int aofEmitArrayElement(rio *r, robj *key, uint64_t idx, void *v, + long long *count, long long *items) { + if (*count == 0) { /* No command is open: emit a new ARMSET header first. */ + int cmd_items = (*items > AOF_REWRITE_ITEMS_PER_CMD/2) ? + AOF_REWRITE_ITEMS_PER_CMD/2 : *items; /* idx+val pairs in this command, capped at AOF_REWRITE_ITEMS_PER_CMD/2 */ + if (!rioWriteBulkCount(r,'*',2+cmd_items*2) || /* command name + key + two bulks per pair */ + !rioWriteBulkString(r,"ARMSET",6) || + !rioWriteBulkObject(r,key)) + { + return 0; + } + } + + /* Write index (unsigned to handle indices >= 2^63) */ + if (!rioWriteBulkUnsignedLongLong(r, idx)) return 0; + + /* Write value - arDecode returns a pointer and length for the element; buf is the scratch space offered for inline types, while arString values are aliased directly. */ + char buf[AR_INLINE_BUFSIZE]; + size_t len; + const char *data = arDecode(v, buf, sizeof(buf), &len); + if (!rioWriteBulkString(r, data, len)) return 0; + + if (++(*count) == AOF_REWRITE_ITEMS_PER_CMD/2) *count = 0; /* Command is full: the next element opens a new ARMSET. */ + (*items)--; /* One fewer element left to emit. */ + return 1; +} + +/* Helper to emit all elements from a slice for AOF rewrite. Dense slices are scanned over their window, skipping empty entries; sparse slices emit every stored entry. Returns 0 on error, 1 on success. */ +static int aofEmitSliceElements(rio *r, robj *key, arSlice *s, uint64_t slice_id, + uint32_t slice_size, long long *count, long long *items) { + if (s->encoding == AR_SLICE_DENSE) { + for (uint32_t i = 0; i < s->layout.dense.winsize; i++) { + void *v = s->layout.dense.items[i]; + if (arIsEmpty(v)) continue; + uint64_t idx = arMakeIdx(slice_id, s->layout.dense.offset + i, slice_size); + if (!aofEmitArrayElement(r, key, idx, v, count, items)) return 0; + } + } else { + /* Sparse slice: parallel offsets[] and values[] arrays holding s->count entries. */ + uint16_t *offsets = s->layout.sparse.offsets; + void **values = s->layout.sparse.values; + for (uint32_t i = 0; i < s->count; i++) { + uint64_t idx = arMakeIdx(slice_id, offsets[i], slice_size); + if (!aofEmitArrayElement(r, key, idx, values[i], count, items)) return 0; + } + } + return 1; +} + +/* Emit the commands needed to rebuild an array object. + * The function returns 0 on error, 1 on success.
*/ +int rewriteArrayObject(rio *r, robj *key, robj *o) { + redisArray *ar = o->ptr; + long long count = 0, items = ar->count; /* count: pairs written to the current command; items: elements still to emit. */ + if (items == 0) return 1; /* Empty array: return success without emitting any command. */ + + /* Iterate through all slices, handling both flat directory mode and + * superdir mode. This mirrors the iteration logic in rdb.c. */ + if (ar->superdir) { + /* Superdir mode: iterate through the sdir_len block entries, then through every slot of each block. */ + for (uint32_t bi = 0; bi < ar->sdir_len; bi++) { + arSDirEntry *e = ar->superdir + bi; + uint64_t block_base = e->block_id * AR_SUPER_BLOCK_SLOTS; /* Slice id of slot 0 in this block. */ + + for (uint32_t si = 0; si < AR_SUPER_BLOCK_SLOTS; si++) { + arSlice *s = e->slots[si]; + if (!s) continue; /* NULL slot: skip. */ + uint64_t slice_id = block_base + si; + if (!aofEmitSliceElements(r, key, s, slice_id, ar->slice_size, + &count, &items)) return 0; + } + } + } else { + /* Flat directory mode: the directory index is the slice id. The loop is bounded by both dir_highest_used and dir_alloc. */ + for (uint64_t slice_id = 0; slice_id <= ar->dir_highest_used && slice_id < ar->dir_alloc; slice_id++) { + arSlice *s = ar->dir[slice_id]; + if (!s) continue; /* NULL entry: skip. */ + if (!aofEmitSliceElements(r, key, s, slice_id, ar->slice_size, + &count, &items)) return 0; + } + } + + /* If insert_idx is set, emit ARSEEK command to restore it. + * When insert_idx == UINT64_MAX-1, we emit ARSEEK UINT64_MAX which + * correctly sets insert_idx back to UINT64_MAX-1 (terminal state).
*/ + if (ar->insert_idx != AR_INSERT_IDX_NONE) { + /* ARSEEK key insert_idx+1 (ARSEEK sets position for next insert) */ + if (!rioWriteBulkCount(r,'*',3) || + !rioWriteBulkString(r,"ARSEEK",6) || + !rioWriteBulkObject(r,key) || + !rioWriteBulkUnsignedLongLong(r, ar->insert_idx + 1)) + { + return 0; + } + } + + return 1; +} + int rewriteObject(rio *r, robj *key, robj *o, int dbid, long long expiretime) { /* Save the key and associated value */ if (o->type == OBJ_STRING) { @@ -2534,8 +2646,12 @@ int rewriteObject(rio *r, robj *key, robj *o, int dbid, long long expiretime) { if (rewriteHashObject(r,key,o) == 0) return C_ERR; } else if (o->type == OBJ_STREAM) { if (rewriteStreamObject(r,key,o) == 0) return C_ERR; +#ifdef ENABLE_GCRA } else if (o->type == OBJ_GCRA) { if (rewriteGCRAObject(r,key,o) == 0) return C_ERR; +#endif + } else if (o->type == OBJ_ARRAY) { + if (rewriteArrayObject(r,key,o) == 0) return C_ERR; } else if (o->type == OBJ_MODULE) { if (rewriteModuleObject(r,key,o,dbid) == 0) return C_ERR; } else { diff --git a/src/atomicvar.h b/src/atomicvar.h index 3c332ee69..43227639b 100644 --- a/src/atomicvar.h +++ b/src/atomicvar.h @@ -183,4 +183,23 @@ #error "Unable to determine atomic operations for your platform" #endif + +/* atomicIncrGetSingleWriter(var, delta, newvalue_var) + * + * Adds `delta` to `var` and writes the resulting value to `newvalue_var`. + * Same end result as atomicIncrGet() but implemented as load+add+store instead + * of an atomic read-modify-write. This avoids the `lock` prefix on x86 + * (~20-40 cycles vs ~2-3 for plain load+store). + * + * SAFETY: the caller MUST guarantee that no other thread ever writes to `var` + * (no atomicIncr, no atomicSet, no other call to this macro from a different + * thread). Concurrent writers cause silent lost updates. Readers on other + * threads using atomicGet are fine: they will observe either the pre or + * post update value. 
*/ +#define atomicIncrGetSingleWriter(var, delta, newvalue_var) do { \ + atomicGet((var), (newvalue_var)); \ + (newvalue_var) += (delta); \ + atomicSet((var), (newvalue_var)); \ +} while(0) + #endif /* __ATOMIC_VAR_H */ diff --git a/src/blocked.c b/src/blocked.c index b973adeaf..74558b485 100644 --- a/src/blocked.c +++ b/src/blocked.c @@ -699,7 +699,13 @@ static void unblockClientOnKey(client *c, robj *key) { client *old_client = server.current_client; server.current_client = c; enterExecutionUnit(1, 0); - processCommandAndResetClient(c); + if (processCommandAndResetClient(c) == C_ERR) { + /* Client was freed during command processing, exit immediately */ + exitExecutionUnit(); + server.current_client = old_client; + return; + } + if (!(c->flags & CLIENT_BLOCKED)) { if (c->flags & CLIENT_MODULE) { moduleCallCommandUnblockedHandler(c); diff --git a/src/cluster.c b/src/cluster.c index b831c203a..637b5dd9a 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -804,7 +804,12 @@ int verifyClusterNodeId(const char *name, int length) { } int isValidAuxChar(int c) { - return isalnum(c) || (strchr("!#$%&()*+:;<>?@[]^{|}~", c) == NULL); + /* Reject control characters (0x00-0x1F and 0x7F). */ + if (iscntrl(c)) { + return 0; + } + /* Reject forbidden characters including nodes.conf delimiters and special parsing characters */ + return isalnum(c) || (strchr("!#$%&()*+:;<>?@[]^{|}~,= \"'\\", c) == NULL); } int isValidAuxString(char *s, unsigned int length) { @@ -1741,7 +1746,7 @@ unsigned int clusterDelKeysInSlot(unsigned int hashslot, int by_command) { * just moved to another node. The modules needs to know that these * keys are no longer available locally, so just send the keyspace * notification to the modules, but not to clients. 
*/ - moduleNotifyKeyspaceEvent(NOTIFY_GENERIC, "del", key, server.db[0].id); + moduleNotifyKeyspaceEvent(NOTIFY_GENERIC, "del", key, server.db[0].id, NULL, 0); } exitExecutionUnit(); postExecutionUnitOperations(); diff --git a/src/cluster_asm.c b/src/cluster_asm.c index 01a071167..ac94a5103 100644 --- a/src/cluster_asm.c +++ b/src/cluster_asm.c @@ -589,7 +589,7 @@ size_t asmGetImportInputBufferSize(void) { return 0; } -size_t asmGetMigrateOutputBufferSize(void) { +size_t asmGetMigrateOutputMemoryUsage(void) { if (!asmManager || listLength(asmManager->tasks) == 0) return 0; asmTask *task = listNodeValue(listFirst(asmManager->tasks)); @@ -3662,7 +3662,7 @@ void asmActiveTrimDeleteKey(redisDb *db, robj *keyobj, int migration_cleanup) { * to another node. The modules need to know that these keys are no longer * available locally, so just send the keyspace notification to the modules, * but not to clients. */ - moduleNotifyKeyspaceEvent(NOTIFY_KEY_TRIMMED, "key_trimmed", keyobj, db->id); + moduleNotifyKeyspaceEvent(NOTIFY_KEY_TRIMMED, "key_trimmed", keyobj, db->id, NULL, 0); } else { /* Not a migration cleanup, the key is really deleted from the database, * need to notify the clients. 
*/ diff --git a/src/cluster_asm.h b/src/cluster_asm.h index 475c97113..b08837b8e 100644 --- a/src/cluster_asm.h +++ b/src/cluster_asm.h @@ -37,7 +37,7 @@ struct slotRangeArray *asmTaskGetSlotRanges(const char *task_id); int asmNotifyConfigUpdated(struct asmTask *task, sds *err); size_t asmGetPeakSyncBufferSize(void); size_t asmGetImportInputBufferSize(void); -size_t asmGetMigrateOutputBufferSize(void); +size_t asmGetMigrateOutputMemoryUsage(void); int clusterAsmCancel(const char *task_id, const char *reason); int clusterAsmCancelBySlot(int slot, const char *reason); int clusterAsmCancelBySlotRangeArray(struct slotRangeArray *slots, const char *reason); diff --git a/src/commands.def b/src/commands.def index fed08eda3..9b5692aa3 100644 --- a/src/commands.def +++ b/src/commands.def @@ -24,13 +24,545 @@ const char *COMMAND_GROUP_STR[] = { "geo", "stream", "bitmap", + "array", "module", +#ifdef ENABLE_GCRA "rate_limit" +#endif }; const char *commandGroupStr(int index) { return COMMAND_GROUP_STR[index]; } +/********** ARCOUNT ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* ARCOUNT history */ +#define ARCOUNT_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* ARCOUNT tips */ +#define ARCOUNT_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* ARCOUNT key specs */ +keySpec ARCOUNT_Keyspecs[1] = { +{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* ARCOUNT argument table */ +struct COMMAND_ARG ARCOUNT_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/********** ARDEL ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* ARDEL history */ +#define ARDEL_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* ARDEL tips */ +#define ARDEL_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* ARDEL key specs */ +keySpec ARDEL_Keyspecs[1] = { +{NULL,CMD_KEY_RW|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif 
+ +/* ARDEL argument table */ +struct COMMAND_ARG ARDEL_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("index",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/********** ARDELRANGE ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* ARDELRANGE history */ +#define ARDELRANGE_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* ARDELRANGE tips */ +#define ARDELRANGE_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* ARDELRANGE key specs */ +keySpec ARDELRANGE_Keyspecs[1] = { +{NULL,CMD_KEY_RW|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* ARDELRANGE range argument table */ +struct COMMAND_ARG ARDELRANGE_range_Subargs[] = { +{MAKE_ARG("start",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("end",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* ARDELRANGE argument table */ +struct COMMAND_ARG ARDELRANGE_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("range",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,2,NULL),.subargs=ARDELRANGE_range_Subargs}, +}; + +/********** ARGET ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* ARGET history */ +#define ARGET_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* ARGET tips */ +#define ARGET_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* ARGET key specs */ +keySpec ARGET_Keyspecs[1] = { +{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* ARGET argument table */ +struct COMMAND_ARG ARGET_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("index",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/********** ARGETRANGE ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* ARGETRANGE history */ +#define ARGETRANGE_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* 
ARGETRANGE tips */ +#define ARGETRANGE_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* ARGETRANGE key specs */ +keySpec ARGETRANGE_Keyspecs[1] = { +{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* ARGETRANGE argument table */ +struct COMMAND_ARG ARGETRANGE_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("start",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("end",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/********** ARGREP ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* ARGREP history */ +#define ARGREP_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* ARGREP tips */ +#define ARGREP_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* ARGREP key specs */ +keySpec ARGREP_Keyspecs[1] = { +{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* ARGREP predicate exact argument table */ +struct COMMAND_ARG ARGREP_predicate_exact_Subargs[] = { +{MAKE_ARG("exact",ARG_TYPE_PURE_TOKEN,-1,"EXACT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("string",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* ARGREP predicate match argument table */ +struct COMMAND_ARG ARGREP_predicate_match_Subargs[] = { +{MAKE_ARG("match",ARG_TYPE_PURE_TOKEN,-1,"MATCH",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("string",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* ARGREP predicate glob argument table */ +struct COMMAND_ARG ARGREP_predicate_glob_Subargs[] = { +{MAKE_ARG("glob",ARG_TYPE_PURE_TOKEN,-1,"GLOB",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("pattern",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* ARGREP predicate re argument table */ +struct COMMAND_ARG ARGREP_predicate_re_Subargs[] = { +{MAKE_ARG("re",ARG_TYPE_PURE_TOKEN,-1,"RE",NULL,NULL,CMD_ARG_NONE,0,NULL)}, 
+{MAKE_ARG("pattern",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* ARGREP predicate argument table */ +struct COMMAND_ARG ARGREP_predicate_Subargs[] = { +{MAKE_ARG("exact",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=ARGREP_predicate_exact_Subargs}, +{MAKE_ARG("match",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=ARGREP_predicate_match_Subargs}, +{MAKE_ARG("glob",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=ARGREP_predicate_glob_Subargs}, +{MAKE_ARG("re",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=ARGREP_predicate_re_Subargs}, +}; + +/* ARGREP options argument table */ +struct COMMAND_ARG ARGREP_options_Subargs[] = { +{MAKE_ARG("and",ARG_TYPE_PURE_TOKEN,-1,"AND",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("or",ARG_TYPE_PURE_TOKEN,-1,"OR",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("limit",ARG_TYPE_INTEGER,-1,"LIMIT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("withvalues",ARG_TYPE_PURE_TOKEN,-1,"WITHVALUES",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("nocase",ARG_TYPE_PURE_TOKEN,-1,"NOCASE",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* ARGREP argument table */ +struct COMMAND_ARG ARGREP_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("start",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("end",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("predicate",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,4,NULL),.subargs=ARGREP_predicate_Subargs}, +{MAKE_ARG("options",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,5,NULL),.subargs=ARGREP_options_Subargs}, +}; + +/********** ARINFO ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* ARINFO history */ +#define ARINFO_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* ARINFO tips */ +#define ARINFO_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* ARINFO key specs */ +keySpec ARINFO_Keyspecs[1] = { 
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* ARINFO argument table */ +struct COMMAND_ARG ARINFO_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("full",ARG_TYPE_PURE_TOKEN,-1,"FULL",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)}, +}; + +/********** ARINSERT ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* ARINSERT history */ +#define ARINSERT_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* ARINSERT tips */ +#define ARINSERT_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* ARINSERT key specs */ +keySpec ARINSERT_Keyspecs[1] = { +{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* ARINSERT argument table */ +struct COMMAND_ARG ARINSERT_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("value",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/********** ARLASTITEMS ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* ARLASTITEMS history */ +#define ARLASTITEMS_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* ARLASTITEMS tips */ +#define ARLASTITEMS_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* ARLASTITEMS key specs */ +keySpec ARLASTITEMS_Keyspecs[1] = { +{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* ARLASTITEMS argument table */ +struct COMMAND_ARG ARLASTITEMS_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("rev",ARG_TYPE_PURE_TOKEN,-1,"REV",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)}, +}; + +/********** ARLEN ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* ARLEN history */ +#define ARLEN_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* ARLEN tips */ +#define ARLEN_Tips NULL +#endif + +#ifndef 
SKIP_CMD_KEY_SPECS_TABLE +/* ARLEN key specs */ +keySpec ARLEN_Keyspecs[1] = { +{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* ARLEN argument table */ +struct COMMAND_ARG ARLEN_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/********** ARMGET ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* ARMGET history */ +#define ARMGET_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* ARMGET tips */ +#define ARMGET_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* ARMGET key specs */ +keySpec ARMGET_Keyspecs[1] = { +{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* ARMGET argument table */ +struct COMMAND_ARG ARMGET_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("index",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/********** ARMSET ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* ARMSET history */ +#define ARMSET_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* ARMSET tips */ +#define ARMSET_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* ARMSET key specs */ +keySpec ARMSET_Keyspecs[1] = { +{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* ARMSET data argument table */ +struct COMMAND_ARG ARMSET_data_Subargs[] = { +{MAKE_ARG("index",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("value",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* ARMSET argument table */ +struct COMMAND_ARG ARMSET_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("data",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,2,NULL),.subargs=ARMSET_data_Subargs}, +}; + +/********** ARNEXT ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* ARNEXT history */ +#define 
ARNEXT_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* ARNEXT tips */ +#define ARNEXT_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* ARNEXT key specs */ +keySpec ARNEXT_Keyspecs[1] = { +{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* ARNEXT argument table */ +struct COMMAND_ARG ARNEXT_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/********** AROP ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* AROP history */ +#define AROP_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* AROP tips */ +#define AROP_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* AROP key specs */ +keySpec AROP_Keyspecs[1] = { +{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* AROP operation match argument table */ +struct COMMAND_ARG AROP_operation_match_Subargs[] = { +{MAKE_ARG("match",ARG_TYPE_PURE_TOKEN,-1,"MATCH",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("value",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* AROP operation argument table */ +struct COMMAND_ARG AROP_operation_Subargs[] = { +{MAKE_ARG("sum",ARG_TYPE_PURE_TOKEN,-1,"SUM",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("min",ARG_TYPE_PURE_TOKEN,-1,"MIN",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("max",ARG_TYPE_PURE_TOKEN,-1,"MAX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("and",ARG_TYPE_PURE_TOKEN,-1,"AND",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("or",ARG_TYPE_PURE_TOKEN,-1,"OR",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("xor",ARG_TYPE_PURE_TOKEN,-1,"XOR",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("match",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=AROP_operation_match_Subargs}, +{MAKE_ARG("used",ARG_TYPE_PURE_TOKEN,-1,"USED",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* AROP argument table */ +struct COMMAND_ARG AROP_Args[] = { 
+{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("start",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("end",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("operation",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,8,NULL),.subargs=AROP_operation_Subargs}, +}; + +/********** ARRING ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* ARRING history */ +#define ARRING_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* ARRING tips */ +#define ARRING_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* ARRING key specs */ +keySpec ARRING_Keyspecs[1] = { +{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* ARRING argument table */ +struct COMMAND_ARG ARRING_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("size",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("value",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/********** ARSCAN ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* ARSCAN history */ +#define ARSCAN_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* ARSCAN tips */ +#define ARSCAN_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* ARSCAN key specs */ +keySpec ARSCAN_Keyspecs[1] = { +{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* ARSCAN argument table */ +struct COMMAND_ARG ARSCAN_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("start",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("end",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("limit",ARG_TYPE_INTEGER,-1,"LIMIT",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)}, +}; + +/********** ARSEEK ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* ARSEEK history */ +#define ARSEEK_History NULL +#endif + 
+#ifndef SKIP_CMD_TIPS_TABLE +/* ARSEEK tips */ +#define ARSEEK_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* ARSEEK key specs */ +keySpec ARSEEK_Keyspecs[1] = { +{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* ARSEEK argument table */ +struct COMMAND_ARG ARSEEK_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("index",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/********** ARSET ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* ARSET history */ +#define ARSET_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* ARSET tips */ +#define ARSET_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* ARSET key specs */ +keySpec ARSET_Keyspecs[1] = { +{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* ARSET argument table */ +struct COMMAND_ARG ARSET_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("index",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("value",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + /********** BITCOUNT ********************/ #ifndef SKIP_CMD_HISTORY_TABLE @@ -5380,59 +5912,6 @@ struct COMMAND_ARG UNSUBSCRIBE_Args[] = { {MAKE_ARG("channel",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)}, }; -/********** GCRA ********************/ - -#ifndef SKIP_CMD_HISTORY_TABLE -/* GCRA history */ -#define GCRA_History NULL -#endif - -#ifndef SKIP_CMD_TIPS_TABLE -/* GCRA tips */ -#define GCRA_Tips NULL -#endif - -#ifndef SKIP_CMD_KEY_SPECS_TABLE -/* GCRA key specs */ -keySpec GCRA_Keyspecs[1] = { -{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} -}; -#endif - -/* GCRA argument table */ -struct COMMAND_ARG GCRA_Args[] = { 
-{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, -{MAKE_ARG("max-burst",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, -{MAKE_ARG("tokens-per-period",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, -{MAKE_ARG("period",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, -{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,"TOKENS",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)}, -}; - -/********** GCRASETVALUE ********************/ - -#ifndef SKIP_CMD_HISTORY_TABLE -/* GCRASETVALUE history */ -#define GCRASETVALUE_History NULL -#endif - -#ifndef SKIP_CMD_TIPS_TABLE -/* GCRASETVALUE tips */ -#define GCRASETVALUE_Tips NULL -#endif - -#ifndef SKIP_CMD_KEY_SPECS_TABLE -/* GCRASETVALUE key specs */ -keySpec GCRASETVALUE_Keyspecs[1] = { -{NULL,CMD_KEY_OW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} -}; -#endif - -/* GCRASETVALUE argument table */ -struct COMMAND_ARG GCRASETVALUE_Args[] = { -{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, -{MAKE_ARG("tat",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, -}; - /********** EVAL ********************/ #ifndef SKIP_CMD_HISTORY_TABLE @@ -11355,6 +11834,58 @@ struct COMMAND_ARG INCRBYFLOAT_Args[] = { {MAKE_ARG("increment",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, }; +/********** INCREX ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* INCREX history */ +#define INCREX_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* INCREX tips */ +#define INCREX_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* INCREX key specs */ +keySpec INCREX_Keyspecs[1] = { +{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* INCREX increment argument table */ +struct COMMAND_ARG INCREX_increment_Subargs[] = { +{MAKE_ARG("float",ARG_TYPE_DOUBLE,-1,"BYFLOAT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, 
+{MAKE_ARG("integer",ARG_TYPE_INTEGER,-1,"BYINT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* INCREX overflow_block argument table */ +struct COMMAND_ARG INCREX_overflow_block_Subargs[] = { +{MAKE_ARG("fail",ARG_TYPE_PURE_TOKEN,-1,"FAIL",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("sat",ARG_TYPE_PURE_TOKEN,-1,"SAT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("reject",ARG_TYPE_PURE_TOKEN,-1,"REJECT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* INCREX expiration argument table */ +struct COMMAND_ARG INCREX_expiration_Subargs[] = { +{MAKE_ARG("seconds",ARG_TYPE_INTEGER,-1,"EX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("milliseconds",ARG_TYPE_INTEGER,-1,"PX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("unix-time-seconds",ARG_TYPE_UNIX_TIME,-1,"EXAT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("unix-time-milliseconds",ARG_TYPE_UNIX_TIME,-1,"PXAT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("persist",ARG_TYPE_PURE_TOKEN,-1,"PERSIST",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* INCREX argument table */ +struct COMMAND_ARG INCREX_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("increment",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=INCREX_increment_Subargs}, +{MAKE_ARG("overflow-block",ARG_TYPE_ONEOF,-1,"OVERFLOW","Out-of-bounds policy; defaults to FAIL. 
Missing LBOUND/UBOUND default to the type limits (LLONG_MIN/LLONG_MAX for BYINT, -LDBL_MAX/LDBL_MAX for BYFLOAT).",NULL,CMD_ARG_OPTIONAL,3,NULL),.subargs=INCREX_overflow_block_Subargs}, +{MAKE_ARG("lowerbound",ARG_TYPE_STRING,-1,"LBOUND","Integer when used with BYINT, floating-point when used with BYFLOAT.",NULL,CMD_ARG_OPTIONAL,0,NULL)}, +{MAKE_ARG("upperbound",ARG_TYPE_STRING,-1,"UBOUND","Integer when used with BYINT, floating-point when used with BYFLOAT.",NULL,CMD_ARG_OPTIONAL,0,NULL)}, +{MAKE_ARG("expiration",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,5,NULL),.subargs=INCREX_expiration_Subargs}, +{MAKE_ARG("enx",ARG_TYPE_PURE_TOKEN,-1,"ENX","Only set the expiration if the key currently has no TTL. Requires one of EX/PX/EXAT/PXAT; cannot be combined with PERSIST.",NULL,CMD_ARG_OPTIONAL,0,NULL)}, +}; + /********** LCS ********************/ #ifndef SKIP_CMD_HISTORY_TABLE @@ -11824,6 +12355,25 @@ struct COMMAND_ARG WATCH_Args[] = { /* Main command table */ struct COMMAND_STRUCT redisCommandTable[] = { +/* array */ +{MAKE_CMD("arcount","Returns the number of non-empty elements in an array.","O(1)","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,ARCOUNT_History,0,ARCOUNT_Tips,0,arcountCommand,2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_ARRAY,ARCOUNT_Keyspecs,1,NULL,1),.args=ARCOUNT_Args}, +{MAKE_CMD("ardel","Deletes elements at the specified indices in an array.","O(N) where N is the number of indices to delete","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,ARDEL_History,0,ARDEL_Tips,0,ardelCommand,-3,CMD_WRITE|CMD_FAST,ACL_CATEGORY_ARRAY,ARDEL_Keyspecs,1,NULL,2),.args=ARDEL_Args}, +{MAKE_CMD("ardelrange","Deletes elements in one or more ranges.","Proportional to the number of existing elements / slices touched, not to the numeric span of the requested 
ranges","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,ARDELRANGE_History,0,ARDELRANGE_Tips,0,ardelrangeCommand,-4,CMD_WRITE,ACL_CATEGORY_ARRAY,ARDELRANGE_Keyspecs,1,NULL,2),.args=ARDELRANGE_Args}, +{MAKE_CMD("arget","Gets the value at an index in an array.","O(1)","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,ARGET_History,0,ARGET_Tips,0,argetCommand,3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_ARRAY,ARGET_Keyspecs,1,NULL,2),.args=ARGET_Args}, +{MAKE_CMD("argetrange","Gets values in a range of indices.","O(N) where N is the range length","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,ARGETRANGE_History,0,ARGETRANGE_Tips,0,argetrangeCommand,4,CMD_READONLY,ACL_CATEGORY_ARRAY,ARGETRANGE_Keyspecs,1,NULL,3),.args=ARGETRANGE_Args}, +{MAKE_CMD("argrep","Searches array elements in a range using textual predicates.","O(P * C) where P is the number of visited positions in touched slices and C is the cost of evaluating the predicates on one existing element.","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,ARGREP_History,0,ARGREP_Tips,0,argrepCommand,-6,CMD_READONLY,ACL_CATEGORY_ARRAY,ARGREP_Keyspecs,1,NULL,5),.args=ARGREP_Args}, +{MAKE_CMD("arinfo","Returns metadata about an array.","O(1), or O(N) with FULL option where N is the number of slices.","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,ARINFO_History,0,ARINFO_Tips,0,arinfoCommand,-2,CMD_READONLY,ACL_CATEGORY_ARRAY,ARINFO_Keyspecs,1,NULL,2),.args=ARINFO_Args}, +{MAKE_CMD("arinsert","Inserts one or more values at consecutive indices.","O(N) where N is the number of values","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,ARINSERT_History,0,ARINSERT_Tips,0,arinsertCommand,-3,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_ARRAY,ARINSERT_Keyspecs,1,NULL,2),.args=ARINSERT_Args}, +{MAKE_CMD("arlastitems","Returns the most recently inserted elements.","O(N) where N is the 
count","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,ARLASTITEMS_History,0,ARLASTITEMS_Tips,0,arlastitemsCommand,-3,CMD_READONLY,ACL_CATEGORY_ARRAY,ARLASTITEMS_Keyspecs,1,NULL,3),.args=ARLASTITEMS_Args}, +{MAKE_CMD("arlen","Returns the length of an array (max index + 1).","O(1)","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,ARLEN_History,0,ARLEN_Tips,0,arlenCommand,2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_ARRAY,ARLEN_Keyspecs,1,NULL,1),.args=ARLEN_Args}, +{MAKE_CMD("armget","Gets values at multiple indices in an array.","O(N) where N is the number of indices","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,ARMGET_History,0,ARMGET_Tips,0,armgetCommand,-3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_ARRAY,ARMGET_Keyspecs,1,NULL,2),.args=ARMGET_Args}, +{MAKE_CMD("armset","Sets multiple index-value pairs in an array.","O(N) where N is the number of pairs","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,ARMSET_History,0,ARMSET_Tips,0,armsetCommand,-4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_ARRAY,ARMSET_Keyspecs,1,NULL,2),.args=ARMSET_Args}, +{MAKE_CMD("arnext","Returns the next index ARINSERT would use.","O(1)","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,ARNEXT_History,0,ARNEXT_Tips,0,arnextCommand,2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_ARRAY,ARNEXT_Keyspecs,1,NULL,1),.args=ARNEXT_Args}, +{MAKE_CMD("arop","Performs aggregate operations on array elements in a range.","O(P) where P is visited positions in touched slices (dense scanned slots + sparse entries), with worst-case O(|end-start|+1) and typical case close to O(N), where N is the number of existing elements in range.","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,AROP_History,0,AROP_Tips,0,aropCommand,-5,CMD_READONLY,ACL_CATEGORY_ARRAY,AROP_Keyspecs,1,NULL,4),.args=AROP_Args}, +{MAKE_CMD("arring","Inserts values into a ring buffer of specified size, wrapping and truncating as needed.","O(M) normally, O(N+M) on ring resize, where N is the maximum of 
the old and new ring size and M is the number of inserted values","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,ARRING_History,0,ARRING_Tips,0,arringCommand,-4,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_ARRAY,ARRING_Keyspecs,1,NULL,3),.args=ARRING_Args}, +{MAKE_CMD("arscan","Iterates existing elements in a range, returning index-value pairs.","O(P) where P is visited positions in touched slices (dense scanned slots + sparse entries), with worst-case O(|end-start|+1) and typical case close to O(N), where N is the number of existing elements in range.","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,ARSCAN_History,0,ARSCAN_Tips,0,arscanCommand,-4,CMD_READONLY,ACL_CATEGORY_ARRAY,ARSCAN_Keyspecs,1,NULL,4),.args=ARSCAN_Args}, +{MAKE_CMD("arseek","Sets the ARINSERT / ARRING cursor to a specific index.","O(1)","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,ARSEEK_History,0,ARSEEK_Tips,0,arseekCommand,3,CMD_WRITE|CMD_FAST,ACL_CATEGORY_ARRAY,ARSEEK_Keyspecs,1,NULL,2),.args=ARSEEK_Args}, +{MAKE_CMD("arset","Sets one or more contiguous values starting at an index in an array.","O(N) where N is the number of values","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,ARSET_History,0,ARSET_Tips,0,arsetCommand,-4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_ARRAY,ARSET_Keyspecs,1,NULL,3),.args=ARSET_Args}, /* bitmap */ {MAKE_CMD("bitcount","Counts the number of set bits (population counting) in a string.","O(N)","2.6.0",CMD_DOC_NONE,NULL,NULL,"bitmap",COMMAND_GROUP_BITMAP,BITCOUNT_History,1,BITCOUNT_Tips,0,bitcountCommand,-2,CMD_READONLY,ACL_CATEGORY_BITMAP,BITCOUNT_Keyspecs,1,NULL,2),.args=BITCOUNT_Args}, {MAKE_CMD("bitfield","Performs arbitrary bitfield integer operations on strings.","O(1) for each subcommand specified","3.2.0",CMD_DOC_NONE,NULL,NULL,"bitmap",COMMAND_GROUP_BITMAP,BITFIELD_History,0,BITFIELD_Tips,0,bitfieldCommand,-2,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_BITMAP,BITFIELD_Keyspecs,1,bitfieldGetKeys,2),.args=BITFIELD_Args}, @@ 
-11946,18 +12496,15 @@ struct COMMAND_STRUCT redisCommandTable[] = { {MAKE_CMD("rpush","Appends one or more elements to a list. Creates the key if it doesn't exist.","O(1) for each element added, so O(N) to add N elements when the command is called with multiple arguments.","1.0.0",CMD_DOC_NONE,NULL,NULL,"list",COMMAND_GROUP_LIST,RPUSH_History,1,RPUSH_Tips,0,rpushCommand,-3,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_LIST,RPUSH_Keyspecs,1,NULL,2),.args=RPUSH_Args}, {MAKE_CMD("rpushx","Appends an element to a list only when the list exists.","O(1) for each element added, so O(N) to add N elements when the command is called with multiple arguments.","2.2.0",CMD_DOC_NONE,NULL,NULL,"list",COMMAND_GROUP_LIST,RPUSHX_History,1,RPUSHX_Tips,0,rpushxCommand,-3,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_LIST,RPUSHX_Keyspecs,1,NULL,2),.args=RPUSHX_Args}, /* pubsub */ -{MAKE_CMD("psubscribe","Listens for messages published to channels that match one or more patterns.","O(N) where N is the number of patterns to subscribe to.","2.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,PSUBSCRIBE_History,0,PSUBSCRIBE_Tips,0,psubscribeCommand,-2,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,PSUBSCRIBE_Keyspecs,0,NULL,1),.args=PSUBSCRIBE_Args}, +{MAKE_CMD("psubscribe","Listens for messages published to channels that match one or more patterns.","O(N) where N is the number of patterns to subscribe to.","2.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,PSUBSCRIBE_History,0,PSUBSCRIBE_Tips,0,psubscribeCommand,-2,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL|CMD_DENYOOM,0,PSUBSCRIBE_Keyspecs,0,NULL,1),.args=PSUBSCRIBE_Args}, {MAKE_CMD("publish","Posts a message to a channel.","O(N+M) where N is the number of clients subscribed to the receiving channel and M is the total number of subscribed patterns (by any 
client).","2.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,PUBLISH_History,0,PUBLISH_Tips,0,publishCommand,3,CMD_PUBSUB|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_MAY_REPLICATE|CMD_SENTINEL,0,PUBLISH_Keyspecs,0,NULL,2),.args=PUBLISH_Args}, {MAKE_CMD("pubsub","A container for Pub/Sub commands.","Depends on subcommand.","2.8.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,PUBSUB_History,0,PUBSUB_Tips,0,NULL,-2,0,0,PUBSUB_Keyspecs,0,NULL,0),.subcommands=PUBSUB_Subcommands}, {MAKE_CMD("punsubscribe","Stops listening to messages published to channels that match one or more patterns.","O(N) where N is the number of patterns to unsubscribe.","2.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,PUNSUBSCRIBE_History,0,PUNSUBSCRIBE_Tips,0,punsubscribeCommand,-1,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,PUNSUBSCRIBE_Keyspecs,0,NULL,1),.args=PUNSUBSCRIBE_Args}, {MAKE_CMD("spublish","Post a message to a shard channel","O(N) where N is the number of clients subscribed to the receiving shard channel.","7.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,SPUBLISH_History,0,SPUBLISH_Tips,0,spublishCommand,3,CMD_PUBSUB|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_MAY_REPLICATE,0,SPUBLISH_Keyspecs,1,NULL,2),.args=SPUBLISH_Args}, -{MAKE_CMD("ssubscribe","Listens for messages published to shard channels.","O(N) where N is the number of shard channels to subscribe to.","7.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,SSUBSCRIBE_History,0,SSUBSCRIBE_Tips,0,ssubscribeCommand,-2,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,SSUBSCRIBE_Keyspecs,1,NULL,1),.args=SSUBSCRIBE_Args}, -{MAKE_CMD("subscribe","Listens for messages published to channels.","O(N) where N is the number of channels to subscribe to.","2.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,SUBSCRIBE_History,0,SUBSCRIBE_Tips,0,subscribeCommand,-2,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,SUBSCRIBE_Keyspecs,0,NULL,1),.args=SUBSCRIBE_Args}, 
+{MAKE_CMD("ssubscribe","Listens for messages published to shard channels.","O(N) where N is the number of shard channels to subscribe to.","7.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,SSUBSCRIBE_History,0,SSUBSCRIBE_Tips,0,ssubscribeCommand,-2,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_DENYOOM,0,SSUBSCRIBE_Keyspecs,1,NULL,1),.args=SSUBSCRIBE_Args}, +{MAKE_CMD("subscribe","Listens for messages published to channels.","O(N) where N is the number of channels to subscribe to.","2.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,SUBSCRIBE_History,0,SUBSCRIBE_Tips,0,subscribeCommand,-2,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL|CMD_DENYOOM,0,SUBSCRIBE_Keyspecs,0,NULL,1),.args=SUBSCRIBE_Args}, {MAKE_CMD("sunsubscribe","Stops listening to messages posted to shard channels.","O(N) where N is the number of shard channels to unsubscribe.","7.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,SUNSUBSCRIBE_History,0,SUNSUBSCRIBE_Tips,0,sunsubscribeCommand,-1,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,SUNSUBSCRIBE_Keyspecs,1,NULL,1),.args=SUNSUBSCRIBE_Args}, {MAKE_CMD("unsubscribe","Stops listening to messages posted to channels.","O(N) where N is the number of channels to unsubscribe.","2.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,UNSUBSCRIBE_History,0,UNSUBSCRIBE_Tips,0,unsubscribeCommand,-1,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,UNSUBSCRIBE_Keyspecs,0,NULL,1),.args=UNSUBSCRIBE_Args}, -/* rate_limit */ -{MAKE_CMD("gcra","Rate limit via GCRA (Generic Cell Rate Algorithm).","O(1)","8.8.0",CMD_DOC_NONE,NULL,NULL,"rate_limit",COMMAND_GROUP_RATE_LIMIT,GCRA_History,0,GCRA_Tips,0,gcraCommand,-5,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_RATE_LIMIT,GCRA_Keyspecs,1,NULL,5),.args=GCRA_Args}, -{MAKE_CMD("gcrasetvalue","An internal command for recording a GCRA TAT value during AOF rewrite and 
replication.","O(1)","8.8.0",CMD_DOC_NONE,NULL,NULL,"rate_limit",COMMAND_GROUP_RATE_LIMIT,GCRASETVALUE_History,0,GCRASETVALUE_Tips,0,gcraSetValueCommand,3,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_RATE_LIMIT,GCRASETVALUE_Keyspecs,1,NULL,2),.args=GCRASETVALUE_Args}, /* scripting */ {MAKE_CMD("eval","Executes a server-side Lua script.","Depends on the script that is executed.","2.6.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,EVAL_History,0,EVAL_Tips,0,evalCommand,-3,CMD_NOSCRIPT|CMD_SKIP_MONITOR|CMD_MAY_REPLICATE|CMD_NO_MANDATORY_KEYS|CMD_STALE,ACL_CATEGORY_SCRIPTING,EVAL_Keyspecs,1,evalGetKeys,4),.args=EVAL_Args}, {MAKE_CMD("evalsha","Executes a server-side Lua script by SHA1 digest.","Depends on the script that is executed.","2.6.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,EVALSHA_History,0,EVALSHA_Tips,0,evalShaCommand,-3,CMD_NOSCRIPT|CMD_SKIP_MONITOR|CMD_MAY_REPLICATE|CMD_NO_MANDATORY_KEYS|CMD_STALE,ACL_CATEGORY_SCRIPTING,EVALSHA_Keyspecs,1,evalGetKeys,4),.args=EVALSHA_Args}, @@ -12090,6 +12637,7 @@ struct COMMAND_STRUCT redisCommandTable[] = { {MAKE_CMD("incr","Increments the integer value of a key by one. Uses 0 as initial value if the key doesn't exist.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,INCR_History,0,INCR_Tips,0,incrCommand,2,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_STRING,INCR_Keyspecs,1,NULL,1),.args=INCR_Args}, {MAKE_CMD("incrby","Increments the integer value of a key by a number. Uses 0 as initial value if the key doesn't exist.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,INCRBY_History,0,INCRBY_Tips,0,incrbyCommand,3,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_STRING,INCRBY_Keyspecs,1,NULL,2),.args=INCRBY_Args}, {MAKE_CMD("incrbyfloat","Increment the floating point value of a key by a number. 
Uses 0 as initial value if the key doesn't exist.","O(1)","2.6.0",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,INCRBYFLOAT_History,0,INCRBYFLOAT_Tips,0,incrbyfloatCommand,3,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_STRING,INCRBYFLOAT_Keyspecs,1,NULL,2),.args=INCRBYFLOAT_Args}, +{MAKE_CMD("increx","Increments the numeric value of a key by a number and sets its expiration time. Uses 0 as initial value if the key doesn't exist.","O(1)","8.8.0",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,INCREX_History,0,INCREX_Tips,0,increxCommand,-2,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_STRING,INCREX_Keyspecs,1,NULL,7),.args=INCREX_Args}, {MAKE_CMD("lcs","Finds the longest common substring.","O(N*M) where N and M are the lengths of s1 and s2, respectively","7.0.0",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,LCS_History,0,LCS_Tips,0,lcsCommand,-3,CMD_READONLY,ACL_CATEGORY_STRING,LCS_Keyspecs,1,NULL,6),.args=LCS_Args}, {MAKE_CMD("mget","Atomically returns the string values of one or more keys.","O(N) where N is the number of keys to retrieve.","1.0.0",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,MGET_History,0,MGET_Tips,1,mgetCommand,-2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_STRING,MGET_Keyspecs,1,NULL,1),.args=MGET_Args}, {MAKE_CMD("mset","Atomically creates or modifies the string values of one or more keys.","O(N) where N is the number of keys to set.","1.0.1",CMD_DOC_NONE,NULL,NULL,"string",COMMAND_GROUP_STRING,MSET_History,0,MSET_Tips,2,msetCommand,-3,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_STRING,MSET_Keyspecs,1,NULL,1),.args=MSET_Args}, diff --git a/src/commands/arcount.json b/src/commands/arcount.json new file mode 100644 index 000000000..3452a6ec7 --- /dev/null +++ b/src/commands/arcount.json @@ -0,0 +1,48 @@ +{ + "ARCOUNT": { + "summary": "Returns the number of non-empty elements in an array.", + "complexity": "O(1)", + "group": "array", + "since": "8.8.0", + "arity": 2, + "function": "arcountCommand", + "command_flags": [ + "READONLY", + 
"FAST" + ], + "acl_categories": [ + "ARRAY" + ], + "key_specs": [ + { + "flags": [ + "RO", + "ACCESS" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "The number of non-empty elements, or 0 if key does not exist.", + "type": "integer" + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + } + ] + } +} diff --git a/src/commands/ardel.json b/src/commands/ardel.json new file mode 100644 index 000000000..e29d56181 --- /dev/null +++ b/src/commands/ardel.json @@ -0,0 +1,53 @@ +{ + "ARDEL": { + "summary": "Deletes elements at the specified indices in an array.", + "complexity": "O(N) where N is the number of indices to delete", + "group": "array", + "since": "8.8.0", + "arity": -3, + "function": "ardelCommand", + "command_flags": [ + "WRITE", + "FAST" + ], + "acl_categories": [ + "ARRAY" + ], + "key_specs": [ + { + "flags": [ + "RW", + "DELETE" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "Number of elements deleted.", + "type": "integer" + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "index", + "type": "integer", + "multiple": true + } + ] + } +} diff --git a/src/commands/ardelrange.json b/src/commands/ardelrange.json new file mode 100644 index 000000000..0ed67ced9 --- /dev/null +++ b/src/commands/ardelrange.json @@ -0,0 +1,62 @@ +{ + "ARDELRANGE": { + "summary": "Deletes elements in one or more ranges.", + "complexity": "Proportional to the number of existing elements / slices touched, not to the numeric span of the requested ranges", + "group": "array", + "since": "8.8.0", + "arity": -4, + "function": "ardelrangeCommand", + "command_flags": [ + "WRITE" + ], + "acl_categories": [ + "ARRAY" + ], + "key_specs": [ + { + 
"flags": [ + "RW", + "DELETE" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "Number of elements deleted.", + "type": "integer" + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "range", + "type": "block", + "multiple": true, + "arguments": [ + { + "name": "start", + "type": "integer" + }, + { + "name": "end", + "type": "integer" + } + ] + } + ] + } +} diff --git a/src/commands/arget.json b/src/commands/arget.json new file mode 100644 index 000000000..481bb4f66 --- /dev/null +++ b/src/commands/arget.json @@ -0,0 +1,60 @@ +{ + "ARGET": { + "summary": "Gets the value at an index in an array.", + "complexity": "O(1)", + "group": "array", + "since": "8.8.0", + "arity": 3, + "function": "argetCommand", + "command_flags": [ + "READONLY", + "FAST" + ], + "acl_categories": [ + "ARRAY" + ], + "key_specs": [ + { + "flags": [ + "RO", + "ACCESS" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "oneOf": [ + { + "description": "The value at the given index.", + "type": "string" + }, + { + "description": "Null reply if key or index does not exist.", + "type": "null" + } + ] + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "index", + "type": "integer" + } + ] + } +} diff --git a/src/commands/argetrange.json b/src/commands/argetrange.json new file mode 100644 index 000000000..02d1fa6f0 --- /dev/null +++ b/src/commands/argetrange.json @@ -0,0 +1,64 @@ +{ + "ARGETRANGE": { + "summary": "Gets values in a range of indices.", + "complexity": "O(N) where N is the range length", + "group": "array", + "since": "8.8.0", + "arity": 4, + "function": "argetrangeCommand", + "command_flags": [ + "READONLY" + ], + "acl_categories": [ + "ARRAY" 
+ ], + "key_specs": [ + { + "flags": [ + "RO", + "ACCESS" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "type": "array", + "items": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] + } + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "start", + "type": "integer" + }, + { + "name": "end", + "type": "integer" + } + ] + } +} diff --git a/src/commands/argrep.json b/src/commands/argrep.json new file mode 100644 index 000000000..4ca4fa6f8 --- /dev/null +++ b/src/commands/argrep.json @@ -0,0 +1,182 @@ +{ + "ARGREP": { + "summary": "Searches array elements in a range using textual predicates.", + "complexity": "O(P * C) where P is the number of visited positions in touched slices and C is the cost of evaluating the predicates on one existing element.", + "group": "array", + "since": "8.8.0", + "arity": -6, + "function": "argrepCommand", + "command_flags": [ + "READONLY" + ], + "acl_categories": [ + "ARRAY" + ], + "key_specs": [ + { + "flags": [ + "RO", + "ACCESS" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "anyOf": [ + { + "description": "Array of matching indexes.", + "type": "array", + "items": { + "type": "integer", + "description": "Index of a matching element" + } + }, + { + "description": "Array of [index, value] pairs. 
Returned in case `WITHVALUES` was used.", + "type": "array", + "items": { + "type": "array", + "minItems": 2, + "maxItems": 2, + "items": [ + { + "type": "integer", + "description": "Index of a matching element" + }, + { + "type": "string", + "description": "Value at that index" + } + ] + } + } + ] + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "start", + "type": "string" + }, + { + "name": "end", + "type": "string" + }, + { + "name": "predicate", + "type": "oneof", + "multiple": true, + "arguments": [ + { + "name": "exact", + "type": "block", + "arguments": [ + { + "name": "exact", + "type": "pure-token", + "token": "EXACT" + }, + { + "name": "string", + "type": "string" + } + ] + }, + { + "name": "match", + "type": "block", + "arguments": [ + { + "name": "match", + "type": "pure-token", + "token": "MATCH" + }, + { + "name": "string", + "type": "string" + } + ] + }, + { + "name": "glob", + "type": "block", + "arguments": [ + { + "name": "glob", + "type": "pure-token", + "token": "GLOB" + }, + { + "name": "pattern", + "type": "string" + } + ] + }, + { + "name": "re", + "type": "block", + "arguments": [ + { + "name": "re", + "type": "pure-token", + "token": "RE" + }, + { + "name": "pattern", + "type": "string" + } + ] + } + ] + }, + { + "name": "options", + "type": "oneof", + "optional": true, + "multiple": true, + "arguments": [ + { + "name": "and", + "type": "pure-token", + "token": "AND" + }, + { + "name": "or", + "type": "pure-token", + "token": "OR" + }, + { + "name": "limit", + "type": "integer", + "token": "LIMIT" + }, + { + "name": "withvalues", + "type": "pure-token", + "token": "WITHVALUES" + }, + { + "name": "nocase", + "type": "pure-token", + "token": "NOCASE" + } + ] + } + ] + } +} diff --git a/src/commands/arinfo.json b/src/commands/arinfo.json new file mode 100644 index 000000000..09b06ef10 --- /dev/null +++ b/src/commands/arinfo.json @@ -0,0 +1,103 @@ +{ + "ARINFO": { + "summary": "Returns 
metadata about an array.", + "complexity": "O(1), or O(N) with FULL option where N is the number of slices.", + "group": "array", + "since": "8.8.0", + "arity": -2, + "function": "arinfoCommand", + "command_flags": [ + "READONLY" + ], + "acl_categories": [ + "ARRAY" + ], + "key_specs": [ + { + "flags": [ + "RO", + "ACCESS" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "type": "object", + "additionalProperties": false, + "properties": { + "count": { + "type": "integer", + "description": "Total number of non-empty elements." + }, + "len": { + "type": "integer", + "description": "Logical length (highest index + 1)." + }, + "next-insert-index": { + "type": "integer", + "description": "Index the next ARINSERT would use, or 0 if unset/exhausted." + }, + "slices": { + "type": "integer", + "description": "Number of allocated slices." + }, + "directory-size": { + "type": "integer", + "description": "Directory allocation capacity (flat dir_alloc or superdir sdir_cap)." + }, + "super-dir-entries": { + "type": "integer", + "description": "Number of super-directory entries (0 if not in superdir mode)." + }, + "slice-size": { + "type": "integer", + "description": "Configured slice size." + }, + "dense-slices": { + "type": "integer", + "description": "Number of dense slices (FULL only)." + }, + "sparse-slices": { + "type": "integer", + "description": "Number of sparse slices (FULL only)." + }, + "avg-dense-size": { + "type": "number", + "description": "Average allocation size of dense slices (FULL only)." + }, + "avg-dense-fill": { + "type": "number", + "description": "Average fill rate of dense slices (FULL only)." + }, + "avg-sparse-size": { + "type": "number", + "description": "Average capacity of sparse slices (FULL only)." 
+ } + } + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "full", + "type": "pure-token", + "token": "FULL", + "optional": true + } + ] + } +} diff --git a/src/commands/arinsert.json b/src/commands/arinsert.json new file mode 100644 index 000000000..6b8c6ed76 --- /dev/null +++ b/src/commands/arinsert.json @@ -0,0 +1,54 @@ +{ + "ARINSERT": { + "summary": "Inserts one or more values at consecutive indices.", + "complexity": "O(N) where N is the number of values", + "group": "array", + "since": "8.8.0", + "arity": -3, + "function": "arinsertCommand", + "command_flags": [ + "WRITE", + "DENYOOM", + "FAST" + ], + "acl_categories": [ + "ARRAY" + ], + "key_specs": [ + { + "flags": [ + "RW", + "UPDATE" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "The last index where a value was inserted.", + "type": "integer" + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "value", + "type": "string", + "multiple": true + } + ] + } +} diff --git a/src/commands/arlastitems.json b/src/commands/arlastitems.json new file mode 100644 index 000000000..ed888bf83 --- /dev/null +++ b/src/commands/arlastitems.json @@ -0,0 +1,66 @@ +{ + "ARLASTITEMS": { + "summary": "Returns the most recently inserted elements.", + "complexity": "O(N) where N is the count", + "group": "array", + "since": "8.8.0", + "arity": -3, + "function": "arlastitemsCommand", + "command_flags": [ + "READONLY" + ], + "acl_categories": [ + "ARRAY" + ], + "key_specs": [ + { + "flags": [ + "RO", + "ACCESS" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "type": "array", + "items": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] + } + }, + "arguments": 
[ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "count", + "type": "integer" + }, + { + "name": "rev", + "type": "pure-token", + "token": "REV", + "optional": true + } + ] + } +} diff --git a/src/commands/arlen.json b/src/commands/arlen.json new file mode 100644 index 000000000..36143dfc7 --- /dev/null +++ b/src/commands/arlen.json @@ -0,0 +1,48 @@ +{ + "ARLEN": { + "summary": "Returns the length of an array (max index + 1).", + "complexity": "O(1)", + "group": "array", + "since": "8.8.0", + "arity": 2, + "function": "arlenCommand", + "command_flags": [ + "READONLY", + "FAST" + ], + "acl_categories": [ + "ARRAY" + ], + "key_specs": [ + { + "flags": [ + "RO", + "ACCESS" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "The length of the array (max index + 1), or 0 if key does not exist.", + "type": "integer" + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + } + ] + } +} diff --git a/src/commands/armget.json b/src/commands/armget.json new file mode 100644 index 000000000..f05023e03 --- /dev/null +++ b/src/commands/armget.json @@ -0,0 +1,62 @@ +{ + "ARMGET": { + "summary": "Gets values at multiple indices in an array.", + "complexity": "O(N) where N is the number of indices", + "group": "array", + "since": "8.8.0", + "arity": -3, + "function": "armgetCommand", + "command_flags": [ + "READONLY", + "FAST" + ], + "acl_categories": [ + "ARRAY" + ], + "key_specs": [ + { + "flags": [ + "RO", + "ACCESS" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "type": "array", + "items": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] + } + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": 
"index", + "type": "integer", + "multiple": true + } + ] + } +} diff --git a/src/commands/armset.json b/src/commands/armset.json new file mode 100644 index 000000000..002f01bc2 --- /dev/null +++ b/src/commands/armset.json @@ -0,0 +1,64 @@ +{ + "ARMSET": { + "summary": "Sets multiple index-value pairs in an array.", + "complexity": "O(N) where N is the number of pairs", + "group": "array", + "since": "8.8.0", + "arity": -4, + "function": "armsetCommand", + "command_flags": [ + "WRITE", + "DENYOOM", + "FAST" + ], + "acl_categories": [ + "ARRAY" + ], + "key_specs": [ + { + "flags": [ + "RW", + "UPDATE" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "Number of new slots that were set (previously empty).", + "type": "integer" + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "data", + "type": "block", + "multiple": true, + "arguments": [ + { + "name": "index", + "type": "integer" + }, + { + "name": "value", + "type": "string" + } + ] + } + ] + } +} diff --git a/src/commands/arnext.json b/src/commands/arnext.json new file mode 100644 index 000000000..f64b178d0 --- /dev/null +++ b/src/commands/arnext.json @@ -0,0 +1,56 @@ +{ + "ARNEXT": { + "summary": "Returns the next index ARINSERT would use.", + "complexity": "O(1)", + "group": "array", + "since": "8.8.0", + "arity": 2, + "function": "arnextCommand", + "command_flags": [ + "READONLY", + "FAST" + ], + "acl_categories": [ + "ARRAY" + ], + "key_specs": [ + { + "flags": [ + "RO", + "ACCESS" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "oneOf": [ + { + "description": "The next index ARINSERT would use. 
Returns 0 for missing keys or when no insert happened yet.", + "type": "integer" + }, + { + "description": "Null when the insertion cursor is exhausted (next insert would overflow).", + "type": "null" + } + ] + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + } + ] + } +} diff --git a/src/commands/arop.json b/src/commands/arop.json new file mode 100644 index 000000000..eb18566bb --- /dev/null +++ b/src/commands/arop.json @@ -0,0 +1,123 @@ +{ + "AROP": { + "summary": "Performs aggregate operations on array elements in a range.", + "complexity": "O(P) where P is visited positions in touched slices (dense scanned slots + sparse entries), with worst-case O(|end-start|+1) and typical case close to O(N), where N is the number of existing elements in range.", + "group": "array", + "since": "8.8.0", + "arity": -5, + "function": "aropCommand", + "command_flags": [ + "READONLY" + ], + "acl_categories": [ + "ARRAY" + ], + "key_specs": [ + { + "flags": [ + "RO", + "ACCESS" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "oneOf": [ + { + "description": "Result of the operation.", + "type": "string" + }, + { + "description": "Integer result for MATCH, USED, AND, OR, XOR.", + "type": "integer" + }, + { + "description": "Null if no elements match the operation.", + "type": "null" + } + ] + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "start", + "type": "integer" + }, + { + "name": "end", + "type": "integer" + }, + { + "name": "operation", + "type": "oneof", + "arguments": [ + { + "name": "sum", + "type": "pure-token", + "token": "SUM" + }, + { + "name": "min", + "type": "pure-token", + "token": "MIN" + }, + { + "name": "max", + "type": "pure-token", + "token": "MAX" + }, + { + "name": "and", + "type": "pure-token", + "token": "AND" + }, + { + "name": "or", + "type": 
"pure-token", + "token": "OR" + }, + { + "name": "xor", + "type": "pure-token", + "token": "XOR" + }, + { + "name": "match", + "type": "block", + "arguments": [ + { + "name": "match", + "type": "pure-token", + "token": "MATCH" + }, + { + "name": "value", + "type": "string" + } + ] + }, + { + "name": "used", + "type": "pure-token", + "token": "USED" + } + ] + } + ] + } +} diff --git a/src/commands/arring.json b/src/commands/arring.json new file mode 100644 index 000000000..01bddf7d7 --- /dev/null +++ b/src/commands/arring.json @@ -0,0 +1,57 @@ +{ + "ARRING": { + "summary": "Inserts values into a ring buffer of specified size, wrapping and truncating as needed.", + "complexity": "O(M) normally, O(N+M) on ring resize, where N is the maximum of the old and new ring size and M is the number of inserted values", + "group": "array", + "since": "8.8.0", + "arity": -4, + "function": "arringCommand", + "command_flags": [ + "WRITE", + "DENYOOM" + ], + "acl_categories": [ + "ARRAY" + ], + "key_specs": [ + { + "flags": [ + "RW", + "UPDATE" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "The last index where a value was inserted.", + "type": "integer" + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "size", + "type": "integer" + }, + { + "name": "value", + "type": "string", + "multiple": true + } + ] + } +} diff --git a/src/commands/arscan.json b/src/commands/arscan.json new file mode 100644 index 000000000..3c75f3207 --- /dev/null +++ b/src/commands/arscan.json @@ -0,0 +1,76 @@ +{ + "ARSCAN": { + "summary": "Iterates existing elements in a range, returning index-value pairs.", + "complexity": "O(P) where P is visited positions in touched slices (dense scanned slots + sparse entries), with worst-case O(|end-start|+1) and typical case close to O(N), where N is the number of existing elements in 
range.", + "group": "array", + "since": "8.8.0", + "arity": -4, + "function": "arscanCommand", + "command_flags": [ + "READONLY" + ], + "acl_categories": [ + "ARRAY" + ], + "key_specs": [ + { + "flags": [ + "RO", + "ACCESS" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "Array of [index, value] pairs.", + "type": "array", + "items": { + "type": "array", + "minItems": 2, + "maxItems": 2, + "items": [ + { + "type": "integer", + "description": "Index of existing element" + }, + { + "type": "string", + "description": "Value at that index" + } + ] + } + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "start", + "type": "integer" + }, + { + "name": "end", + "type": "integer" + }, + { + "name": "limit", + "token": "LIMIT", + "type": "integer", + "optional": true + } + ] + } +} diff --git a/src/commands/gcrasetvalue.json b/src/commands/arseek.json similarity index 73% rename from src/commands/gcrasetvalue.json rename to src/commands/arseek.json index 5cce15cf4..58904c77d 100644 --- a/src/commands/gcrasetvalue.json +++ b/src/commands/arseek.json @@ -1,23 +1,22 @@ { - "GCRASETVALUE": { - "summary": "An internal command for recording a GCRA TAT value during AOF rewrite and replication.", + "ARSEEK": { + "summary": "Sets the ARINSERT / ARRING cursor to a specific index.", "complexity": "O(1)", - "group": "rate_limit", + "group": "array", "since": "8.8.0", "arity": 3, - "function": "gcraSetValueCommand", + "function": "arseekCommand", "command_flags": [ "WRITE", - "DENYOOM", "FAST" ], "acl_categories": [ - "RATE_LIMIT" + "ARRAY" ], "key_specs": [ { "flags": [ - "OW", + "RW", "UPDATE" ], "begin_search": { @@ -35,7 +34,8 @@ } ], "reply_schema": { - "const": "OK" + "description": "1 if the cursor was set, 0 if the key does not exist.", + "type": "integer" }, "arguments": [ { @@ -44,7 +44,7 @@ 
"key_spec_index": 0 }, { - "name": "tat", + "name": "index", "type": "integer" } ] diff --git a/src/commands/arset.json b/src/commands/arset.json new file mode 100644 index 000000000..6d5e8453f --- /dev/null +++ b/src/commands/arset.json @@ -0,0 +1,58 @@ +{ + "ARSET": { + "summary": "Sets one or more contiguous values starting at an index in an array.", + "complexity": "O(N) where N is the number of values", + "group": "array", + "since": "8.8.0", + "arity": -4, + "function": "arsetCommand", + "command_flags": [ + "WRITE", + "DENYOOM", + "FAST" + ], + "acl_categories": [ + "ARRAY" + ], + "key_specs": [ + { + "flags": [ + "RW", + "UPDATE" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "Number of new slots that were set (previously empty).", + "type": "integer" + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "index", + "type": "integer" + }, + { + "name": "value", + "type": "string", + "multiple": true + } + ] + } +} diff --git a/src/commands/command-docs.json b/src/commands/command-docs.json index 75df5b4c6..7648d7d51 100644 --- a/src/commands/command-docs.json +++ b/src/commands/command-docs.json @@ -59,6 +59,9 @@ { "const": "hyperloglog" }, + { + "const": "array" + }, { "const": "list" }, @@ -91,6 +94,9 @@ }, { "const": "transactions" + }, + { + "const": "rate_limit" } ] }, diff --git a/src/commands/gcra.json b/src/commands/gcra.json deleted file mode 100644 index 6980af1ac..000000000 --- a/src/commands/gcra.json +++ /dev/null @@ -1,92 +0,0 @@ -{ - "GCRA": { - "summary": "Rate limit via GCRA (Generic Cell Rate Algorithm).", - "complexity": "O(1)", - "group": "rate_limit", - "since": "8.8.0", - "arity": -5, - "function": "gcraCommand", - "command_flags": [ - "WRITE", - "DENYOOM", - "FAST" - ], - "acl_categories": [ - "RATE_LIMIT" - ], - "key_specs": [ - { - "flags": [ - "RW", - 
"ACCESS", - "UPDATE" - ], - "begin_search": { - "index": { - "pos": 1 - } - }, - "find_keys": { - "range": { - "lastkey": 0, - "step": 1, - "limit": 0 - } - } - } - ], - "reply_schema": { - "type": "array", - "minItems": 5, - "maxItems": 5, - "description": "Rate limiting result", - "items": [ - { - "type": "integer", - "description": "Limited: 0 if allowed, 1 if rate limited" - }, - { - "type": "integer", - "description": "Max request tokens: always equal to max_burst+1" - }, - { - "type": "integer", - "description": "Number of tokens available immediately" - }, - { - "type": "integer", - "description": "Retry after: seconds after which the caller should retry. Always -1 if not limited" - }, - { - "type": "integer", - "description": "Full burst after: seconds after which a full burst will be allowed" - } - ] - }, - "arguments": [ - { - "name": "key", - "type": "key", - "key_spec_index": 0 - }, - { - "name": "max-burst", - "type": "integer" - }, - { - "name": "tokens-per-period", - "type": "integer" - }, - { - "name": "period", - "type": "double" - }, - { - "name": "count", - "type": "integer", - "token": "TOKENS", - "optional": true - } - ] - } -} diff --git a/src/commands/increx.json b/src/commands/increx.json new file mode 100644 index 000000000..964822b49 --- /dev/null +++ b/src/commands/increx.json @@ -0,0 +1,155 @@ +{ + "INCREX": { + "summary": "Increments the numeric value of a key by a number and sets its expiration time. 
Uses 0 as initial value if the key doesn't exist.", + "complexity": "O(1)", + "group": "string", + "since": "8.8.0", + "arity": -2, + "function": "increxCommand", + "command_flags": [ + "WRITE", + "DENYOOM", + "FAST" + ], + "acl_categories": [ + "STRING" + ], + "key_specs": [ + { + "flags": [ + "RW", + "ACCESS", + "UPDATE" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "type": "array", + "minItems": 2, + "maxItems": 2, + "items": [ + { + "description": "the value of the key after the increment", + "type": "number" + }, + { + "description": "the actual increment", + "type": "number" + } + ] + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "increment", + "type": "oneof", + "optional": true, + "arguments": [ + { + "name": "float", + "type": "double", + "token": "BYFLOAT" + }, + { + "name": "integer", + "type": "integer", + "token": "BYINT" + } + ] + }, + { + "name": "overflow-block", + "token": "OVERFLOW", + "type": "oneof", + "optional": true, + "summary": "Out-of-bounds policy; defaults to FAIL. 
Missing LBOUND/UBOUND default to the type limits (LLONG_MIN/LLONG_MAX for BYINT, -LDBL_MAX/LDBL_MAX for BYFLOAT).", + "arguments": [ + { + "name": "fail", + "type": "pure-token", + "token": "FAIL" + }, + { + "name": "sat", + "type": "pure-token", + "token": "SAT" + }, + { + "name": "reject", + "type": "pure-token", + "token": "REJECT" + } + ] + }, + { + "name": "lowerbound", + "token": "LBOUND", + "type": "string", + "summary": "Integer when used with BYINT, floating-point when used with BYFLOAT.", + "optional": true + }, + { + "name": "upperbound", + "token": "UBOUND", + "type": "string", + "summary": "Integer when used with BYINT, floating-point when used with BYFLOAT.", + "optional": true + }, + { + "name": "expiration", + "type": "oneof", + "optional": true, + "arguments": [ + { + "name": "seconds", + "type": "integer", + "token": "EX" + }, + { + "name": "milliseconds", + "type": "integer", + "token": "PX" + }, + { + "name": "unix-time-seconds", + "type": "unix-time", + "token": "EXAT" + }, + { + "name": "unix-time-milliseconds", + "type": "unix-time", + "token": "PXAT" + }, + { + "name": "persist", + "type": "pure-token", + "token": "PERSIST" + } + ] + }, + { + "name": "enx", + "type": "pure-token", + "token": "ENX", + "optional": true, + "summary": "Only set the expiration if the key currently has no TTL. Requires one of EX/PX/EXAT/PXAT; cannot be combined with PERSIST." 
+ } + ] + } +} diff --git a/src/commands/memory-stats.json b/src/commands/memory-stats.json index 0e95e0f36..4098c1b36 100644 --- a/src/commands/memory-stats.json +++ b/src/commands/memory-stats.json @@ -38,6 +38,12 @@ "clients.normal": { "type": "integer" }, + "clients.normal.shared": { + "type": "integer" + }, + "clients.normal.unshared": { + "type": "integer" + }, "cluster.links": { "type": "integer" }, diff --git a/src/commands/psubscribe.json b/src/commands/psubscribe.json index cab5d14ef..8c56db2cc 100644 --- a/src/commands/psubscribe.json +++ b/src/commands/psubscribe.json @@ -11,7 +11,8 @@ "NOSCRIPT", "LOADING", "STALE", - "SENTINEL" + "SENTINEL", + "DENYOOM" ], "arguments": [ { diff --git a/src/commands/ssubscribe.json b/src/commands/ssubscribe.json index 46373d541..5bebc6c8c 100644 --- a/src/commands/ssubscribe.json +++ b/src/commands/ssubscribe.json @@ -10,7 +10,8 @@ "PUBSUB", "NOSCRIPT", "LOADING", - "STALE" + "STALE", + "DENYOOM" ], "arguments": [ { diff --git a/src/commands/subscribe.json b/src/commands/subscribe.json index bdf12b726..63e838d7d 100644 --- a/src/commands/subscribe.json +++ b/src/commands/subscribe.json @@ -12,7 +12,8 @@ "NOSCRIPT", "LOADING", "STALE", - "SENTINEL" + "SENTINEL", + "DENYOOM" ], "arguments": [ { diff --git a/src/config.c b/src/config.c index 0ad28ef5b..97fa58a11 100644 --- a/src/config.c +++ b/src/config.c @@ -24,6 +24,7 @@ #include #include #include +#include /*----------------------------------------------------------------------------- * Config file name-value maps. 
@@ -2411,7 +2412,7 @@ static int isValidShutdownOnSigFlags(int val, const char **err) { } static int updateMemoryTrackingEnabled(const char **err) { - int memory_tracking_enabled = server.key_memory_histograms || clusterSlotStatsEnabled(CLUSTER_SLOT_STATS_MEM); + int memory_tracking_enabled = server.key_memory_histograms || (server.cluster_slot_stats_enabled & CLUSTER_SLOT_STATS_MEM); if (!server.memory_tracking_enabled && memory_tracking_enabled) { *err = "memory tracking cannot be enabled at runtime"; return 0; @@ -2428,13 +2429,7 @@ static int isValidAnnouncedNodename(char *val,const char **err) { return 1; } -static int isValidAnnouncedHostname(char *val, const char **err) { - if (strlen(val) >= NET_HOST_STR_LEN) { - *err = "Hostnames must be less than " - STRINGIFY(NET_HOST_STR_LEN) " characters"; - return 0; - } - +static int isValidHostnameChars(char *val, const char **err) { int i = 0; char c; while ((c = val[i])) { @@ -2452,6 +2447,39 @@ static int isValidAnnouncedHostname(char *val, const char **err) { return 1; } +static int isValidAnnouncedHostname(char *val, const char **err) { + if (strlen(val) >= NET_HOST_STR_LEN) { + *err = "Hostnames must be less than " + STRINGIFY(NET_HOST_STR_LEN) " characters"; + return 0; + } + return isValidHostnameChars(val, err); +} + +/* Validation function for cluster-announce-ip. + * Ensures the IP address is valid and rejects control characters. 
*/ +static int isValidClusterAnnounceIp(char *val, const char **err) { + unsigned char buf[sizeof(struct in6_addr)]; + /* Empty string is allowed - it will be converted to NULL by EMPTY_STRING_IS_NULL flag */ + if (val[0] == '\0') { + return 1; + } + + /* Accept valid IPv4 or IPv6 */ + if (inet_pton(AF_INET, val, buf) == 1 || inet_pton(AF_INET6, val, buf) == 1) { + return 1; + } + /* Also accept valid hostnames, but limited to NET_IP_STR_LEN since + * cluster_announce_ip is stored in a NET_IP_STR_LEN buffer */ + if (strlen(val) >= NET_IP_STR_LEN) { + *err = "Hostnames for cluster-announce-ip must be less than " + STRINGIFY(NET_IP_STR_LEN) " characters"; + return 0; + } + /* Length is acceptable: the result now depends only on the hostname character check */ + return isValidHostnameChars(val, err); +} + /* Validate specified string is a valid proc-title-template */ static int isValidProcTitleTemplate(char *val, const char **err) { if (!validateProcTitleTemplate(val)) { @@ -2461,6 +2489,33 @@ static int isValidProcTitleTemplate(char *val, const char **err) { return 1; } +/* Validate that array-slice-size is a power of two */ +static int isValidArraySliceSize(long long val, const char **err) { + if (val <= 0 || (val & (val - 1)) != 0) { + *err = "array-slice-size must be a power of two"; + return 0; + } + return 1; +} + +/* Validate array-sparse-kmax: if non-zero, must be > kmin */ +static int isValidArraySparseKmax(long long val, const char **err) { + if (val > 0 && (unsigned int)val <= server.array_sparse_kmin) { + *err = "array-sparse-kmax must be greater than array-sparse-kmin when non-zero"; + return 0; + } + return 1; +} + +/* Validate array-sparse-kmin: must be < kmax when kmax is non-zero */ +static int isValidArraySparseKmin(long long val, const char **err) { + if (server.array_sparse_kmax > 0 && (unsigned int)val >= server.array_sparse_kmax) { + *err = "array-sparse-kmin must be less than array-sparse-kmax"; + return 0; + } + return 1; +} + static int updateLocaleCollate(const char **err) { const char *s = 
setlocale(LC_COLLATE, server.locale_collate); if (s == NULL) { @@ -2917,7 +2972,11 @@ static int setConfigNotifyKeyspaceEventsOption(standardConfig *config, sds *argv } int flags = keyspaceEventsStringToFlags(argv[0]); if (flags == -1) { - *err = "Invalid event class character. Use 'Ag$lshzxeKEtmdnocr'."; +#ifdef ENABLE_GCRA + *err = "Invalid event class character. Use 'Ag$lshzxeKEtmdnocraSTIV'."; +#else + *err = "Invalid event class character. Use 'Ag$lshzxeKEtmdnocaSTIV'."; +#endif return 0; } server.notify_keyspace_events = flags; @@ -3159,7 +3218,7 @@ standardConfig static_configs[] = { createStringConfig("pidfile", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, server.pidfile, NULL, NULL, NULL), createStringConfig("replica-announce-ip", "slave-announce-ip", MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.slave_announce_ip, NULL, NULL, NULL), createStringConfig("masteruser", NULL, MODIFIABLE_CONFIG | SENSITIVE_CONFIG, EMPTY_STRING_IS_NULL, server.masteruser, NULL, NULL, NULL), - createStringConfig("cluster-announce-ip", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.cluster_announce_ip, NULL, NULL, updateClusterIp), + createStringConfig("cluster-announce-ip", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.cluster_announce_ip, NULL, isValidClusterAnnounceIp, updateClusterIp), createStringConfig("cluster-config-file", NULL, IMMUTABLE_CONFIG, ALLOW_EMPTY_STRING, server.cluster_configfile, "nodes.conf", NULL, NULL), createStringConfig("cluster-announce-hostname", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.cluster_announce_hostname, NULL, isValidAnnouncedHostname, updateClusterHostname), createStringConfig("cluster-announce-human-nodename", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.cluster_announce_human_nodename, NULL, isValidAnnouncedNodename, updateClusterHumanNodename), @@ -3244,6 +3303,7 @@ standardConfig static_configs[] = { createIntConfig("cluster-compatibility-sample-ratio", NULL, MODIFIABLE_CONFIG, 0, 100, 
server.cluster_compatibility_sample_ratio, 0, INTEGER_CONFIG, NULL, NULL), createIntConfig("cluster-slot-migration-max-archived-tasks", NULL, MODIFIABLE_CONFIG | HIDDEN_CONFIG, 1, INT_MAX, server.asm_max_archived_tasks, 32, INTEGER_CONFIG, NULL, NULL), createIntConfig("lookahead", NULL, MODIFIABLE_CONFIG, 1, INT_MAX, server.lookahead, REDIS_DEFAULT_LOOKAHEAD, INTEGER_CONFIG, NULL, NULL), + createIntConfig("slowlog-entry-max-argc", NULL, MODIFIABLE_CONFIG, 2, INT_MAX, server.slowlog_max_argc, 32, INTEGER_CONFIG, NULL, NULL), /* Unsigned int configs */ createUIntConfig("maxclients", NULL, MODIFIABLE_CONFIG, 1, UINT_MAX, server.maxclients, 10000, INTEGER_CONFIG, NULL, updateMaxclients), @@ -3251,6 +3311,10 @@ standardConfig static_configs[] = { createUIntConfig("socket-mark-id", NULL, IMMUTABLE_CONFIG, 0, UINT_MAX, server.socket_mark_id, 0, INTEGER_CONFIG, NULL, NULL), createUIntConfig("max-new-connections-per-cycle", NULL, MODIFIABLE_CONFIG, 1, 1000, server.max_new_conns_per_cycle, 10, INTEGER_CONFIG, NULL, NULL), createUIntConfig("max-new-tls-connections-per-cycle", NULL, MODIFIABLE_CONFIG, 1, 1000, server.max_new_tls_conns_per_cycle, 1, INTEGER_CONFIG, NULL, NULL), + /* Array type configuration */ + createUIntConfig("array-slice-size", NULL, MODIFIABLE_CONFIG, AR_SLICE_SIZE_MIN, AR_SLICE_SIZE_MAX, server.array_slice_size, AR_SLICE_SIZE_DEFAULT, INTEGER_CONFIG, isValidArraySliceSize, NULL), + createUIntConfig("array-sparse-kmax", NULL, MODIFIABLE_CONFIG, 0, 256, server.array_sparse_kmax, AR_SPARSE_KMAX_DEFAULT, INTEGER_CONFIG, isValidArraySparseKmax, NULL), + createUIntConfig("array-sparse-kmin", NULL, MODIFIABLE_CONFIG, 0, 256, server.array_sparse_kmin, AR_SPARSE_KMIN_DEFAULT, INTEGER_CONFIG, isValidArraySparseKmin, NULL), #ifdef LOG_REQ_RES createUIntConfig("client-default-resp", NULL, IMMUTABLE_CONFIG | HIDDEN_CONFIG, 2, 3, server.client_default_resp, 2, INTEGER_CONFIG, NULL, NULL), #endif @@ -3258,6 +3322,7 @@ standardConfig static_configs[] = { /* Unsigned Long 
configs */ createULongConfig("active-defrag-max-scan-fields", NULL, MODIFIABLE_CONFIG, 1, LONG_MAX, server.active_defrag_max_scan_fields, 1000, INTEGER_CONFIG, NULL, NULL), /* Default: keys with more than 1000 fields will be processed separately */ createULongConfig("slowlog-max-len", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.slowlog_max_len, 128, INTEGER_CONFIG, NULL, NULL), + createULongConfig("slowlog-entry-max-string-len", NULL, MODIFIABLE_CONFIG, 1, LONG_MAX, server.slowlog_max_string_len, 128, INTEGER_CONFIG, NULL, NULL), createULongConfig("acllog-max-len", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.acllog_max_len, 128, INTEGER_CONFIG, NULL, NULL), /* Long Long configs */ diff --git a/src/db.c b/src/db.c index 32c058dab..87881a991 100644 --- a/src/db.c +++ b/src/db.c @@ -13,6 +13,7 @@ */ #include "server.h" +#include "vector.h" #include "cluster.h" #include "atomicvar.h" #include "latency.h" @@ -1631,7 +1632,7 @@ void keysCommand(client *c) { /* Data used by the dict scan callback. */ typedef struct { - list *keys; /* elements that collect from dict */ + vec *keys; /* elements collected from dict */ robj *o; /* o must be a hash/set/zset object, NULL means current db */ long long type; /* the particular type when scan the db */ sds pattern; /* pattern string, NULL means no pattern */ @@ -1663,7 +1664,7 @@ void scanCallback(void *privdata, const dictEntry *de, dictEntryLink plink) { UNUSED(plink); Entry *hashEntry = NULL; scanData *data = (scanData *)privdata; - list *keys = data->keys; + vec *keys = data->keys; robj *o = data->o; sds val = NULL; void *key = NULL; /* if OBJ_HASH then key is of type `hfield`. 
Otherwise, `sds` */ @@ -1733,8 +1734,8 @@ void scanCallback(void *privdata, const dictEntry *de, dictEntryLink plink) { serverPanic("Type not handled in SCAN callback."); } - listAddNodeTail(keys, key); - if (val && !data->no_values) listAddNodeTail(keys, val); + vecPush(keys, key); + if (val && !data->no_values) vecPush(keys, val); } /* Try to parse a SCAN cursor stored at object 'o': @@ -1750,14 +1751,17 @@ int parseScanCursorOrReply(client *c, robj *o, unsigned long long *cursor) { } char *obj_type_name[OBJ_TYPE_MAX] = { - "string", - "list", - "set", - "zset", - "hash", + "string", + "list", + "set", + "zset", + "hash", NULL, /* module type is special */ "stream", + "array", +#ifdef ENABLE_GCRA "gcra" +#endif }; /* Helper function to get type from a string in scan commands */ @@ -1808,7 +1812,6 @@ static int scanShouldSkipDict(dict *d, int didx) { * of every element on the Hash. */ void scanGenericCommand(client *c, robj *o, unsigned long long cursor) { int i, j; - listNode *node; long count = 10; sds pat = NULL; sds typename = NULL; @@ -1893,18 +1896,13 @@ void scanGenericCommand(client *c, robj *o, unsigned long long cursor) { ht = zs->dict; } - list *keys = listCreate(); - /* Set a free callback for the contents of the collected keys list. - * For the main keyspace dict, and when we scan a key that's dict encoded - * (we have 'ht'), we don't need to define free method because the strings - * in the list are just a shallow copy from the pointer in the dictEntry. - * When scanning a key with other encodings (e.g. listpack), we need to - * free the temporary strings we add to that list. - * The exception to the above is ZSET, where we do allocate temporary - * strings even when scanning a dict. 
*/ - if (o && (!ht || o->type == OBJ_ZSET)) { - listSetFreeMethod(keys, sdsfreegeneric); - } + vec keys; + void *keys_stack[256]; + vecInit(&keys, keys_stack, 256); + /* Hash on dict only has pointers to dict entries; other paths allocate + * temporary sds that must be released. */ + if (o && (!ht || o->type == OBJ_ZSET)) + vecSetFreeMethod(&keys, sdsfreegeneric); /* For main dictionary scan or data structure using hashtable. */ if (!o || ht) { @@ -1912,7 +1910,7 @@ void scanGenericCommand(client *c, robj *o, unsigned long long cursor) { * COUNT, so if the hash table is in a pathological state (very * sparsely populated) we avoid to block too much time at the cost * of returning no or very few elements. */ - long maxiterations = count*10; + long maxiterations = (count > LONG_MAX / 10) ? LONG_MAX : count * 10; /* We pass scanData which have three pointers to the callback: * 1. data.keys: the list to which it will add new elements; @@ -1928,7 +1926,7 @@ void scanGenericCommand(client *c, robj *o, unsigned long long cursor) { * 6. data.no_values: to control whether values will be returned or * only keys are returned. */ scanData data = { - .keys = keys, + .keys = &keys, .o = o, .type = type, .pattern = use_pattern ? pat : NULL, @@ -1955,7 +1953,7 @@ void scanGenericCommand(client *c, robj *o, unsigned long long cursor) { } else if (o->type == OBJ_SET) { unsigned long array_reply_len = 0; void *replylen = NULL; - listRelease(keys); + vecRelease(&keys); char *str; char buf[LONG_STR_SIZE]; size_t len; @@ -2001,7 +1999,7 @@ void scanGenericCommand(client *c, robj *o, unsigned long long cursor) { unsigned long array_reply_len = 0; unsigned char intbuf[LP_INTBUF_SIZE]; void *replylen = NULL; - listRelease(keys); + vecRelease(&keys); /* Reply to the client. 
*/ addReplyArrayLen(c, 2); @@ -2052,7 +2050,7 @@ void scanGenericCommand(client *c, robj *o, unsigned long long cursor) { unsigned char intbuf[LP_INTBUF_SIZE]; void *replylen = NULL; - listRelease(keys); + vecRelease(&keys); /* Reply to the client. */ addReplyArrayLen(c, 2); /* Cursor is always 0 given we iterate over all set */ @@ -2098,14 +2096,13 @@ void scanGenericCommand(client *c, robj *o, unsigned long long cursor) { addReplyArrayLen(c, 2); addReplyBulkLongLong(c,cursor); - addReplyArrayLen(c, listLength(keys)); - while ((node = listFirst(keys)) != NULL) { - void *key = listNodeValue(node); + addReplyArrayLen(c, vecSize(&keys)); + for (size_t i = 0; i < vecSize(&keys); i++) { + sds key = vecGet(&keys, i); addReplyBulkCBuffer(c, key, sdslen(key)); - listDelNode(keys, node); } - listRelease(keys); + vecRelease(&keys); } /* The SCAN command completely relies on scanGenericCommand. */ @@ -2439,11 +2436,14 @@ void copyCommand(client *c) { case OBJ_ZSET: newobj = zsetDup(o); break; case OBJ_HASH: newobj = hashTypeDup(o, &minHashExpire); break; case OBJ_STREAM: newobj = streamDup(o); break; +#ifdef ENABLE_GCRA case OBJ_GCRA: newobj = gcraDup(o); break; +#endif case OBJ_MODULE: newobj = moduleTypeDupOrReply(c, key, newkey, dst->id, o); if (!newobj) return; break; + case OBJ_ARRAY: newobj = arrayTypeDup(o); break; default: addReplyError(c, "unknown type object"); return; diff --git a/src/debug.c b/src/debug.c index c6baf4b4d..e14f2a52b 100644 --- a/src/debug.c +++ b/src/debug.c @@ -123,6 +123,7 @@ void mixStringObjectDigest(unsigned char *digest, robj *o) { decrRefCount(o); } +#ifdef ENABLE_GCRA void mixGCRAObjectDigest(unsigned char *digest, robj *o) { char buf[LONG_STR_SIZE]; long long val; @@ -130,6 +131,7 @@ void mixGCRAObjectDigest(unsigned char *digest, robj *o) { int len = ll2string(buf, sizeof(buf), val); mixDigest(digest,buf,len); } +#endif /* This function computes the digest of a data structure stored in the * object 'o'. 
It is the core of the DEBUG DIGEST command: when taking the @@ -263,8 +265,10 @@ void xorObjectDigest(redisDb *db, robj *keyobj, unsigned char *digest, robj *o) } } streamIteratorStop(&si); +#ifdef ENABLE_GCRA } else if (o->type == OBJ_GCRA) { mixGCRAObjectDigest(digest, o); +#endif } else if (o->type == OBJ_MODULE) { RedisModuleDigest md = {{0},{0},keyobj,db->id}; moduleValue *mv = o->ptr; @@ -274,6 +278,21 @@ void xorObjectDigest(redisDb *db, robj *keyobj, unsigned char *digest, robj *o) mt->digest(&md,mv->value); xorDigest(digest,md.x,sizeof(md.x)); } + } else if (o->type == OBJ_ARRAY) { + redisArray *ar = o->ptr; + uint64_t len = arLen(ar); + for (uint64_t idx = 0; idx < len; idx++) { + void *v = arGet(ar, idx); + if (arIsEmpty(v)) { + /* For empty slots, contribute "(null)" */ + mixDigest(digest, "(null)", 6); + } else { + char vbuf[AR_INLINE_BUFSIZE]; + size_t vlen; + const char *data = arDecode(v, vbuf, sizeof(vbuf), &vlen); + mixDigest(digest, data, vlen); + } + } } else { serverPanic("Unknown object type"); } @@ -1312,9 +1331,11 @@ void serverLogObjectDebugInfo(const robj *o) { serverLog(LL_WARNING,"Skiplist level: %d", (int) ((const zset*)o->ptr)->zsl->level); } else if (o->type == OBJ_STREAM) { serverLog(LL_WARNING,"Stream size: %d", (int) streamLength(o)); +#ifdef ENABLE_GCRA } else if (o->type == OBJ_GCRA) { #if UINTPTR_MAX == 0xffffffffffffffff serverLog(LL_WARNING, "GCRA object: %lld", (long long)o->ptr); +#endif #endif } #endif diff --git a/src/defrag.c b/src/defrag.c index ae1e632d8..15df6c8f4 100644 --- a/src/defrag.c +++ b/src/defrag.c @@ -755,6 +755,32 @@ void defragSet(defragKeysCtx *ctx, kvobj *ob) { ob->ptr = newd; } +/* Arrays can be expensive to defrag in one shot because they may contain many + * independently allocated slices. Small arrays are defragmented immediately, + * while large arrays are queued for later and processed one slice per step. 
*/ +void defragArray(defragKeysCtx *ctx, kvobj *ob) { + serverAssert(ob->type == OBJ_ARRAY); + /* Maybe arCount() is not the best possible value to check against + * server.active_defrag_max_scan_fields, also because anyway when we + * defrag incrementally, we defrag a single slice per call. Yet it makes + * sense in a not very obvious way, for several reasons: + * + * 1. If the array is very sparse, it is an upper bound to the max + * number of slices it is composed of. + * 2. If the array is dense, we will scan in the default case at most 4096 + * entries, and the default defrag limit for max scans is 1000. They + * are kinda comparable numbers. + * 3. In case of a highly sparse array with huge indexes, in superdir mode, + * yet the super blocks are going to be at most arCount(). + * + * So regardless of the fact we later will defrag in slice units, this + * is a good trigger for the one shot or incremental selection. */ + if (arCount(ob->ptr) > server.active_defrag_max_scan_fields) + defragLater(ctx, ob); + else + ob->ptr = arDefrag(ob->ptr, activeDefragAlloc); +} + +/* Defrag callback for radix tree iterator, called for each node, + * used in order to defrag the nodes allocations. 
*/ int defragRaxNode(raxNode **noderef, void *privdata) { @@ -1211,15 +1237,19 @@ void defragKey(defragKeysCtx *ctx, dictEntry *de, dictEntryLink link) { } } else if (ob->type == OBJ_STREAM) { defragStream(ctx, ob); +#ifdef ENABLE_GCRA } else if (ob->type == OBJ_GCRA) { /* GCRA object is just an allocation to a long long value */ #if UINTPTR_MAX == 0xffffffff void *newptr, *ptr = ob->ptr; if ((newptr = activeDefragAlloc(ptr))) ob->ptr = newptr; +#endif #endif } else if (ob->type == OBJ_MODULE) { defragModule(ctx,db, ob); + } else if (ob->type == OBJ_ARRAY) { + defragArray(ctx, ob); } else { serverPanic("Unknown object type"); } @@ -1336,6 +1366,10 @@ int defragLaterItem(kvobj *ob, unsigned long *cursor, monotime endtime, int dbid robj keyobj; initStaticStringObject(keyobj, kvobjGetKey(ob)); return moduleLateDefrag(&keyobj, ob, cursor, endtime, dbid); + } else if (ob->type == OBJ_ARRAY) { + redisArray *ar = ob->ptr; + *cursor = arDefragIncremental(&ar, *cursor, activeDefragAlloc); + ob->ptr = ar; } else { *cursor = 0; /* object type/encoding may have changed since we schedule it for later */ } diff --git a/src/dict.h b/src/dict.h index 25e4cf1bd..26c9d1e16 100644 --- a/src/dict.h +++ b/src/dict.h @@ -135,6 +135,25 @@ typedef struct dictType { /* Optional callback called when the dict is destroyed. */ void (*onDictRelease)(dict *d); + + /* Optional prefetch hooks used by the memory_prefetch state machine. + * Both default to NULL; when both are NULL the state machine just + * prefetches the bucket + dictEntry chain and stops there. + * + * prefetchEntryKey: called after a dictEntry has been brought into + * cache. Returns an address to issue redis_prefetch_read on (so the + * key payload behind the entry is warm before keyCompare runs), or + * NULL if nothing extra is needed (e.g. the key is co-located with + * the entry). 
+ * prefetchEntryValue: called when the entry is the *presumed* match + * for the lookup key — either keyCompare returned equal, or the + * state machine took the "last entry in chain, not rehashing" + * shortcut and is betting on a hit without comparing. Callbacks + * must therefore not assume the key has been verified equal; the + * prefetch is advisory. Returns an address to prefetch for the + * value-side payload, or NULL. */ + void *(*prefetchEntryKey)(const dictEntry *de); + void *(*prefetchEntryValue)(const dictEntry *de); } dictType; #define DICTHT_SIZE(exp) ((exp) == -1 ? 0 : (unsigned long)1<<(exp)) diff --git a/src/eval.c b/src/eval.c index 018956135..0edea5ddd 100644 --- a/src/eval.c +++ b/src/eval.c @@ -1027,7 +1027,7 @@ int ldbDelBreakpoint(int line) { for (j = 0; j < ldb.bpcount; j++) { if (ldb.bp[j] == line) { ldb.bpcount--; - memmove(ldb.bp+j,ldb.bp+j+1,ldb.bpcount-j); + memmove(ldb.bp+j,ldb.bp+j+1,(ldb.bpcount-j) * sizeof(int)); return 1; } } @@ -1502,7 +1502,9 @@ void ldbEval(lua_State *lua, sds *argv, int argc) { sdsfree(code); sdsfree(expr); if (lua_pcall(lua,0,1,0)) { - ldbLog(sdscatfmt(sdsempty()," %s",lua_tostring(lua,-1))); + const char *err = lua_tostring(lua,-1); + ldbLog(sdscatfmt(sdsempty()," %s", + err ? err : "(error object is not a string)")); lua_pop(lua,1); return; } diff --git a/src/evict.c b/src/evict.c index e287edec6..50037c689 100644 --- a/src/evict.c +++ b/src/evict.c @@ -349,7 +349,7 @@ size_t freeMemoryGetNotCountedMemory(void) { /* The migrate client is like a replica, we also push DELs into it when * evicting keys belonging to the migrating slot, so we don't count its * output buffer to avoid eviction loop. 
*/ - overhead += asmGetMigrateOutputBufferSize(); + overhead += asmGetMigrateOutputMemoryUsage(); if (server.aof_state != AOF_OFF) { overhead += sdsAllocSize(server.aof_buf); diff --git a/src/fast_float_strtod.c b/src/fast_float_strtod.c index 48a5df502..8039c5a9b 100644 --- a/src/fast_float_strtod.c +++ b/src/fast_float_strtod.c @@ -48,6 +48,195 @@ static const double powers_of_ten[] = { 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22 }; +/* ---------------------------------------------------------------------------- + * Eisel-Lemire algorithm — extended-precision powers of five. + * + * The table below maps from decimal scaling (10^q) to a 128-bit binary + * approximation. Since 10^q = 2^q * 5^q and the 2^q factor is exact in + * binary, only 5^q affects the binary significand — so we precompute + * 5^q rounded toward 1 to 128 bits. Used by `compute_float()` to avoid + * any iterative rounding in the widened (mantissa > 2^53) range. + * + * Pulled verbatim from fast_float by Daniel Lemire & Joao Paulo Magalhaes + * (MIT-licensed, https://github.com/fastfloat/fast_float — fast_table.h). + * + * Range: 5^-342 ... 5^308 — covers every value that can produce a finite + * non-zero double from a 64-bit decimal mantissa. 651 entries, each stored + * as { high64, low64 } pairs (1302 uint64_t total). 
+ * ---------------------------------------------------------------------------- */ + +#define EISEL_LEMIRE_SMALLEST_POWER_OF_FIVE -342 +#define EISEL_LEMIRE_LARGEST_POWER_OF_FIVE 308 +#define EISEL_LEMIRE_NUMBER_OF_ENTRIES (2 * (EISEL_LEMIRE_LARGEST_POWER_OF_FIVE - \ + EISEL_LEMIRE_SMALLEST_POWER_OF_FIVE + 1)) + +static const uint64_t power_of_five_128[EISEL_LEMIRE_NUMBER_OF_ENTRIES] = { + 0xeef453d6923bd65a, 0x113faa2906a13b3f, 0x9558b4661b6565f8, 0x4ac7ca59a424c507, 0xbaaee17fa23ebf76, 0x5d79bcf00d2df649, 0xe95a99df8ace6f53, 0xf4d82c2c107973dc, + 0x91d8a02bb6c10594, 0x79071b9b8a4be869, 0xb64ec836a47146f9, 0x9748e2826cdee284, 0xe3e27a444d8d98b7, 0xfd1b1b2308169b25, 0x8e6d8c6ab0787f72, 0xfe30f0f5e50e20f7, + 0xb208ef855c969f4f, 0xbdbd2d335e51a935, 0xde8b2b66b3bc4723, 0xad2c788035e61382, 0x8b16fb203055ac76, 0x4c3bcb5021afcc31, 0xaddcb9e83c6b1793, 0xdf4abe242a1bbf3d, + 0xd953e8624b85dd78, 0xd71d6dad34a2af0d, 0x87d4713d6f33aa6b, 0x8672648c40e5ad68, 0xa9c98d8ccb009506, 0x680efdaf511f18c2, 0xd43bf0effdc0ba48, 0x212bd1b2566def2, + 0x84a57695fe98746d, 0x14bb630f7604b57, 0xa5ced43b7e3e9188, 0x419ea3bd35385e2d, 0xcf42894a5dce35ea, 0x52064cac828675b9, 0x818995ce7aa0e1b2, 0x7343efebd1940993, + 0xa1ebfb4219491a1f, 0x1014ebe6c5f90bf8, 0xca66fa129f9b60a6, 0xd41a26e077774ef6, 0xfd00b897478238d0, 0x8920b098955522b4, 0x9e20735e8cb16382, 0x55b46e5f5d5535b0, + 0xc5a890362fddbc62, 0xeb2189f734aa831d, 0xf712b443bbd52b7b, 0xa5e9ec7501d523e4, 0x9a6bb0aa55653b2d, 0x47b233c92125366e, 0xc1069cd4eabe89f8, 0x999ec0bb696e840a, + 0xf148440a256e2c76, 0xc00670ea43ca250d, 0x96cd2a865764dbca, 0x380406926a5e5728, 0xbc807527ed3e12bc, 0xc605083704f5ecf2, 0xeba09271e88d976b, 0xf7864a44c633682e, + 0x93445b8731587ea3, 0x7ab3ee6afbe0211d, 0xb8157268fdae9e4c, 0x5960ea05bad82964, 0xe61acf033d1a45df, 0x6fb92487298e33bd, 0x8fd0c16206306bab, 0xa5d3b6d479f8e056, + 0xb3c4f1ba87bc8696, 0x8f48a4899877186c, 0xe0b62e2929aba83c, 0x331acdabfe94de87, 0x8c71dcd9ba0b4925, 0x9ff0c08b7f1d0b14, 0xaf8e5410288e1b6f, 
0x7ecf0ae5ee44dd9, + 0xdb71e91432b1a24a, 0xc9e82cd9f69d6150, 0x892731ac9faf056e, 0xbe311c083a225cd2, 0xab70fe17c79ac6ca, 0x6dbd630a48aaf406, 0xd64d3d9db981787d, 0x92cbbccdad5b108, + 0x85f0468293f0eb4e, 0x25bbf56008c58ea5, 0xa76c582338ed2621, 0xaf2af2b80af6f24e, 0xd1476e2c07286faa, 0x1af5af660db4aee1, 0x82cca4db847945ca, 0x50d98d9fc890ed4d, + 0xa37fce126597973c, 0xe50ff107bab528a0, 0xcc5fc196fefd7d0c, 0x1e53ed49a96272c8, 0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7a, 0x9faacf3df73609b1, 0x77b191618c54e9ac, + 0xc795830d75038c1d, 0xd59df5b9ef6a2417, 0xf97ae3d0d2446f25, 0x4b0573286b44ad1d, 0x9becce62836ac577, 0x4ee367f9430aec32, 0xc2e801fb244576d5, 0x229c41f793cda73f, + 0xf3a20279ed56d48a, 0x6b43527578c1110f, 0x9845418c345644d6, 0x830a13896b78aaa9, 0xbe5691ef416bd60c, 0x23cc986bc656d553, 0xedec366b11c6cb8f, 0x2cbfbe86b7ec8aa8, + 0x94b3a202eb1c3f39, 0x7bf7d71432f3d6a9, 0xb9e08a83a5e34f07, 0xdaf5ccd93fb0cc53, 0xe858ad248f5c22c9, 0xd1b3400f8f9cff68, 0x91376c36d99995be, 0x23100809b9c21fa1, + 0xb58547448ffffb2d, 0xabd40a0c2832a78a, 0xe2e69915b3fff9f9, 0x16c90c8f323f516c, 0x8dd01fad907ffc3b, 0xae3da7d97f6792e3, 0xb1442798f49ffb4a, 0x99cd11cfdf41779c, + 0xdd95317f31c7fa1d, 0x40405643d711d583, 0x8a7d3eef7f1cfc52, 0x482835ea666b2572, 0xad1c8eab5ee43b66, 0xda3243650005eecf, 0xd863b256369d4a40, 0x90bed43e40076a82, + 0x873e4f75e2224e68, 0x5a7744a6e804a291, 0xa90de3535aaae202, 0x711515d0a205cb36, 0xd3515c2831559a83, 0xd5a5b44ca873e03, 0x8412d9991ed58091, 0xe858790afe9486c2, + 0xa5178fff668ae0b6, 0x626e974dbe39a872, 0xce5d73ff402d98e3, 0xfb0a3d212dc8128f, 0x80fa687f881c7f8e, 0x7ce66634bc9d0b99, 0xa139029f6a239f72, 0x1c1fffc1ebc44e80, + 0xc987434744ac874e, 0xa327ffb266b56220, 0xfbe9141915d7a922, 0x4bf1ff9f0062baa8, 0x9d71ac8fada6c9b5, 0x6f773fc3603db4a9, 0xc4ce17b399107c22, 0xcb550fb4384d21d3, + 0xf6019da07f549b2b, 0x7e2a53a146606a48, 0x99c102844f94e0fb, 0x2eda7444cbfc426d, 0xc0314325637a1939, 0xfa911155fefb5308, 0xf03d93eebc589f88, 0x793555ab7eba27ca, + 0x96267c7535b763b5, 
0x4bc1558b2f3458de, 0xbbb01b9283253ca2, 0x9eb1aaedfb016f16, 0xea9c227723ee8bcb, 0x465e15a979c1cadc, 0x92a1958a7675175f, 0xbfacd89ec191ec9, + 0xb749faed14125d36, 0xcef980ec671f667b, 0xe51c79a85916f484, 0x82b7e12780e7401a, 0x8f31cc0937ae58d2, 0xd1b2ecb8b0908810, 0xb2fe3f0b8599ef07, 0x861fa7e6dcb4aa15, + 0xdfbdcece67006ac9, 0x67a791e093e1d49a, 0x8bd6a141006042bd, 0xe0c8bb2c5c6d24e0, 0xaecc49914078536d, 0x58fae9f773886e18, 0xda7f5bf590966848, 0xaf39a475506a899e, + 0x888f99797a5e012d, 0x6d8406c952429603, 0xaab37fd7d8f58178, 0xc8e5087ba6d33b83, 0xd5605fcdcf32e1d6, 0xfb1e4a9a90880a64, 0x855c3be0a17fcd26, 0x5cf2eea09a55067f, + 0xa6b34ad8c9dfc06f, 0xf42faa48c0ea481e, 0xd0601d8efc57b08b, 0xf13b94daf124da26, 0x823c12795db6ce57, 0x76c53d08d6b70858, 0xa2cb1717b52481ed, 0x54768c4b0c64ca6e, + 0xcb7ddcdda26da268, 0xa9942f5dcf7dfd09, 0xfe5d54150b090b02, 0xd3f93b35435d7c4c, 0x9efa548d26e5a6e1, 0xc47bc5014a1a6daf, 0xc6b8e9b0709f109a, 0x359ab6419ca1091b, + 0xf867241c8cc6d4c0, 0xc30163d203c94b62, 0x9b407691d7fc44f8, 0x79e0de63425dcf1d, 0xc21094364dfb5636, 0x985915fc12f542e4, 0xf294b943e17a2bc4, 0x3e6f5b7b17b2939d, + 0x979cf3ca6cec5b5a, 0xa705992ceecf9c42, 0xbd8430bd08277231, 0x50c6ff782a838353, 0xece53cec4a314ebd, 0xa4f8bf5635246428, 0x940f4613ae5ed136, 0x871b7795e136be99, + 0xb913179899f68584, 0x28e2557b59846e3f, 0xe757dd7ec07426e5, 0x331aeada2fe589cf, 0x9096ea6f3848984f, 0x3ff0d2c85def7621, 0xb4bca50b065abe63, 0xfed077a756b53a9, + 0xe1ebce4dc7f16dfb, 0xd3e8495912c62894, 0x8d3360f09cf6e4bd, 0x64712dd7abbbd95c, 0xb080392cc4349dec, 0xbd8d794d96aacfb3, 0xdca04777f541c567, 0xecf0d7a0fc5583a0, + 0x89e42caaf9491b60, 0xf41686c49db57244, 0xac5d37d5b79b6239, 0x311c2875c522ced5, 0xd77485cb25823ac7, 0x7d633293366b828b, 0x86a8d39ef77164bc, 0xae5dff9c02033197, + 0xa8530886b54dbdeb, 0xd9f57f830283fdfc, 0xd267caa862a12d66, 0xd072df63c324fd7b, 0x8380dea93da4bc60, 0x4247cb9e59f71e6d, 0xa46116538d0deb78, 0x52d9be85f074e608, + 0xcd795be870516656, 0x67902e276c921f8b, 0x806bd9714632dff6, 0xba1cd8a3db53b6, 
0xa086cfcd97bf97f3, 0x80e8a40eccd228a4, 0xc8a883c0fdaf7df0, 0x6122cd128006b2cd, + 0xfad2a4b13d1b5d6c, 0x796b805720085f81, 0x9cc3a6eec6311a63, 0xcbe3303674053bb0, 0xc3f490aa77bd60fc, 0xbedbfc4411068a9c, 0xf4f1b4d515acb93b, 0xee92fb5515482d44, + 0x991711052d8bf3c5, 0x751bdd152d4d1c4a, 0xbf5cd54678eef0b6, 0xd262d45a78a0635d, 0xef340a98172aace4, 0x86fb897116c87c34, 0x9580869f0e7aac0e, 0xd45d35e6ae3d4da0, + 0xbae0a846d2195712, 0x8974836059cca109, 0xe998d258869facd7, 0x2bd1a438703fc94b, 0x91ff83775423cc06, 0x7b6306a34627ddcf, 0xb67f6455292cbf08, 0x1a3bc84c17b1d542, + 0xe41f3d6a7377eeca, 0x20caba5f1d9e4a93, 0x8e938662882af53e, 0x547eb47b7282ee9c, 0xb23867fb2a35b28d, 0xe99e619a4f23aa43, 0xdec681f9f4c31f31, 0x6405fa00e2ec94d4, + 0x8b3c113c38f9f37e, 0xde83bc408dd3dd04, 0xae0b158b4738705e, 0x9624ab50b148d445, 0xd98ddaee19068c76, 0x3badd624dd9b0957, 0x87f8a8d4cfa417c9, 0xe54ca5d70a80e5d6, + 0xa9f6d30a038d1dbc, 0x5e9fcf4ccd211f4c, 0xd47487cc8470652b, 0x7647c3200069671f, 0x84c8d4dfd2c63f3b, 0x29ecd9f40041e073, 0xa5fb0a17c777cf09, 0xf468107100525890, + 0xcf79cc9db955c2cc, 0x7182148d4066eeb4, 0x81ac1fe293d599bf, 0xc6f14cd848405530, 0xa21727db38cb002f, 0xb8ada00e5a506a7c, 0xca9cf1d206fdc03b, 0xa6d90811f0e4851c, + 0xfd442e4688bd304a, 0x908f4a166d1da663, 0x9e4a9cec15763e2e, 0x9a598e4e043287fe, 0xc5dd44271ad3cdba, 0x40eff1e1853f29fd, 0xf7549530e188c128, 0xd12bee59e68ef47c, + 0x9a94dd3e8cf578b9, 0x82bb74f8301958ce, 0xc13a148e3032d6e7, 0xe36a52363c1faf01, 0xf18899b1bc3f8ca1, 0xdc44e6c3cb279ac1, 0x96f5600f15a7b7e5, 0x29ab103a5ef8c0b9, + 0xbcb2b812db11a5de, 0x7415d448f6b6f0e7, 0xebdf661791d60f56, 0x111b495b3464ad21, 0x936b9fcebb25c995, 0xcab10dd900beec34, 0xb84687c269ef3bfb, 0x3d5d514f40eea742, + 0xe65829b3046b0afa, 0xcb4a5a3112a5112, 0x8ff71a0fe2c2e6dc, 0x47f0e785eaba72ab, 0xb3f4e093db73a093, 0x59ed216765690f56, 0xe0f218b8d25088b8, 0x306869c13ec3532c, + 0x8c974f7383725573, 0x1e414218c73a13fb, 0xafbd2350644eeacf, 0xe5d1929ef90898fa, 0xdbac6c247d62a583, 0xdf45f746b74abf39, 
0x894bc396ce5da772, 0x6b8bba8c328eb783, + 0xab9eb47c81f5114f, 0x66ea92f3f326564, 0xd686619ba27255a2, 0xc80a537b0efefebd, 0x8613fd0145877585, 0xbd06742ce95f5f36, 0xa798fc4196e952e7, 0x2c48113823b73704, + 0xd17f3b51fca3a7a0, 0xf75a15862ca504c5, 0x82ef85133de648c4, 0x9a984d73dbe722fb, 0xa3ab66580d5fdaf5, 0xc13e60d0d2e0ebba, 0xcc963fee10b7d1b3, 0x318df905079926a8, + 0xffbbcfe994e5c61f, 0xfdf17746497f7052, 0x9fd561f1fd0f9bd3, 0xfeb6ea8bedefa633, 0xc7caba6e7c5382c8, 0xfe64a52ee96b8fc0, 0xf9bd690a1b68637b, 0x3dfdce7aa3c673b0, + 0x9c1661a651213e2d, 0x6bea10ca65c084e, 0xc31bfa0fe5698db8, 0x486e494fcff30a62, 0xf3e2f893dec3f126, 0x5a89dba3c3efccfa, 0x986ddb5c6b3a76b7, 0xf89629465a75e01c, + 0xbe89523386091465, 0xf6bbb397f1135823, 0xee2ba6c0678b597f, 0x746aa07ded582e2c, 0x94db483840b717ef, 0xa8c2a44eb4571cdc, 0xba121a4650e4ddeb, 0x92f34d62616ce413, + 0xe896a0d7e51e1566, 0x77b020baf9c81d17, 0x915e2486ef32cd60, 0xace1474dc1d122e, 0xb5b5ada8aaff80b8, 0xd819992132456ba, 0xe3231912d5bf60e6, 0x10e1fff697ed6c69, + 0x8df5efabc5979c8f, 0xca8d3ffa1ef463c1, 0xb1736b96b6fd83b3, 0xbd308ff8a6b17cb2, 0xddd0467c64bce4a0, 0xac7cb3f6d05ddbde, 0x8aa22c0dbef60ee4, 0x6bcdf07a423aa96b, + 0xad4ab7112eb3929d, 0x86c16c98d2c953c6, 0xd89d64d57a607744, 0xe871c7bf077ba8b7, 0x87625f056c7c4a8b, 0x11471cd764ad4972, 0xa93af6c6c79b5d2d, 0xd598e40d3dd89bcf, + 0xd389b47879823479, 0x4aff1d108d4ec2c3, 0x843610cb4bf160cb, 0xcedf722a585139ba, 0xa54394fe1eedb8fe, 0xc2974eb4ee658828, 0xce947a3da6a9273e, 0x733d226229feea32, + 0x811ccc668829b887, 0x806357d5a3f525f, 0xa163ff802a3426a8, 0xca07c2dcb0cf26f7, 0xc9bcff6034c13052, 0xfc89b393dd02f0b5, 0xfc2c3f3841f17c67, 0xbbac2078d443ace2, + 0x9d9ba7832936edc0, 0xd54b944b84aa4c0d, 0xc5029163f384a931, 0xa9e795e65d4df11, 0xf64335bcf065d37d, 0x4d4617b5ff4a16d5, 0x99ea0196163fa42e, 0x504bced1bf8e4e45, + 0xc06481fb9bcf8d39, 0xe45ec2862f71e1d6, 0xf07da27a82c37088, 0x5d767327bb4e5a4c, 0x964e858c91ba2655, 0x3a6a07f8d510f86f, 0xbbe226efb628afea, 0x890489f70a55368b, + 0xeadab0aba3b2dbe5, 
0x2b45ac74ccea842e, 0x92c8ae6b464fc96f, 0x3b0b8bc90012929d, 0xb77ada0617e3bbcb, 0x9ce6ebb40173744, 0xe55990879ddcaabd, 0xcc420a6a101d0515, + 0x8f57fa54c2a9eab6, 0x9fa946824a12232d, 0xb32df8e9f3546564, 0x47939822dc96abf9, 0xdff9772470297ebd, 0x59787e2b93bc56f7, 0x8bfbea76c619ef36, 0x57eb4edb3c55b65a, + 0xaefae51477a06b03, 0xede622920b6b23f1, 0xdab99e59958885c4, 0xe95fab368e45eced, 0x88b402f7fd75539b, 0x11dbcb0218ebb414, 0xaae103b5fcd2a881, 0xd652bdc29f26a119, + 0xd59944a37c0752a2, 0x4be76d3346f0495f, 0x857fcae62d8493a5, 0x6f70a4400c562ddb, 0xa6dfbd9fb8e5b88e, 0xcb4ccd500f6bb952, 0xd097ad07a71f26b2, 0x7e2000a41346a7a7, + 0x825ecc24c873782f, 0x8ed400668c0c28c8, 0xa2f67f2dfa90563b, 0x728900802f0f32fa, 0xcbb41ef979346bca, 0x4f2b40a03ad2ffb9, 0xfea126b7d78186bc, 0xe2f610c84987bfa8, + 0x9f24b832e6b0f436, 0xdd9ca7d2df4d7c9, 0xc6ede63fa05d3143, 0x91503d1c79720dbb, 0xf8a95fcf88747d94, 0x75a44c6397ce912a, 0x9b69dbe1b548ce7c, 0xc986afbe3ee11aba, + 0xc24452da229b021b, 0xfbe85badce996168, 0xf2d56790ab41c2a2, 0xfae27299423fb9c3, 0x97c560ba6b0919a5, 0xdccd879fc967d41a, 0xbdb6b8e905cb600f, 0x5400e987bbc1c920, + 0xed246723473e3813, 0x290123e9aab23b68, 0x9436c0760c86e30b, 0xf9a0b6720aaf6521, 0xb94470938fa89bce, 0xf808e40e8d5b3e69, 0xe7958cb87392c2c2, 0xb60b1d1230b20e04, + 0x90bd77f3483bb9b9, 0xb1c6f22b5e6f48c2, 0xb4ecd5f01a4aa828, 0x1e38aeb6360b1af3, 0xe2280b6c20dd5232, 0x25c6da63c38de1b0, 0x8d590723948a535f, 0x579c487e5a38ad0e, + 0xb0af48ec79ace837, 0x2d835a9df0c6d851, 0xdcdb1b2798182244, 0xf8e431456cf88e65, 0x8a08f0f8bf0f156b, 0x1b8e9ecb641b58ff, 0xac8b2d36eed2dac5, 0xe272467e3d222f3f, + 0xd7adf884aa879177, 0x5b0ed81dcc6abb0f, 0x86ccbb52ea94baea, 0x98e947129fc2b4e9, 0xa87fea27a539e9a5, 0x3f2398d747b36224, 0xd29fe4b18e88640e, 0x8eec7f0d19a03aad, + 0x83a3eeeef9153e89, 0x1953cf68300424ac, 0xa48ceaaab75a8e2b, 0x5fa8c3423c052dd7, 0xcdb02555653131b6, 0x3792f412cb06794d, 0x808e17555f3ebf11, 0xe2bbd88bbee40bd0, + 0xa0b19d2ab70e6ed6, 0x5b6aceaeae9d0ec4, 0xc8de047564d20a8b, 
0xf245825a5a445275, 0xfb158592be068d2e, 0xeed6e2f0f0d56712, 0x9ced737bb6c4183d, 0x55464dd69685606b, + 0xc428d05aa4751e4c, 0xaa97e14c3c26b886, 0xf53304714d9265df, 0xd53dd99f4b3066a8, 0x993fe2c6d07b7fab, 0xe546a8038efe4029, 0xbf8fdb78849a5f96, 0xde98520472bdd033, + 0xef73d256a5c0f77c, 0x963e66858f6d4440, 0x95a8637627989aad, 0xdde7001379a44aa8, 0xbb127c53b17ec159, 0x5560c018580d5d52, 0xe9d71b689dde71af, 0xaab8f01e6e10b4a6, + 0x9226712162ab070d, 0xcab3961304ca70e8, 0xb6b00d69bb55c8d1, 0x3d607b97c5fd0d22, 0xe45c10c42a2b3b05, 0x8cb89a7db77c506a, 0x8eb98a7a9a5b04e3, 0x77f3608e92adb242, + 0xb267ed1940f1c61c, 0x55f038b237591ed3, 0xdf01e85f912e37a3, 0x6b6c46dec52f6688, 0x8b61313bbabce2c6, 0x2323ac4b3b3da015, 0xae397d8aa96c1b77, 0xabec975e0a0d081a, + 0xd9c7dced53c72255, 0x96e7bd358c904a21, 0x881cea14545c7575, 0x7e50d64177da2e54, 0xaa242499697392d2, 0xdde50bd1d5d0b9e9, 0xd4ad2dbfc3d07787, 0x955e4ec64b44e864, + 0x84ec3c97da624ab4, 0xbd5af13bef0b113e, 0xa6274bbdd0fadd61, 0xecb1ad8aeacdd58e, 0xcfb11ead453994ba, 0x67de18eda5814af2, 0x81ceb32c4b43fcf4, 0x80eacf948770ced7, + 0xa2425ff75e14fc31, 0xa1258379a94d028d, 0xcad2f7f5359a3b3e, 0x96ee45813a04330, 0xfd87b5f28300ca0d, 0x8bca9d6e188853fc, 0x9e74d1b791e07e48, 0x775ea264cf55347e, + 0xc612062576589dda, 0x95364afe032a819e, 0xf79687aed3eec551, 0x3a83ddbd83f52205, 0x9abe14cd44753b52, 0xc4926a9672793543, 0xc16d9a0095928a27, 0x75b7053c0f178294, + 0xf1c90080baf72cb1, 0x5324c68b12dd6339, 0x971da05074da7bee, 0xd3f6fc16ebca5e04, 0xbce5086492111aea, 0x88f4bb1ca6bcf585, 0xec1e4a7db69561a5, 0x2b31e9e3d06c32e6, + 0x9392ee8e921d5d07, 0x3aff322e62439fd0, 0xb877aa3236a4b449, 0x9befeb9fad487c3, 0xe69594bec44de15b, 0x4c2ebe687989a9b4, 0x901d7cf73ab0acd9, 0xf9d37014bf60a11, + 0xb424dc35095cd80f, 0x538484c19ef38c95, 0xe12e13424bb40e13, 0x2865a5f206b06fba, 0x8cbccc096f5088cb, 0xf93f87b7442e45d4, 0xafebff0bcb24aafe, 0xf78f69a51539d749, + 0xdbe6fecebdedd5be, 0xb573440e5a884d1c, 0x89705f4136b4a597, 0x31680a88f8953031, 0xabcc77118461cefc, 
0xfdc20d2b36ba7c3e, 0xd6bf94d5e57a42bc, 0x3d32907604691b4d, + 0x8637bd05af6c69b5, 0xa63f9a49c2c1b110, 0xa7c5ac471b478423, 0xfcf80dc33721d54, 0xd1b71758e219652b, 0xd3c36113404ea4a9, 0x83126e978d4fdf3b, 0x645a1cac083126ea, + 0xa3d70a3d70a3d70a, 0x3d70a3d70a3d70a4, 0xcccccccccccccccc, 0xcccccccccccccccd, 0x8000000000000000, 0x0, 0xa000000000000000, 0x0, + 0xc800000000000000, 0x0, 0xfa00000000000000, 0x0, 0x9c40000000000000, 0x0, 0xc350000000000000, 0x0, + 0xf424000000000000, 0x0, 0x9896800000000000, 0x0, 0xbebc200000000000, 0x0, 0xee6b280000000000, 0x0, + 0x9502f90000000000, 0x0, 0xba43b74000000000, 0x0, 0xe8d4a51000000000, 0x0, 0x9184e72a00000000, 0x0, + 0xb5e620f480000000, 0x0, 0xe35fa931a0000000, 0x0, 0x8e1bc9bf04000000, 0x0, 0xb1a2bc2ec5000000, 0x0, + 0xde0b6b3a76400000, 0x0, 0x8ac7230489e80000, 0x0, 0xad78ebc5ac620000, 0x0, 0xd8d726b7177a8000, 0x0, + 0x878678326eac9000, 0x0, 0xa968163f0a57b400, 0x0, 0xd3c21bcecceda100, 0x0, 0x84595161401484a0, 0x0, + 0xa56fa5b99019a5c8, 0x0, 0xcecb8f27f4200f3a, 0x0, 0x813f3978f8940984, 0x4000000000000000, 0xa18f07d736b90be5, 0x5000000000000000, + 0xc9f2c9cd04674ede, 0xa400000000000000, 0xfc6f7c4045812296, 0x4d00000000000000, 0x9dc5ada82b70b59d, 0xf020000000000000, 0xc5371912364ce305, 0x6c28000000000000, + 0xf684df56c3e01bc6, 0xc732000000000000, 0x9a130b963a6c115c, 0x3c7f400000000000, 0xc097ce7bc90715b3, 0x4b9f100000000000, 0xf0bdc21abb48db20, 0x1e86d40000000000, + 0x96769950b50d88f4, 0x1314448000000000, 0xbc143fa4e250eb31, 0x17d955a000000000, 0xeb194f8e1ae525fd, 0x5dcfab0800000000, 0x92efd1b8d0cf37be, 0x5aa1cae500000000, + 0xb7abc627050305ad, 0xf14a3d9e40000000, 0xe596b7b0c643c719, 0x6d9ccd05d0000000, 0x8f7e32ce7bea5c6f, 0xe4820023a2000000, 0xb35dbf821ae4f38b, 0xdda2802c8a800000, + 0xe0352f62a19e306e, 0xd50b2037ad200000, 0x8c213d9da502de45, 0x4526f422cc340000, 0xaf298d050e4395d6, 0x9670b12b7f410000, 0xdaf3f04651d47b4c, 0x3c0cdd765f114000, + 0x88d8762bf324cd0f, 0xa5880a69fb6ac800, 0xab0e93b6efee0053, 0x8eea0d047a457a00, 
0xd5d238a4abe98068, 0x72a4904598d6d880, 0x85a36366eb71f041, 0x47a6da2b7f864750, + 0xa70c3c40a64e6c51, 0x999090b65f67d924, 0xd0cf4b50cfe20765, 0xfff4b4e3f741cf6d, 0x82818f1281ed449f, 0xbff8f10e7a8921a4, 0xa321f2d7226895c7, 0xaff72d52192b6a0d, + 0xcbea6f8ceb02bb39, 0x9bf4f8a69f764490, 0xfee50b7025c36a08, 0x2f236d04753d5b4, 0x9f4f2726179a2245, 0x1d762422c946590, 0xc722f0ef9d80aad6, 0x424d3ad2b7b97ef5, + 0xf8ebad2b84e0d58b, 0xd2e0898765a7deb2, 0x9b934c3b330c8577, 0x63cc55f49f88eb2f, 0xc2781f49ffcfa6d5, 0x3cbf6b71c76b25fb, 0xf316271c7fc3908a, 0x8bef464e3945ef7a, + 0x97edd871cfda3a56, 0x97758bf0e3cbb5ac, 0xbde94e8e43d0c8ec, 0x3d52eeed1cbea317, 0xed63a231d4c4fb27, 0x4ca7aaa863ee4bdd, 0x945e455f24fb1cf8, 0x8fe8caa93e74ef6a, + 0xb975d6b6ee39e436, 0xb3e2fd538e122b44, 0xe7d34c64a9c85d44, 0x60dbbca87196b616, 0x90e40fbeea1d3a4a, 0xbc8955e946fe31cd, 0xb51d13aea4a488dd, 0x6babab6398bdbe41, + 0xe264589a4dcdab14, 0xc696963c7eed2dd1, 0x8d7eb76070a08aec, 0xfc1e1de5cf543ca2, 0xb0de65388cc8ada8, 0x3b25a55f43294bcb, 0xdd15fe86affad912, 0x49ef0eb713f39ebe, + 0x8a2dbf142dfcc7ab, 0x6e3569326c784337, 0xacb92ed9397bf996, 0x49c2c37f07965404, 0xd7e77a8f87daf7fb, 0xdc33745ec97be906, 0x86f0ac99b4e8dafd, 0x69a028bb3ded71a3, + 0xa8acd7c0222311bc, 0xc40832ea0d68ce0c, 0xd2d80db02aabd62b, 0xf50a3fa490c30190, 0x83c7088e1aab65db, 0x792667c6da79e0fa, 0xa4b8cab1a1563f52, 0x577001b891185938, + 0xcde6fd5e09abcf26, 0xed4c0226b55e6f86, 0x80b05e5ac60b6178, 0x544f8158315b05b4, 0xa0dc75f1778e39d6, 0x696361ae3db1c721, 0xc913936dd571c84c, 0x3bc3a19cd1e38e9, + 0xfb5878494ace3a5f, 0x4ab48a04065c723, 0x9d174b2dcec0e47b, 0x62eb0d64283f9c76, 0xc45d1df942711d9a, 0x3ba5d0bd324f8394, 0xf5746577930d6500, 0xca8f44ec7ee36479, + 0x9968bf6abbe85f20, 0x7e998b13cf4e1ecb, 0xbfc2ef456ae276e8, 0x9e3fedd8c321a67e, 0xefb3ab16c59b14a2, 0xc5cfe94ef3ea101e, 0x95d04aee3b80ece5, 0xbba1f1d158724a12, + 0xbb445da9ca61281f, 0x2a8a6e45ae8edc97, 0xea1575143cf97226, 0xf52d09d71a3293bd, 0x924d692ca61be758, 0x593c2626705f9c56, 0xb6e0c377cfa2e12e, 
0x6f8b2fb00c77836c, + 0xe498f455c38b997a, 0xb6dfb9c0f956447, 0x8edf98b59a373fec, 0x4724bd4189bd5eac, 0xb2977ee300c50fe7, 0x58edec91ec2cb657, 0xdf3d5e9bc0f653e1, 0x2f2967b66737e3ed, + 0x8b865b215899f46c, 0xbd79e0d20082ee74, 0xae67f1e9aec07187, 0xecd8590680a3aa11, 0xda01ee641a708de9, 0xe80e6f4820cc9495, 0x884134fe908658b2, 0x3109058d147fdcdd, + 0xaa51823e34a7eede, 0xbd4b46f0599fd415, 0xd4e5e2cdc1d1ea96, 0x6c9e18ac7007c91a, 0x850fadc09923329e, 0x3e2cf6bc604ddb0, 0xa6539930bf6bff45, 0x84db8346b786151c, + 0xcfe87f7cef46ff16, 0xe612641865679a63, 0x81f14fae158c5f6e, 0x4fcb7e8f3f60c07e, 0xa26da3999aef7749, 0xe3be5e330f38f09d, 0xcb090c8001ab551c, 0x5cadf5bfd3072cc5, + 0xfdcb4fa002162a63, 0x73d9732fc7c8f7f6, 0x9e9f11c4014dda7e, 0x2867e7fddcdd9afa, 0xc646d63501a1511d, 0xb281e1fd541501b8, 0xf7d88bc24209a565, 0x1f225a7ca91a4226, + 0x9ae757596946075f, 0x3375788de9b06958, 0xc1a12d2fc3978937, 0x52d6b1641c83ae, 0xf209787bb47d6b84, 0xc0678c5dbd23a49a, 0x9745eb4d50ce6332, 0xf840b7ba963646e0, + 0xbd176620a501fbff, 0xb650e5a93bc3d898, 0xec5d3fa8ce427aff, 0xa3e51f138ab4cebe, 0x93ba47c980e98cdf, 0xc66f336c36b10137, 0xb8a8d9bbe123f017, 0xb80b0047445d4184, + 0xe6d3102ad96cec1d, 0xa60dc059157491e5, 0x9043ea1ac7e41392, 0x87c89837ad68db2f, 0xb454e4a179dd1877, 0x29babe4598c311fb, 0xe16a1dc9d8545e94, 0xf4296dd6fef3d67a, + 0x8ce2529e2734bb1d, 0x1899e4a65f58660c, 0xb01ae745b101e9e4, 0x5ec05dcff72e7f8f, 0xdc21a1171d42645d, 0x76707543f4fa1f73, 0x899504ae72497eba, 0x6a06494a791c53a8, + 0xabfa45da0edbde69, 0x487db9d17636892, 0xd6f8d7509292d603, 0x45a9d2845d3c42b6, 0x865b86925b9bc5c2, 0xb8a2392ba45a9b2, 0xa7f26836f282b732, 0x8e6cac7768d7141e, + 0xd1ef0244af2364ff, 0x3207d795430cd926, 0x8335616aed761f1f, 0x7f44e6bd49e807b8, 0xa402b9c5a8d3a6e7, 0x5f16206c9c6209a6, 0xcd036837130890a1, 0x36dba887c37a8c0f, + 0x802221226be55a64, 0xc2494954da2c9789, 0xa02aa96b06deb0fd, 0xf2db9baa10b7bd6c, 0xc83553c5c8965d3d, 0x6f92829494e5acc7, 0xfa42a8b73abbf48c, 0xcb772339ba1f17f9, + 0x9c69a97284b578d7, 0xff2a760414536efb, 
0xc38413cf25e2d70d, 0xfef5138519684aba, 0xf46518c2ef5b8cd1, 0x7eb258665fc25d69, 0x98bf2f79d5993802, 0xef2f773ffbd97a61, + 0xbeeefb584aff8603, 0xaafb550ffacfd8fa, 0xeeaaba2e5dbf6784, 0x95ba2a53f983cf38, 0x952ab45cfa97a0b2, 0xdd945a747bf26183, 0xba756174393d88df, 0x94f971119aeef9e4, + 0xe912b9d1478ceb17, 0x7a37cd5601aab85d, 0x91abb422ccb812ee, 0xac62e055c10ab33a, 0xb616a12b7fe617aa, 0x577b986b314d6009, 0xe39c49765fdf9d94, 0xed5a7e85fda0b80b, + 0x8e41ade9fbebc27d, 0x14588f13be847307, 0xb1d219647ae6b31c, 0x596eb2d8ae258fc8, 0xde469fbd99a05fe3, 0x6fca5f8ed9aef3bb, 0x8aec23d680043bee, 0x25de7bb9480d5854, + 0xada72ccc20054ae9, 0xaf561aa79a10ae6a, 0xd910f7ff28069da4, 0x1b2ba1518094da04, 0x87aa9aff79042286, 0x90fb44d2f05d0842, 0xa99541bf57452b28, 0x353a1607ac744a53, + 0xd3fa922f2d1675f2, 0x42889b8997915ce8, 0x847c9b5d7c2e09b7, 0x69956135febada11, 0xa59bc234db398c25, 0x43fab9837e699095, 0xcf02b2c21207ef2e, 0x94f967e45e03f4bb, + 0x8161afb94b44f57d, 0x1d1be0eebac278f5, 0xa1ba1ba79e1632dc, 0x6462d92a69731732, 0xca28a291859bbf93, 0x7d7b8f7503cfdcfe, 0xfcb2cb35e702af78, 0x5cda735244c3d43e, + 0x9defbf01b061adab, 0x3a0888136afa64a7, 0xc56baec21c7a1916, 0x88aaa1845b8fdd0, 0xf6c69a72a3989f5b, 0x8aad549e57273d45, 0x9a3c2087a63f6399, 0x36ac54e2f678864b, + 0xc0cb28a98fcf3c7f, 0x84576a1bb416a7dd, 0xf0fdf2d3f3c30b9f, 0x656d44a2a11c51d5, 0x969eb7c47859e743, 0x9f644ae5a4b1b325, 0xbc4665b596706114, 0x873d5d9f0dde1fee, + 0xeb57ff22fc0c7959, 0xa90cb506d155a7ea, 0x9316ff75dd87cbd8, 0x9a7f12442d588f2, 0xb7dcbf5354e9bece, 0xc11ed6d538aeb2f, 0xe5d3ef282a242e81, 0x8f1668c8a86da5fa, + 0x8fa475791a569d10, 0xf96e017d694487bc, 0xb38d92d760ec4455, 0x37c981dcc395a9ac, 0xe070f78d3927556a, 0x85bbe253f47b1417, 0x8c469ab843b89562, 0x93956d7478ccec8e, + 0xaf58416654a6babb, 0x387ac8d1970027b2, 0xdb2e51bfe9d0696a, 0x6997b05fcc0319e, 0x88fcf317f22241e2, 0x441fece3bdf81f03, 0xab3c2fddeeaad25a, 0xd527e81cad7626c3, + 0xd60b3bd56a5586f1, 0x8a71e223d8d3b074, 0x85c7056562757456, 0xf6872d5667844e49, 0xa738c6bebb12d16c, 
0xb428f8ac016561db, 0xd106f86e69d785c7, 0xe13336d701beba52, + 0x82a45b450226b39c, 0xecc0024661173473, 0xa34d721642b06084, 0x27f002d7f95d0190, 0xcc20ce9bd35c78a5, 0x31ec038df7b441f4, 0xff290242c83396ce, 0x7e67047175a15271, + 0x9f79a169bd203e41, 0xf0062c6e984d386, 0xc75809c42c684dd1, 0x52c07b78a3e60868, 0xf92e0c3537826145, 0xa7709a56ccdf8a82, 0x9bbcc7a142b17ccb, 0x88a66076400bb691, + 0xc2abf989935ddbfe, 0x6acff893d00ea435, 0xf356f7ebf83552fe, 0x583f6b8c4124d43, 0x98165af37b2153de, 0xc3727a337a8b704a, 0xbe1bf1b059e9a8d6, 0x744f18c0592e4c5c, + 0xeda2ee1c7064130c, 0x1162def06f79df73, 0x9485d4d1c63e8be7, 0x8addcb5645ac2ba8, 0xb9a74a0637ce2ee1, 0x6d953e2bd7173692, 0xe8111c87c5c1ba99, 0xc8fa8db6ccdd0437, + 0x910ab1d4db9914a0, 0x1d9c9892400a22a2, 0xb54d5e4a127f59c8, 0x2503beb6d00cab4b, 0xe2a0b5dc971f303a, 0x2e44ae64840fd61d, 0x8da471a9de737e24, 0x5ceaecfed289e5d2, + 0xb10d8e1456105dad, 0x7425a83e872c5f47, 0xdd50f1996b947518, 0xd12f124e28f77719, 0x8a5296ffe33cc92f, 0x82bd6b70d99aaa6f, 0xace73cbfdc0bfb7b, 0x636cc64d1001550b, + 0xd8210befd30efa5a, 0x3c47f7e05401aa4e, 0x8714a775e3e95c78, 0x65acfaec34810a71, 0xa8d9d1535ce3b396, 0x7f1839a741a14d0d, 0xd31045a8341ca07c, 0x1ede48111209a050, + 0x83ea2b892091e44d, 0x934aed0aab460432, 0xa4e4b66b68b65d60, 0xf81da84d5617853f, 0xce1de40642e3f4b9, 0x36251260ab9d668e, 0x80d2ae83e9ce78f3, 0xc1d72b7c6b426019, + 0xa1075a24e4421730, 0xb24cf65b8612f81f, 0xc94930ae1d529cfc, 0xdee033f26797b627, 0xfb9b7cd9a4a7443c, 0x169840ef017da3b1, 0x9d412e0806e88aa5, 0x8e1f289560ee864e, + 0xc491798a08a2ad4e, 0xf1a6f2bab92a27e2, 0xf5b5d7ec8acb58a2, 0xae10af696774b1db, 0x9991a6f3d6bf1765, 0xacca6da1e0a8ef29, 0xbff610b0cc6edd3f, 0x17fd090a58d32af3, + 0xeff394dcff8a948e, 0xddfc4b4cef07f5b0, 0x95f83d0a1fb69cd9, 0x4abdaf101564f98e, 0xbb764c4ca7a4440f, 0x9d6d1ad41abe37f1, 0xea53df5fd18d5513, 0x84c86189216dc5ed, + 0x92746b9be2f8552c, 0x32fd3cf5b4e49bb4, 0xb7118682dbb66a77, 0x3fbc8c33221dc2a1, 0xe4d5e82392a40515, 0xfabaf3feaa5334a, 0x8f05b1163ba6832d, 
0x29cb4d87f2a7400e, + 0xb2c71d5bca9023f8, 0x743e20e9ef511012, 0xdf78e4b2bd342cf6, 0x914da9246b255416, 0x8bab8eefb6409c1a, 0x1ad089b6c2f7548e, 0xae9672aba3d0c320, 0xa184ac2473b529b1, + 0xda3c0f568cc4f3e8, 0xc9e5d72d90a2741e, 0x8865899617fb1871, 0x7e2fa67c7a658892, 0xaa7eebfb9df9de8d, 0xddbb901b98feeab7, 0xd51ea6fa85785631, 0x552a74227f3ea565, + 0x8533285c936b35de, 0xd53a88958f87275f, 0xa67ff273b8460356, 0x8a892abaf368f137, 0xd01fef10a657842c, 0x2d2b7569b0432d85, 0x8213f56a67f6b29b, 0x9c3b29620e29fc73, + 0xa298f2c501f45f42, 0x8349f3ba91b47b8f, 0xcb3f2f7642717713, 0x241c70a936219a73, 0xfe0efb53d30dd4d7, 0xed238cd383aa0110, 0x9ec95d1463e8a506, 0xf4363804324a40aa, + 0xc67bb4597ce2ce48, 0xb143c6053edcd0d5, 0xf81aa16fdc1b81da, 0xdd94b7868e94050a, 0x9b10a4e5e9913128, 0xca7cf2b4191c8326, 0xc1d4ce1f63f57d72, 0xfd1c2f611f63a3f0, + 0xf24a01a73cf2dccf, 0xbc633b39673c8cec, 0x976e41088617ca01, 0xd5be0503e085d813, 0xbd49d14aa79dbc82, 0x4b2d8644d8a74e18, 0xec9c459d51852ba2, 0xddf8e7d60ed1219e, + 0x93e1ab8252f33b45, 0xcabb90e5c942b503, 0xb8da1662e7b00a17, 0x3d6a751f3b936243, 0xe7109bfba19c0c9d, 0xcc512670a783ad4, 0x906a617d450187e2, 0x27fb2b80668b24c5, + 0xb484f9dc9641e9da, 0xb1f9f660802dedf6, 0xe1a63853bbd26451, 0x5e7873f8a0396973, 0x8d07e33455637eb2, 0xdb0b487b6423e1e8, 0xb049dc016abc5e5f, 0x91ce1a9a3d2cda62, + 0xdc5c5301c56b75f7, 0x7641a140cc7810fb, 0x89b9b3e11b6329ba, 0xa9e904c87fcb0a9d, 0xac2820d9623bf429, 0x546345fa9fbdcd44, 0xd732290fbacaf133, 0xa97c177947ad4095, + 0x867f59a9d4bed6c0, 0x49ed8eabcccc485d, 0xa81f301449ee8c70, 0x5c68f256bfff5a74, 0xd226fc195c6a2f8c, 0x73832eec6fff3111, 0x83585d8fd9c25db7, 0xc831fd53c5ff7eab, + 0xa42e74f3d032f525, 0xba3e7ca8b77f5e55, 0xcd3a1230c43fb26f, 0x28ce1bd2e55f35eb, 0x80444b5e7aa7cf85, 0x7980d163cf5b81b3, 0xa0555e361951c366, 0xd7e105bcc332621f, + 0xc86ab5c39fa63440, 0x8dd9472bf3fefaa7, 0xfa856334878fc150, 0xb14f98f6f0feb951, 0x9c935e00d4b9d8d2, 0x6ed1bf9a569f33d3, 0xc3b8358109e84f07, 0xa862f80ec4700c8, + 0xf4a642e14c6262c8, 
0xcd27bb612758c0fa, 0x98e7e9cccfbd7dbd, 0x8038d51cb897789c, 0xbf21e44003acdd2c, 0xe0470a63e6bd56c3, 0xeeea5d5004981478, 0x1858ccfce06cac74, + 0x95527a5202df0ccb, 0xf37801e0c43ebc8, 0xbaa718e68396cffd, 0xd30560258f54e6ba, 0xe950df20247c83fd, 0x47c6b82ef32a2069, 0x91d28b7416cdd27e, 0x4cdc331d57fa5441, + 0xb6472e511c81471d, 0xe0133fe4adf8e952, 0xe3d8f9e563a198e5, 0x58180fddd97723a6, 0x8e679c2f5e44ff8f, 0x570f09eaa7ea7648, +}; + + /* Maximum mantissa for fast path: 2^53 */ #define MAX_MANTISSA_FAST_PATH 9007199254740992ULL /* 2^53 */ @@ -159,6 +348,190 @@ static inline uint32_t parse_eight_digits_swar(uint64_t val) { return (uint32_t)val; } +/* ---------------------------------------------------------------------------- + * Eisel-Lemire algorithm — core (compute_float / am_to_double). + * + * Given a decimal mantissa `w` (≤ 19 digits, fits in uint64) and exponent `q`, + * compute the correctly-rounded `double` representing `w * 10^q`. Internally: + * + * 1. Shift `w` so its leading bit is set (full 64-bit mantissa). + * 2. Multiply by the 128-bit precomputed power-of-five entry above. + * 3. Extract the 53-bit mantissa from the high 64 bits of the product, with + * one extra bit for round-to-nearest-even. + * 4. Apply the round-half-to-even rule, including the rare power-of-2 tie + * case that needs a second-pass check. + * + * For the 19-digit / |q| ≤ 22 input range the result is provably bit-exact + * with strtod() (Mushtak & Lemire, "Fast Number Parsing Without Fallback"). + * The caller falls back to strtod() if compute_float() signals indeterminate + * (we never trigger that branch with parse_number_string's bounded inputs). + * + * Ported from fast_float by Daniel Lemire & Joao Paulo Magalhaes + * (MIT-licensed, https://github.com/fastfloat/fast_float — decimal_to_binary.h + * and float_common.h). C++ template machinery dropped in favour of a + * double-only specialisation; struct layouts kept to ease future review. 
+ * ---------------------------------------------------------------------------- */ + +/* IEEE-754 binary64 constants (mirrors fast_float's binary_format). */ +#define DOUBLE_MANTISSA_EXPLICIT_BITS 52 +#define DOUBLE_MIN_EXPONENT_ROUND_EVEN -4 +#define DOUBLE_MAX_EXPONENT_ROUND_EVEN 23 +#define DOUBLE_MINIMUM_EXPONENT -1023 +#define DOUBLE_INFINITE_POWER 0x7FF + +/* 128-bit unsigned, little-endian: low holds bits [0..63]. */ +typedef struct { + uint64_t low; + uint64_t high; +} value128; + +/* Result of compute_float(): a 53-bit mantissa and a biased binary exponent. + * power2 < 0 signals indeterminate (caller should fall back to strtod()). */ +typedef struct { + uint64_t mantissa; + int32_t power2; +} adjusted_mantissa; + +/* `__builtin_clzll` is undefined on input 0 — caller guarantees v > 0. */ +static inline int leading_zeroes_u64(uint64_t v) { + return __builtin_clzll(v); +} + +/* 64x64 -> 128 multiplication. Uses __uint128_t when the compiler defines + * __SIZEOF_INT128__; otherwise takes the portable 32-bit-halves path below. */ +static inline value128 full_multiplication(uint64_t a, uint64_t b) { + value128 r; +#ifdef __SIZEOF_INT128__ + __uint128_t prod = (__uint128_t)a * (__uint128_t)b; + r.low = (uint64_t)prod; + r.high = (uint64_t)(prod >> 64); +#else + /* 32-bit fallback: split each operand into two 32-bit halves. */ + uint64_t a_lo = (uint32_t)a, a_hi = a >> 32; + uint64_t b_lo = (uint32_t)b, b_hi = b >> 32; + uint64_t ll = a_lo * b_lo; + uint64_t lh = a_lo * b_hi; + uint64_t hl = a_hi * b_lo; + uint64_t hh = a_hi * b_hi; + uint64_t mid = (ll >> 32) + (uint32_t)lh + (uint32_t)hl; + r.low = (mid << 32) | (uint32_t)ll; + r.high = hh + (lh >> 32) + (hl >> 32) + (mid >> 32); +#endif + return r; +} + +/* For q in (-400, 350), this approximates floor(log2(5^q)) + q + 63 + * (or -ceil(log2(5^|q|)) + q + 63 for negative q). Used to derive power2.
*/ +static inline int32_t eisel_lemire_power(int32_t q) { + return (((152170 + 65536) * q) >> 16) + 63; +} + +/* 128-bit approximation of `w * 5^q`. The optional fixup multiplies by the + * second (extension) entry of the power-of-five table when the high half is + * close to a rounding boundary. Mathematical proof of sufficiency: see + * Mushtak & Lemire, "Fast Number Parsing Without Fallback". */ +static inline value128 compute_product_approximation_d(int64_t q, uint64_t w) { + int index = 2 * (int)(q - EISEL_LEMIRE_SMALLEST_POWER_OF_FIVE); + value128 firstproduct = full_multiplication(w, power_of_five_128[index]); + /* For double, bit_precision = mantissa_explicit_bits (52) + 3 = 55. */ + const uint64_t precision_mask = + (uint64_t)0xFFFFFFFFFFFFFFFFULL >> 55; + if ((firstproduct.high & precision_mask) == precision_mask) { + value128 secondproduct = + full_multiplication(w, power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if (secondproduct.high > firstproduct.low) { + firstproduct.high++; + } + } + return firstproduct; +} + +/* Eisel-Lemire main: compute a correctly-rounded representation of w * 10^q. + * Returns an `adjusted_mantissa`. Special outputs: + * - mantissa == 0 && power2 == 0: result is +/-0 + * - power2 == DOUBLE_INFINITE_POWER && mantissa == 0: result is infinity + * - power2 < 0: indeterminate (caller should fall back to strtod()). With + * parse_number_string()'s bounded mantissa (<= 19 digits), this branch + * is unreachable, but we keep the signature for safety. + */ +static adjusted_mantissa compute_float_d(int64_t q, uint64_t w) { + adjusted_mantissa answer; + + if (w == 0 || q < EISEL_LEMIRE_SMALLEST_POWER_OF_FIVE) { + answer.power2 = 0; + answer.mantissa = 0; + return answer; + } + if (q > EISEL_LEMIRE_LARGEST_POWER_OF_FIVE) { + answer.power2 = DOUBLE_INFINITE_POWER; + answer.mantissa = 0; + return answer; + } + + /* Renormalise w so its top bit is set. 
*/ + int lz = leading_zeroes_u64(w); + w <<= lz; + + value128 product = compute_product_approximation_d(q, w); + + int upperbit = (int)(product.high >> 63); + int shift = upperbit + 64 - DOUBLE_MANTISSA_EXPLICIT_BITS - 3; + + answer.mantissa = product.high >> shift; + answer.power2 = (int32_t)(eisel_lemire_power((int32_t)q) + upperbit - lz - DOUBLE_MINIMUM_EXPONENT); + + if (answer.power2 <= 0) { + /* Subnormal path. */ + if (-answer.power2 + 1 >= 64) { + /* More than 64 bits below minimum exponent — definitely zero. */ + answer.power2 = 0; + answer.mantissa = 0; + return answer; + } + /* Safe: -answer.power2 + 1 < 64. */ + answer.mantissa >>= -answer.power2 + 1; + answer.mantissa += (answer.mantissa & 1); /* round up */ + answer.mantissa >>= 1; + /* If post-rounding the value crosses back into the normal range, mark + * it normal (power2 = 1) rather than subnormal (power2 = 0). */ + answer.power2 = (answer.mantissa < ((uint64_t)1 << DOUBLE_MANTISSA_EXPLICIT_BITS)) ? 0 : 1; + return answer; + } + + /* Normal path: handle the round-half-to-even tie case. */ + if ((product.low <= 1) && + (q >= DOUBLE_MIN_EXPONENT_ROUND_EVEN) && + (q <= DOUBLE_MAX_EXPONENT_ROUND_EVEN) && + ((answer.mantissa & 3) == 1)) { + if ((answer.mantissa << shift) == product.high) { + answer.mantissa &= ~(uint64_t)1; /* clear LSB so we round down */ + } + } + answer.mantissa += (answer.mantissa & 1); + answer.mantissa >>= 1; + if (answer.mantissa >= ((uint64_t)2 << DOUBLE_MANTISSA_EXPLICIT_BITS)) { + answer.mantissa = (uint64_t)1 << DOUBLE_MANTISSA_EXPLICIT_BITS; + answer.power2++; + } + answer.mantissa &= ~((uint64_t)1 << DOUBLE_MANTISSA_EXPLICIT_BITS); + if (answer.power2 >= DOUBLE_INFINITE_POWER) { + answer.power2 = DOUBLE_INFINITE_POWER; + answer.mantissa = 0; + } + return answer; +} + +/* Pack adjusted_mantissa back to a double via IEEE-754 bit layout. 
*/ +static inline double am_to_double(int negative, adjusted_mantissa am) { + uint64_t word = am.mantissa; + word |= (uint64_t)am.power2 << DOUBLE_MANTISSA_EXPLICIT_BITS; + if (negative) word |= (uint64_t)1 << 63; + double value; + memcpy(&value, &word, sizeof(value)); + return value; +} + /* Parse a decimal number string into components. * This follows the fast_float algorithm closely. */ static inline int parse_number_string(const char *p, const char *pend, double *result, const char **endptr) { @@ -261,22 +634,40 @@ static inline int parse_number_string(const char *p, const char *pend, double *r if (digit_count > MAX_DIGITS) return 0; } - /* Check if we're within fast path bounds */ - if (exponent < MIN_EXPONENT_FAST_PATH) return 0; - if (exponent > MAX_EXPONENT_FAST_PATH) return 0; - if (mantissa > MAX_MANTISSA_FAST_PATH) return 0; - - /* Fast path: direct conversion */ - double value = (double)mantissa; + /* Pick the conversion path. Two regimes: + * Clinger fast path: small mantissa (<= 2^53) and small |exp| (<= 22). + * One double multiply or divide; cheapest, exact by construction. + * Eisel-Lemire: large mantissa or wide exponent range (full double + * domain). Slightly slower per call (128-bit multiply + table lookup) + * but correctly-rounded by the Mushtak-Lemire proof. + * Inputs outside both ranges fall back to strtod() (caller of this fn). */ + double value; + if (mantissa <= MAX_MANTISSA_FAST_PATH && + exponent >= MIN_EXPONENT_FAST_PATH && + exponent <= MAX_EXPONENT_FAST_PATH) + { + /* Clinger fast path: all operands exact in double precision, + * single multiply/divide produces a correctly-rounded result. */ + value = (double)mantissa; + if (exponent < 0) value = value / powers_of_ten[-exponent]; + else if (exponent > 0) value = value * powers_of_ten[exponent]; + if (negative) value = -value; + } else { + /* Eisel-Lemire path. 
Replaces a previously hand-rolled widened branch + * (`(double)hi * 2^64 + (double)lo` shortcut) that produced ±1 ULP + * mismatches vs strtod() on inputs like 9007199255094284e-19 and + * 2489830482329185244e1. compute_float_d is bit-exact with strtod() + * for every input parse_number_string can produce. */ + if (exponent < EISEL_LEMIRE_SMALLEST_POWER_OF_FIVE || exponent > EISEL_LEMIRE_LARGEST_POWER_OF_FIVE) + return 0; - if (exponent < 0) { - value = value / powers_of_ten[-exponent]; - } else if (exponent > 0) { - value = value * powers_of_ten[exponent]; - } - - if (negative) { - value = -value; + adjusted_mantissa am = compute_float_d(exponent, mantissa); + /* power2 < 0 would mean indeterminate (caller should fall back to + * strtod). With our bounded mantissa (<= 19 digits) this branch is + * unreachable per the Mushtak-Lemire proof, but we keep the guard so + * any future caller that supplies a larger mantissa stays correct. */ + if (am.power2 < 0) return 0; + value = am_to_double(negative, am); } *result = value; @@ -448,9 +839,122 @@ int fastFloatTest(int argc, char **argv, int flags) { {"12345678901234567890", 1.2345678901234567e19}, {"2.2250738585072012e-308", 2.2250738585072012e-308}, /* Near DBL_MIN boundary */ {"0x10", 16.0}, + + /* Widened fast path: mantissa > 2^53 (==9007199254740992), |exp| in [1,19]. + * These cover the __uint128_t code path that avoids the strtod() fallback. + * Each expected value is the IEEE-correct round-to-nearest double. 
*/ + + /* 17-19 significant digit mantissas — negative exponent (scores in [0,1)) */ + {"0.49606648747577575", 0.49606648747577575}, /* 17 sig digits, ZADD hot case */ + {"0.8731899671198792", 0.8731899671198792}, /* 16 sig digits */ + {"0.34912978268081996", 0.34912978268081996}, /* 17 sig digits */ + {"0.0033318113277969186", 0.0033318113277969186}, /* 17 sig digits (19 fractional digits incl. two leading zeros) */ + {"0.9955843393406656", 0.9955843393406656}, + {"0.999999999999999", 0.999999999999999}, /* repunit-ish, ULP boundary */ + + /* Mantissa just above 2^53: triggers the widened path */ + {"9007199254740993.0", 9007199254740992.0}, /* rounds down */ + {"9007199254740995.0", 9007199254740996.0}, /* ties-to-even up */ + {"9007199254740996.0", 9007199254740996.0}, + {"10000000000000000", 1e16}, /* exact 10^16, mantissa = 10^16 */ + {"99999999999999999", 1e17}, /* one less than 10^17 */ + + /* 19-digit mantissa with various exponents */ + {"1234567890123456789", 1.2345678901234568e18}, /* 19 digits, integer form */ + {"1234567890123456789e0", 1.2345678901234568e18}, + {"1234567890123456789e-5", 12345678901234.568}, + {"1234567890123456789e-19", 0.12345678901234568}, + {"1234567890123456789e5", 1.2345678901234569e23}, /* 19-digit mantissa × 10^5 — widened path */ + + /* Exponent exactly ±19 (limit of the former widened branch) */ + {"1234567890123.456789e-19", 1.2345678901234568e-7}, /* effective exp = -25; no longer falls back to strtod (Eisel-Lemire covers it) */ + {"9999999999999999e19", 9.999999999999999e34}, + {"9999999999999999e-19", 9.999999999999999e-4}, + + /* Negative numbers exercising the widened path */ + {"-0.49606648747577575", -0.49606648747577575}, + {"-9007199254740993", -9007199254740992.0}, + + /* Eisel-Lemire rounding-boundary cases. + * Reported by @vitahlin on #14661 against the previous + * `(double)hi * 2^64 + (double)lo` widened branch which + * double-rounded the 128-bit product. Both must now match + * strtod() exactly.
*/ + {"9007199255094284e-19", 9007199255094284e-19}, /* was -1 ULP */ + {"2489830482329185244e1", 2489830482329185244e1}, /* was +1 ULP */ + + /* Subnormal boundaries (Eisel-Lemire's subnormal branch). */ + {"5e-324", 5e-324}, /* smallest pos subnormal */ + {"4.9e-324", 5e-324}, /* just below the smallest subnormal: rounds up to it */ + {"2.2250738585072009e-308", 2.2250738585072009e-308}, /* largest subnormal */ + {"2.2250738585072014e-308", 2.2250738585072014e-308}, /* smallest normal */ + {"1e-323", 1e-323}, + + /* Round-half-to-even candidates for the compute_float_d tie path + * (product.low <= 1, q in [-4, 23], mantissa & 3 == 1). NOTE(review): inputs with mantissa <= 2^53 and |exp| <= 22 are dispatched to the Clinger path, so confirm these actually reach that branch. */ + {"5497558138880", 5497558138880.0}, /* 5 * 2^40 = 2^42 + 2^40 */ + {"5e-22", 5e-22}, + {"7.038531e-26", 7.038531e-26}, + {"4503599627475501e-10", 4503599627475501e-10}, /* near 2^52 */ + + /* Largest finite double and nearby magnitudes. */ + {"1.7976931348623157e308", 1.7976931348623157e308}, /* DBL_MAX */ + {"1.7976931348623158e308", 1.7976931348623157e308}, /* nearest is DBL_MAX */ + {"1e308", 1e308}, + + /* Wide exponent range now reachable via Eisel-Lemire (previously + * fell to strtod). */ + {"1.234567890123456e100", 1.234567890123456e100}, + {"9.999999999999999e99", 9.999999999999999e99}, + {"1e-300", 1e-300}, + {"1.7e-300", 1.7e-300}, + + /* Repunit / many-9 mantissas — adjacent-double tie territory. */ + {"9999999999999998", 9999999999999998.0}, + {"99999999999999999", 1e17}, }; run_ff_tests(decimal_ok, COUNTOF(decimal_ok), 0); + /* Differential cross-check: every accepted input must produce the + * exact same bits as libc strtod(). Hand-picked hard cases covering + * every code path in compute_float_d (subnormal branch, round-half- + * to-even tie path, near-infinity, repunit mantissa, wide exponent). */ + { + static const char *diff_inputs[] = { + /* Boundary classics around 2^53. */ + "9007199254740992", "9007199254740993", "9007199254740994", + "9007199254740995", "9007199254740996", + /* Limits of finite double.
*/ + "1.7976931348623157e308", "2.2250738585072014e-308", + "5e-324", "1e-323", "4.9406564584124654e-324", + /* The two reproducer inputs the previous widened branch missed. */ + "9007199255094284e-19", "2489830482329185244e1", + /* Mushtak-Lemire stress range — 19-digit mantissas. */ + "1234567890123456789e0", "1234567890123456789e-5", + "1234567890123456789e5", "9999999999999999e19", + /* Common scientific constants — mid-exponent sanity. */ + "3.141592653589793", "2.718281828459045", + "1.4142135623730951e150", "6.022140857e23", + "1.602176634e-19", "9.10938356e-31", + }; + for (int i = 0; i < COUNTOF(diff_inputs); i++) { + const char *s = diff_inputs[i]; + char *fend, *lend; + errno = 0; + double got = fast_float_strtod(s, strlen(s), &fend); + errno = 0; + double libc = strtod(s, &lend); + uint64_t gb, lb; + memcpy(&gb, &got, sizeof(gb)); + memcpy(&lb, &libc, sizeof(lb)); + char descr[160]; + snprintf(descr, sizeof(descr), + "differential vs strtod: \"%s\" ff=0x%016llx libc=0x%016llx", + s, (unsigned long long)gb, (unsigned long long)lb); + test_cond(descr, gb == lb); + } + } + /* No valid prefix for full buffer, or trailing junk. */ ff_testcase decimal_bad[] = { {"1abc", 1.0}, diff --git a/src/gcra.c b/src/gcra.c index 488fad5ce..a6b738824 100644 --- a/src/gcra.c +++ b/src/gcra.c @@ -9,6 +9,8 @@ #include "server.h" #include +#ifdef ENABLE_GCRA + /* GCRA algorithm for rate limiting. * Implementation is heavily based on the implementation of (redis-cell) * [https://github.com/brandur/redis-cell] by (brandur)[https://github.com/brandur]. 
@@ -278,3 +280,5 @@ robj *gcraDup(robj *o) { getLongLongFromGCRAObject(o, &val); return createGCRAObject(val); } + +#endif /* ENABLE_GCRA */ diff --git a/src/geo.c b/src/geo.c index ce890f7f0..978270bb6 100644 --- a/src/geo.c +++ b/src/geo.c @@ -108,7 +108,7 @@ int extractLongLatOrReply(client *c, robj **argv, double *xy) { if (xy[0] < GEO_LONG_MIN || xy[0] > GEO_LONG_MAX || xy[1] < GEO_LAT_MIN || xy[1] > GEO_LAT_MAX) { addReplyErrorFormat(c, - "-ERR invalid longitude,latitude pair %f,%f\r\n",xy[0],xy[1]); + "invalid longitude,latitude pair %f,%f",xy[0],xy[1]); return C_ERR; } return C_OK; diff --git a/src/hotkeys.c b/src/hotkeys.c index bdcc831e4..817a8c394 100644 --- a/src/hotkeys.c +++ b/src/hotkeys.c @@ -13,11 +13,6 @@ #include "cluster.h" #include -static inline int nearestNextPowerOf2(unsigned int count) { - if (count <= 1) return 1; - return 1 << (32 - __builtin_clz(count-1)); -} - /* Comparison function for qsort to sort slot indices */ static inline int slotCompare(const void *a, const void *b) { return (*(const int *)a) - (*(const int *)b); diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 66a7f5e2a..a3e8dd180 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -530,11 +530,17 @@ void hllDenseRegHisto(uint8_t *registers, int* reghisto) { /* Redis default is to use 16384 registers 6 bits each. The code works * with other values by modifying the defines, but for our target value - * we take a faster path with unrolled loops. */ + * we take a faster path with unrolled loops. + * + * Uses 4 independent histogram accumulators to break store→load + * dependency chains when multiple registers map to the same bin. + * Each group of 4 registers goes to a different accumulator. 
*/ if (HLL_REGISTERS == 16384 && HLL_BITS == 6) { uint8_t *r = registers; unsigned long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15; + int h0[64] = {0}, h1[64] = {0}, h2[64] = {0}, h3[64] = {0}; + for (j = 0; j < 1024; j++) { /* Handle 16 registers per iteration. */ r0 = r[0] & 63; @@ -554,25 +560,36 @@ void hllDenseRegHisto(uint8_t *registers, int* reghisto) { r14 = (r[10] >> 4 | r[11] << 4) & 63; r15 = (r[11] >> 2) & 63; - reghisto[r0]++; - reghisto[r1]++; - reghisto[r2]++; - reghisto[r3]++; - reghisto[r4]++; - reghisto[r5]++; - reghisto[r6]++; - reghisto[r7]++; - reghisto[r8]++; - reghisto[r9]++; - reghisto[r10]++; - reghisto[r11]++; - reghisto[r12]++; - reghisto[r13]++; - reghisto[r14]++; - reghisto[r15]++; + /* Interleave across 4 accumulators by index mod 4: + * r0,r4,r8,r12 → h0; r1,r5,r9,r13 → h1; + * r2,r6,r10,r14 → h2; r3,r7,r11,r15 → h3. + * HLL register values cluster in a few consecutive bins, so adjacent + * registers frequently hit the same histogram bin. 4 accumulators + * break the resulting store→load dependency chain. */ + h0[r0]++; + h1[r1]++; + h2[r2]++; + h3[r3]++; + h0[r4]++; + h1[r5]++; + h2[r6]++; + h3[r7]++; + h0[r8]++; + h1[r9]++; + h2[r10]++; + h3[r11]++; + h0[r12]++; + h1[r13]++; + h2[r14]++; + h3[r15]++; r += 12; } + + /* Merge accumulators into reghisto — 64 entries (6-bit register values), negligible cost. Accumulate (+=) like the per-register increments this replaces, so counts already in reghisto are kept. */ + for (j = 0; j < 64; j++) { + reghisto[j] += h0[j] + h1[j] + h2[j] + h3[j]; + } } else { for(j = 0; j < HLL_REGISTERS; j++) { unsigned long reg; @@ -986,27 +1003,39 @@ void hllSparseRegHisto(uint8_t *sparse, int sparselen, int *invalid, int* reghis * computation, which is representation-specific, while all the rest is common. */ /* Implements the register histogram calculation for uint8_t data type - * which is only used internally as speedup for PFCOUNT with multiple keys. */ + * which is only used internally as speedup for PFCOUNT with multiple keys.
+ * + * Uses 4 independent histogram accumulators to break store→load dependency + * chains: when two bytes in the same word map to the same histogram bin, + * a single accumulator serializes on the load-modify-store cycle. With 4 + * accumulators, each byte goes to a different copy, allowing the CPU's + * out-of-order engine to overlap the increments. */ void hllRawRegHisto(uint8_t *registers, int* reghisto) { - uint64_t *word = (uint64_t*) registers; - uint8_t *bytes; + /* 4 independent accumulators — each byte position in the 8-byte word + * maps to a different accumulator to maximize ILP. Accumulator + * assignment is by byte index mod 4: bytes 0,4 → h0, 1,5 → h1, + * 2,6 → h2, 3,7 → h3. */ + int h0[64] = {0}, h1[64] = {0}, h2[64] = {0}, h3[64] = {0}; + uint8_t *r = registers; int j; - for (j = 0; j < HLL_REGISTERS/8; j++) { - if (*word == 0) { - reghisto[0] += 8; - } else { - bytes = (uint8_t*) word; - reghisto[bytes[0]]++; - reghisto[bytes[1]]++; - reghisto[bytes[2]]++; - reghisto[bytes[3]]++; - reghisto[bytes[4]]++; - reghisto[bytes[5]]++; - reghisto[bytes[6]]++; - reghisto[bytes[7]]++; - } - word++; + for (j = 0; j < HLL_REGISTERS; j += 8) { + h0[r[0]]++; + h1[r[1]]++; + h2[r[2]]++; + h3[r[3]]++; + h0[r[4]]++; + h1[r[5]]++; + h2[r[6]]++; + h3[r[7]]++; + r += 8; + } + + /* Merge accumulators into reghisto. The histogram has 64 entries + * (register values are 6-bit, range 0-63), so this loop is negligible. + * Accumulate (+=) as the replaced code did, keeping counts already in reghisto. */ + for (j = 0; j < 64; j++) { + reghisto[j] += h0[j] + h1[j] + h2[j] + h3[j]; } } diff --git a/src/iothread.c b/src/iothread.c index 981edb951..73919cce1 100644 --- a/src/iothread.c +++ b/src/iothread.c @@ -859,6 +859,8 @@ int IOThreadCron(struct aeEventLoop *eventLoop, long long id, void *clientData) * and IO thread will communicate through event notifier. */ void *IOThreadMain(void *ptr) { IOThread *t = ptr; + /* Claim a reserved used_memory accounting slot before any allocation.
*/ + zmalloc_register_reserved_slot(); char thdname[16]; snprintf(thdname, sizeof(thdname), "io_thd_%d", t->id); redis_set_thread_title(thdname); diff --git a/src/keymeta.c b/src/keymeta.c index e4430da2c..fba77a2d6 100644 --- a/src/keymeta.c +++ b/src/keymeta.c @@ -416,7 +416,7 @@ int rdbLoadSkipMetaIfAllowed(rio *rdb, char *cname, int flags) { * * Note: rdbLoadCheckModuleValue() reads opcodes until it finds RDB_MODULE_OPCODE_EOF, * so it consumes the EOF marker as well. We don't need to read it separately. */ - robj *dummy = rdbLoadCheckModuleValue(rdb, cname); + robj *dummy = rdbLoadCheckModuleValue(rdb, cname, 1); if (dummy == NULL) { serverLog(LL_WARNING, "Corrupted metadata value for class '%s'", cname); return -1; diff --git a/src/lazyfree.c b/src/lazyfree.c index 01f34bec6..5b4b7b6c6 100644 --- a/src/lazyfree.c +++ b/src/lazyfree.c @@ -207,6 +207,9 @@ size_t lazyfreeGetFreeEffort(robj *key, robj *obj, int dbid) { /* If the module's free_effort returns 0, we will use asynchronous free * memory by default. */ return effort == 0 ? ULONG_MAX : effort; + } else if (obj->type == OBJ_ARRAY) { + redisArray *ar = obj->ptr; + return arCount(ar); } else { return 1; /* Everything else is a single allocation. 
*/ } diff --git a/src/listpack.c b/src/listpack.c index 5b37d2f7b..c09bb8eca 100644 --- a/src/listpack.c +++ b/src/listpack.c @@ -337,20 +337,20 @@ static inline unsigned long lpEncodeBacklen(unsigned char *buf, uint64_t l) { if (l <= 127) { if (buf) buf[0] = l; return 1; - } else if (l < 16383) { + } else if (l <= 16383) { if (buf) { buf[0] = l>>7; buf[1] = (l&127)|128; } return 2; - } else if (l < 2097151) { + } else if (l <= 2097151) { if (buf) { buf[0] = l>>14; buf[1] = ((l>>7)&127)|128; buf[2] = (l&127)|128; } return 3; - } else if (l < 268435455) { + } else if (l <= 268435455) { if (buf) { buf[0] = l>>21; buf[1] = ((l>>14)&127)|128; @@ -376,11 +376,11 @@ static inline unsigned long lpEncodeBacklen(unsigned char *buf, uint64_t l) { static inline unsigned long lpEncodeBacklenBytes(uint64_t l) { if (l <= 127) { return 1; - } else if (l < 16383) { + } else if (l <= 16383) { return 2; - } else if (l < 2097151) { + } else if (l <= 2097151) { return 3; - } else if (l < 268435455) { + } else if (l <= 268435455) { return 4; } else { return 5; @@ -2643,6 +2643,32 @@ int listpackTest(int argc, char *argv[], int flags) { lpFree(lp); } + TEST("Backlen encode/decode at width boundaries") { + /* Body lengths where backlen widens; maxima per width must use the + * minimum byte count and round-trip (lpEncodeBacklen vs + * lpEncodeBacklenBytes and lpDecodeBacklen). 
*/ + const uint64_t cases[] = { + 128ULL, + 16382ULL, + 16383ULL, + 16384ULL, + 2097150ULL, + 2097151ULL, + 2097152ULL, + 268435454ULL, + 268435455ULL, + 268435456ULL, + }; + unsigned char enc[LP_MAX_BACKLEN_SIZE]; + for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) { + uint64_t enclen = cases[i]; + unsigned long n = lpEncodeBacklen(NULL, enclen); + assert(n == lpEncodeBacklenBytes(enclen)); + assert(lpEncodeBacklen(enc, enclen) == n); + assert(lpDecodeBacklen(enc + n - 1) == enclen); + } + } + TEST("Create long list and check indices") { lp = lpNew(0); char buf[32]; diff --git a/src/memory_prefetch.c b/src/memory_prefetch.c index 46810147f..f9a911e03 100644 --- a/src/memory_prefetch.c +++ b/src/memory_prefetch.c @@ -19,18 +19,50 @@ #include "server.h" #include "dict.h" -typedef enum { HT_IDX_FIRST = 0, HT_IDX_SECOND = 1, HT_IDX_INVALID = -1 } HashTableIndex; +/* -------------------------------------------------------------------------- + * Dict prefetching state machine + * -------------------------------------------------------------------------- */ + +typedef enum { HT_IDX_FIRST = 0, HT_IDX_SECOND = 1, HT_IDX_INVALID = -1 } dictHtIdx; typedef enum { - PREFETCH_BUCKET, /* Initial state, determines which hash table to use and prefetch the table's bucket */ - PREFETCH_ENTRY, /* prefetch entries associated with the given key's hash */ - PREFETCH_KVOBJ, /* prefetch the kv object of the entry found in the previous step */ - PREFETCH_VALDATA, /* prefetch the value data of the kv object found in the previous step */ - PREFETCH_DONE /* Indicates that prefetching for this key is complete */ -} PrefetchState; + PREFETCH_BUCKET, /* Initial state, determines which hash table to use and prefetch the table's bucket */ + PREFETCH_ENTRY, /* prefetch entries associated with the given key's hash */ + PREFETCH_ENTRY_KEY, /* dictType-driven prefetch of the entry's key payload (for keyCompare) */ + PREFETCH_ENTRY_VALUE, /* compare keys; on match, dictType-driven prefetch 
of the value payload */ + PREFETCH_DONE /* Indicates that prefetching for this key is complete */ +} dictPrefetchState; +/* Per-key state of an in-flight, software-pipelined dictFind, advanced one + * stage at a time by dictPrefetcher (see below). The non-state fields mirror + * the locals that a synchronous dictFind would otherwise carry across one + * bucket walk. */ +typedef struct dictPrefetchLookup { + dictPrefetchState state; /* Current FSM stage of this lookup */ + dictHtIdx ht_idx; /* Index of the current hash table (0 or 1 for rehashing) */ + uint64_t bucket_idx; /* Index of the bucket in the current hash table */ + uint64_t key_hash; /* Hash value of the key being looked up */ + dictEntry *current_entry; /* Pointer to the current entry being processed */ +} dictPrefetchLookup; -/************************************ State machine diagram for the prefetch operation. ******************************** +/* dictPrefetcher drives a batch of dictPrefetchLookup objects through the + * prefetch FSM, yielding to the next in-flight lookup each time a prefetch + * is issued — so one lookup's memory stall overlaps another's work. The + * state machine itself is fully dict-pure: any key/value payload prefetching + * is delegated to the dictType->prefetchEntryKey / prefetchEntryValue + * callbacks of each key's dict. The same prefetcher is used by both the + * cross-command batch path and the intra-command dictPrefetchKeys() API. 
*/ +typedef struct dictPrefetcher { + size_t cur_idx; /* Cursor; advances on each prefetch issue */ + size_t nkeys; /* Total key lookups in this batch */ + size_t remaining; /* Number of in-flight key lookups (not yet PREFETCH_DONE) */ + void **keys; /* Array of key pointers (sds) */ + dict **dicts; /* Per-key dictionary pointers */ + dictPrefetchLookup *lookups; /* Per-key lookup state, capacity == max_keys */ + size_t max_keys; /* Capacity of lookups[] */ +} dictPrefetcher; + +/******************************** State machine diagram for the dict prefetch operation. ****************************** │ start │ @@ -44,33 +76,254 @@ typedef enum { ┌────────────►└────────┬────────┘ │ | Entry│found │ │ | │ - | ┌───────▼────────┐ │ - │ | PREFETCH_KVOBJ | ▼ - │ └───────┬────────┘ │ - kvobj not found - goto next entry | | - │ ┌───────────▼────────────┐ │ - └──────◄───│ PREFETCH_VALDATA │ ▼ - └───────────┬────────────┘ │ + | ┌───────────▼─────────────┐ │ + │ | PREFETCH_ENTRY_KEY | ▼ + │ └───────────┬─────────────┘ │ + key mismatch - goto next entry | | + │ ┌───────────▼─────────────┐ │ + └──────◄───│ PREFETCH_ENTRY_VALUE │ ▼ + └───────────┬─────────────┘ │ | │ ┌───────-─▼─────────────┐ │ │ PREFETCH_DONE │◄────────┘ └───────────────────────┘ + **********************************************************************************************************************/ -typedef void *(*GetValueDataFunc)(const void *val); +/* Issue a software prefetch for `addr`, then yield to the next lookup by + * advancing the cursor. 
*/ +static inline void dictPrefetchAdvance(dictPrefetcher *p, void *addr) { + redis_prefetch_read(addr); + if (++p->cur_idx >= p->nkeys) p->cur_idx = 0; +} -typedef struct KeyPrefetchInfo { - PrefetchState state; /* Current state of the prefetch operation */ - HashTableIndex ht_idx; /* Index of the current hash table (0 or 1 for rehashing) */ - uint64_t bucket_idx; /* Index of the bucket in the current hash table */ - uint64_t key_hash; /* Hash value of the key being prefetched */ - dictEntry *current_entry; /* Pointer to the current entry being processed */ - kvobj *current_kv; /* Pointer to the kv object being prefetched */ -} KeyPrefetchInfo; +static inline void dictPrefetchMarkDone(dictPrefetcher *p, dictPrefetchLookup *lk) { + lk->state = PREFETCH_DONE; + p->remaining--; + server.stat_total_prefetch_entries++; +} + +/* Return the next in-flight lookup that still needs work, or NULL if all done. */ +static inline dictPrefetchLookup *dictPrefetchNextInFlight(dictPrefetcher *p) { + if (p->remaining == 0) return NULL; + while (p->lookups[p->cur_idx].state == PREFETCH_DONE) { + if (++p->cur_idx >= p->nkeys) p->cur_idx = 0; + } + return &p->lookups[p->cur_idx]; +} + +/* Prefetch the bucket of the next hash table index. + * If no tables are left, move to the PREFETCH_DONE state. */ +static void dictPrefetchBucket(dictPrefetcher *p, dictPrefetchLookup *lk) { + size_t i = p->cur_idx; + dict *d = p->dicts[i]; + + /* Determine which hash table to use */ + if (lk->ht_idx == HT_IDX_INVALID) { + lk->ht_idx = HT_IDX_FIRST; + } else if (lk->ht_idx == HT_IDX_FIRST && dictIsRehashing(d)) { + lk->ht_idx = HT_IDX_SECOND; + } else { + /* No more tables left - mark as done. 
*/ + dictPrefetchMarkDone(p, lk); + return; + } + + /* Prefetch the bucket */ + lk->bucket_idx = lk->key_hash & DICTHT_SIZE_MASK(d->ht_size_exp[lk->ht_idx]); + dictPrefetchAdvance(p, &d->ht_table[lk->ht_idx][lk->bucket_idx]); + lk->current_entry = NULL; + lk->state = PREFETCH_ENTRY; +} + +/* Prefetch the entry in the bucket and move to the PREFETCH_ENTRY_KEY state. + * If no more entries in the bucket, move to the PREFETCH_BUCKET state to look at the next table. */ +static void dictPrefetchEntry(dictPrefetcher *p, dictPrefetchLookup *lk) { + size_t i = p->cur_idx; + + if (lk->current_entry) { + /* We already found an entry in the bucket - move to the next entry */ + lk->current_entry = dictGetNext(lk->current_entry); + } else { + /* Go to the first entry in the bucket */ + lk->current_entry = p->dicts[i]->ht_table[lk->ht_idx][lk->bucket_idx]; + } + + if (lk->current_entry) { + dictPrefetchAdvance(p, lk->current_entry); + lk->state = PREFETCH_ENTRY_KEY; + } else { + /* No entry found in the bucket - try the bucket in the next table */ + lk->state = PREFETCH_BUCKET; + } +} + +/* Bring the entry's key payload into cache via the dictType callback, + * then move to PREFETCH_ENTRY_VALUE where the keyCompare runs. If the + * dict provides no callback, the entry alone already carries everything + * keyCompare needs. */ +static void dictPrefetchEntryKey(dictPrefetcher *p, dictPrefetchLookup *lk) { + dictType *type = p->dicts[p->cur_idx]->type; + lk->state = PREFETCH_ENTRY_VALUE; + if (type->prefetchEntryKey) { + void *addr = type->prefetchEntryKey(lk->current_entry); + if (addr) dictPrefetchAdvance(p, addr); + } +} + +/* Compare the entry's stored key against the lookup key. On match, ask + * the dictType to prefetch the value-side payload (if any) and mark the + * lookup done. On mismatch, walk to the next entry in the chain. + * + * The entry's stored key may be in a different shape than the lookup key + * (e.g. dbDictType stores a kvobj but keyCompare wants the sds). 
When that + * is the case the dict provides keyFromStoredKey to convert; otherwise the + * stored key is already in comparable form. This mirrors what + * dictFindLinkInternal does. */ +static void dictPrefetchEntryValue(dictPrefetcher *p, dictPrefetchLookup *lk) { + size_t i = p->cur_idx; + dict *d = p->dicts[i]; + dictType *type = d->type; + const void *stored_key = dictGetKey(lk->current_entry); + const void *cmp_key = type->keyFromStoredKey ? type->keyFromStoredKey(stored_key) : stored_key; + + /* 1. If this is the last element, we assume a hit and don't compare the keys + * 2. The stored entry matches the lookup key. */ + if ((!dictGetNext(lk->current_entry) && !dictIsRehashing(d)) || + dictCompareKeys(d, p->keys[i], cmp_key)) + { + if (type->prefetchEntryValue) { + void *addr = type->prefetchEntryValue(lk->current_entry); + if (addr) dictPrefetchAdvance(p, addr); + } + dictPrefetchMarkDone(p, lk); + } else { + /* Not found in the current entry, move to the next entry */ + lk->state = PREFETCH_ENTRY; + } +} + +/* Allocate the per-key lookup array. The prefetcher can then be reused across + * many batches by repeated dictPrefetcherReset / dictPrefetcherRun calls. */ +static void dictPrefetcherInit(dictPrefetcher *p, size_t max_keys) { + p->lookups = zcalloc(max_keys * sizeof(dictPrefetchLookup)); + p->max_keys = max_keys; +} + +static void dictPrefetcherFree(dictPrefetcher *p) { + zfree(p->lookups); + p->lookups = NULL; + p->max_keys = 0; +} + +/* Configure the prefetcher for a single batch and seed every lookup's + * starting state. dicts/keys must remain valid until dictPrefetcherRun + * returns; only the pointers are stored. 
*/ +static void dictPrefetcherReset(dictPrefetcher *p, dict **dicts, void **keys, size_t nkeys) { + serverAssert(nkeys <= p->max_keys); + p->dicts = dicts; + p->keys = keys; + p->nkeys = nkeys; + p->cur_idx = 0; + + size_t remaining = 0; + for (size_t i = 0; i < nkeys; i++) { + dictPrefetchLookup *lk = &p->lookups[i]; + if (!dicts[i] || dictSize(dicts[i]) == 0) { + lk->state = PREFETCH_DONE; + continue; + } + + /* We skip prefetch during loading, so ht_table[0] should never be NULL + * when dictSize() > 0 (which only happens mid-dictEmpty via _dictReset). */ + serverAssert(dicts[i]->ht_table[0]); + + lk->ht_idx = HT_IDX_INVALID; + lk->current_entry = NULL; + lk->state = PREFETCH_BUCKET; + lk->key_hash = dictGetHash(dicts[i], keys[i]); + remaining++; + } + p->remaining = remaining; +} + +/* Drive the prefetch state machine across all dict lookups until every lookup + * reaches PREFETCH_DONE. + * + * Conceptually each dict lookup is a dictFind broken into four stages: + * bucket → entry → entry key payload → entry value payload + * If the key is not found in ht[0] and the dict is mid-rehash, the lookup + * loops back to the bucket stage to retry against ht[1]. + * + * Instead of waiting for each stage's memory access to complete, the FSM + * issues a prefetch and yields to another in-flight lookup, hiding the + * memory access latency. + * + * Any prefetching of the entry's key payload (e.g. an out-of-line kvobj head) + * and the entry's value payload (e.g. kv->ptr for a RAW string) is delegated + * to dictType->prefetchEntryKey and prefetchEntryValue respectively. 
*/ +static void dictPrefetcherRun(dictPrefetcher *p) { + dictPrefetchLookup *lk; + while ((lk = dictPrefetchNextInFlight(p))) { + switch (lk->state) { + case PREFETCH_BUCKET: dictPrefetchBucket(p, lk); break; + case PREFETCH_ENTRY: dictPrefetchEntry(p, lk); break; + case PREFETCH_ENTRY_KEY: dictPrefetchEntryKey(p, lk); break; + case PREFETCH_ENTRY_VALUE: dictPrefetchEntryValue(p, lk); break; + default: serverPanic("Unknown prefetch state %d", lk->state); + } + } +} + +/* -------------------------------------------------------------------------- + * Intra-command prefetch API + * -------------------------------------------------------------------------- + * dictPrefetchKeys() allows a single multi-key command (e.g. MGET) to + * prefetch dict data for a batch of its own keys, reusing the same state + * machine that the cross-command path uses. + * + * Typical usage from a command implementation: + * + * #define BATCH 16 + * void myMultiKeyCommand(client *c) { + * dict *d = kvstoreGetDict(c->db->keys, slot); + * for (int j = 0; j < numkeys; j += BATCH) { + * int n = MIN(BATCH, numkeys - j); + * void *keys[BATCH]; dict *dicts[BATCH]; + * for (int k = 0; k < n; k++) { + * keys[k] = c->argv[j+k+1]->ptr; + * dicts[k] = d; + * } + * dictPrefetchKeys(dicts, keys, n); + * // Now process these n keys — dict bucket / entry / key payload + * // (and value payload, if dictType->prefetchEntryValue is set) + * // are warm in cache. + * } + * } + * ----------------------------------------------------------------------- */ +void dictPrefetchKeys(dict **dicts, void **keys, size_t nkeys) { + /* Single-key prefetch has no benefit — nothing to interleave with. + * Callers passing nkeys==1 (e.g. tail of a multi-key batch) should + * fall through to a direct lookup. */ + if (nkeys <= 1) return; + + /* Guard the fixed-size stack array below; callers must batch larger + * inputs into chunks of DICT_PREFETCH_MAX_SIZE or smaller. 
*/ + serverAssert(nkeys <= DICT_PREFETCH_MAX_SIZE); + server.stat_total_prefetch_batches++; + + dictPrefetchLookup lookups[DICT_PREFETCH_MAX_SIZE]; + dictPrefetcher p = { .lookups = lookups, .max_keys = nkeys }; + dictPrefetcherReset(&p, dicts, keys, nkeys); + dictPrefetcherRun(&p); +} + +/* -------------------------------------------------------------------------- + * Cross-command batch prefetching + * -------------------------------------------------------------------------- */ /* PrefetchCommandsBatch structure holds the state of the current batch of client commands being processed. */ typedef struct PrefetchCommandsBatch { - size_t cur_idx; /* Index of the current key being processed */ size_t key_count; /* Number of keys in the current batch */ size_t client_count; /* Number of clients in the current batch */ size_t pcmd_count; /* Number of pending commands in the current batch */ @@ -79,9 +332,7 @@ typedef struct PrefetchCommandsBatch { client **clients; /* Array of clients in the current batch */ pendingCommand **pending_cmds; /* Array of pending commands in the current batch */ dict **keys_dicts; /* Main dict for each key */ - dict **current_dicts; /* Points to dict to prefetch from */ - KeyPrefetchInfo *prefetch_info; /* Prefetch info for each key */ - GetValueDataFunc get_value_data_func; /* Function to get the value data */ + dictPrefetcher prefetcher; /* Initialized once; reset and reused per batch. 
*/ } PrefetchCommandsBatch; static PrefetchCommandsBatch *batch = NULL; @@ -95,7 +346,7 @@ void freePrefetchCommandsBatch(void) { zfree(batch->pending_cmds); zfree(batch->keys); zfree(batch->keys_dicts); - zfree(batch->prefetch_info); + dictPrefetcherFree(&batch->prefetcher); zfree(batch); batch = NULL; } @@ -118,7 +369,7 @@ void prefetchCommandsBatchInit(void) { batch->pending_cmds = zcalloc(max_prefetch_size * sizeof(pendingCommand *)); batch->keys = zcalloc(max_prefetch_size * sizeof(void *)); batch->keys_dicts = zcalloc(max_prefetch_size * sizeof(dict *)); - batch->prefetch_info = zcalloc(max_prefetch_size * sizeof(KeyPrefetchInfo)); + dictPrefetcherInit(&batch->prefetcher, max_prefetch_size); } void onMaxBatchSizeChange(void) { @@ -131,173 +382,6 @@ void onMaxBatchSizeChange(void) { prefetchCommandsBatchInit(); } -/* Prefetch the given pointer and move to the next key in the batch. */ -static inline void prefetchAndMoveToNextKey(void *addr) { - redis_prefetch_read(addr); - /* While the prefetch is in progress, we can continue to the next key */ - batch->cur_idx = (batch->cur_idx + 1) % batch->key_count; -} - -static inline void markKeyAsdone(KeyPrefetchInfo *info) { - info->state = PREFETCH_DONE; - server.stat_total_prefetch_entries++; -} - -/* Returns the next KeyPrefetchInfo structure that needs to be processed. 
*/ -static KeyPrefetchInfo *getNextPrefetchInfo(void) { - size_t start_idx = batch->cur_idx; - do { - KeyPrefetchInfo *info = &batch->prefetch_info[batch->cur_idx]; - if (info->state != PREFETCH_DONE) return info; - batch->cur_idx = (batch->cur_idx + 1) % batch->key_count; - } while (batch->cur_idx != start_idx); - return NULL; -} - -static void initBatchInfo(dict **dicts, GetValueDataFunc func) { - batch->current_dicts = dicts; - batch->get_value_data_func = func; - - /* Initialize the prefetch info */ - for (size_t i = 0; i < batch->key_count; i++) { - KeyPrefetchInfo *info = &batch->prefetch_info[i]; - if (!batch->current_dicts[i] || dictSize(batch->current_dicts[i]) == 0) { - info->state = PREFETCH_DONE; - continue; - } - - /* We skip prefetch during loading, so ht_table[0] should never be NULL - * when dictSize() > 0 (which only happens mid-dictEmpty via _dictReset). */ - serverAssert(batch->current_dicts[i]->ht_table[0]); - - info->ht_idx = HT_IDX_INVALID; - info->current_entry = NULL; - info->current_kv = NULL; - info->state = PREFETCH_BUCKET; - info->key_hash = dictGetHash(batch->current_dicts[i], batch->keys[i]); - } -} - -/* Prefetch the bucket of the next hash table index. - * If no tables are left, move to the PREFETCH_DONE state. */ -static void prefetchBucket(KeyPrefetchInfo *info) { - size_t i = batch->cur_idx; - - /* Determine which hash table to use */ - if (info->ht_idx == HT_IDX_INVALID) { - info->ht_idx = HT_IDX_FIRST; - } else if (info->ht_idx == HT_IDX_FIRST && dictIsRehashing(batch->current_dicts[i])) { - info->ht_idx = HT_IDX_SECOND; - } else { - /* No more tables left - mark as done. 
*/ - markKeyAsdone(info); - return; - } - - /* Prefetch the bucket */ - info->bucket_idx = info->key_hash & DICTHT_SIZE_MASK(batch->current_dicts[i]->ht_size_exp[info->ht_idx]); - prefetchAndMoveToNextKey(&batch->current_dicts[i]->ht_table[info->ht_idx][info->bucket_idx]); - info->current_entry = NULL; - info->state = PREFETCH_ENTRY; -} - -/* Prefetch the entry in the bucket and move to the PREFETCH_KVOBJ state. - * If no more entries in the bucket, move to the PREFETCH_BUCKET state to look at the next table. */ -static void prefetchEntry(KeyPrefetchInfo *info) { - size_t i = batch->cur_idx; - - if (info->current_entry) { - /* We already found an entry in the bucket - move to the next entry */ - info->current_entry = dictGetNext(info->current_entry); - } else { - /* Go to the first entry in the bucket */ - info->current_entry = batch->current_dicts[i]->ht_table[info->ht_idx][info->bucket_idx]; - } - - if (info->current_entry) { - prefetchAndMoveToNextKey(info->current_entry); - info->current_kv = NULL; - info->state = PREFETCH_KVOBJ; - } else { - /* No entry found in the bucket - try the bucket in the next table */ - info->state = PREFETCH_BUCKET; - } -} - -/* Prefetch the kv object in the dict entry, and to the PREFETCH_VALDATA state. */ -static inline void prefetchKVOject(KeyPrefetchInfo *info) { - kvobj *kv = dictGetKey(info->current_entry); - int is_kv = dictEntryIsKey(info->current_entry); - - info->current_kv = kv; - info->state = PREFETCH_VALDATA; - /* If the entry is a pointer of kv object, we don't need to prefetch it */ - if (!is_kv) prefetchAndMoveToNextKey(kv); -} - -/* Prefetch the value data of the kv object found in dict entry. */ -static void prefetchValueData(KeyPrefetchInfo *info) { - size_t i = batch->cur_idx; - kvobj *kv = info->current_kv; - sds key = kvobjGetKey(kv); - - /* 1. If this is the last element, we assume a hit and don't compare the keys - * 2. This kv object is the target of the lookup. 
*/ - if ((!dictGetNext(info->current_entry) && !dictIsRehashing(batch->current_dicts[i])) || - dictCompareKeys(batch->current_dicts[i], batch->keys[i], key)) - { - if (batch->get_value_data_func) { - void *value_data = batch->get_value_data_func(kv); - if (value_data) prefetchAndMoveToNextKey(value_data); - } - markKeyAsdone(info); - } else { - /* Not found in the current entry, move to the next entry */ - info->state = PREFETCH_ENTRY; - } -} - -/* Prefetch dictionary data for an array of keys. - * - * This function takes an array of dictionaries and keys, attempting to bring - * data closer to the L1 cache that might be needed for dictionary operations - * on those keys. - * - * The dictFind algorithm: - * 1. Evaluate the hash of the key - * 2. Access the index in the first table - * 3. Walk the entries linked list until the key is found - * If the key hasn't been found and the dictionary is in the middle of rehashing, - * access the index on the second table and repeat step 3 - * - * dictPrefetch executes the same algorithm as dictFind, but one step at a time - * for each key. Instead of waiting for data to be read from memory, it prefetches - * the data and then moves on to execute the next prefetch for another key. - * - * dicts - An array of dictionaries to prefetch data from. - * get_val_data_func - A callback function that dictPrefetch can invoke - * to bring the key's value data closer to the L1 cache as well. 
- */ -static void dictPrefetch(dict **dicts, GetValueDataFunc get_val_data_func) { - initBatchInfo(dicts, get_val_data_func); - KeyPrefetchInfo *info; - while ((info = getNextPrefetchInfo())) { - switch (info->state) { - case PREFETCH_BUCKET: prefetchBucket(info); break; - case PREFETCH_ENTRY: prefetchEntry(info); break; - case PREFETCH_KVOBJ: prefetchKVOject(info); break; - case PREFETCH_VALDATA: prefetchValueData(info); break; - default: serverPanic("Unknown prefetch state %d", info->state); - } - } -} - -/* Helper function to get the value pointer of a kv object. */ -static void *getObjectValuePtr(const void *value) { - kvobj *kv = (kvobj *)value; - return (kv->type == OBJ_STRING && kv->encoding == OBJ_ENCODING_RAW) ? kv->ptr : NULL; -} - void resetCommandsBatch(void) { if (batch == NULL) { /* Handle the case where prefetching becomes enabled from disabled. */ @@ -305,7 +389,6 @@ void resetCommandsBatch(void) { return; } - batch->cur_idx = 0; batch->key_count = 0; batch->client_count = 0; batch->pcmd_count = 0; @@ -379,8 +462,10 @@ void prefetchCommands(void) { * Prefetching is beneficial only if there are more than one key. */ if (batch->key_count > 1) { server.stat_total_prefetch_batches++; - /* Prefetch keys from the main dict */ - dictPrefetch(batch->keys_dicts, getObjectValuePtr); + /* Prefetch keys from the main dict — value-side prefetch (if any) + * is driven by dbDictType->prefetchEntryValue. */ + dictPrefetcherReset(&batch->prefetcher, batch->keys_dicts, batch->keys, batch->key_count); + dictPrefetcherRun(&batch->prefetcher); } } @@ -424,11 +509,15 @@ int addCommandToBatch(client *c) { batch->pending_cmds[batch->pcmd_count++] = pcmd; serverAssert(pcmd->flags & PENDING_CMD_KEYS_RESULT_VALID); + dict *cmd_dict = kvstoreGetDict(c->db->keys, pcmd->slot > 0 ? 
pcmd->slot : 0); for (int i = 0; i < pcmd->keys_result.numkeys && batch->key_count < batch->max_prefetch_size; i++) { batch->keys[batch->key_count] = pcmd->argv[pcmd->keys_result.keys[i].pos]; - batch->keys_dicts[batch->key_count] = - kvstoreGetDict(c->db->keys, pcmd->slot > 0 ? pcmd->slot : 0); + batch->keys_dicts[batch->key_count] = cmd_dict; batch->key_count++; + /* Mark the command as prefetched so the intra-command prefetch + * path skips it. Even on a partial batch, running both paths + * would just contend for cache bandwidth. */ + pcmd->flags |= PENDING_CMD_KEYS_PREFETCHED; } pcmd = pcmd->next; } diff --git a/src/memory_prefetch.h b/src/memory_prefetch.h index e2977f10f..07c108a50 100644 --- a/src/memory_prefetch.h +++ b/src/memory_prefetch.h @@ -15,12 +15,26 @@ #ifndef MEMORY_PREFETCH_H #define MEMORY_PREFETCH_H -struct client; +#include +struct client; +struct dict; + +/* Cross-command batch prefetching */ void prefetchCommandsBatchInit(void); int determinePrefetchCount(int len); int addCommandToBatch(struct client *c); void resetCommandsBatch(void); void prefetchCommands(void); +/* Intra-command prefetch: prefetch dict lookup data for an array of keys. + * Reuses the same state machine as the cross-command path. The dict's + * dictType drives any key/value payload prefetching via the + * prefetchEntryKey / prefetchEntryValue callbacks. + * + * nkeys must be <= DICT_PREFETCH_MAX_SIZE (the function asserts this). + * Callers should batch larger inputs into chunks of this size or smaller. */ +#define DICT_PREFETCH_MAX_SIZE 64 +void dictPrefetchKeys(struct dict **dicts, void **keys, size_t nkeys); + #endif /* MEMORY_PREFETCH_H */ diff --git a/src/module.c b/src/module.c index d4a857a2e..9843e6ccc 100644 --- a/src/module.c +++ b/src/module.c @@ -303,6 +303,9 @@ static pthread_mutex_t moduleGIL = PTHREAD_MUTEX_INITIALIZER; /* Function pointer type for keyspace event notification subscriptions from modules. 
*/ typedef int (*RedisModuleNotificationFunc) (RedisModuleCtx *ctx, int type, const char *event, RedisModuleString *key); +/* Function pointer type for keyspace event notifications with subkeys from modules. */ +typedef void (*RedisModuleNotificationWithSubkeysFunc)(RedisModuleCtx *ctx, int type, const char *event, RedisModuleString *key, RedisModuleString **subkeys, int count); + /* Function pointer type for post jobs */ typedef void (*RedisModulePostNotificationJobFunc) (RedisModuleCtx *ctx, void *pd); @@ -313,8 +316,12 @@ typedef struct RedisModuleKeyspaceSubscriber { RedisModule *module; /* Notification callback in the module*/ RedisModuleNotificationFunc notify_callback; + /* Extended notification callback with subkeys */ + RedisModuleNotificationWithSubkeysFunc notify_callback_with_subkeys; /* A bit mask of the events the module is interested in */ int event_mask; + /* Delivery flags for subkey notifications, controlling when the callback is invoked. */ + int flags; /* Active flag set on entry, to avoid reentrant subscribers * calling themselves */ int active; @@ -332,6 +339,11 @@ typedef struct RedisModulePostExecUnitJob { /* The module keyspace notification subscribers list */ static list *moduleKeyspaceSubscribers; +/* Cached event types that have at least one subscriber. + * Updated on subscribe/unsubscribe to avoid traversing the list on every event. 
*/ +static int moduleKeyspaceSubscribersTypes = 0; +static int moduleKeyspaceSubscribersWithSubkeysTypes = 0; + /* The module post keyspace jobs list */ static list *modulePostExecUnitJobs; @@ -667,7 +679,7 @@ void moduleReleaseTempClient(client *c) { } clearClientConnectionState(c); listEmpty(c->reply); - c->reply_bytes = 0; + c->reply_bytes = c->reply_bytes_shared = c->reply_bytes_unshared = 0; c->duration = 0; resetClient(c, -1); serverAssert(c->all_argv_len_sum == 0); @@ -783,6 +795,23 @@ int moduleDelKeyIfEmpty(RedisModuleKey *key) { } } +/* Update the cached subscriber types by walking the subscriber list. + * Called after subscribe/unsubscribe operations. */ +static void moduleUpdateKeyspaceSubscribersTypes(void) { + int mask = 0, subkeys_mask = 0; + listIter li; + listNode *ln; + listRewind(moduleKeyspaceSubscribers,&li); + while((ln = listNext(&li))) { + RedisModuleKeyspaceSubscriber *sub = ln->value; + mask |= sub->event_mask; + if (sub->notify_callback_with_subkeys) + subkeys_mask |= sub->event_mask; + } + moduleKeyspaceSubscribersTypes = mask; + moduleKeyspaceSubscribersWithSubkeysTypes = subkeys_mask; +} + /* -------------------------------------------------------------------------- * Service API exported to modules * @@ -4225,7 +4254,10 @@ int RM_KeyType(RedisModuleKey *key) { case OBJ_HASH: return REDISMODULE_KEYTYPE_HASH; case OBJ_MODULE: return REDISMODULE_KEYTYPE_MODULE; case OBJ_STREAM: return REDISMODULE_KEYTYPE_STREAM; +#ifdef ENABLE_GCRA case OBJ_GCRA: return REDISMODULE_KEYTYPE_GCRA; +#endif + case OBJ_ARRAY: return REDISMODULE_KEYTYPE_ARRAY; default: return REDISMODULE_KEYTYPE_EMPTY; } } @@ -9250,10 +9282,13 @@ int RM_SubscribeToKeyspaceEvents(RedisModuleCtx *ctx, int types, RedisModuleNoti RedisModuleKeyspaceSubscriber *sub = zmalloc(sizeof(*sub)); sub->module = ctx->module; sub->event_mask = types; + sub->flags = REDISMODULE_NOTIFY_FLAG_NONE; sub->notify_callback = callback; + sub->notify_callback_with_subkeys = NULL; sub->active = 0; 
listAddNodeTail(moduleKeyspaceSubscribers, sub); + moduleUpdateKeyspaceSubscribersTypes(); return REDISMODULE_OK; } @@ -9286,19 +9321,101 @@ int RM_UnsubscribeFromKeyspaceEvents(RedisModuleCtx *ctx, int types, RedisModule removed++; } } + if (removed > 0) moduleUpdateKeyspaceSubscribersTypes(); return removed > 0 ? REDISMODULE_OK : REDISMODULE_ERR; } -/* Check any subscriber for event */ -int moduleHasSubscribersForKeyspaceEvent(int type) { +/* Subscribe to keyspace notifications with subkey information. + * + * This is the extended version of RM_SubscribeToKeyspaceEvents. When subkeys + * are available, the `subkeys` array and `count` are passed to the callback. + * `subkeys` contains only the names of affected subkeys (values are not included), + * and `count` is the number of elements. The array may contain duplicates when + * the same subkey appears more than once in a command (e.g. HSET key f1 v1 f1 v2 + * produces subkeys=["f1","f1"], count=2). When no subkeys are present, `subkeys` + * will be NULL and `count` will be 0. Whether events without subkeys are delivered + * depends on the `flags` parameter (see below). + * + * `types` is a bit mask of event types the module is interested in + * (using the same REDISMODULE_NOTIFY_* flags as RM_SubscribeToKeyspaceEvents). + * + * `flags` controls delivery filtering: + * - REDISMODULE_NOTIFY_FLAG_NONE: The callback is invoked for all matching + * events regardless of whether subkeys are present, so a separate + * RM_SubscribeToKeyspaceEvents registration can be omitted. + * - REDISMODULE_NOTIFY_FLAG_SUBKEYS_REQUIRED: The callback is only invoked + * when subkeys are not empty. Events without subkey information (e.g. SET, + * EXPIRE, DEL) are skipped. + * + * The callback signature is: + * void callback(RedisModuleCtx *ctx, int type, const char *event, + * RedisModuleString *key, RedisModuleString **subkeys, int count); + * + * The subkeys array and its contents are only valid during the callback. 
+ * The underlying objects may be stack-allocated or temporary, so + * RM_RetainString must NOT be used on them. To keep a subkey beyond + * the callback (e.g. in a RM_AddPostNotificationJob callback), use + * RM_HoldString (which handles static objects by copying) or + * RM_CreateStringFromString to make a deep copy before returning. + */ +int RM_SubscribeToKeyspaceEventsWithSubkeys(RedisModuleCtx *ctx, int types, int flags, RedisModuleNotificationWithSubkeysFunc callback) { + RedisModuleKeyspaceSubscriber *sub = zmalloc(sizeof(*sub)); + sub->module = ctx->module; + sub->event_mask = types; + sub->flags = flags; + sub->notify_callback = NULL; + sub->notify_callback_with_subkeys = callback; + sub->active = 0; + + listAddNodeTail(moduleKeyspaceSubscribers, sub); + moduleUpdateKeyspaceSubscribersTypes(); + return REDISMODULE_OK; +} + +/* Unregister a module's callback from keyspace notifications with subkeys + * for specific event types. + * + * This function removes a previously registered subscription identified by + * the event mask, delivery flags, and the callback function. + * + * Parameters: + * - ctx: The RedisModuleCtx associated with the calling module. + * - types: The event mask representing the notification types to unsubscribe from. + * - flags: The delivery flags that were used during registration. + * - callback: The callback function pointer that was originally registered. + * + * Returns: + * - REDISMODULE_OK on successful removal of the subscription. + * - REDISMODULE_ERR if no matching subscription was found. 
*/ +int RM_UnsubscribeFromKeyspaceEventsWithSubkeys(RedisModuleCtx *ctx, int types, int flags, RedisModuleNotificationWithSubkeysFunc callback) { + if (!ctx || !callback) return REDISMODULE_ERR; + int removed = 0; listIter li; listNode *ln; listRewind(moduleKeyspaceSubscribers,&li); - while((ln = listNext(&li))) { + while ((ln = listNext(&li))) { RedisModuleKeyspaceSubscriber *sub = ln->value; - if (sub->event_mask & type) return 1; + if (sub->event_mask == types && sub->flags == flags && + sub->notify_callback_with_subkeys == callback && + sub->module == ctx->module) + { + zfree(sub); + listDelNode(moduleKeyspaceSubscribers, ln); + removed++; + } } - return 0; + if (removed > 0) moduleUpdateKeyspaceSubscribersTypes(); + return removed > 0 ? REDISMODULE_OK : REDISMODULE_ERR; +} + +/* Check any subscriber for event. */ +int moduleHasSubscribersForKeyspaceEvent(int type) { + return (moduleKeyspaceSubscribersTypes & type) != 0; +} + +/* Check any subscriber for event with subkeys. */ +int moduleHasSubscribersForKeyspaceEventWithSubkeys(int type) { + return (moduleKeyspaceSubscribersWithSubkeysTypes & type) != 0; } void firePostExecutionUnitJobs(void) { @@ -9372,10 +9489,29 @@ int RM_NotifyKeyspaceEvent(RedisModuleCtx *ctx, int type, const char *event, Red return REDISMODULE_OK; } +/* Like RM_NotifyKeyspaceEvent, but also triggers subkey-level notifications + * when subkeys are provided. Both key-level (keyspace/keyevent) and + * subkey-level (subkeyspace/subkeyevent/subkeyspaceitem/subkeyspaceevent) + * channels are published to, depending on the server configuration. + * + * This is the extended version of RM_NotifyKeyspaceEvent and can actually + * replace it. When called with subkeys=NULL and count=0, it behaves + * identically to RM_NotifyKeyspaceEvent. 
*/ +int RM_NotifyKeyspaceEventWithSubkeys(RedisModuleCtx *ctx, int type, const char *event, + RedisModuleString *key, RedisModuleString **subkeys, int count) { + if (!ctx || !ctx->client) + return REDISMODULE_ERR; + notifyKeyspaceEventWithSubkeys(type, (char *)event, key, ctx->client->db->id, subkeys, count); + return REDISMODULE_OK; +} + /* Dispatcher for keyspace notifications to module subscriber functions. - * This gets called only if at least one module requested to be notified on - * keyspace notifications */ -void moduleNotifyKeyspaceEvent(int type, const char *event, robj *key, int dbid) { + * This gets called only if at least one module requested to be notified on + * keyspace notifications. For each subscriber, if notify_callback is set it + * is called; otherwise if notify_callback_with_subkeys is set it is called + * for all events (subkeys may be NULL/0 when not applicable). */ +void moduleNotifyKeyspaceEvent(int type, const char *event, robj *key, int dbid, + robj **subkeys, int count) { /* Don't do anything if there aren't any subscribers */ if (listLength(moduleKeyspaceSubscribers) == 0) return; @@ -9403,7 +9539,9 @@ void moduleNotifyKeyspaceEvent(int type, const char *event, robj *key, int dbid) listRewind(moduleKeyspaceSubscribers,&li); /* Remove irrelevant flags from the type mask */ - type &= ~(NOTIFY_KEYEVENT | NOTIFY_KEYSPACE); + type &= ~(NOTIFY_KEYEVENT | NOTIFY_KEYSPACE | + NOTIFY_SUBKEYSPACE | NOTIFY_SUBKEYEVENT | + NOTIFY_SUBKEYSPACEITEM | NOTIFY_SUBKEYSPACEEVENT); while((ln = listNext(&li))) { RedisModuleKeyspaceSubscriber *sub = ln->value; @@ -9411,6 +9549,15 @@ void moduleNotifyKeyspaceEvent(int type, const char *event, robj *key, int dbid) * and avoid subscribers triggering themselves */ if ((sub->event_mask & type) && (sub->active == 0 || (sub->module->options & REDISMODULE_OPTIONS_ALLOW_NESTED_KEYSPACE_NOTIFICATIONS))) { + + /* If SUBKEYS_REQUIRED is set, skip events without subkeys. 
*/ + if (sub->notify_callback_with_subkeys && + (sub->flags & REDISMODULE_NOTIFY_FLAG_SUBKEYS_REQUIRED) && + (subkeys == NULL || count == 0)) + { + continue; + } + RedisModuleCtx ctx; moduleCreateContext(&ctx, sub->module, REDISMODULE_CTX_TEMP_CLIENT); selectDb(ctx.client, dbid); @@ -9422,7 +9569,11 @@ void moduleNotifyKeyspaceEvent(int type, const char *event, robj *key, int dbid) sub->active = 1; server.allow_access_expired++; server.allow_access_trimmed++; - sub->notify_callback(&ctx, type, event, key); + if (sub->notify_callback) { + sub->notify_callback(&ctx, type, event, key); + } else if (sub->notify_callback_with_subkeys) { + sub->notify_callback_with_subkeys(&ctx, type, event, key, subkeys, count); + } server.allow_access_expired--; server.allow_access_trimmed--; sub->active = prev_active; @@ -9445,6 +9596,7 @@ void moduleUnsubscribeNotifications(RedisModule *module) { zfree(sub); } } + moduleUpdateKeyspaceSubscribersTypes(); } /* -------------------------------------------------------------------------- @@ -9512,7 +9664,7 @@ void RM_RegisterClusterMessageReceiver(RedisModuleCtx *ctx, uint8_t type, RedisM if (prev) prev->next = r->next; else - clusterReceivers[type]->next = r->next; + clusterReceivers[type] = r->next; /* Update the head */ zfree(r); } return; @@ -10331,11 +10483,11 @@ int RM_FreeModuleUser(RedisModuleUser *user) { * Returns NULL if user is NULL or the user has no name. * The returned string must be freed by the caller with RedisModule_FreeString() * or by enabling automatic memory management on a context. 
*/ - RedisModuleString *RM_GetUserUsername(const RedisModuleUser *user) { + RedisModuleString *RM_GetUserUsername(RedisModuleCtx *ctx, const RedisModuleUser *user) { if(user == NULL || user->user == NULL || user->user->name == NULL) return NULL; - return RM_CreateString(NULL, user->user->name, sdslen(user->user->name)); + return RM_CreateString(ctx, user->user->name, sdslen(user->user->name)); } /* Sets the permissions of a user created through the redis module @@ -13221,7 +13373,7 @@ int moduleOnLoad(int (*onload)(void *, void **, int), const char *path, void *ha moduleCreateContext(&ctx, NULL, REDISMODULE_CTX_TEMP_CLIENT); /* We pass NULL since we don't have a module yet. */ if (onload((void*)&ctx,module_argv,module_argc) == REDISMODULE_ERR) { serverLog(LL_WARNING, - "Module %s initialization failed. Module not loaded",path); + "Module %s initialization failed. Module not loaded", path ? path : "(null)"); if (ctx.module) { moduleUnregisterCleanup(ctx.module); moduleRemoveCateogires(ctx.module); @@ -15414,9 +15566,12 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(DigestAddLongLong); REGISTER_API(DigestEndSequence); REGISTER_API(NotifyKeyspaceEvent); + REGISTER_API(NotifyKeyspaceEventWithSubkeys); REGISTER_API(GetNotifyKeyspaceEvents); REGISTER_API(SubscribeToKeyspaceEvents); REGISTER_API(UnsubscribeFromKeyspaceEvents); + REGISTER_API(SubscribeToKeyspaceEventsWithSubkeys); + REGISTER_API(UnsubscribeFromKeyspaceEventsWithSubkeys); REGISTER_API(AddPostNotificationJob); REGISTER_API(RegisterClusterMessageReceiver); REGISTER_API(SendClusterMessage); diff --git a/src/multi.c b/src/multi.c index cd8783d20..2b900bcbd 100644 --- a/src/multi.c +++ b/src/multi.c @@ -504,6 +504,7 @@ size_t multiStateMemOverhead(client *c) { /* Add watched keys overhead, Note: this doesn't take into account the watched keys themselves, because they aren't managed per-client. 
*/ mem += listLength(c->watched_keys) * (sizeof(listNode) + sizeof(watchedKey)); /* Reserved memory for queued multi commands. */ - mem += c->mstate.alloc_count * sizeof(pendingCommand); + mem += c->mstate.alloc_count * sizeof(pendingCommand*); + mem += c->mstate.count * sizeof(pendingCommand); return mem; } diff --git a/src/networking.c b/src/networking.c index beed81e74..3bcd74e82 100644 --- a/src/networking.c +++ b/src/networking.c @@ -36,7 +36,8 @@ static inline int _clientHasPendingRepliesNonSlave(client *c); static inline int _writeToClientNonSlave(client *c, ssize_t *nwritten); static inline int _writeToClientSlave(client *c, ssize_t *nwritten); static pendingCommand *acquirePendingCommand(void); -static void reclaimPendingCommand(client *c, pendingCommand *pcmd); +static inline void reclaimPendingCommand(client *c, pendingCommand *pcmd); +static size_t getClientOutputBufferLogicalSize(client *c); int ProcessingEventsWhileBlocked = 0; /* See processEventsWhileBlocked(). */ __thread sds thread_reusable_qb = NULL; @@ -212,7 +213,7 @@ client *createClient(connection *conn) { c->main_ch_client_id = 0; c->reply = listCreate(); c->deferred_reply_errors = NULL; - c->reply_bytes = 0; + c->reply_bytes = c->reply_bytes_shared = c->reply_bytes_unshared = 0; c->obuf_soft_limit_reached_time = 0; listSetFreeMethod(c->reply,freeClientReplyValue); listSetDupMethod(c->reply,dupClientReplyValue); @@ -367,7 +368,7 @@ int prepareClientToWrite(client *c) { * Low level functions to add more data to output buffers. 
* -------------------------------------------------------------------------- */ -static int tryAddPayload(char *buf, size_t *used, size_t size, uint8_t type, const void *payload, size_t len) { +static int tryAddPayload(client *c, char *buf, size_t *used, size_t size, uint8_t type, const void *payload, size_t len) { if (*used + sizeof(payloadHeader) + len > size) return 0; /* Start a new payload chunk */ @@ -376,6 +377,13 @@ static int tryAddPayload(char *buf, size_t *used, size_t size, uint8_t type, con header->payload_len = len; memcpy((char *)header + sizeof(payloadHeader), payload, len); *used += sizeof(payloadHeader) + len; + + /* Track referenced reply bytes for copy avoidance. */ + if (type == BULK_STR_REF) { + const bulkStrRef *str_ref = (const bulkStrRef *)payload; + c->reply_bytes_shared += sdslen(str_ref->obj->ptr); + } + return 1; } @@ -395,8 +403,11 @@ static void _addReplyPayloadToList(client *c, list *reply_list, const char *payl if (tail) { if (unlikely(tail->buf_encoded)) { /* Try to add to encoded buffer */ - if (tryAddPayload(tail->buf, &tail->used, tail->size, payload_type, (void *)payload, len)) { - len = 0; + if (tryAddPayload(c, tail->buf, &tail->used, tail->size, payload_type, (void *)payload, len)) { + /* For BULK_STR_REF payloads, tryAddPayload updates shared reply bytes + * which accounts for referenced strings. 
*/ + if (encoded) closeClientOnOutputBufferLimitReached(c, 1); + return; } } else if (!encoded) { /* Both tail and new payload are non-encoded, can append directly */ @@ -424,7 +435,7 @@ static void _addReplyPayloadToList(client *c, list *reply_list, const char *payl tail->used = 0; tail->buf_encoded = encoded; if (tail->buf_encoded) { - serverAssert(tryAddPayload(tail->buf, &tail->used, tail->size, payload_type, (void *)payload, len)); + serverAssert(tryAddPayload(c, tail->buf, &tail->used, tail->size, payload_type, (void *)payload, len)); } else { tail->used = len; memcpy(tail->buf, payload, len); @@ -456,7 +467,7 @@ static size_t _addReplyPayloadToBuffer(client *c, const void *payload, size_t le size_t available = c->buf_usable_size - c->bufpos; size_t reply_len = min(available, len); if (c->buf_encoded) { - if (!tryAddPayload(c->buf, &c->bufpos, c->buf_usable_size, payload_type, payload, len)) + if (!tryAddPayload(c, c->buf, &c->bufpos, c->buf_usable_size, payload_type, payload, len)) return 0; reply_len = len; } else { @@ -472,18 +483,27 @@ static size_t _addReplyPayloadToBuffer(client *c, const void *payload, size_t le /* Adds bulk string reference (i.e. pointer to object and pointer to string itself) to static buffer * Returns non-zero value if succeeded to add */ static size_t _addBulkStrRefToBuffer(client *c, const void *payload, size_t len) { + size_t result; if (!c->buf_encoded) { /* If buffer is plain and not empty then can't add bulk string reference to it */ if (c->bufpos) return 0; c->buf_encoded = 1; /* Set c->buf to encoded mode to allow bulk string reference to be stored in it */ - size_t result = _addReplyPayloadToBuffer(c, payload, len, BULK_STR_REF); + result = _addReplyPayloadToBuffer(c, payload, len, BULK_STR_REF); if (!result) { /* Failed to add bulk string reference to buffer, need to revert to plain mode. 
*/ c->buf_encoded = 0; + return 0; } - return result; + } else { + result = _addReplyPayloadToBuffer(c, payload, len, BULK_STR_REF); + if (!result) return 0; } - return _addReplyPayloadToBuffer(c, payload, len, BULK_STR_REF); + + /* Even though the bulk string is stored by reference and the underlying + * memory is shared, we still account this shared memory towards this + * client's output buffer usage, so we need to check the output buffer limits. */ + closeClientOnOutputBufferLimitReached(c, 1); + return result; } void _addReplyToBufferOrList(client *c, const char *s, size_t len) { @@ -1161,6 +1181,18 @@ void addReplyLongLongFromStr(client *c, robj *str) { addReplyProto(c,"\r\n",2); } +/* Reply with unsigned 64-bit value. Uses integer reply when value fits in + * signed long long, otherwise big number (RESP3) or bulk string (RESP2). */ +void addReplyUnsignedLongLong(client *c, uint64_t v) { + if (v <= (uint64_t)LLONG_MAX) { + addReplyLongLong(c, (long long)v); + } else { + char buf[LONG_STR_SIZE]; + int len = ull2string(buf, sizeof(buf), v); + addReplyBigNum(c, buf, len); + } +} + void addReplyAggregateLen(client *c, long length, int prefix) { serverAssert(length >= 0); if (_prepareClientToWrite(c) != C_OK) return; @@ -1457,6 +1489,7 @@ void AddReplyFromClient(client *dst, client *src) { /* Concatenate the reply list into the dest */ if (listLength(src->reply)) listJoin(dst->reply,src->reply); + serverAssert(src->reply_bytes_shared == 0); /* It is non-normal client, never has references. 
*/ dst->reply_bytes += src->reply_bytes; src->reply_bytes = 0; src->bufpos = 0; @@ -1792,7 +1825,7 @@ void freeClientArgv(client *c) { freeClientArgvInternal(c, 1); } -void freeClientPendingCommands(client *c, int num_pcmds_to_free) { +static inline void freeClientPendingCommands(client *c, int num_pcmds_to_free) { /* (-1) means free all pending commands */ if (num_pcmds_to_free == -1) num_pcmds_to_free = c->pending_cmds.len; @@ -1926,6 +1959,72 @@ void tryUnlinkClientFromPendingRefReply(client *c, int force) { } } +/* Count bytes in an encoded buffer where the client holds the last remaining + * reference to the underlying string object (refcount == 1), meaning the key + * has been deleted from the keyspace and only this client buffer keeps the + * memory alive. + * + * Note: when multiple clients share a reference to the same object, + * the object's refcount stays above 1 even after the key is deleted. In that + * case none of those clients will be counted here, so the shared memory is + * under-reported until all but one client has consumed its copy. */ +static size_t computeUnsharedReplyBytes(char *buf, size_t bufpos) { + size_t total = 0; + char *ptr = buf; + while (ptr < buf + bufpos) { + payloadHeader *header = (payloadHeader *)ptr; + ptr += sizeof(payloadHeader); + if (header->payload_type == BULK_STR_REF) { + bulkStrRef *str_ref = (bulkStrRef *)ptr; + if (str_ref->obj != NULL && str_ref->obj->refcount == 1) + total += sdslen(str_ref->obj->ptr); + } + ptr += header->payload_len; + } + return total; +} + +/* Update the client's unshared reply memory (solely owned). */ +void updateClientUnsharedReplyBytes(client *c) { + c->reply_bytes_unshared = 0; + + /* No shared memory means no unshared memory either. */ + if (c->reply_bytes_shared == 0) return; + + /* Scan the static output buffer. */ + if (c->buf_encoded) + c->reply_bytes_unshared += computeUnsharedReplyBytes(c->buf, c->bufpos); + + /* Scan each block in the reply list. 
*/ + listIter reply_li; + listNode *reply_ln; + listRewind(c->reply, &reply_li); + while ((reply_ln = listNext(&reply_li))) { + clientReplyBlock *block = listNodeValue(reply_ln); + if (block == NULL) continue; /* deferred-length placeholder */ + if (block->buf_encoded) + c->reply_bytes_unshared += computeUnsharedReplyBytes(block->buf, block->used); + } +} + +/* Compute shared reply memory: total shared reply bytes and the unshared subset where the key + * has been deleted and the client buffer is the sole holder. */ +void getClientsSharedMemoryUsage(size_t *shared_mem, size_t *unshared_mem) { + listNode *ln; + listIter li; + listRewind(server.clients_with_pending_ref_reply, &li); + while ((ln = listNext(&li))) { + client *c = listNodeValue(ln); + + /* Total shared reply bytes (logical size, shared with keyspace). */ + *shared_mem += c->reply_bytes_shared; + + /* Unshared reply bytes: the client is the sole owner because the key was deleted. */ + updateClientUnsharedReplyBytes(c); + *unshared_mem += c->reply_bytes_unshared; + } +} + /* Clear the client state to resemble a newly connected client. */ void clearClientConnectionState(client *c) { listNode *ln; @@ -2020,6 +2119,7 @@ static void releaseBufReferences(client *c, char *buf, size_t bufpos) { bulkStrRef *str_ref = (bulkStrRef *)ptr; /* Only release if not already released. */ if (str_ref->obj != NULL) { + c->reply_bytes_shared -= sdslen(str_ref->obj->ptr); if (in_io_thread) ioDeferFreeRobj(c, str_ref->obj); else @@ -2436,6 +2536,7 @@ static payloadHeader *processSentDataInEncodedBuffer(client *c, char *start_ptr, return head; } *remaining -= (writen_len - *sentlen); + c->reply_bytes_shared -= sdslen(str_ref->obj->ptr); if (in_io_thread) { ioDeferFreeRobj(c, str_ref->obj); } else { @@ -2594,7 +2695,7 @@ static inline int _writeToClientNonSlave(client *c, ssize_t *nwritten) { /* If there are no longer objects in the list, we expect * the count of reply bytes to be exactly zero. 
*/ if (listLength(c->reply) == 0) - serverAssert(c->reply_bytes == 0); + serverAssert(c->reply_bytes == 0 && c->reply_bytes_shared == 0); } else if (c->bufpos > 0) { /* For encoded buffers, we need to use writev to handle bulk string references */ if (c->buf_encoded) { @@ -3985,8 +4086,12 @@ sds catClientInfoString(sds s, client *client) { } *p = '\0'; + /* Refresh the cached unshared reply bytes before computing memory stats below. */ + updateClientUnsharedReplyBytes(client); + /* Compute the total memory consumed by this client. */ - size_t obufmem, total_mem = getClientMemoryUsage(client, &obufmem); + size_t obufmem = getClientOutputBufferLogicalSize(client); + size_t total_mem = getClientMemoryUsage(client); size_t used_blocks_of_repl_buf = 0; if (client->ref_repl_buf_node) { @@ -4018,8 +4123,10 @@ sds catClientInfoString(sds s, client *client) { " rbp=%U", (unsigned long long) client->buf_peak, " obl=%U", (unsigned long long) client->bufpos, " oll=%U", (unsigned long long) listLength(client->reply) + used_blocks_of_repl_buf, - " omem=%U", (unsigned long long) obufmem, /* should not include client->buf since we want to see 0 for static clients. */ - " tot-mem=%U", (unsigned long long) total_mem, + " omem=%U", (unsigned long long) obufmem, /* logical output buffer memory (includes shared memory; excludes client->buf so static clients show 0) */ + " omem-shared=%U", (unsigned long long) client->reply_bytes_shared, /* shared memory (not solely owned by this client) */ + " omem-unshared=%U", (unsigned long long) client->reply_bytes_unshared, /* unshared memory (solely owned by this client) */ + " tot-mem=%U", (unsigned long long) total_mem, /* actual memory usage (includes unshared memory, excludes shared memory) */ " events=%s", events, " cmd=%s", client->lastcmd ? client->lastcmd->fullname : "NULL", " user=%s", client->user ? 
client->user->name : "(superuser)", @@ -5043,11 +5150,12 @@ void rewriteClientCommandArgument(client *c, int i, robj *newval) { /* This function returns the number of bytes that Redis is * using to store the reply still not read by the client. + * It does NOT include any referenced bytes (neither shared nor unshared). * * Note: this function is very fast so can be called as many time as * the caller wishes. The main usage of this function currently is * enforcing the client output length limits. */ -size_t getClientOutputBufferMemoryUsage(client *c) { +static size_t getClientOutputBufferAllocSize(client *c) { if (unlikely(clientTypeIsSlave(c))) { size_t repl_buf_size = 0; size_t repl_node_num = 0; @@ -5065,22 +5173,38 @@ size_t getClientOutputBufferMemoryUsage(client *c) { } } -size_t getNormalClientPendingReplyBytes(client *c) { - serverAssert(!clientTypeIsSlave(c)); - if (listLength(c->reply) == 0) return c->bufpos; - - clientReplyBlock *block = listNodeValue(listLast(c->reply)); - return (c->reply_bytes - block->size + block->used) + c->bufpos; +/* Returns the logical output buffer size for limit enforcement. + * This includes all shared memory (shared with the keyspace), ensuring that + * a client requesting huge amounts of data via copy-avoidance is still + * subject to output buffer limits. */ +static size_t getClientOutputBufferLogicalSize(client *c) { + size_t mem = getClientOutputBufferAllocSize(c); + if (!clientTypeIsSlave(c)) + mem += c->reply_bytes_shared; + return mem; } -/* Returns the total client's memory usage. - * Optionally, if output_buffer_mem_usage is not NULL, it fills it with - * the client output buffer memory usage portion of the total. */ -size_t getClientMemoryUsage(client *c, size_t *output_buffer_mem_usage) { +/* Returns the actual memory used to store the reply not yet read by the client. + * This includes unshared memory (solely owned by this client), which would be + * freed when the client disconnects. 
*/ +size_t getClientOutputBufferMemoryUsage(client *c) { + size_t mem = getClientOutputBufferAllocSize(c); + mem += c->reply_bytes_unshared; + return mem; +} + +size_t getNormalClientPendingReplyBytes(client *c) { + serverAssert(!clientTypeIsSlave(c)); + if (listLength(c->reply) == 0) return c->bufpos + c->reply_bytes_shared; + + clientReplyBlock *block = listNodeValue(listLast(c->reply)); + return (c->reply_bytes + c->reply_bytes_shared - block->size + block->used) + c->bufpos; +} + +/* Returns the total client's memory usage. */ +size_t getClientMemoryUsage(client *c) { size_t mem = getClientOutputBufferMemoryUsage(c); - if (output_buffer_mem_usage != NULL) - *output_buffer_mem_usage = mem; mem += c->querybuf ? sdsZmallocSize(c->querybuf) : 0; mem += zmalloc_size(c); mem += c->buf_usable_size; @@ -5158,7 +5282,7 @@ char *getClientTypeName(int class) { * Otherwise zero is returned. */ int checkClientOutputBufferLimits(client *c) { int soft = 0, hard = 0, class; - unsigned long used_mem = getClientOutputBufferMemoryUsage(c); + unsigned long used_mem = getClientOutputBufferLogicalSize(c); /* For unauthenticated clients the output buffer is limited to prevent * them from abusing it by not reading the replies */ @@ -5222,10 +5346,10 @@ int checkClientOutputBufferLimits(client *c) { * Returns 1 if client was (flagged) closed. */ int closeClientOnOutputBufferLimitReached(client *c, int async) { if (!c->conn) return 0; /* It is unsafe to free fake clients. */ - serverAssert(c->reply_bytes < SIZE_MAX-(1024*64)); + serverAssert(c->reply_bytes < SIZE_MAX-(1024*64)); /* actual memory only, logical memory may exceed SIZE_MAX */ /* Note that c->reply_bytes is irrelevant for replica clients * (they use the global repl buffers). 
*/ - if ((c->reply_bytes == 0 && !clientTypeIsSlave(c)) || + if ((c->reply_bytes == 0 && c->reply_bytes_shared == 0 && !clientTypeIsSlave(c)) || c->flags & CLIENT_CLOSE_ASAP) return 0; if (checkClientOutputBufferLimits(c)) { sds client = catClientInfoString(sdsempty(),c); @@ -5596,7 +5720,7 @@ static int tryExpandPendingCommandPool(void) { * The shared pool is only used when IO threads are inactive to avoid race conditions * between multiple clients. Additionally, pool reuse provides minimal benefit in * multi-threaded scenarios, so we only use it in single-threaded mode. */ -static void reclaimPendingCommand(client *c, pendingCommand *pcmd) { +static inline void reclaimPendingCommand(client *c, pendingCommand *pcmd) { if (!server.io_threads_active) { /* Try to add to shared pool for reuse if argv isn't too large */ if (likely(pcmd->argv_len < 64)) { diff --git a/src/notify.c b/src/notify.c index 11ea53241..5c8b188fa 100644 --- a/src/notify.c +++ b/src/notify.c @@ -37,10 +37,17 @@ int keyspaceEventsStringToFlags(char *classes) { case 't': flags |= NOTIFY_STREAM; break; case 'm': flags |= NOTIFY_KEY_MISS; break; case 'd': flags |= NOTIFY_MODULE; break; + case 'a': flags |= NOTIFY_ARRAY; break; case 'n': flags |= NOTIFY_NEW; break; case 'o': flags |= NOTIFY_OVERWRITTEN; break; case 'c': flags |= NOTIFY_TYPE_CHANGED; break; +#ifdef ENABLE_GCRA case 'r': flags |= NOTIFY_RATE_LIMIT; break; +#endif + case 'S': flags |= NOTIFY_SUBKEYSPACE; break; + case 'T': flags |= NOTIFY_SUBKEYEVENT; break; + case 'I': flags |= NOTIFY_SUBKEYSPACEITEM; break; + case 'V': flags |= NOTIFY_SUBKEYSPACEEVENT; break; default: return -1; } } @@ -68,49 +75,99 @@ sds keyspaceEventsFlagsToString(int flags) { if (flags & NOTIFY_EVICTED) res = sdscatlen(res,"e",1); if (flags & NOTIFY_STREAM) res = sdscatlen(res,"t",1); if (flags & NOTIFY_MODULE) res = sdscatlen(res,"d",1); + if (flags & NOTIFY_ARRAY) res = sdscatlen(res,"a",1); if (flags & NOTIFY_NEW) res = sdscatlen(res,"n",1); if (flags & 
NOTIFY_OVERWRITTEN) res = sdscatlen(res,"o",1); if (flags & NOTIFY_TYPE_CHANGED) res = sdscatlen(res,"c",1); +#ifdef ENABLE_GCRA if (flags & NOTIFY_RATE_LIMIT) res = sdscatlen(res,"r",1); +#endif } if (flags & NOTIFY_KEYSPACE) res = sdscatlen(res,"K",1); if (flags & NOTIFY_KEYEVENT) res = sdscatlen(res,"E",1); if (flags & NOTIFY_KEY_MISS) res = sdscatlen(res,"m",1); + if (flags & NOTIFY_SUBKEYSPACE) res = sdscatlen(res,"S",1); + if (flags & NOTIFY_SUBKEYEVENT) res = sdscatlen(res,"T",1); + if (flags & NOTIFY_SUBKEYSPACEITEM) res = sdscatlen(res,"I",1); + if (flags & NOTIFY_SUBKEYSPACEEVENT) res = sdscatlen(res,"V",1); return res; } -/* The API provided to the rest of the Redis core is a simple function: +/* Append subkeys in length-prefixed format to 'dst'. + * If 'dst' is NULL, a new sds is created. + * Format: :[,:...] + * Example: 3:abc,2:xx,5:hello */ +static sds catSubkeysPayload(sds dst, robj **subkeys, int count) { + if (dst == NULL) dst = sdsempty(); + char lenbuf[32]; + + for (int i = 0; i < count; i++) { + serverAssert(sdsEncodedObject(subkeys[i])); + if (i > 0) dst = sdscatlen(dst, ",", 1); + size_t subkeylen = sdslen(subkeys[i]->ptr); + int lenlen = ll2string(lenbuf, sizeof(lenbuf), subkeylen); + dst = sdscatlen(dst, lenbuf, lenlen); + dst = sdscatlen(dst, ":", 1); + dst = sdscatsds(dst, subkeys[i]->ptr); + } + return dst; +} + +/* Internal implementation for keyspace event notifications. + * + * The API provided to the rest of the Redis core is: * * notifyKeyspaceEvent(int type, char *event, robj *key, int dbid); + * notifyKeyspaceEventWithSubkeys(int type, char *event, robj *key, int dbid, + * robj **subkeys, int count); * * 'type' is the notification class we define in `server.h`. * 'event' is a C string representing the event name. * 'key' is a Redis object representing the key name. * 'dbid' is the database ID where the key lives. + * 'subkeys' is an array of Redis objects representing the subkey names (can be NULL). 
+ * 'count' is the number of subkeys in the array. + * + * For subkey notifications (4 channel types): + * - __subkeyspace@__: payload: | + * - __subkeyevent@__: payload: :| + * - __subkeyspaceitem@__:\n payload: + * - __subkeyspaceevent@__:| payload: + * + * Where is in length-prefixed format: :[,:...] + * Example: 3:foo,5:hello * * NOTE: This function may invoke module notification callbacks, which may * cause the key's kvobj to be reallocated. */ -void notifyKeyspaceEvent(int type, const char *event, robj *key, int dbid) { +static void notifyKeyspaceEventImpl(int type, const char *event, robj *key, int dbid, + robj **subkeys, int count) +{ sds chan; robj *chanobj, *eventobj; - int len = -1; char buf[24]; + serverAssert(sdsEncodedObject(key)); /* If any modules are interested in events, notify the module system now. * This bypasses the notifications configuration, but the module engine * will only call event subscribers if the event type matches the types - * they are interested in. */ - moduleNotifyKeyspaceEvent(type, event, key, dbid); + * they are interested in. Subkeys are passed through so that subscribers + * with a subkey callback receive them. */ + moduleNotifyKeyspaceEvent(type, event, key, dbid, subkeys, count); /* If notifications for this class of events are off, return ASAP. */ if (!(server.notify_keyspace_events & type)) return; + /* If there are no Pub/Sub subscribers (neither pattern nor channel), + * skip the remaining notification work since nobody would receive it. */ + if (dictSize(server.pubsub_patterns) == 0 && kvstoreSize(server.pubsub_channels) == 0) + return; + eventobj = createStringObject(event,strlen(event)); + int len = ll2string(buf,sizeof(buf),dbid); /* __keyspace@__: notifications. 
*/ if (server.notify_keyspace_events & NOTIFY_KEYSPACE) { chan = sdsnewlen("__keyspace@",11); - len = ll2string(buf,sizeof(buf),dbid); chan = sdscatlen(chan, buf, len); chan = sdscatlen(chan, "__:", 3); chan = sdscatsds(chan, key->ptr); @@ -122,7 +179,6 @@ void notifyKeyspaceEvent(int type, const char *event, robj *key, int dbid) { /* __keyevent@__: notifications. */ if (server.notify_keyspace_events & NOTIFY_KEYEVENT) { chan = sdsnewlen("__keyevent@",11); - if (len == -1) len = ll2string(buf,sizeof(buf),dbid); chan = sdscatlen(chan, buf, len); chan = sdscatlen(chan, "__:", 3); chan = sdscatsds(chan, eventobj->ptr); @@ -130,5 +186,112 @@ void notifyKeyspaceEvent(int type, const char *event, robj *key, int dbid) { pubsubPublishMessage(chanobj, key, 0); decrRefCount(chanobj); } + + /* Subkey-level notifications (only when subkeys are provided). */ + if (subkeys != NULL && count > 0) { + /* __subkeyspace@__: |:[,...] notifications. + * Skip if the event contains '|' to avoid parsing ambiguity since '|' + * is used as a separator between event and subkeys in the payload. */ + if (server.notify_keyspace_events & NOTIFY_SUBKEYSPACE && !strchr(event, '|')) { + chan = sdsnewlen("__subkeyspace@", 14); + chan = sdscatlen(chan, buf, len); + chan = sdscatlen(chan, "__:", 3); + chan = sdscatsds(chan, key->ptr); + chanobj = createObject(OBJ_STRING, chan); + + /* Build payload: | */ + sds payload = sdsdup(eventobj->ptr); + payload = sdscatlen(payload, "|", 1); + payload = catSubkeysPayload(payload, subkeys, count); + robj *payloadobj = createObject(OBJ_STRING, payload); + pubsubPublishMessage(chanobj, payloadobj, 0); + decrRefCount(chanobj); + decrRefCount(payloadobj); + } + + /* __subkeyevent@__: :|:[,...] notifications. 
*/ + if (server.notify_keyspace_events & NOTIFY_SUBKEYEVENT) { + chan = sdsnewlen("__subkeyevent@", 14); + chan = sdscatlen(chan, buf, len); + chan = sdscatlen(chan, "__:", 3); + chan = sdscatsds(chan, eventobj->ptr); + chanobj = createObject(OBJ_STRING, chan); + + /* Build payload: :| */ + size_t keylen = sdslen(key->ptr); + char keylenbuf[32]; + int keylenlen = ll2string(keylenbuf, sizeof(keylenbuf), keylen); + sds payload = sdsnewlen(keylenbuf, keylenlen); + payload = sdscatlen(payload, ":", 1); + payload = sdscatsds(payload, key->ptr); + payload = sdscatlen(payload, "|", 1); + payload = catSubkeysPayload(payload, subkeys, count); + robj *payloadobj = createObject(OBJ_STRING, payload); + pubsubPublishMessage(chanobj, payloadobj, 0); + decrRefCount(chanobj); + decrRefCount(payloadobj); + } + + /* __subkeyspaceitem@__:\n notifications (per subkey). + * Skip if the key contains '\n' to avoid parsing ambiguity in the channel name. */ + if (server.notify_keyspace_events & NOTIFY_SUBKEYSPACEITEM && + memchr(key->ptr, '\n', sdslen(key->ptr)) == NULL) + { + for (int i = 0; i < count; i++) { + serverAssert(sdsEncodedObject(subkeys[i])); + chan = sdsnewlen("__subkeyspaceitem@", 18); + chan = sdscatlen(chan, buf, len); + chan = sdscatlen(chan, "__:", 3); + chan = sdscatsds(chan, key->ptr); + chan = sdscatlen(chan, "\n", 1); + chan = sdscatsds(chan, subkeys[i]->ptr); + chanobj = createObject(OBJ_STRING, chan); + pubsubPublishMessage(chanobj, eventobj, 0); + decrRefCount(chanobj); + } + } + + /* __subkeyspaceevent@__:| notifications. + * Skip if the event contains '|' to avoid parsing ambiguity since '|' + * is used as a separator between event and key in the channel name. 
*/ + if (server.notify_keyspace_events & NOTIFY_SUBKEYSPACEEVENT && !strchr(event, '|')) { + chan = sdsnewlen("__subkeyspaceevent@", 19); + chan = sdscatlen(chan, buf, len); + chan = sdscatlen(chan, "__:", 3); + chan = sdscatsds(chan, eventobj->ptr); + chan = sdscatlen(chan, "|", 1); + chan = sdscatsds(chan, key->ptr); + chanobj = createObject(OBJ_STRING, chan); + robj *payloadobj = createObject(OBJ_STRING, catSubkeysPayload(NULL, subkeys, count)); + pubsubPublishMessage(chanobj, payloadobj, 0); + decrRefCount(chanobj); + decrRefCount(payloadobj); + } + } + decrRefCount(eventobj); } + +/* Public API for key-level notifications (backward compatible). */ +void notifyKeyspaceEvent(int type, const char *event, robj *key, int dbid) { + notifyKeyspaceEventImpl(type, event, key, dbid, NULL, 0); +} + +/* Public API for notifications with subkeys (key-level + subkey-level). */ +void notifyKeyspaceEventWithSubkeys(int type, const char *event, robj *key, int dbid, + robj **subkeys, int count) { + notifyKeyspaceEventImpl(type, event, key, dbid, subkeys, count); +} + +/* Check if subkey information should be collected for the given event type. + * Returns true if any module subscribed to this event with subkeys, or if + * there are Pub/Sub subscribers and any subkey-level notification channel is + * enabled for this event type. 
*/ +int isSubkeyNotifyEnabled(int type) { + if (moduleHasSubscribersForKeyspaceEventWithSubkeys(type)) return 1; + if (dictSize(server.pubsub_patterns) == 0 && kvstoreSize(server.pubsub_channels) == 0) + return 0; + return (server.notify_keyspace_events & type) && + (server.notify_keyspace_events & (NOTIFY_SUBKEYSPACE | NOTIFY_SUBKEYEVENT | + NOTIFY_SUBKEYSPACEITEM | NOTIFY_SUBKEYSPACEEVENT)); +} diff --git a/src/object.c b/src/object.c index cfcfa4844..697ed6e39 100644 --- a/src/object.c +++ b/src/object.c @@ -218,7 +218,7 @@ static kvobj *kvobjCreateEmbedString(const char *val_ptr, size_t val_len, * | robj (16) | key-hdr-size (1) | sdshdr8 "myvalue" \0 (11) | * +-----------+------------------+----------------------------+ */ -robj *createEmbeddedStringObject(const char *val_ptr, size_t val_len) { +static inline robj *createEmbeddedStringObject(const char *val_ptr, size_t val_len) { /* Calculate size for embedded value (always SDS_TYPE_8) */ size_t val_sds_size = sdsReqSize(val_len, SDS_TYPE_8); @@ -514,6 +514,7 @@ robj *createStreamObject(void) { return o; } +#ifdef ENABLE_GCRA robj *createGCRAObject(long long value) { /* NOTE: for 32-bit systems we can't use integer encoding (as OBJ_STRING does) * as the GCRA object is a unixtime value in microseconds, which as of the @@ -530,6 +531,14 @@ robj *createGCRAObject(long long value) { o->encoding = OBJ_ENCODING_INT; return o; } +#endif + +robj *createArrayObject(void) { + redisArray *ar = arNew(); + robj *o = createObject(OBJ_ARRAY, ar); + o->encoding = OBJ_ENCODING_SLICED_ARRAY; + return o; +} robj *createModuleObject(moduleType *mt, void *value) { moduleValue *mv = zmalloc(sizeof(*mv)); @@ -603,6 +612,7 @@ void freeStreamObject(robj *o) { freeStream(o->ptr); } +#ifdef ENABLE_GCRA void freeGCRAObject(robj *o) { #if UINTPTR_MAX == 0xffffffff zfree(o->ptr); @@ -610,6 +620,11 @@ void freeGCRAObject(robj *o) { (void)o; #endif } +#endif + +void freeArrayObject(robj *o) { + arFree(o->ptr); +} void incrRefCount(robj *o) { 
if (o->refcount < OBJ_FIRST_SPECIAL_REFCOUNT - 1) { @@ -635,6 +650,14 @@ void decrRefCount(robj *o) { } if (--(o->refcount) == 0) { + /* Fast path for embedded strings: no inner allocation to free, and we + * can compute the alloc size to hint jemalloc for a faster deallocation. */ + if (o->type == OBJ_STRING && o->encoding == OBJ_ENCODING_EMBSTR && !o->iskvobj) { + serverAssert(sdsType(o->ptr) == SDS_TYPE_8); /* embstr always type_8 */ + zfree_with_size(o, sizeof(robj) + sdsAllocSize(o->ptr)); + return; + } + void *alloc = o; if (o->iskvobj) { @@ -654,7 +677,10 @@ void decrRefCount(robj *o) { case OBJ_HASH: freeHashObject(o); break; case OBJ_MODULE: freeModuleObject(o); break; case OBJ_STREAM: freeStreamObject(o); break; +#ifdef ENABLE_GCRA case OBJ_GCRA: freeGCRAObject(o); break; +#endif + case OBJ_ARRAY: freeArrayObject(o); break; default: serverPanic("Unknown object type"); break; } } @@ -802,12 +828,19 @@ void dismissStreamObject(robj *o, size_t size_hint) { } } +/* See dismissObject() */ +void dismissArrayObject(robj *o, size_t size_hint) { + arDismiss(o->ptr, size_hint); +} + +#ifdef ENABLE_GCRA void dismissGCRAObject(robj *o, size_t size_hint) { /* GCRA is a single allocation of a long long thus way smaller than a * page-size. 
The dismiss mechanism is not needed for it - hence NOOP.*/ (void)o; (void)size_hint; } +#endif /* When creating a snapshot in a fork child process, the main process and child * process share the same physical memory pages, and if / when the parent @@ -837,7 +870,10 @@ void dismissObject(robj *o, size_t size_hint) { case OBJ_ZSET: dismissZsetObject(o, size_hint); break; case OBJ_HASH: dismissHashObject(o, size_hint); break; case OBJ_STREAM: dismissStreamObject(o, size_hint); break; +#ifdef ENABLE_GCRA case OBJ_GCRA: dismissGCRAObject(o, size_hint); break; +#endif + case OBJ_ARRAY: dismissArrayObject(o, size_hint); break; default: break; } #else @@ -959,7 +995,10 @@ size_t getObjectLength(robj *o) { case OBJ_ZSET: return zsetLength(o); case OBJ_HASH: return hashTypeLength(o, 0); case OBJ_STREAM: return streamLength(o); +#ifdef ENABLE_GCRA case OBJ_GCRA: return gcraObjectLength(o); +#endif + case OBJ_ARRAY: return arCount(o->ptr); default: return 0; } } @@ -1168,6 +1207,7 @@ int getLongLongFromObject(robj *o, long long *target) { return C_OK; } +#ifdef ENABLE_GCRA int getLongLongFromGCRAObject(robj *o, long long *target) { long long res; serverAssertWithInfo(NULL, o, o->type == OBJ_GCRA); @@ -1183,6 +1223,7 @@ int getLongLongFromGCRAObject(robj *o, long long *target) { *target = res; return C_OK; } +#endif int getLongLongFromObjectOrReply(client *c, robj *o, long long *target, const char *msg) { long long value; @@ -1257,6 +1298,7 @@ char *strEncoding(int encoding) { case OBJ_ENCODING_SKIPLIST: return "skiplist"; case OBJ_ENCODING_EMBSTR: return "embstr"; case OBJ_ENCODING_STREAM: return "stream"; + case OBJ_ENCODING_SLICED_ARRAY: return "sliced-array"; default: return "unknown"; } } @@ -1275,7 +1317,10 @@ size_t kvobjComputeSize(robj *key, kvobj *o, size_t sample_size, int dbid) { o->type == OBJ_ZSET || o->type == OBJ_HASH || o->type == OBJ_STREAM || - o->type == OBJ_GCRA) +#ifdef ENABLE_GCRA + o->type == OBJ_GCRA || +#endif + o->type == OBJ_ARRAY) { return 
kvobjAllocSize(o); } else if (o->type == OBJ_MODULE) { @@ -1301,14 +1346,20 @@ size_t kvobjAllocSize(kvobj *o) { } else if (o->type == OBJ_STREAM) { stream *s = o->ptr; asize += s->alloc_size; +#ifdef ENABLE_GCRA } else if (o->type == OBJ_GCRA) { asize += gcraTypeAllocSize(o); +#endif + } else if (o->type == OBJ_ARRAY) { + redisArray *ar = o->ptr; + asize += ar->alloc_size; } else if (o->type == OBJ_MODULE) { /* TODO: Provide moduleGetAllocSize() module API for O(1) allocation size retrieval */ } return asize; } +#ifdef ENABLE_GCRA size_t gcraTypeAllocSize(robj *o) { (void)o; #if UINTPTR_MAX == 0xffffffff @@ -1325,6 +1376,7 @@ size_t gcraObjectLength(robj *o) { (void)o; return 1; } +#endif /* Release data obtained with getMemoryOverheadData(). */ void freeMemoryOverheadData(struct redisMemOverhead *mh) { @@ -1394,6 +1446,9 @@ struct redisMemOverhead *getMemoryOverheadData(void) { mem_total += mh->repl_backlog; mem_total += mh->clients_slaves; + /* Compute shared/unshared reply memory. */ + getClientsSharedMemoryUsage(&mh->clients_normal_shared, &mh->clients_normal_unshared); + /* Computing the memory used by the clients would be O(N) if done * here online. We use our values computed incrementally by * updateClientMemoryUsage(). */ @@ -1424,7 +1479,7 @@ struct redisMemOverhead *getMemoryOverheadData(void) { /* Cluster atomic slot migration buffers. 
*/ mh->asm_import_input_buffer = asmGetImportInputBufferSize(); - mh->asm_migrate_output_buffer = asmGetMigrateOutputBufferSize(); + mh->asm_migrate_output_buffer = asmGetMigrateOutputMemoryUsage(); mem_total += mh->asm_import_input_buffer; mem_total += mh->asm_migrate_output_buffer; @@ -1749,7 +1804,7 @@ NULL } else if (!strcasecmp(c->argv[1]->ptr,"stats") && c->argc == 2) { struct redisMemOverhead *mh = getMemoryOverheadData(); - addReplyMapLen(c,33+mh->num_dbs); + addReplyMapLen(c,35+mh->num_dbs); addReplyBulkCString(c,"peak.allocated"); addReplyLongLong(c,mh->peak_allocated); @@ -1772,6 +1827,12 @@ NULL addReplyBulkCString(c,"clients.normal"); addReplyLongLong(c,mh->clients_normal); + addReplyBulkCString(c,"clients.normal.shared"); + addReplyLongLong(c,mh->clients_normal_shared); + + addReplyBulkCString(c,"clients.normal.unshared"); + addReplyLongLong(c,mh->clients_normal_unshared); + addReplyBulkCString(c,"cluster.links"); addReplyLongLong(c,mh->cluster_links); diff --git a/src/object.h b/src/object.h index 6b2591877..35cd40a3c 100644 --- a/src/object.h +++ b/src/object.h @@ -5,7 +5,7 @@ * values of different logical types (strings, lists, sets, hashes, sorted sets, * streams, modules, ...). It contains: * - type: one of OBJ_STRING, OBJ_LIST, OBJ_SET, OBJ_ZSET, OBJ_HASH, OBJ_STREAM, - * OBJ_GCRA, OBJ_MODULE, ... + * OBJ_MODULE, ... * - encoding: an implementation detail of how the value is represented in * memory for the given type (see OBJ_ENCODING_* below). For example, * strings may be RAW/EMBSTR/INT, sets may be INTSET or HT, etc. 
@@ -85,6 +85,7 @@ struct RedisModuleType; #define OBJ_ENCODING_STREAM 10 /* Encoded as a radix tree of listpacks */ #define OBJ_ENCODING_LISTPACK 11 /* Encoded as a listpack */ #define OBJ_ENCODING_LISTPACK_EX 12 /* Encoded as listpack, extended with metadata */ +#define OBJ_ENCODING_SLICED_ARRAY 13 /* Encoded as sliced array */ #define LRU_BITS 24 #define LRU_CLOCK_MAX ((1<lru */ @@ -163,6 +164,7 @@ robj *createZsetListpackObject(void); robj *createStreamObject(void); robj *createGCRAObject(long long value); robj *createModuleObject(struct RedisModuleType *mt, void *value); +robj *createArrayObject(void); int getLongFromObjectOrReply(struct client *c, robj *o, long *target, const char *msg); int getPositiveLongFromObjectOrReply(struct client *c, robj *o, long *target, const char *msg); int getRangeLongFromObjectOrReply(struct client *c, robj *o, long min, long max, long *target, const char *msg); diff --git a/src/pubsub.c b/src/pubsub.c index 7199be1e0..b9198d263 100644 --- a/src/pubsub.c +++ b/src/pubsub.c @@ -293,7 +293,10 @@ int pubsubUnsubscribeChannel(client *c, robj *channel, int notify, pubsubtype ty retval = 1; /* Remove the client from the channel -> clients list hash table */ if (server.cluster_enabled && type.shard) { - slot = getKeySlot(channel->ptr); + /* Compute the slot from the channel directly instead of using getKeySlot(), + * because the unsubscribe may be triggered by a different client, and + * getKeySlot() would return the cached slot of that client. */ + slot = keyHashSlot(channel->ptr, sdslen(channel->ptr)); } de = kvstoreDictFind(*type.serverPubSubChannels, slot, channel); serverAssertWithInfo(c,NULL,de != NULL); diff --git a/src/rax.c b/src/rax.c index e34e7e2f1..1db3f1cc2 100644 --- a/src/rax.c +++ b/src/rax.c @@ -1254,48 +1254,48 @@ int raxRemove(rax *rax, unsigned char *s, size_t len, void **old) { return 1; } -/* This is the core of raxFree(): performs a depth-first scan of the - * tree and releases all the nodes found. 
*/ -void raxRecursiveFree(rax *rax, raxNode *n, void (*free_callback)(void*)) { - debugnode("free traversing",n); - int numchildren = n->iscompr ? 1 : n->size; - raxNode **cp = raxNodeLastChildPtr(n); - while(numchildren--) { - raxNode *child; - memcpy(&child,cp,sizeof(child)); - raxRecursiveFree(rax,child,free_callback); - cp--; - } - debugnode("free depth-first",n); - if (free_callback && n->iskey && !n->isnull) - free_callback(raxGetData(n)); - raxFreeNode(rax,n); - rax->numnodes--; -} +/* This is the core of raxFree(): performs an iterative depth-first scan + * of the tree and frees all the nodes found. Uses an explicit heap stack + * to avoid stack overflow on deep trees. The caller passes exactly one + * callback variant and the non-NULL one is invoked. */ +static void raxFreeNodesWithCallback(rax *rax, raxNode *n, + void (*free_callback)(void *item), + void (*free_callback_withctx)(void *item, void *ctx), + void *ctx) +{ + raxStack stack; + raxStackInit(&stack); + raxStackPush(&stack, n); -/* Same as raxRecursiveFree() with context argument */ -void raxRecursiveFreeWithCtx(rax *rax, raxNode *n, - void (*free_callback)(void *item, void *ctx), void *ctx) { - debugnode("free traversing",n); - int numchildren = n->iscompr ? 1 : n->size; - raxNode **cp = raxNodeLastChildPtr(n); - while(numchildren--) { - raxNode *child; - memcpy(&child,cp,sizeof(child)); - raxRecursiveFreeWithCtx(rax,child,free_callback, ctx); - cp--; + while (stack.items > 0) { + raxNode *curr = raxStackPop(&stack); + debugnode("free traversing",curr); + int numchildren = curr->iscompr ? 
1 : curr->size; + raxNode **cp = raxNodeFirstChildPtr(curr); + for (int i = 0; i < numchildren; i++) { + raxNode *child; + memcpy(&child, cp + i, sizeof(child)); + raxStackPush(&stack, child); + } + debugnode("free depth-first",curr); + if (curr->iskey && !curr->isnull) { + void *data = raxGetData(curr); + if (free_callback_withctx) + free_callback_withctx(data, ctx); + else if (free_callback) + free_callback(data); + } + raxFreeNode(rax, curr); + rax->numnodes--; } - debugnode("free depth-first",n); - if (free_callback && n->iskey && !n->isnull) - free_callback(raxGetData(n), ctx); - raxFreeNode(rax,n); - rax->numnodes--; + + raxStackFree(&stack); } /* Free a whole radix tree, calling the specified callback in order to * free the auxiliary data. */ void raxFreeWithCallback(rax *rax, void (*free_callback)(void*)) { - raxRecursiveFree(rax,rax->head,free_callback); + raxFreeNodesWithCallback(rax, rax->head, free_callback, NULL, NULL); assert(rax->numnodes == 0); size_t *alloc_size = rax->alloc_size; size_t usable; @@ -1307,7 +1307,7 @@ void raxFreeWithCallback(rax *rax, void (*free_callback)(void*)) { * free the auxiliary data. */ void raxFreeWithCbAndContext(rax *rax, void (*free_callback)(void *item, void *ctx), void *ctx) { - raxRecursiveFreeWithCtx(rax,rax->head,free_callback,ctx); + raxFreeNodesWithCallback(rax, rax->head, NULL, free_callback, ctx); assert(rax->numnodes == 0); size_t *alloc_size = rax->alloc_size; size_t usable; diff --git a/src/rdb.c b/src/rdb.c index eae234832..4ae492a4d 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -124,33 +124,42 @@ time_t rdbLoadTime(rio *rdb) { return (time_t)t32; } -ssize_t rdbSaveMillisecondTime(rio *rdb, long long t) { - int64_t t64 = (int64_t) t; - memrev64ifbe(&t64); /* Store in little endian. */ - return rdbWriteRaw(rdb,&t64,8); +/* Save a signed 64-bit integer in little-endian format. */ +ssize_t rdbSaveSignedInteger(rio *rdb, int64_t val) { + memrev64ifbe(&val); /* Store in little endian. 
*/ + return rdbWriteRaw(rdb, &val, 8); } -/* This function loads a time from the RDB file. It gets the version of the - * RDB because, unfortunately, before Redis 5 (RDB version 9), the function - * failed to convert data to/from little endian, so RDB files with keys having - * expires could not be shared between big endian and little endian systems - * (because the expire time will be totally wrong). The fix for this is just - * to call memrev64ifbe(), however if we fix this for all the RDB versions, +/* This function loads a signed 64-bit integer from the RDB file. It gets the + * version of the RDB because, unfortunately, before Redis 5 (RDB version 9), + * the function failed to convert data to/from little endian, so RDB files with + * keys having expires could not be shared between big endian and little endian + * systems (because the expire time will be totally wrong). The fix for this is + * just to call memrev64ifbe(), however if we fix this for all the RDB versions, * this call will introduce an incompatibility for big endian systems: * after upgrading to Redis version 5 they will no longer be able to load their * own old RDB files. Because of that, we instead fix the function only for new * RDB versions, and load older RDB versions as we used to do in the past, * allowing big endian systems to load their own old RDB files. * - * On I/O error the function returns LLONG_MAX, however if this is also a + * On I/O error the function returns INT64_MAX, however if this is also a * valid stored value, the caller should use rioGetReadError() to check for * errors after calling this function. */ -long long rdbLoadMillisecondTime(rio *rdb, int rdbver) { - int64_t t64; - if (rioRead(rdb,&t64,8) == 0) return LLONG_MAX; +int64_t rdbLoadSignedInteger(rio *rdb, int rdbver) { + int64_t val; + if (rioRead(rdb, &val, 8) == 0) return INT64_MAX; if (rdbver >= 9) /* Check the top comment of this function. */ - memrev64ifbe(&t64); /* Convert in big endian if the system is BE. 
*/ - return (long long)t64; + memrev64ifbe(&val); /* Convert in big endian if the system is BE. */ + return val; +} + +/* Wrappers for millisecond time - these just call the signed integer functions */ +ssize_t rdbSaveMillisecondTime(rio *rdb, long long t) { + return rdbSaveSignedInteger(rdb, (int64_t)t); +} + +long long rdbLoadMillisecondTime(rio *rdb, int rdbver) { + return (long long)rdbLoadSignedInteger(rdb, rdbver); } /* Saves an encoded length. The first two bits in the first byte are used to @@ -713,10 +722,14 @@ int rdbSaveObjectType(rio *rdb, robj *o) { serverPanic("Unknown hash encoding"); case OBJ_STREAM: return rdbSaveType(rdb,RDB_TYPE_STREAM_LISTPACKS_5); +#ifdef ENABLE_GCRA case OBJ_GCRA: return rdbSaveType(rdb,RDB_TYPE_GCRA); +#endif case OBJ_MODULE: return rdbSaveType(rdb,RDB_TYPE_MODULE_2); + case OBJ_ARRAY: + return rdbSaveType(rdb,RDB_TYPE_ARRAY); default: serverPanic("Unknown object type"); } @@ -1039,6 +1052,68 @@ size_t rdbSaveStreamConsumers(rio *rdb, streamCG *cg) { /* Save a Redis object. * Returns -1 on error, number of bytes written on success. 
*/ +static ssize_t rdbSaveArrayElement(rio *rdb, uint64_t idx, void *v) { + ssize_t n, nwritten = 0; + + if ((n = rdbSaveLen(rdb, idx)) == -1) return -1; + nwritten += n; + + if (arIsInt(v)) { + if ((n = rdbSaveLen(rdb, AR_RDB_TAG_INT)) == -1) return -1; + nwritten += n; + int64_t ival = arToInt(v); + if ((n = rdbSaveSignedInteger(rdb, ival)) == -1) return -1; + nwritten += n; + } else if (arIsFloat(v)) { + if ((n = rdbSaveLen(rdb, AR_RDB_TAG_FLOAT)) == -1) return -1; + nwritten += n; + double d = arToDouble(v); + if (rdbSaveBinaryDoubleValue(rdb, d) == -1) return -1; + nwritten += 8; + } else if (arIsSmallStr(v)) { + char buf[AR_SMALLSTR_MAXLEN + 1]; + int len = arToSmallStr(v, buf); + if ((n = rdbSaveLen(rdb, AR_RDB_TAG_SMALLSTR)) == -1) return -1; + nwritten += n; + if ((n = rdbSaveRawString(rdb, (unsigned char *)buf, len)) == -1) return -1; + nwritten += n; + } else { + if ((n = rdbSaveLen(rdb, AR_RDB_TAG_SDS)) == -1) return -1; + nwritten += n; + if ((n = rdbSaveRawString(rdb, (unsigned char *)arStringData(v), arStringLen(v))) == -1) return -1; + nwritten += n; + } + + return nwritten; +} + +static ssize_t rdbSaveArraySlice(rio *rdb, arSlice *s, uint64_t slice_id, + uint32_t slice_size) { + ssize_t n, nwritten = 0; + + if (s->encoding == AR_SLICE_DENSE) { + for (uint32_t i = 0; i < s->layout.dense.winsize; i++) { + void *v = s->layout.dense.items[i]; + if (arIsEmpty(v)) continue; + + uint64_t idx = arMakeIdx(slice_id, s->layout.dense.offset + i, slice_size); + if ((n = rdbSaveArrayElement(rdb, idx, v)) == -1) return -1; + nwritten += n; + } + } else { + uint16_t *offsets = s->layout.sparse.offsets; + void **values = s->layout.sparse.values; + + for (uint32_t i = 0; i < s->count; i++) { + uint64_t idx = arMakeIdx(slice_id, offsets[i], slice_size); + if ((n = rdbSaveArrayElement(rdb, idx, values[i])) == -1) return -1; + nwritten += n; + } + } + + return nwritten; +} + ssize_t rdbSaveObject(rio *rdb, robj *o, robj *key, int dbid) { ssize_t n = 0, nwritten = 0; @@ 
-1401,11 +1476,13 @@ ssize_t rdbSaveObject(rio *rdb, robj *o, robj *key, int dbid) { /* Save the all-time count of duplicate IIDs detected. */ if ((n = rdbSaveLen(rdb,s->iids_duplicates)) == -1) return -1; nwritten += n; +#ifdef ENABLE_GCRA } else if (o->type == OBJ_GCRA) { long long t; getLongLongFromGCRAObject(o, &t); if ((n = rdbSaveLen(rdb,t)) == -1) return -1; nwritten += n; +#endif } else if (o->type == OBJ_MODULE) { /* Save a module-specific value. */ RedisModuleIO io; @@ -1432,6 +1509,57 @@ ssize_t rdbSaveObject(rio *rdb, robj *o, robj *key, int dbid) { zfree(io.ctx); } return io.error ? -1 : (ssize_t)io.bytes; + } else if (o->type == OBJ_ARRAY) { + /* Save an array value. We persist only elements and insert_idx - no + * implementation details like slice_size. Arrays are loaded using + * the current ar_slice_size config. */ + redisArray *ar = o->ptr; + + /* Save count */ + if ((n = rdbSaveLen(rdb, ar->count)) == -1) return -1; + nwritten += n; + + /* Save insert_idx: 0 = none, 1 = has value followed by actual value. + * We can't save UINT64_MAX directly with rdbSaveLen/rdbLoadLen because + * rdbLoadLen returns UINT64_MAX (RDB_LENERR) to signal an error, making + * it impossible to distinguish a valid UINT64_MAX value from an error. */ + if (ar->insert_idx == AR_INSERT_IDX_NONE) { + if ((n = rdbSaveLen(rdb, 0)) == -1) return -1; + nwritten += n; + } else { + if ((n = rdbSaveLen(rdb, 1)) == -1) return -1; + nwritten += n; + if ((n = rdbSaveLen(rdb, ar->insert_idx)) == -1) return -1; + nwritten += n; + } + + /* Save elements in index order. + * We need to iterate through all slices, handling both flat directory + * mode and superdir mode. In superdir mode, blocks are sorted by + * block_id, so we iterate through blocks in order. 
*/ + if (ar->superdir) { + /* Superdir mode: iterate through blocks */ + for (uint32_t bi = 0; bi < ar->sdir_len; bi++) { + arSDirEntry *e = ar->superdir + bi; + uint64_t block_base = e->block_id * AR_SUPER_BLOCK_SLOTS; + + for (uint32_t si = 0; si < AR_SUPER_BLOCK_SLOTS; si++) { + arSlice *s = e->slots[si]; + if (!s) continue; + uint64_t slice_id = block_base + si; + if ((n = rdbSaveArraySlice(rdb, s, slice_id, ar->slice_size)) == -1) return -1; + nwritten += n; + } + } + } else { + /* Flat directory mode */ + for (uint64_t slice_id = 0; slice_id <= ar->dir_highest_used && slice_id < ar->dir_alloc; slice_id++) { + arSlice *s = ar->dir[slice_id]; + if (!s) continue; + if ((n = rdbSaveArraySlice(rdb, s, slice_id, ar->slice_size)) == -1) return -1; + nwritten += n; + } + } } else { serverPanic("Unknown object type"); } @@ -2002,11 +2130,18 @@ void rdbRemoveTempFile(pid_t childpid, int from_signal) { /* This function is called by rdbLoadObject() when the code is in RDB-check * mode and we find a module value of type 2 that can be parsed without - * the need of the actual module. The value is parsed for errors, finally - * a dummy redis object is returned just to conform to the API. */ -robj *rdbLoadCheckModuleValue(rio *rdb, char *modulename) { + * the need of the actual module. The value is parsed for errors. + * If null_on_error is true, NULL is returned when data corruption is detected; + * otherwise a dummy redis object is always returned regardless of success or + * failure. 
*/ +robj *rdbLoadCheckModuleValue(rio *rdb, char *modulename, int null_on_error) { uint64_t opcode; while((opcode = rdbLoadLen(rdb,NULL)) != RDB_MODULE_OPCODE_EOF) { + if (opcode == RDB_LENERR) { + rdbReportCorruptRDB("Error reading module opcode length from module %s value", modulename); + goto error; + } + if (opcode == RDB_MODULE_OPCODE_SINT || opcode == RDB_MODULE_OPCODE_UINT) { @@ -2014,12 +2149,14 @@ robj *rdbLoadCheckModuleValue(rio *rdb, char *modulename) { if (rdbLoadLenByRef(rdb,NULL,&len) == -1) { rdbReportCorruptRDB( "Error reading integer from module %s value", modulename); + goto error; } } else if (opcode == RDB_MODULE_OPCODE_STRING) { robj *o = rdbGenericLoadStringObject(rdb,RDB_LOAD_NONE,NULL); if (o == NULL) { rdbReportCorruptRDB( "Error reading string from module %s value", modulename); + goto error; } decrRefCount(o); } else if (opcode == RDB_MODULE_OPCODE_FLOAT) { @@ -2027,16 +2164,24 @@ robj *rdbLoadCheckModuleValue(rio *rdb, char *modulename) { if (rdbLoadBinaryFloatValue(rdb,&val) == -1) { rdbReportCorruptRDB( "Error reading float from module %s value", modulename); + goto error; } } else if (opcode == RDB_MODULE_OPCODE_DOUBLE) { double val; if (rdbLoadBinaryDoubleValue(rdb,&val) == -1) { rdbReportCorruptRDB( "Error reading double from module %s value", modulename); + goto error; } + } else { + rdbReportCorruptRDB( + "Unknown module opcode %llu reading module %s value", (unsigned long long)opcode, modulename); + goto error; } } return createStringObject("module-dummy-value",18); +error: + return null_on_error ? NULL : createStringObject("module-dummy-value",18); } /* Load object type and optional key metadata (into `keymeta`) from RDB stream. 
@@ -2917,13 +3062,13 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) /* search for duplicate records */ sds field = sdstrynewlen(fstr, flen); - int field_added = (field != NULL && dictAdd(dupSearchDict, field, NULL) == DICT_OK); - if (!field_added || !lpSafeToAdd(lp, (size_t)flen + vlen)) { + if (!field || !lpSafeToAdd(lp, (size_t)flen + vlen) || + dictAdd(dupSearchDict, field, NULL) != DICT_OK) { rdbReportCorruptRDB("Hash zipmap with dup elements, or big length (%u)", flen); /* If field was not added to dict, we still own it. * If it was added, dict owns it and dictRelease will free it. */ - if (!field_added) sdsfree(field); dictRelease(dupSearchDict); + sdsfree(field); lpFree(lp); zfree(encoded); o->ptr = NULL; @@ -3145,6 +3290,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) return NULL; } + uint64_t live_entries = 0; while(listpacks--) { /* Get the master ID, the one we'll use as key of the radix tree * node: the entries inside the listpack itself are delta-encoded @@ -3194,6 +3340,18 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) return NULL; } + long long lp_live; + if (!lpGetIntegerValue(first, &lp_live) || lp_live <= 0 || + (uint64_t)lp_live > UINT64_MAX - live_entries) + { + rdbReportCorruptRDB("Stream listpack bad entry count"); + sdsfree(nodekey); + decrRefCount(o); + zfree(lp); + return NULL; + } + live_entries += lp_live; + /* Insert the key in the radix tree. 
*/ int retval = raxTryInsert(s->rax, (unsigned char*)nodekey,sizeof(streamID),lp,NULL); @@ -3243,8 +3401,8 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) return NULL; } - if (s->length && !raxSize(s->rax)) { - rdbReportCorruptRDB("Stream length inconsistent with rax entries"); + if (s->length != live_entries) { + rdbReportCorruptRDB("Stream length inconsistent with live entries"); decrRefCount(o); return NULL; } @@ -3416,6 +3574,14 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) } streamNACK *nack = result; + /* If the NACK already has a consumer assigned, the + * payload is corrupt — each global PEL entry must be + * claimed by exactly one consumer. */ + if (nack->consumer != NULL) { + rdbReportCorruptRDB("Stream consumer PEL entry already has a consumer assigned"); + decrRefCount(o); + return NULL; + } /* Set the NACK consumer, that was left to NULL when * loading the global PEL. Then set the same shared * NACK structure also in the consumer-specific PEL. */ @@ -3564,7 +3730,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) if (rdbCheckMode) { char name[10]; moduleTypeNameByID(name,moduleid); - return rdbLoadCheckModuleValue(rdb,name); + return rdbLoadCheckModuleValue(rdb, name, 0); } if (mt == NULL) { @@ -3611,6 +3777,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) return NULL; } o = createModuleObject(mt, ptr); +#ifdef ENABLE_GCRA } else if (rdbtype == RDB_TYPE_GCRA) { uint64_t time = rdbLoadLen(rdb, NULL); if (time == RDB_LENERR || time > LLONG_MAX) { @@ -3618,6 +3785,105 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) return NULL; } o = createGCRAObject((long long)time); +#endif + } else if (rdbtype == RDB_TYPE_ARRAY) { + /* Load array value. We only persist elements and insert_idx - no + * implementation details. Arrays use current ar_slice_size config. 
*/ + uint64_t count; + if ((count = rdbLoadLen(rdb, NULL)) == RDB_LENERR) return NULL; + if (count == 0) { + rdbReportCorruptRDB("Empty array (count == 0) is invalid"); + return NULL; + } + + /* Load insert_idx: 0 = none, 1 = has value followed by actual value */ + uint64_t insert_idx_flag; + if ((insert_idx_flag = rdbLoadLen(rdb, NULL)) == RDB_LENERR) return NULL; + if (insert_idx_flag > 1) { + rdbReportCorruptRDB("Invalid array insert_idx_flag %llu", + (unsigned long long)insert_idx_flag); + return NULL; + } + uint64_t insert_idx; + if (insert_idx_flag == 0) { + insert_idx = AR_INSERT_IDX_NONE; + } else { + if ((insert_idx = rdbLoadLen(rdb, NULL)) == RDB_LENERR) return NULL; + } + + o = createArrayObject(); + redisArray *ar = o->ptr; + ar->insert_idx = insert_idx; + + /* Load elements */ + for (uint64_t i = 0; i < count; i++) { + uint64_t idx; + int idx_isencoded; + if (rdbLoadLenByRef(rdb, &idx_isencoded, &idx) == -1) { + decrRefCount(o); + return NULL; + } + if (idx_isencoded || idx == UINT64_MAX) { + decrRefCount(o); + rdbReportCorruptRDB("Invalid array index %llu", + (unsigned long long)idx); + return NULL; + } + + uint64_t type_tag; + if ((type_tag = rdbLoadLen(rdb, NULL)) == RDB_LENERR) { + decrRefCount(o); + return NULL; + } + + void *v; + if (type_tag == AR_RDB_TAG_INT) { + int64_t ival = rdbLoadSignedInteger(rdb, RDB_VERSION); + if (ival == INT64_MAX && rioGetReadError(rdb)) { + decrRefCount(o); + return NULL; + } + v = arValueFromRdbInt(ival); + } else if (type_tag == AR_RDB_TAG_FLOAT) { + double d; + if (rdbLoadBinaryDoubleValue(rdb, &d) == -1) { + decrRefCount(o); + return NULL; + } + v = arValueFromRdbFloat(d); + } else if (type_tag == AR_RDB_TAG_SMALLSTR) { + sds str; + if ((str = rdbGenericLoadStringObject(rdb, RDB_LOAD_SDS, NULL)) == NULL) { + decrRefCount(o); + return NULL; + } + size_t len = sdslen(str); + if (len > AR_SMALLSTR_MAXLEN) { + sdsfree(str); + decrRefCount(o); + rdbReportCorruptRDB("Invalid small string length %zu in array", len); + 
return NULL; + } + v = arValueFromRdbSmallStr(str, sdslen(str)); + sdsfree(str); + } else if (type_tag == AR_RDB_TAG_SDS) { + /* arString */ + sds str; + if ((str = rdbGenericLoadStringObject(rdb, RDB_LOAD_SDS, NULL)) == NULL) { + decrRefCount(o); + return NULL; + } + v = arEncode(str, sdslen(str)); + sdsfree(str); + } else { + decrRefCount(o); + rdbReportCorruptRDB("Unknown array element type_tag %llu", + (unsigned long long)type_tag); + return NULL; + } + + arSet(ar, idx, v); + } } else { rdbReportReadError("Unknown RDB encoding type %d",rdbtype); return NULL; @@ -4023,7 +4289,7 @@ int rdbLoadRioWithLoadingCtx(rio *rdb, int rdbflags, rdbSaveInfo *rsi, rdbLoadin continue; } else { /* RDB check mode. */ - robj *aux = rdbLoadCheckModuleValue(rdb,name); + robj *aux = rdbLoadCheckModuleValue(rdb, name, 0); decrRefCount(aux); continue; /* Read next opcode. */ } @@ -4142,7 +4408,7 @@ int rdbLoadRioWithLoadingCtx(rio *rdb, int rdbflags, rdbSaveInfo *rsi, rdbLoadin objectSetLRUOrLFU(val,lfu_freq,lru_idle,lru_clock,1000); /* call key space notification on key loaded for modules only */ - moduleNotifyKeyspaceEvent(NOTIFY_LOADED, "loaded", &keyobj, db->id); + moduleNotifyKeyspaceEvent(NOTIFY_LOADED, "loaded", &keyobj, db->id, NULL, 0); /* Release key (sds), dictEntry stores a copy of it in embedded data */ sdsfree(key); diff --git a/src/rdb.h b/src/rdb.h index 4898d82af..7e49ddff0 100644 --- a/src/rdb.h +++ b/src/rdb.h @@ -80,11 +80,18 @@ #define RDB_TYPE_HASH_LISTPACK_EX 25 /* Hash LP with HFEs. Attach min TTL at start */ #define RDB_TYPE_STREAM_LISTPACKS_4 26 /* Stream with IDMP support */ #define RDB_TYPE_STREAM_LISTPACKS_5 27 /* Stream with XNACK support (NACKed entries) */ -#define RDB_TYPE_GCRA 28 /* GCRA object */ +#define RDB_TYPE_ARRAY 28 /* Array data type */ +#ifdef ENABLE_GCRA +#define RDB_TYPE_GCRA 29 /* GCRA object */ +#endif /* NOTE: WHEN ADDING NEW RDB TYPE, UPDATE rdbIsObjectType(), and rdb_type_string[] */ /* Test if a type is an object type. 
*/ +#ifdef ENABLE_GCRA +#define rdbIsObjectType(t) (((t) >= 0 && (t) <= 7) || ((t) >= 9 && (t) <= 29)) +#else #define rdbIsObjectType(t) (((t) >= 0 && (t) <= 7) || ((t) >= 9 && (t) <= 28)) +#endif /* Special RDB opcodes (saved/loaded with rdbSaveType/rdbLoadType). */ #define RDB_OPCODE_KEY_META 243 /* Key metadata (module metadata classes). */ @@ -133,6 +140,8 @@ int rdbSaveType(rio *rdb, unsigned char type); int rdbLoadType(rio *rdb); time_t rdbLoadTime(rio *rdb); int rdbSaveLen(rio *rdb, uint64_t len); +ssize_t rdbSaveSignedInteger(rio *rdb, int64_t val); +int64_t rdbLoadSignedInteger(rio *rdb, int rdbver); ssize_t rdbSaveMillisecondTime(rio *rdb, long long t); long long rdbLoadMillisecondTime(rio *rdb, int rdbver); uint64_t rdbLoadLen(rio *rdb, int *isencoded); @@ -152,7 +161,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error); void backgroundSaveDoneHandler(int exitcode, int bysignal); int rdbSaveKeyValuePair(rio *rdb, robj *key, robj *val, long long expiretime,int dbid); ssize_t rdbSaveSingleModuleAux(rio *rdb, int when, moduleType *mt); -robj *rdbLoadCheckModuleValue(rio *rdb, char *modulename); +robj *rdbLoadCheckModuleValue(rio *rdb, char *modulename, int null_on_error); int rdbResolveKeyType(rio *rdb, int *type, int dbid, KeyMetaSpec *keymeta); robj *rdbLoadStringObject(rio *rdb); ssize_t rdbSaveStringObject(rio *rdb, robj *obj); diff --git a/src/redis-check-rdb.c b/src/redis-check-rdb.c index eea78290d..e4c10216d 100644 --- a/src/redis-check-rdb.c +++ b/src/redis-check-rdb.c @@ -88,7 +88,10 @@ char *rdb_type_string[] = { "hash-listpack-md", "stream-v4", "stream-v5", + "array", +#ifdef ENABLE_GCRA "gcra", +#endif }; /* Show a few stats collected into 'rdbstate' */ @@ -256,7 +259,7 @@ int redis_check_rdb(char *rdbfilename, FILE *fp) { uint32_t classSpec; if (rioRead(&rdb, &classSpec, 4) == 0) goto eoferr; /* Skip module value using rdbLoadCheckModuleValue */ - robj *o = rdbLoadCheckModuleValue(&rdb, "metadata"); + robj *o = 
rdbLoadCheckModuleValue(&rdb, "metadata", 1); if (o == NULL) goto eoferr; decrRefCount(o); } @@ -326,7 +329,7 @@ int redis_check_rdb(char *rdbfilename, FILE *fp) { moduleTypeNameByID(name,moduleid); rdbCheckInfo("MODULE AUX for: %s", name); - robj *o = rdbLoadCheckModuleValue(&rdb,name); + robj *o = rdbLoadCheckModuleValue(&rdb, name, 0); decrRefCount(o); continue; /* Read type again. */ } else if (type == RDB_OPCODE_FUNCTION_PRE_GA) { diff --git a/src/redis-cli.c b/src/redis-cli.c index 76ff0a67f..75845cbab 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -472,7 +472,7 @@ static void cliLegacyIntegrateHelp(void) { if (entry->type != REDIS_REPLY_ARRAY || entry->elements < 4 || entry->element[0]->type != REDIS_REPLY_STRING || entry->element[1]->type != REDIS_REPLY_INTEGER || - entry->element[3]->type != REDIS_REPLY_INTEGER) return; + entry->element[3]->type != REDIS_REPLY_INTEGER) break; char *cmdname = entry->element[0]->str; int i; diff --git a/src/redismodule.h b/src/redismodule.h index c1040f12f..f0d9e8aa6 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -89,7 +89,7 @@ typedef long long ustime_t; #define REDISMODULE_KEYTYPE_ZSET 5 #define REDISMODULE_KEYTYPE_MODULE 6 #define REDISMODULE_KEYTYPE_STREAM 7 -#define REDISMODULE_KEYTYPE_GCRA 8 +#define REDISMODULE_KEYTYPE_ARRAY 8 /* Reply types. */ #define REDISMODULE_REPLY_UNKNOWN -1 @@ -248,14 +248,31 @@ This flag should not be used directly by the module. 
#define REDISMODULE_NOTIFY_OVERWRITTEN (1<<15) /* o, key overwrite notification */ #define REDISMODULE_NOTIFY_TYPE_CHANGED (1<<16) /* c, key type changed notification */ #define REDISMODULE_NOTIFY_KEY_TRIMMED (1<<17) /* module only key space notification, indicates a key trimmed during slot migration */ -#define REDISMODULE_NOTIFY_RATE_LIMIT (1<<18) /* r, rate limit event */ + +#define REDISMODULE_NOTIFY_SUBKEYSPACE (1<<19) /* S */ +#define REDISMODULE_NOTIFY_SUBKEYEVENT (1<<20) /* T */ +#define REDISMODULE_NOTIFY_SUBKEYSPACEITEM (1<<21) /* I */ +#define REDISMODULE_NOTIFY_SUBKEYSPACEEVENT (1<<22) /* V */ +#define REDISMODULE_NOTIFY_ARRAY (1<<23) /* a, array key space notification */ +#ifdef ENABLE_GCRA +#define REDISMODULE_NOTIFY_RATE_LIMIT (1<<24) /* r, rate limit event */ +#endif /* Next notification flag, must be updated when adding new flags above! This flag should not be used directly by the module. * Use RedisModule_GetKeyspaceNotificationFlagsAll instead. */ -#define _REDISMODULE_NOTIFY_NEXT (1<<19) +#ifdef ENABLE_GCRA +#define _REDISMODULE_NOTIFY_NEXT (1<<25) +#else +#define _REDISMODULE_NOTIFY_NEXT (1<<24) +#endif -#define REDISMODULE_NOTIFY_ALL (REDISMODULE_NOTIFY_GENERIC | REDISMODULE_NOTIFY_STRING | REDISMODULE_NOTIFY_LIST | REDISMODULE_NOTIFY_SET | REDISMODULE_NOTIFY_HASH | REDISMODULE_NOTIFY_ZSET | REDISMODULE_NOTIFY_EXPIRED | REDISMODULE_NOTIFY_EVICTED | REDISMODULE_NOTIFY_STREAM | REDISMODULE_NOTIFY_MODULE) /* A */ +/* Delivery flags for RM_SubscribeToKeyspaceEventsWithSubkeys. + * These are passed in the 'flags' parameter, not in 'types'. 
*/ +#define REDISMODULE_NOTIFY_FLAG_NONE 0 /* Invoke callback for all matching events */ +#define REDISMODULE_NOTIFY_FLAG_SUBKEYS_REQUIRED (1<<0) /* Only invoke callback when subkeys are present */ + +#define REDISMODULE_NOTIFY_ALL (REDISMODULE_NOTIFY_GENERIC | REDISMODULE_NOTIFY_STRING | REDISMODULE_NOTIFY_LIST | REDISMODULE_NOTIFY_SET | REDISMODULE_NOTIFY_HASH | REDISMODULE_NOTIFY_ZSET | REDISMODULE_NOTIFY_EXPIRED | REDISMODULE_NOTIFY_EVICTED | REDISMODULE_NOTIFY_STREAM | REDISMODULE_NOTIFY_MODULE | REDISMODULE_NOTIFY_ARRAY) /* A */ /* A special pointer that we can use between the core and the module to signal * field deletion, and that is impossible to be a valid pointer. */ @@ -977,6 +994,7 @@ typedef struct RedisModuleConfigIterator RedisModuleConfigIterator; typedef int (*RedisModuleCmdFunc)(RedisModuleCtx *ctx, RedisModuleString **argv, int argc); typedef void (*RedisModuleDisconnectFunc)(RedisModuleCtx *ctx, RedisModuleBlockedClient *bc); typedef int (*RedisModuleNotificationFunc)(RedisModuleCtx *ctx, int type, const char *event, RedisModuleString *key); +typedef void (*RedisModuleNotificationWithSubkeysFunc)(RedisModuleCtx *ctx, int type, const char *event, RedisModuleString *key, RedisModuleString **subkeys, int count); typedef void (*RedisModulePostNotificationJobFunc) (RedisModuleCtx *ctx, void *pd); typedef void *(*RedisModuleTypeLoadFunc)(RedisModuleIO *rdb, int encver); typedef void (*RedisModuleTypeSaveFunc)(RedisModuleIO *rdb, void *value); @@ -1362,8 +1380,11 @@ REDISMODULE_API int (*RedisModule_ThreadSafeContextTryLock)(RedisModuleCtx *ctx) REDISMODULE_API void (*RedisModule_ThreadSafeContextUnlock)(RedisModuleCtx *ctx) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_SubscribeToKeyspaceEvents)(RedisModuleCtx *ctx, int types, RedisModuleNotificationFunc cb) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_UnsubscribeFromKeyspaceEvents)(RedisModuleCtx *ctx, int types, RedisModuleNotificationFunc cb) REDISMODULE_ATTR; +REDISMODULE_API int 
(*RedisModule_SubscribeToKeyspaceEventsWithSubkeys)(RedisModuleCtx *ctx, int types, int flags, RedisModuleNotificationWithSubkeysFunc cb) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_UnsubscribeFromKeyspaceEventsWithSubkeys)(RedisModuleCtx *ctx, int types, int flags, RedisModuleNotificationWithSubkeysFunc cb) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_AddPostNotificationJob)(RedisModuleCtx *ctx, RedisModulePostNotificationJobFunc callback, void *pd, void (*free_pd)(void*)) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_NotifyKeyspaceEvent)(RedisModuleCtx *ctx, int type, const char *event, RedisModuleString *key) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_NotifyKeyspaceEventWithSubkeys)(RedisModuleCtx *ctx, int type, const char *event, RedisModuleString *key, RedisModuleString **subkeys, int count) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_GetNotifyKeyspaceEvents)(void) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_BlockedClientDisconnected)(RedisModuleCtx *ctx) REDISMODULE_ATTR; REDISMODULE_API void (*RedisModule_RegisterClusterMessageReceiver)(RedisModuleCtx *ctx, uint8_t type, RedisModuleClusterMessageReceiver callback) REDISMODULE_ATTR; @@ -1412,7 +1433,7 @@ REDISMODULE_API RedisModuleUser * (*RedisModule_CreateModuleUser)(const char *na REDISMODULE_API void (*RedisModule_FreeModuleUser)(RedisModuleUser *user) REDISMODULE_ATTR; REDISMODULE_API void (*RedisModule_SetContextUser)(RedisModuleCtx *ctx, const RedisModuleUser *user) REDISMODULE_ATTR; REDISMODULE_API const RedisModuleUser *(*RedisModule_GetContextUser)(RedisModuleCtx *ctx) REDISMODULE_ATTR; -REDISMODULE_API RedisModuleString *(*RedisModule_GetUserUsername)(const RedisModuleUser *user) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleString *(*RedisModule_GetUserUsername)(RedisModuleCtx *ctx, const RedisModuleUser *user) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_SetModuleUserACL)(RedisModuleUser *user, const char* acl) REDISMODULE_ATTR; 
REDISMODULE_API int (*RedisModule_SetModuleUserACLString)(RedisModuleCtx * ctx, RedisModuleUser *user, const char* acl, RedisModuleString **error) REDISMODULE_ATTR; REDISMODULE_API RedisModuleString * (*RedisModule_GetModuleUserACLString)(RedisModuleUser *user) REDISMODULE_ATTR; @@ -1764,8 +1785,11 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(SetDisconnectCallback); REDISMODULE_GET_API(SubscribeToKeyspaceEvents); REDISMODULE_GET_API(UnsubscribeFromKeyspaceEvents); + REDISMODULE_GET_API(SubscribeToKeyspaceEventsWithSubkeys); + REDISMODULE_GET_API(UnsubscribeFromKeyspaceEventsWithSubkeys); REDISMODULE_GET_API(AddPostNotificationJob); REDISMODULE_GET_API(NotifyKeyspaceEvent); + REDISMODULE_GET_API(NotifyKeyspaceEventWithSubkeys); REDISMODULE_GET_API(GetNotifyKeyspaceEvents); REDISMODULE_GET_API(BlockedClientDisconnected); REDISMODULE_GET_API(RegisterClusterMessageReceiver); diff --git a/src/replication.c b/src/replication.c index 2ad39ab6f..44d81ba51 100644 --- a/src/replication.c +++ b/src/replication.c @@ -2251,6 +2251,11 @@ void replicationAttachToNewMaster(void) { /* Asynchronously read the SYNC payload we receive from a master */ #define REPL_MAX_WRITTEN_BEFORE_FSYNC (1024*1024*8) /* 8 MB */ void readSyncBulkPayload(connection *conn) { + /* During full sync, the functions engine is freed right before loading + * the RDB. To avoid this happening while a function is still running, + * delay full sync processing until it finishes. 
*/ + if (isInsideYieldingLongCommand()) return; + char buf[PROTO_IOBUF_LEN]; ssize_t nread, readlen, nwritten; int use_diskless_load = useDisklessLoad(); @@ -4528,7 +4533,7 @@ void replicationCacheMaster(client *c) { if (c->flags & CLIENT_MULTI) discardTransaction(c); listEmpty(c->reply); c->sentlen = 0; - c->reply_bytes = 0; + c->reply_bytes = c->reply_bytes_shared = c->reply_bytes_unshared = 0; c->bufpos = 0; resetClient(c, -1); resetClientQbufState(c); diff --git a/src/script_lua.c b/src/script_lua.c index 24ca1ad5e..b111ee30b 100644 --- a/src/script_lua.c +++ b/src/script_lua.c @@ -964,7 +964,7 @@ static int luaRedisGenericCommand(lua_State *lua, int raise_error) { ldbLogRedisReply(reply); if (reply != c->buf) sdsfree(reply); - c->reply_bytes = 0; + c->reply_bytes = c->reply_bytes_shared = c->reply_bytes_unshared = 0; cleanup: /* Clean up. Command code may have changed argv/argc so we use the diff --git a/src/sds.c b/src/sds.c index 0a940e13d..2dacb0fdf 100644 --- a/src/sds.c +++ b/src/sds.c @@ -105,7 +105,14 @@ sds _sdsnewlen(const void *init, size_t initlen, int trymalloc) { int hdrlen = sdsHdrSize(type); size_t bufsize; - assert(initlen + hdrlen + 1 > initlen); /* Catch size_t overflow */ + if (trymalloc) { + /* protect against size_t overflow */ + if (initlen + hdrlen + 1 <= initlen) + return NULL; + } else { + assert(initlen + hdrlen + 1 > initlen); /* Catch size_t overflow */ + } + sh = trymalloc? s_trymalloc_usable(hdrlen+initlen+1, &bufsize) : s_malloc_usable(hdrlen+initlen+1, &bufsize); @@ -213,12 +220,14 @@ sds sdsdup(const sds s) { /* Free an sds string. No operation is performed if 's' is NULL. 
*/ void sdsfree(sds s) { if (s == NULL) return; - s_free((char*)s-sdsHdrSize(s[-1])); -} - -void sdsfreeusable(sds s, size_t *usable) { - if (s == NULL) return; - s_free_usable((char*)s-sdsHdrSize(s[-1]), usable); + if (sdsType(s) == SDS_TYPE_5) { + /* TYPE_5 has no alloc field so sdsAllocSize() returns the requested + * size which may not match the actual allocation, so not suitable for + * s_free_with_size(). */ + s_free(sdsAllocPtr(s)); + } else { + s_free_with_size(sdsAllocPtr(s), sdsAllocSize(s)); + } } /* Generic version of sdsfree. */ diff --git a/src/sds.h b/src/sds.h index 3c02d7d39..b921dc0cb 100644 --- a/src/sds.h +++ b/src/sds.h @@ -267,7 +267,6 @@ sds sdsempty(void); sds sdsdup(const sds s); void sdsfree(sds s); void sdsfreegeneric(void *s); -void sdsfreeusable(sds s, size_t *usable); sds sdsgrowzero(sds s, size_t len); sds sdscatlen(sds s, const void *t, size_t len); sds sdscat(sds s, const char *t); diff --git a/src/sdsalloc.h b/src/sdsalloc.h index 5a53d4de8..08b4d0b86 100644 --- a/src/sdsalloc.h +++ b/src/sdsalloc.h @@ -2,6 +2,9 @@ * * Copyright (c) 2006-Present, Redis Ltd. * All rights reserved. + * + * Copyright (c) 2024-present, Valkey contributors. + * All rights reserved. * * Licensed under your choice of (a) the Redis Source Available License 2.0 * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the @@ -24,6 +27,7 @@ #define s_trymalloc ztrymalloc #define s_tryrealloc ztryrealloc #define s_free zfree +#define s_free_with_size zfree_with_size #define s_malloc_usable zmalloc_usable #define s_realloc_usable zrealloc_usable #define s_trymalloc_usable ztrymalloc_usable diff --git a/src/sentinel.c b/src/sentinel.c index f6a1f75bd..372e4b640 100644 --- a/src/sentinel.c +++ b/src/sentinel.c @@ -458,6 +458,25 @@ const char *preMonitorCfgName[] = { "announce-hostnames" }; +/* Returns 1 if the string contains control characters (0x00-0x1F or 0x7F), + * which must be rejected to prevent config injection via newlines/etc. 
*/ +int sentinelStringContainsControlChars(sds s) { + for (size_t i = 0; i < sdslen(s); i++) { + unsigned char c = (unsigned char)s[i]; + if (c < 0x20 || c == 0x7F) return 1; + } + return 0; +} + +/* Append an sds value to dest, quoting it with sdscatrepr only if the value + * contains characters that need escaping (spaces, quotes, control chars, etc.). + * Simple values are appended as-is, preserving the traditional config format. */ +static sds sentinelSdscatConfigArg(sds dest, sds value) { + if (sdsneedsrepr(value)) + return sdscatrepr(dest, value, sdslen(value)); + return sdscatsds(dest, value); +} + /* This function overwrites a few normal Redis config default with Sentinel * specific defaults. */ void initSentinelConfig(void) { @@ -2048,8 +2067,13 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) { /* sentinel monitor */ master = dictGetVal(de); master_addr = sentinelGetCurrentMasterAddress(master); + + /* Pre-compute the safely-formatted master name for config serialization. + * Only quoted if it contains characters requiring escaping. 
*/ + sds qname = sentinelSdscatConfigArg(sdsempty(), master->name); + line = sdscatprintf(sdsempty(),"sentinel monitor %s %s %d %d", - master->name, announceSentinelAddr(master_addr), master_addr->port, + qname, announceSentinelAddr(master_addr), master_addr->port, master->quorum); rewriteConfigRewriteLine(state,"sentinel monitor",line,1); /* rewriteConfigMarkAsProcessed is handled after the loop */ @@ -2058,7 +2082,7 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) { if (master->down_after_period != sentinel_default_down_after) { line = sdscatprintf(sdsempty(), "sentinel down-after-milliseconds %s %ld", - master->name, (long) master->down_after_period); + qname, (long) master->down_after_period); rewriteConfigRewriteLine(state,"sentinel down-after-milliseconds",line,1); /* rewriteConfigMarkAsProcessed is handled after the loop */ } @@ -2067,7 +2091,7 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) { if (master->failover_timeout != sentinel_default_failover_timeout) { line = sdscatprintf(sdsempty(), "sentinel failover-timeout %s %ld", - master->name, (long) master->failover_timeout); + qname, (long) master->failover_timeout); rewriteConfigRewriteLine(state,"sentinel failover-timeout",line,1); /* rewriteConfigMarkAsProcessed is handled after the loop */ @@ -2077,42 +2101,38 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) { if (master->parallel_syncs != SENTINEL_DEFAULT_PARALLEL_SYNCS) { line = sdscatprintf(sdsempty(), "sentinel parallel-syncs %s %d", - master->name, master->parallel_syncs); + qname, master->parallel_syncs); rewriteConfigRewriteLine(state,"sentinel parallel-syncs",line,1); /* rewriteConfigMarkAsProcessed is handled after the loop */ } /* sentinel notification-script */ if (master->notification_script) { - line = sdscatprintf(sdsempty(), - "sentinel notification-script %s %s", - master->name, master->notification_script); + line = sdscatprintf(sdsempty(), "sentinel notification-script %s 
", qname); + line = sentinelSdscatConfigArg(line, master->notification_script); rewriteConfigRewriteLine(state,"sentinel notification-script",line,1); /* rewriteConfigMarkAsProcessed is handled after the loop */ } /* sentinel client-reconfig-script */ if (master->client_reconfig_script) { - line = sdscatprintf(sdsempty(), - "sentinel client-reconfig-script %s %s", - master->name, master->client_reconfig_script); + line = sdscatprintf(sdsempty(), "sentinel client-reconfig-script %s ", qname); + line = sentinelSdscatConfigArg(line, master->client_reconfig_script); rewriteConfigRewriteLine(state,"sentinel client-reconfig-script",line,1); /* rewriteConfigMarkAsProcessed is handled after the loop */ } /* sentinel auth-pass & auth-user */ if (master->auth_pass) { - line = sdscatprintf(sdsempty(), - "sentinel auth-pass %s %s", - master->name, master->auth_pass); + line = sdscatprintf(sdsempty(), "sentinel auth-pass %s ", qname); + line = sentinelSdscatConfigArg(line, master->auth_pass); rewriteConfigRewriteLine(state,"sentinel auth-pass",line,1); /* rewriteConfigMarkAsProcessed is handled after the loop */ } if (master->auth_user) { - line = sdscatprintf(sdsempty(), - "sentinel auth-user %s %s", - master->name, master->auth_user); + line = sdscatprintf(sdsempty(), "sentinel auth-user %s ", qname); + line = sentinelSdscatConfigArg(line, master->auth_user); rewriteConfigRewriteLine(state,"sentinel auth-user",line,1); /* rewriteConfigMarkAsProcessed is handled after the loop */ } @@ -2121,7 +2141,7 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) { if (master->master_reboot_down_after_period != 0) { line = sdscatprintf(sdsempty(), "sentinel master-reboot-down-after-period %s %ld", - master->name, (long) master->master_reboot_down_after_period); + qname, (long) master->master_reboot_down_after_period); rewriteConfigRewriteLine(state,"sentinel master-reboot-down-after-period",line,1); /* rewriteConfigMarkAsProcessed is handled after the loop */ } @@ 
-2129,7 +2149,7 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) { /* sentinel config-epoch */ line = sdscatprintf(sdsempty(), "sentinel config-epoch %s %llu", - master->name, (unsigned long long) master->config_epoch); + qname, (unsigned long long) master->config_epoch); rewriteConfigRewriteLine(state,"sentinel config-epoch",line,1); /* rewriteConfigMarkAsProcessed is handled after the loop */ @@ -2137,7 +2157,7 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) { /* sentinel leader-epoch */ line = sdscatprintf(sdsempty(), "sentinel leader-epoch %s %llu", - master->name, (unsigned long long) master->leader_epoch); + qname, (unsigned long long) master->leader_epoch); rewriteConfigRewriteLine(state,"sentinel leader-epoch",line,1); /* rewriteConfigMarkAsProcessed is handled after the loop */ @@ -2158,7 +2178,7 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) { slave_addr = master->addr; line = sdscatprintf(sdsempty(), "sentinel known-replica %s %s %d", - master->name, announceSentinelAddr(slave_addr), slave_addr->port); + qname, announceSentinelAddr(slave_addr), slave_addr->port); /* try to replace any known-slave option first if found */ if (rewriteConfigRewriteLine(state, "sentinel known-slave", sdsdup(line), 0) == 0) { rewriteConfigRewriteLine(state, "sentinel known-replica", line, 1); @@ -2176,7 +2196,7 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) { if (ri->runid == NULL) continue; line = sdscatprintf(sdsempty(), "sentinel known-sentinel %s %s %d %s", - master->name, announceSentinelAddr(ri->addr), ri->addr->port, ri->runid); + qname, announceSentinelAddr(ri->addr), ri->addr->port, ri->runid); rewriteConfigRewriteLine(state,"sentinel known-sentinel",line,1); /* rewriteConfigMarkAsProcessed is handled after the loop */ } @@ -2187,13 +2207,16 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) { while((de = dictNext(&di2)) != NULL) { sds oldname = dictGetKey(de); 
sds newname = dictGetVal(de); - line = sdscatprintf(sdsempty(), - "sentinel rename-command %s %s %s", - master->name, oldname, newname); + line = sdscatprintf(sdsempty(), "sentinel rename-command %s ", qname); + line = sentinelSdscatConfigArg(line, oldname); + line = sdscatlen(line, " ", 1); + line = sentinelSdscatConfigArg(line, newname); rewriteConfigRewriteLine(state,"sentinel rename-command",line,1); /* rewriteConfigMarkAsProcessed is handled after the loop */ } dictResetIterator(&di2); + + sdsfree(qname); } /* sentinel current-epoch is a global state valid for all the masters. */ @@ -2221,7 +2244,8 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) { /* sentinel sentinel-user. */ if (sentinel.sentinel_auth_user) { - line = sdscatprintf(sdsempty(), "sentinel sentinel-user %s", sentinel.sentinel_auth_user); + line = sdsnew("sentinel sentinel-user "); + line = sentinelSdscatConfigArg(line, sentinel.sentinel_auth_user); rewriteConfigRewriteLine(state,"sentinel sentinel-user",line,1); } else { rewriteConfigMarkAsProcessed(state,"sentinel sentinel-user"); @@ -2229,10 +2253,11 @@ void rewriteConfigSentinelOption(struct rewriteConfigState *state) { /* sentinel sentinel-pass. 
*/ if (sentinel.sentinel_auth_pass) { - line = sdscatprintf(sdsempty(), "sentinel sentinel-pass %s", sentinel.sentinel_auth_pass); + line = sdsnew("sentinel sentinel-pass "); + line = sentinelSdscatConfigArg(line, sentinel.sentinel_auth_pass); rewriteConfigRewriteLine(state,"sentinel sentinel-pass",line,1); } else { - rewriteConfigMarkAsProcessed(state,"sentinel sentinel-pass"); + rewriteConfigMarkAsProcessed(state,"sentinel sentinel-pass"); } dictResetIterator(&di); @@ -3238,6 +3263,11 @@ void sentinelConfigSetCommand(client *c) { if (!(!strcasecmp(val->ptr, "debug") || !strcasecmp(val->ptr, "verbose") || !strcasecmp(val->ptr, "notice") || !strcasecmp(val->ptr, "warning") || !strcasecmp(val->ptr, "nothing"))) goto badfmt; + } else if (!strcasecmp(option, "announce-ip")) { + if (sentinelStringContainsControlChars(val->ptr)) { + addReplyErrorFormat(c, "'%s' must not contain control characters", option); + goto exit; + } } } @@ -4045,6 +4075,11 @@ NULL return; } + if (sentinelStringContainsControlChars(c->argv[2]->ptr)) { + addReplyError(c, "Master name must not contain control characters"); + return; + } + /* If resolve-hostnames is used, actual DNS resolution may take place. * Otherwise just validate address. 
*/ @@ -4388,6 +4423,12 @@ void sentinelSetCommand(client *c) { goto seterr; } + if (sentinelStringContainsControlChars(value)) { + addReplyError(c, + "notification-script must not contain control characters"); + goto seterr; + } + if (strlen(value) && access(value,X_OK) == -1) { addReplyError(c, "Notification script seems non existing or non executable"); @@ -4407,6 +4448,12 @@ void sentinelSetCommand(client *c) { goto seterr; } + if (sentinelStringContainsControlChars(value)) { + addReplyError(c, + "client-reconfig-script must not contain control characters"); + goto seterr; + } + if (strlen(value) && access(value,X_OK) == -1) { addReplyError(c, "Client reconfiguration script seems non existing or " @@ -4450,6 +4497,13 @@ void sentinelSetCommand(client *c) { goto badfmt; } + if (sentinelStringContainsControlChars(oldname) || + sentinelStringContainsControlChars(newname)) { + addReplyError(c, + "rename-command arguments must not contain control characters"); + goto seterr; + } + /* Remove any older renaming for this command. */ dictDelete(ri->renamed_commands,oldname); diff --git a/src/server.c b/src/server.c index 40617ce80..a2eed8685 100644 --- a/src/server.c +++ b/src/server.c @@ -570,6 +570,23 @@ int dictResizeAllowed(size_t moreMem, double usedRatio) { } } +/* dbDictType prefetch callbacks. + * The main keyspace stores a kvobj as the entry's "stored key" (no_value=1). + * The state machine in memory_prefetch.c calls these hooks to: + * - Bring the kvobj head into L1 before keyCompare runs (only useful when + * the entry holds an out-of-line pointer; embedded kvobjs are already + * in cache from the entry prefetch). + * - Bring kv->ptr into L1 for RAW strings, since addReplyBulk reads it + * immediately after the lookup. */ +static void *dbDictPrefetchEntryKey(const dictEntry *de) { + return dictEntryIsKey(de) ? 
NULL : dictGetKey(de); +} + +static void *dbDictPrefetchEntryValue(const dictEntry *de) { + kvobj *kv = dictGetKey(de); + return (kv->type == OBJ_STRING && kv->encoding == OBJ_ENCODING_RAW) ? kv->ptr : NULL; +} + /* Generic hash table type where keys are Redis Objects, Values * dummy pointers. */ dictType objectKeyPointerValueDictType = { @@ -633,6 +650,8 @@ dictType dbDictType = { .no_value = 1, /* keys and values are unified (kvobj) */ .keys_are_odd = 0, /* simple kvobj (robj) struct */ .keyFromStoredKey = kvGetKey, /* get key from stored-key */ + .prefetchEntryKey = dbDictPrefetchEntryKey, + .prefetchEntryValue = dbDictPrefetchEntryValue, }; /* Db->expires */ @@ -1059,7 +1078,8 @@ static inline clientMemUsageBucket *getMemUsageBucket(size_t mem) { */ void updateClientMemoryUsage(client *c) { serverAssert(c->conn); - size_t mem = getClientMemoryUsage(c, NULL); + size_t mem = getClientMemoryUsage(c); + int type = getClientType(c); /* Now that we have the memory used by the client, remove the old * value from the old category, and add it back. */ @@ -1128,6 +1148,20 @@ int updateClientMemUsageAndBucket(client *c) { return 0; } + /* Include unshared reply bytes in the client's memory usage for eviction. + * Walking the reply buffer is costly, so skip the scan when its outcome + * cannot affect bucket placement: since 0 <= unshared <= shared, if both + * endpoints map to the same bucket the cached value is reused. */ + if (c->reply_bytes_shared > 0) { + size_t lower_bound = getClientMemoryUsage(c) - c->reply_bytes_unshared; + size_t upper_bound = lower_bound + c->reply_bytes_shared; + if (getMemUsageBucket(lower_bound) != getMemUsageBucket(upper_bound)) + updateClientUnsharedReplyBytes(c); + } else { + /* No shared bytes: clear any stale cached unshared. */ + c->reply_bytes_unshared = 0; + } + /* Update client memory usage. 
*/ updateClientMemoryUsage(c); @@ -6470,7 +6504,9 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) { "mem_total_replication_buffers:%zu\r\n", server.repl_buffer_mem + server.repl_full_sync_buffer.mem_used, "mem_replica_full_sync_buffer:%zu\r\n", server.repl_full_sync_buffer.mem_used, "mem_clients_slaves:%zu\r\n", mh->clients_slaves, - "mem_clients_normal:%zu\r\n", mh->clients_normal, + "mem_clients_normal:%zu\r\n", mh->clients_normal, /* actual memory usage (includes unshared memory, excludes shared memory) */ + "mem_clients_normal_shared:%zu\r\n", mh->clients_normal_shared, /* shared memory (not solely owned by this client) */ + "mem_clients_normal_unshared:%zu\r\n", mh->clients_normal_unshared, /* unshared memory (solely owned by this client) */ "mem_cluster_slot_migration_output_buffer:%zu\r\n", mh->asm_migrate_output_buffer, "mem_cluster_slot_migration_input_buffer:%zu\r\n", mh->asm_import_input_buffer, "mem_cluster_slot_migration_input_buffer_peak:%zu\r\n", asmGetPeakSyncBufferSize(), @@ -8098,6 +8134,10 @@ int main(int argc, char **argv) { } if (server.sentinel_mode) sentinelCheckConfigFile(); + /* Reserve dedicated used_memory slots for main + IO threads (single-writer + * fast path). See zmalloc_reserve_thread_slots(). 
*/ + zmalloc_reserve_thread_slots(server.io_threads_num); + /* Do system checks */ #ifdef __linux__ linuxMemoryWarnings(); diff --git a/src/server.h b/src/server.h index cc691abb0..6afbe8e9a 100644 --- a/src/server.h +++ b/src/server.h @@ -22,6 +22,7 @@ #include "atomicvar.h" #include "commands.h" #include "object.h" +#include "sparsearray.h" #include #include @@ -288,7 +289,10 @@ extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT]; #define ACL_CATEGORY_CONNECTION (1ULL<<18) #define ACL_CATEGORY_TRANSACTION (1ULL<<19) #define ACL_CATEGORY_SCRIPTING (1ULL<<20) -#define ACL_CATEGORY_RATE_LIMIT (1ULL<<21) +#define ACL_CATEGORY_ARRAY (1ULL<<21) +#ifdef ENABLE_GCRA +#define ACL_CATEGORY_RATE_LIMIT (1ULL<<22) +#endif /* Key-spec flags * * -------------- */ @@ -797,8 +801,15 @@ typedef enum { #define NOTIFY_OVERWRITTEN (1<<15) /* o, key overwrite notification (Note: excluded from NOTIFY_ALL) */ #define NOTIFY_TYPE_CHANGED (1<<16) /* c, key type changed notification (Note: excluded from NOTIFY_ALL) */ #define NOTIFY_KEY_TRIMMED (1<<17) /* module only key space notification, indicates a key trimmed during slot migration */ -#define NOTIFY_RATE_LIMIT (1<<18) /* r, notify rate limit event (Note: excluded from NOTIFY_ALL)*/ -#define NOTIFY_ALL (NOTIFY_GENERIC | NOTIFY_STRING | NOTIFY_LIST | NOTIFY_SET | NOTIFY_HASH | NOTIFY_ZSET | NOTIFY_EXPIRED | NOTIFY_EVICTED | NOTIFY_STREAM | NOTIFY_MODULE) /* A flag */ +#define NOTIFY_SUBKEYSPACE (1<<19) /* S, subkey-level keyspace notification */ +#define NOTIFY_SUBKEYEVENT (1<<20) /* T, subkey-level keyevent notification */ +#define NOTIFY_SUBKEYSPACEITEM (1<<21) /* I, subkey-level notification per item: channel=key\nsubkey */ +#define NOTIFY_SUBKEYSPACEEVENT (1<<22) /* V, subkey-level notification: channel=event|key */ +#define NOTIFY_ARRAY (1<<23) /* a, array notification */ +#ifdef ENABLE_GCRA +#define NOTIFY_RATE_LIMIT (1<<24) /* r, notify rate limit event (Note: excluded from NOTIFY_ALL)*/ +#endif +#define NOTIFY_ALL 
(NOTIFY_GENERIC | NOTIFY_STRING | NOTIFY_LIST | NOTIFY_SET | NOTIFY_HASH | NOTIFY_ZSET | NOTIFY_EXPIRED | NOTIFY_EVICTED | NOTIFY_STREAM | NOTIFY_MODULE | NOTIFY_ARRAY) /* A flag */ /* Using the following macro you can run code inside serverCron() with the * specified period, specified in milliseconds. @@ -860,10 +871,18 @@ typedef enum { * by a 64 bit module type ID, which has a 54 bits module-specific signature * in order to dispatch the loading to the right module, plus a 10 bits * encoding version. */ +/* Code related to GCRA is disabled by default. + * Build with -DENABLE_GCRA to compile it back in. */ + #define OBJ_MODULE 5 /* Module object. */ #define OBJ_STREAM 6 /* Stream object. */ -#define OBJ_GCRA 7 /* GCRA object. */ +#define OBJ_ARRAY 7 /* Array object. */ +#ifdef ENABLE_GCRA +#define OBJ_GCRA 8 /* GCRA object. */ +#define OBJ_TYPE_MAX 9 /* Maximum number of object types */ +#else #define OBJ_TYPE_MAX 8 /* Maximum number of object types */ +#endif /* NOTE: adding a new object requires changes in the following places: * - rdb.c - save/load (also bump RDB_VERSION if needed) @@ -872,7 +891,15 @@ typedef enum { * - debug.c - xorObjectDigest, serverLogObjectDebugInfo * - defrag.c - defragKey * - module.c - RM_KeyType (and add the new keytype to redismodule.h) - * - object.c - object(create/free/dismiss/allocSize/Length) */ + * - object.c - object(create/free/dismiss/allocSize/Length) + * - tests/support/util.tcl:generate_fuzzy_traffic_on_key - add command(s) for the new object type to the `commands` dict. 
+ * + * If the new object type requires new command group make sure to update the following: + * - src/commands/command-docs.json - update the group:oneOf map with the new group + * - utils/generate-command-code.py - add the new group to GROUPS and COMMAND_GROUP_STR arrays + * - src/acl.c - add the new group to ACLDefaultCommandCategories array + * - src/server.h - add the new group to redisCommandGroup enum + * - if needed add new KSN type related to the group - search for NOTIFY_* and REDISMODULE_NOTIFY_* defines. */ /* Extract encver / signature from a module type ID. */ #define REDISMODULE_TYPE_ENCVER_BITS 10 @@ -1488,6 +1515,8 @@ typedef struct client { long bulklen; /* Length of bulk argument in multi bulk request. */ list *reply; /* List of reply objects to send to the client. */ unsigned long long reply_bytes; /* Tot bytes of objects in reply list. */ + unsigned long long reply_bytes_shared; /* Bytes shared with keyspace objects in reply list. */ + unsigned long long reply_bytes_unshared; /* Cached subset of reply_bytes_shared solely owned by this client. */ list *deferred_reply_errors; /* Used for module thread safe contexts. */ size_t sentlen; /* Amount of bytes already sent in the current buffer or object being sent. 
*/ @@ -1793,6 +1822,8 @@ struct redisMemOverhead { size_t replica_fullsync_buffer; size_t clients_slaves; size_t clients_normal; + size_t clients_normal_shared; + size_t clients_normal_unshared; size_t cluster_links; size_t aof_buffer; size_t eval_caches; @@ -2103,6 +2134,8 @@ struct redisServer { long long slowlog_entry_id; /* SLOWLOG current entry ID */ long long slowlog_log_slower_than; /* SLOWLOG time limit (to get logged) */ unsigned long slowlog_max_len; /* SLOWLOG max number of items logged */ + unsigned long slowlog_max_string_len; /* SLOWLOG max string length of a command's argument logged */ + int slowlog_max_argc; /* SLOWLOG max argument count per command logged */ long long stat_slowlog_count; /* Total slowlog entries ever pushed */ long long stat_slowlog_time_us_sum; /* Sum of all slowlog entry durations (usec) */ long long stat_slowlog_time_us_max; /* Max slowlog entry duration (usec) */ @@ -2425,6 +2458,10 @@ struct redisServer { /* Stream IDMP parameters */ long long stream_idmp_duration; /* Default IDMP duration in seconds. */ long long stream_idmp_maxsize; /* Default IDMP max entries. */ + /* Array parameters */ + uint32_t array_slice_size; /* Slice size for new arrays */ + uint32_t array_sparse_kmax; /* Max elements before sparse->dense */ + uint32_t array_sparse_kmin; /* Min elements before dense->sparse */ /* List parameters */ int list_max_listpack_size; int list_compress_depth; @@ -2622,6 +2659,7 @@ enum { PENDING_CMD_FLAG_INCOMPLETE = 1 << 0, /* Command parsing is incomplete, still waiting for more data */ PENDING_CMD_FLAG_PREPROCESSED = 1 << 1, /* This command has passed pre-processing */ PENDING_CMD_KEYS_RESULT_VALID = 1 << 2, /* Command's keys_result is valid and cached */ + PENDING_CMD_KEYS_PREFETCHED = 1 << 3, /* Command's keys were prefetched by the cross-command batch */ }; /* Parser state and parse result of a command from a client's input buffer. 
*/ @@ -2783,8 +2821,11 @@ typedef enum { COMMAND_GROUP_GEO, COMMAND_GROUP_STREAM, COMMAND_GROUP_BITMAP, + COMMAND_GROUP_ARRAY, COMMAND_GROUP_MODULE, +#ifdef ENABLE_GCRA COMMAND_GROUP_RATE_LIMIT, +#endif } redisCommandGroup; typedef void redisCommandProc(client *c); @@ -3079,7 +3120,7 @@ size_t moduleCount(void); void moduleAcquireGIL(void); int moduleTryAcquireGIL(void); void moduleReleaseGIL(void); -void moduleNotifyKeyspaceEvent(int type, const char *event, robj *key, int dbid); +void moduleNotifyKeyspaceEvent(int type, const char *event, robj *key, int dbid, robj **subkeys, int count); void firePostExecutionUnitJobs(void); void moduleCallCommandFilters(client *c); void modulePostExecutionUnitOperations(void); @@ -3107,6 +3148,7 @@ void moduleDefragEnd(void); void *moduleGetHandleByName(char *modulename); int moduleIsModuleCommand(void *module_handle, struct redisCommand *cmd); int moduleHasSubscribersForKeyspaceEvent(int type); +int moduleHasSubscribersForKeyspaceEventWithSubkeys(int type); /* pcmd */ void initPendingCommand(pendingCommand *pcmd); @@ -3145,7 +3187,6 @@ void resetClient(client *c, int num_pcmds_to_free); void resetClientQbufState(client *c); void freeClientOriginalArgv(client *c); void freeClientArgv(client *c); -void freeClientPendingCommands(client *c, int num_pcmds_to_free); void tryDeferFreeClientObject(client *c, int type, void *ptr); void freeClientDeferredObjects(client *c, int free_array); void freeClientIODeferredObjects(client *c, int free_array); @@ -3195,6 +3236,7 @@ void addReplyBigNum(client *c, const char *num, size_t len); void addReplyHumanLongDouble(client *c, long double d); void addReplyLongLong(client *c, long long ll); void addReplyLongLongFromStr(client *c, robj* str); +void addReplyUnsignedLongLong(client *c, uint64_t v); void addReplyArrayLen(client *c, long length); void addReplyMapLen(client *c, long length); void addReplySetLen(client *c, long length); @@ -3222,7 +3264,9 @@ void replaceClientCommandVector(client *c, 
int argc, robj **argv); void redactClientCommandArgument(client *c, int argc); size_t getClientOutputBufferMemoryUsage(client *c); size_t getNormalClientPendingReplyBytes(client *c); -size_t getClientMemoryUsage(client *c, size_t *output_buffer_mem_usage); +size_t getClientMemoryUsage(client *c); +void updateClientUnsharedReplyBytes(client *c); +void getClientsSharedMemoryUsage(size_t *shared_mem, size_t *unshared_mem); int freeClientsInAsyncFreeQueue(void); int closeClientOnOutputBufferLimitReached(client *c, int async); int getClientType(client *c); @@ -3824,6 +3868,9 @@ struct listpackEx *listpackExCreate(void); void listpackExAddNew(robj *o, char *field, size_t flen, char *value, size_t vlen, uint64_t expireAt); +/* Array data type. */ +robj *arrayTypeDup(robj *o); + /* Pub / Sub */ int pubsubUnsubscribeAllChannels(client *c, int notify); int pubsubUnsubscribeShardAllChannels(client *c, int notify); @@ -3842,8 +3889,10 @@ dict *getClientPubSubShardChannels(client *c); /* Keyspace events notification */ void notifyKeyspaceEvent(int type, const char *event, robj *key, int dbid); +void notifyKeyspaceEventWithSubkeys(int type, const char *event, robj *key, int dbid, robj **subkeys, int count); int keyspaceEventsStringToFlags(char *classes); sds keyspaceEventsFlagsToString(int flags); +int isSubkeyNotifyEnabled(int type); /* As part of KSN the module should not attempt to modify the key. 
Nevertheless, * RediSearch does it in some specific flows and modifies key metadata which in @@ -4238,6 +4287,7 @@ void decrCommand(client *c); void incrbyCommand(client *c); void decrbyCommand(client *c); void incrbyfloatCommand(client *c); +void increxCommand(client *c); void selectCommand(client *c); void swapdbCommand(client *c); void randomkeyCommand(client *c); @@ -4488,6 +4538,26 @@ void digestCommand(client *c); void gcraCommand(client *c); void gcraSetValueCommand(client *c); +/* Array commands (t_array.c) */ +void arsetCommand(client *c); +void argetCommand(client *c); +void ardelCommand(client *c); +void ardelrangeCommand(client *c); +void arlenCommand(client *c); +void arcountCommand(client *c); +void argetrangeCommand(client *c); +void arscanCommand(client *c); +void argrepCommand(client *c); +void aropCommand(client *c); +void arinsertCommand(client *c); +void arringCommand(client *c); +void arnextCommand(client *c); +void arseekCommand(client *c); +void arlastitemsCommand(client *c); +void arinfoCommand(client *c); +void armsetCommand(client *c); +void armgetCommand(client *c); + #if defined(__GNUC__) void *calloc(size_t count, size_t size) __attribute__ ((deprecated)); void free(void *ptr) __attribute__ ((deprecated)); diff --git a/src/slowlog.c b/src/slowlog.c index 589f7d7ef..a25cb12c5 100644 --- a/src/slowlog.c +++ b/src/slowlog.c @@ -29,12 +29,12 @@ slowlogEntry *slowlogCreateEntry(client *c, robj **argv, int argc, long long dur slowlogEntry *se = zmalloc(sizeof(*se)); int j, slargc = argc; - if (slargc > SLOWLOG_ENTRY_MAX_ARGC) slargc = SLOWLOG_ENTRY_MAX_ARGC; + if (slargc > server.slowlog_max_argc) slargc = server.slowlog_max_argc; se->argc = slargc; se->argv = zmalloc(sizeof(robj*)*slargc); for (j = 0; j < slargc; j++) { /* Logging too many arguments is a useless memory waste, so we stop - * at SLOWLOG_ENTRY_MAX_ARGC, but use the last argument to specify + * at server.slowlog_max_argc, but use the last argument to specify * how many remaining 
arguments there were in the original command. */ if (slargc != argc && j == slargc-1) { se->argv[j] = createObject(OBJ_STRING, @@ -44,13 +44,13 @@ slowlogEntry *slowlogCreateEntry(client *c, robj **argv, int argc, long long dur /* Trim too long strings as well... */ if (argv[j]->type == OBJ_STRING && sdsEncodedObject(argv[j]) && - sdslen(argv[j]->ptr) > SLOWLOG_ENTRY_MAX_STRING) + sdslen(argv[j]->ptr) > server.slowlog_max_string_len) { - sds s = sdsnewlen(argv[j]->ptr, SLOWLOG_ENTRY_MAX_STRING); + sds s = sdsnewlen(argv[j]->ptr, server.slowlog_max_string_len); s = sdscatprintf(s,"... (%lu more bytes)", (unsigned long) - sdslen(argv[j]->ptr) - SLOWLOG_ENTRY_MAX_STRING); + sdslen(argv[j]->ptr) - server.slowlog_max_string_len); se->argv[j] = createObject(OBJ_STRING,s); } else if (argv[j]->refcount == OBJ_SHARED_REFCOUNT) { se->argv[j] = argv[j]; diff --git a/src/slowlog.h b/src/slowlog.h index afe3d434f..e4eb94904 100644 --- a/src/slowlog.h +++ b/src/slowlog.h @@ -10,9 +10,6 @@ #ifndef __SLOWLOG_H__ #define __SLOWLOG_H__ -#define SLOWLOG_ENTRY_MAX_ARGC 32 -#define SLOWLOG_ENTRY_MAX_STRING 128 - /* This structure defines an entry inside the slow log list */ typedef struct slowlogEntry { robj **argv; diff --git a/src/sparsearray.c b/src/sparsearray.c new file mode 100644 index 000000000..d4945f2a7 --- /dev/null +++ b/src/sparsearray.c @@ -0,0 +1,2080 @@ +/* + * Copyright (c) 2026-Present, Redis Ltd. + * All rights reserved. + * + * Licensed under your choice of (a) the Redis Source Available License 2.0 + * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the + * GNU Affero General Public License v3 (AGPLv3). + * + * Sparse Array - A memory-efficient sparse array with 64-bit index space. + * Originally authored by: Salvatore Sanfilippo. + * + * This data structure was designed and implemented by Salvatore Sanfilippo. 
+ */ + +#include "server.h" +#include +#include + +/****************************************************************************** + * SPARSE ARRAY IMPLEMENTATION + * + * Sparse arrays are random-access sequences indexed by non-negative 64-bit + * integers. They support O(1) get/set operations and efficient iteration. + * + * Arrays use tagged pointer-sized values. 64-bit builds inline more payload, + * while 32-bit builds use narrower immediate encodings and fall back to + * arString more often. SDS strings are not used as values since the final + * bits of SDS pointers are not guaranteed to be zero. + * + * See sparsearray.h for data structure documentation and inline helpers. + * + *****************************************************************************/ + +/* ---------------------------------------------------------------------------- + * Configuration - mapped to Redis server struct for easy standalone adaptation + * -------------------------------------------------------------------------- */ + +#define ArraySliceSize server.array_slice_size +#define ArraySparseKMax server.array_sparse_kmax +#define ArraySparseKMin server.array_sparse_kmin + +/* ---------------------------------------------------------------------------- + * Allocation size tracking + * + * Every zmalloc/zfree/zrealloc that contributes to the array's footprint is + * tracked in ar->alloc_size so that kvobjAllocSize() can return an O(1) + * answer. When ar is NULL (e.g. during arFree) tracking is skipped. 
+ * -------------------------------------------------------------------------- */ + +static inline void *arAllocAndTrack(redisArray *ar, size_t size) { + size_t usable; + void *ptr = zmalloc_usable(size, &usable); + if (ar) ar->alloc_size += usable; + return ptr; +} +static inline void *arCallocAndTrack(redisArray *ar, size_t size) { + size_t usable; + void *ptr = zcalloc_usable(size, &usable); + if (ar) ar->alloc_size += usable; + return ptr; +} +static inline void arFreeAndTrack(redisArray *ar, void *ptr) { + size_t usable; + zfree_usable(ptr, &usable); + if (ar) ar->alloc_size -= usable; +} +static inline void *arReallocAndTrack(redisArray *ar, void *ptr, size_t size) { + size_t usable, old_usable; + void *newptr = zrealloc_usable(ptr, size, &usable, &old_usable); + if (ar) ar->alloc_size += usable - old_usable; + return newptr; +} + +/* Track a tagged value entering/leaving the array (arString bookkeeping). */ +static inline void arTrackValueIn(redisArray *ar, void *v) { + if (ar && arIsPtr(v)) ar->alloc_size += zmalloc_size(v); +} +static inline void arTrackValueOut(redisArray *ar, void *v) { + if (ar && arIsPtr(v)) ar->alloc_size -= zmalloc_size(v); +} + +/* ---------------------------------------------------------------------------- + * Internal helpers + * -------------------------------------------------------------------------- */ + +static inline size_t arStringHeaderSize(size_t len) { + return (len <= 32767) ? 2 : 8; +} + +size_t arStringLen(const void *ptr) { + const uint8_t *p = (const uint8_t *)ptr; + if (p[0] & 0x80) { + return ((size_t)(p[0] & 0x7F) << 8) | p[1]; + } else { + size_t len = 0; + for (int i = 0; i < 8; i++) len = (len << 8) | p[i]; + return len; + } +} + +const char *arStringData(const void *ptr) { + const uint8_t *p = (const uint8_t *)ptr; + return (const char *)(p + ((p[0] & 0x80) ? 
2 : 8)); +} + +static inline size_t arSparseAllocSize(uint32_t cap) { + size_t offsets_size = cap * sizeof(uint16_t); + size_t padding = (sizeof(void *) - (offsets_size % sizeof(void *))) % sizeof(void *); + return sizeof(arSlice) + offsets_size + padding + cap * sizeof(void *); +} + +static inline size_t arDenseAllocSize(uint32_t winsize) { + return sizeof(arSlice) + winsize * sizeof(void *); +} + +static inline uint32_t arSliceMaxIdx(arSlice *s) { + if (s->encoding == AR_SLICE_DENSE) { + return s->layout.dense.max_idx; + } else { + return s->layout.sparse.offsets[s->count - 1]; + } +} + +/* ---------------------------------------------------------------------------- + * arString type + * -------------------------------------------------------------------------- */ + +/* Allocate a new arString with the given content. + * + * We use arString instead of SDS because SDS pointers are not guaranteed to + * have the low bits zero (SDS points inside an allocation, after the header). + * Our tagged pointer scheme needs tag 00 for heap strings, so we need aligned + * pointers. zmalloc guarantees sufficient alignment. + * + * arString has two header formats: + * + * 1. Short header (2 bytes): lengths up to 32767 bytes. + * The top bit of the first byte is set, and the remaining 15 bits store + * the length in big-endian form. + * + * +--------+--------+-------------------+ + * |1LLLLLLL|LLLLLLLL| payload | + * +--------+--------+-------------------+ + * byte 0 byte 1 + * + * 2. Long header (8 bytes): lengths up to 2^63-1 bytes. + * The top bit of the first byte is clear, and the remaining 63 bits store + * the length in big-endian form. 
+ * + * +--------+--------+--------+--------+--------+--------+--------+--// + * |0LLLLLLL|LLLLLLLL|LLLLLLLL|LLLLLLLL|LLLLLLLL|LLLLLLLL|LLLLLLLL| + * +--------+--------+--------+--------+--------+--------+--------+--// + * byte 0 byte 1 byte 2 byte 3 byte 4 byte 5 byte 6 + * + * //--+--------+-------------------+ + * |LLLLLLLL| payload | + * //--+--------+-------------------+ + * byte 7 + * + * For simplicity we use a 63 bit len even when Redis is compiled with a 32 + * bit target, the overhead for strings > 32k is small. + * + * So the pointer returned by arStringNew() always points to the start of the + * header, and the string data begins immediately after the 2-byte or 8-byte + * header. */ +void *arStringNew(const char *s, size_t len) { + /* Length is stored in 63 bits; reject >= 2^63 to avoid + * hypothetical header corruption. On 32 bit builds this is guaranteed + * by size_t itself, so don't compile an always-true assertion. */ +#if SIZE_MAX > UINT32_MAX + serverAssert(len < ((size_t)1 << 63)); +#endif + size_t hdr_size = arStringHeaderSize(len); + uint8_t *ptr = zmalloc(hdr_size + len); + + if (hdr_size == 2) { + /* Short header: MSB=1, 15-bit length */ + ptr[0] = 0x80 | ((len >> 8) & 0x7F); + ptr[1] = len & 0xFF; + } else { + /* Long header: MSB=0, 63-bit length in big-endian */ + for (int i = 7; i >= 0; i--) { + ptr[7 - i] = (len >> (i * 8)) & 0xFF; + } + } + + memcpy(ptr + hdr_size, s, len); + return ptr; +} + +/* Free arString pointer */ +void arStringFree(void *ptr) { + zfree(ptr); +} + +/* Duplicate an arString */ +void *arStringDup(void *ptr) { + size_t len = arStringLen(ptr); + size_t hdr_size = arStringHeaderSize(len); + size_t total = hdr_size + len; + void *dup = zmalloc(total); + memcpy(dup, ptr, total); + return dup; +} + +/* Free arString if value is pointer-tagged, otherwise nothing to + * free, the info is encoded in the pointer itself. 
*/
+void arFreePtr(void *v) {
+    if (!arIsPtr(v)) return;
+    arStringFree(v);
+}
+
+/* ----------------------------------------------------------------------------
+ * Slice allocation and management
+ * -------------------------------------------------------------------------- */
+
+/* Build an empty dense slice whose initial window (AR_SLICE_MIN_ALLOC items)
+ * covers rel_idx, the index relative to the slice base. */
+arSlice *arSliceDenseNew(redisArray *ar, uint32_t rel_idx, uint32_t slice_size) {
+    const uint32_t win = AR_SLICE_MIN_ALLOC;
+
+    /* The window normally starts at rel_idx. If it would cross the slice
+     * boundary it is moved left: for example, with slice size 4096 (the
+     * default), a slice created around relative index 4093 gets its
+     * window shifted so that it ends exactly at the boundary. */
+    uint32_t start = rel_idx;
+    if (start + win > slice_size) start = slice_size - win;
+
+    arSlice *slice = arAllocAndTrack(ar, arDenseAllocSize(win));
+    void **items = (void **)(slice + 1); /* Payload starts after struct */
+
+    slice->encoding = AR_SLICE_DENSE;
+    slice->count = 0;
+    slice->layout.dense.offset = start;
+    slice->layout.dense.winsize = win;
+    slice->layout.dense.max_idx = 0;
+    slice->layout.dense.items = items;
+    memset(items, 0, win * sizeof(void *));
+    return slice;
+}
+
+/* Sparse slices are a single allocation: the arSlice struct followed by
+ * a payload containing offsets[] and values[]. This function computes
+ * where these arrays live in the payload and sets the pointers accordingly.
+ * Must be called after zmalloc or memcpy, since copied slices have stale
+ * pointers that still reference the source allocation's memory. The values
+ * array requires pointer alignment, hence the padding after offsets[].
*/
+void arSparseSetupPointers(arSlice *s) {
+    /* The payload begins right after the struct. */
+    char *p = (char *)(s + 1);
+    size_t offsets_size = s->layout.sparse.cap * sizeof(uint16_t);
+    /* Bytes needed to round offsets[] up to a multiple of sizeof(void *);
+     * same formula used by arSparseAllocSize(). */
+    size_t padding = (sizeof(void *) - (offsets_size % sizeof(void *))) % sizeof(void *);
+    s->layout.sparse.offsets = (uint16_t *)p;
+    s->layout.sparse.values = (void **)(p + offsets_size + padding);
+}
+
+/* Create a new sparse slice.
+ *
+ * The slice starts empty (count == 0) with capacity for 4 entries, or for
+ * ArraySparseKMax entries when that limit is smaller than 4. */
+arSlice *arSliceSparseNew(redisArray *ar) {
+    uint32_t cap = (ArraySparseKMax < 4) ? ArraySparseKMax : 4;
+    arSlice *s = arAllocAndTrack(ar, arSparseAllocSize(cap));
+    s->encoding = AR_SLICE_SPARSE;
+    s->count = 0;
+    s->layout.sparse.cap = cap;
+    arSparseSetupPointers(s);
+    return s;
+}
+
+/* Free a slice (including all arString values inside).
+ * When ar is non-NULL, deducts the memory from ar->alloc_size.
+ * Pass NULL for ar when the entire array is being destroyed (arFree).
+ * A NULL slice is accepted and ignored. */
+void arSliceFree(redisArray *ar, arSlice *s) {
+    if (!s) return;
+
+    if (s->encoding == AR_SLICE_DENSE) {
+        /* Dense: walk the whole window; arTrackValueOut() and arFreePtr()
+         * only act on pointer-tagged values. */
+        for (uint32_t i = 0; i < s->layout.dense.winsize; i++) {
+            arTrackValueOut(ar, s->layout.dense.items[i]);
+            arFreePtr(s->layout.dense.items[i]);
+        }
+    } else {
+        /* Sparse: only the first 'count' entries are in use. */
+        void **values = s->layout.sparse.values;
+        for (uint32_t i = 0; i < s->count; i++) {
+            arTrackValueOut(ar, values[i]);
+            arFreePtr(values[i]);
+        }
+    }
+    arFreeAndTrack(ar, s);
+}
+
+/* Grow dense slice to accommodate rel_idx (right growth).
+ *
+ * Returns the slice to use from now on: either the reallocated 's' (fast
+ * path) or a newly allocated slice, in which case 's' has been released.
+ * In both cases the caller must stop using the old pointer. */
+arSlice *arSliceDenseGrowRight(redisArray *ar, arSlice *s, uint32_t rel_idx, uint32_t slice_size) {
+    uint32_t new_winsize = s->layout.dense.winsize;
+
+    /* Double until rel_idx fits */
+    while (rel_idx >= s->layout.dense.offset + new_winsize && new_winsize < slice_size) {
+        new_winsize <<= 1;
+    }
+
+    uint32_t new_offset = s->layout.dense.offset;
+    if (new_winsize >= slice_size) {
+        /* The window covers the whole slice. */
+        new_winsize = slice_size;
+        new_offset = 0;
+    } else if (new_offset + new_winsize > slice_size) {
+        /* Window would exceed slice boundary, adjust offset */
+        new_offset = slice_size - new_winsize;
+    }
+
+    /* Fast path: when offset does not move, we can use realloc() to grow
+     * the dense allocation without relocating existing items ourselves. */
+    if (new_offset == s->layout.dense.offset) {
+        uint32_t old_winsize = s->layout.dense.winsize;
+        arSlice *ns = arReallocAndTrack(ar, s, arDenseAllocSize(new_winsize));
+        ns->layout.dense.winsize = new_winsize;
+        /* The allocation may have moved: re-derive the items pointer. */
+        ns->layout.dense.items = (void **)(ns + 1);
+
+        /* New tail must be explicitly zeroed for arIsEmpty() semantics. */
+        memset(ns->layout.dense.items + old_winsize, 0,
+               (new_winsize - old_winsize) * sizeof(void *));
+        return ns;
+    }
+
+    /* Data copy path: offset moved, so we allocate a new slice and copy. */
+    arSlice *ns = arAllocAndTrack(ar, arDenseAllocSize(new_winsize));
+    ns->encoding = AR_SLICE_DENSE;
+    ns->count = s->count;
+    ns->layout.dense.offset = new_offset;
+    ns->layout.dense.winsize = new_winsize;
+    ns->layout.dense.max_idx = s->layout.dense.max_idx;
+    ns->layout.dense.items = (void **)(ns + 1);
+
+    /* Zero-fill first to ensure arIsEmpty() works for new slots, then
+     * copy old data */
+    memset(ns->layout.dense.items, 0, new_winsize * sizeof(void *));
+    /* Position of the old window's first item inside the new window. */
+    uint32_t shift = s->layout.dense.offset - new_offset;
+    serverAssert(shift + s->layout.dense.winsize <= new_winsize);
+    memcpy(ns->layout.dense.items + shift, s->layout.dense.items, s->layout.dense.winsize * sizeof(void *));
+
+    /* Values were moved by pointer: only the old slice shell is freed. */
+    arFreeAndTrack(ar, s);
+    return ns;
+}
+
+/* Grow dense slice to accommodate rel_idx (left growth with slack).
+ * Note that in this case no realloc() optimization is possible, still
+ * we can grow on the left more than needed (next power of two) so if
+ * there is a right-to-left access pattern we can cope.
*/
+/* Always returns a newly allocated slice; the slice passed as 's' is
+ * released before returning, so the caller must switch to the new pointer. */
+arSlice *arSliceDenseGrowLeft(redisArray *ar, arSlice *s, uint32_t rel_idx, uint32_t slice_size) {
+    /* One past the last slice-relative index covered by the old window. */
+    uint32_t old_end = s->layout.dense.offset + s->layout.dense.winsize;
+    /* Items needed to span from rel_idx up to the old window end. */
+    uint32_t need = old_end - rel_idx;
+
+    /* Find next power of two that fits */
+    uint32_t new_winsize = nearestNextPowerOf2(need);
+    if (new_winsize < AR_SLICE_MIN_ALLOC) new_winsize = AR_SLICE_MIN_ALLOC;
+    if (new_winsize > slice_size) new_winsize = slice_size;
+
+    /* Position the window so that the old data is right-aligned (leaving
+     * slack on left) */
+    int32_t new_offset = (int32_t)old_end - (int32_t)new_winsize;
+    if (new_offset < 0) new_offset = 0;
+    if (new_winsize == slice_size) new_offset = 0;
+
+    arSlice *ns = arAllocAndTrack(ar, arDenseAllocSize(new_winsize));
+    ns->encoding = AR_SLICE_DENSE;
+    ns->count = s->count;
+    ns->layout.dense.offset = (uint32_t)new_offset;
+    ns->layout.dense.winsize = new_winsize;
+    ns->layout.dense.max_idx = s->layout.dense.max_idx;
+    ns->layout.dense.items = (void **)(ns + 1);
+
+    /* Zero-fill for arIsEmpty() semantics, then copy old data right-aligned */
+    memset(ns->layout.dense.items, 0, new_winsize * sizeof(void *));
+    /* Position of the old window's first item inside the new window. */
+    uint32_t shift = s->layout.dense.offset - ns->layout.dense.offset;
+    serverAssert(shift + s->layout.dense.winsize <= new_winsize);
+    memcpy(ns->layout.dense.items + shift, s->layout.dense.items, s->layout.dense.winsize * sizeof(void *));
+
+    /* Values were moved by pointer: only the old slice shell is freed. */
+    arFreeAndTrack(ar, s);
+    return ns;
+}
+
+/* Grow dense slice if rel_idx is outside the current window. Returns a new
+ * slice, or the old pointer if the current slice can already accommodate the
+ * index.
*/
+arSlice *arSliceDenseGrowIfNeeded(redisArray *ar, arSlice *s, uint32_t rel_idx, uint32_t slice_size) {
+    if (rel_idx >= s->layout.dense.offset + s->layout.dense.winsize) {
+        /* Index is past the right edge of the window. */
+        return arSliceDenseGrowRight(ar, s, rel_idx, slice_size);
+    } else if (rel_idx < s->layout.dense.offset) {
+        /* Index is before the left edge of the window. */
+        return arSliceDenseGrowLeft(ar, s, rel_idx, slice_size);
+    }
+    return s;
+}
+
+/* Binary search in sparse slice.
+ * Returns index where rel_idx is or should be (the two cases
+ * can be distinguished via 'found').
+ *
+ * The search is a lower bound over offsets[0..count): the returned position
+ * is the first entry whose offset is >= rel_idx, or 'count' if there is
+ * none. '*found' is set to 1 only when that entry is exactly rel_idx. */
+uint32_t arSparseFindPos(arSlice *s, uint16_t rel_idx, int *found) {
+    uint16_t *offsets = s->layout.sparse.offsets;
+    uint32_t lo = 0, hi = s->count;
+    while (lo < hi) {
+        uint32_t mid = lo + (hi - lo) / 2;
+        if (offsets[mid] < rel_idx) {
+            lo = mid + 1;
+        } else {
+            hi = mid;
+        }
+    }
+    *found = (lo < s->count && offsets[lo] == rel_idx);
+    return lo;
+}
+
+/* Promote sparse slice to dense.
+ *
+ * Returns the new dense slice; the sparse slice 's' is released. Stored
+ * values are moved by pointer into the dense window, they are not
+ * duplicated nor freed. */
+arSlice *arSparsePromote(redisArray *ar, arSlice *s, uint32_t slice_size) {
+    if (s->count == 0) {
+        /* Nothing to carry over: replace with a fresh dense slice. */
+        arFreeAndTrack(ar, s);
+        return arSliceDenseNew(ar, 0, slice_size);
+    }
+
+    uint16_t *offsets = s->layout.sparse.offsets;
+    void **values = s->layout.sparse.values;
+
+    /* The first and last offsets are taken as the extremes of the range
+     * the dense window has to cover. */
+    uint32_t min_off = offsets[0];
+    uint32_t max_off = offsets[s->count - 1];
+    uint32_t need = max_off - min_off + 1;
+
+    uint32_t winsize = nearestNextPowerOf2(need);
+    if (winsize < AR_SLICE_MIN_ALLOC) winsize = AR_SLICE_MIN_ALLOC;
+
+    uint32_t offset = min_off;
+    if (winsize >= slice_size) {
+        /* The window covers the whole slice. */
+        winsize = slice_size;
+        offset = 0;
+    } else if (offset + winsize > slice_size) {
+        /* Window would exceed slice boundary, adjust offset */
+        offset = slice_size - winsize;
+    }
+
+    arSlice *d = arAllocAndTrack(ar, arDenseAllocSize(winsize));
+    d->encoding = AR_SLICE_DENSE;
+    d->count = s->count;
+    d->layout.dense.offset = offset;
+    d->layout.dense.winsize = winsize;
+    d->layout.dense.max_idx = max_off;
+    d->layout.dense.items = (void **)(d + 1);
+
+    /* Set the entries in the sparse representation into the
+     * new dense slice. */
+    memset(d->layout.dense.items, 0, winsize * sizeof(void *));
+    for (uint32_t i = 0; i < s->count; i++) {
+        serverAssert(offsets[i] >= offset);
+        serverAssert(offsets[i] - offset < winsize);
+        d->layout.dense.items[offsets[i] - offset] = values[i];
+    }
+
+    arFreeAndTrack(ar, s);
+    return d;
+}
+
+/* Demote the provided dense slice to a sparse slice, if beneficial.
+ * The function returns the dense slice given in input if not demoted,
+ * otherwise the newly created sparse slice containing the same elements
+ * is returned, in this case, as a side effect, the dense slice in
+ * input is freed. */
+arSlice *arDenseMaybeDemote(redisArray *ar, arSlice *d) {
+    if (ArraySparseKMax == 0) return d; // Sparse is disabled by config.
+    if (d->count > ArraySparseKMin) return d; // Yet not at demotion level.
+    if (d->count > ArraySparseKMax) return d; // Just config sanity check.
+    if (d->layout.dense.winsize == AR_SLICE_MIN_ALLOC) return d; // Already small.
+
+    /* Only demote if it actually saves memory. We require the dense slice
+     * to be significantly larger than sparse would be (at least 25% bigger),
+     * and large enough in absolute terms (4x kmin) to be worth the trouble. */
+    size_t dense_bytes = arDenseAllocSize(d->layout.dense.winsize);
+    size_t sparse_bytes = arSparseAllocSize(ArraySparseKMin);
+    if (d->layout.dense.winsize < 4 * ArraySparseKMin) return d;
+    if (dense_bytes < sparse_bytes * 5 / 4) return d;
+
+    /* Demote it. The sparse slice is sized for ArraySparseKMin entries,
+     * which is enough since d->count <= ArraySparseKMin was checked above. */
+    arSlice *s = arAllocAndTrack(ar, arSparseAllocSize(ArraySparseKMin));
+    s->encoding = AR_SLICE_SPARSE;
+    s->count = 0;
+    s->layout.sparse.cap = ArraySparseKMin;
+    arSparseSetupPointers(s);
+
+    /* Copy every entry from dense to sparse. The window is scanned left to
+     * right, so offsets[] is filled in ascending order; the scan stops as
+     * soon as all d->count entries were collected. */
+    uint16_t *offsets = s->layout.sparse.offsets;
+    void **values = s->layout.sparse.values;
+    for (uint32_t i = 0; i < d->layout.dense.winsize && s->count < d->count; i++) {
+        if (!arIsEmpty(d->layout.dense.items[i])) {
+            offsets[s->count] = d->layout.dense.offset + i;
+            values[s->count] = d->layout.dense.items[i];
+            s->count++;
+        }
+    }
+
+    /* Values were moved by pointer: only the dense slice shell is freed. */
+    arFreeAndTrack(ar, d);
+    return s;
+}
+
+/* Update max_idx after deletion in dense slice. This is O(winsize) in the worst
+ * case, but we only scan when we deleted the current max, which is rare.
+ *
+ * 'deleted_off' is the slice-relative index that was just removed. If no
+ * non-empty item is found by the scan, max_idx is left unchanged. */
+void arDenseUpdateMaxIdx(arSlice *d, uint32_t deleted_off) {
+    /* Note that if the slice is left without elements, it will get
+     * deallocated so there is nothing to set. */
+    if (d->count == 0 || deleted_off < d->layout.dense.max_idx) return;
+
+    /* Scan backward from old max to find new max. */
+    for (int pos = d->layout.dense.max_idx - d->layout.dense.offset; pos >= 0; pos--) {
+        if (!arIsEmpty(d->layout.dense.items[pos])) {
+            d->layout.dense.max_idx = d->layout.dense.offset + pos;
+            return;
+        }
+    }
+}
+
+/* ----------------------------------------------------------------------------
+ * Directory management (flat mode and superdir mode)
+ *
+ * Why two modes:
+ *
+ * - Flat mode (ar->superdir == NULL): ar->dir is indexed by slice_id
+ *   (ar->dir[slice_id] -> arSlice*). This is very fast and compact while
+ *   slice IDs stay relatively low.
+ *
+ * - Superdir mode (ar->superdir != NULL): there are two levels of indirection.
+ *   Metadata (that is, pointers to actual array slices) is split into sorted
+ *   entries by block_id; each block is a fixed table of 2048 slice pointers.
+ *   That table uses about 8 KB on 32-bit builds and 16 KB on 64-bit builds.
+ *   Blocks are allocated on demand. Basically this means that what was, in
+ *   flat mode, a contiguous array of slice pointers (called the directory),
+ *   in superdir mode becomes a sparse array of directory pieces.
+ *
+ * The superdir avoids catastrophic metadata growth for sparse/high indices.
+ * A flat directory must be sized up to the highest slice_id, even if almost
+ * all entries are NULL. With very large index jumps, that would waste huge
+ * memory. Superdir keeps metadata proportional to the number of populated
+ * blocks/slices instead of the largest slice_id ever seen.
+ *
+ * Promotion trigger:
+ * - When slice_id >= AR_SUPER_BLOCK_SLOTS (2048), flat mode is promoted.
+ * - Practical meaning: slice_id is idx / slice_size.
+ *   With default slice_size=4096, threshold slice_id=2048 corresponds to
+ *   idx >= 2048*4096 = 8,388,608 (first index that needs block_id 1).
+ *
+ * Hint: here what we gain is not just efficiency. Also there are no security
+ * concerns with setting a very high index. No problem with a corrupted RDB
+ * file containing a very high index, and no need to configure a maximum index
+ * allowable in an array. Thanks to this design the array type of Redis is
+ * a more useful and safe type.
+ * -------------------------------------------------------------------------- */
+
+/* Lower-bound binary search of block_id among the superdir entries.
+ * The return value is the position of the matching entry when '*found' is
+ * set to 1, otherwise the position where such an entry should be inserted. */
+uint32_t arSuperDirFind(redisArray *ar, uint64_t block_id, int *found) {
+    uint32_t left = 0;
+    uint32_t right = ar->sdir_len;
+
+    while (left < right) {
+        uint32_t probe = left + (right - left) / 2;
+        if (ar->superdir[probe].block_id >= block_id) {
+            right = probe;
+        } else {
+            left = probe + 1;
+        }
+    }
+
+    int hit = 0;
+    if (left < ar->sdir_len && ar->superdir[left].block_id == block_id) hit = 1;
+    *found = hit;
+    return left;
+}
+
+/* Get slice pointer from superdir mode. Returns NULL if not found.
*/ +arSlice **arSuperDirGetSlot(redisArray *ar, uint64_t slice_id) { + uint64_t block_id = slice_id / AR_SUPER_BLOCK_SLOTS; + uint32_t block_off = slice_id % AR_SUPER_BLOCK_SLOTS; + + int found; + uint32_t pos = arSuperDirFind(ar, block_id, &found); + if (!found) return NULL; + + return ar->superdir[pos].slots + block_off; +} + +/* Ensure block exists in superdir, creating if needed. Returns slot pointer. */ +arSlice **arSuperDirEnsureSlot(redisArray *ar, uint64_t slice_id) { + uint64_t block_id = slice_id / AR_SUPER_BLOCK_SLOTS; + uint32_t block_off = slice_id % AR_SUPER_BLOCK_SLOTS; + + int found; + uint32_t pos = arSuperDirFind(ar, block_id, &found); + + if (!found) { + /* Need to insert new block */ + if (ar->sdir_len >= ar->sdir_cap) { + /* Grow superdir array */ + uint32_t new_cap = ar->sdir_cap ? ar->sdir_cap * 2 : 4; + ar->superdir = arReallocAndTrack(ar, ar->superdir, new_cap * sizeof(arSDirEntry)); + ar->sdir_cap = new_cap; + } + + /* Shift entries to make room */ + if (pos < ar->sdir_len) { + memmove(ar->superdir + pos + 1, ar->superdir + pos, + (ar->sdir_len - pos) * sizeof(arSDirEntry)); + } + + /* Initialize new entry */ + ar->superdir[pos].block_id = block_id; + ar->superdir[pos].count = 0; + ar->superdir[pos].slots = arCallocAndTrack(ar, AR_SUPER_BLOCK_SLOTS * sizeof(arSlice *)); + ar->sdir_len++; + } + + return ar->superdir[pos].slots + block_off; +} + +/* Look up the superdir block that contains slice_id. + * Returns a pointer to that arSDirEntry, or NULL if the block was never + * allocated (no slices currently exist in that block). */ +arSDirEntry *arSuperDirGetEntry(redisArray *ar, uint64_t slice_id) { + uint64_t block_id = slice_id / AR_SUPER_BLOCK_SLOTS; + int found; + uint32_t pos = arSuperDirFind(ar, block_id, &found); + return found ? ar->superdir + pos : NULL; +} + +/* Remove one block entry from superdir at index pos. + * We assume 'pos' is valid and the block is logically empty (count == 0). 
+ * Frees the slice-pointer table, compacts remaining entries (keeping order by + * block_id), and decrements ar->sdir_len. */ +void arSuperDirRemoveBlock(redisArray *ar, uint32_t pos) { + arFreeAndTrack(ar, ar->superdir[pos].slots); + if (pos < ar->sdir_len - 1) { + memmove(ar->superdir + pos, ar->superdir + pos + 1, + (ar->sdir_len - pos - 1) * sizeof(arSDirEntry)); + } + ar->sdir_len--; +} + +/* Promote from flat directory to superdir mode. Flat mode only ever uses + * slice_id < AR_SUPER_BLOCK_SLOTS, so all existing slices belong to block 0. */ +void arPromoteToSuperDir(redisArray *ar) { + ar->sdir_cap = 4; + ar->sdir_len = 0; + ar->superdir = arAllocAndTrack(ar, ar->sdir_cap * sizeof(arSDirEntry)); + + /* Copy existing flat dir content into block 0 */ + if (ar->dir_alloc > 0) { + ar->superdir[0].block_id = 0; + ar->superdir[0].slots = arCallocAndTrack(ar, AR_SUPER_BLOCK_SLOTS * sizeof(arSlice *)); + ar->superdir[0].count = 0; + ar->sdir_len = 1; + + /* Copy flat dir pointers to block 0, counting non-NULL */ + for (uint64_t i = 0; i < ar->dir_alloc; i++) { + ar->superdir[0].slots[i] = ar->dir[i]; + if (ar->dir[i]) ar->superdir[0].count++; + } + } + + /* Free old flat directory */ + if (ar->dir) arFreeAndTrack(ar, ar->dir); + ar->dir = NULL; + ar->dir_alloc = 0; +} + +/* Grow directory to accommodate slice_id (handles both modes, dense and + * superdir mode). */ +void arDirGrow(redisArray *ar, uint64_t slice_id) { + /* Check if promotion to superdir is needed */ + if (ar->superdir == NULL && slice_id >= AR_SUPER_BLOCK_SLOTS) { + arPromoteToSuperDir(ar); + } + + if (ar->superdir) { + /* Superdir allocates blocks on-demand in arSetSlice(), so we don't + * allocate a 2048-pointer block for ranges that end up empty. */ + return; + } + + /* Flat mode: grow directory if needed */ + if (slice_id < ar->dir_alloc) return; + + uint64_t new_alloc = ar->dir_alloc ? 
ar->dir_alloc : 1; + + /* Grow geometrically and stop at the first power-of-two size + * that can index slice_id. Note that thanks to superdir mode the + * size of this table of pointers is bound. */ + while (new_alloc <= slice_id) { + new_alloc <<= 1; + } + + arSlice **new_dir = arReallocAndTrack(ar, ar->dir, new_alloc * sizeof(arSlice *)); + + /* Zero-fill new slots */ + memset(new_dir + ar->dir_alloc, 0, (new_alloc - ar->dir_alloc) * sizeof(arSlice *)); + ar->dir = new_dir; + ar->dir_alloc = new_alloc; +} + +/* Maybe shrink directory after freeing a slice (flat mode only). + * Since dir_alloc is always a power of two, we can only shrink by halving. + * So shrinking only happens when dir_highest_used < dir_alloc/2. The 90% + * check is just a quick early-out to skip the loop in the common case. */ +void arDirMaybeShrink(redisArray *ar) { + if (ar->superdir) return; /* Superdir mode: blocks freed individually */ + if (ar->count == 0) return; /* Will be deleted anyway */ + if (ar->dir_highest_used >= ar->dir_alloc * 9 / 10) return; + + /* Find smallest power of two > dir_highest_used */ + uint64_t new_alloc = 1; + while (new_alloc <= ar->dir_highest_used) new_alloc <<= 1; + if (new_alloc >= ar->dir_alloc) return; + + ar->dir = arReallocAndTrack(ar, ar->dir, new_alloc * sizeof(arSlice *)); + ar->dir_alloc = new_alloc; +} + +/* Update dir_highest_used after freeing a slice. + * To always know the highest directory index used is useful + * for a number of reasons: + * 1. arLen() is O(1) this way. + * 2. We can start reverse scans from the rightmost populated directory entry. + * 3. We can shrink the directory (in flat mode) if needed, since we know + * the usage. 
*/
+void arDirUpdateHighest(redisArray *ar, uint64_t freed_id) {
+    /* Nothing to do for an empty array, or when the freed slice was not
+     * the highest one. */
+    if (ar->count == 0 || freed_id < ar->dir_highest_used) return;
+
+    if (!ar->superdir) {
+        /* Flat mode: walk down from the slot just below the freed one
+         * until a populated slot is found. */
+        int64_t id = (int64_t)freed_id;
+        while (--id >= 0) {
+            if (ar->dir[id] != NULL) {
+                ar->dir_highest_used = id;
+                return;
+            }
+        }
+        ar->dir_highest_used = 0;
+        return;
+    }
+
+    /* Superdir mode: visit blocks from the last one, skipping those with
+     * no slices, and scan the slots of each block from the top. */
+    for (int32_t bi = ar->sdir_len - 1; bi >= 0; bi--) {
+        arSDirEntry *blk = ar->superdir + bi;
+        if (blk->count == 0) continue;
+
+        int32_t si = AR_SUPER_BLOCK_SLOTS - 1;
+        while (si >= 0) {
+            if (blk->slots[si] != NULL) {
+                ar->dir_highest_used = blk->block_id * AR_SUPER_BLOCK_SLOTS + si;
+                return;
+            }
+            si--;
+        }
+    }
+    ar->dir_highest_used = 0;
+}
+
+/* Return the slice registered for slice_id (the logical array index divided
+ * by the elements-per-slice), in flat or superdir mode. NULL means that no
+ * slice is allocated for that slice_id. */
+arSlice *arGetSlice(redisArray *ar, uint64_t slice_id) {
+    if (!ar->superdir) {
+        return (slice_id < ar->dir_alloc) ? ar->dir[slice_id] : NULL;
+    }
+
+    arSlice **slot = arSuperDirGetSlot(ar, slice_id);
+    if (slot == NULL) return NULL;
+    return *slot;
+}
+
+/* Set slice pointer in the directory. In superdir mode, setting to NULL
+ * decrements the block's slice count and frees the block if it becomes empty.
+ * Setting to non-NULL allocates the block if needed.
*/
+void arSetSlice(redisArray *ar, uint64_t slice_id, arSlice *s) {
+    if (ar->superdir) {
+        if (s == NULL) {
+            /* Clearing a slot: this is the only path that needs to locate
+             * the block entry, so the lookup is done here and not before
+             * the branch. */
+            uint64_t block_id = slice_id / AR_SUPER_BLOCK_SLOTS;
+            uint32_t block_off = slice_id % AR_SUPER_BLOCK_SLOTS;
+
+            int found;
+            uint32_t pos = arSuperDirFind(ar, block_id, &found);
+            if (!found) return; /* Block doesn't exist, nothing to do */
+
+            arSDirEntry *entry = ar->superdir + pos;
+            if (entry->slots[block_off] != NULL) {
+                entry->slots[block_off] = NULL;
+                entry->count--;
+                ar->num_slices--;
+                /* Remove empty block */
+                if (entry->count == 0) {
+                    arSuperDirRemoveBlock(ar, pos);
+                }
+            }
+        } else {
+            /* Setting to non-NULL: ensure block exists */
+            arSlice **slot = arSuperDirEnsureSlot(ar, slice_id);
+            arSDirEntry *entry = arSuperDirGetEntry(ar, slice_id);
+            if (*slot == NULL) {
+                entry->count++;
+                ar->num_slices++;
+            }
+            *slot = s;
+        }
+        return;
+    }
+
+    /* Flat mode. arGetSlice() treats ids past dir_alloc as "no slice", so
+     * clearing such an id is a no-op here as well (same as the missing
+     * block case above). Storing a slice there would write outside the
+     * directory: the caller must have grown it first. */
+    if (slice_id >= ar->dir_alloc) {
+        serverAssert(s == NULL);
+        return;
+    }
+
+    /* Keep num_slices in sync with NULL <-> non-NULL transitions only. */
+    if (s == NULL && ar->dir[slice_id] != NULL) ar->num_slices--;
+    else if (s != NULL && ar->dir[slice_id] == NULL) ar->num_slices++;
+    ar->dir[slice_id] = s;
+}
+
+/* ----------------------------------------------------------------------------
+ * Value encoding
+ * -------------------------------------------------------------------------- */
+
+/* Try to encode string as immediate integer.
+ * Returns 1 and stores the tagged value in *out when the string parses as
+ * an integer that fits the inline representation, otherwise returns 0 and
+ * leaves *out untouched. */
+int arTryEncodeInt(const char *s, size_t len, void **out) {
+    long long ll;
+    if (string2ll(s, len, &ll) && arIntFits(ll)) {
+        *out = arFromInt(ll);
+        return 1;
+    }
+    return 0;
+}
+
+/* Try to encode string as immediate float.
+ *
+ * The local immediate float encoding clears the low 2 bits of the underlying
+ * floating-point payload to make room for the tag. On 64-bit builds we do it
+ * on the IEEE-754 double bits directly. On 32-bit builds we first quantize to
+ * float, then clear the low 2 bits of the float payload. We only encode if the
+ * later string representation matches the original input exactly.
+ *
+ * There's a subtlety with whole-number floats: d2string formats 1.0 as "1"
+ * (without decimal point), so "1.0" wouldn't match and would be stored as
+ * a heap string. We fix this by appending ".0" when d2string produces an
+ * integer-looking result and comparing again.
+ *
+ * Note: pure integers like "1" are handled by arTryEncodeInt first, so values
+ * reaching here that look like integers after d2string likely had ".0". */
+int arTryEncodeFloat(const char *s, size_t len, void **out) {
+    /* Cheap syntactic filter, so that obvious non-candidates never reach
+     * the expensive parse + format round trip. Accepted shape: an optional
+     * leading '-', then only digits and exactly one '.'. */
+    if (len == 0) return 0;
+    size_t pos = (s[0] == '-') ? 1 : 0;
+    if (pos == len) return 0; /* A lone "-". */
+
+    int dots = 0;
+    while (pos < len) {
+        char ch = s[pos++];
+        if (ch >= '0' && ch <= '9') continue;
+        if (ch != '.' || dots++) return 0; /* Junk, or a second '.'. */
+    }
+    if (dots != 1) return 0;
+
+    /* Expensive path: parse as double, refusing non-finite values. */
+    double d;
+    if (!string2d(s, len, &d) || isnan(d) || isinf(d)) return 0;
+
+    uint64_t payload;  /* Bits handed to arFromFloatBits(). */
+    double quantized;  /* Value those bits decode back to. */
+#if UINTPTR_MAX == UINT64_MAX
+    /* 64-bit builds: drop the tag bits straight from the double payload. */
+    uint64_t raw;
+    memcpy(&raw, &d, sizeof(raw));
+    payload = raw & ~AR_TAG_MASK;
+    memcpy(&quantized, &payload, sizeof(quantized));
+#else
+    /* 32-bit builds inline floats, not doubles: narrow first, then drop
+     * the tag bits from the float payload. */
+    float narrow = (float)d;
+    if (!isfinite(narrow)) return 0; // The cast may overflow to infinity.
+
+    uint32_t raw32;
+    memcpy(&raw32, &narrow, sizeof(raw32));
+    uint32_t masked32 = raw32 & ~(uint32_t)AR_TAG_MASK;
+
+    float narrow_q;
+    memcpy(&narrow_q, &masked32, sizeof(narrow_q));
+    payload = masked32;
+    quantized = (double)narrow_q; // Reduced precision float here.
+#endif
+
+    /* Round-trip check: the quantized value must print back as the input.
+     * Two bytes are kept in reserve for the ".0" retry below. */
+    char repr[AR_INLINE_BUFSIZE];
+    int n = d2string(repr, sizeof(repr) - 2, quantized);
+    int same = ((size_t)n == len && memcmp(repr, s, len) == 0);
+
+    if (!same) {
+        /* d2string(1.0) returns "1", but when floats are later converted
+         * back to strings ".0" is restored for integer-looking values, so
+         * inputs like "1.0" can still round-trip exactly. The copy below
+         * writes '.', '0' and the terminating NUL. */
+        memcpy(repr + n, ".0", 3);
+        n += 2;
+        same = ((size_t)n == len && memcmp(repr, s, len) == 0);
+    }
+
+    if (!same) return 0;
+    *out = arFromFloatBits(payload);
+    return 1;
+}
+
+/* Format a float in the canonical string form exposed by arrays.
+ * buf must be at least AR_INLINE_BUFSIZE bytes. We use d2string() for the
+ * shortest round-trippable representation, then restore ".0" for
+ * integer-looking finite values so decoded/replied floats match the logical
+ * form expected by array persistence and encoding checks.
*/
+int arFormatFloat(double d, char *buf, size_t bufsize) {
+    serverAssert(bufsize >= AR_INLINE_BUFSIZE);
+    /* Two bytes are kept in reserve for the optional ".0" suffix. */
+    int len = d2string(buf, bufsize - 2, d);
+    if (isfinite(d) && !memchr(buf, '.', len) && !memchr(buf, 'e', len) &&
+        !memchr(buf, 'E', len)) {
+        serverAssert((size_t)len + 2 < bufsize);
+        buf[len++] = '.';
+        buf[len++] = '0';
+        buf[len] = '\0';
+    }
+    return len;
+}
+
+/* Encode a string into a tagged value. The encodings are tried from the
+ * most compact to the most general one: immediate integer, immediate
+ * float, inline small string, and finally a heap allocated arString. */
+void *arEncode(const char *s, size_t len) {
+    void *v;
+
+    /* Try integer first */
+    if (arTryEncodeInt(s, len, &v)) {
+        return v;
+    }
+
+    /* Try float */
+    if (arTryEncodeFloat(s, len, &v)) {
+        return v;
+    }
+
+    /* Try small string (architecture-dependent inline limit). */
+    if (len <= AR_SMALLSTR_MAXLEN) {
+        return arFromSmallStr(s, (int)len);
+    }
+
+    /* Fall back to arString (8+ bytes) */
+    return arStringNew(s, len);
+}
+
+/* Build the array value for an integer loaded from the RDB file. */
+void *arValueFromRdbInt(int64_t ival) {
+    if (arIntFits(ival)) return arFromInt(ival);
+
+    /* If the integer does not fit (i.e. loading into a 32 bit instance
+     * what was stored in the RDB by a 64 bit instance), we promote it
+     * to a plain string. */
+    char buf[32];
+    int len = ll2string(buf, sizeof(buf), ival);
+    return arStringNew(buf, len);
+}
+
+/* Build the array value for a float loaded from the RDB file, where array
+ * floats are always stored as doubles. The value is kept as an immediate
+ * float only when the local inline format can represent it exactly,
+ * otherwise it is materialized as its canonical string form. */
+void *arValueFromRdbFloat(double d) {
+#if UINTPTR_MAX == UINT64_MAX
+    /* On 64-bit, doubles are inlined directly, with the low 2 bits of the
+     * payload reserved for the tag. A double written by the saving side
+     * (arToDouble) already has those bits clear, so no string round-trip
+     * is needed. A payload with any of them set (corrupted or hand-crafted
+     * input) cannot be represented inline: instead of passing those bits
+     * to arFromFloatBits(), fall through to the string form, exactly like
+     * the 32-bit path does for values it cannot hold. */
+    uint64_t bits;
+    memcpy(&bits, &d, sizeof(bits));
+    if ((bits & AR_TAG_MASK) == 0) return arFromFloatBits(bits);
+#else
+    /* Loading on a 32 bit system is more complicated to do efficiently.
+     *
+     * RDB always stores array floats as doubles. On 32-bit systems we can
+     * only inline a float payload with the low 2 bits stolen for the tag.
+     * Simulate that exact quantization path and keep the value encoded only
+     * if it survives unchanged. */
+    uint32_t bits32;
+    uint32_t bits32_trunc;
+    float f_trunc;
+    double d_trunc;
+
+    /* Narrow to float first, then clear the low 2 payload bits that are
+     * reserved for the tagged-pointer type. */
+    float f = (float)d;
+    memcpy(&bits32, &f, sizeof(bits32));
+    bits32_trunc = bits32 & ~(uint32_t)AR_TAG_MASK;
+    memcpy(&f_trunc, &bits32_trunc, sizeof(f_trunc));
+    d_trunc = (double)f_trunc;
+
+    /* Bitwise comparison keeps signed zero distinct and tells us whether
+     * the 64-bit RDB value is exactly representable by the local 30-bit
+     * inline-float format. */
+    uint64_t bits64;
+    uint64_t bits64_trunc;
+    memcpy(&bits64, &d, sizeof(bits64));
+    memcpy(&bits64_trunc, &d_trunc, sizeof(bits64_trunc));
+    if (bits64 == bits64_trunc) return arFromFloatBits(bits32_trunc);
+#endif
+
+    /* Otherwise materialize the canonical string form for this float. */
+    char buf[AR_INLINE_BUFSIZE];
+    int len = arFormatFloat(d, buf, sizeof(buf));
+    return arStringNew(buf, len);
+}
+
+/* Build the array value for a small string loaded from the RDB file: kept
+ * inline if it fits the local inline limit, otherwise stored as arString. */
+void *arValueFromRdbSmallStr(const char *s, size_t len) {
+    if (len <= AR_SMALLSTR_MAXLEN) return arFromSmallStr(s, (int)len);
+    return arStringNew(s, len);
+}
+
+/* Decode a tagged value into raw bytes.
+ * For inline values, buf must point to at least AR_INLINE_BUFSIZE bytes and
+ * the returned pointer will be buf. For arString values, the returned pointer
+ * aliases the string payload directly. Returns NULL if value is empty.
+ *
+ * This is a helper function used for AOF rewriting, AROP string "MATCH"
+ * and DEBUG DIGEST.
*/
+const char *arDecode(void *v, char *buf, size_t bufsize, size_t *outlen) {
+    serverAssert(bufsize >= AR_INLINE_BUFSIZE);
+    if (arIsEmpty(v)) {
+        if (outlen) *outlen = 0;
+        return NULL;
+    }
+
+    if (arIsInt(v)) {
+        int64_t ival = arToInt(v);
+        /* Pass the real capacity of the caller's buffer, not a constant
+         * unrelated to it. */
+        int len = ll2string(buf, bufsize, ival);
+        if (outlen) *outlen = len;
+        return buf;
+    }
+
+    if (arIsFloat(v)) {
+        double d = arToDouble(v);
+        int len = arFormatFloat(d, buf, bufsize);
+        if (outlen) *outlen = len;
+        return buf;
+    }
+
+    if (arIsSmallStr(v)) {
+        int len = arSmallStrLen(v);
+        if (outlen) *outlen = len;
+        arToSmallStr(v, buf);
+        return buf;
+    }
+
+    /* arString pointer: no copy, the payload itself is returned. */
+    size_t len = arStringLen(v);
+    if (outlen) *outlen = len;
+    return arStringData(v);
+}
+
+
+/* ----------------------------------------------------------------------------
+ * Array lifecycle
+ * -------------------------------------------------------------------------- */
+
+/* Create a new empty array: no directory, no superdir, and the slice size
+ * taken from the current configuration. */
+redisArray *arNew(void) {
+    redisArray *ar = zmalloc(sizeof(redisArray));
+    ar->count = 0;
+    ar->insert_idx = AR_INSERT_IDX_NONE;
+    ar->dir_alloc = 0;
+    ar->dir_highest_used = 0;
+    ar->num_slices = 0;
+    ar->alloc_size = zmalloc_size(ar);
+    ar->slice_size = ArraySliceSize; /* Use current config value */
+    ar->sdir_len = 0;
+    ar->sdir_cap = 0;
+    ar->dir = NULL;
+    ar->superdir = NULL;
+    return ar;
+}
+
+/* Free an array and all its contents. A NULL array is accepted and
+ * ignored. */
+void arFree(redisArray *ar) {
+    if (!ar) return;
+
+    if (ar->superdir) {
+        /* Superdir mode: free all blocks and their slices */
+        for (uint32_t i = 0; i < ar->sdir_len; i++) {
+            arSDirEntry *e = ar->superdir + i;
+            for (uint32_t j = 0; j < AR_SUPER_BLOCK_SLOTS; j++) {
+                if (e->slots[j]) arSliceFree(NULL, e->slots[j]);
+            }
+            zfree(e->slots);
+        }
+        zfree(ar->superdir);
+    } else {
+        /* Flat mode */
+        for (uint64_t i = 0; i < ar->dir_alloc; i++) {
+            if (ar->dir[i]) {
+                arSliceFree(NULL, ar->dir[i]);
+            }
+        }
+        zfree(ar->dir);
+    }
+    zfree(ar);
+}
+
+/* Dismiss a single slice's memory back to the OS.
*/
+static void arSliceDismiss(arSlice *s, int dismiss_values) {
+    /* Pick the value vector, how many entries of it to visit, and the size
+     * of the slice allocation, according to the slice encoding. */
+    void **vals;
+    uint32_t nvals;
+    size_t slice_bytes;
+
+    if (s->encoding == AR_SLICE_DENSE) {
+        vals = s->layout.dense.items;
+        nvals = s->layout.dense.winsize;
+        slice_bytes = arDenseAllocSize(s->layout.dense.winsize);
+    } else {
+        vals = s->layout.sparse.values;
+        nvals = s->count;
+        slice_bytes = arSparseAllocSize(s->layout.sparse.cap);
+    }
+
+    /* Heap strings first (only when requested), then the slice itself. */
+    if (dismiss_values) {
+        for (uint32_t i = 0; i < nvals; i++) {
+            if (!arIsPtr(vals[i])) continue;
+            dismissMemory(vals[i], arStringLen(vals[i]));
+        }
+    }
+    dismissMemory(s, slice_bytes);
+}
+
+/* See dismissObject(). The directory and the slices are always dismissed,
+ * while the single values are dismissed only when the average element size
+ * (size_hint / count) is at least one page. */
+void arDismiss(redisArray *ar, size_t size_hint) {
+    if (!ar) return;
+
+    uint64_t count = ar->count;
+    int per_value = 0;
+    if (count != 0) per_value = (size_hint / count >= server.page_size);
+
+    if (ar->superdir) {
+        /* Superdir mode: every block's slices, then the block slot table,
+         * and finally the superdir array itself. */
+        arSDirEntry *blk_end = ar->superdir + ar->sdir_len;
+        for (arSDirEntry *blk = ar->superdir; blk != blk_end; blk++) {
+            for (uint32_t si = 0; si < AR_SUPER_BLOCK_SLOTS; si++) {
+                arSlice *sl = blk->slots[si];
+                if (sl != NULL) arSliceDismiss(sl, per_value);
+            }
+            dismissMemory(blk->slots, AR_SUPER_BLOCK_SLOTS * sizeof(arSlice *));
+        }
+        dismissMemory(ar->superdir, ar->sdir_cap * sizeof(arSDirEntry));
+        return;
+    }
+
+    /* Flat mode: nothing to do if the directory was never allocated. */
+    if (ar->dir == NULL) return;
+    for (uint64_t i = 0; i < ar->dir_alloc; i++) {
+        arSlice *sl = ar->dir[i];
+        if (sl != NULL) arSliceDismiss(sl, per_value);
+    }
+    dismissMemory(ar->dir, ar->dir_alloc * sizeof(arSlice *));
+}
+
+/* arDup() helper to duplicate a single slice into the duplicated array.
+ * This function is responsible of tracking allocations in dup_ar
+ * (hence the name of the parameter), since it has the knowledge of
+ * the array slice that it is duplicating.
+ *
+ * The dear reader of this code may wonder why we don't just duplicate the
+ * array and its slices without tracking memory, and then copy the memory
+ * field of the array at the end. The problem is that the array does not
+ * track the logical allocated memory, but the actual memory usage reported
+ * by the allocator: there is no guarantee that the allocations of the copy
+ * perfectly match the ones of the original array. */
+arSlice *arSliceDup(redisArray *dup_ar, arSlice *s) {
+    if (s->encoding == AR_SLICE_DENSE) {
+        size_t sz = arDenseAllocSize(s->layout.dense.winsize);
+        arSlice *nd = arAllocAndTrack(dup_ar, sz);
+        memcpy(nd, s, sizeof(arSlice));
+        /* The header copy carried over the source items pointer: make it
+         * point right after the new header instead. */
+        nd->layout.dense.items = (void **)(nd + 1);
+        memcpy(nd->layout.dense.items, s->layout.dense.items,
+               s->layout.dense.winsize * sizeof(void *));
+
+        /* Duplicate arString pointers */
+        for (uint32_t j = 0; j < s->layout.dense.winsize; j++) {
+            if (arIsPtr(nd->layout.dense.items[j])) {
+                nd->layout.dense.items[j] = arStringDup(nd->layout.dense.items[j]);
+                arTrackValueIn(dup_ar, nd->layout.dense.items[j]);
+            }
+        }
+        return nd;
+    } else {
+        size_t sz = arSparseAllocSize(s->layout.sparse.cap);
+        arSlice *nsp = arAllocAndTrack(dup_ar, sz);
+        memcpy(nsp, s, sizeof(arSlice));
+        /* Same as above: re-derive the internal pointers for the copy. */
+        arSparseSetupPointers(nsp);
+        memcpy(nsp->layout.sparse.offsets, s->layout.sparse.offsets,
+               s->layout.sparse.cap * sizeof(uint16_t));
+        memcpy(nsp->layout.sparse.values, s->layout.sparse.values,
+               s->layout.sparse.cap * sizeof(void *));
+
+        /* Duplicate arString pointers */
+        void **values = nsp->layout.sparse.values;
+        for (uint32_t j = 0; j < s->count; j++) {
+            if (arIsPtr(values[j])) {
+                values[j] = arStringDup(values[j]);
+                arTrackValueIn(dup_ar, values[j]);
+            }
+        }
+        return nsp;
+    }
+}
+
+/* Duplicate an array (deep copy). The scalar fields are copied from the
+ * source, while alloc_size starts from the size of the new header and is
+ * then updated by the tracked allocations performed for the directory and
+ * by arSliceDup() for each slice. */
+redisArray *arDup(redisArray *ar) {
+    redisArray *dup = zmalloc(sizeof(redisArray));
+    dup->count = ar->count;
+    dup->insert_idx = ar->insert_idx;
+    dup->dir_alloc = ar->dir_alloc;
+    dup->dir_highest_used = ar->dir_highest_used;
+    dup->num_slices = ar->num_slices;
+    dup->alloc_size = zmalloc_size(dup);
+    dup->slice_size = ar->slice_size;
+    dup->sdir_len = ar->sdir_len;
+    dup->sdir_cap = ar->sdir_cap;
+
+    if (ar->superdir) {
+        /* Superdir mode */
+        dup->dir = NULL;
+        dup->superdir = arAllocAndTrack(dup, ar->sdir_cap * sizeof(arSDirEntry));
+
+        /* Only the first sdir_len entries are in use and get populated. */
+        for (uint32_t i = 0; i < ar->sdir_len; i++) {
+            arSDirEntry *src = ar->superdir + i;
+            arSDirEntry *dst = dup->superdir + i;
+
+            dst->block_id = src->block_id;
+            dst->count = src->count;
+            dst->slots = arCallocAndTrack(dup, AR_SUPER_BLOCK_SLOTS * sizeof(arSlice *));
+
+            for (uint32_t j = 0; j < AR_SUPER_BLOCK_SLOTS; j++) {
+                if (src->slots[j]) {
+                    dst->slots[j] = arSliceDup(dup, src->slots[j]);
+                }
+            }
+        }
+    } else if (ar->dir_alloc > 0) {
+        /* Flat mode */
+        dup->superdir = NULL;
+        dup->dir = arAllocAndTrack(dup, ar->dir_alloc * sizeof(arSlice *));
+        memset(dup->dir, 0, ar->dir_alloc * sizeof(arSlice *));
+
+        for (uint64_t i = 0; i < ar->dir_alloc; i++) {
+            if (ar->dir[i]) {
+                dup->dir[i] = arSliceDup(dup, ar->dir[i]);
+            }
+        }
+    } else {
+        /* No directory was ever allocated in the source array. */
+        dup->dir = NULL;
+        dup->superdir = NULL;
+    }
+
+    return dup;
+}
+
+/* ----------------------------------------------------------------------------
+ * Core operations
+ * -------------------------------------------------------------------------- */
+
+/* Get value at index (returns NULL for empty/missing) */
+void *arGet(redisArray *ar, uint64_t idx) {
+    uint64_t slice_id = arSliceId(idx, ar->slice_size);
+    uint32_t rel_idx = arSliceOff(idx, ar->slice_size);
+
+    arSlice *s = arGetSlice(ar, slice_id);
+    if (s == NULL) return NULL; // No slice at all for this index.
+
+    if (s->encoding == AR_SLICE_DENSE) {
+        /* Dense slices store only the window [offset, offset+winsize). */
+        if (rel_idx < s->layout.dense.offset ||
+            rel_idx >= s->layout.dense.offset + s->layout.dense.winsize)
+        {
+            // The slice window does not include this index.
+            return NULL;
+        }
+        return s->layout.dense.items[rel_idx - s->layout.dense.offset];
+    } else {
+        /* Sparse slices are searched by offset. */
+        int found;
+        uint32_t pos = arSparseFindPos(s, (uint16_t)rel_idx, &found);
+        if (found) {
+            void **values = s->layout.sparse.values;
+            return values[pos];
+        }
+        return NULL;
+    }
+}
+
+/* Set value at index. Caller must ensure idx != UINT64_MAX.
+ * v must not be NULL (empty) - use arDel() to delete elements.
+ *
+ * If a value is already stored at idx it is untracked and freed before
+ * the new one is stored. The slice holding idx is created if missing, and
+ * a sparse slice is promoted to dense when an insertion finds it already
+ * holding ArraySparseKMax elements. */
+void arSet(redisArray *ar, uint64_t idx, void *v) {
+    serverAssert(v != NULL); /* Use arDel for deletion, not arSet(v=NULL) */
+    /* UINT64_MAX can't be used for a couple of reasons: for once,
+     * the array len is the max index set + 1, so we could not represent
+     * that; also it is a sentinel for last set index still not being set. */
+    serverAssert(idx != UINT64_MAX);
+    uint64_t slice_id = arSliceId(idx, ar->slice_size);
+    uint32_t rel_idx = arSliceOff(idx, ar->slice_size);
+
+    /* Ensure directory capacity (may trigger promotion to superdir) */
+    arDirGrow(ar, slice_id);
+
+    /* Get current slice */
+    arSlice *s = arGetSlice(ar, slice_id);
+
+    /* Create slice if missing */
+    if (s == NULL) {
+        if (ArraySparseKMax > 0) {
+            s = arSliceSparseNew(ar);
+        } else {
+            s = arSliceDenseNew(ar, rel_idx, ar->slice_size);
+        }
+        arSetSlice(ar, slice_id, s);
+    }
+
+    if (s->encoding == AR_SLICE_DENSE) {
+        /* Grow the slice window if needed */
+        s = arSliceDenseGrowIfNeeded(ar, s, rel_idx, ar->slice_size);
+        arSetSlice(ar, slice_id, s); // In case it changed.
+
+        uint32_t pos = rel_idx - s->layout.dense.offset;
+        void *old = s->layout.dense.items[pos];
+
+        if (arIsEmpty(old)) {
+            /* New element: both the slice and the array counters grow. */
+            s->count++;
+            ar->count++;
+        } else {
+            /* Replace existing value. */
+            arTrackValueOut(ar, old);
+            arFreePtr(old);
+        }
+
+        arTrackValueIn(ar, v);
+        s->layout.dense.items[pos] = v;
+
+        /* Update max_idx */
+        if (rel_idx > s->layout.dense.max_idx) {
+            s->layout.dense.max_idx = rel_idx;
+        }
+    } else {
+        int found;
+        uint32_t pos = arSparseFindPos(s, (uint16_t)rel_idx, &found);
+        uint16_t *offsets = s->layout.sparse.offsets;
+        void **values = s->layout.sparse.values;
+
+        if (found) {
+            /* Replace existing */
+            arTrackValueOut(ar, values[pos]);
+            arFreePtr(values[pos]);
+            arTrackValueIn(ar, v);
+            values[pos] = v;
+        } else {
+            /* Insert new */
+            if (s->count >= ArraySparseKMax) {
+                /* Promote to dense */
+                arSlice *d = arSparsePromote(ar, s, ar->slice_size);
+
+                /* Grow window if needed */
+                d = arSliceDenseGrowIfNeeded(ar, d, rel_idx, ar->slice_size);
+                arSetSlice(ar, slice_id, d);
+
+                uint32_t dpos = rel_idx - d->layout.dense.offset;
+                arTrackValueIn(ar, v);
+                d->layout.dense.items[dpos] = v;
+                d->count++;
+                ar->count++;
+                if (rel_idx > d->layout.dense.max_idx) d->layout.dense.max_idx = rel_idx;
+            } else {
+                /* Insert in sparse */
+                if (s->count >= s->layout.sparse.cap) {
+                    /* Grow capacity, we grow 2x but note that there is no
+                     * point in growing more than kmax, so we clamp to kmax. */
+                    uint32_t new_cap = s->layout.sparse.cap * 2;
+                    if (new_cap > ArraySparseKMax) new_cap = ArraySparseKMax;
+                    arSlice *ns = arAllocAndTrack(ar, arSparseAllocSize(new_cap));
+                    ns->encoding = AR_SLICE_SPARSE;
+                    ns->count = s->count;
+                    ns->layout.sparse.cap = new_cap;
+                    arSparseSetupPointers(ns);
+
+                    /* Copy old data to new slice */
+                    uint16_t *old_offsets = s->layout.sparse.offsets;
+                    void **old_values = s->layout.sparse.values;
+                    uint16_t *new_offsets = ns->layout.sparse.offsets;
+                    void **new_values = ns->layout.sparse.values;
+                    memcpy(new_offsets,old_offsets,s->count * sizeof(uint16_t));
+                    memcpy(new_values,old_values,s->count * sizeof(void *));
+
+                    /* The old slice is released and every local that
+                     * pointed inside it is redirected to the new one. */
+                    arFreeAndTrack(ar, s);
+                    s = ns;
+                    arSetSlice(ar, slice_id, s);
+                    offsets = new_offsets;
+                    values = new_values;
+                }
+
+                /* Shift and insert in place */
+                memmove(offsets + pos + 1, offsets + pos,
+                        (s->count - pos) * sizeof(uint16_t));
+                memmove(values + pos + 1, values + pos,
+                        (s->count - pos) * sizeof(void *));
+                offsets[pos] = (uint16_t)rel_idx;
+                arTrackValueIn(ar, v);
+                values[pos] = v;
+                s->count++;
+                ar->count++;
+            }
+        }
+    }
+
+    /* Update dir_highest_used. The count==1 check handles when we just added
+     * the first element to an empty array. */
+    if (slice_id > ar->dir_highest_used || ar->count == 1) {
+        ar->dir_highest_used = slice_id;
+    }
+}
+
+/* Delete value at index. Returns 1 if deleted, 0 if was already empty.
*/
+int arDel(redisArray *ar, uint64_t idx) {
+    uint64_t slice_id = arSliceId(idx, ar->slice_size);
+    uint32_t rel_idx = arSliceOff(idx, ar->slice_size);
+
+    arSlice *s = arGetSlice(ar, slice_id);
+    if (s == NULL) return 0;
+
+    if (s->encoding == AR_SLICE_DENSE) {
+        /* Outside the allocated window there is nothing stored. */
+        if (rel_idx < s->layout.dense.offset ||
+            rel_idx >= s->layout.dense.offset + s->layout.dense.winsize)
+        {
+            return 0;
+        }
+
+        void **cell = &s->layout.dense.items[rel_idx - s->layout.dense.offset];
+        if (arIsEmpty(*cell)) return 0;
+
+        arTrackValueOut(ar, *cell);
+        arFreePtr(*cell);
+        *cell = NULL;
+        s->count--;
+        ar->count--;
+
+        /* The deleted element may have been the slice maximum. */
+        arDenseUpdateMaxIdx(s, rel_idx);
+
+        /* A still populated dense slice is kept, possibly demoted to
+         * sparse. An empty one is released below. */
+        if (s->count != 0) {
+            arSetSlice(ar, slice_id, arDenseMaybeDemote(ar, s));
+            return 1;
+        }
+    } else {
+        int found;
+        uint32_t pos = arSparseFindPos(s, (uint16_t)rel_idx, &found);
+        if (!found) return 0;
+
+        uint16_t *offsets = s->layout.sparse.offsets;
+        void **values = s->layout.sparse.values;
+
+        arTrackValueOut(ar, values[pos]);
+        arFreePtr(values[pos]);
+
+        /* Close the gap by shifting left the entries after pos. */
+        uint32_t tail = s->count - pos - 1;
+        memmove(offsets + pos, offsets + pos + 1, tail * sizeof(uint16_t));
+        memmove(values + pos, values + pos + 1, tail * sizeof(void *));
+        s->count--;
+        ar->count--;
+    }
+
+    /* Slice still in use: we are done. */
+    if (s->count != 0) return 1;
+
+    /* Otherwise release the empty slice and clear its directory slot (in
+     * superdir mode arSetSlice() will also free empty blocks), then refresh
+     * the directory metadata. */
+    arSliceFree(ar, s);
+    arSetSlice(ar, slice_id, NULL);
+    arDirUpdateHighest(ar, slice_id);
+    arDirMaybeShrink(ar);
+    return 1;
+}
+
+/* ============================================================================
+ * GENERALIZED RANGE DELETE - arDeleteRange
+ * ============================================================================
+ *
+ * This function provides O(N) range deletion where N is the number of stored
+ * elements, NOT the numeric range length. It achieves this by:
+ *
+ * 1.
Deleting whole fully-covered slices in the middle range.
+ * 2. In superdir mode, visiting only overlapping blocks and covered slice
+ *    slots within them, instead of scanning the numeric slice-id span.
+ * 3. Only doing element-level deletion inside the two boundary slices.
+ *
+ * This is used by ARDELRANGE directly and by arTruncate as a special case.
+ * -------------------------------------------------------------------------- */
+
+/* Helper: delete elements within a single slice in offset range
+ * [del_lo..del_hi]. Returns number of elements deleted. Handles both dense
+ * and sparse slices.
+ *
+ * Dense slices delete slot-by-slot inside the window. Sparse slices identify
+ * the contiguous offset span to delete, free those values, then compact the
+ * tail once.
+ *
+ * If the slice becomes empty, it is freed and the slot is cleared. Note that
+ * dir_highest_used is not touched here: this is up to the caller. */
+uint64_t arDeleteSliceRange(redisArray *ar, uint64_t slice_id,
+                            uint32_t del_lo, uint32_t del_hi) {
+    arSlice *s = arGetSlice(ar, slice_id);
+    if (!s) return 0;
+
+    uint64_t deleted = 0;
+
+    if (s->encoding == AR_SLICE_DENSE) {
+        /* Dense: intersect deletion range with allocated window */
+        uint32_t win_lo = s->layout.dense.offset;
+        uint32_t win_hi = s->layout.dense.offset + s->layout.dense.winsize - 1;
+
+        /* Clamp to window */
+        uint32_t eff_lo = (del_lo > win_lo) ? del_lo : win_lo;
+        uint32_t eff_hi = (del_hi < win_hi) ? del_hi : win_hi;
+
+        if (eff_lo <= eff_hi) {
+            /* Clear every populated slot in the effective dense range. */
+            for (uint32_t off = eff_lo; off <= eff_hi; off++) {
+                uint32_t pos = off - s->layout.dense.offset;
+                if (!arIsEmpty(s->layout.dense.items[pos])) {
+                    arTrackValueOut(ar, s->layout.dense.items[pos]);
+                    arFreePtr(s->layout.dense.items[pos]);
+                    s->layout.dense.items[pos] = NULL;
+                    s->count--;
+                    ar->count--;
+                    deleted++;
+                }
+            }
+
+            /* Update max_idx if affected */
+            if (s->count > 0 && s->layout.dense.max_idx >= eff_lo) {
+                /* Scan backwards to find new max */
+                s->layout.dense.max_idx = s->layout.dense.offset;
+                for (int32_t i = (int32_t)win_hi; i >= (int32_t)win_lo; i--) {
+                    if (!arIsEmpty(s->layout.dense.items[i - s->layout.dense.offset])) {
+                        s->layout.dense.max_idx = i;
+                        break;
+                    }
+                }
+            }
+        }
+
+        /* Delete slice if empty, or demote it to sparse if we are
+         * below the threshold. */
+        if (s->count == 0) {
+            arSliceFree(ar, s);
+            arSetSlice(ar, slice_id, NULL);
+        } else {
+            arSetSlice(ar, slice_id, arDenseMaybeDemote(ar, s));
+        }
+    } else {
+        /* Sparse: deleted elements form a contiguous span in the sorted
+         * offsets/values arrays. Find that span, free the values in it,
+         * then compact the tail once. */
+        uint16_t *offsets = s->layout.sparse.offsets;
+        void **values = s->layout.sparse.values;
+
+        /* 'first' is the position of del_lo (or where it would be), 'last'
+         * is made exclusive by stepping past del_hi when it is present. */
+        int found;
+        uint32_t first = arSparseFindPos(s, (uint16_t)del_lo, &found);
+        uint32_t last = arSparseFindPos(s, (uint16_t)del_hi, &found);
+        if (found) last++;
+
+        /* Free all values in the contiguous sparse span to delete. */
+        for (uint32_t i = first; i < last; i++) {
+            arTrackValueOut(ar, values[i]);
+            arFreePtr(values[i]);
+        }
+
+        /* Shift the surviving tail left to close the deleted gap. */
+        if (first < last) {
+            uint32_t tail = s->count - last;
+            if (tail > 0) {
+                memmove(offsets + first, offsets + last,
+                        tail * sizeof(uint16_t));
+                memmove(values + first, values + last,
+                        tail * sizeof(void *));
+            }
+
+            deleted = last - first;
+            s->count -= deleted;
+            ar->count -= deleted;
+        }
+
+        if (s->count == 0) {
+            arSliceFree(ar, s);
+            arSetSlice(ar, slice_id, NULL);
+        }
+    }
+
+    return deleted;
+}
+
+/* Main range delete function: delete all elements in [lo..hi].
+ * Returns number of elements deleted.
+ *
+ * Algorithm:
+ * 1. Compute slice boundaries
+ * 2. Handle boundary slices with element-level deletion
+ * 3. Delete full slices/blocks in between (O(1) per slice)
+ * 4. Update metadata (dir_highest_used, shrink directories)
+ *
+ * Complexity: O(S + N) where S = slices touched, N = boundary elements.
+ * Note that just looping with arGetSlice() and removing the in-the-middle
+ * slices one after the other would be much simpler but would have completely
+ * different complexity properties, in case of big span of empty indexes. */
+uint64_t arDeleteRange(redisArray *ar, uint64_t lo, uint64_t hi) {
+    if (ar->count == 0 || lo > hi) return 0;
+
+    uint32_t slice_size = ar->slice_size;
+    uint64_t lo_slice = arSliceId(lo, slice_size);
+    uint64_t hi_slice = arSliceId(hi, slice_size);
+    uint32_t lo_off = arSliceOff(lo, slice_size);
+    uint32_t hi_off = arSliceOff(hi, slice_size);
+
+    uint64_t deleted = 0;
+    int touched_highest = 0; /* Did we touch dir_highest_used? */
+
+    if (lo_slice == hi_slice) {
+        /* Range is within a single slice: element-level delete only */
+        deleted = arDeleteSliceRange(ar, lo_slice, lo_off, hi_off);
+        if (lo_slice >= ar->dir_highest_used) touched_highest = 1;
+    } else {
+        /* Multiple slices: handle boundaries and full slices separately */
+
+        /* 1. Delete within lo_slice: [lo_off .. slice_size-1] */
+        deleted += arDeleteSliceRange(ar, lo_slice, lo_off, slice_size - 1);
+
+        /* 2. Delete within hi_slice: [0 .. hi_off] */
+        deleted += arDeleteSliceRange(ar, hi_slice, 0, hi_off);
+        if (hi_slice >= ar->dir_highest_used) touched_highest = 1;
+
+        /* 3. Delete full slices in between [lo_slice+1 .. hi_slice-1] */
+        if (lo_slice + 1 <= hi_slice - 1) {
+            if (ar->superdir) {
+                /* Superdir mode: identify only the block entries that can
+                 * contain slices in the middle range, then delete the covered
+                 * slot interval inside each of those blocks. Iterate from high
+                 * to low so removing an empty block does not invalidate the
+                 * yet-to-be-visited entries. */
+                uint64_t mid_lo = lo_slice + 1;
+                uint64_t mid_hi = hi_slice - 1;
+                uint64_t lo_block = mid_lo / AR_SUPER_BLOCK_SLOTS;
+                uint64_t hi_block = mid_hi / AR_SUPER_BLOCK_SLOTS;
+
+                /* arSuperDirFind() is a lower-bound search on block_id.
+                 * start is the first entry whose block_id is >= lo_block.
+                 * end is the first entry whose block_id is > hi_block, so the
+                 * blocks to visit are exactly [start, end). */
+                int found;
+                uint32_t start = arSuperDirFind(ar, lo_block, &found);
+                uint32_t end = arSuperDirFind(ar, hi_block, &found);
+                if (found) end++; /* Convert matching index to past-the-end. */
+
+                /* Iterate backward because deleting the last slice in a block
+                 * removes that block entry and compacts the superdir array. */
+                for (int32_t bi = (int32_t)end - 1; bi >= (int32_t)start; bi--) {
+                    arSDirEntry *e = ar->superdir + bi;
+                    uint64_t block_base = e->block_id * AR_SUPER_BLOCK_SLOTS;
+                    uint64_t block_end = block_base + AR_SUPER_BLOCK_SLOTS - 1;
+
+                    /* Convert the global middle slice range to the local slot
+                     * interval covered inside this block. */
+                    uint32_t first_si = (mid_lo > block_base) ?
+                        (uint32_t)(mid_lo - block_base) : 0;
+                    uint32_t last_si = (mid_hi < block_end) ?
+                        (uint32_t)(mid_hi - block_base) : AR_SUPER_BLOCK_SLOTS - 1;
+
+                    /* Delete each covered slice slot. The block itself, if it
+                     * becomes empty, is removed after this local scan. */
+                    for (uint32_t si = first_si; si <= last_si; si++) {
+                        if (e->slots[si]) {
+                            uint64_t slice_id = block_base + si;
+                            deleted += e->slots[si]->count;
+                            ar->count -= e->slots[si]->count;
+                            arSliceFree(ar, e->slots[si]);
+                            e->slots[si] = NULL;
+                            e->count--;
+                            ar->num_slices--;
+                            if (slice_id >= ar->dir_highest_used)
+                                touched_highest = 1;
+                        }
+                    }
+
+                    /* Remove the superdir block if empty. */
+                    if (e->count == 0) {
+                        arSuperDirRemoveBlock(ar, bi);
+                    }
+                }
+            } else if (ar->dir_alloc > 0) {
+                /* Flat mode: delete full slices in middle range.
+                 *
+                 * The dir_alloc check above is defensive: with an empty
+                 * directory the clamp below would compute dir_alloc - 1,
+                 * which wraps to UINT64_MAX and turns the loop into a scan
+                 * of a missing table. With no directory there is no middle
+                 * slice to delete anyway. */
+                uint64_t end = hi_slice - 1;
+                if (end >= ar->dir_alloc) end = ar->dir_alloc - 1;
+
+                for (uint64_t sid = lo_slice + 1; sid <= end; sid++) {
+                    if (ar->dir[sid]) {
+                        deleted += ar->dir[sid]->count;
+                        ar->count -= ar->dir[sid]->count;
+                        arSliceFree(ar, ar->dir[sid]);
+                        ar->dir[sid] = NULL;
+                        ar->num_slices--;
+                        if (sid >= ar->dir_highest_used) touched_highest = 1;
+                    }
+                }
+            }
+        }
+    }
+
+    /* Update dir_highest_used if we touched or deleted high slices */
+    if (touched_highest && ar->count > 0) {
+        /* Rescan from the top of the directory: the first populated entry
+         * found going backwards is the new highest one. */
+        ar->dir_highest_used = 0;
+        if (ar->superdir) {
+            for (int32_t bi = ar->sdir_len - 1; bi >= 0; bi--) {
+                arSDirEntry *e = ar->superdir + bi;
+                if (e->count == 0) continue;
+                for (int32_t si = AR_SUPER_BLOCK_SLOTS - 1; si >= 0; si--) {
+                    if (e->slots[si]) {
+                        ar->dir_highest_used = e->block_id * AR_SUPER_BLOCK_SLOTS + si;
+                        goto found_highest;
+                    }
+                }
+            }
+        } else {
+            for (int64_t i = (int64_t)ar->dir_alloc - 1; i >= 0; i--) {
+                if (ar->dir[i]) {
+                    ar->dir_highest_used = i;
+                    goto found_highest;
+                }
+            }
+        }
+    }
+found_highest:
+
+    if (ar->count == 0) {
+        ar->dir_highest_used = 0;
+    }
+
+    arDirMaybeShrink(ar);
+    return deleted;
+}
+
+/* Truncate array: delete all elements with index >= limit.
+ * Used by ARRING to implement ring buffer wrap-around.
+ *
+ * This is implemented as a special case of arDeleteRange. limit==0 means
+ * "delete everything".
*/
+void arTruncate(redisArray *ar, uint64_t limit) {
+    /* An empty array has logical length zero: nothing can be at or past
+     * the limit. */
+    uint64_t len = (ar->count != 0) ? arLen(ar) : 0;
+
+    /* Only a limit below the current length leaves something to delete. */
+    if (limit < len) arDeleteRange(ar, limit, len - 1);
+}
+
+/* ----------------------------------------------------------------------------
+ * Properties
+ * -------------------------------------------------------------------------- */
+
+/* Number of non-empty elements stored in the array. */
+uint64_t arCount(redisArray *ar) {
+    return ar->count;
+}
+
+/* Logical length of the array, that is, the highest index set plus one.
+ * Zero for an empty array. */
+uint64_t arLen(redisArray *ar) {
+    if (ar->count == 0) return 0;
+
+    uint64_t top_id = ar->dir_highest_used;
+    arSlice *top = arGetSlice(ar, top_id);
+
+    /* Defensive: count > 0 with no slice at dir_highest_used would mean a
+     * corrupted state. */
+    if (top == NULL) return 0;
+
+    uint32_t top_max = arSliceMaxIdx(top);
+    return arMakeIdx(ar->dir_highest_used, top_max, ar->slice_size) + 1;
+}
+
+/* ----------------------------------------------------------------------------
+ * Range set optimization
+ * -------------------------------------------------------------------------- */
+
+/* Pre-promote sparse slices to dense if a range set would overflow them.
+ *
+ * When ARSET writes many elements to a sparse slice, each insertion
+ * requires a sorted insert with memmove. If the slice eventually exceeds
+ * kmax elements, it gets promoted to dense anyway - wasting all that work.
+ *
+ * This function checks each slice touched by [lo, hi] and promotes it to
+ * dense upfront if the final element count would exceed kmax. Slices that
+ * will stay within kmax remain sparse. This way, bulk writes either go
+ * into sparse (if small) or dense (if large), but never do expensive
+ * sparse insertions followed by promotion.
*/ +void arMayPromoteToDenseForRangeSet(redisArray *ar, uint64_t lo, uint64_t hi) { + if (ArraySparseKMax == 0) return; /* Sparse disabled, nothing to do */ + + uint64_t slice_lo = arSliceId(lo, ar->slice_size); + uint64_t slice_hi = arSliceId(hi, ar->slice_size); + + /* Ensure directory can hold all slices we might touch */ + arDirGrow(ar, slice_hi); + + for (uint64_t sid = slice_lo; sid <= slice_hi; sid++) { + /* Compute offset range within this slice */ + uint64_t range_start = (sid == slice_lo) ? lo : (sid << arSliceBits(ar->slice_size)); + uint64_t range_end = (sid == slice_hi) ? hi : ((sid + 1) << arSliceBits(ar->slice_size)) - 1; + uint32_t start_off = arSliceOff(range_start, ar->slice_size); + uint32_t end_off = arSliceOff(range_end, ar->slice_size); + uint32_t range_size = end_off - start_off + 1; + + arSlice *s = arGetSlice(ar, sid); + + if (s == NULL) { + /* No slice yet - create dense directly if range exceeds kmax */ + if (range_size > ArraySparseKMax) { + arSetSlice(ar, sid, arSliceDenseNew(ar, start_off, ar->slice_size)); + } + continue; + } + + if (s->encoding == AR_SLICE_DENSE) continue; /* Already dense */ + + /* Sparse slice - check if we need to promote */ + if (range_size > ArraySparseKMax) { + /* Range alone exceeds kmax, must promote */ + arSetSlice(ar, sid, arSparsePromote(ar, s, ar->slice_size)); + continue; + } + + /* Count existing elements in [start_off, end_off] via linear scan. + * Sparse slices have at most kmax elements, so this is O(kmax). 
*/ + uint16_t *offsets = s->layout.sparse.offsets; + uint32_t existing = 0; + for (uint32_t i = 0; i < s->count; i++) { + if (offsets[i] >= start_off && offsets[i] <= end_off) { + existing++; + } + } + + /* New elements = range_size - existing (slots we'll fill that are empty) */ + uint32_t new_elements = range_size - existing; + if (s->count + new_elements > ArraySparseKMax) { + arSetSlice(ar, sid, arSparsePromote(ar, s, ar->slice_size)); + } + } +} + +/* ---------------------------------------------------------------------------- + * Defragmentation + * -------------------------------------------------------------------------- */ + +/* Defrag one slice, fix the slice pointers that point inside its allocation + * and defrag the heap strings as well. + * + * If work is not NULL, also account for the slice scan performed here: + * dense slices add winsize, while sparse slices add count. We update the + * active defrag scanned statistic at the same time, so callers do not need + * to duplicate that logic. */ +static arSlice *arDefragSlice(arSlice *s, unsigned long *work, + void *(*defragfn)(void *)) { + /* 1. Try to defrag the slice itself. If the pointer changed, + * we need to also change the structure pointers pointing inside + * the allocation (that now has a different base address). */ + arSlice *new_s = defragfn(s); + if (new_s) { + s = new_s; + if (s->encoding == AR_SLICE_DENSE) + s->layout.dense.items = (void **)(s + 1); + else + arSparseSetupPointers(s); + } + + /* Defrag the arString() items. All the other items are + * encoded in the pointer value itself and need no handling. 
*/ + if (s->encoding == AR_SLICE_DENSE) { + for (uint32_t j = 0; j < s->layout.dense.winsize; j++) { + if (!arIsPtr(s->layout.dense.items[j])) continue; + void *new_ptr = defragfn(s->layout.dense.items[j]); + if (new_ptr) s->layout.dense.items[j] = new_ptr; + } + if (work) { + *work += s->layout.dense.winsize; + server.stat_active_defrag_scanned += s->layout.dense.winsize; + } + } else { + void **values = s->layout.sparse.values; + for (uint32_t j = 0; j < s->count; j++) { + if (!arIsPtr(values[j])) continue; + void *new_ptr = defragfn(values[j]); + if (new_ptr) values[j] = new_ptr; + } + if (work) { + *work += s->count; + server.stat_active_defrag_scanned += s->count; + } + } + return s; +} + +/* Defrag the array header and the top-level directory object that points to + * slices. This is the cheap metadata pass done before we walk the slices + * themselves. */ +static redisArray *arDefragTopLevel(redisArray *ar, void *(*defragfn)(void *)) { + redisArray *new_ar = defragfn(ar); + if (new_ar) ar = new_ar; + + if (ar->superdir) { + arSDirEntry *new_sdir = defragfn(ar->superdir); + if (new_sdir) ar->superdir = new_sdir; + } else if (ar->dir) { + arSlice **new_dir = defragfn(ar->dir); + if (new_dir) ar->dir = new_dir; + } + return ar; +} + +/* Encode the next superdir scan position as a single cursor. + * Cursor 0 means "start from the beginning" and also "finished". + * + * On 64-bit builds we encode block_id and slot, so resume is stable even if + * blocks before the current one are inserted or removed between defrag steps. + * + * On 32-bit builds the generic defrag cursor type is only unsigned long, so + * it cannot always hold a full 64-bit block_id. In that case we fall back to + * the positional (block-index, slot) encoding. 
*/ +static inline unsigned long arDefragSuperdirCursor(redisArray *ar, uint32_t bi, uint32_t si) { + serverAssert(si < AR_SUPER_BLOCK_SLOTS); +#if ULONG_MAX >= UINT64_MAX + uint64_t block_id = ar->superdir[bi].block_id; + serverAssert(block_id <= (ULONG_MAX - 1) / AR_SUPER_BLOCK_SLOTS); + return ((unsigned long)block_id * AR_SUPER_BLOCK_SLOTS + si) + 1; +#else + UNUSED(ar); + return ((unsigned long)bi * AR_SUPER_BLOCK_SLOTS + si) + 1; +#endif +} + +/* Decode the next superdir scan position stored in the incremental defrag + * cursor. */ +static void arDefragDecodeSuperdirCursor(redisArray *ar, unsigned long cursor, + uint32_t *bi, uint32_t *si) { + serverAssert(cursor > 0); + unsigned long pos = cursor - 1; +#if ULONG_MAX >= UINT64_MAX + /* Flat-mode cursors are also encoded as "slot + 1". After promotion to + * superdir, those old cursors still decode correctly here as block_id 0 + * with the same slot index, because flat mode only ever covers block 0 + * and arPromoteToSuperDir() copies the flat directory into block 0. */ + uint64_t block_id = pos / AR_SUPER_BLOCK_SLOTS; + int found; + + *si = pos % AR_SUPER_BLOCK_SLOTS; + *bi = arSuperDirFind(ar, block_id, &found); + if (!found) *si = 0; +#else + UNUSED(ar); + *bi = pos / AR_SUPER_BLOCK_SLOTS; + *si = pos % AR_SUPER_BLOCK_SLOTS; +#endif +} + +/* Defrag an array that is small enough that we can handle it + * in a single pass. */ +redisArray *arDefrag(redisArray *ar, void *(*defragfn)(void *)) { + ar = arDefragTopLevel(ar, defragfn); + + if (ar->superdir) { + /* Defrag each block slots array, then each slice referenced by it. 
*/ + for (uint32_t bi = 0; bi < ar->sdir_len; bi++) { + arSDirEntry *e = ar->superdir + bi; + arSlice **new_slots = defragfn(e->slots); + if (new_slots) e->slots = new_slots; + + for (uint32_t si = 0; si < AR_SUPER_BLOCK_SLOTS; si++) { + if (e->slots[si] == NULL) continue; + e->slots[si] = arDefragSlice(e->slots[si], NULL, defragfn); + } + } + } else if (ar->dir) { + /* Defrag each slice referenced by the flat directory. */ + for (uint64_t i = 0; i < ar->dir_alloc; i++) { + if (ar->dir[i] == NULL) continue; + ar->dir[i] = arDefragSlice(ar->dir[i], NULL, defragfn); + } + } + + return ar; +} + +/* Incremental defrag step for arrays. Cursor 0 means "start from the + * beginning" and also "no more work". + * + * Work is counted explicitly in order to keep one call roughly aligned with + * active_defrag_max_scan_fields: + * + * 1. Visiting one flat directory entry costs 1. + * 2. In superdir mode, visiting one top-level block entry costs 1, and + * visiting one slot inside that block costs another 1. + * 3. Defragmenting a slice then adds the cost of scanning that slice: + * sparse slices add s->count, while dense slices add winsize. + * + * Slices are still defragmented as whole units. So a dense slice may cause one + * call to overshoot the configured budget, but we still stop immediately after + * that slice in order to resume from the next cursor position later. 
*/ +unsigned long arDefragIncremental(redisArray **arref, unsigned long cursor, + void *(*defragfn)(void *)) +{ + redisArray *ar = *arref; + unsigned long work = 0; + unsigned long maxwork = server.active_defrag_max_scan_fields; + if (ar == NULL) return 0; + + if (cursor == 0) { + ar = arDefragTopLevel(ar, defragfn); + *arref = ar; + } + + if (ar->superdir) { + uint32_t bi = 0, si = 0; + if (cursor != 0) arDefragDecodeSuperdirCursor(ar, cursor, &bi, &si); + + for (; bi < ar->sdir_len; bi++, si = 0) { + arSDirEntry *e = ar->superdir + bi; + /* Defrag the block slots array once when we enter the block from + * its first slot. If we later resume in the middle of the same + * block, the slots array was already handled. */ + if (si == 0) { + arSlice **new_slots = defragfn(e->slots); + if (new_slots) e->slots = new_slots; + work++; + server.stat_active_defrag_scanned++; + } + + for (; si < AR_SUPER_BLOCK_SLOTS; si++) { + arSlice *s = e->slots[si]; + work++; + server.stat_active_defrag_scanned++; + + if (s == NULL) { + if (work > maxwork) { + si++; + if (si == AR_SUPER_BLOCK_SLOTS) { + bi++; + si = 0; + } + if (bi >= ar->sdir_len) return 0; + return arDefragSuperdirCursor(ar, bi, si); + } + continue; + } + + e->slots[si] = arDefragSlice(s, &work, defragfn); + + if (work > maxwork) { + si++; + if (si == AR_SUPER_BLOCK_SLOTS) { + bi++; + si = 0; + } + if (bi >= ar->sdir_len) return 0; + return arDefragSuperdirCursor(ar, bi, si); + } + } + } + return 0; + } + + if (ar->dir == NULL) return 0; + + uint64_t i = (cursor == 0) ? 
0 : cursor - 1; + for (; i < ar->dir_alloc; i++) { + arSlice *s = ar->dir[i]; + work++; + server.stat_active_defrag_scanned++; + + if (s == NULL) { + if (work > maxwork) { + i++; + if (i >= ar->dir_alloc) return 0; + return i + 1; + } + continue; + } + + ar->dir[i] = arDefragSlice(s, &work, defragfn); + + if (work > maxwork) { + i++; + if (i >= ar->dir_alloc) return 0; + return i + 1; + } + } + return 0; +} diff --git a/src/sparsearray.h b/src/sparsearray.h new file mode 100644 index 000000000..c0444ee7b --- /dev/null +++ b/src/sparsearray.h @@ -0,0 +1,312 @@ +/* + * Copyright (c) 2026-Present, Redis Ltd. + * All rights reserved. + * + * Licensed under your choice of (a) the Redis Source Available License 2.0 + * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the + * GNU Affero General Public License v3 (AGPLv3). + * + * Sparse Array - A memory-efficient sparse array with 64-bit index space. + * + * This data structure was designed and implemented by Salvatore Sanfilippo. + */ + +#ifndef __SPARSEARRAY_H +#define __SPARSEARRAY_H + +#include +#include +#include + +/* ============================================================================ + * SPARSE ARRAY OVERVIEW + * ============================================================================ + * + * Sparse arrays are random-access sequences indexed by non-negative 64-bit + * integers. They support O(1) get/set operations and efficient iteration. + * + * MEMORY LAYOUT + * ------------- + * The array uses a two-level structure: a directory pointing to "slices", + * which contain just a range of elements. For very large/sparse arrays, a + * three-level "superdir" structure is used. + * + * SLICE TYPES + * ----------- + * Each slice holds up to slice_size elements and can be: + * + * - Sparse: Sorted array of (offset, value) pairs. Memory-efficient when + * elements are scattered within the slice. + * + * - Dense: Contiguous array with a sliding window. 
Used when the slice + * has many elements. + * + * VALUE ENCODING (Tagged Pointers) + * -------------------------------- + * Values are stored in tagged pointer-sized words, using the low 2 bits as a + * tag. The exact immediate encoding depends on pointer width: + * + * 64-bit builds: + * Tag 00: arString pointer (heap-allocated, 8+ byte strings) + * Tag 01: Immediate signed integer in the 62-bit payload + * Tag 10: Immediate double (low 2 bits of the IEEE-754 payload cleared) + * Tag 11: Inline small string (0-7 bytes) + * + * 32-bit builds: + * Tag 00: arString pointer + * Tag 01: Immediate signed integer in the 30-bit payload + * Tag 10: Immediate float (low 2 bits of the IEEE-754 payload cleared) + * Tag 11: Inline small string (0-3 bytes) + * + * RDB persistence is architecture-neutral: values are saved as logical ints, + * doubles and strings, never as raw tagged words. + * ========================================================================== */ + +/* ---------------------------------------------------------------------------- + * Configuration defaults + * -------------------------------------------------------------------------- */ + +#define AR_SLICE_SIZE_DEFAULT 4096 +#define AR_SLICE_SIZE_MIN 256 +#define AR_SLICE_SIZE_MAX 65536 +#define AR_SPARSE_KMAX_DEFAULT 10 +#define AR_SPARSE_KMIN_DEFAULT 5 + +/* Superdir: fixed-size blocks of slice pointers. Each block holds 2048 + * pointers to actual array slices, which uses about 8 KB on 32-bit builds + * and 16 KB on 64-bit builds. This keeps very large indices from forcing + * catastrophic flat-directory growth. */ +#define AR_SUPER_BLOCK_SLOTS 2048 + +/* Internal constants */ +#define AR_SLICE_MIN_ALLOC 8 /* Initial dense window allocation */ +#define AR_INSERT_IDX_NONE UINT64_MAX /* No insert performed yet */ + +/* Slice encoding types */ +#define AR_SLICE_DENSE 0 +#define AR_SLICE_SPARSE 1 + +/* Tagged value encoding (low 2 bits). NULL (0) means empty slot. 
*/
+#define AR_TAG_PTR ((uintptr_t)0) /* arString pointer (low 2 bits = 00) */
+#define AR_TAG_INT ((uintptr_t)1) /* Immediate signed integer (01) */
+#define AR_TAG_FLOAT ((uintptr_t)2) /* Immediate float (10) */
+#define AR_TAG_STR ((uintptr_t)3) /* Inline small string (11) */
+#define AR_TAG_MASK ((uintptr_t)3) /* Selects the 2 tag bits of a value word */
+
+/* Inline small strings keep their length in the bits right above the tag
+ * (see arSmallStrLen) and their characters from the second byte of the word
+ * on (see arToSmallStr / arFromSmallStr), so the maximum length is the
+ * pointer size minus one byte. */
+#if UINTPTR_MAX == UINT64_MAX
+#define AR_SMALLSTR_MAXLEN 7
+#define AR_SMALLSTR_LEN_MASK 0x7u
+#elif UINTPTR_MAX == UINT32_MAX
+#define AR_SMALLSTR_MAXLEN 3
+#define AR_SMALLSTR_LEN_MASK 0x3u
+#else
+#error "Unsupported pointer size"
+#endif
+
+/* RDB type tags for array elements */
+#define AR_RDB_TAG_SDS 0
+#define AR_RDB_TAG_INT 1
+#define AR_RDB_TAG_FLOAT 2
+#define AR_RDB_TAG_SMALLSTR 3
+
+/* Buffer size for inline types (int/float/smallstr) */
+#define AR_INLINE_BUFSIZE 64
+
+/* ----------------------------------------------------------------------------
+ * Data structures
+ * -------------------------------------------------------------------------- */
+
+/* Array slice: holds a range of elements. Single allocation with payload.
+ *
+ * The pointers inside the layout union point into the same allocation as the
+ * header. For dense slices the items array starts right after the header
+ * (the defrag code rebuilds it as (void **)(s + 1) after a move); for sparse
+ * slices they are recomputed by arSparseSetupPointers(). Whenever the
+ * allocation is relocated these pointers must be fixed up. */
+typedef struct arSlice {
+    uint8_t encoding; /* 0=dense, 1=sparse (AR_SLICE_DENSE / AR_SLICE_SPARSE) */
+    uint8_t _pad1[3]; /* Explicit padding */
+    uint32_t count; /* Non-empty items in this slice */
+    union {
+        struct {
+            uint32_t offset; /* First logical offset in window */
+            uint32_t winsize; /* Window size (power of two) */
+            uint32_t max_idx; /* Highest offset with a value */
+            void **items; /* Points into payload */
+        } dense;
+        struct {
+            uint32_t cap; /* Capacity */
+            uint16_t *offsets; /* Points into payload; 16-bit in-slice offsets */
+            void **values; /* Points into payload (aligned) */
+        } sparse;
+    } layout;
+} arSlice;
+
+/* Super-directory entry: groups slices into fixed-size pointer blocks.
+ * Entries are looked up by block_id through arSuperDirFind(). */
+typedef struct arSDirEntry {
+    uint64_t block_id; /* slice_id / AR_SUPER_BLOCK_SLOTS */
+    uint32_t count; /* Non-NULL slots in this block */
+    uint32_t _pad; /* Explicit padding */
+    arSlice **slots; /* AR_SUPER_BLOCK_SLOTS pointers to slices */
+} arSDirEntry;
+
+/* Array header.
+ *
+ * The array is either in flat mode (dir in use, superdir NULL) or in superdir
+ * mode (superdir non-NULL): code walking the array tests superdir first and
+ * falls back to dir. */
+typedef struct redisArray {
+    uint64_t count; /* Total non-empty items */
+    uint64_t insert_idx; /* Last insert index, or UINT64_MAX if none */
+    uint64_t dir_alloc; /* Flat directory length (flat mode) */
+    uint64_t dir_highest_used; /* Highest non-NULL slice index */
+    uint64_t num_slices; /* Number of allocated slices */
+    size_t alloc_size; /* Tracked total allocation (for slot stats) */
+    uint32_t slice_size; /* Slice size (power of two) */
+    uint32_t sdir_len; /* Superdir entries count */
+    uint32_t sdir_cap; /* Superdir capacity */
+    uint32_t _pad; /* Explicit padding */
+    arSlice **dir; /* Flat directory or NULL */
+    arSDirEntry *superdir; /* Super-directory or NULL */
+} redisArray;
+
+/* ----------------------------------------------------------------------------
+ * Inline helpers: index arithmetic
+ * -------------------------------------------------------------------------- */
+
+/* Compute bits needed to address elements within a slice.
*/ +static inline int arSliceBits(uint32_t slice_size) { + if (slice_size == 4096) return 12; /* Fast path for default */ + int bits = 0; + uint32_t x = slice_size; + while (x > 1) { x >>= 1; bits++; } + return bits; +} + +static inline uint64_t arSliceId(uint64_t idx, uint32_t slice_size) { + return idx >> arSliceBits(slice_size); +} + +static inline uint32_t arSliceOff(uint64_t idx, uint32_t slice_size) { + return (uint32_t)(idx & (slice_size - 1)); +} + +static inline uint64_t arMakeIdx(uint64_t slice_id, uint32_t off, uint32_t slice_size) { + return (slice_id << arSliceBits(slice_size)) | off; +} + +/* ---------------------------------------------------------------------------- + * Inline helpers: tagged value encoding + * -------------------------------------------------------------------------- */ + +static inline int arIsEmpty(void *v) { return v == NULL; } + +static inline int arIsPtr(void *v) { + return v != NULL && ((uintptr_t)v & AR_TAG_MASK) == AR_TAG_PTR; +} + +static inline int arIsInt(void *v) { + return ((uintptr_t)v & AR_TAG_MASK) == AR_TAG_INT; +} + +static inline int64_t arToInt(void *v) { + return (int64_t)(intptr_t)v >> 2; /* Arithmetic shift preserves sign */ +} + +static inline void *arFromInt(int64_t ival) { + return (void *)(((uintptr_t)ival << 2) | AR_TAG_INT); +} + +static inline int arIntFits(int64_t ival) { +#if UINTPTR_MAX == UINT64_MAX + return ival >= -(1LL << 61) && ival <= (1LL << 61) - 1; +#else + return ival >= -(1LL << 29) && ival <= (1LL << 29) - 1; +#endif +} + +static inline int arIsFloat(void *v) { + return ((uintptr_t)v & AR_TAG_MASK) == AR_TAG_FLOAT; +} + +static inline double arToDouble(void *v) { +#if UINTPTR_MAX == UINT64_MAX + uint64_t bits = (uintptr_t)v & ~AR_TAG_MASK; + double d; + memcpy(&d, &bits, sizeof(d)); + return d; +#else + uint32_t bits = (uint32_t)((uintptr_t)v & ~(uintptr_t)AR_TAG_MASK); + float f; + memcpy(&f, &bits, sizeof(f)); + return (double)f; +#endif +} + +static inline void 
*arFromFloatBits(uint64_t bits_trunc) { +#if UINTPTR_MAX == UINT64_MAX + return (void *)((bits_trunc & ~AR_TAG_MASK) | AR_TAG_FLOAT); +#else + uint32_t bits32 = (uint32_t)bits_trunc; + return (void *)(uintptr_t)((bits32 & ~(uint32_t)AR_TAG_MASK) | AR_TAG_FLOAT); +#endif +} + +static inline int arIsSmallStr(void *v) { + return ((uintptr_t)v & AR_TAG_MASK) == AR_TAG_STR; +} + +static inline int arSmallStrLen(void *v) { + return (int)(((uintptr_t)v >> 2) & AR_SMALLSTR_LEN_MASK); +} + +static inline int arToSmallStr(void *v, char *buf) { + int len = arSmallStrLen(v); + uintptr_t val = (uintptr_t)v; + for (int i = 0; i < len; i++) { + buf[i] = (char)((val >> (8 * (i + 1))) & 0xFF); + } + buf[len] = '\0'; + return len; +} + +static inline void *arFromSmallStr(const char *s, int len) { + uintptr_t v = AR_TAG_STR | ((uintptr_t)len << 2); + for (int i = 0; i < len; i++) { + v |= ((uintptr_t)(uint8_t)s[i]) << (8 * (i + 1)); + } + return (void *)v; +} + +/* ---------------------------------------------------------------------------- + * Public API + * -------------------------------------------------------------------------- */ + +/* Lifecycle */ +redisArray *arNew(void); +void arFree(redisArray *ar); +redisArray *arDup(redisArray *ar); +void arDismiss(redisArray *ar, size_t size_hint); + +/* Element access */ +void *arGet(redisArray *ar, uint64_t idx); +void arSet(redisArray *ar, uint64_t idx, void *v); +int arDel(redisArray *ar, uint64_t idx); + +/* Value encoding/decoding */ +void *arEncode(const char *s, size_t len); +const char *arDecode(void *v, char *buf, size_t bufsize, size_t *outlen); +int arFormatFloat(double d, char *buf, size_t bufsize); +size_t arStringLen(const void *ptr); +const char *arStringData(const void *ptr); +void *arValueFromRdbInt(int64_t ival); +void *arValueFromRdbFloat(double d); +void *arValueFromRdbSmallStr(const char *s, size_t len); + +/* Queries */ +uint64_t arCount(redisArray *ar); +uint64_t arLen(redisArray *ar); + +/* Bulk operations */ 
+uint64_t arDeleteRange(redisArray *ar, uint64_t lo, uint64_t hi); +void arTruncate(redisArray *ar, uint64_t limit); +void arMayPromoteToDenseForRangeSet(redisArray *ar, uint64_t lo, uint64_t hi); + +/* Utilities */ +uint32_t arSparseFindPos(arSlice *s, uint16_t rel_idx, int *found); +uint32_t arSuperDirFind(redisArray *ar, uint64_t block_id, int *found); +redisArray *arDefrag(redisArray *ar, void *(*defragfn)(void *)); +unsigned long arDefragIncremental(redisArray **arref, unsigned long cursor, + void *(*defragfn)(void *)); + +#endif /* __SPARSEARRAY_H */ diff --git a/src/t_array.c b/src/t_array.c new file mode 100644 index 000000000..4fb72f8da --- /dev/null +++ b/src/t_array.c @@ -0,0 +1,2021 @@ +/* + * Copyright (c) 2026-Present, Redis Ltd. + * All rights reserved. + * + * Licensed under your choice of (a) the Redis Source Available License 2.0 + * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the + * GNU Affero General Public License v3 (AGPLv3). + * + * Redis Array commands implementation. + * Originally authored by: Salvatore Sanfilippo. + * + * The core sparse array data structure is in sparsearray.c/sparsearray.h. + * This file contains Redis command handlers and Redis-specific operations. + */ + +#include "server.h" +#include "../deps/tre/local_includes/tre.h" +#include + +/****************************************************************************** + * + * ARRAY COMMANDS AND HIGHER LEVEL LOGIC + * + * This section contains all the Redis commands for the Array type, as well + * as the type operations used by COPY and other server-level functionality. 
+ * + *****************************************************************************/ + +/* ---------------------------------------------------------------------------- + * Array type operations for COPY command + * -------------------------------------------------------------------------- */ + +robj *arrayTypeDup(robj *o) { + redisArray *ar = o->ptr; + redisArray *dup = arDup(ar); + robj *newobj = createObject(OBJ_ARRAY, dup); + newobj->encoding = OBJ_ENCODING_SLICED_ARRAY; + return newobj; +} + +/* ---------------------------------------------------------------------------- + * Internal helpers + * -------------------------------------------------------------------------- */ + +#define ARGETRANGE_MAX_ITEMS 1000000 + +/* Lookup array object for write, create it if missing, or reply with + * WRONGTYPE and return NULL if the key holds a different type. */ +robj *lookupArrayForWriteOrReply(client *c, robj *key) { + robj *o = lookupKeyWrite(c->db, key); + if (o == NULL) { + o = createArrayObject(); + dbAdd(c->db, key, &o); + } else if (checkType(c, o, OBJ_ARRAY)) { + return NULL; + } + return o; +} + +/* Reply with an array value. This helper is needed because we used + * tagged pointers for inlining values like floats, integers, small + * strings directly inside the pointer. Big memory saves, but more + * work needed when there is to reply to the client. */ +void addReplyArrayValue(client *c, void *v) { + if (arIsEmpty(v)) { + addReplyNull(c); + return; + } + + char buf[AR_INLINE_BUFSIZE]; + size_t len; + const char *data = arDecode(v, buf, sizeof(buf), &len); + addReplyBulkCBuffer(c, data, len); +} + +/* Parse array index from object. Accepts 0 to 2^64-2 by default. + * If allow_max is true, also accepts UINT64_MAX. This is used by ARSEEK + * because ARSEEK UINT64_MAX sets insert_idx to UINT64_MAX-1, which is + * a valid terminal state (next ARINSERT would overflow). + * Returns C_OK/C_ERR. Does NOT send error reply - caller decides. 
*/
+int getArrayIndexFromObject(robj *o, uint64_t *idx, int allow_max) {
+    unsigned long long ull;
+    if (o->encoding == OBJ_ENCODING_INT) {
+        /* Integer-encoded object: the value lives in the pointer field.
+         * Negative values are not valid indices. */
+        if ((long)o->ptr < 0) return C_ERR;
+        ull = (unsigned long long)(long)o->ptr;
+    } else {
+        if (!string2ull(o->ptr, &ull)) return C_ERR;
+    }
+    /* UINT64_MAX is rejected unless the caller explicitly allows it. */
+    if (ull == UINT64_MAX && !allow_max) return C_ERR;
+    *idx = ull;
+    return C_OK;
+}
+
+/* Parse an array index argument and reply with an error on failure.
+ * UINT64_MAX is never accepted here (allow_max is 0). Returns C_OK/C_ERR. */
+int arrayParseIndexOrReply(client *c, robj *arg, uint64_t *idx) {
+    if (getArrayIndexFromObject(arg, idx, 0) != C_OK) {
+        addReplyError(c, "invalid array index");
+        return C_ERR;
+    }
+    return C_OK;
+}
+
+/* ----------------------------------------------------------------------------
+ * ARGET / ARMGET
+ * -------------------------------------------------------------------------- */
+
+/* ARGET key idx
+ *
+ * Returns the value at idx in O(1).
+ * Missing keys and holes both reply with NULL. */
+void argetCommand(client *c) {
+    robj *o = lookupKeyRead(c->db, c->argv[1]);
+    if (o && checkType(c, o, OBJ_ARRAY)) return;
+
+    uint64_t idx;
+    if (arrayParseIndexOrReply(c, c->argv[2], &idx) != C_OK) return;
+
+    /* A missing key is treated like a hole. */
+    void *v = o ? arGet(o->ptr, idx) : NULL;
+    addReplyArrayValue(c, v);
+}
+
+/* ARMGET key idx [idx ...]
+ *
+ * Returns the values at the requested indices in O(N), where N is the number
+ * of indices. Missing keys and holes reply with NULLs. All indices are
+ * validated before the reply starts, so malformed input fails atomically. */
+void armgetCommand(client *c) {
+    robj *o = lookupKeyRead(c->db, c->argv[1]);
+    if (o != NULL && checkType(c, o, OBJ_ARRAY)) return;
+
+    /* Pre-validate all indices so malformed input fails the whole command,
+     * like the other array commands. */
+    for (int i = 2; i < c->argc; i++) {
+        uint64_t idx;
+        if (arrayParseIndexOrReply(c, c->argv[i], &idx) != C_OK) return;
+    }
+
+    /* One reply element per requested index. */
+    addReplyArrayLen(c, c->argc - 2);
+
+    for (int i = 2; i < c->argc; i++) {
+        if (o == NULL) {
+            /* Non existing keys are semantically equivalent
+             * to non existing indexes of existing arrays. */
+            addReplyNull(c);
+            continue;
+        }
+
+        uint64_t idx = 0;
+        getArrayIndexFromObject(c->argv[i], &idx, 0); /* Already validated. */
+
+        redisArray *ar = o->ptr;
+        void *v = arGet(ar, idx);
+        addReplyArrayValue(c, v);
+    }
+}
+
+/* ----------------------------------------------------------------------------
+ * ARSET / ARMSET
+ * -------------------------------------------------------------------------- */
+
+/* ARSET key idx value [value ...]
+ *
+ * Sets one or more contiguous values, starting at idx, in O(N), where N is
+ * the number of values. Creates the array if needed and returns the number
+ * of previously empty slots that were filled. */
+void arsetCommand(client *c) {
+    uint64_t start_idx;
+    if (arrayParseIndexOrReply(c, c->argv[2], &start_idx) != C_OK) return;
+
+    /* Values start at argv[3]. */
+    int num_values = c->argc - 3;
+
+    /* Pre-validate: check for overflow and forbidden max index. */
+    uint64_t last_idx = start_idx + (uint64_t)num_values - 1;
+    if (last_idx < start_idx || last_idx == UINT64_MAX) {
+        addReplyError(c, "array index overflow");
+        return;
+    }
+
+    robj *o = lookupArrayForWriteOrReply(c, c->argv[1]);
+    if (o == NULL) return;
+
+    /* Snapshot count and allocation size so that the key-size histogram and
+     * the per-slot memory accounting can be updated after the write. */
+    redisArray *ar = o->ptr;
+    uint64_t old_count = arCount(ar);
+    size_t old_alloc = 0;
+    if (server.memory_tracking_enabled) old_alloc = kvobjAllocSize(o);
+
+    /* Pre-promote sparse slices only for true bulk sets. A single-element
+     * write does not benefit from the extra range-analysis pass. */
+    if (num_values > 1)
+        arMayPromoteToDenseForRangeSet(ar, start_idx, last_idx);
+
+    /* Write all values starting at start_idx */
+    uint64_t idx = start_idx;
+    for (int i = 3; i < c->argc; i++) {
+        sds val = c->argv[i]->ptr;
+        void *v = arEncode(val, sdslen(val));
+        arSet(ar, idx, v);
+        idx++;
+    }
+
+    /* Only slots that were empty before count as "set". */
+    long long set_count = arCount(ar) - old_count;
+    updateKeysizesHist(c->db, OBJ_ARRAY, old_count, arCount(ar));
+    if (server.memory_tracking_enabled)
+        updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), o, old_alloc, kvobjAllocSize(o));
+    keyModified(c, c->db, c->argv[1], o, 1);
+    notifyKeyspaceEvent(NOTIFY_ARRAY, "arset", c->argv[1], c->db->id);
+    server.dirty += num_values;
+    addReplyLongLong(c, set_count);
+}
+
+/* ARMSET key idx value [idx value ...]
+ *
+ * Sets multiple scattered index/value pairs in O(N), where N is the number of
+ * pairs. Creates the array if needed, returns the number of newly filled
+ * slots, and validates all indices before mutating. */
+void armsetCommand(client *c) {
+    /* Arguments after the key must come in idx/value pairs. */
+    if ((c->argc - 2) % 2 != 0) {
+        addReplyErrorArity(c);
+        return;
+    }
+
+    /* Validate all indices first */
+    for (int i = 2; i < c->argc; i += 2) {
+        uint64_t idx;
+        if (arrayParseIndexOrReply(c, c->argv[i], &idx) != C_OK) return;
+    }
+
+    robj *o = lookupArrayForWriteOrReply(c, c->argv[1]);
+    if (o == NULL) return;
+
+    /* Snapshot count and allocation size for the accounting done below. */
+    redisArray *ar = o->ptr;
+    uint64_t old_count = arCount(ar);
+    size_t old_alloc = 0;
+    if (server.memory_tracking_enabled) old_alloc = kvobjAllocSize(o);
+
+    for (int i = 2; i < c->argc; i += 2) {
+        uint64_t idx = 0;
+        getArrayIndexFromObject(c->argv[i], &idx, 0); /* Already validated */
+
+        sds val = c->argv[i + 1]->ptr;
+        void *v = arEncode(val, sdslen(val));
+        arSet(ar, idx, v);
+    }
+
+    int num_pairs = (c->argc - 2) / 2;
+    /* Only slots that were empty before count as "set". */
+    long long set_count = arCount(ar) - old_count;
+    updateKeysizesHist(c->db, OBJ_ARRAY, old_count, arCount(ar));
+    if (server.memory_tracking_enabled)
+        updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), o, old_alloc, kvobjAllocSize(o));
+    keyModified(c, c->db, c->argv[1], o, 1);
+    notifyKeyspaceEvent(NOTIFY_ARRAY, "armset", c->argv[1], c->db->id);
+    server.dirty += num_pairs;
+    addReplyLongLong(c, set_count);
+}
+
+/* ----------------------------------------------------------------------------
+ * ARDEL / ARDELRANGE
+ * -------------------------------------------------------------------------- */
+
+/* ARDEL key idx [idx ...]
+ *
+ * Deletes the specified indices in O(N), where N is the number of indices.
+ * All indices are validated first, and if the array becomes empty the key
+ * itself is deleted. The number of deleted (existing) items is returned. */
+void ardelCommand(client *c) {
+    /* Pre-validate all indices before mutating, to report syntax errors
+     * even if the key doesn't exist. */
+    for (int i = 2; i < c->argc; i++) {
+        uint64_t idx;
+        if (arrayParseIndexOrReply(c, c->argv[i], &idx) != C_OK) return;
+    }
+
+    robj *o = lookupKeyWrite(c->db, c->argv[1]);
+    if (o == NULL) {
+        addReplyLongLong(c, 0);
+        return;
+    }
+    if (checkType(c, o, OBJ_ARRAY)) return;
+
+    /* Snapshot count and allocation size for the accounting done below. */
+    redisArray *ar = o->ptr;
+    uint64_t old_count = arCount(ar);
+    size_t old_alloc = 0;
+    if (server.memory_tracking_enabled) old_alloc = kvobjAllocSize(o);
+    long long deleted = 0;
+
+    for (int i = 2; i < c->argc; i++) {
+        uint64_t idx = 0;
+        getArrayIndexFromObject(c->argv[i], &idx, 0); /* Already validated */
+        deleted += arDel(ar, idx);
+    }
+
+    int keyremoved = (arCount(ar) == 0);
+    /* When the key is about to be removed, the slot allocation size is
+     * updated here, while o can still be measured: after the delete below
+     * neither o nor ar is used again on the keyremoved path. */
+    if (server.memory_tracking_enabled && deleted > 0 && keyremoved)
+        updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), o, old_alloc, kvobjAllocSize(o));
+    if (deleted > 0) {
+        if (keyremoved)
+            dbDeleteSkipKeysizesUpdate(c->db, c->argv[1]);
+        /* A removed key is reported to the histogram with new length -1. */
+        updateKeysizesHist(c->db, OBJ_ARRAY,
+                           old_count, keyremoved ? -1 : (int64_t)arCount(ar));
+        if (server.memory_tracking_enabled && !keyremoved)
+            updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), o, old_alloc, kvobjAllocSize(o));
+        keyModified(c, c->db, c->argv[1], keyremoved ? NULL : o, 1);
+        notifyKeyspaceEvent(NOTIFY_ARRAY, "ardel", c->argv[1], c->db->id);
+        if (keyremoved)
+            notifyKeyspaceEvent(NOTIFY_GENERIC, "del", c->argv[1], c->db->id);
+        server.dirty += deleted;
+    }
+    addReplyLongLong(c, deleted);
+}
+
+/* ARDELRANGE key start end [start end ...]
+ *
+ * Deletes one or more ranges. Complexity is proportional to the existing
+ * elements / slices touched, not to the numeric span of the requested ranges,
+ * so huge ranges do not block the server forever.
+ *
+ * Each pair may be given in either order. All ranges are validated up front,
+ * and an empty resulting array deletes the key. */
+void ardelrangeCommand(client *c) {
+    /* Arguments after the key must come in start/end pairs. */
+    if ((c->argc - 2) % 2 != 0) {
+        addReplyErrorArity(c);
+        return;
+    }
+
+    /* Pre-validate all ranges before mutating, to avoid partial updates
+     * if a later range has invalid syntax. */
+    for (int i = 2; i < c->argc; i += 2) {
+        uint64_t start, end;
+        if (arrayParseIndexOrReply(c, c->argv[i], &start) != C_OK) return;
+        if (arrayParseIndexOrReply(c, c->argv[i + 1], &end) != C_OK) return;
+    }
+
+    robj *o = lookupKeyWrite(c->db, c->argv[1]);
+    if (o == NULL) {
+        addReplyLongLong(c, 0);
+        return;
+    }
+    if (checkType(c, o, OBJ_ARRAY)) return;
+
+    /* Snapshot count and allocation size for the accounting done below. */
+    redisArray *ar = o->ptr;
+    uint64_t old_count = arCount(ar);
+    size_t old_alloc = 0;
+    if (server.memory_tracking_enabled) old_alloc = kvobjAllocSize(o);
+    uint64_t total_deleted = 0;
+
+    /* Process each range using the generalized arDeleteRange */
+    for (int i = 2; i < c->argc; i += 2) {
+        uint64_t start = 0, end = 0;
+        getArrayIndexFromObject(c->argv[i], &start, 0); /* Already validated */
+        getArrayIndexFromObject(c->argv[i + 1], &end, 0);
+
+        /* Normalize the pair so that lo <= hi. */
+        uint64_t lo = (start <= end) ? start : end;
+        uint64_t hi = (start <= end) ? end : start;
+
+        total_deleted += arDeleteRange(ar, lo, hi);
+    }
+
+    int keyremoved = (arCount(ar) == 0);
+    /* Same ordering as in ARDEL: when the key is about to be removed, the
+     * slot allocation size is updated before the delete below. */
+    if (server.memory_tracking_enabled && total_deleted > 0 && keyremoved)
+        updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), o, old_alloc, kvobjAllocSize(o));
+    if (total_deleted > 0) {
+        if (keyremoved)
+            dbDeleteSkipKeysizesUpdate(c->db, c->argv[1]);
+        /* A removed key is reported to the histogram with new length -1. */
+        updateKeysizesHist(c->db, OBJ_ARRAY,
+                           old_count, keyremoved ? -1 : (int64_t)arCount(ar));
+        if (server.memory_tracking_enabled && !keyremoved)
+            updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), o, old_alloc, kvobjAllocSize(o));
+        keyModified(c, c->db, c->argv[1], keyremoved ? NULL : o, 1);
+        notifyKeyspaceEvent(NOTIFY_ARRAY, "ardelrange", c->argv[1], c->db->id);
+        if (keyremoved)
+            notifyKeyspaceEvent(NOTIFY_GENERIC, "del", c->argv[1], c->db->id);
+        server.dirty += total_deleted;
+    }
+    addReplyUnsignedLongLong(c, total_deleted);
+}
+
+/* ----------------------------------------------------------------------------
+ * ARLEN / ARCOUNT
+ * -------------------------------------------------------------------------- */
+
+/* ARLEN key
+ *
+ * Returns max-index-plus-one in O(1).
+ * Missing keys reply with 0. */
+void arlenCommand(client *c) {
+    /* lookupKeyReadOrReply() is given shared.czero as the reply to use
+     * when the key is missing. */
+    robj *o = lookupKeyReadOrReply(c, c->argv[1], shared.czero);
+    if (o == NULL || checkType(c, o, OBJ_ARRAY)) return;
+
+    redisArray *ar = o->ptr;
+    addReplyUnsignedLongLong(c, arLen(ar));
+}
+
+/* ARCOUNT key
+ *
+ * Returns the number of non-empty elements in O(1).
+ * Missing keys reply with 0. */
+void arcountCommand(client *c) {
+    /* lookupKeyReadOrReply() is given shared.czero as the reply to use
+     * when the key is missing. */
+    robj *o = lookupKeyReadOrReply(c, c->argv[1], shared.czero);
+    if (o == NULL || checkType(c, o, OBJ_ARRAY)) return;
+
+    redisArray *ar = o->ptr;
+    addReplyUnsignedLongLong(c, arCount(ar));
+}
+
+/* ----------------------------------------------------------------------------
+ * ARGETRANGE
+ * -------------------------------------------------------------------------- */
+
+/* ARGETRANGE key start end
+ *
+ * Returns every position in the requested range in O(N), where N is the range
+ * length. Holes are returned as NULLs, and a missing key behaves like an all-
+ * NULL array. If start > end the reply order is reversed.
+ *
+ * To avoid giant synthetic NULL replies, the range length is hard-limited,
+ * otherwise the command, with a wrong range, could make the server totally
+ * unusable. The max range is 1 million elements and is fixed, constituting
+ * the user-facing semantic of the command. */
+void argetrangeCommand(client *c) {
+    uint64_t start, end;
+    if (arrayParseIndexOrReply(c, c->argv[2], &start) != C_OK) return;
+    if (arrayParseIndexOrReply(c, c->argv[3], &end) != C_OK) return;
+
+    robj *o = lookupKeyRead(c->db, c->argv[1]);
+    if (o != NULL && checkType(c, o, OBJ_ARRAY)) return;
+
+    int reverse = start > end;
+    uint64_t lo = reverse ? end : start;
+    uint64_t hi = reverse ? start : end;
+    uint64_t len = hi - lo + 1;
+
+    /* ARGETRANGE is a special command: it can trigger a huge reply blocking
+     * the server (basically forever) even if there is no actual data. This
+     * is unlike an SMEMBERS against a very big key: somebody added so many
+     * elements inside, before asking for a huge amount of elements. But, in the
+     * case of ARGETRANGE, you can just trigger a huge amount of NULLs to be
+     * sent to the client.
In ARSCAN this was optimized to be O(N) with the + * actual populated elements, but in this case it can't be done because + * of the semantic of the command, and the Redis protocol inability to reply + * with run-length ranges (a, b, c, 1293455 NULLs, d, e). + * + * Because of all that, we put an hard limit in the range size, and this + * limit must be part of the Redis culture, so it should not be tuned in + * any way: 1 million items, with an hard error if the range is bigger than + * that, not just a silent trimming at this length, that would cause hard + * to track bugs. */ + if (len > ARGETRANGE_MAX_ITEMS) { + addReplyErrorFormat(c, "range exceeds maximum of %u items", + ARGETRANGE_MAX_ITEMS); + return; + } + + addReplyArrayLen(c, len); + if (o == NULL) { + for (uint64_t i = 0; i < len; i++) addReplyNull(c); + return; + } + + redisArray *ar = o->ptr; + if (reverse) { + for (uint64_t idx = hi; ; idx--) { + void *v = arGet(ar, idx); + addReplyArrayValue(c, v); + if (idx == lo) break; + } + } else { + for (uint64_t idx = lo; idx <= hi; idx++) { + void *v = arGet(ar, idx); + addReplyArrayValue(c, v); + } + } +} + +/* ---------------------------------------------------------------------------- + * ARSCAN + * -------------------------------------------------------------------------- */ + +/* Iterate populated elements in [start..end]. + * + * This iterator is read-only and not mutation-stable: between Init() and the + * final Next() that returns 0, the caller must not write to the array. Any + * write may free or relocate the current slice, making the iterator state + * stale. The goal of this abstraction was to capture repeated code in the + * implementation of ARSCAN, ARGREP, AROP. + * + * The struct lives on the caller stack, so setup and iteration stay allocation + * free and command-local. */ +typedef struct { + redisArray *ar; + uint64_t lo; /* Normalized inclusive range start. */ + uint64_t hi; /* Normalized inclusive range end. 
*/ + uint64_t lo_slice; /* First slice touched by the range. */ + uint64_t hi_slice; /* Last slice touched by the range. */ + uint32_t slice_size; /* Cached slice size. */ + int reverse; /* Iterate from high to low. */ + int32_t step; /* +1 forward, -1 backward. */ + int done; /* No more elements to return. */ + int top_done; /* No more slices to inspect after current. */ + + uint64_t slice_id; /* Next flat-directory slice to inspect. */ + int32_t sdir_index; /* Next superdir entry to inspect. */ + int32_t slot_index; /* Next slot inside the current superdir entry. */ + + arSlice *slice; /* Slice currently being scanned. */ + uint64_t slice_base; /* Logical index of slice offset 0. */ + uint32_t off_lo; /* First in-range offset for current slice. */ + uint32_t off_hi; /* Last in-range offset for current slice. */ + int dense; /* Current slice is dense. */ + void **dense_items; /* Dense items window. */ + int32_t dense_off; /* Current dense logical offset. */ + int32_t dense_item_pos; /* Current dense window index. */ + int32_t dense_item_end; /* Final dense window index. */ + uint16_t *sparse_offsets; /* Sparse offsets array. */ + void **sparse_values; /* Sparse values array. */ + int32_t sparse_count; /* Sparse entry count. */ + int32_t sparse_pos; /* Current sparse entry position. */ + int slice_ready; /* Current slice scan state is initialized. */ +} arScanIter; + +#define AR_SCAN_ITER_SLOT_UNSET INT32_MIN + +/* Keep the per-element iterator hot path inline in the command loops. + * It helps a lot with certain targets, up to ~30-50% speed regression + * without forcing the inlining. */ +#if defined(__GNUC__) || defined(__clang__) +#define ALWAYS_INLINE __attribute__((always_inline)) inline +#else +#define ALWAYS_INLINE inline +#endif + +/* Initialize a populated-elements iterator. Empty arrays and empty clipped + * ranges are turned into a done iterator here so the first Next() is a single + * branch. 
*/
+static void arScanIterInit(redisArray *ar, uint64_t start, uint64_t end,
+                           arScanIter *it)
+{
+    /* Zeroing also clears slice_ready, so the first Next() call goes
+     * straight to arScanIterLoadNextSlice(). */
+    memset(it, 0, sizeof(*it));
+    it->ar = ar;
+
+    if (ar == NULL || arCount(ar) == 0) {
+        it->done = 1;
+        it->top_done = 1;
+        return;
+    }
+
+    /* Note that a few things here could be taken
+     * from the array itself, as they are immutable,
+     * but after introducing this abstraction a small
+     * but measurable speed regression suggested to
+     * micro-optimize for this hot path and have
+     * iterator-side copies of often used stuff. */
+    it->reverse = start > end;
+    it->step = it->reverse ? -1 : 1;
+    it->lo = it->reverse ? end : start;
+    it->hi = it->reverse ? start : end;
+    it->slice_size = ar->slice_size;
+    it->lo_slice = it->lo / it->slice_size;
+    it->hi_slice = it->hi / it->slice_size;
+    it->slot_index = AR_SCAN_ITER_SLOT_UNSET;
+
+    /* No intersection between the range and the array span. */
+    if (it->lo_slice > ar->dir_highest_used) {
+        it->done = 1;
+        it->top_done = 1;
+        return;
+    }
+
+    /* Clip the high end to the actual array span. */
+    if (it->hi_slice > ar->dir_highest_used) {
+        it->hi_slice = ar->dir_highest_used;
+        it->hi = arMakeIdx(it->hi_slice, it->slice_size - 1, it->slice_size);
+    }
+
+    /* Clipping made the range empty? */
+    if (it->lo_slice > it->hi_slice) {
+        it->done = 1;
+        it->top_done = 1;
+        return;
+    }
+
+    if (ar->superdir) {
+        int found;
+
+        /* Start from the first superdir block that can intersect the range. */
+        uint64_t block_id = (it->reverse ? it->hi_slice : it->lo_slice) /
+                            AR_SUPER_BLOCK_SLOTS;
+        uint32_t pos = arSuperDirFind(ar, block_id, &found);
+
+        /* NOTE(review): this assumes arSuperDirFind() returns the insertion
+         * position when the block is missing, so a reverse scan steps back
+         * to the previous entry -- confirm against its definition. */
+        if (it->reverse) it->sdir_index = found ? (int32_t)pos : (int32_t)pos - 1;
+        else it->sdir_index = (int32_t)pos;
+
+        /* No superdir block intersects the clipped range. */
+        if (it->sdir_index < 0 || it->sdir_index >= (int32_t)ar->sdir_len) {
+            it->done = 1;
+            it->top_done = 1;
+        }
+    } else {
+        /* Flat directory iteration starts directly from the first in-range slice. */
+        it->slice_id = it->reverse ? it->hi_slice : it->lo_slice;
+    }
+}
+
+/* Prepare the current slice-local scan state. Returns 1 if the slice may
+ * yield at least one populated element in range, otherwise 0.
+ * The function is used by arScanIterLoadNextSlice() each time a new
+ * slice should be iterated. When a new slice is selected by
+ * arScanIterLoadNextSlice(), then this function is called to setup the
+ * iteration needed by arScanIterNext().
+ *
+ * When 0 is returned some dense / sparse cursor fields may already have been
+ * overwritten, but slice_ready is left untouched, so they are not used. */
+static ALWAYS_INLINE int arScanIterPrepareSlice(arScanIter *it,
+                                                arSlice *s, uint64_t slice_id)
+{
+    uint64_t slice_base = slice_id * it->slice_size;
+    /* Restrict the scan to the part of this slice touched by the query. */
+    uint32_t off_lo = (slice_id == it->lo_slice) ?
+        arSliceOff(it->lo, it->slice_size) : 0;
+    uint32_t off_hi = (slice_id == it->hi_slice) ?
+        arSliceOff(it->hi, it->slice_size) : it->slice_size - 1;
+
+    if (s->encoding == AR_SLICE_DENSE) {
+        uint32_t win_lo = s->layout.dense.offset;
+        uint32_t win_hi = s->layout.dense.offset + s->layout.dense.winsize - 1;
+
+        /* Dense slices may only have a smaller populated window allocated. */
+        if (off_lo < win_lo) off_lo = win_lo;
+        if (off_hi > win_hi) off_hi = win_hi;
+
+        /* No intersection between the range and the dense window. */
+        if (off_lo > off_hi) return 0;
+
+        /* dense_off tracks the slice-relative offset, while dense_item_pos
+         * and dense_item_end are indexes relative to the window start. */
+        it->dense = 1;
+        it->dense_items = s->layout.dense.items;
+        it->dense_off = it->reverse ? (int32_t)off_hi : (int32_t)off_lo;
+        it->dense_item_pos = it->dense_off - (int32_t)win_lo;
+        it->dense_item_end = (it->reverse ? (int32_t)off_lo :
+                                            (int32_t)off_hi) - (int32_t)win_lo;
+    } else {
+        int found;
+        uint32_t pos;
+        uint16_t *offsets = s->layout.sparse.offsets;
+
+        it->dense = 0;
+        it->sparse_offsets = offsets;
+        it->sparse_values = s->layout.sparse.values;
+        it->sparse_count = (int32_t)s->count;
+        if (it->reverse) {
+            /* Start from the last sparse entry that can still be in range. */
+            pos = arSparseFindPos(s, (uint16_t)off_hi, &found);
+            it->sparse_pos = found ? (int32_t)pos : (int32_t)pos - 1;
+
+            /* No sparse entry falls inside the requested offsets. */
+            if (it->sparse_pos < 0 || offsets[it->sparse_pos] < off_lo)
+                return 0;
+        } else {
+            /* Start from the first sparse entry that can still be in range. */
+            pos = arSparseFindPos(s, (uint16_t)off_lo, &found);
+            it->sparse_pos = (int32_t)pos;
+
+            /* No sparse entry falls inside the requested offsets. */
+            if (it->sparse_pos >= (int32_t)s->count ||
+                offsets[it->sparse_pos] > off_hi) return 0;
+        }
+    }
+
+    it->slice = s;
+    it->slice_base = slice_base;
+    it->off_lo = off_lo;
+    it->off_hi = off_hi;
+    it->slice_ready = 1;
+    return 1;
+}
+
+/* Advance top-level directory state until a non-NULL slice in range is ready
+ * for local scanning, or return 0 if the iterator is exhausted.
+ *
+ * The directory cursor (slice_id, or sdir_index + slot_index) is always moved
+ * past the slice being returned before returning, so the next call resumes
+ * from the following candidate. */
+static ALWAYS_INLINE int arScanIterLoadNextSlice(arScanIter *it) {
+    redisArray *ar = it->ar;
+
+    if (ar->superdir) {
+        while (!it->top_done) {
+            /* No more superdir blocks to inspect. */
+            if (it->sdir_index < 0 || it->sdir_index >= (int32_t)ar->sdir_len) {
+                it->top_done = 1;
+                break;
+            }
+
+            /* The slot bounds are computed before the two range checks
+             * below; they are only used once the block is known to
+             * intersect [lo_slice, hi_slice]. */
+            arSDirEntry *e = ar->superdir + it->sdir_index;
+            uint64_t block_base = e->block_id * AR_SUPER_BLOCK_SLOTS;
+            uint64_t block_end = block_base + AR_SUPER_BLOCK_SLOTS - 1;
+            int32_t block_slot_lo = (block_base < it->lo_slice) ?
+                (int32_t)(it->lo_slice - block_base) : 0;
+            int32_t block_slot_hi = (block_end > it->hi_slice) ?
+                (int32_t)(it->hi_slice - block_base) : AR_SUPER_BLOCK_SLOTS - 1;
+
+            /* This block starts after the requested range. */
+            if (block_base > it->hi_slice) {
+                it->top_done = 1;
+                break;
+            }
+
+            /* This block ends before the requested range. */
+            if (block_end < it->lo_slice) {
+                if (it->reverse) it->top_done = 1;
+                else it->sdir_index++;
+                it->slot_index = AR_SCAN_ITER_SLOT_UNSET;
+                continue;
+            }
+
+            if (it->reverse) {
+                /* slot_index uses a sentinel outside the valid 0..2047 range
+                 * so reverse scans can consume slot 0 and then fall below the
+                 * block without looking like a fresh block entry. */
+                if (it->slot_index == AR_SCAN_ITER_SLOT_UNSET)
+                    it->slot_index = block_slot_hi;
+
+                while (it->slot_index >= block_slot_lo) {
+                    int32_t si = it->slot_index--;
+                    arSlice *s = e->slots[si];
+                    if (s && arScanIterPrepareSlice(it, s, block_base + si))
+                        return 1;
+                }
+
+                /* This block had no more matching slices, move to the previous block. */
+                it->sdir_index--;
+                it->slot_index = AR_SCAN_ITER_SLOT_UNSET;
+            } else {
+                /* slot_index uses a sentinel outside the valid 0..2047 range
+                 * so an exhausted block does not look like a fresh entry. */
+                if (it->slot_index == AR_SCAN_ITER_SLOT_UNSET)
+                    it->slot_index = block_slot_lo;
+
+                while (it->slot_index <= block_slot_hi) {
+                    int32_t si = it->slot_index++;
+                    arSlice *s = e->slots[si];
+                    if (s && arScanIterPrepareSlice(it, s, block_base + si))
+                        return 1;
+                }
+
+                /* This block had no more matching slices, move to the next block. */
+                it->sdir_index++;
+                it->slot_index = AR_SCAN_ITER_SLOT_UNSET;
+            }
+        }
+    } else {
+        while (!it->top_done) {
+            uint64_t slice_id = it->slice_id;
+            arSlice *s = ar->dir[slice_id];
+
+            /* Advance the top-level cursor before possibly returning this slice. */
+            if (it->reverse) {
+                if (slice_id == it->lo_slice) it->top_done = 1;
+                else it->slice_id = slice_id - 1;
+            } else {
+                if (slice_id == it->hi_slice) it->top_done = 1;
+                else it->slice_id = slice_id + 1;
+            }
+
+            if (s && arScanIterPrepareSlice(it, s, slice_id))
+                return 1;
+        }
+    }
+
+    return 0;
+}
+
+/* Return the next populated element in range, or 0 when done.
*/
+/* On success 1 is returned and '*value' receives the element. 'idx' may be
+ * NULL when the caller does not need the logical index; 'value' must always
+ * be a valid pointer. Once 0 has been returned the iterator stays exhausted. */
+static ALWAYS_INLINE int arScanIterNext(arScanIter *it,
+                                        uint64_t *idx, void **value)
+{
+    /* The iterator was already fully consumed. */
+    if (it->done) return 0;
+
+    while (1) {
+        if (it->slice_ready) {
+            /* Drain the current slice before asking for another one. */
+            if (it->dense) {
+                /* Walk the window towards dense_item_end, inclusive, in the
+                 * direction given by 'step'. */
+                while ((it->step > 0 && it->dense_item_pos <= it->dense_item_end) ||
+                       (it->step < 0 && it->dense_item_pos >= it->dense_item_end)) {
+                    uint32_t off = (uint32_t)it->dense_off;
+                    void *v = it->dense_items[it->dense_item_pos];
+                    /* Advance before returning, so the next call resumes
+                     * from the following position. */
+                    it->dense_off += it->step;
+                    it->dense_item_pos += it->step;
+
+                    /* Dense windows may contain holes. */
+                    if (arIsEmpty(v)) continue;
+
+                    if (idx) *idx = it->slice_base + off;
+                    *value = v;
+                    return 1;
+                }
+            } else {
+                while (it->sparse_pos >= 0 && it->sparse_pos < it->sparse_count) {
+                    int32_t pos = it->sparse_pos;
+                    uint32_t off = it->sparse_offsets[pos];
+
+                    /* Sparse entries are sorted, so leaving the window ends this slice. */
+                    if (off < it->off_lo || off > it->off_hi) break;
+
+                    it->sparse_pos += it->step;
+                    if (idx) *idx = it->slice_base + off;
+                    *value = it->sparse_values[pos];
+                    return 1;
+                }
+            }
+
+            /* The current slice has no more in-range populated elements. */
+            it->slice = NULL;
+            it->slice_ready = 0;
+        }
+
+        /* No more in-range slices are available. */
+        if (!arScanIterLoadNextSlice(it)) {
+            it->done = 1;
+            return 0;
+        }
+    }
+}
+
+/* ARSCAN key start end [LIMIT count]
+ *
+ * Returns only existing elements, as an array of nested [index, value]
+ * pairs.
+ *
+ * Complexity is O(P), where P is visited positions in touched slices
+ * (dense scanned slots + sparse entries), with worst-case O(|end-start|+1)
+ * and typical case close to O(N), where N is the number of existing
+ * elements in range. This means that huge ranges are safe and will not
+ * block the server with a work bound to the span length.
+ *
+ * Unlike ARGETRANGE, holes are skipped rather than returned as NULLs.
+ * LIMIT caps the number of returned pairs.
*/
+void arscanCommand(client *c) {
+    uint64_t start, end;
+    if (arrayParseIndexOrReply(c, c->argv[2], &start) != C_OK) return;
+    if (arrayParseIndexOrReply(c, c->argv[3], &end) != C_OK) return;
+
+    /* Only the bare form and the "LIMIT count" form are accepted. */
+    if (c->argc != 4 && c->argc != 6) {
+        addReplyErrorArity(c);
+        return;
+    }
+
+    /* Maximum number of pairs to emit: unbounded unless LIMIT is given. */
+    uint64_t budget = UINT64_MAX;
+    if (c->argc == 6) {
+        long long limit;
+
+        if (strcasecmp(c->argv[4]->ptr, "LIMIT") != 0) {
+            addReplyErrorObject(c, shared.syntaxerr);
+            return;
+        }
+        if (getLongLongFromObjectOrReply(c, c->argv[5], &limit, NULL) != C_OK)
+            return;
+        if (limit <= 0) {
+            addReplyError(c, "LIMIT must be positive");
+            return;
+        }
+        budget = (uint64_t)limit;
+    }
+
+    /* A missing key behaves like an empty array. */
+    robj *o = lookupKeyRead(c->db, c->argv[1]);
+    if (o == NULL) {
+        addReplyArrayLen(c, 0);
+        return;
+    }
+    if (checkType(c, o, OBJ_ARRAY)) return;
+
+    /* The number of pairs is only known at the end of the scan, so the
+     * outer array length is deferred. */
+    void *replylen = addReplyDeferredLen(c);
+    arScanIter it;
+    uint64_t idx, emitted = 0;
+    void *value;
+
+    arScanIterInit(o->ptr, start, end, &it);
+    for (; emitted < budget && arScanIterNext(&it, &idx, &value); emitted++) {
+        /* Each element is replied as a nested [idx, value] pair. */
+        addReplyArrayLen(c, 2);
+        addReplyUnsignedLongLong(c, idx);
+        addReplyArrayValue(c, value);
+    }
+
+    setDeferredArrayLen(c, replylen, emitted);
+}
+
+/* ============================================================================
+ * ARGREP
+ * ============================================================================
+ *
+ * Search existing array elements in a range using textual predicates.
+ * Like ARSCAN, the work is bound by the visited slices, not by the raw
+ * numeric span alone: dense slices scan the touched dense window, while
+ * sparse slices only scan stored entries inside the covered offsets.
+ * -------------------------------------------------------------------------- */ + +#define ARGREP_PRED_EXACT 1 +#define ARGREP_PRED_MATCH 2 +#define ARGREP_PRED_GLOB 3 +#define ARGREP_PRED_RE 4 + +#define ARGREP_MAX_PREDICATES 250 +#define ARGREP_MAX_RE_LEN 2048 + +#define ARGREP_COMBINE_OR 1 +#define ARGREP_COMBINE_AND 2 + +#define ARGREP_BOUND_INDEX 1 +#define ARGREP_BOUND_START 2 +#define ARGREP_BOUND_END 3 + +typedef struct { + int type; /* EXACT, MATCH, GLOB, or RE. */ + sds pattern; /* Pattern argument exactly as given by the user. */ + regex_t regex; /* Compiled regex for RE predicates. */ + int regex_compiled; /* Whether regex must be freed. */ +} arGrepPredicate; + +typedef struct { + int type; /* Numeric index, logical start, or logical end. */ + uint64_t index; /* Used only for numeric bounds. */ +} arGrepBound; + +typedef struct { + arGrepPredicate *preds; /* All predicates to apply to each element. */ + int num_preds; /* Number of predicates stored in preds[]. */ + int combine; /* OR by default, AND if requested. */ + int withvalues; /* Reply with [idx value ...] instead of [idx ...]. */ + int nocase; /* Apply case-insensitive matching globally. */ +} arGrepPlan; + +/* Lowercase only ASCII letters. This keeps MATCH/EXACT deterministic and + * locale-independent even on arbitrary binary payloads. */ +static inline unsigned char arGrepLowerAscii(unsigned char c) { + return (c >= 'A' && c <= 'Z') ? (unsigned char)(c + ('a' - 'A')) : c; +} + +/* Compare two byte strings, optionally ignoring ASCII case. */ +int arGrepBytesEqual(const char *a, size_t alen, const char *b, size_t blen, + int nocase) { + if (alen != blen) return 0; + if (!nocase) return memcmp(a, b, alen) == 0; + + for (size_t i = 0; i < alen; i++) { + if (arGrepLowerAscii((unsigned char)a[i]) != + arGrepLowerAscii((unsigned char)b[i])) { + return 0; + } + } + return 1; +} + +/* Find a needle inside a byte string, optionally ignoring ASCII case. 
*/ +int arGrepBytesContains(const char *haystack, size_t haystack_len, + const char *needle, size_t needle_len, int nocase) { + if (needle_len == 0) return 1; + if (needle_len > haystack_len) return 0; + + size_t last = haystack_len - needle_len; + for (size_t i = 0; i <= last; i++) { + if (arGrepBytesEqual(haystack + i, needle_len, needle, needle_len, + nocase)) { + return 1; + } + } + return 0; +} + +/* Return the predicate type for a keyword, or 0 if it is not one. */ +int arGrepPredicateType(const char *token) { + if (!strcasecmp(token, "EXACT")) return ARGREP_PRED_EXACT; + if (!strcasecmp(token, "MATCH")) return ARGREP_PRED_MATCH; + if (!strcasecmp(token, "GLOB")) return ARGREP_PRED_GLOB; + if (!strcasecmp(token, "RE")) return ARGREP_PRED_RE; + return 0; +} + +/* Free any compiled regex state created while parsing ARGREP. */ +void arGrepFreePlan(arGrepPlan *plan) { + if (plan->preds == NULL) return; + + for (int i = 0; i < plan->num_preds; i++) { + if (plan->preds[i].regex_compiled) + tre_regfree(&plan->preds[i].regex); + } + zfree(plan->preds); + plan->preds = NULL; +} + +/* Parse a bound argument. ARGREP accepts the special tokens "-" and "+" + * in addition to normal array indexes. */ +int arGrepParseBoundOrReply(client *c, robj *arg, arGrepBound *bound) { + if (arg->encoding != OBJ_ENCODING_INT) { + sds token = arg->ptr; + if (sdslen(token) == 1 && token[0] == '-') { + bound->type = ARGREP_BOUND_START; + bound->index = 0; + return C_OK; + } + if (sdslen(token) == 1 && token[0] == '+') { + bound->type = ARGREP_BOUND_END; + bound->index = 0; + return C_OK; + } + } + + if (getArrayIndexFromObject(arg, &bound->index, 0) != C_OK) { + addReplyError(c, "invalid array index"); + return C_ERR; + } + bound->type = ARGREP_BOUND_INDEX; + return C_OK; +} + +/* Resolve a parsed bound against the current array length. 
*/
+uint64_t arGrepResolveBound(arGrepBound *bound, uint64_t max_index) {
+    switch (bound->type) {
+    case ARGREP_BOUND_START:
+        return 0;
+    case ARGREP_BOUND_END:
+        return max_index;
+    default:
+        /* Plain numeric bound. */
+        return bound->index;
+    }
+}
+
+/* Compile every RE predicate of the plan. This runs only after the whole
+ * command was parsed, so that NOCASE is already known and is applied to all
+ * the regexes in the same way. On error a reply is sent and C_ERR returned;
+ * regexes compiled so far are flagged, so arGrepFreePlan() releases them. */
+int arGrepCompileRegexesOrReply(client *c, arGrepPlan *plan) {
+    /* The flags are the same for every regex of this command. */
+    const int cflags = REG_EXTENDED | REG_NOSUB | REG_USEBYTES |
+                       (plan->nocase ? REG_ICASE : 0);
+    arGrepPredicate *stop = plan->preds + plan->num_preds;
+
+    for (arGrepPredicate *pred = plan->preds; pred < stop; pred++) {
+        if (pred->type != ARGREP_PRED_RE) continue;
+
+        size_t pattern_len = sdslen(pred->pattern);
+        if (pattern_len == 0) {
+            addReplyError(c, "regular expression is empty");
+            return C_ERR;
+        }
+
+        int err = tre_regncompb(&pred->regex, pred->pattern, pattern_len,
+                                cflags);
+        if (err != REG_OK) {
+            char errbuf[256];
+            tre_regerror(err, &pred->regex, errbuf, sizeof(errbuf));
+            addReplyErrorFormat(c, "invalid regular expression: %s", errbuf);
+            return C_ERR;
+        }
+        /* From now on the regex is owned by the plan. */
+        pred->regex_compiled = 1;
+
+        if (tre_have_backrefs(&pred->regex)) {
+            addReplyError(c, "regular expression backreferences are not supported");
+            return C_ERR;
+        }
+    }
+    return C_OK;
+}
+
+/* Parse predicates and global modifiers in a single pass. This makes the
+ * command more user-friendly because predicates and options can be mixed
+ * freely. If the same global option appears multiple times, the last one
+ * wins.
*/ +int arGrepParsePlanOrReply(client *c, arGrepPlan *plan, uint64_t *limit) { + memset(plan, 0, sizeof(*plan)); + plan->combine = ARGREP_COMBINE_OR; + *limit = UINT64_MAX; + + int max_preds = c->argc - 4; + plan->preds = zcalloc(sizeof(*plan->preds) * max_preds); + + for (int arg = 4; arg < c->argc; ) { + sds token = c->argv[arg]->ptr; + int type = arGrepPredicateType(token); + + if (type != 0) { + if (arg + 1 >= c->argc) { + addReplyErrorObject(c, shared.syntaxerr); + return C_ERR; + } + if (plan->num_preds >= ARGREP_MAX_PREDICATES) { + addReplyErrorFormat(c, "too many predicates, maximum is %d", + ARGREP_MAX_PREDICATES); + return C_ERR; + } + + arGrepPredicate *pred = &plan->preds[plan->num_preds++]; + pred->type = type; + pred->pattern = c->argv[arg + 1]->ptr; + if (type == ARGREP_PRED_RE && + sdslen(pred->pattern) > ARGREP_MAX_RE_LEN) { + addReplyErrorFormat(c, + "regular expression is too long, maximum is %d bytes", + ARGREP_MAX_RE_LEN); + return C_ERR; + } + arg += 2; + continue; + } + + if (!strcasecmp(token, "LIMIT")) { + if (arg + 1 >= c->argc) { + addReplyErrorObject(c, shared.syntaxerr); + return C_ERR; + } + + long long ll; + if (getLongLongFromObjectOrReply(c, c->argv[arg + 1], &ll, NULL) + != C_OK) { + return C_ERR; + } + if (ll <= 0) { + addReplyError(c, "LIMIT must be positive"); + return C_ERR; + } + + *limit = (uint64_t)ll; + arg += 2; + continue; + } + + if (!strcasecmp(token, "WITHVALUES")) { + plan->withvalues = 1; + arg++; + continue; + } + + if (!strcasecmp(token, "NOCASE")) { + plan->nocase = 1; + arg++; + continue; + } + + if (!strcasecmp(token, "AND") || !strcasecmp(token, "OR")) { + plan->combine = !strcasecmp(token, "AND") ? 
+ ARGREP_COMBINE_AND : ARGREP_COMBINE_OR; + arg++; + continue; + } + + addReplyErrorObject(c, shared.syntaxerr); + return C_ERR; + } + + if (plan->num_preds == 0) { + addReplyErrorObject(c, shared.syntaxerr); + return C_ERR; + } + + return arGrepCompileRegexesOrReply(c, plan); +} + +/* Match one predicate against the decoded element bytes. */ +int arGrepMatchPredicate(arGrepPredicate *pred, const char *data, size_t len, + int nocase) { + size_t pattern_len = sdslen(pred->pattern); + + switch (pred->type) { + case ARGREP_PRED_EXACT: + return arGrepBytesEqual(data, len, pred->pattern, pattern_len, nocase); + case ARGREP_PRED_MATCH: + return arGrepBytesContains(data, len, pred->pattern, pattern_len, + nocase); + case ARGREP_PRED_GLOB: + return stringmatchlen(pred->pattern, pattern_len, data, len, nocase); + case ARGREP_PRED_RE: + return tre_regnexecb(&pred->regex, data, len, 0, NULL, 0) == REG_OK; + default: + serverPanic("Unknown ARGREP predicate type"); + } +} + +/* Decode one array value and apply all the predicates to it. */ +int arGrepValueMatches(arGrepPlan *plan, void *v) { + char buf[AR_INLINE_BUFSIZE]; + size_t len; + const char *data = arDecode(v, buf, sizeof(buf), &len); + + if (plan->combine == ARGREP_COMBINE_AND) { + for (int i = 0; i < plan->num_preds; i++) { + if (!arGrepMatchPredicate(&plan->preds[i], data, len, + plan->nocase)) { + return 0; + } + } + return 1; + } + + for (int i = 0; i < plan->num_preds; i++) { + if (arGrepMatchPredicate(&plan->preds[i], data, len, plan->nocase)) + return 1; + } + return 0; +} + +/* ARGREP key start end + * (EXACT string | MATCH string | GLOB pattern | RE pattern) ... + * [AND | OR] [LIMIT count] [WITHVALUES] [NOCASE] + * + * Search existing elements in a range and return matching indexes. + * + * Complexity is O(P * C), where P is the number of visited positions in the + * touched slices and C is the cost of evaluating the active predicates. 
+ * Dense slices scan the touched dense window, sparse slices only visit stored + * entries, and LIMIT stops as soon as enough matches were emitted. + * + * "-" and "+" mean the logical start and end of the array. WITHVALUES changes + * the reply from [idx ...] to [idx value ...]. */ +void argrepCommand(client *c) { + arGrepBound start_bound, end_bound; + if (arGrepParseBoundOrReply(c, c->argv[2], &start_bound) != C_OK) return; + if (arGrepParseBoundOrReply(c, c->argv[3], &end_bound) != C_OK) return; + + arGrepPlan plan; + uint64_t remaining; + if (arGrepParsePlanOrReply(c, &plan, &remaining) != C_OK) { + arGrepFreePlan(&plan); + return; + } + + robj *o = lookupKeyRead(c->db, c->argv[1]); + if (o != NULL && checkType(c, o, OBJ_ARRAY)) { + arGrepFreePlan(&plan); + return; + } + if (o == NULL) { + arGrepFreePlan(&plan); + addReplyArrayLen(c, 0); + return; + } + + redisArray *ar = o->ptr; + uint64_t ar_len = arLen(ar); + if (ar_len == 0 || arCount(ar) == 0) { + arGrepFreePlan(&plan); + addReplyArrayLen(c, 0); + return; + } + + void *replylen = addReplyDeferredLen(c); + uint64_t count = 0; + uint64_t max_index = ar_len - 1; + uint64_t start = arGrepResolveBound(&start_bound, max_index); + uint64_t end = arGrepResolveBound(&end_bound, max_index); + arScanIter it; + uint64_t idx; + void *v; + + arScanIterInit(ar, start, end, &it); + while (remaining && arScanIterNext(&it, &idx, &v)) { + if (!arGrepValueMatches(&plan, v)) continue; + /* With WITHVALUES, reply nested [idx, value] pairs. */ + if (plan.withvalues) addReplyArrayLen(c, 2); + addReplyUnsignedLongLong(c, idx); + if (plan.withvalues) addReplyArrayValue(c, v); + count++; + remaining--; + } + + setDeferredArrayLen(c, replylen, count); + arGrepFreePlan(&plan); +} + +/* ============================================================================ + * AROP + * ============================================================================ + * + * Aggregate operations over a range. 
Uses O(N) iteration where N is the
+ * number of stored elements. Dense slices scan the window intersection
+ * (bounded by dense.winsize, kept small by demotion when density drops).
+ * -------------------------------------------------------------------------- */
+
+/* Operation types for AROP */
+#define AROP_SUM 1   /* Sum of numeric elements in range. */
+#define AROP_MIN 2   /* Minimum numeric element in range. */
+#define AROP_MAX 3   /* Maximum numeric element in range. */
+#define AROP_AND 4   /* Bitwise AND of integer elements in range. */
+#define AROP_OR 5    /* Bitwise OR of integer elements in range. */
+#define AROP_XOR 6   /* Bitwise XOR of integer elements in range. */
+#define AROP_MATCH 7 /* Count elements equal to a target string. */
+#define AROP_USED 8  /* Count of non-empty (used) slots in range. */
+
+/* Accumulator state for AROP.
+ *
+ * Only the fields relevant to the selected 'op' are updated while scanning.
+ * 'has_numeric' and 'has_int' tell whether the corresponding accumulator
+ * was seeded by at least one element: when they are still zero at the end
+ * of the scan, the command replies with NULL instead of a number. */
+typedef struct {
+    int op;                 /* Selected AROP operation. */
+    sds match_val;          /* MATCH target string. */
+    long double sum_acc;    /* Running SUM accumulator. */
+    long double minmax_acc; /* Running MIN or MAX accumulator. */
+    int64_t bitwise_acc;    /* Running AND/OR/XOR accumulator. */
+    long long match_count;  /* Number of MATCH hits. */
+    long long used_count;   /* Number of non-empty elements seen. */
+    int has_numeric;        /* Saw at least one numeric value. */
+    int has_int;            /* Saw at least one bitwise-usable integer. */
+} arOpAcc;
+
+/* Process a single value for AROP aggregation, aggregating it
+ * into the structure arOpAcc 'acc'. This helper is used
+ * directly by the AROP command implementation while scanning
+ * populated elements in the requested range.
*/ +static inline void arOpAccumulate(arOpAcc *acc, void *v) { + if (acc->op == AROP_USED) { + acc->used_count++; + return; + } + + if (acc->op == AROP_MATCH) { + size_t vlen; + char vbuf[AR_INLINE_BUFSIZE]; + const char *data = arDecode(v, vbuf, sizeof(vbuf), &vlen); + if (vlen == sdslen(acc->match_val) && + memcmp(data, acc->match_val, vlen) == 0) { + acc->match_count++; + } + return; + } + + /* Numeric operations */ + long double num; + int is_int = 0; + int64_t ival = 0; + + if (arIsInt(v)) { + ival = arToInt(v); + num = (long double)ival; + is_int = 1; + } else if (arIsFloat(v)) { + num = (long double)arToDouble(v); + } else { + const char *data; + size_t vlen; + char smallbuf[8]; + + if (arIsSmallStr(v)) { + vlen = arToSmallStr(v, smallbuf); + data = smallbuf; + } else { + data = arStringData(v); + vlen = arStringLen(v); + } + + long long ll; + if (string2ll(data, vlen, &ll)) { + ival = ll; + num = (long double)ll; + is_int = 1; + } else { + long double ld; + if (string2ld(data, vlen, &ld)) { + num = ld; + } else { + return; + } + } + } + + if (acc->op == AROP_AND || acc->op == AROP_OR || acc->op == AROP_XOR) { + if (!is_int) { + /* If it is a float, we only take the integer part. */ + if (isnan(num)) return; + if (num < (long double)INT64_MIN || num > (long double)INT64_MAX) + return; + ival = (int64_t)num; /* Truncate toward zero. */ + } + if (!acc->has_int) { + acc->bitwise_acc = ival; + acc->has_int = 1; + } else { + if (acc->op == AROP_AND) acc->bitwise_acc &= ival; + else if (acc->op == AROP_OR) acc->bitwise_acc |= ival; + else acc->bitwise_acc ^= ival; + } + } else { + if (!acc->has_numeric) { + /* Handle the first element seen for SUM, MIN, MAX. 
*/ + acc->sum_acc = num; + acc->minmax_acc = num; + acc->has_numeric = 1; + } else { + if (acc->op == AROP_SUM) + acc->sum_acc += num; + else if (acc->op == AROP_MIN && num < acc->minmax_acc) + acc->minmax_acc = num; + else if (acc->op == AROP_MAX && num > acc->minmax_acc) + acc->minmax_acc = num; + } + } +} + +/* AROP key start end OP [arg] + * + * Aggregates over existing elements in the requested range, the + * aggregation performed depends in the "op" argument. + * + * Complexity is O(P), where P is visited positions in touched slices + * (dense scanned slots + sparse entries), with worst-case O(|end-start|+1) + * and typical case close to O(N), where N is the number of existing + * elements in range. + * + * MATCH and USED count hits. SUM/MIN/MAX ignore values that are not numeric. + * AND/OR/XOR truncate floats toward zero and ignore values that, after the + * truncation, cannot be represented as int64_t. */ +void aropCommand(client *c) { + uint64_t start, end; + if (arrayParseIndexOrReply(c, c->argv[2], &start) != C_OK) return; + if (arrayParseIndexOrReply(c, c->argv[3], &end) != C_OK) return; + + const char *opstr = c->argv[4]->ptr; + int op = 0; + if (!strcasecmp(opstr, "SUM")) op = AROP_SUM; + else if (!strcasecmp(opstr, "MIN")) op = AROP_MIN; + else if (!strcasecmp(opstr, "MAX")) op = AROP_MAX; + else if (!strcasecmp(opstr, "AND")) op = AROP_AND; + else if (!strcasecmp(opstr, "OR")) op = AROP_OR; + else if (!strcasecmp(opstr, "XOR")) op = AROP_XOR; + else if (!strcasecmp(opstr, "MATCH")) op = AROP_MATCH; + else if (!strcasecmp(opstr, "USED")) op = AROP_USED; + else { + addReplyError(c, "unknown operation"); + return; + } + + sds match_val = NULL; + if (op == AROP_MATCH) { + if (c->argc != 6) { + addReplyError(c, "MATCH requires a value argument"); + return; + } + match_val = c->argv[5]->ptr; + } else if (c->argc != 5) { + addReplyErrorArity(c); + return; + } + + robj *o = lookupKeyRead(c->db, c->argv[1]); + if (o == NULL) { + if (op == AROP_MATCH || op 
== AROP_USED) { + addReplyLongLong(c, 0); + } else { + addReplyNull(c); + } + return; + } + if (checkType(c, o, OBJ_ARRAY)) return; + + redisArray *ar = o->ptr; + arOpAcc acc = { + .op = op, .match_val = match_val, + .sum_acc = 0, .minmax_acc = 0, .bitwise_acc = 0, + .match_count = 0, .used_count = 0, + .has_numeric = 0, .has_int = 0 + }; + arScanIter it; + void *v; + + /* All current AROP operations are order-independent, so iterating the + * user-provided direction is fine here. */ + arScanIterInit(ar, start, end, &it); + while (arScanIterNext(&it, NULL, &v)) + arOpAccumulate(&acc, v); + + /* Reply: MATCH and USED reply with an integer counter. AND/OR/XOR reply + * with an integer, or null if no usable value was seen. SUM/MIN/MAX reply + * with the result formatted by ld2string() as a bulk string, or null if + * no numeric value was seen. */ + if (op == AROP_MATCH) { + addReplyLongLong(c, acc.match_count); + } else if (op == AROP_USED) { + addReplyLongLong(c, acc.used_count); + } else if (op == AROP_AND || op == AROP_OR || op == AROP_XOR) { + if (!acc.has_int) addReplyNull(c); + else addReplyLongLong(c, acc.bitwise_acc); + } else { + if (!acc.has_numeric) { + addReplyNull(c); + } else { + long double result = (op == AROP_SUM) ? acc.sum_acc : acc.minmax_acc; + char buf[MAX_LONG_DOUBLE_CHARS + 1]; + int len = ld2string(buf, sizeof(buf), result, LD_STR_AUTO); + addReplyBulkCBuffer(c, buf, len); + } + } +} + +/* ---------------------------------------------------------------------------- + * The ring buffer family of commands: + * + * ARINSERT / ARRING / ARNEXT / ARSEEK / ARLASTITEMS + * -------------------------------------------------------------------------- */ + +/* ARINSERT key value [value ...] + * + * Appends one or more values at the private insert cursor in O(N), where N is + * the number of values. The whole batch fails on index overflow. + * + * The cursor is then advanced to the last written index, which is also + * returned as the command return value, and can be inspected later + * with ARNEXT. 
*/ +void arinsertCommand(client *c) { + robj *o = lookupArrayForWriteOrReply(c, c->argv[1]); + if (o == NULL) return; + + redisArray *ar = o->ptr; + /* Snapshot the element count and allocation size before writing: they + * feed the keysizes histogram and slot allocation updates below. */ + uint64_t old_count = arCount(ar); + size_t old_alloc = 0; + if (server.memory_tracking_enabled) old_alloc = kvobjAllocSize(o); + int num_values = c->argc - 2; + + /* Pre-validate: compute start cursor and check entire batch fits */ + uint64_t start_cursor; + if (ar->insert_idx == AR_INSERT_IDX_NONE) { + start_cursor = 0; + } else { + if (ar->insert_idx >= UINT64_MAX - 1) { + addReplyError(c, "insert index overflow"); + return; + } + start_cursor = ar->insert_idx + 1; + } + + /* Check last cursor won't overflow or reach forbidden index: a wrapped + * sum shows up as last_cursor < start_cursor, and UINT64_MAX itself is + * rejected as a write index. */ + uint64_t last_cursor = start_cursor + (uint64_t)num_values - 1; + if (last_cursor < start_cursor || last_cursor == UINT64_MAX) { + addReplyError(c, "insert index overflow"); + return; + } + + /* Pre-promote sparse slices only for true bulk inserts. A single-element + * insert does not benefit from the extra range-analysis pass. */ + if (num_values > 1) + arMayPromoteToDenseForRangeSet(ar, start_cursor, last_cursor); + + /* Apply all values */ + uint64_t cursor = start_cursor; + for (int i = 0; i < num_values; i++) { + sds val = c->argv[2 + i]->ptr; + void *v = arEncode(val, sdslen(val)); + arSet(ar, cursor, v); + cursor++; + } + /* Leave the cursor on the last written index: this is also the reply. */ + ar->insert_idx = last_cursor; + + updateKeysizesHist(c->db, OBJ_ARRAY, old_count, arCount(ar)); + if (server.memory_tracking_enabled) + updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), o, old_alloc, kvobjAllocSize(o)); + keyModified(c, c->db, c->argv[1], o, 1); + notifyKeyspaceEvent(NOTIFY_ARRAY, "arinsert", c->argv[1], c->db->id); + server.dirty += num_values; + + addReplyUnsignedLongLong(c, ar->insert_idx); +} + +/* Duplicate one array value exactly. Immediate values can be copied as tagged + * words, while heap strings are re-encoded from their logical string form. 
+ * This could be regarded as costly, but capturing values out of the existing + * array would break the sparsearray API isolation. */ +static void *arRingDupValue(void *v) { + /* NULL and non-pointer values are returned unchanged. */ + if (v == NULL || !arIsPtr(v)) return v; + return arEncode(arStringData(v), arStringLen(v)); +} + +/* Return the next slot that ARRING would write to before modulo reduction. */ +static uint64_t arRingNextCursor(redisArray *ar) { + return (ar->insert_idx == AR_INSERT_IDX_NONE) ? 0 : ar->insert_idx + 1; +} + +/* Decide if ARRING needs to rebuild the retained logical ring positions before + * writing new values. + * + * We rebuild in only two cases: + * + * 1. Shrink: new size is smaller than the current inferred ring span. + * 2. Grow after wrap: the ring had already wrapped inside the old span, so + * without a rebuild the next write would overwrite old low indexes instead + * of using the newly added capacity. + * + * An explicit ARSEEK 0 is treated differently on grow: it is a direct cursor + * override saying "write next at index 0", so we honor it instead of forcing + * a grow-after-wrap repack first. + * + * keep_span is the maximum number of logical positions that may be retained. + * + * Returns 1 when a rebuild is required and 0 otherwise. *old_span is always + * set to arLen(ar); *keep_span is non-zero only when 1 is returned. */ +static int arRingNeedsRework(redisArray *ar, uint64_t ring_size, + uint64_t *old_span, uint64_t *keep_span) { + *old_span = arLen(ar); + *keep_span = 0; + + /* Zero length: nothing to retain. */ + if (*old_span == 0) return 0; + + /* Case 1, shrink: keep at most ring_size positions. */ + if (ring_size < *old_span) { + *keep_span = ring_size; + return 1; + } + /* Same size: the layout is already right. */ + if (ring_size == *old_span) { + return 0; + } + /* From here on ring_size > *old_span (grow). With no insert cursor + * (fresh array or ARSEEK 0) the next write goes to index 0 as is. */ + if (ar->insert_idx == AR_INSERT_IDX_NONE) { + return 0; + } + /* Case 2, grow after wrap: the next write would land inside the old + * span, so repack the whole old span first. */ + if (arRingNextCursor(ar) < *old_span) { + *keep_span = *old_span; + return 1; + } + return 0; +} + +/* Rebuild the retained logical ring positions into a fresh compact array. + * + * We walk backward from the current anchor and keep at most keep_span items, + * but stop as soon as the first NULL is encountered. This makes resize keep + * the latest contiguous tail of existing items instead of crossing holes. 
+ * + * The retained items are replayed in chronological order, oldest to newest, + * so after the rebuild: + * + * - index 0 holds the oldest retained position + * - index retained_count-1 holds the newest retained position + * - insert_idx points to retained_count-1, ready for the next ARRING write + * + * We use two passes: one backward pass to count the contiguous retained tail, + * then one forward replay pass into the new array. This avoids any temporary + * retained-items buffer. */ +static redisArray *arRingRework(redisArray *ar, uint64_t old_span, + uint64_t keep_span) { + serverAssert(old_span > 0); + serverAssert(keep_span > 0); + serverAssert(keep_span <= old_span); + + redisArray *new_ar = arNew(); + + /* The rebuild operates on the inferred ring window [0..old_span-1]. If + * insert_idx is outside that window because of ARSEEK, fold it back into + * the current inferred span with modulo. If ARSEEK 0 was used and we are + * shrinking, anchor the walk at the current tail, just like ARLASTITEMS. + * Grow does not reach this path because arRingNeedsRework() skips grow + * rework when insert_idx is AR_INSERT_IDX_NONE. */ + uint64_t anchor_idx = (ar->insert_idx == AR_INSERT_IDX_NONE) ? + (old_span - 1) : (ar->insert_idx % old_span); + + uint64_t retained_count = 0; + uint64_t src_idx = anchor_idx; + + /* First pass: walk backward from the anchor, wrapping at index 0, and + * count the contiguous run of non-NULL items, up to keep_span. */ + while (retained_count < keep_span) { + void *v = arGet(ar, src_idx); + if (v == NULL) break; /* This makes any mix of ARSET/SEEK/RING calls + * always bound to populated items, not logical + * array span. */ + + retained_count++; + src_idx = (src_idx == 0) ? old_span - 1 : src_idx - 1; + } + + /* src_idx now points to the position just before the oldest retained + * item, so advance once to start replaying oldest -> newest. 
*/ + src_idx++; + if (src_idx == old_span) src_idx = 0; + + /* Second pass: replay the retained items, oldest first, into the fresh + * array starting at index 0. Values are duplicated, not moved. */ + for (uint64_t dst_idx = 0; dst_idx < retained_count; dst_idx++) { + void *v = arGet(ar, src_idx); + serverAssert(v != NULL); + arSet(new_ar, dst_idx, arRingDupValue(v)); + + src_idx++; + if (src_idx == old_span) src_idx = 0; + } + /* When nothing was retained, insert_idx keeps the value set by arNew(). */ + if (retained_count != 0) new_ar->insert_idx = retained_count - 1; + return new_ar; +} + +/* ARRING key size value [value ...] + * + * Writes values into a logical ring buffer. May rework the array if + * the logical size changes across calls, so that up to 'size' + * items are retained in the correct logical position. + * + * Complexity is O(M) normally, where M is the number of inserted values, + * and O(N+M) on resize, where N is the maximum of the old and new ring size. + * The rebuild stops at the first NULL, so holes cut the retained tail. + * + * ARSEEK 0 is still honored as a direct cursor override on grow. + * + * Returns the last written slot. */ +void arringCommand(client *c) { + long long ll; + if (getLongLongFromObjectOrReply(c,c->argv[2],&ll,"invalid size") != C_OK) + return; + if (ll <= 0) { + addReplyError(c, "size must be positive"); + return; + } + uint64_t ring_size = (uint64_t)ll; + + robj *o = lookupArrayForWriteOrReply(c, c->argv[1]); + if (o == NULL) return; + + redisArray *ar = o->ptr; + uint64_t old_count = arCount(ar); + size_t old_alloc = 0; + if (server.memory_tracking_enabled) old_alloc = kvobjAllocSize(o); + int num_values = c->argc - 3; + uint64_t cursor = 0; + + /* If the requested size changes the logical ring shape, rebuild once + * before the hot insertion loop. This makes the command, when the user + * updates the window, no longer O(M), but O(N+M), however note that this + * is absolutely needed for high level sane semantics. Users will resize + * ring buffers, and they want to retain the latest items in a logically + * correct way. 
*/ + uint64_t old_span, keep_span; + if (arRingNeedsRework(ar, ring_size, &old_span, &keep_span)) { + /* arRingRework() returns a new array holding duplicated values, so + * the old array is freed and the key object is pointed at the new one. */ + redisArray *new_ar = arRingRework(ar, old_span, keep_span); + arFree(ar); + o->ptr = ar = new_ar; + } + + /* Set the new items, modulo ring size. */ + for (int i = 0; i < num_values; i++) { + /* Compute the next write position, then wrap it into the requested + * ring size if needed. By this point any needed resize/rework was + * already handled above. */ + cursor = arRingNextCursor(ar); + if (cursor >= ring_size) cursor = cursor % ring_size; + + /* Set the value */ + sds val = c->argv[3 + i]->ptr; + void *v = arEncode(val, sdslen(val)); + arSet(ar, cursor, v); + ar->insert_idx = cursor; + } + + updateKeysizesHist(c->db, OBJ_ARRAY, old_count, arCount(ar)); + if (server.memory_tracking_enabled) + updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), o, old_alloc, kvobjAllocSize(o)); + keyModified(c, c->db, c->argv[1], o, 1); + notifyKeyspaceEvent(NOTIFY_ARRAY, "arring", c->argv[1], c->db->id); + server.dirty += num_values; + + /* cursor holds the slot written by the last loop iteration. */ + addReplyUnsignedLongLong(c, cursor); +} + +/* ARNEXT key + * + * Returns in O(1) the next index that ARINSERT / ARRING would use (for ARRING + * this is the position before the modulo reduction by the ring size). + * + * Missing keys and the pre-insert state reply with 0. If the cursor is in the + * terminal state where the next append would overflow, the reply is NULL. */ +void arnextCommand(client *c) { + robj *o = lookupKeyRead(c->db, c->argv[1]); + if (o == NULL) { + addReplyLongLong(c, 0); + return; + } + if (checkType(c, o, OBJ_ARRAY)) return; + + redisArray *ar = o->ptr; + if (ar->insert_idx == AR_INSERT_IDX_NONE) { + addReplyLongLong(c, 0); + } else if (ar->insert_idx == UINT64_MAX - 1) { + addReplyNull(c); /* Terminal: index space exhausted */ + } else { + addReplyUnsignedLongLong(c, ar->insert_idx + 1); + } +} + +/* ARSEEK key idx + * + * Sets in O(1) the next index used by ARINSERT and ARRING. + * + * Returns 1 if the cursor was updated and 0 if the key does not exist. 
+ * idx 0 resets the insert state to "next write goes to 0": in this case + * successive ARRING calls are guaranteed to don't rework the array in chase + * of logical size change. */ +void arseekCommand(client *c) { + uint64_t idx; + /* Allow UINT64_MAX because ARSEEK UINT64_MAX sets insert_idx to + * UINT64_MAX-1, which is a valid terminal state (next ARINSERT + * would overflow and fail). This is needed for AOF persistence. */ + if (getArrayIndexFromObject(c->argv[2], &idx, 1) != C_OK) { + addReplyError(c, "invalid array index"); + return; + } + + /* There aren't many good options for non existing keys: both creating + * an empty array or failing with "no such key" does not align very + * well with the Redis commands usual semantics. However we need to signal + * back that we ignored the index set if the key is not there, so zero + * is returned. */ + robj *o = lookupKeyWrite(c->db, c->argv[1]); + if (o == NULL) { + addReplyLongLong(c, 0); + return; + } + if (checkType(c, o, OBJ_ARRAY)) return; + + redisArray *ar = o->ptr; + + /* Set insert_idx so next ARINSERT writes to idx. insert_idx stores the + * last written index, hence idx - 1. + * + * NOTE(review): with idx 0 the cursor becomes AR_INSERT_IDX_NONE, which + * makes arRingNeedsRework() skip only the grow-after-wrap rework. A later + * ARRING with a size smaller than the array length still reworks. */ + if (idx == 0) { + ar->insert_idx = AR_INSERT_IDX_NONE; + } else { + ar->insert_idx = idx - 1; + } + + keyModified(c, c->db, c->argv[1], o, 1); + notifyKeyspaceEvent(NOTIFY_ARRAY, "arseek", c->argv[1], c->db->id); + server.dirty++; + addReplyLongLong(c, 1); +} + +/* ARLASTITEMS key count [REV] + * + * Returns the most recent positions from the current insert anchor in O(N), + * where N is the requested count (capped at the number of stored elements). + * REV flips the reply order. + * + * This command may return NULLs because it walks positions, not only existing + * items. If ARSEEK 0 was used, the current array tail is used as the anchor. */ +void arlastitemsCommand(client *c) { + long long count; + if (getLongLongFromObjectOrReply(c, c->argv[2], &count, + "invalid COUNT") != C_OK) return; + + /* For count <= 0, nothing to return, just an empty array. 
*/ + if (count <= 0) { + addReplyArrayLen(c, 0); + return; + } + + /* Parse REV if provided. Note that this runs after the count <= 0 early + * return above, so the trailing argument is not validated in that case. */ + int rev = 0; + if (c->argc == 4) { + if (strcasecmp(c->argv[3]->ptr, "REV") == 0) { + rev = 1; + } else { + addReplyErrorObject(c, shared.syntaxerr); + return; + } + } else if (c->argc != 3) { + addReplyErrorArity(c); + return; + } + + /* No key? Empty reply. */ + robj *o = lookupKeyRead(c->db, c->argv[1]); + if (o == NULL) { + addReplyArrayLen(c, 0); + return; + } + if (checkType(c, o, OBJ_ARRAY)) return; + + redisArray *ar = o->ptr; + uint64_t ar_len = arLen(ar); + /* Never walk more positions than the number of stored elements. */ + uint64_t effective_count = + (uint64_t)count > ar->count ? ar->count : (uint64_t)count; + + /* Should never happen in practice, because we checked the COUNT before + * and the array should not be empty to be still a Redis key, so this + * is mostly a safety net. */ + if (effective_count == 0) { + addReplyArrayLen(c, 0); + return; + } + + /* Collect items walking backward from insert_idx. If ARSEEK 0 was used, + * insert_idx is AR_INSERT_IDX_NONE: in that case use the max set index as + * the anchor so ARLASTITEMS still reports the tail of the current array. + * + * Note that we use an array to collect the items: in the no-REV case + * otherwise a double scan would be needed. */ + void **collected = zmalloc(effective_count * sizeof(void *)); + uint64_t anchor_idx = + (ar->insert_idx == AR_INSERT_IDX_NONE) ? ar_len - 1 : ar->insert_idx; + uint64_t current_idx = anchor_idx; + uint64_t steps = 0; + + /* collected[0] is the anchor position, later entries are older ones. + * Positions with no element are stored as returned by arGet(). */ + while(steps < effective_count) { + collected[steps] = arGet(ar, current_idx); + steps++; + + /* Decrement with wrap */ + if (current_idx == 0) { + current_idx = ar_len - 1; + } else { + current_idx--; + } + } + + /* Emit the protocol with the collected items. 
*/ + addReplyArrayLen(c, steps); + if (rev) { + /* Return in reverse chronological order (newest first) */ + for (uint64_t i = 0; i < steps; i++) + addReplyArrayValue(c, collected[i]); + } else { + /* Return in chronological order (oldest first). The index is signed + * so that the loop can terminate after reaching 0. */ + for (int64_t i = steps - 1; i >= 0; i--) + addReplyArrayValue(c, collected[i]); + } + zfree(collected); +} + +/* ---------------------------------------------------------------------------- + * ARINFO + * -------------------------------------------------------------------------- */ + +/* ARINFO key [FULL] + * + * Returns metadata about the array in O(1), or O(N) with FULL where N is the + * number of slices. Unlike ARLEN and ARCOUNT, a missing key is an error. + * FULL adds per-encoding slice statistics by scanning the directory. */ +void arinfoCommand(client *c) { + int full = 0; + + /* The only accepted optional argument is FULL (case insensitive). */ + if (c->argc > 2) { + if (c->argc == 3 && !strcasecmp(c->argv[2]->ptr, "full")) { + full = 1; + } else { + addReplyErrorObject(c, shared.syntaxerr); + return; + } + } + + robj *o = lookupKeyRead(c->db, c->argv[1]); + if (o == NULL) { + addReplyError(c, "no such key"); + return; + } + if (checkType(c, o, OBJ_ARRAY)) return; + + redisArray *ar = o->ptr; + + /* Per-encoding stats (only computed for FULL) */ + uint64_t num_dense = 0; + uint64_t num_sparse = 0; + uint64_t dense_total_winsize = 0; + uint64_t dense_total_count = 0; + uint64_t sparse_total_cap = 0; + + if (full) { + if (ar->superdir) { + /* Superdir mode: visit every slot of every super directory entry. */ + for (uint32_t bi = 0; bi < ar->sdir_len; bi++) { + arSDirEntry *e = ar->superdir + bi; + for (uint32_t si = 0; si < AR_SUPER_BLOCK_SLOTS; si++) { + arSlice *s = e->slots[si]; + if (!s) continue; + if (s->encoding == AR_SLICE_DENSE) { + num_dense++; + dense_total_winsize += s->layout.dense.winsize; + dense_total_count += s->count; + } else { + num_sparse++; + sparse_total_cap += s->layout.sparse.cap; + } + } + } + } else { + /* Flat directory mode: visit every allocated directory slot. */ + for (uint64_t i = 0; i < ar->dir_alloc; i++) { + arSlice *s = ar->dir[i]; + if (!s) continue; + if (s->encoding == 
AR_SLICE_DENSE) { + num_dense++; + dense_total_winsize += s->layout.dense.winsize; + dense_total_count += s->count; + } else { + num_sparse++; + sparse_total_cap += s->layout.sparse.cap; + } + } + } + } + + /* 7 base fields, plus 5 per-encoding fields when FULL is given. Keep + * these lengths in sync with the fields emitted below. */ + if (full) { + addReplyMapLen(c, 12); + } else { + addReplyMapLen(c, 7); + } + + addReplyBulkCString(c, "count"); + addReplyUnsignedLongLong(c, ar->count); + + addReplyBulkCString(c, "len"); + addReplyUnsignedLongLong(c, arLen(ar)); + + /* Both the pre-insert state and the terminal state (insert_idx equal to + * UINT64_MAX - 1) are reported as 0 here. + * NOTE(review): ARNEXT replies NULL for the terminal state instead; + * confirm the difference is intended. */ + addReplyBulkCString(c, "next-insert-index"); + if (ar->insert_idx == AR_INSERT_IDX_NONE || + ar->insert_idx == UINT64_MAX - 1) { + addReplyLongLong(c, 0); + } else { + addReplyUnsignedLongLong(c, ar->insert_idx + 1); + } + + addReplyBulkCString(c, "slices"); + addReplyLongLong(c, ar->num_slices); + + addReplyBulkCString(c, "directory-size"); + if (ar->superdir) { + /* Superdir mode: report allocated capacity */ + addReplyLongLong(c, ar->sdir_cap); + } else { + addReplyLongLong(c, ar->dir_alloc); + } + + addReplyBulkCString(c, "super-dir-entries"); + addReplyLongLong(c, ar->superdir ? 
ar->sdir_len : 0); + + addReplyBulkCString(c, "slice-size"); + addReplyLongLong(c, ar->slice_size); + + /* FULL only: statistics gathered by the directory scan. Every average + * replies 0 when its divisor is 0. */ + if (full) { + addReplyBulkCString(c, "dense-slices"); + addReplyLongLong(c, num_dense); + + addReplyBulkCString(c, "sparse-slices"); + addReplyLongLong(c, num_sparse); + + /* Mean window size over the dense slices. */ + addReplyBulkCString(c, "avg-dense-size"); + if (num_dense > 0) { + addReplyDouble(c, (double)dense_total_winsize / num_dense); + } else { + addReplyDouble(c, 0); + } + + /* Stored elements divided by total window size, over dense slices. */ + addReplyBulkCString(c, "avg-dense-fill"); + if (dense_total_winsize > 0) { + addReplyDouble(c, (double)dense_total_count / dense_total_winsize); + } else { + addReplyDouble(c, 0); + } + + /* Mean capacity over the sparse slices. */ + addReplyBulkCString(c, "avg-sparse-size"); + if (num_sparse > 0) { + addReplyDouble(c, (double)sparse_total_cap / num_sparse); + } else { + addReplyDouble(c, 0); + } + } +} diff --git a/src/t_hash.c b/src/t_hash.c index 5ea456597..4dac2802e 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -15,6 +15,7 @@ #include "ebuckets.h" #include "entry.h" #include "cluster_asm.h" +#include "vector.h" #include /* Threshold for HEXPIRE and HPERSIST to be considered whether it is worth to @@ -45,6 +46,18 @@ typedef enum GetFieldRes { typedef listpackEntry CommonEntry; /* extend usage beyond lp */ +#define FIELDS_STACK_SIZE 16 + +/* A vec with an embedded stack buffer, used to collect field robj pointers + * for subkey notifications without heap allocation in the common case. 
*/ +typedef struct fieldvec { vec v; void *buf[FIELDS_STACK_SIZE]; } fieldvec; + +static inline vec *fieldvecInit(fieldvec *fv, size_t cap) { + vecInit(&fv->v, fv->buf, FIELDS_STACK_SIZE); + vecReserve(&fv->v, cap); + return &fv->v; +} + /* hash field expiration (HFE) funcs */ static ExpireAction onFieldExpire(eItem item, void *ctx); static ExpireMeta* hentryGetExpireMeta(const eItem field); @@ -126,6 +139,7 @@ typedef struct OnFieldExpireCtx { robj *hashObj; redisDb *db; int activeEx; /* 1 for active expire, 0 for lazy expire */ + vec *vexpired; /* Expired fields vector */ } OnFieldExpireCtx; /* The implementation of hashes by dict was modified from storing fields as sds @@ -360,7 +374,8 @@ static uint64_t listpackExGetMinExpire(robj *o) { } /* Walk over fields and delete the expired ones. */ -void listpackExExpire(redisDb *db, kvobj *kv, ExpireInfo *info, int activeEx) { +void listpackExExpire(redisDb *db, kvobj *kv, ExpireInfo *info) { + OnFieldExpireCtx *ctx = info->ctx; serverAssert(kv->encoding == OBJ_ENCODING_LISTPACK_EX); uint64_t expired = 0, min = EB_EXPIRE_TIME_INVALID; unsigned char *ptr; @@ -387,9 +402,15 @@ void listpackExExpire(redisDb *db, kvobj *kv, ExpireInfo *info, int activeEx) { if (val == HASH_LP_NO_TTL || (uint64_t) val > info->now) break; + /* Collect expired field for subkey notification. */ + if (ctx->vexpired) { + char *fstr = (char *)(fref ? fref : intbuf); + vecPush(ctx->vexpired, createStringObject(fstr, flen)); + } + propagateHashFieldDeletion(db, key, (char *)((fref) ? 
fref : intbuf), flen); server.stat_expired_subkeys++; - if (activeEx) server.stat_expired_subkeys_active++; + if (ctx->activeEx) server.stat_expired_subkeys_active++; ptr = lpNext(lpt->lp, ptr); @@ -780,9 +801,13 @@ GetFieldRes hashTypeGetValue(redisDb *db, kvobj *o, sds field, unsigned char **v /* If the field is the last one in the hash, then the hash will be deleted */ res = GETF_EXPIRED; robj *keyObj = createStringObject(key, sdslen(key)); - if (!(hfeFlags & HFE_LAZY_NO_NOTIFICATION)) - notifyKeyspaceEvent(NOTIFY_HASH, "hexpired", keyObj, db->id); - if ((hashTypeLength(o, 0) == 0) && (!(hfeFlags & HFE_LAZY_AVOID_HASH_DEL))) { + unsigned long length = hashTypeLength(o, 0); + if ((length != 0) && !(hfeFlags & HFE_LAZY_NO_NOTIFICATION)) { + robj fobj, *farr[1] = {&fobj}; + initStaticStringObject(fobj, field); + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH, "hexpired", keyObj, db->id, farr, 1); + } + if ((length == 0) && (!(hfeFlags & HFE_LAZY_AVOID_HASH_DEL))) { if (!(hfeFlags & HFE_LAZY_NO_NOTIFICATION)) notifyKeyspaceEvent(NOTIFY_GENERIC, "del", keyObj, db->id); dbDelete(db,keyObj); @@ -1876,30 +1901,29 @@ void hashTypeRandomElement(robj *hashobj, unsigned long hashsize, CommonEntry *k */ uint64_t hashTypeExpire(redisDb *db, kvobj *o, uint32_t *quota, int updateSubexpires, int activeEx) { uint64_t noExpireLeftRes = EB_EXPIRE_TIME_INVALID; - ExpireInfo info = {0}; - if (o->encoding == OBJ_ENCODING_LISTPACK_EX) { - info = (ExpireInfo) { + /* Collect expired field names for batched subkey notification. + * Skip allocation entirely when subkey notifications are disabled. */ + fieldvec fvexpired; + vec *vexpired = isSubkeyNotifyEnabled(NOTIFY_HASH) ? 
+ fieldvecInit(&fvexpired, FIELDS_STACK_SIZE) : NULL; + + OnFieldExpireCtx onFieldExpireCtx = { .hashObj = o, .db = db, .activeEx = activeEx, .vexpired = vexpired }; + ExpireInfo info = (ExpireInfo) { .maxToExpire = *quota, .now = commandTimeSnapshot(), + .ctx = &onFieldExpireCtx, .itemsExpired = 0}; - listpackExExpire(db, o, &info, activeEx); + if (o->encoding == OBJ_ENCODING_LISTPACK_EX) { + listpackExExpire(db, o, &info); } else { serverAssert(o->encoding == OBJ_ENCODING_HT); dict *d = o->ptr; htMetadataEx *dictExpireMeta = htGetMetadataEx(d); - OnFieldExpireCtx onFieldExpireCtx = { .hashObj = o, .db = db, .activeEx = activeEx }; - - info = (ExpireInfo){ - .maxToExpire = *quota, - .onExpireItem = onFieldExpire, - .ctx = &onFieldExpireCtx, - .now = commandTimeSnapshot() - }; - + info.onExpireItem = onFieldExpire; ebExpire(&dictExpireMeta->hfe, &hashFieldExpireBucketsType, &info); } @@ -1912,7 +1936,11 @@ uint64_t hashTypeExpire(redisDb *db, kvobj *o, uint32_t *quota, int updateSubexp if (info.itemsExpired) { sds keystr = kvobjGetKey(o); robj *key = createStringObject(keystr, sdslen(keystr)); - notifyKeyspaceEvent(NOTIFY_HASH, "hexpired", key, db->id); + + /* Send subkey notification with all expired fields */ + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH, "hexpired", key, db->id, + vexpired ? (robj**)vecData(vexpired) : NULL, vexpired ? vecSize(vexpired) : 0); + int slot; int deleted = 0; @@ -1935,6 +1963,14 @@ uint64_t hashTypeExpire(redisDb *db, kvobj *o, uint32_t *quota, int updateSubexp decrRefCount(key); } + /* Free collected expired fields */ + if (vexpired) { + for (size_t i = 0; i < vecSize(vexpired); i++) { + decrRefCount(vecGet(vexpired, i)); + } + vecRelease(vexpired); + } + /* return 0 if hash got deleted, EB_EXPIRE_TIME_INVALID if no more fields * with expiration. Else return next expiration time */ return (info.nextExpireTime == EB_EXPIRE_TIME_INVALID) ? 
noExpireLeftRes : info.nextExpireTime; @@ -2103,7 +2139,7 @@ void hsetnxCommand(client *c) { updateKeysizesHist(c->db, OBJ_HASH, hlen - 1, hlen); if (server.memory_tracking_enabled) updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), kv, oldsize, kvobjAllocSize(kv)); - notifyKeyspaceEvent(NOTIFY_HASH, "hset", c->argv[1], c->db->id); + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH,"hset",c->argv[1],c->db->id,&c->argv[2],1); KSN_INVALIDATE_KVOBJ(kv); server.dirty++; } @@ -2141,7 +2177,16 @@ void hsetCommand(client *c) { updateKeysizesHist(c->db, OBJ_HASH, l - created, l); if (server.memory_tracking_enabled) updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), kv, oldsize, kvobjAllocSize(kv)); - notifyKeyspaceEvent(NOTIFY_HASH,"hset",c->argv[1],c->db->id); + + /* Collect field pointers for subkey notification. Fields are at argv[2,4,6...]. */ + int numfields = (c->argc - 2) / 2; + fieldvec fvset; + vec *vset = fieldvecInit(&fvset, numfields); + for (i = 0; i < numfields; i++) { + vecPush(vset, c->argv[2 + i * 2]); + } + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH,"hset",c->argv[1],c->db->id,(robj**)vecData(vset),numfields); + vecRelease(vset); KSN_INVALIDATE_KVOBJ(kv); server.dirty += (c->argc - 2)/2; } @@ -2355,8 +2400,7 @@ err_expiration: */ void hsetexCommand(client *c) { int flags = 0, first_field_pos = 0, field_count = 0, expire_time_pos = -1; - int updated = 0, deleted = 0, set_expiry; - int expired = 0, fields_set = 0; + int set_expiry; long long expire_time = EB_EXPIRE_TIME_INVALID; int64_t oldlen, newlen; HashTypeSetEx setex; @@ -2383,6 +2427,13 @@ void hsetexCommand(client *c) { if (server.memory_tracking_enabled) oldsize = kvobjAllocSize(o); + /* Track fields for subkey notifications by event type. 
*/ + fieldvec fvexpired, fvset, fvdeleted, fvupdated; + vec *vexpired = fieldvecInit(&fvexpired, field_count); + vec *vset = fieldvecInit(&fvset, field_count); + vec *vdeleted = fieldvecInit(&fvdeleted, field_count); + vec *vupdated = fieldvecInit(&fvupdated, field_count); + if (flags & (HFE_FXX | HFE_FNX)) { int found = 0; for (int i = 0; i < field_count; i++) { @@ -2398,7 +2449,9 @@ void hsetexCommand(client *c) { GetFieldRes res = hashTypeGetValue(c->db, o, field, &vstr, &vlen, &vll, opt, NULL); int exists = (res == GETF_OK); - expired += (res == GETF_EXPIRED); + if (res == GETF_EXPIRED) { + vecPush(vexpired, c->argv[first_field_pos + (i * 2)]); + } found += exists; /* Check for early exit if the condition is already invalid. */ @@ -2435,12 +2488,15 @@ void hsetexCommand(client *c) { opt |= HASH_SET_KEEP_TTL; hashTypeSet(c->db, o, field, value, opt); - fields_set = 1; + vecPush(vset, c->argv[first_field_pos + (i * 2)]); /* Update the expiration time. */ if (set_expiry) { int ret = hashTypeSetEx(o, field, expire_time, &setex); - updated += (ret == HSETEX_OK); - deleted += (ret == HSETEX_DELETED); + if (ret == HSETEX_OK) { + vecPush(vupdated, c->argv[first_field_pos + (i * 2)]); + } else if (ret == HSETEX_DELETED) { + vecPush(vdeleted, c->argv[first_field_pos + (i * 2)]); + } } } @@ -2449,7 +2505,7 @@ void hsetexCommand(client *c) { server.dirty += field_count; - if (deleted) { + if (vecSize(vdeleted)) { /* If fields are deleted due to timestamp is being in the past, hdel's * are already propagated. No need to propagate the command itself. 
*/ preventCommandPropagation(c); @@ -2470,15 +2526,23 @@ out: if (server.memory_tracking_enabled) updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), o, oldsize, kvobjAllocSize(o)); /* Emit keyspace notifications based on field expiry, mutation, or key deletion */ - if (fields_set || expired) { + if (vecSize(vset) || vecSize(vexpired)) { newlen = (int64_t) hashTypeLength(o, 0); keyModified(c, c->db, c->argv[1], o, 1); - if (expired) - notifyKeyspaceEvent(NOTIFY_HASH, "hexpired", c->argv[1], c->db->id); - if (fields_set) { - notifyKeyspaceEvent(NOTIFY_HASH, "hset", c->argv[1], c->db->id); - if (deleted || updated) - notifyKeyspaceEvent(NOTIFY_HASH, deleted ? "hdel" : "hexpire", c->argv[1], c->db->id); + if (vecSize(vexpired)) { + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH, "hexpired", c->argv[1], + c->db->id, (robj**)vecData(vexpired), vecSize(vexpired)); + } + if (vecSize(vset)) { + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH, "hset", c->argv[1], + c->db->id, (robj**)vecData(vset), vecSize(vset)); + if (vecSize(vdeleted)) { + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH, "hdel", c->argv[1], + c->db->id, (robj**)vecData(vdeleted), vecSize(vdeleted)); + } else if (vecSize(vupdated)) { + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH, "hexpire", c->argv[1], + c->db->id, (robj**)vecData(vupdated), vecSize(vupdated)); + } } KSN_INVALIDATE_KVOBJ(o); @@ -2494,6 +2558,11 @@ out: if (oldlen != newlen) updateKeysizesHist(c->db, OBJ_HASH, oldlen, newlen); } + + vecRelease(vexpired); + vecRelease(vset); + vecRelease(vdeleted); + vecRelease(vupdated); } void hincrbyCommand(client *c) { @@ -2543,7 +2612,7 @@ void hincrbyCommand(client *c) { updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), o, oldsize, kvobjAllocSize(o)); addReplyLongLong(c,value); keyModified(c,c->db,c->argv[1], o, 1); - notifyKeyspaceEvent(NOTIFY_HASH,"hincrby",c->argv[1],c->db->id); + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH,"hincrby",c->argv[1],c->db->id,&c->argv[2],1); KSN_INVALIDATE_KVOBJ(o); 
server.dirty++; } @@ -2602,7 +2671,7 @@ void hincrbyfloatCommand(client *c) { updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), o, oldsize, kvobjAllocSize(o)); addReplyBulkCBuffer(c,buf,len); keyModified(c,c->db,c->argv[1],o,1); - notifyKeyspaceEvent(NOTIFY_HASH,"hincrbyfloat",c->argv[1],c->db->id); + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH,"hincrbyfloat",c->argv[1],c->db->id,&c->argv[2],1); KSN_INVALIDATE_KVOBJ(o); server.dirty++; @@ -2651,19 +2720,24 @@ void hgetCommand(client *c) { void hmgetCommand(client *c) { GetFieldRes res = GETF_OK; - int i; - int expired = 0, deleted = 0; + int i, deleted = 0; /* Don't abort when the key cannot be found. Non-existing keys are empty * hashes, where HMGET should respond with a series of null bulks. */ kvobj *o = lookupKeyRead(c->db, c->argv[1]); if (checkType(c,o,OBJ_HASH)) return; + /* Track expired fields for subkey notification. */ + fieldvec fvexpired; + vec *vexpired = fieldvecInit(&fvexpired, c->argc-2); + addReplyArrayLen(c, c->argc-2); for (i = 2; i < c->argc ; i++) { if (!deleted) { res = addHashFieldToReply(c, o, c->argv[i]->ptr, HFE_LAZY_NO_NOTIFICATION); - expired += (res == GETF_EXPIRED); + if (res == GETF_EXPIRED) { + vecPush(vexpired, c->argv[i]); + } deleted += (res == GETF_EXPIRED_HASH); } else { /* If hash got lazy expired since all fields are expired (o is invalid), @@ -2672,11 +2746,14 @@ void hmgetCommand(client *c) { } } - if (expired) { - notifyKeyspaceEvent(NOTIFY_HASH, "hexpired", c->argv[1], c->db->id); - if (deleted) - notifyKeyspaceEvent(NOTIFY_GENERIC, "del", c->argv[1], c->db->id); + if (vecSize(vexpired)) { + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH, "hexpired", c->argv[1], + c->db->id, (robj**)vecData(vexpired), vecSize(vexpired)); } + if (deleted) + notifyKeyspaceEvent(NOTIFY_GENERIC, "del", c->argv[1], c->db->id); + + vecRelease(vexpired); } /* Get and delete the value of one or more fields of a given hash key. @@ -2685,7 +2762,7 @@ void hmgetCommand(client *c) { * doesn’t exist. 
*/ void hgetdelCommand(client *c) { - int res = 0, hfe = 0, deleted = 0, expired = 0; + int res = 0, hfe = 0; int64_t oldlen = -1; /* not exists as long as it is not set */ long num_fields = 0; size_t oldsize = 0; @@ -2723,6 +2800,11 @@ void hgetdelCommand(client *c) { oldsize = kvobjAllocSize(o); } + /* Track fields for subkey notifications. */ + fieldvec fvexpired, fvdeleted; + vec *vexpired = fieldvecInit(&fvexpired, num_fields); + vec *vdeleted = fieldvecInit(&fvdeleted, num_fields); + addReplyArrayLen(c, num_fields); for (int i = 4; i < c->argc; i++) { const int flags = HFE_LAZY_NO_NOTIFICATION | @@ -2731,17 +2813,22 @@ void hgetdelCommand(client *c) { HFE_LAZY_NO_UPDATE_KEYSIZES | HFE_LAZY_NO_UPDATE_ALLOCSIZES; res = addHashFieldToReply(c, o, c->argv[i]->ptr, flags); - expired += (res == GETF_EXPIRED); + if (res == GETF_EXPIRED) { + vecPush(vexpired, c->argv[i]); + } /* Try to delete only if it's found and not expired lazily. */ if (res == GETF_OK) { - deleted++; + vecPush(vdeleted, c->argv[i]); serverAssert(hashTypeDelete(o, c->argv[i]->ptr) == 1); } } /* Return if no modification has been made. 
*/ - if (expired == 0 && deleted == 0) + if (vecSize(vexpired) == 0 && vecSize(vdeleted) == 0) { + vecRelease(vexpired); + vecRelease(vdeleted); return; + } int64_t newlen = (int64_t) hashTypeLength(o, 0); /* del key if become empty */ @@ -2759,11 +2846,14 @@ void hgetdelCommand(client *c) { keyModified(c, c->db, c->argv[1], o, 1); - if (expired) - notifyKeyspaceEvent(NOTIFY_HASH, "hexpired", c->argv[1], c->db->id); - if (deleted) { - notifyKeyspaceEvent(NOTIFY_HASH, "hdel", c->argv[1], c->db->id); - server.dirty += deleted; + if (vecSize(vexpired)) { + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH, "hexpired", c->argv[1], + c->db->id, (robj**)vecData(vexpired), vecSize(vexpired)); + } + if (vecSize(vdeleted)) { + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH, "hdel", c->argv[1], + c->db->id, (robj**)vecData(vdeleted), vecSize(vdeleted)); + server.dirty += vecSize(vdeleted); /* Propagate as HDEL command. * Orig: HGETDEL FIELDS field1 field2 ... @@ -2773,6 +2863,8 @@ void hgetdelCommand(client *c) { rewriteClientCommandArgument(c, 2, NULL); /* Delete arg */ } + vecRelease(vexpired); + vecRelease(vdeleted); KSN_INVALIDATE_KVOBJ(o); /* Key may have become empty because of deleting fields or lazy expire. */ @@ -2794,7 +2886,6 @@ void hgetdelCommand(client *c) { * doesn’t exist. */ void hgetexCommand(client *c) { - int expired = 0, deleted = 0, updated = 0; int parse_flags = 0, expire_time_pos = -1, first_field_pos = -1, num_fields = -1; long long expire_time = 0; int64_t oldlen = 0, newlen = -1; @@ -2824,6 +2915,12 @@ void hgetexCommand(client *c) { if (parse_flags) hashTypeSetExInit(c->argv[1], o, c, c->db, 0, &setex); + /* Track fields for subkey notifications by event type. 
*/ + fieldvec fvexpired, fvdeleted, fvupdated; + vec *vexpired = fieldvecInit(&fvexpired, num_fields); + vec *vdeleted = fieldvecInit(&fvdeleted, num_fields); + vec *vupdated = fieldvecInit(&fvupdated, num_fields); + addReplyArrayLen(c, num_fields); for (int i = first_field_pos; i < first_field_pos + num_fields; i++) { const int flags = HFE_LAZY_NO_NOTIFICATION | @@ -2833,7 +2930,9 @@ void hgetexCommand(client *c) { HFE_LAZY_NO_UPDATE_ALLOCSIZES; sds field = c->argv[i]->ptr; int res = addHashFieldToReply(c, o, c->argv[i]->ptr, flags); - expired += (res == GETF_EXPIRED); + if (res == GETF_EXPIRED) { + vecPush(vexpired, c->argv[i]); + } /* Set expiration only if the field exists and not expired lazily. */ if (res == GETF_OK && parse_flags) { @@ -2841,8 +2940,11 @@ void hgetexCommand(client *c) { expire_time = EB_EXPIRE_TIME_INVALID; res = hashTypeSetEx(o, field, expire_time, &setex); - deleted += (res == HSETEX_DELETED); - updated += (res == HSETEX_OK); + if (res == HSETEX_DELETED) { + vecPush(vdeleted, c->argv[i]); + } else if (res == HSETEX_OK) { + vecPush(vupdated, c->argv[i]); + } } } @@ -2853,10 +2955,14 @@ void hgetexCommand(client *c) { updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), o, oldsize, kvobjAllocSize(o)); /* Exit early if no modification has been made. */ - if (expired == 0 && deleted == 0 && updated == 0) + if (vecSize(vexpired) == 0 && vecSize(vdeleted) == 0 && vecSize(vupdated) == 0) { + vecRelease(vexpired); + vecRelease(vdeleted); + vecRelease(vupdated); return; + } - server.dirty += deleted + updated; + server.dirty += vecSize(vdeleted) + vecSize(vupdated); keyModified(c, c->db, c->argv[1], o, 1); /* This command will never be propagated as it is. It will be propagated as @@ -2867,16 +2973,19 @@ void hgetexCommand(client *c) { * If PERSIST flags is used, it will be propagated as HPERSIST command. * IF EX/EXAT/PX/PXAT flags are used, it will be replicated as HPEXPRITEAT. 
*/ - if (expired) - notifyKeyspaceEvent(NOTIFY_HASH, "hexpired", c->argv[1], c->db->id); - if (updated) { + if (vecSize(vexpired)) { + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH, "hexpired", c->argv[1], + c->db->id, (robj**)vecData(vexpired), vecSize(vexpired)); + } + if (vecSize(vupdated)) { /* Build canonical command for propagation */ int canonical_argc; robj **canonical_argv; int idx = 0; if (parse_flags & HFE_PERSIST) { - notifyKeyspaceEvent(NOTIFY_HASH, "hpersist", c->argv[1], c->db->id); + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH, "hpersist", c->argv[1], + c->db->id, (robj**)vecData(vupdated), vecSize(vupdated)); /* Build canonical HPERSIST command: HPERSIST key FIELDS numfields field1 field2 ... */ canonical_argc = 4 + num_fields; canonical_argv = zmalloc(sizeof(robj*) * canonical_argc); @@ -2885,7 +2994,8 @@ void hgetexCommand(client *c) { canonical_argv[idx++] = c->argv[1]; /* key */ incrRefCount(c->argv[1]); } else { - notifyKeyspaceEvent(NOTIFY_HASH, "hexpire", c->argv[1], c->db->id); + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH, "hexpire", c->argv[1], + c->db->id, (robj**)vecData(vupdated), vecSize(vupdated)); /* Build canonical HPEXPIREAT command: HPEXPIREAT key timestamp FIELDS numfields field1 field2 ... */ canonical_argc = 5 + num_fields; canonical_argv = zmalloc(sizeof(robj*) * canonical_argc); @@ -2905,13 +3015,18 @@ void hgetexCommand(client *c) { } replaceClientCommandVector(c, canonical_argc, canonical_argv); - } else if (deleted) { + } else if (vecSize(vdeleted)) { /* If we are here, fields are deleted because new timestamp was in the * past. HDELs are already propagated as part of hashTypeSetEx(). 
*/ - notifyKeyspaceEvent(NOTIFY_HASH, "hdel", c->argv[1], c->db->id); + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH, "hdel", c->argv[1], + c->db->id, (robj**)vecData(vdeleted), vecSize(vdeleted)); preventCommandPropagation(c); } + vecRelease(vexpired); + vecRelease(vdeleted); + vecRelease(vupdated); + /* Key may become empty due to lazy expiry in addHashFieldToReply() * or the new expiration time is in the past.*/ newlen = hashTypeLength(o, 0); @@ -2925,7 +3040,7 @@ void hgetexCommand(client *c) { void hdelCommand(client *c) { kvobj *o; - int j, deleted = 0, keyremoved = 0; + int j, keyremoved = 0; size_t oldsize = 0; if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL || @@ -2943,11 +3058,15 @@ void hdelCommand(client *c) { * field with expiration and removes it from global HFE DS. */ int isHFE = hashTypeIsFieldsWithExpire(o); + /* Track which fields were actually deleted for subkey notification. */ + fieldvec fvdeleted; + vec *vdeleted = fieldvecInit(&fvdeleted, c->argc - 2); + if (o->encoding == OBJ_ENCODING_HT) dictPauseAutoResize((dict*)o->ptr); for (j = 2; j < c->argc; j++) { if (hashTypeDelete(o,c->argv[j]->ptr)) { - deleted++; + vecPush(vdeleted, c->argv[j]); if (hashTypeLength(o, 0) == 0) { keyremoved = 1; break; @@ -2961,7 +3080,7 @@ void hdelCommand(client *c) { } if (server.memory_tracking_enabled) updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), o, oldsize, kvobjAllocSize(o)); - if (deleted) { + if (vecSize(vdeleted)) { /* Update keysizes histogram */ int64_t newLen = (int64_t) hashTypeLength(o, 0); updateKeysizesHist(c->db, OBJ_HASH, oldLen, keyremoved ? -1 : newLen); @@ -2977,15 +3096,16 @@ void hdelCommand(client *c) { /* Signal key modification */ keyModified(c, c->db, c->argv[1], keyremoved ? 
NULL : o, 1); - notifyKeyspaceEvent(NOTIFY_HASH,"hdel",c->argv[1],c->db->id); + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH,"hdel",c->argv[1],c->db->id,(robj**)vecData(vdeleted),vecSize(vdeleted)); KSN_INVALIDATE_KVOBJ(o); /* Invalidate local kvobj pointer */ /* Notify del event if key was deleted */ if (keyremoved) notifyKeyspaceEvent(NOTIFY_GENERIC, "del", c->argv[1], c->db->id); - server.dirty += deleted; + server.dirty += vecSize(vdeleted); } - addReplyLongLong(c,deleted); + addReplyLongLong(c,vecSize(vdeleted)); + vecRelease(vdeleted); } void hlenCommand(client *c) { @@ -3067,6 +3187,73 @@ void genericHgetallCommand(client *c, int flags) { if (server.memory_tracking_enabled) oldsize = kvobjAllocSize(o); + + /* Fast path: batched prefetch for hashtable-encoded HGETALL. + * Collect a batch of dict entries, prefetch their Entry structs and + * value SDS data, then emit replies while the data is cache-warm. + * This hides the latency of pointer chasing through scattered + * heap allocations (dictEntry → Entry → value SDS). */ +#define HGETALL_BATCH 16 + if (o->encoding == OBJ_ENCODING_HT) { + int skip_expired = !server.allow_access_expired; + dict *d = o->ptr; + dictIterator di; + dictInitSafeIterator(&di, d); + Entry *batch_entry[HGETALL_BATCH]; + sds batch_val[HGETALL_BATCH]; + + while (1) { + /* Phase 1: pull a batch of entries from the dict iterator and + * prefetch their Entry structs. Pure pointer-fetch — we don't + * dereference Entry here so the prefetch is effective. */ + int batch_count = 0; + while (batch_count < HGETALL_BATCH) { + dictEntry *de = dictNext(&di); + if (!de) break; + Entry *e = dictGetKey(de); + batch_entry[batch_count++] = e; + redis_prefetch_read(e); + } + if (batch_count == 0) break; + + /* Phase 2: Entry structs are warm — check expiry, extract value, + * and prefetch the value SDS. Expired entries are dropped from + * the batch by compacting in place. 
*/ + int valid_count = 0; + for (int i = 0; i < batch_count; i++) { + Entry *e = batch_entry[i]; + if (skip_expired) { + uint64_t expire_time = entryGetExpiry(e); + if (expire_time != EB_EXPIRE_TIME_INVALID && (mstime_t)expire_time < commandTimeSnapshot()) + continue; + } + batch_entry[valid_count] = e; + if (flags & OBJ_HASH_VALUE) { + sds val = entryGetValue(e); + batch_val[valid_count] = val; + redis_prefetch_read(val); + } + valid_count++; + } + + /* Phase 3: emit replies — field + value data is cache-warm. */ + for (int i = 0; i < valid_count; i++) { + if (flags & OBJ_HASH_KEY) { + sds field = entryGetField(batch_entry[i]); + addReplyBulkCBuffer(c, field, sdslen(field)); + count++; + } + if (flags & OBJ_HASH_VALUE) { + sds val = batch_val[i]; + addReplyBulkCBuffer(c, val, sdslen(val)); + count++; + } + } + } + dictResetIterator(&di); + goto done; + } + hashTypeInitIterator(&hi, o); while (hashTypeNext(&hi, 1 /*skipExpiredFields*/) != C_ERR) { @@ -3081,6 +3268,8 @@ void genericHgetallCommand(client *c, int flags) { } hashTypeResetIterator(&hi); + +done: if (server.memory_tracking_enabled) updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), o, oldsize, kvobjAllocSize(o)); @@ -3523,6 +3712,11 @@ static ExpireAction onFieldExpire(eItem item, void *ctx) { if (server.memory_tracking_enabled) oldsize = kvobjAllocSize(kv); sds field = entryGetField(e); + + /* Collect expired field for subkey notification (before deletion) */ + if (expCtx->vexpired) + vecPush(expCtx->vexpired, createStringObject(field, sdslen(field))); + propagateHashFieldDeletion(expCtx->db, key, field, sdslen(field)); /* update keysizes */ @@ -3816,7 +4010,7 @@ static void httlGenericCommand(client *c, const char *cmd, long long basetime, i */ static void hexpireGenericCommand(client *c, long long basetime, int unit) { HashCommandArgs args; - int fieldsNotSet = 0, updated = 0, deleted = 0; + int fieldsNotSet = 0; int64_t oldlen, newlen; robj *keyArg = c->argv[1]; size_t oldsize = 0; @@ -3852,12 
+4046,20 @@ static void hexpireGenericCommand(client *c, long long basetime, int unit) { int *fieldsToRemove = NULL; int removeCount = 0; + /* Track fields for subkey notifications. */ + fieldvec fvupdated, fvdeleted; + vec *vupdated = fieldvecInit(&fvupdated, args.fieldCount); + vec *vdeleted = fieldvecInit(&fvdeleted, args.fieldCount); + for (int i = 0; i < args.fieldCount; i++) { int fieldPos = args.firstFieldPos + i; sds field = c->argv[fieldPos]->ptr; SetExRes res = hashTypeSetEx(hashObj, field, args.expireTime, &exCtx); - updated += (res == HSETEX_OK); - deleted += (res == HSETEX_DELETED); + if (res == HSETEX_OK) { + vecPush(vupdated, c->argv[fieldPos]); + } else if (res == HSETEX_DELETED) { + vecPush(vdeleted, c->argv[fieldPos]); + } if (unlikely(res != HSETEX_OK)) { if (fieldsToRemove == NULL) { @@ -3875,11 +4077,13 @@ static void hexpireGenericCommand(client *c, long long basetime, int unit) { if (server.memory_tracking_enabled) updateSlotAllocSize(c->db, getKeySlot(keyArg->ptr), hashObj, oldsize, kvobjAllocSize(hashObj)); - if (deleted + updated > 0) { - server.dirty += deleted + updated; + if (vecSize(vdeleted) + vecSize(vupdated) > 0) { + server.dirty += vecSize(vdeleted) + vecSize(vupdated); keyModified(c, c->db, keyArg, hashObj, 1); - notifyKeyspaceEvent(NOTIFY_HASH, deleted ? 
"hdel" : "hexpire", - keyArg, c->db->id); + if (vecSize(vdeleted)) notifyKeyspaceEventWithSubkeys(NOTIFY_HASH, "hdel", + keyArg, c->db->id, (robj**)vecData(vdeleted), vecSize(vdeleted)); + if (vecSize(vupdated)) notifyKeyspaceEventWithSubkeys(NOTIFY_HASH, "hexpire", + keyArg, c->db->id, (robj**)vecData(vupdated), vecSize(vupdated)); } newlen = (int64_t) hashTypeLength(hashObj, 0); @@ -3896,7 +4100,9 @@ static void hexpireGenericCommand(client *c, long long basetime, int unit) { /* Avoid propagating command if not even one field was updated (Either because * the time is in the past, and corresponding HDELs were sent, or conditions * not met) then it is useless and invalid to propagate command with no fields */ - if (updated == 0) { + if (vecSize(vupdated) == 0) { + vecRelease(vupdated); + vecRelease(vdeleted); preventCommandPropagation(c); zfree(fieldsToRemove); return; @@ -3917,13 +4123,16 @@ static void hexpireGenericCommand(client *c, long long basetime, int unit) { for (int i = removeCount - 1; i >= 0; i--) { rewriteClientCommandArgument(c, fieldsToRemove[i], NULL); } - robj *newFieldCount = createStringObjectFromLongLong(updated); + robj *newFieldCount = createStringObjectFromLongLong(vecSize(vupdated)); rewriteClientCommandArgument(c, args.fieldsPos + 1, newFieldCount); decrRefCount(newFieldCount); } if (fieldsToRemove) zfree(fieldsToRemove); + + vecRelease(vupdated); + vecRelease(vdeleted); } /* HPEXPIRE key milliseconds [ NX | XX | GT | LT] FIELDS numfields */ @@ -3970,7 +4179,6 @@ void hpexpiretimeCommand(client *c) { /* HPERSIST key FIELDS numfields */ void hpersistCommand(client *c) { long numFields = 0, numFieldsAt = 3; - int changed = 0; /* Used to determine whether to send a notification. */ /* Read the hash object */ kvobj *hashObj = lookupKeyWrite(c->db, c->argv[1]); @@ -4003,6 +4211,10 @@ void hpersistCommand(client *c) { return; } + /* Track which fields were successfully persisted for subkey notification. 
*/ + fieldvec fvpersisted; + vec *vpersisted = fieldvecInit(&fvpersisted, numFields); + if (hashObj->encoding == OBJ_ENCODING_LISTPACK) { addReplyArrayLen(c, numFields); for (int i = 0 ; i < numFields ; i++) { @@ -4018,6 +4230,7 @@ void hpersistCommand(client *c) { else addReplyLongLong(c, HFE_PERSIST_NO_TTL); } + vecRelease(vpersisted); return; } else if (hashObj->encoding == OBJ_ENCODING_LISTPACK_EX) { long long prevExpire; @@ -4059,7 +4272,7 @@ void hpersistCommand(client *c) { if (server.memory_tracking_enabled) updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), hashObj, oldsize, kvobjAllocSize(hashObj)); addReplyLongLong(c, HFE_PERSIST_OK); - changed = 1; + vecPush(vpersisted, c->argv[numFieldsAt + 1 + i]); } } else if (hashObj->encoding == OBJ_ENCODING_HT) { dict *d = hashObj->ptr; @@ -4091,7 +4304,7 @@ void hpersistCommand(client *c) { hfieldPersist(hashObj, entry); addReplyLongLong(c, HFE_PERSIST_OK); - changed = 1; + vecPush(vpersisted, c->argv[numFieldsAt + 1 + i]); } if (server.memory_tracking_enabled) updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), hashObj, oldsize, kvobjAllocSize(hashObj)); @@ -4101,9 +4314,11 @@ void hpersistCommand(client *c) { /* Generates a hpersist event if the expiry time associated with any field * has been successfully deleted. 
*/ - if (changed) { - notifyKeyspaceEvent(NOTIFY_HASH, "hpersist", c->argv[1], c->db->id); + if (vecSize(vpersisted)) { + notifyKeyspaceEventWithSubkeys(NOTIFY_HASH, "hpersist", c->argv[1], + c->db->id, (robj**)vecData(vpersisted), vecSize(vpersisted)); keyModified(c, c->db, c->argv[1], hashObj, 1); server.dirty++; } + vecRelease(vpersisted); } diff --git a/src/t_stream.c b/src/t_stream.c index 7e8f58abd..8ee73ab37 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -4413,7 +4413,7 @@ void xnackCommand(client *c) { } else if (!strcasecmp(c->argv[3]->ptr,"FATAL")) { mode = XNACK_FATAL; } else { - addReplyError(c,"ERR mode must be SILENT, FAIL, or FATAL"); + addReplyError(c,"mode must be SILENT, FAIL, or FATAL"); return; } @@ -4432,7 +4432,7 @@ void xnackCommand(client *c) { numids = (int)numids_long; ids_start = i + 2; if (numids > (c->argc - ids_start)) { - addReplyError(c,"ERR number of IDs doesn't match numids"); + addReplyError(c,"number of IDs doesn't match numids"); return; } i = ids_start + numids - 1; @@ -4443,18 +4443,18 @@ void xnackCommand(client *c) { if (getLongLongFromObjectOrReply(c,c->argv[i],&retrycount,NULL) != C_OK) return; if (retrycount < 0) { - addReplyError(c,"ERR Invalid RETRYCOUNT value, must be >= 0"); + addReplyError(c,"Invalid RETRYCOUNT value, must be >= 0"); return; } } else { - addReplyErrorFormat(c,"ERR Unrecognized XNACK option '%s'", + addReplyErrorFormat(c,"Unrecognized XNACK option '%s'", (char *)c->argv[i]->ptr); return; } } if (ids_start == 0) { - addReplyError(c,"ERR syntax error, expected IDS keyword"); + addReplyError(c,"syntax error, expected IDS keyword"); return; } @@ -6031,12 +6031,14 @@ int streamValidateListpackIntegrity(unsigned char *lp, size_t size, int deep) { if (!valid_record || zero != 0) return 0; p = next; if (!lpValidateNext(lp, &next, size)) return 0; + int64_t actual_deleted = 0; entry_count += deleted_count; while (entry_count--) { if (!p) return 0; int64_t fields = master_fields, extra_fields = 3; int64_t 
flags = lpGetIntegerIfValid(p, &valid_record); if (!valid_record) return 0; + if (flags & STREAM_ITEM_FLAG_DELETED) actual_deleted++; p = next; if (!lpValidateNext(lp, &next, size)) return 0; /* entry id */ @@ -6073,6 +6075,9 @@ int streamValidateListpackIntegrity(unsigned char *lp, size_t size, int deep) { p = next; if (!lpValidateNext(lp, &next, size)) return 0; } + if (actual_deleted != deleted_count) + return 0; + if (next) return 0; diff --git a/src/t_string.c b/src/t_string.c index b9fc3ad5c..4f5019e4e 100644 --- a/src/t_string.c +++ b/src/t_string.c @@ -8,7 +8,9 @@ */ #include "server.h" +#include "cluster.h" #include "xxhash.h" +#include #include /* isnan(), isinf() */ /* XXH3 64-bit hash produces 16 hex characters when formatted */ @@ -71,7 +73,7 @@ static int checkStringLength(client *c, long long size, long long append) { #define OBJ_SET_IFDNE (1<<12) /* Set if current digest does not equal match digest */ /* Forward declaration */ -static int getExpireMillisecondsOrReply(client *c, robj *expire, int flags, int unit, long long *milliseconds); +static int getExpireMillisecondsOrReply(client *c, robj *expire, int relative_ttl, int unit, long long *milliseconds); /* Generic SET command family (SET, SETEX, PSETEX, SETNX) * @@ -88,8 +90,9 @@ void setGenericCommand(client *c, int flags, robj *key, robj **valref, robj *exp long long milliseconds = 0; /* initialized to avoid any harmless warning */ int found = 0; int setkey_flags = 0; + int relative_ttl = (flags & (OBJ_EX|OBJ_PX)) != 0; /* EX/PX are relative; EXAT/PXAT are absolute. 
*/ - if (expire && getExpireMillisecondsOrReply(c, expire, flags, unit, &milliseconds) != C_OK) { + if (expire && getExpireMillisecondsOrReply(c, expire, relative_ttl, unit, &milliseconds) != C_OK) { return; } @@ -212,37 +215,33 @@ void setGenericCommand(client *c, int flags, robj *key, robj **valref, robj *exp /* Propagate without the GET argument (Isn't needed if we had expire since in that case we completely re-written the command argv) */ if ((flags & OBJ_SET_GET) && !expire) { - int argc = 0; - int j; - robj **argv = zmalloc((c->argc-1)*sizeof(robj*)); - for (j=0; j < c->argc; j++) { + for (int j = c->argc - 1; j >= 3; j--) { char *a = c->argv[j]->ptr; /* Skip GET which may be repeated multiple times. */ - if (j >= 3 && - (a[0] == 'g' || a[0] == 'G') && + if ((a[0] == 'g' || a[0] == 'G') && (a[1] == 'e' || a[1] == 'E') && (a[2] == 't' || a[2] == 'T') && a[3] == '\0') - continue; - argv[argc++] = c->argv[j]; - incrRefCount(c->argv[j]); + { + rewriteClientCommandArgument(c, j, NULL); + } } - replaceClientCommandVector(c, argc, argv); } } /* - * Extract the `expire` argument of a given GET/SET command as an absolute timestamp in milliseconds. + * Extract the `expire` argument of a given command as an absolute timestamp in milliseconds. * * "client" is the client that sent the `expire` argument. * "expire" is the `expire` argument to be extracted. - * "flags" represents the behavior of the command (e.g. PX or EX). + * "relative_ttl" is true when the value is a relative TTL (EX/PX), + * false when it is an absolute timestamp (EXAT/PXAT). * "unit" is the original unit of the given `expire` argument (e.g. UNIT_SECONDS). * "milliseconds" is output argument. * * If return C_OK, "milliseconds" output argument will be set to the resulting absolute timestamp. * If return C_ERR, an error reply has been added to the given client. 
*/ -static int getExpireMillisecondsOrReply(client *c, robj *expire, int flags, int unit, long long *milliseconds) { +static int getExpireMillisecondsOrReply(client *c, robj *expire, int relative_ttl, int unit, long long *milliseconds) { int ret = getLongLongFromObjectOrReply(c, expire, milliseconds, NULL); if (ret != C_OK) { return ret; @@ -256,7 +255,7 @@ static int getExpireMillisecondsOrReply(client *c, robj *expire, int flags, int if (unit == UNIT_SECONDS) *milliseconds *= 1000; - if ((flags & OBJ_PX) || (flags & OBJ_EX)) { + if (relative_ttl) { *milliseconds += commandTimeSnapshot(); } @@ -515,7 +514,8 @@ void getexCommand(client *c) { /* Validate the expiration time value first */ long long milliseconds = 0; - if (args.expire && getExpireMillisecondsOrReply(c, args.expire, args.flags, args.unit, &milliseconds) != C_OK) { + int relative_ttl = (args.flags & (OBJ_EX|OBJ_PX)) != 0; /* EX/PX are relative; EXAT/PXAT are absolute. */ + if (args.expire && getExpireMillisecondsOrReply(c, args.expire, relative_ttl, args.unit, &milliseconds) != C_OK) { return; } @@ -688,51 +688,125 @@ void getrangeCommand(client *c) { } } -void mgetCommand(client *c) { - int j; +/* Batch size for intra-command key prefetching. */ +#define PREFETCH_BATCH_SIZE 16 - addReplyArrayLen(c,c->argc-1); - for (j = 1; j < c->argc; j++) { - kvobj *o = lookupKeyRead(c->db, c->argv[j]); - if (o == NULL) { - addReplyNull(c); - } else { - if (o->type != OBJ_STRING) { - addReplyNull(c); - } else { - addReplyBulk(c,o); - } +/* Pick the next prefetch batch starting at argv[start] and warm it via + * dictPrefetchKeys. 'stride' is 1 for keys-only args (MGET) or 2 for + * key/value pairs (MSET). Returns the chosen batch size in items. */ +static int prefetchKeysBatch(client *c, int slot, int start, int stride) { + int batch = (c->argc - start) / stride; + + /* If at least two full batches remain, take one; otherwise fall + * through with batch = remaining keys, doing them in one go. 
*/ + if (batch >= PREFETCH_BATCH_SIZE*2) batch = PREFETCH_BATCH_SIZE; + + dict *d = kvstoreGetDict(c->db->keys, slot); + if (d != NULL && dictSize(d) > 0) { + void *keys[PREFETCH_BATCH_SIZE*2]; + dict *dicts[PREFETCH_BATCH_SIZE*2]; + for (int k = 0; k < batch; k++) { + keys[k] = c->argv[start + k * stride]->ptr; + dicts[k] = d; } + dictPrefetchKeys(dicts, keys, batch); + } + return batch; +} + +void mgetCommand(client *c) { + int numkeys = c->argc - 1; + + addReplyArrayLen(c, numkeys); + + /* MGET requires all keys in the same slot in cluster mode. Reuse the + * slot already computed by the cross-command batching path when + * available, otherwise fall back to recomputing from argv[1]. */ + int slot = 0; + if (server.cluster_enabled) { + pendingCommand *pcmd = c->current_pending_cmd; + slot = (pcmd && pcmd->slot != INVALID_CLUSTER_SLOT) ? + pcmd->slot : getKeySlot(c->argv[1]->ptr); + } + + /* Decide whether to prefetch within this command. Skip if disabled by + * config (prefetch_batch_max_size == 0), or if the cross-command batch + * path already warmed our keys — running both paths would just contend + * for cache bandwidth. */ + int already_prefetched = c->current_pending_cmd && + (c->current_pending_cmd->flags & PENDING_CMD_KEYS_PREFETCHED); + int do_prefetch = server.prefetch_batch_max_size && !already_prefetched && numkeys > 1; + + int j = 1; + while (j < c->argc) { + /* If prefetching, take one batch; otherwise take all items. */ + int batch = do_prefetch ? prefetchKeysBatch(c, slot, j, 1) : c->argc - j; + + for (int k = 0; k < batch; k++) { + kvobj *o = lookupKeyRead(c->db, c->argv[j + k]); + if (o == NULL || o->type != OBJ_STRING) + addReplyNull(c); + else + addReplyBulk(c, o); + } + j += batch; } } void msetGenericCommand(client *c, int nx) { - int j; - if ((c->argc % 2) == 0) { addReplyErrorArity(c); return; } + int numkeys = (c->argc - 1) / 2; + + /* Same gating as mgetCommand, see comment there. 
*/ + int slot = 0; + if (server.cluster_enabled) { + pendingCommand *pcmd = c->current_pending_cmd; + slot = (pcmd && pcmd->slot != INVALID_CLUSTER_SLOT) ? + pcmd->slot : getKeySlot(c->argv[1]->ptr); + } + int already_prefetched = c->current_pending_cmd && + (c->current_pending_cmd->flags & PENDING_CMD_KEYS_PREFETCHED); + int do_prefetch = server.prefetch_batch_max_size && !already_prefetched && numkeys > 1; + /* Handle the NX flag. The MSETNX semantic is to return zero and don't * set anything if at least one key already exists. */ if (nx) { - for (j = 1; j < c->argc; j += 2) { - if (lookupKeyWrite(c->db,c->argv[j]) != NULL) { - addReply(c, shared.czero); - return; + int j = 1; + while (j < c->argc) { + /* If prefetching, take one batch; otherwise take all items. */ + int batch = do_prefetch ? prefetchKeysBatch(c, slot, j, 2) + : (c->argc - j) / 2; + for (int k = 0; k < batch; k++) { + if (lookupKeyWrite(c->db, c->argv[j + k * 2]) != NULL) { + addReply(c, shared.czero); + return; + } } + j += batch * 2; } } - for (j = 1; j < c->argc; j += 2) { - c->argv[j+1] = tryObjectEncoding(c->argv[j+1]); - /* if 'NX', no need set flags SETKEY_DOESNT_EXIST. Already verified earlier! */ - setKey(c, c->db, c->argv[j], &(c->argv[j+1]) , 0 /*flags*/); - incrRefCount(c->argv[j+1]); /* refcnt not incr by setKey() */ - notifyKeyspaceEvent(NOTIFY_STRING,"set",c->argv[j],c->db->id); + /* If nx is set, the NX loop above already prefetched. */ + do_prefetch = do_prefetch && !nx; + + int j = 1; + while (j < c->argc) { + int batch = do_prefetch ? prefetchKeysBatch(c, slot, j, 2) + : (c->argc - j) / 2; + for (int k = 0; k < batch; k++) { + int i = j + k * 2; + c->argv[i + 1] = tryObjectEncoding(c->argv[i + 1]); + setKey(c, c->db, c->argv[i], &(c->argv[i + 1]), 0); + incrRefCount(c->argv[i + 1]); + notifyKeyspaceEvent(NOTIFY_STRING, "set", c->argv[i], c->db->id); + } + j += batch * 2; } - server.dirty += (c->argc-1)/2; + server.dirty += numkeys; addReply(c, nx ? 
shared.cone : shared.ok); } @@ -767,7 +841,8 @@ void msetexCommand(client *c) { /* Validate the expiration time value first */ long long milliseconds = 0; - if (args.expire && getExpireMillisecondsOrReply(c, args.expire, args.flags, args.unit, &milliseconds) != C_OK) { + int relative_ttl = (args.flags & (OBJ_EX|OBJ_PX)) != 0; /* EX/PX are relative; EXAT/PXAT are absolute. */ + if (args.expire && getExpireMillisecondsOrReply(c, args.expire, relative_ttl, args.unit, &milliseconds) != C_OK) { return; } @@ -923,6 +998,405 @@ void incrbyfloatCommand(client *c) { rewriteClientCommandArgument(c,3,shared.keepttl); } +/* INCREX option flags. */ +#define OBJ_INCREX_BYFLOAT (1<<0) /* Set if float-point increment is given */ +#define OBJ_INCREX_BYINT (1<<1) /* Set if integer increment is given */ +#define OBJ_INCREX_LBOUND (1<<2) /* Set if lower bound of increx result is given */ +#define OBJ_INCREX_UBOUND (1<<3) /* Set if upper bound of increx result is given */ +#define OBJ_INCREX_OVERFLOW_FAIL (1<<4) /* Return an error when the result is out of bounds (default) */ +#define OBJ_INCREX_OVERFLOW_SAT (1<<5) /* Saturate the result to LBOUND/UBOUND/type limits instead of failing */ +#define OBJ_INCREX_OVERFLOW_REJECT (1<<6) /* Leave the key unchanged and reply [current_value, 0] when the result is out of bounds */ +#define OBJ_INCREX_ENX (1<<7) /* Set expiration only when the key has no expiry */ +#define OBJ_INCREX_PERSIST (1<<8) /* Set if we need to remove the ttl */ +#define OBJ_INCREX_EX (1<<9) /* Set if time in seconds is given */ +#define OBJ_INCREX_PX (1<<10) /* Set if time in ms is given */ +#define OBJ_INCREX_EXAT (1<<11) /* Set if timestamp in second is given */ +#define OBJ_INCREX_PXAT (1<<12) /* Set if timestamp in ms is given */ + +/* INCREX argument structure */ +typedef struct { + int flags; /* OBJ_INCREX_* bits set during parsing. */ + int unit; /* UNIT_SECONDS or UNIT_MILLISECONDS for EX/PX/EXAT/PXAT. 
*/ + long long expire_ms; /* Absolute expire timestamp in ms (0 if no expiration given). */ + long long incr_ll; /* BYINT increment value (defaults to 1). */ + long long ub_ll; /* BYINT upper bound (defaults to LLONG_MAX). */ + long long lb_ll; /* BYINT lower bound (defaults to LLONG_MIN). */ + long double incr_ld; /* BYFLOAT increment value (defaults to 0). */ + long double ub_ld; /* BYFLOAT upper bound (defaults to LDBL_MAX). */ + long double lb_ld; /* BYFLOAT lower bound (defaults to -LDBL_MAX). */ +} incrExArgs; + +/* The parseIncrExArgumentsOrReply() function performs validation for INCREX command. + * If there are any syntax violations C_ERR is returned else C_OK is returned. */ +static int parseIncrExArgumentsOrReply(client *c, int start_pos, incrExArgs *args) { + memset(args, 0, sizeof(*args)); + args->unit = UNIT_SECONDS; + args->incr_ll = 1; + args->lb_ll = LLONG_MIN; + args->ub_ll = LLONG_MAX; + args->lb_ld = -LDBL_MAX; + args->ub_ld = LDBL_MAX; + + /* LBOUND/UBOUND values are parsed after the loop because their target type + * depends on whether BYINT or BYFLOAT was given, which may appear later. */ + robj *lower_bound = NULL, *upper_bound = NULL, *expire = NULL; + + /* Mask of all mutually-exclusive expiration-related flags. */ + const int expire_flags = OBJ_INCREX_EX|OBJ_INCREX_PX|OBJ_INCREX_EXAT|OBJ_INCREX_PXAT|OBJ_INCREX_PERSIST; + + for (int j = start_pos; j < c->argc; j++) { + char *opt = c->argv[j]->ptr; + robj *next = (j == c->argc-1) ? 
NULL : c->argv[j+1]; + + if (!strcasecmp(opt, "BYINT") && next && !(args->flags & (OBJ_INCREX_BYINT|OBJ_INCREX_BYFLOAT))) { + if (getLongLongFromObjectOrReply(c, next, &args->incr_ll, + "Increment is not an integer or out of range") != C_OK) + { + return C_ERR; + } + args->flags |= OBJ_INCREX_BYINT; + j++; + } else if (!strcasecmp(opt, "BYFLOAT") && next && !(args->flags & (OBJ_INCREX_BYINT|OBJ_INCREX_BYFLOAT))) { + if (getLongDoubleFromObjectOrReply(c, next, &args->incr_ld, + "Increment is not a valid float") != C_OK) + { + return C_ERR; + } + if (isinf(args->incr_ld)) { + addReplyError(c, "BYFLOAT increment cannot be Infinity"); + return C_ERR; + } + args->flags |= OBJ_INCREX_BYFLOAT; + j++; + } else if (!strcasecmp(opt, "LBOUND") && next && !(args->flags & OBJ_INCREX_LBOUND)) { + args->flags |= OBJ_INCREX_LBOUND; + lower_bound = next; + j++; + } else if (!strcasecmp(opt, "UBOUND") && next && !(args->flags & OBJ_INCREX_UBOUND)) { + args->flags |= OBJ_INCREX_UBOUND; + upper_bound = next; + j++; + } else if (!strcasecmp(opt, "OVERFLOW") && next && + !(args->flags & (OBJ_INCREX_OVERFLOW_FAIL|OBJ_INCREX_OVERFLOW_SAT|OBJ_INCREX_OVERFLOW_REJECT))) + { + if (!strcasecmp(next->ptr, "FAIL")) { + args->flags |= OBJ_INCREX_OVERFLOW_FAIL; + } else if (!strcasecmp(next->ptr, "SAT")) { + args->flags |= OBJ_INCREX_OVERFLOW_SAT; + } else if (!strcasecmp(next->ptr, "REJECT")) { + args->flags |= OBJ_INCREX_OVERFLOW_REJECT; + } else { + addReplyError(c, "OVERFLOW policy must be FAIL, SAT or REJECT"); + return C_ERR; + } + j++; + } else if (!strcasecmp(opt, "ENX") && !(args->flags & (OBJ_INCREX_ENX|OBJ_INCREX_PERSIST))) { + args->flags |= OBJ_INCREX_ENX; + } else if (!strcasecmp(opt, "PERSIST") && !(args->flags & (expire_flags|OBJ_INCREX_ENX))) { + args->flags |= OBJ_INCREX_PERSIST; + } else if (!strcasecmp(opt, "EX") && !(args->flags & expire_flags) && next) { + args->flags |= OBJ_INCREX_EX; + expire = next; + j++; + } else if (!strcasecmp(opt, "PX") && !(args->flags & 
expire_flags) && next) { + args->flags |= OBJ_INCREX_PX; + args->unit = UNIT_MILLISECONDS; + expire = next; + j++; + } else if (!strcasecmp(opt, "EXAT") && !(args->flags & expire_flags) && next) { + args->flags |= OBJ_INCREX_EXAT; + expire = next; + j++; + } else if (!strcasecmp(opt, "PXAT") && !(args->flags & expire_flags) && next) { + args->flags |= OBJ_INCREX_PXAT; + args->unit = UNIT_MILLISECONDS; + expire = next; + j++; + } else { + addReplyErrorObject(c, shared.syntaxerr); + return C_ERR; + } + } + + /* Resolve LBOUND/UBOUND values now that BYINT/BYFLOAT is known. */ + if (args->flags & OBJ_INCREX_BYFLOAT) { + if (lower_bound && getLongDoubleFromObjectOrReply(c, lower_bound, &args->lb_ld, + "LBOUND is not a valid float") != C_OK) + { + return C_ERR; + } + if (upper_bound && getLongDoubleFromObjectOrReply(c, upper_bound, &args->ub_ld, + "UBOUND is not a valid float") != C_OK) + { + return C_ERR; + } + if (args->lb_ld > args->ub_ld) { + addReplyError(c, "LBOUND can't be greater than UBOUND"); + return C_ERR; + } + } else { + if (lower_bound && getLongLongFromObjectOrReply(c, lower_bound, &args->lb_ll, + "LBOUND is not an integer or out of range") != C_OK) + { + return C_ERR; + } + if (upper_bound && getLongLongFromObjectOrReply(c, upper_bound, &args->ub_ll, + "UBOUND is not an integer or out of range") != C_OK) + { + return C_ERR; + } + if (args->lb_ll > args->ub_ll) { + addReplyError(c, "LBOUND can't be greater than UBOUND"); + return C_ERR; + } + } + + /* ENX requires an expiration option. 
*/ + if ((args->flags & OBJ_INCREX_ENX) && !(args->flags & expire_flags)) { + addReplyError(c, "ENX flag requires an expiration"); + return C_ERR; + } + + if (expire) { + int relative_ttl = (args->flags & (OBJ_INCREX_EX|OBJ_INCREX_PX)) != 0; + if (getExpireMillisecondsOrReply(c, expire, relative_ttl, args->unit, &args->expire_ms) != C_OK) + return C_ERR; + } + return C_OK; +} + +/* + * INCREX key [BYFLOAT increment | BYINT increment] [LBOUND lowerbound] + * [UBOUND upperbound] [OVERFLOW <FAIL | SAT | REJECT>] + * [EX seconds | PX milliseconds | EXAT seconds-timestamp | PXAT milliseconds-timestamp | PERSIST] [ENX] + * + * Increments the numeric value of a key and optionally updates its expiration time. + * + * Increment options: + * Defaults to incrementing by 1 (like INCR) if no increment option is given. + * At most one of the following may be specified: + * - BYINT: Increment by an integer (like INCRBY). + * - BYFLOAT: Increment by a float (like INCRBYFLOAT). Returns an error if the result is NaN or Infinity. + * + * Range options: + * LBOUND and UBOUND optionally restrict the result to a range. The behavior + * when the result would land outside that range (or, with no explicit bound, + * would overflow the type limits) is controlled by OVERFLOW: + * - OVERFLOW FAIL (default): the operation is rejected with an error, + * matching the semantics of INCRBY/INCRBYFLOAT. + * - OVERFLOW SAT: the result is silently capped at UBOUND / floored at LBOUND + * (or saturated to the type limits when no explicit bound is + * given) instead of producing an error. + * - OVERFLOW REJECT: the operation is silently skipped (the key value and TTL + * are left unchanged) and the reply is the current value with + * an applied increment of 0, instead of producing an error. + * + * Expiration options: + * At most one of the following may be specified: + * - EX: Set expiration in seconds. + * - PX: Set expiration in milliseconds. + * - EXAT: Set expiration to an absolute Unix timestamp (seconds). 
+ * - PXAT: Set expiration to an absolute Unix timestamp (milliseconds). + * - PERSIST: Remove the key's TTL. + * + * If no expiration option is given, the key's existing TTL is preserved. + * ENX restricts expiration updates to keys that currently have no TTL. + * + * Reply: + * - (Simple Error) if any parameter is invalid, or if BYFLOAT produces NaN or Infinity. + * - (Array) of two Bulk Strings on success: + * 1. The new value of the key after the increment. + * 2. The actual increment applied. + * + * Note: When the result is saturated by LBOUND/UBOUND, the expiration is still updated normally. + */ +void increxCommand(client *c) { + kvobj *o = NULL; + robj *new = NULL; + dictEntryLink link; + long long value_ll, oldvalue_ll = 0; + long double value_ld, oldvalue_ld = 0; + + incrExArgs args; + if (parseIncrExArgumentsOrReply(c, 2, &args) != C_OK) + return; + + o = lookupKeyWriteWithLink(c->db, c->argv[1], &link); + if (checkType(c, o, OBJ_STRING)) return; + + int byfloat = args.flags & OBJ_INCREX_BYFLOAT; + /* FAIL is the default when no OVERFLOW policy is specified. */ + int fail_mode = !(args.flags & (OBJ_INCREX_OVERFLOW_SAT | OBJ_INCREX_OVERFLOW_REJECT)); + int reject_mode = args.flags & OBJ_INCREX_OVERFLOW_REJECT; + if (byfloat) { + long double lb = args.lb_ld, ub = args.ub_ld; + if (getLongDoubleFromObjectOrReply(c, o, &value_ld, NULL) != C_OK) + return; + + /* Reject if the existing value is already Infinity (the increment is + * checked at parse time in parseIncrExArgumentsOrReply). */ + if (isinf(value_ld)) { + addReplyError(c, "value cannot be Infinity"); + return; + } + + oldvalue_ld = value_ld; + value_ld += args.incr_ld; + int overflow = isinf(value_ld); + if (overflow || value_ld > ub || value_ld < lb) { + /* FAIL: return an error. */ + if (fail_mode) { + addReplyError(c, overflow ? 
"increment would produce Infinity" : + "value is out of bounds"); + return; + } + + /* Result is infinite or out of [LBOUND, UBOUND]: + * FAIL: error; SAT: clamp to +/-LDBL_MAX or the breached bound; + * REJECT: leave key untouched, reply [current_value, 0]. */ + if (reject_mode) { + addReplyArrayLen(c, 2); + addReplyHumanLongDouble(c, oldvalue_ld); + addReplyHumanLongDouble(c, 0); + return; + } + + /* SAT: clamp the result. */ + if (overflow) + value_ld = (args.incr_ld >= 0) ? ub : lb; + else + value_ld = value_ld > ub ? ub : lb; + } + + long double delta = value_ld - oldvalue_ld; + if (isinf(delta)) { + /* The applied delta cannot be represented as a valid long double. This can + * only happen under OVERFLOW SAT when the saturated result and the + * prior value sit at opposite ends of the type range. */ + addReplyError(c, "applied increment would be Infinity"); + return; + } + + addReplyArrayLen(c, 2); + addReplyHumanLongDouble(c, value_ld); + addReplyHumanLongDouble(c, delta); + } else { + long long lb = args.lb_ll, ub = args.ub_ll; + if (getLongLongFromObjectOrReply(c, o, &value_ll, NULL) != C_OK) + return; + + oldvalue_ll = value_ll; + int overflow = add_overflow_ll(oldvalue_ll, args.incr_ll, &value_ll); + if (overflow || value_ll > ub || value_ll < lb) { + /* FAIL: return an error. */ + if (fail_mode) { + addReplyError(c, overflow ? "increment or decrement would overflow" : + "value is out of bounds"); + return; + } + + /* Result overflows long long or is out of [LBOUND, UBOUND]: + * FAIL: error; SAT: clamp to LLONG_MAX/LLONG_MIN or the breached bound; + * REJECT: leave key untouched, reply [current_value, 0]. */ + if (reject_mode) { + addReplyArrayLen(c, 2); + addReplyLongLong(c, oldvalue_ll); + addReplyLongLong(c, 0); + return; + } + + /* SAT: clamp the result. */ + if (overflow) + value_ll = (args.incr_ll >= 0) ? ub : lb; + else + value_ll = value_ll > ub ? 
ub : lb; + } + + long long delta = 0; + if (sub_overflow_ll(value_ll, oldvalue_ll, &delta)) { + /* The applied delta cannot be represented as a long long. This can + * only happen under OVERFLOW SAT when the saturated result and the + * prior value sit at opposite ends of the type range. */ + addReplyError(c, "applied increment would overflow"); + return; + } + + addReplyArrayLen(c, 2); + addReplyLongLong(c, value_ll); + addReplyLongLong(c, delta); + } + + /* If the expire time is already elapsed, it is propagated as DEL/UNLINK */ + int has_expiry = o && (kvobjGetExpire(o) != -1); + int set_new_expire = args.expire_ms && (!(args.flags & OBJ_INCREX_ENX) || !has_expiry); + if (set_new_expire && checkAlreadyExpired(args.expire_ms)) { + if (o) { + int deleted = dbGenericDelete(c->db, c->argv[1], server.lazyfree_lazy_expire, DB_FLAG_KEY_EXPIRED); + serverAssert(deleted); + robj *aux = server.lazyfree_lazy_expire ? shared.unlink : shared.del; + rewriteClientCommandVector(c, 2, aux, c->argv[1]); + keyModified(c, c->db, c->argv[1], NULL, 1); + notifyKeyspaceEvent(NOTIFY_GENERIC, "del", c->argv[1], c->db->id); + server.dirty++; + } + server.stat_expiredkeys++; + return; + } + + if (!byfloat && o && o->refcount == 1 && o->encoding == OBJ_ENCODING_INT && + value_ll >= LONG_MIN && value_ll <= LONG_MAX) + { + new = o; + o->ptr = (void*)((long)value_ll); + updateKeysizesHist(c->db, OBJ_STRING, (int64_t)sdigits10(oldvalue_ll), (int64_t)sdigits10(value_ll)); + } else { + if (byfloat) + new = createStringObjectFromLongDouble(value_ld, 1); + else + new = createStringObjectFromLongLongForValue(value_ll); + if (o) + dbReplaceValueWithLink(c->db, c->argv[1], &new, link); + else + dbAddByLink(c->db, c->argv[1], &new, &link); + } + + /* Replicate INCREX as SET with the final value to avoid float precision + * or formatting drift across replicas / AOF restart. 
The TTL clause is: + * PERSIST -> SET + * sets a new TTL -> SET PXAT + * otherwise -> SET KEEPTTL (no expire option, + * or ENX hit on a key that already has a TTL) */ + int persist_notify = 0, expire_notify = 0; + if (args.flags & OBJ_INCREX_PERSIST) { + persist_notify = removeExpire(c->db, c->argv[1]); + rewriteClientCommandVector(c, 3, shared.set, c->argv[1], new); + } else if (set_new_expire) { + new = setExpire(c, c->db, c->argv[1], args.expire_ms); + expire_notify = 1; + robj *milliseconds_obj = createStringObjectFromLongLong(args.expire_ms); + rewriteClientCommandVector(c, 5, shared.set, c->argv[1], new, shared.pxat, milliseconds_obj); + decrRefCount(milliseconds_obj); + } else { + rewriteClientCommandVector(c, 4, shared.set, c->argv[1], new, shared.keepttl); + } + + keyModified(c, c->db, c->argv[1], new, 1); + server.dirty++; + + notifyKeyspaceEvent(NOTIFY_STRING, byfloat ? "incrbyfloat" : "incrby", c->argv[1], c->db->id); + if (persist_notify) + notifyKeyspaceEvent(NOTIFY_GENERIC, "persist", c->argv[1], c->db->id); + if (expire_notify) + notifyKeyspaceEvent(NOTIFY_GENERIC, "expire", c->argv[1], c->db->id); + + /* A KSN handler may reallocate the kvobj and replace it in the dict. The local + * pointers `o`/`new` may then point to a stale object and must not be dereferenced; + * null them out. The object is not freed though if rewriteClientCommandVector() + * above incremented its refcount, so c->argv keeps it alive for command propagation. 
*/ + KSN_INVALIDATE_KVOBJ(o); + KSN_INVALIDATE_KVOBJ(new); +} + void appendCommand(client *c) { size_t totlen; robj *append; diff --git a/src/tracking.c b/src/tracking.c index 09acd33f8..c235d5812 100644 --- a/src/tracking.c +++ b/src/tracking.c @@ -125,7 +125,7 @@ int checkPrefixCollisionsOrReply(client *c, robj **prefixes, size_t numprefix) { "Prefixes for a single client must not overlap.", (unsigned char *)prefixes[i]->ptr, (unsigned char *)prefixes[j]->ptr); - return i; + return 0; } } } diff --git a/src/util.h b/src/util.h index cbc63ea6c..0c775c205 100644 --- a/src/util.h +++ b/src/util.h @@ -91,6 +91,39 @@ static inline int log2ceil(size_t x) { #endif } +/* Return the smallest power of 2 >= count (e.g. 5 -> 8, 8 -> 8). */ +static inline int nearestNextPowerOf2(unsigned int count) { + if (count <= 1) return 1; + return 1 << (32 - __builtin_clz(count-1)); +} + +/* Check for __builtin_add_overflow() */ +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif +#if __has_builtin(__builtin_add_overflow) || (defined(__GNUC__) && __GNUC__ >= 5) +#define add_overflow_ll(a, b, res) __builtin_add_overflow((a), (b), (res)) +#define sub_overflow_ll(a, b, res) __builtin_sub_overflow((a), (b), (res)) +#else +#include +static inline int add_overflow_ll(long long a, long long b, long long *res) { + if ((b > 0 && a > LLONG_MAX - b) || (b < 0 && a < LLONG_MIN - b)) { + *res = (long long)((unsigned long long)a + (unsigned long long)b); + return 1; + } + *res = a + b; + return 0; +} +static inline int sub_overflow_ll(long long a, long long b, long long *res) { + if ((b < 0 && a > LLONG_MAX + b) || (b > 0 && a < LLONG_MIN + b)) { + *res = (long long)((unsigned long long)a - (unsigned long long)b); + return 1; + } + *res = a - b; + return 0; +} +#endif + #ifndef static_assert #define static_assert(expr, lit) extern char __static_assert_failure[(expr) ? 
1:-1] #endif diff --git a/src/vector.c b/src/vector.c index e5809dabb..fc0ba13e1 100644 --- a/src/vector.c +++ b/src/vector.c @@ -33,13 +33,19 @@ void vecInit(vec *v, void **stack, size_t initcap) { v->size = 0; v->cap = initcap; v->stack = stack; /* stack is NULL if not used */ - + v->free = NULL; + /* now init data either stack, heap or NULL */ v->data = (stack) ? stack : ((initcap > 0) ? zmalloc(initcap * sizeof(void *)) : NULL); } -/* Free only heap storage if any */ +/* Release storage. If a free method is set, it is applied to every element + * before the backing storage is released. Stack storage is never freed. */ void vecRelease(vec *v) { + if (v->free) { + for (size_t i = 0; i < v->size; i++) + v->free(v->data[i]); + } /* if data is not stack-allocated and is not NULL, free it */ if (v->data && v->data != v->stack) zfree(v->data); @@ -47,29 +53,25 @@ void vecRelease(vec *v) { v->cap = 0; v->data = NULL; v->stack = NULL; + v->free = NULL; } -/* Reset the logical length to zero while preserving allocated storage. */ +/* Reset the logical length to zero while preserving allocated storage. + * If a free method is set, it is applied to every element before reset. */ void vecClear(vec *v) { + if (v->free) { + for (size_t i = 0; i < v->size; i++) + v->free(v->data[i]); + } v->size = 0; } -/* Return the number of elements in the vector. */ -size_t vecSize(const vec *v) { - return v->size; -} - /* Get element at index. index must be < vecSize(v). */ void *vecGet(const vec *v, size_t index) { assert(index < v->size); return v->data[index]; } -/* Return the contiguous backing array. */ -void **vecData(vec *v) { - return v->data; -} - /* Ensure capacity is at least mincap. */ void vecReserve(vec *v, size_t mincap) { void **newdata; @@ -90,7 +92,7 @@ void vecReserve(vec *v, size_t mincap) { /* Append one element, growing storage as needed. 
*/ void vecPush(vec *v, void *value) { - if (v->size == v->cap) { + if (unlikely(v->size == v->cap)) { size_t newcap = (v->cap > 0) ? v->cap * 2 : VEC_DEFAULT_INITCAP; vecReserve(v, newcap); } @@ -107,6 +109,18 @@ void vecPush(vec *v, void *value) { #define UNUSED(x) (void)(x) +static int vecTestFreeCalls = 0; +static void vecTestFree(void *ptr) { + vecTestFreeCalls++; + zfree(ptr); +} + +static int *vecTestNewInt(int v) { + int *p = zmalloc(sizeof(int)); + *p = v; + return p; +} + int vectorTest(int argc, char **argv, int flags) { UNUSED(argc); @@ -168,6 +182,46 @@ int vectorTest(int argc, char **argv, int flags) vecGet(&v, 0) == &five && vecGet(&v, 1) == &six); vecRelease(&v); + /* vecSetFreeMethod: element free callback is invoked on release. */ + void *vstack2[2]; + vecInit(&v, vstack2, 2); + vecSetFreeMethod(&v, vecTestFree); + vecPush(&v, vecTestNewInt(1)); + vecPush(&v, vecTestNewInt(2)); + vecPush(&v, vecTestNewInt(3)); /* triggers spill to heap */ + vecTestFreeCalls = 0; + vecRelease(&v); + test_cond("vecRelease() invokes free method on each element", + vecTestFreeCalls == 3); + + /* vecClear: free method is invoked on each element, storage preserved. */ + vecInit(&v, NULL, 4); + vecSetFreeMethod(&v, vecTestFree); + vecPush(&v, vecTestNewInt(1)); + vecPush(&v, vecTestNewInt(2)); + vecPush(&v, vecTestNewInt(3)); + heap_data = vecData(&v); + vecTestFreeCalls = 0; + vecClear(&v); + test_cond("vecClear() invokes free method on each element preserving storage", + vecTestFreeCalls == 3 && vecSize(&v) == 0 && + vecData(&v) == heap_data && v.cap == 4); + /* Push again after clear to verify the vector is still usable. 
*/ + vecPush(&v, vecTestNewInt(4)); + test_cond("vecPush() works after vecClear() with free method", + vecSize(&v) == 1 && vecData(&v) == heap_data); + vecTestFreeCalls = 0; + vecRelease(&v); + test_cond("vecRelease() after vecClear()+push frees remaining element", + vecTestFreeCalls == 1); + + vecInit(&v, NULL, 4); + vecSetFreeMethod(&v, vecTestFree); + vecTestFreeCalls = 0; + vecRelease(&v); + test_cond("vecRelease() free method is a no-op on empty vector", + vecTestFreeCalls == 0); + return 0; } #endif diff --git a/src/vector.h b/src/vector.h index a3ea28505..c89955c98 100644 --- a/src/vector.h +++ b/src/vector.h @@ -60,25 +60,36 @@ typedef struct vec { size_t cap; /* Capacity of the vector. */ void **data; /* Heap-allocated storage or refers to stack. */ void **stack; /* Optional stack buffer. */ + void (*free)(void *ptr); /* Optional free method, applied to each + * element on vecRelease/vecClear. NULL = no-op. */ } vec; +/* Return the contiguous backing array. */ +static inline void **vecData(const vec *v) { return v->data; } + +/* Return the number of elements in the vector. */ +static inline size_t vecSize(const vec *v) { return v->size; } + /* Initialize a vector */ void vecInit(vec *v, void **stack, size_t initcap); -/* Free only heap storage if any */ +/* Set a free method applied to every element on vecRelease and vecClear. + * Symmetric to listSetFreeMethod for adlist. */ +static inline void vecSetFreeMethod(vec *v, void (*freefn)(void *ptr)) { + v->free = freefn; +} + +/* Release storage. If a free method is set, it is applied to every element + * before the backing storage is released. Stack storage is never freed. */ void vecRelease(vec *v); -/* Reset the logical length to zero while preserving allocated storage. */ +/* Reset the logical length to zero while preserving allocated storage. + * If a free method is set, it is applied to every element before reset. */ void vecClear(vec *v); -size_t vecSize(const vec *v); - /* Requires index < vecSize(v). 
*/ void *vecGet(const vec *v, size_t index); -/* Return the contiguous backing array. */ -void **vecData(vec *v); - /* Ensure capacity is at least mincap. */ void vecReserve(vec *v, size_t mincap); diff --git a/src/zipmap.c b/src/zipmap.c index 51c64ca81..e3981d810 100644 --- a/src/zipmap.c +++ b/src/zipmap.c @@ -387,6 +387,10 @@ int zipmapValidateIntegrity(unsigned char *zm, size_t size, int deep) { /* read the field name length */ l = zipmapDecodeLength(p); + /* Sanity check: length < 254 must be encoded in 1 byte, not 5 bytes */ + if (l < ZIPMAP_BIGLEN && s != 1) + return 0; + p += s; /* skip the encoded field size */ p += l; /* skip the field */ @@ -402,6 +406,9 @@ int zipmapValidateIntegrity(unsigned char *zm, size_t size, int deep) { /* read the value length */ l = zipmapDecodeLength(p); + /* Sanity check: length < 254 must be encoded in 1 byte, not 5 bytes */ + if (l < ZIPMAP_BIGLEN && s != 1) + return 0; p += s; /* skip the encoded value size*/ e = *p++; /* skip the encoded free space (always encoded in one byte) */ p += l+e; /* skip the value and free space */ diff --git a/src/zmalloc.c b/src/zmalloc.c index 21d5749e4..5b84ccb07 100644 --- a/src/zmalloc.c +++ b/src/zmalloc.c @@ -2,6 +2,9 @@ * * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. + * + * Copyright (c) 2024-present, Valkey contributors. + * All rights reserved. * * Licensed under your choice of (a) the Redis Source Available License 2.0 * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the @@ -77,10 +80,27 @@ void je_free_with_usize(void *ptr, size_t *usize); #define realloc_with_usize(ptr,size,old_usize,new_usize) je_realloc_with_usize(ptr,size,old_usize,new_usize) #define free_with_usize(ptr,usize) je_free_with_usize(ptr,usize) #endif + +/* Compile-time jemalloc tuning: raise per-bin tcache limits for small size + * classes so bursts of same size small allocations don't spill into the + * arena which reduces performance. 
+ * + * lg_tcache_nslots_mul:3 default slot count log2 multiplier: 1 (2x) → 3 (8x). + * tcache_nslots_small_max:1000 per-bin hard cap 200 -> 1000. + */ +const char *je_malloc_conf = + "lg_tcache_nslots_mul:3,tcache_nslots_small_max:1000"; #endif -#define MAX_THREADS 16 /* Keep it a power of 2 so we can use '&' instead of '%'. */ -#define THREAD_MASK (MAX_THREADS - 1) +/* Per-thread memory accounting slots. The first DEDICATED_ENTRIES threads + * (typically the main thread plus io threads) each get a private slot and can + * use the cheap single-writer atomic operation (plain load+store). + * Threads beyond that share a pool hashed by thread index and pay the cost of + * a full atomic RMW. */ +#define DEDICATED_ENTRIES 8 +#define SHARED_ENTRIES 8 /* Must be a power of 2 for modulo */ +#define SHARED_ENTRIES_MASK (SHARED_ENTRIES - 1) +#define MAX_ENTRIES (DEDICATED_ENTRIES + SHARED_ENTRIES) #define PEAK_CHECK_THRESHOLD (1024 * 100) /* 100KB */ typedef struct used_memory_entry { @@ -89,7 +109,7 @@ typedef struct used_memory_entry { char padding[CACHE_LINE_SIZE - sizeof(long long) - sizeof(long long)]; } used_memory_entry; -static __attribute__((aligned(CACHE_LINE_SIZE))) used_memory_entry used_memory[MAX_THREADS]; +static __attribute__((aligned(CACHE_LINE_SIZE))) used_memory_entry used_memory[MAX_ENTRIES]; static redisAtomic size_t num_active_threads = 0; static redisAtomic size_t zmalloc_peak = 0; static redisAtomic time_t zmalloc_peak_time = 0; @@ -97,19 +117,75 @@ static __thread long my_thread_index = -1; static inline void init_my_thread_index(void) { if (unlikely(my_thread_index == -1)) { - atomicGetIncr(num_active_threads, my_thread_index, 1); - my_thread_index &= THREAD_MASK; + long idx; + atomicGetIncr(num_active_threads, idx, 1); + if (idx < DEDICATED_ENTRIES) { + my_thread_index = idx; + } else { + /* Overflow threads share the shared pool entries (atomic RMW). 
*/ + my_thread_index = DEDICATED_ENTRIES + (idx & SHARED_ENTRIES_MASK); + } } } -static void update_zmalloc_stat_alloc(long long bytes_delta) { +/* Pre-advance the thread index counter so reserved threads that call + * zmalloc_register_reserved_slot() can claim dedicated used_memory accounting + * slots. Must be called once by main() before any other thread can allocate via + * zmalloc(), otherwise background threads could auto-register into the + * dedicated range. See DEDICATED_ENTRIES comment for details. */ +void zmalloc_reserve_thread_slots(int n) { + assert(n >= 1); + + size_t cur; + atomicGet(num_active_threads, cur); + assert((my_thread_index == -1 && cur == 0) || + (my_thread_index == 0 && cur == 1)); + + if (my_thread_index == -1) my_thread_index = 0; /* claim entry 0 for main thread */ + atomicSet(num_active_threads, (size_t)n); +} + +/* A reserved thread, e.g. an IO thread, calls this once at startup, before its + * first allocation. Claims the next dedicated slot via a private atomic counter, + * falls back to the shared pool if all dedicated slots have been taken. */ +void zmalloc_register_reserved_slot(void) { + assert(my_thread_index == -1); + static redisAtomic int reserved_slot_counter = 1; /* Slot 0 is reserved for main thread. */ + + int slot; + atomicGetIncr(reserved_slot_counter, slot, 1); + if (slot < DEDICATED_ENTRIES) { + size_t reserved; + atomicGet(num_active_threads, reserved); + assert((size_t)slot < reserved); + + my_thread_index = slot; + } else { + my_thread_index = DEDICATED_ENTRIES + (slot & SHARED_ENTRIES_MASK); + } +} + +static inline long long update_used_memory_entry(used_memory_entry *entry, long long bytes_delta) { + long long thread_used; + + if (my_thread_index < DEDICATED_ENTRIES) { + /* Dedicated slot: single writer, plain load+store (no lock prefix). */ + atomicIncrGetSingleWriter(entry->used_memory, bytes_delta, thread_used); + } else { + /* Shared pool slots: multiple writers, atomic RMW required. 
*/ + atomicIncrGet(entry->used_memory, thread_used, bytes_delta); + } + return thread_used; +} + +static inline void update_zmalloc_stat_alloc(long long bytes_delta) { init_my_thread_index(); - /* Per-thread allocation counter and the last counter value at which we ran a - * global peak check (throttles how often we call zmalloc_used_memory()). */ - long long thread_used, thread_last_peak_check_used; - atomicIncrGet(used_memory[my_thread_index].used_memory, thread_used, bytes_delta); - atomicGet(used_memory[my_thread_index].last_peak_check, thread_last_peak_check_used); + used_memory_entry *entry = &used_memory[my_thread_index]; + long long thread_used = update_used_memory_entry(entry, bytes_delta); + + long long thread_last_peak_check_used; + atomicGet(entry->last_peak_check, thread_last_peak_check_used); /* Only run the (expensive) global used/peak check after this thread's * allocation counter has advanced enough since the last check. */ @@ -140,13 +216,13 @@ static void update_zmalloc_stat_alloc(long long bytes_delta) { /* Record the thread counter value at which we last ran a global peak check, * to throttle future checks for this thread. */ - atomicSet(used_memory[my_thread_index].last_peak_check, thread_used); + atomicSet(entry->last_peak_check, thread_used); } } -static void update_zmalloc_stat_free(long long num) { +static inline void update_zmalloc_stat_free(long long num) { init_my_thread_index(); - atomicDecr(used_memory[my_thread_index].used_memory, num); + update_used_memory_entry(&used_memory[my_thread_index], -num); } static void zmalloc_default_oom(size_t size) { @@ -552,6 +628,21 @@ void zfree_usable(void *ptr, size_t *usable) { if (usable) *usable = oldsize; } +/* Free with a size hint to skip the emap lookup in jemalloc's free path. + * jemalloc's sdallocx() accepts any size that rounds to the correct size class + * (i.e. 
both requested and usable sizes work), but 'size' must be the usable + * size to keep zmalloc used_memory accounting accurate. */ +void zfree_with_size(void *ptr, size_t size) { + if (ptr == NULL) return; +#ifdef USE_JEMALLOC + update_zmalloc_stat_free(size); + je_sdallocx(ptr, size, 0); +#else + UNUSED(size); + zfree(ptr); +#endif +} + char *zstrdup_usable(const char *s, size_t *usable) { size_t l = strlen(s)+1; char *p = zmalloc_usable(l, usable); @@ -568,8 +659,8 @@ size_t zmalloc_used_memory(void) { size_t local_num_active_threads; long long total_mem = 0; atomicGet(num_active_threads,local_num_active_threads); - if (local_num_active_threads > MAX_THREADS) { - local_num_active_threads = MAX_THREADS; + if (local_num_active_threads > MAX_ENTRIES) { + local_num_active_threads = MAX_ENTRIES; } for (size_t i = 0; i < local_num_active_threads; ++i) { long long thread_used_mem; diff --git a/src/zmalloc.h b/src/zmalloc.h index 3dda50327..40d16c547 100644 --- a/src/zmalloc.h +++ b/src/zmalloc.h @@ -2,6 +2,9 @@ * * Copyright (c) 2009-Present, Redis Ltd. * All rights reserved. + * + * Copyright (c) 2024-present, Valkey contributors. + * All rights reserved. 
* * Licensed under your choice of (a) the Redis Source Available License 2.0 * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the @@ -100,16 +103,19 @@ __attribute__((malloc,alloc_size(1),noinline)) void *ztrymalloc(size_t size); __attribute__((malloc,alloc_size(1),noinline)) void *ztrycalloc(size_t size); __attribute__((alloc_size(2),noinline)) void *ztryrealloc(void *ptr, size_t size); void zfree(void *ptr); +void zfree_usable(void *ptr, size_t *usable); +void zfree_with_size(void *ptr, size_t size); void *zmalloc_usable(size_t size, size_t *usable); void *zcalloc_usable(size_t size, size_t *usable); void *zrealloc_usable(void *ptr, size_t size, size_t *usable, size_t *old_usable); void *ztrymalloc_usable(size_t size, size_t *usable); void *ztrycalloc_usable(size_t size, size_t *usable); void *ztryrealloc_usable(void *ptr, size_t size, size_t *usable, size_t *old_usable); -void zfree_usable(void *ptr, size_t *usable); __attribute__((malloc)) char *zstrdup(const char *s); __attribute__((malloc)) char *zstrdup_usable(const char *s, size_t *usable); size_t zmalloc_used_memory(void); +void zmalloc_reserve_thread_slots(int n); +void zmalloc_register_reserved_slot(void); size_t zmalloc_get_peak_memory(void); time_t zmalloc_get_peak_memory_time(void); void zmalloc_set_oom_handler(void (*oom_handler)(size_t)); diff --git a/tests/assets/array-32bit.rdb b/tests/assets/array-32bit.rdb new file mode 100644 index 000000000..94ff98ea3 Binary files /dev/null and b/tests/assets/array-32bit.rdb differ diff --git a/tests/integration/corrupt-dump-fuzzer.tcl b/tests/integration/corrupt-dump-fuzzer.tcl index a6d911324..8bd170027 100644 --- a/tests/integration/corrupt-dump-fuzzer.tcl +++ b/tests/integration/corrupt-dump-fuzzer.tcl @@ -15,7 +15,7 @@ if { ! 
[ catch { proc generate_collections {suffix elements} { set rd [redis_deferring_client] - set numcmd 7 + set numcmd 8 ;# base commands including array set has_vsets [server_has_command vadd] if {$has_vsets} {incr numcmd} @@ -29,6 +29,15 @@ proc generate_collections {suffix elements} { $rd zadd zset$suffix $j $val $rd sadd set$suffix $val $rd xadd stream$suffix * item 1 value $val + # Array with sparse indices and mixed value types (int, float, string) + set idx [expr {$j * 100 + int(rand() * 50)}] ;# sparse indices + if {$j % 3 == 0} { + $rd arset array$suffix $idx $j ;# integer value + } elseif {$j % 3 == 1} { + $rd arset array$suffix $idx [format "%.5f" [expr {rand() * 1000}]] ;# float value + } else { + $rd arset array$suffix $idx "str_$val" ;# string value + } if {$has_vsets} { $rd vadd vset$suffix VALUES 3 1 1 1 $j } @@ -59,6 +68,9 @@ proc generate_types {} { # create other non-collection types r incr int r set string str +if 0 { + r gcra gcra 10 5 60000 +} # create bigger objects with 10 items (more than a single ziplist / listpack) generate_collections big 10 diff --git a/tests/integration/corrupt-dump.tcl b/tests/integration/corrupt-dump.tcl index 59c7c8b3d..d333a4764 100644 --- a/tests/integration/corrupt-dump.tcl +++ b/tests/integration/corrupt-dump.tcl @@ -989,6 +989,169 @@ test {corrupt payload: fuzzer findings - vector sets with wrong encoding} { } } +test {corrupt payload: fuzzer findings - decrRefCount on NULL robj on corrupt KEY_META payload} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + r config set sanitize-dump-payload no + r debug set-skip-checksum-validation 1 + catch {r restore key 0 "\xF3\x02\x01\x0D\x00\x54\x23\x3F\xC9\x82\x32\x05\x8D" replace} err + assert_match "*Bad data format*" $err + r ping + } +} + +test {corrupt payload: stream with NACK shared between two consumers} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled 
no]] { + r debug set-skip-checksum-validation 1 + # Payload: stream with entry 1-0, one consumer group (mygroup), + # two consumers whose PELs both reference 1-0 (shared NACK). + # XACK on one consumer frees the NACK, leaving a dangling + # pointer in the other consumer's PEL (use-after-free). + catch {r RESTORE mystream 0 "\x1a\x01\x10\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x1d\x1d\x00\x00\x00\x0a\x00\x01\x01\x00\x01\x01\x01\x81\x6b\x02\x00\x01\x02\x01\x00\x01\x00\x01\x81\x76\x02\x04\x01\xff\x01\x01\x00\x01\x00\x00\x00\x01\x01\x07\x6d\x79\x67\x72\x6f\x75\x70\x01\x00\x01\x01\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x01\x64\x42\xb9\x9d\x01\x00\x00\x01\x02\x09\x63\x6f\x6e\x73\x75\x6d\x65\x72\x41\x01\x64\x42\xb9\x9d\x01\x00\x00\x01\x64\x42\xb9\x9d\x01\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x09\x63\x6f\x6e\x73\x75\x6d\x65\x72\x42\x01\x64\x42\xb9\x9d\x01\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\x01\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x40\x64\x40\x64\x00\x00\x00\x0d\x00\xe7\x12\xf7\xcc\x25\xd5\x0e\x44"} err + catch {r XACK mystream mygroup 1-0} _ + catch {r XREADGROUP GROUP mygroup consumerA COUNT 10 STREAMS mystream 0} _ + catch {r DEL mystream} _ + assert_match "*Bad data format*" $err + r ping + } +} + +test {corrupt payload: stream listpack with wrong deleted count in header} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no]] { + r config set sanitize-dump-payload yes + r debug set-skip-checksum-validation 1 + # Payload: stream whose listpack header says deleted_count = 1 + # but the only entry is live. 
+ catch {r RESTORE mystream 0 "\x1A\x01\x10\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x1D\x1D\x00\x00\x00\x0A\x00\x01\x01\x00\x01\x01\x01\x81\x6B\x02\x00\x01\x03\x01\x00\x01\x00\x01\x81\x76\x02\x04\x01\xFF\x01\x01\x00\x01\x00\x00\x00\x01\x00\x40\x64\x40\x64\x00\x00\x00\x0D\x00\xBD\x89\x4D\xF3\x41\xC5\xE0\x8E" REPLACE} err + catch {r XREAD COUNT 1 STREAMS mystream $} _ + assert_match "*Bad data format*" $err + r ping + } +} + +test {corrupt payload: stream length inconsistent with live entries} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no]] { + r debug set-skip-checksum-validation 1 + # Payload: listpack has master.count=1 (lp_live=1) so the lp_live <= 0 + # guard passes, but s->length=2 while live_entries accumulates to 1. + # Exercises the s->length != live_entries check in rdb.c. + catch {r RESTORE mystream 0 "\x1A\x01\x10\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x1D\x1D\x00\x00\x00\x0A\x00\x01\x01\x01\x01\x01\x01\x81\x6B\x02\x00\x01\x03\x01\x00\x01\x00\x01\x81\x76\x02\x04\x01\xFF\x02\x01\x00\x01\x00\x00\x00\x01\x00\x40\x64\x40\x64\x00\x00\x00\x0D\x00\xBD\x89\x4D\xF3\x41\xC5\xE0\x8E" REPLACE} err + catch {r XREAD COUNT 1 STREAMS mystream $} _ + assert_match "*Bad data format*" $err + r ping + } +} + +test {corrupt payload: stream all-tombstone listpack with zero length} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no]] { + r debug set-skip-checksum-validation 1 + # Payload: listpack has lp_live = 0 (only a tombstone entry) and + # s->length = 0. With lp_live rejected only on < 0 this would load + # silently into an inconsistent state (raxSize > 0, length = 0); + # the <= 0 check rejects it at the listpack header. 
+ catch {r RESTORE mystream 0 "\x1A\x01\x10\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x1D\x1D\x00\x00\x00\x0A\x00\x00\x01\x01\x01\x01\x01\x81\x6B\x02\x00\x01\x03\x01\x00\x01\x00\x01\x81\x76\x02\x04\x01\xFF\x00\x01\x00\x01\x00\x00\x00\x01\x00\x40\x64\x40\x64\x00\x00\x00\x0D\x00\xBD\x89\x4D\xF3\x41\xC5\xE0\x8E" REPLACE} err + catch {r XREAD COUNT 1 STREAMS mystream $} _ + assert_match "*Bad data format*" $err + r ping + } +} + +test {corrupt payload: stream live entry count integer overflow bypasses length check} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no]] { + r config set sanitize-dump-payload no + r debug set-skip-checksum-validation 1 + # Three listpacks whose lp_live counts sum to exactly 2^64, wrapping + # live_entries (uint64_t) back to 0. Stream length is also set to 0, so + # without the overflow guard the s->length != live_entries check passes, + # silently accepting a structurally broken stream. + # (LLONG_MAX + LLONG_MAX + 2 = 2^64 => live_entries wraps to 0) + catch {r RESTORE mystream 0 "\x0F\x03\x10\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x11\x11\x00\x00\x00\x01\x00\xF4\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F\x09\xFF\x10\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x11\x11\x00\x00\x00\x01\x00\xF4\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F\x09\xFF\x10\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x09\x09\x00\x00\x00\x01\x00\x02\x01\xFF\x00\x03\x00\x00\x0A\x00\x00\x00\x00\x00\x00\x00\x00\x00"} err + assert_match "*Bad data format*" $err + r ping + } +} + +test {corrupt payload: zipmap - element wouldn't fit in listpack} { + # Redis converts legacy zipmap encoded hashes to listpacks. + # This test creates a zipmap entry with a 1GB value which cannot + # fit into a listpack and verifies that RESTORE fails. 
+ + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no proto-max-bulk-len 2147483648 client-query-buffer-limit 2147483648]] { + proc zipmap_encode_len {len} { + if {$len < 254} { + return [binary format c $len] + } else { + return [binary format ci 254 $len] + } + } + r config set sanitize-dump-payload no + + # Generates Zipmap with 1GB value - should fail lpSafeToAdd check + set val_len [expr {1024 * 1024 * 1024 + 1}] + + # Zipmap has 1 element + set zm [binary format c 1] + # Field is 1 byte long + append zm [zipmap_encode_len 1] + append zm "k" + # Value is 1GB long + append zm [zipmap_encode_len $val_len] + append zm [binary format c 0] + append zm [string repeat "A" $val_len] + # ZIPMAP_END marker + append zm [binary format c 255] + # Prepend RDB header + set zm_len [string length $zm] + set rdb_len [binary format cI 0x80 $zm_len] + set dump [binary format c 9] + append dump $rdb_len + append dump $zm + append dump [binary format s 9] + append dump [binary format w 0] + + catch {r RESTORE _hash 0 $dump} err + assert_match "*Bad data format*" $err + } +} {} {large-memory} + +test {corrupt payload: zipmap - 5 bytes length encoding for a small field} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no]] { + catch { + r restore key 0 "\x09\x11\x01\xfe\x04\x00\x00\x00\x01\x00\xff\x00\x04\x00\x76\x61\x6c\x31\xff\x09\x00\xf9\xd5\xa4\xf7\x7d\x00\x3f\x1b" + } err + assert_match "*Bad data format*" $err + verify_log_message 0 "*integrity check failed*" 0 + } +} + +test {corrupt payload: zipmap - 5 bytes length encoding for a small value} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no]] { + catch { + r restore key 0 "\x09\x0e\x01\x01\x6b\xfe\x04\x00\x00\x00\x00\x76\x61\x6c\x31\xff\x09\x00\xd0\xf9\xe4\x1d\xe4\xfb\x11\x4c" + } err + assert_match "*Bad data format*" $err + verify_log_message 0 "*integrity check 
failed*" 0 + } +} + +test {corrupt payload: zipmap - 5 bytes length encoding and a huge field} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + catch { + r restore key 0 "\x09\x41\x15\x02\x04\x6b\x65\x79\x31\x04\x00\x76\x61\x6c\x31\xfe\x04\x00\x00\x00\xfe\xff\xff\xff\xfd\x00\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\xff\x09\x00\x54\x2f\x0a\xca\x4e\x5c\x49\x9f" + } err + assert_match "*Bad data format*" $err + verify_log_message 0 "*integrity check failed*" 0 + } +} + +test {corrupt payload: stream - duplicated consumer PEL entry} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + catch { + r restore key 0 
"\x15\x01\x10\x00\x00\x01\x9b\x0d\x56\xa9\xb7\x00\x00\x00\x00\x00\x00\x00\x00\xc3\x39\x40\x42\x15\x42\x00\x00\x00\x11\x00\x02\x01\x00\x01\x01\x01\x86\x66\x69\x65\x6c\x64\x31\x07\x00\x01\x40\x0f\x0a\x00\x01\x86\x76\x61\x6c\x75\x65\x31\x07\x04\x20\x0b\x02\xcd\xd9\x02\xe0\x01\x22\x01\x32\x07\x80\x1a\x04\x32\x07\x06\x01\xff\x02\x81\x00\x00\x01\x9b\x0d\x56\xb7\x90\x00\x81\x00\x00\x01\x9b\x0d\x56\xa9\xb7\x00\x00\x00\x02\x01\x07\x6d\x79\x67\x72\x6f\x75\x70\x81\x00\x00\x01\x9b\x0d\x56\xb7\x90\x00\x02\x02\x00\x00\x01\x9b\x0d\x56\xa9\xb7\x00\x00\x00\x00\x00\x00\x00\x00\x80\xd9\x56\x0d\x9b\x01\x00\x00\x01\x00\x00\x01\x9b\x0d\x56\xb7\x90\x00\x00\x00\x00\x00\x00\x00\x00\x80\xd9\x56\x0d\x9b\x01\x00\x00\x01\x01\x09\x63\x6f\x6e\x73\x75\x6d\x65\x72\x31\x80\xd9\x56\x0d\x9b\x01\x00\x00\x80\xd9\x56\x0d\x9b\x01\x00\x00\x02\x00\x00\x01\x9b\x0d\x56\xa9\xb7\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x9b\x0d\x56\xa9\xb7\x00\x00\x00\x00\x00\x00\x00\x00\x0c\x00\x00\x00\x00\x00\x00\x00\x00\x00\x09\x00\x4b\xe0\x99\x30\x67\x4d\xe5\x87" + } err + assert_match "*Bad data format*" $err + verify_log_message 0 "*Stream consumer PEL entry already has a consumer assigned*" 0 + } +} } ;# tags diff --git a/tests/integration/dismiss-mem.tcl b/tests/integration/dismiss-mem.tcl index 2b0fbb3e4..50f125762 100644 --- a/tests/integration/dismiss-mem.tcl +++ b/tests/integration/dismiss-mem.tcl @@ -46,6 +46,15 @@ start_server {tags {"dismiss external:skip needs:debug"}} { # stream r xadd bigstream * entry1 $bigstr entry2 $bigstr + # array: dense slice populated with large string values, plus a + # sparsely-populated array whose indices span multiple slices. + for {set i 0} {$i < 32} {incr i} { + r arset dense_array $i $bigstr + } + for {set i 0} {$i < 16} {incr i} { + r arset sparse_array [expr {$i * 5000}] $bigstr + } + set digest [debug_digest] # Test both RDB (yes) and AOF (no) rewrite paths. 
foreach preamble {yes no} { diff --git a/tests/integration/replication.tcl b/tests/integration/replication.tcl index 51959e4f4..0611a970e 100644 --- a/tests/integration/replication.tcl +++ b/tests/integration/replication.tcl @@ -119,7 +119,13 @@ start_server {tags {"repl external:skip"}} { } else { fail "set get wasn't propagated" } - assert_match {*calls=3,*} [cmdrstat set $A] + assert_equal [r set test qaz get get] vaz + wait_for_condition 500 10 { + [$A get test] eq "qaz" + } else { + fail "set get get wasn't propagated" + } + assert_match {*calls=4,*} [cmdrstat set $A] assert_match {} [cmdrstat getset $A] } @@ -880,27 +886,44 @@ proc compute_cpu_usage {start end} { return [ list $pucpu $pscpu ] } - +if {!$::valgrind} { # test diskless rdb pipe with multiple replicas, which may drop half way -start_server {tags {"repl external:skip tsan:skip"} overrides {save ""}} { - set master [srv 0 client] - $master config set repl-diskless-sync yes - $master config set repl-diskless-sync-delay 5 - $master config set repl-diskless-sync-max-replicas 2 - set master_host [srv 0 host] - set master_port [srv 0 port] - set master_pid [srv 0 pid] - # put enough data in the db that the rdb file will be bigger than the socket buffers - # and since we'll have key-load-delay of 100, 20000 keys will take at least 2 seconds - # we also need the replica to process requests during transfer (which it does only once in 2mb) - $master debug populate 20000 test 10000 - $master config set rdbcompression no - $master config set repl-rdb-channel no - # If running on Linux, we also measure utime/stime to detect possible I/O handling issues - set os [catch {exec uname}] - set measure_time [expr {$os == "Linux"} ? 
1 : 0] - foreach all_drop {no slow fast all timeout} { +foreach all_drop {no slow fast all timeout} { + start_server {tags {"repl external:skip tsan:skip"} overrides {save ""}} { + set master [srv 0 client] + $master config set repl-diskless-sync yes + $master config set repl-diskless-sync-delay 5 + $master config set repl-diskless-sync-max-replicas 2 + set master_host [srv 0 host] + set master_port [srv 0 port] + set master_pid [srv 0 pid] + if {$all_drop == "timeout"} { + # Use a larger RDB (~100 MB) so it cannot fit into the kernel TCP + # send buffer (autotuning can absorb tens of MB on some hosts). We + # need the primary to hit the blocked writer path + # (repl_last_partial_write != 0) while the slow replica is paused, + # so the cron triggers the "(full sync)" timeout path instead of + # the replica being moved to ONLINE prematurely and timing out via + # the "(streaming sync)" path. + $master debug populate 10000 test 10000 + } else { + # Put enough data in the db that the RDB is comfortably larger than the + # pipe and socket buffers so the primary can hit the blocked writer path, + # but keep it small enough that slow TLS CI runners don't spend minutes + # draining an oversized transfer (~40 MB uncompressed). + $master debug populate 4000 test 10000 + } + $master config set rdbcompression no + $master config set repl-rdb-channel no + # If running on Linux, we also measure utime/stime to detect possible I/O handling issues + set os [catch {exec uname}] + set measure_time [expr {$os == "Linux"} ? 1 : 0] + test "diskless $all_drop replicas drop during rdb pipe" { + # Reset config that the timeout subcase may change, so a failing + # subcase does not leave the next one with an aggressive timeout. 
+ $master config set repl-timeout 60 + $master config set rdb-key-save-delay 0 set replicas {} set replicas_alive {} # start one replica that will read the rdb fast, and one that will be slow @@ -917,7 +940,24 @@ start_server {tags {"repl external:skip tsan:skip"} overrides {save ""}} { set loglines [count_log_lines -2] [lindex $replicas 0] config set repl-diskless-load swapdb [lindex $replicas 1] config set repl-diskless-load swapdb - [lindex $replicas 0] config set key-load-delay 100 ;# 20k keys and 100 microseconds sleep means at least 2 seconds + if {$all_drop == "all"} { + # Keep the RDB child generating data long enough for + # both replicas to be killed before the pipe reaches + # EOF, so this subcase still covers the last-replica + # drop path instead of racing with normal completion. + $master config set rdb-key-save-delay 1000 + } + # For non-timeout subcases, use key-load-delay to keep + # replica 0 as a steady slow reader for the entire RDB + # transfer. This keeps the expected diskless pipe code + # paths covered without accepting alternate log outcomes. + if {$all_drop != "timeout"} { + # 4k keys with 500 microseconds each keeps replica 0 + # slow for about 2 seconds, which is long enough to + # fill the pipe without turning the transfer into a + # multi-minute TLS run. + [lindex $replicas 0] config set key-load-delay 500 + } [lindex $replicas 0] replicaof $master_host $master_port [lindex $replicas 1] replicaof $master_host $master_port @@ -931,9 +971,16 @@ start_server {tags {"repl external:skip tsan:skip"} overrides {save ""}} { set start_time [clock seconds] } - # wait a while so that the pipe socket writer will be - # blocked on write (since replica 0 is slow to read from the socket) - after 500 + if {$all_drop != "timeout"} { + # key-load-delay is already throttling the slow + # replica; just wait for the pipe to fill. + after 500 + } else { + # For the timeout subcase, stop the slow reader so it + # reaches repl-timeout during full sync. 
+ pause_process [srv -1 pid] + after 500 + } # add some command to be present in the command stream after the rdb. $master incr $all_drop @@ -948,14 +995,17 @@ start_server {tags {"repl external:skip tsan:skip"} overrides {save ""}} { set replicas_alive [lreplace $replicas_alive 0 0] } if {$all_drop == "timeout"} { + # Let one replica hit repl-timeout while the slow reader + # is paused, then restore a generous timeout so the + # remaining replica can finish the streamed RDB. $master config set repl-timeout 2 - # we want the slow replica to hang on a key for very long so it'll reach repl-timeout - pause_process [srv -1 pid] - after 2000 + wait_for_log_messages -2 {"*Disconnecting timedout replica (full sync)*"} $loglines 200 100 + $master config set repl-timeout 60 } - # wait for rdb child to exit - wait_for_condition 500 100 { + # Use a single generous budget for all subcases; successful + # runs still exit early once the child is done. + wait_for_condition 5000 100 { [s -2 rdb_bgsave_in_progress] == 0 } else { fail "rdb child didn't terminate" @@ -972,7 +1022,6 @@ start_server {tags {"repl external:skip tsan:skip"} overrides {save ""}} { wait_for_log_messages -2 {"*Diskless rdb transfer, done reading from pipe, 1 replicas still up*"} $loglines 1 1 } if {$all_drop == "timeout"} { - wait_for_log_messages -2 {"*Disconnecting timedout replica (full sync)*"} $loglines 1 1 wait_for_log_messages -2 {"*Diskless rdb transfer, done reading from pipe, 1 replicas still up*"} $loglines 1 1 # master disconnected the slow replica, remove from array set replicas_alive [lreplace $replicas_alive 0 0] @@ -996,18 +1045,23 @@ start_server {tags {"repl external:skip tsan:skip"} overrides {save ""}} { assert {$master_utime < 70} assert {$master_stime < 70} } - if {!$::no_latency && ($all_drop == "none" || $all_drop == "fast")} { + if {!$::no_latency && ($all_drop == "no" || $all_drop == "fast")} { assert {$master_utime < 15} assert {$master_stime < 15} } } + # In the "no" case both 
replicas stay alive through the + # full streamed RDB, so on slow TLS runners the final + # ONLINE transition can lag behind child exit. + set replica_online_wait_tries [expr {$all_drop == "no" ? 600 : 150}] + # verify the data integrity foreach replica $replicas_alive { # Wait that replicas acknowledge they are online so # we are sure that DBSIZE and DEBUG DIGEST will not # fail because of timing issues. - wait_for_condition 150 100 { + wait_for_condition $replica_online_wait_tries 100 { [lindex [$replica role] 3] eq {connected} } else { fail "replicas still not connected after some time" @@ -1032,6 +1086,7 @@ start_server {tags {"repl external:skip tsan:skip"} overrides {save ""}} { } } } +} ;# end of valgrind test "diskless replication child being killed is collected" { # when diskless master is waiting for the replica to become writable @@ -1872,3 +1927,79 @@ start_server {tags {"repl external:skip"}} { } } } + +# Fullsync should not free the functions lib ctx while the replica has +# a timed out function that is still running. 
+foreach type {script function} { + start_server {tags {"repl external:skip"}} { + start_server {} { + set master [srv -1 client] + set master_host [srv -1 host] + set master_port [srv -1 port] + set replica [srv 0 client] + + test "Fullsync should not free scripting engine on a replica while a $type is running" { + $master config set repl-diskless-sync yes + $master config set repl-diskless-sync-delay 0 + # Set small client output buffer limit to trigger fullsync quickly + $master config set client-output-buffer-limit "replica 1k 1k 0" + $replica config set busy-reply-threshold 1 ;# script timeout in 1 ms + + # Load function + if {$type eq "function"} { + $master function load replace {#!lua name=blocklib + redis.register_function{ + function_name='blockfunc', + callback=function() while true do end end, + flags={'no-writes'} + } + } + } + + # Start replication + $replica replicaof $master_host $master_port + wait_for_sync $replica + + # Run the blocking script on replica + set rd [redis_deferring_client] + if {$type eq "script"} { + $rd eval {while true do end} 0 + } else { + $rd fcall_ro blockfunc 0 + } + + # Verify replica replies with BUSY + wait_for_condition 50 100 { + [catch {$replica ping} e] == 1 && [string match {*BUSY*} $e] + } else { + fail "$type didn't become busy" + } + + # Fills client output buffer and triggers fullsync + populate 5 bigkey 1000000 -1 + wait_for_condition 50 100 { + [s -1 sync_full] >= 2 + } else { + fail "Fullsync was not triggered" + } + + # Verify replica is still running the function + after 1000 + catch {$replica ping} e + assert_match {*BUSY*} $e "replica should still reply with BUSY" + + if {$type eq "script"} { + $replica script kill + } else { + $replica function kill + } + + # Verify replica is responsive again + catch {$rd read} result + $rd close + wait_for_sync $replica + assert_equal [$replica ping] "PONG" + } + } + } +} diff --git a/tests/modules/keyspace_events.c b/tests/modules/keyspace_events.c index 
146261f6e..8dc9e1d1c 100644 --- a/tests/modules/keyspace_events.c +++ b/tests/modules/keyspace_events.c @@ -29,6 +29,11 @@ RedisModuleDict *module_event_log = NULL; /** Counts how many deleted KSN we got on keys with a prefix of "count_dels_" **/ static size_t dels = 0; +/* Subkey notification log */ +#define SUBKEY_LOG_MAX 256 +static char subkey_log[SUBKEY_LOG_MAX][512]; +static int subkey_log_count = 0; + static int KeySpace_NotificationLoaded(RedisModuleCtx *ctx, int type, const char *event, RedisModuleString *key){ REDISMODULE_NOT_USED(ctx); REDISMODULE_NOT_USED(type); @@ -298,6 +303,104 @@ static int cmdGetDels(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { return RedisModule_ReplyWithLongLong(ctx, dels); } +/* Subkey notification callback */ +static void KeySpace_NotificationSubkeys(RedisModuleCtx *ctx, int type, const char *event, + RedisModuleString *key, RedisModuleString **subkeys, int count) { + REDISMODULE_NOT_USED(ctx); + REDISMODULE_NOT_USED(type); + + if (subkey_log_count >= SUBKEY_LOG_MAX) return; + + const char *key_str = RedisModule_StringPtrLen(key, NULL); + + /* Format: " ..." 
or " 0" */ + char buf[512]; + int off = snprintf(buf, sizeof(buf), "%s %s %d", event, key_str, count); + for (int i = 0; i < count && (size_t)off < sizeof(buf) - 1; i++) { + const char *sk = RedisModule_StringPtrLen(subkeys[i], NULL); + off += snprintf(buf + off, sizeof(buf) - off, " %s", sk); + } + snprintf(subkey_log[subkey_log_count], sizeof(subkey_log[0]), "%s", buf); + subkey_log_count++; +} + +/* keyspace.get_subkey_events — return all logged subkey events as an array */ +static int cmdGetSubkeyEvents(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + REDISMODULE_NOT_USED(argv); + REDISMODULE_NOT_USED(argc); + RedisModule_ReplyWithArray(ctx, subkey_log_count); + for (int i = 0; i < subkey_log_count; i++) { + RedisModule_ReplyWithCString(ctx, subkey_log[i]); + } + return REDISMODULE_OK; +} + +/* keyspace.reset_subkey_events — clear the log */ +static int cmdResetSubkeyEvents(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + REDISMODULE_NOT_USED(argv); + REDISMODULE_NOT_USED(argc); + subkey_log_count = 0; + return RedisModule_ReplyWithSimpleString(ctx, "OK"); +} + +/* keyspace.notify_with_subkeys [subkey2 ...] 
— trigger a module subkey notification */ +static int cmdNotifyWithSubkeys(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + if (argc < 3) return RedisModule_WrongArity(ctx); + + RedisModuleString *key = argv[1]; + RedisModuleString **subkeys = &argv[2]; + int count = argc - 2; + + RedisModule_NotifyKeyspaceEventWithSubkeys(ctx, REDISMODULE_NOTIFY_HASH, "module_subkey_event", key, subkeys, count); + return RedisModule_ReplyWithSimpleString(ctx, "OK"); +} + +/* keyspace.subscribe_subkeys — subscribe with NONE flag (all events) */ +static int cmdSubscribeSubkeys(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + REDISMODULE_NOT_USED(argv); + REDISMODULE_NOT_USED(argc); + if (RedisModule_SubscribeToKeyspaceEventsWithSubkeys(ctx, REDISMODULE_NOTIFY_HASH | REDISMODULE_NOTIFY_GENERIC, + REDISMODULE_NOTIFY_FLAG_NONE, KeySpace_NotificationSubkeys) != REDISMODULE_OK) { + return RedisModule_ReplyWithError(ctx, "ERR subscribe failed"); + } + return RedisModule_ReplyWithSimpleString(ctx, "OK"); +} + +/* keyspace.unsubscribe_subkeys — unsubscribe the subkey callback */ +static int cmdUnsubscribeSubkeys(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + REDISMODULE_NOT_USED(argv); + REDISMODULE_NOT_USED(argc); + if (RedisModule_UnsubscribeFromKeyspaceEventsWithSubkeys(ctx, REDISMODULE_NOTIFY_HASH | REDISMODULE_NOTIFY_GENERIC, + REDISMODULE_NOTIFY_FLAG_NONE, KeySpace_NotificationSubkeys) != REDISMODULE_OK) { + return RedisModule_ReplyWithError(ctx, "ERR unsubscribe failed"); + } + return RedisModule_ReplyWithSimpleString(ctx, "OK"); +} + +/* keyspace.subscribe_require_subkeys — subscribe with SUBKEYS_REQUIRED flag */ +static int cmdSubscribeRequireSubkeys(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + REDISMODULE_NOT_USED(argv); + REDISMODULE_NOT_USED(argc); + if (RedisModule_SubscribeToKeyspaceEventsWithSubkeys(ctx, REDISMODULE_NOTIFY_HASH | REDISMODULE_NOTIFY_GENERIC, + REDISMODULE_NOTIFY_FLAG_SUBKEYS_REQUIRED, + 
KeySpace_NotificationSubkeys) != REDISMODULE_OK) { + return RedisModule_ReplyWithError(ctx, "ERR subscribe failed"); + } + return RedisModule_ReplyWithSimpleString(ctx, "OK"); +} + +/* keyspace.unsubscribe_require_subkeys — unsubscribe the SUBKEYS_REQUIRED callback */ +static int cmdUnsubscribeRequireSubkeys(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + REDISMODULE_NOT_USED(argv); + REDISMODULE_NOT_USED(argc); + if (RedisModule_UnsubscribeFromKeyspaceEventsWithSubkeys(ctx, REDISMODULE_NOTIFY_HASH | REDISMODULE_NOTIFY_GENERIC, + REDISMODULE_NOTIFY_FLAG_SUBKEYS_REQUIRED, + KeySpace_NotificationSubkeys) != REDISMODULE_OK) { + return RedisModule_ReplyWithError(ctx, "ERR unsubscribe failed"); + } + return RedisModule_ReplyWithSimpleString(ctx, "OK"); +} + static RedisModuleNotificationFunc get_callback_for_event(int event_mask) { switch(event_mask) { case REDISMODULE_NOTIFY_LOADED: @@ -442,6 +545,34 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) return REDISMODULE_ERR; } + if (RedisModule_CreateCommand(ctx, "keyspace.subscribe_subkeys", cmdSubscribeSubkeys, "", 0, 0, 0) == REDISMODULE_ERR) { + return REDISMODULE_ERR; + } + + if (RedisModule_CreateCommand(ctx, "keyspace.unsubscribe_subkeys", cmdUnsubscribeSubkeys, "", 0, 0, 0) == REDISMODULE_ERR) { + return REDISMODULE_ERR; + } + + if (RedisModule_CreateCommand(ctx, "keyspace.get_subkey_events", cmdGetSubkeyEvents, "readonly", 0, 0, 0) == REDISMODULE_ERR) { + return REDISMODULE_ERR; + } + + if (RedisModule_CreateCommand(ctx, "keyspace.reset_subkey_events", cmdResetSubkeyEvents, "", 0, 0, 0) == REDISMODULE_ERR) { + return REDISMODULE_ERR; + } + + if (RedisModule_CreateCommand(ctx, "keyspace.notify_with_subkeys", cmdNotifyWithSubkeys, "write", 0, 0, 0) == REDISMODULE_ERR) { + return REDISMODULE_ERR; + } + + if (RedisModule_CreateCommand(ctx, "keyspace.subscribe_require_subkeys", cmdSubscribeRequireSubkeys, "", 0, 0, 0) == REDISMODULE_ERR) { + return REDISMODULE_ERR; + } + 
+ if (RedisModule_CreateCommand(ctx, "keyspace.unsubscribe_require_subkeys", cmdUnsubscribeRequireSubkeys, "", 0, 0, 0) == REDISMODULE_ERR) { + return REDISMODULE_ERR; + } + if (argc == 1) { const char *ptr = RedisModule_StringPtrLen(argv[0], NULL); if (!strcasecmp(ptr, "noload")) { diff --git a/tests/modules/usercall.c b/tests/modules/usercall.c index 5bb88084a..dd013af08 100644 --- a/tests/modules/usercall.c +++ b/tests/modules/usercall.c @@ -119,13 +119,13 @@ int get_user_username(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { RedisModule_ReplyWithSimpleString(ctx, "none"); return REDISMODULE_OK; } - RedisModuleString *name = RedisModule_GetUserUsername(user); + RedisModuleString *name = RedisModule_GetUserUsername(ctx, user); if (name == NULL) { RedisModule_ReplyWithSimpleString(ctx, "none"); return REDISMODULE_OK; } RedisModule_ReplyWithString(ctx, name); - RedisModule_FreeString(NULL, name); + RedisModule_FreeString(ctx, name); return REDISMODULE_OK; } diff --git a/tests/sentinel/tests/15-config-set-config-get.tcl b/tests/sentinel/tests/15-config-set-config-get.tcl index f9831f8e8..16b302000 100644 --- a/tests/sentinel/tests/15-config-set-config-get.tcl +++ b/tests/sentinel/tests/15-config-set-config-get.tcl @@ -6,17 +6,22 @@ test "SENTINEL CONFIG SET and SENTINEL CONFIG GET handles multiple variables" { } assert_match {*yes*1234*} [S 1 SENTINEL CONFIG GET resolve-hostnames announce-port] assert_match {announce-port 1234} [S 1 SENTINEL CONFIG GET announce-port] + foreach_sentinel_id id { + S $id SENTINEL CONFIG SET resolve-hostnames no announce-port 0 + } } test "SENTINEL CONFIG GET for duplicate and unknown variables" { assert_equal {OK} [S 1 SENTINEL CONFIG SET resolve-hostnames yes announce-port 1234] assert_match {resolve-hostnames yes} [S 1 SENTINEL CONFIG GET resolve-hostnames resolve-hostnames does-not-exist] + S 1 SENTINEL CONFIG SET resolve-hostnames no announce-port 0 } test "SENTINEL CONFIG GET for patterns" { assert_equal {OK} [S 1 
SENTINEL CONFIG SET loglevel notice announce-port 1234 announce-hostnames yes ] assert_match {loglevel notice} [S 1 SENTINEL CONFIG GET log* *level loglevel] assert_match {announce-hostnames yes announce-ip*announce-port 1234} [S 1 SENTINEL CONFIG GET announce*] + S 1 SENTINEL CONFIG SET announce-port 0 announce-hostnames no } test "SENTINEL CONFIG SET duplicate variables" { @@ -36,6 +41,9 @@ test "SENTINEL CONFIG SET, one option does not exist" { } # The announce-port should not be set to 1234 as it was called with a wrong argument assert_match {*111*} [S 1 SENTINEL CONFIG GET announce-port] + foreach_sentinel_id id { + S $id SENTINEL CONFIG SET announce-port 0 + } } test "SENTINEL CONFIG SET, one option with wrong value" { diff --git a/tests/sentinel/tests/16-config-injection.tcl b/tests/sentinel/tests/16-config-injection.tcl new file mode 100644 index 000000000..6aff07de9 --- /dev/null +++ b/tests/sentinel/tests/16-config-injection.tcl @@ -0,0 +1,312 @@ +# Test that control characters are rejected where appropriate, and that +# string values are safely quoted when persisted to disk. +# +# Config injection is prevented by sentinelSdscatConfigArg(), which escapes +# values containing special characters at persistence time. Fields like +# notification-script, rename-command, master name, and announce-ip also +# reject control characters at input time as an additional safeguard. + +source "../tests/includes/init-tests.tcl" + +# Helper: read the sentinel config file for a given sentinel id. +proc read_sentinel_config {id} { + set configfile [file join "sentinel_${id}" "sentinel.conf"] + set fp [open $configfile r] + set content [read $fp] + close $fp + return $content +} + +# Helper: count how many lines in the config match a pattern. 
+proc count_config_lines {content pattern} { + set count 0 + foreach line [split $content "\n"] { + if {[string match $pattern $line]} { + incr count + } + } + return $count +} + +# Helper: restart a (already stopped) sentinel and wait until it responds to PING. +proc start_sentinel_and_wait {sid} { + restart_instance sentinel $sid + wait_for_condition 200 50 { + [catch {S $sid PING}] == 0 + } else { + fail "Sentinel $sid did not restart in time" + } +} + +# Helper: kill sentinel, restart it, and wait until it responds to PING. +proc restart_sentinel_and_wait {sid} { + kill_instance sentinel $sid + start_sentinel_and_wait $sid +} + +# Helper: assert that the sentinel config file contains the expected substring. +proc assert_config_contains {sid expected} { + set content [read_sentinel_config $sid] + assert {[string first $expected $content] >= 0} +} + +# Helper: append lines to a sentinel's config file (sentinel must be stopped). +proc append_to_sentinel_config {sid lines} { + set configfile [file join "sentinel_${sid}" "sentinel.conf"] + set fp [open $configfile a] + foreach line $lines { + puts $fp $line + } + close $fp +} + +# Helper: create an executable script with spaces in its path. +# Returns the full path. Caller should "file delete -force" the directory. 
+proc create_script_with_spaces {sid} { + set script_dir [file join [pwd] "sentinel_${sid}" "script dir"] + file mkdir $script_dir + set script_path [file join $script_dir "my script.sh"] + set fp [open $script_path w] + puts $fp "#!/bin/sh" + close $fp + file attributes $script_path -permissions 0755 + return $script_path +} + +# -------------------------------------------------------------------------- +# Section 1: Control character rejection in SENTINEL SET +# -------------------------------------------------------------------------- + +test "SENTINEL SET notification-script rejects control characters" { + assert_error "*must not contain control characters*" { + S 0 SENTINEL SET mymaster notification-script "/tmp/ok\n/tmp/evil.sh" + } +} + +test "SENTINEL SET client-reconfig-script rejects control characters" { + assert_error "*must not contain control characters*" { + S 0 SENTINEL SET mymaster client-reconfig-script "/tmp/ok\n/tmp/evil.sh" + } +} + +test "SENTINEL SET rename-command rejects control characters" { + assert_error "*must not contain control characters*" { + S 0 SENTINEL SET mymaster rename-command "CONFIG\nEVIL" "NEWCONFIG" + } + assert_error "*must not contain control characters*" { + S 0 SENTINEL SET mymaster rename-command "CONFIG" "NEW\nCONFIG" + } +} + +# -------------------------------------------------------------------------- +# Section 2: Control character rejection in SENTINEL MONITOR +# -------------------------------------------------------------------------- + +test "SENTINEL MONITOR rejects master name with control characters" { + set port [get_instance_attrib redis 0 port] + assert_error "*must not contain control characters*" { + S 0 SENTINEL MONITOR "bad\nmaster" 127.0.0.1 $port 2 + } + assert_error "*must not contain control characters*" { + S 0 SENTINEL MONITOR "bad\rmaster" 127.0.0.1 $port 2 + } +} + +# -------------------------------------------------------------------------- +# Section 3: Control character rejection in 
SENTINEL CONFIG SET +# -------------------------------------------------------------------------- + +test "SENTINEL CONFIG SET announce-ip rejects control characters" { + catch {S 0 SENTINEL CONFIG SET announce-ip "1.2.3.4\nevil-directive"} e + assert_match "*must not contain control characters*" $e +} + +# -------------------------------------------------------------------------- +# Section 4: Config injection attempt does not pollute config file +# -------------------------------------------------------------------------- + +test "Newline injection in auth-pass does not pollute config file" { + # Auth-pass accepts control characters, but sentinelSdscatConfigArg + # escapes them at persistence time, preventing config injection. + S 0 SENTINEL SET mymaster auth-pass "x\nsentinel notification-script mymaster /tmp/evil.sh" + S 0 SENTINEL FLUSHCONFIG + set content [read_sentinel_config 0] + assert {[count_config_lines $content "sentinel notification-script mymaster /tmp/evil.sh"] == 0} + assert_config_contains 0 {sentinel auth-pass mymaster "x\nsentinel notification-script mymaster /tmp/evil.sh"} + S 0 SENTINEL SET mymaster auth-pass "" +} + +test "Newline injection in auth-user does not pollute config file" { + S 0 SENTINEL SET mymaster auth-user "x\nsentinel notification-script mymaster /tmp/evil.sh" + S 0 SENTINEL FLUSHCONFIG + set content [read_sentinel_config 0] + assert {[count_config_lines $content "sentinel notification-script mymaster /tmp/evil.sh"] == 0} + assert_config_contains 0 {sentinel auth-user mymaster "x\nsentinel notification-script mymaster /tmp/evil.sh"} + S 0 SENTINEL SET mymaster auth-user "" +} + +test "Newline injection in sentinel-pass does not pollute config file" { + S 0 SENTINEL CONFIG SET sentinel-pass "x\nsentinel notification-script mymaster /tmp/evil.sh" + S 0 SENTINEL FLUSHCONFIG + set content [read_sentinel_config 0] + assert {[count_config_lines $content "sentinel notification-script mymaster /tmp/evil.sh"] == 0} + 
assert_config_contains 0 {sentinel sentinel-pass "x\nsentinel notification-script mymaster /tmp/evil.sh"} + S 0 SENTINEL CONFIG SET sentinel-pass "" +} + +test "Newline injection in sentinel-user does not pollute config file" { + S 0 SENTINEL CONFIG SET sentinel-user "x\nsentinel notification-script mymaster /tmp/evil.sh" + S 0 SENTINEL FLUSHCONFIG + set content [read_sentinel_config 0] + assert {[count_config_lines $content "sentinel notification-script mymaster /tmp/evil.sh"] == 0} + assert_config_contains 0 {sentinel sentinel-user "x\nsentinel notification-script mymaster /tmp/evil.sh"} + S 0 SENTINEL CONFIG SET sentinel-user "" +} + +# -------------------------------------------------------------------------- +# Section 5: Values with special characters survive config round-trip +# -------------------------------------------------------------------------- + +test "auth-pass with special characters persists correctly through restart" { + S 0 SENTINEL SET mymaster auth-pass {my "comp#$&^`'!,lex pass} + set expected {sentinel auth-pass mymaster "my \"comp#$&^`'!,lex pass"} + S 0 SENTINEL FLUSHCONFIG + assert_config_contains 0 $expected + restart_sentinel_and_wait 0 + S 0 SENTINEL FLUSHCONFIG + assert_config_contains 0 $expected + S 0 SENTINEL SET mymaster auth-pass "" +} + +test "auth-user with spaces persists correctly through restart" { + S 0 SENTINEL SET mymaster auth-user {user with spaces} + set expected {sentinel auth-user mymaster "user with spaces"} + S 0 SENTINEL FLUSHCONFIG + assert_config_contains 0 $expected + restart_sentinel_and_wait 0 + S 0 SENTINEL FLUSHCONFIG + assert_config_contains 0 $expected + S 0 SENTINEL SET mymaster auth-user "" +} + +test "notification-script with spaces persists correctly through restart" { + set script_path [create_script_with_spaces 0] + S 0 SENTINEL SET mymaster notification-script $script_path + S 0 SENTINEL FLUSHCONFIG + set content [read_sentinel_config 0] + # The path must be quoted since it contains spaces. 
+ assert {[string first "notification-script" $content] >= 0} + restart_sentinel_and_wait 0 + set info [S 0 SENTINEL MASTER mymaster] + set idx [lsearch $info "notification-script"] + assert {$idx >= 0} + assert_equal [lindex $info [expr {$idx+1}]] $script_path + S 0 SENTINEL SET mymaster notification-script "" + file delete -force [file dirname $script_path] +} + +test "client-reconfig-script with spaces persists correctly through restart" { + set script_path [create_script_with_spaces 0] + S 0 SENTINEL SET mymaster client-reconfig-script $script_path + S 0 SENTINEL FLUSHCONFIG + set content [read_sentinel_config 0] + # The path must be quoted since it contains spaces. + assert {[string first "client-reconfig-script" $content] >= 0} + restart_sentinel_and_wait 0 + set info [S 0 SENTINEL MASTER mymaster] + set idx [lsearch $info "client-reconfig-script"] + assert {$idx >= 0} + assert_equal [lindex $info [expr {$idx+1}]] $script_path + S 0 SENTINEL SET mymaster client-reconfig-script "" + file delete -force [file dirname $script_path] +} + +test "rename-command persists unquoted through restart" { + S 0 SENTINEL SET mymaster rename-command CONFIG CONF_RENAMED + set expected {sentinel rename-command mymaster CONFIG CONF_RENAMED} + S 0 SENTINEL FLUSHCONFIG + assert_config_contains 0 $expected + restart_sentinel_and_wait 0 + S 0 SENTINEL FLUSHCONFIG + assert_config_contains 0 $expected + S 0 SENTINEL SET mymaster rename-command CONFIG CONFIG +} + +# -------------------------------------------------------------------------- +# Section 6: Backward compatibility -- old unquoted config format still loads +# -------------------------------------------------------------------------- + +test "Old unquoted config format for auth-pass and auth-user loads correctly" { + kill_instance sentinel 0 + append_to_sentinel_config 0 { + "sentinel auth-pass mymaster oldformatpass" + "sentinel auth-user mymaster oldformatuser" + } + start_sentinel_and_wait 0 + S 0 SENTINEL FLUSHCONFIG + 
assert_config_contains 0 "sentinel auth-pass mymaster oldformatpass" + assert_config_contains 0 "sentinel auth-user mymaster oldformatuser" + S 0 SENTINEL SET mymaster auth-pass "" + S 0 SENTINEL SET mymaster auth-user "" +} + +test "Old unquoted config format for rename-command loads correctly" { + kill_instance sentinel 0 + append_to_sentinel_config 0 { + "sentinel rename-command mymaster CONFIG NEWCONFIGNAME" + } + start_sentinel_and_wait 0 + S 0 SENTINEL FLUSHCONFIG + assert_config_contains 0 "sentinel rename-command mymaster CONFIG NEWCONFIGNAME" + S 0 SENTINEL SET mymaster rename-command CONFIG CONFIG +} + +test "Old unquoted config format for sentinel-pass loads correctly" { + kill_instance sentinel 0 + append_to_sentinel_config 0 { + "sentinel sentinel-pass oldsentinelpass" + } + start_sentinel_and_wait 0 + set result [S 0 SENTINEL CONFIG GET sentinel-pass] + assert_equal [lindex $result 1] "oldsentinelpass" + S 0 SENTINEL CONFIG SET sentinel-pass "" +} + +test "Old unquoted config format for sentinel-user loads correctly" { + kill_instance sentinel 0 + append_to_sentinel_config 0 { + "sentinel sentinel-user oldsentineluser" + } + start_sentinel_and_wait 0 + set result [S 0 SENTINEL CONFIG GET sentinel-user] + assert_equal [lindex $result 1] "oldsentineluser" + S 0 SENTINEL CONFIG SET sentinel-user "" +} + +# -------------------------------------------------------------------------- +# Section 7: Values with special characters survive config round-trip +# -------------------------------------------------------------------------- + +test "sentinel-pass with special characters persists correctly through restart" { + set test_pass {sentinel pass word} + S 0 SENTINEL CONFIG SET sentinel-pass $test_pass + set expected {sentinel sentinel-pass "sentinel pass word"} + S 0 SENTINEL FLUSHCONFIG + assert_config_contains 0 $expected + restart_sentinel_and_wait 0 + set result [S 0 SENTINEL CONFIG GET sentinel-pass] + assert_equal [lindex $result 1] $test_pass + S 0 
SENTINEL CONFIG SET sentinel-pass "" +} + +test "sentinel-user with special characters persists correctly through restart" { + set test_user {sentinel user name} + S 0 SENTINEL CONFIG SET sentinel-user $test_user + set expected {sentinel sentinel-user "sentinel user name"} + S 0 SENTINEL FLUSHCONFIG + assert_config_contains 0 $expected + restart_sentinel_and_wait 0 + set result [S 0 SENTINEL CONFIG GET sentinel-user] + assert_equal [lindex $result 1] $test_user + S 0 SENTINEL CONFIG SET sentinel-user "" +} diff --git a/tests/support/util.tcl b/tests/support/util.tcl index 16eb80008..e46da150a 100644 --- a/tests/support/util.tcl +++ b/tests/support/util.tcl @@ -81,7 +81,8 @@ proc sanitizer_errors_from_file {filename} { # GCC UBSAN output does not contain 'Sanitizer' but 'runtime error'. if {[string match {*runtime error*} $line] || - [string match {*Sanitizer*} $line]} { + [string match {*Sanitizer*} $line] || + [string match {*:*size mismatch*} $line]} { return $log } } @@ -800,7 +801,12 @@ proc generate_fuzzy_traffic_on_key {key type duration} { set set_commands {SADD SCARD SDIFF SDIFFSTORE SINTER SINTERSTORE SISMEMBER SMEMBERS SMOVE SPOP SRANDMEMBER SREM SSCAN SUNION SUNIONSTORE} set stream_commands {XACK XADD XCLAIM XDEL XGROUP XINFO XLEN XPENDING XRANGE XREAD XREADGROUP XREVRANGE XTRIM XDELEX XACKDEL XNACK} set vset_commands {VADD VREM} - set commands [dict create string $string_commands hash $hash_commands zset $zset_commands list $list_commands set $set_commands stream $stream_commands vectorset $vset_commands] + set array_commands {ARSET ARGET ARDEL ARCOUNT ARMSET ARMGET ARGETRANGE ARDELRANGE ARINFO} + set commands [dict create string $string_commands hash $hash_commands zset $zset_commands list $list_commands set $set_commands stream $stream_commands vectorset $vset_commands array $array_commands] +if 0 { + set gcra_commands {GCRA} + dict set commands gcra $gcra_commands +} set cmds [dict get $commands $type] set start_time [clock seconds] @@ -861,6 +867,49 
@@ proc generate_fuzzy_traffic_on_key {key type duration} { lappend cmd [randomValue] incr i 2 } + # Array commands need integer indices + if {$cmd == "ARSET"} { + lappend cmd $key + lappend cmd [randomInt 100000] ;# index + lappend cmd [randomValue] ;# value + incr i 3 + } + if {$cmd == "ARGET" || $cmd == "ARDEL"} { + lappend cmd $key + lappend cmd [randomInt 100000] ;# index + incr i 2 + } + if {$cmd == "ARCOUNT" || $cmd == "ARINFO"} { + lappend cmd $key + incr i 1 + } + if {$cmd == "ARMSET"} { + lappend cmd $key + # Add 2-4 index/value pairs + set npairs [expr {int(rand() * 3) + 2}] + for {set p 0} {$p < $npairs} {incr p} { + lappend cmd [randomInt 100000] + lappend cmd [randomValue] + } + incr i [expr {1 + $npairs * 2}] + } + if {$cmd == "ARMGET"} { + lappend cmd $key + # Add 2-4 indices + set nidx [expr {int(rand() * 3) + 2}] + for {set p 0} {$p < $nidx} {incr p} { + lappend cmd [randomInt 100000] + } + incr i [expr {1 + $nidx}] + } + if {$cmd == "ARGETRANGE" || $cmd == "ARDELRANGE"} { + lappend cmd $key + set idx1 [randomInt 100000] + set idx2 [expr {$idx1 + [randomInt 1000]}] + lappend cmd $idx1 + lappend cmd $idx2 + incr i 3 + } for {} {$i < $arity} {incr i} { if {$i == $firstkey || $i == $lastkey} { diff --git a/tests/unit/aofrw.tcl b/tests/unit/aofrw.tcl index 11324e18d..8341fcdbf 100644 --- a/tests/unit/aofrw.tcl +++ b/tests/unit/aofrw.tcl @@ -204,6 +204,70 @@ start_server {tags {"aofrw external:skip debug_defrag:skip"} overrides {aof-use- r FUNCTION LIST } {{library_name test engine LUA functions {{name test description {} flags {}}}}} + # Array AOF rewrite tests + test "AOF rewrite of array with mixed value types" { + r flushall + # Create array with various value types + r arset myarray 0 12345 ;# int + r arset myarray 1 "hello" ;# small string + r arset myarray 2 3.14159 ;# float + r arset myarray 100 [string repeat x 50] ;# large string + r arset myarray 10000 "sparse" ;# sparse index + set d1 [debug_digest] + r bgrewriteaof + waitForBgrewriteaof r 
+ r debug loadaof + set d2 [debug_digest] + if {$d1 ne $d2} { + error "assertion:$d1 is not equal to $d2" + } + } + + test "AOF rewrite of array with insert_idx (circular buffer)" { + r flushall + # Create circular buffer using ARRING + for {set i 0} {$i < 25} {incr i} { + r arring myarray 10 "v$i" + } + # insert_idx should be 4 ((25-1) % 10 = 4) + set next_before [r arnext myarray] + set d1 [debug_digest] + + r bgrewriteaof + waitForBgrewriteaof r + r debug loadaof + + set d2 [debug_digest] + if {$d1 ne $d2} { + error "assertion:$d1 is not equal to $d2" + } + # Verify insert_idx preserved + assert_equal $next_before [r arnext myarray] + + # Continue inserting - should continue from correct position + set new_idx [r arring myarray 10 "after_aof"] + assert_equal $next_before $new_idx + } + + test "AOF rewrite of array spanning multiple slices" { + r flushall + # Create array across multiple slices (slice_size = 4096) + for {set slice 0} {$slice < 5} {incr slice} { + set base [expr {$slice * 4096}] + for {set i 0} {$i < 20} {incr i} { + r arset myarray [expr {$base + $i * 100}] "s${slice}_v$i" + } + } + set d1 [debug_digest] + r bgrewriteaof + waitForBgrewriteaof r + r debug loadaof + set d2 [debug_digest] + if {$d1 ne $d2} { + error "assertion:$d1 is not equal to $d2" + } + } + test {BGREWRITEAOF is delayed if BGSAVE is in progress} { r flushall r set k v diff --git a/tests/unit/client-eviction.tcl b/tests/unit/client-eviction.tcl index ac2860f2a..afe32e4f9 100644 --- a/tests/unit/client-eviction.tcl +++ b/tests/unit/client-eviction.tcl @@ -52,7 +52,6 @@ proc kb {v} { start_server {} { set maxmemory_clients 3000000 r config set maxmemory-clients $maxmemory_clients - r debug reply-copy-avoidance 0 ;# Disable copy avoidance because it affects memory usage test "client evicted due to large argv" { r flushdb @@ -328,7 +327,6 @@ start_server {} { set obuf_limit [mb 3] r config set maxmemory-clients $maxmemory_clients r config set client-output-buffer-limit "normal 
$obuf_limit 0 0" - r debug reply-copy-avoidance 0 ;# Disable copy avoidance because it affects memory usage test "avoid client eviction when client is freed by output buffer limit" { r flushdb @@ -391,7 +389,6 @@ start_server {} { } start_server {} { - r debug reply-copy-avoidance 0 ;# Disable copy avoidance because it affects memory usage test "decrease maxmemory-clients causes client eviction" { set maxmemory_clients [mb 4] @@ -432,8 +429,6 @@ start_server {} { } start_server {} { - r debug reply-copy-avoidance 0 ;# Disable copy avoidance because it affects memory usage - test "evict clients only until below limit" { set client_count 10 set client_mem [mb 1] @@ -501,8 +496,6 @@ start_server {} { } start_server {} { - r debug reply-copy-avoidance 0 ;# Disable copy avoidance because it affects memory usage - test "evict clients in right order (large to small)" { # Note that each size step needs to be at least x2 larger than previous step # because of how the client-eviction size bucketing works @@ -571,15 +564,13 @@ start_server {} { } start_server {} { - r debug reply-copy-avoidance 0 ;# Disable copy avoidance because it affects memory usage - foreach type {"client no-evict" "maxmemory-clients disabled"} { r flushall r client no-evict on r config set maxmemory-clients 0 test "client total memory grows during $type" { - r setrange k [mb 1] v + r setrange k [kb 10] v ;# Keep value <= 16KB to avoid copy-avoidance, which shares memory and slows tot-mem growth. set rr [redis_client] $rr client setname test_client if {$type eq "client no-evict"} { @@ -591,8 +582,9 @@ start_server {} { # Fill output buffer in loop without reading it and make sure # the tot-mem of client has increased (OS buffers didn't swallow it) # and eviction not occurring. + set mget_args [lrepeat 100 k] ;# Use mget with 100 keys so each reply adds ~1MB to tot-mem, reaching 10MB faster. 
while {true} { - $rr get k + $rr mget {*}$mget_args $rr flush after 10 if {[client_field test_client tot-mem] > [mb 10]} { @@ -619,5 +611,34 @@ start_server {} { } } +start_server {} { + r flushall + r client no-evict on + r config set maxmemory-clients 0 + + test "Verify blocked client eviction during unblock does not cause use-after-free" { + # Create a deferring client that will be blocked on stream + # Use a long stream name to make client memory usage exceed 200000 bytes + set rd [redis_deferring_client] + $rd XREAD BLOCK 0 STREAMS mystream stream_[string repeat x 200000] $ $ + + # Wait for the client to be blocked + wait_for_condition 50 100 { + [s blocked_clients] eq {1} + } else { + fail "Client was not blocked" + } + + # Now lower MAXMEMORY-CLIENTS to a low value and use + # XADD to unblock the blocked client, triggering eviction. + r MULTI + r CONFIG SET MAXMEMORY-CLIENTS 100000 ;# Put in MULTI to defer blocked client eviction until after EXEC + r XADD mystream * field val + r EXEC + r PING + $rd close + } +} + } ;# tags diff --git a/tests/unit/cluster/announced-endpoints.tcl b/tests/unit/cluster/announced-endpoints.tcl index a37ca58d1..58643a2a7 100644 --- a/tests/unit/cluster/announced-endpoints.tcl +++ b/tests/unit/cluster/announced-endpoints.tcl @@ -72,4 +72,97 @@ start_cluster 2 2 {tags {external:skip cluster}} { fail "Cluster announced port was not updated in cluster slots" } } + + # Tests for cluster-announce-ip validation + test "cluster-announce-ip validation" { + # Reject control characters in IP-like values + catch {R 0 config set cluster-announce-ip "192.168.1.100\nnext"} err + assert_match "*alphanumeric*" $err + + catch {R 0 config set cluster-announce-ip "10.0.0.1\ttab"} err + assert_match "*alphanumeric*" $err + + catch {R 0 config set cluster-announce-ip "1.2.3.4\r\n"} err + assert_match "*alphanumeric*" $err + + # Reject control characters in hostname-like values + catch {R 0 config set cluster-announce-ip "redis-node\nnext"} err + 
assert_match "*alphanumeric*" $err + + catch {R 0 config set cluster-announce-ip "redis-node\ttab"} err + assert_match "*alphanumeric*" $err + + catch {R 0 config set cluster-announce-ip "redis-node\r\n"} err + assert_match "*alphanumeric*" $err + + # Accept valid IPv4 + R 0 config set cluster-announce-ip "192.168.1.100" + assert_equal "192.168.1.100" [lindex [R 0 config get cluster-announce-ip] 1] + + # Accept valid IPv6 + R 0 config set cluster-announce-ip "2001:db8::1" + assert_equal "2001:db8::1" [lindex [R 0 config get cluster-announce-ip] 1] + + # Accept valid hostname + R 0 config set cluster-announce-ip "redis-node-1.example.com" + assert_equal "redis-node-1.example.com" [lindex [R 0 config get cluster-announce-ip] 1] + + # Can be cleared + R 0 config set cluster-announce-ip "" + assert_equal "" [lindex [R 0 config get cluster-announce-ip] 1] + } + + # Tests for cluster-announce-human-nodename validation + test "cluster-announce-human-nodename validation" { + # Reject control characters + catch {R 0 config set cluster-announce-human-nodename "badchar\nnext"} err + assert_match "*invalid character*" $err + + catch {R 0 config set cluster-announce-human-nodename "bad\ttab"} err + assert_match "*invalid character*" $err + + catch {R 0 config set cluster-announce-human-nodename "bad\r\nline"} err + assert_match "*invalid character*" $err + + # Reject delimiter characters (comma, equals, space) + catch {R 0 config set cluster-announce-human-nodename "bad,comma"} err + assert_match "*invalid character*" $err + + catch {R 0 config set cluster-announce-human-nodename "bad=equals"} err + assert_match "*invalid character*" $err + + catch {R 0 config set cluster-announce-human-nodename "bad space"} err + assert_match "*invalid character*" $err + + # Reject quote characters (double quote, single quote, backslash) + catch {R 0 config set cluster-announce-human-nodename "bad\"quote"} err + assert_match "*invalid character*" $err + + catch {R 0 config set 
cluster-announce-human-nodename "bad'quote"} err + assert_match "*invalid character*" $err + + catch {R 0 config set cluster-announce-human-nodename "bad\\slash"} err + assert_match "*invalid character*" $err + + # Accept valid names + R 0 config set cluster-announce-human-nodename "my-redis-node-1" + assert_equal "my-redis-node-1" [lindex [R 0 config get cluster-announce-human-nodename] 1] + } + + # DoS prevention test: verify server can restart after CLUSTER SAVECONFIG + test "cluster-announce-ip persists correctly with CLUSTER SAVECONFIG" { + R 0 config set cluster-announce-ip "192.168.1.100" + R 0 cluster saveconfig + + # Verify the IP appears in CLUSTER NODES output + assert_match "*192.168.1.100*" [R 0 cluster nodes] + } + + test "cluster-announce-human-nodename persists correctly with CLUSTER SAVECONFIG" { + R 0 config set cluster-announce-human-nodename "production-node-1" + R 0 cluster saveconfig + + # Verify the nodename is set correctly + assert_equal "production-node-1" [lindex [R 0 config get cluster-announce-human-nodename] 1] + } } diff --git a/tests/unit/cluster/sharded-pubsub.tcl b/tests/unit/cluster/sharded-pubsub.tcl index 57b550ab7..5f78b7f0f 100644 --- a/tests/unit/cluster/sharded-pubsub.tcl +++ b/tests/unit/cluster/sharded-pubsub.tcl @@ -64,4 +64,31 @@ start_cluster 1 1 {tags {external:skip cluster}} { catch {[$replica EXEC]} err assert_match {EXECABORT*} $err } + + # Regression: shard channel slot must not follow getKeySlot() current_client + # cache when CLIENT KILL runs inside another client's EXEC (pubsubUnsubscribeChannel). + test {Shard pubsub: CLIENT KILL subscriber inside MULTI/EXEC (cross-slot)} { + # SET fixes the transaction client's slot to keyk's slot; the subscriber must + # use a shard channel in a different slot so a wrong-slot lookup would fail. 
+ set keyk "{06S}k" + set channel "{Qi}ch" + assert {[R 0 cluster keyslot $channel] != [R 0 cluster keyslot $keyk]} + + set rd_sub [redis_deferring_client] + $rd_sub client id + set cid [$rd_sub read] + $rd_sub ssubscribe $channel + $rd_sub read + + $primary multi + $primary set $keyk v + $primary client kill id $cid + set got [$primary exec] + + assert_equal {OK 1} $got + assert_equal PONG [$primary ping] + + catch {$rd_sub read} + $rd_sub close + } } diff --git a/tests/unit/cluster/slot-stats.tcl b/tests/unit/cluster/slot-stats.tcl index 004351369..2724ed519 100644 --- a/tests/unit/cluster/slot-stats.tcl +++ b/tests/unit/cluster/slot-stats.tcl @@ -158,7 +158,7 @@ proc wait_for_replica_key_exists {key key_count} { # Test cases for CLUSTER SLOT-STATS cpu-usec metric correctness. # ----------------------------------------------------------------------------- -start_cluster 1 0 {tags {external:skip cluster} overrides {cluster-slot-stats-enabled yes}} { +start_cluster 1 0 {tags {external:skip cluster}} { # Define shared variables. set key "FOO" @@ -361,7 +361,7 @@ start_cluster 1 0 {tags {external:skip cluster} overrides {cluster-slot-stats-en # Test cases for CLUSTER SLOT-STATS network-bytes-in. # ----------------------------------------------------------------------------- -start_cluster 1 0 {tags {external:skip cluster} overrides {cluster-slot-stats-enabled yes}} { +start_cluster 1 0 {tags {external:skip cluster}} { # Define shared variables. 
set key "key" @@ -471,7 +471,7 @@ start_cluster 1 0 {tags {external:skip cluster} overrides {cluster-slot-stats-en R 0 FLUSHALL } -start_cluster 1 1 {tags {external:skip cluster} overrides {cluster-slot-stats-enabled yes}} { +start_cluster 1 1 {tags {external:skip cluster}} { set channel "channel" set key_slot [R 0 cluster keyslot $channel] set metrics_to_assert [list network-bytes-in] @@ -525,7 +525,6 @@ start_cluster 1 0 {tags {external:skip cluster}} { set key_slot [R 0 cluster keyslot $key] set expected_slots_to_key_count [dict create $key_slot 1] set metrics_to_assert [list network-bytes-out] - R 0 CONFIG SET cluster-slot-stats-enabled yes test "CLUSTER SLOT-STATS network-bytes-out, for non-slot specific commands." { R 0 INFO @@ -583,7 +582,6 @@ start_cluster 1 1 {tags {external:skip cluster}} { set key "FOO" set key_slot [R 0 CLUSTER KEYSLOT $key] set metrics_to_assert [list network-bytes-out] - R 0 CONFIG SET cluster-slot-stats-enabled yes # Setup replication. assert {[s -1 role] eq {slave}} @@ -616,7 +614,6 @@ start_cluster 1 1 {tags {external:skip cluster}} { set channel_secondary "channel2" set key_slot_secondary [R 0 cluster keyslot $channel_secondary] set metrics_to_assert [list network-bytes-out] - R 0 CONFIG SET cluster-slot-stats-enabled yes test "CLUSTER SLOT-STATS network-bytes-out, sharded pub/sub, single channel." { set slot [R 0 cluster keyslot $channel] @@ -700,7 +697,7 @@ start_cluster 1 1 {tags {external:skip cluster}} { # Test cases for CLUSTER SLOT-STATS key-count metric correctness. # ----------------------------------------------------------------------------- -start_cluster 1 0 {tags {external:skip cluster} overrides {cluster-slot-stats-enabled yes}} { +start_cluster 1 0 {tags {external:skip cluster}} { # Define shared variables. set key "FOO" @@ -785,7 +782,7 @@ start_cluster 1 0 {tags {external:skip cluster}} { # Test cases for CLUSTER SLOT-STATS ORDERBY sub-argument. 
# ----------------------------------------------------------------------------- -start_cluster 1 0 {tags {external:skip cluster} overrides {cluster-slot-stats-enabled yes}} { +start_cluster 1 0 {tags {external:skip cluster}} { set metrics [list "key-count" "memory-bytes" "cpu-usec" "network-bytes-in" "network-bytes-out"] @@ -891,7 +888,7 @@ start_cluster 1 0 {tags {external:skip cluster} overrides {cluster-slot-stats-en # Test cases for CLUSTER SLOT-STATS replication. # ----------------------------------------------------------------------------- -start_cluster 1 1 {tags {external:skip cluster} overrides {cluster-slot-stats-enabled yes}} { +start_cluster 1 1 {tags {external:skip cluster}} { # Define shared variables. set key "key" @@ -994,7 +991,7 @@ start_cluster 1 1 {tags {external:skip cluster} overrides {cluster-slot-stats-en R 1 CONFIG RESETSTAT } -start_cluster 2 2 {tags {external:skip cluster} overrides {cluster-slot-stats-enabled yes}} { +start_cluster 2 2 {tags {external:skip cluster}} { test "CLUSTER SLOT-STATS reset upon atomic slot migration" { # key on slot-0 set key0 "{06S}mykey0" @@ -1044,7 +1041,7 @@ start_cluster 2 2 {tags {external:skip cluster} overrides {cluster-slot-stats-en # Test cases for CLUSTER SLOT-STATS memory-bytes field presence. # ----------------------------------------------------------------------------- -start_cluster 1 0 {tags {external:skip cluster} overrides {cluster-slot-stats-enabled yes}} { +start_cluster 1 0 {tags {external:skip cluster}} { # Define shared variables. set key "FOO" set key_slot [R 0 cluster keyslot $key] @@ -1174,7 +1171,7 @@ start_cluster 1 0 {tags {external:skip cluster} overrides {cluster-slot-stats-en # may change object encoding (e.g., listTypeTryConversion). 
# ----------------------------------------------------------------------------- -start_cluster 1 0 {tags {external:skip cluster needs:debug} overrides {cluster-slot-stats-enabled yes}} { +start_cluster 1 0 {tags {external:skip cluster needs:debug}} { # Enable debug assertion that validates memory tracking after each command. # This will cause a panic if tracked memory doesn't match actual memory. R 0 DEBUG ALLOCSIZE-SLOTS-ASSERT 1 @@ -1234,3 +1231,14 @@ start_cluster 1 0 {tags {external:skip cluster needs:debug} overrides {cluster-s R 0 CONFIG SET list-max-listpack-size [lindex $origin_conf 1] } } + +start_server {} { + test "CLUSTER SLOT-STATS memory tracking cannot be re-enabled after being disabled (non-clustered mode)" { + # Once memory tracking is disabled, it cannot be re-enabled at runtime + assert_error "ERR*memory tracking cannot be enabled at runtime*" {r CONFIG SET cluster-slot-stats-enabled yes} + assert_error "ERR*memory tracking cannot be enabled at runtime*" {r CONFIG SET cluster-slot-stats-enabled mem} + + # But cpu and net can still be enabled + assert_match "OK" [r CONFIG SET cluster-slot-stats-enabled "cpu net"] + } +} diff --git a/tests/unit/dump.tcl b/tests/unit/dump.tcl index 923e391b4..1a5c01ea4 100644 --- a/tests/unit/dump.tcl +++ b/tests/unit/dump.tcl @@ -158,6 +158,19 @@ start_server {tags {"dump"}} { close_replication_stream $repl } {} {needs:repl} + test {RESTORE fail with invalid payload size} { + # Payload with mismatched size: claims 0xFFFFFFFFFFFFFFF7 bytes (max uint64 - 8) but provides no data + # \x00 = String type + # \x81 = 64-bit length marker + # \xFF\xFF\xFF\xFF\xFF\xFF\xFF\xF7 = 18446744073709551607 in big-endian + # \x0c\x00 = RDB version + # \x00... 
= fake CRC64 + set encoded "\x00\x81\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xF7\x0c\x00\x00\x00\x00\x00\x00\x00\x00\x00" + r del test + catch {r restore test 0 $encoded} e + set e + } {*Bad data format*} + test {DUMP of non existing key returns nil} { r dump nonexisting_key } {} diff --git a/tests/unit/gcra.tcl b/tests/unit/gcra.tcl index b012a0fc4..1721334cb 100644 --- a/tests/unit/gcra.tcl +++ b/tests/unit/gcra.tcl @@ -1,4 +1,5 @@ start_server {tags {"gcra" "external:skip"}} { +if 0 { test {GCRA - argument validation} { # Wrong number of arguments (too few) catch {r gcra} err @@ -227,9 +228,19 @@ start_server {tags {"gcra" "external:skip"}} { catch {r gcra mykey 1 1 2147483647 TOKENS 2147483647} err assert_match "*would cause an overflow*" $err } + + test {GCRASETVALUE - basic functionality} { + r del mykey + set tat_us [expr {[clock microseconds] + 60000000}] + assert_equal {OK} [r gcrasetvalue mykey $tat_us] + assert_equal {gcra} [r type mykey] + assert {[r pttl mykey] > 0} + } +} } start_server {tags {"gcra" "external:skip"}} { +if 0 { test {GCRA - RDB save and reload preserves value} { r del mykey r gcra mykey 5 1 60 @@ -325,8 +336,10 @@ start_server {tags {"gcra" "external:skip"}} { assert_equal $digest_before $digest_after } {} {needs:debug} } +} start_server {tags {"gcra repl" "external:skip"}} { +if 0 { set replica [srv 0 client] set replica_host [srv 0 host] set replica_port [srv 0 port] @@ -360,3 +373,4 @@ start_server {tags {"gcra repl" "external:skip"}} { } {} {external:skip} } } +} diff --git a/tests/unit/geo.tcl b/tests/unit/geo.tcl index 6175329da..8ae201df9 100644 --- a/tests/unit/geo.tcl +++ b/tests/unit/geo.tcl @@ -223,6 +223,14 @@ start_server {tags {"geo"}} { set err } {*valid*} + test {GEOADD out-of-range longitude/latitude error reply is well-formed} { + r readraw 1 + set reply [r geoadd nyc 200 40 "bad lon"] + r readraw 0 + # RESP simple error: single line starting with '-', no duplicated "-ERR" prefix. 
+ assert_match {-ERR invalid longitude,latitude pair*} $reply + } + test {GEOADD multi add} { r geoadd nyc -73.9733487 40.7648057 "central park n/q/r" -73.9903085 40.7362513 "union square" -74.0131604 40.7126674 "wtc one" -73.7858139 40.6428986 "jfk" -73.9375699 40.7498929 "q4" -73.9564142 40.7480973 4545 } {6} diff --git a/tests/unit/hotkeys.tcl b/tests/unit/hotkeys.tcl index b7d2f5fe8..f78a526bf 100644 --- a/tests/unit/hotkeys.tcl +++ b/tests/unit/hotkeys.tcl @@ -251,7 +251,7 @@ start_server {tags {external:skip "hotkeys"}} { r multi # Send multiple commands to avoid <1us cpu for $key2 which we assert # at end of test - for {set i 0} {$i < 7} {incr i} { + for {set i 0} {$i < 30} {incr i} { r set $key1 value1 r set $key2 value1 r set $key1 value2 diff --git a/tests/unit/info.tcl b/tests/unit/info.tcl index 07543b3ad..5f1aa1ba2 100644 --- a/tests/unit/info.tcl +++ b/tests/unit/info.tcl @@ -393,8 +393,6 @@ start_server {tags {"info" "external:skip"}} { } test {stats: client input and output buffer limit disconnections} { - r debug reply-copy-avoidance 0 ;# Disable copy avoidance because it affects memory usage - r config resetstat set info [r info stats] assert_equal [getInfoProperty $info client_query_buffer_limit_disconnections] {0} diff --git a/tests/unit/introspection.tcl b/tests/unit/introspection.tcl index 9e5abae53..fc1b496c7 100644 --- a/tests/unit/introspection.tcl +++ b/tests/unit/introspection.tcl @@ -21,9 +21,9 @@ start_server {tags {"introspection"}} { test {CLIENT LIST} { set client_list [r client list] if {[lindex [r config get io-threads] 1] == 1} { - assert_match {id=* addr=*:* laddr=*:* fd=* name=* age=* idle=* flags=N db=* sub=0 psub=0 ssub=0 multi=-1 watch=0 qbuf=26 qbuf-free=* argv-mem=* multi-mem=0 rbs=* rbp=* obl=0 oll=0 omem=0 tot-mem=* events=r cmd=client|list user=* redir=-1 resp=* lib-name=* lib-ver=* io-thread=* tot-net-in=* tot-net-out=* tot-cmds=* read-events=* avg-pipeline-len-sum=* avg-pipeline-len-cnt=*} $client_list + assert_match 
{id=* addr=*:* laddr=*:* fd=* name=* age=* idle=* flags=N db=* sub=0 psub=0 ssub=0 multi=-1 watch=0 qbuf=26 qbuf-free=* argv-mem=* multi-mem=0 rbs=* rbp=* obl=0 oll=0 omem=0 omem-shared=0 omem-unshared=0 tot-mem=* events=r cmd=client|list user=* redir=-1 resp=* lib-name=* lib-ver=* io-thread=* tot-net-in=* tot-net-out=* tot-cmds=* read-events=* avg-pipeline-len-sum=* avg-pipeline-len-cnt=*} $client_list } else { - assert_match {id=* addr=*:* laddr=*:* fd=* name=* age=* idle=* flags=N db=* sub=0 psub=0 ssub=0 multi=-1 watch=0 qbuf=0 qbuf-free=* argv-mem=* multi-mem=0 rbs=* rbp=* obl=0 oll=0 omem=0 tot-mem=* events=r cmd=client|list user=* redir=-1 resp=* lib-name=* lib-ver=* io-thread=* tot-net-in=* tot-net-out=* tot-cmds=* read-events=* avg-pipeline-len-sum=* avg-pipeline-len-cnt=*} $client_list + assert_match {id=* addr=*:* laddr=*:* fd=* name=* age=* idle=* flags=N db=* sub=0 psub=0 ssub=0 multi=-1 watch=0 qbuf=0 qbuf-free=* argv-mem=* multi-mem=0 rbs=* rbp=* obl=0 oll=0 omem=0 omem-shared=0 omem-unshared=0 tot-mem=* events=r cmd=client|list user=* redir=-1 resp=* lib-name=* lib-ver=* io-thread=* tot-net-in=* tot-net-out=* tot-cmds=* read-events=* avg-pipeline-len-sum=* avg-pipeline-len-cnt=*} $client_list } } @@ -36,11 +36,11 @@ start_server {tags {"introspection"}} { test {CLIENT INFO} { set client [r client info] if {[lindex [r config get io-threads] 1] == 1} { - assert_match {id=* addr=*:* laddr=*:* fd=* name=* age=* idle=* flags=N db=* sub=0 psub=0 ssub=0 multi=-1 watch=0 qbuf=26 qbuf-free=* argv-mem=* multi-mem=0 rbs=* rbp=* obl=0 oll=0 omem=0 tot-mem=* events=r cmd=client|info user=* redir=-1 resp=* lib-name=* lib-ver=* io-thread=* tot-net-in=* tot-net-out=* tot-cmds=* read-events=* avg-pipeline-len-sum=* avg-pipeline-len-cnt=*} $client + assert_match {id=* addr=*:* laddr=*:* fd=* name=* age=* idle=* flags=N db=* sub=0 psub=0 ssub=0 multi=-1 watch=0 qbuf=26 qbuf-free=* argv-mem=* multi-mem=0 rbs=* rbp=* obl=0 oll=0 omem=0 omem-shared=0 omem-unshared=0 
tot-mem=* events=r cmd=client|info user=* redir=-1 resp=* lib-name=* lib-ver=* io-thread=* tot-net-in=* tot-net-out=* tot-cmds=* read-events=* avg-pipeline-len-sum=* avg-pipeline-len-cnt=*} $client } else { - assert_match {id=* addr=*:* laddr=*:* fd=* name=* age=* idle=* flags=N db=* sub=0 psub=0 ssub=0 multi=-1 watch=0 qbuf=0 qbuf-free=* argv-mem=* multi-mem=0 rbs=* rbp=* obl=0 oll=0 omem=0 tot-mem=* events=r cmd=client|info user=* redir=-1 resp=* lib-name=* lib-ver=* io-thread=* tot-net-in=* tot-net-out=* tot-cmds=* read-events=* avg-pipeline-len-sum=* avg-pipeline-len-cnt=*} $client + assert_match {id=* addr=*:* laddr=*:* fd=* name=* age=* idle=* flags=N db=* sub=0 psub=0 ssub=0 multi=-1 watch=0 qbuf=0 qbuf-free=* argv-mem=* multi-mem=0 rbs=* rbp=* obl=0 oll=0 omem=0 omem-shared=0 omem-unshared=0 tot-mem=* events=r cmd=client|info user=* redir=-1 resp=* lib-name=* lib-ver=* io-thread=* tot-net-in=* tot-net-out=* tot-cmds=* read-events=* avg-pipeline-len-sum=* avg-pipeline-len-cnt=*} $client } - } + } proc get_field_in_client_info {info field} { set info [string trim $info] diff --git a/tests/unit/maxmemory.tcl b/tests/unit/maxmemory.tcl index f9b36b491..f86ed7ce8 100644 --- a/tests/unit/maxmemory.tcl +++ b/tests/unit/maxmemory.tcl @@ -15,7 +15,6 @@ start_server {tags {"maxmemory" "external:skip"}} { r config set maxmemory 11mb r config set maxmemory-policy allkeys-lru set server_pid [s process_id] - r debug reply-copy-avoidance 0 ;# Disable copy avoidance because it affects memory usage proc init_test {client_eviction} { r flushdb @@ -29,11 +28,11 @@ start_server {tags {"maxmemory" "external:skip"}} { } r config resetstat - # fill 5mb using 50 keys of 100kb - for {set j 0} {$j < 50} {incr j} { - r setrange $j 100000 x + # fill 5mb using 500 keys of 10kb + for {set j 0} {$j < 500} {incr j} { + r setrange key$j 10000 x } - assert_equal [r dbsize] 50 + assert_equal [r dbsize] 500 } # Return true if the eviction occurred (client or key) based on argument @@ -44,12 
+43,12 @@ start_server {tags {"maxmemory" "external:skip"}} { if $client_eviction { if {[lindex [r config get io-threads] 1] == 1} { - return [expr $evicted_clients > 0 && $evicted_keys == 0 && $dbsize == 50] + return [expr $evicted_clients > 0 && $evicted_keys == 0 && $dbsize == 500] } else { - return [expr $evicted_clients >= 0 && $evicted_keys >= 0 && $dbsize <= 50] + return [expr $evicted_clients >= 0 && $evicted_keys >= 0 && $dbsize <= 500] } } else { - return [expr $evicted_clients == 0 && $evicted_keys > 0 && $dbsize < 50] + return [expr $evicted_clients == 0 && $evicted_keys > 0 && $dbsize < 500] } } @@ -84,7 +83,7 @@ start_server {tags {"maxmemory" "external:skip"}} { while {![check_eviction_test $client_eviction] && [expr [clock seconds] - $t] < 20} { foreach rr $clients { if {[catch { - $rr mget 1 + $rr mget key1 key2 key3 key4 key5 key6 key7 key8 key9 key10 $rr flush } err]} { lremove clients $rr diff --git a/tests/unit/memefficiency.tcl b/tests/unit/memefficiency.tcl index 0ab12c6c7..f488ca85f 100644 --- a/tests/unit/memefficiency.tcl +++ b/tests/unit/memefficiency.tcl @@ -83,8 +83,6 @@ run_solo {defrag} { # note: Disabling lookahead because it changes the number and order of allocations which interferes with defrag and causes tests to fail r config set lookahead 1 - r debug reply-copy-avoidance 0 ;# Disable copy avoidance because it affects memory usage - if {[string match {*jemalloc*} [s mem_allocator]] && [r debug mallctl arenas.page] <= 8192} { test "Active defrag main dictionary: $type" { r config set hz 100 @@ -1115,6 +1113,97 @@ run_solo {defrag} { } ;# standalone } } + + if {[string match {*jemalloc*} [s mem_allocator]] && + [r debug mallctl arenas.page] <= 8192 && + $type eq "standalone"} { ;# skip in cluster mode and non-jemalloc + test "Active defrag arrays: $type" { + r flushdb + r config set hz 100 + r config set activedefrag no + wait_for_defrag_stop 500 100 + r config resetstat + r config set active-defrag-max-scan-fields 100 + r config 
set active-defrag-threshold-lower 1 + r config set active-defrag-cycle-min 65 + r config set active-defrag-cycle-max 75 + r config set active-defrag-ignore-bytes 512kb + r config set maxmemory 0 + + # Create two large arrays with interleaved allocations. Indices are + # one full slice apart so the surviving array is stored as many + # separate slices and uses superdir mode. + set rd [redis_deferring_client] + set payload [string repeat A 500] + set elements 3000 + set base 8388608 + set count 0 + for {set j 0} {$j < $elements} {incr j} { + set idx [expr {$base + $j * 4096}] + $rd arset bigarray1 $idx "a1:$j:$payload" + $rd arset bigarray2 $idx "a2:$j:$payload" + + incr count + discard_replies_every $rd $count 1000 2000 + } + set remaining [expr {($count % 1000) * 2}] + for {set j 0} {$j < $remaining} {incr j} { + $rd read + } + + assert_equal $elements [r arcount bigarray1] + assert_equal $elements [r arcount bigarray2] + assert_morethan [dict get [r arinfo bigarray1] directory-size] 0 + + # Free one full array to create fragmentation around the surviving + # array's slices and string allocations. + r del bigarray2 + + after 120 ;# serverCron only updates the info once in 100ms + r config set latency-monitor-threshold 5 + r latency reset + + set digest [debug_digest] + catch {r config set activedefrag yes} e + if {[r config get activedefrag] eq "activedefrag yes"} { + wait_for_condition 50 100 { + [s total_active_defrag_time] ne 0 + } else { + after 120 ;# serverCron only updates the info once in 100ms + puts [r info memory] + puts [r info stats] + puts [r memory malloc-stats] + fail "defrag not started." + } + + # This test only needs to verify that active defrag reached the + # array and processed it without corrupting the value. We do + # not require the allocator to fully converge to a no-fragmentation + # state on every platform. 
+ wait_for_condition 500 100 { + [s active_defrag_key_hits] + [s active_defrag_key_misses] > 0 + } else { + after 120 ;# serverCron only updates the info once in 100ms + puts [r info memory] + puts [r info stats] + puts [r memory malloc-stats] + fail "array defrag did not touch the key." + } + + r config set activedefrag no + wait_for_defrag_stop 500 100 + } + + # Verify the array stayed intact after active defrag touched it. + assert_equal $elements [r arcount bigarray1] + assert_equal "a1:0:$payload" [r arget bigarray1 $base] + assert_equal "a1:1234:$payload" [r arget bigarray1 [expr {$base + 1234 * 4096}]] + assert_equal "a1:2999:$payload" [r arget bigarray1 [expr {$base + 2999 * 4096}]] + assert_equal $digest [debug_digest] + assert_equal OK [r save] ;# Iterates all pointers again after defrag. + expr 1 + } {1} + } } test "Active defrag can't be triggered during replicaof database flush. See issue #14267" { diff --git a/tests/unit/moduleapi/keyspace_events.tcl b/tests/unit/moduleapi/keyspace_events.tcl index 5d62a7178..49c4d5da1 100644 --- a/tests/unit/moduleapi/keyspace_events.tcl +++ b/tests/unit/moduleapi/keyspace_events.tcl @@ -116,6 +116,139 @@ tags "modules external:skip" { assert_equal [r get testkeyspace:expired] 1 } + test "Subkey notification: subscribe starts callback" { + r keyspace.subscribe_subkeys + r keyspace.reset_subkey_events + r config set notify-keyspace-events "" + } + + test "Subkey notification: HSET triggers module subkey callback" { + r keyspace.reset_subkey_events + r hset myhash f1 v1 f2 v2 + set events [r keyspace.get_subkey_events] + assert_equal 1 [llength $events] + assert_equal "hset myhash 2 f1 f2" [lindex $events 0] + r del myhash + } + + test "Subkey notification: HDEL triggers module subkey callback" { + r hset myhash f1 v1 f2 v2 + r keyspace.reset_subkey_events + r hdel myhash f1 + set events [r keyspace.get_subkey_events] + assert_equal 1 [llength $events] + assert_equal "hdel myhash 1 f1" [lindex $events 0] + r del myhash 
+ } + + test "Subkey notification: non-subkey event calls subkey callback with count=0" { + r hset myhash f1 v1 + r keyspace.reset_subkey_events + r del myhash + set events [r keyspace.get_subkey_events] + # DEL is NOTIFY_GENERIC — our callback is registered for + # HASH|GENERIC, so it should be called with subkeys=NULL, count=0. + assert_equal 1 [llength $events] + assert_equal "del myhash 0" [lindex $events 0] + } + + test "Subkey notification: module-triggered NotifyKeyspaceEventWithSubkeys" { + r keyspace.reset_subkey_events + r keyspace.notify_with_subkeys mykey sk1 sk2 sk3 + set events [r keyspace.get_subkey_events] + assert_equal 1 [llength $events] + assert_equal "module_subkey_event mykey 3 sk1 sk2 sk3" [lindex $events 0] + } + + test "Subkey notification: lazy hash field expiry triggers hexpired with subkeys" { + r debug set-active-expire 0 + r del myhash + r hset myhash f1 v1 f2 v2 f3 v3 + r hpexpire myhash 10 FIELDS 2 f1 f2 + r keyspace.reset_subkey_events + after 100 + r hmget myhash f1 f2 + assert_equal "hexpired myhash 2 f1 f2" [lindex [r keyspace.get_subkey_events] 0] + r debug set-active-expire 1 + } {OK} {needs:debug} + + test "Subkey notification: active hash field expiry triggers hexpired with subkeys" { + r del myhash + r hset myhash f1 v1 f2 v2 + r keyspace.reset_subkey_events + r hpexpire myhash 10 FIELDS 2 f1 f2 + # wait for active expiry to kick in + wait_for_condition 50 100 { + [r exists myhash] == 0 + } else { + fail "Fields not expired by active expiry" + } + # fields order is undefined + assert_match "hexpired myhash 2 f* f*" [lindex [r keyspace.get_subkey_events] 1] + r del myhash + } + + test "Subkey notification: unsubscribe stops callback and resubscribe resumes" { + r keyspace.reset_subkey_events + r hset myhash f1 v1 + set events [r keyspace.get_subkey_events] + assert_equal 1 [llength $events] + + # Unsubscribe — events should stop + r keyspace.unsubscribe_subkeys + r keyspace.reset_subkey_events + r hset myhash f2 v2 + set 
events [r keyspace.get_subkey_events] + assert_equal 0 [llength $events] + # active expire should not trigger subkey callback + r hpexpire myhash 10 FIELDS 2 f1 f2 + wait_for_condition 50 100 { + [r exists myhash] == 0 + } else { + fail "Fields not expired by active expiry" + } + set events [r keyspace.get_subkey_events] + assert_equal 0 [llength $events] + + # Re-subscribe — events should resume + r keyspace.subscribe_subkeys + r del myhash + r hset myhash f1 v1 f2 v2 + r keyspace.reset_subkey_events + r hpexpire myhash 10 FIELDS 2 f1 f2 + assert_match "hexpire myhash 2 f* f*" [lindex [r keyspace.get_subkey_events] 0] + # active expire should also resume subkey callback + wait_for_condition 50 100 { + [r exists myhash] == 0 + } else { + fail "Fields not expired by active expiry" + } + assert_match "hexpired myhash 2 f* f*" [lindex [r keyspace.get_subkey_events] 1] + + r keyspace.unsubscribe_subkeys + r keyspace.reset_subkey_events + r del myhash + } + + test "Subkey notification: SUBKEYS_REQUIRED flag skips events without subkeys" { + r keyspace.subscribe_require_subkeys + r keyspace.reset_subkey_events + + # HSET has subkeys — should trigger callback + r hset myhash f1 v1 f2 v2 + set events [r keyspace.get_subkey_events] + assert_equal 1 [llength $events] + assert_equal "hset myhash 2 f1 f2" [lindex $events 0] + + # DEL has no subkeys — the callback should be skipped. 
+ r keyspace.reset_subkey_events + r del myhash + set events [r keyspace.get_subkey_events] + assert_equal 0 [llength $events] + + r keyspace.unsubscribe_require_subkeys + } + test "Unload the module - testkeyspace" { assert_equal {OK} [r module unload testkeyspace] } @@ -125,6 +258,38 @@ tags "modules external:skip" { } } + # Replication test: replica module receives subkey notifications + start_server [list overrides [list loadmodule "$testmodule"]] { + set master [srv 0 client] + set master_host [srv 0 host] + set master_port [srv 0 port] + + start_server [list overrides [list loadmodule "$testmodule"]] { + set replica [srv 0 client] + + $replica replicaof $master_host $master_port + wait_for_sync $replica + + test "Subkey notification: replica module receives subkey callback after replication" { + $master keyspace.subscribe_subkeys + $replica keyspace.subscribe_subkeys + $replica keyspace.reset_subkey_events + + $master hset myhash f1 v1 f2 v2 + + wait_for_ofs_sync $master $replica + + set events [$replica keyspace.get_subkey_events] + assert_equal 1 [llength $events] + assert_equal "hset myhash 2 f1 f2" [lindex $events 0] + + $master del myhash + $master keyspace.unsubscribe_subkeys + $replica keyspace.unsubscribe_subkeys + } + } + } + start_server {} { test {OnLoad failure will handle un-registration} { catch {r module load $testmodule noload} diff --git a/tests/unit/obuf-limits.tcl b/tests/unit/obuf-limits.tcl index 148187b73..f58eeda89 100644 --- a/tests/unit/obuf-limits.tcl +++ b/tests/unit/obuf-limits.tcl @@ -1,6 +1,4 @@ start_server {tags {"obuf-limits external:skip logreqres:skip"}} { - r debug reply-copy-avoidance 0 ;# Disable copy avoidance because it affects memory usage - test {CONFIG SET client-output-buffer-limit} { set oldval [lindex [r config get client-output-buffer-limit] 1] @@ -237,4 +235,82 @@ start_server {tags {"obuf-limits external:skip logreqres:skip"}} { assert_match "*I/O error*" $e reconnect } + + test "zero-copy referenced reply 
bytes are reflected in memory stats" { + r flushdb + r config set client-output-buffer-limit {normal 0 0 0} + # Use a value large enough to trigger copy avoidance + set val_size 100000 + r set bigkey [string repeat v $val_size] + + # Use MULTI/EXEC so all observers see the zero-copy ref before it is sent. + r client setname refmem_test + r multi + r get bigkey ;# adds zero-copy ref to output buffer + r client list ;# per-client omem / omem-shared / omem-unshared / tot-mem + r info memory ;# global mem_clients_normal_shared / mem_clients_normal_unshared + r memory stats ;# clients.normal.shared and clients.normal.unshared + set res [r exec] + + # omem-shared tracks total shared reply bytes, key is still alive so omem-unshared must be 0. + set clients [split [string trim [lindex $res 1]] "\r\n"] + set c [lsearch -inline $clients *name=refmem_test*] + regexp {omem-shared=([0-9]+)} $c - omem_shared + regexp {omem-unshared=([0-9]+)} $c - omem_unshared + assert {$omem_shared >= $val_size} + assert_equal 0 $omem_unshared + + # mem_clients_normal_shared is incremented at write time, before the reply is sent + set info_mem [lindex $res 2] + assert {[getInfoProperty $info_mem mem_clients_normal_shared] >= $val_size} + assert_equal 0 [getInfoProperty $info_mem mem_clients_normal_unshared] + + # MEMORY STATS exposes the same shared bytes; normal.unshared is 0 since the key is still in keyspace + set mem_stats [lindex $res 3] + assert {[dict get $mem_stats clients.normal.shared] >= $val_size} + assert_equal 0 [dict get $mem_stats clients.normal.unshared] ;# key still in keyspace + + # After the reply is fully sent, the global counter must return to 0 + wait_for_condition 50 10 { + [s mem_clients_normal_shared] == 0 + } else { + fail "mem_clients_normal_shared did not return to 0 after reply was sent" + } + } + + test "shared reply bytes are tracked as unshared after the key is deleted" { + r flushdb + r config set client-output-buffer-limit {normal 0 0 0} + + set rr 
[redis_deferring_client] + $rr client setname test_client + $rr flush + + # Repeatedly SET/GET/DEL a big key on a deferred client and poll CLIENT LIST + # until omem-unshared on test_client reflects the referenced bytes. + set val_size 100000 + set deadline [expr {[clock milliseconds] + 5000}] + while {true} { + r set k [string repeat v $val_size] + $rr get k + $rr del k + $rr flush + after 10 + + set clients [split [r client list] "\r\n"] + set c [lsearch -inline $clients *name=test_client*] + regexp {omem-shared=([0-9]+)} $c - omem_shared + regexp {omem-unshared=([0-9]+)} $c - omem_unshared + if {$omem_unshared >= $val_size} { + assert_morethan_equal $omem_shared $omem_unshared + break + } + + if {[clock milliseconds] > $deadline} { + fail "timed out waiting for omem-unshared to reflect unshared bytes" + } + } + + $rr close + } } diff --git a/tests/unit/other.tcl b/tests/unit/other.tcl index 7ab9ab89b..9fdd576df 100644 --- a/tests/unit/other.tcl +++ b/tests/unit/other.tcl @@ -30,6 +30,15 @@ start_server {tags {"other"}} { assert_equal {OK} [r memory purge] } } + + test {je_malloc_conf compile-time tuning is active} { + # Verify je_malloc_conf in src/zmalloc.c overrides the jemalloc defaults + # (tcache_nslots_small_max: 200, lg_tcache_nslots_mul: 1) with the values asserted below.
+ if {[string match {*jemalloc*} [s mem_allocator]]} { + assert_equal 1000 [r debug mallctl opt.tcache_nslots_small_max] + assert_equal 3 [r debug mallctl opt.lg_tcache_nslots_mul] + } + } {} {needs:debug} test {SAVE - make sure there are all the types as values} { # Wait for a background saving in progress to terminate diff --git a/tests/unit/pubsub.tcl b/tests/unit/pubsub.tcl index 24f779ffc..afcddee77 100644 --- a/tests/unit/pubsub.tcl +++ b/tests/unit/pubsub.tcl @@ -602,7 +602,6 @@ start_server {tags {"pubsub network"}} { after 15 r hget myhash f2 assert_equal "pmessage * __keyspace@${db}__:myhash hexpire" [$rd1 read] - assert_equal "pmessage * __keyspace@${db}__:myhash hexpired" [$rd1 read] assert_equal "pmessage * __keyspace@${db}__:myhash del" [$rd1 read] # FNX on logically expired field @@ -962,6 +961,364 @@ start_server {tags {"pubsub network"}} { $rd1 close } + ### Subkey-level notification tests for HASH type ### + + # Helper: build expected payload "event|len:field0,len:field1,..." + proc build_expected_payload {event prefix count} { + set parts {} + for {set i 0} {$i < $count} {incr i} { + set f "${prefix}${i}" + lappend parts "[string length $f]:$f" + } + return "${event}|[join $parts ,]" + } + + # Compare subkey notification payloads as sets (order-insensitive). + # Parses "event|f1,f2,..." and checks event matches and fields match as sets. 
+ proc assert_subkey_payload_equal {expected actual} { + set ep [split $expected "|"] + set ap [split $actual "|"] + assert_equal [lindex $ep 0] [lindex $ap 0] ;# event name + set ef [lsort [split [lindex $ep 1] ","]] + set af [lsort [split [lindex $ap 1] ","]] + assert_equal $ef $af + } + + # Generate N field-value pairs: {f0 v0 f1 v1 ...} + proc gen_field_values {prefix n} { + set args {} + for {set i 0} {$i < $n} {incr i} { + lappend args "${prefix}${i}" "v${i}" + } + return $args + } + + # Generate N field names: {f0 f1 ...} + proc gen_fields {prefix n} { + set fields {} + for {set i 0} {$i < $n} {incr i} { + lappend fields "${prefix}${i}" + } + return $fields + } + + # Subkey notification: subkeyspace channel + foreach {type max_lp_entries} {listpackex 512 hashtable 0} { + r config set hash-max-listpack-entries $max_lp_entries + r config set notify-keyspace-events Sh + set rd1 [redis_deferring_client] + assert_equal {1} [subscribe $rd1 "__subkeyspace@${db}__:myhash"] + + test "Subkey notifications: subkeyspace - HSET single field ($type)" { + r del myhash + r hset myhash f1 v1 + assert_equal "message __subkeyspace@${db}__:myhash hset|2:f1" [$rd1 read] + } + + test "Subkey notifications: subkeyspace - HINCRBY ($type)" { + r del myhash + r hset myhash counter 10 + r hincrby myhash counter 5 + assert_equal "message __subkeyspace@${db}__:myhash hset|7:counter" [$rd1 read] + assert_equal "message __subkeyspace@${db}__:myhash hincrby|7:counter" [$rd1 read] + } + + test "Subkey notifications: subkeyspace - HSETNX ($type)" { + r del myhash + r hsetnx myhash newfield val + assert_equal "message __subkeyspace@${db}__:myhash hset|8:newfield" [$rd1 read] + } + + test "Subkey notifications: subkeyspace - HINCRBYFLOAT ($type)" { + r del myhash + r hset myhash counter 10.5 + r hincrbyfloat myhash counter 2.5 + assert_equal "message __subkeyspace@${db}__:myhash hset|7:counter" [$rd1 read] + assert_equal "message __subkeyspace@${db}__:myhash hincrbyfloat|7:counter" [$rd1 read] 
+ } + + # Test with N=3 (stack path, within FIELDS_STACK_SIZE=16) and + # N=32 (heap path, exceeds FIELDS_STACK_SIZE). + foreach N {3 32} { + + test "Subkey notifications: HSET $N fields ($type, [expr {$N <= 16 ? {stack} : {heap}}])" { + r del myhash + r hset myhash {*}[gen_field_values "f" $N] + set expected [build_expected_payload "hset" "f" $N] + assert_equal "message __subkeyspace@${db}__:myhash $expected" [$rd1 read] + } + + test "Subkey notifications: HDEL $N fields ($type, [expr {$N <= 16 ? {stack} : {heap}}])" { + r del myhash + r hset myhash {*}[gen_field_values "f" $N] + $rd1 read ;# consume hset notification + r hdel myhash {*}[gen_fields "f" $N] + set expected [build_expected_payload "hdel" "f" $N] + assert_equal "message __subkeyspace@${db}__:myhash $expected" [$rd1 read] + } + + test "Subkey notifications: HGETDEL $N fields ($type, [expr {$N <= 16 ? {stack} : {heap}}])" { + r del myhash + r hset myhash {*}[gen_field_values "f" $N] + $rd1 read ;# consume hset notification + r hgetdel myhash FIELDS $N {*}[gen_fields "f" $N] + set expected [build_expected_payload "hdel" "f" $N] + assert_equal "message __subkeyspace@${db}__:myhash $expected" [$rd1 read] + } + + test "Subkey notifications: HEXPIRE $N fields ($type, [expr {$N <= 16 ? {stack} : {heap}}])" { + r del myhash + r hset myhash {*}[gen_field_values "f" $N] + $rd1 read ;# consume hset notification + r hexpire myhash 1000 FIELDS $N {*}[gen_fields "f" $N] + set expected [build_expected_payload "hexpire" "f" $N] + assert_equal "message __subkeyspace@${db}__:myhash $expected" [$rd1 read] + } + + test "Subkey notifications: HEXPIRE past timestamp $N fields ($type, [expr {$N <= 16 ? 
{stack} : {heap}}])" { + r del myhash + r hset myhash {*}[gen_field_values "f" $N] + $rd1 read ;# consume hset notification + r hexpireat myhash 1 FIELDS $N {*}[gen_fields "f" $N] + set expected [build_expected_payload "hdel" "f" $N] + assert_equal "message __subkeyspace@${db}__:myhash $expected" [$rd1 read] + } + + test "Subkey notifications: HPERSIST $N fields ($type, [expr {$N <= 16 ? {stack} : {heap}}])" { + r del myhash + set fields [gen_fields "f" $N] + r hset myhash {*}[gen_field_values "f" $N] + r hexpire myhash 1000 FIELDS $N {*}$fields + $rd1 read ;# consume hset + $rd1 read ;# consume hexpire + r hpersist myhash FIELDS $N {*}$fields + set expected [build_expected_payload "hpersist" "f" $N] + assert_equal "message __subkeyspace@${db}__:myhash $expected" [$rd1 read] + } + + test "Subkey notifications: HGETEX with expire $N fields ($type, [expr {$N <= 16 ? {stack} : {heap}}])" { + r del myhash + r hset myhash {*}[gen_field_values "f" $N] + $rd1 read ;# consume hset + r hgetex myhash EX 1000 FIELDS $N {*}[gen_fields "f" $N] + set expected [build_expected_payload "hexpire" "f" $N] + assert_equal "message __subkeyspace@${db}__:myhash $expected" [$rd1 read] + } + + test "Subkey notifications: HGETEX with persist $N fields ($type, [expr {$N <= 16 ? {stack} : {heap}}])" { + r del myhash + set fields [gen_fields "f" $N] + r hset myhash {*}[gen_field_values "f" $N] + r hexpire myhash 1000 FIELDS $N {*}$fields + $rd1 read ;# consume hset + $rd1 read ;# consume hexpire + r hgetex myhash PERSIST FIELDS $N {*}$fields + set expected [build_expected_payload "hpersist" "f" $N] + assert_equal "message __subkeyspace@${db}__:myhash $expected" [$rd1 read] + } + + test "Subkey notifications: HGETEX past timestamp $N fields ($type, [expr {$N <= 16 ? 
{stack} : {heap}}])" { + r del myhash + r hset myhash {*}[gen_field_values "f" $N] + $rd1 read ;# consume hset + r hgetex myhash PX 0 FIELDS $N {*}[gen_fields "f" $N] + set expected [build_expected_payload "hdel" "f" $N] + assert_equal "message __subkeyspace@${db}__:myhash $expected" [$rd1 read] + } + + test "Subkey notifications: HSETEX $N fields ($type, [expr {$N <= 16 ? {stack} : {heap}}])" { + r del myhash + r hsetex myhash EX 1000 FIELDS $N {*}[gen_field_values "f" $N] + set expected_hset [build_expected_payload "hset" "f" $N] + set expected_hexpire [build_expected_payload "hexpire" "f" $N] + assert_equal "message __subkeyspace@${db}__:myhash $expected_hset" [$rd1 read] + assert_equal "message __subkeyspace@${db}__:myhash $expected_hexpire" [$rd1 read] + } + + test "Subkey notifications: HSETEX past timestamp $N fields ($type, [expr {$N <= 16 ? {stack} : {heap}}])" { + r del myhash + r hsetex myhash PX 0 FIELDS $N {*}[gen_field_values "f" $N] + set expected_hset [build_expected_payload "hset" "f" $N] + set expected_hdel [build_expected_payload "hdel" "f" $N] + assert_equal "message __subkeyspace@${db}__:myhash $expected_hset" [$rd1 read] + assert_equal "message __subkeyspace@${db}__:myhash $expected_hdel" [$rd1 read] + } + + test "Subkey notifications: lazy field expiry triggers hexpired $N fields ($type, [expr {$N <= 16 ? {stack} : {heap}}])" { + r del myhash + # Create N+1 fields, expire N of them; keep one to prevent hash deletion. 
+ set fields [gen_fields "f" $N] + set args [gen_field_values "f" $N] + lappend args "keep" "val" + r hset myhash {*}$args + r debug set-active-expire 0 + r hpexpire myhash 10 FIELDS $N {*}$fields + $rd1 read ;# consume hset + $rd1 read ;# consume hexpire + # Trigger lazy expiry by reading the fields + after 100 + r hmget myhash {*}$fields + set expected_hexpired [build_expected_payload "hexpired" "f" $N] + assert_equal "message __subkeyspace@${db}__:myhash $expected_hexpired" [$rd1 read] + r debug set-active-expire 1 + } {OK} {needs:debug} + + test "Subkey notifications: active field expiry triggers hexpired $N fields ($type, [expr {$N <= 16 ? {stack} : {heap}}])" { + r del myhash + # Create N+1 fields, expire N of them; keep one to prevent hash deletion. + set fields [gen_fields "f" $N] + set args [gen_field_values "f" $N] + lappend args "keep" "val" + r hset myhash {*}$args + r hpexpire myhash 10 FIELDS $N {*}$fields + $rd1 read ;# consume hset + $rd1 read ;# consume hexpire + # Wait for active expiry; field order depends on hash table iteration, + # so compare as set. + set expected_hexpired [build_expected_payload "hexpired" "f" $N] + set actual [$rd1 read] + set prefix "message __subkeyspace@${db}__:myhash " + assert_equal $prefix [string range $actual 0 [expr {[string length $prefix]-1}]] + assert_subkey_payload_equal $expected_hexpired [string range $actual [string length $prefix] end] + } + } ;# end foreach N + $rd1 close + } ;# end foreach type + + # Subkey notification format tests for subkeyevent/subkeyspaceitem/subkeyspaceevent + # Full command coverage is done via subkeyspace channel above; here we only verify channel format.
+ foreach {type max_lp_entries} {listpackex 512 hashtable 0} { + r config set hash-max-listpack-entries $max_lp_entries + + test "Subkey notifications: subkeyevent format ($type)" { + r config set notify-keyspace-events Th + r del myhash + set rd1 [redis_deferring_client] + assert_equal {1} [subscribe $rd1 "__subkeyevent@${db}__:hset"] + r hset myhash f1 v1 f2 v2 f3 v3 + assert_equal "message __subkeyevent@${db}__:hset 6:myhash|2:f1,2:f2,2:f3" [$rd1 read] + $rd1 close + } + + test "Subkey notifications: subkeyspaceitem format ($type)" { + r config set notify-keyspace-events Ih + r del myhash + set rd1 [redis_deferring_client] + $rd1 subscribe "__subkeyspaceitem@${db}__:myhash\nf1" + $rd1 read ;# consume subscribe confirmation + r hset myhash f1 v1 + set msg [$rd1 read] + assert_equal "message" [lindex $msg 0] + assert_equal "__subkeyspaceitem@${db}__:myhash\nf1" [lindex $msg 1] + assert_equal "hset" [lindex $msg 2] + $rd1 close + } + + test "Subkey notifications: subkeyspaceitem per-subkey delivery with psubscribe ($type)" { + r config set notify-keyspace-events Ih + r del myhash + set rd1 [redis_deferring_client] + assert_equal {1} [psubscribe $rd1 "__subkeyspaceitem@${db}__:myhash*"] + r hset myhash f1 v1 f2 v2 + # Should get one notification per subkey + set msg1 [$rd1 read] + set msg2 [$rd1 read] + assert_equal "pmessage" [lindex $msg1 0] + assert_equal "__subkeyspaceitem@${db}__:myhash\nf1" [lindex $msg1 2] + assert_equal "hset" [lindex $msg1 3] + assert_equal "pmessage" [lindex $msg2 0] + assert_equal "__subkeyspaceitem@${db}__:myhash\nf2" [lindex $msg2 2] + assert_equal "hset" [lindex $msg2 3] + $rd1 close + } + + test "Subkey notifications: subkeyspaceitem skips key with newline ($type)" { + r config set notify-keyspace-events Ih + r del "key\nwith\nnewline" + set rd1 [redis_deferring_client] + assert_equal {1} [psubscribe $rd1 "__subkeyspaceitem@${db}__:*"] + r hset "key\nwith\nnewline" f1 v1 + # Normal key to verify notifications still work + r hset 
normalkey f1 v1 + # Should only get notification for normalkey + set msg [$rd1 read] + assert_equal "pmessage" [lindex $msg 0] + assert_equal "__subkeyspaceitem@${db}__:normalkey\nf1" [lindex $msg 2] + assert_equal "hset" [lindex $msg 3] + r del "key\nwith\nnewline" + r del normalkey + $rd1 close + } + + test "Subkey notifications: subkeyspaceevent format ($type)" { + r config set notify-keyspace-events Vh + r del myhash + set rd1 [redis_deferring_client] + assert_equal {1} [subscribe $rd1 "__subkeyspaceevent@${db}__:hset|myhash"] + r hset myhash f1 v1 f2 v2 + assert_equal "message __subkeyspaceevent@${db}__:hset|myhash 2:f1,2:f2" [$rd1 read] + $rd1 close + } + } ; + + # Test all 4 channels enabled simultaneously + test "Subkey notifications: all 4 channels enabled simultaneously" { + r config set notify-keyspace-events STIVh + r del myhash + set rd_s [redis_deferring_client] + set rd_t [redis_deferring_client] + set rd_i [redis_deferring_client] + set rd_v [redis_deferring_client] + assert_equal {1} [subscribe $rd_s "__subkeyspace@${db}__:myhash"] + assert_equal {1} [subscribe $rd_t "__subkeyevent@${db}__:hset"] + assert_equal {1} [subscribe $rd_v "__subkeyspaceevent@${db}__:hset|myhash"] + $rd_i subscribe "__subkeyspaceitem@${db}__:myhash\nf1" + $rd_i read ;# consume subscribe confirmation + r hset myhash f1 v1 + assert_equal "message __subkeyspace@${db}__:myhash hset|2:f1" [$rd_s read] + assert_equal "message __subkeyevent@${db}__:hset 6:myhash|2:f1" [$rd_t read] + assert_equal "message __subkeyspaceevent@${db}__:hset|myhash 2:f1" [$rd_v read] + set msg_i [$rd_i read] + assert_equal "message" [lindex $msg_i 0] + assert_equal "__subkeyspaceitem@${db}__:myhash\nf1" [lindex $msg_i 1] + assert_equal "hset" [lindex $msg_i 2] + $rd_s close + $rd_t close + $rd_i close + $rd_v close + } + + # Test that subkey notifications are triggered on replica after replication + test "Subkey notifications: replica receives subkey notifications after replication" { + start_server 
{tags {"repl external:skip"}} { + set master [srv -1 client] + set master_host [srv -1 host] + set master_port [srv -1 port] + set replica [srv 0 client] + + $replica replicaof $master_host $master_port + wait_for_sync $replica + + # Enable subkeyspace notifications on replica + $replica config set notify-keyspace-events Sh + + # Subscribe on replica + set rd1 [redis_deferring_client -1] + assert_equal {1} [subscribe $rd1 "__subkeyspace@${db}__:myhash"] + + # Write on master + $master hset myhash f1 v1 f2 v2 + $master hpexpire myhash 100 FIELDS 2 f1 f2 + + # Replica should receive subkey notification + assert_equal "message __subkeyspace@${db}__:myhash hset|2:f1,2:f2" [$rd1 read] + assert_equal "message __subkeyspace@${db}__:myhash hexpire|2:f1,2:f2" [$rd1 read] + assert_equal "message __subkeyspace@${db}__:myhash hexpired|2:f1,2:f2" [$rd1 read] + $rd1 close + $master del myhash + } + } + test "publish to self inside multi" { r hello 3 r subscribe foo @@ -1012,5 +1369,134 @@ start_server {tags {"pubsub network"}} { assert_equal [r publish foo vaz] {1} assert_equal [r read] {message foo vaz} } {} {resp3} - +} + +start_server {tags {"pubsub network"}} { + # Helper proc for tests that subscribe multiple times until hitting OOM + proc test_subscribe_oom_loop {cmd description clients} { + test "$cmd $description fails with OOM when memory limit exceeded" { + # Set 10MB memory limit + r config set maxmemory 10485760 + r config set maxmemory-policy noeviction + + # Create clients + if {$clients == 1} { + set rd [redis_deferring_client] + } else { + set rd1 [redis_deferring_client] + set rd2 [redis_deferring_client] + } + + set base_str [string repeat "a" 2048] + set success_count 0 + set oom_occurred 0 + + # Try to subscribe until we hit OOM + for {set i 0} {$i < 5000} {incr i} { + # Select client + if {$clients == 1} { + set client $rd + } else { + set client [expr {$i % 2 ? 
$rd1 : $rd2}] + } + + # Build channel/pattern name + if {$cmd eq "psubscribe"} { + set channel_name "${base_str}${i}*" + } else { + set channel_name "${base_str}${i}" + } + + $client $cmd $channel_name + if {[catch {$client read} err]} { + if {[string match "*OOM command not allowed*" $err]} { + set oom_occurred 1 + break + } + error "Unexpected error: $err" + } + incr success_count + } + + # Verify we had at least one success and hit OOM + assert {$success_count > 10} + assert {$oom_occurred == 1} + + # Close clients + if {$clients == 1} { + $rd close + } else { + $rd1 close + $rd2 close + } + } + } + + # Helper proc for tests with single large channel that immediately fails + proc test_subscribe_large_channel_oom {cmd channel_type} { + test "$cmd with large $channel_type name fails due to OOM" { + # Set maxmemory to 2MB + r config set maxmemory 2097152 + r config set maxmemory-policy noeviction + + # Create large channel/pattern name: 2MB + set channel_name [string repeat "a" 2097152] + + # Create a single pubsub client + set rd [redis_deferring_client] + + # Subscribe should fail with OOM error + $rd $cmd $channel_name + assert_error "*OOM command not allowed when used memory > 'maxmemory'*" {$rd read} + + # Cleanup + $rd close + } + } + + # Helper proc for tests with small success then large failure + proc test_subscribe_small_then_large_oom {cmd channel_type} { + test "$cmd succeeds with small $channel_type but fails with large $channel_type due to OOM" { + # Set maxmemory to 5MB + r config set maxmemory 5242880 + r config set maxmemory-policy noeviction + + # Create channel names: first 10KB, second 5MB + set channel1 [string repeat "a" 10240] + set channel2 [string repeat "b" 5242880] + + # Create a single pubsub client + set rd [redis_deferring_client] + + # First subscribe should succeed (10KB) + $rd $cmd $channel1 + set reply1 [$rd read] + assert_equal [list $cmd] [lindex $reply1 0] + + # Second subscribe should fail with OOM error (5MB exceeds limit) + 
$rd $cmd $channel2 + assert_error "*OOM command not allowed when used memory > 'maxmemory'*" {$rd read} + + # Cleanup + $rd close + } + } + + # Multiple subscriptions until OOM tests + test_subscribe_oom_loop "subscribe" "" 1 + test_subscribe_oom_loop "ssubscribe" "" 1 + test_subscribe_oom_loop "psubscribe" "" 1 + test_subscribe_oom_loop "subscribe" "with 2 clients" 2 + test_subscribe_oom_loop "ssubscribe" "with 2 clients" 2 + test_subscribe_oom_loop "psubscribe" "with 2 clients" 2 + + # Single large channel immediate OOM tests + test_subscribe_large_channel_oom "subscribe" "channel" + test_subscribe_large_channel_oom "psubscribe" "pattern" + test_subscribe_large_channel_oom "ssubscribe" "shard channel" + + # Small success then large failure tests + test_subscribe_small_then_large_oom "subscribe" "channel" + test_subscribe_small_then_large_oom "psubscribe" "pattern" + test_subscribe_small_then_large_oom "ssubscribe" "channel" } diff --git a/tests/unit/replybufsize.tcl b/tests/unit/replybufsize.tcl index 302417cf8..151d7757d 100644 --- a/tests/unit/replybufsize.tcl +++ b/tests/unit/replybufsize.tcl @@ -13,7 +13,6 @@ start_server {tags {"replybufsize"}} { test {verify reply buffer limits} { # In order to reduce test time we can set the peak reset time very low r debug replybuffer peak-reset-time 100 - r debug reply-copy-avoidance 0 ;# Disable copy avoidance because it affects memory usage # Create a simple idle test client variable tc [redis_client] @@ -27,13 +26,13 @@ start_server {tags {"replybufsize"}} { fail "reply buffer of idle client is $rbs after 1 seconds" } - r set bigval [string repeat x 32768] + r set bigval [string repeat x 8192] ;# Keep value <= 16KB to avoid copy-avoidance, which shares memory and slows tot-mem growth. 
# In order to reduce test time we can set the peak reset time very low r debug replybuffer peak-reset-time never wait_for_condition 10 100 { - [$tc get bigval ; get_reply_buffer_size test_client] >= 16384 && [get_reply_buffer_size test_client] < 32768 + [$tc mget bigval bigval bigval bigval ; get_reply_buffer_size test_client] >= 16384 && [get_reply_buffer_size test_client] < 32768 } else { set rbs [get_reply_buffer_size test_client] fail "reply buffer of busy client is $rbs after 1 seconds" diff --git a/tests/unit/scan.tcl b/tests/unit/scan.tcl index 6a092cb4e..c3ec5f273 100644 --- a/tests/unit/scan.tcl +++ b/tests/unit/scan.tcl @@ -471,6 +471,21 @@ proc test_scan {type} { } } + test "{$type} SCAN COUNT overflow" { + r flushdb + populate 10 + + # count = LONG_MAX/10 + 1, within LONG_MAX so it parses fine, + # but count*10 overflows signed long which is undefined behavior. + # Compute dynamically to support both 32-bit and 64-bit builds. + set long_max [expr {[s arch_bits] == 32 ? 2147483647 : 9223372036854775807}] + set big_count [expr {$long_max / 10 + 1}] + set res [r scan 0 count $big_count] + assert {[llength $res] == 2} + assert_equal 0 [lindex $res 0] + assert_equal 10 [llength [lindex $res 1]] + } + test "{$type} SCAN MATCH pattern implies cluster slot" { # Tests the code path for an optimization for patterns like "{foo}-*" # which implies that all matching keys belong to one slot. diff --git a/tests/unit/slowlog.tcl b/tests/unit/slowlog.tcl index 68af0cb6c..c62b3302c 100644 --- a/tests/unit/slowlog.tcl +++ b/tests/unit/slowlog.tcl @@ -360,4 +360,123 @@ start_server {tags {"slowlog"} overrides {slowlog-log-slower-than 1000000}} { assert_match {*slowlog_time_ms_sum=*} $cmdstat_debug assert_match {*slowlog_time_ms_max=*} $cmdstat_debug } {} {needs:debug} + + # Helper: return the argv (field index 3) of the most recent slowlog + # entry whose first token matches $cmd (case-insensitive). 
Skips entries + # generated by CONFIG SET / SLOWLOG GET that are interleaved with the + # command we actually want to inspect. + proc latest_slowlog_argv_for {cmd} { + foreach e [r slowlog get] { + set argv [lindex $e 3] + if {[string equal -nocase [lindex $argv 0] $cmd]} { + return $argv + } + } + return {} + } + + test {SLOWLOG - slowlog-entry-max-argc and slowlog-entry-max-string-len defaults} { + # Defaults must match the legacy hard-coded constants + # (SLOWLOG_ENTRY_MAX_ARGC=32, SLOWLOG_ENTRY_MAX_STRING=128). + assert_equal 32 [lindex [r config get slowlog-entry-max-argc] 1] + assert_equal 128 [lindex [r config get slowlog-entry-max-string-len] 1] + } + + test {SLOWLOG - slowlog-entry-max-argc enforces minimum value of 2} { + assert_error "*argument must be between*" {r config set slowlog-entry-max-argc 1} + r config set slowlog-entry-max-argc 2 + assert_equal 2 [lindex [r config get slowlog-entry-max-argc] 1] + } + + test {SLOWLOG - slowlog-entry-max-string-len enforces minimum value of 1} { + assert_error "*argument must be between*" {r config set slowlog-entry-max-string-len 0} + r config set slowlog-entry-max-string-len 1 + assert_equal 1 [lindex [r config get slowlog-entry-max-string-len] 1] + } + + test {SLOWLOG - slowlog-entry-max-argc=2 preserves command name and adds trim marker} { + r slowlog reset + r config set slowlog-entry-max-string-len 128 + + # The minimum argc of 2 exists so that the command name is preserved + # and the trim marker can still be written into the last slot. + r config set slowlog-log-slower-than 0 + r config set slowlog-entry-max-argc 2 + r sadd myset a b c d + # 6 args total, slargc=2: marker == argc - slargc + 1 == 5. + assert_equal {sadd {... (5 more arguments)}} [latest_slowlog_argv_for sadd] + } + + test {SLOWLOG - custom slowlog-entry-max-argc trims correctly} { + r config set slowlog-log-slower-than 0 + r config set slowlog-entry-max-argc 5 + + # argc > limit: trimmed with marker in the last slot. 
+ r slowlog reset + r sadd myset a b c d e f g h + assert_equal {sadd myset a b {... (6 more arguments)}} \ + [latest_slowlog_argv_for sadd] + + # argc == limit: no marker, logged as-is. + r slowlog reset + r sadd myset a b c + assert_equal {sadd myset a b c} [latest_slowlog_argv_for sadd] + + # argc < limit: no marker, logged as-is. + r slowlog reset + r sadd myset a + assert_equal {sadd myset a} [latest_slowlog_argv_for sadd] + } + + test {SLOWLOG - custom slowlog-entry-max-string-len trims string args} { + r slowlog reset + r config set slowlog-log-slower-than 0 + r config set slowlog-entry-max-argc 32 + r config set slowlog-entry-max-string-len 16 + + # String longer than limit: trimmed with "... (N more bytes)" suffix. + r set mykey [string repeat A 20] + set expected "set mykey {[string repeat A 16]... (4 more bytes)}" + assert_equal $expected [latest_slowlog_argv_for set] + + # String length == limit: no suffix, logged as-is. + r slowlog reset + r set mykey [string repeat B 16] + assert_equal "set mykey [string repeat B 16]" \ + [latest_slowlog_argv_for set] + + # String shorter than limit: logged as-is. + r slowlog reset + r set mykey short + assert_equal {set mykey short} [latest_slowlog_argv_for set] + } + + test {SLOWLOG - runtime config change applies only to subsequent entries} { + r config set slowlog-log-slower-than 0 + r config set slowlog-entry-max-string-len 128 + r slowlog reset + + set arg [string repeat C 50] + + # First SET is logged with the old (default) limit -> not trimmed. + # Use short key names so the new (smaller) limit cannot trim them + # when we look for the entry later. + r set k1 $arg + + set old_entry_argv [latest_slowlog_argv_for set] + assert_equal "set k1 $arg" $old_entry_argv + + # Lower the limit and log another entry. + r config set slowlog-entry-max-string-len 8 + r mset k2{x} v1 k3{x} $arg + + # The new entry must be trimmed... 
+ set new_entry_argv [latest_slowlog_argv_for mset] + assert_equal "mset k2{x} v1 k3{x} {[string repeat C 8]... (42 more bytes)}" \ + $new_entry_argv + + # ... while the old one remains untouched + set old_entry_argv_again [latest_slowlog_argv_for set] + assert_equal "set k1 $arg" $old_entry_argv_again + } } diff --git a/tests/unit/tracking.tcl b/tests/unit/tracking.tcl index 666b5930e..174575eee 100644 --- a/tests/unit/tracking.tcl +++ b/tests/unit/tracking.tcl @@ -413,6 +413,18 @@ start_server {tags {"tracking network logreqres:skip"}} { $r CLIENT TRACKING OFF } + test {BCAST prefix self-overlap past first index reports error without enabling} { + # When any of the provided BCAST prefixes overlap with each other, + # CLIENT TRACKING ON must reply with a single error and leave tracking + # disabled, regardless of the position of the overlapping prefix in + # the argument list. + r CLIENT TRACKING OFF + catch {r CLIENT TRACKING ON BCAST PREFIX BAZ PREFIX FOOBAR PREFIX FOO} output + assert_match {ERR Prefix 'FOOBAR' overlaps with another provided prefix 'FOO'*} $output + # Tracking must not have been enabled after the overlap error. 
+ assert_match {*flags off*} [r CLIENT TRACKINGINFO] + } + test {hdel deliver invalidate message after response in the same connection} { r CLIENT TRACKING off r HELLO 3 diff --git a/tests/unit/type/array.tcl b/tests/unit/type/array.tcl new file mode 100644 index 000000000..d0f62fe3e --- /dev/null +++ b/tests/unit/type/array.tcl @@ -0,0 +1,3114 @@ +start_server { + tags {"array"} +} { + # Basic ARSET/ARGET tests + test {ARSET and ARGET basics} { + r del myarray + assert_equal 1 [r arset myarray 0 hello] + assert_equal hello [r arget myarray 0] + assert_equal {} [r arget myarray 1] + } + + test {ARSET overwrites existing value} { + r del myarray + assert_equal 1 [r arset myarray 0 hello] + assert_equal 0 [r arset myarray 0 world] + assert_equal world [r arget myarray 0] + } + + test {ARGET non-existing key} { + r del myarray + assert_equal {} [r arget myarray 0] + } + + test {ARGET validates index even on non-existing key} { + r del myarray + assert_error {*invalid array index*} {r arget myarray not-an-index} + } + + test {ARSET/ARGET with integer values} { + r del myarray + r arset myarray 0 12345 + assert_equal 12345 [r arget myarray 0] + } + + test {ARSET/ARGET with float values} { + r del myarray + r arset myarray 0 3.14159 + assert_equal 3.14159 [r arget myarray 0] + } + + test {ARSET/ARGET with small strings} { + r del myarray + r arset myarray 0 abc + assert_equal abc [r arget myarray 0] + } + + test {ARSET/ARGET with large string} { + r del myarray + set longstr [string repeat x 100] + r arset myarray 0 $longstr + assert_equal $longstr [r arget myarray 0] + } + + test {ARSET/ARGET with empty string} { + r del myarray + r arset myarray 0 "" + assert_equal "" [r arget myarray 0] + } + + # ARLEN and ARCOUNT tests + test {ARLEN and ARCOUNT basics} { + r del myarray + assert_equal 0 [r arlen myarray] + assert_equal 0 [r arcount myarray] + + r arset myarray 0 a + assert_equal 1 [r arlen myarray] + assert_equal 1 [r arcount myarray] + + r arset myarray 5 b + 
assert_equal 6 [r arlen myarray] + assert_equal 2 [r arcount myarray] + + r arset myarray 100 c + assert_equal 101 [r arlen myarray] + assert_equal 3 [r arcount myarray] + } + + # ARDEL tests + test {ARDEL basics} { + r del myarray + r arset myarray 0 a + r arset myarray 1 b + r arset myarray 2 c + + assert_equal 1 [r ardel myarray 1] + assert_equal {} [r arget myarray 1] + assert_equal 2 [r arcount myarray] + + # Delete non-existing index returns 0 + assert_equal 0 [r ardel myarray 1] + } + + test {ARDEL multiple indices} { + r del myarray + r arset myarray 0 a + r arset myarray 1 b + r arset myarray 2 c + r arset myarray 3 d + + assert_equal 3 [r ardel myarray 0 1 2] + assert_equal 1 [r arcount myarray] + } + + test {ARDEL last element deletes key} { + r del myarray + r arset myarray 0 a + r ardel myarray 0 + assert_equal 0 [r exists myarray] + } + + test {ARDEL notifies array event before del when key is removed} { + set orig_notify [lindex [r config get notify-keyspace-events] 1] + r config set notify-keyspace-events KEA + r del myarray + r arset myarray 0 a + + set rd1 [redis_deferring_client] + assert_equal {1} [psubscribe $rd1 *] + assert_equal 1 [r ardel myarray 0] + + assert_match "pmessage * __keyspace@*__:myarray ardel" [$rd1 read] + assert_match "pmessage * __keyevent@*__:ardel myarray" [$rd1 read] + assert_match "pmessage * __keyspace@*__:myarray del" [$rd1 read] + assert_match "pmessage * __keyevent@*__:del myarray" [$rd1 read] + + $rd1 close + r config set notify-keyspace-events $orig_notify + } + + # ARDELRANGE tests + test {ARDELRANGE basics} { + r del myarray + for {set i 0} {$i < 10} {incr i} { + r arset myarray $i [expr $i * 10] + } + assert_equal 10 [r arcount myarray] + + assert_equal 5 [r ardelrange myarray 2 6] + assert_equal 5 [r arcount myarray] + } + + test {ARDELRANGE reverse order} { + r del myarray + for {set i 0} {$i < 10} {incr i} { + r arset myarray $i [expr $i * 10] + } + + assert_equal 5 [r ardelrange myarray 6 2] + assert_equal 5 
[r arcount myarray] + } + + test {ARDELRANGE notifies array event before del when key is removed} { + set orig_notify [lindex [r config get notify-keyspace-events] 1] + r config set notify-keyspace-events KEA + r del myarray + assert_equal 3 [r arset myarray 0 a b c] + + set rd1 [redis_deferring_client] + assert_equal {1} [psubscribe $rd1 *] + assert_equal 3 [r ardelrange myarray 0 2] + + assert_match "pmessage * __keyspace@*__:myarray ardelrange" [$rd1 read] + assert_match "pmessage * __keyevent@*__:ardelrange myarray" [$rd1 read] + assert_match "pmessage * __keyspace@*__:myarray del" [$rd1 read] + assert_match "pmessage * __keyevent@*__:del myarray" [$rd1 read] + + $rd1 close + r config set notify-keyspace-events $orig_notify + } + + # ARMSET and ARMGET tests + test {ARMSET basics} { + r del myarray + assert_equal 3 [r armset myarray 0 a 1 b 2 c] + assert_equal a [r arget myarray 0] + assert_equal b [r arget myarray 1] + assert_equal c [r arget myarray 2] + } + + test {ARMSET returns only newly filled slots} { + r del myarray + r arset myarray 0 a + assert_equal 1 [r armset myarray 0 aa 1 b] + assert_equal aa [r arget myarray 0] + assert_equal b [r arget myarray 1] + } + + test {ARMGET basics} { + r del myarray + r arset myarray 0 a + r arset myarray 1 b + r arset myarray 5 c + + set result [r armget myarray 0 1 5 3] + assert_equal a [lindex $result 0] + assert_equal b [lindex $result 1] + assert_equal c [lindex $result 2] + assert_equal {} [lindex $result 3] + } + + # ARGETRANGE and contiguous ARSET tests + test {ARGETRANGE basics} { + r del myarray + r armset myarray 0 a 1 b 2 c 3 d 4 e + + set result [r argetrange myarray 1 3] + assert_equal {b c d} $result + } + + test {ARGETRANGE reverse} { + r del myarray + r armset myarray 0 a 1 b 2 c 3 d 4 e + + set result [r argetrange myarray 3 1] + assert_equal {d c b} $result + } + + test {ARGETRANGE errors when requested range exceeds the hard limit} { + assert_error {*range exceeds maximum of 1000000 items*} {r 
argetrange myarray 0 1000000} + } + + test {ARGETRANGE reverse errors when requested range exceeds the hard limit} { + assert_error {*range exceeds maximum of 1000000 items*} {r argetrange myarray 1000000 0} + } + + # ARSCAN tests + test {ARSCAN returns only existing elements with indices} { + r del myarray + r arset myarray 0 a + r arset myarray 5 b + r arset myarray 9 c + + set result [r arscan myarray 0 10] + assert_equal {{0 a} {5 b} {9 c}} $result + } + + test {ARSCAN on empty range returns empty array} { + r del myarray + r arset myarray 500 x + + set result [r arscan myarray 0 100] + assert_equal {} $result + } + + test {ARSCAN reversed range} { + r del myarray + r arset myarray 0 a + r arset myarray 5 b + + set result [r arscan myarray 5 0] + assert_equal {{5 b} {0 a}} $result + } + + test {ARSCAN on non-existent key returns empty array} { + r del nokey + set result [r arscan nokey 0 100] + assert_equal {} $result + } + + test {ARSCAN with mixed value types} { + r del myarray + r arset myarray 0 string + r arset myarray 1 12345 + r arset myarray 2 3.14 + + set result [r arscan myarray 0 10] + assert_equal 3 [llength $result] + assert_equal {0 string} [lindex $result 0] + assert_equal {1 12345} [lindex $result 1] + assert_equal {2 3.14} [lindex $result 2] + } + + # ARGREP tests + test {ARGREP MATCH returns matching indexes} { + r del myarray + r armset myarray 0 alpha 1 beta 2 alphabet 5 gamma + + assert_equal {0 2} [r argrep myarray - + MATCH alpha] + } + + test {ARGREP supports WITHVALUES and reverse ranges} { + r del myarray + r armset myarray 0 alpha 1 beta 2 alphabet 3 delta + + assert_equal {{2 alphabet} {0 alpha}} \ + [r argrep myarray 3 0 MATCH alpha WITHVALUES] + } + + test {ARGREP supports AND, GLOB, and NOCASE} { + r del myarray + r armset myarray 0 RedisArray 1 redis-match 2 array-only 3 plain + + assert_equal {0} [r argrep myarray - + MATCH redis GLOB *array* AND NOCASE] + } + + test {ARGREP supports RE predicates} { + r del myarray + r armset 
myarray 0 foo123 1 bar 2 zoo999 3 Foo777 + + assert_equal {0 2 3} [r argrep myarray - + RE {^.*[0-9]{3}$}] + assert_equal {0 3} [r argrep myarray - + RE {^foo[0-9]+$} NOCASE] + } + + test {ARGREP RE literal alternation forms still match correctly} { + r del myarray + r armset myarray 0 foo 1 bar 2 baz 3 foobar 4 BAR 5 quxfoo 6 zedbar \ + 7 plain 8 ALPS 9 alphabet + + assert_equal {0 1 3 5 6} [r argrep myarray - + RE {foo|bar}] + assert_equal {0 1 3 4 5 6} [r argrep myarray - + RE {foo|bar} NOCASE] + assert_equal {0 1 4} [r argrep myarray - + RE {^(foo|bar)$} NOCASE] + assert_equal {0 1 3 4} [r argrep myarray - + RE {^(foo|bar)} NOCASE] + assert_equal {0 1 3 4 5 6} [r argrep myarray - + RE {(foo|bar)$} NOCASE] + assert_equal {8 9} [r argrep myarray - + RE {alpha|alps} NOCASE] + } + + test {ARGREP RE grouped alternation smoke test} { + r del myarray + r armset myarray 0 item-foo-123 1 ITEM-BAR-456 2 item-baz 3 plain + + assert_equal {0 1} \ + [r argrep myarray - + RE {^item-(foo|bar)-[0-9]{3}$} NOCASE] + } + + test {ARGREP enforces RE length and rejects backreferences} { + r del myarray + set re2048 [string repeat a 2048] + set re2049 [string repeat a 2049] + r arset myarray 0 $re2048 + + assert_equal {0} [r argrep myarray - + RE $re2048] + assert_error {*maximum is 2048 bytes*} {r argrep myarray - + RE $re2049} + assert_error {*backreferences are not supported*} {r argrep myarray - + RE {(a)\1}} + assert_error {*regular expression is empty*} {r argrep myarray - + RE {}} + } + + test {ARGREP LIMIT stops after enough matches} { + r del myarray + r armset myarray 0 hit-1 1 hit-2 2 miss 3 hit-3 + + assert_equal {0 1} [r argrep myarray - + MATCH hit LIMIT 2] + } + + test {ARGREP allows mixed predicate and option order, last wins} { + r del myarray + r armset myarray 0 RedisArray 1 redis-match 2 array-only 3 plain + + assert_equal {0} \ + [r argrep myarray - + OR MATCH redis LIMIT 3 GLOB *array* AND LIMIT 1 NOCASE] + } + + test {ARGREP enforces the predicate limit} { + r 
del myarray + r arset myarray 0 foo + + set cmd [list r argrep myarray - +] + for {set i 0} {$i < 250} {incr i} { + lappend cmd MATCH foo + } + assert_equal {0} [uplevel 1 $cmd] + + lappend cmd MATCH foo + assert_error {*maximum is 250*} [list uplevel 1 $cmd] + } + + test {ARGREP handles missing keys and syntax errors} { + r del nokey + assert_equal {} [r argrep nokey - + MATCH foo] + assert_error {*syntax error*} {r argrep myarray - + LIMIT 1} + assert_error {*invalid regular expression*} {r argrep myarray - + RE {(}} + } + + test {ARGREP rejects malformed braced hex regex escapes} { + r del myarray + r arset myarray 0 hello + + set invalid [format "\\%c%c1" 120 123] + assert_error {*invalid regular expression*} [list r argrep myarray - + RE $invalid] + assert_error {*invalid regular expression*} [list r argrep myarray - + RE $invalid NOCASE] + } + + test {ARSET contiguous write basics} { + r del myarray + assert_equal 3 [r arset myarray 0 a b c] + assert_equal a [r arget myarray 0] + assert_equal b [r arget myarray 1] + assert_equal c [r arget myarray 2] + } + + # ARINSERT tests + test {ARINSERT basics} { + r del myarray + assert_equal 0 [r arinsert myarray a] + assert_equal 1 [r arinsert myarray b] + assert_equal 2 [r arinsert myarray c] + + assert_equal a [r arget myarray 0] + assert_equal b [r arget myarray 1] + assert_equal c [r arget myarray 2] + } + + test {ARRING creates ring buffer} { + r del myarray + for {set i 0} {$i < 10} {incr i} { + r arring myarray 5 $i + } + + # After wrap, we should have indices 0-4 with values 5-9 + assert_equal 5 [r arget myarray 0] + assert_equal 6 [r arget myarray 1] + assert_equal 7 [r arget myarray 2] + assert_equal 8 [r arget myarray 3] + assert_equal 9 [r arget myarray 4] + assert_equal 5 [r arcount myarray] + } + + # ARNEXT, ARSEEK tests + test {ARNEXT tracks insert position} { + r del myarray + assert_equal 0 [r arnext myarray] + + r arinsert myarray a + assert_equal 1 [r arnext myarray] + + r arinsert myarray b + 
assert_equal 2 [r arnext myarray] + } + + test {ARSEEK} { + r del myarray + r arinsert myarray a + r arinsert myarray b + + assert_equal 1 [r arseek myarray 10] + r arinsert myarray c + assert_equal 11 [r arnext myarray] + assert_equal c [r arget myarray 10] + } + + test {ARNEXT returns null when insert cursor is exhausted} { + r del myarray + r arinsert myarray a + + # Move to terminal cursor state: insert_idx = UINT64_MAX-1 + r arseek myarray 18446744073709551615 + assert_equal {} [r arnext myarray] + assert_error {*insert index overflow*} {r arinsert myarray b} + } + + # ARLASTITEMS tests + test {ARLASTITEMS basics} { + r del myarray + for {set i 0} {$i < 5} {incr i} { + r arinsert myarray [expr $i * 10] + } + + set result [r arlastitems myarray 3] + assert_equal {20 30 40} $result + + set result [r arlastitems myarray 3 REV] + assert_equal {40 30 20} $result + } + + test {ARLASTITEMS after ARSEEK 0 uses array tail} { + r del myarray + for {set i 0} {$i < 5} {incr i} { + r arinsert myarray [expr $i * 10] + } + + assert_equal 1 [r arseek myarray 0] + assert_equal {20 30 40} [r arlastitems myarray 3] + assert_equal {40 30 20} [r arlastitems myarray 3 REV] + } + + # AROP tests + test {AROP SUM} { + r del myarray + r armset myarray 0 10 1 20 2 30 + + set result [r arop myarray 0 2 SUM] + assert_equal 60 $result + } + + test {AROP MIN} { + r del myarray + r armset myarray 0 30 1 10 2 20 + + set result [r arop myarray 0 2 MIN] + assert_equal 10 $result + } + + test {AROP MAX} { + r del myarray + r armset myarray 0 30 1 10 2 20 + + set result [r arop myarray 0 2 MAX] + assert_equal 30 $result + } + + test {AROP MATCH} { + r del myarray + r armset myarray 0 hello 1 world 2 hello 3 foo + + assert_equal 2 [r arop myarray 0 3 MATCH hello] + assert_equal 1 [r arop myarray 0 3 MATCH world] + assert_equal 0 [r arop myarray 0 3 MATCH bar] + } + + test {AROP USED} { + r del myarray + r armset myarray 0 a 2 b 5 c + + assert_equal 3 [r arop myarray 0 10 USED] + } + + test {AROP 
AND/OR/XOR} { + r del myarray + # Use decimal values: 255, 15, 240 + r armset myarray 0 255 1 15 2 240 + + assert_equal 0 [r arop myarray 0 2 AND] + assert_equal 255 [r arop myarray 0 2 OR] + assert_equal 0 [r arop myarray 0 2 XOR] + } + + test {AROP AND/OR/XOR truncates floats toward zero} { + r del myarray + # Truncated values: 7, 3, 1 + r armset myarray 0 7.9 1 3.2 2 1.8 + + assert_equal 1 [r arop myarray 0 2 AND] + assert_equal 7 [r arop myarray 0 2 OR] + assert_equal 5 [r arop myarray 0 2 XOR] + } + + # ARINFO tests + test {ARINFO basics} { + r del myarray + r armset myarray 0 a 1 b 100 c + + set info [r arinfo myarray] + assert_equal 3 [dict get $info count] + assert_equal 101 [dict get $info len] + } + + # Type check tests + test {Array commands on wrong type} { + r del mykey + r set mykey value + assert_error {WRONGTYPE*} {r arget mykey 0} + assert_error {WRONGTYPE*} {r arset mykey 0 foo} + assert_error {WRONGTYPE*} {r arlen mykey} + assert_error {WRONGTYPE*} {r arcount mykey} + } + + # TYPE command + test {TYPE returns array} { + r del myarray + r arset myarray 0 hello + assert_equal array [r type myarray] + } + + # OBJECT ENCODING command + test {OBJECT ENCODING returns sliced-array} { + r del myarray + r arset myarray 0 hello + assert_equal sliced-array [r object encoding myarray] + } + + # Sparse indices test + test {Sparse array with large gaps} { + r del myarray + r arset myarray 0 a + r arset myarray 10000 b + r arset myarray 1000000 c + + assert_equal a [r arget myarray 0] + assert_equal b [r arget myarray 10000] + assert_equal c [r arget myarray 1000000] + assert_equal 3 [r arcount myarray] + assert_equal 1000001 [r arlen myarray] + } + + # RDB persistence test + test {Array survives RDB save and load} { + r del myarray + r armset myarray 0 hello 1 world 100 test + r arseek myarray 101 + r arinsert myarray value + + r bgsave + waitForBgsave r + + r debug reload + assert_equal hello [r arget myarray 0] + assert_equal world [r arget myarray 1] + 
assert_equal test [r arget myarray 100] + assert_equal value [r arget myarray 101] + assert_equal 102 [r arnext myarray] + } {} {needs:debug} + + # ========================================================================= + # Edge case tests: directory resizing, slice transitions, window growth + # ========================================================================= + + # Directory resizing tests + test {Directory resize - many slices} { + r del myarray + # Default slice size is 4096, so indices 0, 4096, 8192, 12288, etc. + # create new slices requiring directory growth + set slice_size 4096 + for {set i 0} {$i < 20} {incr i} { + set idx [expr {$i * $slice_size}] + r arset myarray $idx "slice$i" + } + + # Verify all values + for {set i 0} {$i < 20} {incr i} { + set idx [expr {$i * $slice_size}] + assert_equal "slice$i" [r arget myarray $idx] + } + assert_equal 20 [r arcount myarray] + } + + test {Directory resize - very large index jump} { + r del myarray + r arset myarray 0 "start" + # Jump to a very high slice index, forcing directory allocation + r arset myarray 1000000 "middle" + r arset myarray 10000000 "end" + + assert_equal "start" [r arget myarray 0] + assert_equal "middle" [r arget myarray 1000000] + assert_equal "end" [r arget myarray 10000000] + assert_equal 3 [r arcount myarray] + } + + # Dense slice window growth tests + test {Dense window growth - right expansion} { + r del myarray + # Start with element at offset 0, then add elements going right + # Initial window is small (8 elements), this forces growth + for {set i 0} {$i < 100} {incr i} { + r arset myarray $i "val$i" + } + + # Verify all values stored correctly + for {set i 0} {$i < 100} {incr i} { + assert_equal "val$i" [r arget myarray $i] + } + assert_equal 100 [r arcount myarray] + + # Verify window grew (avg-dense-size should be >= 128 to fit 100 elements) + set info [r arinfo myarray FULL] + assert_equal 1 [dict get $info dense-slices] + assert {[dict get $info avg-dense-size] >= 128} 
+ } + + test {Dense window growth - left expansion} { + r del myarray + # Start with element at high offset, then add elements going left + # This forces window to expand leftward + r arset myarray 500 "anchor" + for {set i 499} {$i >= 400} {incr i -1} { + r arset myarray $i "val$i" + } + + assert_equal "anchor" [r arget myarray 500] + for {set i 400} {$i < 500} {incr i} { + assert_equal "val$i" [r arget myarray $i] + } + assert_equal 101 [r arcount myarray] + + # Verify window grew (avg-dense-size should be >= 128 to fit 101 elements) + set info [r arinfo myarray FULL] + assert_equal 1 [dict get $info dense-slices] + assert {[dict get $info avg-dense-size] >= 128} + } + + test {Dense window growth - bidirectional expansion} { + r del myarray + # Start in middle, expand both directions + r arset myarray 500 "center" + for {set i 1} {$i <= 50} {incr i} { + r arset myarray [expr {500 - $i}] "left$i" + r arset myarray [expr {500 + $i}] "right$i" + } + + assert_equal "center" [r arget myarray 500] + for {set i 1} {$i <= 50} {incr i} { + assert_equal "left$i" [r arget myarray [expr {500 - $i}]] + assert_equal "right$i" [r arget myarray [expr {500 + $i}]] + } + assert_equal 101 [r arcount myarray] + + # Verify window grew (avg-dense-size should be >= 128 to fit 101 elements) + set info [r arinfo myarray FULL] + assert_equal 1 [dict get $info dense-slices] + assert {[dict get $info avg-dense-size] >= 128} + } + + # Sparse to dense promotion tests + test {Sparse to dense promotion - exceed kmax threshold} { + r del myarray + # kmax default is 10, add 11+ elements to force promotion + # Use sparse pattern (scattered offsets within one slice) + for {set i 0} {$i < 15} {incr i} { + # Scattered within first slice (0-4095) + set idx [expr {$i * 100}] + r arset myarray $idx "sparse$i" + } + + # Verify all values after promotion + for {set i 0} {$i < 15} {incr i} { + set idx [expr {$i * 100}] + assert_equal "sparse$i" [r arget myarray $idx] + } + assert_equal 15 [r arcount 
myarray] + + # Verify promotion actually happened using ARINFO FULL + set info [r arinfo myarray FULL] + assert_equal 1 [dict get $info dense-slices] + assert_equal 0 [dict get $info sparse-slices] + } + + test {Sparse to dense promotion - then continue adding} { + r del myarray + # First create sparse slice, then promote, then add more + for {set i 0} {$i < 5} {incr i} { + r arset myarray [expr {$i * 200}] "phase1_$i" + } + + # Verify starts as sparse + set info [r arinfo myarray FULL] + assert_equal 0 [dict get $info dense-slices] + assert_equal 1 [dict get $info sparse-slices] + + # Add more to trigger promotion + for {set i 5} {$i < 20} {incr i} { + r arset myarray [expr {$i * 200}] "phase2_$i" + } + + # Verify all + for {set i 0} {$i < 20} {incr i} { + assert_equal "phase[expr {$i < 5 ? 1 : 2}]_$i" [r arget myarray [expr {$i * 200}]] + } + + # Verify promotion happened + set info [r arinfo myarray FULL] + assert_equal 1 [dict get $info dense-slices] + assert_equal 0 [dict get $info sparse-slices] + } + + # Dense to sparse demotion tests + test {Dense to sparse demotion - delete below kmin threshold} { + r del myarray + # Create dense slice with many elements + for {set i 0} {$i < 50} {incr i} { + r arset myarray $i "val$i" + } + assert_equal 50 [r arcount myarray] + + # Verify starts as dense + set info [r arinfo myarray FULL] + assert_equal 1 [dict get $info dense-slices] + assert_equal 0 [dict get $info sparse-slices] + + # Delete most elements, leaving only 3 (below kmin=5) + for {set i 3} {$i < 50} {incr i} { + r ardel myarray $i + } + + # Verify remaining elements + assert_equal "val0" [r arget myarray 0] + assert_equal "val1" [r arget myarray 1] + assert_equal "val2" [r arget myarray 2] + assert_equal 3 [r arcount myarray] + + # Verify demotion happened + set info [r arinfo myarray FULL] + assert_equal 0 [dict get $info dense-slices] + assert_equal 1 [dict get $info sparse-slices] + } + + test {Dense to sparse demotion - then add again} { + r del myarray 
+ # Create dense, demote to sparse, then add more + for {set i 0} {$i < 30} {incr i} { + r arset myarray $i "initial$i" + } + + # Delete to demote + for {set i 4} {$i < 30} {incr i} { + r ardel myarray $i + } + assert_equal 4 [r arcount myarray] + + # Verify demotion happened + set info [r arinfo myarray FULL] + assert_equal 0 [dict get $info dense-slices] + assert_equal 1 [dict get $info sparse-slices] + + # Add new elements (should work in sparse mode) + for {set i 100} {$i < 105} {incr i} { + r arset myarray $i "new$i" + } + + # Verify old and new + for {set i 0} {$i < 4} {incr i} { + assert_equal "initial$i" [r arget myarray $i] + } + for {set i 100} {$i < 105} {incr i} { + assert_equal "new$i" [r arget myarray $i] + } + } + + # Combined stress test + test {Stress test - mixed operations across multiple slices} { + r del myarray + set slice_size 4096 + + # Create elements across 5 slices + for {set slice 0} {$slice < 5} {incr slice} { + set base [expr {$slice * $slice_size}] + # Add 20 elements per slice + for {set i 0} {$i < 20} {incr i} { + r arset myarray [expr {$base + $i * 50}] "s${slice}_e$i" + } + } + assert_equal 100 [r arcount myarray] + + # Delete half from each slice (should cause some demotions) + for {set slice 0} {$slice < 5} {incr slice} { + set base [expr {$slice * $slice_size}] + for {set i 10} {$i < 20} {incr i} { + r ardel myarray [expr {$base + $i * 50}] + } + } + assert_equal 50 [r arcount myarray] + + # Verify remaining elements + for {set slice 0} {$slice < 5} {incr slice} { + set base [expr {$slice * $slice_size}] + for {set i 0} {$i < 10} {incr i} { + assert_equal "s${slice}_e$i" [r arget myarray [expr {$base + $i * 50}]] + } + } + } + + test {Stress test - rapid insert/delete cycles} { + r del myarray + + # Multiple cycles of growth and shrinkage + for {set cycle 0} {$cycle < 3} {incr cycle} { + # Grow + for {set i 0} {$i < 100} {incr i} { + r arset myarray $i "cycle${cycle}_$i" + } + assert_equal 100 [r arcount myarray] + + # Shrink 
(but leave some) + for {set i 10} {$i < 100} {incr i} { + r ardel myarray $i + } + assert_equal 10 [r arcount myarray] + } + + # Verify final state + for {set i 0} {$i < 10} {incr i} { + assert_equal "cycle2_$i" [r arget myarray $i] + } + } + + # RDB with complex state + test {RDB persistence with sparse and dense slices} { + r del myarray + + # Create mix of sparse and dense slices + # Slice 0: dense (many elements) + for {set i 0} {$i < 50} {incr i} { + r arset myarray $i "dense$i" + } + + # Slice 1 (offset 4096): sparse (few elements) + r arset myarray 4096 "sparse0" + r arset myarray 4200 "sparse1" + r arset myarray 4500 "sparse2" + + # Slice 10 (offset 40960): single element + r arset myarray 40960 "lonely" + + r bgsave + waitForBgsave r + r debug reload + + # Verify all types survived + for {set i 0} {$i < 50} {incr i} { + assert_equal "dense$i" [r arget myarray $i] + } + assert_equal "sparse0" [r arget myarray 4096] + assert_equal "sparse1" [r arget myarray 4200] + assert_equal "sparse2" [r arget myarray 4500] + assert_equal "lonely" [r arget myarray 40960] + } {} {needs:debug} + + # Regression test for dense window boundary bug (GitHub issue) + # When a dense slice window doubles but doesn't reach ar_slice_size, + # offset + winsize could exceed the slice boundary (4096), causing crashes. + test {Regression - dense window growth must not exceed slice boundary} { + r del myarray + set slice_size 4096 + + # Create a dense slice with elements at high offsets within the slice. + # Start at offset 2100 with a small window, then force growth. + # Initial window: offset=2100, winsize=64 (or similar small power of 2) + r arset myarray 2100 "start" + + # Add elements to grow the window to the right. + # After several doublings, winsize might become 2048. + # With offset=2100 and winsize=2048, end would be 4148 > 4096 (BUG!) + # The fix adjusts offset so the window stays within bounds. 
+ for {set i 2101} {$i < 2200} {incr i} { + r arset myarray $i "val$i" + } + + # Now force further right growth that would exceed boundary without fix + for {set i 2200} {$i < 3500} {incr i 10} { + r arset myarray $i "val$i" + } + + # Verify all values are accessible (would crash before the fix) + assert_equal "start" [r arget myarray 2100] + assert_equal "val2150" [r arget myarray 2150] + assert_equal "val3000" [r arget myarray 3000] + + # Verify window respects slice boundary via ARINFO FULL + set info [r arinfo myarray FULL] + set avg_size [dict get $info avg-dense-size] + # With the fix, window should be properly sized (at most slice_size) + assert {$avg_size <= $slice_size} + } + + test {Regression - sparse to dense promotion with high offset boundary} { + r del myarray + set slice_size 4096 + + # Create sparse slice with elements near upper boundary of slice + # This tests arSparsePromote boundary handling + for {set i 0} {$i < 8} {incr i} { + set idx [expr {2200 + $i * 100}] ;# 2200, 2300, ..., 2900 + r arset myarray $idx "sparse$i" + } + + # Verify starts as sparse + set info [r arinfo myarray FULL] + assert_equal 1 [dict get $info sparse-slices] + + # Add more to trigger promotion - elements then span 2200 to 4100 + # Indices 2200-4000 (1801-element span) sit below the 4096 boundary; 4100 lies past it + # Without boundary fix, offset=2200 + winsize=2048 = 4248 > 4096 (BUG!)
+ for {set i 8} {$i < 20} {incr i} { + set idx [expr {2200 + $i * 100}] ;# continues: 3000, 3100, ..., 4100 + r arset myarray $idx "promoted$i" + } + + # Verify all values survived promotion (would crash before fix) + for {set i 0} {$i < 8} {incr i} { + set idx [expr {2200 + $i * 100}] + assert_equal "sparse$i" [r arget myarray $idx] + } + for {set i 8} {$i < 20} {incr i} { + set idx [expr {2200 + $i * 100}] + assert_equal "promoted$i" [r arget myarray $idx] + } + } + + # Helper to generate random values of different encoding types + proc random_value {} { + set type [expr {int(rand() * 4)}] + switch $type { + 0 { + # INT encoding: small integers + set val [expr {int(rand() * 200000) - 100000}] + } + 1 { + # FLOAT encoding: synthetic float with random digits + set int_digits [expr {int(rand() * 5) + 1}] ;# 1-5 digits before dot + set frac_digits [expr {int(rand() * 5) + 1}] ;# 1-5 digits after dot + set int_part "" + for {set i 0} {$i < $int_digits} {incr i} { + append int_part [expr {int(rand() * 10)}] + } + set frac_part "" + for {set i 0} {$i < $frac_digits} {incr i} { + append frac_part [expr {int(rand() * 10)}] + } + # Add negative sign randomly + set sign [expr {rand() < 0.5 ? 
"-" : ""}] + set val "${sign}${int_part}.${frac_part}" + } + 2 { + # SMALLSTR encoding: short strings (1-6 bytes) + set len [expr {int(rand() * 6) + 1}] + set val "" + for {set i 0} {$i < $len} {incr i} { + append val [format %c [expr {int(rand() * 26) + 97}]] ;# a-z + } + } + 3 { + # arString pointer: longer strings (10-30 bytes) + set len [expr {int(rand() * 21) + 10}] + set val "" + for {set i 0} {$i < $len} {incr i} { + append val [format %c [expr {int(rand() * 26) + 97}]] ;# a-z + } + } + } + return $val + } + + proc random_array_index {} { + set roll [expr {int(rand() * 100)}] + if {$roll < 35} { + return [expr {int(rand() * 256)}] + } elseif {$roll < 55} { + return [expr {4096 + int(rand() * 512)}] + } elseif {$roll < 75} { + return [expr {8388608 + int(rand() * 8192)}] + } elseif {$roll < 90} { + return [expr {16777216 + int(rand() * 8192)}] + } else { + return [expr {int(rand() * 30000000)}] + } + } + + proc model_array_delrange {arrname lo hi} { + upvar 1 $arrname expected + + if {$lo > $hi} { + set tmp $lo + set lo $hi + set hi $tmp + } + + set deleted 0 + foreach idx [array names expected] { + if {$idx >= $lo && $idx <= $hi} { + unset expected($idx) + incr deleted + } + } + return $deleted + } + + proc model_array_scan {arrname} { + upvar 1 $arrname expected + + set result {} + foreach idx [lsort -integer [array names expected]] { + lappend result [list $idx $expected($idx)] + } + return $result + } + + proc iterator_stress_rand_between {lo hi} { + return [expr {$lo + int(rand() * ($hi - $lo + 1))}] + } + + proc iterator_stress_random_index {slice_size mode} { + set roll [expr {int(rand() * 100)}] + switch -- $mode { + mixed { + if {$roll < 25} { + return [expr {int(rand() * ($slice_size * 2))}] + } elseif {$roll < 45} { + return [expr {$slice_size - 4 + int(rand() * 9)}] + } elseif {$roll < 60} { + return [expr {$slice_size * 2 - 4 + int(rand() * 9)}] + } elseif {$roll < 78} { + return [expr {8388608 + int(rand() * ($slice_size * 2))}] + } elseif 
{$roll < 92} { + return [expr {16777216 + int(rand() * ($slice_size * 2))}] + } else { + return [expr {int(rand() * 30000000)}] + } + } + dense { + if {$roll < 60} { + return [expr {int(rand() * ($slice_size * 2))}] + } elseif {$roll < 80} { + return [expr {$slice_size - 8 + int(rand() * 17)}] + } else { + return [expr {int(rand() * ($slice_size * 8))}] + } + } + superdir { + if {$roll < 20} { + return [expr {int(rand() * 1024)}] + } elseif {$roll < 45} { + return [expr {8388608 + int(rand() * ($slice_size * 4))}] + } elseif {$roll < 70} { + return [expr {16777216 + int(rand() * ($slice_size * 4))}] + } elseif {$roll < 90} { + return [expr {25165824 + int(rand() * ($slice_size * 4))}] + } else { + return [expr {int(rand() * 40000000)}] + } + } + } + return [expr {int(rand() * 30000000)}] + } + + proc iterator_stress_sorted_indices {arrname reverse} { + upvar 1 $arrname model + if {$reverse} { + return [lsort -integer -decreasing [array names model]] + } + return [lsort -integer [array names model]] + } + + proc iterator_stress_scan {arrname start end limit} { + upvar 1 $arrname model + set reverse [expr {$start > $end}] + set lo [expr {$reverse ? $end : $start}] + set hi [expr {$reverse ? $start : $end}] + set result {} + set emitted 0 + + foreach idx [iterator_stress_sorted_indices model $reverse] { + if {$idx < $lo || $idx > $hi} continue + lappend result [list $idx $model($idx)] + incr emitted + if {$limit > 0 && $emitted >= $limit} break + } + return $result + } + + proc iterator_stress_argrep {arrname start end type pattern nocase withvalues limit} { + upvar 1 $arrname model + set reverse [expr {$start > $end}] + set lo [expr {$reverse ? $end : $start}] + set hi [expr {$reverse ? 
$start : $end}] + set pattern_cmp $pattern + if {$nocase} { set pattern_cmp [string tolower $pattern_cmp] } + set result {} + set emitted 0 + + foreach idx [iterator_stress_sorted_indices model $reverse] { + if {$idx < $lo || $idx > $hi} continue + set value $model($idx) + set cmp $value + if {$nocase} { set cmp [string tolower $cmp] } + + if {$type eq "EXACT"} { + set match [expr {$cmp eq $pattern_cmp}] + } else { + set match [expr {[string first $pattern_cmp $cmp] != -1}] + } + + if {$match} { + if {$withvalues} { + lappend result [list $idx $value] + } else { + lappend result $idx + } + incr emitted + if {$emitted >= $limit} break + } + } + return $result + } + + proc iterator_stress_arop_used {arrname start end} { + upvar 1 $arrname model + set lo [expr {$start > $end ? $end : $start}] + set hi [expr {$start > $end ? $start : $end}] + set used 0 + + foreach idx [array names model] { + if {$idx >= $lo && $idx <= $hi} { incr used } + } + return $used + } + + proc iterator_stress_arop_match {arrname start end needle} { + upvar 1 $arrname model + set lo [expr {$start > $end ? $end : $start}] + set hi [expr {$start > $end ? $start : $end}] + set matches 0 + + foreach idx [array names model] { + if {$idx >= $lo && $idx <= $hi && $model($idx) eq $needle} { + incr matches + } + } + return $matches + } + + proc iterator_stress_arop_sum {arrname start end} { + upvar 1 $arrname model + set lo [expr {$start > $end ? $end : $start}] + set hi [expr {$start > $end ? 
$start : $end}] + set sum 0.0 + set has_numeric 0 + + foreach idx [array names model] { + if {$idx < $lo || $idx > $hi} continue + if {[string is double -strict $model($idx)]} { + set sum [expr {$sum + ($model($idx) + 0.0)}] + set has_numeric 1 + } + } + + if {!$has_numeric} { return {} } + return $sum + } + + proc iterator_stress_pick_existing_value {arrname} { + upvar 1 $arrname model + set keys [array names model] + if {[llength $keys] == 0} { return [random_value] } + return $model([lindex $keys [expr {int(rand() * [llength $keys])}]]) + } + + proc iterator_stress_pick_match_pattern {value} { + set len [string length $value] + if {$len <= 1} { return $value } + set start [expr {int(rand() * $len)}] + set width [expr {1 + int(rand() * ($len - $start))}] + return [string range $value $start [expr {$start + $width - 1}]] + } + + proc iterator_stress_flip_case {value} { + set out "" + foreach ch [split $value ""] { + if {![string is alpha -strict $ch] || rand() < 0.5} { + append out $ch + } elseif {$ch eq [string tolower $ch]} { + append out [string toupper $ch] + } else { + append out [string tolower $ch] + } + } + return $out + } + + proc iterator_stress_check_equal {label expected got} { + if {$expected ne $got} { + fail "$label mismatch - expected '$expected', got '$got'" + } + } + + proc iterator_stress_check_sum {label expected got} { + if {$expected eq {} || $got eq {}} { + if {$expected ne $got} { + fail "$label mismatch - expected '$expected', got '$got'" + } + return + } + + if {abs(($expected + 0.0) - ($got + 0.0)) > 1e-9} { + fail "$label mismatch - expected '$expected', got '$got'" + } + } + + proc iterator_stress_validate {r arrname slice_size mode tag step full_scan} { + upvar 1 $arrname model + set count [array size model] + + if {$count == 0} { + iterator_stress_check_equal "$tag/$step exists" 0 [r exists myarray] + if {$full_scan} { + iterator_stress_check_equal "$tag/$step empty-scan" {} \ + [r arscan myarray 0 50000000] + } + return + } + + 
iterator_stress_check_equal "$tag/$step count" $count [r arcount myarray] + if {$full_scan} { + set start [expr {$step % 2 == 0 ? 0 : 50000000}] + set end [expr {$step % 2 == 0 ? 50000000 : 0}] + iterator_stress_check_equal "$tag/$step full-scan" \ + [iterator_stress_scan model $start $end 0] \ + [r arscan myarray $start $end] + } + + for {set probe 0} {$probe < 2} {incr probe} { + set start [iterator_stress_random_index $slice_size $mode] + set end [iterator_stress_random_index $slice_size $mode] + if {rand() < 0.15} { set start 0 } + if {rand() < 0.15} { set end 50000000 } + + set limit [iterator_stress_rand_between 1 10] + iterator_stress_check_equal "$tag/$step scan/$probe" \ + [iterator_stress_scan model $start $end $limit] \ + [r arscan myarray $start $end LIMIT $limit] + + set grep_type [expr {rand() < 0.5 ? "EXACT" : "MATCH"}] + if {rand() < 0.7} { + set pattern [iterator_stress_pick_existing_value model] + if {$grep_type eq "MATCH"} { + set pattern [iterator_stress_pick_match_pattern $pattern] + } + } else { + set pattern [random_value] + } + + set withvalues [expr {rand() < 0.5}] + set nocase [expr {rand() < 0.5}] + if {$nocase} { set pattern [iterator_stress_flip_case $pattern] } + set grep_limit [iterator_stress_rand_between 1 8] + set grep_cmd [list r argrep myarray $start $end $grep_type $pattern LIMIT $grep_limit] + if {$withvalues} { lappend grep_cmd WITHVALUES } + if {$nocase} { lappend grep_cmd NOCASE } + + iterator_stress_check_equal "$tag/$step argrep/$probe" \ + [iterator_stress_argrep model $start $end $grep_type $pattern $nocase $withvalues $grep_limit] \ + [uplevel 1 $grep_cmd] + + iterator_stress_check_equal "$tag/$step used/$probe" \ + [iterator_stress_arop_used model $start $end] \ + [r arop myarray $start $end USED] + + set needle [iterator_stress_pick_existing_value model] + iterator_stress_check_equal "$tag/$step match/$probe" \ + [iterator_stress_arop_match model $start $end $needle] \ + [r arop myarray $start $end MATCH $needle] + + 
iterator_stress_check_sum "$tag/$step sum/$probe" \ + [iterator_stress_arop_sum model $start $end] \ + [r arop myarray $start $end SUM] + } + } + + proc iterator_stress_apply_operation {r arrname slice_size mode} { + upvar 1 $arrname model + set roll [expr {int(rand() * 100)}] + + if {$roll < 30} { + set idx [iterator_stress_random_index $slice_size $mode] + set val [random_value] + r arset myarray $idx $val + set model($idx) $val + } elseif {$roll < 45} { + set start [iterator_stress_random_index $slice_size $mode] + set values {} + set len [iterator_stress_rand_between 2 8] + + for {set i 0} {$i < $len} {incr i} { + set val [random_value] + lappend values $val + set model([expr {$start + $i}]) $val + } + r arset myarray $start {*}$values + } elseif {$roll < 58} { + set idx [iterator_stress_random_index $slice_size $mode] + r ardel myarray $idx + catch {unset model($idx)} + } elseif {$roll < 78} { + set args {} + set nranges [iterator_stress_rand_between 1 3] + + for {set i 0} {$i < $nranges} {incr i} { + set lo [iterator_stress_random_index $slice_size $mode] + set hi [iterator_stress_random_index $slice_size $mode] + lappend args $lo $hi + model_array_delrange model $lo $hi + } + r ardelrange myarray {*}$args + } elseif {$roll < 90} { + set base [expr {[iterator_stress_random_index $slice_size $mode] / $slice_size * $slice_size}] + set start [expr {$base + [iterator_stress_rand_between 0 [expr {$slice_size > 16 ? 16 : $slice_size - 1}]]}] + set values {} + set len [iterator_stress_rand_between 4 10] + + for {set i 0} {$i < $len} {incr i} { + set val [random_value] + lappend values $val + set model([expr {$start + $i}]) $val + } + r arset myarray $start {*}$values + } else { + set base [expr {[iterator_stress_random_index $slice_size $mode] / $slice_size * $slice_size}] + set lo [expr {$base + [iterator_stress_rand_between 0 [expr {$slice_size > 24 ? 24 : $slice_size - 1}]]}] + set hi [expr {$base + [iterator_stress_rand_between 0 [expr {$slice_size > 24 ? 
24 : $slice_size - 1}]]}] + model_array_delrange model $lo $hi + r ardelrange myarray $lo $hi + } + } + + # Random testing - most effective way to find edge case bugs + test {Random testing - staged write/delete workload with verification} { + r flushdb + expr {srand(12345)} ;# Fixed seed for reproducibility + set max_idx 5000 ;# Range of possible indices + set ops_per_stage 200 ;# Operations per stage + + # Tcl-side tracking of expected state + array set expected {} + + # 11 stages with decreasing write ratio + # Stage 0: 100% writes, Stage 10: 0% writes 100% deletes + set stages { + {100 0} + {90 10} + {80 20} + {70 30} + {60 40} + {50 50} + {40 60} + {30 70} + {20 80} + {10 90} + {0 100} + } + + set stage_num 0 + foreach stage $stages { + set write_pct [lindex $stage 0] + + for {set op 0} {$op < $ops_per_stage} {incr op} { + set roll [expr {int(rand() * 100)}] + set idx [expr {int(rand() * $max_idx)}] + + if {$roll < $write_pct} { + # Write operation with random value type + set val [random_value] + r arset myarray $idx $val + set expected($idx) $val + } else { + # Delete operation - always send to Redis, track locally + r ardel myarray $idx + if {[info exists expected($idx)]} { + unset expected($idx) + } + } + } + + # Verify entire array matches expected state + set expected_count [array size expected] + if {[r exists myarray]} { + set actual_count [r arcount myarray] + } else { + set actual_count 0 + } + + if {$expected_count != $actual_count} { + fail "Stage $stage_num: count mismatch - expected $expected_count, got $actual_count" + } + + # Verify all expected values individually + foreach idx [array names expected] { + set got [r arget myarray $idx] + if {$got ne $expected($idx)} { + fail "Stage $stage_num: idx $idx - expected '$expected($idx)', got '$got'" + } + } + + incr stage_num + } + + # Final cleanup: delete all remaining expected entries + foreach idx [array names expected] { + r ardel myarray $idx + unset expected($idx) + } + + # After cleanup, 
array should be empty/deleted + assert_equal 0 [r exists myarray] + } + + test {Random testing - large scale with RDB verification} { + r flushdb + expr {srand(54321)} ;# Fixed seed for reproducibility + set max_idx 100000 ;# Range to test multiple slices + set num_writes 2000 + + # Tcl-side tracking + array set expected {} + + # Phase 1: Random writes with mixed value types + for {set i 0} {$i < $num_writes} {incr i} { + set idx [expr {int(rand() * $max_idx)}] + set val [random_value] + r arset myarray $idx $val + set expected($idx) $val + } + + set expected_count [array size expected] + set count_before [r arcount myarray] + assert_equal $expected_count $count_before + + # Save and reload + r bgsave + waitForBgsave r + r debug reload + + # Verify count preserved + assert_equal $count_before [r arcount myarray] + + # Verify all expected values + foreach idx [array names expected] { + set got [r arget myarray $idx] + if {$got ne $expected($idx)} { + fail "After reload: idx $idx - expected '$expected($idx)', got '$got'" + } + } + + # Phase 2: Random deletes (delete half) + set keys_list [array names expected] + set delete_count [expr {[llength $keys_list] / 2}] + for {set i 0} {$i < $delete_count} {incr i} { + set idx [lindex $keys_list $i] + r ardel myarray $idx + unset expected($idx) + } + + # Verify remaining + set remaining [array size expected] + assert_equal $remaining [r arcount myarray] + + foreach idx [array names expected] { + assert_equal $expected($idx) [r arget myarray $idx] + } + } {} {needs:debug} + + test {Random testing - iterator model stress across dense sparse and superdir} { + set orig_slice_size [lindex [r config get array-slice-size] 1] + set orig_kmax [lindex [r config get array-sparse-kmax] 1] + set orig_kmin [lindex [r config get array-sparse-kmin] 1] + set scenarios { + {mixed-default 4096 10 5 mixed 120 111} + {small-slices 256 6 3 dense 140 333} + {superdir-heavy 1024 8 4 superdir 160 555} + {superdir-heavy 1024 8 4 superdir 160 666} + } 
+ + set err [catch { + foreach scenario $scenarios { + lassign $scenario name slice_size kmax kmin mode steps seed + r flushdb + r config set array-sparse-kmax $kmax + r config set array-sparse-kmin $kmin + r config set array-slice-size $slice_size + expr {srand($seed)} + catch {array unset model} + array set model {} + + # Start each scenario with the exact superdir shape that + # previously exposed iterator progress bugs. + r arset myarray 43 a + set model(43) a + r arset myarray [expr {$slice_size + 490}] b + set model([expr {$slice_size + 490}]) b + r arset myarray 19245258 c + set model(19245258) c + + iterator_stress_validate r model $slice_size $mode "$name/$seed" -1 1 + + for {set step 0} {$step < $steps} {incr step} { + iterator_stress_apply_operation r model $slice_size $mode + iterator_stress_validate r model $slice_size $mode \ + "$name/$seed" $step [expr {$step % 20 == 0}] + } + } + } msg opts] + + r flushdb + r config set array-sparse-kmax $orig_kmax + r config set array-sparse-kmin $orig_kmin + r config set array-slice-size $orig_slice_size + + if {$err} { + return -options $opts $msg + } + } + + # ========================================================================= + # Circular buffer (ring buffer) comprehensive tests + # ========================================================================= + + test {Circular buffer - ARRING basic wraparound} { + r del myarray + # Insert 20 values with MOD 10 - should wrap around twice + for {set i 0} {$i < 20} {incr i} { + set result [r arring myarray 10 "val$i"] + assert_equal [expr {$i % 10}] $result + } + # Should have exactly 10 elements (0-9) + assert_equal 10 [r arcount myarray] + # Values should be the last 10 inserted (val10-val19) + for {set i 0} {$i < 10} {incr i} { + assert_equal "val[expr {$i + 10}]" [r arget myarray $i] + } + } + + test {Circular buffer - ARRING with size 1} { + r del myarray + # MOD 1 means only ever keep one element at index 0 + for {set i 0} {$i < 100} {incr i} { + r arring 
myarray 1 "val$i" + } + assert_equal 1 [r arcount myarray] + assert_equal "val99" [r arget myarray 0] + } + + test {Circular buffer - ARRING preserves insert_idx through RDB} { + r del myarray + # Create a circular buffer, wrap around a few times + for {set i 0} {$i < 15} {incr i} { + r arring myarray 5 "val$i" + } + # The next ring slot is now 0 (15 % 5 = 0) + set next_before [r arnext myarray] + + # Save and reload + r bgsave + waitForBgsave r + r debug reload + + # Verify insert_idx is preserved + assert_equal $next_before [r arnext myarray] + + # Continue inserting - should continue from where it left off + r arring myarray 5 "after_reload" + # The new value must land in slot (next_before mod 5), i.e. the slot that was next before the reload + assert_equal "after_reload" [r arget myarray [expr {$next_before % 5}]] + } {} {needs:debug} + + test {Circular buffer - ARLASTITEMS with wraparound} { + r del myarray + # Create circular buffer with 8 items, MOD 5 + for {set i 0} {$i < 8} {incr i} { + r arring myarray 5 $i + } + # Slots after wrap: 0->5, 1->6, 2->7, 3->3, 4->4 (values 0-2 were overwritten) + # Next write slot = 3 (8 % 5 = 3) + + # ARLASTITEMS should return the N most recently inserted + set result [r arlastitems myarray 3] + # Last 3 inserted: 7, 6, 5 - in chronological order: 5, 6, 7 + assert_equal {5 6 7} $result + + # With REV flag + set result [r arlastitems myarray 3 REV] + assert_equal {7 6 5} $result + + # Request more items than exist + set result [r arlastitems myarray 10] + assert_equal 5 [llength $result] + } + + test {Circular buffer - ARLASTITEMS handles empty and partial cases} { + r del myarray + # Empty array + set result [r arlastitems myarray 5] + assert_equal {} $result + + # Fewer items than requested (no wraparound yet) + r arring myarray 10 a + r arring myarray 10 b + r arring myarray 10 c + + set result [r arlastitems myarray 5] + assert_equal {a b c} $result + } + + test {Circular buffer - ARNEXT tracks correctly with ARRING} { + r del myarray + # Insert with MOD, tracking position + # MOD wraps the insert
position but ARNEXT continues until next wrap + for {set i 0} {$i < 7} {incr i} { + set expected_idx [expr {$i % 4}] + set result [r arring myarray 4 $i] + assert_equal $expected_idx $result + # ARNEXT: after a wraparound insert, it's expected_idx+1 + # Otherwise it's the running counter+1 until it wraps + if {$i < 4} { + # Before first wrap, ARNEXT is i+1 + assert_equal [expr {$i + 1}] [r arnext myarray] + } else { + # After wrap, ARNEXT is (position+1) + assert_equal [expr {$expected_idx + 1}] [r arnext myarray] + } + } + } + + test {Circular buffer - ARSEEK followed by ARRING} { + r del myarray + # Start inserting + r arinsert myarray a + r arinsert myarray b + r arinsert myarray c + # insert_idx = 2, next = 3 + + # Seek to position 10 + r arseek myarray 10 + assert_equal 10 [r arnext myarray] + + # Now use MOD - should reset behavior + r arring myarray 5 x + # This should insert at index 0 (10 % 5 = 0) + assert_equal x [r arget myarray 0] + } + + test {Circular buffer - ARSEEK 0 is honored on ARRING grow} { + r del myarray + for {set i 0} {$i < 5} {incr i} { + r arring myarray 3 "ring$i" + } + + assert_equal 1 [r arseek myarray 0] + r arring myarray 8 "grown" + + # ARSEEK 0 is an explicit cursor override, so grow should not repack + # first: the next ARRING write still goes to index 0. + assert_equal "grown" [r arget myarray 0] + assert_equal "ring4" [r arget myarray 1] + assert_equal "ring2" [r arget myarray 2] + assert_equal 1 [r arnext myarray] + } + + test {Circular buffer - ARRING growth uses new capacity after wrap} { + r del myarray + for {set i 0} {$i < 8} {incr i} { + r arring myarray 5 "v$i" + } + # Current ring window contains the latest 5 values: + # v3 v4 v5 v6 v7, with insert_idx at position 2. + + r arring myarray 8 "grown" + + # Growing must compact the wrapped ring first, so the new value uses + # the newly added capacity instead of overwriting low indexes again. 
+ assert_equal "v3" [r arget myarray 0] + assert_equal "v4" [r arget myarray 1] + assert_equal "v5" [r arget myarray 2] + assert_equal "v6" [r arget myarray 3] + assert_equal "v7" [r arget myarray 4] + assert_equal "grown" [r arget myarray 5] + assert_equal 6 [r arnext myarray] + } + + test {Circular buffer - Mixed ARSET and ARRING immediately restores ring size} { + r del myarray + # Use MOD to create ring buffer + for {set i 0} {$i < 5} {incr i} { + r arring myarray 3 "ring$i" + } + # After 5 inserts with MOD 3: + # Position 0: ring0 -> ring3 (overwritten) + # Position 1: ring1 -> ring4 (overwritten) + # Position 2: ring2 + # insert_idx=1, next=2 + + # Now manually set a value outside the ring + r arset myarray 100 "outside" + + # Ring buffer values should still be there + assert_equal "ring3" [r arget myarray 0] + assert_equal "ring4" [r arget myarray 1] + assert_equal "ring2" [r arget myarray 2] + assert_equal "outside" [r arget myarray 100] + + # Continue ring buffer. The ring size should be re-established + # immediately, so values outside the 0..2 window disappear at once. 
+ r arring myarray 3 "ring5" + assert_equal 3 [r arcount myarray] + assert_equal {} [r arget myarray 100] + assert_equal "ring5" [r arget myarray 0] + } + + test {Circular buffer - insert_idx survives RDB with complex state} { + r del myarray + # Create a 50-slot circular buffer and wrap it once (100 inserts) + for {set i 0} {$i < 100} {incr i} { + # MOD 50 confines every ring write to indices 0-49 + r arring myarray 50 "v$i" + } + + set info_before [r arinfo myarray] + set next_before [r arnext myarray] + set count_before [r arcount myarray] + + # Also set some values outside the ring + r arset myarray 10000 "far_away" + + # Save and reload + r bgsave + waitForBgsave r + r debug reload + + # Verify state preserved + assert_equal $count_before [expr {[r arcount myarray] - 1}] ;# -1 for far_away + assert_equal $next_before [r arnext myarray] + assert_equal "far_away" [r arget myarray 10000] + + # Verify ring buffer content - last 50 values should be v50-v99 + for {set i 0} {$i < 50} {incr i} { + assert_equal "v[expr {$i + 50}]" [r arget myarray $i] + } + } {} {needs:debug} + + test {Circular buffer - ARLASTITEMS reverse order} { + r del myarray + # Create ring with wraparound + for {set i 0} {$i < 12} {incr i} { + r arring myarray 8 "v$i" + } + # After 12 inserts MOD 8: + # Next write slot is 12 % 8 = 4, so the last insert landed at position 3 + # Values: positions 0-3 hold v8-v11, positions 4-7 hold v4-v7 + + # ARLASTITEMS returns most recent items in chronological order + set result [r arlastitems myarray 4] + # Last 4 inserted were v11, v10, v9, v8 - returned oldest to newest + assert_equal {v8 v9 v10 v11} $result + + # With REV flag - returned newest to oldest + set result [r arlastitems myarray 4 REV] + assert_equal {v11 v10 v9 v8} $result + + # Request all items + set result [r arlastitems myarray 100] + assert_equal 8 [llength $result] + } + + test {Circular buffer - ARRING truncation when size decreases} { + r del myarray + # Create ring buffer with MOD 10 + for {set i 0} {$i < 15} {incr i} { + r arring myarray 10
"v$i" + } + # Now have 10 elements at positions 0-9 + # After 15 inserts: 0->v10, 1->v11, ..., 4->v14, 5->v5, ..., 9->v9 + assert_equal 10 [r arcount myarray] + + # Use smaller MOD - this truncates to positions 0-4 AND inserts new value + # The new insert goes to position (15 % 5) = 0, replacing v10 + r arring myarray 5 "truncated" + # Now have only 5 elements (positions 0-4), with position 0 = "truncated" + assert_equal 5 [r arcount myarray] + + # Verify values + assert_equal "truncated" [r arget myarray 0] ;# new value + assert_equal "v11" [r arget myarray 1] + assert_equal "v12" [r arget myarray 2] + assert_equal "v13" [r arget myarray 3] + assert_equal "v14" [r arget myarray 4] + + # Positions 5-9 should be empty (truncated) + assert_equal {} [r arget myarray 5] + assert_equal {} [r arget myarray 9] + } + + test {Circular buffer - ARRING shrink stops at first hole} { + r del myarray + for {set i 0} {$i < 5} {incr i} { + r arring myarray 5 "v$i" + } + + r ardel myarray 3 + r arring myarray 3 "new" + + assert_equal 2 [r arcount myarray] + assert_equal "v4" [r arget myarray 0] + assert_equal "new" [r arget myarray 1] + assert_equal {} [r arget myarray 2] + } + + test {Circular buffer - ARRING grow stops at first hole} { + r del myarray + for {set i 0} {$i < 8} {incr i} { + r arring myarray 5 "v$i" + } + + r ardel myarray 1 + r arring myarray 8 "grown" + + assert_equal 2 [r arcount myarray] + assert_equal "v7" [r arget myarray 0] + assert_equal "grown" [r arget myarray 1] + assert_equal {} [r arget myarray 2] + } + + test {Circular buffer - ARLASTITEMS with various counts and REV} { + r del myarray + # Create simple ring buffer + for {set i 0} {$i < 20} {incr i} { + r arring myarray 10 "item$i" + } + # Contains item10-item19 at positions 0-9 + + # Get exactly 1 item + assert_equal {item19} [r arlastitems myarray 1] + assert_equal {item19} [r arlastitems myarray 1 REV] + + # Get 3 items + set result [r arlastitems myarray 3] + assert_equal {item17 item18 item19} 
$result + set result [r arlastitems myarray 3 REV] + assert_equal {item19 item18 item17} $result + + # Get all 10 items + set result [r arlastitems myarray 10] + assert_equal 10 [llength $result] + assert_equal "item10" [lindex $result 0] + assert_equal "item19" [lindex $result end] + + # REV order for all items + set result [r arlastitems myarray 10 REV] + assert_equal "item19" [lindex $result 0] + assert_equal "item10" [lindex $result end] + } + + test {Circular buffer - ARLASTITEMS edge cases} { + r del myarray + # Non-existent key (the array was just deleted) - both orders return an empty list + assert_equal {} [r arlastitems myarray 5] + assert_equal {} [r arlastitems myarray 5 REV] + + # Single element - asking for more than exists still returns just that one + r arinsert myarray "only" + assert_equal {only} [r arlastitems myarray 1] + assert_equal {only} [r arlastitems myarray 10] + assert_equal {only} [r arlastitems myarray 1 REV] + + # Two elements - no wraparound yet + r arinsert myarray "second" + assert_equal {only second} [r arlastitems myarray 5] + assert_equal {second only} [r arlastitems myarray 5 REV] + } + + # ============================================================ + # Regression tests for bugs found during code review + # ============================================================ + + test {Regression #3 - arTruncate must decrement count correctly} { + r del myarray + # Fill array with 20 elements + for {set i 0} {$i < 20} {incr i} { + r arset myarray $i "val$i" + } + assert_equal 20 [r arcount myarray] + + # Use ARRING to trigger truncation + # ARSEEK 16 presumably makes 16 the next insert position (insert_idx 15); then insert with MOD 10 + r arseek myarray 16 + r arring myarray 10 "wrap" + + # After MOD 10 truncation, only indices 0-9 should exist + # The count should be <= 10 (some original values + new one) + set count [r arcount myarray] + assert_lessthan $count 11 ;# count <= 10 + + # Verify elements >= 10 are gone + assert_equal {} [r arget myarray 10] + assert_equal {} [r arget myarray 15] + assert_equal {} [r arget myarray 19] + } + + test {Regression #5 - AROP MATCH with large strings (>256 bytes)}
{ + r del myarray + # Create two distinct 300-byte strings (larger than 256 bytes) + set largestr [string repeat "x" 300] + set largestr2 [string repeat "y" 300] + + r arset myarray 0 $largestr + r arset myarray 1 "small" + r arset myarray 2 $largestr + r arset myarray 3 $largestr2 + + # MATCH should find exactly 2 occurrences of largestr + assert_equal 2 [r arop myarray 0 3 MATCH $largestr] + assert_equal 1 [r arop myarray 0 3 MATCH $largestr2] + assert_equal 1 [r arop myarray 0 3 MATCH "small"] + assert_equal 0 [r arop myarray 0 3 MATCH "notfound"] + } + + test {Regression #6 - DEBUG DIGEST with large strings (>256 bytes)} { + r del myarray + set largestr [string repeat "z" 500] + r arset myarray 0 $largestr + r arset myarray 1 "small" + r arset myarray 100 [string repeat "w" 1000] + + # Get digest - should not crash and should be deterministic + set d1 [r debug digest-value myarray] + set d2 [r debug digest-value myarray] + assert_equal $d1 $d2 "Digest should be deterministic" + + # Modify and verify digest changes + r arset myarray 0 "changed" + set d3 [r debug digest-value myarray] + if {$d1 eq $d3} { + fail "Digest should change after modification" + } + } {} {needs:debug} + + test {Regression #7 - RDB with negative integers including -1} { + r flushdb + # -1 was problematic because it became UINT64_MAX which was RDB_LENERR + r arset myarray 0 -1 + r arset myarray 1 -100 + r arset myarray 2 -9223372036854775808 ;# INT64_MIN as string + r arset myarray 3 0 + r arset myarray 4 1 + r arset myarray 5 9223372036854775807 ;# INT64_MAX as string + + set d1 [r debug digest-value myarray] + + # Save and reload + r bgsave + waitForBgsave r + r debug reload + + # Verify values survived + assert_equal -1 [r arget myarray 0] + assert_equal -100 [r arget myarray 1] + # Note: very large integers may be stored as strings; indices 2 and 5 (INT64_MIN/INT64_MAX) are not read back here and are only covered by the digest comparison below + assert_equal 0 [r arget myarray 3] + assert_equal 1 [r arget myarray 4] + + set d2 [r debug digest-value myarray] + assert_equal $d1 $d2 "Digest should match after RDB reload" +
} {} {needs:debug} + + test {Regression #10 - ARSEEK on non-existent key should not create it} { + r del myarray + # ARSEEK on non-existent key + assert_equal 0 [r arseek myarray 100] + + # Key should NOT exist + assert_equal 0 [r exists myarray] + + # Now create the array and verify ARSEEK works + r arinsert myarray "first" + assert_equal 1 [r exists myarray] + + # ARSEEK on existing key should work + assert_equal 1 [r arseek myarray 50] + r arinsert myarray "second" + assert_equal 51 [r arnext myarray] + } + + test {Regression #12 - ARMGET/ARGETRANGE return WRONGTYPE on wrong type} { + r del myarray + r set myarray "string_value" + + # ARMGET should return WRONGTYPE error + assert_error {WRONGTYPE*} {r armget myarray 0 1 2} + + # ARGETRANGE should return WRONGTYPE error + assert_error {WRONGTYPE*} {r argetrange myarray 0 10} + + # Cleanup + r del myarray + } + + test {Regression - RDB preserves exact numeric string forms} { + r flushdb + set values [list \ + 0 "3.141592653589793" \ + 1 "-2.718281828459045" \ + 2 "1.0e-10" \ + 3 "1.0e+100"] + + foreach {idx val} $values { + r arset myarray $idx $val + } + + foreach {idx val} $values { + assert_equal $val [r arget myarray $idx] + } + + # Save and reload + r bgsave + waitForBgsave r + r debug reload + + foreach {idx val} $values { + assert_equal $val [r arget myarray $idx] + } + } {} {needs:debug} + + test {Whole-number floats with .0 suffix encode as inline floats} { + # Values like "1.0" should be encoded as inline floats, not heap strings. + # This tests the ".0" suffix optimization in arTryEncodeFloat. 
+ r del myarray + + # Various whole-number floats that should round-trip with ".0" + r arset myarray 0 1.0 + r arset myarray 1 -1.0 + r arset myarray 2 0.0 + r arset myarray 3 42.0 + r arset myarray 4 -42.0 + r arset myarray 5 1000000.0 + r arset myarray 6 -9999999.0 + + # Verify exact round-trip (the ".0" must be preserved) + assert_equal "1.0" [r arget myarray 0] + assert_equal "-1.0" [r arget myarray 1] + assert_equal "0.0" [r arget myarray 2] + assert_equal "42.0" [r arget myarray 3] + assert_equal "-42.0" [r arget myarray 4] + assert_equal "1000000.0" [r arget myarray 5] + assert_equal "-9999999.0" [r arget myarray 6] + + # Verify these survive RDB save/reload. NOTE(review): only the returned text is compared; the internal encoding is not inspected here + r bgsave + waitForBgsave r + r debug reload + + assert_equal "1.0" [r arget myarray 0] + assert_equal "-1.0" [r arget myarray 1] + assert_equal "0.0" [r arget myarray 2] + assert_equal "42.0" [r arget myarray 3] + assert_equal "-42.0" [r arget myarray 4] + assert_equal "1000000.0" [r arget myarray 5] + assert_equal "-9999999.0" [r arget myarray 6] + } {} {needs:debug} + + test {Integer values without .0 still encode as integers, not floats} { + # Ensure "1" (without decimal) reads back as "1" and never gains a ".0"; the internal encoding itself is not inspected + r del myarray + + r arset myarray 0 1 + r arset myarray 1 -1 + r arset myarray 2 0 + r arset myarray 3 42 + r arset myarray 4 9999999 + + # Values without ".0" should keep their integer text form + assert_equal "1" [r arget myarray 0] + assert_equal "-1" [r arget myarray 1] + assert_equal "0" [r arget myarray 2] + assert_equal "42" [r arget myarray 3] + assert_equal "9999999" [r arget myarray 4] + + # Verify RDB round-trip preserves the integer text form + r bgsave + waitForBgsave r + r debug reload + + assert_equal "1" [r arget myarray 0] + assert_equal "-1" [r arget myarray 1] + assert_equal "0" [r arget myarray 2] + assert_equal "42" [r arget myarray 3] + assert_equal "9999999" [r arget myarray 4] + } {} {needs:debug} + + test {AROP on whole-number floats works correctly} { +
# Verify AROP aggregation works on values encoded with the .0 optimization + r del myarray + + r arset myarray 0 10.0 + r arset myarray 1 20.0 + r arset myarray 2 30.0 + + # SUM should work on whole-number floats (AROP returns computed values) + assert_equal 60 [r arop myarray 0 2 SUM] + + # MIN/MAX should work + assert_equal 10 [r arop myarray 0 2 MIN] + assert_equal 30 [r arop myarray 0 2 MAX] + + # MATCH should find the encoded values + assert_equal 1 [r arop myarray 0 2 MATCH 10.0] + assert_equal 1 [r arop myarray 0 2 MATCH 20.0] + } + + test {Exact string recovery survives AOF rewrite} { + r flushdb + set longstr [string repeat x 100] + set values [list \ + 0 "1.0" \ + 1 "-1.0" \ + 2 "42.0" \ + 3 "hello" \ + 4 "12345" \ + 5 "-0.0" \ + 6 "0.00" \ + 7 "10.500" \ + 8 "001.25" \ + 9 "1.0e-10" \ + 10 "1.0e+100" \ + 11 $longstr \ + 12 ""] + + foreach {idx val} $values { + r arset myarray $idx $val + } + + foreach {idx val} $values { + assert_equal $val [r arget myarray $idx] + } + + # Trigger AOF rewrite and reload + r bgrewriteaof + waitForBgrewriteaof r + r debug loadaof + + foreach {idx val} $values { + assert_equal $val [r arget myarray $idx] + } + } {} {needs:debug} + + test {Regression - CONFIG GET/SET for array settings} { + # Verify config options exist and are readable + set slice_size [lindex [r config get array-slice-size] 1] + set sparse_kmax [lindex [r config get array-sparse-kmax] 1] + set sparse_kmin [lindex [r config get array-sparse-kmin] 1] + + # Verify defaults + assert_equal 4096 $slice_size + assert_equal 10 $sparse_kmax + assert_equal 5 $sparse_kmin + + # sparse-kmax and sparse-kmin should be modifiable + r config set array-sparse-kmax 20 + assert_equal 20 [lindex [r config get array-sparse-kmax] 1] + r config set array-sparse-kmax $sparse_kmax ;# restore + + r config set array-sparse-kmin 8 + assert_equal 8 [lindex [r config get array-sparse-kmin] 1] + r config set array-sparse-kmin $sparse_kmin ;# restore + + # slice-size is modifiable but 
must be a power of two + r config set array-slice-size 8192 + assert_equal 8192 [lindex [r config get array-slice-size] 1] + r config set array-slice-size $slice_size ;# restore + + # Non-power-of-two should error + assert_error {*power of two*} {r config set array-slice-size 5000} + } + + test {Arrays created with different slice sizes work after config change} { + # Create an array with current slice size + r del myarray + set orig_size [lindex [r config get array-slice-size] 1] + + # Create array and populate it + for {set i 0} {$i < 10000} {incr i 1000} { + r arset myarray $i "value_$i" + } + set orig_count [r arcount myarray] + + # Change slice size - existing arrays should keep working + r config set array-slice-size 8192 + + # Verify old array still works + assert_equal $orig_count [r arcount myarray] + assert_equal "value_0" [r arget myarray 0] + assert_equal "value_5000" [r arget myarray 5000] + assert_equal "value_9000" [r arget myarray 9000] + + # Create new array with new slice size + r del newarray + r arset newarray 0 "new_value" + assert_equal "new_value" [r arget newarray 0] + + # Restore config + r config set array-slice-size $orig_size + r del myarray + r del newarray + } + + test {Regression - AOF rewrite with superdir mode (high indices)} { + # This tests the fix for AOF rewrite not iterating superdir blocks. + # With slice_size=4096, slice_id 2048 starts at index 8388608. + # Indices >= 8388608 trigger superdir mode. 
+ + r del aoftest + + # Create array with elements that trigger superdir mode + r arset aoftest 0 base + r arset aoftest 8388608 triggers_superdir + r arset aoftest 50000000 high + r arset aoftest 100000000 very_high + + assert_equal 4 [r arcount aoftest] + + # Verify superdir mode is active (directory-size shows number of blocks) + set info [r arinfo aoftest] + set dir_size [dict get $info directory-size] + # NOTE(review): these indices span multiple superdir blocks, so dir_size is expected to be > 1, but the assertion below only requires >= 1 - confirm which is intended + assert {$dir_size >= 1} + + # Trigger AOF rewrite and reload (same pattern as other AOF tests) + r bgrewriteaof + waitForBgrewriteaof r + r debug loadaof + + # Verify data survived AOF rewrite and reload + assert_equal 4 [r arcount aoftest] + assert_equal "base" [r arget aoftest 0] + assert_equal "triggers_superdir" [r arget aoftest 8388608] + assert_equal "high" [r arget aoftest 50000000] + assert_equal "very_high" [r arget aoftest 100000000] + + assert_equal 1 [r del aoftest] + } {} {needs:debug} + + # ========================================================================= + # Superdir command coverage + # ========================================================================= + + test {ARGETRANGE works across a superdir slice boundary} { + r del myarray + + # Cross slice 2047 -> 2048. Inserting the high index forces the array + # into superdir mode, but the range itself is still short. + r arset myarray 8388607 "left" + r arset myarray 8388608 "mid" + r arset myarray 8388609 "right" + + assert_equal {left mid right} [r argetrange myarray 8388607 8388609] + assert_equal {right mid left} [r argetrange myarray 8388609 8388607] + } + + test {ARSET pre-promotes sparse slice in superdir mode} { + r del myarray + set kmax [lindex [r config get array-sparse-kmax] 1] + assert {$kmax >= 4} + + # Build a sparse slice with kmax-1 existing elements at even offsets. + # The later range write covers offsets 0..kmax-1, so some of these + # positions are already filled and some are new.
+ for {set i 0} {$i < $kmax - 1} {incr i} { + set off [expr {$i * 2}] + r arset myarray [expr {8388608 + $off}] "old$off" + } + + set info [r arinfo myarray FULL] + assert_equal 0 [dict get $info dense-slices] + assert_equal 1 [dict get $info sparse-slices] + + # The range has kmax slots, while the slice already contains kmax-1 + # elements spread across the slice. This keeps range_size <= kmax, so + # the helper must take the count+new_elements path in order to decide + # the promotion. + set values {} + set existing_in_range 0 + for {set off 0} {$off < $kmax} {incr off} { + lappend values "n$off" + if {$off % 2 == 0 && $off <= 2 * ($kmax - 2)} { + incr existing_in_range + } + } + set expected_new [expr {$kmax - $existing_in_range}] + assert_equal $expected_new [r arset myarray 8388608 {*}$values] + + set info [r arinfo myarray FULL] + assert_equal 1 [dict get $info dense-slices] + assert_equal 0 [dict get $info sparse-slices] + assert_equal $values [r argetrange myarray 8388608 [expr {8388608 + $kmax - 1}]] + assert_equal "old[expr {2 * ($kmax - 2)}]" [r arget myarray [expr {8388608 + 2 * ($kmax - 2)}]] + } + + # ========================================================================= + # Range delete + iterator tests (dense→sparse demotion, superdir, sparse) + # ========================================================================= + + test {ARDELRANGE triggers dense to sparse demotion} { + r del myarray + # Pin config to ensure test doesn't break if defaults change + set orig_kmin [lindex [r config get array-sparse-kmin] 1] + r config set array-sparse-kmin 5 + + # Create a dense slice with 50 elements + for {set i 0} {$i < 50} {incr i} { + r arset myarray $i "val$i" + } + assert_equal 50 [r arcount myarray] + + # Verify it's dense + set info [r arinfo myarray FULL] + assert_equal 1 [dict get $info dense-slices] + assert_equal 0 [dict get $info sparse-slices] + + # Delete most elements with ARDELRANGE, leaving only 3 (below kmin=5) + assert_equal 47 [r 
ardelrange myarray 3 49] + assert_equal 3 [r arcount myarray] + + # Verify demotion to sparse + set info [r arinfo myarray FULL] + assert_equal 0 [dict get $info dense-slices] + assert_equal 1 [dict get $info sparse-slices] + + # Verify remaining elements + assert_equal "val0" [r arget myarray 0] + assert_equal "val1" [r arget myarray 1] + assert_equal "val2" [r arget myarray 2] + + r config set array-sparse-kmin $orig_kmin + } + + test {ARDELRANGE partial delete preserves dense then demotes} { + r del myarray + # Pin config + set orig_kmin [lindex [r config get array-sparse-kmin] 1] + r config set array-sparse-kmin 5 + + # Create dense slice + for {set i 0} {$i < 40} {incr i} { + r arset myarray $i $i + } + + # Delete some but not enough to trigger demotion (keep 10 > kmin=5) + assert_equal 30 [r ardelrange myarray 10 39] + assert_equal 10 [r arcount myarray] + + set info [r arinfo myarray FULL] + assert_equal 1 [dict get $info dense-slices] + + # Now delete more to trigger demotion + assert_equal 6 [r ardelrange myarray 4 9] + assert_equal 4 [r arcount myarray] + + set info [r arinfo myarray FULL] + assert_equal 0 [dict get $info dense-slices] + assert_equal 1 [dict get $info sparse-slices] + + r config set array-sparse-kmin $orig_kmin + } + + test {ARDELRANGE deletes full slices within superdir block} { + r del myarray + # With slice_size=4096: + # - Slice 2048 starts at index 8388608 + # - Slice 2049 starts at index 8392704 + # - Both are in superdir block 1 + + # Create elements in two adjacent slices within same superdir block + r arset myarray 8388608 "slice2048_a" + r arset myarray 8388700 "slice2048_b" + r arset myarray 8392704 "slice2049_a" + r arset myarray 8392800 "slice2049_b" + # And one element in a different block for reference + r arset myarray 0 "slice0" + + assert_equal 5 [r arcount myarray] + + # Delete range that fully covers both slices 2048 and 2049 + # This should trigger full-slice deletion (not element-by-element) + assert_equal 4 [r 
ardelrange myarray 8388608 8396799] + assert_equal 1 [r arcount myarray] + + # Verify only slice0 element remains + assert_equal "slice0" [r arget myarray 0] + assert_equal {} [r arget myarray 8388608] + assert_equal {} [r arget myarray 8392704] + + r del myarray + } + + test {ARDELRANGE spanning multiple superdir blocks} { + r del myarray + # Superdir block boundaries with slice_size=4096: + # - Block 0: slices 0-2047 (indices 0 - 8388607) + # - Block 1: slices 2048-4095 (indices 8388608 - 16777215) + # - Block 2: slices 4096+ (indices 16777216+) + + # Create elements across three blocks + r arset myarray 100 "block0" + r arset myarray 8388608 "block1_start" + r arset myarray 12000000 "block1_mid" + r arset myarray 16777200 "block1_end" + r arset myarray 16777216 "block2_start" + r arset myarray 20000000 "block2_mid" + + assert_equal 6 [r arcount myarray] + + # Delete range spanning from block1 into block2 + # This exercises cross-block deletion + assert_equal 4 [r ardelrange myarray 8388608 18000000] + assert_equal 2 [r arcount myarray] + + # Verify block0 and remaining block2 element + assert_equal "block0" [r arget myarray 100] + assert_equal "block2_mid" [r arget myarray 20000000] + assert_equal {} [r arget myarray 8388608] + assert_equal {} [r arget myarray 16777216] + + r del myarray + } + + test {ARDELRANGE superdir middle range with missing upper block} { + r del myarray + # Occupied blocks: + # - block 0: boundary lo_slice + # - block 1: middle full slices to delete + # - block 3: boundary hi_slice + # block 2 is intentionally empty, so the upper lower-bound search + # must stop at the insertion point rather than on an exact match. 
+ r arset myarray 8388590 "block0_keep" + r arset myarray 8388608 "block1_a" + r arset myarray 8392704 "block1_b" + r arset myarray 25165825 "block3_keep" + + assert_equal 4 [r arcount myarray] + assert_equal 2 [r ardelrange myarray 8388595 25165824] + assert_equal 2 [r arcount myarray] + + assert_equal "block0_keep" [r arget myarray 8388590] + assert_equal {} [r arget myarray 8388608] + assert_equal {} [r arget myarray 8392704] + assert_equal "block3_keep" [r arget myarray 25165825] + } + + test {ARDELRANGE superdir with empty middle block interval} { + r del myarray + # Only the boundary slices are populated. The superdir middle interval + # is empty, so the block loop must resolve to [start, end) = empty. + r arset myarray 8388590 "block0_keep" + r arset myarray 8388607 "block0_del" + r arset myarray 25165824 "block3_del" + r arset myarray 25165825 "block3_keep" + + assert_equal 4 [r arcount myarray] + assert_equal 2 [r ardelrange myarray 8388600 25165824] + assert_equal 2 [r arcount myarray] + + assert_equal "block0_keep" [r arget myarray 8388590] + assert_equal {} [r arget myarray 8388607] + assert_equal {} [r arget myarray 25165824] + assert_equal "block3_keep" [r arget myarray 25165825] + } + + test {ARDELRANGE with multiple ranges in single call} { + r del myarray + for {set i 0} {$i < 20} {incr i} { + r arset myarray $i "val$i" + } + assert_equal 20 [r arcount myarray] + + # Delete two separate ranges in one command + # Ranges: [2,4] and [10,14] + assert_equal 8 [r ardelrange myarray 2 4 10 14] + assert_equal 12 [r arcount myarray] + + # Verify correct elements deleted + assert_equal "val0" [r arget myarray 0] + assert_equal "val1" [r arget myarray 1] + assert_equal {} [r arget myarray 2] + assert_equal {} [r arget myarray 3] + assert_equal {} [r arget myarray 4] + assert_equal "val5" [r arget myarray 5] + assert_equal "val9" [r arget myarray 9] + assert_equal {} [r arget myarray 10] + assert_equal {} [r arget myarray 14] + assert_equal "val15" [r arget 
myarray 15] + } + + test {ARDELRANGE with overlapping ranges} { + r del myarray + for {set i 0} {$i < 20} {incr i} { + r arset myarray $i "val$i" + } + + # Overlapping ranges: [5,12] and [8,15] + # Together they cover [5,15] = 11 elements + # The second range overlaps the already-deleted [8,12]; those must not be counted twice, so the reply is 11 + assert_equal 11 [r ardelrange myarray 5 12 8 15] + assert_equal 9 [r arcount myarray] + + assert_equal "val4" [r arget myarray 4] + assert_equal {} [r arget myarray 5] + assert_equal {} [r arget myarray 12] + assert_equal {} [r arget myarray 15] + assert_equal "val16" [r arget myarray 16] + } + + test {ARDELRANGE sparse slice middle-span deletion} { + r del myarray + # Create sparse slice with specific offsets + r arset myarray 10 "a" + r arset myarray 20 "b" + r arset myarray 30 "c" + r arset myarray 40 "d" + r arset myarray 50 "e" + + assert_equal 5 [r arcount myarray] + + # Delete a middle contiguous sparse span. + assert_equal 3 [r ardelrange myarray 20 40] + assert_equal 2 [r arcount myarray] + + # Verify correct elements remain + assert_equal "a" [r arget myarray 10] + assert_equal {} [r arget myarray 20] + assert_equal {} [r arget myarray 30] + assert_equal {} [r arget myarray 40] + assert_equal "e" [r arget myarray 50] + } + + test {ARDELRANGE sparse with non-contiguous deletions} { + r del myarray + # Sparse elements at various offsets + r arset myarray 5 "v5" + r arset myarray 15 "v15" + r arset myarray 25 "v25" + r arset myarray 35 "v35" + r arset myarray 45 "v45" + + # Delete [10,30]: only indices 15 and 25 fall inside, so 2 elements are removed + assert_equal 2 [r ardelrange myarray 10 30] + assert_equal 3 [r arcount myarray] + + assert_equal "v5" [r arget myarray 5] + assert_equal {} [r arget myarray 15] + assert_equal {} [r arget myarray 25] + assert_equal "v35" [r arget myarray 35] + assert_equal "v45" [r arget myarray 45] + } + + test {ARDELRANGE sparse prefix span deletion} { + r del myarray + r arset myarray 10 "a" + r arset myarray 20 "b" + r arset myarray 30 "c" + r arset myarray 40 "d" + r
arset myarray 40 "d" + r arset myarray 50 "e" + + # Delete the sparse prefix span: first == 0, last in the middle. + assert_equal 2 [r ardelrange myarray 0 25] + assert_equal 3 [r arcount myarray] + + assert_equal {} [r arget myarray 10] + assert_equal {} [r arget myarray 20] + assert_equal "c" [r arget myarray 30] + assert_equal "d" [r arget myarray 40] + assert_equal "e" [r arget myarray 50] + } + + test {ARDELRANGE sparse suffix span deletion} { + r del myarray + r arset myarray 10 "a" + r arset myarray 20 "b" + r arset myarray 30 "c" + r arset myarray 40 "d" + r arset myarray 50 "e" + + # Delete the sparse suffix span: first in the middle, last == count. + assert_equal 2 [r ardelrange myarray 35 100] + assert_equal 3 [r arcount myarray] + + assert_equal "a" [r arget myarray 10] + assert_equal "b" [r arget myarray 20] + assert_equal "c" [r arget myarray 30] + assert_equal {} [r arget myarray 40] + assert_equal {} [r arget myarray 50] + } + + test {ARDELRANGE sparse whole-slice deletion} { + r del myarray + r arset myarray 10 "a" + r arset myarray 20 "b" + r arset myarray 30 "c" + r arset myarray 40 "d" + r arset myarray 50 "e" + + # Delete the whole sparse slice: first == 0, last == count. + assert_equal 5 [r ardelrange myarray 0 100] + assert_equal 0 [r exists myarray] + } + + test {ARDELRANGE sparse no-hit range} { + r del myarray + r arset myarray 10 "a" + r arset myarray 20 "b" + r arset myarray 30 "c" + r arset myarray 40 "d" + r arset myarray 50 "e" + + # Delete a range that falls strictly between two sparse offsets. 
+ assert_equal 0 [r ardelrange myarray 11 19] + assert_equal 5 [r arcount myarray] + + assert_equal "a" [r arget myarray 10] + assert_equal "b" [r arget myarray 20] + assert_equal "c" [r arget myarray 30] + assert_equal "d" [r arget myarray 40] + assert_equal "e" [r arget myarray 50] + } + + test {ARDELRANGE sparse single edge deletions} { + r del myarray + r arset myarray 10 "a" + r arset myarray 20 "b" + r arset myarray 30 "c" + r arset myarray 40 "d" + r arset myarray 50 "e" + + # Delete exactly the first sparse element, then exactly the last one. + assert_equal 1 [r ardelrange myarray 10 10] + assert_equal 4 [r arcount myarray] + assert_equal {} [r arget myarray 10] + assert_equal "b" [r arget myarray 20] + assert_equal "c" [r arget myarray 30] + assert_equal "d" [r arget myarray 40] + assert_equal "e" [r arget myarray 50] + + assert_equal 1 [r ardelrange myarray 50 50] + assert_equal 3 [r arcount myarray] + assert_equal "b" [r arget myarray 20] + assert_equal "c" [r arget myarray 30] + assert_equal "d" [r arget myarray 40] + assert_equal {} [r arget myarray 50] + } + + test {Random testing - blackbox ARDELRANGE model stress} { + r flushdb + expr {srand(24680)} + array set model_state {} + + for {set step 0} {$step < 400} {incr step} { + set roll [expr {int(rand() * 100)}] + + if {$roll < 50} { + set idx [random_array_index] + set val [random_value] + r arset myarray $idx $val + set model_state($idx) $val + } elseif {$roll < 70} { + set idx [random_array_index] + set expected_deleted 0 + if {[info exists model_state($idx)]} { + unset model_state($idx) + set expected_deleted 1 + } + assert_equal $expected_deleted [r ardel myarray $idx] + } else { + set args {} + set expected_deleted 0 + set nranges [expr {int(rand() * 3) + 1}] + + for {set i 0} {$i < $nranges} {incr i} { + set lo [random_array_index] + set hi [random_array_index] + lappend args $lo $hi + incr expected_deleted [model_array_delrange model_state $lo $hi] + } + + assert_equal $expected_deleted [r 
ardelrange myarray {*}$args] + } + + if {$step % 25 == 0 || $step == 399} { + set expected_scan [model_array_scan model_state] + set expected_count [array size model_state] + + if {$expected_count == 0} { + assert_equal 0 [r exists myarray] + assert_equal {} [r arscan myarray 0 30000000] + } else { + assert_equal $expected_count [r arcount myarray] + assert_equal $expected_scan [r arscan myarray 0 30000000] + } + + for {set probe 0} {$probe < 20} {incr probe} { + set idx [random_array_index] + if {[info exists model_state($idx)]} { + assert_equal $model_state($idx) [r arget myarray $idx] + } else { + assert_equal {} [r arget myarray $idx] + } + } + } + } + } + + test {ARSCAN after ARDELRANGE with demotion} { + r del myarray + # Create dense + for {set i 0} {$i < 30} {incr i} { + r arset myarray $i "val$i" + } + + # Delete most, triggering demotion + r ardelrange myarray 4 29 + + # ARSCAN should find remaining elements + set result [r arscan myarray 0 100] + assert_equal 4 [llength $result] + assert_equal {{0 val0} {1 val1} {2 val2} {3 val3}} $result + + # Reverse scan + set result [r arscan myarray 100 0] + assert_equal {{3 val3} {2 val2} {1 val1} {0 val0}} $result + } + + test {ARSCAN with LIMIT after range delete} { + r del myarray + for {set i 0} {$i < 20} {incr i} { + r arset myarray $i $i + } + + # Delete some in the middle + r ardelrange myarray 5 14 + + # Scan with limit + set result [r arscan myarray 0 100 LIMIT 3] + assert_equal 3 [llength $result] + assert_equal {{0 0} {1 1} {2 2}} $result + } + + test {AROP after ARDELRANGE across multiple slices} { + r del myarray + # Create elements across slice boundaries (slice_size=4096) + for {set i 0} {$i < 10} {incr i} { + r arset myarray $i $i + } + for {set i 4096} {$i < 4106} {incr i} { + r arset myarray $i $i + } + + assert_equal 20 [r arcount myarray] + + # Delete first slice partially + r ardelrange myarray 5 9 + + # AROP SUM should work across slices + # Remaining: 0+1+2+3+4 + 4096..4105 = 10 + 
sum(4096..4105) + # sum(4096..4105) = (4096+4105)*10/2 = 41005 + set sum [r arop myarray 0 5000 SUM] + assert_equal 41015 $sum + + # AROP USED + assert_equal 15 [r arop myarray 0 5000 USED] + + # AROP MIN/MAX + assert_equal 0 [r arop myarray 0 5000 MIN] + assert_equal 4105 [r arop myarray 0 5000 MAX] + } + + test {AROP MATCH after dense demotion} { + r del myarray + # Create dense with repeated values + for {set i 0} {$i < 30} {incr i} { + r arset myarray $i "target" + } + r arset myarray 2 "other" + + # Delete most to trigger demotion, keep indices 0-3 + # After delete: 0=target, 1=target, 2=other, 3=target + r ardelrange myarray 4 29 + + # Verify demotion happened + set info [r arinfo myarray FULL] + assert_equal 0 [dict get $info dense-slices] + assert_equal 1 [dict get $info sparse-slices] + + # Count matches in sparse slice (3 "target" values) + assert_equal 3 [r arop myarray 0 100 MATCH target] + } + + test {ARSCAN over superdir blocks} { + r del myarray + # Elements in different superdir blocks + r arset myarray 0 "first" + r arset myarray 8388608 "second" + r arset myarray 16777216 "third" + + # Scan entire range + set result [r arscan myarray 0 20000000] + assert_equal 3 [llength $result] + assert_equal {0 first} [lindex $result 0] + assert_equal {8388608 second} [lindex $result 1] + assert_equal {16777216 third} [lindex $result 2] + + # Reverse scan + set result [r arscan myarray 20000000 0] + assert_equal {16777216 third} [lindex $result 0] + assert_equal {8388608 second} [lindex $result 1] + assert_equal {0 first} [lindex $result 2] + + r del myarray + } + + test {Iterator commands do not rescan exhausted superdir blocks} { + r del myarray + r arset myarray 43 "a" + r arset myarray 4586 "b" + r arset myarray 19245258 "c" + + assert_equal {{43 a} {4586 b} {19245258 c}} \ + [r arscan myarray 0 30000000 LIMIT 8] + assert_equal {{19245258 c}} \ + [r argrep myarray 0 30000000 EXACT c WITHVALUES LIMIT 4] + assert_equal 3 [r arop myarray 0 30000000 USED] + } + 
+ test {AROP over superdir with partial range} { + r del myarray + r arset myarray 0 10 + r arset myarray 100 20 + r arset myarray 8388608 30 + r arset myarray 8388700 40 + r arset myarray 16777216 50 + + # SUM only in first block + assert_equal 30 [r arop myarray 0 1000 SUM] + + # SUM spanning blocks + assert_equal 150 [r arop myarray 0 20000000 SUM] + + # USED in specific range + assert_equal 2 [r arop myarray 8388600 8388800 USED] + + r del myarray + } + + test {ARDELRANGE delete entire slice then verify iteration} { + r del myarray + # Two slices + for {set i 0} {$i < 10} {incr i} { + r arset myarray $i "slice0_$i" + } + for {set i 4096} {$i < 4106} {incr i} { + r arset myarray $i "slice1_$i" + } + + # Delete entire first slice + assert_equal 10 [r ardelrange myarray 0 4095] + assert_equal 10 [r arcount myarray] + + # ARSCAN should only find second slice elements + set result [r arscan myarray 0 5000] + assert_equal 10 [llength $result] + assert_equal {4096 slice1_4096} [lindex $result 0] + } + +} + +# Test loading a 32-bit generated RDB on the current architecture. +# The RDB file contains arrays exercising all tagged pointer encodings: +# immediate ints (including 30-bit boundary values), inline floats, +# small strings, arString heap strings, mixed types, sparse indices, +# and insert_idx preservation. 
+set server_path [tmpdir "server.array-32bit-rdb-test"] +exec cp [file join [pwd] tests/assets/array-32bit.rdb] $server_path + +start_server [list overrides [list "dir" $server_path "dbfilename" "array-32bit.rdb"] tags {"array external:skip"}] { + + test {Load 32-bit RDB - integer encodings} { + r select 0 + # Inline ints and boundary values + assert_equal 0 [r arget ints 0] + assert_equal 1 [r arget ints 1] + assert_equal -1 [r arget ints 2] + assert_equal 42 [r arget ints 3] + assert_equal -42 [r arget ints 4] + # 30-bit int boundary (max/min for 32-bit tagged ints) + assert_equal 536870911 [r arget ints 5] + assert_equal -536870912 [r arget ints 6] + # Values beyond 30-bit range (arString on 32-bit, re-encoded on load) + assert_equal 536870912 [r arget ints 7] + assert_equal -536870913 [r arget ints 8] + assert_equal 2147483647 [r arget ints 9] + assert_equal -2147483648 [r arget ints 10] + assert_equal 1000000000 [r arget ints 11] + assert_equal 999999999 [r arget ints 12] + assert_equal 100 [r arget ints 13] + assert_equal 14 [r arcount ints] + } + + test {Load 32-bit RDB - float encodings} { + r select 0 + assert_equal 1.0 [r arget floats 0] + assert_equal -1.0 [r arget floats 1] + assert_equal 3.14 [r arget floats 2] + assert_equal 0.5 [r arget floats 3] + assert_equal -0.5 [r arget floats 4] + assert_equal 0.25 [r arget floats 5] + assert_equal 100.0 [r arget floats 6] + assert_equal -100.0 [r arget floats 7] + assert_equal 1.5 [r arget floats 8] + assert_equal 1.75 [r arget floats 9] + assert_equal 0.1 [r arget floats 10] + assert_equal 1234.5 [r arget floats 11] + assert_equal 0.0625 [r arget floats 12] + assert_equal 999999.0 [r arget floats 13] + assert_equal 1.23456789012 [r arget floats 14] + assert_equal 15 [r arcount floats] + } + + test {Load 32-bit RDB - string encodings} { + r select 0 + # Empty string, 1-3 byte inline (smallstr on 32-bit), + # 4-7 byte (smallstr on 64-bit only, arString on 32-bit), + # 8+ byte (always arString) + assert_equal {} 
[r arget strs 0] + assert_equal a [r arget strs 1] + assert_equal ab [r arget strs 2] + assert_equal abc [r arget strs 3] + assert_equal abcd [r arget strs 4] + assert_equal abcde [r arget strs 5] + assert_equal abcdef [r arget strs 6] + assert_equal abcdefg [r arget strs 7] + assert_equal abcdefgh [r arget strs 8] + assert_equal {hello world} [r arget strs 9] + assert_equal {this is a longer string for testing} [r arget strs 10] + assert_equal x [r arget strs 11] + assert_equal xy [r arget strs 12] + assert_equal xyz [r arget strs 13] + assert_equal 14 [r arcount strs] + } + + test {Load 32-bit RDB - mixed type encodings} { + r select 0 + assert_equal 42 [r arget mixed 0] + assert_equal 3.14 [r arget mixed 1] + assert_equal hi [r arget mixed 2] + assert_equal -536870912 [r arget mixed 3] + assert_equal 0.5 [r arget mixed 4] + assert_equal abcdefghij [r arget mixed 5] + assert_equal 536870911 [r arget mixed 6] + assert_equal -1.5 [r arget mixed 7] + assert_equal ab [r arget mixed 8] + assert_equal 0 [r arget mixed 9] + assert_equal 1.0 [r arget mixed 10] + assert_equal hello [r arget mixed 11] + assert_equal 2147483647 [r arget mixed 12] + assert_equal 0.25 [r arget mixed 13] + assert_equal xyz [r arget mixed 14] + assert_equal 15 [r arcount mixed] + } + + test {Load 32-bit RDB - sparse indices across slices} { + r select 0 + assert_equal first [r arget sparse 0] + assert_equal slice0end [r arget sparse 4095] + assert_equal slice1start [r arget sparse 4096] + assert_equal slice1end [r arget sparse 8191] + assert_equal 42 [r arget sparse 10000] + assert_equal 3.14 [r arget sparse 50000] + assert_equal hello [r arget sparse 100000] + assert_equal 7 [r arcount sparse] + } + + test {Load 32-bit RDB - insert_idx preservation} { + r select 0 + assert_equal one [r arget withinsert 0] + assert_equal two [r arget withinsert 1] + assert_equal three [r arget withinsert 2] + assert_equal four [r arget withinsert 3] + assert_equal five [r arget withinsert 4] + assert_equal 5 [r 
arcount withinsert] + # Verify insert_idx was preserved: next insert should go at index 5 + r arinsert withinsert six + assert_equal six [r arget withinsert 5] + } + + test {Load 32-bit RDB - re-save and reload cycle} { + r select 0 + # Save from 64-bit, reload, verify integrity + r save + r debug reload + foreach {idx value} { + 0 0 1 1 2 -1 3 42 4 -42 + 5 536870911 6 -536870912 7 536870912 8 -536870913 + 9 2147483647 10 -2147483648 11 1000000000 12 999999999 13 100 + } { + assert_equal $value [r arget ints $idx] + } + assert_equal 14 [r arcount ints] + + foreach {idx value} { + 0 1.0 1 -1.0 2 3.14 3 0.5 4 -0.5 + 5 0.25 6 100.0 7 -100.0 8 1.5 9 1.75 + 10 0.1 11 1234.5 12 0.0625 13 999999.0 14 1.23456789012 + } { + assert_equal $value [r arget floats $idx] + } + assert_equal 15 [r arcount floats] + + foreach {idx value} { + 0 {} 1 a 2 ab 3 abc 4 abcd 5 abcde 6 abcdef 7 abcdefg + 8 abcdefgh 9 {hello world} 10 {this is a longer string for testing} + 11 x 12 xy 13 xyz + } { + assert_equal $value [r arget strs $idx] + } + assert_equal 14 [r arcount strs] + + foreach {idx value} { + 0 42 1 3.14 2 hi 3 -536870912 4 0.5 + 5 abcdefghij 6 536870911 7 -1.5 8 ab 9 0 + 10 1.0 11 hello 12 2147483647 13 0.25 14 xyz + } { + assert_equal $value [r arget mixed $idx] + } + assert_equal 15 [r arcount mixed] + + foreach {idx value} { + 0 first 4095 slice0end 4096 slice1start 8191 slice1end + 10000 42 50000 3.14 100000 hello + } { + assert_equal $value [r arget sparse $idx] + } + assert_equal 7 [r arcount sparse] + + foreach {idx value} { + 0 one 1 two 2 three 3 four 4 five 5 six + } { + assert_equal $value [r arget withinsert $idx] + } + assert_equal 6 [r arcount withinsert] + r arinsert withinsert seven + assert_equal seven [r arget withinsert 6] + } {} {needs:debug} +} diff --git a/tests/unit/type/hash-field-expire.tcl b/tests/unit/type/hash-field-expire.tcl index 402a9ad72..b69130db4 100644 --- a/tests/unit/type/hash-field-expire.tcl +++ b/tests/unit/type/hash-field-expire.tcl @@ 
-296,7 +296,7 @@ start_server {tags {"external:skip needs:debug"}} { test "HPEXPIRETIME persists after RDB reload ($type)" { r del myhash r hset myhash field1 value1 field2 value2 - r hpexpire myhash 300 NX FIELDS 1 field1 + r hpexpire myhash 500 NX FIELDS 1 field1 set before [r HPEXPIRETIME myhash FIELDS 1 field1] r debug reload set after [r HPEXPIRETIME myhash FIELDS 1 field1] diff --git a/tests/unit/type/increx.tcl b/tests/unit/type/increx.tcl new file mode 100644 index 000000000..1797cfbb5 --- /dev/null +++ b/tests/unit/type/increx.tcl @@ -0,0 +1,783 @@ +start_server {tags {"increx"}} { + # --------------------------------------------------------------------- + # Default behavior (no increment option -> +1, create with 0 if missing) + # --------------------------------------------------------------------- + + test {INCREX - creates key with value 0 then +1 when missing (no options)} { + r del mykey + assert_equal [r increx mykey] {1 1} + } + + test {INCREX - default increment on existing integer key} { + r set mykey 10 + assert_equal [r increx mykey] {11 1} + } + + # --------------------------------------------------------------------- + # BYINT behavior + # --------------------------------------------------------------------- + + test {INCREX - BYINT positive and negative increments} { + r set mykey 100 + assert_equal [r increx mykey BYINT 5] {105 5} + assert_equal [r increx mykey BYINT -10] {95 -10} + assert_equal [r increx mykey BYINT 0] {95 0} + } + + test {INCREX - BYINT saturates to UBOUND} { + r set mykey 50 + assert_equal [r increx mykey BYINT 100 UBOUND 80 OVERFLOW SAT] {80 30} + assert_equal [r get mykey] 80 + } + + test {INCREX - BYINT saturates to LBOUND} { + r set mykey 10 + assert_equal [r increx mykey BYINT -100 LBOUND 0 OVERFLOW SAT] {0 -10} + assert_equal [r get mykey] 0 + } + + test {INCREX - BYINT LBOUND and UBOUND together, value already inside range unaffected} { + r set mykey 5 + assert_equal [r increx mykey BYINT 1 LBOUND 0 UBOUND 10] {6 
1} + } + + test {INCREX - BYINT positive overflow with OVERFLOW SAT saturates to LLONG_MAX} { + # LLONG_MAX = 9223372036854775807 + r set mykey 9223372036854775800 + assert_equal [r increx mykey BYINT 9223372036854775800 OVERFLOW SAT] {9223372036854775807 7} + assert_equal [r get mykey] 9223372036854775807 + } + + test {INCREX - BYINT positive overflow with OVERFLOW SAT and UBOUND saturates to UBOUND} { + # LLONG_MAX = 9223372036854775807 + r set mykey 9223372036854775800 + assert_equal [r increx mykey BYINT 9223372036854775800 UBOUND 9223372036854775807 OVERFLOW SAT] {9223372036854775807 7} + assert_equal [r get mykey] 9223372036854775807 + } + + test {INCREX - BYINT negative overflow with OVERFLOW SAT saturates to LLONG_MIN} { + # LLONG_MIN = -9223372036854775808 + r set mykey -9223372036854775800 + assert_equal [r increx mykey BYINT -9223372036854775800 OVERFLOW SAT] {-9223372036854775808 -8} + assert_equal [r get mykey] -9223372036854775808 + } + + test {INCREX - BYINT negative overflow with OVERFLOW SAT and LBOUND saturates to LBOUND} { + # LLONG_MIN = -9223372036854775808 + r set mykey -9223372036854775800 + assert_equal [r increx mykey BYINT -9223372036854775800 LBOUND -9223372036854775808 OVERFLOW SAT] {-9223372036854775808 -8} + assert_equal [r get mykey] -9223372036854775808 + } + + test {INCREX - BYINT SAT rejects when applied delta would overflow long long} { + # The saturated result lands at LLONG_MIN while the prior value is positive, + # so the reported delta would not fit in a long long. 
+ r set mykey 9223372036854775800 + assert_error "*applied increment would overflow*" { + r increx mykey BYINT 1 OVERFLOW SAT UBOUND -9223372036854775808 + } + } + + test {INCREX - result within [LONG_MIN, LONG_MAX] keeps int encoding} { + r del mykey + r increx mykey + assert_encoding int mykey + r set mykey 2000000000 + r increx mykey BYINT 100 + assert_encoding int mykey + r set mykey -2000000000 + r increx mykey BYINT -100 + assert_encoding int mykey + } + + # --------------------------------------------------------------------- + # BYFLOAT behavior + # --------------------------------------------------------------------- + + test {INCREX - BYFLOAT basic increment} { + r set mykey 1.5 + assert_equal [lmap v [r increx mykey BYFLOAT 0.25] {roundFloat $v}] {1.75 0.25} + assert_equal [lmap v [r increx mykey BYFLOAT 1] {roundFloat $v}] {2.75 1} + } + + test {INCREX - BYFLOAT saturates to UBOUND/LBOUND} { + r set mykey 10 + assert_equal [lmap v [r increx mykey BYFLOAT 100 UBOUND 42.5 OVERFLOW SAT] {roundFloat $v}] {42.5 32.5} + r set mykey 0 + assert_equal [lmap v [r increx mykey BYFLOAT -100 LBOUND -5.5 OVERFLOW SAT] {roundFloat $v}] {-5.5 -5.5} + } + + # On some platforms strtold("+inf") with valgrind returns a non-inf result + if {!$::valgrind} { + test {INCREX - BYFLOAT rejects inf/-inf increment and existing inf/-inf value} { + # Increment is +inf/-inf -> rejected at parse time. + r set mykey 0 + assert_error "*BYFLOAT increment cannot be Infinity*" {r increx mykey BYFLOAT +inf} + assert_error "*BYFLOAT increment cannot be Infinity*" {r increx mykey BYFLOAT -inf} + + # Existing stored value is inf/-inf -> rejected at execution time. 
+ r set mykey inf + assert_error "*value cannot be Infinity*" {r increx mykey BYFLOAT 1} + assert_equal [r get mykey] inf + r set mykey -inf + assert_error "*value cannot be Infinity*" {r increx mykey BYFLOAT 0 LBOUND -100} + assert_equal [r get mykey] -inf + } + } + + # --------------------------------------------------------------------- + # Non-existent key whose default 0 is already outside [LBOUND, UBOUND] + # and the increment cannot bring it back into range -> refuse to create. + # --------------------------------------------------------------------- + + test {INCREX - BYINT/BYFLOAT on non-existent key refuses to create when result stays outside [LBOUND, UBOUND]} { + r del mykey + assert_error "*value is out of bounds*" {r increx mykey BYINT 5 LBOUND 10} + assert_equal [r exists mykey] 0 + + assert_error "*value is out of bounds*" {r increx mykey BYFLOAT -0.5 UBOUND -1.5} + assert_equal [r exists mykey] 0 + } + + # --------------------------------------------------------------------- + # Existing key whose value is already outside [LBOUND, UBOUND] is treated + # the same as an in-range value pushed outside by the increment: OVERFLOW + # FAIL errors out and OVERFLOW SAT saturates the result. + # --------------------------------------------------------------------- + + test {INCREX - BYFLOAT existing value already outside bounds} { + # Above UBOUND, same-side increment: FAIL errors, SAT saturates to UBOUND. + r set mykey 50.5 + assert_error "*out of bounds*" {r increx mykey BYFLOAT 5.5 UBOUND 30} + assert_equal [roundFloat [r get mykey]] 50.5 + assert_equal [lmap v [r increx mykey BYFLOAT 5.5 UBOUND 30 OVERFLOW SAT] {roundFloat $v}] {30 -20.5} + + # Below LBOUND, same-side decrement: SAT saturates to LBOUND. + r set mykey -50.5 + assert_equal [lmap v [r increx mykey BYFLOAT -5.5 LBOUND -30 OVERFLOW SAT] {roundFloat $v}] {-30 20.5} + + # Increment that brings the out-of-range value back inside is applied normally. 
+ r set mykey 50 + assert_equal [lmap v [r increx mykey BYFLOAT -25 UBOUND 30] {roundFloat $v}] {25 -25} + } + + test {INCREX - BYINT existing value already outside bounds} { + # Above UBOUND, same-side increment: FAIL errors, SAT saturates to UBOUND. + r set mykey 50 + assert_error "*out of bounds*" {r increx mykey BYINT 5 UBOUND 30} + assert_equal [r get mykey] 50 + assert_equal [r increx mykey BYINT 5 UBOUND 30 OVERFLOW SAT] {30 -20} + + # Below LBOUND, same-side decrement: SAT saturates to LBOUND. + r set mykey -50 + assert_equal [r increx mykey BYINT -5 LBOUND -30 OVERFLOW SAT] {-30 20} + + # Increment that brings the out-of-range value back inside is applied normally. + r set mykey 50 + assert_equal [r increx mykey BYINT -25 UBOUND 30] {25 -25} + } + + # --------------------------------------------------------------------- + # Out-of-range behavior: OVERFLOW FAIL (the default) errors out (like + # INCRBY); OVERFLOW SAT saturates the result silently. + # --------------------------------------------------------------------- + + test {INCREX - BYINT OVERFLOW FAIL rejects increment exceeding UBOUND; OVERFLOW SAT saturates it} { + r set mykey 10 + assert_error "*out of bounds*" {r increx mykey BYINT 10 UBOUND 15} + # Value is unchanged after the error + assert_equal [r get mykey] 10 + # OVERFLOW FAIL is the explicit form of the default + assert_error "*out of bounds*" {r increx mykey BYINT 10 UBOUND 15 OVERFLOW FAIL} + assert_equal [r get mykey] 10 + # OVERFLOW SAT saturates the result at UBOUND + assert_equal [r increx mykey BYINT 10 UBOUND 15 OVERFLOW SAT] {15 5} + assert_equal [r get mykey] 15 + } + + test {INCREX - BYINT OVERFLOW FAIL rejects decrement falling below LBOUND; OVERFLOW SAT floors it} { + r set mykey 10 + assert_error "*out of bounds*" {r increx mykey BYINT -10 LBOUND 5} + assert_equal [r get mykey] 10 + # OVERFLOW SAT floors the result at LBOUND + assert_equal [r increx mykey BYINT -10 LBOUND 5 OVERFLOW SAT] {5 -5} + assert_equal [r get mykey] 5 
+ } + + test {INCREX - BYINT within bounds is unaffected by OVERFLOW policy} { + r set mykey 10 + assert_equal [r increx mykey BYINT 3 UBOUND 20] {13 3} + assert_equal [r increx mykey BYINT -3 LBOUND 0 OVERFLOW SAT] {10 -3} + assert_equal [r increx mykey BYINT 1 UBOUND 20 OVERFLOW FAIL] {11 1} + } + + test {INCREX - BYINT with both LBOUND and UBOUND} { + r set mykey 5 + # Within range -> allowed + assert_equal [r increx mykey BYINT 2 LBOUND 0 UBOUND 10] {7 2} + # Exceeds UBOUND -> rejected, value unchanged + assert_error "*out of bounds*" {r increx mykey BYINT 10 LBOUND 0 UBOUND 10} + # Falls below LBOUND -> rejected, value unchanged + assert_error "*out of bounds*" {r increx mykey BYINT -20 LBOUND 0 UBOUND 10} + assert_equal [r get mykey] 7 + # OVERFLOW SAT saturates at the bounds + assert_equal [r increx mykey BYINT 10 LBOUND 0 UBOUND 10 OVERFLOW SAT] {10 3} + assert_equal [r increx mykey BYINT -20 LBOUND 0 UBOUND 10 OVERFLOW SAT] {0 -10} + } + + test {INCREX - BYINT at exact bound value is accepted} { + r set mykey 5 + # Increment that lands exactly on UBOUND -> allowed + assert_equal [r increx mykey BYINT 5 UBOUND 10] {10 5} + # Decrement that lands exactly on LBOUND -> allowed + assert_equal [r increx mykey BYINT -10 LBOUND 0] {0 -10} + } + + test {INCREX - BYFLOAT OVERFLOW FAIL rejects increment exceeding UBOUND; OVERFLOW SAT saturates it} { + r set mykey 10.0 + assert_error "ERR value is out of bounds*" {r increx mykey BYFLOAT 10.0 UBOUND 15.5} + assert_equal [roundFloat [r get mykey]] 10 + # OVERFLOW SAT saturates the result at UBOUND + assert_equal [lmap v [r increx mykey BYFLOAT 10.0 UBOUND 15.5 OVERFLOW SAT] {roundFloat $v}] {15.5 5.5} + } + + test {INCREX - BYFLOAT OVERFLOW FAIL rejects decrement falling below LBOUND; OVERFLOW SAT floors it} { + r set mykey 10.0 + assert_error "ERR value is out of bounds*" {r increx mykey BYFLOAT -10.0 LBOUND 5.5} + assert_equal [roundFloat [r get mykey]] 10 + # OVERFLOW SAT floors the result at LBOUND + assert_equal 
[lmap v [r increx mykey BYFLOAT -10.0 LBOUND 5.5 OVERFLOW SAT] {roundFloat $v}] {5.5 -4.5} + } + + test {INCREX - BYFLOAT within bounds is unaffected by OVERFLOW policy} { + r set mykey 1.5 + assert_equal [lmap v [r increx mykey BYFLOAT 0.25 UBOUND 10.0] {roundFloat $v}] {1.75 0.25} + assert_equal [lmap v [r increx mykey BYFLOAT 0.25 UBOUND 10.0 OVERFLOW SAT] {roundFloat $v}] {2 0.25} + } + + test {INCREX - BYFLOAT with both LBOUND and UBOUND} { + r set mykey 5.0 + # Within range -> allowed + assert_equal [lmap v [r increx mykey BYFLOAT 1.5 LBOUND 0 UBOUND 10] {roundFloat $v}] {6.5 1.5} + # Exceeds UBOUND -> rejected + assert_error "ERR value is out of bounds*" {r increx mykey BYFLOAT 10 LBOUND 0 UBOUND 10} + # Falls below LBOUND -> rejected + assert_error "ERR value is out of bounds*" {r increx mykey BYFLOAT -20 LBOUND 0 UBOUND 10} + assert_equal [lmap v [r get mykey] {roundFloat $v}] {6.5} + } + + test {INCREX - BYFLOAT at exact bound value is accepted} { + r set mykey 5.0 + assert_equal [lmap v [r increx mykey BYFLOAT 5.0 UBOUND 10.0] {roundFloat $v}] {10 5} + assert_equal [lmap v [r increx mykey BYFLOAT -10.0 LBOUND 0] {roundFloat $v}] {0 -10} + } + + test {INCREX - BYINT positive overflow: default errors, OVERFLOW SAT saturates} { + # LLONG_MAX = 9223372036854775807 + r set mykey 9223372036854775800 + assert_error "*increment or decrement would overflow*" {r increx mykey BYINT 9223372036854775800 UBOUND 9223372036854775807} + assert_equal [r get mykey] 9223372036854775800 + # OVERFLOW SAT: overflow saturates to LLONG_MAX, then saturates to UBOUND + assert_equal [r increx mykey BYINT 9223372036854775800 UBOUND 9223372036854775807 OVERFLOW SAT] {9223372036854775807 7} + } + + test {INCREX - BYINT negative overflow: default errors, OVERFLOW SAT saturates} { + # LLONG_MIN = -9223372036854775808 + r set mykey -9223372036854775800 + assert_error "*increment or decrement would overflow*" {r increx mykey BYINT -9223372036854775800 LBOUND -9223372036854775808} + 
assert_equal [r get mykey] -9223372036854775800 + # OVERFLOW SAT: overflow saturates to LLONG_MIN, then saturates to LBOUND + assert_equal [r increx mykey BYINT -9223372036854775800 LBOUND -9223372036854775808 OVERFLOW SAT] {-9223372036854775808 -8} + } + + test {INCREX - BYINT on new key (created from zero) with bound} { + r del mykey + # Increment from 0 stays within UBOUND -> allowed + assert_equal [r increx mykey BYINT 5 UBOUND 10] {5 5} + r del mykey + # Increment from 0 exceeds UBOUND -> rejected, key not created + assert_error "*out of bounds*" {r increx mykey BYINT 15 UBOUND 10} + assert_equal [r exists mykey] 0 + } + + test {INCREX - BYFLOAT on new key (created from zero) with bound} { + r del mykey + # Increment from 0 stays within UBOUND -> allowed + assert_equal [lmap v [r increx mykey BYFLOAT 5.5 UBOUND 10] {roundFloat $v}] {5.5 5.5} + r del mykey + # Increment from 0 exceeds UBOUND -> rejected, key not created + assert_error "ERR value is out of bounds*" {r increx mykey BYFLOAT 15.5 UBOUND 10} + assert_equal [r exists mykey] 0 + } + + test {INCREX - default with no bound behaves like INCRBY/INCRBYFLOAT} { + # In-range increments behave like INCRBY/INCRBYFLOAT. + r set mykey 10 + assert_equal [r increx mykey BYINT 1] {11 1} + assert_equal [lmap v [r increx mykey BYFLOAT 1.0] {roundFloat $v}] {12 1} + assert_equal [r increx mykey] {13 1} + + # BYINT overflow without an explicit bound -> error (like INCRBY). + r set mykey 9223372036854775800 + assert_error "*increment or decrement would overflow*" {r increx mykey BYINT 9223372036854775800} + assert_equal [r get mykey] 9223372036854775800 + } + + test {INCREX - error aborts before side effects: neither value nor TTL is modified} { + r del mykey + r set mykey 10 + # An out-of-range result aborts the command before any side effect. 
+ assert_error "*out of bounds*" {r increx mykey BYINT 100 UBOUND 15 EX 100} + assert_equal [r get mykey] 10 + assert_equal [r ttl mykey] -1 + + r del mykey + r set mykey 10 + # In-range increment with EX still updates the TTL. + assert_equal [r increx mykey BYINT 3 UBOUND 20 EX 200] {13 3} + assert_morethan [r ttl mykey] 0 + + r del mykey + r set mykey 10 + # OVERFLOW SAT also updates the TTL when saturation kicks in. + assert_equal [r increx mykey BYINT 100 UBOUND 15 OVERFLOW SAT EX 200] {15 5} + assert_morethan [r ttl mykey] 0 + } + + # --------------------------------------------------------------------- + # OVERFLOW REJECT: leave the key (and TTL) unchanged and reply + # [current_value, 0] when the result would be out of bounds, instead of + # producing an error. + # --------------------------------------------------------------------- + + test {INCREX - BYINT REJECT on overflow leaves value unchanged, in-range applies normally} { + # llong overflow path + r set mykey 9223372036854775800 + assert_equal [r increx mykey BYINT 9223372036854775800 OVERFLOW REJECT] {9223372036854775800 0} + assert_equal [r get mykey] 9223372036854775800 + # UBOUND / LBOUND paths + r set mykey 10 + assert_equal [r increx mykey BYINT 100 UBOUND 15 OVERFLOW REJECT] {10 0} + assert_equal [r increx mykey BYINT -100 LBOUND 5 OVERFLOW REJECT] {10 0} + assert_equal [r get mykey] 10 + # In-range increment is applied normally + assert_equal [r increx mykey BYINT 3 UBOUND 20 OVERFLOW REJECT] {13 3} + assert_equal [r get mykey] 13 + } + + # --------------------------------------------------------------------- + # Argument parsing / syntax validation + # --------------------------------------------------------------------- + + test {INCREX - wrong number of arguments} { + assert_error "*wrong number of arguments*" {r increx} + } + + test {INCREX - unknown argument} { + assert_error "*syntax error*" {r increx mykey FOO} + assert_error "*syntax error*" {r increx mykey BYINT 1 FOO} + } + + test 
{INCREX - BYINT and BYFLOAT are mutually exclusive} { + assert_error "*syntax error*" {r increx mykey BYINT 1 BYFLOAT 1.5} + assert_error "*syntax error*" {r increx mykey BYFLOAT 1.5 BYINT 1} + } + + test {INCREX - multiple expiration flags are mutually exclusive} { + assert_error "*syntax error*" {r increx mykey BYINT 1 EX 10 PX 5000} + assert_error "*syntax error*" {r increx mykey BYINT 1 EX 10 EXAT 9999999999} + assert_error "*syntax error*" {r increx mykey BYINT 1 PX 5000 PXAT 9999999999000} + assert_error "*syntax error*" {r increx mykey BYINT 1 EX 10 PERSIST} + assert_error "*syntax error*" {r increx mykey BYINT 1 PERSIST EX 10} + } + + test {INCREX - PERSIST and ENX are mutually exclusive} { + assert_error "*syntax error*" {r increx mykey BYINT 1 PERSIST ENX} + assert_error "*syntax error*" {r increx mykey BYINT 1 ENX PERSIST} + } + + test {INCREX - duplicate options are rejected} { + assert_error "*syntax error*" {r increx mykey BYINT 1 BYINT 2} + assert_error "*syntax error*" {r increx mykey BYFLOAT 1.0 BYFLOAT 2.0} + assert_error "*syntax error*" {r increx mykey LBOUND 0 LBOUND 1} + assert_error "*syntax error*" {r increx mykey UBOUND 9 UBOUND 8} + assert_error "*syntax error*" {r increx mykey OVERFLOW FAIL OVERFLOW SAT LBOUND 0} + assert_error "*syntax error*" {r increx mykey OVERFLOW SAT OVERFLOW SAT LBOUND 0} + assert_error "*syntax error*" {r increx mykey OVERFLOW REJECT OVERFLOW SAT LBOUND 0} + assert_error "*syntax error*" {r increx mykey OVERFLOW REJECT OVERFLOW REJECT LBOUND 0} + assert_error "*syntax error*" {r increx mykey ENX ENX EX 10} + assert_error "*syntax error*" {r increx mykey PERSIST PERSIST} + assert_error "*syntax error*" {r increx mykey EX 10 EX 20} + assert_error "*syntax error*" {r increx mykey PX 10 PX 20} + assert_error "*syntax error*" {r increx mykey EXAT 9999999999 EXAT 9999999998} + assert_error "*syntax error*" {r increx mykey PXAT 9999999999000 PXAT 9999999998000} + } + + test {INCREX - ENX without expiration is an error} { 
+ assert_error "*ENX flag requires an expiration*" {r increx mykey BYINT 1 ENX} + assert_error "*ENX flag requires an expiration*" {r increx mykey ENX} + } + + test {INCREX - BYINT requires a valid integer value} { + assert_error "*Increment is not an integer*" {r increx mykey BYINT abc} + assert_error "*Increment is not an integer*" {r increx mykey BYINT 1.5} + } + + test {INCREX - BYFLOAT requires a valid float value} { + assert_error "*Increment is not a valid float*" {r increx mykey BYFLOAT abc} + } + + test {INCREX - LBOUND > UBOUND should be rejected (integer)} { + assert_error "*LBOUND can't be greater than UBOUND*" {r increx mykey BYINT 1 LBOUND 10 UBOUND 5} + } + + test {INCREX - LBOUND > UBOUND should be rejected (float)} { + assert_error "*LBOUND can't be greater than UBOUND*" {r increx mykey BYFLOAT 0.5 LBOUND 10.5 UBOUND 1.5} + } + + test {INCREX - EX/PX non-positive value is rejected} { + assert_error "*invalid expire time*" {r increx mykey BYINT 1 EX 0} + assert_error "*invalid expire time*" {r increx mykey BYINT 1 PX 0} + assert_error "*invalid expire time*" {r increx mykey BYINT 1 EX -1} + } + + # --------------------------------------------------------------------- + # Type check + # --------------------------------------------------------------------- + + test {INCREX - WRONGTYPE against a list} { + r del mylist + r rpush mylist a b c + assert_error "WRONGTYPE*" {r increx mylist} + assert_error "WRONGTYPE*" {r increx mylist BYINT 1} + assert_error "WRONGTYPE*" {r increx mylist BYFLOAT 1.5} + } + + test {INCREX - non-numeric string value is rejected by BYFLOAT and BYINT} { + r set mykey "hello" + assert_error "*value is not a valid float*" {r increx mykey BYFLOAT 1.5} + assert_error "*value is not an integer*" {r increx mykey BYINT 1} + } + + # --------------------------------------------------------------------- + # Expiration handling + # --------------------------------------------------------------------- + + test {INCREX - EX sets TTL (seconds)} { 
+ r del mykey + r increx mykey BYINT 1 EX 100 + assert_morethan [r ttl mykey] 0 + assert_lessthan_equal [r ttl mykey] 100 + } + + test {INCREX - PX sets TTL (milliseconds)} { + r del mykey + r increx mykey BYINT 1 PX 100000 + assert_morethan [r pttl mykey] 0 + assert_lessthan_equal [r pttl mykey] 100000 + } + + test {INCREX - EXAT sets absolute TTL (seconds)} { + r del mykey + set ts [expr {[clock seconds] + 100}] + r increx mykey BYINT 1 EXAT $ts + assert_morethan [r ttl mykey] 0 + assert_lessthan_equal [r ttl mykey] 100 + } + + test {INCREX - PXAT sets absolute TTL (milliseconds)} { + r del mykey + set ts [expr {[clock milliseconds] + 100000}] + r increx mykey BYINT 1 PXAT $ts + assert_morethan [r pttl mykey] 0 + assert_lessthan_equal [r pttl mykey] 100000 + } + + test {INCREX - without expiration option preserves existing TTL} { + r del mykey + r set mykey 5 EX 1000 + set old_ttl [r ttl mykey] + r increx mykey BYINT 1 + set new_ttl [r ttl mykey] + # Existing TTL is preserved (should be within a small delta of old_ttl) + assert_morethan $new_ttl [expr {$old_ttl - 5}] + } + + test {INCREX - PERSIST removes existing TTL} { + r set mykey 5 EX 1000 + assert_morethan [r ttl mykey] 0 + r increx mykey BYINT 1 PERSIST + assert_equal [r ttl mykey] -1 + } + + test {INCREX - PERSIST on key without TTL leaves it TTL-less} { + r del mykey + r set mykey 10 + r increx mykey BYINT 1 PERSIST + assert_equal [r ttl mykey] -1 + } + + test {INCREX - ENX only sets TTL when key has no existing TTL} { + # Case 1: key exists with no TTL -> ENX sets the TTL + r del mykey + r set mykey 10 + assert_equal [r ttl mykey] -1 + r increx mykey BYINT 1 EX 100 ENX + assert_morethan [r ttl mykey] 0 + assert_lessthan_equal [r ttl mykey] 100 + + # Case 2: key already has TTL -> ENX must NOT touch it + r del mykey + r set mykey 10 EX 500 + set old_ttl [r ttl mykey] + r increx mykey BYINT 1 EX 10 ENX + set new_ttl [r ttl mykey] + # TTL should not have been shortened to ~10s + assert_morethan $new_ttl 490 + # 
Value should have been incremented + assert_equal [r get mykey] 11 + } + + test {INCREX - ENX on new key sets TTL (no existing expiry)} { + r del mykey + r increx mykey BYINT 5 EX 100 ENX + assert_morethan [r ttl mykey] 0 + assert_equal [r get mykey] 5 + } + + test {INCREX - EXAT in the past deletes the key} { + r del mykey + r set mykey 10 + # An expiration time clearly in the past + r increx mykey BYINT 1 EXAT 1 + assert_equal [r exists mykey] 0 + } + + test {INCREX - PXAT in the past deletes the key} { + r del mykey + r set mykey 10 + r increx mykey BYINT 1 PXAT 1 + assert_equal [r exists mykey] 0 + } + + test {INCREX - ENX skips deletion when key already has TTL and past EXAT is given} { + r del mykey + r set mykey 10 EX 500 + # ENX means "only set TTL if key has no TTL" - the past EXAT must not + # cause deletion because ENX prevents the TTL from being modified. + r increx mykey BYINT 1 EXAT 1 ENX + assert_equal [r exists mykey] 1 + assert_equal [r get mykey] 11 + # Old TTL is preserved + assert_morethan [r ttl mykey] 100 + } + + # --------------------------------------------------------------------- + # Order-independent / flexible argument ordering + # --------------------------------------------------------------------- + + test {INCREX - flags can appear in different orders} { + r del mykey + # Expiration before increment spec + r increx mykey EX 100 BYINT 5 + assert_equal [r get mykey] 5 + assert_morethan [r ttl mykey] 0 + + # LBOUND/UBOUND interleaved with increment + r set mykey 5 + assert_equal [r increx mykey LBOUND 0 BYINT 100 UBOUND 10 OVERFLOW SAT] {10 5} + } + + # --------------------------------------------------------------------- + # Command rewrite / replication propagation + # + # INCREX is always propagated as a SET command carrying the final value. 
+ # The exact rewrite depends on TTL-related options: + # + # (a) no expiration option -> SET KEEPTTL + # (b) PERSIST (with existing TTL) -> SET + # (c) EX/PX/EXAT/PXAT -> SET PXAT + # (d) ENX and key already has TTL -> SET KEEPTTL + # (e) ENX and key has no TTL -> SET PXAT + # (f) expiration already elapsed -> DEL/UNLINK + # --------------------------------------------------------------------- + + test {INCREX - rewrite without expiration: SET key KEEPTTL} { + r flushall + set repl [attach_to_replication_stream] + r set mykey 10 + r increx mykey BYINT 5 + r increx mykey BYFLOAT 0.5 + assert_replication_stream $repl { + {select *} + {set mykey 10*} + {set mykey 15 KEEPTTL} + {set mykey 15.5 KEEPTTL} + } + close_replication_stream $repl + } + + test {INCREX - rewrite with PERSIST on a key with TTL: SET key } { + r flushall + set repl [attach_to_replication_stream] + r set mykey 10 EX 500 + r increx mykey BYINT 1 PERSIST + assert_replication_stream $repl { + {select *} + {set mykey 10 PXAT *} + {set mykey 11} + } + close_replication_stream $repl + } + + test {INCREX - rewrite with EX/PX/EXAT/PXAT: SET key PXAT *} { + r flushall + set repl [attach_to_replication_stream] + r set mykey 10 + r increx mykey BYINT 1 EX 100 + r increx mykey BYINT 1 PX 100000 + r increx mykey BYINT 1 EXAT [expr [clock seconds] + 100] + r increx mykey BYINT 1 PXAT [expr [clock milliseconds] + 100000] + assert_replication_stream $repl { + {select *} + {set mykey 10*} + {set mykey 11 PXAT *} + {set mykey 12 PXAT *} + {set mykey 13 PXAT *} + {set mykey 14 PXAT *} + } + close_replication_stream $repl + } + + test {INCREX - rewrite with ENX on key that already has TTL: SET key KEEPTTL} { + r flushall + set repl [attach_to_replication_stream] + r set mykey 10 EX 500 + # ENX must preserve the existing TTL, so the rewrite must use KEEPTTL + # rather than an absolute PXAT derived from the new EX argument. 
+ r increx mykey BYINT 1 EX 10 ENX + assert_replication_stream $repl { + {select *} + {set mykey 10 PXAT *} + {set mykey 11 KEEPTTL} + } + close_replication_stream $repl + } + + test {INCREX - rewrite with ENX on key without TTL: SET key PXAT *} { + r flushall + set repl [attach_to_replication_stream] + r set mykey 10 + # No existing TTL, so ENX does set one and we propagate PXAT. + r increx mykey BYINT 1 EX 100 ENX + assert_replication_stream $repl { + {select *} + {set mykey 10*} + {set mykey 11 PXAT *} + } + close_replication_stream $repl + } + + test {INCREX - rewrite when expiration already elapsed propagates as DEL} { + r flushall + r config set lazyfree-lazy-expire no + set repl [attach_to_replication_stream] + r set mykey 10 + r increx mykey BYINT 1 EXAT 1 + assert_equal [r exists mykey] 0 + assert_replication_stream $repl { + {select *} + {set mykey 10*} + {del mykey} + } + close_replication_stream $repl + } + + test {INCREX - rewrite when expiration already elapsed propagates as UNLINK (lazyfree)} { + r flushall + r config set lazyfree-lazy-expire yes + set repl [attach_to_replication_stream] + r set mykey 10 + r increx mykey BYINT 1 PXAT 1 + assert_equal [r exists mykey] 0 + assert_replication_stream $repl { + {select *} + {set mykey 10*} + {unlink mykey} + } + close_replication_stream $repl + r config set lazyfree-lazy-expire no + } + + test {INCREX - rewrite carries saturated value after UBOUND/LBOUND} { + r flushall + set repl [attach_to_replication_stream] + r set mykey 50 + # With UBOUND + OVERFLOW SAT the final value is saturated; the SET + # rewrite must carry the saturated value (80), not the unbounded 150. 
+ r increx mykey BYINT 100 UBOUND 80 OVERFLOW SAT + r set myfloat 10 + r increx myfloat BYFLOAT 100 UBOUND 42.5 OVERFLOW SAT + assert_replication_stream $repl { + {select *} + {set mykey 50*} + {set mykey 80 KEEPTTL} + {set myfloat 10*} + {set myfloat 42.5 KEEPTTL} + } + close_replication_stream $repl + } + + test {INCREX - rewrite creates the key from zero when key did not exist} { + r flushall + set repl [attach_to_replication_stream] + r increx mykey BYINT 7 + assert_replication_stream $repl { + {select *} + {set mykey 7 KEEPTTL} + } + close_replication_stream $repl + } + + test {INCREX - keyspace notifications fire expected events in order} { + r flushall + r config set notify-keyspace-events KEA + set rd [redis_deferring_client] + assert_equal {1} [psubscribe $rd __keyevent@*__:*] + + # BYINT -> "incrby" + r increx k BYINT 5 + assert_match "*__keyevent*incrby*k*" [$rd read] + + # BYFLOAT -> "incrbyfloat" + r increx k BYFLOAT 0.5 + assert_match "*__keyevent*incrbyfloat*k*" [$rd read] + + # PERSIST on key with TTL -> "incrby" then "persist" + r set k 10 EX 100 + assert_match "*set*" [$rd read] + assert_match "*expire*" [$rd read] + r increx k BYINT 1 PERSIST + assert_match "*__keyevent*incrby*k*" [$rd read] + assert_match "*__keyevent*persist*k*" [$rd read] + + # EX -> "incrby" then "expire" + r increx k BYINT 1 EX 100 + assert_match "*__keyevent*incrby*k*" [$rd read] + assert_match "*__keyevent*expire*k*" [$rd read] + + # ENX on key with TTL: only "incrby", no "expire" (probe with DEL). + r increx k BYINT 1 EX 200 ENX + assert_match "*__keyevent*incrby*k*" [$rd read] + r del k + assert_match "*__keyevent*del*k*" [$rd read] + + # Past EXAT: early-return branch, only "del". 
+ r set k 10 + assert_match "*set*" [$rd read] + r increx k BYINT 1 EXAT 1 + assert_match "*__keyevent*del*k*" [$rd read] + + $rd close + } +} diff --git a/tests/unit/type/stream-cgroups.tcl b/tests/unit/type/stream-cgroups.tcl index 60e40596b..a300e6dbf 100644 --- a/tests/unit/type/stream-cgroups.tcl +++ b/tests/unit/type/stream-cgroups.tcl @@ -3402,47 +3402,47 @@ start_server { # Unrecognized option at various positions — the parser accepts options # both before and after the IDS block, so verify rejection in each slot. - assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp FAIL BADOPT IDS 1 1-0} - assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp FAIL IDS 1 1-0 BADOPT} - assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp SILENT BADOPT IDS 1 1-0 FORCE} - assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp SILENT FORCE BADOPT IDS 1 1-0} - assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp FAIL RETRYCOUNT 5 BADOPT IDS 1 1-0} - assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp FAIL IDS 1 1-0 RETRYCOUNT 5 BADOPT} - assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp FAIL FORCE IDS 1 1-0 BADOPT RETRYCOUNT 5} + assert_error "ERR Unrecognized XNACK option*" {r XNACK mystream grp FAIL BADOPT IDS 1 1-0} + assert_error "ERR Unrecognized XNACK option*" {r XNACK mystream grp FAIL IDS 1 1-0 BADOPT} + assert_error "ERR Unrecognized XNACK option*" {r XNACK mystream grp SILENT BADOPT IDS 1 1-0 FORCE} + assert_error "ERR Unrecognized XNACK option*" {r XNACK mystream grp SILENT FORCE BADOPT IDS 1 1-0} + assert_error "ERR Unrecognized XNACK option*" {r XNACK mystream grp FAIL RETRYCOUNT 5 BADOPT IDS 1 1-0} + assert_error "ERR Unrecognized XNACK option*" {r XNACK mystream grp FAIL IDS 1 1-0 RETRYCOUNT 5 BADOPT} + assert_error "ERR Unrecognized XNACK option*" {r XNACK mystream grp FAIL FORCE IDS 1 1-0 BADOPT RETRYCOUNT 5} # Invalid mode - assert_error "*mode must be SILENT, FAIL, or 
FATAL*" {r XNACK mystream grp BADMODE IDS 1 1-0} + assert_error "ERR mode must be SILENT, FAIL, or FATAL" {r XNACK mystream grp BADMODE IDS 1 1-0} # Multiple mode words — only one mode is allowed per invocation. - assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp FAIL FATAL IDS 1 1-0} - assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp SILENT FAIL IDS 1 1-0} - assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp FATAL SILENT IDS 1 1-0} - assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp FAIL SILENT FATAL IDS 1 1-0} + assert_error "ERR Unrecognized XNACK option*" {r XNACK mystream grp FAIL FATAL IDS 1 1-0} + assert_error "ERR Unrecognized XNACK option*" {r XNACK mystream grp SILENT FAIL IDS 1 1-0} + assert_error "ERR Unrecognized XNACK option*" {r XNACK mystream grp FATAL SILENT IDS 1 1-0} + assert_error "ERR Unrecognized XNACK option*" {r XNACK mystream grp FAIL SILENT FATAL IDS 1 1-0} # IDS keyword validation - assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp SILENT NOTIDS 1 1-0} - assert_error "*expected IDS keyword*" {r XNACK mystream grp SILENT FORCE RETRYCOUNT 5} + assert_error "ERR Unrecognized XNACK option*" {r XNACK mystream grp SILENT NOTIDS 1 1-0} + assert_error "ERR syntax error, expected IDS keyword" {r XNACK mystream grp SILENT FORCE RETRYCOUNT 5} # numids validation - assert_error "*numids must be a positive integer*" {r XNACK mystream grp SILENT IDS abc 1-0} - assert_error "*numids must be a positive integer*" {r XNACK mystream grp SILENT IDS 0 1-0} - assert_error "*numids must be a positive integer*" {r XNACK mystream grp SILENT IDS -1 1-0} - assert_error "*number of IDs doesn't match numids*" {r XNACK mystream grp SILENT IDS 2 1-0} + assert_error "ERR numids must be a positive integer*" {r XNACK mystream grp SILENT IDS abc 1-0} + assert_error "ERR numids must be a positive integer*" {r XNACK mystream grp SILENT IDS 0 1-0} + assert_error "ERR numids must be a positive integer*" 
{r XNACK mystream grp SILENT IDS -1 1-0} + assert_error "ERR number of IDs doesn't match numids" {r XNACK mystream grp SILENT IDS 2 1-0} # Invalid stream ID format - assert_error "*Invalid stream ID*" {r XNACK mystream grp FAIL IDS 1 not-a-valid-id} + assert_error "ERR Invalid stream ID*" {r XNACK mystream grp FAIL IDS 1 not-a-valid-id} # RETRYCOUNT validation — non-integer, negative, overflow, missing value - assert_error "*value is not an integer or out of range*" {r XNACK mystream grp FAIL IDS 1 1-0 RETRYCOUNT abc} - assert_error "*Invalid RETRYCOUNT*" {r XNACK mystream grp FAIL IDS 1 1-0 RETRYCOUNT -1} - assert_error "*value is not an integer or out of range*" {r XNACK mystream grp FAIL IDS 1 1-0 RETRYCOUNT 99999999999999999999} + assert_error "ERR value is not an integer or out of range" {r XNACK mystream grp FAIL IDS 1 1-0 RETRYCOUNT abc} + assert_error "ERR Invalid RETRYCOUNT value, must be >= 0" {r XNACK mystream grp FAIL IDS 1 1-0 RETRYCOUNT -1} + assert_error "ERR value is not an integer or out of range" {r XNACK mystream grp FAIL IDS 1 1-0 RETRYCOUNT 99999999999999999999} # RETRYCOUNT without a following value — consumed as trailing option - assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp FAIL IDS 1 1-0 RETRYCOUNT} + assert_error "ERR Unrecognized XNACK option*" {r XNACK mystream grp FAIL IDS 1 1-0 RETRYCOUNT} # RETRYCOUNT right after mode with no IDS — too few arguments - assert_error "*wrong number of arguments*" {r XNACK mystream grp FAIL RETRYCOUNT} + assert_error "ERR wrong number of arguments for 'xnack' command" {r XNACK mystream grp FAIL RETRYCOUNT} # Extra args after numids IDs — the surplus ID is parsed as an option - assert_error "*Unrecognized XNACK option*" {r XNACK mystream grp FAIL IDS 1 1-0 2-0} + assert_error "ERR Unrecognized XNACK option*" {r XNACK mystream grp FAIL IDS 1 1-0 2-0} } # Verify SILENT mode decrements delivery_count by 1, clamped at 0. 
diff --git a/tests/unit/type/string.tcl b/tests/unit/type/string.tcl index c2cb72b29..21cfca046 100644 --- a/tests/unit/type/string.tcl +++ b/tests/unit/type/string.tcl @@ -256,6 +256,47 @@ start_server {tags {"string"}} { list [r msetnx x1{t} xxx x1{t} zzz] [r get x1{t}] } {0 yyy} + test {MSET spanning multiple prefetch batches (batch size 16)} { + # Exercise the batched prefetch loop across the 16-key boundary. + # Test sizes chosen to hit: last batch only (16), boundary+1 (17), + # two full batches (32), and partial-tail (33, 40). + foreach n {16 17 32 33 40} { + r flushdb + set cmd [list mset] + for {set i 0} {$i < $n} {incr i} { + lappend cmd "k:${i}{t}" "v:$i" + } + assert_equal [r {*}$cmd] "OK" + for {set i 0} {$i < $n} {incr i} { + assert_equal [r get "k:${i}{t}"] "v:$i" + } + } + } + + test {MSET overwrites expired keys across batch boundary} { + # Regression test for dict-pointer staleness across batches + # (see src/t_string.c:prefetchKeysBatch). When lookupKeyWrite in + # batch 1 expires a pre-existing key, under cluster mode the slot + # dict may be freed (KVSTORE_FREE_EMPTY_DICTS) and recreated + # mid-command; msetGenericCommand must re-fetch the slot dict per + # batch. This test exercises the same code path in standalone mode. 
+ r flushdb + r debug set-active-expire 0 + for {set i 0} {$i < 8} {incr i} { + r set "k:${i}{t}" "old:$i" px 1 + } + after 20 + set cmd [list mset] + for {set i 0} {$i < 20} {incr i} { + lappend cmd "k:${i}{t}" "new:$i" + } + assert_equal [r {*}$cmd] "OK" + for {set i 0} {$i < 20} {incr i} { + assert_equal [r get "k:${i}{t}"] "new:$i" + } + r debug set-active-expire 1 + } {OK} {needs:debug} + test {MSETEX - all expiration flags} { # Test each expiration type separately (EX, PX, EXAT, PXAT) set future_sec [expr [clock seconds] + 10] @@ -635,6 +676,14 @@ if {[string match {*jemalloc*} [s mem_allocator]]} { list $old_value $new_value } {{} bar} + test {Extended SET GET option accepts repeated GET tokens} { + r del foo + r set foo bar + set old_value [r set foo baz GET GET] + set new_value [r get foo] + list $old_value $new_value + } {bar baz} + test {Extended SET GET option with XX} { r del foo r set foo bar diff --git a/tests/unit/type/zset.tcl b/tests/unit/type/zset.tcl index f08ddf70c..e840b2a16 100644 --- a/tests/unit/type/zset.tcl +++ b/tests/unit/type/zset.tcl @@ -1761,6 +1761,38 @@ start_server {tags {"zset"}} { } } {} {needs:debug} + test "ZSCORE 17-19 significant digit mantissas (widened fast path) - $encoding" { + # Exercise the widened fast_float_strtod path that handles + # mantissas > 2^53 (via __uint128_t arithmetic). ZADD/ZSCORE + # must round-trip bit-exactly through the listpack/skiplist + # encoding (parse on ingest, parse again on retrieval). Each + # input string below parses to a specific IEEE double whose + # canonical string representation is itself, so `expr` in Tcl + # re-evaluates to the same numeric value. 
#!/usr/bin/env python3
"""Standalone benchmark harness for the Redis Array commands.

The script optionally starts an ephemeral ``redis-server``, flushes the
selected DB, preloads three deterministic Array datasets through
``redis-cli --pipe``, runs a fixed list of ``redis-benchmark`` workloads
against them, and prints a Markdown summary (plus an optional JSON report).
"""
import argparse
import json
import os
import re
import signal
import subprocess
import sys
import tempfile
import time
from dataclasses import dataclass, asdict
from pathlib import Path
from typing import Optional


# Extracts the throughput figure from `redis-benchmark -q` output.
QPS_RE = re.compile(r"([0-9]+(?:\.[0-9]+)?)\s+requests per second")

# Keys used by the datasets and by the write-path workloads.
DENSE_NUM_KEY = "bench:array:dense:num"
DENSE_TEXT_KEY = "bench:array:dense:text"
SPARSE_TEXT_KEY = "bench:array:sparse:text"
APPEND_KEY = "bench:array:append"
RING_KEY = "bench:array:ring"

# Text payloads cycle through four variants, selected by `index % 4`.
DENSE_TEXT_SUFFIXES = (
    "alpha encoding complexity",
    "beta sparse vector",
    "gamma dense matcher",
    "delta encoding helper",
)
SPARSE_TEXT_SUFFIXES = (
    "alpha encoding complexity",
    "beta sparse needle",
    "gamma dense helper",
    "delta complexity marker",
)


@dataclass
class Workload:
    """One redis-benchmark invocation: the command plus its load parameters."""

    name: str
    description: str
    command: list[str]
    requests: int
    clients: int
    pipeline: int
    # Passed to redis-benchmark as `-r` when non-zero.
    rand_range: int = 0
    warmup_requests: int = 2000
    # Name of a RedisArrayBench method to call before the workload runs.
    setup: Optional[str] = None


@dataclass
class Result:
    """Outcome of a single workload, as reported in the summary and JSON."""

    name: str
    description: str
    qps: float
    requests: int
    clients: int
    pipeline: int
    rand_range: int
    command: list[str]
    raw_output: str


class BenchError(RuntimeError):
    """Raised for harness-level failures that should end the run with exit 1."""


class RedisArrayBench:
    """Drives dataset preparation and the benchmark workloads."""

    def __init__(self, args: argparse.Namespace):
        """Resolve binary paths and verify that they exist.

        Raises:
            BenchError: if redis-server, redis-cli or redis-benchmark is
                missing from the source directory.
        """
        self.args = args
        self.base_dir = Path(__file__).resolve().parent
        repo_root = self.base_dir.parent
        src_dir = Path(args.src_dir) if args.src_dir else repo_root / "src"
        self.redis_server = str(src_dir / "redis-server")
        self.redis_cli = str(src_dir / "redis-cli")
        self.redis_benchmark = str(src_dir / "redis-benchmark")
        self.server_proc: Optional[subprocess.Popen[str]] = None
        self.server_dir: Optional[tempfile.TemporaryDirectory[str]] = None
        self.host = args.host
        self.port = args.port
        self.db = args.db
        self.results: list[Result] = []

        for binary in (self.redis_server, self.redis_cli, self.redis_benchmark):
            if not os.path.exists(binary):
                raise BenchError(f"missing binary: {binary}")

    def _cli_target(self) -> list[str]:
        """Return the redis-cli arguments selecting host, port and DB."""
        return ["-h", self.host, "-p", str(self.port), "-n", str(self.db)]

    def run(self) -> int:
        """Execute the full benchmark and return the process exit code (0)."""
        try:
            if self.args.start_server:
                self.start_server()
            self.prepare_data()
            self.print_dataset_summary()
            for workload in self.selected_workloads():
                result = self.run_workload(workload)
                self.results.append(result)
                print(f"{result.name:28s} {result.qps:12.2f} req/s")
            self.print_summary()
            if self.args.json_out:
                report = {
                    "host": self.host,
                    "port": self.port,
                    "db": self.db,
                    "results": [asdict(r) for r in self.results],
                }
                with open(self.args.json_out, "w", encoding="utf-8") as fp:
                    json.dump(report, fp, indent=2)
                print(f"json written to {self.args.json_out}")
            return 0
        finally:
            # NOTE(review): with --keep-server the TemporaryDirectory object is
            # still alive here; it is presumably removed by its finalizer at
            # interpreter exit while the server keeps running - confirm that
            # this is acceptable.
            if self.args.start_server and not self.args.keep_server:
                self.stop_server()

    def start_server(self) -> None:
        """Start an ephemeral redis-server and wait until it answers PING."""
        self.server_dir = tempfile.TemporaryDirectory(prefix="array-bench-")
        cmd = [
            self.redis_server,
            "--port", str(self.port),
            "--save", "",
            "--appendonly", "no",
            "--dir", self.server_dir.name,
            "--loglevel", "warning",
            "--daemonize", "no",
        ]
        self.server_proc = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
        )
        self.wait_for_ping(timeout=10.0)

    def stop_server(self) -> None:
        """Terminate the ephemeral server (SIGTERM, then kill) and clean up.

        The temporary directory is removed and the bookkeeping attributes are
        reset even if waiting for the process raises.
        """
        proc = self.server_proc
        try:
            if proc is not None and proc.poll() is None:
                proc.send_signal(signal.SIGTERM)
                try:
                    proc.wait(timeout=5)
                except subprocess.TimeoutExpired:
                    proc.kill()
                    proc.wait(timeout=5)
        finally:
            if proc is not None and proc.stdout is not None:
                proc.stdout.close()
            if self.server_dir is not None:
                self.server_dir.cleanup()
            self.server_proc = None
            self.server_dir = None

    def wait_for_ping(self, timeout: float) -> None:
        """Poll the server with `redis-cli PING` until it replies PONG.

        Args:
            timeout: maximum number of seconds to keep trying.

        Raises:
            BenchError: if the server process exits early or does not answer
                within `timeout` seconds.
        """
        # Monotonic clock: the deadline must not move with wall-clock changes.
        deadline = time.monotonic() + timeout
        last_error: Optional[Exception] = None
        cmd = [self.redis_cli, *self._cli_target(), "--raw", "PING"]
        while True:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break
            if self.server_proc is not None and self.server_proc.poll() is not None:
                raise BenchError(
                    "server exited before becoming ready:\n"
                    f"{self.read_server_output().strip()}"
                )
            try:
                # Bound each probe by the remaining budget so a stuck
                # redis-cli cannot outlive the overall deadline.
                probe = subprocess.run(
                    cmd,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    text=True,
                    timeout=remaining,
                )
                if probe.returncode != 0:
                    raise BenchError(probe.stderr.strip() or probe.stdout.strip())
                if probe.stdout.strip() == "PONG":
                    return
            except Exception as exc:  # pragma: no cover - startup race handling
                last_error = exc
            time.sleep(0.05)
        raise BenchError(
            f"server did not start on {self.host}:{self.port}: {last_error}\n"
            f"{self.read_server_output().strip()}"
        )

    def read_server_output(self) -> str:
        """Return the captured server output, or "" when it is unavailable.

        The pipe is only read after the server process has exited: `read()`
        on the pipe of a live process blocks until EOF, which would hang the
        harness instead of letting it report the startup failure.
        """
        proc = self.server_proc
        if proc is None or proc.stdout is None:
            return ""
        if proc.poll() is None:
            return ""
        try:
            return proc.stdout.read()
        except Exception:  # pragma: no cover - best effort diagnostics
            return ""

    def cli(self, command: list[str], raw: bool = False) -> str:
        """Run one command through redis-cli and return its stdout.

        Raises:
            subprocess.CalledProcessError: if redis-cli exits non-zero.
        """
        cmd = [self.redis_cli, *self._cli_target()]
        if raw:
            cmd.append("--raw")
        cmd.extend(command)
        return subprocess.check_output(cmd, text=True)

    def pipe(self, payload: bytes) -> None:
        """Send a pre-encoded RESP payload through `redis-cli --pipe`.

        Raises:
            BenchError: if redis-cli fails or does not report zero errors.
        """
        cmd = [self.redis_cli, *self._cli_target(), "--pipe"]
        proc = subprocess.run(cmd, input=payload, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        out = proc.stdout.decode("utf-8", "replace")
        if proc.returncode != 0:
            raise BenchError(f"redis-cli --pipe failed:\n{out}")
        if "errors: 0, replies:" not in out:
            raise BenchError(f"unexpected --pipe output:\n{out}")

    @staticmethod
    def resp(parts: list[str]) -> bytes:
        """Encode a command as a RESP array of bulk strings.

        Lengths are byte lengths of the UTF-8 encoding, not character counts.
        """
        out = [f"*{len(parts)}\r\n".encode()]
        for part in parts:
            data = part.encode("utf-8")
            out.append(f"${len(data)}\r\n".encode())
            out.append(data)
            out.append(b"\r\n")
        return b"".join(out)

    def prepare_data(self) -> None:
        """Flush the DB and load the three benchmark datasets."""
        print("preparing datasets...", file=sys.stderr)
        self.cli(["FLUSHDB"])
        payload = bytearray()
        payload += self.resp(["DEL", DENSE_NUM_KEY, DENSE_TEXT_KEY, SPARSE_TEXT_KEY, APPEND_KEY, RING_KEY])
        payload += self.build_dense_numeric()
        payload += self.build_dense_text()
        payload += self.build_sparse_text()
        self.pipe(bytes(payload))

    def build_dense_numeric(self) -> bytes:
        """Build ARSET commands storing str(i) at index i for i < dense_len."""
        total = self.args.dense_len
        batch = 256
        payload = bytearray()
        for start in range(0, total, batch):
            values = [str(i) for i in range(start, min(start + batch, total))]
            payload += self.resp(["ARSET", DENSE_NUM_KEY, str(start), *values])
        return bytes(payload)

    def build_dense_text(self) -> bytes:
        """Build ARSET commands storing a short text row at every index."""
        total = self.args.dense_len
        batch = 128
        payload = bytearray()
        for start in range(0, total, batch):
            values = [
                f"row:{i} {DENSE_TEXT_SUFFIXES[i % 4]}"
                for i in range(start, min(start + batch, total))
            ]
            payload += self.resp(["ARSET", DENSE_TEXT_KEY, str(start), *values])
        return bytes(payload)

    def build_sparse_text(self) -> bytes:
        """Build ARMSET commands for four strided clusters of text slots."""
        # Each cluster is (base index, stride, element count).
        clusters = [
            (0, 97, 384),
            (8_388_608, 113, 640),
            (16_777_216, 127, 896),
            (25_165_824, 151, 896),
        ]
        batch_pairs = 64
        pairs: list[str] = []
        payload = bytearray()
        nth = 0  # running element counter across clusters; selects the text
        for base, stride, count in clusters:
            for i in range(count):
                idx = base + i * stride
                value = f"slot:{idx} {SPARSE_TEXT_SUFFIXES[nth % 4]}"
                pairs.extend([str(idx), value])
                nth += 1
                if len(pairs) >= batch_pairs * 2:
                    payload += self.resp(["ARMSET", SPARSE_TEXT_KEY, *pairs])
                    pairs.clear()
        if pairs:
            payload += self.resp(["ARMSET", SPARSE_TEXT_KEY, *pairs])
        return bytes(payload)

    def print_dataset_summary(self) -> None:
        """Print ARCOUNT and ARLEN for each preloaded dataset."""
        # Query everything first so nothing is printed if a query fails.
        summary = {
            key: {
                "count": self.cli(["ARCOUNT", key], raw=True).strip(),
                "len": self.cli(["ARLEN", key], raw=True).strip(),
            }
            for key in (DENSE_NUM_KEY, DENSE_TEXT_KEY, SPARSE_TEXT_KEY)
        }
        print("dataset:")
        for key, info in summary.items():
            print(f" {key}: count={info['count']} len={info['len']}")

    def selected_workloads(self) -> list[Workload]:
        """Return the workloads to run, honouring the --only filter.

        Raises:
            BenchError: if --only names a workload that does not exist.
        """
        workloads = self.workloads()
        if not self.args.only:
            return workloads
        wanted = {name.strip() for name in self.args.only.split(",") if name.strip()}
        unknown = wanted - {w.name for w in workloads}
        if unknown:
            raise BenchError(f"unknown workload(s): {', '.join(sorted(unknown))}")
        return [w for w in workloads if w.name in wanted]

    def workloads(self) -> list[Workload]:
        """Return every defined workload, in execution order."""
        dense_len = self.args.dense_len
        dense_last = str(dense_len - 1)
        sparse_last = str(self.args.sparse_space - 1)
        # 32-element window starting at 8192; for datasets too short to hold
        # it, slide the window back so start never exceeds end.
        range_start = min(8192, max(0, dense_len - 32))
        range_end = min(range_start + 31, dense_len - 1)
        return [
            Workload("arget_dense_rand", "ARGET dense random hit", ["ARGET", DENSE_NUM_KEY, "__rand_int__"], 200_000, 50, 16, rand_range=dense_len),
            Workload("armget_dense_4_rand", "ARMGET dense 4 random hits", ["ARMGET", DENSE_NUM_KEY, "__rand_int__", "__rand_int__", "__rand_int__", "__rand_int__"], 100_000, 50, 16, rand_range=dense_len),
            Workload("argetrange_dense_32", "ARGETRANGE dense 32 hot", ["ARGETRANGE", DENSE_NUM_KEY, str(range_start), str(range_end)], 50_000, 32, 8),
            Workload("arscan_dense_limit_100", "ARSCAN dense LIMIT 100", ["ARSCAN", DENSE_TEXT_KEY, "0", dense_last, "LIMIT", "100"], 50_000, 24, 4),
            Workload("argrep_match_dense", "ARGREP MATCH dense", ["ARGREP", DENSE_TEXT_KEY, "0", dense_last, "MATCH", "encoding", "LIMIT", "20", "WITHVALUES"], 20_000, 20, 2),
            Workload("argrep_re_dense_nocase", "ARGREP RE dense nocase", ["ARGREP", DENSE_TEXT_KEY, "0", dense_last, "RE", "encoding|complexity|helper", "NOCASE", "LIMIT", "20", "WITHVALUES"], 20_000, 20, 2),
            Workload("arop_sum_dense_4096", "AROP SUM dense 4096", ["AROP", DENSE_NUM_KEY, "0", "4095", "SUM"], 50_000, 24, 4),
            Workload("arget_sparse_rand", "ARGET sparse random mostly miss", ["ARGET", SPARSE_TEXT_KEY, "__rand_int__"], 200_000, 50, 16, rand_range=self.args.sparse_space),
            Workload("arscan_sparse_limit_100", "ARSCAN sparse LIMIT 100", ["ARSCAN", SPARSE_TEXT_KEY, "0", sparse_last, "LIMIT", "100"], 25_000, 20, 2),
            Workload("argrep_match_sparse", "ARGREP MATCH sparse", ["ARGREP", SPARSE_TEXT_KEY, "0", sparse_last, "MATCH", "encoding", "LIMIT", "20", "WITHVALUES"], 10_000, 16, 1),
            Workload("arop_used_sparse", "AROP USED sparse", ["AROP", SPARSE_TEXT_KEY, "0", sparse_last, "USED"], 25_000, 20, 2),
            Workload("arset_dense_rand", "ARSET dense random update", ["ARSET", DENSE_NUM_KEY, "__rand_int__", "42"], 150_000, 50, 16, rand_range=dense_len),
            Workload("armset_dense_4_rand", "ARMSET dense 4 random updates", ["ARMSET", DENSE_NUM_KEY, "__rand_int__", "11", "__rand_int__", "22", "__rand_int__", "33", "__rand_int__", "44"], 100_000, 50, 16, rand_range=dense_len),
            Workload("arinsert_append_hot", "ARINSERT append hot path", ["ARINSERT", APPEND_KEY, "x"], 50_000, 24, 8, setup="reset_append"),
            Workload("arring_hot_1024", "ARRING size 1024 hot path", ["ARRING", RING_KEY, "1024", "x"], 100_000, 50, 16, setup="reset_ring"),
        ]

    def run_workload(self, workload: Workload) -> Result:
        """Run setup, optional warmup and the measured pass for one workload."""
        if workload.setup:
            getattr(self, workload.setup)()
        if self.args.warmup and workload.warmup_requests > 0:
            self.invoke_benchmark(workload, workload.warmup_requests, quiet=True)
        requests = self.scale_requests(workload.requests)
        raw = self.invoke_benchmark(workload, requests, quiet=True)
        return Result(
            name=workload.name,
            description=workload.description,
            qps=self.parse_qps(raw),
            requests=requests,
            clients=workload.clients,
            pipeline=workload.pipeline,
            rand_range=workload.rand_range,
            command=workload.command,
            raw_output=raw.strip(),
        )

    def invoke_benchmark(self, workload: Workload, requests: int, quiet: bool) -> str:
        """Run redis-benchmark for `workload` and return its combined output.

        Raises:
            subprocess.CalledProcessError: if redis-benchmark exits non-zero.
        """
        cmd = [
            self.redis_benchmark,
            "-h", self.host,
            "-p", str(self.port),
            "--dbnum", str(self.db),
            "-n", str(requests),
            "-c", str(workload.clients),
            "-P", str(workload.pipeline),
            "--seed", str(self.args.seed),
        ]
        if quiet:
            cmd.append("-q")
        if workload.rand_range:
            cmd.extend(["-r", str(workload.rand_range)])
        cmd.extend(workload.command)
        return subprocess.check_output(cmd, text=True, stderr=subprocess.STDOUT)

    def parse_qps(self, raw: str) -> float:
        """Extract the requests-per-second figure from benchmark output.

        Raises:
            BenchError: if no throughput figure is present in `raw`.
        """
        m = QPS_RE.search(raw)
        if not m:
            raise BenchError(f"could not parse qps from redis-benchmark output:\n{raw}")
        return float(m.group(1))

    def scale_requests(self, requests: int) -> int:
        """Apply --request-scale to a request count, with a floor of 1000."""
        return max(1000, int(requests * self.args.request_scale))

    def reset_append(self) -> None:
        """Delete the ARINSERT target key before its workload starts."""
        self.cli(["DEL", APPEND_KEY])

    def reset_ring(self) -> None:
        """Delete the ARRING target key before its workload starts."""
        self.cli(["DEL", RING_KEY])

    def print_summary(self) -> None:
        """Print the collected results as a Markdown table."""
        print("\nsummary:")
        print("| workload | qps | req | c | P | notes |")
        print("|---|---:|---:|---:|---:|---|")
        for r in self.results:
            notes = r.description
            if r.rand_range:
                notes += f", rand=0..{r.rand_range - 1}"
            print(f"| {r.name} | {r.qps:.2f} | {r.requests} | {r.clients} | {r.pipeline} | {notes} |")


def parse_args(argv: Optional[list[str]] = None) -> argparse.Namespace:
    """Parse command-line options.

    Args:
        argv: argument list to parse; defaults to `sys.argv[1:]` when None.

    The parser exits with an error when --dense-len or --sparse-space is not
    a positive integer, because both are used as `value - 1` range ends.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Standalone Array benchmark harness. It uses DB 9 by default, "
            "flushes that DB, loads deterministic Array datasets, and runs "
            "custom redis-benchmark workloads."
        )
    )
    parser.add_argument("--src-dir", help="Path to the src directory containing redis-server, redis-cli, and redis-benchmark")
    parser.add_argument("--host", default="127.0.0.1")
    parser.add_argument("--port", type=int, default=6395)
    parser.add_argument("--db", type=int, default=9)
    parser.add_argument("--start-server", action="store_true", default=True,
                        help="Start an ephemeral redis-server on --port (default: enabled)")
    parser.add_argument("--no-start-server", dest="start_server", action="store_false",
                        help="Use an already running server instead of starting one")
    parser.add_argument("--keep-server", action="store_true",
                        help="Do not stop the ephemeral server after the run")
    parser.add_argument("--only", help="Comma-separated workload names to run")
    parser.add_argument("--seed", type=int, default=12345)
    parser.add_argument("--request-scale", type=float, default=1.0,
                        help="Scale factor applied to all workload request counts")
    parser.add_argument("--warmup", action="store_true", default=True,
                        help="Run a short warmup before each benchmark (default: enabled)")
    parser.add_argument("--no-warmup", dest="warmup", action="store_false")
    parser.add_argument("--json-out", help="Optional path for machine-readable results")
    parser.add_argument("--dense-len", type=int, default=16_384,
                        help="Number of contiguous dense elements to preload")
    parser.add_argument("--sparse-space", type=int, default=30_000_000,
                        help="Logical range used by sparse benchmarks")
    args = parser.parse_args(argv)
    if args.dense_len < 1:
        parser.error("--dense-len must be a positive integer")
    if args.sparse_space < 1:
        parser.error("--sparse-space must be a positive integer")
    return args


def main() -> int:
    """Run the benchmark from the command line and return the exit code."""
    args = parse_args()
    try:
        bench = RedisArrayBench(args)
        return bench.run()
    except BenchError as exc:
        print(f"error: {exc}", file=sys.stderr)
        return 1
    except subprocess.CalledProcessError as exc:
        # `output` is None when the failing call did not capture stdout.
        output = exc.output
        if isinstance(output, bytes):
            output = output.decode("utf-8", "replace")
        print(output or f"command failed with exit status {exc.returncode}", file=sys.stderr)
        return exc.returncode or 1


if __name__ == "__main__":
    raise SystemExit(main())