diff --git a/.github/workflows/post-release-automation.yml b/.github/workflows/post-release-automation.yml deleted file mode 100644 index e9e756b4b..000000000 --- a/.github/workflows/post-release-automation.yml +++ /dev/null @@ -1,165 +0,0 @@ -name: Post-Release Automation - -on: - release: - types: [published] - -jobs: - extract-release-info: - if: github.repository == 'redis/redis' - runs-on: ubuntu-latest - outputs: - tag_name: ${{ steps.release-info.outputs.tag_name }} - release_type: ${{ steps.release-info.outputs.release_type }} - steps: - - name: Checkout repository - uses: actions/checkout@v6 - - - name: Extract and validate release information - id: release-info - env: - TAG_NAME: ${{ github.event.release.tag_name }} - GH_TOKEN: ${{ github.token }} - run: | - echo "tag_name=${TAG_NAME}" >> $GITHUB_OUTPUT - echo "Release tag: ${TAG_NAME}" - - LATEST_TAG=$(gh release view --json tagName --jq '.tagName') - echo "Latest release tag(from gh release): ${LATEST_TAG}" - - if [[ "${TAG_NAME}" == "${LATEST_TAG}" ]]; then - echo "release_type=latest" >> $GITHUB_OUTPUT - echo "Detected latest release: ${TAG_NAME}" - else - echo "release_type=non-latest" >> $GITHUB_OUTPUT - echo "Detected non-latest release: ${TAG_NAME} (latest is ${LATEST_TAG})" - fi - - create-tarball: - needs: extract-release-info - runs-on: ubuntu-latest - env: - TAG_NAME: ${{ needs.extract-release-info.outputs.tag_name }} - outputs: - sha256: ${{ steps.checksum.outputs.sha256 }} - size_mb: ${{ steps.size.outputs.size_mb }} - size_warning: ${{ steps.size.outputs.size_warning }} - steps: - - name: Checkout repository - uses: actions/checkout@v6 - with: - ref: ${{ env.TAG_NAME }} - fetch-depth: 0 - - - name: Create tarball - run: ./utils/releasetools/01_create_tarball.sh "$TAG_NAME" - - - name: Verify tarball size - id: size - run: | - TARBALL="/tmp/redis-${TAG_NAME}.tar.gz" - SIZE_MB=$(du -m "$TARBALL" | cut -f1) - echo "Tarball size: ${SIZE_MB} MB" - echo "size_mb=${SIZE_MB}" >> $GITHUB_OUTPUT - if 
[ "$SIZE_MB" -lt 3 ] || [ "$SIZE_MB" -gt 5 ]; then - echo "::warning::Tarball size ${SIZE_MB} MB is outside expected range (3-5 MB)" - echo "size_warning=true" >> $GITHUB_OUTPUT - else - echo "size_warning=false" >> $GITHUB_OUTPUT - fi - - - name: Calculate SHA256 checksum - id: checksum - run: | - TARBALL="/tmp/redis-${TAG_NAME}.tar.gz" - SHA256=$(shasum -a 256 "$TARBALL" | cut -d' ' -f1) - echo "SHA256: $SHA256" - echo "sha256=$SHA256" >> $GITHUB_OUTPUT - - - name: Upload tarball as artifact - uses: actions/upload-artifact@v6 - with: - name: redis-${{ env.TAG_NAME }}-tarball - path: /tmp/redis-${{ env.TAG_NAME }}.tar.gz - compression-level: 0 - - # approval-gate: - # needs: [extract-release-info, create-tarball] - # if: needs.extract-release-info.outputs.release_type == 'latest' - # runs-on: ubuntu-latest - # steps: - # - name: Approval gate - # run: | - # echo "Latest release detected. Manual approval required for production deployment." - # # TODO: Implement approval workflow - # # This could use GitHub Environments with required reviewers - # # or a manual approval step - - # upload-tarball: - # needs: [extract-release-info, create-tarball, approval-gate] - # if: always() && !cancelled() && needs.create-tarball.result == 'success' && (needs.approval-gate.result == 'success' || needs.approval-gate.result == 'skipped') - # runs-on: ubuntu-latest - # steps: - # - name: Upload tarball - # run: | - # echo "TODO: Implement tarball upload" - # # This will require: - # # - SSH credentials/keys for upload to download.redis.io - # # - Adaptation of utils/releasetools/02_upload_tarball.sh for CI environment - - # test-release-tarball: - # needs: upload-tarball - # runs-on: ubuntu-latest - # steps: - # - name: Test release tarball - # run: | - # echo "TODO: Implement release testing using utils/releasetools/03_test_release.sh" - # # This will: - # # - Download the uploaded tarball - # # - Extract and build Redis - - # update-release-hashes: - # needs: test-release-tarball 
- # runs-on: ubuntu-latest - # steps: - # - name: Update release hashes - # run: | - # echo "TODO: Implement hash update using utils/releasetools/04_release_hash.sh" - # # This will require: - # # - Access to redis-hashes repository - # # - Git credentials for committing and pushing - - summary-and-notify: - needs: [extract-release-info, create-tarball] # update-release-hashes - if: always() && github.repository == 'redis/redis' - runs-on: ubuntu-latest - env: - TAG_NAME: ${{ needs.extract-release-info.outputs.tag_name }} - RELEASE_TYPE: ${{ needs.extract-release-info.outputs.release_type }} - SHA256: ${{ needs.create-tarball.outputs.sha256 }} - SIZE_MB: ${{ needs.create-tarball.outputs.size_mb }} - SIZE_WARNING: ${{ needs.create-tarball.outputs.size_warning }} - steps: - - name: Summary - run: | - { - echo "## Post-Release Automation Summary" - echo "" - echo "- **Release Tag:** ${TAG_NAME}" - echo "- **Release Type:** ${RELEASE_TYPE}" - echo "- **Tarball SHA256:** ${SHA256}" - echo "- **Tarball Size:** ${SIZE_MB} MB" - if [ "${SIZE_WARNING}" == "true" ]; then - echo "" - echo "> [!WARNING]" - echo "> Tarball size is outside expected range, check the logs for details." 
- fi - } >> $GITHUB_STEP_SUMMARY - - # - name: Send Slack notification - # run: | - # echo "TODO: Implement Slack notification" - # # This will require: - # # - Slack webhook URL or bot token (stored in secrets) - # # - Determine appropriate channel (e.g., #releases, #redis-releases) - # # - Craft message with release information and workflow status diff --git a/.gitignore b/.gitignore index 5ed94f1da..63968fb29 100644 --- a/.gitignore +++ b/.gitignore @@ -30,6 +30,7 @@ deps/lua/src/luac deps/lua/src/liblua.a deps/hdr_histogram/libhdrhistogram.a deps/fpconv/libfpconv.a +deps/tre/libtre.a tests/tls/* .make-* .prerequisites diff --git a/README.md b/README.md index 5ea9241ad..21de64642 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 @@ Redis excels in various applications, including: - **Distributed Session Store:** Offers flexible session data modeling (string, JSON, hash). - **Data Structure Server:** Provides low-level data structures (strings, lists, sets, hashes, sorted sets, JSON, etc.) with high-level semantics (counters, queues, leaderboards, rate limiters) and supports transactions & scripting. - **NoSQL Data Store:** Key-value, document, and time series data storage. -- **Search and Query Engine:** Indexing for hash/JSON documents, supporting vector search, full-text search, geospatial queries, ranking, and aggregations via Redis Query Engine. +- **Search and Query Engine:** Indexing for hash/JSON documents, supporting vector search, full-text search, geospatial queries, ranking, and aggregations via Redis Search. - **Event Store & Message Broker:** Implements queues (lists), priority queues (sorted sets), event deduplication (sets), streams, and pub/sub with probabilistic stream processing capabilities. - **Vector Store for GenAI:** Integrates with AI applications (e.g. LangGraph, mem0) for short-term memory, long-term memory, LLM response caching (semantic caching), and retrieval augmented generation (RAG). 
- **Real-Time Analytics:** Powers personalization, recommendations, fraud detection, and risk assessment. @@ -172,9 +172,10 @@ Redis provides a variety of data types, processing engines, and capabilities to **Important:** Features marked with an asterisk (\*) require Redis to be compiled with the `BUILD_WITH_MODULES=yes` flag when [building Redis from source](#build-redis-from-source) - [**String:**](https://redis.io/docs/latest/develop/data-types/strings) Sequences of bytes, including text, serialized objects, and binary arrays used for caching, counters, and bitwise operations. -- [**JSON:**](https://redis.io/docs/latest/develop/data-types/json/) Nested JSON documents that are indexed and searchable using JSONPath expressions and with [Redis Query Engine](https://redis.io/docs/latest/develop/interact/search-and-query/) +- [**JSON:**](https://redis.io/docs/latest/develop/data-types/json/) Nested JSON documents that are indexed and searchable using JSONPath expressions and with [Redis Search](https://redis.io/docs/latest/develop/ai/search-and-query/) +- [**Array:**](https://redis.io/docs/latest/develop/data-types/arrays/) Sparse, index-addressable collection of string values - [**Hash:**](https://redis.io/docs/latest/develop/data-types/hashes/) Field-value maps used to represent basic objects and store groupings of key-value pairs with support for [hash field expiration (TTL)](https://redis.io/docs/latest/develop/data-types/hashes/#field-expiration) -- [**Redis Query Engine:**](https://redis.io/docs/latest/develop/interact/search-and-query/) Use Redis as a document database, a vector database, a secondary index, and a search engine. Define indexes for hash and JSON documents and then use a rich query language for vector search, full-text search, geospatial queries, and aggregations. +- [**Redis Search:**](https://redis.io/docs/latest/develop/ai/search-and-query/) Use Redis as a document database, a vector database, a secondary index, and a search engine. 
Define indexes for hash and JSON documents and then use a rich query language for vector search, full-text search, geospatial queries, and aggregations. - [**List:**](https://redis.io/docs/latest/develop/data-types/lists/) Linked lists of string values used as stacks, queues, and for queue management. - [**Set:**](https://redis.io/docs/latest/develop/data-types/sets/) Unordered collection of unique strings used for tracking unique items, relations, and common set operations (intersections, unions, differences). - [**Sorted set:**](https://redis.io/docs/latest/develop/data-types/sorted-sets/) Collection of unique strings ordered by an associated score used for leaderboards and rate limiters. diff --git a/deps/Makefile b/deps/Makefile index ef6168bbd..7ca6de4c2 100644 --- a/deps/Makefile +++ b/deps/Makefile @@ -59,6 +59,7 @@ distclean: -(cd jemalloc && [ -f Makefile ] && $(MAKE) distclean) > /dev/null || true -(cd hdr_histogram && $(MAKE) clean) > /dev/null || true -(cd fpconv && $(MAKE) clean) > /dev/null || true + -(cd tre && $(MAKE) clean) > /dev/null || true -(cd xxhash && $(MAKE) clean) > /dev/null || true -(rm -f .make-*) @@ -94,6 +95,13 @@ fpconv: .make-prerequisites .PHONY: fpconv +tre: .make-prerequisites + @printf '%b %b\n' $(MAKECOLOR)MAKE$(ENDCOLOR) $(BINCOLOR)$@$(ENDCOLOR) + cd tre && $(MAKE) CFLAGS="$(DEPS_CFLAGS)" LDFLAGS="$(DEPS_LDFLAGS)" + +.PHONY: tre + + XXHASH_CFLAGS = -fPIC $(DEPS_CFLAGS) xxhash: .make-prerequisites @printf '%b %b\n' $(MAKECOLOR)MAKE$(ENDCOLOR) $(BINCOLOR)$@$(ENDCOLOR) diff --git a/deps/tre/LICENSE b/deps/tre/LICENSE new file mode 100644 index 000000000..76ea75f40 --- /dev/null +++ b/deps/tre/LICENSE @@ -0,0 +1,29 @@ +This is the license, copyright notice, and disclaimer for TRE, a regex +matching package (library and tools) with support for approximate +matching. + +Copyright (c) 2001-2009 Ville Laurikari +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/deps/tre/Makefile b/deps/tre/Makefile new file mode 100644 index 000000000..507487749 --- /dev/null +++ b/deps/tre/Makefile @@ -0,0 +1,79 @@ +STD= -std=c99 +WARN= -Wall +OPT= -Os + +ifeq ($(SANITIZER),address) + CFLAGS+=-fsanitize=address -fno-sanitize-recover=all -fno-omit-frame-pointer + LDFLAGS+=-fsanitize=address +else +ifeq ($(SANITIZER),undefined) + CFLAGS+=-fsanitize=undefined -fno-sanitize-recover=all -fno-omit-frame-pointer + LDFLAGS+=-fsanitize=undefined +else +ifeq ($(SANITIZER),thread) + CFLAGS+=-fsanitize=thread -fno-sanitize-recover=all -fno-omit-frame-pointer + LDFLAGS+=-fsanitize=thread +else +ifeq ($(SANITIZER),memory) + CFLAGS+=-fsanitize=memory -fsanitize-memory-track-origins=2 -fno-sanitize-recover=all -fno-omit-frame-pointer + LDFLAGS+=-fsanitize=memory +endif +endif +endif +endif + +R_CFLAGS= $(STD) $(WARN) $(OPT) $(DEBUG) $(CFLAGS) -DTRE_REGEX_T_FIELD=value -Ilocal_includes -Ilib +R_LDFLAGS= $(LDFLAGS) +DEBUG= -g + +R_CC=$(CC) $(R_CFLAGS) +R_LD=$(CC) $(R_LDFLAGS) + +AR= ar +ARFLAGS= rcs + +TRE_OBJ=lib/regcomp.o lib/regerror.o lib/regexec.o lib/tre-ast.o lib/tre-compile.o \ + lib/tre-filter.o lib/tre-match-backtrack.o lib/tre-match-parallel.o \ + lib/tre-mem.o lib/tre-parse.o lib/tre-stack.o lib/xmalloc.o +TRE_TESTS=tests/retest tests/test-str-source tests/test-literal-opt tests/test-malformed-regn + +libtre.a: $(TRE_OBJ) + $(AR) $(ARFLAGS) $@ $+ + +check: $(TRE_TESTS) + @set -e; \ + for test in $(TRE_TESTS); do \ + echo "TEST $$test"; \ + ./$$test; \ + done + +tests/retest: tests/retest.c libtre.a + $(R_LD) $(R_CFLAGS) -DHAVE_REGNEXEC -DHAVE_REGNCOMP -o $@ $< libtre.a + +tests/test-str-source: tests/test-str-source.c libtre.a + $(R_LD) $(R_CFLAGS) -o $@ $< libtre.a + +tests/test-literal-opt: tests/test-literal-opt.c libtre.a + $(R_LD) $(R_CFLAGS) -o $@ $< libtre.a + +tests/test-malformed-regn: tests/test-malformed-regn.c libtre.a + $(R_LD) $(R_CFLAGS) -o $@ $< libtre.a + +lib/regcomp.o: lib/regcomp.c local_includes/tre.h 
local_includes/tre-config.h lib/tre-internal.h lib/xmalloc.h +lib/regerror.o: lib/regerror.c local_includes/tre.h +lib/regexec.o: lib/regexec.c local_includes/tre.h lib/tre-internal.h lib/xmalloc.h +lib/tre-ast.o: lib/tre-ast.c lib/tre-ast.h lib/tre-internal.h +lib/tre-compile.o: lib/tre-compile.c lib/tre-compile.h lib/tre-internal.h lib/tre-mem.h lib/tre-parse.h lib/tre-stack.h lib/xmalloc.h +lib/tre-filter.o: lib/tre-filter.c lib/tre-filter.h lib/tre-internal.h +lib/tre-match-backtrack.o: lib/tre-match-backtrack.c lib/tre-internal.h lib/tre-match-utils.h lib/tre-mem.h lib/tre-stack.h +lib/tre-match-parallel.o: lib/tre-match-parallel.c lib/tre-internal.h lib/tre-match-utils.h lib/tre-mem.h +lib/tre-mem.o: lib/tre-mem.c lib/tre-mem.h +lib/tre-parse.o: lib/tre-parse.c lib/tre-ast.h lib/tre-compile.h lib/tre-filter.h lib/tre-internal.h lib/tre-mem.h lib/tre-parse.h lib/tre-stack.h lib/xmalloc.h +lib/tre-stack.o: lib/tre-stack.c lib/tre-internal.h lib/tre-stack.h +lib/xmalloc.o: lib/xmalloc.c lib/xmalloc.h + +.c.o: + $(R_CC) -c -o $@ $< + +clean: + rm -f $(TRE_OBJ) libtre.a $(TRE_TESTS) diff --git a/deps/tre/README.md b/deps/tre/README.md new file mode 100644 index 000000000..b2e09bbcb --- /dev/null +++ b/deps/tre/README.md @@ -0,0 +1,276 @@ +Introduction +============ + +TRE is a lightweight, robust, and efficient POSIX compliant regexp +matching library with some exciting features such as approximate +(fuzzy) matching. + +The matching algorithm used in TRE uses linear worst-case time in +the length of the text being searched, and quadratic worst-case +time in the length of the used regular expression. + +In other words, the time complexity of the algorithm is O(M^2N), where +M is the length of the regular expression and N is the length of the +text. The used space is also quadratic on the length of the regex, +but does not depend on the searched string. This quadratic behaviour +occurs only on pathological cases which are probably very rare in +practice. 
+ + +Hacking +======= + +Here's how to work with this code. + +Prerequisites +------------- + +You will need the following tools installed on your system: + + - autoconf + - automake + - gettext (including autopoint) + - libtool + - zip (optional) + + +Building +-------- + +First, prepare the tree. Change to the root of the source directory +and run + + ./utils/autogen.sh + +This will regenerate various things using the prerequisite tools so +that you end up with a buildable tree. + +After this, you can run the configure script and build TRE as usual: + + ./configure + make + make check + make install + + +Building a source code package +------------------------------ + +In a prepared tree, this command creates a source code tarball: + + ./configure && make dist + +Alternatively, you can run + + ./utils/build-sources.sh + +which builds the source code packages and puts them in the `dist` +subdirectory. This script needs a working `zip` command. + + +Features +======== + +TRE is not just yet another regexp matcher. TRE has some features +which are not there in most free POSIX compatible implementations. +Most of these features are not present in non-free implementations +either, for that matter. + +Approximate matching +-------------------- + +Approximate pattern matching allows matches to be approximate, that +is, allows the matches to be close to the searched pattern under some +measure of closeness. TRE uses the edit-distance measure (also known +as the Levenshtein distance) where characters can be inserted, +deleted, or substituted in the searched text in order to get an exact +match. + +Each insertion, deletion, or substitution adds the distance, or cost, +of the match. TRE can report the matches which have a cost lower than +some given threshold value. TRE can also be used to search for +matches with the lowest cost. + +TRE includes a version of the agrep (approximate grep) command line +tool for approximate regexp matching in the style of grep. 
Unlike +other agrep implementations (like the one by Sun Wu and Udi Manber +from University of Arizona) TRE agrep allows full regexps of any +length, any number of errors, and non-uniform costs for insertion, +deletion and substitution. + +Strict standard conformance +--------------------------- + +POSIX defines the behaviour of regexp functions precisely. TRE +attempts to conform to these specifications as strictly as possible. +TRE always returns the correct matches for subpatterns, for example. +Very few other implementations do this correctly. In fact, the only +other implementations besides TRE that I am aware of (free or not) +that get it right are Rx by Tom Lord, Regex++ by John Maddock, and the +AT&T ast regex by Glenn Fowler and Doug McIlroy. + +The standard TRE tries to conform to is the IEEE Std 1003.1-2001, or +Open Group Base Specifications Issue 6, commonly referred to as +“POSIX”. The relevant parts are the base specifications on regular +expressions (and the rationale) and the description of the `regcomp()` +API. + +For an excellent survey on POSIX regexp matchers, see the testregex +pages by Glenn Fowler of AT&T Labs Research. + +Predictable matching speed +-------------------------- + +Because of the matching algorithm used in TRE, the maximum time +consumed by any `regexec()` call is always directly proportional to +the length of the searched string. There is one exception: if back +references are used, the matching may take time that grows +exponentially with the length of the string. This is because matching +back references is an NP complete problem, and almost certainly +requires exponential time to match in the worst case. + +Predictable and modest memory consumption +----------------------------------------- + +A `regexec()` call never allocates memory from the heap. TRE allocates +all the memory it needs during a `regcomp()` call, and some temporary +working space from the stack frame for the duration of the `regexec()` +call. 
The amount of temporary space needed is constant during +matching and does not depend on the searched string. For regexps of +reasonable size TRE needs less than 50K of dynamically allocated +memory during the `regcomp()` call, less than 20K for the compiled +pattern buffer, and less than two kilobytes of temporary working space +from the stack frame during a `regexec()` call. There is no time / +memory tradeoff. TRE is also small in code size; statically linking +with TRE increases the executable size less than 30K (gcc-3.2, x86, +GNU/Linux). + +Wide character and multibyte character set support +-------------------------------------------------- + +TRE supports multibyte character sets. This makes it possible to use +regexps seamlessly with, for example, Japanese locales. TRE also +provides a wide character API. + +Binary pattern and data support +------------------------------- + +TRE provides APIs which allow binary zero characters both in regexps +and searched strings. The standard API cannot be easily used to, for +example, search for printable words from binary data (although it is +possible with some hacking). Searching for patterns which contain +binary zeroes embedded is not possible at all with the standard API. + +Completely thread safe +---------------------- + +TRE is completely thread safe. All the exported functions are +re-entrant, and a single compiled regexp object can be used +simultaneously in multiple contexts; e.g. in `main()` and a signal +handler, or in many threads of a multithreaded application. + +Portable +-------- + +TRE is portable across multiple platforms. Below is a table of +platforms and compilers used to develop and test TRE: + + + + + + + +
| Platform     | Compiler                     |
|--------------|------------------------------|
| FreeBSD 14.1 | Clang 18                     |
| Ubuntu 22.04 | GCC 11                       |
| macOS 14.6   | Clang 14                     |
| Windows 11   | Microsoft Visual Studio 2022 |
+ +TRE should compile without changes on most modern POSIX-like +platforms, and be easily portable to any platform with a hosted C +implementation. + +Depending on the platform, you may need to install libutf8 to get +wide character and multibyte character set support. + +Free +---- + +TRE is released under a license which is essentially the same as the +“2 clause” BSD-style license used in NetBSD. See the file LICENSE for +details. + +Roadmap +------- + +There are currently two features, both related to collating elements, +missing from 100% POSIX compliance. These are: + +* Support for collating elements (e.g. `[[.\.]]`, where `\` is a + collating element). It is not possible to support multi-character + collating elements portably, since POSIX does not define a way to + determine whether a character sequence is a multi-character + collating element or not. + +* Support for equivalence classes, for example `[[=\=]]`, where + `\` is a collating element. An equivalence class matches any + character which has the same primary collation weight as `\`. + Again, POSIX provides no portable mechanism for determining the + primary collation weight of a collating element. + +Note that other portable regexp implementations don't support +collating elements either. The single exception is Regex++, which +comes with its own database for collating elements for different +locales. Support for collating elements and equivalence classes has +not been widely requested and is not very high on the TODO list at the +moment. + +These are other features I'm planning to implement real soon now: + +* All the missing GNU extensions enabled in GNU regex, such as + `[[:<:]]` and `[[:>:]]`. + +* A `REG_SHORTEST` `regexec()` flag for returning the shortest match + instead of the longest match. + +* Perl-compatible syntax: + * `[:^class:]` + Matches anything but the characters in class. Note that + `[^[:class:]]` works already, this would be just a convenience + shorthand. 
+ + * `\A` + Match only at beginning of string. + + * `\Z` + Match only at end of string, or before newline at the end. + + * `\z` + Match only at end of string. + + * `\l` + Lowercase next char (think vi). + + * `\u` + Uppercase next char (think vi). + + * `\L` + Lowercase till `\E` (think vi). + + * `\U` + Uppercase till `\E` (think vi). + + * `(?=pattern)` + Zero-width positive look-ahead assertions. + + * `(?!pattern)` + Zero-width negative look-ahead assertions. + + * `(?<=pattern)` + Zero-width positive look-behind assertions. + + * `(? +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include + +#include "tre-internal.h" +#include "xmalloc.h" + +int +tre_regncomp(regex_t *preg, const char *regex, size_t n, int cflags) +{ + int ret; + if (n > TRE_MAX_RE) + return REG_ESPACE; +#if TRE_WCHAR + tre_char_t *wregex; + size_t wlen; + + wregex = xmalloc(sizeof(tre_char_t) * (n + 1)); + if (wregex == NULL) + return REG_ESPACE; + + /* If the current locale uses the standard single byte encoding of + characters, we don't do a multibyte string conversion. If we did, + many applications which use the default locale would break since + the default "C" locale uses the 7-bit ASCII character set, and + all characters with the eighth bit set would be considered invalid. 
*/
+#if TRE_MULTIBYTE
+  if (TRE_MB_CUR_MAX == 1)
+#endif /* TRE_MULTIBYTE */
+    {
+      size_t i;
+      const unsigned char *str = (const unsigned char *)regex;
+      tre_char_t *wstr = wregex;
+
+      for (i = 0; i < n; i++)
+        *(wstr++) = *(str++);
+      wlen = n;
+    }
+#if TRE_MULTIBYTE
+  else
+    {
+      size_t consumed;
+      tre_char_t *wcptr = wregex;
+#ifdef HAVE_MBSTATE_T
+      mbstate_t state;
+      memset(&state, '\0', sizeof(state));
+#endif /* HAVE_MBSTATE_T */
+      while (n > 0)
+        {
+          consumed = tre_mbrtowc(wcptr, regex, n, &state);
+
+          switch (consumed)
+            {
+            case 0:
+              if (*regex == '\0')
+                consumed = 1;
+              else
+                {
+                  xfree(wregex);
+                  return REG_BADPAT;
+                }
+              break;
+            case -1:
+              DPRINT(("mbrtowc: error %d: %s.\n", errno, strerror(errno)));
+              xfree(wregex);
+              return REG_BADPAT;
+            case -2:
+              /* The last character wasn't complete. Let's not call it a
+                 fatal error. */
+              consumed = n;
+              break;
+            }
+          regex += consumed;
+          n -= consumed;
+          wcptr++;
+        }
+      wlen = wcptr - wregex;
+    }
+#endif /* TRE_MULTIBYTE */
+
+  wregex[wlen] = L'\0';
+  ret = tre_compile(preg, wregex, wlen, cflags);
+  xfree(wregex);
+#else /* !TRE_WCHAR */
+  ret = tre_compile(preg, (const tre_char_t *)regex, n, cflags);
+#endif /* !TRE_WCHAR */
+
+  return ret;
+}
+
+/* this version takes bytes literally, to be used with raw vectors
+
+   Compiles the first N bytes of REGEX into PREG with REG_USEBYTES OR'ed
+   into CFLAGS.  No multibyte decoding is performed: when TRE_WCHAR is
+   enabled every input byte is widened, as an unsigned char, into one
+   tre_char_t.
+
+   Returns REG_ESPACE when N exceeds TRE_MAX_RE or the temporary wide
+   buffer cannot be allocated; otherwise returns whatever tre_compile()
+   returns. */
+int
+tre_regncompb(regex_t *preg, const char *regex, size_t n, int cflags)
+{
+  int ret;
+  if (n > TRE_MAX_RE)
+    return REG_ESPACE;
+#if TRE_WCHAR /* wide chars = we need to convert it all to the wide format */
+  tre_char_t *wregex;
+  size_t i;
+
+  /* Allocate n + 1 elements, as tre_regncomp() and tre_regcompb() do.
+     With exactly n elements a zero-length pattern requests a zero-byte
+     allocation, which an allocator with malloc() semantics may answer
+     with NULL; that would be misreported as REG_ESPACE.  The extra slot
+     also lets the buffer be NUL-terminated like its siblings. */
+  wregex = xmalloc(sizeof(tre_char_t) * (n + 1));
+  if (wregex == NULL)
+    return REG_ESPACE;
+
+  for (i = 0; i < n; i++)
+    wregex[i] = (tre_char_t) ((unsigned char) regex[i]);
+  wregex[n] = L'\0';
+
+  ret = tre_compile(preg, wregex, n, cflags | REG_USEBYTES);
+  xfree(wregex);
+#else /* !TRE_WCHAR */
+  ret = tre_compile(preg, (const tre_char_t *)regex, n, cflags | REG_USEBYTES);
+#endif /* !TRE_WCHAR */
+
+  return ret;
+}
+
+int
+tre_regcomp(regex_t *preg, const char *regex, int cflags)
+{
+ size_t n = regex ? strlen(regex) : 0; + if (n > TRE_MAX_RE) + return REG_ESPACE; + return tre_regncomp(preg, regex, n, cflags); +} + +int +tre_regcompb(regex_t *preg, const char *regex, int cflags) +{ + int ret; + tre_char_t *wregex; + size_t i, n = regex ? strlen(regex) : 0; + const unsigned char *str = (const unsigned char *)regex; + tre_char_t *wstr; + + if (n > TRE_MAX_RE) + return REG_ESPACE; + wregex = xmalloc(sizeof(tre_char_t) * (n + 1)); + if (wregex == NULL) return REG_ESPACE; + wstr = wregex; + + for (i = 0; i < n; i++) + *(wstr++) = *(str++); + wregex[n] = L'\0'; + ret = tre_compile(preg, wregex, n, cflags | REG_USEBYTES); + xfree(wregex); + return ret; +} + + +#ifdef TRE_WCHAR +int +tre_regwncomp(regex_t *preg, const wchar_t *regex, size_t n, int cflags) +{ + if (n > TRE_MAX_RE) + return REG_ESPACE; + return tre_compile(preg, regex, n, cflags); +} + +int +tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags) +{ + size_t n = regex ? wcslen(regex) : 0; + if (n > TRE_MAX_RE) + return REG_ESPACE; + return tre_compile(preg, regex, n, cflags); +} +#endif /* TRE_WCHAR */ + +void +tre_regfree(regex_t *preg) +{ + tre_free(preg); +} + +/* EOF */ diff --git a/deps/tre/lib/regerror.c b/deps/tre/lib/regerror.c new file mode 100644 index 000000000..2f8326ce7 --- /dev/null +++ b/deps/tre/lib/regerror.c @@ -0,0 +1,86 @@ +/* + tre_regerror.c - POSIX tre_regerror() implementation for TRE. + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. 
+
+*/
+/* NOTE(review): several #include lines in this patch carry no header name, presumably lost in extraction; restore them from the real file before building. */
+#ifdef HAVE_CONFIG_H
+#include
+#endif /* HAVE_CONFIG_H */
+
+#include
+#ifdef HAVE_WCHAR_H
+#include
+#endif /* HAVE_WCHAR_H */
+#ifdef HAVE_WCTYPE_H
+#include
+#endif /* HAVE_WCTYPE_H */
+
+#include "tre-internal.h"
+/* Without HAVE_GETTEXT the translation calls collapse to the untranslated string. */
+#ifdef HAVE_GETTEXT
+#include
+#else
+#define dgettext(p, s) s
+#define gettext(s) s
+#endif
+
+#define _(String) dgettext(PACKAGE, String)
+#define gettext_noop(String) String
+/* Two-level stringification: xstr() expands its argument (e.g. RE_DUP_MAX) before str() quotes it. */
+#define xstr(s) str(s)
+#define str(s) #s
+
+/* Error message strings for error codes listed in `tre.h'. This list
+   needs to be in sync with the codes listed there, naturally. */
+static const char *tre_error_messages[] =
+  { gettext_noop("No error"), /* REG_OK */
+    gettext_noop("No match"), /* REG_NOMATCH */
+    gettext_noop("Invalid regexp"), /* REG_BADPAT */
+    gettext_noop("Unknown collating element"), /* REG_ECOLLATE */
+    gettext_noop("Unknown character class name"), /* REG_ECTYPE */
+    gettext_noop("Trailing backslash"), /* REG_EESCAPE */
+    gettext_noop("Invalid back reference"), /* REG_ESUBREG */
+    gettext_noop("Missing ']'"), /* REG_EBRACK */
+    gettext_noop("Missing ')'"), /* REG_EPAREN */
+    gettext_noop("Missing '}'"), /* REG_EBRACE */
+    gettext_noop("Invalid contents of {}"), /* REG_BADBR */
+    gettext_noop("Invalid character range"), /* REG_ERANGE */
+    gettext_noop("Out of memory"), /* REG_ESPACE */
+    gettext_noop("Invalid use of repetition operators"), /* REG_BADRPT */
+    gettext_noop("Maximum repetition in {} larger than " xstr(RE_DUP_MAX)), /* REG_BADMAX */
+  };
+/* Copies the message for `errcode' into `errbuf' (truncated and NUL-terminated to fit `errbuf_size') and returns the full message size including the NUL; `preg' is unused. */
+size_t
+tre_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
+{
+  const char *err;
+  size_t err_len;
+
+  /*LINTED*/(void)&preg;
+  if (errcode >= 0
+      && errcode < (int)(sizeof(tre_error_messages)
+                         / sizeof(*tre_error_messages)))
+    err = gettext(tre_error_messages[errcode]);
+  else
+    err = gettext("Unknown error");
+  /* The returned size counts the terminating NUL, even when errbuf is absent or too small. */
+  err_len = strlen(err) + 1;
+  if (errbuf_size > 0 && errbuf != NULL)
+    {
+      if (err_len > errbuf_size)
+        {
+          strncpy(errbuf, err, errbuf_size - 1);
+          errbuf[errbuf_size - 1] = '\0';
+        }
+      else
+        {
+          strcpy(errbuf, err);
+        }
+    }
+  return err_len;
+}
+
+/* EOF */
diff --git a/deps/tre/lib/regexec.c b/deps/tre/lib/regexec.c
new file mode 100644
index 000000000..c70eb70a4
--- /dev/null
+++ b/deps/tre/lib/regexec.c
@@ -0,0 +1,584 @@
+/*
+  tre_regexec.c - TRE POSIX compatible matching functions (and more).
+
+  This software is released under a BSD-style license.
+  See the file LICENSE for details and copyright.
+
+*/
+
+#ifdef HAVE_CONFIG_H
+#include
+#endif /* HAVE_CONFIG_H */
+
+#ifdef TRE_USE_ALLOCA
+/* AIX requires this to be the first thing in the file. */
+#ifndef __GNUC__
+# if HAVE_ALLOCA_H
+# include
+# else
+# ifdef _AIX
+ #pragma alloca
+# else
+# ifndef alloca /* predefined by HP cc +Olibcalls */
+char *alloca ();
+# endif
+# endif
+# endif
+#endif
+#endif /* TRE_USE_ALLOCA */
+
+#include
+#include
+#include
+#ifdef HAVE_WCHAR_H
+#include
+#endif /* HAVE_WCHAR_H */
+#ifdef HAVE_WCTYPE_H
+#include
+#endif /* HAVE_WCTYPE_H */
+#ifndef TRE_WCHAR
+#include
+#endif /* !TRE_WCHAR */
+#ifdef HAVE_MALLOC_H
+#include
+#endif /* HAVE_MALLOC_H */
+#include
+
+#include "tre-internal.h"
+#include "xmalloc.h"
+
+/* Literal alternatives are grouped by the first byte so the matcher can
+ * reach the relevant candidates in O(1). In nocase mode the lookup uses the
+ * same folded byte mapping that was applied at compile time. */
+static void
+tre_litopt_candidate_range(const tre_literal_opt_t *opt, unsigned char first_byte,
+                           size_t *start, size_t *end)
+{
+  unsigned char key = opt->nocase ? opt->fold_map[first_byte] : first_byte;
+  *start = opt->start_offsets[key];
+  *end = opt->start_offsets[key + 1];
+}
+/* Returns 1 when the `len' bytes at `haystack' equal `needle', 0 otherwise; a non-NULL `fold_map' is applied to each haystack byte first. */
+static int
+tre_litopt_bytes_equal(const unsigned char *haystack,
+                       const unsigned char *needle, size_t len,
+                       const unsigned char *fold_map)
+{
+  size_t i;
+  /* No fold table means a plain case-sensitive comparison. */
+  if (fold_map == NULL)
+    return memcmp(haystack, needle, len) == 0;
+  /* Only the haystack byte is folded; `needle' is compared as stored. */
+  for (i = 0; i < len; i++)
+    if (fold_map[haystack[i]] != needle[i])
+      return 0;
+  return 1;
+}
+/* Case-sensitive substring search: returns 1 and stores the offset just past the first occurrence in *match_end_ofs (if non-NULL), else returns 0. */
+static int
+tre_litopt_contains_case(const unsigned char *haystack, size_t hay_len,
+                         const unsigned char *needle, size_t needle_len,
+                         int *match_end_ofs)
+{
+  const unsigned char *p;
+  size_t remaining;
+  /* needle_len is assumed to be > 0: needle[0] is read below. */
+  if (needle_len > hay_len)
+    return 0;
+
+  p = haystack;
+  remaining = hay_len;
+  while (remaining >= needle_len)
+    {
+      p = memchr(p, needle[0], remaining - needle_len + 1);
+      if (p == NULL)
+        return 0;
+      if (memcmp(p, needle, needle_len) == 0)
+        {
+          if (match_end_ofs != NULL)
+            *match_end_ofs = (int)(p - haystack + needle_len);
+          return 1;
+        }
+      remaining = hay_len - (size_t)(p - haystack) - 1;
+      p++;
+    }
+  return 0;
+}
+
+/* Nocase substring matching is still byte-oriented, but scanning once and
+ * only checking literals that share the same folded first byte avoids the
+ * old O(haystack * literals) restart pattern.
*/
+static int
+tre_litopt_contains_nocase(const tre_literal_opt_t *opt,
+                           const unsigned char *haystack, size_t hay_len,
+                           int *match_end_ofs)
+{
+  size_t i, start, end, j;
+  /* At each haystack offset, try only the literals filed under that byte's bucket. */
+  for (i = 0; i < hay_len; i++)
+    {
+      tre_litopt_candidate_range(opt, haystack[i], &start, &end);
+      for (j = start; j < end; j++)
+        {
+          const tre_literal_opt_literal_t *lit = &opt->literals[j];
+          if (lit->len <= hay_len - i
+              && tre_litopt_bytes_equal(haystack + i, lit->data, lit->len,
+                                        opt->fold_map))
+            {
+              if (match_end_ofs != NULL)
+                *match_end_ofs = (int)(i + lit->len);
+              return 1;
+            }
+        }
+    }
+  return 0;
+}
+/* Matches the `len' bytes of `string' against the precompiled literal set according to opt->mode; returns REG_OK (storing the match end offset in *match_end_ofs when non-NULL) or REG_NOMATCH. */
+static reg_errcode_t
+tre_match_literal_opt(const tre_tnfa_t *tnfa, const char *string, size_t len,
+                      int eflags, int *match_end_ofs)
+{
+  const tre_literal_opt_t *opt = &tnfa->literal_opt;
+  const unsigned char *haystack = (const unsigned char *)string;
+  size_t start = 0, end = opt->num_literals, i;
+  const unsigned char *fold_map = opt->nocase ? opt->fold_map : NULL;
+  /* Start-anchored modes fail under REG_NOTBOL and end-anchored modes fail under REG_NOTEOL. */
+  if ((opt->mode == TRE_LITERAL_OPT_PREFIX
+       || opt->mode == TRE_LITERAL_OPT_EXACT)
+      && (eflags & REG_NOTBOL))
+    return REG_NOMATCH;
+  if ((opt->mode == TRE_LITERAL_OPT_SUFFIX
+       || opt->mode == TRE_LITERAL_OPT_EXACT)
+      && (eflags & REG_NOTEOL))
+    return REG_NOMATCH;
+  /* For start-anchored modes only literals bucketed under the first input byte can match. */
+  if ((opt->mode == TRE_LITERAL_OPT_EXACT
+       || opt->mode == TRE_LITERAL_OPT_PREFIX)
+      && len > 0)
+    tre_litopt_candidate_range(opt, haystack[0], &start, &end);
+
+  if (opt->mode == TRE_LITERAL_OPT_CONTAINS)
+    {
+      if (opt->nocase)
+        return tre_litopt_contains_nocase(opt, haystack, len, match_end_ofs)
+          ? REG_OK : REG_NOMATCH;
+      /* Case-sensitive: search for each literal in turn; the first one found wins. */
+      for (i = 0; i < opt->num_literals; i++)
+        {
+          const tre_literal_opt_literal_t *lit = &opt->literals[i];
+          if (tre_litopt_contains_case(haystack, len, lit->data, lit->len,
+                                       match_end_ofs))
+            return REG_OK;
+        }
+      return REG_NOMATCH;
+    }
+  /* Anchored modes: compare each candidate literal at the fixed position its mode implies. */
+  for (i = start; i < end; i++)
+    {
+      const tre_literal_opt_literal_t *lit = &opt->literals[i];
+
+      switch (opt->mode)
+        {
+        case TRE_LITERAL_OPT_EXACT:
+          if (len == lit->len
+              && tre_litopt_bytes_equal(haystack, lit->data, len, fold_map))
+            {
+              if (match_end_ofs != NULL)
+                *match_end_ofs = (int)len;
+              return REG_OK;
+            }
+          break;
+
+        case TRE_LITERAL_OPT_PREFIX:
+          if (len >= lit->len
+              && tre_litopt_bytes_equal(haystack, lit->data, lit->len,
+                                        fold_map))
+            {
+              if (match_end_ofs != NULL)
+                *match_end_ofs = (int)lit->len;
+              return REG_OK;
+            }
+          break;
+
+        case TRE_LITERAL_OPT_SUFFIX:
+          if (len >= lit->len
+              && tre_litopt_bytes_equal(haystack + len - lit->len, lit->data,
+                                        lit->len, fold_map))
+            {
+              if (match_end_ofs != NULL)
+                *match_end_ofs = (int)len;
+              return REG_OK;
+            }
+          break;
+
+        case TRE_LITERAL_OPT_CONTAINS:
+        case TRE_LITERAL_OPT_NONE:
+          break;
+        }
+    }
+
+  return REG_NOMATCH;
+}
+
+
+/* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match
+   endpoint values. */
+void
+tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
+                const tre_tnfa_t *tnfa, int *tags, int match_eo)
+{
+  tre_submatch_data_t *submatch_data;
+  unsigned int i, j;
+  int *parents;
+  /* `tags' is only dereferenced inside the branch below (match_eo >= 0 and REG_NOSUB clear). */
+  i = 0;
+  if (match_eo >= 0 && !(cflags & REG_NOSUB))
+    {
+      /* Construct submatch offsets from the tags. */
+      DPRINT(("end tag = t%d = %d\n", tnfa->end_tag, match_eo));
+      submatch_data = tnfa->submatch_data;
+      while (i < tnfa->num_submatches && i < nmatch)
+        {
+          if (submatch_data[i].so_tag == tnfa->end_tag)
+            pmatch[i].rm_so = match_eo;
+          else
+            pmatch[i].rm_so = tags[submatch_data[i].so_tag];
+
+          if (submatch_data[i].eo_tag == tnfa->end_tag)
+            pmatch[i].rm_eo = match_eo;
+          else
+            pmatch[i].rm_eo = tags[submatch_data[i].eo_tag];
+
+          /* If either of the endpoints were not used, this submatch
+             was not part of the match. */
+          if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo == -1)
+            pmatch[i].rm_so = pmatch[i].rm_eo = -1;
+
+          DPRINT(("pmatch[%d] = {t%d = %d, t%d = %d}\n", i,
+                  submatch_data[i].so_tag, pmatch[i].rm_so,
+                  submatch_data[i].eo_tag, pmatch[i].rm_eo));
+          i++;
+        }
+      /* Reset all submatches that are not within all of their parent
+         submatches. */
+      i = 0;
+      while (i < tnfa->num_submatches && i < nmatch)
+        {
+          if (pmatch[i].rm_eo == -1)
+            assert(pmatch[i].rm_so == -1);
+          assert(pmatch[i].rm_so <= pmatch[i].rm_eo);
+
+          parents = submatch_data[i].parents;
+          if (parents != NULL)
+            for (j = 0; parents[j] >= 0; j++)
+              {
+                DPRINT(("pmatch[%d] parent %d\n", i, parents[j]));
+                if (pmatch[i].rm_so < pmatch[parents[j]].rm_so
+                    || pmatch[i].rm_eo > pmatch[parents[j]].rm_eo)
+                  pmatch[i].rm_so = pmatch[i].rm_eo = -1;
+              }
+          i++;
+        }
+    }
+  /* Mark every remaining entry (all of them when the branch above was skipped) as unused. */
+  while (i < nmatch)
+    {
+      pmatch[i].rm_so = -1;
+      pmatch[i].rm_eo = -1;
+      i++;
+    }
+}
+
+
+/*
+  Wrapper functions for POSIX compatible regexp matching.
+*/ + +int +tre_have_backrefs(const regex_t *preg) +{ + tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; + return tnfa->have_backrefs; +} + +int +tre_have_approx(const regex_t *preg) +{ + tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; + return tnfa->have_approx; +} + +static int +tre_match(const tre_tnfa_t *tnfa, const void *string, ssize_t len, + tre_str_type_t type, size_t nmatch, regmatch_t pmatch[], + int eflags) +{ + reg_errcode_t status; + int *tags = NULL, eo; + if (tnfa->num_tags > 0 && nmatch > 0) + { +#ifdef TRE_USE_ALLOCA + tags = alloca(sizeof(*tags) * tnfa->num_tags); +#else /* !TRE_USE_ALLOCA */ + tags = xmalloc(sizeof(*tags) * tnfa->num_tags); +#endif /* !TRE_USE_ALLOCA */ + if (tags == NULL) + return REG_ESPACE; + } + + if (type == STR_BYTE + && tnfa->literal_opt.mode != TRE_LITERAL_OPT_NONE + && (nmatch == 0 || (tnfa->cflags & REG_NOSUB)) +#ifdef TRE_APPROX + && !(eflags & REG_APPROX_MATCHER) +#endif /* TRE_APPROX */ + && !(eflags & REG_BACKTRACKING_MATCHER)) + { + size_t byte_len = (len >= 0) ? (size_t)len : strlen((const char *)string); + status = tre_match_literal_opt(tnfa, string, byte_len, eflags, &eo); + + /* Even when the caller asked for no submatches, regexec() still has to + * clear any pmatch entries it was handed. The normal matcher path does + * this through tre_fill_pmatch(), so mirror that behavior here. */ + if (status == REG_OK && nmatch > 0) + tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, NULL, eo); + +#ifndef TRE_USE_ALLOCA + if (tags) + xfree(tags); +#endif /* !TRE_USE_ALLOCA */ + return status; + } + + /* Dispatch to the appropriate matcher. */ + if (tnfa->have_backrefs || eflags & REG_BACKTRACKING_MATCHER) + { + /* The regex has back references, use the backtracking matcher. */ + if (type == STR_USER) + { + const tre_str_source *source = string; + if (source->rewind == NULL || source->compare == NULL) + { + /* The backtracking matcher requires rewind and compare + capabilities from the input stream. 
*/ +#ifndef TRE_USE_ALLOCA + if (tags) + xfree(tags); +#endif /* !TRE_USE_ALLOCA */ + return REG_BADPAT; + } + } + status = tre_tnfa_run_backtrack(tnfa, string, len, type, + tags, eflags, &eo); + } +#ifdef TRE_APPROX + else if (tnfa->have_approx || eflags & REG_APPROX_MATCHER) + { + /* The regex uses approximate matching, use the approximate matcher. */ + regamatch_t match; + regaparams_t params; + tre_regaparams_default(¶ms); + params.max_err = 0; + params.max_cost = 0; + status = tre_tnfa_run_approx(tnfa, string, len, type, tags, + &match, params, eflags, &eo); + } +#endif /* TRE_APPROX */ + else + { + /* Exact matching, no back references, use the parallel matcher. */ + status = tre_tnfa_run_parallel(tnfa, string, len, type, + tags, eflags, &eo); + } + + if (status == REG_OK) + /* A match was found, so fill the submatch registers. */ + tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo); +#ifndef TRE_USE_ALLOCA + if (tags) + xfree(tags); +#endif /* !TRE_USE_ALLOCA */ + return status; +} + +int +tre_regnexec(const regex_t *preg, const char *str, size_t len, + size_t nmatch, regmatch_t pmatch[], int eflags) +{ + tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; + tre_str_type_t type = (TRE_MB_CUR_MAX == 1) ? 
STR_BYTE : STR_MBS; + + return tre_match(tnfa, str, len, type, nmatch, pmatch, eflags); +} + +#ifdef TRE_USE_GNUC_REGEXEC_FPL +int +tre_regexec(const regex_t *preg, const char *str, + size_t nmatch, regmatch_t pmatch[_Restrict_arr_ _REGEX_NELTS (nmatch)], + int eflags) +#else +int +tre_regexec(const regex_t *preg, const char *str, + size_t nmatch, regmatch_t pmatch[], int eflags) +#endif +{ + return tre_regnexec(preg, str, -1, nmatch, pmatch, eflags); +} + +int +tre_regexecb(const regex_t *preg, const char *str, + size_t nmatch, regmatch_t pmatch[], int eflags) +{ + tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; + + return tre_match(tnfa, str, -1, STR_BYTE, nmatch, pmatch, eflags); +} + +int +tre_regnexecb(const regex_t *preg, const char *str, size_t len, + size_t nmatch, regmatch_t pmatch[], int eflags) +{ + tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; + + return tre_match(tnfa, str, len, STR_BYTE, nmatch, pmatch, eflags); +} + + +#ifdef TRE_WCHAR + +int +tre_regwnexec(const regex_t *preg, const wchar_t *str, size_t len, + size_t nmatch, regmatch_t pmatch[], int eflags) +{ + tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; + return tre_match(tnfa, str, len, STR_WIDE, nmatch, pmatch, eflags); +} + +int +tre_regwexec(const regex_t *preg, const wchar_t *str, + size_t nmatch, regmatch_t pmatch[], int eflags) +{ + return tre_regwnexec(preg, str, -1, nmatch, pmatch, eflags); +} + +#endif /* TRE_WCHAR */ + +int +tre_reguexec(const regex_t *preg, const tre_str_source *str, + size_t nmatch, regmatch_t pmatch[], int eflags) +{ + tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; + return tre_match(tnfa, str, -1, STR_USER, nmatch, pmatch, eflags); +} + + +#ifdef TRE_APPROX + +/* + Wrapper functions for approximate regexp matching. 
+*/ + +static int +tre_match_approx(const tre_tnfa_t *tnfa, const void *string, ssize_t len, + tre_str_type_t type, regamatch_t *match, regaparams_t params, + int eflags) +{ + reg_errcode_t status; + int *tags = NULL, eo; + + /* If the regexp does not use approximate matching features, the + maximum cost is zero, and the approximate matcher isn't forced, + use the exact matcher instead. */ + if (params.max_cost == 0 && !tnfa->have_approx + && !(eflags & REG_APPROX_MATCHER)) + return tre_match(tnfa, string, len, type, match->nmatch, match->pmatch, + eflags); + + /* Back references are not supported by the approximate matcher. */ + if (tnfa->have_backrefs) + return REG_BADPAT; + + if (tnfa->num_tags > 0 && match->nmatch > 0) + { +#if TRE_USE_ALLOCA + tags = alloca(sizeof(*tags) * tnfa->num_tags); +#else /* !TRE_USE_ALLOCA */ + tags = xmalloc(sizeof(*tags) * tnfa->num_tags); +#endif /* !TRE_USE_ALLOCA */ + if (tags == NULL) + return REG_ESPACE; + } + status = tre_tnfa_run_approx(tnfa, string, len, type, tags, + match, params, eflags, &eo); + if (status == REG_OK) + tre_fill_pmatch(match->nmatch, match->pmatch, tnfa->cflags, tnfa, tags, eo); +#ifndef TRE_USE_ALLOCA + if (tags) + xfree(tags); +#endif /* !TRE_USE_ALLOCA */ + return status; +} + +int +tre_reganexec(const regex_t *preg, const char *str, size_t len, + regamatch_t *match, regaparams_t params, int eflags) +{ + tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; + tre_str_type_t type = (TRE_MB_CUR_MAX == 1) ? 
STR_BYTE : STR_MBS; + + return tre_match_approx(tnfa, str, len, type, match, params, eflags); +} + +int +tre_regaexec(const regex_t *preg, const char *str, + regamatch_t *match, regaparams_t params, int eflags) +{ + return tre_reganexec(preg, str, -1, match, params, eflags); +} + +int +tre_regaexecb(const regex_t *preg, const char *str, + regamatch_t *match, regaparams_t params, int eflags) +{ + tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; + + return tre_match_approx(tnfa, str, -1, STR_BYTE, match, params, eflags); +} + +#ifdef TRE_WCHAR + +int +tre_regawnexec(const regex_t *preg, const wchar_t *str, size_t len, + regamatch_t *match, regaparams_t params, int eflags) +{ + tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; + return tre_match_approx(tnfa, str, len, STR_WIDE, + match, params, eflags); +} + +int +tre_regawexec(const regex_t *preg, const wchar_t *str, + regamatch_t *match, regaparams_t params, int eflags) +{ + return tre_regawnexec(preg, str, -1, match, params, eflags); +} + +#endif /* TRE_WCHAR */ + +void +tre_regaparams_default(regaparams_t *params) +{ + memset(params, 0, sizeof(*params)); + params->cost_ins = 1; + params->cost_del = 1; + params->cost_subst = 1; + params->max_cost = INT_MAX; + params->max_ins = INT_MAX; + params->max_del = INT_MAX; + params->max_subst = INT_MAX; + params->max_err = INT_MAX; +} + +#endif /* TRE_APPROX */ + +/* EOF */ diff --git a/deps/tre/lib/tre-ast.c b/deps/tre/lib/tre-ast.c new file mode 100644 index 000000000..5a4bb1940 --- /dev/null +++ b/deps/tre/lib/tre-ast.c @@ -0,0 +1,226 @@ +/* + tre-ast.c - Abstract syntax tree (AST) routines + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. 
+
+*/
+/* NOTE(review): the bare #include lines below have no header name, presumably lost in extraction; restore before building. */
+#ifdef HAVE_CONFIG_H
+#include
+#endif /* HAVE_CONFIG_H */
+#include
+
+#include "tre-ast.h"
+#include "tre-mem.h"
+/* Allocates a node and a `size'-byte payload with tre_mem_calloc(), sets the type and marks nullable/submatch_id as -1; returns NULL if either allocation fails. */
+tre_ast_node_t *
+tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size)
+{
+  tre_ast_node_t *node;
+
+  node = tre_mem_calloc(mem, sizeof(*node));
+  if (!node)
+    return NULL;
+  node->obj = tre_mem_calloc(mem, size);
+  if (!node->obj)
+    return NULL;
+  node->type = type;
+  node->nullable = -1;
+  node->submatch_id = -1;
+
+  return node;
+}
+/* Creates a LITERAL node covering codes code_min..code_max with position -1; returns NULL on allocation failure. */
+tre_ast_node_t *
+tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max)
+{
+  tre_ast_node_t *node;
+  tre_literal_t *lit;
+
+  node = tre_ast_new_node(mem, LITERAL, sizeof(tre_literal_t));
+  if (!node)
+    return NULL;
+  lit = node->obj;
+  lit->code_min = code_min;
+  lit->code_max = code_max;
+  lit->position = -1;
+
+  return node;
+}
+/* Creates an ITERATION node repeating `arg' between `min' and `max' times; `arg' must be non-NULL because its submatch count is copied. */
+tre_ast_node_t *
+tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max,
+                 int minimal)
+{
+  tre_ast_node_t *node;
+  tre_iteration_t *iter;
+
+  node = tre_ast_new_node(mem, ITERATION, sizeof(tre_iteration_t));
+  if (!node)
+    return NULL;
+  iter = node->obj;
+  iter->arg = arg;
+  iter->min = min;
+  iter->max = max;
+  iter->minimal = minimal;
+  node->num_submatches = arg->num_submatches;
+
+  return node;
+}
+/* Creates a UNION node over `left' and `right' (both non-NULL); its submatch count is the sum of the children's. */
+tre_ast_node_t *
+tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
+{
+  tre_ast_node_t *node;
+
+  node = tre_ast_new_node(mem, UNION, sizeof(tre_union_t));
+  if (node == NULL)
+    return NULL;
+  ((tre_union_t *)node->obj)->left = left;
+  ((tre_union_t *)node->obj)->right = right;
+  node->num_submatches = left->num_submatches + right->num_submatches;
+
+  return node;
+}
+/* Creates a CATENATION node over `left' and `right' (both non-NULL); its submatch count is the sum of the children's. */
+tre_ast_node_t *
+tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left,
+                       tre_ast_node_t *right)
+{
+  tre_ast_node_t *node;
+
+  node = tre_ast_new_node(mem, CATENATION, sizeof(tre_catenation_t));
+  if (node == NULL)
+    return NULL;
+  ((tre_catenation_t *)node->obj)->left = left;
+  ((tre_catenation_t *)node->obj)->right = right;
+  node->num_submatches = left->num_submatches + right->num_submatches;
+
+  return node;
+}
+
+#ifdef TRE_DEBUG
+/* Writes `i' spaces to `stream'. */
+static void
+tre_findent(FILE *stream, int i)
+{
+  while (i-- > 0)
+    fputc(' ', stream);
+}
+/* Debug-prints the TRE_PARAM_LAST entries of `params' via DPRINT; does nothing when `params' is NULL. */
+void
+tre_print_params(int *params)
+{
+  int i;
+  if (params)
+    {
+      DPRINT(("params ["));
+      for (i = 0; i < TRE_PARAM_LAST; i++)
+        {
+          if (params[i] == TRE_PARAM_UNSET)
+            DPRINT(("unset"));
+          else if (params[i] == TRE_PARAM_DEFAULT)
+            DPRINT(("default"));
+          else
+            DPRINT(("%d", params[i]));
+          if (i < TRE_PARAM_LAST - 1)
+            DPRINT((", "));
+        }
+      DPRINT(("]"));
+    }
+}
+/* Recursively prints `ast' to `stream', indenting each child level by two more spaces. */
+static void
+tre_do_print(FILE *stream, tre_ast_node_t *ast, int indent)
+{
+  int code_min, code_max, pos;
+  int num_tags = ast->num_tags;
+  tre_literal_t *lit;
+  tre_iteration_t *iter;
+
+  tre_findent(stream, indent);
+  switch (ast->type)
+    {
+    case LITERAL:
+      lit = ast->obj;
+      code_min = lit->code_min;
+      code_max = lit->code_max;
+      pos = lit->position;
+      if (IS_EMPTY(lit))
+        {
+          fprintf(stream, "literal empty\n");
+        }
+      else if (IS_ASSERTION(lit))
+        {
+          int i;
+          char *assertions[] = { "bol", "eol", "ctype", "!ctype",
+                                 "bow", "eow", "wb", "!wb" };
+          if (code_max >= ASSERT_LAST << 1)
+            assert(0);
+          fprintf(stream, "assertions: ");
+          for (i = 0; (1 << i) <= ASSERT_LAST; i++)
+            if (code_max & (1 << i))
+              fprintf(stream, "%s ", assertions[i]);
+          fprintf(stream, "\n");
+        }
+      else if (IS_TAG(lit))
+        {
+          fprintf(stream, "tag %d\n", code_max);
+        }
+      else if (IS_BACKREF(lit))
+        {
+          fprintf(stream, "backref %d, pos %d\n", code_max, pos);
+        }
+      else if (IS_PARAMETER(lit))
+        {
+          tre_print_params(lit->u.params);
+          fprintf(stream, "\n");
+        }
+      else
+        {
+          fprintf(stream, "literal (%c, %c) (%d, %d), pos %d, sub %d, "
+                  "%d tags\n", code_min, code_max, code_min, code_max, pos,
+                  ast->submatch_id, num_tags);
+        }
+      break;
+    case ITERATION:
+      iter = ast->obj;
+      fprintf(stream, "iteration {%d, %d}, sub %d, %d tags, %s\n",
+              iter->min, iter->max, ast->submatch_id, num_tags,
+              iter->minimal ? "minimal" : "greedy");
+      tre_do_print(stream, iter->arg, indent + 2);
+      break;
+    case UNION:
+      fprintf(stream, "union, sub %d, %d tags\n", ast->submatch_id, num_tags);
+      tre_do_print(stream, ((tre_union_t *)ast->obj)->left, indent + 2);
+      tre_do_print(stream, ((tre_union_t *)ast->obj)->right, indent + 2);
+      break;
+    case CATENATION:
+      fprintf(stream, "catenation, sub %d, %d tags\n", ast->submatch_id,
+              num_tags);
+      tre_do_print(stream, ((tre_catenation_t *)ast->obj)->left, indent + 2);
+      tre_do_print(stream, ((tre_catenation_t *)ast->obj)->right, indent + 2);
+      break;
+    default:
+      assert(0);
+      break;
+    }
+}
+/* Prints `ast' to `stream' starting at indent level 0. */
+static void
+tre_ast_fprint(FILE *stream, tre_ast_node_t *ast)
+{
+  tre_do_print(stream, ast, 0);
+}
+/* Prints an "AST:" header followed by the whole tree to stdout. */
+void
+tre_ast_print(tre_ast_node_t *tree)
+{
+  printf("AST:\n");
+  tre_ast_fprint(stdout, tree);
+}
+
+#endif /* TRE_DEBUG */
+
+/* EOF */
diff --git a/deps/tre/lib/tre-ast.h b/deps/tre/lib/tre-ast.h
new file mode 100644
index 000000000..190c4b033
--- /dev/null
+++ b/deps/tre/lib/tre-ast.h
@@ -0,0 +1,128 @@
+/*
+  tre-ast.h - Abstract syntax tree (AST) definitions
+
+  This software is released under a BSD-style license.
+  See the file LICENSE for details and copyright.
+
+*/
+
+
+#ifndef TRE_AST_H
+#define TRE_AST_H 1
+
+#include "tre-mem.h"
+#include "tre-internal.h"
+#include "tre-compile.h"
+
+/* The different AST node types. */
+typedef enum {
+  LITERAL,
+  CATENATION,
+  ITERATION,
+  UNION
+} tre_ast_type_t;
+
+/* Special subtypes of TRE_LITERAL, stored as negative values in `code_min'. */
+#define EMPTY     -1 /* Empty leaf (denotes empty string). */
+#define ASSERTION -2 /* Assertion leaf. */
+#define TAG       -3 /* Tag leaf. */
+#define BACKREF   -4 /* Back reference leaf. */
+#define PARAMETER -5 /* Parameter. */
+/* Predicates on a tre_literal_t pointer `x'; any negative code_min marks a special leaf. */
+#define IS_SPECIAL(x) ((x)->code_min < 0)
+#define IS_EMPTY(x) ((x)->code_min == EMPTY)
+#define IS_ASSERTION(x) ((x)->code_min == ASSERTION)
+#define IS_TAG(x) ((x)->code_min == TAG)
+#define IS_BACKREF(x) ((x)->code_min == BACKREF)
+#define IS_PARAMETER(x) ((x)->code_min == PARAMETER)
+
+
+/* A generic AST node. All AST nodes consist of this node on the top
+   level with `obj' pointing to the actual content. */
+typedef struct {
+  tre_ast_type_t type; /* Type of the node. */
+  void *obj; /* Pointer to actual node. */
+  int nullable;
+  int submatch_id;
+  unsigned int num_submatches;
+  unsigned int num_tags;
+  tre_pos_and_tags_t *firstpos;
+  tre_pos_and_tags_t *lastpos;
+} tre_ast_node_t;
+
+
+/* A "literal" node. These are created for assertions, back references,
+   tags, matching parameter settings, and all expressions that match one
+   character. */
+typedef struct {
+  long code_min;
+  long code_max;
+  int position;
+  union {
+    tre_ctype_t class;
+    int *params;
+  } u;
+  tre_ctype_t *neg_classes;
+} tre_literal_t;
+
+/* A "catenation" node. These are created when two regexps are concatenated.
+   If there is more than one subexpression in sequence, the `left' part
+   holds all but the last, and `right' part holds the last subexpression
+   (catenation is left associative). */
+typedef struct {
+  tre_ast_node_t *left;
+  tre_ast_node_t *right;
+} tre_catenation_t;
+
+/* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}"
+   operators. */
+typedef struct {
+  /* Subexpression to match. */
+  tre_ast_node_t *arg;
+  /* Minimum number of consecutive matches. */
+  int min;
+  /* Maximum number of consecutive matches. */
+  int max;
+  /* If 0, match as many characters as possible, if 1 match as few as
+     possible. Note that this does not always mean the same thing as
+     matching as many/few repetitions as possible. */
+  unsigned int minimal:1;
+  /* Approximate matching parameters (or NULL). */
+  int *params;
+} tre_iteration_t;
+
+/* A "union" node. These are created for the "|" operator. */
+typedef struct {
+  tre_ast_node_t *left;
+  tre_ast_node_t *right;
+} tre_union_t;
+/* Node constructors; each returns NULL when allocation from `mem' fails. */
+tre_ast_node_t *
+tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size);
+
+tre_ast_node_t *
+tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max);
+
+tre_ast_node_t *
+tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max,
+                 int minimal);
+
+tre_ast_node_t *
+tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right);
+
+tre_ast_node_t *
+tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left,
+                       tre_ast_node_t *right);
+
+#ifdef TRE_DEBUG
+void
+tre_ast_print(tre_ast_node_t *tree);
+
+/* XXX - rethink AST printing API */
+void
+tre_print_params(int *params);
+#endif /* TRE_DEBUG */
+
+#endif /* TRE_AST_H */
+
+/* EOF */
diff --git a/deps/tre/lib/tre-compile.c b/deps/tre/lib/tre-compile.c
new file mode 100644
index 000000000..a3573df5a
--- /dev/null
+++ b/deps/tre/lib/tre-compile.c
@@ -0,0 +1,2673 @@
+/*
+  tre-compile.c - TRE regex compiler
+
+  This software is released under a BSD-style license.
+  See the file LICENSE for details and copyright.
+
+*/
+
+/*
+  TODO:
+   - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
+     function calls.
+*/
+
+/* NOTE(review): the bare #include lines below have no header name, presumably lost in extraction; restore before building. */
+#ifdef HAVE_CONFIG_H
+#include
+#endif /* HAVE_CONFIG_H */
+#include
+#include
+#include
+
+#include "tre-internal.h"
+#include "tre-mem.h"
+#include "tre-stack.h"
+#include "tre-ast.h"
+#include "tre-parse.h"
+#include "tre-compile.h"
+#include "xmalloc.h"
+/* Growable array of borrowed AST node pointers (the nodes themselves are not owned or freed by the vector). */
+typedef struct {
+  const tre_ast_node_t **nodes;
+  size_t len;
+  size_t cap;
+} tre_ast_node_vec_t;
+/* Growable byte buffer used to accumulate one literal string. */
+typedef struct {
+  unsigned char *bytes;
+  size_t len;
+  size_t cap;
+} tre_literal_byte_buf_t;
+/* Case-folds one byte by passing it through tre_tolower(). */
+static unsigned char
+tre_litopt_fold_byte(unsigned char c)
+{
+  return (unsigned char)tre_tolower((tre_cint_t)c);
+}
+/* Frees the data of the first `count' literals and then the array itself; a NULL array is a no-op. */
+static void
+tre_litopt_free_literal_list(tre_literal_opt_literal_t *literals, size_t count)
+{
+  size_t i;
+
+  if (literals == NULL)
+    return;
+  for (i = 0; i < count; i++)
+    if (literals[i].data != NULL)
+      xfree(literals[i].data);
+  xfree(literals);
+}
+/* Releases the buffer's storage and returns it to the empty state. */
+static void
+tre_litopt_reset_byte_buf(tre_literal_byte_buf_t *buf)
+{
+  if (buf->bytes != NULL)
+    xfree(buf->bytes);
+  buf->bytes = NULL;
+  buf->len = 0;
+  buf->cap = 0;
+}
+/* Appends `node' to `vec', doubling capacity (starting at 8) when full; returns REG_OK or REG_ESPACE. */
+static int
+tre_litopt_append_ast_node(tre_ast_node_vec_t *vec, const tre_ast_node_t *node)
+{
+  const tre_ast_node_t **new_nodes;
+  size_t new_cap;
+
+  if (vec->len == vec->cap)
+    {
+      new_cap = vec->cap ? vec->cap * 2 : 8;
+      new_nodes = xrealloc(vec->nodes, sizeof(*new_nodes) * new_cap);
+      if (new_nodes == NULL)
+        return REG_ESPACE;
+      vec->nodes = new_nodes;
+      vec->cap = new_cap;
+    }
+
+  vec->nodes[vec->len++] = node;
+  return REG_OK;
+}
+/* Appends `byte' to `buf', doubling capacity (starting at 8) when full; returns REG_OK or REG_ESPACE. */
+static int
+tre_litopt_append_byte(tre_literal_byte_buf_t *buf, unsigned char byte)
+{
+  unsigned char *new_bytes;
+  size_t new_cap;
+
+  if (buf->len == buf->cap)
+    {
+      new_cap = buf->cap ? buf->cap * 2 : 8;
+      new_bytes = xrealloc(buf->bytes, new_cap);
+      if (new_bytes == NULL)
+        return REG_ESPACE;
+      buf->bytes = new_bytes;
+      buf->cap = new_cap;
+    }
+
+  buf->bytes[buf->len++] = byte;
+  return REG_OK;
+}
+/* Appends a private copy of the bytes in `buf' as a new literal of `opt'; returns REG_OK or REG_ESPACE (num_literals is unchanged on failure). */
+static int
+tre_litopt_append_literal(tre_literal_opt_t *opt,
+                          const tre_literal_byte_buf_t *buf)
+{
+  tre_literal_opt_literal_t *new_literals;
+  unsigned char *copy;
+  size_t new_count;
+
+  new_count = opt->num_literals + 1;
+  new_literals = xrealloc(opt->literals, sizeof(*new_literals) * new_count);
+  if (new_literals == NULL)
+    return REG_ESPACE;
+  opt->literals = new_literals;
+  /* The grown array is kept even if the copy below fails; only num_literals marks which entries are valid. */
+  copy = xmalloc(buf->len);
+  if (copy == NULL)
+    return REG_ESPACE;
+  memcpy(copy, buf->bytes, buf->len);
+
+  opt->literals[opt->num_literals].data = copy;
+  opt->literals[opt->num_literals].len = buf->len;
+  opt->num_literals = new_count;
+  return REG_OK;
+}
+
+/* Fill the fold table once and group literals by the first byte so the
+ * matcher can jump straight to the small set of candidates that can match
+ * at a given position.
*/ +static reg_errcode_t +tre_litopt_prepare(tre_literal_opt_t *opt) +{ + size_t counts[256] = { 0 }; + size_t next[256]; + tre_literal_opt_literal_t *grouped; + size_t i; + + for (i = 0; i < 256; i++) + opt->fold_map[i] = tre_litopt_fold_byte((unsigned char)i); + + memset(opt->start_offsets, 0, sizeof(opt->start_offsets)); + if (opt->num_literals == 0) + return REG_OK; + + for (i = 0; i < opt->num_literals; i++) + counts[opt->literals[i].data[0]]++; + + for (i = 0; i < 256; i++) + opt->start_offsets[i + 1] = opt->start_offsets[i] + counts[i]; + + grouped = xmalloc(sizeof(*grouped) * opt->num_literals); + if (grouped == NULL) + return REG_ESPACE; + + memcpy(next, opt->start_offsets, sizeof(next)); + for (i = 0; i < opt->num_literals; i++) + { + unsigned char first = opt->literals[i].data[0]; + grouped[next[first]++] = opt->literals[i]; + } + + xfree(opt->literals); + opt->literals = grouped; + return REG_OK; +} + +static int +tre_litopt_is_simple_literal(const tre_ast_node_t *node, unsigned char *byte) +{ + tre_literal_t *lit; + + if (node == NULL || node->type != LITERAL) + return 0; + lit = node->obj; + if (IS_SPECIAL(lit) || lit->code_min != lit->code_max) + return 0; + if (lit->code_min < 0 || lit->code_min > UCHAR_MAX) + return 0; + *byte = (unsigned char)lit->code_min; + return 1; +} + +static int +tre_litopt_is_icase_char_union(const tre_ast_node_t *node, int cflags, + unsigned char *byte) +{ + tre_union_t *uni; + unsigned char left, right; + + if (!(cflags & REG_ICASE) || node == NULL || node->type != UNION) + return 0; + + uni = node->obj; + if (!tre_litopt_is_simple_literal(uni->left, &left) + || !tre_litopt_is_simple_literal(uni->right, &right)) + return 0; + + if (tre_litopt_fold_byte(left) != tre_litopt_fold_byte(right)) + return 0; + + *byte = tre_litopt_fold_byte(left); + return 1; +} + +static int +tre_litopt_is_assertion(const tre_ast_node_t *node, int assertion) +{ + tre_literal_t *lit; + + if (node == NULL || node->type != LITERAL) + return 0; + 
lit = node->obj; + return IS_ASSERTION(lit) && lit->code_max == assertion; +} + +static int +tre_litopt_collect_cat_nodes(const tre_ast_node_t *node, tre_ast_node_vec_t *vec) +{ + tre_catenation_t *cat; + int err; + + if (node->type != CATENATION) + return tre_litopt_append_ast_node(vec, node); + + cat = node->obj; + err = tre_litopt_collect_cat_nodes(cat->left, vec); + if (err != REG_OK) + return err; + return tre_litopt_collect_cat_nodes(cat->right, vec); +} + +static int +tre_litopt_collect_alt_nodes(const tre_ast_node_t *node, int cflags, + tre_ast_node_vec_t *vec) +{ + tre_union_t *uni; + unsigned char byte; + int err; + + if (node->type != UNION || tre_litopt_is_icase_char_union(node, cflags, &byte)) + return tre_litopt_append_ast_node(vec, node); + + uni = node->obj; + err = tre_litopt_collect_alt_nodes(uni->left, cflags, vec); + if (err != REG_OK) + return err; + return tre_litopt_collect_alt_nodes(uni->right, cflags, vec); +} + +static int +tre_litopt_collect_literal_string(const tre_ast_node_t *node, int cflags, + tre_literal_byte_buf_t *buf) +{ + tre_catenation_t *cat; + unsigned char byte; + int err; + + switch (node->type) + { + case CATENATION: + cat = node->obj; + err = tre_litopt_collect_literal_string(cat->left, cflags, buf); + if (err != 1) + return err; + return tre_litopt_collect_literal_string(cat->right, cflags, buf); + + case LITERAL: + if (!tre_litopt_is_simple_literal(node, &byte)) + return 0; + if (cflags & REG_ICASE) + byte = tre_litopt_fold_byte(byte); + return tre_litopt_append_byte(buf, byte) == REG_OK ? 1 : -1; + + case UNION: + if (!tre_litopt_is_icase_char_union(node, cflags, &byte)) + return 0; + return tre_litopt_append_byte(buf, byte) == REG_OK ? 
1 : -1; + + default: + return 0; + } +} + +static reg_errcode_t +tre_litopt_try_compile(tre_tnfa_t *tnfa, const tre_ast_node_t *tree, + int cflags, int mb_cur_max) +{ + tre_ast_node_vec_t pieces = { 0 }, alts = { 0 }; + tre_literal_byte_buf_t buf = { 0 }; + tre_literal_opt_t opt = { 0 }; + size_t first, last, i; + int err; + + if (mb_cur_max != 1 || (cflags & REG_NEWLINE)) + return REG_OK; + + err = tre_litopt_collect_cat_nodes(tree, &pieces); + if (err != REG_OK) + goto error; + + first = 0; + last = pieces.len; + + if (first < last && tre_litopt_is_assertion(pieces.nodes[first], ASSERT_AT_BOL)) + first++; + if (first < last && tre_litopt_is_assertion(pieces.nodes[last - 1], ASSERT_AT_EOL)) + last--; + + if (first == last) + goto out; + + if (last - first == 1) + { + err = tre_litopt_collect_alt_nodes(pieces.nodes[first], cflags, &alts); + if (err != REG_OK) + goto error; + + for (i = 0; i < alts.len; i++) + { + err = tre_litopt_collect_literal_string(alts.nodes[i], cflags, &buf); + if (err < 0) + goto error; + if (err == 0 || buf.len == 0) + goto out; + err = tre_litopt_append_literal(&opt, &buf); + if (err != REG_OK) + goto error; + buf.len = 0; + } + } + else + { + for (i = first; i < last; i++) + { + err = tre_litopt_collect_literal_string(pieces.nodes[i], cflags, &buf); + if (err < 0) + goto error; + if (err == 0) + goto out; + } + if (buf.len == 0) + goto out; + err = tre_litopt_append_literal(&opt, &buf); + if (err != REG_OK) + goto error; + buf.len = 0; + } + + if (opt.num_literals == 0) + goto out; + + if (first > 0 && last < pieces.len) + opt.mode = TRE_LITERAL_OPT_EXACT; + else if (first > 0) + opt.mode = TRE_LITERAL_OPT_PREFIX; + else if (last < pieces.len) + opt.mode = TRE_LITERAL_OPT_SUFFIX; + else + opt.mode = TRE_LITERAL_OPT_CONTAINS; + opt.nocase = !!(cflags & REG_ICASE); + err = tre_litopt_prepare(&opt); + if (err != REG_OK) + goto error; + + tnfa->literal_opt = opt; + opt.literals = NULL; + opt.num_literals = 0; + + out: + if (pieces.nodes != 
NULL) + xfree(pieces.nodes); + if (alts.nodes != NULL) + xfree(alts.nodes); + tre_litopt_reset_byte_buf(&buf); + tre_litopt_free_literal_list(opt.literals, opt.num_literals); + return REG_OK; + + error: + if (pieces.nodes != NULL) + xfree(pieces.nodes); + if (alts.nodes != NULL) + xfree(alts.nodes); + tre_litopt_reset_byte_buf(&buf); + tre_litopt_free_literal_list(opt.literals, opt.num_literals); + return REG_ESPACE; +} + +/* + Algorithms to setup tags so that submatch addressing can be done. +*/ + + +/* Inserts a catenation node to the root of the tree given in `node'. + As the left child a new tag with number `tag_id' to `node' is added, + and the right child is the old root. */ +static reg_errcode_t +tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id) +{ + tre_catenation_t *c; + + DPRINT(("add_tag_left: tag %d\n", tag_id)); + + c = tre_mem_alloc(mem, sizeof(*c)); + if (c == NULL) + return REG_ESPACE; + c->left = tre_ast_new_literal(mem, TAG, tag_id); + if (c->left == NULL) + return REG_ESPACE; + c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t)); + if (c->right == NULL) + return REG_ESPACE; + + c->right->obj = node->obj; + c->right->type = node->type; + c->right->nullable = -1; + c->right->submatch_id = -1; + c->right->firstpos = NULL; + c->right->lastpos = NULL; + c->right->num_tags = 0; + node->obj = c; + node->type = CATENATION; + return REG_OK; +} + +/* Inserts a catenation node to the root of the tree given in `node'. + As the right child a new tag with number `tag_id' to `node' is added, + and the left child is the old root. 
*/ +static reg_errcode_t +tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id) +{ + tre_catenation_t *c; + + DPRINT(("tre_add_tag_right: tag %d\n", tag_id)); + + c = tre_mem_alloc(mem, sizeof(*c)); + if (c == NULL) + return REG_ESPACE; + c->right = tre_ast_new_literal(mem, TAG, tag_id); + if (c->right == NULL) + return REG_ESPACE; + c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t)); + if (c->left == NULL) + return REG_ESPACE; + + c->left->obj = node->obj; + c->left->type = node->type; + c->left->nullable = -1; + c->left->submatch_id = -1; + c->left->firstpos = NULL; + c->left->lastpos = NULL; + c->left->num_tags = 0; + node->obj = c; + node->type = CATENATION; + return REG_OK; +} + +typedef enum { + ADDTAGS_RECURSE, + ADDTAGS_AFTER_ITERATION, + ADDTAGS_AFTER_UNION_LEFT, + ADDTAGS_AFTER_UNION_RIGHT, + ADDTAGS_AFTER_CAT_LEFT, + ADDTAGS_AFTER_CAT_RIGHT, + ADDTAGS_SET_SUBMATCH_END +} tre_addtags_symbol_t; + + +typedef struct { + int tag; + int next_tag; +} tre_tag_states_t; + + +/* Go through `regset' and set submatch data for submatches that are + using this tag. */ +static void +tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag) +{ + int i; + + for (i = 0; regset[i] >= 0; i++) + { + int id = regset[i] / 2; + int start = !(regset[i] % 2); + DPRINT((" Using tag %d for %s offset of " + "submatch %d\n", tag, + start ? "start" : "end", id)); + if (start) + tnfa->submatch_data[id].so_tag = tag; + else + tnfa->submatch_data[id].eo_tag = tag; + } + regset[0] = -1; +} + + +/* Adds tags to appropriate locations in the parse tree in `tree', so that + subexpressions marked for submatch addressing can be traced. */ +static reg_errcode_t +tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree, + tre_tnfa_t *tnfa) +{ + reg_errcode_t status = REG_OK; + tre_addtags_symbol_t symbol; + tre_ast_node_t *node = tree; /* Tree node we are currently looking at. 
*/ + size_t bottom = tre_stack_num_items(stack); + /* True for first pass (counting number of needed tags) */ + int first_pass = (mem == NULL || tnfa == NULL); + int *regset, *orig_regset; + unsigned int num_tags = 0; /* Total number of tags. */ + unsigned int num_minimals = 0; /* Number of special minimal tags. */ + unsigned int tag = 0; /* The tag that is to be added next. */ + unsigned int next_tag = 1; /* Next tag to use after this one. */ + int *parents; /* Stack of submatches the current submatch is + contained in. */ + int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */ + tre_tag_states_t *saved_states; + + tre_tag_direction_t direction = TRE_TAG_MINIMIZE; + if (!first_pass) + { + tnfa->end_tag = 0; + tnfa->minimal_tags[0] = -1; + } + + regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2)); + if (regset == NULL) + return REG_ESPACE; + regset[0] = -1; + orig_regset = regset; + + parents = xmalloc(sizeof(*parents) * (tnfa->num_submatches + 1)); + if (parents == NULL) + { + xfree(regset); + return REG_ESPACE; + } + parents[0] = -1; + + saved_states = xmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1)); + if (saved_states == NULL) + { + xfree(regset); + xfree(parents); + return REG_ESPACE; + } + else + { + unsigned int i; + for (i = 0; i <= tnfa->num_submatches; i++) + saved_states[i].tag = -1; + } + + STACK_PUSH(stack, voidptr, node); + STACK_PUSH(stack, int, ADDTAGS_RECURSE); + + while (status == REG_OK && tre_stack_num_items(stack) > bottom) + { + symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack); + switch (symbol) + { + + case ADDTAGS_SET_SUBMATCH_END: + { + int id = tre_stack_pop_int(stack); + int i; + + /* Add end of this submatch to regset. */ + for (i = 0; regset[i] >= 0; i++); + regset[i] = id * 2 + 1; + regset[i + 1] = -1; + + /* Pop this submatch from the parents stack. 
*/ + for (i = 0; parents[i] >= 0; i++); + parents[i - 1] = -1; + break; + } + + case ADDTAGS_RECURSE: + node = tre_stack_pop_voidptr(stack); + + if (node->submatch_id >= 0) + { + int id = node->submatch_id; + int i; + + + /* Add start of this submatch to regset. */ + for (i = 0; regset[i] >= 0; i++); + regset[i] = id * 2; + regset[i + 1] = -1; + + if (!first_pass) + { + for (i = 0; parents[i] >= 0; i++); + tnfa->submatch_data[id].parents = NULL; + if (i > 0) + { + int *p = xmalloc(sizeof(*p) * (i + 1)); + if (p == NULL) + { + status = REG_ESPACE; + break; + } + assert(tnfa->submatch_data[id].parents == NULL); + tnfa->submatch_data[id].parents = p; + for (i = 0; parents[i] >= 0; i++) + p[i] = parents[i]; + p[i] = -1; + } + } + + /* Add end of this submatch to regset after processing this + node. */ + STACK_PUSHX(stack, int, node->submatch_id); + STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END); + } + + switch (node->type) + { + case LITERAL: + { + tre_literal_t *lit = node->obj; + + if (!IS_SPECIAL(lit) || IS_BACKREF(lit)) + { + int i; + DPRINT(("Literal %d-%d\n", + (int)lit->code_min, (int)lit->code_max)); + if (regset[0] >= 0) + { + /* Regset is not empty, so add a tag before the + literal or backref. 
*/ + if (!first_pass) + { + status = tre_add_tag_left(mem, node, tag); + tnfa->tag_directions[tag] = direction; + if (minimal_tag >= 0) + { + DPRINT(("Minimal %d, %d\n", minimal_tag, tag)); + for (i = 0; tnfa->minimal_tags[i] >= 0; i++); + tnfa->minimal_tags[i] = tag; + tnfa->minimal_tags[i + 1] = minimal_tag; + tnfa->minimal_tags[i + 2] = -1; + minimal_tag = -1; + num_minimals++; + } + tre_purge_regset(regset, tnfa, tag); + } + else + { + DPRINT((" num_tags = 1\n")); + node->num_tags = 1; + } + + DPRINT((" num_tags++\n")); + regset[0] = -1; + tag = next_tag; + num_tags++; + next_tag++; + } + } + else + { + assert(!IS_TAG(lit)); + } + break; + } + case CATENATION: + { + tre_catenation_t *cat = node->obj; + tre_ast_node_t *left = cat->left; + tre_ast_node_t *right = cat->right; + int reserved_tag = -1; + DPRINT(("Catenation, next_tag = %d\n", next_tag)); + + + /* After processing right child. */ + STACK_PUSHX(stack, voidptr, node); + STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT); + + /* Process right child. */ + STACK_PUSHX(stack, voidptr, right); + STACK_PUSHX(stack, int, ADDTAGS_RECURSE); + + /* After processing left child. */ + STACK_PUSHX(stack, int, next_tag + left->num_tags); + DPRINT((" Pushing %d for after left\n", + next_tag + left->num_tags)); + if (left->num_tags > 0 && right->num_tags > 0) + { + /* Reserve the next tag to the right child. */ + DPRINT((" Reserving next_tag %d to right child\n", + next_tag)); + reserved_tag = next_tag; + next_tag++; + } + STACK_PUSHX(stack, int, reserved_tag); + STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT); + + /* Process left child. 
*/ + STACK_PUSHX(stack, voidptr, left); + STACK_PUSHX(stack, int, ADDTAGS_RECURSE); + + } + break; + case ITERATION: + { + tre_iteration_t *iter = node->obj; + DPRINT(("Iteration\n")); + + if (first_pass) + { + STACK_PUSHX(stack, int, regset[0] >= 0 || iter->minimal); + } + else + { + STACK_PUSHX(stack, int, tag); + STACK_PUSHX(stack, int, iter->minimal); + } + STACK_PUSHX(stack, voidptr, node); + STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION); + + STACK_PUSHX(stack, voidptr, iter->arg); + STACK_PUSHX(stack, int, ADDTAGS_RECURSE); + + /* Regset is not empty, so add a tag here. */ + if (regset[0] >= 0 || iter->minimal) + { + if (!first_pass) + { + int i; + status = tre_add_tag_left(mem, node, tag); + if (iter->minimal) + tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE; + else + tnfa->tag_directions[tag] = direction; + if (minimal_tag >= 0) + { + DPRINT(("Minimal %d, %d\n", minimal_tag, tag)); + for (i = 0; tnfa->minimal_tags[i] >= 0; i++); + tnfa->minimal_tags[i] = tag; + tnfa->minimal_tags[i + 1] = minimal_tag; + tnfa->minimal_tags[i + 2] = -1; + minimal_tag = -1; + num_minimals++; + } + tre_purge_regset(regset, tnfa, tag); + } + + DPRINT((" num_tags++\n")); + regset[0] = -1; + tag = next_tag; + num_tags++; + next_tag++; + } + direction = TRE_TAG_MINIMIZE; + } + break; + case UNION: + { + tre_union_t *uni = node->obj; + tre_ast_node_t *left = uni->left; + tre_ast_node_t *right = uni->right; + int left_tag; + int right_tag; + + if (regset[0] >= 0) + { + left_tag = next_tag; + right_tag = next_tag + 1; + } + else + { + left_tag = tag; + right_tag = next_tag; + } + + DPRINT(("Union\n")); + + /* After processing right child. 
*/ + STACK_PUSHX(stack, int, right_tag); + STACK_PUSHX(stack, int, left_tag); + STACK_PUSHX(stack, voidptr, regset); + STACK_PUSHX(stack, int, regset[0] >= 0); + STACK_PUSHX(stack, voidptr, node); + STACK_PUSHX(stack, voidptr, right); + STACK_PUSHX(stack, voidptr, left); + STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT); + + /* Process right child. */ + STACK_PUSHX(stack, voidptr, right); + STACK_PUSHX(stack, int, ADDTAGS_RECURSE); + + /* After processing left child. */ + STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT); + + /* Process left child. */ + STACK_PUSHX(stack, voidptr, left); + STACK_PUSHX(stack, int, ADDTAGS_RECURSE); + + /* Regset is not empty, so add a tag here. */ + if (regset[0] >= 0) + { + if (!first_pass) + { + int i; + status = tre_add_tag_left(mem, node, tag); + tnfa->tag_directions[tag] = direction; + if (minimal_tag >= 0) + { + DPRINT(("Minimal %d, %d\n", minimal_tag, tag)); + for (i = 0; tnfa->minimal_tags[i] >= 0; i++); + tnfa->minimal_tags[i] = tag; + tnfa->minimal_tags[i + 1] = minimal_tag; + tnfa->minimal_tags[i + 2] = -1; + minimal_tag = -1; + num_minimals++; + } + tre_purge_regset(regset, tnfa, tag); + } + + DPRINT((" num_tags++\n")); + regset[0] = -1; + tag = next_tag; + num_tags++; + next_tag++; + } + + if (node->num_submatches > 0) + { + /* The next two tags are reserved for markers. */ + next_tag++; + tag = next_tag; + next_tag++; + } + + break; + } + } + + if (node->submatch_id >= 0) + { + int i; + /* Push this submatch on the parents stack. 
*/ + for (i = 0; parents[i] >= 0; i++); + parents[i] = node->submatch_id; + parents[i + 1] = -1; + } + + break; /* end case: ADDTAGS_RECURSE */ + + case ADDTAGS_AFTER_ITERATION: + { + int minimal = 0; + int enter_tag; + node = tre_stack_pop_voidptr(stack); + if (first_pass) + { + node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags + + tre_stack_pop_int(stack); + minimal_tag = -1; + } + else + { + minimal = tre_stack_pop_int(stack); + enter_tag = tre_stack_pop_int(stack); + if (minimal) + minimal_tag = enter_tag; + } + + DPRINT(("After iteration\n")); + if (!first_pass) + { + DPRINT((" Setting direction to %s\n", + minimal ? "minimize" : "maximize")); + if (minimal) + direction = TRE_TAG_MINIMIZE; + else + direction = TRE_TAG_MAXIMIZE; + } + break; + } + + case ADDTAGS_AFTER_CAT_LEFT: + { + int new_tag = tre_stack_pop_int(stack); + next_tag = tre_stack_pop_int(stack); + DPRINT(("After cat left, tag = %d, next_tag = %d\n", + tag, next_tag)); + if (new_tag >= 0) + { + DPRINT((" Setting tag to %d\n", new_tag)); + tag = new_tag; + } + break; + } + + case ADDTAGS_AFTER_CAT_RIGHT: + DPRINT(("After cat right\n")); + node = tre_stack_pop_voidptr(stack); + if (first_pass) + node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags + + ((tre_catenation_t *)node->obj)->right->num_tags; + break; + + case ADDTAGS_AFTER_UNION_LEFT: + DPRINT(("After union left\n")); + /* Lift the bottom of the `regset' array so that when processing + the right operand the items currently in the array are + invisible. The original bottom was saved at ADDTAGS_UNION and + will be restored at ADDTAGS_AFTER_UNION_RIGHT below. 
*/ + while (*regset >= 0) + regset++; + break; + + case ADDTAGS_AFTER_UNION_RIGHT: + { + int added_tags, tag_left, tag_right; + tre_ast_node_t *left = tre_stack_pop_voidptr(stack); + tre_ast_node_t *right = tre_stack_pop_voidptr(stack); + DPRINT(("After union right\n")); + node = tre_stack_pop_voidptr(stack); + added_tags = tre_stack_pop_int(stack); + if (first_pass) + { + node->num_tags = ((tre_union_t *)node->obj)->left->num_tags + + ((tre_union_t *)node->obj)->right->num_tags + added_tags + + ((node->num_submatches > 0) ? 2 : 0); + } + regset = tre_stack_pop_voidptr(stack); + tag_left = tre_stack_pop_int(stack); + tag_right = tre_stack_pop_int(stack); + + /* Add tags after both children, the left child gets a smaller + tag than the right child. This guarantees that we prefer + the left child over the right child. */ + /* XXX - This is not always necessary (if the children have + tags which must be seen for every match of that child). */ + /* XXX - Check if this is the only place where tre_add_tag_right + is used. If so, use tre_add_tag_left (putting the tag before + the child as opposed after the child) and throw away + tre_add_tag_right. 
*/ + if (node->num_submatches > 0) + { + if (!first_pass) + { + status = tre_add_tag_right(mem, left, tag_left); + tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE; + status = tre_add_tag_right(mem, right, tag_right); + tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE; + } + DPRINT((" num_tags += 2\n")); + num_tags += 2; + } + direction = TRE_TAG_MAXIMIZE; + break; + } + + default: + assert(0); + break; + + } /* end switch(symbol) */ + } /* end while(tre_stack_num_items(stack) > bottom) */ + + if (!first_pass) + tre_purge_regset(regset, tnfa, tag); + + if (!first_pass && minimal_tag >= 0) + { + int i; + DPRINT(("Minimal %d, %d\n", minimal_tag, tag)); + for (i = 0; tnfa->minimal_tags[i] >= 0; i++); + tnfa->minimal_tags[i] = tag; + tnfa->minimal_tags[i + 1] = minimal_tag; + tnfa->minimal_tags[i + 2] = -1; + minimal_tag = -1; + num_minimals++; + } + + DPRINT(("tre_add_tags: %s complete. Number of tags %d.\n", + first_pass? "First pass" : "Second pass", num_tags)); + + assert(tree->num_tags == num_tags); + tnfa->end_tag = num_tags; + tnfa->num_tags = num_tags; + tnfa->num_minimals = num_minimals; + xfree(orig_regset); + xfree(parents); + xfree(saved_states); + return status; +} + + + +/* + AST to TNFA compilation routines. +*/ + +typedef enum { + COPY_RECURSE, + COPY_SET_RESULT_PTR +} tre_copyast_symbol_t; + +/* Flags for tre_copy_ast(). 
*/ +#define COPY_REMOVE_TAGS 1 +#define COPY_MAXIMIZE_FIRST_TAG 2 + +static reg_errcode_t +tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast, + int flags, int *pos_add, tre_tag_direction_t *tag_directions, + tre_ast_node_t **copy, int *max_pos) +{ + reg_errcode_t status = REG_OK; + size_t bottom = tre_stack_num_items(stack); + int num_copied = 0; + int first_tag = 1; + tre_ast_node_t **result = copy; + tre_copyast_symbol_t symbol; + + STACK_PUSH(stack, voidptr, ast); + STACK_PUSH(stack, int, COPY_RECURSE); + + while (status == REG_OK && tre_stack_num_items(stack) > bottom) + { + tre_ast_node_t *node; + if (status != REG_OK) + break; + + symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack); + switch (symbol) + { + case COPY_SET_RESULT_PTR: + result = tre_stack_pop_voidptr(stack); + break; + case COPY_RECURSE: + node = tre_stack_pop_voidptr(stack); + switch (node->type) + { + case LITERAL: + { + tre_literal_t *lit = node->obj; + int pos = lit->position; + long min = lit->code_min; + long max = lit->code_max; + if (!IS_SPECIAL(lit) || IS_BACKREF(lit)) + { + /* XXX - e.g. [ab] has only one position but two + nodes, so we are creating holes in the state space + here. Not fatal, just wastes memory. */ + pos += *pos_add; + num_copied++; + } + else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS)) + { + /* Change this tag to empty. */ + min = EMPTY; + max = pos = -1; + } + else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG) + && first_tag) + { + /* Maximize the first tag. 
*/ + tag_directions[max] = TRE_TAG_MAXIMIZE; + first_tag = 0; + } + *result = tre_ast_new_literal(mem, min, max); + if (*result == NULL) { + status = REG_ESPACE; + break; + } + if (!IS_SPECIAL(lit)) { + ((tre_literal_t *)(*result)->obj)->u.class = lit->u.class; + ((tre_literal_t *)(*result)->obj)->neg_classes = lit->neg_classes; + } else if (IS_PARAMETER(lit)) { + ((tre_literal_t *)(*result)->obj)->u.params = lit->u.params; + } + + if (pos > *max_pos) + *max_pos = pos; + break; + } + case UNION: + { + tre_union_t *uni = node->obj; + tre_union_t *tmp; + *result = tre_ast_new_union(mem, uni->left, uni->right); + if (*result == NULL) + { + status = REG_ESPACE; + break; + } + tmp = (*result)->obj; + result = &tmp->left; + STACK_PUSHX(stack, voidptr, uni->right); + STACK_PUSHX(stack, int, COPY_RECURSE); + STACK_PUSHX(stack, voidptr, &tmp->right); + STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR); + STACK_PUSHX(stack, voidptr, uni->left); + STACK_PUSHX(stack, int, COPY_RECURSE); + break; + } + case CATENATION: + { + tre_catenation_t *cat = node->obj; + tre_catenation_t *tmp; + *result = tre_ast_new_catenation(mem, cat->left, cat->right); + if (*result == NULL) + { + status = REG_ESPACE; + break; + } + tmp = (*result)->obj; + tmp->left = NULL; + tmp->right = NULL; + result = &tmp->left; + + STACK_PUSHX(stack, voidptr, cat->right); + STACK_PUSHX(stack, int, COPY_RECURSE); + STACK_PUSHX(stack, voidptr, &tmp->right); + STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR); + STACK_PUSHX(stack, voidptr, cat->left); + STACK_PUSHX(stack, int, COPY_RECURSE); + break; + } + case ITERATION: + { + tre_iteration_t *iter = node->obj; + STACK_PUSHX(stack, voidptr, iter->arg); + STACK_PUSHX(stack, int, COPY_RECURSE); + *result = tre_ast_new_iter(mem, iter->arg, iter->min, + iter->max, iter->minimal); + if (*result == NULL) + { + status = REG_ESPACE; + break; + } + iter = (*result)->obj; + result = &iter->arg; + break; + } + default: + assert(0); + break; + } + break; + } + } + *pos_add += 
num_copied; + return status; +} + +typedef enum { + EXPAND_RECURSE, + EXPAND_AFTER_ITER +} tre_expand_ast_symbol_t; + +/* Expands each iteration node that has a finite nonzero minimum or maximum + iteration count to a catenated sequence of copies of the node. */ +static reg_errcode_t +tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast, + tre_tag_direction_t *tag_directions, int *max_depth) +{ + reg_errcode_t status = REG_OK; + size_t bottom = tre_stack_num_items(stack); + int pos_add = 0; + int pos_add_total = 0; + int max_pos = 0; + /* Current approximate matching parameters. */ + int params[TRE_PARAM_LAST]; + /* Approximate parameter nesting level. */ + int params_depth = 0; + int iter_depth = 0; + int i; + + for (i = 0; i < TRE_PARAM_LAST; i++) + params[i] = TRE_PARAM_DEFAULT; + + STACK_PUSHR(stack, voidptr, ast); + STACK_PUSHR(stack, int, EXPAND_RECURSE); + while (status == REG_OK && tre_stack_num_items(stack) > bottom) + { + tre_ast_node_t *node; + tre_expand_ast_symbol_t symbol; + + if (status != REG_OK) + break; + + DPRINT(("pos_add %d\n", pos_add)); + + symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack); + node = tre_stack_pop_voidptr(stack); + switch (symbol) + { + case EXPAND_RECURSE: + switch (node->type) + { + case LITERAL: + { + tre_literal_t *lit= node->obj; + if (!IS_SPECIAL(lit) || IS_BACKREF(lit)) + { + lit->position += pos_add; + if (lit->position > max_pos) + max_pos = lit->position; + } + break; + } + case UNION: + { + tre_union_t *uni = node->obj; + STACK_PUSHX(stack, voidptr, uni->right); + STACK_PUSHX(stack, int, EXPAND_RECURSE); + STACK_PUSHX(stack, voidptr, uni->left); + STACK_PUSHX(stack, int, EXPAND_RECURSE); + break; + } + case CATENATION: + { + tre_catenation_t *cat = node->obj; + STACK_PUSHX(stack, voidptr, cat->right); + STACK_PUSHX(stack, int, EXPAND_RECURSE); + STACK_PUSHX(stack, voidptr, cat->left); + STACK_PUSHX(stack, int, EXPAND_RECURSE); + break; + } + case ITERATION: + { + tre_iteration_t *iter = 
node->obj; + STACK_PUSHX(stack, int, pos_add); + STACK_PUSHX(stack, voidptr, node); + STACK_PUSHX(stack, int, EXPAND_AFTER_ITER); + STACK_PUSHX(stack, voidptr, iter->arg); + STACK_PUSHX(stack, int, EXPAND_RECURSE); + /* If we are going to expand this node at EXPAND_AFTER_ITER + then don't increase the `pos' fields of the nodes now, it + will get done when expanding. */ + if (iter->min > 1 || iter->max > 1) + pos_add = 0; + iter_depth++; + DPRINT(("iter\n")); + break; + } + default: + assert(0); + break; + } + break; + case EXPAND_AFTER_ITER: + { + tre_iteration_t *iter = node->obj; + int pos_add_last; + pos_add = tre_stack_pop_int(stack); + pos_add_last = pos_add; + if (iter->min > 1 || iter->max > 1) + { + tre_ast_node_t *seq1 = NULL, *seq2 = NULL; + int j; + int pos_add_save = pos_add; + + /* Create a catenated sequence of copies of the node. */ + for (j = 0; j < iter->min; j++) + { + tre_ast_node_t *copy; + /* Remove tags from all but the last copy. */ + int flags = ((j + 1 < iter->min) + ? COPY_REMOVE_TAGS + : COPY_MAXIMIZE_FIRST_TAG); + DPRINT((" pos_add %d\n", pos_add)); + pos_add_save = pos_add; + /* tre_copy_ast() stores the root of the duplicated subtree + through its `copy' argument, so pass the address of the + local pointer. */ + status = tre_copy_ast(mem, stack, iter->arg, flags, + &pos_add, tag_directions, &copy, + &max_pos); + if (status != REG_OK) + return status; + if (seq1 != NULL) + seq1 = tre_ast_new_catenation(mem, seq1, copy); + else + seq1 = copy; + if (seq1 == NULL) + return REG_ESPACE; + } + + if (iter->max == -1) + { + /* No upper limit. 
*/ + pos_add_save = pos_add; + status = tre_copy_ast(mem, stack, iter->arg, 0, + &pos_add, NULL, &seq2, &max_pos); + if (status != REG_OK) + return status; + seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0); + if (seq2 == NULL) + return REG_ESPACE; + } + else + { + for (j = iter->min; j < iter->max; j++) + { + tre_ast_node_t *copy; + pos_add_save = pos_add; + status = tre_copy_ast(mem, stack, iter->arg, 0, + &pos_add, NULL, &copy, &max_pos); + if (status != REG_OK) + return status; + if (seq2 != NULL) + seq2 = tre_ast_new_catenation(mem, copy, seq2); + else + seq2 = copy; + if (seq2 == NULL) + return REG_ESPACE; + seq2 = tre_ast_new_iter(mem, seq2, 0, 1, 0); + if (seq2 == NULL) + return REG_ESPACE; + } + } + + pos_add = pos_add_save; + if (seq1 == NULL) + seq1 = seq2; + else if (seq2 != NULL) + seq1 = tre_ast_new_catenation(mem, seq1, seq2); + if (seq1 == NULL) + return REG_ESPACE; + node->obj = seq1->obj; + node->type = seq1->type; + } + + iter_depth--; + pos_add_total += pos_add - pos_add_last; + if (iter_depth == 0) + pos_add = pos_add_total; + + /* If approximate parameters are specified, surround the result + with two parameter setting nodes. The one on the left sets + the specified parameters, and the one on the right restores + the old parameters. 
*/ + if (iter->params) + { + tre_ast_node_t *tmp_l, *tmp_r, *tmp_node, *node_copy; + int *old_params; + + tmp_l = tre_ast_new_literal(mem, PARAMETER, 0); + if (!tmp_l) + return REG_ESPACE; + ((tre_literal_t *)tmp_l->obj)->u.params = iter->params; + iter->params[TRE_PARAM_DEPTH] = params_depth + 1; + tmp_r = tre_ast_new_literal(mem, PARAMETER, 0); + if (!tmp_r) + return REG_ESPACE; + old_params = tre_mem_alloc(mem, sizeof(*old_params) + * TRE_PARAM_LAST); + if (!old_params) + return REG_ESPACE; + for (i = 0; i < TRE_PARAM_LAST; i++) + old_params[i] = params[i]; + ((tre_literal_t *)tmp_r->obj)->u.params = old_params; + old_params[TRE_PARAM_DEPTH] = params_depth; + /* XXX - this is the only place where ast_new_node is + needed -- should be moved inside AST module. */ + node_copy = tre_ast_new_node(mem, ITERATION, + sizeof(tre_iteration_t)); + if (!node_copy) + return REG_ESPACE; + node_copy->obj = node->obj; + tmp_node = tre_ast_new_catenation(mem, tmp_l, node_copy); + if (!tmp_node) + return REG_ESPACE; + tmp_node = tre_ast_new_catenation(mem, tmp_node, tmp_r); + if (!tmp_node) + return REG_ESPACE; + /* Replace the contents of `node' with `tmp_node'. 
*/ + memcpy(node, tmp_node, sizeof(*node)); + node->obj = tmp_node->obj; + node->type = tmp_node->type; + params_depth++; + if (params_depth > *max_depth) + *max_depth = params_depth; + } + break; + } + default: + assert(0); + break; + } + } + +#ifdef TRE_DEBUG + DPRINT(("Expanded AST:\n")); + tre_ast_print(ast); +#endif + + return status; +} + +static tre_pos_and_tags_t * +tre_set_empty(tre_mem_t mem) +{ + tre_pos_and_tags_t *new_set; + + new_set = tre_mem_calloc(mem, sizeof(*new_set)); + if (new_set == NULL) + return NULL; + + new_set[0].position = -1; + new_set[0].code_min = -1; + new_set[0].code_max = -1; + + return new_set; +} + +static tre_pos_and_tags_t * +tre_set_one(tre_mem_t mem, int position, long code_min, long code_max, + tre_ctype_t class, tre_ctype_t *neg_classes, int backref) +{ + tre_pos_and_tags_t *new_set; + + new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2); + if (new_set == NULL) + return NULL; + + new_set[0].position = position; + new_set[0].code_min = code_min; + new_set[0].code_max = code_max; + new_set[0].class = class; + new_set[0].neg_classes = neg_classes; + new_set[0].backref = backref; + new_set[1].position = -1; + new_set[1].code_min = -1; + new_set[1].code_max = -1; + + return new_set; +} + +static tre_pos_and_tags_t * +tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2, + int *tags, int assertions, int *params) +{ + int s1, s2, i, j; + tre_pos_and_tags_t *new_set; + int *new_tags; + int num_tags; + + for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++); + for (s1 = 0; set1[s1].position >= 0; s1++); + for (s2 = 0; set2[s2].position >= 0; s2++); + new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1)); + if (!new_set ) + return NULL; + + for (s1 = 0; set1[s1].position >= 0; s1++) + { + new_set[s1].position = set1[s1].position; + new_set[s1].code_min = set1[s1].code_min; + new_set[s1].code_max = set1[s1].code_max; + new_set[s1].assertions = set1[s1].assertions | assertions; + 
new_set[s1].class = set1[s1].class; + new_set[s1].neg_classes = set1[s1].neg_classes; + new_set[s1].backref = set1[s1].backref; + if (set1[s1].tags == NULL && tags == NULL) + new_set[s1].tags = NULL; + else + { + for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++); + new_tags = tre_mem_alloc(mem, (sizeof(*new_tags) + * (i + num_tags + 1))); + if (new_tags == NULL) + return NULL; + for (j = 0; j < i; j++) + new_tags[j] = set1[s1].tags[j]; + for (i = 0; i < num_tags; i++) + new_tags[j + i] = tags[i]; + new_tags[j + i] = -1; + new_set[s1].tags = new_tags; + } + if (set1[s1].params) + new_set[s1].params = set1[s1].params; + if (params) + { + if (!new_set[s1].params) + new_set[s1].params = params; + else + { + new_set[s1].params = tre_mem_alloc(mem, sizeof(*params) * + TRE_PARAM_LAST); + if (!new_set[s1].params) + return NULL; + for (i = 0; i < TRE_PARAM_LAST; i++) + if (params[i] != TRE_PARAM_UNSET) + new_set[s1].params[i] = params[i]; + } + } + } + + for (s2 = 0; set2[s2].position >= 0; s2++) + { + new_set[s1 + s2].position = set2[s2].position; + new_set[s1 + s2].code_min = set2[s2].code_min; + new_set[s1 + s2].code_max = set2[s2].code_max; + /* XXX - why not | assertions here as well? 
*/ + new_set[s1 + s2].assertions = set2[s2].assertions; + new_set[s1 + s2].class = set2[s2].class; + new_set[s1 + s2].neg_classes = set2[s2].neg_classes; + new_set[s1 + s2].backref = set2[s2].backref; + if (set2[s2].tags == NULL) + new_set[s1 + s2].tags = NULL; + else + { + for (i = 0; set2[s2].tags[i] >= 0; i++); + new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1)); + if (new_tags == NULL) + return NULL; + for (j = 0; j < i; j++) + new_tags[j] = set2[s2].tags[j]; + new_tags[j] = -1; + new_set[s1 + s2].tags = new_tags; + } + if (set2[s2].params) + new_set[s1 + s2].params = set2[s2].params; + if (params) + { + if (!new_set[s1 + s2].params) + new_set[s1 + s2].params = params; + else + { + new_set[s1 + s2].params = tre_mem_alloc(mem, sizeof(*params) * + TRE_PARAM_LAST); + if (!new_set[s1 + s2].params) + return NULL; + for (i = 0; i < TRE_PARAM_LAST; i++) + if (params[i] != TRE_PARAM_UNSET) + new_set[s1 + s2].params[i] = params[i]; + } + } + } + new_set[s1 + s2].position = -1; + return new_set; +} + +/* Finds the empty path through `node' which is the one that should be + taken according to POSIX.2 rules, and adds the tags on that path to + `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is + set to the number of tags seen on the path. */ +static reg_errcode_t +tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags, + int *assertions, int *params, int *num_tags_seen, + int *params_seen) +{ + tre_literal_t *lit; + tre_union_t *uni; + tre_catenation_t *cat; + tre_iteration_t *iter; + int i; + size_t bottom = tre_stack_num_items(stack); + reg_errcode_t status = REG_OK; + if (num_tags_seen) + *num_tags_seen = 0; + if (params_seen) + *params_seen = 0; + + status = tre_stack_push_voidptr(stack, node); + + /* Walk through the tree recursively. 
*/ + while (status == REG_OK && tre_stack_num_items(stack) > bottom) + { + node = tre_stack_pop_voidptr(stack); + + switch (node->type) + { + case LITERAL: + lit = (tre_literal_t *)node->obj; + switch (lit->code_min) + { + case TAG: + if (lit->code_max >= 0) + { + if (tags != NULL) + { + /* Add the tag to `tags'. */ + for (i = 0; tags[i] >= 0; i++) + if (tags[i] == lit->code_max) + break; + if (tags[i] < 0) + { + tags[i] = lit->code_max; + tags[i + 1] = -1; + } + } + if (num_tags_seen) + (*num_tags_seen)++; + } + break; + case ASSERTION: + assert(lit->code_max >= 1 && lit->code_max <= ASSERT_LAST); + if (assertions != NULL) + *assertions |= lit->code_max; + break; + case PARAMETER: + if (params != NULL) + for (i = 0; i < TRE_PARAM_LAST; i++) + params[i] = lit->u.params[i]; + if (params_seen != NULL) + *params_seen = 1; + break; + case EMPTY: + break; + default: + assert(0); + break; + } + break; + + case UNION: + /* Subexpressions starting earlier take priority over ones + starting later, so we prefer the left subexpression over the + right subexpression. */ + uni = (tre_union_t *)node->obj; + if (uni->left->nullable) + STACK_PUSHX(stack, voidptr, uni->left) + else if (uni->right->nullable) + STACK_PUSHX(stack, voidptr, uni->right) + else + assert(0); + break; + + case CATENATION: + /* The path must go through both children. */ + cat = (tre_catenation_t *)node->obj; + assert(cat->left->nullable); + assert(cat->right->nullable); + STACK_PUSHX(stack, voidptr, cat->left); + STACK_PUSHX(stack, voidptr, cat->right); + break; + + case ITERATION: + /* A match with an empty string is preferred over no match at + all, so we go through the argument if possible. 
*/ + iter = (tre_iteration_t *)node->obj; + if (iter->arg->nullable) + STACK_PUSHX(stack, voidptr, iter->arg); + break; + + default: + assert(0); + break; + } + } + + return status; +} + + +typedef enum { + NPFL_RECURSE, + NPFL_POST_UNION, + NPFL_POST_CATENATION, + NPFL_POST_ITERATION +} tre_npfl_stack_symbol_t; + + +/* Computes and fills in the fields `nullable', `position`, `firstpos', + and `lastpos' for the nodes of the AST `tree'; `nextpos' points to an + integer indicating the next available position, and will be updated on + return to reflect the number of additional positions assigned. */ +static reg_errcode_t +tre_compute_npfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree, + int *nextpos) +{ + size_t bottom = tre_stack_num_items(stack); + + STACK_PUSHR(stack, voidptr, tree); + STACK_PUSHR(stack, int, NPFL_RECURSE); + + while (tre_stack_num_items(stack) > bottom) + { + tre_npfl_stack_symbol_t symbol; + tre_ast_node_t *node; + + symbol = (tre_npfl_stack_symbol_t)tre_stack_pop_int(stack); + node = tre_stack_pop_voidptr(stack); + switch (symbol) + { + case NPFL_RECURSE: + switch (node->type) + { + case LITERAL: + { + tre_literal_t *lit = (tre_literal_t *)node->obj; + if (IS_BACKREF(lit)) + { + /* Back references: nullable = false, firstpos = {i}, + lastpos = {i}. */ + node->nullable = 0; + lit->position = (*nextpos)++; + node->firstpos = tre_set_one(mem, lit->position, 0, + TRE_CHAR_MAX, 0, NULL, -1); + if (!node->firstpos) + return REG_ESPACE; + node->lastpos = tre_set_one(mem, lit->position, 0, + TRE_CHAR_MAX, 0, NULL, + lit->code_max); + if (!node->lastpos) + return REG_ESPACE; + } + else if (lit->code_min < 0) + { + /* Tags, empty strings, params, and zero width assertions: + nullable = true, firstpos = {}, and lastpos = {}. 
*/ + node->nullable = 1; + node->firstpos = tre_set_empty(mem); + if (!node->firstpos) + return REG_ESPACE; + node->lastpos = tre_set_empty(mem); + if (!node->lastpos) + return REG_ESPACE; + } + else + { + /* Literal at position i: nullable = false, firstpos = {i}, + lastpos = {i}. */ + node->nullable = 0; + lit->position = (*nextpos)++; + node->firstpos = + tre_set_one(mem, lit->position, lit->code_min, + lit->code_max, 0, NULL, -1); + if (!node->firstpos) + return REG_ESPACE; + node->lastpos = tre_set_one(mem, lit->position, + lit->code_min, + lit->code_max, + lit->u.class, lit->neg_classes, + -1); + if (!node->lastpos) + return REG_ESPACE; + } + break; + } + + case UNION: + /* Compute the attributes for the two subtrees, and after that + for this node. */ + STACK_PUSHR(stack, voidptr, node); + STACK_PUSHR(stack, int, NPFL_POST_UNION); + STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right); + STACK_PUSHR(stack, int, NPFL_RECURSE); + STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left); + STACK_PUSHR(stack, int, NPFL_RECURSE); + break; + + case CATENATION: + /* Compute the attributes for the two subtrees, and after that + for this node. */ + STACK_PUSHR(stack, voidptr, node); + STACK_PUSHR(stack, int, NPFL_POST_CATENATION); + STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right); + STACK_PUSHR(stack, int, NPFL_RECURSE); + STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left); + STACK_PUSHR(stack, int, NPFL_RECURSE); + break; + + case ITERATION: + /* Compute the attributes for the subtree, and after that for + this node. 
*/ + STACK_PUSHR(stack, voidptr, node); + STACK_PUSHR(stack, int, NPFL_POST_ITERATION); + STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg); + STACK_PUSHR(stack, int, NPFL_RECURSE); + break; + } + break; /* end case: NPFL_RECURSE */ + + case NPFL_POST_UNION: + { + tre_union_t *uni = (tre_union_t *)node->obj; + node->nullable = uni->left->nullable || uni->right->nullable; + node->firstpos = tre_set_union(mem, uni->left->firstpos, + uni->right->firstpos, NULL, 0, NULL); + if (!node->firstpos) + return REG_ESPACE; + node->lastpos = tre_set_union(mem, uni->left->lastpos, + uni->right->lastpos, NULL, 0, NULL); + if (!node->lastpos) + return REG_ESPACE; + break; + } + + case NPFL_POST_ITERATION: + { + tre_iteration_t *iter = (tre_iteration_t *)node->obj; + + if (iter->min == 0 || iter->arg->nullable) + node->nullable = 1; + else + node->nullable = 0; + node->firstpos = iter->arg->firstpos; + node->lastpos = iter->arg->lastpos; + break; + } + + case NPFL_POST_CATENATION: + { + int num_tags, *tags, assertions, params_seen; + int *params; + reg_errcode_t status; + tre_catenation_t *cat = node->obj; + node->nullable = cat->left->nullable && cat->right->nullable; + + /* Compute firstpos. */ + if (cat->left->nullable) + { + /* The left side matches the empty string. Make a first pass + with tre_match_empty() to get the number of tags and + parameters. */ + status = tre_match_empty(stack, cat->left, + NULL, NULL, NULL, &num_tags, + ¶ms_seen); + if (status != REG_OK) + return status; + /* Allocate arrays for the tags and parameters. */ + tags = xmalloc(sizeof(*tags) * (num_tags + 1)); + if (!tags) + return REG_ESPACE; + tags[0] = -1; + assertions = 0; + params = NULL; + if (params_seen) + { + params = tre_mem_alloc(mem, sizeof(*params) + * TRE_PARAM_LAST); + if (!params) + { + xfree(tags); + return REG_ESPACE; + } + } + /* Second pass with tre_mach_empty() to get the list of + tags and parameters. 
*/ + status = tre_match_empty(stack, cat->left, tags, + &assertions, params, NULL, NULL); + if (status != REG_OK) + { + xfree(tags); + return status; + } + node->firstpos = + tre_set_union(mem, cat->right->firstpos, cat->left->firstpos, + tags, assertions, params); + xfree(tags); + if (!node->firstpos) + return REG_ESPACE; + } + else + { + node->firstpos = cat->left->firstpos; + } + + /* Compute lastpos. */ + if (cat->right->nullable) + { + /* The right side matches the empty string. Make a first pass + with tre_match_empty() to get the number of tags and + parameters. */ + status = tre_match_empty(stack, cat->right, + NULL, NULL, NULL, &num_tags, + ¶ms_seen); + if (status != REG_OK) + return status; + /* Allocate arrays for the tags and parameters. */ + tags = xmalloc(sizeof(*tags) * (num_tags + 1)); + if (!tags) + return REG_ESPACE; + tags[0] = -1; + assertions = 0; + params = NULL; + if (params_seen) + { + params = tre_mem_alloc(mem, sizeof(*params) + * TRE_PARAM_LAST); + if (!params) + { + xfree(tags); + return REG_ESPACE; + } + } + /* Second pass with tre_mach_empty() to get the list of + tags and parameters. */ + status = tre_match_empty(stack, cat->right, tags, + &assertions, params, NULL, NULL); + if (status != REG_OK) + { + xfree(tags); + return status; + } + node->lastpos = + tre_set_union(mem, cat->left->lastpos, cat->right->lastpos, + tags, assertions, params); + xfree(tags); + if (!node->lastpos) + return REG_ESPACE; + } + else + { + node->lastpos = cat->right->lastpos; + } + break; + } + + default: + assert(0); + break; + } + } + + return REG_OK; +} + + +/* Adds a transition from each position in `p1' to each position in `p2'. 
*/ +static reg_errcode_t +tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2, + tre_tnfa_transition_t *transitions, + int *counts, int *offs) +{ + tre_pos_and_tags_t *orig_p2 = p2; + tre_tnfa_transition_t *trans; + int i, j, k, l, dup, prev_p2_pos; + + if (transitions != NULL) + while (p1->position >= 0) + { + p2 = orig_p2; + prev_p2_pos = -1; + while (p2->position >= 0) + { + /* Optimization: if this position was already handled, skip it. */ + if (p2->position == prev_p2_pos) + { + p2++; + continue; + } + prev_p2_pos = p2->position; + /* Set `trans' to point to the next unused transition from + position `p1->position'. */ + trans = transitions + offs[p1->position]; + while (trans->state != NULL) + { +#if 0 + /* If we find a previous transition from `p1->position' to + `p2->position', it is overwritten. This can happen only + if there are nested loops in the regexp, like in "((a)*)*". + In POSIX.2 repetition using the outer loop is always + preferred over using the inner loop. Therefore the + transition for the inner loop is useless and can be thrown + away. */ + /* XXX - The same position is used for all nodes in a bracket + expression, so this optimization cannot be used (it will + break bracket expressions) unless I figure out a way to + detect it here. */ + if (trans->state_id == p2->position) + { + DPRINT(("*")); + break; + } +#endif + trans++; + } + + if (trans->state == NULL) + (trans + 1)->state = NULL; + /* Use the character ranges, assertions, etc. from `p1' for + the transition from `p1' to `p2'. */ + trans->code_min = (tre_cint_t) p1->code_min; + trans->code_max = (tre_cint_t) p1->code_max; + trans->state = transitions + offs[p2->position]; + trans->state_id = p2->position; + trans->assertions = p1->assertions | p2->assertions + | (p1->class ? ASSERT_CHAR_CLASS : 0) + | (p1->neg_classes != NULL ? 
ASSERT_CHAR_CLASS_NEG : 0); + if (p1->backref >= 0) + { + assert((trans->assertions & ASSERT_CHAR_CLASS) == 0); + assert(p2->backref < 0); + trans->u.backref = p1->backref; + trans->assertions |= ASSERT_BACKREF; + } + else + trans->u.class = p1->class; + if (p1->neg_classes != NULL) + { + for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++); + trans->neg_classes = + xmalloc(sizeof(*trans->neg_classes) * (i + 1)); + if (trans->neg_classes == NULL) + return REG_ESPACE; + for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++) + trans->neg_classes[i] = p1->neg_classes[i]; + trans->neg_classes[i] = (tre_ctype_t)0; + } + else + trans->neg_classes = NULL; + + /* Find out how many tags this transition has. */ + i = 0; + if (p1->tags != NULL) + while(p1->tags[i] >= 0) + i++; + j = 0; + if (p2->tags != NULL) + while(p2->tags[j] >= 0) + j++; + + /* If we are overwriting a transition, free the old tag array. */ + if (trans->tags != NULL) + xfree(trans->tags); + trans->tags = NULL; + + /* If there were any tags, allocate an array and fill it. */ + if (i + j > 0) + { + trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1)); + if (!trans->tags) + return REG_ESPACE; + i = 0; + if (p1->tags != NULL) + while(p1->tags[i] >= 0) + { + trans->tags[i] = p1->tags[i]; + i++; + } + l = i; + j = 0; + if (p2->tags != NULL) + while (p2->tags[j] >= 0) + { + /* Don't add duplicates. */ + dup = 0; + for (k = 0; k < i; k++) + if (trans->tags[k] == p2->tags[j]) + { + dup = 1; + break; + } + if (!dup) + trans->tags[l++] = p2->tags[j]; + j++; + } + trans->tags[l] = -1; + } + + /* Set the parameter array. If both `p2' and `p1' have same + parameters, the values in `p2' override those in `p1'. 
*/ + if (p1->params || p2->params) + { + if (!trans->params) + trans->params = xmalloc(sizeof(*trans->params) + * TRE_PARAM_LAST); + if (!trans->params) + return REG_ESPACE; + for (i = 0; i < TRE_PARAM_LAST; i++) + { + trans->params[i] = TRE_PARAM_UNSET; + if (p1->params && p1->params[i] != TRE_PARAM_UNSET) + trans->params[i] = p1->params[i]; + if (p2->params && p2->params[i] != TRE_PARAM_UNSET) + trans->params[i] = p2->params[i]; + } + } + else + { + if (trans->params) + xfree(trans->params); + trans->params = NULL; + } + + +#ifdef TRE_DEBUG + { + int *tags; + + DPRINT((" %2d -> %2d on %3d", p1->position, p2->position, + p1->code_min)); + if (p1->code_max != p1->code_min) + DPRINT(("-%3d", p1->code_max)); + tags = trans->tags; + if (tags) + { + DPRINT((", tags [")); + while (*tags >= 0) + { + DPRINT(("%d", *tags)); + tags++; + if (*tags >= 0) + DPRINT((",")); + } + DPRINT(("]")); + } + if (trans->assertions) + DPRINT((", assert %d", trans->assertions)); + if (trans->assertions & ASSERT_BACKREF) + DPRINT((", backref %d", trans->u.backref)); + else if (trans->u.class) + DPRINT((", class %ld", (long)trans->u.class)); + if (trans->neg_classes) + DPRINT((", neg_classes %p", trans->neg_classes)); + if (trans->params) + { + DPRINT((", ")); + tre_print_params(trans->params); + } + DPRINT(("\n")); + } +#endif /* TRE_DEBUG */ + p2++; + } + p1++; + } + else + /* Compute a maximum limit for the number of transitions leaving + from each state. */ + while (p1->position >= 0) + { + p2 = orig_p2; + while (p2->position >= 0) + { + counts[p1->position]++; + p2++; + } + p1++; + } + return REG_OK; +} + +/* Converts the syntax tree to a TNFA. All the transitions in the TNFA are + labelled with one character range (there are no transitions on empty + strings). The TNFA takes O(n^2) space in the worst case, `n' is size of + the regexp. 
*/ +static reg_errcode_t +tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions, + int *counts, int *offs) +{ + tre_union_t *uni; + tre_catenation_t *cat; + tre_iteration_t *iter; + reg_errcode_t errcode = REG_OK; + + /* XXX - recurse using a stack!. */ + switch (node->type) + { + case LITERAL: + break; + case UNION: + uni = (tre_union_t *)node->obj; + errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs); + if (errcode != REG_OK) + return errcode; + errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs); + break; + + case CATENATION: + cat = (tre_catenation_t *)node->obj; + /* Add a transition from each position in cat->left->lastpos + to each position in cat->right->firstpos. */ + errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos, + transitions, counts, offs); + if (errcode != REG_OK) + return errcode; + errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs); + if (errcode != REG_OK) + return errcode; + errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs); + break; + + case ITERATION: + iter = (tre_iteration_t *)node->obj; + assert(iter->max == -1 || iter->max == 1); + + if (iter->max == -1) + { + assert(iter->min == 0 || iter->min == 1); + /* Add a transition from each last position in the iterated + expression to each first position. 
*/ + errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos, + transitions, counts, offs); + if (errcode != REG_OK) + return errcode; + } + errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs); + break; + } + return errcode; +} + +#define ERROR_EXIT(err) \ + do \ + { \ + errcode = err; \ + if (/*CONSTCOND*/(void)1,1) \ + goto error_exit; \ + } \ + while (/*CONSTCOND*/(void)0,0) + + +int +tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags) +{ + tre_stack_t *stack; + tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r; + tre_pos_and_tags_t *p; + int *counts = NULL, *offs = NULL; + int i, add = 0; + tre_tnfa_transition_t *transitions, *initial; + tre_tnfa_t *tnfa = NULL; + tre_submatch_data_t *submatch_data; + tre_tag_direction_t *tag_directions = NULL; + reg_errcode_t errcode; + tre_mem_t mem; + int numpos = 0; + + /* Parse context. */ + tre_parse_ctx_t parse_ctx; + + /* Allocate a stack used throughout the compilation process for various + purposes. */ + stack = tre_stack_new(512, TRE_MAX_STACK); + if (!stack) + return REG_ESPACE; + /* Allocate a fast memory allocator. */ + mem = tre_mem_new(); + if (!mem) + { + tre_stack_destroy(stack); + return REG_ESPACE; + } + + /* Parse the regexp. */ + memset(&parse_ctx, 0, sizeof(parse_ctx)); + parse_ctx.mem = mem; + parse_ctx.stack = stack; + parse_ctx.re = regex; + parse_ctx.len = n; + parse_ctx.cflags = cflags; + parse_ctx.max_backref = -1; + /* Use 8-bit optimizations in 8-bit mode */ + parse_ctx.mb_cur_max = (cflags & REG_USEBYTES) ? 1 : TRE_MB_CUR_MAX; + DPRINT(("tre_compile: parsing '%.*" STRF "'\n", (int)n, regex)); + errcode = tre_parse(&parse_ctx); + if (errcode != REG_OK) + ERROR_EXIT(errcode); + preg->re_nsub = parse_ctx.submatch_id - 1; + tree = parse_ctx.result; + + /* Back references and approximate matching cannot currently be used + in the same regexp. 
*/ + if (parse_ctx.max_backref >= 0 && parse_ctx.have_approx) + ERROR_EXIT(REG_BADPAT); + +#ifdef TRE_DEBUG + tre_ast_print(tree); +#endif /* TRE_DEBUG */ + + /* Referring to nonexistent subexpressions is illegal. */ + if (parse_ctx.max_backref > (int)preg->re_nsub) + ERROR_EXIT(REG_ESUBREG); + + /* Allocate the TNFA struct. */ + tnfa = xcalloc(1, sizeof(tre_tnfa_t)); + if (tnfa == NULL) + ERROR_EXIT(REG_ESPACE); + tnfa->have_backrefs = parse_ctx.max_backref >= 0; + tnfa->have_approx = parse_ctx.have_approx; + tnfa->num_submatches = parse_ctx.submatch_id; + + /* The literal optimizer only looks at the final tree plus the outer + * compile flags. If the regexp changes flags inline with (?i:...) or + * (?-i:...), those scopes are no longer explicit in the optimized form, + * so keep using the full matcher. */ + if (!parse_ctx.have_inline_cflags) + { + errcode = tre_litopt_try_compile(tnfa, tree, cflags, + parse_ctx.mb_cur_max); + if (errcode != REG_OK) + ERROR_EXIT(errcode); + } + + /* Set up tags for submatch addressing. If REG_NOSUB is set and the + regexp does not have back references, this can be skipped. */ + if (tnfa->have_backrefs || !(cflags & REG_NOSUB)) + { + DPRINT(("tre_compile: setting up tags\n")); + + /* Figure out how many tags we will need. 
*/ + errcode = tre_add_tags(NULL, stack, tree, tnfa); + if (errcode != REG_OK) + ERROR_EXIT(errcode); +#ifdef TRE_DEBUG + tre_ast_print(tree); +#endif /* TRE_DEBUG */ + + if (tnfa->num_tags > 0) + { + tag_directions = xmalloc(sizeof(*tag_directions) + * (tnfa->num_tags + 1)); + if (tag_directions == NULL) + ERROR_EXIT(REG_ESPACE); + tnfa->tag_directions = tag_directions; + memset(tag_directions, -1, + sizeof(*tag_directions) * (tnfa->num_tags + 1)); + } + tnfa->minimal_tags = xcalloc(tnfa->num_tags * 2 + 1, + sizeof(*tnfa->minimal_tags)); + if (tnfa->minimal_tags == NULL) + ERROR_EXIT(REG_ESPACE); + + submatch_data = xcalloc((unsigned)parse_ctx.submatch_id, + sizeof(*submatch_data)); + if (submatch_data == NULL) + ERROR_EXIT(REG_ESPACE); + tnfa->submatch_data = submatch_data; + + errcode = tre_add_tags(mem, stack, tree, tnfa); + if (errcode != REG_OK) + ERROR_EXIT(errcode); + +#ifdef TRE_DEBUG + for (i = 0; i < parse_ctx.submatch_id; i++) + DPRINT(("pmatch[%d] = {t%d, t%d}\n", + i, submatch_data[i].so_tag, submatch_data[i].eo_tag)); + for (i = 0; i < tnfa->num_tags; i++) + DPRINT(("t%d is %s\n", i, + tag_directions[i] == TRE_TAG_MINIMIZE ? + "minimized" : "maximized")); +#endif /* TRE_DEBUG */ + } + + /* Expand iteration nodes. */ + errcode = tre_expand_ast(mem, stack, tree, tag_directions, + &tnfa->params_depth); + if (errcode != REG_OK) + ERROR_EXIT(errcode); + + /* Add a dummy node for the final state. + XXX - For certain patterns this dummy node can be optimized away, + for example "a*" or "ab*". Figure out a simple way to detect + this possibility. 
*/ + tmp_ast_l = tree; + tmp_ast_r = tre_ast_new_literal(mem, 0, 0); + if (tmp_ast_r == NULL) + ERROR_EXIT(REG_ESPACE); + + tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r); + if (tree == NULL) + ERROR_EXIT(REG_ESPACE); + + errcode = tre_compute_npfl(mem, stack, tree, &numpos); + if (errcode != REG_OK) + ERROR_EXIT(errcode); + +#ifdef TRE_DEBUG + tre_ast_print(tree); + DPRINT(("Number of states: %d\n", numpos)); +#endif /* TRE_DEBUG */ + + counts = xmalloc(sizeof(int) * numpos); + if (counts == NULL) + ERROR_EXIT(REG_ESPACE); + + offs = xmalloc(sizeof(int) * numpos); + if (offs == NULL) + ERROR_EXIT(REG_ESPACE); + + for (i = 0; i < numpos; i++) + counts[i] = 0; + tre_ast_to_tnfa(tree, NULL, counts, NULL); + + add = 0; + for (i = 0; i < numpos; i++) + { + offs[i] = add; + add += counts[i] + 1; + counts[i] = 0; + } + transitions = xcalloc((unsigned)add + 1, sizeof(*transitions)); + if (transitions == NULL) + ERROR_EXIT(REG_ESPACE); + tnfa->transitions = transitions; + tnfa->num_transitions = add; + + DPRINT(("Converting to TNFA:\n")); + errcode = tre_ast_to_tnfa(tree, transitions, counts, offs); + if (errcode != REG_OK) + ERROR_EXIT(errcode); + + /* If in eight bit mode, compute a table of characters that can be the + first character of a match. 
*/ + tnfa->first_char = -1; + if (parse_ctx.mb_cur_max == 1 && !tmp_ast_l->nullable) + { + int count = 0; + tre_cint_t k; + DPRINT(("Characters that can start a match:")); + tnfa->firstpos_chars = xcalloc(256, sizeof(char)); + if (tnfa->firstpos_chars == NULL) + ERROR_EXIT(REG_ESPACE); + for (p = tree->firstpos; p->position >= 0; p++) + { + tre_tnfa_transition_t *j = transitions + offs[p->position]; + while (j->state != NULL) + { + for (k = j->code_min; k <= j->code_max && k < 256; k++) + { + DPRINT((" %d", k)); + tnfa->firstpos_chars[k] = 1; + count++; + } + j++; + } + } + DPRINT(("\n")); +#define TRE_OPTIMIZE_FIRST_CHAR 1 +#if TRE_OPTIMIZE_FIRST_CHAR + if (count == 1) + { + for (k = 0; k < 256; k++) + if (tnfa->firstpos_chars[k]) + { + DPRINT(("first char must be %d\n", k)); + tnfa->first_char = k; + xfree(tnfa->firstpos_chars); + tnfa->firstpos_chars = NULL; + break; + } + } +#endif + + } + else + tnfa->firstpos_chars = NULL; + + + p = tree->firstpos; + i = 0; + while (p->position >= 0) + { + i++; + +#ifdef TRE_DEBUG + { + int *tags; + DPRINT(("initial: %d", p->position)); + tags = p->tags; + if (tags != NULL) + { + if (*tags >= 0) + DPRINT(("/")); + while (*tags >= 0) + { + DPRINT(("%d", *tags)); + tags++; + if (*tags >= 0) + DPRINT((",")); + } + } + DPRINT((", assert %d", p->assertions)); + if (p->params) + { + DPRINT((", ")); + tre_print_params(p->params); + } + DPRINT(("\n")); + } +#endif /* TRE_DEBUG */ + + p++; + } + + initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t)); + if (initial == NULL) + ERROR_EXIT(REG_ESPACE); + tnfa->initial = initial; + + i = 0; + for (p = tree->firstpos; p->position >= 0; p++) + { + initial[i].state = transitions + offs[p->position]; + initial[i].state_id = p->position; + initial[i].tags = NULL; + /* Copy the arrays p->tags, and p->params, they are allocated + from a tre_mem object. 
*/ + if (p->tags) + { + int j; + for (j = 0; p->tags[j] >= 0; j++); + initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1)); + if (!initial[i].tags) + ERROR_EXIT(REG_ESPACE); + memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1)); + } + initial[i].params = NULL; + if (p->params) + { + initial[i].params = xmalloc(sizeof(*p->params) * TRE_PARAM_LAST); + if (!initial[i].params) + ERROR_EXIT(REG_ESPACE); + memcpy(initial[i].params, p->params, + sizeof(*p->params) * TRE_PARAM_LAST); + } + initial[i].assertions = p->assertions; + i++; + } + initial[i].state = NULL; + + tnfa->num_transitions = add; + tnfa->final = transitions + offs[tree->lastpos[0].position]; + tnfa->num_states = numpos; + tnfa->cflags = cflags; + + DPRINT(("final state %p\n", (void *)tnfa->final)); + + tre_mem_destroy(mem); + tre_stack_destroy(stack); + xfree(counts); + xfree(offs); + + preg->TRE_REGEX_T_FIELD = (void *)tnfa; + return REG_OK; + + error_exit: + /* Free everything that was allocated and return the error code. 
*/ + tre_mem_destroy(mem); + if (stack != NULL) + tre_stack_destroy(stack); + if (counts != NULL) + xfree(counts); + if (offs != NULL) + xfree(offs); + preg->TRE_REGEX_T_FIELD = (void *)tnfa; + tre_free(preg); + return errcode; +} + + + + +void +tre_free(regex_t *preg) +{ + tre_tnfa_t *tnfa; + unsigned int i; + tre_tnfa_transition_t *trans; + + tnfa = (void *)preg->TRE_REGEX_T_FIELD; + if (!tnfa) + return; + + for (i = 0; i < tnfa->num_transitions; i++) + if (tnfa->transitions[i].state) + { + if (tnfa->transitions[i].tags) + xfree(tnfa->transitions[i].tags); + if (tnfa->transitions[i].neg_classes) + xfree(tnfa->transitions[i].neg_classes); + if (tnfa->transitions[i].params) + xfree(tnfa->transitions[i].params); + } + if (tnfa->transitions) + xfree(tnfa->transitions); + + if (tnfa->initial) + { + for (trans = tnfa->initial; trans->state; trans++) + { + if (trans->tags) + xfree(trans->tags); + if (trans->params) + xfree(trans->params); + } + xfree(tnfa->initial); + } + + if (tnfa->submatch_data) + { + for (i = 0; i < tnfa->num_submatches; i++) + if (tnfa->submatch_data[i].parents) + xfree(tnfa->submatch_data[i].parents); + xfree(tnfa->submatch_data); + } + + if (tnfa->tag_directions) + xfree(tnfa->tag_directions); + if (tnfa->firstpos_chars) + xfree(tnfa->firstpos_chars); + if (tnfa->minimal_tags) + xfree(tnfa->minimal_tags); + tre_litopt_free_literal_list(tnfa->literal_opt.literals, + tnfa->literal_opt.num_literals); + xfree(tnfa); +} + +char * +tre_version(void) +{ + static char str[256]; + char *version; + + if (str[0] == 0) + { + (void) tre_config(TRE_CONFIG_VERSION, &version); + (void) snprintf(str, sizeof(str), "TRE %s (BSD)", version); + } + return str; +} + +int +tre_config(int query, void *result) +{ + int *int_result = result; + const char **string_result = result; + + switch (query) + { + case TRE_CONFIG_APPROX: +#ifdef TRE_APPROX + *int_result = 1; +#else /* !TRE_APPROX */ + *int_result = 0; +#endif /* !TRE_APPROX */ + return REG_OK; + + case 
TRE_CONFIG_WCHAR: +#ifdef TRE_WCHAR + *int_result = 1; +#else /* !TRE_WCHAR */ + *int_result = 0; +#endif /* !TRE_WCHAR */ + return REG_OK; + + case TRE_CONFIG_MULTIBYTE: +#ifdef TRE_MULTIBYTE + *int_result = 1; +#else /* !TRE_MULTIBYTE */ + *int_result = 0; +#endif /* !TRE_MULTIBYTE */ + return REG_OK; + + case TRE_CONFIG_SYSTEM_ABI: +#ifdef TRE_CONFIG_SYSTEM_ABI + *int_result = 1; +#else /* !TRE_CONFIG_SYSTEM_ABI */ + *int_result = 0; +#endif /* !TRE_CONFIG_SYSTEM_ABI */ + return REG_OK; + + case TRE_CONFIG_VERSION: + *string_result = TRE_VERSION; + return REG_OK; + } + + return REG_NOMATCH; +} + + +/* EOF */ diff --git a/deps/tre/lib/tre-compile.h b/deps/tre/lib/tre-compile.h new file mode 100644 index 000000000..51d5ac94a --- /dev/null +++ b/deps/tre/lib/tre-compile.h @@ -0,0 +1,27 @@ +/* + tre-compile.h: Regex compilation definitions + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + +*/ + + +#ifndef TRE_COMPILE_H +#define TRE_COMPILE_H 1 + +typedef struct { + int position; + int code_min; + int code_max; + int *tags; + int assertions; + tre_ctype_t class; + tre_ctype_t *neg_classes; + int backref; + int *params; +} tre_pos_and_tags_t; + +#endif /* TRE_COMPILE_H */ + +/* EOF */ diff --git a/deps/tre/lib/tre-filter.c b/deps/tre/lib/tre-filter.c new file mode 100644 index 000000000..194e188ba --- /dev/null +++ b/deps/tre/lib/tre-filter.c @@ -0,0 +1,73 @@ +/* + tre-filter.c: Histogram filter to quickly find regexp match candidates + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + +*/ + +/* The idea of this filter is quite simple. First, let's assume the + search pattern is a simple string. In order for a substring of a + longer string to match the search pattern, it must have the same + numbers of different characters as the pattern, and those + characters must occur in the same order as they occur in pattern. 
*/ + +#ifdef HAVE_CONFIG_H +#include +#endif /* HAVE_CONFIG_H */ +#include +#include "tre-internal.h" +#include "tre-filter.h" + +int +tre_filter_find(const unsigned char *str, size_t len, tre_filter_t *filter) +{ + unsigned short counts[256]; + unsigned int i; + unsigned int window_len = filter->window_len; + tre_filter_profile_t *profile = filter->profile; + const unsigned char *str_orig = str; + + DPRINT(("tre_filter_find: %.*s\n", len, str)); + + for (i = 0; i < elementsof(counts); i++) + counts[i] = 0; + + i = 0; + while (*str && i < window_len && i < len) + { + counts[*str]++; + i++; + str++; + len--; + } + + while (len > 0) + { + tre_filter_profile_t *p; + counts[*str]++; + counts[*(str - window_len)]--; + + p = profile; + while (p->ch) + { + if (counts[p->ch] < p->count) + break; + p++; + } + if (!p->ch) + { + DPRINT(("Found possible match at %d\n", + str - str_orig)); + return str - str_orig; + } + else + { + DPRINT(("No match so far...\n")); + } + len--; + str++; + } + DPRINT(("This string cannot match.\n")); + return -1; +} diff --git a/deps/tre/lib/tre-filter.h b/deps/tre/lib/tre-filter.h new file mode 100644 index 000000000..31d0b8263 --- /dev/null +++ b/deps/tre/lib/tre-filter.h @@ -0,0 +1,19 @@ + + + + +typedef struct { + unsigned char ch; + unsigned char count; +} tre_filter_profile_t; + +typedef struct { + /* Length of the window where the character counts are kept. */ + int window_len; + /* Required character counts table. */ + tre_filter_profile_t *profile; +} tre_filter_t; + + +int +tre_filter_find(const unsigned char *str, size_t len, tre_filter_t *filter); diff --git a/deps/tre/lib/tre-internal.h b/deps/tre/lib/tre-internal.h new file mode 100644 index 000000000..40081f0c0 --- /dev/null +++ b/deps/tre/lib/tre-internal.h @@ -0,0 +1,319 @@ +/* + tre-internal.h - TRE internal definitions + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. 
+ +*/ + +#ifndef TRE_INTERNAL_H +#define TRE_INTERNAL_H 1 + +#ifdef HAVE_WCHAR_H +#include +#endif /* HAVE_WCHAR_H */ + +#ifdef HAVE_WCTYPE_H +#include +#endif /* HAVE_WCTYPE_H */ + +#ifdef HAVE_SYS_TYPES_H +#include +#endif /* HAVE_SYS_TYPES_H */ + +#include +#include +#include "../local_includes/tre.h" + +#define TRE_MAX_RE 65536 +#define TRE_MAX_STRING INT_MAX +#define TRE_MAX_STACK 1048576 + +#ifdef TRE_DEBUG +#include +#define DPRINT(msg) do {printf msg; fflush(stdout);} while(/*CONSTCOND*/(void)0,0) +#else /* !TRE_DEBUG */ +#define DPRINT(msg) do { } while(/*CONSTCOND*/(void)0,0) +#endif /* !TRE_DEBUG */ + +#define elementsof(x) ( sizeof(x) / sizeof(x[0]) ) + +#ifdef HAVE_MBRTOWC +#define tre_mbrtowc(pwc, s, n, ps) (mbrtowc((pwc), (s), (n), (ps))) +#else /* !HAVE_MBRTOWC */ +#ifdef HAVE_MBTOWC +#define tre_mbrtowc(pwc, s, n, ps) (mbtowc((pwc), (s), (n))) +#endif /* HAVE_MBTOWC */ +#endif /* !HAVE_MBRTOWC */ + +#ifdef TRE_MULTIBYTE +#ifdef HAVE_MBSTATE_T +#define TRE_MBSTATE +#endif /* TRE_MULTIBYTE */ +#endif /* HAVE_MBSTATE_T */ + +/* Define the character types and functions. */ +#ifdef TRE_WCHAR + +/* Wide characters. */ +typedef wint_t tre_cint_t; +#if WCHAR_MAX <= INT_MAX +#define TRE_CHAR_MAX WCHAR_MAX +#else /* WCHAR_MAX > INT_MAX */ +#define TRE_CHAR_MAX INT_MAX +#endif + +#ifdef TRE_MULTIBYTE +#define TRE_MB_CUR_MAX MB_CUR_MAX +#else /* !TRE_MULTIBYTE */ +#define TRE_MB_CUR_MAX 1 +#endif /* !TRE_MULTIBYTE */ + +#define tre_isalnum iswalnum +#define tre_isalpha iswalpha +#ifdef HAVE_ISWBLANK +#define tre_isblank iswblank +#endif /* HAVE_ISWBLANK */ +#define tre_iscntrl iswcntrl +#define tre_isdigit iswdigit +#define tre_isgraph iswgraph +#define tre_islower iswlower +#define tre_isprint iswprint +#define tre_ispunct iswpunct +#define tre_isspace iswspace +#define tre_isupper iswupper +#define tre_isxdigit iswxdigit + +#define tre_tolower towlower +#define tre_toupper towupper +#define tre_strlen wcslen + +#else /* !TRE_WCHAR */ + +/* 8 bit characters. 
*/ +typedef short tre_cint_t; +#define TRE_CHAR_MAX 255 +#define TRE_MB_CUR_MAX 1 + +#define tre_isalnum isalnum +#define tre_isalpha isalpha +#ifdef HAVE_ISASCII +#define tre_isascii isascii +#endif /* HAVE_ISASCII */ +#ifdef HAVE_ISBLANK +#define tre_isblank isblank +#endif /* HAVE_ISBLANK */ +#define tre_iscntrl iscntrl +#define tre_isdigit isdigit +#define tre_isgraph isgraph +#define tre_islower islower +#define tre_isprint isprint +#define tre_ispunct ispunct +#define tre_isspace isspace +#define tre_isupper isupper +#define tre_isxdigit isxdigit + +#define tre_tolower(c) (tre_cint_t)(tolower(c)) +#define tre_toupper(c) (tre_cint_t)(toupper(c)) +#define tre_strlen(s) (strlen((const char*)s)) + +#endif /* !TRE_WCHAR */ + +#if defined(TRE_WCHAR) && defined(HAVE_ISWCTYPE) && defined(HAVE_WCTYPE) +#define TRE_USE_SYSTEM_WCTYPE 1 +#endif + +#ifdef TRE_USE_SYSTEM_WCTYPE +/* Use system provided iswctype() and wctype(). */ +typedef wctype_t tre_ctype_t; +#define tre_isctype iswctype +#define tre_ctype wctype +#else /* !TRE_USE_SYSTEM_WCTYPE */ +/* Define our own versions of iswctype() and wctype(). */ +typedef int (*tre_ctype_t)(tre_cint_t); +#define tre_isctype(c, type) ( (type)(c) ) +tre_ctype_t tre_ctype(const char *name); +#endif /* !TRE_USE_SYSTEM_WCTYPE */ + +typedef enum { STR_WIDE, STR_BYTE, STR_MBS, STR_USER } tre_str_type_t; + +/* Returns number of bytes to add to (char *)ptr to make it + properly aligned for the type. */ +#define ALIGN(ptr, type) \ + ((((long)ptr) % sizeof(type)) \ + ? (sizeof(type) - (((long)ptr) % sizeof(type))) \ + : 0) + +#undef MAX +#undef MIN +#define MAX(a, b) (((a) >= (b)) ? (a) : (b)) +#define MIN(a, b) (((a) <= (b)) ? (a) : (b)) + +/* Define STRF to the correct printf formatter for strings. */ +#ifdef TRE_WCHAR +#define STRF "ls" +#else /* !TRE_WCHAR */ +#define STRF "s" +#endif /* !TRE_WCHAR */ + +/* TNFA transition type. A TNFA state is an array of transitions, + the terminator is a transition with NULL `state'. 
*/ +typedef struct tnfa_transition tre_tnfa_transition_t; + +struct tnfa_transition { + /* Range of accepted characters. */ + tre_cint_t code_min; + tre_cint_t code_max; + /* Pointer to the destination state. */ + tre_tnfa_transition_t *state; + /* ID number of the destination state. */ + int state_id; + /* -1 terminated array of tags (or NULL). */ + int *tags; + /* Matching parameters settings (or NULL). */ + int *params; + /* Assertion bitmap. */ + int assertions; + /* Assertion parameters. */ + union { + /* Character class assertion. */ + tre_ctype_t class; + /* Back reference assertion. */ + int backref; + } u; + /* Negative character class assertions. */ + tre_ctype_t *neg_classes; +}; + + +/* Assertions. */ +#define ASSERT_AT_BOL 1 /* Beginning of line. */ +#define ASSERT_AT_EOL 2 /* End of line. */ +#define ASSERT_CHAR_CLASS 4 /* Character class in `class'. */ +#define ASSERT_CHAR_CLASS_NEG 8 /* Character classes in `neg_classes'. */ +#define ASSERT_AT_BOW 16 /* Beginning of word. */ +#define ASSERT_AT_EOW 32 /* End of word. */ +#define ASSERT_AT_WB 64 /* Word boundary. */ +#define ASSERT_AT_WB_NEG 128 /* Not a word boundary. */ +#define ASSERT_BACKREF 256 /* A back reference in `backref'. */ +#define ASSERT_LAST 256 + +/* Tag directions. */ +typedef enum { + TRE_TAG_MINIMIZE = 0, + TRE_TAG_MAXIMIZE = 1 +} tre_tag_direction_t; + +/* Parameters that can be changed dynamically while matching. */ +typedef enum { + TRE_PARAM_COST_INS = 0, + TRE_PARAM_COST_DEL = 1, + TRE_PARAM_COST_SUBST = 2, + TRE_PARAM_COST_MAX = 3, + TRE_PARAM_MAX_INS = 4, + TRE_PARAM_MAX_DEL = 5, + TRE_PARAM_MAX_SUBST = 6, + TRE_PARAM_MAX_ERR = 7, + TRE_PARAM_DEPTH = 8, + TRE_PARAM_LAST = 9 +} tre_param_t; + +/* Unset matching parameter */ +#define TRE_PARAM_UNSET -1 + +/* Signifies the default matching parameter value. */ +#define TRE_PARAM_DEFAULT -2 + +/* Instructions to compute submatch register values from tag values + after a successful match. 
*/ +struct tre_submatch_data { + /* Tag that gives the value for rm_so (submatch start offset). */ + int so_tag; + /* Tag that gives the value for rm_eo (submatch end offset). */ + int eo_tag; + /* List of submatches this submatch is contained in. */ + int *parents; +}; + +typedef struct tre_submatch_data tre_submatch_data_t; + +typedef enum { + TRE_LITERAL_OPT_NONE = 0, + TRE_LITERAL_OPT_CONTAINS, + TRE_LITERAL_OPT_PREFIX, + TRE_LITERAL_OPT_SUFFIX, + TRE_LITERAL_OPT_EXACT +} tre_literal_opt_mode_t; + +typedef struct { + unsigned char *data; + size_t len; +} tre_literal_opt_literal_t; + +typedef struct { + tre_literal_opt_mode_t mode; + int nocase; + size_t num_literals; + /* Folded byte mapping used by the nocase fast path. */ + unsigned char fold_map[256]; + /* Literal index ranges grouped by the first literal byte. */ + size_t start_offsets[257]; + tre_literal_opt_literal_t *literals; +} tre_literal_opt_t; + + +/* TNFA definition. */ +typedef struct tnfa tre_tnfa_t; + +struct tnfa { + tre_tnfa_transition_t *transitions; + unsigned int num_transitions; + tre_tnfa_transition_t *initial; + tre_tnfa_transition_t *final; + tre_submatch_data_t *submatch_data; + char *firstpos_chars; + int first_char; + unsigned int num_submatches; + tre_tag_direction_t *tag_directions; + int *minimal_tags; + int num_tags; + int num_minimals; + int end_tag; + int num_states; + int cflags; + int have_backrefs; + int have_approx; + int params_depth; + tre_literal_opt_t literal_opt; +}; + +int +tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags); + +void +tre_free(regex_t *preg); + +void +tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags, + const tre_tnfa_t *tnfa, int *tags, int match_eo); + +reg_errcode_t +tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, ssize_t len, + tre_str_type_t type, int *match_tags, int eflags, + int *match_end_ofs); + +reg_errcode_t +tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string, ssize_t len, 
+ tre_str_type_t type, int *match_tags, int eflags, + int *match_end_ofs); + +#ifdef TRE_APPROX +reg_errcode_t +tre_tnfa_run_approx(const tre_tnfa_t *tnfa, const void *string, ssize_t len, + tre_str_type_t type, int *match_tags, regamatch_t *match, + regaparams_t params, int eflags, int *match_end_ofs); +#endif /* TRE_APPROX */ + +#endif /* TRE_INTERNAL_H */ + +/* EOF */ diff --git a/deps/tre/lib/tre-match-backtrack.c b/deps/tre/lib/tre-match-backtrack.c new file mode 100644 index 000000000..7e184929e --- /dev/null +++ b/deps/tre/lib/tre-match-backtrack.c @@ -0,0 +1,676 @@ +/* + tre-match-backtrack.c - TRE backtracking regex matching engine + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + +*/ + +/* + This matcher is for regexps that use back referencing. Regexp matching + with back referencing is an NP-complete problem on the number of back + references. The easiest way to match them is to use a backtracking + routine which basically goes through all possible paths in the TNFA + and chooses the one which results in the best (leftmost and longest) + match. This can be spectacularly expensive and may run out of stack + space, but there really is no better known generic algorithm. Quoting + Henry Spencer from comp.compilers: + + + POSIX.2 REs require longest match, which is really exciting to + implement since the obsolete ("basic") variant also includes + \. I haven't found a better way of tackling this than doing + a preliminary match using a DFA (or simulation) on a modified RE + that just replicates subREs for \, and then doing a + backtracking match to determine whether the subRE matches were + right. This can be rather slow, but I console myself with the + thought that people who use \ deserve very slow execution. + (Pun unintentional but very appropriate.) 
+ +*/ + + +#ifdef HAVE_CONFIG_H +#include +#endif /* HAVE_CONFIG_H */ + +#ifdef TRE_USE_ALLOCA +/* AIX requires this to be the first thing in the file. */ +#ifndef __GNUC__ +# if HAVE_ALLOCA_H +# include +# else +# ifdef _AIX + #pragma alloca +# else +# ifndef alloca /* predefined by HP cc +Olibcalls */ +char *alloca (); +# endif +# endif +# endif +#endif +#endif /* TRE_USE_ALLOCA */ + +#include +#include +#include +#ifdef HAVE_WCHAR_H +#include +#endif /* HAVE_WCHAR_H */ +#ifdef HAVE_WCTYPE_H +#include +#endif /* HAVE_WCTYPE_H */ +#ifndef TRE_WCHAR +#include +#endif /* !TRE_WCHAR */ +#ifdef HAVE_MALLOC_H +#include +#endif /* HAVE_MALLOC_H */ + +#include "tre-internal.h" +#include "tre-mem.h" +#include "tre-match-utils.h" +#include "xmalloc.h" + +typedef struct { + int pos; + const char *str_byte; +#ifdef TRE_WCHAR + const wchar_t *str_wide; +#endif /* TRE_WCHAR */ + tre_tnfa_transition_t *state; + int state_id; + int next_c; + int *tags; +#ifdef TRE_MBSTATE + mbstate_t mbstate; +#endif /* TRE_MBSTATE */ +} tre_backtrack_item_t; + +typedef struct tre_backtrack_struct { + tre_backtrack_item_t item; + struct tre_backtrack_struct *prev; + struct tre_backtrack_struct *next; +} *tre_backtrack_t; + +#ifdef TRE_WCHAR +#define BT_STACK_WIDE_IN(_str_wide) stack->item.str_wide = (_str_wide) +#define BT_STACK_WIDE_OUT (str_wide) = stack->item.str_wide +#else /* !TRE_WCHAR */ +#define BT_STACK_WIDE_IN(_str_wide) +#define BT_STACK_WIDE_OUT +#endif /* !TRE_WCHAR */ + +#ifdef TRE_MBSTATE +#define BT_STACK_MBSTATE_IN stack->item.mbstate = (mbstate) +#define BT_STACK_MBSTATE_OUT (mbstate) = stack->item.mbstate +#else /* !TRE_MBSTATE */ +#define BT_STACK_MBSTATE_IN +#define BT_STACK_MBSTATE_OUT +#endif /* !TRE_MBSTATE */ + + +#ifdef TRE_USE_ALLOCA +#define tre_bt_mem_new tre_mem_newa +#define tre_bt_mem_alloc tre_mem_alloca +#define tre_bt_mem_destroy(obj) do { } while (0) +#define xafree(obj) do { } while (0) /* do nothing, obj was obtained with alloca() */ +#else /* 
!TRE_USE_ALLOCA */ +#define tre_bt_mem_new tre_mem_new +#define tre_bt_mem_alloc tre_mem_alloc +#define tre_bt_mem_destroy tre_mem_destroy +#define xafree(obj) xfree(obj) +#endif /* !TRE_USE_ALLOCA */ + + +#define BT_STACK_PUSH(_pos, _str_byte, _str_wide, _state, _state_id, _next_c, _tags, _mbstate) \ + do \ + { \ + int i; \ + if (!stack->next) \ + { \ + tre_backtrack_t s; \ + s = tre_bt_mem_alloc(mem, sizeof(*s)); \ + if (!s) \ + { \ + tre_bt_mem_destroy(mem); \ + if (tags) \ + xafree(tags); \ + if (pmatch) \ + xafree(pmatch); \ + if (states_seen) \ + xafree(states_seen); \ + return REG_ESPACE; \ + } \ + s->prev = stack; \ + s->next = NULL; \ + s->item.tags = tre_bt_mem_alloc(mem, \ + sizeof(*tags) * tnfa->num_tags); \ + if (!s->item.tags) \ + { \ + tre_bt_mem_destroy(mem); \ + if (tags) \ + xafree(tags); \ + if (pmatch) \ + xafree(pmatch); \ + if (states_seen) \ + xafree(states_seen); \ + return REG_ESPACE; \ + } \ + stack->next = s; \ + stack = s; \ + } \ + else \ + stack = stack->next; \ + stack->item.pos = (_pos); \ + stack->item.str_byte = (_str_byte); \ + BT_STACK_WIDE_IN(_str_wide); \ + stack->item.state = (_state); \ + stack->item.state_id = (_state_id); \ + stack->item.next_c = (_next_c); \ + for (i = 0; i < tnfa->num_tags; i++) \ + stack->item.tags[i] = (_tags)[i]; \ + BT_STACK_MBSTATE_IN; \ + } \ + while (/*CONSTCOND*/(void)0,0) + +#define BT_STACK_POP() \ + do \ + { \ + int i; \ + assert(stack->prev); \ + pos = stack->item.pos; \ + if (type == STR_USER) \ + str_source->rewind(pos + pos_add_next, str_source->context); \ + str_byte = stack->item.str_byte; \ + BT_STACK_WIDE_OUT; \ + state = stack->item.state; \ + next_c = (tre_char_t) stack->item.next_c; \ + for (i = 0; i < tnfa->num_tags; i++) \ + tags[i] = stack->item.tags[i]; \ + BT_STACK_MBSTATE_OUT; \ + stack = stack->prev; \ + } \ + while (/*CONSTCOND*/(void)0,0) + +#undef MIN +#define MIN(a, b) ((a) <= (b) ? 
(a) : (b)) + +reg_errcode_t +tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string, + ssize_t len, tre_str_type_t type, int *match_tags, + int eflags, int *match_end_ofs) +{ + /* State variables required by GET_NEXT_WCHAR. */ + tre_char_t prev_c = 0, next_c = 0; + const char *str_byte = string; + ssize_t pos = 0; + unsigned int pos_add_next = 1; +#ifdef TRE_WCHAR + const wchar_t *str_wide = string; +#ifdef TRE_MBSTATE + mbstate_t mbstate; +#endif /* TRE_MBSTATE */ +#endif /* TRE_WCHAR */ + int reg_notbol = eflags & REG_NOTBOL; + int reg_noteol = eflags & REG_NOTEOL; + int reg_newline = tnfa->cflags & REG_NEWLINE; + int str_user_end = 0; + + /* These are used to remember the necessary values of the above + variables to return to the position where the current search + started from. */ + int next_c_start; + const char *str_byte_start; + int pos_start = -1; +#ifdef TRE_WCHAR + const wchar_t *str_wide_start; +#endif /* TRE_WCHAR */ +#ifdef TRE_MBSTATE + mbstate_t mbstate_start; +#endif /* TRE_MBSTATE */ + reg_errcode_t ret; + + /* End offset of best match so far, or -1 if no match found yet. */ + int match_eo = -1; + /* Tag arrays. */ + int *next_tags, *tags = NULL; + /* Current TNFA state. */ + tre_tnfa_transition_t *state; + int *states_seen = NULL; + + /* Memory allocator to for allocating the backtracking stack. */ + tre_mem_t mem = tre_bt_mem_new(); + + /* The backtracking stack. */ + tre_backtrack_t stack; + + tre_tnfa_transition_t *trans_i; + regmatch_t *pmatch = NULL; + + /* + * TRE internals tend to use int instead of size_t for positions or + * lengths and don't check for overflow. This will take time to fix + * properly. In the meantime, simply limit the input to what we can + * handle. 
+ */ + if (len > TRE_MAX_STRING) + len = TRE_MAX_STRING; + +#ifdef TRE_MBSTATE + memset(&mbstate, '\0', sizeof(mbstate)); +#endif /* TRE_MBSTATE */ + + if (!mem) + return REG_ESPACE; + stack = tre_bt_mem_alloc(mem, sizeof(*stack)); + if (!stack) + { + ret = REG_ESPACE; + goto error_exit; + } + stack->prev = NULL; + stack->next = NULL; + + DPRINT(("tnfa_execute_backtrack, input type %d\n", type)); + DPRINT(("len = %zd\n", len)); + +#ifdef TRE_USE_ALLOCA + tags = alloca(sizeof(*tags) * tnfa->num_tags); + pmatch = alloca(sizeof(*pmatch) * tnfa->num_submatches); + states_seen = alloca(sizeof(*states_seen) * tnfa->num_states); +#else /* !TRE_USE_ALLOCA */ + if (tnfa->num_tags) + { + tags = xmalloc(sizeof(*tags) * tnfa->num_tags); + if (!tags) + { + ret = REG_ESPACE; + goto error_exit; + } + } + if (tnfa->num_submatches) + { + pmatch = xmalloc(sizeof(*pmatch) * tnfa->num_submatches); + if (!pmatch) + { + ret = REG_ESPACE; + goto error_exit; + } + } + if (tnfa->num_states) + { + states_seen = xmalloc(sizeof(*states_seen) * tnfa->num_states); + if (!states_seen) + { + ret = REG_ESPACE; + goto error_exit; + } + } +#endif /* !TRE_USE_ALLOCA */ + + retry: + { + int i; + for (i = 0; i < tnfa->num_tags; i++) + { + tags[i] = -1; + if (match_tags) + match_tags[i] = -1; + } + for (i = 0; i < tnfa->num_states; i++) + states_seen[i] = 0; + } + + state = NULL; + pos = pos_start; + if (type == STR_USER) + str_source->rewind(pos + pos_add_next, str_source->context); + GET_NEXT_WCHAR(); + pos_start = pos; + next_c_start = next_c; + str_byte_start = str_byte; +#ifdef TRE_WCHAR + str_wide_start = str_wide; +#endif /* TRE_WCHAR */ +#ifdef TRE_MBSTATE + mbstate_start = mbstate; +#endif /* TRE_MBSTATE */ + + /* Handle initial states. 
*/ + next_tags = NULL; + for (trans_i = tnfa->initial; trans_i->state; trans_i++) + { + DPRINT(("> init %p, prev_c %lc\n", trans_i->state, (tre_cint_t)prev_c)); + if (trans_i->assertions && CHECK_ASSERTIONS(trans_i->assertions)) + { + DPRINT(("assert failed\n")); + continue; + } + if (state == NULL) + { + /* Start from this state. */ + state = trans_i->state; + next_tags = trans_i->tags; + } + else + { + /* Backtrack to this state. */ + DPRINT(("saving state %d for backtracking\n", trans_i->state_id)); + BT_STACK_PUSH(pos, str_byte, str_wide, trans_i->state, + trans_i->state_id, next_c, tags, mbstate); + { + int *tmp = trans_i->tags; + if (tmp) + while (*tmp >= 0) + stack->item.tags[*tmp++] = pos; + } + } + } + + if (next_tags) + for (; *next_tags >= 0; next_tags++) + tags[*next_tags] = pos; + + + DPRINT(("entering match loop, pos %zd, str_byte %p\n", pos, str_byte)); + DPRINT(("pos:chr/code | state and tags\n")); + DPRINT(("-------------+------------------------------------------------\n")); + + if (state == NULL) + goto backtrack; + + while (/*CONSTCOND*/(void)1,1) + { + tre_tnfa_transition_t *next_state; + int empty_br_match; + + DPRINT(("start loop\n")); + if (state == tnfa->final) + { + DPRINT((" match found, %d %zd\n", match_eo, pos)); + if (match_eo < pos + || (match_eo == pos + && match_tags + && tre_tag_order(tnfa->num_tags, tnfa->tag_directions, + tags, match_tags))) + { + int i; + /* This match wins the previous match. */ + DPRINT((" win previous\n")); + match_eo = pos; + if (match_tags) + for (i = 0; i < tnfa->num_tags; i++) + match_tags[i] = tags[i]; + } + /* Our TNFAs never have transitions leaving from the final state, + so we jump right to backtracking. */ + goto backtrack; + } + +#ifdef TRE_DEBUG + DPRINT(("%3zd:%2lc/%05d | %p ", pos, (tre_cint_t)next_c, (int)next_c, + state)); + { + int i; + for (i = 0; i < tnfa->num_tags; i++) + DPRINT(("%d%s", tags[i], i < tnfa->num_tags - 1 ? 
", " : "")); + DPRINT(("\n")); + } +#endif /* TRE_DEBUG */ + + /* Go to the next character in the input string. */ + empty_br_match = 0; + trans_i = state; + if (trans_i->state && trans_i->assertions & ASSERT_BACKREF) + { + /* This is a back reference state. All transitions leaving from + this state have the same back reference "assertion". Instead + of reading the next character, we match the back reference. */ + int so, eo, bt = trans_i->u.backref; + int bt_len; + int result; + + DPRINT((" should match back reference %d\n", bt)); + /* Get the substring we need to match against. Remember to + turn off REG_NOSUB temporarily. */ + tre_fill_pmatch(bt + 1, pmatch, tnfa->cflags & ~REG_NOSUB, + tnfa, tags, pos); + so = pmatch[bt].rm_so; + eo = pmatch[bt].rm_eo; + bt_len = eo - so; + +#ifdef TRE_DEBUG + { + int slen; + if (len < 0) + slen = bt_len; + else + slen = MIN(bt_len, len - pos); + + if (type == STR_BYTE) + { + DPRINT((" substring (len %d) is [%d, %d[: '%.*s'\n", + bt_len, so, eo, bt_len, (char*)string + so)); + DPRINT((" current string is '%.*s'\n", slen, str_byte - 1)); + } +#ifdef TRE_WCHAR + else if (type == STR_WIDE) + { + DPRINT((" substring (len %d) is [%d, %d[: '%.*" STRF "'\n", + bt_len, so, eo, bt_len, (wchar_t*)string + so)); + DPRINT((" current string is '%.*" STRF "'\n", + slen, str_wide - 1)); + } +#endif /* TRE_WCHAR */ + } +#endif + + if (len < 0) + { + if (type == STR_USER) + result = str_source->compare((unsigned)so, (unsigned)pos, + (unsigned)bt_len, + str_source->context); +#ifdef TRE_WCHAR + else if (type == STR_WIDE) + result = wcsncmp((const wchar_t*)string + so, str_wide - 1, + (size_t)bt_len); +#endif /* TRE_WCHAR */ + else + result = strncmp((const char*)string + so, str_byte - 1, + (size_t)bt_len); + } + else if (len - pos < bt_len) + result = 1; +#ifdef TRE_WCHAR + else if (type == STR_WIDE) + result = wmemcmp((const wchar_t*)string + so, str_wide - 1, + (size_t)bt_len); +#endif /* TRE_WCHAR */ + else + result = memcmp((const 
char*)string + so, str_byte - 1, + (size_t)bt_len); + + if (result == 0) + { + /* Back reference matched. Check for infinite loop. */ + if (bt_len == 0) + empty_br_match = 1; + if (empty_br_match && states_seen[trans_i->state_id]) + { + DPRINT((" avoid loop\n")); + goto backtrack; + } + + states_seen[trans_i->state_id] = empty_br_match; + + /* Advance in input string and resync `prev_c', `next_c' + and pos. */ + DPRINT((" back reference matched\n")); + str_byte += bt_len - 1; +#ifdef TRE_WCHAR + str_wide += bt_len - 1; +#endif /* TRE_WCHAR */ + pos += bt_len - 1; + GET_NEXT_WCHAR(); + DPRINT((" pos now %zd\n", pos)); + } + else + { + DPRINT((" back reference did not match\n")); + goto backtrack; + } + } + else + { + /* Check for end of string. */ + if (len < 0) + { + if (type == STR_USER) + { + if (str_user_end) + goto backtrack; + } + else if (next_c == L'\0' || pos >= TRE_MAX_STRING) + goto backtrack; + } + else + { + if (pos >= len) + goto backtrack; + } + + /* Read the next character. */ + GET_NEXT_WCHAR(); + } + + next_state = NULL; + for (trans_i = state; trans_i->state; trans_i++) + { + DPRINT((" transition %d-%d (%c-%c) %d to %d\n", + trans_i->code_min, trans_i->code_max, + trans_i->code_min, trans_i->code_max, + trans_i->assertions, trans_i->state_id)); + if (trans_i->code_min <= (tre_cint_t)prev_c + && trans_i->code_max >= (tre_cint_t)prev_c) + { + if (trans_i->assertions + && (CHECK_ASSERTIONS(trans_i->assertions) + || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags))) + { + DPRINT((" assertion failed\n")); + continue; + } + + if (next_state == NULL) + { + /* First matching transition. */ + DPRINT((" Next state is %d\n", trans_i->state_id)); + next_state = trans_i->state; + next_tags = trans_i->tags; + } + else + { + /* Second matching transition. We may need to backtrack here + to take this transition instead of the first one, so we + push this transition in the backtracking stack so we can + jump back here if needed. 
*/ + DPRINT((" saving state %d for backtracking\n", + trans_i->state_id)); + BT_STACK_PUSH(pos, str_byte, str_wide, trans_i->state, + trans_i->state_id, next_c, tags, mbstate); + { + int *tmp; + for (tmp = trans_i->tags; tmp && *tmp >= 0; tmp++) + stack->item.tags[*tmp] = pos; + } +#if 0 /* XXX - it's important not to look at all transitions here to keep + the stack small! */ + break; +#endif + } + } + } + + if (next_state != NULL) + { + /* Matching transitions were found. Take the first one. */ + state = next_state; + + /* Update the tag values. */ + if (next_tags) + while (*next_tags >= 0) + tags[*next_tags++] = pos; + } + else + { + backtrack: + /* A matching transition was not found. Try to backtrack. */ + if (stack->prev) + { + DPRINT((" backtracking\n")); + if (stack->item.state->assertions & ASSERT_BACKREF) + { + DPRINT((" states_seen[%d] = 0\n", + stack->item.state_id)); + states_seen[stack->item.state_id] = 0; + } + + BT_STACK_POP(); + } + else if (match_eo < 0) + { + /* Try starting from a later position in the input string. */ + /* Check for end of string. */ + if (len < 0) + { + if (next_c_start == L'\0' || pos_start >= TRE_MAX_STRING) + { + DPRINT(("end of string.\n")); + break; + } + } + else + { + if (pos_start >= len) + { + DPRINT(("end of string.\n")); + break; + } + } + DPRINT(("restarting from next start position\n")); + next_c = (tre_char_t) next_c_start; +#ifdef TRE_MBSTATE + mbstate = mbstate_start; +#endif /* TRE_MBSTATE */ + str_byte = str_byte_start; +#ifdef TRE_WCHAR + str_wide = str_wide_start; +#endif /* TRE_WCHAR */ + goto retry; + } + else + { + DPRINT(("finished\n")); + break; + } + } + } + + ret = match_eo >= 0 ? 
REG_OK : REG_NOMATCH; + *match_end_ofs = match_eo; + + error_exit: + tre_bt_mem_destroy(mem); +#ifndef TRE_USE_ALLOCA + if (tags) + xafree(tags); + if (pmatch) + xafree(pmatch); + if (states_seen) + xafree(states_seen); +#endif /* !TRE_USE_ALLOCA */ + + return ret; +} diff --git a/deps/tre/lib/tre-match-parallel.c b/deps/tre/lib/tre-match-parallel.c new file mode 100644 index 000000000..151083746 --- /dev/null +++ b/deps/tre/lib/tre-match-parallel.c @@ -0,0 +1,538 @@ +/* + tre-match-parallel.c - TRE parallel regex matching engine + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + +*/ + +/* + This algorithm searches for matches basically by reading characters + in the searched string one by one, starting at the beginning. All + matching paths in the TNFA are traversed in parallel. When two or + more paths reach the same state, exactly one is chosen according to + tag ordering rules; if returning submatches is not required it does + not matter which path is chosen. + + The worst case time required for finding the leftmost and longest + match, or determining that there is no match, is always linearly + dependent on the length of the text being searched. + + This algorithm cannot handle TNFAs with back referencing nodes. + See `tre-match-backtrack.c'. +*/ + + +#ifdef HAVE_CONFIG_H +#include +#endif /* HAVE_CONFIG_H */ + +#ifdef TRE_USE_ALLOCA +/* AIX requires this to be the first thing in the file. 
*/ +#ifndef __GNUC__ +# if HAVE_ALLOCA_H +# include +# else +# ifdef _AIX + #pragma alloca +# else +# ifndef alloca /* predefined by HP cc +Olibcalls */ +char *alloca (); +# endif +# endif +# endif +#endif +#endif /* TRE_USE_ALLOCA */ + +#include +#include +#include +#include +#ifdef HAVE_WCHAR_H +#include +#endif /* HAVE_WCHAR_H */ +#ifdef HAVE_WCTYPE_H +#include +#endif /* HAVE_WCTYPE_H */ +#ifndef TRE_WCHAR +#include +#endif /* !TRE_WCHAR */ +#ifdef HAVE_MALLOC_H +#include +#endif /* HAVE_MALLOC_H */ + +#include "tre-internal.h" +#include "tre-match-utils.h" +#include "xmalloc.h" + + + +typedef struct { + tre_tnfa_transition_t *state; + int *tags; +} tre_tnfa_reach_t; + +typedef struct { + int pos; + int **tags; +} tre_reach_pos_t; + + +#ifdef TRE_DEBUG +static void +tre_print_reach(const tre_tnfa_reach_t *reach, int num_tags) +{ + int i; + + while (reach->state != NULL) + { + DPRINT((" %p", (void *)reach->state)); + if (num_tags > 0) + { + DPRINT(("/")); + for (i = 0; i < num_tags; i++) + { + DPRINT(("%d:%d", i, reach->tags[i])); + if (i < (num_tags-1)) + DPRINT((",")); + } + } + reach++; + } + DPRINT(("\n")); + +} +#endif /* TRE_DEBUG */ + +reg_errcode_t +tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, ssize_t len, + tre_str_type_t type, int *match_tags, int eflags, + int *match_end_ofs) +{ + /* State variables required by GET_NEXT_WCHAR. 
*/ + tre_char_t prev_c = 0, next_c = 0; + const char *str_byte = string; + ssize_t pos = -1; + unsigned int pos_add_next = 1; +#ifdef TRE_WCHAR + const wchar_t *str_wide = string; +#ifdef TRE_MBSTATE + mbstate_t mbstate; +#endif /* TRE_MBSTATE */ +#endif /* TRE_WCHAR */ + reg_errcode_t ret; + int reg_notbol = eflags & REG_NOTBOL; + int reg_noteol = eflags & REG_NOTEOL; + int reg_newline = tnfa->cflags & REG_NEWLINE; + int str_user_end = 0; + + char *buf; + tre_tnfa_transition_t *trans_i; + tre_tnfa_reach_t *reach, *reach_next, *reach_i, *reach_next_i; + tre_reach_pos_t *reach_pos; + int *tag_i; + int num_tags, i; + + int match_eo = -1; /* end offset of match (-1 if no match found yet) */ + int new_match = 0; + int *tmp_tags = NULL; + int *tmp_iptr; + + /* + * TRE internals tend to use int instead of size_t for positions or + * lengths and don't check for overflow. This will take time to fix + * properly. In the meantime, simply limit the input to what we can + * handle. + */ + if (len > TRE_MAX_STRING) + len = TRE_MAX_STRING; + +#ifdef TRE_MBSTATE + memset(&mbstate, '\0', sizeof(mbstate)); +#endif /* TRE_MBSTATE */ + + DPRINT(("tre_tnfa_run_parallel, input type %d\n", type)); + + if (!match_tags) + num_tags = 0; + else + num_tags = tnfa->num_tags; + + /* Allocate memory for temporary data required for matching. This needs to + be done for every matching operation to be thread safe. This allocates + everything in a single large block from the stack frame using alloca() + or with malloc() if alloca is unavailable. 
*/ + { + size_t tbytes, rbytes, pbytes, xbytes, total_bytes; + size_t num_states = (size_t)tnfa->num_states; + size_t state_tag_bytes, reach_bytes; + size_t padding = (sizeof(long) - 1) * 4; + char *tmp_buf; + + if (num_states > SIZE_MAX / sizeof(*reach_pos)) + return REG_ESPACE; + pbytes = sizeof(*reach_pos) * num_states; + + if (num_states + 1 > SIZE_MAX / sizeof(*reach_next)) + return REG_ESPACE; + rbytes = sizeof(*reach_next) * (num_states + 1); + + if ((size_t)num_tags > SIZE_MAX / sizeof(*tmp_tags)) + return REG_ESPACE; + tbytes = sizeof(*tmp_tags) * (size_t)num_tags; + + if ((size_t)num_tags > SIZE_MAX / sizeof(int)) + return REG_ESPACE; + xbytes = sizeof(int) * (size_t)num_tags; + + if (num_states > 0 && xbytes > SIZE_MAX / num_states) + return REG_ESPACE; + state_tag_bytes = xbytes * num_states; + + if (rbytes > SIZE_MAX - state_tag_bytes) + return REG_ESPACE; + reach_bytes = rbytes + state_tag_bytes; + + if (reach_bytes > (SIZE_MAX - padding - tbytes - pbytes) / 2) + return REG_ESPACE; + + /* Compute the length of the block we need. */ + total_bytes = + padding + reach_bytes * 2 + tbytes + pbytes; + + /* Allocate the memory. */ +#ifdef TRE_USE_ALLOCA + buf = alloca(total_bytes); +#else /* !TRE_USE_ALLOCA */ + buf = xmalloc(total_bytes); +#endif /* !TRE_USE_ALLOCA */ + if (buf == NULL) + return REG_ESPACE; + memset(buf, 0, total_bytes); + + /* Get the various pointers within tmp_buf (properly aligned). 
*/ + tmp_tags = (void *)buf; + tmp_buf = buf + tbytes; + tmp_buf += ALIGN(tmp_buf, long); + reach_next = (void *)tmp_buf; + tmp_buf += rbytes; + tmp_buf += ALIGN(tmp_buf, long); + reach = (void *)tmp_buf; + tmp_buf += rbytes; + tmp_buf += ALIGN(tmp_buf, long); + reach_pos = (void *)tmp_buf; + tmp_buf += pbytes; + tmp_buf += ALIGN(tmp_buf, long); + for (i = 0; i < tnfa->num_states; i++) + { + reach[i].tags = (void *)tmp_buf; + tmp_buf += xbytes; + reach_next[i].tags = (void *)tmp_buf; + tmp_buf += xbytes; + } + } + + for (i = 0; i < tnfa->num_states; i++) + reach_pos[i].pos = -1; + + /* If only one character can start a match, find it first. */ + if (tnfa->first_char >= 0 && type == STR_BYTE && str_byte) + { + const char *orig_str = str_byte; + int first = tnfa->first_char; + + if (len >= 0) + str_byte = memchr(orig_str, first, (size_t)len); + else + str_byte = strchr(orig_str, first); + if (str_byte == NULL) + { +#ifndef TRE_USE_ALLOCA + if (buf) + xfree(buf); +#endif /* !TRE_USE_ALLOCA */ + return REG_NOMATCH; + } + DPRINT(("skipped %lu chars\n", (unsigned long)(str_byte - orig_str))); + if (str_byte >= orig_str + 1) + prev_c = (unsigned char)*(str_byte - 1); + next_c = (unsigned char)*str_byte; + pos = str_byte - orig_str; + if (len < 0 || pos < len) + str_byte++; + } + else + { + GET_NEXT_WCHAR(); + pos = 0; + } + +#if 0 + /* Skip over characters that cannot possibly be the first character + of a match. */ + if (tnfa->firstpos_chars != NULL) + { + char *chars = tnfa->firstpos_chars; + + if (len < 0) + { + const char *orig_str = str_byte; + /* XXX - use strpbrk() and wcspbrk() because they might be + optimized for the target architecture. Try also strcspn() + and wcscspn() and compare the speeds. 
*/ + while (next_c != L'\0' && !chars[next_c]) + { + next_c = *str_byte++; + } + prev_c = *(str_byte - 2); + pos += str_byte - orig_str; + DPRINT(("skipped %d chars\n", str_byte - orig_str)); + } + else + { + while (pos <= len && !chars[next_c]) + { + prev_c = next_c; + next_c = (unsigned char)(*str_byte++); + pos++; + } + } + } +#endif + + DPRINT(("length: %zd\n", len)); + DPRINT(("pos:chr/code | states and tags\n")); + DPRINT(("-------------+------------------------------------------------\n")); + + reach_next_i = reach_next; + while (/*CONSTCOND*/(void)1,1) + { + /* If no match found yet, add the initial states to `reach_next'. */ + if (match_eo < 0) + { + DPRINT((" init >")); + trans_i = tnfa->initial; + while (trans_i->state != NULL) + { + if (reach_pos[trans_i->state_id].pos < pos) + { + if (trans_i->assertions + && CHECK_ASSERTIONS(trans_i->assertions)) + { + DPRINT(("assertion failed\n")); + trans_i++; + continue; + } + + DPRINT((" %p", (void *)trans_i->state)); + reach_next_i->state = trans_i->state; + for (i = 0; i < num_tags; i++) + reach_next_i->tags[i] = -1; + tag_i = trans_i->tags; + if (tag_i) + while (*tag_i >= 0) + { + if (*tag_i < num_tags) + reach_next_i->tags[*tag_i] = pos; + tag_i++; + } + if (reach_next_i->state == tnfa->final) + { + DPRINT((" found empty match\n")); + match_eo = pos; + new_match = 1; + for (i = 0; i < num_tags; i++) + match_tags[i] = reach_next_i->tags[i]; + } + reach_pos[trans_i->state_id].pos = pos; + reach_pos[trans_i->state_id].tags = &reach_next_i->tags; + reach_next_i++; + } + trans_i++; + } + DPRINT(("\n")); + reach_next_i->state = NULL; + } + else + { + if (num_tags == 0 || reach_next_i == reach_next) + /* We have found a match. */ + break; + } + + /* Check for end of string. 
*/ + if (len < 0) + { + if (type == STR_USER) + { + if (str_user_end) + break; + } + else if (next_c == L'\0' || pos >= TRE_MAX_STRING) + break; + } + else + { + if (pos >= len) + break; + } + + GET_NEXT_WCHAR(); + +#ifdef TRE_DEBUG + DPRINT(("%3zd:%2lc/%05d |", pos - 1, (tre_cint_t)prev_c, (int)prev_c)); + tre_print_reach(reach_next, num_tags); + DPRINT(("%3zd:%2lc/%05d |", pos, (tre_cint_t)next_c, (int)next_c)); + tre_print_reach(reach_next, num_tags); +#endif /* TRE_DEBUG */ + + /* Swap `reach' and `reach_next'. */ + reach_i = reach; + reach = reach_next; + reach_next = reach_i; + + /* For each state in `reach', weed out states that don't fulfill the + minimal matching conditions. */ + if (tnfa->num_minimals && new_match) + { + new_match = 0; + reach_next_i = reach_next; + for (reach_i = reach; reach_i->state; reach_i++) + { + int skip = 0; + for (i = 0; tnfa->minimal_tags[i] >= 0; i += 2) + { + int end = tnfa->minimal_tags[i]; + int start = tnfa->minimal_tags[i + 1]; + DPRINT((" Minimal start %d, end %d\n", start, end)); + if (end >= num_tags) + { + DPRINT((" Throwing %p out.\n", reach_i->state)); + skip = 1; + break; + } + else if (reach_i->tags[start] == match_tags[start] + && reach_i->tags[end] < match_tags[end]) + { + DPRINT((" Throwing %p out because t%d < %d\n", + reach_i->state, end, match_tags[end])); + skip = 1; + break; + } + } + if (!skip) + { + reach_next_i->state = reach_i->state; + tmp_iptr = reach_next_i->tags; + reach_next_i->tags = reach_i->tags; + reach_i->tags = tmp_iptr; + reach_next_i++; + } + } + reach_next_i->state = NULL; + + /* Swap `reach' and `reach_next'. */ + reach_i = reach; + reach = reach_next; + reach_next = reach_i; + } + + /* For each state in `reach' see if there is a transition leaving with + the current input symbol to a state not yet in `reach_next', and + add the destination states to `reach_next'. 
*/ + reach_next_i = reach_next; + for (reach_i = reach; reach_i->state; reach_i++) + { + for (trans_i = reach_i->state; trans_i->state; trans_i++) + { + /* Does this transition match the input symbol? */ + if (trans_i->code_min <= (tre_cint_t)prev_c && + trans_i->code_max >= (tre_cint_t)prev_c) + { + if (trans_i->assertions + && (CHECK_ASSERTIONS(trans_i->assertions) + || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags))) + { + DPRINT(("assertion failed\n")); + continue; + } + + /* Compute the tags after this transition. */ + for (i = 0; i < num_tags; i++) + tmp_tags[i] = reach_i->tags[i]; + tag_i = trans_i->tags; + if (tag_i != NULL) + while (*tag_i >= 0) + { + if (*tag_i < num_tags) + tmp_tags[*tag_i] = pos; + tag_i++; + } + + if (reach_pos[trans_i->state_id].pos < pos) + { + /* Found an unvisited node. */ + reach_next_i->state = trans_i->state; + tmp_iptr = reach_next_i->tags; + reach_next_i->tags = tmp_tags; + tmp_tags = tmp_iptr; + reach_pos[trans_i->state_id].pos = pos; + reach_pos[trans_i->state_id].tags = &reach_next_i->tags; + + if (reach_next_i->state == tnfa->final + && (match_eo == -1 + || (num_tags > 0 + && reach_next_i->tags[0] <= match_tags[0]))) + { + DPRINT((" found match %p\n", trans_i->state)); + match_eo = pos; + new_match = 1; + for (i = 0; i < num_tags; i++) + match_tags[i] = reach_next_i->tags[i]; + } + reach_next_i++; + + } + else + { + assert(reach_pos[trans_i->state_id].pos == pos); + /* Another path has also reached this state. We choose + the winner by examining the tag values for both + paths. */ + if (tre_tag_order(num_tags, tnfa->tag_directions, + tmp_tags, + *reach_pos[trans_i->state_id].tags)) + { + /* The new path wins. 
*/ + tmp_iptr = *reach_pos[trans_i->state_id].tags; + *reach_pos[trans_i->state_id].tags = tmp_tags; + if (trans_i->state == tnfa->final) + { + DPRINT((" found better match\n")); + match_eo = pos; + new_match = 1; + for (i = 0; i < num_tags; i++) + match_tags[i] = tmp_tags[i]; + } + tmp_tags = tmp_iptr; + } + } + } + } + } + reach_next_i->state = NULL; + } + + DPRINT(("match end offset = %d\n", match_eo)); + + *match_end_ofs = match_eo; + ret = match_eo >= 0 ? REG_OK : REG_NOMATCH; + +#ifndef TRE_USE_ALLOCA + if (buf) + xfree(buf); +#endif /* !TRE_USE_ALLOCA */ + return ret; +} + +/* EOF */ diff --git a/deps/tre/lib/tre-match-utils.h b/deps/tre/lib/tre-match-utils.h new file mode 100644 index 000000000..76e8b1972 --- /dev/null +++ b/deps/tre/lib/tre-match-utils.h @@ -0,0 +1,215 @@ +/* + tre-match-utils.h - TRE matcher helper definitions + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + +*/ + +#define str_source ((const tre_str_source*)string) + +#ifdef TRE_WCHAR + +#ifdef TRE_MULTIBYTE + +/* Wide character and multibyte support. 
*/ + +#define GET_NEXT_WCHAR() \ + do { \ + prev_c = next_c; \ + if (type == STR_BYTE) \ + { \ + pos++; \ + if (len >= 0 && pos >= len) \ + next_c = '\0'; \ + else \ + next_c = (unsigned char)(*str_byte++); \ + } \ + else if (type == STR_WIDE) \ + { \ + pos++; \ + if (len >= 0 && pos >= len) \ + next_c = L'\0'; \ + else \ + next_c = *str_wide++; \ + } \ + else if (type == STR_MBS) \ + { \ + pos += pos_add_next; \ + if (str_byte == NULL) \ + next_c = L'\0'; \ + else \ + { \ + size_t w; \ + size_t max; \ + if (len >= 0) \ + max = len - pos; \ + else \ + max = 32; \ + if (max <= 0) \ + { \ + next_c = L'\0'; \ + pos_add_next = 1; \ + } \ + else \ + { \ + w = tre_mbrtowc(&next_c, str_byte, (size_t)max, &mbstate); \ + if (w == (size_t)-1 || w == (size_t)-2) \ + return REG_NOMATCH; \ + if (w == 0 && len >= 0) \ + { \ + pos_add_next = 1; \ + next_c = 0; \ + str_byte++; \ + } \ + else \ + { \ + pos_add_next = w; \ + str_byte += w; \ + } \ + } \ + } \ + } \ + else if (type == STR_USER) \ + { \ + pos += pos_add_next; \ + str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \ + str_source->context); \ + } \ + } while(/*CONSTCOND*/(void)0,0) + +#else /* !TRE_MULTIBYTE */ + +/* Wide character support, no multibyte support. */ + +#define GET_NEXT_WCHAR() \ + do { \ + prev_c = next_c; \ + if (type == STR_BYTE) \ + { \ + pos++; \ + if (len >= 0 && pos >= len) \ + next_c = '\0'; \ + else \ + next_c = (unsigned char)(*str_byte++); \ + } \ + else if (type == STR_WIDE) \ + { \ + pos++; \ + if (len >= 0 && pos >= len) \ + next_c = L'\0'; \ + else \ + next_c = *str_wide++; \ + } \ + else if (type == STR_USER) \ + { \ + pos += pos_add_next; \ + str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \ + str_source->context); \ + } \ + } while(/*CONSTCOND*/(void)0,0) + +#endif /* !TRE_MULTIBYTE */ + +#else /* !TRE_WCHAR */ + +/* No wide character or multibyte support. 
*/ + +#define GET_NEXT_WCHAR() \ + do { \ + prev_c = next_c; \ + if (type == STR_BYTE) \ + { \ + pos++; \ + if (len >= 0 && pos >= len) \ + next_c = '\0'; \ + else \ + next_c = (unsigned char)(*str_byte++); \ + } \ + else if (type == STR_USER) \ + { \ + pos += pos_add_next; \ + str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \ + str_source->context); \ + } \ + } while(/*CONSTCOND*/(void)0,0) + +#endif /* !TRE_WCHAR */ + + +/* Non-zero if `c' is an underscore or is alphanumeric according to tre_isalnum(). `c' is evaluated twice. */ +#define IS_WORD_CHAR(c) ((c) == L'_' || tre_isalnum(c)) + +/* Non-zero when at least one assertion bit set in `assertions' does NOT hold at the current position -- e.g. ASSERT_AT_BOL is set although pos > 0 (or reg_notbol is set) and the previous character is not a newline honoured by reg_newline. Uses pos, prev_c, next_c, reg_notbol, reg_noteol and reg_newline from the function it is expanded in. The parameter is parenthesized so that an expression argument such as `a | b' is tested as a whole. */ +#define CHECK_ASSERTIONS(assertions) \ + ((((assertions) & ASSERT_AT_BOL) \ + && (pos > 0 || reg_notbol) \ + && (prev_c != L'\n' || !reg_newline)) \ + || (((assertions) & ASSERT_AT_EOL) \ + && (next_c != L'\0' || reg_noteol) \ + && (next_c != L'\n' || !reg_newline)) \ + || (((assertions) & ASSERT_AT_BOW) \ + && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c))) \ + || (((assertions) & ASSERT_AT_EOW) \ + && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c))) \ + || (((assertions) & ASSERT_AT_WB) \ + && (pos != 0 && next_c != L'\0' \ + && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c))) \ + || (((assertions) & ASSERT_AT_WB_NEG) \ + && (pos == 0 || next_c == L'\0' \ + || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c)))) + +/* Non-zero when prev_c fails the character-class constraint of transition `trans_i': ASSERT_CHAR_CLASS is set and prev_c (with REG_ICASE: neither its lower-case nor its upper-case form) is not in trans_i->u.class, or ASSERT_CHAR_CLASS_NEG is set and prev_c matches one of trans_i->neg_classes. `eflags' is accepted but not used. Uses prev_c from the function it is expanded in. */ +#define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags) \ + ((((trans_i)->assertions & ASSERT_CHAR_CLASS) \ + && !((tnfa)->cflags & REG_ICASE) \ + && !tre_isctype((tre_cint_t)prev_c, (trans_i)->u.class)) \ + || (((trans_i)->assertions & ASSERT_CHAR_CLASS) \ + && ((tnfa)->cflags & REG_ICASE) \ + && !tre_isctype(tre_tolower((tre_cint_t)prev_c),(trans_i)->u.class) \ + && !tre_isctype(tre_toupper((tre_cint_t)prev_c),(trans_i)->u.class)) \ + || (((trans_i)->assertions & ASSERT_CHAR_CLASS_NEG) \ + && tre_neg_char_classes_match((trans_i)->neg_classes,(tre_cint_t)prev_c,\ + (tnfa)->cflags & REG_ICASE))) + + + + +/* Returns 1 if `t1' wins `t2', 0 otherwise. 
*/ +/* Tags are compared in index order: for a TRE_TAG_MINIMIZE tag the smaller value wins, for any other tag the larger value wins. The first differing tag decides; identical tag vectors yield 0. */ +inline static int +tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions, + int *t1, int *t2) +{ + int i; + for (i = 0; i < num_tags; i++) + { + if (tag_directions[i] == TRE_TAG_MINIMIZE) + { + if (t1[i] < t2[i]) + return 1; + if (t1[i] > t2[i]) + return 0; + } + else + { + if (t1[i] > t2[i]) + return 1; + if (t1[i] < t2[i]) + return 0; + } + } + /* assert(0);*/ + return 0; +} + +/* Returns 1 if `wc' (when `icase' is non-zero: its upper-case or lower-case form) belongs to any class in the zero-terminated array `classes', 0 otherwise. */ +inline static int +tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase) +{ + DPRINT(("neg_char_classes_test: %p, %d, %d\n", classes, wc, icase)); + while (*classes != (tre_ctype_t)0) + if ((!icase && tre_isctype(wc, *classes)) + || (icase && (tre_isctype(tre_toupper(wc), *classes) + || tre_isctype(tre_tolower(wc), *classes)))) + return 1; /* Match. */ + else + classes++; + return 0; /* No match. */ +} diff --git a/deps/tre/lib/tre-mem.c b/deps/tre/lib/tre-mem.c new file mode 100644 index 000000000..ca56d2b7e --- /dev/null +++ b/deps/tre/lib/tre-mem.c @@ -0,0 +1,155 @@ +/* + tre-mem.c - TRE memory allocator + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + +*/ + +/* + This memory allocator is for allocating small memory blocks efficiently + in terms of memory overhead and execution speed. The allocated blocks + cannot be freed individually, only all at once. There can be multiple + allocators, though. +*/ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif /* HAVE_CONFIG_H */ +#include <stdlib.h> +#include <string.h> + +#include "tre-internal.h" +#include "tre-mem.h" +#include "xmalloc.h" + + +/* Returns a new memory allocator or NULL if out of memory. When `provided' is non-zero the allocator header is placed in `provided_block' instead of being heap-allocated; a NULL `provided_block' yields NULL. */ +tre_mem_t +tre_mem_new_impl(int provided, void *provided_block) +{ + tre_mem_t mem; + if (provided) + { + mem = provided_block; + /* Reject a missing block before it is written to. */ + if (mem == NULL) + return NULL; + memset(mem, 0, sizeof(*mem)); + } + else + mem = xcalloc(1, sizeof(*mem)); + return mem; +} + + +/* Frees the memory allocator and all memory allocated with it. 
*/ +void +tre_mem_destroy(tre_mem_t mem) +{ + tre_list_t *tmp, *l = mem->blocks; + + while (l != NULL) + { + xfree(l->data); + tmp = l->next; + xfree(l); + l = tmp; + } + xfree(mem); +} + + +/* Allocates a block of `size' bytes from `mem'. Returns a pointer to the + allocated block or NULL if an underlying malloc() failed. */ +void * +tre_mem_alloc_impl(tre_mem_t mem, int provided, void *provided_block, + int zero, size_t size) +{ + void *ptr; + + if (mem->failed) + { + DPRINT(("tre_mem_alloc: oops, called after failure?!\n")); + return NULL; + } + +#ifdef MALLOC_DEBUGGING + if (!provided) + { + ptr = xmalloc(1); + if (ptr == NULL) + { + DPRINT(("tre_mem_alloc: xmalloc forced failure\n")); + mem->failed = 1; + return NULL; + } + xfree(ptr); + } +#endif /* MALLOC_DEBUGGING */ + + if (mem->n < size) + { + /* We need more memory than is available in the current block. + Allocate a new block. */ + tre_list_t *l; + if (provided) + { + DPRINT(("tre_mem_alloc: using provided block\n")); + if (provided_block == NULL) + { + DPRINT(("tre_mem_alloc: provided block was NULL\n")); + mem->failed = 1; + return NULL; + } + mem->ptr = provided_block; + mem->n = TRE_MEM_BLOCK_SIZE; + } + else + { + size_t block_size; + if (size * 8 > TRE_MEM_BLOCK_SIZE) + block_size = size * 8; + else + block_size = TRE_MEM_BLOCK_SIZE; + DPRINT(("tre_mem_alloc: allocating new %zu byte block\n", + block_size)); + l = xmalloc(sizeof(*l)); + if (l == NULL) + { + mem->failed = 1; + return NULL; + } + l->data = xmalloc(block_size); + if (l->data == NULL) + { + xfree(l); + mem->failed = 1; + return NULL; + } + l->next = NULL; + if (mem->current != NULL) + mem->current->next = l; + if (mem->blocks == NULL) + mem->blocks = l; + mem->current = l; + mem->ptr = l->data; + mem->n = block_size; + } + } + + /* Make sure the next pointer will be aligned. */ + size += ALIGN(mem->ptr + size, long); + + /* Allocate from current block. 
*/ + ptr = mem->ptr; + mem->ptr += size; + mem->n -= size; + + /* Set to zero if needed. */ + if (zero) + memset(ptr, 0, size); + + return ptr; +} + +/* EOF */ diff --git a/deps/tre/lib/tre-mem.h b/deps/tre/lib/tre-mem.h new file mode 100644 index 000000000..285940457 --- /dev/null +++ b/deps/tre/lib/tre-mem.h @@ -0,0 +1,66 @@ +/* + tre-mem.h - TRE memory allocator interface + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + +*/ + +#ifndef TRE_MEM_H +#define TRE_MEM_H 1 + +#include + +#define TRE_MEM_BLOCK_SIZE 1024 + +typedef struct tre_list { + void *data; + struct tre_list *next; +} tre_list_t; + +typedef struct tre_mem_struct { + tre_list_t *blocks; + tre_list_t *current; + char *ptr; + size_t n; + int failed; + void **provided; +} *tre_mem_t; + + +tre_mem_t tre_mem_new_impl(int provided, void *provided_block); +void *tre_mem_alloc_impl(tre_mem_t mem, int provided, void *provided_block, + int zero, size_t size); + +/* Returns a new memory allocator or NULL if out of memory. */ +#define tre_mem_new() tre_mem_new_impl(0, NULL) + +/* Allocates a block of `size' bytes from `mem'. Returns a pointer to the + allocated block or NULL if an underlying malloc() failed. */ +#define tre_mem_alloc(mem, size) tre_mem_alloc_impl(mem, 0, NULL, 0, size) + +/* Allocates a block of `size' bytes from `mem'. Returns a pointer to the + allocated block or NULL if an underlying malloc() failed. The memory + is set to zero. */ +#define tre_mem_calloc(mem, size) tre_mem_alloc_impl(mem, 0, NULL, 1, size) + +#ifdef TRE_USE_ALLOCA +/* alloca() versions. Like above, but memory is allocated with alloca() + instead of malloc(). */ + +#define tre_mem_newa() \ + tre_mem_new_impl(1, alloca(sizeof(struct tre_mem_struct))) + +#define tre_mem_alloca(mem, size) \ + ((mem)->n >= (size) \ + ? 
tre_mem_alloc_impl((mem), 1, NULL, 0, (size)) \ + : tre_mem_alloc_impl((mem), 1, alloca(TRE_MEM_BLOCK_SIZE), 0, (size))) +#endif /* TRE_USE_ALLOCA */ + + +/* Frees the memory allocator and all memory allocated with it. */ +void tre_mem_destroy(tre_mem_t mem); + +#endif /* TRE_MEM_H */ + +/* EOF */ diff --git a/deps/tre/lib/tre-parse.c b/deps/tre/lib/tre-parse.c new file mode 100644 index 000000000..64ab6aca8 --- /dev/null +++ b/deps/tre/lib/tre-parse.c @@ -0,0 +1,1758 @@ +/* + tre-parse.c - Regexp parser + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + +*/ + +/* + This parser is just a simple recursive descent parser for POSIX.2 + regexps. The parser supports both the obsolete default syntax and + the "extended" syntax, and some nonstandard extensions. +*/ + + +#ifdef HAVE_CONFIG_H +#include +#endif /* HAVE_CONFIG_H */ +#include +#include +#include + +#include "xmalloc.h" +#include "tre-mem.h" +#include "tre-ast.h" +#include "tre-stack.h" +#include "tre-parse.h" + + +/* Characters with special meanings in regexp syntax. */ +#define CHAR_PIPE L'|' +#define CHAR_LPAREN L'(' +#define CHAR_RPAREN L')' +#define CHAR_LBRACE L'{' +#define CHAR_RBRACE L'}' +#define CHAR_LBRACKET L'[' +#define CHAR_RBRACKET L']' +#define CHAR_MINUS L'-' +#define CHAR_STAR L'*' +#define CHAR_QUESTIONMARK L'?' +#define CHAR_PLUS L'+' +#define CHAR_PERIOD L'.' +#define CHAR_COLON L':' +#define CHAR_EQUAL L'=' +#define CHAR_COMMA L',' +#define CHAR_CARET L'^' +#define CHAR_DOLLAR L'$' +#define CHAR_BACKSLASH L'\\' +#define CHAR_HASH L'#' +#define CHAR_TILDE L'~' + + +/* Some macros for expanding \w, \s, etc. 
*/ +static const struct tre_macro_struct { + const char c; + const char *expansion; +} tre_macros[] = + { {'t', "\t"}, {'n', "\n"}, {'r', "\r"}, + {'f', "\f"}, {'a', "\a"}, {'e', "\033"}, + {'w', "[[:alnum:]_]"}, {'W', "[^[:alnum:]_]"}, {'s', "[[:space:]]"}, + {'S', "[^[:space:]]"}, {'d', "[[:digit:]]"}, {'D', "[^[:digit:]]"}, + { 0, NULL } + }; + + +/* Expands a macro delimited by `regex' and `regex_end' to `buf', which + must have at least `len' items. Sets buf[0] to zero if the there + is no match in `tre_macros'. */ +static void +tre_expand_macro(const tre_char_t *regex, const tre_char_t *regex_end, + tre_char_t *buf, size_t buf_len) +{ + int i; + + buf[0] = 0; + if (regex >= regex_end) + return; + + for (i = 0; tre_macros[i].expansion; i++) + { + if (tre_macros[i].c == *regex) + { + unsigned int j; + DPRINT(("Expanding macro '%c' => '%s'\n", + tre_macros[i].c, tre_macros[i].expansion)); + for (j = 0; tre_macros[i].expansion[j] && j < buf_len - 1; j++) + buf[j] = tre_macros[i].expansion[j]; + buf[j] = 0; + break; + } + } +} + +static reg_errcode_t +tre_new_item(tre_mem_t mem, int min, int max, int *i, int *max_i, + tre_ast_node_t ***items) +{ + reg_errcode_t status; + tre_ast_node_t **array = *items; + /* Allocate more space if necessary. */ + if (*i >= *max_i) + { + tre_ast_node_t **new_items; + DPRINT(("out of array space, i = %d\n", *i)); + /* If the array is already 1024 items large, give up -- there's + probably an error in the regexp (e.g. not a '\0' terminated + string and missing ']') */ + if (*max_i > 1024) + return REG_ESPACE; + *max_i *= 2; + new_items = xrealloc(array, sizeof(*items) * *max_i); + if (new_items == NULL) + return REG_ESPACE; + *items = array = new_items; + } + array[*i] = tre_ast_new_literal(mem, min, max); + status = array[*i] == NULL ? REG_ESPACE : REG_OK; + (*i)++; + return status; +} + + +/* Expands a character class to character ranges. 
*/ +/* Scans the codes 0..255 and appends one item per maximal run of consecutive codes that belong to `class' (with REG_ICASE in `cflags', also codes whose lower- or upper-case form belongs to it). Stops at the first tre_new_item() failure and returns its status. */ +static reg_errcode_t +tre_expand_ctype(tre_mem_t mem, tre_ctype_t class, tre_ast_node_t ***items, + int *i, int *max_i, int cflags) +{ + reg_errcode_t err = REG_OK; + int code, run_start = -1, run_end = 0; + + DPRINT((" expanding class to character ranges\n")); + for (code = 0; code < 256; code++) + { + tre_cint_t ch = (tre_cint_t) code; + int member = tre_isctype(ch, class) + || ((cflags & REG_ICASE) + && (tre_isctype(tre_tolower(ch), class) + || tre_isctype(tre_toupper(ch), class))); + + if (member) + { + /* Open a run if none is in progress, then extend it. */ + if (run_start < 0) + run_start = ch; + run_end = ch; + continue; + } + if (run_start < 0) + continue; + + /* A non-member closes the run in progress. */ + DPRINT((" range %c (%d) to %c (%d)\n", run_start, run_start, run_end, run_end)); + err = tre_new_item(mem, run_start, run_end, i, max_i, items); + run_start = -1; + if (err != REG_OK) + break; + } + /* Flush a run that extends to code 255. */ + if (run_start >= 0 && err == REG_OK) + err = tre_new_item(mem, run_start, run_end, i, max_i, items); + return err; +} + + +/* qsort() comparator: orders item nodes by the code_min of their literal. */ +static int +tre_compare_items(const void *a, const void *b) +{ + const tre_ast_node_t *lhs = *(tre_ast_node_t * const *)a; + const tre_ast_node_t *rhs = *(tre_ast_node_t * const *)b; + tre_literal_t *lit_lhs = lhs->obj; + tre_literal_t *lit_rhs = rhs->obj; + long lo_lhs = lit_lhs->code_min; + long lo_rhs = lit_rhs->code_min; + + /* Yields -1, 0 or 1 without a subtraction that could overflow. */ + return (lo_lhs > lo_rhs) - (lo_lhs < lo_rhs); +} + +#ifndef TRE_USE_SYSTEM_WCTYPE + +/* isalnum() and the rest may be macros, so wrap them to functions. 
*/ +int tre_isalnum_func(tre_cint_t c) { return tre_isalnum(c); } +int tre_isalpha_func(tre_cint_t c) { return tre_isalpha(c); } + +#ifdef tre_isascii +int tre_isascii_func(tre_cint_t c) { return tre_isascii(c); } +#else /* !tre_isascii */ +int tre_isascii_func(tre_cint_t c) { return !(c >> 7); } +#endif /* !tre_isascii */ + +#ifdef tre_isblank +int tre_isblank_func(tre_cint_t c) { return tre_isblank(c); } +#else /* !tre_isblank */ +int tre_isblank_func(tre_cint_t c) { return ((c == ' ') || (c == '\t')); } +#endif /* !tre_isblank */ + +int tre_iscntrl_func(tre_cint_t c) { return tre_iscntrl(c); } +int tre_isdigit_func(tre_cint_t c) { return tre_isdigit(c); } +int tre_isgraph_func(tre_cint_t c) { return tre_isgraph(c); } +int tre_islower_func(tre_cint_t c) { return tre_islower(c); } +int tre_isprint_func(tre_cint_t c) +{ + return +#if defined(WIN32) && TRE_WCHAR + /* On Windows, iswprint(L'\t') incorrectly returns true. */ + c != L'\t' && +#endif + tre_isprint(c); +} +int tre_ispunct_func(tre_cint_t c) { return tre_ispunct(c); } +int tre_isspace_func(tre_cint_t c) { return tre_isspace(c); } +int tre_isupper_func(tre_cint_t c) { return tre_isupper(c); } +int tre_isxdigit_func(tre_cint_t c) { return tre_isxdigit(c); } + +/* Maps character class names to the wrapper functions above; terminated by a NULL name. NOTE(review): the "ascii" and "blank" entries are only compiled in when tre_isascii / tre_isblank are defined, although fallback wrappers exist above -- confirm that this is intended. */ +struct { + char *name; + int (*func)(tre_cint_t); +} tre_ctype_map[] = { + { "alnum", &tre_isalnum_func }, + { "alpha", &tre_isalpha_func }, +#ifdef tre_isascii + { "ascii", &tre_isascii_func }, +#endif /* tre_isascii */ +#ifdef tre_isblank + { "blank", &tre_isblank_func }, +#endif /* tre_isblank */ + { "cntrl", &tre_iscntrl_func }, + { "digit", &tre_isdigit_func }, + { "graph", &tre_isgraph_func }, + { "lower", &tre_islower_func }, + { "print", &tre_isprint_func }, + { "punct", &tre_ispunct_func }, + { "space", &tre_isspace_func }, + { "upper", &tre_isupper_func }, + { "xdigit", &tre_isxdigit_func }, + { NULL, NULL} +}; + +/* Returns the class function registered under `name' in tre_ctype_map, or (tre_ctype_t)0 if the name is unknown. */ +tre_ctype_t tre_ctype(const char *name) +{ + int i; + for (i = 0; tre_ctype_map[i].name != NULL; i++) + { + if (strcmp(name, 
tre_ctype_map[i].name) == 0) + return tre_ctype_map[i].func; + } + return (tre_ctype_t)0; +} +#endif /* !TRE_USE_SYSTEM_WCTYPE */ + +/* Maximum number of character classes that can occur in a negated bracket + expression. */ +#define MAX_NEG_CLASSES 64 + +/* Maximum length of character class names. NOTE(review): defined without a value and not referenced in the code visible here; the class-name buffers below are sized with a literal 64. */ +#define MAX_CLASS_NAME + +/* Expands to two arguments (remaining length, pointer) for a "%.*" debug format; relies on a `ctx' variable being in scope. */ +#define REST(re) (int)(ctx->re_end - (re)), (re) + +/* Parses the items of a bracket expression starting at ctx->re, up to and including the closing ']'. Literal characters, ranges and character classes are appended to `*items' through tre_new_item(); when `negate' is set, classes are collected in `neg_classes' instead (at most MAX_NEG_CLASSES). On success stores the item count in *num_items, the array capacity in *items_size, advances ctx->re past the ']' and returns REG_OK. Otherwise returns REG_EBRACK, REG_ERANGE, REG_ECOLLATE, REG_ECTYPE or REG_ESPACE and leaves ctx->re untouched. */ +static reg_errcode_t +tre_parse_bracket_items(tre_parse_ctx_t *ctx, int negate, + tre_ctype_t neg_classes[], int *num_neg_classes, + tre_ast_node_t ***items, int *num_items, + int *items_size) +{ + const tre_char_t *re = ctx->re; + reg_errcode_t status; + tre_ctype_t class = (tre_ctype_t)0; + tre_cint_t min = 0, max = 0; + int i = *num_items; + int max_i = *items_size; + int skip; + + /* Build an array of the items in the bracket expression. */ + for (;;) + { + skip = 0; + /* `>=' rather than `==' so that the pattern end can never be stepped over. */ + if (re >= ctx->re_end) + { + return REG_EBRACK; + } + if (*re == CHAR_RBRACKET && re > ctx->re) + { + DPRINT(("tre_parse_bracket: done: '%.*" STRF "'\n", REST(re))); + re++; + break; + } + class = (tre_ctype_t)0; + if (re + 2 < ctx->re_end + && *(re + 1) == CHAR_MINUS && *(re + 2) != CHAR_RBRACKET) + { + DPRINT(("tre_parse_bracket: range: '%.*" STRF "'\n", REST(re))); + min = *re; + max = *(re + 2); + re += 3; + /* XXX - Should use collation order instead of encoding values + in character ranges. 
*/ + if (min > max) + return REG_ERANGE; + } + else if (re + 1 < ctx->re_end + && *re == CHAR_LBRACKET && *(re + 1) == CHAR_PERIOD) + return REG_ECOLLATE; + else if (re + 1 < ctx->re_end + && *re == CHAR_LBRACKET && *(re + 1) == CHAR_EQUAL) + return REG_ECOLLATE; + else if (re + 1 < ctx->re_end + && *re == CHAR_LBRACKET && *(re + 1) == CHAR_COLON) + { + char tmp_str[64]; + const tre_char_t *endptr = re + 2; + size_t len; + DPRINT(("tre_parse_bracket: class: '%.*" STRF "'\n", REST(re))); + while (endptr < ctx->re_end && *endptr != CHAR_COLON) + endptr++; + if (endptr != ctx->re_end) + { + len = MIN(endptr - re - 2, 63); +#ifdef TRE_WCHAR + { + tre_char_t tmp_wcs[64]; + wcsncpy(tmp_wcs, re + 2, len); + tmp_wcs[len] = L'\0'; +#if defined HAVE_WCSRTOMBS + { + mbstate_t state; + const tre_char_t *src = tmp_wcs; + memset(&state, '\0', sizeof(state)); + /* Leave room for the terminator stored below: when the output fills the buffer wcsrtombs() returns the full size given and writes no terminator. */ + len = wcsrtombs(tmp_str, &src, sizeof(tmp_str) - 1, &state); + } +#elif defined HAVE_WCSTOMBS + len = wcstombs(tmp_str, tmp_wcs, 63); +#endif /* defined HAVE_WCSTOMBS */ + if (len == (size_t)-1) + return REG_ECTYPE; + } +#else /* !TRE_WCHAR */ + strncpy(tmp_str, (const char*)re + 2, len); +#endif /* !TRE_WCHAR */ + tmp_str[len] = '\0'; + DPRINT((" class name: %s\n", tmp_str)); + class = tre_ctype(tmp_str); + if (!class) + return REG_ECTYPE; + /* Optimize character classes for 8 bit character sets. */ + if (ctx->mb_cur_max == 1) + { + status = tre_expand_ctype(ctx->mem, class, items, + &i, &max_i, ctx->cflags); + if (status != REG_OK) + return status; + class = (tre_ctype_t)0; + skip = 1; + } + /* Skip the two characters closing the class, but never step past the end of the pattern when the ':' is its last character. */ + if (endptr + 1 >= ctx->re_end) + return REG_EBRACK; + re = endptr + 2; + } + else + return REG_ECTYPE; + min = 0; + max = TRE_CHAR_MAX; + } + else + { + DPRINT(("tre_parse_bracket: char: '%.*" STRF "'\n", REST(re))); + if (*re == CHAR_MINUS && re + 1 < ctx->re_end + && *(re + 1) != CHAR_RBRACKET + && ctx->re != re) + /* Two ranges are not allowed to share an endpoint. 
*/ + return REG_ERANGE; + min = max = *re++; + } + + if (class && negate) + if (*num_neg_classes >= MAX_NEG_CLASSES) + return REG_ESPACE; + else + neg_classes[(*num_neg_classes)++] = class; + else if (!skip) + { + status = tre_new_item(ctx->mem, min, max, &i, &max_i, items); + if (status != REG_OK) + return status; + ((tre_literal_t*)((*items)[i-1])->obj)->u.class = class; + } + + /* Add opposite-case counterpoints if REG_ICASE is present. + This is broken if there are more than two "same" characters. */ + if (ctx->cflags & REG_ICASE && !class && !skip) + { + tre_cint_t cmin, ccurr; + + DPRINT(("adding opposite-case counterpoints\n")); + while (min <= max) + { + if (tre_islower(min)) + { + cmin = ccurr = tre_toupper(min++); + while (tre_islower(min) && tre_toupper(min) == ccurr + 1 + && min <= max) + ccurr = tre_toupper(min++); + status = tre_new_item(ctx->mem, cmin, ccurr, + &i, &max_i, items); + if (status != REG_OK) + return status; + } + else if (tre_isupper(min)) + { + cmin = ccurr = tre_tolower(min++); + while (tre_isupper(min) && tre_tolower(min) == ccurr + 1 + && min <= max) + ccurr = tre_tolower(min++); + status = tre_new_item(ctx->mem, cmin, ccurr, + &i, &max_i, items); + if (status != REG_OK) + return status; + } + else + min++; + } + } + } + *num_items = i; + *items_size = max_i; + ctx->re = re; + return REG_OK; +} + +/* Parses a bracket expression body at ctx->re (an optional leading '^', then the items and the closing ']') and stores in *result an AST node that is the union of the resulting literal ranges. For a negated expression the items are sorted, replaced by the gaps between them, and a final range up to TRE_CHAR_MAX is added; every emitted literal then carries a copy of the collected negated classes. Returns REG_OK or an error code; the temporary item array is freed in either case. */ +static reg_errcode_t +tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result) +{ + tre_ast_node_t *node = NULL; + int negate = 0; + reg_errcode_t status = REG_OK; + tre_ast_node_t **items, *u, *n; + int i = 0, j, max_i = 32; + long curr_max, curr_min; + tre_ctype_t neg_classes[MAX_NEG_CLASSES]; + int num_neg_classes = 0; + + /* Start off with an array of `max_i' elements. 
*/ + items = xmalloc(sizeof(*items) * max_i); + if (items == NULL) + return REG_ESPACE; + + if (ctx->re < ctx->re_end && *ctx->re == CHAR_CARET) + { + DPRINT(("tre_parse_bracket: negate: '%.*" STRF "'\n", REST(ctx->re))); + negate = 1; + ctx->re++; + } + + status = tre_parse_bracket_items(ctx, negate, neg_classes, &num_neg_classes, + &items, &i, &max_i); + + if (status != REG_OK) + goto parse_bracket_done; + + /* Sort the array if we need to negate it. */ + if (negate) + qsort(items, (unsigned)i, sizeof(*items), tre_compare_items); + + curr_max = curr_min = 0; + /* Build a union of the items in the array, negated if necessary. */ + for (j = 0; j < i && status == REG_OK; j++) + { + long min, max; + tre_literal_t *l = items[j]->obj; + min = l->code_min; + max = l->code_max; + + DPRINT(("item: %ld - %ld, class %ld, curr_max = %ld\n", + l->code_min, l->code_max, (long)l->u.class, curr_max)); + + if (negate) + { + if (min < curr_max) + { + /* Overlap. */ + curr_max = MAX(max + 1, curr_max); + DPRINT(("overlap, curr_max = %ld\n", curr_max)); + l = NULL; + } + else + { + /* No overlap. 
*/ + curr_max = min - 1; + if (curr_max >= curr_min) + { + DPRINT(("no overlap\n")); + l->code_min = curr_min; + l->code_max = curr_max; + } + else + { + DPRINT(("no overlap, zero room\n")); + l = NULL; + } + curr_min = curr_max = max + 1; + } + } + + if (l != NULL) + { + int k; + DPRINT(("creating %ld - %ld\n", l->code_min, l->code_max)); + if (num_neg_classes > 0) + { + /* Zero-terminated copy of the negated classes, sized by the element type rather than by the pointer. */ + l->neg_classes = tre_mem_alloc(ctx->mem, + (sizeof(*l->neg_classes) + * (num_neg_classes + 1))); + if (l->neg_classes == NULL) + { + status = REG_ESPACE; + break; + } + for (k = 0; k < num_neg_classes; k++) + l->neg_classes[k] = neg_classes[k]; + l->neg_classes[k] = (tre_ctype_t)0; + } + else + l->neg_classes = NULL; + if (node == NULL) + node = items[j]; + else + { + u = tre_ast_new_union(ctx->mem, node, items[j]); + if (u == NULL) + status = REG_ESPACE; + node = u; + } + } + } + + if (status != REG_OK) + goto parse_bracket_done; + + if (negate) + { + int k; + DPRINT(("final: creating %ld - %ld\n", curr_min, (long)TRE_CHAR_MAX)); + n = tre_ast_new_literal(ctx->mem, curr_min, TRE_CHAR_MAX); + if (n == NULL) + status = REG_ESPACE; + else + { + tre_literal_t *l = n->obj; + if (num_neg_classes > 0) + { + /* Same zero-terminated copy as above, for the final range. */ + l->neg_classes = tre_mem_alloc(ctx->mem, + (sizeof(*l->neg_classes) + * (num_neg_classes + 1))); + if (l->neg_classes == NULL) + { + status = REG_ESPACE; + goto parse_bracket_done; + } + for (k = 0; k < num_neg_classes; k++) + l->neg_classes[k] = neg_classes[k]; + l->neg_classes[k] = (tre_ctype_t)0; + } + else + l->neg_classes = NULL; + if (node == NULL) + node = n; + else + { + u = tre_ast_new_union(ctx->mem, node, n); + if (u == NULL) + status = REG_ESPACE; + node = u; + } + } + } + + if (status != REG_OK) + goto parse_bracket_done; + +#ifdef TRE_DEBUG + tre_ast_print(node); +#endif /* TRE_DEBUG */ + + parse_bracket_done: + xfree(items); + *result = node; + return status; +} + + +/* Parses a positive decimal integer capped at INT_MAX. Returns -1 if the + string does not contain a valid number. 
*/ +/* On success *regex is advanced past the digits; it is left untouched when -1 is returned. Reading stops at `regex_end'. */ +static int +tre_parse_int(const tre_char_t **regex, const tre_char_t *regex_end) +{ + unsigned long num = 0; + int overflow = 0; + const tre_char_t *r = *regex; + while (r < regex_end && *r >= L'0' && *r <= L'9') + { + if (!overflow) + { + unsigned long digit = (unsigned long)(*r - L'0'); + /* Test against the cap before multiplying, so the result does not depend on unsigned wraparound being detectable (it is not when unsigned long has 32 bits). */ + if (num > ((unsigned long)INT_MAX - digit) / 10) + overflow = 1; + else + num = num * 10 + digit; + } + r++; + } + if (r == *regex) + return -1; + *regex = r; + return overflow ? INT_MAX : (int)num; +} + + +/* Parses the contents of a bound starting at ctx->re (just past the opening brace) through the closing '}' (or '\}' without REG_EXTENDED) and an optional trailing '?'. On success wraps *result in an iteration node -- or replaces it with an EMPTY literal when both counts are zero -- attaches the approximate-matching parameters if any were given, advances ctx->re and returns REG_OK. Otherwise returns REG_BADBR, REG_BADMAX, REG_EBRACE, REG_BADRPT or REG_ESPACE. */ +static reg_errcode_t +tre_parse_bound(tre_parse_ctx_t *ctx, tre_ast_node_t **result) +{ + int min, max, i; + int cost_ins, cost_del, cost_subst, cost_max; + int limit_ins, limit_del, limit_subst, limit_err; + const tre_char_t *r = ctx->re; + const tre_char_t *start; + int minimal = (ctx->cflags & REG_UNGREEDY) ? 1 : 0; + int approx = 0; + int costs_set = 0; + int counts_set = 0; + + cost_ins = cost_del = cost_subst = cost_max = TRE_PARAM_UNSET; + limit_ins = limit_del = limit_subst = limit_err = TRE_PARAM_UNSET; + + /* Parse number (minimum repetition count). */ + min = -1; + if (r < ctx->re_end && *r >= L'0' && *r <= L'9') { + DPRINT(("tre_parse: min count: '%.*" STRF "'\n", REST(r))); + min = tre_parse_int(&r, ctx->re_end); + } + + /* Parse comma and second number (maximum repetition count). */ + max = min; + if (r < ctx->re_end && *r == CHAR_COMMA) + { + if (min < 0) + min = 0; + r++; + DPRINT(("tre_parse: max count: '%.*" STRF "'\n", REST(r))); + max = tre_parse_int(&r, ctx->re_end); + } + + /* Check that the repeat counts are sane. 
*/ + if (max >= 0 && min > max) + return REG_BADBR; + if (min > RE_DUP_MAX || max > RE_DUP_MAX) + return REG_BADMAX; + + + /* + '{' + optionally followed immediately by a number == minimum repcount + optionally followed by , then a number == maximum repcount + + then a number == maximum insertion count + - then a number == maximum deletion count + # then a number == maximum substitution count + ~ then a number == maximum number of errors + Any of +, -, # or ~ not followed by a number means that + the maximum count/number of errors is infinite. + + An equation of the form + Xi + Yd + Zs < C + can be specified to set costs and the cost limit to a value + different from the default value: + - X is the cost of an insertion + - Y is the cost of a deletion + - Z is the cost of a substitution + - C is the maximum cost + + If no count limit or cost is set for an operation, the operation + is not allowed at all. + */ + + + do { + int done; + start = r; + + /* Parse count limit settings */ + done = 0; + if (!counts_set) + while (r + 1 < ctx->re_end && !done) + { + switch (*r) + { + case CHAR_PLUS: /* Insert limit */ + DPRINT(("tre_parse: ins limit: '%.*" STRF "'\n", REST(r))); + r++; + limit_ins = tre_parse_int(&r, ctx->re_end); + if (limit_ins < 0) + limit_ins = INT_MAX; + counts_set = 1; + break; + case CHAR_MINUS: /* Delete limit */ + DPRINT(("tre_parse: del limit: '%.*" STRF "'\n", REST(r))); + r++; + limit_del = tre_parse_int(&r, ctx->re_end); + if (limit_del < 0) + limit_del = INT_MAX; + counts_set = 1; + break; + case CHAR_HASH: /* Substitute limit */ + DPRINT(("tre_parse: subst limit: '%.*" STRF "'\n", REST(r))); + r++; + limit_subst = tre_parse_int(&r, ctx->re_end); + if (limit_subst < 0) + limit_subst = INT_MAX; + counts_set = 1; + break; + case CHAR_TILDE: /* Maximum number of changes */ + DPRINT(("tre_parse: count limit: '%.*" STRF "'\n", REST(r))); + r++; + limit_err = tre_parse_int(&r, ctx->re_end); + if (limit_err < 0) + limit_err = INT_MAX; + approx = 1; 
+ break; + case CHAR_COMMA: + r++; + break; + case L' ': + r++; + break; + case L'}': + done = 1; + break; + default: + done = 1; + break; + } + } + + /* Parse cost restriction equation. */ + done = 0; + if (!costs_set) + while (r + 1 < ctx->re_end && !done) + { + switch (*r) + { + case CHAR_PLUS: + case L' ': + r++; + break; + case L'<': + DPRINT(("tre_parse: max cost: '%.*" STRF "'\n", REST(r))); + r++; + /* Skip blanks, but never read past the end of the pattern. */ + while (r < ctx->re_end && *r == L' ') + r++; + cost_max = tre_parse_int(&r, ctx->re_end); + if (cost_max < 0) + cost_max = INT_MAX; + else + cost_max--; + approx = 1; + break; + case CHAR_COMMA: + r++; + done = 1; + break; + default: + if (*r >= L'0' && *r <= L'9') + { +#ifdef TRE_DEBUG + const tre_char_t *sr = r; +#endif /* TRE_DEBUG */ + int cost = tre_parse_int(&r, ctx->re_end); + /* The digits may run up to the end of the pattern; in that case the closing brace is missing and *r must not be read. */ + if (r >= ctx->re_end) + return REG_EBRACE; + switch (*r) + { + case L'i': /* Insert cost */ + DPRINT(("tre_parse: ins cost: '%.*" STRF "'\n", + REST(sr))); + r++; + cost_ins = cost; + costs_set = 1; + break; + case L'd': /* Delete cost */ + DPRINT(("tre_parse: del cost: '%.*" STRF "'\n", + REST(sr))); + r++; + cost_del = cost; + costs_set = 1; + break; + case L's': /* Substitute cost */ + DPRINT(("tre_parse: subst cost: '%.*" STRF "'\n", + REST(sr))); + r++; + cost_subst = cost; + costs_set = 1; + break; + default: + return REG_BADBR; + } + } + else + { + done = 1; + break; + } + } + } + } while (start != r); + + /* Missing }. */ + if (r >= ctx->re_end) + return REG_EBRACE; + + /* Empty contents of {}. */ + if (r == ctx->re) + return REG_BADBR; + + /* Parse the ending '}' or '\}'.*/ + if (ctx->cflags & REG_EXTENDED) + { + if (r >= ctx->re_end || *r != CHAR_RBRACE) + return REG_BADBR; + r++; + } + else + { + if (r + 1 >= ctx->re_end + || *r != CHAR_BACKSLASH + || *(r + 1) != CHAR_RBRACE) + return REG_BADBR; + r += 2; + } + + + /* Parse trailing '?' marking minimal repetition. 
*/ + if (r < ctx->re_end) + { + if (*r == CHAR_QUESTIONMARK) + { + minimal = !(ctx->cflags & REG_UNGREEDY); + r++; + } + else if (*r == CHAR_STAR || *r == CHAR_PLUS) + { + /* These are reserved for future extensions. */ + return REG_BADRPT; + } + } + + /* Create the AST node(s). */ + if (min == 0 && max == 0) + { + *result = tre_ast_new_literal(ctx->mem, EMPTY, -1); + if (*result == NULL) + return REG_ESPACE; + } + else + { + if (min < 0 && max < 0) + /* Only approximate parameters set, no repetitions. */ + min = max = 1; + + *result = tre_ast_new_iter(ctx->mem, *result, min, max, minimal); + if (!*result) + return REG_ESPACE; + + /* If approximate matching parameters are set, add them to the + iteration node. */ + if (approx || costs_set || counts_set) + { + int *params; + tre_iteration_t *iter = (*result)->obj; + + /* An operation with neither a limit nor a cost is disallowed (limit 0); one with only a cost gets an unlimited count. */ + if (costs_set || counts_set) + { + if (limit_ins == TRE_PARAM_UNSET) + { + if (cost_ins == TRE_PARAM_UNSET) + limit_ins = 0; + else + limit_ins = INT_MAX; + } + + if (limit_del == TRE_PARAM_UNSET) + { + if (cost_del == TRE_PARAM_UNSET) + limit_del = 0; + else + limit_del = INT_MAX; + } + + if (limit_subst == TRE_PARAM_UNSET) + { + if (cost_subst == TRE_PARAM_UNSET) + limit_subst = 0; + else + limit_subst = INT_MAX; + } + } + + if (cost_max == TRE_PARAM_UNSET) + cost_max = INT_MAX; + if (limit_err == TRE_PARAM_UNSET) + limit_err = INT_MAX; + + ctx->have_approx = 1; + params = tre_mem_alloc(ctx->mem, sizeof(*params) * TRE_PARAM_LAST); + if (!params) + return REG_ESPACE; + for (i = 0; i < TRE_PARAM_LAST; i++) + params[i] = TRE_PARAM_UNSET; + params[TRE_PARAM_COST_INS] = cost_ins; + params[TRE_PARAM_COST_DEL] = cost_del; + params[TRE_PARAM_COST_SUBST] = cost_subst; + params[TRE_PARAM_COST_MAX] = cost_max; + params[TRE_PARAM_MAX_INS] = limit_ins; + params[TRE_PARAM_MAX_DEL] = limit_del; + params[TRE_PARAM_MAX_SUBST] = limit_subst; + params[TRE_PARAM_MAX_ERR] = limit_err; + iter->params = params; + } + } + + DPRINT(("tre_parse_bound: min %d, max %d, costs 
[%d,%d,%d, total %d], " + "limits [%d,%d,%d, total %d]\n", + min, max, cost_ins, cost_del, cost_subst, cost_max, + limit_ins, limit_del, limit_subst, limit_err)); + + + ctx->re = r; + return REG_OK; +} + +typedef enum { + PARSE_RE = 0, + PARSE_ATOM, + PARSE_MARK_FOR_SUBMATCH, + PARSE_BRANCH, + PARSE_PIECE, + PARSE_CATENATION, + PARSE_POST_CATENATION, + PARSE_UNION, + PARSE_POST_UNION, + PARSE_POSTFIX, + PARSE_RESTORE_CFLAGS +} tre_parse_re_stack_symbol_t; + + +reg_errcode_t +tre_parse(tre_parse_ctx_t *ctx) +{ + tre_ast_node_t *result = NULL; + tre_parse_re_stack_symbol_t symbol; + reg_errcode_t status = REG_OK; + tre_stack_t *stack = ctx->stack; + size_t bottom = tre_stack_num_items(stack); + int depth = 0; + int temporary_cflags = 0; + + DPRINT(("tre_parse: parsing '%.*" STRF "', len = %zu\n", + (int)ctx->len, ctx->re, ctx->len)); + + if (!ctx->nofirstsub) + { + STACK_PUSH(stack, int, ctx->submatch_id); + STACK_PUSH(stack, int, PARSE_MARK_FOR_SUBMATCH); + ctx->submatch_id++; + } + STACK_PUSH(stack, int, PARSE_RE); + ctx->re_start = ctx->re; + ctx->re_end = ctx->re + ctx->len; + + + /* The following is basically just a recursive descent parser. I use + an explicit stack instead of recursive functions mostly because of + two reasons: compatibility with systems which have an overflowable + call stack, and efficiency (both in lines of code and speed). */ + while (status == REG_OK && tre_stack_num_items(stack) > bottom) + { + symbol = tre_stack_pop_int(stack); + switch (symbol) + { + case PARSE_RE: + /* Parse a full regexp. A regexp is one or more branches, + separated by the union operator `|'. */ +#ifdef REG_LITERAL + if (!(ctx->cflags & REG_LITERAL) + && ctx->cflags & REG_EXTENDED) +#endif /* REG_LITERAL */ + STACK_PUSHX(stack, int, PARSE_UNION); + STACK_PUSHX(stack, int, PARSE_BRANCH); + break; + + case PARSE_BRANCH: + /* Parse a branch. A branch is one or more pieces, concatenated. + A piece is an atom possibly followed by a postfix operator. 
*/ + STACK_PUSHX(stack, int, PARSE_CATENATION); + STACK_PUSHX(stack, int, PARSE_PIECE); + break; + + case PARSE_PIECE: + /* Parse a piece. A piece is an atom possibly followed by one + or more postfix operators. */ +#ifdef REG_LITERAL + if (!(ctx->cflags & REG_LITERAL)) +#endif /* REG_LITERAL */ + STACK_PUSHX(stack, int, PARSE_POSTFIX); + STACK_PUSHX(stack, int, PARSE_ATOM); + break; + + case PARSE_CATENATION: + /* If the expression has not ended, parse another piece. */ + { + tre_char_t c; + if (ctx->re >= ctx->re_end) + break; + c = *ctx->re; +#ifdef REG_LITERAL + if (!(ctx->cflags & REG_LITERAL)) + { +#endif /* REG_LITERAL */ + if (ctx->cflags & REG_EXTENDED && c == CHAR_PIPE) + break; + if ((ctx->cflags & REG_EXTENDED + && c == CHAR_RPAREN && depth > 0) + || (!(ctx->cflags & REG_EXTENDED) + && (c == CHAR_BACKSLASH + && *(ctx->re + 1) == CHAR_RPAREN))) + { + if (!(ctx->cflags & REG_EXTENDED) && depth == 0) + status = REG_EPAREN; + DPRINT(("tre_parse: group end: '%.*" STRF "'\n", + REST(ctx->re))); + depth--; + if (!(ctx->cflags & REG_EXTENDED)) + ctx->re += 2; + break; + } +#ifdef REG_LITERAL + } +#endif /* REG_LITERAL */ + +#ifdef REG_RIGHT_ASSOC + if (ctx->cflags & REG_RIGHT_ASSOC) + { + /* Right associative concatenation. */ + STACK_PUSHX(stack, voidptr, result); + STACK_PUSHX(stack, int, PARSE_POST_CATENATION); + STACK_PUSHX(stack, int, PARSE_CATENATION); + STACK_PUSHX(stack, int, PARSE_PIECE); + } + else +#endif /* REG_RIGHT_ASSOC */ + { + /* Default case, left associative concatenation. 
*/ + STACK_PUSHX(stack, int, PARSE_CATENATION); + STACK_PUSHX(stack, voidptr, result); + STACK_PUSHX(stack, int, PARSE_POST_CATENATION); + STACK_PUSHX(stack, int, PARSE_PIECE); + } + break; + } + + case PARSE_POST_CATENATION: + { + tre_ast_node_t *tree = tre_stack_pop_voidptr(stack); + tre_ast_node_t *tmp_node; + tmp_node = tre_ast_new_catenation(ctx->mem, tree, result); + if (!tmp_node) + return REG_ESPACE; + result = tmp_node; + break; + } + + case PARSE_UNION: + if (ctx->re >= ctx->re_end) + break; +#ifdef REG_LITERAL + if (ctx->cflags & REG_LITERAL) + break; +#endif /* REG_LITERAL */ + switch (*ctx->re) + { + case CHAR_PIPE: + DPRINT(("tre_parse: union: '%.*" STRF "'\n", + REST(ctx->re))); + STACK_PUSHX(stack, int, PARSE_UNION); + STACK_PUSHX(stack, voidptr, result); + STACK_PUSHX(stack, int, PARSE_POST_UNION); + STACK_PUSHX(stack, int, PARSE_BRANCH); + ctx->re++; + break; + + case CHAR_RPAREN: + ctx->re++; + break; + + default: + break; + } + break; + + case PARSE_POST_UNION: + { + tre_ast_node_t *tmp_node; + tre_ast_node_t *tree = tre_stack_pop_voidptr(stack); + tmp_node = tre_ast_new_union(ctx->mem, tree, result); + if (!tmp_node) + return REG_ESPACE; + result = tmp_node; + break; + } + + case PARSE_POSTFIX: + /* Parse postfix operators. */ + if (ctx->re >= ctx->re_end) + break; +#ifdef REG_LITERAL + if (ctx->cflags & REG_LITERAL) + break; +#endif /* REG_LITERAL */ + switch (*ctx->re) + { + case CHAR_PLUS: + case CHAR_QUESTIONMARK: + if (!(ctx->cflags & REG_EXTENDED)) + break; + /*FALLTHROUGH*/ + case CHAR_STAR: + { + tre_ast_node_t *tmp_node; + int minimal = (ctx->cflags & REG_UNGREEDY) ? 
1 : 0; + int rep_min = 0; + int rep_max = -1; +#ifdef TRE_DEBUG + const tre_char_t *tmp_re; +#endif + + if (*ctx->re == CHAR_PLUS) + rep_min = 1; + if (*ctx->re == CHAR_QUESTIONMARK) + rep_max = 1; +#ifdef TRE_DEBUG + tmp_re = ctx->re; +#endif + + if (ctx->re + 1 < ctx->re_end) + { + if (*(ctx->re + 1) == CHAR_QUESTIONMARK) + { + minimal = !(ctx->cflags & REG_UNGREEDY); + ctx->re++; + } + else if (*(ctx->re + 1) == CHAR_STAR + || *(ctx->re + 1) == CHAR_PLUS) + { + /* These are reserved for future extensions. */ + return REG_BADRPT; + } + } + + DPRINT(("tre_parse: %s star: '%.*" STRF "'\n", + minimal ? " minimal" : "greedy", REST(tmp_re))); + ctx->re++; + tmp_node = tre_ast_new_iter(ctx->mem, result, rep_min, rep_max, + minimal); + if (tmp_node == NULL) + return REG_ESPACE; + result = tmp_node; + STACK_PUSHX(stack, int, PARSE_POSTFIX); + } + break; + + case CHAR_BACKSLASH: + /* "\{" is special without REG_EXTENDED */ + if (!(ctx->cflags & REG_EXTENDED) + && ctx->re + 1 < ctx->re_end + && *(ctx->re + 1) == CHAR_LBRACE) + { + ctx->re++; + goto parse_brace; + } + else + break; + + case CHAR_LBRACE: + /* "{" is literal without REG_EXTENDED */ + if (!(ctx->cflags & REG_EXTENDED)) + break; + + parse_brace: + DPRINT(("tre_parse: bound: '%.*" STRF "'\n", + REST(ctx->re))); + ctx->re++; + + status = tre_parse_bound(ctx, &result); + if (status != REG_OK) + return status; + STACK_PUSHX(stack, int, PARSE_POSTFIX); + break; + } + break; + + case PARSE_ATOM: + /* Parse an atom. An atom is a regular expression enclosed in `()', + an empty set of `()', a bracket expression, `.', `^', `$', + a `\' followed by a character, or a single character. */ + + /* End of regexp? (empty string). */ + if (ctx->re >= ctx->re_end) + goto parse_literal; + +#ifdef REG_LITERAL + if (ctx->cflags & REG_LITERAL) + goto parse_literal; +#endif /* REG_LITERAL */ + + switch (*ctx->re) + { + case CHAR_LPAREN: /* parenthesized subexpression */ + + /* Handle "(?...)" extensions. 
They work in a way similar + to Perls corresponding extensions. */ + if (ctx->cflags & REG_EXTENDED + && ctx->re + 1 < ctx->re_end + && *(ctx->re + 1) == CHAR_QUESTIONMARK) + { + int new_cflags = ctx->cflags; + int bit = 1; + DPRINT(("tre_parse: extension: '%.*" STRF "\n", + REST(ctx->re))); + ctx->re += 2; + while (/*CONSTCOND*/(void)1,1) + { + if (ctx->re >= ctx->re_end) + return REG_BADPAT; + if (*ctx->re == L'i') + { + DPRINT(("tre_parse: icase: '%.*" STRF "\n", + REST(ctx->re))); + if (bit) + new_cflags |= REG_ICASE; + else + new_cflags &= ~REG_ICASE; + ctx->re++; + } + else if (*ctx->re == L'n') + { + DPRINT(("tre_parse: newline: '%.*" STRF "\n", + REST(ctx->re))); + if (bit) + new_cflags |= REG_NEWLINE; + else + new_cflags &= ~REG_NEWLINE; + ctx->re++; + } +#ifdef REG_RIGHT_ASSOC + else if (*ctx->re == L'r') + { + DPRINT(("tre_parse: right assoc: '%.*" STRF "\n", + REST(ctx->re))); + if (bit) + new_cflags |= REG_RIGHT_ASSOC; + else + new_cflags &= ~REG_RIGHT_ASSOC; + ctx->re++; + } +#endif /* REG_RIGHT_ASSOC */ +#ifdef REG_UNGREEDY + else if (*ctx->re == L'U') + { + DPRINT(("tre_parse: ungreedy: '%.*" STRF "\n", + REST(ctx->re))); + if (bit) + new_cflags |= REG_UNGREEDY; + else + new_cflags &= ~REG_UNGREEDY; + ctx->re++; + } +#endif /* REG_UNGREEDY */ + else if (*ctx->re == CHAR_MINUS) + { + DPRINT(("tre_parse: turn off: '%.*" STRF "\n", + REST(ctx->re))); + ctx->re++; + bit = 0; + } + else if (*ctx->re == CHAR_COLON) + { + DPRINT(("tre_parse: no group: '%.*" STRF "\n", + REST(ctx->re))); + ctx->re++; + depth++; + break; + } + else if (*ctx->re == CHAR_HASH) + { + DPRINT(("tre_parse: comment: '%.*" STRF "\n", + REST(ctx->re))); + /* A comment can contain any character except a + right parenthesis */ + while (ctx->re < ctx->re_end + && *ctx->re != CHAR_RPAREN) + ctx->re++; + if (ctx->re < ctx->re_end + && *ctx->re == CHAR_RPAREN) + { + ctx->re++; + break; + } + else + return REG_BADPAT; + } + else if (*ctx->re == CHAR_RPAREN) + { + ctx->re++; + break; + } + 
else + return REG_BADPAT; + } + + /* Turn on the cflags changes for the rest of the + enclosing group. */ + if (new_cflags != ctx->cflags) + ctx->have_inline_cflags = 1; + STACK_PUSHX(stack, int, ctx->cflags); + STACK_PUSHX(stack, int, PARSE_RESTORE_CFLAGS); + STACK_PUSHX(stack, int, PARSE_RE); + ctx->cflags = new_cflags; + break; + } + + if (ctx->cflags & REG_EXTENDED + || (ctx->re > ctx->re_start + && *(ctx->re - 1) == CHAR_BACKSLASH)) + { + depth++; + if (ctx->re + 2 < ctx->re_end + && *(ctx->re + 1) == CHAR_QUESTIONMARK + && *(ctx->re + 2) == CHAR_COLON) + { + DPRINT(("tre_parse: group begin: '%.*" STRF + "', no submatch\n", REST(ctx->re))); + /* Don't mark for submatching. */ + ctx->re += 3; + STACK_PUSHX(stack, int, PARSE_RE); + } + else + { + DPRINT(("tre_parse: group begin: '%.*" STRF + "', submatch %d\n", REST(ctx->re), + ctx->submatch_id)); + ctx->re++; + /* First parse a whole RE, then mark the resulting tree + for submatching. */ + STACK_PUSHX(stack, int, ctx->submatch_id); + STACK_PUSHX(stack, int, PARSE_MARK_FOR_SUBMATCH); + STACK_PUSHX(stack, int, PARSE_RE); + ctx->submatch_id++; + } + } + else + goto parse_literal; + break; + + case CHAR_RPAREN: /* end of current subexpression */ + if ((ctx->cflags & REG_EXTENDED && depth > 0) + || (!(ctx->cflags & REG_EXTENDED) && ctx->re > ctx->re_start + && *(ctx->re - 1) == CHAR_BACKSLASH)) + { + DPRINT(("tre_parse: empty: '%.*" STRF "'\n", + REST(ctx->re))); + /* We were expecting an atom, but instead the current + subexpression was closed. POSIX leaves the meaning of + this to be implementation-defined. We interpret this as + an empty expression (which matches an empty string). 
*/ + result = tre_ast_new_literal(ctx->mem, EMPTY, -1); + if (result == NULL) + return REG_ESPACE; + if (!(ctx->cflags & REG_EXTENDED)) + ctx->re--; + } + else + goto parse_literal; + break; + + case CHAR_LBRACKET: /* bracket expression */ + DPRINT(("tre_parse: bracket: '%.*" STRF "'\n", + REST(ctx->re))); + ctx->re++; + status = tre_parse_bracket(ctx, &result); + if (status != REG_OK) + return status; + break; + + case CHAR_BACKSLASH: + /* If this is "\(" or "\)" chew off the backslash and + try again. */ + if (!(ctx->cflags & REG_EXTENDED) + && ctx->re + 1 < ctx->re_end + && (*(ctx->re + 1) == CHAR_LPAREN + || *(ctx->re + 1) == CHAR_RPAREN)) + { + ctx->re++; + STACK_PUSHX(stack, int, PARSE_ATOM); + break; + } + + /* If a macro is used, parse the expanded macro recursively. */ + { + tre_char_t buf[64]; + tre_expand_macro(ctx->re + 1, ctx->re_end, + buf, elementsof(buf)); + if (buf[0] != 0) + { + tre_parse_ctx_t subctx; + memcpy(&subctx, ctx, sizeof(subctx)); + subctx.re = buf; + subctx.len = tre_strlen(buf); + subctx.nofirstsub = 1; + status = tre_parse(&subctx); + if (status != REG_OK) + return status; + ctx->re += 2; + result = subctx.result; + break; + } + } + + if (ctx->re + 1 >= ctx->re_end) + /* Trailing backslash. 
*/ + return REG_EESCAPE; + +#ifdef REG_LITERAL + if (*(ctx->re + 1) == L'Q') + { + DPRINT(("tre_parse: tmp literal: '%.*" STRF "'\n", + REST(ctx->re))); + ctx->cflags |= REG_LITERAL; + temporary_cflags |= REG_LITERAL; + ctx->re += 2; + STACK_PUSHX(stack, int, PARSE_ATOM); + break; + } +#endif /* REG_LITERAL */ + + DPRINT(("tre_parse: bleep: '%.*" STRF "'\n", REST(ctx->re))); + ctx->re++; + switch (*ctx->re) + { + case L'b': + result = tre_ast_new_literal(ctx->mem, ASSERTION, + ASSERT_AT_WB); + ctx->re++; + break; + case L'B': + result = tre_ast_new_literal(ctx->mem, ASSERTION, + ASSERT_AT_WB_NEG); + ctx->re++; + break; + case L'<': + result = tre_ast_new_literal(ctx->mem, ASSERTION, + ASSERT_AT_BOW); + ctx->re++; + break; + case L'>': + result = tre_ast_new_literal(ctx->mem, ASSERTION, + ASSERT_AT_EOW); + ctx->re++; + break; + case L'x': + ctx->re++; + if (ctx->re >= ctx->re_end) + { + result = tre_ast_new_literal(ctx->mem, 0, 0); + if (result == NULL) + return REG_ESPACE; + break; + } + if (ctx->re[0] != CHAR_LBRACE) + { + /* 8 bit hex char. */ + char tmp[3] = {0, 0, 0}; + long val; + DPRINT(("tre_parse: 8 bit hex: '%.*" STRF "'\n", + REST(ctx->re - 2))); + + if (ctx->re < ctx->re_end && tre_isxdigit(ctx->re[0])) + { + tmp[0] = (char)ctx->re[0]; + ctx->re++; + } + if (ctx->re < ctx->re_end && tre_isxdigit(ctx->re[0])) + { + tmp[1] = (char)ctx->re[0]; + ctx->re++; + } + val = strtol(tmp, NULL, 16); + result = tre_ast_new_literal(ctx->mem, (int)val, (int)val); + break; + } + else + { + /* Wide char. 
*/ + char tmp[9]; /* max 8 hex digits + terminator */ + long val; + size_t i = 0; + ctx->re++; + while (ctx->re < ctx->re_end) + { + if (ctx->re[0] == CHAR_RBRACE) + break; + if (tre_isxdigit(ctx->re[0]) && i < sizeof(tmp) - 1) + { + tmp[i] = (char)ctx->re[0]; + i++; + ctx->re++; + continue; + } + return REG_EBRACE; + } + if (ctx->re >= ctx->re_end) + return REG_EBRACE; + ctx->re++; + tmp[i] = 0; + val = strtol(tmp, NULL, 16); + result = tre_ast_new_literal(ctx->mem, (int)val, (int)val); + break; + } + /*FALLTHROUGH*/ + + default: + if (tre_isdigit(*ctx->re)) + { + /* Back reference. */ + int val = *ctx->re - L'0'; + DPRINT(("tre_parse: backref: '%.*" STRF "'\n", + REST(ctx->re - 1))); + result = tre_ast_new_literal(ctx->mem, BACKREF, val); + if (result == NULL) + return REG_ESPACE; + ctx->max_backref = MAX(val, ctx->max_backref); + ctx->re++; + } + else + { + /* Escaped character. */ + DPRINT(("tre_parse: escaped: '%.*" STRF "'\n", + REST(ctx->re - 1))); + result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re); + ctx->re++; + } + break; + } + if (result == NULL) + return REG_ESPACE; + break; + + case CHAR_PERIOD: /* the any-symbol */ + DPRINT(("tre_parse: any: '%.*" STRF "'\n", + REST(ctx->re))); + if (ctx->cflags & REG_NEWLINE) + { + tre_ast_node_t *tmp1; + tre_ast_node_t *tmp2; + tmp1 = tre_ast_new_literal(ctx->mem, 0, L'\n' - 1); + if (!tmp1) + return REG_ESPACE; + tmp2 = tre_ast_new_literal(ctx->mem, L'\n' + 1, TRE_CHAR_MAX); + if (!tmp2) + return REG_ESPACE; + result = tre_ast_new_union(ctx->mem, tmp1, tmp2); + if (!result) + return REG_ESPACE; + } + else + { + result = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX); + if (!result) + return REG_ESPACE; + } + ctx->re++; + break; + + case CHAR_CARET: /* beginning of line assertion */ + /* '^' has a special meaning everywhere in EREs, and in the + beginning of the RE and after \( is BREs. 
*/ + if (ctx->cflags & REG_EXTENDED + || (ctx->re - 2 >= ctx->re_start + && *(ctx->re - 2) == CHAR_BACKSLASH + && *(ctx->re - 1) == CHAR_LPAREN) + || ctx->re == ctx->re_start) + { + DPRINT(("tre_parse: BOL: '%.*" STRF "'\n", + REST(ctx->re))); + result = tre_ast_new_literal(ctx->mem, ASSERTION, + ASSERT_AT_BOL); + if (result == NULL) + return REG_ESPACE; + ctx->re++; + } + else + goto parse_literal; + break; + + case CHAR_DOLLAR: /* end of line assertion. */ + /* '$' is special everywhere in EREs, and in the end of the + string and before \) is BREs. */ + if (ctx->cflags & REG_EXTENDED + || (ctx->re + 2 < ctx->re_end + && *(ctx->re + 1) == CHAR_BACKSLASH + && *(ctx->re + 2) == CHAR_RPAREN) + || ctx->re + 1 == ctx->re_end) + { + DPRINT(("tre_parse: EOL: '%.*" STRF "'\n", + REST(ctx->re))); + result = tre_ast_new_literal(ctx->mem, ASSERTION, + ASSERT_AT_EOL); + if (result == NULL) + return REG_ESPACE; + ctx->re++; + } + else + goto parse_literal; + break; + + default: + parse_literal: + + if (temporary_cflags && ctx->re + 1 < ctx->re_end + && *ctx->re == CHAR_BACKSLASH && *(ctx->re + 1) == L'E') + { + DPRINT(("tre_parse: end tmps: '%.*" STRF "'\n", + REST(ctx->re))); + ctx->cflags &= ~temporary_cflags; + temporary_cflags = 0; + ctx->re += 2; + STACK_PUSHX(stack, int, PARSE_PIECE); + break; + } + + + /* We are expecting an atom. If the subexpression (or the whole + regexp) ends here, we interpret it as an empty expression + (which matches an empty string). */ + if ( +#ifdef REG_LITERAL + !(ctx->cflags & REG_LITERAL) && +#endif /* REG_LITERAL */ + (ctx->re >= ctx->re_end + || *ctx->re == CHAR_STAR + || (ctx->cflags & REG_EXTENDED + && (*ctx->re == CHAR_PIPE + || *ctx->re == CHAR_LBRACE + || *ctx->re == CHAR_PLUS + || *ctx->re == CHAR_QUESTIONMARK)) + /* Test for "\)" in BRE mode. 
*/ + || (!(ctx->cflags & REG_EXTENDED) + && ctx->re + 1 < ctx->re_end + && *ctx->re == CHAR_BACKSLASH + && *(ctx->re + 1) == CHAR_LBRACE))) + { + DPRINT(("tre_parse: empty: '%.*" STRF "'\n", + REST(ctx->re))); + result = tre_ast_new_literal(ctx->mem, EMPTY, -1); + if (!result) + return REG_ESPACE; + break; + } + + DPRINT(("tre_parse: literal: '%.*" STRF "'\n", + REST(ctx->re))); + /* Note that we can't use an tre_isalpha() test here, since there + may be characters which are alphabetic but neither upper or + lower case. */ + if (ctx->cflags & REG_ICASE + && (tre_isupper(*ctx->re) || tre_islower(*ctx->re))) + { + tre_ast_node_t *tmp1; + tre_ast_node_t *tmp2; + + /* XXX - Can there be more than one opposite-case + counterpoints for some character in some locale? Or + more than two characters which all should be regarded + the same character if case is ignored? If yes, there + does not seem to be a portable way to detect it. I guess + that at least for multi-character collating elements there + could be several opposite-case counterpoints, but they + cannot be supported portably anyway. 
*/ + tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(*ctx->re), + tre_toupper(*ctx->re)); + if (!tmp1) + return REG_ESPACE; + tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(*ctx->re), + tre_tolower(*ctx->re)); + if (!tmp2) + return REG_ESPACE; + result = tre_ast_new_union(ctx->mem, tmp1, tmp2); + if (!result) + return REG_ESPACE; + } + else + { + result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re); + if (!result) + return REG_ESPACE; + } + ctx->re++; + break; + } + break; + + case PARSE_MARK_FOR_SUBMATCH: + { + int submatch_id = tre_stack_pop_int(stack); + + assert(result); + if (result->submatch_id >= 0) + { + tre_ast_node_t *n, *tmp_node; + n = tre_ast_new_literal(ctx->mem, EMPTY, -1); + if (n == NULL) + return REG_ESPACE; + tmp_node = tre_ast_new_catenation(ctx->mem, n, result); + if (tmp_node == NULL) + return REG_ESPACE; + tmp_node->num_submatches = result->num_submatches; + result = tmp_node; + } + result->submatch_id = submatch_id; + result->num_submatches++; + break; + } + + case PARSE_RESTORE_CFLAGS: + ctx->cflags = tre_stack_pop_int(stack); + break; + + default: + assert(0); + break; + } + } + + if (status != REG_OK) + return status; + + /* Check for missing closing parentheses. */ + if (depth > 0) + return REG_EPAREN; + + ctx->result = result; + return REG_OK; +} + +/* EOF */ diff --git a/deps/tre/lib/tre-parse.h b/deps/tre/lib/tre-parse.h new file mode 100644 index 000000000..39260ea7f --- /dev/null +++ b/deps/tre/lib/tre-parse.h @@ -0,0 +1,52 @@ +/* + tre-parse.c - Regexp parser definitions + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + +*/ + +#ifndef TRE_PARSE_H +#define TRE_PARSE_H 1 + +/* Parse context. */ +typedef struct { + /* Memory allocator. The AST is allocated using this. */ + tre_mem_t mem; + /* Stack used for keeping track of regexp syntax. */ + tre_stack_t *stack; + /* The parse result. */ + tre_ast_node_t *result; + /* The regexp to parse and its length. 
*/ + const tre_char_t *re; + /* The first character of the entire regexp. */ + const tre_char_t *re_start; + /* The first character after the end of the regexp. */ + const tre_char_t *re_end; + size_t len; + /* Current submatch ID. */ + int submatch_id; + /* The highest back reference or -1 if none seen so far. */ + int max_backref; + /* This flag is set if the regexp uses approximate matching. */ + int have_approx; + /* This flag is set if the regexp changes cflags inline using (?...) */ + int have_inline_cflags; + /* Compilation flags. */ + int cflags; + /* If this flag is set the top-level submatch is not captured. */ + int nofirstsub; + /* The currently set approximate matching parameters. */ + int params[TRE_PARAM_LAST]; + /* the MB_CUR_MAX in use */ + int mb_cur_max; +} tre_parse_ctx_t; + +/* Parses a wide character regexp pattern into a syntax tree. This parser + handles both syntaxes (BRE and ERE), including the TRE extensions. */ +reg_errcode_t +tre_parse(tre_parse_ctx_t *ctx); + +#endif /* TRE_PARSE_H */ + +/* EOF */ diff --git a/deps/tre/lib/tre-stack.c b/deps/tre/lib/tre-stack.c new file mode 100644 index 000000000..199aaf1b7 --- /dev/null +++ b/deps/tre/lib/tre-stack.c @@ -0,0 +1,123 @@ +/* + tre-stack.c - Simple stack implementation + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. 
+ +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif /* HAVE_CONFIG_H */ +#include +#include + +#include "tre-internal.h" +#include "tre-stack.h" +#include "xmalloc.h" + +union tre_stack_item { + void *voidptr_value; + int int_value; +}; + +struct tre_stack_rec { + size_t size; + size_t max_size; + size_t ptr; + union tre_stack_item *stack; +}; + + +tre_stack_t * +tre_stack_new(size_t size, size_t max_size) +{ + tre_stack_t *s; + + s = xmalloc(sizeof(*s)); + if (s != NULL) + { + s->stack = xmalloc(sizeof(*s->stack) * size); + if (s->stack == NULL) + { + xfree(s); + return NULL; + } + s->size = size; + s->max_size = max_size; + s->ptr = 0; + } + return s; +} + +void +tre_stack_destroy(tre_stack_t *s) +{ + xfree(s->stack); + xfree(s); +} + +size_t +tre_stack_num_items(tre_stack_t *s) +{ + return s->ptr; +} + +static reg_errcode_t +tre_stack_push(tre_stack_t *s, union tre_stack_item value) +{ + if (s->ptr < s->size) + { + s->stack[s->ptr] = value; + s->ptr++; + } + else + { + if (s->size >= s->max_size) + { + DPRINT(("tre_stack_push: stack full\n")); + return REG_ESPACE; + } + else + { + union tre_stack_item *new_buffer; + size_t new_size; + DPRINT(("tre_stack_push: trying to realloc more space\n")); + new_size = s->size + s->size; + if (new_size > s->max_size) + new_size = s->max_size; + new_buffer = xrealloc(s->stack, sizeof(*new_buffer) * new_size); + if (new_buffer == NULL) + { + DPRINT(("tre_stack_push: realloc failed.\n")); + return REG_ESPACE; + } + DPRINT(("tre_stack_push: realloc succeeded.\n")); + assert(new_size > s->size); + s->size = new_size; + s->stack = new_buffer; + tre_stack_push(s, value); + } + } + return REG_OK; +} + +#define define_pushf(typetag, type) \ + declare_pushf(typetag, type) { \ + union tre_stack_item item; \ + item.typetag ## _value = value; \ + return tre_stack_push(s, item); \ +} + +define_pushf(int, int) +define_pushf(voidptr, void *) + +#define define_popf(typetag, type) \ + declare_popf(typetag, type) { \ + return 
s->stack[--s->ptr].typetag ## _value; \ + } + +define_popf(int, int) +define_popf(voidptr, void *) + +/* EOF */ diff --git a/deps/tre/lib/tre-stack.h b/deps/tre/lib/tre-stack.h new file mode 100644 index 000000000..1408f322a --- /dev/null +++ b/deps/tre/lib/tre-stack.h @@ -0,0 +1,76 @@ +/* + tre-stack.h: Stack definitions + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + +*/ + + +#ifndef TRE_STACK_H +#define TRE_STACK_H 1 + +#include "../local_includes/tre.h" + +typedef struct tre_stack_rec tre_stack_t; + +/* Creates a new stack object with initial size `size' and maximum size + `max_size'. Pushing an additional item onto a full stack will resize + the stack to double its capacity until the maximum is reached. Returns + the stack object or NULL if out of memory. */ +tre_stack_t * +tre_stack_new(size_t size, size_t max_size); + +/* Frees the stack object. */ +void +tre_stack_destroy(tre_stack_t *s); + +/* Returns the current number of items on the stack. */ +size_t +tre_stack_num_items(tre_stack_t *s); + +/* Each tre_stack_push_*(tre_stack_t *s, value) function pushes + `value' on top of stack `s'. Returns REG_ESPACE if out of memory. + This tries to realloc() more space before failing if maximum size + has not yet been reached. Returns REG_OK if successful. */ +#define declare_pushf(typetag, type) \ + reg_errcode_t tre_stack_push_ ## typetag(tre_stack_t *s, type value) + +declare_pushf(voidptr, void *); +declare_pushf(int, int); + +/* Each tre_stack_pop_*(tre_stack_t *s) function pops the topmost + element off of stack `s' and returns it. The stack must not be + empty. */ +#define declare_popf(typetag, type) \ + type tre_stack_pop_ ## typetag(tre_stack_t *s) + +declare_popf(voidptr, void *); +declare_popf(int, int); + +/* Just to save some typing. 
*/ +#define STACK_PUSH(s, typetag, value) \ + do \ + { \ + status = tre_stack_push_ ## typetag(s, value); \ + } \ + while (/*CONSTCOND*/(void)0,0) + +#define STACK_PUSHX(s, typetag, value) \ + { \ + status = tre_stack_push_ ## typetag(s, value); \ + if (status != REG_OK) \ + break; \ + } + +#define STACK_PUSHR(s, typetag, value) \ + { \ + reg_errcode_t _status; \ + _status = tre_stack_push_ ## typetag(s, value); \ + if (_status != REG_OK) \ + return _status; \ + } + +#endif /* TRE_STACK_H */ + +/* EOF */ diff --git a/deps/tre/lib/xmalloc.c b/deps/tre/lib/xmalloc.c new file mode 100644 index 000000000..637235d8d --- /dev/null +++ b/deps/tre/lib/xmalloc.c @@ -0,0 +1,362 @@ +/* + xmalloc.c - Simple malloc debugging library implementation + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + +*/ + +/* + TODO: + - red zones + - group dumps by source location +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#define XMALLOC_INTERNAL 1 +#include "xmalloc.h" + + +/* + Internal stuff. 
+*/ + +typedef struct hashTableItemRec { + void *ptr; + size_t bytes; + const char *file; + int line; + const char *func; + struct hashTableItemRec *next; +} hashTableItem; + +typedef struct { + hashTableItem **table; +} hashTable; + +static int xmalloc_peak; +int xmalloc_current; +static int xmalloc_peak_blocks; +int xmalloc_current_blocks; +static int xmalloc_fail_after; + +#define TABLE_BITS 8 +#define TABLE_MASK ((1 << TABLE_BITS) - 1) +#define TABLE_SIZE (1 << TABLE_BITS) + +static hashTable * +hash_table_new(void) +{ + hashTable *tbl; + + tbl = malloc(sizeof(*tbl)); + + if (tbl != NULL) + { + tbl->table = calloc(TABLE_SIZE, sizeof(*tbl->table)); + + if (tbl->table == NULL) + { + free(tbl); + return NULL; + } + } + + return tbl; +} + +static unsigned int +hash_void_ptr(void *ptr) +{ + unsigned int hash; + unsigned int i; + + /* I took this hash function just off the top of my head, I have + no idea whether it is bad or very bad. */ + hash = 0; + for (i = 0; i < sizeof(ptr) * 8 / TABLE_BITS; i++) + { + hash ^= (uintptr_t)ptr >> i * 8; + hash += i * 17; + hash &= TABLE_MASK; + } + return hash; +} + +static void +hash_table_add(hashTable *tbl, void *ptr, size_t bytes, + const char *file, int line, const char *func) +{ + unsigned int i; + hashTableItem *item, *new; + + i = hash_void_ptr(ptr); + + item = tbl->table[i]; + if (item != NULL) + while (item->next != NULL) + item = item->next; + + new = malloc(sizeof(*new)); + assert(new != NULL); + new->ptr = ptr; + new->bytes = bytes; + new->file = file; + new->line = line; + new->func = func; + new->next = NULL; + if (item != NULL) + item->next = new; + else + tbl->table[i] = new; + + xmalloc_current += bytes; + if (xmalloc_current > xmalloc_peak) + xmalloc_peak = xmalloc_current; + xmalloc_current_blocks++; + if (xmalloc_current_blocks > xmalloc_peak_blocks) + xmalloc_peak_blocks = xmalloc_current_blocks; +} + +static void +#if defined(__GNUC__) && __GNUC__ >= 10 +__attribute__((access(none, 2))) +#endif 
+hash_table_del(hashTable *tbl, void *ptr) +{ + int i; + hashTableItem *item, *prev; + + i = hash_void_ptr(ptr); + + item = tbl->table[i]; + if (item == NULL) + { + printf("xfree: invalid ptr %p\n", ptr); + abort(); + } + prev = NULL; + while (item != NULL && item->ptr != ptr) /* stop at the end of the chain if ptr is not tracked */ + { + prev = item; + item = item->next; + } + if (item == NULL) /* ptr was not found in its bucket */ + { + printf("xfree: invalid ptr %p\n", ptr); + abort(); + } + + xmalloc_current -= item->bytes; + xmalloc_current_blocks--; + + if (prev != NULL) + { + prev->next = item->next; + free(item); + } + else + { + tbl->table[i] = item->next; + free(item); + } +} + +static hashTable *xmalloc_table = NULL; + +static void +xmalloc_init(void) +{ + if (xmalloc_table == NULL) + { + xmalloc_table = hash_table_new(); + xmalloc_peak = 0; + xmalloc_peak_blocks = 0; + xmalloc_current = 0; + xmalloc_current_blocks = 0; + xmalloc_fail_after = -1; + } + assert(xmalloc_table != NULL); + assert(xmalloc_table->table != NULL); +} + + + +/* + Public API. +*/ + +void +xmalloc_configure(int fail_after) +{ + xmalloc_init(); + xmalloc_fail_after = fail_after; +} + +int +xmalloc_dump_leaks(void) +{ + unsigned int i; + unsigned int num_leaks = 0; + size_t leaked_bytes = 0; + hashTableItem *item; + + xmalloc_init(); + + for (i = 0; i < TABLE_SIZE; i++) + { + item = xmalloc_table->table[i]; + while (item != NULL) + { + printf("%s:%d: %s: %zu bytes at %p not freed\n", + item->file, item->line, item->func, item->bytes, item->ptr); + num_leaks++; + leaked_bytes += item->bytes; + item = item->next; + } + } + if (num_leaks == 0) + printf("No memory leaks.\n"); + else + printf("%u unfreed memory chuncks, total %zu unfreed bytes.\n", + num_leaks, leaked_bytes); + printf("Peak memory consumption %d bytes (%.1f kB, %.1f MB) in %d blocks ", + xmalloc_peak, (double)xmalloc_peak / 1024, + (double)xmalloc_peak / (1024*1024), xmalloc_peak_blocks); + printf("(average "); + if (xmalloc_peak_blocks) + printf("%d", ((xmalloc_peak + xmalloc_peak_blocks / 2) + / xmalloc_peak_blocks)); + else 
+ printf("N/A"); + printf(" bytes per block).\n"); + + return num_leaks; +} + +void * +xmalloc_impl(size_t size, const char *file, int line, const char *func) +{ + void *ptr; + + xmalloc_init(); + assert(size > 0); + + if (xmalloc_fail_after == 0) + { + xmalloc_fail_after = -2; +#if 0 + printf("xmalloc: forced failure %s:%d: %s\n", file, line, func); +#endif + return NULL; + } + else if (xmalloc_fail_after == -2) + { + printf("xmalloc: called after failure from %s:%d: %s\n", + file, line, func); + assert(0); + } + else if (xmalloc_fail_after > 0) + xmalloc_fail_after--; + + ptr = malloc(size); + if (ptr != NULL) + hash_table_add(xmalloc_table, ptr, (int)size, file, line, func); + return ptr; +} + +void * +xcalloc_impl(size_t nmemb, size_t size, const char *file, int line, + const char *func) +{ + void *ptr; + + xmalloc_init(); + assert(size > 0); + + if (xmalloc_fail_after == 0) + { + xmalloc_fail_after = -2; +#if 0 + printf("xcalloc: forced failure %s:%d: %s\n", file, line, func); +#endif + return NULL; + } + else if (xmalloc_fail_after == -2) + { + printf("xcalloc: called after failure from %s:%d: %s\n", + file, line, func); + assert(0); + } + else if (xmalloc_fail_after > 0) + xmalloc_fail_after--; + + ptr = calloc(nmemb, size); + if (ptr != NULL) + hash_table_add(xmalloc_table, ptr, (int)(nmemb * size), file, line, func); + return ptr; +} + +void +xfree_impl(void *ptr, const char *file, int line, const char *func) +{ + /*LINTED*/(void)&file; + /*LINTED*/(void)&line; + /*LINTED*/(void)&func; + xmalloc_init(); + + if (ptr != NULL) + hash_table_del(xmalloc_table, ptr); + free(ptr); +} + +void * +xrealloc_impl(void *ptr, size_t new_size, const char *file, int line, + const char *func) +{ + void *new_ptr; + + xmalloc_init(); + assert(ptr != NULL); + assert(new_size > 0); + + if (xmalloc_fail_after == 0) + { + xmalloc_fail_after = -2; + return NULL; + } + else if (xmalloc_fail_after == -2) + { + printf("xrealloc: called after failure from %s:%d: %s\n", + file, line, 
func); + assert(0); + } + else if (xmalloc_fail_after > 0) + xmalloc_fail_after--; + + new_ptr = realloc(ptr, new_size); + if (new_ptr != NULL && new_ptr != ptr) + { +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wuse-after-free" +#endif + hash_table_del(xmalloc_table, ptr); +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic pop +#endif + hash_table_add(xmalloc_table, new_ptr, (int)new_size, file, line, func); + } + return new_ptr; +} + + + +/* EOF */ diff --git a/deps/tre/lib/xmalloc.h b/deps/tre/lib/xmalloc.h new file mode 100644 index 000000000..ce310af52 --- /dev/null +++ b/deps/tre/lib/xmalloc.h @@ -0,0 +1,77 @@ +/* + xmalloc.h - Simple malloc debugging library API + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + +*/ + +#ifndef _XMALLOC_H +#define _XMALLOC_H 1 + +void *xmalloc_impl(size_t size, const char *file, int line, const char *func); +void *xcalloc_impl(size_t nmemb, size_t size, const char *file, int line, + const char *func); +void xfree_impl(void *ptr, const char *file, int line, const char *func); +void *xrealloc_impl(void *ptr, size_t new_size, const char *file, int line, + const char *func); +int xmalloc_dump_leaks(void); +void xmalloc_configure(int fail_after); + + +#ifndef XMALLOC_INTERNAL +#ifdef MALLOC_DEBUGGING + +/* Version 2.4 and later of GCC define a magical variable `__PRETTY_FUNCTION__' + which contains the name of the function currently being defined. +# define __XMALLOC_FUNCTION __PRETTY_FUNCTION__ + This is broken in G++ before version 2.6. + C9x has a similar variable called __func__, but prefer the GCC one since + it demangles C++ function names. */ +# ifdef __GNUC__ +# if __GNUC__ > 2 || (__GNUC__ == 2 \ + && __GNUC_MINOR__ >= (defined __cplusplus ? 
6 : 4)) +# define __XMALLOC_FUNCTION __PRETTY_FUNCTION__ +# else +# define __XMALLOC_FUNCTION ((const char *) 0) +# endif +# else +# if defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L +# define __XMALLOC_FUNCTION __func__ +# else +# define __XMALLOC_FUNCTION ((const char *) 0) +# endif +# endif + +#define xmalloc(size) xmalloc_impl(size, __FILE__, __LINE__, \ + __XMALLOC_FUNCTION) +#define xcalloc(nmemb, size) xcalloc_impl(nmemb, size, __FILE__, __LINE__, \ + __XMALLOC_FUNCTION) +#define xfree(ptr) xfree_impl(ptr, __FILE__, __LINE__, __XMALLOC_FUNCTION) +#define xrealloc(ptr, new_size) xrealloc_impl(ptr, new_size, __FILE__, \ + __LINE__, __XMALLOC_FUNCTION) +#undef malloc +#undef calloc +#undef free +#undef realloc + +#define malloc USE_XMALLOC_INSTEAD_OF_MALLOC +#define calloc USE_XCALLOC_INSTEAD_OF_CALLOC +#define free USE_XFREE_INSTEAD_OF_FREE +#define realloc USE_XREALLOC_INSTEAD_OF_REALLOC + +#else /* !MALLOC_DEBUGGING */ + +#include + +#define xmalloc(size) malloc(size) +#define xcalloc(nmemb, size) calloc(nmemb, size) +#define xfree(ptr) free(ptr) +#define xrealloc(ptr, new_size) realloc(ptr, new_size) + +#endif /* !MALLOC_DEBUGGING */ +#endif /* !XMALLOC_INTERNAL */ + +#endif /* _XMALLOC_H */ + +/* EOF */ diff --git a/deps/tre/local_includes/regex.h b/deps/tre/local_includes/regex.h new file mode 100644 index 000000000..daa15a741 --- /dev/null +++ b/deps/tre/local_includes/regex.h @@ -0,0 +1,48 @@ +/* + regex.h - TRE legacy API + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + + This header is for source level compatibility with old code using + the header which defined the TRE API functions without + a prefix. New code should include instead. + +*/ + +#ifndef TRE_REGEX_H +#define TRE_REGEX_H 1 + +#ifdef USE_LOCAL_TRE_H +/* Use the header(s) from the TRE package that this file is part of. 
+ (Yes, this file is in local_includes too, but the explicit path + means there is no way to get a system tre.h by accident.) */ +#include "../local_includes/tre.h" +#else +/* Use the header(s) from an installed version of the TRE package + (so that this application matches the installed libtre), + not the one(s) in the local_includes directory. */ +#include <tre/tre.h> +#endif + +#ifndef TRE_USE_SYSTEM_REGEX_H +#define regcomp tre_regcomp +#define regerror tre_regerror +#define regexec tre_regexec +#define regfree tre_regfree +#endif /* TRE_USE_SYSTEM_REGEX_H */ + +#define regacomp tre_regacomp +#define regaexec tre_regaexec +#define regancomp tre_regancomp +#define reganexec tre_reganexec +#define regawncomp tre_regawncomp +#define regawnexec tre_regawnexec +#define regncomp tre_regncomp +#define regnexec tre_regnexec +#define regwcomp tre_regwcomp +#define regwexec tre_regwexec +#define regwncomp tre_regwncomp +#define regwnexec tre_regwnexec + +#endif /* TRE_REGEX_H */ diff --git a/deps/tre/local_includes/tre-config.h b/deps/tre/local_includes/tre-config.h new file mode 100644 index 000000000..4b73c1289 --- /dev/null +++ b/deps/tre/local_includes/tre-config.h @@ -0,0 +1,14 @@ +/* Minimal TRE configuration for Redis. + * + * We use TRE as a byte-oriented regex matcher for ARGREP. Redis SDS values are + * binary-safe byte strings, so we intentionally keep the dependency build + * simple: no wide-char path, no multibyte locale handling, and no approximate + * matching engine. + */ + +#define HAVE_SYS_TYPES_H 1 + +#define TRE_VERSION "redis-vendored" +#define TRE_VERSION_1 0 +#define TRE_VERSION_2 0 +#define TRE_VERSION_3 0 diff --git a/deps/tre/local_includes/tre.h b/deps/tre/local_includes/tre.h new file mode 100644 index 000000000..675153990 --- /dev/null +++ b/deps/tre/local_includes/tre.h @@ -0,0 +1,344 @@ +/* + tre.h - TRE public API definitions + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. 
+ +*/ + +#ifndef TRE_H +#define TRE_H 1 + +#ifdef USE_LOCAL_TRE_H +/* Make certain to use the header(s) from the TRE package that this + file is part of by giving the full path to the header from this directory. */ +#include "../local_includes/tre-config.h" +#else +/* Use the header in the same directory as this file if there is one. */ +#include "tre-config.h" +#endif + +#ifdef HAVE_SYS_TYPES_H +#include <sys/types.h> +#endif /* HAVE_SYS_TYPES_H */ + +#ifdef HAVE_LIBUTF8_H +#include <libutf8.h> +#endif /* HAVE_LIBUTF8_H */ + +#ifdef TRE_USE_SYSTEM_REGEX_H +/* Include the system regex.h to make TRE ABI compatible with the + system regex. */ +#include TRE_SYSTEM_REGEX_H_PATH +#define tre_regcomp regcomp +#define tre_regexec regexec +#define tre_regerror regerror +#define tre_regfree regfree +/* The GNU C regex has a number of refinements to the POSIX standard for the + formal parameter list of the regexec() function, and some of those fail to + compile when using LLVM. The refinements seem to be opt-out rather than + opt-in when using a recent gcc, and they produce a warning when TRE tries + to mimic the API without the refinements. The TRE code still works but + the warnings are distracting, so try to #define a flag to indicate when to + add the refinements to TRE's parameter list too. */ +#ifdef __GNUC__ +/* Try to test something that looks pretty REGEX specific and hope we don't + need a zillion different platform+compiler specific tests to deal with this. */ +#ifdef _REGEX_NELTS +/* Define a TRE specific flag here so that: + 1) there is only one place where code has to be changed if the test above is not adequate, and + 2) the flag can be used in any other parts of the TRE source that might be affected by the + GNUC refinements. + Note that this flag is only defined when all of TRE_USE_SYSTEM_REGEX_H, __GNUC__, and _REGEX_NELTS are defined. 
*/ +#define TRE_USE_GNUC_REGEXEC_FPL 1 +#endif +#endif +#endif /* TRE_USE_SYSTEM_REGEX_H */ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef TRE_USE_SYSTEM_REGEX_H + +#ifndef REG_OK +#define REG_OK 0 +#endif /* !REG_OK */ + +#ifndef HAVE_REG_ERRCODE_T +typedef int reg_errcode_t; +#endif /* !HAVE_REG_ERRCODE_T */ + +#if !defined(REG_NOSPEC) && !defined(REG_LITERAL) +#define REG_LITERAL 0x1000 +#endif + +/* Extra tre_regcomp() return error codes. */ +#define REG_BADMAX REG_BADBR + +/* Extra tre_regcomp() flags. */ +#ifndef REG_BASIC +#define REG_BASIC 0 +#endif /* !REG_BASIC */ +#define REG_RIGHT_ASSOC (REG_LITERAL << 1) +#ifdef REG_UNGREEDY +/* We're going to use TRE code, so we need the TRE define (dodge problem in MacOS). */ +#undef REG_UNGREEDY +#endif +#define REG_UNGREEDY (REG_RIGHT_ASSOC << 1) + +#define REG_USEBYTES (REG_UNGREEDY << 1) + +/* Extra tre_regexec() flags. */ +#define REG_APPROX_MATCHER 0x1000 +#ifdef REG_BACKTRACKING_MATCHER +/* We're going to use TRE code, so we need the TRE define (dodge problem in MacOS). */ +#undef REG_BACKTRACKING_MATCHER +#endif +#define REG_BACKTRACKING_MATCHER (REG_APPROX_MATCHER << 1) + +#else /* !TRE_USE_SYSTEM_REGEX_H */ + +/* If we're not using the system regex.h, we need to define the + structs and enums ourselves. */ + +typedef int regoff_t; +typedef struct { + size_t re_nsub; /* Number of parenthesized subexpressions. */ + void *value; /* For internal use only. */ +} regex_t; + +typedef struct { + regoff_t rm_so; + regoff_t rm_eo; +} regmatch_t; + + +typedef enum { + REG_OK = 0, /* No error. */ + /* POSIX tre_regcomp() return error codes. (In the order listed in the + standard.) */ + REG_NOMATCH, /* No match. */ + REG_BADPAT, /* Invalid regexp. */ + REG_ECOLLATE, /* Unknown collating element. */ + REG_ECTYPE, /* Unknown character class name. */ + REG_EESCAPE, /* Trailing backslash. */ + REG_ESUBREG, /* Invalid back reference. 
*/ + REG_EBRACK, /* "[]" imbalance */ + REG_EPAREN, /* "\(\)" or "()" imbalance */ + REG_EBRACE, /* "\{\}" or "{}" imbalance */ + REG_BADBR, /* Invalid content of {} */ + REG_ERANGE, /* Invalid use of range operator */ + REG_ESPACE, /* Out of memory. */ + REG_BADRPT, /* Invalid use of repetition operators. */ + REG_BADMAX, /* Maximum repetition in {} too large */ +} reg_errcode_t; + +/* POSIX tre_regcomp() flags. */ +#define REG_EXTENDED 1 +#define REG_ICASE (REG_EXTENDED << 1) +#define REG_NEWLINE (REG_ICASE << 1) +#define REG_NOSUB (REG_NEWLINE << 1) + +/* Extra tre_regcomp() flags. */ +#define REG_BASIC 0 +#define REG_LITERAL (REG_NOSUB << 1) +#define REG_RIGHT_ASSOC (REG_LITERAL << 1) +#define REG_UNGREEDY (REG_RIGHT_ASSOC << 1) +#define REG_USEBYTES (REG_UNGREEDY << 1) + +/* POSIX tre_regexec() flags. */ +#define REG_NOTBOL 1 +#define REG_NOTEOL (REG_NOTBOL << 1) + +/* Extra tre_regexec() flags. */ +#define REG_APPROX_MATCHER (REG_NOTEOL << 1) +#define REG_BACKTRACKING_MATCHER (REG_APPROX_MATCHER << 1) + +#endif /* !TRE_USE_SYSTEM_REGEX_H */ + +/* REG_NOSPEC and REG_LITERAL mean the same thing. */ +#if defined(REG_LITERAL) && !defined(REG_NOSPEC) +#define REG_NOSPEC REG_LITERAL +#elif defined(REG_NOSPEC) && !defined(REG_LITERAL) +#define REG_LITERAL REG_NOSPEC +#endif /* defined(REG_NOSPEC) */ + +/* The maximum number of iterations in a bound expression. 
*/ +#undef RE_DUP_MAX +#define RE_DUP_MAX 255 + +/* The POSIX.2 regexp functions */ +extern int +tre_regcomp(regex_t *preg, const char *regex, int cflags); + +#ifdef TRE_USE_GNUC_REGEXEC_FPL +extern int +tre_regexec(const regex_t *preg, const char *string, + size_t nmatch, regmatch_t pmatch[_Restrict_arr_ _REGEX_NELTS (nmatch)], + int eflags); +#else +extern int +tre_regexec(const regex_t *preg, const char *string, size_t nmatch, + regmatch_t pmatch[], int eflags); +#endif + +extern int +tre_regcompb(regex_t *preg, const char *regex, int cflags); + +extern int +tre_regexecb(const regex_t *preg, const char *string, size_t nmatch, + regmatch_t pmatch[], int eflags); + +extern size_t +tre_regerror(int errcode, const regex_t *preg, char *errbuf, + size_t errbuf_size); + +extern void +tre_regfree(regex_t *preg); + +#ifdef TRE_WCHAR +#ifdef HAVE_WCHAR_H +#include <wchar.h> +#endif /* HAVE_WCHAR_H */ + +/* Wide character versions (not in POSIX.2). */ +extern int +tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags); + +extern int +tre_regwexec(const regex_t *preg, const wchar_t *string, + size_t nmatch, regmatch_t pmatch[], int eflags); +#endif /* TRE_WCHAR */ + +/* Versions with a maximum length argument and therefore the capability to + handle null characters in the middle of the strings (not in POSIX.2). 
*/ +extern int +tre_regncomp(regex_t *preg, const char *regex, size_t len, int cflags); + +extern int +tre_regnexec(const regex_t *preg, const char *string, size_t len, + size_t nmatch, regmatch_t pmatch[], int eflags); + +/* regn*b versions take byte literally as 8-bit values */ +extern int +tre_regncompb(regex_t *preg, const char *regex, size_t n, int cflags); + +extern int +tre_regnexecb(const regex_t *preg, const char *str, size_t len, + size_t nmatch, regmatch_t pmatch[], int eflags); + +#ifdef TRE_WCHAR +extern int +tre_regwncomp(regex_t *preg, const wchar_t *regex, size_t len, int cflags); + +extern int +tre_regwnexec(const regex_t *preg, const wchar_t *string, size_t len, + size_t nmatch, regmatch_t pmatch[], int eflags); +#endif /* TRE_WCHAR */ + +#ifdef TRE_APPROX + +/* Approximate matching parameter struct. */ +typedef struct { + int cost_ins; /* Default cost of an inserted character. */ + int cost_del; /* Default cost of a deleted character. */ + int cost_subst; /* Default cost of a substituted character. */ + int max_cost; /* Maximum allowed cost of a match. */ + + int max_ins; /* Maximum allowed number of inserts. */ + int max_del; /* Maximum allowed number of deletes. */ + int max_subst; /* Maximum allowed number of substitutes. */ + int max_err; /* Maximum allowed number of errors total. */ +} regaparams_t; + +/* Approximate matching result struct. */ +typedef struct { + size_t nmatch; /* Length of pmatch[] array. */ + regmatch_t *pmatch; /* Submatch data. */ + int cost; /* Cost of the match. */ + int num_ins; /* Number of inserts in the match. */ + int num_del; /* Number of deletes in the match. */ + int num_subst; /* Number of substitutes in the match. */ +} regamatch_t; + + +/* Approximate matching functions. 
*/ +extern int +tre_regaexec(const regex_t *preg, const char *string, + regamatch_t *match, regaparams_t params, int eflags); + +extern int +tre_reganexec(const regex_t *preg, const char *string, size_t len, + regamatch_t *match, regaparams_t params, int eflags); + +extern int +tre_regaexecb(const regex_t *preg, const char *string, + regamatch_t *match, regaparams_t params, int eflags); + +#ifdef TRE_WCHAR +/* Wide character approximate matching. */ +extern int +tre_regawexec(const regex_t *preg, const wchar_t *string, + regamatch_t *match, regaparams_t params, int eflags); + +extern int +tre_regawnexec(const regex_t *preg, const wchar_t *string, size_t len, + regamatch_t *match, regaparams_t params, int eflags); +#endif /* TRE_WCHAR */ + +/* Sets the parameters to default values. */ +extern void +tre_regaparams_default(regaparams_t *params); +#endif /* TRE_APPROX */ + +#ifdef TRE_WCHAR +typedef wchar_t tre_char_t; +#else /* !TRE_WCHAR */ +typedef unsigned char tre_char_t; +#endif /* !TRE_WCHAR */ + +typedef struct { + int (*get_next_char)(tre_char_t *c, unsigned int *pos_add, void *context); + void (*rewind)(size_t pos, void *context); + int (*compare)(size_t pos1, size_t pos2, size_t len, void *context); + void *context; +} tre_str_source; + +extern int +tre_reguexec(const regex_t *preg, const tre_str_source *string, + size_t nmatch, regmatch_t pmatch[], int eflags); + +/* Returns the version string. The returned string is static. */ +extern char * +tre_version(void); + +/* Returns the value for a config parameter. The type to which `result' + must point depends on the value of `query'; see the documentation for + more details. */ +extern int +tre_config(int query, void *result); + +enum { + TRE_CONFIG_APPROX, + TRE_CONFIG_WCHAR, + TRE_CONFIG_MULTIBYTE, + TRE_CONFIG_SYSTEM_ABI, + TRE_CONFIG_VERSION +}; + +/* Returns 1 if the compiled pattern has back references, 0 if not. 
*/ +extern int +tre_have_backrefs(const regex_t *preg); + +/* Returns 1 if the compiled pattern uses approximate matching features, + 0 if not. */ +extern int +tre_have_approx(const regex_t *preg); + +#ifdef __cplusplus +} +#endif +#endif /* TRE_H */ + +/* EOF */ diff --git a/deps/tre/tests/retest.c b/deps/tre/tests/retest.c new file mode 100644 index 000000000..c486a819c --- /dev/null +++ b/deps/tre/tests/retest.c @@ -0,0 +1,1871 @@ +/* + retest.c - TRE regression test program + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + +*/ + +/* + This is just a simple test application containing various hand-written + tests for regression testing TRE. I've tried to surround TRE specific + tests inside ifdefs, so this can be used to test any POSIX compatible + regexp implementation. +*/ + +/* + 2023/06 - Compilers now sometimes require the input string constants to be + properly encoded, but how they decide on which encoding (if any) + is poorly documented and different for different platforms. + The non-ASCII encoded strings are now guarded by #ifdefs with one + of the following values. Define/undef whichever one(s) you need. + #define SRC_IN_ISO_8859_1 + #define SRC_IN_UTF_8 + #define SRC_IN_EUC_JP + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif /* HAVE_CONFIG_H */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <stdarg.h> +#include <locale.h> +/* look for getopt in order to use a -o option for output. 
*/ +#if defined(HAVE_UNISTD_H) +#include <unistd.h> +#elif defined(HAVE_GETOPT_H) +#include <getopt.h> +#endif +#ifdef HAVE_MALLOC_H +#include <malloc.h> +#endif /* HAVE_MALLOC_H */ + +#ifdef TRE_VERSION +#define HAVE_REGNEXEC 1 +#define HAVE_REGNCOMP 1 +#include "xmalloc.h" +#else /* !TRE_VERSION */ +#define xmalloc malloc +#define xfree free +#endif /* !TRE_VERSION */ + +#include "tre-internal.h" + +#ifdef WRETEST +#include <wchar.h> +#define CHAR_T wchar_t +#define L(x) (L ## x) + +#define MAXSTRSIZE 8192 +static wchar_t wstr[MAXSTRSIZE]; +static wchar_t wregex[MAXSTRSIZE]; +static int woffs[MAXSTRSIZE]; + +#ifdef TRE_USE_SYSTEM_REGEX_H +/* Avoid some redefinition warnings from including tre.h. */ +#ifdef tre_regexec +#undef tre_regexec +/* No need for the *n* fn, it isn't in the system abi. */ +#endif +#endif +#define tre_regexec tre_regwexec +#define tre_regnexec tre_regwnexec +#ifdef TRE_USE_SYSTEM_REGEX_H +/* Avoid some redefinition warnings from including tre.h. */ +#ifdef tre_regcomp +#undef tre_regcomp +#endif +/* No need for the *n* fn, it isn't in the system abi. */ +#endif +#define tre_regcomp tre_regwcomp +#define tre_regncomp tre_regwncomp + +/* Iterate mbrtowc over the multi-byte sequence STR of length LEN, + store the result in BUF and memoize the successive byte offsets + in OFF. */ + +static int +mbntowc (wchar_t *buf, const char *str, size_t len, int *off) +{ + int n, wlen; +#ifdef HAVE_MBSTATE_T + mbstate_t cst; + memset(&cst, 0, sizeof(cst)); +#endif + + if (len >= MAXSTRSIZE) + { + fprintf(stderr, "Increase MAXSTRSIZE to %ld or more and recompile!\n", + (long)len + 1); + exit(EXIT_FAILURE); + } + + if (off) + { + memset(off + 1, -1, len * sizeof(int)); + *off = 0; + } + + wlen = 0; + while (len > 0) + { + n = tre_mbrtowc(buf ? 
buf++ : NULL, str, len, &cst); + if (n < 0) + return n; + if (n == 0) + n = 1; + str += n; + len -= n; + wlen += 1; + if (off) + *(off += n) = wlen; + } + + return(wlen); +} + +#else /* !WRETEST */ +#define CHAR_T char +#define L(x) (x) +#endif /* !WRETEST */ + +static FILE *outf = NULL; + +static int valid_reobj = 0; +static regex_t reobj; +static regmatch_t pmatch_global[32]; +static const CHAR_T *regex_pattern; +static int cflags_global; +static int use_regnexec = 0; +static int use_regncomp = 0; +static int avoid_eflags = 0; + +static int comp_tests = 0; +static int exec_tests = 0; +static int comp_errors = 0; +static int exec_errors = 0; + +#ifndef REG_OK +#define REG_OK 0 +#endif /* REG_OK */ + +#define END -2 + +static void +test_status(char c) +{ + static int k = 0; + fprintf(outf, "%c", c); + if (++k % 79 == 0) + fprintf(outf, "\n"); + fflush(outf); +} + + +static int +wrap_regexec(const CHAR_T *data, size_t len, + size_t pmatch_len, regmatch_t *pmatch, int eflags) +{ + CHAR_T *buf = NULL; + int result; + + if (len == 0 && use_regnexec) + { + /* Zero length string and using tre_regnexec(), the pointer we give + should not be dereferenced at all. */ + buf = NULL; + } + else + { + /* Copy the data to a separate buffer to make a better test for + tre_regexec() and tre_regnexec(). 
*/ + buf = xmalloc((len + !use_regnexec) * sizeof(CHAR_T)); + if (!buf) + return REG_ESPACE; + memcpy(buf, data, len * sizeof(CHAR_T)); + test_status('#'); + } + +#ifdef HAVE_REGNEXEC + if (use_regnexec) + { + if (len == 0) + result = tre_regnexec(&reobj, NULL, len, pmatch_len, pmatch, eflags); + else + result = tre_regnexec(&reobj, buf, len, pmatch_len, pmatch, eflags); + } + else +#endif /* HAVE_REGNEXEC */ + { + buf[len] = L('\0'); + result = tre_regexec(&reobj, buf, pmatch_len, pmatch, eflags); + } + + xfree(buf); + return result; +} + +static int +wrap_regcomp(regex_t *preg, const CHAR_T *data, size_t len, int cflags) +{ +#ifdef HAVE_REGNCOMP + if (use_regncomp) + return tre_regncomp(preg, data, len, cflags); + else + return tre_regcomp(preg, data, cflags); +#else /* !HAVE_REGNCOMP */ + fprintf(stderr, "%s\n", data); + return tre_regcomp(preg, data, cflags); +#endif /* !HAVE_REGNCOMP */ +} + +static int +execute(const CHAR_T *data, int len, size_t pmatch_len, regmatch_t *pmatch, + int eflags) +{ +#ifdef MALLOC_DEBUGGING + int i = 0; + int ret; + + while (1) + { + xmalloc_configure(i); + comp_tests++; + ret = wrap_regexec(data, len, pmatch_len, pmatch, eflags); + if (ret != REG_ESPACE) + { + break; + } +#ifdef REGEX_DEBUG + xmalloc_dump_leaks(); +#endif /* REGEX_DEBUG */ + i++; + } + return ret; +#else /* !MALLOC_DEBUGGING */ + return wrap_regexec(data, len, pmatch_len, pmatch, eflags); +#endif /* !MALLOC_DEBUGGING */ +} + +static int +check(va_list ap, int ret, const CHAR_T *str, + size_t pmatch_len, regmatch_t *pmatch, int eflags) +{ + int fail = 0; + + if (ret != va_arg(ap, int)) + { +#ifndef WRETEST + fprintf(outf, "Exec error, regex: \"%s\", cflags %d, " + "string: \"%s\", eflags %d\n", regex_pattern, cflags_global, + str, eflags); +#else /* WRETEST */ + fprintf(outf, "Exec error, regex: \"%ls\", cflags %d, " + "string: \"%ls\", eflags %d\n", regex_pattern, cflags_global, + str, eflags); +#endif /* WRETEST */ + fprintf(outf, " got %smatch (tre_regexec 
returned %d)\n", ret ? "no " : "", ret); + return 1; + } + + if (ret == 0) + { + unsigned int i; + + for (i = 0; i < pmatch_len; i++) + { + int rm_so, rm_eo; + rm_so = va_arg(ap, int); + if (rm_so == END) + break; + rm_eo = va_arg(ap, int); +#ifdef WRETEST + if (rm_so >= 0) + { + int n = rm_so; + + if ((rm_so = woffs[rm_so]) < 0 || + (n = rm_eo, rm_eo = woffs[rm_eo]) < 0) + { + fprintf(outf, "Invalid or incomplete multi-byte sequence " + "in string %ls before byte offset %d\n", str, n); + return 1; + } + } +#endif /* WRETEST */ + if (pmatch[i].rm_so != rm_so + || pmatch[i].rm_eo != rm_eo) + { +#ifndef WRETEST + fprintf(outf, "Exec error, regex: \"%s\", string: \"%s\"\n", + regex_pattern, str); + fprintf(outf, " group %d: expected (%d, %d) \"%.*s\", " + "got (%d, %d) \"%.*s\"\n", +#else /* WRETEST */ + fprintf(outf, "Exec error, regex: \"%ls\", string: \"%ls\"\n", + regex_pattern, str); + fprintf(outf, " group %d: expected (%d, %d) \"%.*ls\", " + "got (%d, %d) \"%.*ls\"\n", +#endif /* WRETEST */ + i, rm_so, rm_eo, rm_eo - rm_so, str + rm_so, + (int)pmatch[i].rm_so, (int)pmatch[i].rm_eo, + (int)(pmatch[i].rm_eo - pmatch[i].rm_so), + str + pmatch[i].rm_so); + fail = 1; + } + } + + if (!(cflags_global & REG_NOSUB) && reobj.re_nsub != i - 1 + && reobj.re_nsub <= pmatch_len && pmatch) + { +#ifndef WRETEST + fprintf(outf, "Comp error, regex: \"%s\"\n", regex_pattern); +#else /* WRETEST */ + fprintf(outf, "Comp error, regex: \"%ls\"\n", regex_pattern); +#endif /* WRETEST */ + fprintf(outf, " re_nsub is %d, should be %d\n", (int)reobj.re_nsub, i - 1); + fail = 1; + } + + + for (; i < pmatch_len; i++) + if (pmatch[i].rm_so != -1 || pmatch[i].rm_eo != -1) + { + if (!fail) +#ifndef WRETEST + fprintf(outf, "Exec error, regex: \"%s\", string: \"%s\"\n", + regex_pattern, str); +#else /* WRETEST */ + fprintf(outf, "Exec error, regex: \"%ls\", string: \"%ls\"\n", + regex_pattern, str); +#endif /* WRETEST */ + fprintf(outf, " group %d: expected (-1, -1), got (%d, %d)\n", + i, 
(int)pmatch[i].rm_so, (int)pmatch[i].rm_eo); + fail = 1; + } + } + + return fail; +} + + +static void +test_nexec(const char *data, size_t len, int eflags, ...) +{ + int m; + int fail = 0; + int extra_flags[] = {0, REG_BACKTRACKING_MATCHER, REG_APPROX_MATCHER}; + size_t i; + va_list ap; + + if (!valid_reobj) + { + exec_errors++; + return; + } + +#ifdef WRETEST + { + int wlen = mbntowc(wstr, data, len, woffs); + if (wlen < 0) + { + exec_errors++; + fprintf(outf, "Invalid or incomplete multi-byte sequence in %s\n", data); + return; + } + wstr[wlen] = L'\0'; + len = wlen; + } +#define data wstr +#endif /* WRETEST */ + + use_regnexec = 1; + + for (i = 0; i < elementsof(extra_flags); i++) + { + int final_flags = eflags | extra_flags[i]; + + if ((final_flags & REG_BACKTRACKING_MATCHER + && tre_have_approx(&reobj)) + || (final_flags & REG_APPROX_MATCHER + && tre_have_backrefs(&reobj)) + || (final_flags & avoid_eflags)) + continue; + + /* Test with a pmatch array. */ + exec_tests++; + m = execute(data, len, elementsof(pmatch_global), pmatch_global, + final_flags); + va_start(ap, eflags); + fail |= check(ap, m, data, elementsof(pmatch_global), pmatch_global, + final_flags); + va_end(ap); + + /* Same test with a NULL pmatch. */ + exec_tests++; + m = execute(data, len, 0, NULL, final_flags); + va_start(ap, eflags); + fail |= check(ap, m, data, 0, NULL, final_flags); + va_end(ap); + } + +#ifdef WRETEST +#undef data +#endif /* WRETEST */ + + if (fail) + exec_errors++; +} + + + +static void +test_exec(const char *str, int eflags, ...) 
+{ + int m; + int fail = 0; + size_t len = strlen(str); + int extra_flags[] = {0, + REG_BACKTRACKING_MATCHER, + REG_APPROX_MATCHER, + REG_BACKTRACKING_MATCHER | REG_APPROX_MATCHER}; + size_t i; + va_list ap; + + if (!valid_reobj) + { + exec_errors++; + return; + } + +#ifdef WRETEST + { + int wlen = mbntowc(wstr, str, len, woffs); + if (wlen < 0) + { + exec_errors++; + fprintf(outf, "Invalid or incomplete multi-byte sequence in %s\n", str); + return; + } + wstr[wlen] = L'\0'; + len = wlen; + } +#define str wstr +#endif /* WRETEST */ + + for (use_regnexec = 0; use_regnexec < 2; use_regnexec++) + { + for (i = 0; i < elementsof(extra_flags); i++) + { + int final_flags = eflags | extra_flags[i]; + + if ((final_flags & REG_BACKTRACKING_MATCHER + && tre_have_approx(&reobj)) + || (final_flags & REG_APPROX_MATCHER + && tre_have_backrefs(&reobj)) + || (final_flags & avoid_eflags)) + continue; + + /* Test with a pmatch array. */ + exec_tests++; + m = execute(str, len, elementsof(pmatch_global), pmatch_global, + final_flags); + va_start(ap, eflags); + fail |= check(ap, m, str, elementsof(pmatch_global), pmatch_global, + final_flags); + va_end(ap); + + /* Same test with a NULL pmatch. */ + exec_tests++; + m = execute(str, len, 0, NULL, final_flags); + va_start(ap, eflags); + fail |= check(ap, m, str, 0, NULL, final_flags); + va_end(ap); + } + } + +#ifdef WRETEST +#undef str +#endif /* WRETEST */ + + if (fail) + exec_errors++; +} + + +static void +test_comp(const char *re, int flags, int ret) +{ + int errcode = 0; + int len = re ? 
strlen(re) : 0; + + if (valid_reobj) + { + tre_regfree(&reobj); + valid_reobj = 0; + } + + comp_tests++; + +#ifdef WRETEST + { + int wlen = mbntowc(wregex, re, len, NULL); + + if (wlen < 0) + { + comp_errors++; + fprintf(outf, "Invalid or incomplete multi-byte sequence in %s\n", re); + return; + } + wregex[wlen] = L'\0'; + len = wlen; + } +#define re wregex +#endif /* WRETEST */ + regex_pattern = re; + cflags_global = flags; + +#ifdef MALLOC_DEBUGGING + xmalloc_configure(-1); + if (ret != REG_ESPACE) { + static int j = 0; + int i = 0; + while (1) + { + xmalloc_configure(i); + comp_tests++; + if (j++ % 20 == 0) + test_status('.'); + errcode = wrap_regcomp(&reobj, re, len, flags); + if (errcode != REG_ESPACE) + { + test_status('*'); + break; + } +#ifdef REGEX_DEBUG + xmalloc_dump_leaks(); +#endif /* REGEX_DEBUG */ + i++; + } + } else +#endif /* !MALLOC_DEBUGGING */ + errcode = wrap_regcomp(&reobj, re, len, flags); + +#ifdef WRETEST +#undef re +#endif /* WRETEST */ + + if (errcode != ret) + { +#ifndef WRETEST + fprintf(outf, "Comp error, regex: \"%s\"\n", regex_pattern); +#else /* WRETEST */ + fprintf(outf, "Comp error, regex: \"%ls\"\n", regex_pattern); +#endif /* WRETEST */ + fprintf(outf, " expected return code %d, got %d.\n", + ret, errcode); + comp_errors++; + } + + if (errcode == 0) + valid_reobj = 1; +} + + + +/* To enable tests for known bugs, set this to 1. */ +#define KNOWN_BUG 0 + +int +main(int argc, char **argv) +{ + outf = stdout; +#if defined(HAVE_UNISTD_H) || defined(HAVE_GETOPT_H) + int opt; + while ((opt = getopt(argc, argv, "o:")) != EOF) + { + switch (opt) + { + case 'o': + if ((outf = fopen(optarg, "w")) == NULL) + { + perror(optarg); + exit(1); + } + break; + default: + /* getopt() will have printed an error message already */ + exit(1); + } + } +#endif /* HAVE_UNISTD_H */ + +#ifdef WRETEST + /* Need an 8-bit locale. Or move the two tests with non-ascii + characters to the localized internationalization tests. 
*/ + if (setlocale(LC_CTYPE, "en_US.ISO-8859-1") == NULL && + setlocale(LC_CTYPE, "en_US.ISO8859-1") == NULL) + fprintf(stderr, "Could not set locale en_US.ISO-8859-1. Expect some\n" + "`Invalid or incomplete multi-byte sequence' errors.\n"); +#endif /* WRETEST */ + /* Large number of macros in one regexp. */ + test_comp("[A-Z]\\d\\s?\\d[A-Z]{2}|[A-Z]\\d{2}\\s?\\d[A-Z]{2}|[A-Z]{2}\\d" + "\\s?\\d[A-Z]{2}|[A-Z]{2}\\d{2}\\s?\\d[A-Z]{2}|[A-Z]\\d[A-Z]\\s?" + "\\d[A-Z]{2}|[A-Z]{2}\\d[A-Z]\\s?\\d[A-Z]{2}|[A-Z]{3}\\s?\\d[A-Z]" + "{2}", REG_EXTENDED, 0); + + test_comp("a{11}(b{2}c){2}", REG_EXTENDED, 0); + test_comp("a{2}{2}xb+xc*xd?x", REG_EXTENDED, 0); + test_comp("^!packet [0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3} [0-9]+", + REG_EXTENDED, 0); + test_comp("^!pfast [0-9]{1,15} ([0-9]{1,3}\\.){3}[0-9]{1,3}[0-9]{1,5}$", + REG_EXTENDED, 0); + +#if KNOWN_BUG + /* Should these match or not? */ + test_comp("(a)*-\\1b", REG_EXTENDED, 0); + test_exec("aaa-b", 0, REG_NOMATCH); + test_comp("((.*)\\1)+", REG_EXTENDED, 0); + test_exec("xxxxxx", 0, REG_NOMATCH); +#endif + +#ifdef TRE_APPROX + /* + * Approximate matching tests. + * + * The approximate matcher always searches for the best match, and returns + * the leftmost and longest one if there are several best matches. 
+ */ + + test_comp("(fou){# ~1}", REG_EXTENDED, 0); + test_comp("(fuu){#}", REG_EXTENDED, 0); + test_comp("(fuu){# ~}", REG_EXTENDED, 0); + test_comp("(anaconda){ 1i + 1d < 1, #1}", REG_EXTENDED, 0); + test_comp("(anaconda){ 1i + 1d < 1 #1 ~10 }", REG_EXTENDED, 0); + test_comp("(anaconda){ #1, ~1, 1i + 1d < 1 }", REG_EXTENDED, 0); + + test_comp("(znacnda){ #1 ~3 1i + 1d < 1 }", REG_EXTENDED, 0); + test_exec("molasses anaconda foo bar baz smith anderson ", + 0, REG_NOMATCH); + test_comp("(znacnda){ #1 ~3 1i + 1d < 2 }", REG_EXTENDED, 0); + test_exec("molasses anaconda foo bar baz smith anderson ", + 0, REG_OK, 9, 17, 9, 17, END); + test_comp("(ananda){ 1i + 1d < 2 }", REG_EXTENDED, 0); + test_exec("molasses anaconda foo bar baz smith anderson ", + 0, REG_NOMATCH); + + test_comp("(fuu){ +3 -3 ~5}", REG_EXTENDED, 0); + test_exec("anaconda foo bar baz smith anderson", + 0, REG_OK, 9, 10, 9, 10, END); + test_comp("(fuu){ +2 -2 ~5}", REG_EXTENDED, 0); + test_exec("anaconda foo bar baz smith anderson", + 0, REG_OK, 9, 10, 9, 10, END); + test_comp("(fuu){ +3 -3 ~}", REG_EXTENDED, 0); + test_exec("anaconda foo bar baz smith anderson", + 0, REG_OK, 9, 10, 9, 10, END); + + test_comp("(laurikari){ #3, 1i + 1d < 3 }", REG_EXTENDED, 0); + + /* No cost limit. */ + test_comp("(foobar){~}", REG_EXTENDED, 0); + test_exec("xirefoabralfobarxie", 0, REG_OK, 11, 16, 11, 16, END); + + /* At most two errors. */ + test_comp("(foobar){~2}", REG_EXTENDED, 0); + test_exec("xirefoabrzlfd", 0, REG_OK, 4, 9, 4, 9, END); + test_exec("xirefoabzlfd", 0, REG_NOMATCH); + + /* At most two inserts or substitutions and max two errors total. */ + test_comp("(foobar){+2#2~2}", REG_EXTENDED, 0); + test_exec("oobargoobaploowap", 0, REG_OK, 5, 11, 5, 11, END); + + /* Find best whole word match for "foobar". 
*/ + test_comp("\\<(foobar){~}\\>", REG_EXTENDED, 0); + test_exec("zfoobarz", 0, REG_OK, 0, 8, 0, 8, END); + test_exec("boing zfoobarz goobar woop", 0, REG_OK, 15, 21, 15, 21, END); + + /* Match whole string, allow only 1 error. */ + test_comp("^(foobar){~1}$", REG_EXTENDED, 0); + test_exec("foobar", 0, REG_OK, 0, 6, 0, 6, END); + test_exec("xfoobar", 0, REG_OK, 0, 7, 0, 7, END); + /* + This currently fails. + test_exec("foobarx", 0, REG_OK, 0, 7, 0, 7, END); + */ + test_exec("fooxbar", 0, REG_OK, 0, 7, 0, 7, END); + test_exec("foxbar", 0, REG_OK, 0, 6, 0, 6, END); + test_exec("xoobar", 0, REG_OK, 0, 6, 0, 6, END); + test_exec("foobax", 0, REG_OK, 0, 6, 0, 6, END); + test_exec("oobar", 0, REG_OK, 0, 5, 0, 5, END); + test_exec("fobar", 0, REG_OK, 0, 5, 0, 5, END); + test_exec("fooba", 0, REG_OK, 0, 5, 0, 5, END); + test_exec("xfoobarx", 0, REG_NOMATCH); + test_exec("foobarxx", 0, REG_NOMATCH); + test_exec("xxfoobar", 0, REG_NOMATCH); + test_exec("xfoxbar", 0, REG_NOMATCH); + test_exec("foxbarx", 0, REG_NOMATCH); + + /* At most one insert, two deletes, and three substitutions. + Additionally, deletes cost two and substitutes one, and total + cost must be less than 4. */ + test_comp("(foobar){+1 -2 #3, 2d + 1s < 4}", REG_EXTENDED, 0); + test_exec("3oifaowefbaoraofuiebofasebfaobfaorfeoaro", + 0, REG_OK, 26, 33, 26, 33, END); + + /* Partially approximate matches. */ + test_comp("foo(bar){~1}zap", REG_EXTENDED, 0); + test_exec("foobarzap", 0, REG_OK, 0, 9, 3, 6, END); + test_exec("fobarzap", 0, REG_NOMATCH); + test_exec("foobrzap", 0, REG_OK, 0, 8, 3, 5, END); + test_comp("^.*(dot.org){~}.*$", REG_EXTENDED, 0); + test_exec("www.cnn.com 64.236.16.20\n" + "www.slashdot.org 66.35.250.150\n" + "For useful information, use www.slashdot.org\n" + "this is demo data!\n", + 0, REG_OK, 0, 120, 93, 100, END); + + /* Approximate matching and back referencing cannot be used together. 
*/ + test_comp("(foo{~})\\1", REG_EXTENDED, REG_BADPAT); + +#endif /* TRE_APPROX */ + + /* + * Basic tests with pure regular expressions + */ + + /* Basic string matching. */ + test_comp("foobar", REG_EXTENDED, 0); + test_exec("foobar", 0, REG_OK, 0, 6, END); + test_exec("xxxfoobarzapzot", 0, REG_OK, 3, 9, END); + test_comp("foobar", REG_EXTENDED | REG_NOSUB, 0); + test_exec("foobar", 0, REG_OK, END); + test_comp("aaaa", REG_EXTENDED, 0); + test_exec("xxaaaaaaaaaaaaaaaaa", 0, REG_OK, 2, 6, END); + + /* Test zero length matches. */ + test_comp("(a*)", REG_EXTENDED, 0); + test_exec("", 0, REG_OK, 0, 0, 0, 0, END); + + test_comp("(a*)*", REG_EXTENDED, 0); + test_exec("", 0, REG_OK, 0, 0, 0, 0, END); + + test_comp("((a*)*)*", REG_EXTENDED, 0); + test_exec("", 0, REG_OK, 0, 0, 0, 0, 0, 0, END); + test_comp("(a*bcd)*", REG_EXTENDED, 0); + test_exec("aaaaaaaaaaaabcxbcxbcxaabcxaabcx", 0, REG_OK, 0, 0, -1, -1, END); + test_exec("aaaaaaaaaaaabcxbcxbcxaabcxaabc", 0, REG_OK, 0, 0, -1, -1, END); + test_exec("aaaaaaaaaaaabcxbcdbcxaabcxaabc", 0, REG_OK, 0, 0, -1, -1, END); + test_exec("aaaaaaaaaaaabcdbcdbcxaabcxaabc", 0, REG_OK, 0, 18, 15, 18, END); + + test_comp("(a*)+", REG_EXTENDED, 0); + test_exec("-", 0, REG_OK, 0, 0, 0, 0, END); + + /* This test blows up the backtracking matcher. */ + avoid_eflags = REG_BACKTRACKING_MATCHER; + test_comp("((a*)*b)*b", REG_EXTENDED, 0); + test_exec("aaaaaaaaaaaaaaaaaaaaaaaaab", 0, REG_OK, + 25, 26, -1, -1, -1, -1, END); + avoid_eflags = 0; + + test_comp("", 0, 0); + test_exec("", 0, REG_OK, 0, 0, END); + test_exec("foo", 0, REG_OK, 0, 0, END); + + /* Test for submatch addressing which requires arbitrary lookahead. */ + test_comp("(a*)aaaaaa", REG_EXTENDED, 0); + test_exec("aaaaaaaaaaaaaaax", 0, REG_OK, 0, 15, 0, 9, END); + + /* Test leftmost and longest matching and some tricky submatches. 
*/ + test_comp("(a*)(a*)", REG_EXTENDED, 0); + test_exec("aaaa", 0, REG_OK, 0, 4, 0, 4, 4, 4, END); + test_comp("(abcd|abc)(d?)", REG_EXTENDED, 0); + test_exec("abcd", 0, REG_OK, 0, 4, 0, 4, 4, 4, END); + test_comp("(abc|abcd)(d?)", REG_EXTENDED, 0); + test_exec("abcd", 0, REG_OK, 0, 4, 0, 4, 4, 4, END); + test_comp("(abc|abcd)(d?)e", REG_EXTENDED, 0); + test_exec("abcde", 0, REG_OK, 0, 5, 0, 4, 4, 4, END); + test_comp("(abcd|abc)(d?)e", REG_EXTENDED, 0); + test_exec("abcde", 0, REG_OK, 0, 5, 0, 4, 4, 4, END); + test_comp("a(bc|bcd)(d?)", REG_EXTENDED, 0); + test_exec("abcd", 0, REG_OK, 0, 4, 1, 4, 4, 4, END); + test_comp("a(bcd|bc)(d?)", REG_EXTENDED, 0); + test_exec("abcd", 0, REG_OK, 0, 4, 1, 4, 4, 4, END); + test_comp("a*(a?bc|bcd)(d?)", REG_EXTENDED, 0); + test_exec("aaabcd", 0, REG_OK, 0, 6, 3, 6, 6, 6, END); + test_comp("a*(bcd|a?bc)(d?)", REG_EXTENDED, 0); + test_exec("aaabcd", 0, REG_OK, 0, 6, 3, 6, 6, 6, END); + test_comp("(a|(a*b*))*", REG_EXTENDED, 0); + test_exec("", 0, REG_OK, 0, 0, 0, 0, 0, 0, END); + test_exec("a", 0, REG_OK, 0, 1, 0, 1, -1, -1, END); + test_exec("aa", 0, REG_OK, 0, 2, 0, 2, 0, 2, END); + test_exec("aaa", 0, REG_OK, 0, 3, 0, 3, 0, 3, END); + test_exec("bbb", 0, REG_OK, 0, 3, 0, 3, 0, 3, END); + test_exec("aaabbb", 0, REG_OK, 0, 6, 0, 6, 0, 6, END); + test_exec("bbbaaa", 0, REG_OK, 0, 6, 3, 6, 3, 6, END); + test_comp("((a*b*)|a)*", REG_EXTENDED, 0); + test_exec("", 0, REG_OK, 0, 0, 0, 0, 0, 0, END); + test_exec("a", 0, REG_OK, 0, 1, 0, 1, 0, 1, END); + test_exec("aa", 0, REG_OK, 0, 2, 0, 2, 0, 2, END); + test_exec("aaa", 0, REG_OK, 0, 3, 0, 3, 0, 3, END); + test_exec("bbb", 0, REG_OK, 0, 3, 0, 3, 0, 3, END); + test_exec("aaabbb", 0, REG_OK, 0, 6, 0, 6, 0, 6, END); + test_exec("bbbaaa", 0, REG_OK, 0, 6, 3, 6, 3, 6, END); + test_comp("a.*(.*b.*(.*c.*).*d.*).*e.*(.*f.*).*g", REG_EXTENDED, 0); + test_exec("aabbccddeeffgg", 0, REG_OK, 0, 14, 3, 9, 5, 7, 11, 13, END); + test_comp("(wee|week)(night|knights)s*", REG_EXTENDED, 0); + 
test_exec("weeknights", 0, REG_OK, 0, 10, 0, 3, 3, 10, END); + test_exec("weeknightss", 0, REG_OK, 0, 11, 0, 3, 3, 10, END); + test_comp("a*", REG_EXTENDED, 0); + test_exec("aaaaaaaaaa", 0, REG_OK, 0, 10, END); + test_comp("aa*", REG_EXTENDED, 0); + test_exec("aaaaaaaaaa", 0, REG_OK, 0, 10, END); + test_comp("aaa*", REG_EXTENDED, 0); + test_exec("aaaaaaaaaa", 0, REG_OK, 0, 10, END); + test_comp("aaaa*", REG_EXTENDED, 0); + test_exec("aaaaaaaaaa", 0, REG_OK, 0, 10, END); + + /* Test clearing old submatch data with nesting parentheses + and iteration. */ + test_comp("((a)|(b))*c", REG_EXTENDED, 0); + test_exec("aaabc", 0, REG_OK, 0, 5, 3, 4, -1, -1, 3, 4, END); + test_exec("aaaac", 0, REG_OK, 0, 5, 3, 4, 3, 4, -1, -1, END); + test_comp("foo((bar)*)*zot", REG_EXTENDED, 0); + test_exec("foozot", 0, REG_OK, 0, 6, 3, 3, -1, -1, END); + test_exec("foobarzot", 0, REG_OK, 0, 9, 3, 6, 3, 6, END); + test_exec("foobarbarzot", 0, REG_OK, 0, 12, 3, 9, 6, 9, END); + + test_comp("foo((zup)*|(bar)*|(zap)*)*zot", REG_EXTENDED, 0); + test_exec("foobarzapzot", 0, REG_OK, + 0, 12, 6, 9, -1, -1, -1, -1, 6, 9, END); + test_exec("foobarbarzapzot", 0, REG_OK, + 0, 15, 9, 12, -1, -1, -1, -1, 9, 12, END); + test_exec("foozupzot", 0, REG_OK, + 0, 9, 3, 6, 3, 6, -1, -1, -1, -1, END); + test_exec("foobarzot", 0, REG_OK, + 0, 9, 3, 6, -1, -1, 3, 6, -1, -1, END); + test_exec("foozapzot", 0, REG_OK, + 0, 9, 3, 6, -1, -1, -1, -1, 3, 6, END); + test_exec("foozot", 0, REG_OK, + 0, 6, 3, 3, -1, -1, -1, -1, -1, -1, END); + + + /* Test case where, e.g., Perl and Python regexp functions, and many + other backtracking matchers, fail to produce the longest match. + It is not exactly a bug since Perl does not claim to find the + longest match, but a confusing feature and, in my opinion, a bad + design choice because the union operator is traditionally defined + to be commutative (with respect to the language denoted by the RE). 
*/ + test_comp("(a|ab)(blip)?", REG_EXTENDED, 0); + test_exec("ablip", 0, REG_OK, 0, 5, 0, 1, 1, 5, END); + test_exec("ab", 0, REG_OK, 0, 2, 0, 2, -1, -1, END); + test_comp("(ab|a)(blip)?", REG_EXTENDED, 0); + test_exec("ablip", 0, REG_OK, 0, 5, 0, 1, 1, 5, END); + test_exec("ab", 0, REG_OK, 0, 2, 0, 2, -1, -1, END); + + /* Test more submatch addressing. */ + test_comp("((a|b)*)a(a|b)*", REG_EXTENDED, 0); + test_exec("aaaaabaaaba", 0, REG_OK, 0, 11, 0, 10, 9, 10, -1, -1, END); + test_exec("aaaaabaaab", 0, REG_OK, 0, 10, 0, 8, 7, 8, 9, 10, END); + test_exec("caa", 0, REG_OK, 1, 3, 1, 2, 1, 2, -1, -1, END); + test_comp("((a|aba)*)(ababbaba)((a|b)*)", REG_EXTENDED, 0); + test_exec("aabaababbabaaababbab", 0, REG_OK, + 0, 20, 0, 4, 1, 4, 4, 12, 12, 20, 19, 20, END); + test_exec("aaaaababbaba", 0, REG_OK, + 0, 12, 0, 4, 3, 4, 4, 12, 12, 12, -1, -1, END); + test_comp("((a|aba|abb|bba|bab)*)(ababbababbabbbabbbbbbabbaba)((a|b)*)", + REG_EXTENDED, 0); + test_exec("aabaabbbbabababaababbababbabbbabbbbbbabbabababbababababbabababa", + 0, REG_OK, 0, 63, 0, 16, 13, 16, 16, 43, 43, 63, 62, 63, END); + + /* Test for empty subexpressions. */ + test_comp("", 0, 0); + test_exec("", 0, REG_OK, 0, 0, END); + test_exec("foo", 0, REG_OK, 0, 0, END); + test_comp("(a|)", REG_EXTENDED, 0); + test_exec("a", 0, REG_OK, 0, 1, 0, 1, END); + test_exec("b", 0, REG_OK, 0, 0, 0, 0, END); + test_exec("", 0, REG_OK, 0, 0, 0, 0, END); + test_comp("a|", REG_EXTENDED, 0); + test_exec("a", 0, REG_OK, 0, 1, END); + test_exec("b", 0, REG_OK, 0, 0, END); + test_exec("", 0, REG_OK, 0, 0, END); + test_comp("|a", REG_EXTENDED, 0); + test_exec("a", 0, REG_OK, 0, 1, END); + test_exec("b", 0, REG_OK, 0, 0, END); + test_exec("", 0, REG_OK, 0, 0, END); + + /* Miscellaneous tests. 
*/ + test_comp("(a*)b(c*)", REG_EXTENDED, 0); + test_exec("abc", 0, REG_OK, 0, 3, 0, 1, 2, 3, END); + test_exec("***abc***", 0, REG_OK, 3, 6, 3, 4, 5, 6, END); + test_comp("(a)", REG_EXTENDED, 0); + test_exec("a", 0, REG_OK, 0, 1, 0, 1, END); + test_comp("((a))", REG_EXTENDED, 0); + test_exec("a", 0, REG_OK, 0, 1, 0, 1, 0, 1, END); + test_comp("(((a)))", REG_EXTENDED, 0); + test_exec("a", 0, REG_OK, 0, 1, 0, 1, 0, 1, 0, 1, END); + test_comp("((((((((((((((((((((a))))))))))))))))))))", REG_EXTENDED, 0); + test_exec("a", 0, REG_OK, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, END); + + test_comp("ksntoeaiksntoeaikstneoaiksnteoaiksntoeaiskntoeaiskntoekainstoei" + "askntoeakisntoeksaitnokesantiksoentaikosentaiksoentaiksnoeaiskn" + "teoaksintoekasitnoeksaitkosetniaksoetnaisknoetakistoeksintokesa" + "nitksoentaisknoetaisknoetiaksotneaikstoekasitoeskatioksentaikso" + "enatiksoetnaiksonateiksoteaeskanotisknetaiskntoeasknitoskenatis" + "konetaisknoteai", 0, 0); + + test_comp("((aab)|(aac)|(aa*))c", REG_EXTENDED, 0); + test_exec("aabc", 0, REG_OK, 0, 4, 0, 3, 0, 3, -1, -1, -1, -1, END); + test_exec("aacc", 0, REG_OK, 0, 4, 0, 3, -1, -1, 0, 3, -1, -1, END); + test_exec("aaac", 0, REG_OK, 0, 4, 0, 3, -1, -1, -1, -1, 0, 3, END); + + test_comp("^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", + REG_EXTENDED, 0); + test_exec("foo!bar!bas", 0, REG_OK, + 0, 11, 0, 11, -1, -1, -1, -1, 4, 8, 8, 11, END); + test_comp("^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", + REG_EXTENDED, 0); + test_exec("foo!bar!bas", 0, REG_OK, + 0, 11, -1, -1, -1, -1, 4, 8, 8, 11, END); + test_comp("^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", + REG_EXTENDED, 0); + test_exec("foo!bar!bas", 0, REG_OK, + 0, 11, 0, 11, -1, -1, -1, -1, 4, 8, 8, 11, END); + + test_comp("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", + REG_EXTENDED, 0); + test_exec("Muammar Quathafi", 0, REG_OK, 0, 16, -1, -1, 11, 13, END); + + 
test_comp("(Ab|cD)*", REG_EXTENDED | REG_ICASE, 0); + test_exec("aBcD", 0, REG_OK, 0, 4, 2, 4, END); + + test_comp("a**", REG_EXTENDED, REG_BADRPT); + test_comp("a*+", REG_EXTENDED, REG_BADRPT); + test_comp("a+*", REG_EXTENDED, REG_BADRPT); + test_comp("a++", REG_EXTENDED, REG_BADRPT); + test_comp("a?+", REG_EXTENDED, REG_BADRPT); + test_comp("a?*", REG_EXTENDED, REG_BADRPT); + test_comp("a{1,2}*", REG_EXTENDED, REG_BADRPT); + test_comp("a{1,2}+", REG_EXTENDED, REG_BADRPT); + + /* + * Many of the following tests were mostly inspired by (or copied from) the + * libhackerlab posix test suite by Tom Lord. + */ + + test_comp("a", 0, 0); + test_exec("a", 0, REG_OK, 0, 1, END); + test_comp("\\.", 0, 0); + test_exec(".", 0, REG_OK, 0, 1, END); + test_comp("\\[", 0, 0); + test_exec("[", 0, REG_OK, 0, 1, END); + test_comp("\\\\", 0, 0); + test_exec("\\", 0, REG_OK, 0, 1, END); + test_comp("\\*", 0, 0); + test_exec("*", 0, REG_OK, 0, 1, END); + test_comp("\\^", 0, 0); + test_exec("^", 0, REG_OK, 0, 1, END); + test_comp("\\$", 0, 0); + test_exec("$", 0, REG_OK, 0, 1, END); + + test_comp("\\", 0, REG_EESCAPE); + + test_comp("x\\.", 0, 0); + test_exec("x.", 0, REG_OK, 0, 2, END); + test_comp("x\\[", 0, 0); + test_exec("x[", 0, REG_OK, 0, 2, END); + test_comp("x\\\\", 0, 0); + test_exec("x\\", 0, REG_OK, 0, 2, END); + test_comp("x\\*", 0, 0); + test_exec("x*", 0, REG_OK, 0, 2, END); + test_comp("x\\^", 0, 0); + test_exec("x^", 0, REG_OK, 0, 2, END); + test_comp("x\\$", 0, 0); + test_exec("x$", 0, REG_OK, 0, 2, END); + + test_comp("x\\", 0, REG_EESCAPE); + + test_comp(".", 0, 0); + test_exec("a", 0, REG_OK, 0, 1, END); + test_exec("\n", 0, REG_OK, 0, 1, END); + + test_comp("(+|?)", 0, 0); + test_exec("(+|?)", 0, REG_OK, 0, 5, END); + test_exec("+|?", 0, REG_NOMATCH); + test_exec("(+)", 0, REG_NOMATCH); + test_exec("+", 0, REG_NOMATCH); + + + /* + * Test bracket expressions. 
+ */ + + test_comp("[", 0, REG_EBRACK); + test_comp("[]", 0, REG_EBRACK); + test_comp("[^]", 0, REG_EBRACK); + + test_comp("[]x]", 0, 0); + test_exec("]", 0, REG_OK, 0, 1, END); + test_exec("x", 0, REG_OK, 0, 1, END); + + test_comp("[.]", 0, 0); + test_exec(".", 0, REG_OK, 0, 1, END); + test_exec("a", 0, REG_NOMATCH); + + test_comp("[*]", 0, 0); + test_exec("*", 0, REG_OK, 0, 1, END); + + test_comp("[[]", 0, 0); + test_exec("[", 0, REG_OK, 0, 1, END); + + test_comp("[\\]", 0, 0); + test_exec("\\", 0, REG_OK, 0, 1, END); + + test_comp("[-x]", 0, 0); + test_exec("-", 0, REG_OK, 0, 1, END); + test_exec("x", 0, REG_OK, 0, 1, END); + test_comp("[x-]", 0, 0); + test_exec("-", 0, REG_OK, 0, 1, END); + test_exec("x", 0, REG_OK, 0, 1, END); + test_comp("[-]", 0, 0); + test_exec("-", 0, REG_OK, 0, 1, END); + + test_comp("[abc]", 0, 0); + test_exec("a", 0, REG_OK, 0, 1, END); + test_exec("b", 0, REG_OK, 0, 1, END); + test_exec("c", 0, REG_OK, 0, 1, END); + test_exec("d", 0, REG_NOMATCH); + test_exec("xa", 0, REG_OK, 1, 2, END); + test_exec("xb", 0, REG_OK, 1, 2, END); + test_exec("xc", 0, REG_OK, 1, 2, END); + test_exec("xd", 0, REG_NOMATCH); + test_comp("x[abc]", 0, 0); + test_exec("xa", 0, REG_OK, 0, 2, END); + test_exec("xb", 0, REG_OK, 0, 2, END); + test_exec("xc", 0, REG_OK, 0, 2, END); + test_exec("xd", 0, REG_NOMATCH); + test_comp("[^abc]", 0, 0); + test_exec("a", 0, REG_NOMATCH); + test_exec("b", 0, REG_NOMATCH); + test_exec("c", 0, REG_NOMATCH); + test_exec("d", 0, REG_OK, 0, 1, END); + test_exec("xa", 0, REG_OK, 0, 1, END); + test_exec("xb", 0, REG_OK, 0, 1, END); + test_exec("xc", 0, REG_OK, 0, 1, END); + test_exec("xd", 0, REG_OK, 0, 1, END); + test_comp("x[^abc]", 0, 0); + test_exec("xa", 0, REG_NOMATCH); + test_exec("xb", 0, REG_NOMATCH); + test_exec("xc", 0, REG_NOMATCH); + test_exec("xd", 0, REG_OK, 0, 2, END); + + test_comp("[()+?*\\]+", REG_EXTENDED, 0); + test_exec("x\\*?+()x", 0, REG_OK, 1, 7, END); + + /* Standard character classes. 
*/ + test_comp("[[:alnum:]]+", REG_EXTENDED, 0); + test_exec("%abc123890XYZ=", 0, REG_OK, 1, 13, END); + test_comp("[[:cntrl:]]+", REG_EXTENDED, 0); + test_exec("%\n\t\015\f ", 0, REG_OK, 1, 5, END); + test_comp("[[:lower:]]+", REG_EXTENDED, 0); + test_exec("AbcdE", 0, REG_OK, 1, 4, END); + test_comp("[[:lower:]]+", REG_EXTENDED | REG_ICASE, 0); + test_exec("AbcdE", 0, REG_OK, 0, 5, END); + test_comp("[[:space:]]+", REG_EXTENDED, 0); + test_exec("x \t\f\nx", 0, REG_OK, 1, 5, END); + test_comp("[[:alpha:]]+", REG_EXTENDED, 0); + test_exec("%abC123890xyz=", 0, REG_OK, 1, 4, END); + test_comp("[[:digit:]]+", REG_EXTENDED, 0); + test_exec("%abC123890xyz=", 0, REG_OK, 4, 10, END); + test_comp("[^[:digit:]]+", REG_EXTENDED, 0); + test_exec("%abC123890xyz=", 0, REG_OK, 0, 4, END); + test_comp("[[:print:]]+", REG_EXTENDED, 0); + test_exec("\n\t %abC12\f", 0, REG_OK, 2, 9, END); + test_comp("[[:upper:]]+", REG_EXTENDED, 0); + test_exec("\n aBCDEFGHIJKLMNOPQRSTUVWXYz", 0, REG_OK, 3, 27, END); + test_comp("[[:upper:]]+", REG_EXTENDED | REG_ICASE, 0); + test_exec("\n aBCDEFGHIJKLMNOPQRSTUVWXYz", 0, REG_OK, 2, 28, END); +#ifdef HAVE_ISWBLANK +#ifdef HAVE_ISBLANK + test_comp("[[:blank:]]+", REG_EXTENDED, 0); + test_exec("\na \t b", 0, REG_OK, 2, 5, END); +#endif /* HAVE_ISBLANK */ +#endif /* HAVE_ISWBLANK */ + test_comp("[[:graph:]]+", REG_EXTENDED, 0); + test_exec("\n %abC12\f", 0, REG_OK, 2, 8, END); + test_comp("[[:punct:]]+", REG_EXTENDED, 0); + test_exec("a~!@#$%^&*()_+=-`[]{};':\"|\\,./?>< ", + 0, REG_OK, 1, 33, END); + test_comp("[[:xdigit:]]+", REG_EXTENDED, 0); + test_exec("-0123456789ABCDEFabcdef", 0, REG_OK, 1, 23, END); + test_comp("[[:bogus-character-class-name:]", REG_EXTENDED, REG_ECTYPE); + test_comp("[[:\xff:", REG_EXTENDED, REG_ECTYPE); + + + /* Range expressions (assuming that the C locale is being used). 
*/ + test_comp("[a-z]+", REG_EXTENDED, 0); + test_exec("ABCabcxyzABC", 0, REG_OK, 3, 9, END); + test_comp("[z-a]+", REG_EXTENDED, REG_ERANGE); + test_comp("[a-b-c]", 0, REG_ERANGE); + test_comp("[a-a]+", REG_EXTENDED, 0); + test_exec("zaaaaab", 0, REG_OK, 1, 6, END); + test_comp("[--Z]+", REG_EXTENDED, 0); + test_exec("!ABC-./XYZ~", 0, REG_OK, 1, 10, END); + test_comp("[*--]", 0, 0); + test_exec("-", 0, REG_OK, 0, 1, END); + test_exec("*", 0, REG_OK, 0, 1, END); + test_comp("[*--Z]+", REG_EXTENDED, 0); + test_exec("!+*,---ABC", 0, REG_OK, 1, 7, END); + test_comp("[a-]+", REG_EXTENDED, 0); + test_exec("xa-a--a-ay", 0, REG_OK, 1, 9, END); + + /* REG_ICASE and character sets. */ + test_comp("[a-c]*", REG_ICASE | REG_EXTENDED, 0); + test_exec("cABbage", 0, REG_OK, 0, 5, END); + test_comp("[^a-c]*", REG_ICASE | REG_EXTENDED, 0); + test_exec("tObAcCo", 0, REG_OK, 0, 2, END); + test_comp("[A-C]*", REG_ICASE | REG_EXTENDED, 0); + test_exec("cABbage", 0, REG_OK, 0, 5, END); + test_comp("[^A-C]*", REG_ICASE | REG_EXTENDED, 0); + test_exec("tObAcCo", 0, REG_OK, 0, 2, END); + + /* Complex character sets. 
*/ + test_comp("[[:digit:]a-z#$%]+", REG_EXTENDED, 0); + test_exec("__abc#lmn012$x%yz789*", 0, REG_OK, 2, 20, END); + test_comp("[[:digit:]a-z#$%]+", REG_ICASE | REG_EXTENDED, 0); + test_exec("__abcLMN012x%#$yz789*", 0, REG_OK, 2, 20, END); + test_comp("[^[:digit:]a-z#$%]+", REG_EXTENDED, 0); + test_exec("abc#lmn012$x%yz789--@*,abc", 0, REG_OK, 18, 23, END); + test_comp("[^[:digit:]a-z#$%]+", REG_ICASE | REG_EXTENDED, 0); + test_exec("abc#lmn012$x%yz789--@*,abc", 0, REG_OK, 18, 23, END); + test_comp("[^[:digit:]#$%[:xdigit:]]+", REG_ICASE | REG_EXTENDED, 0); + test_exec("abc#lmn012$x%yz789--@*,abc", 0, REG_OK, 4, 7, END); + test_comp("[^-]+", REG_EXTENDED, 0); + test_exec("---afd*(&,ml---", 0, REG_OK, 3, 12, END); + test_comp("[^--Z]+", REG_EXTENDED, 0); + test_exec("---AFD*(&,ml---", 0, REG_OK, 6, 12, END); + test_comp("[^--Z]+", REG_ICASE | REG_EXTENDED, 0); + test_exec("---AFD*(&,ml---", 0, REG_OK, 6, 10, END); + + /* Unsupported things (equivalence classes and multicharacter collating + elements) */ + test_comp("[[.foo.]]", 0, REG_ECOLLATE); + test_comp("[[=foo=]]", 0, REG_ECOLLATE); + test_comp("[[..]]", 0, REG_ECOLLATE); + test_comp("[[==]]", 0, REG_ECOLLATE); + test_comp("[[.]]", 0, REG_ECOLLATE); + test_comp("[[=]]", 0, REG_ECOLLATE); + test_comp("[[.]", 0, REG_ECOLLATE); + test_comp("[[=]", 0, REG_ECOLLATE); + test_comp("[[.", 0, REG_ECOLLATE); + test_comp("[[=", 0, REG_ECOLLATE); + + + + /* Miscellaneous tests. 
*/ + test_comp("abc\\(\\(de\\)\\(fg\\)\\)hi", 0, 0); + test_exec("xabcdefghiy", 0, REG_OK, 1, 10, 4, 8, 4, 6, 6, 8, END); + + test_comp("abc*def", 0, 0); + test_exec("xabdefy", 0, REG_OK, 1, 6, END); + test_exec("xabcdefy", 0, REG_OK, 1, 7, END); + test_exec("xabcccccccdefy", 0, REG_OK, 1, 13, END); + + test_comp("abc\\(def\\)*ghi", 0, 0); + test_exec("xabcghiy", 0, REG_OK, 1, 7, -1, -1, END); + test_exec("xabcdefghi", 0, REG_OK, 1, 10, 4, 7, END); + test_exec("xabcdefdefdefghi", 0, REG_OK, 1, 16, 10, 13, END); + + test_comp("a?", REG_EXTENDED, REG_OK); + test_exec("aaaaa", 0, REG_OK, 0, 1, END); + test_exec("xaaaaa", 0, REG_OK, 0, 0, END); + test_comp("a+", REG_EXTENDED, REG_OK); + test_exec("aaaaa", 0, REG_OK, 0, 5, END); + test_exec("xaaaaa", 0, REG_OK, 1, 6, END); + + + /* + * Test anchors and their behaviour with the REG_NEWLINE compilation + * flag and the REG_NOTBOL, REG_NOTEOL execution flags. + */ + + /* Normally, `^' matches the empty string at beginning of input. + If REG_NOTBOL is used, `^' won't match the zero length string. */ + test_comp("^abc", 0, 0); + test_exec("abcdef", 0, REG_OK, 0, 3, END); + test_exec("abcdef", REG_NOTBOL, REG_NOMATCH); + test_exec("xyzabcdef", 0, REG_NOMATCH); + test_exec("xyzabcdef", REG_NOTBOL, REG_NOMATCH); + test_exec("\nabcdef", 0, REG_NOMATCH); + test_exec("\nabcdef", REG_NOTBOL, REG_NOMATCH); + + /* Normally, `$' matches the empty string at end of input. + If REG_NOTEOL is used, `$' won't match the zero length string. 
*/ + test_comp("abc$", 0, 0); + test_exec("defabc", 0, REG_OK, 3, 6, END); + test_exec("defabc", REG_NOTEOL, REG_NOMATCH); + test_exec("defabcxyz", 0, REG_NOMATCH); + test_exec("defabcxyz", REG_NOTEOL, REG_NOMATCH); + test_exec("defabc\n", 0, REG_NOMATCH); + test_exec("defabc\n", REG_NOTEOL, REG_NOMATCH); + + test_comp("^abc$", 0, 0); + test_exec("abc", 0, REG_OK, 0, 3, END); + test_exec("abc", REG_NOTBOL, REG_NOMATCH); + test_exec("abc", REG_NOTEOL, REG_NOMATCH); + test_exec("abc", REG_NOTBOL | REG_NOTEOL, REG_NOMATCH); + test_exec("\nabc\n", 0, REG_NOMATCH); + test_exec("defabc\n", 0, REG_NOMATCH); + test_exec("\nabcdef", 0, REG_NOMATCH); + test_exec("abcdef", 0, REG_NOMATCH); + test_exec("defabc", 0, REG_NOMATCH); + test_exec("abc\ndef", 0, REG_NOMATCH); + test_exec("def\nabc", 0, REG_NOMATCH); + + /* If REG_NEWLINE is used, `^' matches the empty string immediately after + a newline, regardless of whether execution flags contain REG_NOTBOL. + Similarly, if REG_NEWLINE is used, `$' matches the empty string + immediately before a newline, regardless of execution flags. 
*/ + test_comp("^abc", REG_NEWLINE, 0); + test_exec("abcdef", 0, REG_OK, 0, 3, END); + test_exec("abcdef", REG_NOTBOL, REG_NOMATCH); + test_exec("xyzabcdef", 0, REG_NOMATCH); + test_exec("xyzabcdef", REG_NOTBOL, REG_NOMATCH); + test_exec("\nabcdef", 0, REG_OK, 1, 4, END); + test_exec("\nabcdef", REG_NOTBOL, 0, 1, 4, END); + test_comp("abc$", REG_NEWLINE, 0); + test_exec("defabc", 0, REG_OK, 3, 6, END); + test_exec("defabc", REG_NOTEOL, REG_NOMATCH); + test_exec("defabcxyz", 0, REG_NOMATCH); + test_exec("defabcxyz", REG_NOTEOL, REG_NOMATCH); + test_exec("defabc\n", 0, REG_OK, 3, 6, END); + test_exec("defabc\n", REG_NOTEOL, 0, 3, 6, END); + test_comp("^abc$", REG_NEWLINE, 0); + test_exec("abc", 0, REG_OK, 0, 3, END); + test_exec("abc", REG_NOTBOL, REG_NOMATCH); + test_exec("abc", REG_NOTEOL, REG_NOMATCH); + test_exec("abc", REG_NOTBOL | REG_NOTEOL, REG_NOMATCH); + test_exec("\nabc\n", 0, REG_OK, 1, 4, END); + test_exec("defabc\n", 0, REG_NOMATCH); + test_exec("\nabcdef", 0, REG_NOMATCH); + test_exec("abcdef", 0, REG_NOMATCH); + test_exec("abcdef", REG_NOTBOL, REG_NOMATCH); + test_exec("defabc", 0, REG_NOMATCH); + test_exec("defabc", REG_NOTEOL, REG_NOMATCH); + test_exec("abc\ndef", 0, REG_OK, 0, 3, END); + test_exec("abc\ndef", REG_NOTBOL, REG_NOMATCH); + test_exec("abc\ndef", REG_NOTEOL, 0, 0, 3, END); + test_exec("abc\ndef", REG_NOTBOL | REG_NOTEOL, REG_NOMATCH); + test_exec("def\nabc", 0, REG_OK, 4, 7, END); + test_exec("def\nabc", REG_NOTBOL, 0, 4, 7, END); + test_exec("def\nabc", REG_NOTEOL, REG_NOMATCH); + test_exec("def\nabc", REG_NOTBOL | REG_NOTEOL, REG_NOMATCH); + + /* With BRE syntax, `^' has a special meaning only at the beginning of the + RE or the beginning of a parenthesized subexpression. 
*/ + test_comp("a\\{0,1\\}^bc", 0, 0); + test_exec("bc", 0, REG_NOMATCH); + test_exec("^bc", 0, REG_OK, 0, 3, END); + test_exec("abc", 0, REG_NOMATCH); + test_exec("a^bc", 0, REG_OK, 0, 4, END); + test_comp("a\\{0,1\\}\\(^bc\\)", 0, 0); + test_exec("bc", 0, REG_OK, 0, 2, 0, 2, END); + test_exec("^bc", 0, REG_NOMATCH); + test_exec("abc", 0, REG_NOMATCH); + test_exec("a^bc", 0, REG_NOMATCH); + test_comp("(^a", 0, 0); + test_exec("(^a", 0, REG_OK, 0, 3, END); + + /* With BRE syntax, `$' has a special meaning only at the end of the + RE or the end of a parenthesized subexpression. */ + test_comp("ab$c\\{0,1\\}", 0, 0); + test_exec("ab", 0, REG_NOMATCH); + test_exec("ab$", 0, REG_OK, 0, 3, END); + test_exec("abc", 0, REG_NOMATCH); + test_exec("ab$c", 0, REG_OK, 0, 4, END); + test_comp("\\(ab$\\)c\\{0,1\\}", 0, 0); + test_exec("ab", 0, REG_OK, 0, 2, 0, 2, END); + test_exec("ab$", 0, REG_NOMATCH); + test_exec("abc", 0, REG_NOMATCH); + test_exec("ab$c", 0, REG_NOMATCH); + test_comp("a$)", 0, 0); + test_exec("a$)", 0, REG_OK, 0, 3, END); + + /* Miscellaneous tests for `^' and `$'. */ + test_comp("foo^$", REG_EXTENDED, 0); + test_exec("foo", 0, REG_NOMATCH); + test_comp("x$\n^y", REG_EXTENDED | REG_NEWLINE, 0); + test_exec("foo\nybarx\nyes\n", 0, REG_OK, 8, 11, END); + test_comp("^$", 0, 0); + test_exec("x", 0, REG_NOMATCH); + test_exec("", 0, REG_OK, 0, 0, END); + test_exec("\n", 0, REG_NOMATCH); + test_comp("^$", REG_NEWLINE, 0); + test_exec("x", 0, REG_NOMATCH); + test_exec("", 0, REG_OK, 0, 0, END); + test_exec("\n", 0, REG_OK, 0, 0, END); + + /* REG_NEWLINE causes `.' not to match newlines. */ + test_comp(".*", 0, 0); + test_exec("ab\ncd", 0, REG_OK, 0, 5, END); + test_comp(".*", REG_NEWLINE, 0); + test_exec("ab\ncd", 0, REG_OK, 0, 2, END); + + /* + * Tests for nonstandard syntax extensions. + */ + + /* Zero width assertions. 
*/ + test_comp("\\", REG_EXTENDED, 0); + test_exec("axx xaa", 0, REG_OK, 2, 3, END); + test_exec("aax", 0, REG_OK, 2, 3, END); + test_comp("\\bx", REG_EXTENDED, 0); + test_exec("axx xaa", 0, REG_OK, 4, 5, END); + test_exec("aax", 0, REG_NOMATCH); + test_exec("xax", 0, REG_OK, 0, 1, END); + test_comp("x\\b", REG_EXTENDED, 0); + test_exec("axx xaa", 0, REG_OK, 2, 3, END); + test_exec("aax", 0, REG_OK, 2, 3, END); + test_exec("xaa", 0, REG_NOMATCH); + test_comp("\\Bx", REG_EXTENDED, 0); + test_exec("aax xxa", 0, REG_OK, 2, 3, END); + test_comp("\\Bx\\b", REG_EXTENDED, 0); + test_exec("aax xxx", 0, REG_OK, 2, 3, END); + test_comp("\\<.", REG_EXTENDED, 0); + test_exec(";xaa", 0, REG_OK, 1, 2, END); + + /* Shorthands for character classes. */ + test_comp("\\w+", REG_EXTENDED, 0); +#ifdef SRC_IN_ISO_8859_1 + test_exec(",.(a23_Nt-o)", 0, REG_OK, 3, 9, END); +#else +#ifdef SRC_IN_UTF_8 + /* iconv -f ISO-8859-1 -t UTF-8 file_with_lines_above > www_utf_8 */ + test_exec(",.(a23_Nt-öo)", 0, REG_OK, 3, 9, END); +#else + unsigned char str_000[] = { + ',','.','(','a','2','3','_','N','t','-',0xF6,'o',0x00 + }; + test_exec((char const *)str_000, 0, REG_OK, 3, 9, END); +#endif +#endif + test_comp("\\d+", REG_EXTENDED, 0); + test_exec("uR120_4=v4", 0, REG_OK, 2, 5, END); + test_comp("\\D+", REG_EXTENDED, 0); + test_exec("120d_=vA4s", 0, REG_OK, 3, 8, END); + + /* Quoted special characters. */ + test_comp("\\t", REG_EXTENDED, 0); + test_comp("\\e", REG_EXTENDED, 0); + + /* Test the \x1B and \x{263a} extensions for specifying 8 bit and wide + characters in hexadecimal. 
*/ + test_comp("\\x41", REG_EXTENDED, 0); + test_exec("ABC", 0, REG_OK, 0, 1, END); + test_comp("\\x5", REG_EXTENDED, 0); + test_exec("\005", 0, REG_OK, 0, 1, END); + test_comp("\\x5r", REG_EXTENDED, 0); + test_exec("\005r", 0, REG_OK, 0, 2, END); + test_comp("\\x", REG_EXTENDED, 0); + test_nexec("\000", 1, 0, REG_OK, 0, 1, END); + test_comp("\\xr", REG_EXTENDED, 0); + test_nexec("\000r", 2, 0, REG_OK, 0, 2, END); + test_comp("\\x{41}", REG_EXTENDED, 0); + test_exec("ABC", 0, REG_OK, 0, 1, END); + test_comp("\\x{5}", REG_EXTENDED, 0); + test_exec("\005", 0, REG_OK, 0, 1, END); + test_comp("\\x{5}r", REG_EXTENDED, 0); + test_exec("\005r", 0, REG_OK, 0, 2, END); + test_comp("\\x{}", REG_EXTENDED, 0); + test_nexec("\000", 1, 0, REG_OK, 0, 1, END); + test_comp("\\x{}r", REG_EXTENDED, 0); + test_nexec("\000r", 2, 0, REG_OK, 0, 2, END); + test_comp("\\x{00000000}", REG_EXTENDED, 0); + test_comp("\\x{000000000}", REG_EXTENDED, REG_EBRACE); + + /* Tests for (?inrU-inrU) and (?inrU-inrU:) */ + test_comp("foo(?i)bar", REG_EXTENDED, 0); + test_exec("fooBaR", 0, REG_OK, 0, 6, END); + test_comp("foo(?i)bar|zap", REG_EXTENDED, 0); + test_exec("fooBaR", 0, REG_OK, 0, 6, END); + test_exec("foozap", 0, REG_OK, 0, 6, END); + test_exec("foozAp", 0, REG_OK, 0, 6, END); + test_exec("zap", 0, REG_NOMATCH); + test_comp("foo(?-i:zap)zot", REG_EXTENDED | REG_ICASE, 0); + test_exec("FoOzapZOt", 0, REG_OK, 0, 9, END); + test_exec("FoOzApZOt", 0, REG_NOMATCH); + test_comp("foo(?i:bar|zap)", REG_EXTENDED, 0); + test_exec("foozap", 0, REG_OK, 0, 6, END); + test_exec("foobar", 0, REG_OK, 0, 6, END); + test_exec("foobAr", 0, REG_OK, 0, 6, END); + test_exec("fooZaP", 0, REG_OK, 0, 6, END); + test_comp("foo(?U:o*)(o*)", REG_EXTENDED, 0); + test_exec("foooo", 0, REG_OK, 0, 5, 3, 5, END); + + /* Test comment syntax. */ + test_comp("foo(?# This here is a comment. )bar", REG_EXTENDED, 0); + test_exec("foobar", 0, REG_OK, 0, 6, END); + + /* Tests for \Q and \E. 
*/ + test_comp("\\((\\Q)?:\\<[^$\\E)", REG_EXTENDED, 0); + test_exec("()?:\\<[^$", 0, REG_OK, 0, 9, 1, 9, END); + test_comp("\\Qabc\\E.*", REG_EXTENDED, 0); + test_exec("abcdef", 0, REG_OK, 0, 6, END); + test_comp("\\Qabc\\E.*|foo", REG_EXTENDED, 0); + test_exec("parabc123wxyz", 0, REG_OK, 3, 13, END); + test_exec("fooabc123wxyz", 0, REG_OK, 0, 3, END); + + /* + * Test integer parser used for bounded repititions. + */ + + test_comp("a{9223372036854775808,}", REG_EXTENDED, REG_BADMAX); + test_comp("a{9223372036854775808}", REG_EXTENDED, REG_BADMAX); + test_comp("a{9223372036854775807,}", REG_EXTENDED, REG_BADMAX); + test_comp("a{9223372036854775807}", REG_EXTENDED, REG_BADMAX); + test_comp("a{2147483648,}", REG_EXTENDED, REG_BADMAX); + test_comp("a{2147483648}", REG_EXTENDED, REG_BADMAX); + test_comp("a{2147483647,}", REG_EXTENDED, REG_BADMAX); + test_comp("a{2147483647}", REG_EXTENDED, REG_BADMAX); + test_comp("a{32768,}", REG_EXTENDED, REG_BADMAX); + test_comp("a{32768}", REG_EXTENDED, REG_BADMAX); + test_comp("a{32767,}", REG_EXTENDED, REG_BADMAX); + test_comp("a{32767}", REG_EXTENDED, REG_BADMAX); + test_comp("a{256,}", REG_EXTENDED, REG_BADMAX); + test_comp("a{256}", REG_EXTENDED, REG_BADMAX); + test_comp("a{255,}", REG_EXTENDED, REG_OK); + test_comp("a{255}", REG_EXTENDED, REG_OK); + + /* + * Test bounded repetitions. 
+ */ + + test_comp("a{0,0}", REG_EXTENDED, REG_OK); + test_exec("aaa", 0, REG_OK, 0, 0, END); + test_comp("a{0,1}", REG_EXTENDED, REG_OK); + test_exec("aaa", 0, REG_OK, 0, 1, END); + test_comp("a{1,1}", REG_EXTENDED, REG_OK); + test_exec("aaa", 0, REG_OK, 0, 1, END); + test_comp("a{1,3}", REG_EXTENDED, REG_OK); + test_exec("xaaaaa", 0, REG_OK, 1, 4, END); + test_comp("a{0,3}", REG_EXTENDED, REG_OK); + test_exec("aaaaa", 0, REG_OK, 0, 3, END); + test_comp("a{0,}", REG_EXTENDED, REG_OK); + test_exec("", 0, REG_OK, 0, 0, END); + test_exec("a", 0, REG_OK, 0, 1, END); + test_exec("aa", 0, REG_OK, 0, 2, END); + test_exec("aaa", 0, REG_OK, 0, 3, END); + test_comp("a{1,}", REG_EXTENDED, REG_OK); + test_exec("", 0, REG_NOMATCH); + test_exec("a", 0, REG_OK, 0, 1, END); + test_exec("aa", 0, REG_OK, 0, 2, END); + test_exec("aaa", 0, REG_OK, 0, 3, END); + test_comp("a{2,}", REG_EXTENDED, REG_OK); + test_exec("", 0, REG_NOMATCH); + test_exec("a", 0, REG_NOMATCH); + test_exec("aa", 0, REG_OK, 0, 2, END); + test_exec("aaa", 0, REG_OK, 0, 3, END); + test_comp("a{3,}", REG_EXTENDED, REG_OK); + test_exec("", 0, REG_NOMATCH); + test_exec("a", 0, REG_NOMATCH); + test_exec("aa", 0, REG_NOMATCH); + test_exec("aaa", 0, REG_OK, 0, 3, END); + test_exec("aaaa", 0, REG_OK, 0, 4, END); + test_exec("aaaaa", 0, REG_OK, 0, 5, END); + test_exec("aaaaaa", 0, REG_OK, 0, 6, END); + test_exec("aaaaaaa", 0, REG_OK, 0, 7, END); + test_comp("a{,}", REG_EXTENDED, REG_OK); + test_exec("", 0, REG_OK, 0, 0, END); + test_exec("aaa", 0, REG_OK, 0, 3, END); + test_comp("a{,0}", REG_EXTENDED, REG_OK); + test_exec("", 0, REG_OK, 0, 0, END); + test_exec("aaa", 0, REG_OK, 0, 0, END); + test_comp("a{,1}", REG_EXTENDED, REG_OK); + test_exec("", 0, REG_OK, 0, 0, END); + test_exec("a", 0, REG_OK, 0, 1, END); + test_exec("aa", 0, REG_OK, 0, 1, END); + test_comp("a{,2}", REG_EXTENDED, REG_OK); + test_exec("", 0, REG_OK, 0, 0, END); + test_exec("a", 0, REG_OK, 0, 1, END); + test_exec("aa", 0, REG_OK, 0, 2, END); + 
test_exec("aaa", 0, REG_OK, 0, 2, END); + + test_comp("a{5,10}", REG_EXTENDED, REG_OK); + test_comp("a{6,6}", REG_EXTENDED, REG_OK); + test_exec("aaaaaaaaaaaa", 0, REG_OK, 0, 6, END); + test_exec("xxaaaaaaaaaaaa", 0, REG_OK, 2, 8, END); + test_exec("xxaaaaa", 0, REG_NOMATCH); + test_comp("a{5,6}", REG_EXTENDED, REG_OK); + test_exec("aaaaaaaaaaaa", 0, REG_OK, 0, 6, END); + test_exec("xxaaaaaaaaaaaa", 0, REG_OK, 2, 8, END); + test_exec("xxaaaaa", 0, REG_OK, 2, 7, END); + test_exec("xxaaaa", 0, REG_NOMATCH); + + /* Trickier ones... */ + test_comp("([ab]{5,10})*b", REG_EXTENDED, REG_OK); + test_exec("bbbbbabaaaaab", 0, REG_OK, 0, 13, 5, 12, END); + test_exec("bbbbbbaaaaab", 0, REG_OK, 0, 12, 5, 11, END); + test_exec("bbbbbbaaaab", 0, REG_OK, 0, 11, 0, 10, END); + test_exec("bbbbbbaaab", 0, REG_OK, 0, 10, 0, 9, END); + test_exec("bbbbbbaab", 0, REG_OK, 0, 9, 0, 8, END); + test_exec("bbbbbbab", 0, REG_OK, 0, 8, 0, 7, END); + + test_comp("([ab]*)(ab[ab]{5,10})ba", REG_EXTENDED, REG_OK); + test_exec("abbabbbabaabbbbbbbbbbbbbabaaaabab", 0, REG_OK, + 0, 10, 0, 0, 0, 8, END); + test_exec("abbabbbabaabbbbbbbbbbbbabaaaaabab", 0, REG_OK, + 0, 32, 0, 23, 23, 30, END); + test_exec("abbabbbabaabbbbbbbbbbbbabaaaabab", 0, REG_OK, + 0, 24, 0, 10, 10, 22, END); + test_exec("abbabbbabaabbbbbbbbbbbba", 0, REG_OK, + 0, 24, 0, 10, 10, 22, END); + + test_comp("^((a{1,2})?x)*y", REG_EXTENDED | REG_NOSUB, REG_OK); + test_exec("y", 0, REG_OK, END); + test_exec("xy", 0, REG_OK, END); + test_exec("axy", 0, REG_OK, END); + test_exec("aaxy", 0, REG_OK, END); + test_exec("aaaxy", 0, REG_NOMATCH, END); + + /* Test repeating something that has submatches inside. 
*/ + test_comp("(a){0,5}", REG_EXTENDED, 0); + test_exec("", 0, REG_OK, 0, 0, -1, -1, END); + test_exec("a", 0, REG_OK, 0, 1, 0, 1, END); + test_exec("aa", 0, REG_OK, 0, 2, 1, 2, END); + test_exec("aaa", 0, REG_OK, 0, 3, 2, 3, END); + test_exec("aaaa", 0, REG_OK, 0, 4, 3, 4, END); + test_exec("aaaaa", 0, REG_OK, 0, 5, 4, 5, END); + test_exec("aaaaaa", 0, REG_OK, 0, 5, 4, 5, END); + + test_comp("(a){2,3}", REG_EXTENDED, 0); + test_exec("", 0, REG_NOMATCH); + test_exec("a", 0, REG_NOMATCH); + test_exec("aa", 0, REG_OK, 0, 2, 1, 2, END); + test_exec("aaa", 0, REG_OK, 0, 3, 2, 3, END); + test_exec("aaaa", 0, REG_OK, 0, 3, 2, 3, END); + + test_comp("\\(a\\)\\{4\\}", 0, 0); + test_exec("aaaa", 0, REG_OK, 0, 4, 3, 4, END); + + test_comp("\\(a*\\)\\{2\\}", 0, 0); + test_exec("a", 0, REG_OK, 0, 1, 1, 1, END); + + test_comp("((..)|(.)){2}", REG_EXTENDED, 0); + test_exec("aa", 0, REG_OK, 0, 2, 1, 2, -1, -1, 1, 2, END); + + /* Nested repeats. */ + test_comp("(.){2}{3}", REG_EXTENDED, 0); + test_exec("xxxxx", 0, REG_NOMATCH); + test_exec("xxxxxx", 0, REG_OK, 0, 6, 5, 6, END); + test_comp("(..){2}{3}", REG_EXTENDED, 0); + test_exec("xxxxxxxxxxx", 0, REG_NOMATCH); + test_exec("xxxxxxxxxxxx", 0, REG_OK, 0, 12, 10, 12, END); + test_comp("((..){2}.){3}", REG_EXTENDED, 0); + test_exec("xxxxxxxxxxxxxx", 0, REG_NOMATCH); + test_exec("xxxxxxxxxxxxxxx", 0, REG_OK, 0, 15, 10, 15, 12, 14, END); + test_comp("((..){1,2}.){3}", REG_EXTENDED, 0); + test_exec("xxxxxxxx", 0, REG_NOMATCH); + test_exec("xxxxxxxxx", 0, REG_OK, 0, 9, 6, 9, 6, 8, END); + test_exec("xxxxxxxxxx", 0, REG_OK, 0, 9, 6, 9, 6, 8, END); + test_exec("xxxxxxxxxxx", 0, REG_OK, 0, 11, 8, 11, 8, 10, END); + test_comp("a{2}{2}x", REG_EXTENDED, 0); + test_exec("", 0, REG_NOMATCH); + test_exec("x", 0, REG_NOMATCH); + test_exec("ax", 0, REG_NOMATCH); + test_exec("aax", 0, REG_NOMATCH); + test_exec("aaax", 0, REG_NOMATCH); + test_exec("aaaax", 0, REG_OK, 0, 5, END); + test_exec("aaaaax", 0, REG_OK, 1, 6, END); + test_exec("aaaaaax", 
0, REG_OK, 2, 7, END); + test_exec("aaaaaaax", 0, REG_OK, 3, 8, END); + test_exec("aaaaaaaax", 0, REG_OK, 4, 9, END); + + /* Repeats with iterations inside. */ + test_comp("([a-z]+){2,5}", REG_EXTENDED, 0); + test_exec("a\n", 0, REG_NOMATCH); + test_exec("aa\n", 0, REG_OK, 0, 2, 1, 2, END); + + /* Multiple repeats in one regexp. */ + test_comp("a{3}b{3}", REG_EXTENDED, 0); + test_exec("aaabbb", 0, REG_OK, 0, 6, END); + test_exec("aaabbbb", 0, REG_OK, 0, 6, END); + test_exec("aaaabbb", 0, REG_OK, 1, 7, END); + test_exec("aabbb", 0, REG_NOMATCH); + test_exec("aaabb", 0, REG_NOMATCH); + + /* Test that different types of repetitions work correctly when used + in the same regexp. */ + test_comp("a{2}{2}xb+xc*xd?x", REG_EXTENDED, 0); + test_exec("aaaaxbxcxdx", 0, REG_OK, 0, 11, END); + test_exec("aaaxbxcxdx", 0, REG_NOMATCH); + test_exec("aabxcxdx", 0, REG_NOMATCH); + test_exec("aaaacxdx", 0, REG_NOMATCH); + test_exec("aaaaxbdx", 0, REG_NOMATCH); + test_comp("^!packet [0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3} [0-9]+", + REG_EXTENDED, 0); + test_exec("!packet 10.0.2.4 12765 ei voittoa", 0, REG_OK, 0, 22, END); + + /* + * Back referencing tests. 
+ */ + test_comp("([a-z]*) \\1", REG_EXTENDED, 0); + test_exec("foobar foobar", 0, REG_OK, 0, 13, 0, 6, END); + + /* Searching for a leftmost longest square (repeated string) */ + test_comp("(.*)\\1", REG_EXTENDED, 0); + test_exec("foobarfoobar", 0, REG_OK, 0, 12, 0, 6, END); + + test_comp("a(b)*c\\1", REG_EXTENDED, 0); + test_exec("acb", 0, REG_OK, 0, 2, -1, -1, END); + test_exec("abbcbbb", 0, REG_OK, 0, 5, 2, 3, END); + test_exec("abbdbd", 0, REG_NOMATCH); + + test_comp("([a-c]*)\\1", REG_EXTENDED, 0); + test_exec("abcacdef", 0, REG_OK, 0, 0, 0, 0, END); + test_exec("abcabcabcd", 0, REG_OK, 0, 6, 0, 3, END); + test_comp("(.{1,3})\\1", REG_EXTENDED, 0); + test_exec("foo", 0, REG_OK, 1, 3, 1, 2, END); + + test_comp("\\(a*\\)*\\(x\\)\\(\\1\\)", 0, 0); + test_exec("x", 0, REG_OK, 0, 1, 0, 0, 0, 1, 1, 1, END); +#if KNOWN_BUG + test_exec("ax", 0, REG_OK, 0, 2, 1, 1, 1, 2, 2, 2, END); +#endif + + test_comp("(a)\\1{1,2}", REG_EXTENDED, 0); + test_exec("aabc", 0, REG_OK, 0, 2, 0, 1, END); + + test_comp("((.*)\\1)+", REG_EXTENDED, 0); + test_exec("aa", 0, REG_OK, 0, 2, 0, 2, 0, 1, END); + +#if KNOWN_BUG + test_comp("()(\\1\\1)*", REG_EXTENDED, 0); + test_exec("", 0, REG_OK, 0, 0, 0, 0, 0, 0, END); +#endif + + /* Check that back references work with REG_NOSUB. */ + test_comp("(o)\\1", REG_EXTENDED | REG_NOSUB, 0); + test_exec("foobar", 0, REG_OK, END); + test_comp("(o)\\1", REG_EXTENDED, 0); + test_exec("foobar", 0, REG_OK, 1, 3, 1, 2, END); + test_comp("(o)\\1", REG_EXTENDED, 0); + test_exec("fobar", 0, REG_NOMATCH); + + test_comp("\\1foo", REG_EXTENDED, REG_ESUBREG); + test_comp("\\1foo(bar)", REG_EXTENDED, 0); + + /* Back reference with zero-width assertion. */ + test_comp("(.)\\1$", REG_EXTENDED, 0); + test_exec("foox", 0, REG_NOMATCH); + test_exec("foo", 0, REG_OK, 1, 3, 1, 2, END); + + /* Back references together with {}. 
*/ + test_comp("([0-9]{5})\\1", REG_EXTENDED, 0); + test_exec("12345", 0, REG_NOMATCH); + test_exec("1234512345", 0, REG_OK, 0, 10, 0, 5, END); + test_comp("([0-9]{4})\\1", REG_EXTENDED, 0); + test_exec("1234", 0, REG_NOMATCH); + test_exec("12341234", 0, REG_OK, 0, 8, 0, 4, END); + + /* + * Test minimal repetitions (non-greedy repetitions) + */ + avoid_eflags = REG_BACKTRACKING_MATCHER | REG_APPROX_MATCHER; + + /* Basic .*/ + test_comp(".*?", REG_EXTENDED, 0); + test_exec("abcd", 0, REG_OK, 0, 0, END); + test_comp(".+?", REG_EXTENDED, 0); + test_exec("abcd", 0, REG_OK, 0, 1, END); + test_comp(".??", REG_EXTENDED, 0); + test_exec("abcd", 0, REG_OK, 0, 0, END); + test_comp(".{2,5}?", REG_EXTENDED, 0); + test_exec("abcd", 0, REG_OK, 0, 2, END); + + /* More complicated. */ + test_comp("(.*?)", REG_EXTENDED, 0); + test_exec("text1text2", 0, REG_OK, 0, 12, 3, 8, END); + test_comp("a(.*?)(foo|bar|zap)", REG_EXTENDED, 0); + test_exec("hubba wooga-booga zabar gafoo wazap", 0, REG_OK, + 4, 23, 5, 20, 20, 23, END); + + /* Test REG_UNGREEDY. */ + test_comp(".*", REG_EXTENDED | REG_UNGREEDY, 0); + test_exec("abcd", 0, REG_OK, 0, 0, END); + test_comp(".*?", REG_EXTENDED | REG_UNGREEDY, 0); + test_exec("abcd", 0, REG_OK, 0, 4, END); + + avoid_eflags = 0; + + + /* + * Error reporting tests. 
+ */ + + test_comp("\\", REG_EXTENDED, REG_EESCAPE); + test_comp("\\\\", REG_EXTENDED, REG_OK); + test_exec("\\", 0, REG_OK, 0, 1, END); + test_comp("(", REG_EXTENDED, REG_EPAREN); + test_comp("(aaa", REG_EXTENDED, REG_EPAREN); + test_comp(")", REG_EXTENDED, REG_OK); + test_exec(")", 0, REG_OK, 0, 1, END); + test_comp("a{1", REG_EXTENDED, REG_EBRACE); + test_comp("a{1,x}", REG_EXTENDED, REG_BADBR); + test_comp("a{1x}", REG_EXTENDED, REG_BADBR); + test_comp("a{1,0}", REG_EXTENDED, REG_BADBR); + test_comp("a{x}", REG_EXTENDED, REG_BADBR); + test_comp("a{}", REG_EXTENDED, REG_BADBR); + + + test_comp("\\", 0, REG_EESCAPE); + test_comp("\\(", 0, REG_EPAREN); + test_comp("\\)", 0, REG_EPAREN); + test_comp("a\\{1", 0, REG_EBRACE); + test_comp("a\\{1,x\\}", 0, REG_BADBR); + test_comp("a\\{1x\\}", 0, REG_BADBR); + test_comp("a\\{1,0\\}", 0, REG_BADBR); + test_comp("a\\{x\\}", 0, REG_BADBR); + test_comp("a\\{\\}", 0, REG_BADBR); + test_comp("a\\{1,256\\}", 0, REG_BADMAX); + + + test_comp(NULL, REG_BASIC, REG_OK); + test_comp(NULL, REG_EXTENDED, REG_OK); + + + /* + * Internationalization tests. + */ + + /* This same test with the correct locale is below. + TBR: This is a guess for the source encoding, see comments below after the locale is set to a Japanese locale. */ +#ifdef SRC_IN_EUC_JP + test_comp("+", REG_EXTENDED, 0); + test_exec("ξޤϡ", + 0, REG_OK, 10, 13, END); +#else +#ifdef SRC_IN_UTF_8 + /* iconv -f EUC_JP -t UTF-8 file_with_lines_above > zzz_utf_8 + This may be incorrect because the match results might be incorrect for UTF-8, I (TBR) just don't know enough to be certain. + It compiles and runs successfully on my desktop with the C.UTF-8 locale. */ + test_comp("機+", REG_EXTENDED, 0); + test_exec("この賞は、機・利便性・セキ", + 0, REG_OK, 15, 18, END); +#else + /* Represent the test strings as a sequence of bytes so we don't run afoul of the compiler's expected source-charset. 
*/ + unsigned char str_001[] = { + 0xB5,0xA1,'+',0x00 + }; + unsigned char str_002[] = { + 0xA4,0xB3,0xA4,0xCE,0xBE,0xDE,0xA4,0xCF,0xA1,0xA2,0xB5,0xA1,0xA1,0xA6,0xCD,0xF8,0xCA,0xD8,0xC0,0xAD,0xA1,0xA6,0xA5,0xBB,0xA5,0xAD,0x00 + }; + test_comp((char const *)str_001, REG_EXTENDED, 0); + test_exec((char const *)str_002, 0, REG_OK, 10, 13, END); +#endif +#endif + +#if !defined(WIN32) && !defined(__OpenBSD__) + if (setlocale(LC_CTYPE, "en_US.ISO-8859-1") != NULL || + setlocale(LC_CTYPE, "en_US.ISO8859-1") != NULL) + { + fprintf(outf, "\nTesting LC_CTYPE en_US.ISO-8859-1\n"); +#ifdef SRC_IN_ISO_8859_1 + test_comp("aBCdeFghiJKlmnoPQRstuvWXyZ", REG_ICASE, 0); + test_exec("abCDefGhiJKlmNoPqRStuVwXyz", 0, REG_OK, 0, 29, END); +#else +#ifdef SRC_IN_UTF_8 + /* iconv -f ISO-8859-1 -t UTF-8 file_with_lines_above > yyy_utf_8 */ + /* This fails with no match on freebsd, but succeeds in linux. */ + test_comp("aBCdeFghiJKlmnoPQRstuvWXyZåäö", REG_ICASE, 0); + test_exec("abCDefGhiJKlmNoPqRStuVwXyzÅÄÖ", 0, REG_OK, 0, 29, END); +#else + /* Represent the test strings as a sequence of bytes so we don't run afoul of the compiler's expected source-charset. */ + unsigned char str_003[] = { + 'a','B','C','d','e','F','g','h','i','J','K','l','m','n','o','P','Q','R','s','t','u','v','W','X','y','Z',0xE5,0xE4,0xF6,0x00 + }; + unsigned char str_004[] = { + 'a','b','C','D','e','f','G','h','i','J','K','l','m','N','o','P','q','R','S','t','u','V','w','X','y','z',0xC5,0xC4,0xD6,0x00 + }; + test_comp((char const *)str_003, REG_ICASE, 0); + test_exec((char const *)str_004, 0, REG_OK, 0, 29, END); +#endif +#endif + } + +#ifdef TRE_MULTIBYTE + if (setlocale(LC_CTYPE, "ja_JP.eucjp") != NULL || + setlocale(LC_CTYPE, "ja_JP.eucJP") != NULL) + { + fprintf(outf, "\nTesting LC_CTYPE ja_JP.eucjp\n"); + /* I tried to make a test where implementations not aware of multibyte + character sets will fail. I have no idea what the japanese text here + means, I took it from http://www.ipsec.co.jp/. 
*/ + /* TBR 2023/03/22: iconv has (at least) the following encoding names for Japanese: + EUC-JIS-2004 EUC-JISX0213 + EUC-JP-MS EUCJP-MS EUCJP-OPEN EUCJP-WIN EUCJPMS + EUC-JP CSEUCPKDFMTJAPANESE EUCJP IBM-EUCJP + ISO-2022-JP-1 ISO2022-JP1 + ISO-2022-JP-2 CSISO2022JP2 ISO2022-JP2 ISO-2022-JP-2004 ISO-2022-JP-3 ISO2022-JP2004 ISO2022-JP3 + ISO-2022-JP CSISO2022JP ISO2022-JP + Both iconv arguments of EUC-JP and EUC-JP-MS produced the converted strings below, + all the others I tried resulted in invalid characters. So guess at EUC-JP. + If anyone knows what the encoding actually was, feel free to let me know at tbr at acm dot org :). */ +#ifdef SRC_IN_EUC_JP + test_comp("+", REG_EXTENDED, 0); + test_exec("ξޤϡ", 0, REG_OK, 10, 12, END); +#else +#ifdef SRC_IN_UTF_8 + /* iconv -f EUC_JP -t UTF-8 file_with_lines_above > zzz_utf_8 + This may fail because the match results might be incorrect for UTF-8, I (TBR) just don't know enough to be certain. + It compiles and runs successfully on my desktop with the C.UTF-8 locale. */ + test_comp("機+", REG_EXTENDED, 0); + test_exec("この賞は、機・利便性・セキ", 0, REG_OK, 10, 12, END); +#else + /* Represent the test strings as a sequence of bytes so we don't run afoul of the compiler's expected source-charset. */ + /* This test uses the same strings (str_001 and str_002) as above, now with a Japanese locale. + NOTE THE DIFFERENCE IN MATCH RESULTS - (10,13) earlier with the default locale, and (10,12) here with the Japanese locale. 
*/ + test_comp((char const *)str_001, REG_EXTENDED, 0); + test_exec((char const *)str_002, 0, REG_OK, 10, 12, END); +#endif +#endif + test_comp("a", REG_EXTENDED, 0); + test_nexec("foo\000bar", 7, 0, REG_OK, 5, 6, END); + test_comp("c$", REG_EXTENDED, 0); + test_exec("abc", 0, REG_OK, 2, 3, END); + } + else + { + fprintf(outf, "\nTRE_MULTIBYTE enabled, but skipping LC_CTYPE ja_JP.eucJP (locale unavailable)\n"); + } +#endif /* TRE_MULTIBYTE */ +#endif + + tre_regfree(&reobj); + + fprintf(outf, "\n"); + if (comp_errors || exec_errors) + fprintf(outf, "%d (%d + %d) out of %d tests FAILED!\n", + comp_errors + exec_errors, comp_errors, exec_errors, + comp_tests + exec_tests); + else + fprintf(outf, "All %d tests passed.\n", comp_tests + exec_tests); + + +#ifdef MALLOC_DEBUGGING + if (xmalloc_dump_leaks()) + return 1; +#endif /* MALLOC_DEBUGGING */ + + return comp_errors || exec_errors; +} + +/* EOF */ diff --git a/deps/tre/tests/test-literal-opt.c b/deps/tre/tests/test-literal-opt.c new file mode 100644 index 000000000..62853e07d --- /dev/null +++ b/deps/tre/tests/test-literal-opt.c @@ -0,0 +1,303 @@ +/* + test-literal-opt.c - Validate TRE literal optimization against the + generic matcher. + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. 
+ +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include + +#include "tre-internal.h" + +#define PMATCH_SLOTS 4 +#define RC_ANY -9999 + +typedef struct { + const char *name; + const char *pattern; + size_t pattern_len; + int cflags; + const char *string; + size_t string_len; + int eflags; + int expected_rc; + tre_literal_opt_mode_t expected_mode; +} litopt_case_t; + +static void +init_pmatch(regmatch_t pmatch[], size_t count) +{ + size_t i; + + for (i = 0; i < count; i++) + { + pmatch[i].rm_so = 111; + pmatch[i].rm_eo = 222; + } +} + +static int +same_pmatch(const regmatch_t a[], const regmatch_t b[], size_t count) +{ + size_t i; + + for (i = 0; i < count; i++) + if (a[i].rm_so != b[i].rm_so || a[i].rm_eo != b[i].rm_eo) + return 0; + return 1; +} + +static int +pmatch_cleared(const regmatch_t pmatch[], size_t count) +{ + size_t i; + + for (i = 0; i < count; i++) + if (pmatch[i].rm_so != -1 || pmatch[i].rm_eo != -1) + return 0; + return 1; +} + +static int +run_case(const litopt_case_t *tc) +{ + regex_t preg; + tre_tnfa_t *tnfa; + regmatch_t fast[PMATCH_SLOTS], slow[PMATCH_SLOTS]; + tre_literal_opt_mode_t saved_mode; + char errbuf[256]; + int errcode, fast_rc, slow_rc; + + memset(&preg, 0, sizeof(preg)); + errcode = tre_regncompb(&preg, tc->pattern, tc->pattern_len, tc->cflags); + if (errcode != REG_OK) + { + tre_regerror(errcode, &preg, errbuf, sizeof(errbuf)); + fprintf(stderr, "%s: compile failed: %s\n", tc->name, errbuf); + return 1; + } + + tnfa = (tre_tnfa_t *)preg.value; + if (tnfa->literal_opt.mode != tc->expected_mode) + { + fprintf(stderr, "%s: optimizer mode %d, expected %d\n", + tc->name, (int)tnfa->literal_opt.mode, (int)tc->expected_mode); + tre_regfree(&preg); + return 1; + } + + init_pmatch(fast, PMATCH_SLOTS); + init_pmatch(slow, PMATCH_SLOTS); + + fast_rc = tre_regnexecb(&preg, tc->string, tc->string_len, + PMATCH_SLOTS, fast, tc->eflags); + + saved_mode = tnfa->literal_opt.mode; + tnfa->literal_opt.mode = 
TRE_LITERAL_OPT_NONE; + slow_rc = tre_regnexecb(&preg, tc->string, tc->string_len, + PMATCH_SLOTS, slow, tc->eflags); + tnfa->literal_opt.mode = saved_mode; + + if (fast_rc != slow_rc) + { + fprintf(stderr, "%s: fast rc %d, slow rc %d\n", + tc->name, fast_rc, slow_rc); + tre_regfree(&preg); + return 1; + } + + if (tc->expected_rc != RC_ANY && fast_rc != tc->expected_rc) + { + fprintf(stderr, "%s: rc %d, expected %d\n", + tc->name, fast_rc, tc->expected_rc); + tre_regfree(&preg); + return 1; + } + + if (!same_pmatch(fast, slow, PMATCH_SLOTS)) + { + fprintf(stderr, "%s: fast and slow pmatch differ\n", tc->name); + tre_regfree(&preg); + return 1; + } + + if ((tc->cflags & REG_NOSUB) && fast_rc == REG_OK + && !pmatch_cleared(fast, PMATCH_SLOTS)) + { + fprintf(stderr, "%s: REG_NOSUB match did not clear pmatch\n", tc->name); + tre_regfree(&preg); + return 1; + } + + tre_regfree(&preg); + return 0; +} + +int +main(void) +{ + static const char nonascii_pattern[] = { (char)0xc0, '|', (char)0xe0 }; + static const char nonascii_haystack[] = { 'x', (char)0xe0, 'y' }; + static const litopt_case_t cases[] = { + { + "contains basic", + "foo|bar|baz", + sizeof("foo|bar|baz") - 1, + REG_EXTENDED | REG_NOSUB, + "xxbaryy", + sizeof("xxbaryy") - 1, + 0, + REG_OK, + TRE_LITERAL_OPT_CONTAINS + }, + { + "contains ignores bol/eol flags", + "foo|bar|baz", + sizeof("foo|bar|baz") - 1, + REG_EXTENDED | REG_NOSUB, + "xxbaryy", + sizeof("xxbaryy") - 1, + REG_NOTBOL | REG_NOTEOL, + REG_OK, + TRE_LITERAL_OPT_CONTAINS + }, + { + "prefix basic", + "^(foo|bar|baz)", + sizeof("^(foo|bar|baz)") - 1, + REG_EXTENDED | REG_NOSUB, + "barrier", + sizeof("barrier") - 1, + 0, + REG_OK, + TRE_LITERAL_OPT_PREFIX + }, + { + "prefix respects REG_NOTBOL", + "^(foo|bar|baz)", + sizeof("^(foo|bar|baz)") - 1, + REG_EXTENDED | REG_NOSUB, + "barrier", + sizeof("barrier") - 1, + REG_NOTBOL, + REG_NOMATCH, + TRE_LITERAL_OPT_PREFIX + }, + { + "suffix basic", + "(foo|bar|baz)$", + sizeof("(foo|bar|baz)$") - 1, + 
REG_EXTENDED | REG_NOSUB, + "crowbar", + sizeof("crowbar") - 1, + 0, + REG_OK, + TRE_LITERAL_OPT_SUFFIX + }, + { + "suffix respects REG_NOTEOL", + "(foo|bar|baz)$", + sizeof("(foo|bar|baz)$") - 1, + REG_EXTENDED | REG_NOSUB, + "crowbar", + sizeof("crowbar") - 1, + REG_NOTEOL, + REG_NOMATCH, + TRE_LITERAL_OPT_SUFFIX + }, + { + "exact basic", + "^(foo|bar|baz)$", + sizeof("^(foo|bar|baz)$") - 1, + REG_EXTENDED | REG_NOSUB, + "bar", + sizeof("bar") - 1, + 0, + REG_OK, + TRE_LITERAL_OPT_EXACT + }, + { + "exact respects REG_NOTBOL", + "^(foo|bar|baz)$", + sizeof("^(foo|bar|baz)$") - 1, + REG_EXTENDED | REG_NOSUB, + "bar", + sizeof("bar") - 1, + REG_NOTBOL, + REG_NOMATCH, + TRE_LITERAL_OPT_EXACT + }, + { + "exact respects REG_NOTEOL", + "^(foo|bar|baz)$", + sizeof("^(foo|bar|baz)$") - 1, + REG_EXTENDED | REG_NOSUB, + "bar", + sizeof("bar") - 1, + REG_NOTEOL, + REG_NOMATCH, + TRE_LITERAL_OPT_EXACT + }, + { + "empty alternation disables optimization", + "(|foo|bar)", + sizeof("(|foo|bar)") - 1, + REG_EXTENDED | REG_NOSUB, + "", + 0, + 0, + REG_OK, + TRE_LITERAL_OPT_NONE + }, + { + "inline flag disable stays generic", + "foo(?-i:zap)zot", + sizeof("foo(?-i:zap)zot") - 1, + REG_EXTENDED | REG_ICASE | REG_NOSUB, + "FoOzApZOt", + sizeof("FoOzApZOt") - 1, + 0, + REG_NOMATCH, + TRE_LITERAL_OPT_NONE + }, + { + "inline flag disable still matches exact scoped bytes", + "foo(?-i:zap)zot", + sizeof("foo(?-i:zap)zot") - 1, + REG_EXTENDED | REG_ICASE | REG_NOSUB, + "FoOzapZOt", + sizeof("FoOzapZOt") - 1, + 0, + REG_OK, + TRE_LITERAL_OPT_NONE + }, + { + "nocase non-ascii bytes stay in sync", + nonascii_pattern, + sizeof(nonascii_pattern), + REG_EXTENDED | REG_ICASE | REG_NOSUB, + nonascii_haystack, + sizeof(nonascii_haystack), + 0, + RC_ANY, + TRE_LITERAL_OPT_CONTAINS + } + }; + size_t i; + int failures = 0; + + setlocale(LC_CTYPE, "en_US.ISO-8859-1"); + + for (i = 0; i < elementsof(cases); i++) + failures += run_case(&cases[i]); + + return failures; +} diff --git 
a/deps/tre/tests/test-malformed-regn.c b/deps/tre/tests/test-malformed-regn.c new file mode 100644 index 000000000..7d3074a1e --- /dev/null +++ b/deps/tre/tests/test-malformed-regn.c @@ -0,0 +1,85 @@ +/* + test-malformed-regn.c - Verify exact-length edge-case regexps compile or fail + cleanly both with and without a trailing NUL byte. + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. +*/ + +#include +#include +#include + +#include "tre.h" + +typedef struct { + const char *name; + const char *pattern; + int expected_err; +} malformed_case_t; + +static int +run_case(const malformed_case_t *tc, int nul_terminated) +{ + regex_t preg; + size_t len = strlen(tc->pattern); + size_t alloc_len = len + (nul_terminated ? 1 : 0); + char *pattern = malloc(alloc_len ? alloc_len : 1); + int errcode; + + if (pattern == NULL) + { + fprintf(stderr, "%s: out of memory\n", tc->name); + return 1; + } + + if (len > 0) + memcpy(pattern, tc->pattern, len); + if (nul_terminated) + pattern[len] = '\0'; + + memset(&preg, 0, sizeof(preg)); + errcode = tre_regncompb(&preg, pattern, len, REG_EXTENDED | REG_NOSUB); + if (errcode == REG_OK) + tre_regfree(&preg); + + free(pattern); + + if (errcode != tc->expected_err) + { + char errbuf[128]; + memset(&preg, 0, sizeof(preg)); + tre_regerror(errcode, &preg, errbuf, sizeof(errbuf)); + fprintf(stderr, "%s (%s): got %d (%s), expected %d\n", + tc->name, nul_terminated ? 
"nul" : "exact", + errcode, errbuf, tc->expected_err); + return 1; + } + + return 0; +} + +int +main(void) +{ + static const malformed_case_t cases[] = { + { "open paren", "(", REG_EPAREN }, + { "open bracket", "[", REG_EBRACK }, + { "unterminated comment", "(?#", REG_BADPAT }, + { "unterminated inline flags", "(?i", REG_BADPAT }, + { "short hex escape", "\\x", REG_OK }, + { "unterminated wide hex", "\\x{", REG_EBRACE }, + { "empty wide hex", "\\x{}", REG_OK } + }; + size_t i; + + for (i = 0; i < sizeof(cases) / sizeof(*cases); i++) + { + if (run_case(&cases[i], 0)) + return 1; + if (run_case(&cases[i], 1)) + return 1; + } + + return 0; +} diff --git a/deps/tre/tests/test-str-source.c b/deps/tre/tests/test-str-source.c new file mode 100644 index 000000000..985f5b247 --- /dev/null +++ b/deps/tre/tests/test-str-source.c @@ -0,0 +1,192 @@ +/* + test-str-source.c - Sample program for using tre_reguexec() + + This software is released under a BSD-style license. + See the file LICENSE for details and copyright. + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +/* look for getopt in order to use a -o option for output. */ +#if defined(HAVE_UNISTD_H) +#include +#elif defined(HAVE_GETOPT_H) +#include +#endif + +#include "tre-internal.h" + +static FILE *outf = NULL; + +/* Context structure for the tre_str_source wrappers. */ +typedef struct { + /* Our string. */ + const char *str; + /* Current position in the string. */ + size_t pos; +} str_handler_ctx; + +/* The get_next_char() handler. Sets `c' to the value of the next character, + and increases `pos_add' by the number of bytes read. Returns 1 if the + string has ended, 0 if there are more characters. 
*/ +static int +str_handler_get_next(tre_char_t *c, unsigned int *pos_add, void *context) +{ + str_handler_ctx *ctx = context; + unsigned char ch = ctx->str[ctx->pos]; + +#ifdef TRE_DEBUG + fprintf(outf, "str[%lu] = %d\n", (unsigned long)ctx->pos, ch); +#endif /* TRE_DEBUG */ + *c = ch; + if (ch) + ctx->pos++; + *pos_add = 1; + + return ch == '\0'; +} + +/* The rewind() handler. Resets the current position in the input string. */ +static void +str_handler_rewind(size_t pos, void *context) +{ + str_handler_ctx *ctx = context; + +#ifdef TRE_DEBUG + fprintf(outf, "rewind to %lu\n", (unsigned long)pos); +#endif /* TRE_DEBUG */ + ctx->pos = pos; +} + +/* The compare() handler. Compares two substrings in the input and returns + 0 if the substrings are equal, and a nonzero value if not. */ +static int +str_handler_compare(size_t pos1, size_t pos2, size_t len, void *context) +{ + str_handler_ctx *ctx = context; +#ifdef TRE_DEBUG + fprintf(outf, "comparing %lu-%lu and %lu-%lu\n", + (unsigned long)pos1, (unsigned long)pos1 + len, + (unsigned long)pos2, (unsigned long)pos2 + len); +#endif /* TRE_DEBUG */ + return strncmp(ctx->str + pos1, ctx->str + pos2, len); +} + +/* Creates a tre_str_source wrapper around the string `str'. Returns the + tre_str_source object or NULL if out of memory. */ +static tre_str_source * +make_str_source(const char *str) +{ + tre_str_source *s; + str_handler_ctx *ctx; + + s = calloc(1, sizeof(*s)); + if (!s) + return NULL; + + ctx = malloc(sizeof(str_handler_ctx)); + if (!ctx) + { + free(s); + return NULL; + } + + ctx->str = str; + ctx->pos = 0; + s->context = ctx; + s->get_next_char = str_handler_get_next; + s->rewind = str_handler_rewind; + s->compare = str_handler_compare; + + return s; +} + +/* Frees the memory allocated for `s'. */ +static void +free_str_source(tre_str_source *s) +{ + free(s->context); + free(s); +} + +/* Run one test with tre_reguexec. Returns 1 if the regex matches, 0 if + it doesn't, and -1 if an error occurs. 
*/ +static int +test_reguexec(const char *str, const char *regex) +{ + regex_t preg; + tre_str_source *source; + regmatch_t pmatch[5]; + int ret; + + if ((source = make_str_source(str)) == NULL) + { + fprintf(stderr, "Out of memory\n"); + ret = -1; + } + else + { + if (tre_regcomp(&preg, regex, REG_EXTENDED) != REG_OK) + { + fprintf(stderr, "Failed to compile /%s/\n", regex); + ret = -1; + } + else + { + if (tre_reguexec(&preg, source, elementsof(pmatch), pmatch, 0) == 0) + { + fprintf(outf, "Match: /%s/ matches \"%.*s\" in \"%s\"\n", regex, + (int)(pmatch[0].rm_eo - pmatch[0].rm_so), + str + pmatch[0].rm_so, str); + ret = 1; + } + else + { + fprintf(outf, "No match: /%s/ in \"%s\"\n", regex, str); + ret = 0; + } + tre_regfree(&preg); + } + free_str_source(source); + } + return ret; +} + +int +main(int argc, char **argv) +{ + int ret = 0; + outf = stdout; +#if defined(HAVE_UNISTD_H) || defined(HAVE_GETOPT_H) + int opt; + while ((opt = getopt(argc, argv, "o:")) != EOF) + { + switch (opt) + { + case 'o': + if ((outf = fopen(optarg, "w")) == NULL) + { + perror(optarg); + exit(1); + } + break; + default: + /* getopt() will have printed an error message already */ + exit(1); + } + } +#endif + ret += test_reguexec("xfoofofoofoo", "(foo)\\1") != 1; + ret += test_reguexec("catcat", "(cat|dog)\\1") != 1; + ret += test_reguexec("catdog", "(cat|dog)\\1") != 0; + ret += test_reguexec("dogdog", "(cat|dog)\\1") != 1; + ret += test_reguexec("dogcat", "(cat|dog)\\1") != 0; + + return ret; +} diff --git a/modules/redisbloom/Makefile b/modules/redisbloom/Makefile index f40cc7c1f..2fa608a0e 100644 --- a/modules/redisbloom/Makefile +++ b/modules/redisbloom/Makefile @@ -1,5 +1,5 @@ SRC_DIR = src -MODULE_VERSION = v8.7.90 +MODULE_VERSION = v8.7.91 MODULE_REPO = https://github.com/redisbloom/redisbloom TARGET_MODULE = $(SRC_DIR)/bin/$(FULL_VARIANT)/redisbloom.so diff --git a/modules/redisearch/Makefile b/modules/redisearch/Makefile index dee1ef04c..a56e9fc70 100644 --- 
a/modules/redisearch/Makefile +++ b/modules/redisearch/Makefile @@ -1,5 +1,5 @@ SRC_DIR = src -MODULE_VERSION = v8.7.90 +MODULE_VERSION = v8.7.91 MODULE_REPO = https://github.com/redisearch/redisearch TARGET_MODULE = $(SRC_DIR)/bin/$(FULL_VARIANT)/search-community/redisearch.so @@ -7,5 +7,10 @@ TARGET_MODULE = $(SRC_DIR)/bin/$(FULL_VARIANT)/search-community/redisearch.so LTO ?= 1 export LTO + # Set INLINE_LSE_ATOMICS=1 for perf improvement on common ARM CPUs (i.e. Graviton2/3/4); no effect on x86 or macOS. + # Default 0 keeps the binary runnable on pre-Armv8.1-a cores (Cortex-A72, Graviton1, RPi4) that would otherwise SIGILL at module load. +INLINE_LSE_ATOMICS ?= 0 +export INLINE_LSE_ATOMICS + include ../common.mk diff --git a/modules/redisjson/Makefile b/modules/redisjson/Makefile index 4d13ed7bc..e85e5297d 100644 --- a/modules/redisjson/Makefile +++ b/modules/redisjson/Makefile @@ -1,5 +1,5 @@ SRC_DIR = src -MODULE_VERSION = v8.7.90 +MODULE_VERSION = v8.7.91 MODULE_REPO = https://github.com/redisjson/redisjson TARGET_MODULE = $(SRC_DIR)/bin/$(FULL_VARIANT)/rejson.so diff --git a/modules/redistimeseries/Makefile b/modules/redistimeseries/Makefile index 1bd8b46ca..b5da541dd 100644 --- a/modules/redistimeseries/Makefile +++ b/modules/redistimeseries/Makefile @@ -1,5 +1,5 @@ SRC_DIR = src -MODULE_VERSION = v8.7.90 +MODULE_VERSION = v8.7.91 MODULE_REPO = https://github.com/redistimeseries/redistimeseries TARGET_MODULE = $(SRC_DIR)/bin/$(FULL_VARIANT)/redistimeseries.so diff --git a/modules/vector-sets/tests/dimension_max_limit.py b/modules/vector-sets/tests/dimension_max_limit.py new file mode 100644 index 000000000..5a142d441 --- /dev/null +++ b/modules/vector-sets/tests/dimension_max_limit.py @@ -0,0 +1,129 @@ +from test import TestCase, generate_random_vector +import struct +import redis.exceptions + +MAX_DIM = 65536 + + +class DimensionMaxLimitVaddAtLimit(TestCase): + def getname(self): + return "[regression] VADD VALUES dim == MAX_DIM accepted" + + def 
estimated_runtime(self): + return 0.5 + + def test(self): + dim = MAX_DIM + vec = generate_random_vector(dim) + + result = self.redis.execute_command( + 'VADD', self.test_key, + 'VALUES', dim, + *[str(x) for x in vec], + f"{self.test_key}:item:maxdim") + assert result == 1, "VADD with dimension at the limit should succeed" + + +class DimensionMaxLimitVaddAboveLimit(TestCase): + def getname(self): + return "[regression] VADD VALUES dim > MAX_DIM rejected" + + def estimated_runtime(self): + return 0.1 + + def test(self): + too_big_dim = MAX_DIM + 1 + too_big_vec = generate_random_vector(16) + try: + self.redis.execute_command( + 'VADD', self.test_key, + 'VALUES', too_big_dim, + *[str(x) for x in too_big_vec], + f"{self.test_key}:item:toolarge") + assert False, "VADD with dimension above the limit should fail" + except redis.exceptions.ResponseError as e: + # parseVector returns NULL so caller uses the generic invalid spec error + assert "invalid vector specification" in str(e), ( + f"Expected invalid vector specification error, got: {e}") + + +class DimensionMaxLimitVsimAtLimit(TestCase): + def getname(self): + return "[regression] VSIM VALUES dim == MAX_DIM accepted" + + def estimated_runtime(self): + return 0.5 + + def test(self): + # Insert a vector at the maximum allowed dimension, then query at the same dimension. 
+ dim = MAX_DIM + base_vec = generate_random_vector(dim) + + result = self.redis.execute_command( + 'VADD', self.test_key, + 'VALUES', dim, + *[str(x) for x in base_vec], + f"{self.test_key}:item:1") + assert result == 1, "VADD with dimension at the limit should succeed" + + query = generate_random_vector(dim) + res = self.redis.execute_command( + 'VSIM', self.test_key, + 'VALUES', dim, + *[str(x) for x in query], + 'COUNT', 1) + assert isinstance(res, list), "VSIM with dimension at the limit should return a list" + + +class DimensionMaxLimitVsimAboveLimit(TestCase): + def getname(self): + return "[regression] VSIM VALUES dim > MAX_DIM rejected" + + def estimated_runtime(self): + return 0.1 + + def test(self): + # Create a small index, then issue a VSIM with an over-limit dimension. + base_dim = 16 + base_vec = generate_random_vector(base_dim) + result = self.redis.execute_command( + 'VADD', self.test_key, + 'VALUES', base_dim, + *[str(x) for x in base_vec], + f"{self.test_key}:item:1") + assert result == 1, "VADD with base_dim should succeed" + + too_big_dim = MAX_DIM + 1 + too_big_vec = generate_random_vector(16) + try: + self.redis.execute_command( + 'VSIM', self.test_key, + 'VALUES', too_big_dim, + *[str(x) for x in too_big_vec], + 'COUNT', 1) + assert False, "VSIM with dimension above the limit should fail" + except redis.exceptions.ResponseError as e: + assert "invalid vector specification" in str(e), ( + f"Expected invalid vector specification error in VSIM, got: {e}") + + +class DimensionMaxLimitHugeDimension(TestCase): + def getname(self): + return "[regression] VADD VALUES absurdly large dim rejected" + + def estimated_runtime(self): + return 0.1 + + def test(self): + # Extremely large dimension close to LLONG_MAX should also be rejected safely. 
+ huge_dim = 9223372036854775807 # LLONG_MAX from the original report + try: + self.redis.execute_command( + 'VADD', self.test_key, + 'VALUES', huge_dim, + '0') # Just a dummy value; parseVector should reject based on dimension alone + assert False, "VADD with absurdly large dimension should fail" + except redis.exceptions.ResponseError as e: + assert "invalid vector specification" in str(e), ( + f"Expected invalid vector specification error for huge dim, got: {e}") + diff --git a/modules/vector-sets/tests/dimension_validation.py b/modules/vector-sets/tests/dimension_validation.py index f0811529a..7e13f57cf 100644 --- a/modules/vector-sets/tests/dimension_validation.py +++ b/modules/vector-sets/tests/dimension_validation.py @@ -65,3 +65,33 @@ class DimensionValidation(TestCase): assert False, "VSIM with wrong dimension should fail" except redis.exceptions.ResponseError as e: assert "Input dimension mismatch for projection" in str(e), f"Expected dimension mismatch error in VSIM, got: {e}" + +class ReduceDimConstraintValidation(TestCase): + def getname(self): + return "[regression] VADD enforces reduce_dim <= dim" + + def estimated_runtime(self): + return 0.1 + + def test(self): + import struct + + dim = 16 + reduce_dim = dim + 1 # Intentionally larger than dim + + # Build a simple FP32 vector of the given dimension. + vec = [0.0] * dim + vec_bytes = struct.pack(f'{dim}f', *vec) + + try: + self.redis.execute_command( + 'VADD', self.test_key, + 'REDUCE', reduce_dim, + 'FP32', vec_bytes, + f'{self.test_key}:item:reducemismatch') + assert False, "VADD with reduce_dim > dim should fail" + except redis.exceptions.ResponseError as e: + # Same generic validation error path as other vector spec problems. 
+ assert "invalid vector specification" in str(e), ( + f"Expected invalid vector error, got: {e}") + diff --git a/modules/vector-sets/vset.c b/modules/vector-sets/vset.c index 618723e91..b3b47871b 100644 --- a/modules/vector-sets/vset.c +++ b/modules/vector-sets/vset.c @@ -134,6 +134,9 @@ static uint64_t VectorSetTypeNextId = 0; // Default num elements returned by VSIM. #define VSET_DEFAULT_COUNT 10 +// Maximum allowed vector dimension for input vectors and sets. +#define VSET_MAX_VECTOR_DIM (1<<16) + /* ========================== Internal data structure ====================== */ /* Our abstract data type needs a dual representation similar to Redis @@ -408,6 +411,7 @@ float *parseVector(RedisModuleString **argv, int argc, int start_idx, // Must be 4 bytes per component. if (vec_raw_len % 4 || vec_raw_len < 4) return NULL; *dim = vec_raw_len/4; + if (*dim > VSET_MAX_VECTOR_DIM) return NULL; vec = RedisModule_Alloc(vec_raw_len); if (!vec) return NULL; @@ -417,7 +421,7 @@ float *parseVector(RedisModuleString **argv, int argc, int start_idx, if (argc < start_idx + 2) return NULL; // Need at least the dimension. long long vdim; // Vector dimension passed by the user. if (RedisModule_StringToLongLong(argv[start_idx+1],&vdim) - != REDISMODULE_OK || vdim < 1) return NULL; + != REDISMODULE_OK || vdim < 1 || vdim > VSET_MAX_VECTOR_DIM) return NULL; // Check that all the arguments are available. if (argc < start_idx + 2 + vdim) return NULL; @@ -441,6 +445,12 @@ float *parseVector(RedisModuleString **argv, int argc, int start_idx, return NULL; // Unknown format. 
} + // reduce_dim must be <= dim + if (reduce_dim && *reduce_dim && *reduce_dim > *dim) { + if (vec) RedisModule_Free(vec); + return NULL; + } + if (consumed_args) *consumed_args = consumed; return vec; } @@ -1966,6 +1976,15 @@ void *VectorSetRdbLoad(RedisModuleIO *rdb, int encver) { uint32_t quant_type = hnsw_config & 0xff; uint32_t hnsw_m = (hnsw_config >> 8) & 0xffff; + /* Validate dimension loaded from RDB to enforce invariants and + * avoid absurd allocations or inconsistent state. */ + if (dim == 0 || dim > VSET_MAX_VECTOR_DIM) { + RedisModule_LogIOError(rdb, "warning", + "Invalid vector dimension in RDB: dim=%u (max allowed %u)", + (unsigned)dim, (unsigned)VSET_MAX_VECTOR_DIM); + return NULL; + } + /* Check that the quantization type is correct. Otherwise * return ASAP signaling the error. */ if (quant_type != HNSW_QUANT_NONE && @@ -1987,14 +2006,44 @@ void *VectorSetRdbLoad(RedisModuleIO *rdb, int encver) { uint32_t input_dim = RedisModule_LoadUnsigned(rdb); if (RedisModule_IsIOError(rdb)) goto ioerr; uint32_t output_dim = dim; - size_t matrix_size = sizeof(float) * input_dim * output_dim; + + /* Sanity check projection dimensions. */ + if (input_dim == 0 || output_dim == 0 || input_dim > VSET_MAX_VECTOR_DIM || output_dim > input_dim) { + RedisModule_LogIOError(rdb, "warning", + "Invalid projection matrix dimensions: input_dim=%u, output_dim=%u (max allowed %u)", + (unsigned)input_dim, (unsigned)output_dim, + (unsigned)VSET_MAX_VECTOR_DIM); + goto ioerr; + } + + /* Check for overflow in matrix_size = sizeof(float) * input_dim * output_dim. 
*/ + #if SIZE_MAX == UINT32_MAX + uint64_t product = (uint64_t) output_dim * (uint64_t) input_dim * sizeof(float); + if (product > SIZE_MAX) { + RedisModule_LogIOError(rdb, "warning", + "Projection matrix size overflow (output_dim too large): input_dim=%u, output_dim=%u", + (unsigned)input_dim, (unsigned)output_dim); + goto ioerr; + } + #endif + + size_t matrix_size = sizeof(float) * (size_t)input_dim * (size_t)output_dim; + + /* Load projection matrix as a binary blob and validate length. */ + size_t blob_len = 0; + char *matrix_blob = RedisModule_LoadStringBuffer(rdb, &blob_len); + if (matrix_blob == NULL) goto ioerr; + + if (blob_len != matrix_size) { + RedisModule_LogIOError(rdb, "warning", + "Mismatching projection matrix length: expected=%zu, got=%zu", + matrix_size, blob_len); + RedisModule_Free(matrix_blob); + goto ioerr; + } vset->proj_matrix = RedisModule_Alloc(matrix_size); vset->proj_input_size = input_dim; - - // Load projection matrix as a binary blob - char *matrix_blob = RedisModule_LoadStringBuffer(rdb, NULL); - if (matrix_blob == NULL) goto ioerr; memcpy(vset->proj_matrix, matrix_blob, matrix_size); RedisModule_Free(matrix_blob); } diff --git a/redis.conf b/redis.conf index 79157b7d2..9151c8fc8 100644 --- a/redis.conf +++ b/redis.conf @@ -2044,21 +2044,21 @@ latency-monitor-threshold 0 # e Evicted events (events generated when a key is evicted for maxmemory) # n New key events (Note: not included in the 'A' class) # t Stream commands +# a Array commands # d Module key type events # m Key-miss events (Note: It is not included in the 'A' class) # o Overwritten events generated every time a key is overwritten. # (Note: not included in the 'A' class) # c Type-changed events generated every time a key's type changes # (Note: not included in the 'A' class) -# r rate limit event # S Subkeyspace events, published with __subkeyspace@<db>__:<key> prefix. # T Subkeyevent events, published with __subkeyevent@<db>__:<event> prefix.
# I Subkeyspaceitem events, published per subkey with # __subkeyspaceitem@<db>__:<key>\n<subkey> prefix. # V Subkeyspaceevent events, published with # __subkeyspaceevent@<db>__:<event>|<key> prefix. -# A Alias for g$lshzxetd, so that the "AKE" string means all the events -# except key-miss, new key, overwritten, type-changed and rate-limit. +# A Alias for g$lshzxetad, so that the "AKE" string means all the events +# except key-miss, new key, overwritten and type-changed. # # The "notify-keyspace-events" takes as argument a string that is composed # of zero or multiple characters. The empty string means that notifications @@ -2187,6 +2187,37 @@ stream-node-max-entries 100 # stream-idmp-duration 100 # stream-idmp-maxsize 100 +# Arrays use a sliced directory structure for O(1) access. The slice size +# controls the granularity of memory allocation - each slice covers a range +# of indices. Must be a power of two between 256 and 65536. +# +# Smaller slices (1024-2048): Better for sparse data with large gaps between +# indices, or many small arrays. Uses less memory per slice but more directory +# entries. +# +# Larger slices (8192-16384): Better for dense/contiguous data. Fewer directory +# entries but may waste memory if data is sparse within slices. +# +# Default 4096 works well for mixed workloads. If you change this setting via +# CONFIG SET, existing arrays retain their original slice size. +# +# IMPORTANT CONSIDERATION: Redis arrays, for slices with very few elements, are +# able to use a sparse representation, where the slice is not really +# materialized into an actual contiguous allocation. See the next configuration +# parameters for more information. +array-slice-size 4096 + +# Arrays start with sparse slices (sorted key-value pairs) for memory efficiency +# when elements are scattered. When a sparse slice exceeds array-sparse-kmax +# entries, it promotes to a dense slice (direct array).
When a dense slice's +# element count drops below array-sparse-kmin and demotion would save memory, +# it demotes back to sparse. Set kmax to 0 to disable sparse encoding entirely. +# Set kmin to 0 if you never want dense slices to be demoted to sparse (useful +# when your workload repeatedly drains arrays to an almost empty state and then +# refills them). +array-sparse-kmax 10 +array-sparse-kmin 5 + # Active rehashing uses 1 millisecond every 100 milliseconds of CPU time in # order to help rehashing the main Redis hash table (the one mapping top-level # keys to values). The hash table implementation Redis uses (see dict.c) diff --git a/src/Makefile b/src/Makefile index cf0395d1c..fea95efd4 100644 --- a/src/Makefile +++ b/src/Makefile @@ -37,7 +37,7 @@ endif ifneq ($(OPTIMIZATION),-O0) OPTIMIZATION+=-fno-omit-frame-pointer endif -DEPENDENCY_TARGETS=hiredis linenoise lua hdr_histogram fpconv xxhash +DEPENDENCY_TARGETS=hiredis linenoise lua hdr_histogram fpconv xxhash tre NODEPS:=clean distclean # Default settings @@ -384,7 +384,7 @@ endif REDIS_SERVER_NAME=redis-server$(PROG_SUFFIX) REDIS_SENTINEL_NAME=redis-sentinel$(PROG_SUFFIX) -REDIS_SERVER_OBJ=threads_mngr.o memory_prefetch.o adlist.o quicklist.o ae.o anet.o dict.o ebuckets.o eventnotifier.o iothread.o mstr.o entry.o kvstore.o fwtree.o estore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_asm.o cluster_legacy.o cluster_slot_stats.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o
localtime.o lolwut.o lolwut5.o lolwut6.o lolwut8.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o keymeta.o chk.o hotkeys.o gcra.o vector.o fast_float_strtod.o +REDIS_SERVER_OBJ=threads_mngr.o memory_prefetch.o adlist.o quicklist.o ae.o anet.o dict.o ebuckets.o eventnotifier.o iothread.o mstr.o entry.o kvstore.o fwtree.o estore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o t_array.o sparsearray.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_asm.o cluster_legacy.o cluster_slot_stats.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o lolwut8.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o keymeta.o chk.o hotkeys.o gcra.o vector.o fast_float_strtod.o REDIS_CLI_NAME=redis-cli$(PROG_SUFFIX) REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o ae.o redisassert.o crcspeed.o crccombine.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o cli_commands.o REDIS_BENCHMARK_NAME=redis-benchmark$(PROG_SUFFIX) @@ -444,7 +444,7 @@ endif # redis-server $(REDIS_SERVER_NAME): $(REDIS_SERVER_OBJ) $(REDIS_VEC_SETS_OBJ) - $(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a 
../deps/lua/src/liblua.a ../deps/hdr_histogram/libhdrhistogram.a ../deps/fpconv/libfpconv.a ../deps/xxhash/libxxhash.a $(FINAL_LIBS) + $(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a ../deps/lua/src/liblua.a ../deps/hdr_histogram/libhdrhistogram.a ../deps/fpconv/libfpconv.a ../deps/xxhash/libxxhash.a ../deps/tre/libtre.a $(FINAL_LIBS) # redis-sentinel $(REDIS_SENTINEL_NAME): $(REDIS_SERVER_NAME) diff --git a/src/acl.c b/src/acl.c index 79a900200..177077d45 100644 --- a/src/acl.c +++ b/src/acl.c @@ -57,6 +57,7 @@ struct ACLCategoryItem { {"list", ACL_CATEGORY_LIST}, {"hash", ACL_CATEGORY_HASH}, {"string", ACL_CATEGORY_STRING}, + {"array", ACL_CATEGORY_ARRAY}, {"bitmap", ACL_CATEGORY_BITMAP}, {"hyperloglog", ACL_CATEGORY_HYPERLOGLOG}, {"geo", ACL_CATEGORY_GEO}, @@ -70,7 +71,9 @@ struct ACLCategoryItem { {"connection", ACL_CATEGORY_CONNECTION}, {"transaction", ACL_CATEGORY_TRANSACTION}, {"scripting", ACL_CATEGORY_SCRIPTING}, +#ifdef ENABLE_GCRA {"ratelimit", ACL_CATEGORY_RATE_LIMIT}, +#endif {NULL,0} /* Terminator. */ }; diff --git a/src/aof.c b/src/aof.c index a2bf945f2..9e55a78b7 100644 --- a/src/aof.c +++ b/src/aof.c @@ -2467,6 +2467,7 @@ int rewriteStreamObject(rio *r, robj *key, robj *o) { return 1; } +#ifdef ENABLE_GCRA int rewriteGCRAObject(rio *r, robj *key, robj *o) { long long val; getLongLongFromGCRAObject(o, &val); @@ -2478,6 +2479,7 @@ int rewriteGCRAObject(rio *r, robj *key, robj *o) { if (rioWriteBulkLongLong(r,val) == 0) return 0; return 1; } +#endif /* Call the module type callback in order to rewrite a data type * that is exported by a module and is not handled by Redis itself. @@ -2515,6 +2517,116 @@ werr: return 0; } +/* Write unsigned 64-bit integer as bulk string. + * Unlike rioWriteBulkLongLong which uses signed representation, + * this correctly handles values >= 2^63 (e.g., array indices). 
*/ +static int rioWriteBulkUnsignedLongLong(rio *r, uint64_t value) { + char buf[24]; + int len = ull2string(buf, sizeof(buf), value); + return rioWriteBulkString(r, buf, len); +} + +/* Helper to emit a single array element for AOF rewrite. + * Returns 0 on error, 1 on success. Updates count and items. */ +static int aofEmitArrayElement(rio *r, robj *key, uint64_t idx, void *v, + long long *count, long long *items) { + if (*count == 0) { + int cmd_items = (*items > AOF_REWRITE_ITEMS_PER_CMD/2) ? + AOF_REWRITE_ITEMS_PER_CMD/2 : *items; /* pairs of idx+val */ + if (!rioWriteBulkCount(r,'*',2+cmd_items*2) || + !rioWriteBulkString(r,"ARMSET",6) || + !rioWriteBulkObject(r,key)) + { + return 0; + } + } + + /* Write index (unsigned to handle indices >= 2^63) */ + if (!rioWriteBulkUnsignedLongLong(r, idx)) return 0; + + /* Write value - inline types use scratch space, arString aliases directly. */ + char buf[AR_INLINE_BUFSIZE]; + size_t len; + const char *data = arDecode(v, buf, sizeof(buf), &len); + if (!rioWriteBulkString(r, data, len)) return 0; + + if (++(*count) == AOF_REWRITE_ITEMS_PER_CMD/2) *count = 0; + (*items)--; + return 1; +} + +/* Helper to emit all elements from a slice for AOF rewrite. 
*/ +static int aofEmitSliceElements(rio *r, robj *key, arSlice *s, uint64_t slice_id, + uint32_t slice_size, long long *count, long long *items) { + if (s->encoding == AR_SLICE_DENSE) { + for (uint32_t i = 0; i < s->layout.dense.winsize; i++) { + void *v = s->layout.dense.items[i]; + if (arIsEmpty(v)) continue; + uint64_t idx = arMakeIdx(slice_id, s->layout.dense.offset + i, slice_size); + if (!aofEmitArrayElement(r, key, idx, v, count, items)) return 0; + } + } else { + /* Sparse slice */ + uint16_t *offsets = s->layout.sparse.offsets; + void **values = s->layout.sparse.values; + for (uint32_t i = 0; i < s->count; i++) { + uint64_t idx = arMakeIdx(slice_id, offsets[i], slice_size); + if (!aofEmitArrayElement(r, key, idx, values[i], count, items)) return 0; + } + } + return 1; +} + +/* Emit the commands needed to rebuild an array object. + * The function returns 0 on error, 1 on success. */ +int rewriteArrayObject(rio *r, robj *key, robj *o) { + redisArray *ar = o->ptr; + long long count = 0, items = ar->count; + if (items == 0) return 1; + + /* Iterate through all slices, handling both flat directory mode and + * superdir mode. This mirrors the iteration logic in rdb.c. 
*/ + if (ar->superdir) { + /* Superdir mode: iterate through blocks */ + for (uint32_t bi = 0; bi < ar->sdir_len; bi++) { + arSDirEntry *e = ar->superdir + bi; + uint64_t block_base = e->block_id * AR_SUPER_BLOCK_SLOTS; + + for (uint32_t si = 0; si < AR_SUPER_BLOCK_SLOTS; si++) { + arSlice *s = e->slots[si]; + if (!s) continue; + uint64_t slice_id = block_base + si; + if (!aofEmitSliceElements(r, key, s, slice_id, ar->slice_size, + &count, &items)) return 0; + } + } + } else { + /* Flat directory mode */ + for (uint64_t slice_id = 0; slice_id <= ar->dir_highest_used && slice_id < ar->dir_alloc; slice_id++) { + arSlice *s = ar->dir[slice_id]; + if (!s) continue; + if (!aofEmitSliceElements(r, key, s, slice_id, ar->slice_size, + &count, &items)) return 0; + } + } + + /* If insert_idx is set, emit ARSEEK command to restore it. + * When insert_idx == UINT64_MAX-1, we emit ARSEEK UINT64_MAX which + * correctly sets insert_idx back to UINT64_MAX-1 (terminal state). */ + if (ar->insert_idx != AR_INSERT_IDX_NONE) { + /* ARSEEK key insert_idx+1 (ARSEEK sets position for next insert) */ + if (!rioWriteBulkCount(r,'*',3) || + !rioWriteBulkString(r,"ARSEEK",6) || + !rioWriteBulkObject(r,key) || + !rioWriteBulkUnsignedLongLong(r, ar->insert_idx + 1)) + { + return 0; + } + } + + return 1; +} + int rewriteObject(rio *r, robj *key, robj *o, int dbid, long long expiretime) { /* Save the key and associated value */ if (o->type == OBJ_STRING) { @@ -2534,8 +2646,12 @@ int rewriteObject(rio *r, robj *key, robj *o, int dbid, long long expiretime) { if (rewriteHashObject(r,key,o) == 0) return C_ERR; } else if (o->type == OBJ_STREAM) { if (rewriteStreamObject(r,key,o) == 0) return C_ERR; +#ifdef ENABLE_GCRA } else if (o->type == OBJ_GCRA) { if (rewriteGCRAObject(r,key,o) == 0) return C_ERR; +#endif + } else if (o->type == OBJ_ARRAY) { + if (rewriteArrayObject(r,key,o) == 0) return C_ERR; } else if (o->type == OBJ_MODULE) { if (rewriteModuleObject(r,key,o,dbid) == 0) return C_ERR; } else { 
diff --git a/src/blocked.c b/src/blocked.c index b973adeaf..74558b485 100644 --- a/src/blocked.c +++ b/src/blocked.c @@ -699,7 +699,13 @@ static void unblockClientOnKey(client *c, robj *key) { client *old_client = server.current_client; server.current_client = c; enterExecutionUnit(1, 0); - processCommandAndResetClient(c); + if (processCommandAndResetClient(c) == C_ERR) { + /* Client was freed during command processing, exit immediately */ + exitExecutionUnit(); + server.current_client = old_client; + return; + } + if (!(c->flags & CLIENT_BLOCKED)) { if (c->flags & CLIENT_MODULE) { moduleCallCommandUnblockedHandler(c); diff --git a/src/cluster.c b/src/cluster.c index 98bb0ebda..637b5dd9a 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -804,7 +804,12 @@ int verifyClusterNodeId(const char *name, int length) { } int isValidAuxChar(int c) { - return isalnum(c) || (strchr("!#$%&()*+:;<>?@[]^{|}~", c) == NULL); + /* Reject control characters (0x00-0x1F and 0x7F). */ + if (iscntrl(c)) { + return 0; + } + /* Reject forbidden characters including nodes.conf delimiters and special parsing characters */ + return isalnum(c) || (strchr("!#$%&()*+:;<>?@[]^{|}~,= \"'\\", c) == NULL); } int isValidAuxString(char *s, unsigned int length) { diff --git a/src/commands.def b/src/commands.def index 7e4a14dc8..9b5692aa3 100644 --- a/src/commands.def +++ b/src/commands.def @@ -24,13 +24,545 @@ const char *COMMAND_GROUP_STR[] = { "geo", "stream", "bitmap", + "array", "module", +#ifdef ENABLE_GCRA "rate_limit" +#endif }; const char *commandGroupStr(int index) { return COMMAND_GROUP_STR[index]; } +/********** ARCOUNT ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* ARCOUNT history */ +#define ARCOUNT_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* ARCOUNT tips */ +#define ARCOUNT_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* ARCOUNT key specs */ +keySpec ARCOUNT_Keyspecs[1] = { 
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* ARCOUNT argument table */ +struct COMMAND_ARG ARCOUNT_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/********** ARDEL ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* ARDEL history */ +#define ARDEL_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* ARDEL tips */ +#define ARDEL_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* ARDEL key specs */ +keySpec ARDEL_Keyspecs[1] = { +{NULL,CMD_KEY_RW|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* ARDEL argument table */ +struct COMMAND_ARG ARDEL_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("index",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/********** ARDELRANGE ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* ARDELRANGE history */ +#define ARDELRANGE_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* ARDELRANGE tips */ +#define ARDELRANGE_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* ARDELRANGE key specs */ +keySpec ARDELRANGE_Keyspecs[1] = { +{NULL,CMD_KEY_RW|CMD_KEY_DELETE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* ARDELRANGE range argument table */ +struct COMMAND_ARG ARDELRANGE_range_Subargs[] = { +{MAKE_ARG("start",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("end",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* ARDELRANGE argument table */ +struct COMMAND_ARG ARDELRANGE_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("range",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,2,NULL),.subargs=ARDELRANGE_range_Subargs}, +}; + +/********** ARGET ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* ARGET history */ +#define ARGET_History NULL +#endif + +#ifndef 
SKIP_CMD_TIPS_TABLE +/* ARGET tips */ +#define ARGET_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* ARGET key specs */ +keySpec ARGET_Keyspecs[1] = { +{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* ARGET argument table */ +struct COMMAND_ARG ARGET_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("index",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/********** ARGETRANGE ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* ARGETRANGE history */ +#define ARGETRANGE_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* ARGETRANGE tips */ +#define ARGETRANGE_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* ARGETRANGE key specs */ +keySpec ARGETRANGE_Keyspecs[1] = { +{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* ARGETRANGE argument table */ +struct COMMAND_ARG ARGETRANGE_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("start",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("end",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/********** ARGREP ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* ARGREP history */ +#define ARGREP_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* ARGREP tips */ +#define ARGREP_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* ARGREP key specs */ +keySpec ARGREP_Keyspecs[1] = { +{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* ARGREP predicate exact argument table */ +struct COMMAND_ARG ARGREP_predicate_exact_Subargs[] = { +{MAKE_ARG("exact",ARG_TYPE_PURE_TOKEN,-1,"EXACT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("string",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* ARGREP predicate match argument table */ +struct COMMAND_ARG 
ARGREP_predicate_match_Subargs[] = { +{MAKE_ARG("match",ARG_TYPE_PURE_TOKEN,-1,"MATCH",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("string",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* ARGREP predicate glob argument table */ +struct COMMAND_ARG ARGREP_predicate_glob_Subargs[] = { +{MAKE_ARG("glob",ARG_TYPE_PURE_TOKEN,-1,"GLOB",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("pattern",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* ARGREP predicate re argument table */ +struct COMMAND_ARG ARGREP_predicate_re_Subargs[] = { +{MAKE_ARG("re",ARG_TYPE_PURE_TOKEN,-1,"RE",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("pattern",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* ARGREP predicate argument table */ +struct COMMAND_ARG ARGREP_predicate_Subargs[] = { +{MAKE_ARG("exact",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=ARGREP_predicate_exact_Subargs}, +{MAKE_ARG("match",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=ARGREP_predicate_match_Subargs}, +{MAKE_ARG("glob",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=ARGREP_predicate_glob_Subargs}, +{MAKE_ARG("re",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=ARGREP_predicate_re_Subargs}, +}; + +/* ARGREP options argument table */ +struct COMMAND_ARG ARGREP_options_Subargs[] = { +{MAKE_ARG("and",ARG_TYPE_PURE_TOKEN,-1,"AND",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("or",ARG_TYPE_PURE_TOKEN,-1,"OR",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("limit",ARG_TYPE_INTEGER,-1,"LIMIT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("withvalues",ARG_TYPE_PURE_TOKEN,-1,"WITHVALUES",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("nocase",ARG_TYPE_PURE_TOKEN,-1,"NOCASE",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* ARGREP argument table */ +struct COMMAND_ARG ARGREP_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("start",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, 
+{MAKE_ARG("end",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("predicate",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,4,NULL),.subargs=ARGREP_predicate_Subargs}, +{MAKE_ARG("options",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,5,NULL),.subargs=ARGREP_options_Subargs}, +}; + +/********** ARINFO ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* ARINFO history */ +#define ARINFO_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* ARINFO tips */ +#define ARINFO_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* ARINFO key specs */ +keySpec ARINFO_Keyspecs[1] = { +{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* ARINFO argument table */ +struct COMMAND_ARG ARINFO_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("full",ARG_TYPE_PURE_TOKEN,-1,"FULL",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)}, +}; + +/********** ARINSERT ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* ARINSERT history */ +#define ARINSERT_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* ARINSERT tips */ +#define ARINSERT_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* ARINSERT key specs */ +keySpec ARINSERT_Keyspecs[1] = { +{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* ARINSERT argument table */ +struct COMMAND_ARG ARINSERT_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("value",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/********** ARLASTITEMS ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* ARLASTITEMS history */ +#define ARLASTITEMS_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* ARLASTITEMS tips */ +#define ARLASTITEMS_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* ARLASTITEMS key specs */ +keySpec ARLASTITEMS_Keyspecs[1] = { 
+{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* ARLASTITEMS argument table */ +struct COMMAND_ARG ARLASTITEMS_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("rev",ARG_TYPE_PURE_TOKEN,-1,"REV",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)}, +}; + +/********** ARLEN ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* ARLEN history */ +#define ARLEN_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* ARLEN tips */ +#define ARLEN_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* ARLEN key specs */ +keySpec ARLEN_Keyspecs[1] = { +{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* ARLEN argument table */ +struct COMMAND_ARG ARLEN_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/********** ARMGET ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* ARMGET history */ +#define ARMGET_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* ARMGET tips */ +#define ARMGET_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* ARMGET key specs */ +keySpec ARMGET_Keyspecs[1] = { +{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* ARMGET argument table */ +struct COMMAND_ARG ARMGET_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("index",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/********** ARMSET ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* ARMSET history */ +#define ARMSET_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* ARMSET tips */ +#define ARMSET_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* ARMSET key specs */ +keySpec ARMSET_Keyspecs[1] = { 
+{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* ARMSET data argument table */ +struct COMMAND_ARG ARMSET_data_Subargs[] = { +{MAKE_ARG("index",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("value",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* ARMSET argument table */ +struct COMMAND_ARG ARMSET_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("data",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,2,NULL),.subargs=ARMSET_data_Subargs}, +}; + +/********** ARNEXT ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* ARNEXT history */ +#define ARNEXT_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* ARNEXT tips */ +#define ARNEXT_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* ARNEXT key specs */ +keySpec ARNEXT_Keyspecs[1] = { +{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* ARNEXT argument table */ +struct COMMAND_ARG ARNEXT_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/********** AROP ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* AROP history */ +#define AROP_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* AROP tips */ +#define AROP_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* AROP key specs */ +keySpec AROP_Keyspecs[1] = { +{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* AROP operation match argument table */ +struct COMMAND_ARG AROP_operation_match_Subargs[] = { +{MAKE_ARG("match",ARG_TYPE_PURE_TOKEN,-1,"MATCH",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("value",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* AROP operation argument table */ +struct COMMAND_ARG AROP_operation_Subargs[] = { +{MAKE_ARG("sum",ARG_TYPE_PURE_TOKEN,-1,"SUM",NULL,NULL,CMD_ARG_NONE,0,NULL)}, 
+{MAKE_ARG("min",ARG_TYPE_PURE_TOKEN,-1,"MIN",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("max",ARG_TYPE_PURE_TOKEN,-1,"MAX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("and",ARG_TYPE_PURE_TOKEN,-1,"AND",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("or",ARG_TYPE_PURE_TOKEN,-1,"OR",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("xor",ARG_TYPE_PURE_TOKEN,-1,"XOR",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("match",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=AROP_operation_match_Subargs}, +{MAKE_ARG("used",ARG_TYPE_PURE_TOKEN,-1,"USED",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* AROP argument table */ +struct COMMAND_ARG AROP_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("start",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("end",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("operation",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,8,NULL),.subargs=AROP_operation_Subargs}, +}; + +/********** ARRING ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* ARRING history */ +#define ARRING_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* ARRING tips */ +#define ARRING_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* ARRING key specs */ +keySpec ARRING_Keyspecs[1] = { +{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* ARRING argument table */ +struct COMMAND_ARG ARRING_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("size",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("value",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/********** ARSCAN ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* ARSCAN history */ +#define ARSCAN_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* ARSCAN tips */ +#define ARSCAN_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* ARSCAN key specs */ 
+keySpec ARSCAN_Keyspecs[1] = { +{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* ARSCAN argument table */ +struct COMMAND_ARG ARSCAN_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("start",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("end",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("limit",ARG_TYPE_INTEGER,-1,"LIMIT",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)}, +}; + +/********** ARSEEK ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* ARSEEK history */ +#define ARSEEK_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* ARSEEK tips */ +#define ARSEEK_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* ARSEEK key specs */ +keySpec ARSEEK_Keyspecs[1] = { +{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* ARSEEK argument table */ +struct COMMAND_ARG ARSEEK_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("index",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/********** ARSET ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* ARSET history */ +#define ARSET_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* ARSET tips */ +#define ARSET_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* ARSET key specs */ +keySpec ARSET_Keyspecs[1] = { +{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* ARSET argument table */ +struct COMMAND_ARG ARSET_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("index",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("value",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + /********** BITCOUNT ********************/ #ifndef SKIP_CMD_HISTORY_TABLE @@ -5380,59 +5912,6 @@ struct COMMAND_ARG 
UNSUBSCRIBE_Args[] = { {MAKE_ARG("channel",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL|CMD_ARG_MULTIPLE,0,NULL)}, }; -/********** GCRA ********************/ - -#ifndef SKIP_CMD_HISTORY_TABLE -/* GCRA history */ -#define GCRA_History NULL -#endif - -#ifndef SKIP_CMD_TIPS_TABLE -/* GCRA tips */ -#define GCRA_Tips NULL -#endif - -#ifndef SKIP_CMD_KEY_SPECS_TABLE -/* GCRA key specs */ -keySpec GCRA_Keyspecs[1] = { -{NULL,CMD_KEY_RW|CMD_KEY_ACCESS|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} -}; -#endif - -/* GCRA argument table */ -struct COMMAND_ARG GCRA_Args[] = { -{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, -{MAKE_ARG("max-burst",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, -{MAKE_ARG("tokens-per-period",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, -{MAKE_ARG("period",ARG_TYPE_DOUBLE,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, -{MAKE_ARG("count",ARG_TYPE_INTEGER,-1,"TOKENS",NULL,NULL,CMD_ARG_OPTIONAL,0,NULL)}, -}; - -/********** GCRASETVALUE ********************/ - -#ifndef SKIP_CMD_HISTORY_TABLE -/* GCRASETVALUE history */ -#define GCRASETVALUE_History NULL -#endif - -#ifndef SKIP_CMD_TIPS_TABLE -/* GCRASETVALUE tips */ -#define GCRASETVALUE_Tips NULL -#endif - -#ifndef SKIP_CMD_KEY_SPECS_TABLE -/* GCRASETVALUE key specs */ -keySpec GCRASETVALUE_Keyspecs[1] = { -{NULL,CMD_KEY_OW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} -}; -#endif - -/* GCRASETVALUE argument table */ -struct COMMAND_ARG GCRASETVALUE_Args[] = { -{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, -{MAKE_ARG("tat",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, -}; - /********** EVAL ********************/ #ifndef SKIP_CMD_HISTORY_TABLE @@ -11876,6 +12355,25 @@ struct COMMAND_ARG WATCH_Args[] = { /* Main command table */ struct COMMAND_STRUCT redisCommandTable[] = { +/* array */ +{MAKE_CMD("arcount","Returns the number of non-empty elements in an 
array.","O(1)","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,ARCOUNT_History,0,ARCOUNT_Tips,0,arcountCommand,2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_ARRAY,ARCOUNT_Keyspecs,1,NULL,1),.args=ARCOUNT_Args}, +{MAKE_CMD("ardel","Deletes elements at the specified indices in an array.","O(N) where N is the number of indices to delete","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,ARDEL_History,0,ARDEL_Tips,0,ardelCommand,-3,CMD_WRITE|CMD_FAST,ACL_CATEGORY_ARRAY,ARDEL_Keyspecs,1,NULL,2),.args=ARDEL_Args}, +{MAKE_CMD("ardelrange","Deletes elements in one or more ranges.","Proportional to the number of existing elements / slices touched, not to the numeric span of the requested ranges","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,ARDELRANGE_History,0,ARDELRANGE_Tips,0,ardelrangeCommand,-4,CMD_WRITE,ACL_CATEGORY_ARRAY,ARDELRANGE_Keyspecs,1,NULL,2),.args=ARDELRANGE_Args}, +{MAKE_CMD("arget","Gets the value at an index in an array.","O(1)","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,ARGET_History,0,ARGET_Tips,0,argetCommand,3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_ARRAY,ARGET_Keyspecs,1,NULL,2),.args=ARGET_Args}, +{MAKE_CMD("argetrange","Gets values in a range of indices.","O(N) where N is the range length","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,ARGETRANGE_History,0,ARGETRANGE_Tips,0,argetrangeCommand,4,CMD_READONLY,ACL_CATEGORY_ARRAY,ARGETRANGE_Keyspecs,1,NULL,3),.args=ARGETRANGE_Args}, +{MAKE_CMD("argrep","Searches array elements in a range using textual predicates.","O(P * C) where P is the number of visited positions in touched slices and C is the cost of evaluating the predicates on one existing element.","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,ARGREP_History,0,ARGREP_Tips,0,argrepCommand,-6,CMD_READONLY,ACL_CATEGORY_ARRAY,ARGREP_Keyspecs,1,NULL,5),.args=ARGREP_Args}, +{MAKE_CMD("arinfo","Returns metadata about an array.","O(1), or O(N) with FULL option where N is the number of 
slices.","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,ARINFO_History,0,ARINFO_Tips,0,arinfoCommand,-2,CMD_READONLY,ACL_CATEGORY_ARRAY,ARINFO_Keyspecs,1,NULL,2),.args=ARINFO_Args}, +{MAKE_CMD("arinsert","Inserts one or more values at consecutive indices.","O(N) where N is the number of values","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,ARINSERT_History,0,ARINSERT_Tips,0,arinsertCommand,-3,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_ARRAY,ARINSERT_Keyspecs,1,NULL,2),.args=ARINSERT_Args}, +{MAKE_CMD("arlastitems","Returns the most recently inserted elements.","O(N) where N is the count","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,ARLASTITEMS_History,0,ARLASTITEMS_Tips,0,arlastitemsCommand,-3,CMD_READONLY,ACL_CATEGORY_ARRAY,ARLASTITEMS_Keyspecs,1,NULL,3),.args=ARLASTITEMS_Args}, +{MAKE_CMD("arlen","Returns the length of an array (max index + 1).","O(1)","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,ARLEN_History,0,ARLEN_Tips,0,arlenCommand,2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_ARRAY,ARLEN_Keyspecs,1,NULL,1),.args=ARLEN_Args}, +{MAKE_CMD("armget","Gets values at multiple indices in an array.","O(N) where N is the number of indices","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,ARMGET_History,0,ARMGET_Tips,0,armgetCommand,-3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_ARRAY,ARMGET_Keyspecs,1,NULL,2),.args=ARMGET_Args}, +{MAKE_CMD("armset","Sets multiple index-value pairs in an array.","O(N) where N is the number of pairs","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,ARMSET_History,0,ARMSET_Tips,0,armsetCommand,-4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_ARRAY,ARMSET_Keyspecs,1,NULL,2),.args=ARMSET_Args}, +{MAKE_CMD("arnext","Returns the next index ARINSERT would use.","O(1)","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,ARNEXT_History,0,ARNEXT_Tips,0,arnextCommand,2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_ARRAY,ARNEXT_Keyspecs,1,NULL,1),.args=ARNEXT_Args}, +{MAKE_CMD("arop","Performs 
aggregate operations on array elements in a range.","O(P) where P is visited positions in touched slices (dense scanned slots + sparse entries), with worst-case O(|end-start|+1) and typical case close to O(N), where N is the number of existing elements in range.","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,AROP_History,0,AROP_Tips,0,aropCommand,-5,CMD_READONLY,ACL_CATEGORY_ARRAY,AROP_Keyspecs,1,NULL,4),.args=AROP_Args}, +{MAKE_CMD("arring","Inserts values into a ring buffer of specified size, wrapping and truncating as needed.","O(M) normally, O(N+M) on ring resize, where N is the maximum of the old and new ring size and M is the number of inserted values","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,ARRING_History,0,ARRING_Tips,0,arringCommand,-4,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_ARRAY,ARRING_Keyspecs,1,NULL,3),.args=ARRING_Args}, +{MAKE_CMD("arscan","Iterates existing elements in a range, returning index-value pairs.","O(P) where P is visited positions in touched slices (dense scanned slots + sparse entries), with worst-case O(|end-start|+1) and typical case close to O(N), where N is the number of existing elements in range.","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,ARSCAN_History,0,ARSCAN_Tips,0,arscanCommand,-4,CMD_READONLY,ACL_CATEGORY_ARRAY,ARSCAN_Keyspecs,1,NULL,4),.args=ARSCAN_Args}, +{MAKE_CMD("arseek","Sets the ARINSERT / ARRING cursor to a specific index.","O(1)","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,ARSEEK_History,0,ARSEEK_Tips,0,arseekCommand,3,CMD_WRITE|CMD_FAST,ACL_CATEGORY_ARRAY,ARSEEK_Keyspecs,1,NULL,2),.args=ARSEEK_Args}, +{MAKE_CMD("arset","Sets one or more contiguous values starting at an index in an array.","O(N) where N is the number of values","8.8.0",CMD_DOC_NONE,NULL,NULL,"array",COMMAND_GROUP_ARRAY,ARSET_History,0,ARSET_Tips,0,arsetCommand,-4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_ARRAY,ARSET_Keyspecs,1,NULL,3),.args=ARSET_Args}, /* bitmap */ 
{MAKE_CMD("bitcount","Counts the number of set bits (population counting) in a string.","O(N)","2.6.0",CMD_DOC_NONE,NULL,NULL,"bitmap",COMMAND_GROUP_BITMAP,BITCOUNT_History,1,BITCOUNT_Tips,0,bitcountCommand,-2,CMD_READONLY,ACL_CATEGORY_BITMAP,BITCOUNT_Keyspecs,1,NULL,2),.args=BITCOUNT_Args}, {MAKE_CMD("bitfield","Performs arbitrary bitfield integer operations on strings.","O(1) for each subcommand specified","3.2.0",CMD_DOC_NONE,NULL,NULL,"bitmap",COMMAND_GROUP_BITMAP,BITFIELD_History,0,BITFIELD_Tips,0,bitfieldCommand,-2,CMD_WRITE|CMD_DENYOOM,ACL_CATEGORY_BITMAP,BITFIELD_Keyspecs,1,bitfieldGetKeys,2),.args=BITFIELD_Args}, @@ -11998,18 +12496,15 @@ struct COMMAND_STRUCT redisCommandTable[] = { {MAKE_CMD("rpush","Appends one or more elements to a list. Creates the key if it doesn't exist.","O(1) for each element added, so O(N) to add N elements when the command is called with multiple arguments.","1.0.0",CMD_DOC_NONE,NULL,NULL,"list",COMMAND_GROUP_LIST,RPUSH_History,1,RPUSH_Tips,0,rpushCommand,-3,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_LIST,RPUSH_Keyspecs,1,NULL,2),.args=RPUSH_Args}, {MAKE_CMD("rpushx","Appends an element to a list only when the list exists.","O(1) for each element added, so O(N) to add N elements when the command is called with multiple arguments.","2.2.0",CMD_DOC_NONE,NULL,NULL,"list",COMMAND_GROUP_LIST,RPUSHX_History,1,RPUSHX_Tips,0,rpushxCommand,-3,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_LIST,RPUSHX_Keyspecs,1,NULL,2),.args=RPUSHX_Args}, /* pubsub */ -{MAKE_CMD("psubscribe","Listens for messages published to channels that match one or more patterns.","O(N) where N is the number of patterns to subscribe to.","2.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,PSUBSCRIBE_History,0,PSUBSCRIBE_Tips,0,psubscribeCommand,-2,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,PSUBSCRIBE_Keyspecs,0,NULL,1),.args=PSUBSCRIBE_Args}, +{MAKE_CMD("psubscribe","Listens for messages published to channels that match one or more 
patterns.","O(N) where N is the number of patterns to subscribe to.","2.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,PSUBSCRIBE_History,0,PSUBSCRIBE_Tips,0,psubscribeCommand,-2,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL|CMD_DENYOOM,0,PSUBSCRIBE_Keyspecs,0,NULL,1),.args=PSUBSCRIBE_Args}, {MAKE_CMD("publish","Posts a message to a channel.","O(N+M) where N is the number of clients subscribed to the receiving channel and M is the total number of subscribed patterns (by any client).","2.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,PUBLISH_History,0,PUBLISH_Tips,0,publishCommand,3,CMD_PUBSUB|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_MAY_REPLICATE|CMD_SENTINEL,0,PUBLISH_Keyspecs,0,NULL,2),.args=PUBLISH_Args}, {MAKE_CMD("pubsub","A container for Pub/Sub commands.","Depends on subcommand.","2.8.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,PUBSUB_History,0,PUBSUB_Tips,0,NULL,-2,0,0,PUBSUB_Keyspecs,0,NULL,0),.subcommands=PUBSUB_Subcommands}, {MAKE_CMD("punsubscribe","Stops listening to messages published to channels that match one or more patterns.","O(N) where N is the number of patterns to unsubscribe.","2.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,PUNSUBSCRIBE_History,0,PUNSUBSCRIBE_Tips,0,punsubscribeCommand,-1,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,PUNSUBSCRIBE_Keyspecs,0,NULL,1),.args=PUNSUBSCRIBE_Args}, {MAKE_CMD("spublish","Post a message to a shard channel","O(N) where N is the number of clients subscribed to the receiving shard channel.","7.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,SPUBLISH_History,0,SPUBLISH_Tips,0,spublishCommand,3,CMD_PUBSUB|CMD_LOADING|CMD_STALE|CMD_FAST|CMD_MAY_REPLICATE,0,SPUBLISH_Keyspecs,1,NULL,2),.args=SPUBLISH_Args}, -{MAKE_CMD("ssubscribe","Listens for messages published to shard channels.","O(N) where N is the number of shard channels to subscribe 
to.","7.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,SSUBSCRIBE_History,0,SSUBSCRIBE_Tips,0,ssubscribeCommand,-2,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,SSUBSCRIBE_Keyspecs,1,NULL,1),.args=SSUBSCRIBE_Args}, -{MAKE_CMD("subscribe","Listens for messages published to channels.","O(N) where N is the number of channels to subscribe to.","2.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,SUBSCRIBE_History,0,SUBSCRIBE_Tips,0,subscribeCommand,-2,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,SUBSCRIBE_Keyspecs,0,NULL,1),.args=SUBSCRIBE_Args}, +{MAKE_CMD("ssubscribe","Listens for messages published to shard channels.","O(N) where N is the number of shard channels to subscribe to.","7.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,SSUBSCRIBE_History,0,SSUBSCRIBE_Tips,0,ssubscribeCommand,-2,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_DENYOOM,0,SSUBSCRIBE_Keyspecs,1,NULL,1),.args=SSUBSCRIBE_Args}, +{MAKE_CMD("subscribe","Listens for messages published to channels.","O(N) where N is the number of channels to subscribe to.","2.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,SUBSCRIBE_History,0,SUBSCRIBE_Tips,0,subscribeCommand,-2,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL|CMD_DENYOOM,0,SUBSCRIBE_Keyspecs,0,NULL,1),.args=SUBSCRIBE_Args}, {MAKE_CMD("sunsubscribe","Stops listening to messages posted to shard channels.","O(N) where N is the number of shard channels to unsubscribe.","7.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,SUNSUBSCRIBE_History,0,SUNSUBSCRIBE_Tips,0,sunsubscribeCommand,-1,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,0,SUNSUBSCRIBE_Keyspecs,1,NULL,1),.args=SUNSUBSCRIBE_Args}, {MAKE_CMD("unsubscribe","Stops listening to messages posted to channels.","O(N) where N is the number of channels to 
unsubscribe.","2.0.0",CMD_DOC_NONE,NULL,NULL,"pubsub",COMMAND_GROUP_PUBSUB,UNSUBSCRIBE_History,0,UNSUBSCRIBE_Tips,0,unsubscribeCommand,-1,CMD_PUBSUB|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,0,UNSUBSCRIBE_Keyspecs,0,NULL,1),.args=UNSUBSCRIBE_Args}, -/* rate_limit */ -{MAKE_CMD("gcra","Rate limit via GCRA (Generic Cell Rate Algorithm).","O(1)","8.8.0",CMD_DOC_NONE,NULL,NULL,"rate_limit",COMMAND_GROUP_RATE_LIMIT,GCRA_History,0,GCRA_Tips,0,gcraCommand,-5,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_RATE_LIMIT,GCRA_Keyspecs,1,NULL,5),.args=GCRA_Args}, -{MAKE_CMD("gcrasetvalue","An internal command for recording a GCRA TAT value during AOF rewrite and replication.","O(1)","8.8.0",CMD_DOC_NONE,NULL,NULL,"rate_limit",COMMAND_GROUP_RATE_LIMIT,GCRASETVALUE_History,0,GCRASETVALUE_Tips,0,gcraSetValueCommand,3,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_RATE_LIMIT,GCRASETVALUE_Keyspecs,1,NULL,2),.args=GCRASETVALUE_Args}, /* scripting */ {MAKE_CMD("eval","Executes a server-side Lua script.","Depends on the script that is executed.","2.6.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,EVAL_History,0,EVAL_Tips,0,evalCommand,-3,CMD_NOSCRIPT|CMD_SKIP_MONITOR|CMD_MAY_REPLICATE|CMD_NO_MANDATORY_KEYS|CMD_STALE,ACL_CATEGORY_SCRIPTING,EVAL_Keyspecs,1,evalGetKeys,4),.args=EVAL_Args}, {MAKE_CMD("evalsha","Executes a server-side Lua script by SHA1 digest.","Depends on the script that is executed.","2.6.0",CMD_DOC_NONE,NULL,NULL,"scripting",COMMAND_GROUP_SCRIPTING,EVALSHA_History,0,EVALSHA_Tips,0,evalShaCommand,-3,CMD_NOSCRIPT|CMD_SKIP_MONITOR|CMD_MAY_REPLICATE|CMD_NO_MANDATORY_KEYS|CMD_STALE,ACL_CATEGORY_SCRIPTING,EVALSHA_Keyspecs,1,evalGetKeys,4),.args=EVALSHA_Args}, diff --git a/src/commands/arcount.json b/src/commands/arcount.json new file mode 100644 index 000000000..3452a6ec7 --- /dev/null +++ b/src/commands/arcount.json @@ -0,0 +1,48 @@ +{ + "ARCOUNT": { + "summary": "Returns the number of non-empty elements in an array.", + "complexity": "O(1)", + "group": 
"array", + "since": "8.8.0", + "arity": 2, + "function": "arcountCommand", + "command_flags": [ + "READONLY", + "FAST" + ], + "acl_categories": [ + "ARRAY" + ], + "key_specs": [ + { + "flags": [ + "RO", + "ACCESS" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "The number of non-empty elements, or 0 if key does not exist.", + "type": "integer" + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + } + ] + } +} diff --git a/src/commands/ardel.json b/src/commands/ardel.json new file mode 100644 index 000000000..e29d56181 --- /dev/null +++ b/src/commands/ardel.json @@ -0,0 +1,53 @@ +{ + "ARDEL": { + "summary": "Deletes elements at the specified indices in an array.", + "complexity": "O(N) where N is the number of indices to delete", + "group": "array", + "since": "8.8.0", + "arity": -3, + "function": "ardelCommand", + "command_flags": [ + "WRITE", + "FAST" + ], + "acl_categories": [ + "ARRAY" + ], + "key_specs": [ + { + "flags": [ + "RW", + "DELETE" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "Number of elements deleted.", + "type": "integer" + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "index", + "type": "integer", + "multiple": true + } + ] + } +} diff --git a/src/commands/ardelrange.json b/src/commands/ardelrange.json new file mode 100644 index 000000000..0ed67ced9 --- /dev/null +++ b/src/commands/ardelrange.json @@ -0,0 +1,62 @@ +{ + "ARDELRANGE": { + "summary": "Deletes elements in one or more ranges.", + "complexity": "Proportional to the number of existing elements / slices touched, not to the numeric span of the requested ranges", + "group": "array", + "since": "8.8.0", + "arity": -4, + "function": 
"ardelrangeCommand", + "command_flags": [ + "WRITE" + ], + "acl_categories": [ + "ARRAY" + ], + "key_specs": [ + { + "flags": [ + "RW", + "DELETE" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "Number of elements deleted.", + "type": "integer" + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "range", + "type": "block", + "multiple": true, + "arguments": [ + { + "name": "start", + "type": "integer" + }, + { + "name": "end", + "type": "integer" + } + ] + } + ] + } +} diff --git a/src/commands/arget.json b/src/commands/arget.json new file mode 100644 index 000000000..481bb4f66 --- /dev/null +++ b/src/commands/arget.json @@ -0,0 +1,60 @@ +{ + "ARGET": { + "summary": "Gets the value at an index in an array.", + "complexity": "O(1)", + "group": "array", + "since": "8.8.0", + "arity": 3, + "function": "argetCommand", + "command_flags": [ + "READONLY", + "FAST" + ], + "acl_categories": [ + "ARRAY" + ], + "key_specs": [ + { + "flags": [ + "RO", + "ACCESS" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "oneOf": [ + { + "description": "The value at the given index.", + "type": "string" + }, + { + "description": "Null reply if key or index does not exist.", + "type": "null" + } + ] + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "index", + "type": "integer" + } + ] + } +} diff --git a/src/commands/argetrange.json b/src/commands/argetrange.json new file mode 100644 index 000000000..02d1fa6f0 --- /dev/null +++ b/src/commands/argetrange.json @@ -0,0 +1,64 @@ +{ + "ARGETRANGE": { + "summary": "Gets values in a range of indices.", + "complexity": "O(N) where N is the range length", + "group": "array", + "since": "8.8.0", + 
"arity": 4, + "function": "argetrangeCommand", + "command_flags": [ + "READONLY" + ], + "acl_categories": [ + "ARRAY" + ], + "key_specs": [ + { + "flags": [ + "RO", + "ACCESS" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "type": "array", + "items": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] + } + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "start", + "type": "integer" + }, + { + "name": "end", + "type": "integer" + } + ] + } +} diff --git a/src/commands/argrep.json b/src/commands/argrep.json new file mode 100644 index 000000000..4ca4fa6f8 --- /dev/null +++ b/src/commands/argrep.json @@ -0,0 +1,182 @@ +{ + "ARGREP": { + "summary": "Searches array elements in a range using textual predicates.", + "complexity": "O(P * C) where P is the number of visited positions in touched slices and C is the cost of evaluating the predicates on one existing element.", + "group": "array", + "since": "8.8.0", + "arity": -6, + "function": "argrepCommand", + "command_flags": [ + "READONLY" + ], + "acl_categories": [ + "ARRAY" + ], + "key_specs": [ + { + "flags": [ + "RO", + "ACCESS" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "anyOf": [ + { + "description": "Array of matching indexes.", + "type": "array", + "items": { + "type": "integer", + "description": "Index of a matching element" + } + }, + { + "description": "Array of [index, value] pairs. 
Returned in case `WITHVALUES` was used.", + "type": "array", + "items": { + "type": "array", + "minItems": 2, + "maxItems": 2, + "items": [ + { + "type": "integer", + "description": "Index of a matching element" + }, + { + "type": "string", + "description": "Value at that index" + } + ] + } + } + ] + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "start", + "type": "string" + }, + { + "name": "end", + "type": "string" + }, + { + "name": "predicate", + "type": "oneof", + "multiple": true, + "arguments": [ + { + "name": "exact", + "type": "block", + "arguments": [ + { + "name": "exact", + "type": "pure-token", + "token": "EXACT" + }, + { + "name": "string", + "type": "string" + } + ] + }, + { + "name": "match", + "type": "block", + "arguments": [ + { + "name": "match", + "type": "pure-token", + "token": "MATCH" + }, + { + "name": "string", + "type": "string" + } + ] + }, + { + "name": "glob", + "type": "block", + "arguments": [ + { + "name": "glob", + "type": "pure-token", + "token": "GLOB" + }, + { + "name": "pattern", + "type": "string" + } + ] + }, + { + "name": "re", + "type": "block", + "arguments": [ + { + "name": "re", + "type": "pure-token", + "token": "RE" + }, + { + "name": "pattern", + "type": "string" + } + ] + } + ] + }, + { + "name": "options", + "type": "oneof", + "optional": true, + "multiple": true, + "arguments": [ + { + "name": "and", + "type": "pure-token", + "token": "AND" + }, + { + "name": "or", + "type": "pure-token", + "token": "OR" + }, + { + "name": "limit", + "type": "integer", + "token": "LIMIT" + }, + { + "name": "withvalues", + "type": "pure-token", + "token": "WITHVALUES" + }, + { + "name": "nocase", + "type": "pure-token", + "token": "NOCASE" + } + ] + } + ] + } +} diff --git a/src/commands/arinfo.json b/src/commands/arinfo.json new file mode 100644 index 000000000..09b06ef10 --- /dev/null +++ b/src/commands/arinfo.json @@ -0,0 +1,103 @@ +{ + "ARINFO": { + "summary": "Returns 
metadata about an array.", + "complexity": "O(1), or O(N) with FULL option where N is the number of slices.", + "group": "array", + "since": "8.8.0", + "arity": -2, + "function": "arinfoCommand", + "command_flags": [ + "READONLY" + ], + "acl_categories": [ + "ARRAY" + ], + "key_specs": [ + { + "flags": [ + "RO", + "ACCESS" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "type": "object", + "additionalProperties": false, + "properties": { + "count": { + "type": "integer", + "description": "Total number of non-empty elements." + }, + "len": { + "type": "integer", + "description": "Logical length (highest index + 1)." + }, + "next-insert-index": { + "type": "integer", + "description": "Index the next ARINSERT would use, or 0 if unset/exhausted." + }, + "slices": { + "type": "integer", + "description": "Number of allocated slices." + }, + "directory-size": { + "type": "integer", + "description": "Directory allocation capacity (flat dir_alloc or superdir sdir_cap)." + }, + "super-dir-entries": { + "type": "integer", + "description": "Number of super-directory entries (0 if not in superdir mode)." + }, + "slice-size": { + "type": "integer", + "description": "Configured slice size." + }, + "dense-slices": { + "type": "integer", + "description": "Number of dense slices (FULL only)." + }, + "sparse-slices": { + "type": "integer", + "description": "Number of sparse slices (FULL only)." + }, + "avg-dense-size": { + "type": "number", + "description": "Average allocation size of dense slices (FULL only)." + }, + "avg-dense-fill": { + "type": "number", + "description": "Average fill rate of dense slices (FULL only)." + }, + "avg-sparse-size": { + "type": "number", + "description": "Average capacity of sparse slices (FULL only)." 
+ } + } + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "full", + "type": "pure-token", + "token": "FULL", + "optional": true + } + ] + } +} diff --git a/src/commands/arinsert.json b/src/commands/arinsert.json new file mode 100644 index 000000000..6b8c6ed76 --- /dev/null +++ b/src/commands/arinsert.json @@ -0,0 +1,54 @@ +{ + "ARINSERT": { + "summary": "Inserts one or more values at consecutive indices.", + "complexity": "O(N) where N is the number of values", + "group": "array", + "since": "8.8.0", + "arity": -3, + "function": "arinsertCommand", + "command_flags": [ + "WRITE", + "DENYOOM", + "FAST" + ], + "acl_categories": [ + "ARRAY" + ], + "key_specs": [ + { + "flags": [ + "RW", + "UPDATE" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "The last index where a value was inserted.", + "type": "integer" + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "value", + "type": "string", + "multiple": true + } + ] + } +} diff --git a/src/commands/arlastitems.json b/src/commands/arlastitems.json new file mode 100644 index 000000000..ed888bf83 --- /dev/null +++ b/src/commands/arlastitems.json @@ -0,0 +1,66 @@ +{ + "ARLASTITEMS": { + "summary": "Returns the most recently inserted elements.", + "complexity": "O(N) where N is the count", + "group": "array", + "since": "8.8.0", + "arity": -3, + "function": "arlastitemsCommand", + "command_flags": [ + "READONLY" + ], + "acl_categories": [ + "ARRAY" + ], + "key_specs": [ + { + "flags": [ + "RO", + "ACCESS" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "type": "array", + "items": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] + } + }, + "arguments": 
[ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "count", + "type": "integer" + }, + { + "name": "rev", + "type": "pure-token", + "token": "REV", + "optional": true + } + ] + } +} diff --git a/src/commands/arlen.json b/src/commands/arlen.json new file mode 100644 index 000000000..36143dfc7 --- /dev/null +++ b/src/commands/arlen.json @@ -0,0 +1,48 @@ +{ + "ARLEN": { + "summary": "Returns the length of an array (max index + 1).", + "complexity": "O(1)", + "group": "array", + "since": "8.8.0", + "arity": 2, + "function": "arlenCommand", + "command_flags": [ + "READONLY", + "FAST" + ], + "acl_categories": [ + "ARRAY" + ], + "key_specs": [ + { + "flags": [ + "RO", + "ACCESS" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "The length of the array (max index + 1), or 0 if key does not exist.", + "type": "integer" + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + } + ] + } +} diff --git a/src/commands/armget.json b/src/commands/armget.json new file mode 100644 index 000000000..f05023e03 --- /dev/null +++ b/src/commands/armget.json @@ -0,0 +1,62 @@ +{ + "ARMGET": { + "summary": "Gets values at multiple indices in an array.", + "complexity": "O(N) where N is the number of indices", + "group": "array", + "since": "8.8.0", + "arity": -3, + "function": "armgetCommand", + "command_flags": [ + "READONLY", + "FAST" + ], + "acl_categories": [ + "ARRAY" + ], + "key_specs": [ + { + "flags": [ + "RO", + "ACCESS" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "type": "array", + "items": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] + } + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": 
"index", + "type": "integer", + "multiple": true + } + ] + } +} diff --git a/src/commands/armset.json b/src/commands/armset.json new file mode 100644 index 000000000..002f01bc2 --- /dev/null +++ b/src/commands/armset.json @@ -0,0 +1,64 @@ +{ + "ARMSET": { + "summary": "Sets multiple index-value pairs in an array.", + "complexity": "O(N) where N is the number of pairs", + "group": "array", + "since": "8.8.0", + "arity": -4, + "function": "armsetCommand", + "command_flags": [ + "WRITE", + "DENYOOM", + "FAST" + ], + "acl_categories": [ + "ARRAY" + ], + "key_specs": [ + { + "flags": [ + "RW", + "UPDATE" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "Number of new slots that were set (previously empty).", + "type": "integer" + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "data", + "type": "block", + "multiple": true, + "arguments": [ + { + "name": "index", + "type": "integer" + }, + { + "name": "value", + "type": "string" + } + ] + } + ] + } +} diff --git a/src/commands/arnext.json b/src/commands/arnext.json new file mode 100644 index 000000000..f64b178d0 --- /dev/null +++ b/src/commands/arnext.json @@ -0,0 +1,56 @@ +{ + "ARNEXT": { + "summary": "Returns the next index ARINSERT would use.", + "complexity": "O(1)", + "group": "array", + "since": "8.8.0", + "arity": 2, + "function": "arnextCommand", + "command_flags": [ + "READONLY", + "FAST" + ], + "acl_categories": [ + "ARRAY" + ], + "key_specs": [ + { + "flags": [ + "RO", + "ACCESS" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "oneOf": [ + { + "description": "The next index ARINSERT would use. 
Returns 0 for missing keys or when no insert happened yet.", + "type": "integer" + }, + { + "description": "Null when the insertion cursor is exhausted (next insert would overflow).", + "type": "null" + } + ] + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + } + ] + } +} diff --git a/src/commands/arop.json b/src/commands/arop.json new file mode 100644 index 000000000..eb18566bb --- /dev/null +++ b/src/commands/arop.json @@ -0,0 +1,123 @@ +{ + "AROP": { + "summary": "Performs aggregate operations on array elements in a range.", + "complexity": "O(P) where P is visited positions in touched slices (dense scanned slots + sparse entries), with worst-case O(|end-start|+1) and typical case close to O(N), where N is the number of existing elements in range.", + "group": "array", + "since": "8.8.0", + "arity": -5, + "function": "aropCommand", + "command_flags": [ + "READONLY" + ], + "acl_categories": [ + "ARRAY" + ], + "key_specs": [ + { + "flags": [ + "RO", + "ACCESS" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "oneOf": [ + { + "description": "Result of the operation.", + "type": "string" + }, + { + "description": "Integer result for MATCH, USED, AND, OR, XOR.", + "type": "integer" + }, + { + "description": "Null if no elements match the operation.", + "type": "null" + } + ] + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "start", + "type": "integer" + }, + { + "name": "end", + "type": "integer" + }, + { + "name": "operation", + "type": "oneof", + "arguments": [ + { + "name": "sum", + "type": "pure-token", + "token": "SUM" + }, + { + "name": "min", + "type": "pure-token", + "token": "MIN" + }, + { + "name": "max", + "type": "pure-token", + "token": "MAX" + }, + { + "name": "and", + "type": "pure-token", + "token": "AND" + }, + { + "name": "or", + "type": 
"pure-token", + "token": "OR" + }, + { + "name": "xor", + "type": "pure-token", + "token": "XOR" + }, + { + "name": "match", + "type": "block", + "arguments": [ + { + "name": "match", + "type": "pure-token", + "token": "MATCH" + }, + { + "name": "value", + "type": "string" + } + ] + }, + { + "name": "used", + "type": "pure-token", + "token": "USED" + } + ] + } + ] + } +} diff --git a/src/commands/arring.json b/src/commands/arring.json new file mode 100644 index 000000000..01bddf7d7 --- /dev/null +++ b/src/commands/arring.json @@ -0,0 +1,57 @@ +{ + "ARRING": { + "summary": "Inserts values into a ring buffer of specified size, wrapping and truncating as needed.", + "complexity": "O(M) normally, O(N+M) on ring resize, where N is the maximum of the old and new ring size and M is the number of inserted values", + "group": "array", + "since": "8.8.0", + "arity": -4, + "function": "arringCommand", + "command_flags": [ + "WRITE", + "DENYOOM" + ], + "acl_categories": [ + "ARRAY" + ], + "key_specs": [ + { + "flags": [ + "RW", + "UPDATE" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "The last index where a value was inserted.", + "type": "integer" + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "size", + "type": "integer" + }, + { + "name": "value", + "type": "string", + "multiple": true + } + ] + } +} diff --git a/src/commands/arscan.json b/src/commands/arscan.json new file mode 100644 index 000000000..3c75f3207 --- /dev/null +++ b/src/commands/arscan.json @@ -0,0 +1,76 @@ +{ + "ARSCAN": { + "summary": "Iterates existing elements in a range, returning index-value pairs.", + "complexity": "O(P) where P is visited positions in touched slices (dense scanned slots + sparse entries), with worst-case O(|end-start|+1) and typical case close to O(N), where N is the number of existing elements in 
range.", + "group": "array", + "since": "8.8.0", + "arity": -4, + "function": "arscanCommand", + "command_flags": [ + "READONLY" + ], + "acl_categories": [ + "ARRAY" + ], + "key_specs": [ + { + "flags": [ + "RO", + "ACCESS" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "Array of [index, value] pairs.", + "type": "array", + "items": { + "type": "array", + "minItems": 2, + "maxItems": 2, + "items": [ + { + "type": "integer", + "description": "Index of existing element" + }, + { + "type": "string", + "description": "Value at that index" + } + ] + } + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "start", + "type": "integer" + }, + { + "name": "end", + "type": "integer" + }, + { + "name": "limit", + "token": "LIMIT", + "type": "integer", + "optional": true + } + ] + } +} diff --git a/src/commands/gcrasetvalue.json b/src/commands/arseek.json similarity index 73% rename from src/commands/gcrasetvalue.json rename to src/commands/arseek.json index 5cce15cf4..58904c77d 100644 --- a/src/commands/gcrasetvalue.json +++ b/src/commands/arseek.json @@ -1,23 +1,22 @@ { - "GCRASETVALUE": { - "summary": "An internal command for recording a GCRA TAT value during AOF rewrite and replication.", + "ARSEEK": { + "summary": "Sets the ARINSERT / ARRING cursor to a specific index.", "complexity": "O(1)", - "group": "rate_limit", + "group": "array", "since": "8.8.0", "arity": 3, - "function": "gcraSetValueCommand", + "function": "arseekCommand", "command_flags": [ "WRITE", - "DENYOOM", "FAST" ], "acl_categories": [ - "RATE_LIMIT" + "ARRAY" ], "key_specs": [ { "flags": [ - "OW", + "RW", "UPDATE" ], "begin_search": { @@ -35,7 +34,8 @@ } ], "reply_schema": { - "const": "OK" + "description": "1 if the cursor was set, 0 if the key does not exist.", + "type": "integer" }, "arguments": [ { @@ -44,7 +44,7 @@ 
"key_spec_index": 0 }, { - "name": "tat", + "name": "index", "type": "integer" } ] diff --git a/src/commands/arset.json b/src/commands/arset.json new file mode 100644 index 000000000..6d5e8453f --- /dev/null +++ b/src/commands/arset.json @@ -0,0 +1,58 @@ +{ + "ARSET": { + "summary": "Sets one or more contiguous values starting at an index in an array.", + "complexity": "O(N) where N is the number of values", + "group": "array", + "since": "8.8.0", + "arity": -4, + "function": "arsetCommand", + "command_flags": [ + "WRITE", + "DENYOOM", + "FAST" + ], + "acl_categories": [ + "ARRAY" + ], + "key_specs": [ + { + "flags": [ + "RW", + "UPDATE" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "Number of new slots that were set (previously empty).", + "type": "integer" + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "index", + "type": "integer" + }, + { + "name": "value", + "type": "string", + "multiple": true + } + ] + } +} diff --git a/src/commands/command-docs.json b/src/commands/command-docs.json index 5e76c806c..7648d7d51 100644 --- a/src/commands/command-docs.json +++ b/src/commands/command-docs.json @@ -59,6 +59,9 @@ { "const": "hyperloglog" }, + { + "const": "array" + }, { "const": "list" }, diff --git a/src/commands/gcra.json b/src/commands/gcra.json deleted file mode 100644 index 6980af1ac..000000000 --- a/src/commands/gcra.json +++ /dev/null @@ -1,92 +0,0 @@ -{ - "GCRA": { - "summary": "Rate limit via GCRA (Generic Cell Rate Algorithm).", - "complexity": "O(1)", - "group": "rate_limit", - "since": "8.8.0", - "arity": -5, - "function": "gcraCommand", - "command_flags": [ - "WRITE", - "DENYOOM", - "FAST" - ], - "acl_categories": [ - "RATE_LIMIT" - ], - "key_specs": [ - { - "flags": [ - "RW", - "ACCESS", - "UPDATE" - ], - "begin_search": { - "index": { - "pos": 1 - } - }, - 
"find_keys": { - "range": { - "lastkey": 0, - "step": 1, - "limit": 0 - } - } - } - ], - "reply_schema": { - "type": "array", - "minItems": 5, - "maxItems": 5, - "description": "Rate limiting result", - "items": [ - { - "type": "integer", - "description": "Limited: 0 if allowed, 1 if rate limited" - }, - { - "type": "integer", - "description": "Max request tokens: always equal to max_burst+1" - }, - { - "type": "integer", - "description": "Number of tokens available immediately" - }, - { - "type": "integer", - "description": "Retry after: seconds after which the caller should retry. Always -1 if not limited" - }, - { - "type": "integer", - "description": "Full burst after: seconds after which a full burst will be allowed" - } - ] - }, - "arguments": [ - { - "name": "key", - "type": "key", - "key_spec_index": 0 - }, - { - "name": "max-burst", - "type": "integer" - }, - { - "name": "tokens-per-period", - "type": "integer" - }, - { - "name": "period", - "type": "double" - }, - { - "name": "count", - "type": "integer", - "token": "TOKENS", - "optional": true - } - ] - } -} diff --git a/src/commands/psubscribe.json b/src/commands/psubscribe.json index cab5d14ef..8c56db2cc 100644 --- a/src/commands/psubscribe.json +++ b/src/commands/psubscribe.json @@ -11,7 +11,8 @@ "NOSCRIPT", "LOADING", "STALE", - "SENTINEL" + "SENTINEL", + "DENYOOM" ], "arguments": [ { diff --git a/src/commands/ssubscribe.json b/src/commands/ssubscribe.json index 46373d541..5bebc6c8c 100644 --- a/src/commands/ssubscribe.json +++ b/src/commands/ssubscribe.json @@ -10,7 +10,8 @@ "PUBSUB", "NOSCRIPT", "LOADING", - "STALE" + "STALE", + "DENYOOM" ], "arguments": [ { diff --git a/src/commands/subscribe.json b/src/commands/subscribe.json index bdf12b726..63e838d7d 100644 --- a/src/commands/subscribe.json +++ b/src/commands/subscribe.json @@ -12,7 +12,8 @@ "NOSCRIPT", "LOADING", "STALE", - "SENTINEL" + "SENTINEL", + "DENYOOM" ], "arguments": [ { diff --git a/src/config.c b/src/config.c index 
1320c8981..59bef0a1b 100644 --- a/src/config.c +++ b/src/config.c @@ -24,6 +24,7 @@ #include #include #include +#include /*----------------------------------------------------------------------------- * Config file name-value maps. @@ -2428,13 +2429,7 @@ static int isValidAnnouncedNodename(char *val,const char **err) { return 1; } -static int isValidAnnouncedHostname(char *val, const char **err) { - if (strlen(val) >= NET_HOST_STR_LEN) { - *err = "Hostnames must be less than " - STRINGIFY(NET_HOST_STR_LEN) " characters"; - return 0; - } - +static int isValidHostnameChars(char *val, const char **err) { int i = 0; char c; while ((c = val[i])) { @@ -2452,6 +2447,39 @@ static int isValidAnnouncedHostname(char *val, const char **err) { return 1; } +static int isValidAnnouncedHostname(char *val, const char **err) { + if (strlen(val) >= NET_HOST_STR_LEN) { + *err = "Hostnames must be less than " + STRINGIFY(NET_HOST_STR_LEN) " characters"; + return 0; + } + return isValidHostnameChars(val, err); +} + +/* Validation function for cluster-announce-ip. + * Ensures the IP address is valid and rejects control characters. 
*/ +static int isValidClusterAnnounceIp(char *val, const char **err) { + unsigned char buf[sizeof(struct in6_addr)]; + /* Empty string is allowed - it will be converted to NULL by EMPTY_STRING_IS_NULL flag */ + if (val[0] == '\0') { + return 1; + } + + /* Accept valid IPv4 or IPv6 literals as-is */ + if (inet_pton(AF_INET, val, buf) == 1 || inet_pton(AF_INET6, val, buf) == 1) { + return 1; + } + /* Not an IP literal: treat the value as a hostname, limited to NET_IP_STR_LEN since + * cluster_announce_ip is stored in a NET_IP_STR_LEN buffer */ + if (strlen(val) >= NET_IP_STR_LEN) { + *err = "Hostnames for cluster-announce-ip must be less than " + STRINGIFY(NET_IP_STR_LEN) " characters"; + return 0; + } + /* Length is acceptable: the result now depends only on the hostname character check */ + return isValidHostnameChars(val, err); +} + /* Validate specified string is a valid proc-title-template */ static int isValidProcTitleTemplate(char *val, const char **err) { if (!validateProcTitleTemplate(val)) { @@ -2461,6 +2489,33 @@ static int isValidProcTitleTemplate(char *val, const char **err) { return 1; } +/* Validate that array-slice-size is a positive power of two (zero and negative values are rejected) */ +static int isValidArraySliceSize(long long val, const char **err) { + if (val <= 0 || (val & (val - 1)) != 0) { + *err = "array-slice-size must be a power of two"; + return 0; + } + return 1; +} + +/* Validate array-sparse-kmax: if non-zero, must be > the value currently stored in server.array_sparse_kmin */ +static int isValidArraySparseKmax(long long val, const char **err) { + if (val > 0 && (unsigned int)val <= server.array_sparse_kmin) { + *err = "array-sparse-kmax must be greater than array-sparse-kmin when non-zero"; + return 0; + } + return 1; +} + +/* Validate array-sparse-kmin: must be < the value currently stored in server.array_sparse_kmax when that value is non-zero */ +static int isValidArraySparseKmin(long long val, const char **err) { + if (server.array_sparse_kmax > 0 && (unsigned int)val >= server.array_sparse_kmax) { + *err = "array-sparse-kmin must be less than array-sparse-kmax"; + return 0; + } + return 1; +} + static int updateLocaleCollate(const char **err) { const char *s = 
setlocale(LC_COLLATE, server.locale_collate); if (s == NULL) { @@ -2917,7 +2972,11 @@ static int setConfigNotifyKeyspaceEventsOption(standardConfig *config, sds *argv } int flags = keyspaceEventsStringToFlags(argv[0]); if (flags == -1) { - *err = "Invalid event class character. Use 'Ag$lshzxeKEtmdnocrSTIV'."; +#ifdef ENABLE_GCRA + *err = "Invalid event class character. Use 'Ag$lshzxeKEtmdnocraSTIV'."; +#else + *err = "Invalid event class character. Use 'Ag$lshzxeKEtmdnocaSTIV'."; +#endif return 0; } server.notify_keyspace_events = flags; @@ -3159,7 +3218,7 @@ standardConfig static_configs[] = { createStringConfig("pidfile", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, server.pidfile, NULL, NULL, NULL), createStringConfig("replica-announce-ip", "slave-announce-ip", MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.slave_announce_ip, NULL, NULL, NULL), createStringConfig("masteruser", NULL, MODIFIABLE_CONFIG | SENSITIVE_CONFIG, EMPTY_STRING_IS_NULL, server.masteruser, NULL, NULL, NULL), - createStringConfig("cluster-announce-ip", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.cluster_announce_ip, NULL, NULL, updateClusterIp), + createStringConfig("cluster-announce-ip", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.cluster_announce_ip, NULL, isValidClusterAnnounceIp, updateClusterIp), createStringConfig("cluster-config-file", NULL, IMMUTABLE_CONFIG, ALLOW_EMPTY_STRING, server.cluster_configfile, "nodes.conf", NULL, NULL), createStringConfig("cluster-announce-hostname", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.cluster_announce_hostname, NULL, isValidAnnouncedHostname, updateClusterHostname), createStringConfig("cluster-announce-human-nodename", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.cluster_announce_human_nodename, NULL, isValidAnnouncedNodename, updateClusterHumanNodename), @@ -3252,6 +3311,10 @@ standardConfig static_configs[] = { createUIntConfig("socket-mark-id", NULL, IMMUTABLE_CONFIG, 0, UINT_MAX, server.socket_mark_id, 
0, INTEGER_CONFIG, NULL, NULL), createUIntConfig("max-new-connections-per-cycle", NULL, MODIFIABLE_CONFIG, 1, 1000, server.max_new_conns_per_cycle, 10, INTEGER_CONFIG, NULL, NULL), createUIntConfig("max-new-tls-connections-per-cycle", NULL, MODIFIABLE_CONFIG, 1, 1000, server.max_new_tls_conns_per_cycle, 1, INTEGER_CONFIG, NULL, NULL), + /* Array type configuration */ + createUIntConfig("array-slice-size", NULL, MODIFIABLE_CONFIG, AR_SLICE_SIZE_MIN, AR_SLICE_SIZE_MAX, server.array_slice_size, AR_SLICE_SIZE_DEFAULT, INTEGER_CONFIG, isValidArraySliceSize, NULL), + createUIntConfig("array-sparse-kmax", NULL, MODIFIABLE_CONFIG, 0, 256, server.array_sparse_kmax, AR_SPARSE_KMAX_DEFAULT, INTEGER_CONFIG, isValidArraySparseKmax, NULL), + createUIntConfig("array-sparse-kmin", NULL, MODIFIABLE_CONFIG, 0, 256, server.array_sparse_kmin, AR_SPARSE_KMIN_DEFAULT, INTEGER_CONFIG, isValidArraySparseKmin, NULL), #ifdef LOG_REQ_RES createUIntConfig("client-default-resp", NULL, IMMUTABLE_CONFIG | HIDDEN_CONFIG, 2, 3, server.client_default_resp, 2, INTEGER_CONFIG, NULL, NULL), #endif diff --git a/src/db.c b/src/db.c index 98197fbb4..87881a991 100644 --- a/src/db.c +++ b/src/db.c @@ -1751,14 +1751,17 @@ int parseScanCursorOrReply(client *c, robj *o, unsigned long long *cursor) { } char *obj_type_name[OBJ_TYPE_MAX] = { - "string", - "list", - "set", - "zset", - "hash", + "string", + "list", + "set", + "zset", + "hash", NULL, /* module type is special */ "stream", + "array", +#ifdef ENABLE_GCRA "gcra" +#endif }; /* Helper function to get type from a string in scan commands */ @@ -2433,11 +2436,14 @@ void copyCommand(client *c) { case OBJ_ZSET: newobj = zsetDup(o); break; case OBJ_HASH: newobj = hashTypeDup(o, &minHashExpire); break; case OBJ_STREAM: newobj = streamDup(o); break; +#ifdef ENABLE_GCRA case OBJ_GCRA: newobj = gcraDup(o); break; +#endif case OBJ_MODULE: newobj = moduleTypeDupOrReply(c, key, newkey, dst->id, o); if (!newobj) return; break; + case OBJ_ARRAY: newobj = 
arrayTypeDup(o); break; default: addReplyError(c, "unknown type object"); return; diff --git a/src/debug.c b/src/debug.c index c6baf4b4d..e14f2a52b 100644 --- a/src/debug.c +++ b/src/debug.c @@ -123,6 +123,7 @@ void mixStringObjectDigest(unsigned char *digest, robj *o) { decrRefCount(o); } +#ifdef ENABLE_GCRA void mixGCRAObjectDigest(unsigned char *digest, robj *o) { char buf[LONG_STR_SIZE]; long long val; @@ -130,6 +131,7 @@ void mixGCRAObjectDigest(unsigned char *digest, robj *o) { int len = ll2string(buf, sizeof(buf), val); mixDigest(digest,buf,len); } +#endif /* This function computes the digest of a data structure stored in the * object 'o'. It is the core of the DEBUG DIGEST command: when taking the @@ -263,8 +265,10 @@ void xorObjectDigest(redisDb *db, robj *keyobj, unsigned char *digest, robj *o) } } streamIteratorStop(&si); +#ifdef ENABLE_GCRA } else if (o->type == OBJ_GCRA) { mixGCRAObjectDigest(digest, o); +#endif } else if (o->type == OBJ_MODULE) { RedisModuleDigest md = {{0},{0},keyobj,db->id}; moduleValue *mv = o->ptr; @@ -274,6 +278,21 @@ void xorObjectDigest(redisDb *db, robj *keyobj, unsigned char *digest, robj *o) mt->digest(&md,mv->value); xorDigest(digest,md.x,sizeof(md.x)); } + } else if (o->type == OBJ_ARRAY) { + redisArray *ar = o->ptr; + uint64_t len = arLen(ar); + for (uint64_t idx = 0; idx < len; idx++) { + void *v = arGet(ar, idx); + if (arIsEmpty(v)) { + /* For empty slots, contribute "(null)" */ + mixDigest(digest, "(null)", 6); + } else { + char vbuf[AR_INLINE_BUFSIZE]; + size_t vlen; + const char *data = arDecode(v, vbuf, sizeof(vbuf), &vlen); + mixDigest(digest, data, vlen); + } + } } else { serverPanic("Unknown object type"); } @@ -1312,9 +1331,11 @@ void serverLogObjectDebugInfo(const robj *o) { serverLog(LL_WARNING,"Skiplist level: %d", (int) ((const zset*)o->ptr)->zsl->level); } else if (o->type == OBJ_STREAM) { serverLog(LL_WARNING,"Stream size: %d", (int) streamLength(o)); +#ifdef ENABLE_GCRA } else if (o->type == OBJ_GCRA) { 
#if UINTPTR_MAX == 0xffffffffffffffff serverLog(LL_WARNING, "GCRA object: %lld", (long long)o->ptr); +#endif #endif } #endif diff --git a/src/defrag.c b/src/defrag.c index 93a7389d9..913e457c2 100644 --- a/src/defrag.c +++ b/src/defrag.c @@ -754,6 +754,32 @@ void defragSet(defragKeysCtx *ctx, kvobj *ob) { ob->ptr = newd; } +/* Arrays can be expensive to defrag in one shot because they may contain many + * independently allocated slices. Small arrays are defragmented immediately, + * while large arrays are queued for later and processed one slice per step. */ +void defragArray(defragKeysCtx *ctx, kvobj *ob) { + serverAssert(ob->type == OBJ_ARRAY); + /* Maybe arCount() is not the best possible value to check against + * server.active_defrag_max_scan_fields, also because anyway when we + * defrag incrementally, we defrag a single slice per call. Yet it makes + * sense in a not very obvious way, for several reasons: + * + * 1. If the array is very sparse, it is an upper bound to the max + * number of slices it is composed of. + * 2. If the array is dense, we will scan in the default case at most 4096 + * entries, and the default defrag limit for max scans is 1000. They + * are roughly comparable numbers. + * 3. In case of a highly sparse array with huge indexes, in superdir mode, + * the super blocks are still going to be at most arCount(). + * + * So regardless of the fact we later will defrag in slice units, this + * is a good trigger for the one shot or incremental selection. */ + if (arCount(ob->ptr) > server.active_defrag_max_scan_fields) + defragLater(ctx, ob); + else + ob->ptr = arDefrag(ob->ptr, activeDefragAlloc); +} + /* Defrag callback for radix tree iterator, called for each node, * used in order to defrag the nodes allocations. 
*/ int defragRaxNode(raxNode **noderef, void *privdata) { @@ -1163,15 +1189,19 @@ void defragKey(defragKeysCtx *ctx, dictEntry *de, dictEntryLink link) { } } else if (ob->type == OBJ_STREAM) { defragStream(ctx, ob); +#ifdef ENABLE_GCRA } else if (ob->type == OBJ_GCRA) { /* GCRA object is just an allocation to a long long value */ #if UINTPTR_MAX == 0xffffffff void *newptr, *ptr = ob->ptr; if ((newptr = activeDefragAlloc(ptr))) ob->ptr = newptr; +#endif #endif } else if (ob->type == OBJ_MODULE) { defragModule(ctx,db, ob); + } else if (ob->type == OBJ_ARRAY) { + defragArray(ctx, ob); } else { serverPanic("Unknown object type"); } @@ -1288,6 +1318,10 @@ int defragLaterItem(kvobj *ob, unsigned long *cursor, monotime endtime, int dbid robj keyobj; initStaticStringObject(keyobj, kvobjGetKey(ob)); return moduleLateDefrag(&keyobj, ob, cursor, endtime, dbid); + } else if (ob->type == OBJ_ARRAY) { + redisArray *ar = ob->ptr; + *cursor = arDefragIncremental(&ar, *cursor, activeDefragAlloc); + ob->ptr = ar; } else { *cursor = 0; /* object type/encoding may have changed since we schedule it for later */ } diff --git a/src/eval.c b/src/eval.c index 30acedd9e..0edea5ddd 100644 --- a/src/eval.c +++ b/src/eval.c @@ -1502,7 +1502,9 @@ void ldbEval(lua_State *lua, sds *argv, int argc) { sdsfree(code); sdsfree(expr); if (lua_pcall(lua,0,1,0)) { - ldbLog(sdscatfmt(sdsempty()," %s",lua_tostring(lua,-1))); + const char *err = lua_tostring(lua,-1); + ldbLog(sdscatfmt(sdsempty()," %s", + err ? err : "(error object is not a string)")); lua_pop(lua,1); return; } diff --git a/src/fast_float_strtod.c b/src/fast_float_strtod.c index 25bddba79..8039c5a9b 100644 --- a/src/fast_float_strtod.c +++ b/src/fast_float_strtod.c @@ -48,6 +48,195 @@ static const double powers_of_ten[] = { 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22 }; +/* ---------------------------------------------------------------------------- + * Eisel-Lemire algorithm — extended-precision powers of five. 
+ * + * The table below maps from decimal scaling (10^q) to a 128-bit binary + * approximation. Since 10^q = 2^q * 5^q and the 2^q factor is exact in + * binary, only 5^q affects the binary significand — so we precompute + * 5^q rounded toward 1 to 128 bits. Used by `compute_float()` to avoid + * any iterative rounding in the widened (mantissa > 2^53) range. + * + * Pulled verbatim from fast_float by Daniel Lemire & Joao Paulo Magalhaes + * (MIT-licensed, https://github.com/fastfloat/fast_float — fast_table.h). + * + * Range: 5^-342 ... 5^308 — covers every value that can produce a finite + * non-zero double from a 64-bit decimal mantissa. 651 entries, each stored + * as { high64, low64 } pairs (1302 uint64_t total). + * ---------------------------------------------------------------------------- */ + +#define EISEL_LEMIRE_SMALLEST_POWER_OF_FIVE -342 +#define EISEL_LEMIRE_LARGEST_POWER_OF_FIVE 308 +#define EISEL_LEMIRE_NUMBER_OF_ENTRIES (2 * (EISEL_LEMIRE_LARGEST_POWER_OF_FIVE - \ + EISEL_LEMIRE_SMALLEST_POWER_OF_FIVE + 1)) + +static const uint64_t power_of_five_128[EISEL_LEMIRE_NUMBER_OF_ENTRIES] = { + 0xeef453d6923bd65a, 0x113faa2906a13b3f, 0x9558b4661b6565f8, 0x4ac7ca59a424c507, 0xbaaee17fa23ebf76, 0x5d79bcf00d2df649, 0xe95a99df8ace6f53, 0xf4d82c2c107973dc, + 0x91d8a02bb6c10594, 0x79071b9b8a4be869, 0xb64ec836a47146f9, 0x9748e2826cdee284, 0xe3e27a444d8d98b7, 0xfd1b1b2308169b25, 0x8e6d8c6ab0787f72, 0xfe30f0f5e50e20f7, + 0xb208ef855c969f4f, 0xbdbd2d335e51a935, 0xde8b2b66b3bc4723, 0xad2c788035e61382, 0x8b16fb203055ac76, 0x4c3bcb5021afcc31, 0xaddcb9e83c6b1793, 0xdf4abe242a1bbf3d, + 0xd953e8624b85dd78, 0xd71d6dad34a2af0d, 0x87d4713d6f33aa6b, 0x8672648c40e5ad68, 0xa9c98d8ccb009506, 0x680efdaf511f18c2, 0xd43bf0effdc0ba48, 0x212bd1b2566def2, + 0x84a57695fe98746d, 0x14bb630f7604b57, 0xa5ced43b7e3e9188, 0x419ea3bd35385e2d, 0xcf42894a5dce35ea, 0x52064cac828675b9, 0x818995ce7aa0e1b2, 0x7343efebd1940993, + 0xa1ebfb4219491a1f, 0x1014ebe6c5f90bf8, 0xca66fa129f9b60a6, 
0xd41a26e077774ef6, 0xfd00b897478238d0, 0x8920b098955522b4, 0x9e20735e8cb16382, 0x55b46e5f5d5535b0, + 0xc5a890362fddbc62, 0xeb2189f734aa831d, 0xf712b443bbd52b7b, 0xa5e9ec7501d523e4, 0x9a6bb0aa55653b2d, 0x47b233c92125366e, 0xc1069cd4eabe89f8, 0x999ec0bb696e840a, + 0xf148440a256e2c76, 0xc00670ea43ca250d, 0x96cd2a865764dbca, 0x380406926a5e5728, 0xbc807527ed3e12bc, 0xc605083704f5ecf2, 0xeba09271e88d976b, 0xf7864a44c633682e, + 0x93445b8731587ea3, 0x7ab3ee6afbe0211d, 0xb8157268fdae9e4c, 0x5960ea05bad82964, 0xe61acf033d1a45df, 0x6fb92487298e33bd, 0x8fd0c16206306bab, 0xa5d3b6d479f8e056, + 0xb3c4f1ba87bc8696, 0x8f48a4899877186c, 0xe0b62e2929aba83c, 0x331acdabfe94de87, 0x8c71dcd9ba0b4925, 0x9ff0c08b7f1d0b14, 0xaf8e5410288e1b6f, 0x7ecf0ae5ee44dd9, + 0xdb71e91432b1a24a, 0xc9e82cd9f69d6150, 0x892731ac9faf056e, 0xbe311c083a225cd2, 0xab70fe17c79ac6ca, 0x6dbd630a48aaf406, 0xd64d3d9db981787d, 0x92cbbccdad5b108, + 0x85f0468293f0eb4e, 0x25bbf56008c58ea5, 0xa76c582338ed2621, 0xaf2af2b80af6f24e, 0xd1476e2c07286faa, 0x1af5af660db4aee1, 0x82cca4db847945ca, 0x50d98d9fc890ed4d, + 0xa37fce126597973c, 0xe50ff107bab528a0, 0xcc5fc196fefd7d0c, 0x1e53ed49a96272c8, 0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7a, 0x9faacf3df73609b1, 0x77b191618c54e9ac, + 0xc795830d75038c1d, 0xd59df5b9ef6a2417, 0xf97ae3d0d2446f25, 0x4b0573286b44ad1d, 0x9becce62836ac577, 0x4ee367f9430aec32, 0xc2e801fb244576d5, 0x229c41f793cda73f, + 0xf3a20279ed56d48a, 0x6b43527578c1110f, 0x9845418c345644d6, 0x830a13896b78aaa9, 0xbe5691ef416bd60c, 0x23cc986bc656d553, 0xedec366b11c6cb8f, 0x2cbfbe86b7ec8aa8, + 0x94b3a202eb1c3f39, 0x7bf7d71432f3d6a9, 0xb9e08a83a5e34f07, 0xdaf5ccd93fb0cc53, 0xe858ad248f5c22c9, 0xd1b3400f8f9cff68, 0x91376c36d99995be, 0x23100809b9c21fa1, + 0xb58547448ffffb2d, 0xabd40a0c2832a78a, 0xe2e69915b3fff9f9, 0x16c90c8f323f516c, 0x8dd01fad907ffc3b, 0xae3da7d97f6792e3, 0xb1442798f49ffb4a, 0x99cd11cfdf41779c, + 0xdd95317f31c7fa1d, 0x40405643d711d583, 0x8a7d3eef7f1cfc52, 0x482835ea666b2572, 0xad1c8eab5ee43b66, 
0xda3243650005eecf, 0xd863b256369d4a40, 0x90bed43e40076a82, + 0x873e4f75e2224e68, 0x5a7744a6e804a291, 0xa90de3535aaae202, 0x711515d0a205cb36, 0xd3515c2831559a83, 0xd5a5b44ca873e03, 0x8412d9991ed58091, 0xe858790afe9486c2, + 0xa5178fff668ae0b6, 0x626e974dbe39a872, 0xce5d73ff402d98e3, 0xfb0a3d212dc8128f, 0x80fa687f881c7f8e, 0x7ce66634bc9d0b99, 0xa139029f6a239f72, 0x1c1fffc1ebc44e80, + 0xc987434744ac874e, 0xa327ffb266b56220, 0xfbe9141915d7a922, 0x4bf1ff9f0062baa8, 0x9d71ac8fada6c9b5, 0x6f773fc3603db4a9, 0xc4ce17b399107c22, 0xcb550fb4384d21d3, + 0xf6019da07f549b2b, 0x7e2a53a146606a48, 0x99c102844f94e0fb, 0x2eda7444cbfc426d, 0xc0314325637a1939, 0xfa911155fefb5308, 0xf03d93eebc589f88, 0x793555ab7eba27ca, + 0x96267c7535b763b5, 0x4bc1558b2f3458de, 0xbbb01b9283253ca2, 0x9eb1aaedfb016f16, 0xea9c227723ee8bcb, 0x465e15a979c1cadc, 0x92a1958a7675175f, 0xbfacd89ec191ec9, + 0xb749faed14125d36, 0xcef980ec671f667b, 0xe51c79a85916f484, 0x82b7e12780e7401a, 0x8f31cc0937ae58d2, 0xd1b2ecb8b0908810, 0xb2fe3f0b8599ef07, 0x861fa7e6dcb4aa15, + 0xdfbdcece67006ac9, 0x67a791e093e1d49a, 0x8bd6a141006042bd, 0xe0c8bb2c5c6d24e0, 0xaecc49914078536d, 0x58fae9f773886e18, 0xda7f5bf590966848, 0xaf39a475506a899e, + 0x888f99797a5e012d, 0x6d8406c952429603, 0xaab37fd7d8f58178, 0xc8e5087ba6d33b83, 0xd5605fcdcf32e1d6, 0xfb1e4a9a90880a64, 0x855c3be0a17fcd26, 0x5cf2eea09a55067f, + 0xa6b34ad8c9dfc06f, 0xf42faa48c0ea481e, 0xd0601d8efc57b08b, 0xf13b94daf124da26, 0x823c12795db6ce57, 0x76c53d08d6b70858, 0xa2cb1717b52481ed, 0x54768c4b0c64ca6e, + 0xcb7ddcdda26da268, 0xa9942f5dcf7dfd09, 0xfe5d54150b090b02, 0xd3f93b35435d7c4c, 0x9efa548d26e5a6e1, 0xc47bc5014a1a6daf, 0xc6b8e9b0709f109a, 0x359ab6419ca1091b, + 0xf867241c8cc6d4c0, 0xc30163d203c94b62, 0x9b407691d7fc44f8, 0x79e0de63425dcf1d, 0xc21094364dfb5636, 0x985915fc12f542e4, 0xf294b943e17a2bc4, 0x3e6f5b7b17b2939d, + 0x979cf3ca6cec5b5a, 0xa705992ceecf9c42, 0xbd8430bd08277231, 0x50c6ff782a838353, 0xece53cec4a314ebd, 0xa4f8bf5635246428, 0x940f4613ae5ed136, 
0x871b7795e136be99, + 0xb913179899f68584, 0x28e2557b59846e3f, 0xe757dd7ec07426e5, 0x331aeada2fe589cf, 0x9096ea6f3848984f, 0x3ff0d2c85def7621, 0xb4bca50b065abe63, 0xfed077a756b53a9, + 0xe1ebce4dc7f16dfb, 0xd3e8495912c62894, 0x8d3360f09cf6e4bd, 0x64712dd7abbbd95c, 0xb080392cc4349dec, 0xbd8d794d96aacfb3, 0xdca04777f541c567, 0xecf0d7a0fc5583a0, + 0x89e42caaf9491b60, 0xf41686c49db57244, 0xac5d37d5b79b6239, 0x311c2875c522ced5, 0xd77485cb25823ac7, 0x7d633293366b828b, 0x86a8d39ef77164bc, 0xae5dff9c02033197, + 0xa8530886b54dbdeb, 0xd9f57f830283fdfc, 0xd267caa862a12d66, 0xd072df63c324fd7b, 0x8380dea93da4bc60, 0x4247cb9e59f71e6d, 0xa46116538d0deb78, 0x52d9be85f074e608, + 0xcd795be870516656, 0x67902e276c921f8b, 0x806bd9714632dff6, 0xba1cd8a3db53b6, 0xa086cfcd97bf97f3, 0x80e8a40eccd228a4, 0xc8a883c0fdaf7df0, 0x6122cd128006b2cd, + 0xfad2a4b13d1b5d6c, 0x796b805720085f81, 0x9cc3a6eec6311a63, 0xcbe3303674053bb0, 0xc3f490aa77bd60fc, 0xbedbfc4411068a9c, 0xf4f1b4d515acb93b, 0xee92fb5515482d44, + 0x991711052d8bf3c5, 0x751bdd152d4d1c4a, 0xbf5cd54678eef0b6, 0xd262d45a78a0635d, 0xef340a98172aace4, 0x86fb897116c87c34, 0x9580869f0e7aac0e, 0xd45d35e6ae3d4da0, + 0xbae0a846d2195712, 0x8974836059cca109, 0xe998d258869facd7, 0x2bd1a438703fc94b, 0x91ff83775423cc06, 0x7b6306a34627ddcf, 0xb67f6455292cbf08, 0x1a3bc84c17b1d542, + 0xe41f3d6a7377eeca, 0x20caba5f1d9e4a93, 0x8e938662882af53e, 0x547eb47b7282ee9c, 0xb23867fb2a35b28d, 0xe99e619a4f23aa43, 0xdec681f9f4c31f31, 0x6405fa00e2ec94d4, + 0x8b3c113c38f9f37e, 0xde83bc408dd3dd04, 0xae0b158b4738705e, 0x9624ab50b148d445, 0xd98ddaee19068c76, 0x3badd624dd9b0957, 0x87f8a8d4cfa417c9, 0xe54ca5d70a80e5d6, + 0xa9f6d30a038d1dbc, 0x5e9fcf4ccd211f4c, 0xd47487cc8470652b, 0x7647c3200069671f, 0x84c8d4dfd2c63f3b, 0x29ecd9f40041e073, 0xa5fb0a17c777cf09, 0xf468107100525890, + 0xcf79cc9db955c2cc, 0x7182148d4066eeb4, 0x81ac1fe293d599bf, 0xc6f14cd848405530, 0xa21727db38cb002f, 0xb8ada00e5a506a7c, 0xca9cf1d206fdc03b, 0xa6d90811f0e4851c, + 0xfd442e4688bd304a, 
0x908f4a166d1da663, 0x9e4a9cec15763e2e, 0x9a598e4e043287fe, 0xc5dd44271ad3cdba, 0x40eff1e1853f29fd, 0xf7549530e188c128, 0xd12bee59e68ef47c, + 0x9a94dd3e8cf578b9, 0x82bb74f8301958ce, 0xc13a148e3032d6e7, 0xe36a52363c1faf01, 0xf18899b1bc3f8ca1, 0xdc44e6c3cb279ac1, 0x96f5600f15a7b7e5, 0x29ab103a5ef8c0b9, + 0xbcb2b812db11a5de, 0x7415d448f6b6f0e7, 0xebdf661791d60f56, 0x111b495b3464ad21, 0x936b9fcebb25c995, 0xcab10dd900beec34, 0xb84687c269ef3bfb, 0x3d5d514f40eea742, + 0xe65829b3046b0afa, 0xcb4a5a3112a5112, 0x8ff71a0fe2c2e6dc, 0x47f0e785eaba72ab, 0xb3f4e093db73a093, 0x59ed216765690f56, 0xe0f218b8d25088b8, 0x306869c13ec3532c, + 0x8c974f7383725573, 0x1e414218c73a13fb, 0xafbd2350644eeacf, 0xe5d1929ef90898fa, 0xdbac6c247d62a583, 0xdf45f746b74abf39, 0x894bc396ce5da772, 0x6b8bba8c328eb783, + 0xab9eb47c81f5114f, 0x66ea92f3f326564, 0xd686619ba27255a2, 0xc80a537b0efefebd, 0x8613fd0145877585, 0xbd06742ce95f5f36, 0xa798fc4196e952e7, 0x2c48113823b73704, + 0xd17f3b51fca3a7a0, 0xf75a15862ca504c5, 0x82ef85133de648c4, 0x9a984d73dbe722fb, 0xa3ab66580d5fdaf5, 0xc13e60d0d2e0ebba, 0xcc963fee10b7d1b3, 0x318df905079926a8, + 0xffbbcfe994e5c61f, 0xfdf17746497f7052, 0x9fd561f1fd0f9bd3, 0xfeb6ea8bedefa633, 0xc7caba6e7c5382c8, 0xfe64a52ee96b8fc0, 0xf9bd690a1b68637b, 0x3dfdce7aa3c673b0, + 0x9c1661a651213e2d, 0x6bea10ca65c084e, 0xc31bfa0fe5698db8, 0x486e494fcff30a62, 0xf3e2f893dec3f126, 0x5a89dba3c3efccfa, 0x986ddb5c6b3a76b7, 0xf89629465a75e01c, + 0xbe89523386091465, 0xf6bbb397f1135823, 0xee2ba6c0678b597f, 0x746aa07ded582e2c, 0x94db483840b717ef, 0xa8c2a44eb4571cdc, 0xba121a4650e4ddeb, 0x92f34d62616ce413, + 0xe896a0d7e51e1566, 0x77b020baf9c81d17, 0x915e2486ef32cd60, 0xace1474dc1d122e, 0xb5b5ada8aaff80b8, 0xd819992132456ba, 0xe3231912d5bf60e6, 0x10e1fff697ed6c69, + 0x8df5efabc5979c8f, 0xca8d3ffa1ef463c1, 0xb1736b96b6fd83b3, 0xbd308ff8a6b17cb2, 0xddd0467c64bce4a0, 0xac7cb3f6d05ddbde, 0x8aa22c0dbef60ee4, 0x6bcdf07a423aa96b, + 0xad4ab7112eb3929d, 0x86c16c98d2c953c6, 0xd89d64d57a607744, 0xe871c7bf077ba8b7, 
0x87625f056c7c4a8b, 0x11471cd764ad4972, 0xa93af6c6c79b5d2d, 0xd598e40d3dd89bcf, + 0xd389b47879823479, 0x4aff1d108d4ec2c3, 0x843610cb4bf160cb, 0xcedf722a585139ba, 0xa54394fe1eedb8fe, 0xc2974eb4ee658828, 0xce947a3da6a9273e, 0x733d226229feea32, + 0x811ccc668829b887, 0x806357d5a3f525f, 0xa163ff802a3426a8, 0xca07c2dcb0cf26f7, 0xc9bcff6034c13052, 0xfc89b393dd02f0b5, 0xfc2c3f3841f17c67, 0xbbac2078d443ace2, + 0x9d9ba7832936edc0, 0xd54b944b84aa4c0d, 0xc5029163f384a931, 0xa9e795e65d4df11, 0xf64335bcf065d37d, 0x4d4617b5ff4a16d5, 0x99ea0196163fa42e, 0x504bced1bf8e4e45, + 0xc06481fb9bcf8d39, 0xe45ec2862f71e1d6, 0xf07da27a82c37088, 0x5d767327bb4e5a4c, 0x964e858c91ba2655, 0x3a6a07f8d510f86f, 0xbbe226efb628afea, 0x890489f70a55368b, + 0xeadab0aba3b2dbe5, 0x2b45ac74ccea842e, 0x92c8ae6b464fc96f, 0x3b0b8bc90012929d, 0xb77ada0617e3bbcb, 0x9ce6ebb40173744, 0xe55990879ddcaabd, 0xcc420a6a101d0515, + 0x8f57fa54c2a9eab6, 0x9fa946824a12232d, 0xb32df8e9f3546564, 0x47939822dc96abf9, 0xdff9772470297ebd, 0x59787e2b93bc56f7, 0x8bfbea76c619ef36, 0x57eb4edb3c55b65a, + 0xaefae51477a06b03, 0xede622920b6b23f1, 0xdab99e59958885c4, 0xe95fab368e45eced, 0x88b402f7fd75539b, 0x11dbcb0218ebb414, 0xaae103b5fcd2a881, 0xd652bdc29f26a119, + 0xd59944a37c0752a2, 0x4be76d3346f0495f, 0x857fcae62d8493a5, 0x6f70a4400c562ddb, 0xa6dfbd9fb8e5b88e, 0xcb4ccd500f6bb952, 0xd097ad07a71f26b2, 0x7e2000a41346a7a7, + 0x825ecc24c873782f, 0x8ed400668c0c28c8, 0xa2f67f2dfa90563b, 0x728900802f0f32fa, 0xcbb41ef979346bca, 0x4f2b40a03ad2ffb9, 0xfea126b7d78186bc, 0xe2f610c84987bfa8, + 0x9f24b832e6b0f436, 0xdd9ca7d2df4d7c9, 0xc6ede63fa05d3143, 0x91503d1c79720dbb, 0xf8a95fcf88747d94, 0x75a44c6397ce912a, 0x9b69dbe1b548ce7c, 0xc986afbe3ee11aba, + 0xc24452da229b021b, 0xfbe85badce996168, 0xf2d56790ab41c2a2, 0xfae27299423fb9c3, 0x97c560ba6b0919a5, 0xdccd879fc967d41a, 0xbdb6b8e905cb600f, 0x5400e987bbc1c920, + 0xed246723473e3813, 0x290123e9aab23b68, 0x9436c0760c86e30b, 0xf9a0b6720aaf6521, 0xb94470938fa89bce, 0xf808e40e8d5b3e69, 0xe7958cb87392c2c2, 
0xb60b1d1230b20e04, + 0x90bd77f3483bb9b9, 0xb1c6f22b5e6f48c2, 0xb4ecd5f01a4aa828, 0x1e38aeb6360b1af3, 0xe2280b6c20dd5232, 0x25c6da63c38de1b0, 0x8d590723948a535f, 0x579c487e5a38ad0e, + 0xb0af48ec79ace837, 0x2d835a9df0c6d851, 0xdcdb1b2798182244, 0xf8e431456cf88e65, 0x8a08f0f8bf0f156b, 0x1b8e9ecb641b58ff, 0xac8b2d36eed2dac5, 0xe272467e3d222f3f, + 0xd7adf884aa879177, 0x5b0ed81dcc6abb0f, 0x86ccbb52ea94baea, 0x98e947129fc2b4e9, 0xa87fea27a539e9a5, 0x3f2398d747b36224, 0xd29fe4b18e88640e, 0x8eec7f0d19a03aad, + 0x83a3eeeef9153e89, 0x1953cf68300424ac, 0xa48ceaaab75a8e2b, 0x5fa8c3423c052dd7, 0xcdb02555653131b6, 0x3792f412cb06794d, 0x808e17555f3ebf11, 0xe2bbd88bbee40bd0, + 0xa0b19d2ab70e6ed6, 0x5b6aceaeae9d0ec4, 0xc8de047564d20a8b, 0xf245825a5a445275, 0xfb158592be068d2e, 0xeed6e2f0f0d56712, 0x9ced737bb6c4183d, 0x55464dd69685606b, + 0xc428d05aa4751e4c, 0xaa97e14c3c26b886, 0xf53304714d9265df, 0xd53dd99f4b3066a8, 0x993fe2c6d07b7fab, 0xe546a8038efe4029, 0xbf8fdb78849a5f96, 0xde98520472bdd033, + 0xef73d256a5c0f77c, 0x963e66858f6d4440, 0x95a8637627989aad, 0xdde7001379a44aa8, 0xbb127c53b17ec159, 0x5560c018580d5d52, 0xe9d71b689dde71af, 0xaab8f01e6e10b4a6, + 0x9226712162ab070d, 0xcab3961304ca70e8, 0xb6b00d69bb55c8d1, 0x3d607b97c5fd0d22, 0xe45c10c42a2b3b05, 0x8cb89a7db77c506a, 0x8eb98a7a9a5b04e3, 0x77f3608e92adb242, + 0xb267ed1940f1c61c, 0x55f038b237591ed3, 0xdf01e85f912e37a3, 0x6b6c46dec52f6688, 0x8b61313bbabce2c6, 0x2323ac4b3b3da015, 0xae397d8aa96c1b77, 0xabec975e0a0d081a, + 0xd9c7dced53c72255, 0x96e7bd358c904a21, 0x881cea14545c7575, 0x7e50d64177da2e54, 0xaa242499697392d2, 0xdde50bd1d5d0b9e9, 0xd4ad2dbfc3d07787, 0x955e4ec64b44e864, + 0x84ec3c97da624ab4, 0xbd5af13bef0b113e, 0xa6274bbdd0fadd61, 0xecb1ad8aeacdd58e, 0xcfb11ead453994ba, 0x67de18eda5814af2, 0x81ceb32c4b43fcf4, 0x80eacf948770ced7, + 0xa2425ff75e14fc31, 0xa1258379a94d028d, 0xcad2f7f5359a3b3e, 0x96ee45813a04330, 0xfd87b5f28300ca0d, 0x8bca9d6e188853fc, 0x9e74d1b791e07e48, 0x775ea264cf55347e, + 0xc612062576589dda, 
0x95364afe032a819e, 0xf79687aed3eec551, 0x3a83ddbd83f52205, 0x9abe14cd44753b52, 0xc4926a9672793543, 0xc16d9a0095928a27, 0x75b7053c0f178294, + 0xf1c90080baf72cb1, 0x5324c68b12dd6339, 0x971da05074da7bee, 0xd3f6fc16ebca5e04, 0xbce5086492111aea, 0x88f4bb1ca6bcf585, 0xec1e4a7db69561a5, 0x2b31e9e3d06c32e6, + 0x9392ee8e921d5d07, 0x3aff322e62439fd0, 0xb877aa3236a4b449, 0x9befeb9fad487c3, 0xe69594bec44de15b, 0x4c2ebe687989a9b4, 0x901d7cf73ab0acd9, 0xf9d37014bf60a11, + 0xb424dc35095cd80f, 0x538484c19ef38c95, 0xe12e13424bb40e13, 0x2865a5f206b06fba, 0x8cbccc096f5088cb, 0xf93f87b7442e45d4, 0xafebff0bcb24aafe, 0xf78f69a51539d749, + 0xdbe6fecebdedd5be, 0xb573440e5a884d1c, 0x89705f4136b4a597, 0x31680a88f8953031, 0xabcc77118461cefc, 0xfdc20d2b36ba7c3e, 0xd6bf94d5e57a42bc, 0x3d32907604691b4d, + 0x8637bd05af6c69b5, 0xa63f9a49c2c1b110, 0xa7c5ac471b478423, 0xfcf80dc33721d54, 0xd1b71758e219652b, 0xd3c36113404ea4a9, 0x83126e978d4fdf3b, 0x645a1cac083126ea, + 0xa3d70a3d70a3d70a, 0x3d70a3d70a3d70a4, 0xcccccccccccccccc, 0xcccccccccccccccd, 0x8000000000000000, 0x0, 0xa000000000000000, 0x0, + 0xc800000000000000, 0x0, 0xfa00000000000000, 0x0, 0x9c40000000000000, 0x0, 0xc350000000000000, 0x0, + 0xf424000000000000, 0x0, 0x9896800000000000, 0x0, 0xbebc200000000000, 0x0, 0xee6b280000000000, 0x0, + 0x9502f90000000000, 0x0, 0xba43b74000000000, 0x0, 0xe8d4a51000000000, 0x0, 0x9184e72a00000000, 0x0, + 0xb5e620f480000000, 0x0, 0xe35fa931a0000000, 0x0, 0x8e1bc9bf04000000, 0x0, 0xb1a2bc2ec5000000, 0x0, + 0xde0b6b3a76400000, 0x0, 0x8ac7230489e80000, 0x0, 0xad78ebc5ac620000, 0x0, 0xd8d726b7177a8000, 0x0, + 0x878678326eac9000, 0x0, 0xa968163f0a57b400, 0x0, 0xd3c21bcecceda100, 0x0, 0x84595161401484a0, 0x0, + 0xa56fa5b99019a5c8, 0x0, 0xcecb8f27f4200f3a, 0x0, 0x813f3978f8940984, 0x4000000000000000, 0xa18f07d736b90be5, 0x5000000000000000, + 0xc9f2c9cd04674ede, 0xa400000000000000, 0xfc6f7c4045812296, 0x4d00000000000000, 0x9dc5ada82b70b59d, 0xf020000000000000, 0xc5371912364ce305, 0x6c28000000000000, + 
0xf684df56c3e01bc6, 0xc732000000000000, 0x9a130b963a6c115c, 0x3c7f400000000000, 0xc097ce7bc90715b3, 0x4b9f100000000000, 0xf0bdc21abb48db20, 0x1e86d40000000000, + 0x96769950b50d88f4, 0x1314448000000000, 0xbc143fa4e250eb31, 0x17d955a000000000, 0xeb194f8e1ae525fd, 0x5dcfab0800000000, 0x92efd1b8d0cf37be, 0x5aa1cae500000000, + 0xb7abc627050305ad, 0xf14a3d9e40000000, 0xe596b7b0c643c719, 0x6d9ccd05d0000000, 0x8f7e32ce7bea5c6f, 0xe4820023a2000000, 0xb35dbf821ae4f38b, 0xdda2802c8a800000, + 0xe0352f62a19e306e, 0xd50b2037ad200000, 0x8c213d9da502de45, 0x4526f422cc340000, 0xaf298d050e4395d6, 0x9670b12b7f410000, 0xdaf3f04651d47b4c, 0x3c0cdd765f114000, + 0x88d8762bf324cd0f, 0xa5880a69fb6ac800, 0xab0e93b6efee0053, 0x8eea0d047a457a00, 0xd5d238a4abe98068, 0x72a4904598d6d880, 0x85a36366eb71f041, 0x47a6da2b7f864750, + 0xa70c3c40a64e6c51, 0x999090b65f67d924, 0xd0cf4b50cfe20765, 0xfff4b4e3f741cf6d, 0x82818f1281ed449f, 0xbff8f10e7a8921a4, 0xa321f2d7226895c7, 0xaff72d52192b6a0d, + 0xcbea6f8ceb02bb39, 0x9bf4f8a69f764490, 0xfee50b7025c36a08, 0x2f236d04753d5b4, 0x9f4f2726179a2245, 0x1d762422c946590, 0xc722f0ef9d80aad6, 0x424d3ad2b7b97ef5, + 0xf8ebad2b84e0d58b, 0xd2e0898765a7deb2, 0x9b934c3b330c8577, 0x63cc55f49f88eb2f, 0xc2781f49ffcfa6d5, 0x3cbf6b71c76b25fb, 0xf316271c7fc3908a, 0x8bef464e3945ef7a, + 0x97edd871cfda3a56, 0x97758bf0e3cbb5ac, 0xbde94e8e43d0c8ec, 0x3d52eeed1cbea317, 0xed63a231d4c4fb27, 0x4ca7aaa863ee4bdd, 0x945e455f24fb1cf8, 0x8fe8caa93e74ef6a, + 0xb975d6b6ee39e436, 0xb3e2fd538e122b44, 0xe7d34c64a9c85d44, 0x60dbbca87196b616, 0x90e40fbeea1d3a4a, 0xbc8955e946fe31cd, 0xb51d13aea4a488dd, 0x6babab6398bdbe41, + 0xe264589a4dcdab14, 0xc696963c7eed2dd1, 0x8d7eb76070a08aec, 0xfc1e1de5cf543ca2, 0xb0de65388cc8ada8, 0x3b25a55f43294bcb, 0xdd15fe86affad912, 0x49ef0eb713f39ebe, + 0x8a2dbf142dfcc7ab, 0x6e3569326c784337, 0xacb92ed9397bf996, 0x49c2c37f07965404, 0xd7e77a8f87daf7fb, 0xdc33745ec97be906, 0x86f0ac99b4e8dafd, 0x69a028bb3ded71a3, + 0xa8acd7c0222311bc, 0xc40832ea0d68ce0c, 
0xd2d80db02aabd62b, 0xf50a3fa490c30190, 0x83c7088e1aab65db, 0x792667c6da79e0fa, 0xa4b8cab1a1563f52, 0x577001b891185938, + 0xcde6fd5e09abcf26, 0xed4c0226b55e6f86, 0x80b05e5ac60b6178, 0x544f8158315b05b4, 0xa0dc75f1778e39d6, 0x696361ae3db1c721, 0xc913936dd571c84c, 0x3bc3a19cd1e38e9, + 0xfb5878494ace3a5f, 0x4ab48a04065c723, 0x9d174b2dcec0e47b, 0x62eb0d64283f9c76, 0xc45d1df942711d9a, 0x3ba5d0bd324f8394, 0xf5746577930d6500, 0xca8f44ec7ee36479, + 0x9968bf6abbe85f20, 0x7e998b13cf4e1ecb, 0xbfc2ef456ae276e8, 0x9e3fedd8c321a67e, 0xefb3ab16c59b14a2, 0xc5cfe94ef3ea101e, 0x95d04aee3b80ece5, 0xbba1f1d158724a12, + 0xbb445da9ca61281f, 0x2a8a6e45ae8edc97, 0xea1575143cf97226, 0xf52d09d71a3293bd, 0x924d692ca61be758, 0x593c2626705f9c56, 0xb6e0c377cfa2e12e, 0x6f8b2fb00c77836c, + 0xe498f455c38b997a, 0xb6dfb9c0f956447, 0x8edf98b59a373fec, 0x4724bd4189bd5eac, 0xb2977ee300c50fe7, 0x58edec91ec2cb657, 0xdf3d5e9bc0f653e1, 0x2f2967b66737e3ed, + 0x8b865b215899f46c, 0xbd79e0d20082ee74, 0xae67f1e9aec07187, 0xecd8590680a3aa11, 0xda01ee641a708de9, 0xe80e6f4820cc9495, 0x884134fe908658b2, 0x3109058d147fdcdd, + 0xaa51823e34a7eede, 0xbd4b46f0599fd415, 0xd4e5e2cdc1d1ea96, 0x6c9e18ac7007c91a, 0x850fadc09923329e, 0x3e2cf6bc604ddb0, 0xa6539930bf6bff45, 0x84db8346b786151c, + 0xcfe87f7cef46ff16, 0xe612641865679a63, 0x81f14fae158c5f6e, 0x4fcb7e8f3f60c07e, 0xa26da3999aef7749, 0xe3be5e330f38f09d, 0xcb090c8001ab551c, 0x5cadf5bfd3072cc5, + 0xfdcb4fa002162a63, 0x73d9732fc7c8f7f6, 0x9e9f11c4014dda7e, 0x2867e7fddcdd9afa, 0xc646d63501a1511d, 0xb281e1fd541501b8, 0xf7d88bc24209a565, 0x1f225a7ca91a4226, + 0x9ae757596946075f, 0x3375788de9b06958, 0xc1a12d2fc3978937, 0x52d6b1641c83ae, 0xf209787bb47d6b84, 0xc0678c5dbd23a49a, 0x9745eb4d50ce6332, 0xf840b7ba963646e0, + 0xbd176620a501fbff, 0xb650e5a93bc3d898, 0xec5d3fa8ce427aff, 0xa3e51f138ab4cebe, 0x93ba47c980e98cdf, 0xc66f336c36b10137, 0xb8a8d9bbe123f017, 0xb80b0047445d4184, + 0xe6d3102ad96cec1d, 0xa60dc059157491e5, 0x9043ea1ac7e41392, 0x87c89837ad68db2f, 0xb454e4a179dd1877, 
0x29babe4598c311fb, 0xe16a1dc9d8545e94, 0xf4296dd6fef3d67a, + 0x8ce2529e2734bb1d, 0x1899e4a65f58660c, 0xb01ae745b101e9e4, 0x5ec05dcff72e7f8f, 0xdc21a1171d42645d, 0x76707543f4fa1f73, 0x899504ae72497eba, 0x6a06494a791c53a8, + 0xabfa45da0edbde69, 0x487db9d17636892, 0xd6f8d7509292d603, 0x45a9d2845d3c42b6, 0x865b86925b9bc5c2, 0xb8a2392ba45a9b2, 0xa7f26836f282b732, 0x8e6cac7768d7141e, + 0xd1ef0244af2364ff, 0x3207d795430cd926, 0x8335616aed761f1f, 0x7f44e6bd49e807b8, 0xa402b9c5a8d3a6e7, 0x5f16206c9c6209a6, 0xcd036837130890a1, 0x36dba887c37a8c0f, + 0x802221226be55a64, 0xc2494954da2c9789, 0xa02aa96b06deb0fd, 0xf2db9baa10b7bd6c, 0xc83553c5c8965d3d, 0x6f92829494e5acc7, 0xfa42a8b73abbf48c, 0xcb772339ba1f17f9, + 0x9c69a97284b578d7, 0xff2a760414536efb, 0xc38413cf25e2d70d, 0xfef5138519684aba, 0xf46518c2ef5b8cd1, 0x7eb258665fc25d69, 0x98bf2f79d5993802, 0xef2f773ffbd97a61, + 0xbeeefb584aff8603, 0xaafb550ffacfd8fa, 0xeeaaba2e5dbf6784, 0x95ba2a53f983cf38, 0x952ab45cfa97a0b2, 0xdd945a747bf26183, 0xba756174393d88df, 0x94f971119aeef9e4, + 0xe912b9d1478ceb17, 0x7a37cd5601aab85d, 0x91abb422ccb812ee, 0xac62e055c10ab33a, 0xb616a12b7fe617aa, 0x577b986b314d6009, 0xe39c49765fdf9d94, 0xed5a7e85fda0b80b, + 0x8e41ade9fbebc27d, 0x14588f13be847307, 0xb1d219647ae6b31c, 0x596eb2d8ae258fc8, 0xde469fbd99a05fe3, 0x6fca5f8ed9aef3bb, 0x8aec23d680043bee, 0x25de7bb9480d5854, + 0xada72ccc20054ae9, 0xaf561aa79a10ae6a, 0xd910f7ff28069da4, 0x1b2ba1518094da04, 0x87aa9aff79042286, 0x90fb44d2f05d0842, 0xa99541bf57452b28, 0x353a1607ac744a53, + 0xd3fa922f2d1675f2, 0x42889b8997915ce8, 0x847c9b5d7c2e09b7, 0x69956135febada11, 0xa59bc234db398c25, 0x43fab9837e699095, 0xcf02b2c21207ef2e, 0x94f967e45e03f4bb, + 0x8161afb94b44f57d, 0x1d1be0eebac278f5, 0xa1ba1ba79e1632dc, 0x6462d92a69731732, 0xca28a291859bbf93, 0x7d7b8f7503cfdcfe, 0xfcb2cb35e702af78, 0x5cda735244c3d43e, + 0x9defbf01b061adab, 0x3a0888136afa64a7, 0xc56baec21c7a1916, 0x88aaa1845b8fdd0, 0xf6c69a72a3989f5b, 0x8aad549e57273d45, 0x9a3c2087a63f6399, 
0x36ac54e2f678864b, + 0xc0cb28a98fcf3c7f, 0x84576a1bb416a7dd, 0xf0fdf2d3f3c30b9f, 0x656d44a2a11c51d5, 0x969eb7c47859e743, 0x9f644ae5a4b1b325, 0xbc4665b596706114, 0x873d5d9f0dde1fee, + 0xeb57ff22fc0c7959, 0xa90cb506d155a7ea, 0x9316ff75dd87cbd8, 0x9a7f12442d588f2, 0xb7dcbf5354e9bece, 0xc11ed6d538aeb2f, 0xe5d3ef282a242e81, 0x8f1668c8a86da5fa, + 0x8fa475791a569d10, 0xf96e017d694487bc, 0xb38d92d760ec4455, 0x37c981dcc395a9ac, 0xe070f78d3927556a, 0x85bbe253f47b1417, 0x8c469ab843b89562, 0x93956d7478ccec8e, + 0xaf58416654a6babb, 0x387ac8d1970027b2, 0xdb2e51bfe9d0696a, 0x6997b05fcc0319e, 0x88fcf317f22241e2, 0x441fece3bdf81f03, 0xab3c2fddeeaad25a, 0xd527e81cad7626c3, + 0xd60b3bd56a5586f1, 0x8a71e223d8d3b074, 0x85c7056562757456, 0xf6872d5667844e49, 0xa738c6bebb12d16c, 0xb428f8ac016561db, 0xd106f86e69d785c7, 0xe13336d701beba52, + 0x82a45b450226b39c, 0xecc0024661173473, 0xa34d721642b06084, 0x27f002d7f95d0190, 0xcc20ce9bd35c78a5, 0x31ec038df7b441f4, 0xff290242c83396ce, 0x7e67047175a15271, + 0x9f79a169bd203e41, 0xf0062c6e984d386, 0xc75809c42c684dd1, 0x52c07b78a3e60868, 0xf92e0c3537826145, 0xa7709a56ccdf8a82, 0x9bbcc7a142b17ccb, 0x88a66076400bb691, + 0xc2abf989935ddbfe, 0x6acff893d00ea435, 0xf356f7ebf83552fe, 0x583f6b8c4124d43, 0x98165af37b2153de, 0xc3727a337a8b704a, 0xbe1bf1b059e9a8d6, 0x744f18c0592e4c5c, + 0xeda2ee1c7064130c, 0x1162def06f79df73, 0x9485d4d1c63e8be7, 0x8addcb5645ac2ba8, 0xb9a74a0637ce2ee1, 0x6d953e2bd7173692, 0xe8111c87c5c1ba99, 0xc8fa8db6ccdd0437, + 0x910ab1d4db9914a0, 0x1d9c9892400a22a2, 0xb54d5e4a127f59c8, 0x2503beb6d00cab4b, 0xe2a0b5dc971f303a, 0x2e44ae64840fd61d, 0x8da471a9de737e24, 0x5ceaecfed289e5d2, + 0xb10d8e1456105dad, 0x7425a83e872c5f47, 0xdd50f1996b947518, 0xd12f124e28f77719, 0x8a5296ffe33cc92f, 0x82bd6b70d99aaa6f, 0xace73cbfdc0bfb7b, 0x636cc64d1001550b, + 0xd8210befd30efa5a, 0x3c47f7e05401aa4e, 0x8714a775e3e95c78, 0x65acfaec34810a71, 0xa8d9d1535ce3b396, 0x7f1839a741a14d0d, 0xd31045a8341ca07c, 0x1ede48111209a050, + 0x83ea2b892091e44d, 
0x934aed0aab460432, 0xa4e4b66b68b65d60, 0xf81da84d5617853f, 0xce1de40642e3f4b9, 0x36251260ab9d668e, 0x80d2ae83e9ce78f3, 0xc1d72b7c6b426019, + 0xa1075a24e4421730, 0xb24cf65b8612f81f, 0xc94930ae1d529cfc, 0xdee033f26797b627, 0xfb9b7cd9a4a7443c, 0x169840ef017da3b1, 0x9d412e0806e88aa5, 0x8e1f289560ee864e, + 0xc491798a08a2ad4e, 0xf1a6f2bab92a27e2, 0xf5b5d7ec8acb58a2, 0xae10af696774b1db, 0x9991a6f3d6bf1765, 0xacca6da1e0a8ef29, 0xbff610b0cc6edd3f, 0x17fd090a58d32af3, + 0xeff394dcff8a948e, 0xddfc4b4cef07f5b0, 0x95f83d0a1fb69cd9, 0x4abdaf101564f98e, 0xbb764c4ca7a4440f, 0x9d6d1ad41abe37f1, 0xea53df5fd18d5513, 0x84c86189216dc5ed, + 0x92746b9be2f8552c, 0x32fd3cf5b4e49bb4, 0xb7118682dbb66a77, 0x3fbc8c33221dc2a1, 0xe4d5e82392a40515, 0xfabaf3feaa5334a, 0x8f05b1163ba6832d, 0x29cb4d87f2a7400e, + 0xb2c71d5bca9023f8, 0x743e20e9ef511012, 0xdf78e4b2bd342cf6, 0x914da9246b255416, 0x8bab8eefb6409c1a, 0x1ad089b6c2f7548e, 0xae9672aba3d0c320, 0xa184ac2473b529b1, + 0xda3c0f568cc4f3e8, 0xc9e5d72d90a2741e, 0x8865899617fb1871, 0x7e2fa67c7a658892, 0xaa7eebfb9df9de8d, 0xddbb901b98feeab7, 0xd51ea6fa85785631, 0x552a74227f3ea565, + 0x8533285c936b35de, 0xd53a88958f87275f, 0xa67ff273b8460356, 0x8a892abaf368f137, 0xd01fef10a657842c, 0x2d2b7569b0432d85, 0x8213f56a67f6b29b, 0x9c3b29620e29fc73, + 0xa298f2c501f45f42, 0x8349f3ba91b47b8f, 0xcb3f2f7642717713, 0x241c70a936219a73, 0xfe0efb53d30dd4d7, 0xed238cd383aa0110, 0x9ec95d1463e8a506, 0xf4363804324a40aa, + 0xc67bb4597ce2ce48, 0xb143c6053edcd0d5, 0xf81aa16fdc1b81da, 0xdd94b7868e94050a, 0x9b10a4e5e9913128, 0xca7cf2b4191c8326, 0xc1d4ce1f63f57d72, 0xfd1c2f611f63a3f0, + 0xf24a01a73cf2dccf, 0xbc633b39673c8cec, 0x976e41088617ca01, 0xd5be0503e085d813, 0xbd49d14aa79dbc82, 0x4b2d8644d8a74e18, 0xec9c459d51852ba2, 0xddf8e7d60ed1219e, + 0x93e1ab8252f33b45, 0xcabb90e5c942b503, 0xb8da1662e7b00a17, 0x3d6a751f3b936243, 0xe7109bfba19c0c9d, 0xcc512670a783ad4, 0x906a617d450187e2, 0x27fb2b80668b24c5, + 0xb484f9dc9641e9da, 0xb1f9f660802dedf6, 0xe1a63853bbd26451, 
0x5e7873f8a0396973, 0x8d07e33455637eb2, 0xdb0b487b6423e1e8, 0xb049dc016abc5e5f, 0x91ce1a9a3d2cda62, + 0xdc5c5301c56b75f7, 0x7641a140cc7810fb, 0x89b9b3e11b6329ba, 0xa9e904c87fcb0a9d, 0xac2820d9623bf429, 0x546345fa9fbdcd44, 0xd732290fbacaf133, 0xa97c177947ad4095, + 0x867f59a9d4bed6c0, 0x49ed8eabcccc485d, 0xa81f301449ee8c70, 0x5c68f256bfff5a74, 0xd226fc195c6a2f8c, 0x73832eec6fff3111, 0x83585d8fd9c25db7, 0xc831fd53c5ff7eab, + 0xa42e74f3d032f525, 0xba3e7ca8b77f5e55, 0xcd3a1230c43fb26f, 0x28ce1bd2e55f35eb, 0x80444b5e7aa7cf85, 0x7980d163cf5b81b3, 0xa0555e361951c366, 0xd7e105bcc332621f, + 0xc86ab5c39fa63440, 0x8dd9472bf3fefaa7, 0xfa856334878fc150, 0xb14f98f6f0feb951, 0x9c935e00d4b9d8d2, 0x6ed1bf9a569f33d3, 0xc3b8358109e84f07, 0xa862f80ec4700c8, + 0xf4a642e14c6262c8, 0xcd27bb612758c0fa, 0x98e7e9cccfbd7dbd, 0x8038d51cb897789c, 0xbf21e44003acdd2c, 0xe0470a63e6bd56c3, 0xeeea5d5004981478, 0x1858ccfce06cac74, + 0x95527a5202df0ccb, 0xf37801e0c43ebc8, 0xbaa718e68396cffd, 0xd30560258f54e6ba, 0xe950df20247c83fd, 0x47c6b82ef32a2069, 0x91d28b7416cdd27e, 0x4cdc331d57fa5441, + 0xb6472e511c81471d, 0xe0133fe4adf8e952, 0xe3d8f9e563a198e5, 0x58180fddd97723a6, 0x8e679c2f5e44ff8f, 0x570f09eaa7ea7648, +}; + + /* Maximum mantissa for fast path: 2^53 */ #define MAX_MANTISSA_FAST_PATH 9007199254740992ULL /* 2^53 */ @@ -159,6 +348,190 @@ static inline uint32_t parse_eight_digits_swar(uint64_t val) { return (uint32_t)val; } +/* ---------------------------------------------------------------------------- + * Eisel-Lemire algorithm — core (compute_float / am_to_double). + * + * Given a decimal mantissa `w` (≤ 19 digits, fits in uint64) and exponent `q`, + * compute the correctly-rounded `double` representing `w * 10^q`. Internally: + * + * 1. Shift `w` so its leading bit is set (full 64-bit mantissa). + * 2. Multiply by the 128-bit precomputed power-of-five entry above. + * 3. Extract the 53-bit mantissa from the high 64 bits of the product, with + * one extra bit for round-to-nearest-even. + * 4. 
Apply the round-half-to-even rule, including the rare power-of-2 tie + * case that needs a second-pass check. + * + * For the 19-digit / |q| ≤ 22 input range the result is provably bit-exact + * with strtod() (Mushtak & Lemire, "Fast Number Parsing Without Fallback"). + * The caller falls back to strtod() if compute_float() signals indeterminate + * (we never trigger that branch with parse_number_string's bounded inputs). + * + * Ported from fast_float by Daniel Lemire & Joao Paulo Magalhaes + * (MIT-licensed, https://github.com/fastfloat/fast_float — decimal_to_binary.h + * and float_common.h). C++ template machinery dropped in favour of a + * double-only specialisation; struct layouts kept to ease future review. + * ---------------------------------------------------------------------------- */ + +/* IEEE-754 binary64 constants (mirrors fast_float's binary_format). */ +#define DOUBLE_MANTISSA_EXPLICIT_BITS 52 +#define DOUBLE_MIN_EXPONENT_ROUND_EVEN -4 +#define DOUBLE_MAX_EXPONENT_ROUND_EVEN 23 +#define DOUBLE_MINIMUM_EXPONENT -1023 +#define DOUBLE_INFINITE_POWER 0x7FF + +/* 128-bit unsigned, little-endian: low holds bits [0..63]. */ +typedef struct { + uint64_t low; + uint64_t high; +} value128; + +/* Result of compute_float_d(): a 53-bit mantissa and a biased binary exponent. + * power2 < 0 signals indeterminate (caller should fall back to strtod()). */ +typedef struct { + uint64_t mantissa; + int32_t power2; +} adjusted_mantissa; + +/* `__builtin_clzll` is undefined on input 0 — caller guarantees v > 0. */ +static inline int leading_zeroes_u64(uint64_t v) { + return __builtin_clzll(v); +} + +/* 64x64 -> 128 multiplication. Uses __uint128_t when __SIZEOF_INT128__ is + * defined; otherwise the portable split-into-32-bit-halves path is compiled. 
*/ +static inline value128 full_multiplication(uint64_t a, uint64_t b) { + value128 r; +#ifdef __SIZEOF_INT128__ + __uint128_t prod = (__uint128_t)a * (__uint128_t)b; + r.low = (uint64_t)prod; + r.high = (uint64_t)(prod >> 64); +#else + /* 32-bit fallback: split each operand into two 32-bit halves. */ + uint64_t a_lo = (uint32_t)a, a_hi = a >> 32; + uint64_t b_lo = (uint32_t)b, b_hi = b >> 32; + uint64_t ll = a_lo * b_lo; + uint64_t lh = a_lo * b_hi; + uint64_t hl = a_hi * b_lo; + uint64_t hh = a_hi * b_hi; + uint64_t mid = (ll >> 32) + (uint32_t)lh + (uint32_t)hl; + r.low = (mid << 32) | (uint32_t)ll; + r.high = hh + (lh >> 32) + (hl >> 32) + (mid >> 32); +#endif + return r; +} + +/* For q in (-400, 350), this approximates floor(log2(5^q)) + q + 63 + * (or -ceil(log2(5^|q|)) + q + 63 for negative q). Used to derive power2. */ +static inline int32_t eisel_lemire_power(int32_t q) { + return (((152170 + 65536) * q) >> 16) + 63; +} + +/* 128-bit approximation of `w * 5^q`. The optional fixup multiplies by the + * second (extension) entry of the power-of-five table when the high half is + * close to a rounding boundary. Mathematical proof of sufficiency: see + * Mushtak & Lemire, "Fast Number Parsing Without Fallback". */ +static inline value128 compute_product_approximation_d(int64_t q, uint64_t w) { + int index = 2 * (int)(q - EISEL_LEMIRE_SMALLEST_POWER_OF_FIVE); + value128 firstproduct = full_multiplication(w, power_of_five_128[index]); + /* For double, bit_precision = mantissa_explicit_bits (52) + 3 = 55. */ + const uint64_t precision_mask = + (uint64_t)0xFFFFFFFFFFFFFFFFULL >> 55; + if ((firstproduct.high & precision_mask) == precision_mask) { + value128 secondproduct = + full_multiplication(w, power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if (secondproduct.high > firstproduct.low) { /* low half wrapped: carry */ + firstproduct.high++; + } + } + return firstproduct; +} + +/* Eisel-Lemire main: compute a correctly-rounded representation of w * 10^q. 
+ * Returns an `adjusted_mantissa`. Special outputs: + * - mantissa == 0 && power2 == 0: result is +/-0 + * - power2 == DOUBLE_INFINITE_POWER && mantissa == 0: result is infinity + * - power2 < 0: indeterminate (caller should fall back to strtod()). With + * parse_number_string()'s bounded mantissa (<= 19 digits), this branch + * is unreachable, but we keep the signature for safety. + */ +static adjusted_mantissa compute_float_d(int64_t q, uint64_t w) { + adjusted_mantissa answer; + + if (w == 0 || q < EISEL_LEMIRE_SMALLEST_POWER_OF_FIVE) { + answer.power2 = 0; + answer.mantissa = 0; + return answer; + } + if (q > EISEL_LEMIRE_LARGEST_POWER_OF_FIVE) { + answer.power2 = DOUBLE_INFINITE_POWER; + answer.mantissa = 0; + return answer; + } + + /* Renormalise w so its top bit is set. */ + int lz = leading_zeroes_u64(w); + w <<= lz; + + value128 product = compute_product_approximation_d(q, w); + + int upperbit = (int)(product.high >> 63); + int shift = upperbit + 64 - DOUBLE_MANTISSA_EXPLICIT_BITS - 3; + + answer.mantissa = product.high >> shift; + answer.power2 = (int32_t)(eisel_lemire_power((int32_t)q) + upperbit - lz - DOUBLE_MINIMUM_EXPONENT); + + if (answer.power2 <= 0) { + /* Subnormal path. */ + if (-answer.power2 + 1 >= 64) { + /* More than 64 bits below minimum exponent — definitely zero. */ + answer.power2 = 0; + answer.mantissa = 0; + return answer; + } + /* Safe: -answer.power2 + 1 < 64. */ + answer.mantissa >>= -answer.power2 + 1; + answer.mantissa += (answer.mantissa & 1); /* round up */ + answer.mantissa >>= 1; + /* If post-rounding the value crosses back into the normal range, mark + * it normal (power2 = 1) rather than subnormal (power2 = 0). */ + answer.power2 = (answer.mantissa < ((uint64_t)1 << DOUBLE_MANTISSA_EXPLICIT_BITS)) ? 0 : 1; + return answer; + } + + /* Normal path: handle the round-half-to-even tie case. 
*/ + if ((product.low <= 1) && + (q >= DOUBLE_MIN_EXPONENT_ROUND_EVEN) && + (q <= DOUBLE_MAX_EXPONENT_ROUND_EVEN) && + ((answer.mantissa & 3) == 1)) { + if ((answer.mantissa << shift) == product.high) { + answer.mantissa &= ~(uint64_t)1; /* clear LSB so we round down */ + } + } + answer.mantissa += (answer.mantissa & 1); + answer.mantissa >>= 1; + if (answer.mantissa >= ((uint64_t)2 << DOUBLE_MANTISSA_EXPLICIT_BITS)) { + answer.mantissa = (uint64_t)1 << DOUBLE_MANTISSA_EXPLICIT_BITS; + answer.power2++; + } + answer.mantissa &= ~((uint64_t)1 << DOUBLE_MANTISSA_EXPLICIT_BITS); + if (answer.power2 >= DOUBLE_INFINITE_POWER) { + answer.power2 = DOUBLE_INFINITE_POWER; + answer.mantissa = 0; + } + return answer; +} + +/* Pack adjusted_mantissa back to a double via IEEE-754 bit layout. */ +static inline double am_to_double(int negative, adjusted_mantissa am) { + uint64_t word = am.mantissa; + word |= (uint64_t)am.power2 << DOUBLE_MANTISSA_EXPLICIT_BITS; + if (negative) word |= (uint64_t)1 << 63; + double value; + memcpy(&value, &word, sizeof(value)); + return value; +} + /* Parse a decimal number string into components. * This follows the fast_float algorithm closely. */ static inline int parse_number_string(const char *p, const char *pend, double *result, const char **endptr) { @@ -261,65 +634,42 @@ static inline int parse_number_string(const char *p, const char *pend, double *r if (digit_count > MAX_DIGITS) return 0; } - /* Check if we're within fast path bounds */ - if (exponent < MIN_EXPONENT_FAST_PATH) return 0; - if (exponent > MAX_EXPONENT_FAST_PATH) return 0; - + /* Pick the conversion path. Two regimes: + * Clinger fast path: small mantissa (<= 2^53) and small |exp| (<= 22). + * One double multiply or divide; cheapest, exact by construction. + * Eisel-Lemire: large mantissa or wide exponent range (full double + * domain). Slightly slower per call (128-bit multiply + table lookup) + * but correctly-rounded by the Mushtak-Lemire proof. 
+ * Inputs outside both ranges fall back to strtod() (caller of this fn). */ double value; - if (mantissa <= MAX_MANTISSA_FAST_PATH) { + if (mantissa <= MAX_MANTISSA_FAST_PATH && + exponent >= MIN_EXPONENT_FAST_PATH && + exponent <= MAX_EXPONENT_FAST_PATH) + { /* Clinger fast path: all operands exact in double precision, * single multiply/divide produces a correctly-rounded result. */ value = (double)mantissa; if (exponent < 0) value = value / powers_of_ten[-exponent]; else if (exponent > 0) value = value * powers_of_ten[exponent]; + if (negative) value = -value; } else { -#ifdef __SIZEOF_INT128__ - /* Widened fast path for 17-19 significant-digit mantissas. - * - * (double)mantissa alone loses up to 11 bits when mantissa > 2^53, - * so the existing Clinger path would yield up to 1 ULP vs strtod. - * We recover full precision by doing the multiply/divide in 128-bit - * integer arithmetic (correctly-rounded by construction). Cases - * outside the supported exponent range fall through to strtod. - * - * Requires __uint128_t (GCC/Clang builtin, available on every 64-bit - * target Redis supports). 32-bit builds take the strtod() fallback. */ - if (exponent < -19 || exponent > 19) return 0; + /* Eisel-Lemire path. Replaces a previously hand-rolled widened branch + * (`(double)hi * 2^64 + (double)lo` shortcut) that produced ±1 ULP + * mismatches vs strtod() on inputs like 9007199255094284e-19 and + * 2489830482329185244e1. compute_float_d is bit-exact with strtod() + * for every input parse_number_string can produce. */ + if (exponent < EISEL_LEMIRE_SMALLEST_POWER_OF_FIVE || exponent > EISEL_LEMIRE_LARGEST_POWER_OF_FIVE) + return 0; - if (exponent >= 0) { - /* (mantissa * 10^e) fits in 128 bits. Convert exactly: the - * single (double) cast from __uint128_t rounds to nearest. 
*/ - __uint128_t prod = (__uint128_t)mantissa * (uint64_t)powers_of_ten[exponent]; - uint64_t hi = (uint64_t)(prod >> 64); - uint64_t lo = (uint64_t)prod; - /* (double)hi * 2^64 has no rounding error (hi up to 2^64-1 rounds - * once, then * 2^64 is exact). Adding lo rounds once. Total: - * matches strtod on every tested case with e in [0,19]. */ - value = (double)hi * 18446744073709551616.0 + (double)lo; - } else { - /* mantissa / 10^|e|: scale numerator up by 2^64 before integer - * division to preserve precision, then descale by multiplying by - * 2^-64 (exact power-of-two scaling, does not round). The single - * (double) cast of the integer quotient produces IEEE round-to- - * nearest-even, matching strtod() bit-exactly for every tested - * 16-19 significant digit case. */ - uint64_t divisor = (uint64_t)powers_of_ten[-exponent]; - __uint128_t scaled = (__uint128_t)mantissa << 64; - __uint128_t q = scaled / divisor; - uint64_t hi = (uint64_t)(q >> 64); - uint64_t lo = (uint64_t)q; - value = ((double)hi * 18446744073709551616.0 + (double)lo) - * 5.421010862427522170037e-20; /* 2^-64 */ - } -#else - /* 32-bit target without __uint128_t: fall through to the strtod() - * fallback. Correctness is preserved (it's the same path that shipped - * in 8.8-M02); only the perf gain is 64-bit-target-specific. */ - return 0; -#endif + adjusted_mantissa am = compute_float_d(exponent, mantissa); + /* power2 < 0 would mean indeterminate (caller should fall back to + * strtod). With our bounded mantissa (<= 19 digits) this branch is + * unreachable per the Mushtak-Lemire proof, but we keep the guard so + * any future caller that supplies a larger mantissa stays correct. 
*/ + if (am.power2 < 0) return 0; + value = am_to_double(negative, am); } - if (negative) value = -value; *result = value; return 1; } @@ -524,9 +874,87 @@ int fastFloatTest(int argc, char **argv, int flags) { /* Negative numbers exercising the widened path */ {"-0.49606648747577575", -0.49606648747577575}, {"-9007199254740993", -9007199254740992.0}, + + /* Eisel-Lemire rounding-boundary cases. + * Reported by @vitahlin on #14661 against the previous + * `(double)hi * 2^64 + (double)lo` widened branch which + * double-rounded the 128-bit product. Both must now match + * strtod() exactly. */ + {"9007199255094284e-19", 9007199255094284e-19}, /* was -1 ULP */ + {"2489830482329185244e1", 2489830482329185244e1}, /* was +1 ULP */ + + /* Subnormal boundaries (Eisel-Lemire's subnormal branch). */ + {"5e-324", 5e-324}, /* smallest pos subnormal */ + {"4.9e-324", 5e-324}, /* just under min subnormal: rounds up */ + {"2.2250738585072009e-308", 2.2250738585072009e-308}, /* largest subnormal */ + {"2.2250738585072014e-308", 2.2250738585072014e-308}, /* smallest normal */ + {"1e-323", 1e-323}, + + /* Round-half-to-even sanity values. NOTE(review): mantissa <= 2^53 with + * small |exp| takes the Clinger branch, not the compute_float_d tie path. */ + {"5497558138880", 5497558138880.0}, /* 2^42 + 2^40 (= 5 * 2^40), exact */ + {"5e-22", 5e-22}, + {"7.038531e-26", 7.038531e-26}, + {"4503599627475501e-10", 4503599627475501e-10}, /* near 2^52 */ + + /* Largest finite double and near-overflow inputs (all still finite). */ + {"1.7976931348623157e308", 1.7976931348623157e308}, /* DBL_MAX */ + {"1.7976931348623158e308", 1.7976931348623157e308}, /* nearest is DBL_MAX */ + {"1e308", 1e308}, + + /* Wide exponent range now reachable via Eisel-Lemire (previously + * fell to strtod). 
*/ + {"1.234567890123456e100", 1.234567890123456e100}, + {"9.999999999999999e99", 9.999999999999999e99}, + {"1e-300", 1e-300}, + {"1.7e-300", 1.7e-300}, + + /* Repunit / many-9 mantissas — adjacent-double tie territory. 
*/ + {"9999999999999998", 9999999999999998.0}, + {"99999999999999999", 1e17}, }; run_ff_tests(decimal_ok, COUNTOF(decimal_ok), 0); + /* Differential cross-check: every accepted input must produce the + * exact same bits as libc strtod(). Hand-picked hard cases covering + * every code path in compute_float_d (subnormal branch, round-half- + * to-even tie path, near-infinity, repunit mantissa, wide exponent). */ + { + static const char *diff_inputs[] = { + /* Boundary classics around 2^53. */ + "9007199254740992", "9007199254740993", "9007199254740994", + "9007199254740995", "9007199254740996", + /* Limits of finite double. */ + "1.7976931348623157e308", "2.2250738585072014e-308", + "5e-324", "1e-323", "4.9406564584124654e-324", + /* The two reproducer inputs the previous widened branch missed. */ + "9007199255094284e-19", "2489830482329185244e1", + /* Mushtak-Lemire stress range — 19-digit mantissas. */ + "1234567890123456789e0", "1234567890123456789e-5", + "1234567890123456789e5", "9999999999999999e19", + /* Common scientific constants — mid-exponent sanity. */ + "3.141592653589793", "2.718281828459045", + "1.4142135623730951e150", "6.022140857e23", + "1.602176634e-19", "9.10938356e-31", + }; + for (int i = 0; i < COUNTOF(diff_inputs); i++) { + const char *s = diff_inputs[i]; + char *fend, *lend; + errno = 0; + double got = fast_float_strtod(s, strlen(s), &fend); + errno = 0; + double libc = strtod(s, &lend); + uint64_t gb, lb; + memcpy(&gb, &got, sizeof(gb)); + memcpy(&lb, &libc, sizeof(lb)); + char descr[160]; + snprintf(descr, sizeof(descr), + "differential vs strtod: \"%s\" ff=0x%016llx libc=0x%016llx", + s, (unsigned long long)gb, (unsigned long long)lb); + test_cond(descr, gb == lb); + } + } + /* No valid prefix for full buffer, or trailing junk. 
*/ ff_testcase decimal_bad[] = { {"1abc", 1.0}, diff --git a/src/gcra.c b/src/gcra.c index 488fad5ce..a6b738824 100644 --- a/src/gcra.c +++ b/src/gcra.c @@ -9,6 +9,8 @@ #include "server.h" #include +#ifdef ENABLE_GCRA + /* GCRA algorithm for rate limiting. * Implementation is heavily based on the implementation of (redis-cell) * [https://github.com/brandur/redis-cell] by (brandur)[https://github.com/brandur]. @@ -278,3 +280,5 @@ robj *gcraDup(robj *o) { getLongLongFromGCRAObject(o, &val); return createGCRAObject(val); } + +#endif /* ENABLE_GCRA */ diff --git a/src/hotkeys.c b/src/hotkeys.c index bdcc831e4..817a8c394 100644 --- a/src/hotkeys.c +++ b/src/hotkeys.c @@ -13,11 +13,6 @@ #include "cluster.h" #include -static inline int nearestNextPowerOf2(unsigned int count) { - if (count <= 1) return 1; - return 1 << (32 - __builtin_clz(count-1)); -} - /* Comparison function for qsort to sort slot indices */ static inline int slotCompare(const void *a, const void *b) { return (*(const int *)a) - (*(const int *)b); diff --git a/src/lazyfree.c b/src/lazyfree.c index 8d291bc9a..f9cde4e7e 100644 --- a/src/lazyfree.c +++ b/src/lazyfree.c @@ -207,6 +207,9 @@ size_t lazyfreeGetFreeEffort(robj *key, robj *obj, int dbid) { /* If the module's free_effort returns 0, we will use asynchronous free * memory by default. */ return effort == 0 ? ULONG_MAX : effort; + } else if (obj->type == OBJ_ARRAY) { + redisArray *ar = obj->ptr; + return arCount(ar); } else { return 1; /* Everything else is a single allocation. 
*/ } diff --git a/src/module.c b/src/module.c index 5a6f510ac..38a6ad388 100644 --- a/src/module.c +++ b/src/module.c @@ -4254,7 +4254,10 @@ int RM_KeyType(RedisModuleKey *key) { case OBJ_HASH: return REDISMODULE_KEYTYPE_HASH; case OBJ_MODULE: return REDISMODULE_KEYTYPE_MODULE; case OBJ_STREAM: return REDISMODULE_KEYTYPE_STREAM; +#ifdef ENABLE_GCRA case OBJ_GCRA: return REDISMODULE_KEYTYPE_GCRA; +#endif + case OBJ_ARRAY: return REDISMODULE_KEYTYPE_ARRAY; default: return REDISMODULE_KEYTYPE_EMPTY; } } diff --git a/src/networking.c b/src/networking.c index 0030078e7..3bcd74e82 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1181,6 +1181,18 @@ void addReplyLongLongFromStr(client *c, robj *str) { addReplyProto(c,"\r\n",2); } +/* Reply with unsigned 64-bit value. Uses integer reply when value fits in + * signed long long, otherwise big number (RESP3) or bulk string (RESP2). */ +void addReplyUnsignedLongLong(client *c, uint64_t v) { + if (v <= (uint64_t)LLONG_MAX) { + addReplyLongLong(c, (long long)v); + } else { + char buf[LONG_STR_SIZE]; + int len = ull2string(buf, sizeof(buf), v); + addReplyBigNum(c, buf, len); + } +} + void addReplyAggregateLen(client *c, long length, int prefix) { serverAssert(length >= 0); if (_prepareClientToWrite(c) != C_OK) return; diff --git a/src/notify.c b/src/notify.c index 729865f0e..5c8b188fa 100644 --- a/src/notify.c +++ b/src/notify.c @@ -37,10 +37,13 @@ int keyspaceEventsStringToFlags(char *classes) { case 't': flags |= NOTIFY_STREAM; break; case 'm': flags |= NOTIFY_KEY_MISS; break; case 'd': flags |= NOTIFY_MODULE; break; + case 'a': flags |= NOTIFY_ARRAY; break; case 'n': flags |= NOTIFY_NEW; break; case 'o': flags |= NOTIFY_OVERWRITTEN; break; case 'c': flags |= NOTIFY_TYPE_CHANGED; break; +#ifdef ENABLE_GCRA case 'r': flags |= NOTIFY_RATE_LIMIT; break; +#endif case 'S': flags |= NOTIFY_SUBKEYSPACE; break; case 'T': flags |= NOTIFY_SUBKEYEVENT; break; case 'I': flags |= NOTIFY_SUBKEYSPACEITEM; break; @@ -72,10 +75,13 @@ sds 
keyspaceEventsFlagsToString(int flags) { if (flags & NOTIFY_EVICTED) res = sdscatlen(res,"e",1); if (flags & NOTIFY_STREAM) res = sdscatlen(res,"t",1); if (flags & NOTIFY_MODULE) res = sdscatlen(res,"d",1); + if (flags & NOTIFY_ARRAY) res = sdscatlen(res,"a",1); if (flags & NOTIFY_NEW) res = sdscatlen(res,"n",1); if (flags & NOTIFY_OVERWRITTEN) res = sdscatlen(res,"o",1); if (flags & NOTIFY_TYPE_CHANGED) res = sdscatlen(res,"c",1); +#ifdef ENABLE_GCRA if (flags & NOTIFY_RATE_LIMIT) res = sdscatlen(res,"r",1); +#endif } if (flags & NOTIFY_KEYSPACE) res = sdscatlen(res,"K",1); if (flags & NOTIFY_KEYEVENT) res = sdscatlen(res,"E",1); diff --git a/src/object.c b/src/object.c index 44778014b..697ed6e39 100644 --- a/src/object.c +++ b/src/object.c @@ -514,6 +514,7 @@ robj *createStreamObject(void) { return o; } +#ifdef ENABLE_GCRA robj *createGCRAObject(long long value) { /* NOTE: for 32-bit systems we can't use integer encoding (as OBJ_STRING does) * as the GCRA object is a unixtime value in microseconds, which as of the @@ -530,6 +531,14 @@ robj *createGCRAObject(long long value) { o->encoding = OBJ_ENCODING_INT; return o; } +#endif + +robj *createArrayObject(void) { + redisArray *ar = arNew(); + robj *o = createObject(OBJ_ARRAY, ar); + o->encoding = OBJ_ENCODING_SLICED_ARRAY; + return o; +} robj *createModuleObject(moduleType *mt, void *value) { moduleValue *mv = zmalloc(sizeof(*mv)); @@ -603,6 +612,7 @@ void freeStreamObject(robj *o) { freeStream(o->ptr); } +#ifdef ENABLE_GCRA void freeGCRAObject(robj *o) { #if UINTPTR_MAX == 0xffffffff zfree(o->ptr); @@ -610,6 +620,11 @@ void freeGCRAObject(robj *o) { (void)o; #endif } +#endif + +void freeArrayObject(robj *o) { + arFree(o->ptr); +} void incrRefCount(robj *o) { if (o->refcount < OBJ_FIRST_SPECIAL_REFCOUNT - 1) { @@ -662,7 +677,10 @@ void decrRefCount(robj *o) { case OBJ_HASH: freeHashObject(o); break; case OBJ_MODULE: freeModuleObject(o); break; case OBJ_STREAM: freeStreamObject(o); break; +#ifdef ENABLE_GCRA case 
OBJ_GCRA: freeGCRAObject(o); break; +#endif + case OBJ_ARRAY: freeArrayObject(o); break; default: serverPanic("Unknown object type"); break; } } @@ -810,12 +828,19 @@ void dismissStreamObject(robj *o, size_t size_hint) { } } +/* See dismissObject() */ +void dismissArrayObject(robj *o, size_t size_hint) { + arDismiss(o->ptr, size_hint); +} + +#ifdef ENABLE_GCRA void dismissGCRAObject(robj *o, size_t size_hint) { /* GCRA is a single allocation of a long long thus way smaller than a * page-size. The dismiss mechanism is not needed for it - hence NOOP.*/ (void)o; (void)size_hint; } +#endif /* When creating a snapshot in a fork child process, the main process and child * process share the same physical memory pages, and if / when the parent @@ -845,7 +870,10 @@ void dismissObject(robj *o, size_t size_hint) { case OBJ_ZSET: dismissZsetObject(o, size_hint); break; case OBJ_HASH: dismissHashObject(o, size_hint); break; case OBJ_STREAM: dismissStreamObject(o, size_hint); break; +#ifdef ENABLE_GCRA case OBJ_GCRA: dismissGCRAObject(o, size_hint); break; +#endif + case OBJ_ARRAY: dismissArrayObject(o, size_hint); break; default: break; } #else @@ -967,7 +995,10 @@ size_t getObjectLength(robj *o) { case OBJ_ZSET: return zsetLength(o); case OBJ_HASH: return hashTypeLength(o, 0); case OBJ_STREAM: return streamLength(o); +#ifdef ENABLE_GCRA case OBJ_GCRA: return gcraObjectLength(o); +#endif + case OBJ_ARRAY: return arCount(o->ptr); default: return 0; } } @@ -1176,6 +1207,7 @@ int getLongLongFromObject(robj *o, long long *target) { return C_OK; } +#ifdef ENABLE_GCRA int getLongLongFromGCRAObject(robj *o, long long *target) { long long res; serverAssertWithInfo(NULL, o, o->type == OBJ_GCRA); @@ -1191,6 +1223,7 @@ int getLongLongFromGCRAObject(robj *o, long long *target) { *target = res; return C_OK; } +#endif int getLongLongFromObjectOrReply(client *c, robj *o, long long *target, const char *msg) { long long value; @@ -1265,6 +1298,7 @@ char *strEncoding(int encoding) { case 
OBJ_ENCODING_SKIPLIST: return "skiplist"; case OBJ_ENCODING_EMBSTR: return "embstr"; case OBJ_ENCODING_STREAM: return "stream"; + case OBJ_ENCODING_SLICED_ARRAY: return "sliced-array"; default: return "unknown"; } } @@ -1283,7 +1317,10 @@ size_t kvobjComputeSize(robj *key, kvobj *o, size_t sample_size, int dbid) { o->type == OBJ_ZSET || o->type == OBJ_HASH || o->type == OBJ_STREAM || - o->type == OBJ_GCRA) +#ifdef ENABLE_GCRA + o->type == OBJ_GCRA || +#endif + o->type == OBJ_ARRAY) { return kvobjAllocSize(o); } else if (o->type == OBJ_MODULE) { @@ -1309,14 +1346,20 @@ size_t kvobjAllocSize(kvobj *o) { } else if (o->type == OBJ_STREAM) { stream *s = o->ptr; asize += s->alloc_size; +#ifdef ENABLE_GCRA } else if (o->type == OBJ_GCRA) { asize += gcraTypeAllocSize(o); +#endif + } else if (o->type == OBJ_ARRAY) { + redisArray *ar = o->ptr; + asize += ar->alloc_size; } else if (o->type == OBJ_MODULE) { /* TODO: Provide moduleGetAllocSize() module API for O(1) allocation size retrieval */ } return asize; } +#ifdef ENABLE_GCRA size_t gcraTypeAllocSize(robj *o) { (void)o; #if UINTPTR_MAX == 0xffffffff @@ -1333,6 +1376,7 @@ size_t gcraObjectLength(robj *o) { (void)o; return 1; } +#endif /* Release data obtained with getMemoryOverheadData(). */ void freeMemoryOverheadData(struct redisMemOverhead *mh) { diff --git a/src/object.h b/src/object.h index 6b2591877..35cd40a3c 100644 --- a/src/object.h +++ b/src/object.h @@ -5,7 +5,7 @@ * values of different logical types (strings, lists, sets, hashes, sorted sets, * streams, modules, ...). It contains: * - type: one of OBJ_STRING, OBJ_LIST, OBJ_SET, OBJ_ZSET, OBJ_HASH, OBJ_STREAM, - * OBJ_GCRA, OBJ_MODULE, ... + * OBJ_MODULE, ... * - encoding: an implementation detail of how the value is represented in * memory for the given type (see OBJ_ENCODING_* below). For example, * strings may be RAW/EMBSTR/INT, sets may be INTSET or HT, etc. 
@@ -85,6 +85,7 @@ struct RedisModuleType; #define OBJ_ENCODING_STREAM 10 /* Encoded as a radix tree of listpacks */ #define OBJ_ENCODING_LISTPACK 11 /* Encoded as a listpack */ #define OBJ_ENCODING_LISTPACK_EX 12 /* Encoded as listpack, extended with metadata */ +#define OBJ_ENCODING_SLICED_ARRAY 13 /* Encoded as sliced array */ #define LRU_BITS 24 #define LRU_CLOCK_MAX ((1<lru */ @@ -163,6 +164,7 @@ robj *createZsetListpackObject(void); robj *createStreamObject(void); robj *createGCRAObject(long long value); robj *createModuleObject(struct RedisModuleType *mt, void *value); +robj *createArrayObject(void); int getLongFromObjectOrReply(struct client *c, robj *o, long *target, const char *msg); int getPositiveLongFromObjectOrReply(struct client *c, robj *o, long *target, const char *msg); int getRangeLongFromObjectOrReply(struct client *c, robj *o, long min, long max, long *target, const char *msg); diff --git a/src/rdb.c b/src/rdb.c index ef1047427..4cfb05719 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -124,33 +124,42 @@ time_t rdbLoadTime(rio *rdb) { return (time_t)t32; } -ssize_t rdbSaveMillisecondTime(rio *rdb, long long t) { - int64_t t64 = (int64_t) t; - memrev64ifbe(&t64); /* Store in little endian. */ - return rdbWriteRaw(rdb,&t64,8); +/* Save a signed 64-bit integer in little-endian format. */ +ssize_t rdbSaveSignedInteger(rio *rdb, int64_t val) { + memrev64ifbe(&val); /* Store in little endian. */ + return rdbWriteRaw(rdb, &val, 8); } -/* This function loads a time from the RDB file. It gets the version of the - * RDB because, unfortunately, before Redis 5 (RDB version 9), the function - * failed to convert data to/from little endian, so RDB files with keys having - * expires could not be shared between big endian and little endian systems - * (because the expire time will be totally wrong). The fix for this is just - * to call memrev64ifbe(), however if we fix this for all the RDB versions, +/* This function loads a signed 64-bit integer from the RDB file. 
It gets the + * version of the RDB because, unfortunately, before Redis 5 (RDB version 9), + * the function failed to convert data to/from little endian, so RDB files with + * keys having expires could not be shared between big endian and little endian + * systems (because the expire time will be totally wrong). The fix for this is + * just to call memrev64ifbe(), however if we fix this for all the RDB versions, * this call will introduce an incompatibility for big endian systems: * after upgrading to Redis version 5 they will no longer be able to load their * own old RDB files. Because of that, we instead fix the function only for new * RDB versions, and load older RDB versions as we used to do in the past, * allowing big endian systems to load their own old RDB files. * - * On I/O error the function returns LLONG_MAX, however if this is also a + * On I/O error the function returns INT64_MAX, however if this is also a * valid stored value, the caller should use rioGetReadError() to check for * errors after calling this function. */ -long long rdbLoadMillisecondTime(rio *rdb, int rdbver) { - int64_t t64; - if (rioRead(rdb,&t64,8) == 0) return LLONG_MAX; +int64_t rdbLoadSignedInteger(rio *rdb, int rdbver) { + int64_t val; + if (rioRead(rdb, &val, 8) == 0) return INT64_MAX; if (rdbver >= 9) /* Check the top comment of this function. */ - memrev64ifbe(&t64); /* Convert in big endian if the system is BE. */ - return (long long)t64; + memrev64ifbe(&val); /* Convert in big endian if the system is BE. */ + return val; +} + +/* Wrappers for millisecond time - these just call the signed integer functions */ +ssize_t rdbSaveMillisecondTime(rio *rdb, long long t) { + return rdbSaveSignedInteger(rdb, (int64_t)t); +} + +long long rdbLoadMillisecondTime(rio *rdb, int rdbver) { + return (long long)rdbLoadSignedInteger(rdb, rdbver); } /* Saves an encoded length. 
The first two bits in the first byte are used to @@ -714,10 +723,14 @@ int rdbSaveObjectType(rio *rdb, robj *o) { serverPanic("Unknown hash encoding"); case OBJ_STREAM: return rdbSaveType(rdb,RDB_TYPE_STREAM_LISTPACKS_5); +#ifdef ENABLE_GCRA case OBJ_GCRA: return rdbSaveType(rdb,RDB_TYPE_GCRA); +#endif case OBJ_MODULE: return rdbSaveType(rdb,RDB_TYPE_MODULE_2); + case OBJ_ARRAY: + return rdbSaveType(rdb,RDB_TYPE_ARRAY); default: serverPanic("Unknown object type"); } @@ -1040,6 +1053,68 @@ size_t rdbSaveStreamConsumers(rio *rdb, streamCG *cg) { /* Save a Redis object. * Returns -1 on error, number of bytes written on success. */ +static ssize_t rdbSaveArrayElement(rio *rdb, uint64_t idx, void *v) { + ssize_t n, nwritten = 0; + + if ((n = rdbSaveLen(rdb, idx)) == -1) return -1; + nwritten += n; + + if (arIsInt(v)) { + if ((n = rdbSaveLen(rdb, AR_RDB_TAG_INT)) == -1) return -1; + nwritten += n; + int64_t ival = arToInt(v); + if ((n = rdbSaveSignedInteger(rdb, ival)) == -1) return -1; + nwritten += n; + } else if (arIsFloat(v)) { + if ((n = rdbSaveLen(rdb, AR_RDB_TAG_FLOAT)) == -1) return -1; + nwritten += n; + double d = arToDouble(v); + if (rdbSaveBinaryDoubleValue(rdb, d) == -1) return -1; + nwritten += 8; + } else if (arIsSmallStr(v)) { + char buf[AR_SMALLSTR_MAXLEN + 1]; + int len = arToSmallStr(v, buf); + if ((n = rdbSaveLen(rdb, AR_RDB_TAG_SMALLSTR)) == -1) return -1; + nwritten += n; + if ((n = rdbSaveRawString(rdb, (unsigned char *)buf, len)) == -1) return -1; + nwritten += n; + } else { + if ((n = rdbSaveLen(rdb, AR_RDB_TAG_SDS)) == -1) return -1; + nwritten += n; + if ((n = rdbSaveRawString(rdb, (unsigned char *)arStringData(v), arStringLen(v))) == -1) return -1; + nwritten += n; + } + + return nwritten; +} + +static ssize_t rdbSaveArraySlice(rio *rdb, arSlice *s, uint64_t slice_id, + uint32_t slice_size) { + ssize_t n, nwritten = 0; + + if (s->encoding == AR_SLICE_DENSE) { + for (uint32_t i = 0; i < s->layout.dense.winsize; i++) { + void *v = 
s->layout.dense.items[i]; + if (arIsEmpty(v)) continue; + + uint64_t idx = arMakeIdx(slice_id, s->layout.dense.offset + i, slice_size); + if ((n = rdbSaveArrayElement(rdb, idx, v)) == -1) return -1; + nwritten += n; + } + } else { + uint16_t *offsets = s->layout.sparse.offsets; + void **values = s->layout.sparse.values; + + for (uint32_t i = 0; i < s->count; i++) { + uint64_t idx = arMakeIdx(slice_id, offsets[i], slice_size); + if ((n = rdbSaveArrayElement(rdb, idx, values[i])) == -1) return -1; + nwritten += n; + } + } + + return nwritten; +} + ssize_t rdbSaveObject(rio *rdb, robj *o, robj *key, int dbid) { ssize_t n = 0, nwritten = 0; @@ -1402,11 +1477,13 @@ ssize_t rdbSaveObject(rio *rdb, robj *o, robj *key, int dbid) { /* Save the all-time count of duplicate IIDs detected. */ if ((n = rdbSaveLen(rdb,s->iids_duplicates)) == -1) return -1; nwritten += n; +#ifdef ENABLE_GCRA } else if (o->type == OBJ_GCRA) { long long t; getLongLongFromGCRAObject(o, &t); if ((n = rdbSaveLen(rdb,t)) == -1) return -1; nwritten += n; +#endif } else if (o->type == OBJ_MODULE) { /* Save a module-specific value. */ RedisModuleIO io; @@ -1433,6 +1510,57 @@ ssize_t rdbSaveObject(rio *rdb, robj *o, robj *key, int dbid) { zfree(io.ctx); } return io.error ? -1 : (ssize_t)io.bytes; + } else if (o->type == OBJ_ARRAY) { + /* Save an array value. We persist only elements and insert_idx - no + * implementation details like slice_size. Arrays are loaded using + * the current ar_slice_size config. */ + redisArray *ar = o->ptr; + + /* Save count */ + if ((n = rdbSaveLen(rdb, ar->count)) == -1) return -1; + nwritten += n; + + /* Save insert_idx: 0 = none, 1 = has value followed by actual value. + * We can't save UINT64_MAX directly with rdbSaveLen/rdbLoadLen because + * rdbLoadLen returns UINT64_MAX (RDB_LENERR) to signal an error, making + * it impossible to distinguish a valid UINT64_MAX value from an error. 
*/ + if (ar->insert_idx == AR_INSERT_IDX_NONE) { + if ((n = rdbSaveLen(rdb, 0)) == -1) return -1; + nwritten += n; + } else { + if ((n = rdbSaveLen(rdb, 1)) == -1) return -1; + nwritten += n; + if ((n = rdbSaveLen(rdb, ar->insert_idx)) == -1) return -1; + nwritten += n; + } + + /* Save elements in index order. + * We need to iterate through all slices, handling both flat directory + * mode and superdir mode. In superdir mode, blocks are sorted by + * block_id, so we iterate through blocks in order. */ + if (ar->superdir) { + /* Superdir mode: iterate through blocks */ + for (uint32_t bi = 0; bi < ar->sdir_len; bi++) { + arSDirEntry *e = ar->superdir + bi; + uint64_t block_base = e->block_id * AR_SUPER_BLOCK_SLOTS; + + for (uint32_t si = 0; si < AR_SUPER_BLOCK_SLOTS; si++) { + arSlice *s = e->slots[si]; + if (!s) continue; + uint64_t slice_id = block_base + si; + if ((n = rdbSaveArraySlice(rdb, s, slice_id, ar->slice_size)) == -1) return -1; + nwritten += n; + } + } + } else { + /* Flat directory mode */ + for (uint64_t slice_id = 0; slice_id <= ar->dir_highest_used && slice_id < ar->dir_alloc; slice_id++) { + arSlice *s = ar->dir[slice_id]; + if (!s) continue; + if ((n = rdbSaveArraySlice(rdb, s, slice_id, ar->slice_size)) == -1) return -1; + nwritten += n; + } + } } else { serverPanic("Unknown object type"); } @@ -2935,13 +3063,13 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) /* search for duplicate records */ sds field = sdstrynewlen(fstr, flen); - int field_added = (field != NULL && dictAdd(dupSearchDict, field, NULL) == DICT_OK); - if (!field_added || !lpSafeToAdd(lp, (size_t)flen + vlen)) { + if (!field || !lpSafeToAdd(lp, (size_t)flen + vlen) || + dictAdd(dupSearchDict, field, NULL) != DICT_OK) { rdbReportCorruptRDB("Hash zipmap with dup elements, or big length (%u)", flen); /* If field was not added to dict, we still own it. * If it was added, dict owns it and dictRelease will free it. 
*/ - if (!field_added) sdsfree(field); dictRelease(dupSearchDict); + sdsfree(field); lpFree(lp); zfree(encoded); o->ptr = NULL; @@ -3550,7 +3678,6 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) rdbReportCorruptRDB("Duplicated consumer PEL entry " " loading a stream consumer " "group"); - streamFreeNACK(s, nack); decrRefCount(o); return NULL; } @@ -3736,6 +3863,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) return NULL; } o = createModuleObject(mt, ptr); +#ifdef ENABLE_GCRA } else if (rdbtype == RDB_TYPE_GCRA) { uint64_t time = rdbLoadLen(rdb, NULL); if (time == RDB_LENERR || time > LLONG_MAX) { @@ -3743,6 +3871,105 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) return NULL; } o = createGCRAObject((long long)time); +#endif + } else if (rdbtype == RDB_TYPE_ARRAY) { + /* Load array value. We only persist elements and insert_idx - no + * implementation details. Arrays use current ar_slice_size config. 
*/ + uint64_t count; + if ((count = rdbLoadLen(rdb, NULL)) == RDB_LENERR) return NULL; + if (count == 0) { + rdbReportCorruptRDB("Empty array (count == 0) is invalid"); + return NULL; + } + + /* Load insert_idx: 0 = none, 1 = has value followed by actual value */ + uint64_t insert_idx_flag; + if ((insert_idx_flag = rdbLoadLen(rdb, NULL)) == RDB_LENERR) return NULL; + if (insert_idx_flag > 1) { + rdbReportCorruptRDB("Invalid array insert_idx_flag %llu", + (unsigned long long)insert_idx_flag); + return NULL; + } + uint64_t insert_idx; + if (insert_idx_flag == 0) { + insert_idx = AR_INSERT_IDX_NONE; + } else { + if ((insert_idx = rdbLoadLen(rdb, NULL)) == RDB_LENERR) return NULL; + } + + o = createArrayObject(); + redisArray *ar = o->ptr; + ar->insert_idx = insert_idx; + + /* Load elements */ + for (uint64_t i = 0; i < count; i++) { + uint64_t idx; + int idx_isencoded; + if (rdbLoadLenByRef(rdb, &idx_isencoded, &idx) == -1) { + decrRefCount(o); + return NULL; + } + if (idx_isencoded || idx == UINT64_MAX) { + decrRefCount(o); + rdbReportCorruptRDB("Invalid array index %llu", + (unsigned long long)idx); + return NULL; + } + + uint64_t type_tag; + if ((type_tag = rdbLoadLen(rdb, NULL)) == RDB_LENERR) { + decrRefCount(o); + return NULL; + } + + void *v; + if (type_tag == AR_RDB_TAG_INT) { + int64_t ival = rdbLoadSignedInteger(rdb, RDB_VERSION); + if (ival == INT64_MAX && rioGetReadError(rdb)) { + decrRefCount(o); + return NULL; + } + v = arValueFromRdbInt(ival); + } else if (type_tag == AR_RDB_TAG_FLOAT) { + double d; + if (rdbLoadBinaryDoubleValue(rdb, &d) == -1) { + decrRefCount(o); + return NULL; + } + v = arValueFromRdbFloat(d); + } else if (type_tag == AR_RDB_TAG_SMALLSTR) { + sds str; + if ((str = rdbGenericLoadStringObject(rdb, RDB_LOAD_SDS, NULL)) == NULL) { + decrRefCount(o); + return NULL; + } + size_t len = sdslen(str); + if (len > AR_SMALLSTR_MAXLEN) { + sdsfree(str); + decrRefCount(o); + rdbReportCorruptRDB("Invalid small string length %zu in array", len); + 
return NULL; + } + v = arValueFromRdbSmallStr(str, sdslen(str)); + sdsfree(str); + } else if (type_tag == AR_RDB_TAG_SDS) { + /* arString */ + sds str; + if ((str = rdbGenericLoadStringObject(rdb, RDB_LOAD_SDS, NULL)) == NULL) { + decrRefCount(o); + return NULL; + } + v = arEncode(str, sdslen(str)); + sdsfree(str); + } else { + decrRefCount(o); + rdbReportCorruptRDB("Unknown array element type_tag %llu", + (unsigned long long)type_tag); + return NULL; + } + + arSet(ar, idx, v); + } } else { rdbReportReadError("Unknown RDB encoding type %d",rdbtype); return NULL; diff --git a/src/rdb.h b/src/rdb.h index f1ea72150..7e49ddff0 100644 --- a/src/rdb.h +++ b/src/rdb.h @@ -80,11 +80,18 @@ #define RDB_TYPE_HASH_LISTPACK_EX 25 /* Hash LP with HFEs. Attach min TTL at start */ #define RDB_TYPE_STREAM_LISTPACKS_4 26 /* Stream with IDMP support */ #define RDB_TYPE_STREAM_LISTPACKS_5 27 /* Stream with XNACK support (NACKed entries) */ -#define RDB_TYPE_GCRA 28 /* GCRA object */ +#define RDB_TYPE_ARRAY 28 /* Array data type */ +#ifdef ENABLE_GCRA +#define RDB_TYPE_GCRA 29 /* GCRA object */ +#endif /* NOTE: WHEN ADDING NEW RDB TYPE, UPDATE rdbIsObjectType(), and rdb_type_string[] */ /* Test if a type is an object type. */ +#ifdef ENABLE_GCRA +#define rdbIsObjectType(t) (((t) >= 0 && (t) <= 7) || ((t) >= 9 && (t) <= 29)) +#else #define rdbIsObjectType(t) (((t) >= 0 && (t) <= 7) || ((t) >= 9 && (t) <= 28)) +#endif /* Special RDB opcodes (saved/loaded with rdbSaveType/rdbLoadType). */ #define RDB_OPCODE_KEY_META 243 /* Key metadata (module metadata classes). 
*/ @@ -133,6 +140,8 @@ int rdbSaveType(rio *rdb, unsigned char type); int rdbLoadType(rio *rdb); time_t rdbLoadTime(rio *rdb); int rdbSaveLen(rio *rdb, uint64_t len); +ssize_t rdbSaveSignedInteger(rio *rdb, int64_t val); +int64_t rdbLoadSignedInteger(rio *rdb, int rdbver); ssize_t rdbSaveMillisecondTime(rio *rdb, long long t); long long rdbLoadMillisecondTime(rio *rdb, int rdbver); uint64_t rdbLoadLen(rio *rdb, int *isencoded); diff --git a/src/redis-check-rdb.c b/src/redis-check-rdb.c index 1bbebb691..e4c10216d 100644 --- a/src/redis-check-rdb.c +++ b/src/redis-check-rdb.c @@ -88,7 +88,10 @@ char *rdb_type_string[] = { "hash-listpack-md", "stream-v4", "stream-v5", + "array", +#ifdef ENABLE_GCRA "gcra", +#endif }; /* Show a few stats collected into 'rdbstate' */ diff --git a/src/redismodule.h b/src/redismodule.h index fae09c3fb..f0d9e8aa6 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -89,7 +89,7 @@ typedef long long ustime_t; #define REDISMODULE_KEYTYPE_ZSET 5 #define REDISMODULE_KEYTYPE_MODULE 6 #define REDISMODULE_KEYTYPE_STREAM 7 -#define REDISMODULE_KEYTYPE_GCRA 8 +#define REDISMODULE_KEYTYPE_ARRAY 8 /* Reply types. */ #define REDISMODULE_REPLY_UNKNOWN -1 @@ -248,24 +248,31 @@ This flag should not be used directly by the module. 
#define REDISMODULE_NOTIFY_OVERWRITTEN (1<<15) /* o, key overwrite notification */ #define REDISMODULE_NOTIFY_TYPE_CHANGED (1<<16) /* c, key type changed notification */ #define REDISMODULE_NOTIFY_KEY_TRIMMED (1<<17) /* module only key space notification, indicates a key trimmed during slot migration */ -#define REDISMODULE_NOTIFY_RATE_LIMIT (1<<18) /* r, rate limit event */ #define REDISMODULE_NOTIFY_SUBKEYSPACE (1<<19) /* S */ #define REDISMODULE_NOTIFY_SUBKEYEVENT (1<<20) /* T */ #define REDISMODULE_NOTIFY_SUBKEYSPACEITEM (1<<21) /* I */ #define REDISMODULE_NOTIFY_SUBKEYSPACEEVENT (1<<22) /* V */ +#define REDISMODULE_NOTIFY_ARRAY (1<<23) /* a, array key space notification */ +#ifdef ENABLE_GCRA +#define REDISMODULE_NOTIFY_RATE_LIMIT (1<<24) /* r, rate limit event */ +#endif /* Next notification flag, must be updated when adding new flags above! This flag should not be used directly by the module. * Use RedisModule_GetKeyspaceNotificationFlagsAll instead. */ -#define _REDISMODULE_NOTIFY_NEXT (1<<23) +#ifdef ENABLE_GCRA +#define _REDISMODULE_NOTIFY_NEXT (1<<25) +#else +#define _REDISMODULE_NOTIFY_NEXT (1<<24) +#endif /* Delivery flags for RM_SubscribeToKeyspaceEventsWithSubkeys. * These are passed in the 'flags' parameter, not in 'types'. 
*/ #define REDISMODULE_NOTIFY_FLAG_NONE 0 /* Invoke callback for all matching events */ #define REDISMODULE_NOTIFY_FLAG_SUBKEYS_REQUIRED (1<<0) /* Only invoke callback when subkeys are present */ -#define REDISMODULE_NOTIFY_ALL (REDISMODULE_NOTIFY_GENERIC | REDISMODULE_NOTIFY_STRING | REDISMODULE_NOTIFY_LIST | REDISMODULE_NOTIFY_SET | REDISMODULE_NOTIFY_HASH | REDISMODULE_NOTIFY_ZSET | REDISMODULE_NOTIFY_EXPIRED | REDISMODULE_NOTIFY_EVICTED | REDISMODULE_NOTIFY_STREAM | REDISMODULE_NOTIFY_MODULE) /* A */ +#define REDISMODULE_NOTIFY_ALL (REDISMODULE_NOTIFY_GENERIC | REDISMODULE_NOTIFY_STRING | REDISMODULE_NOTIFY_LIST | REDISMODULE_NOTIFY_SET | REDISMODULE_NOTIFY_HASH | REDISMODULE_NOTIFY_ZSET | REDISMODULE_NOTIFY_EXPIRED | REDISMODULE_NOTIFY_EVICTED | REDISMODULE_NOTIFY_STREAM | REDISMODULE_NOTIFY_MODULE | REDISMODULE_NOTIFY_ARRAY) /* A */ /* A special pointer that we can use between the core and the module to signal * field deletion, and that is impossible to be a valid pointer. */ diff --git a/src/replication.c b/src/replication.c index 6726cff19..44d81ba51 100644 --- a/src/replication.c +++ b/src/replication.c @@ -2251,6 +2251,11 @@ void replicationAttachToNewMaster(void) { /* Asynchronously read the SYNC payload we receive from a master */ #define REPL_MAX_WRITTEN_BEFORE_FSYNC (1024*1024*8) /* 8 MB */ void readSyncBulkPayload(connection *conn) { + /* During full sync, the functions engine is freed right before loading + * the RDB. To avoid this happening while a function is still running, + * delay full sync processing until it finishes. 
*/ + if (isInsideYieldingLongCommand()) return; + char buf[PROTO_IOBUF_LEN]; ssize_t nread, readlen, nwritten; int use_diskless_load = useDisklessLoad(); diff --git a/src/sds.c b/src/sds.c index 14babcc51..2dacb0fdf 100644 --- a/src/sds.c +++ b/src/sds.c @@ -105,7 +105,14 @@ sds _sdsnewlen(const void *init, size_t initlen, int trymalloc) { int hdrlen = sdsHdrSize(type); size_t bufsize; - assert(initlen + hdrlen + 1 > initlen); /* Catch size_t overflow */ + if (trymalloc) { + /* protect against size_t overflow */ + if (initlen + hdrlen + 1 <= initlen) + return NULL; + } else { + assert(initlen + hdrlen + 1 > initlen); /* Catch size_t overflow */ + } + sh = trymalloc? s_trymalloc_usable(hdrlen+initlen+1, &bufsize) : s_malloc_usable(hdrlen+initlen+1, &bufsize); diff --git a/src/server.h b/src/server.h index 6cecc6424..2a6fa5fcb 100644 --- a/src/server.h +++ b/src/server.h @@ -22,6 +22,7 @@ #include "atomicvar.h" #include "commands.h" #include "object.h" +#include "sparsearray.h" #include #include @@ -287,7 +288,10 @@ extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT]; #define ACL_CATEGORY_CONNECTION (1ULL<<18) #define ACL_CATEGORY_TRANSACTION (1ULL<<19) #define ACL_CATEGORY_SCRIPTING (1ULL<<20) -#define ACL_CATEGORY_RATE_LIMIT (1ULL<<21) +#define ACL_CATEGORY_ARRAY (1ULL<<21) +#ifdef ENABLE_GCRA +#define ACL_CATEGORY_RATE_LIMIT (1ULL<<22) +#endif /* Key-spec flags * * -------------- */ @@ -796,12 +800,15 @@ typedef enum { #define NOTIFY_OVERWRITTEN (1<<15) /* o, key overwrite notification (Note: excluded from NOTIFY_ALL) */ #define NOTIFY_TYPE_CHANGED (1<<16) /* c, key type changed notification (Note: excluded from NOTIFY_ALL) */ #define NOTIFY_KEY_TRIMMED (1<<17) /* module only key space notification, indicates a key trimmed during slot migration */ -#define NOTIFY_RATE_LIMIT (1<<18) /* r, notify rate limit event (Note: excluded from NOTIFY_ALL)*/ #define NOTIFY_SUBKEYSPACE (1<<19) /* S, subkey-level keyspace notification */ #define NOTIFY_SUBKEYEVENT 
(1<<20) /* T, subkey-level keyevent notification */ #define NOTIFY_SUBKEYSPACEITEM (1<<21) /* I, subkey-level notification per item: channel=key\nsubkey */ #define NOTIFY_SUBKEYSPACEEVENT (1<<22) /* V, subkey-level notification: channel=event|key */ -#define NOTIFY_ALL (NOTIFY_GENERIC | NOTIFY_STRING | NOTIFY_LIST | NOTIFY_SET | NOTIFY_HASH | NOTIFY_ZSET | NOTIFY_EXPIRED | NOTIFY_EVICTED | NOTIFY_STREAM | NOTIFY_MODULE) /* A flag */ +#define NOTIFY_ARRAY (1<<23) /* a, array notification */ +#ifdef ENABLE_GCRA +#define NOTIFY_RATE_LIMIT (1<<24) /* r, notify rate limit event (Note: excluded from NOTIFY_ALL)*/ +#endif +#define NOTIFY_ALL (NOTIFY_GENERIC | NOTIFY_STRING | NOTIFY_LIST | NOTIFY_SET | NOTIFY_HASH | NOTIFY_ZSET | NOTIFY_EXPIRED | NOTIFY_EVICTED | NOTIFY_STREAM | NOTIFY_MODULE | NOTIFY_ARRAY) /* A flag */ /* Using the following macro you can run code inside serverCron() with the * specified period, specified in milliseconds. @@ -863,10 +870,18 @@ typedef enum { * by a 64 bit module type ID, which has a 54 bits module-specific signature * in order to dispatch the loading to the right module, plus a 10 bits * encoding version. */ +/* Code related to GCRA is disabled by default. + * Build with -DENABLE_GCRA to compile it back in. */ + #define OBJ_MODULE 5 /* Module object. */ #define OBJ_STREAM 6 /* Stream object. */ -#define OBJ_GCRA 7 /* GCRA object. */ +#define OBJ_ARRAY 7 /* Array object. */ +#ifdef ENABLE_GCRA +#define OBJ_GCRA 8 /* GCRA object. */ +#define OBJ_TYPE_MAX 9 /* Maximum number of object types */ +#else #define OBJ_TYPE_MAX 8 /* Maximum number of object types */ +#endif /* NOTE: adding a new object requires changes in the following places: * - rdb.c - save/load (also bump RDB_VERSION if needed) @@ -2442,6 +2457,10 @@ struct redisServer { /* Stream IDMP parameters */ long long stream_idmp_duration; /* Default IDMP duration in seconds. */ long long stream_idmp_maxsize; /* Default IDMP max entries. 
*/ + /* Array parameters */ + uint32_t array_slice_size; /* Slice size for new arrays */ + uint32_t array_sparse_kmax; /* Max elements before sparse->dense */ + uint32_t array_sparse_kmin; /* Min elements before dense->sparse */ /* List parameters */ int list_max_listpack_size; int list_compress_depth; @@ -2801,8 +2820,11 @@ typedef enum { COMMAND_GROUP_GEO, COMMAND_GROUP_STREAM, COMMAND_GROUP_BITMAP, + COMMAND_GROUP_ARRAY, COMMAND_GROUP_MODULE, +#ifdef ENABLE_GCRA COMMAND_GROUP_RATE_LIMIT, +#endif } redisCommandGroup; typedef void redisCommandProc(client *c); @@ -3213,6 +3235,7 @@ void addReplyBigNum(client *c, const char *num, size_t len); void addReplyHumanLongDouble(client *c, long double d); void addReplyLongLong(client *c, long long ll); void addReplyLongLongFromStr(client *c, robj* str); +void addReplyUnsignedLongLong(client *c, uint64_t v); void addReplyArrayLen(client *c, long length); void addReplyMapLen(client *c, long length); void addReplySetLen(client *c, long length); @@ -3844,6 +3867,9 @@ struct listpackEx *listpackExCreate(void); void listpackExAddNew(robj *o, char *field, size_t flen, char *value, size_t vlen, uint64_t expireAt); +/* Array data type. 
*/ +robj *arrayTypeDup(robj *o); + /* Pub / Sub */ int pubsubUnsubscribeAllChannels(client *c, int notify); int pubsubUnsubscribeShardAllChannels(client *c, int notify); @@ -4511,6 +4537,26 @@ void digestCommand(client *c); void gcraCommand(client *c); void gcraSetValueCommand(client *c); +/* Array commands (t_array.c) */ +void arsetCommand(client *c); +void argetCommand(client *c); +void ardelCommand(client *c); +void ardelrangeCommand(client *c); +void arlenCommand(client *c); +void arcountCommand(client *c); +void argetrangeCommand(client *c); +void arscanCommand(client *c); +void argrepCommand(client *c); +void aropCommand(client *c); +void arinsertCommand(client *c); +void arringCommand(client *c); +void arnextCommand(client *c); +void arseekCommand(client *c); +void arlastitemsCommand(client *c); +void arinfoCommand(client *c); +void armsetCommand(client *c); +void armgetCommand(client *c); + #if defined(__GNUC__) void *calloc(size_t count, size_t size) __attribute__ ((deprecated)); void free(void *ptr) __attribute__ ((deprecated)); diff --git a/src/sparsearray.c b/src/sparsearray.c new file mode 100644 index 000000000..d4945f2a7 --- /dev/null +++ b/src/sparsearray.c @@ -0,0 +1,2080 @@ +/* + * Copyright (c) 2026-Present, Redis Ltd. + * All rights reserved. + * + * Licensed under your choice of (a) the Redis Source Available License 2.0 + * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the + * GNU Affero General Public License v3 (AGPLv3). + * + * Sparse Array - A memory-efficient sparse array with 64-bit index space. + * Originally authored by: Salvatore Sanfilippo. + * + * This data structure was designed and implemented by Salvatore Sanfilippo. + */ + +#include "server.h" +#include +#include + +/****************************************************************************** + * SPARSE ARRAY IMPLEMENTATION + * + * Sparse arrays are random-access sequences indexed by non-negative 64-bit + * integers. 
They support O(1) get/set operations and efficient iteration. + * + * Arrays use tagged pointer-sized values. 64-bit builds inline more payload, + * while 32-bit builds use narrower immediate encodings and fall back to + * arString more often. SDS strings are not used as values since the final + * bits of SDS pointers are not guaranteed to be zero. + * + * See sparsearray.h for data structure documentation and inline helpers. + * + *****************************************************************************/ + +/* ---------------------------------------------------------------------------- + * Configuration - mapped to Redis server struct for easy standalone adaptation + * -------------------------------------------------------------------------- */ + +#define ArraySliceSize server.array_slice_size +#define ArraySparseKMax server.array_sparse_kmax +#define ArraySparseKMin server.array_sparse_kmin + +/* ---------------------------------------------------------------------------- + * Allocation size tracking + * + * Every zmalloc/zfree/zrealloc that contributes to the array's footprint is + * tracked in ar->alloc_size so that kvobjAllocSize() can return an O(1) + * answer. When ar is NULL (e.g. during arFree) tracking is skipped. 
+ * -------------------------------------------------------------------------- */ + +static inline void *arAllocAndTrack(redisArray *ar, size_t size) { + size_t usable; + void *ptr = zmalloc_usable(size, &usable); + if (ar) ar->alloc_size += usable; + return ptr; +} +static inline void *arCallocAndTrack(redisArray *ar, size_t size) { + size_t usable; + void *ptr = zcalloc_usable(size, &usable); + if (ar) ar->alloc_size += usable; + return ptr; +} +static inline void arFreeAndTrack(redisArray *ar, void *ptr) { + size_t usable; + zfree_usable(ptr, &usable); + if (ar) ar->alloc_size -= usable; +} +static inline void *arReallocAndTrack(redisArray *ar, void *ptr, size_t size) { + size_t usable, old_usable; + void *newptr = zrealloc_usable(ptr, size, &usable, &old_usable); + if (ar) ar->alloc_size += usable - old_usable; + return newptr; +} + +/* Track a tagged value entering/leaving the array (arString bookkeeping). */ +static inline void arTrackValueIn(redisArray *ar, void *v) { + if (ar && arIsPtr(v)) ar->alloc_size += zmalloc_size(v); +} +static inline void arTrackValueOut(redisArray *ar, void *v) { + if (ar && arIsPtr(v)) ar->alloc_size -= zmalloc_size(v); +} + +/* ---------------------------------------------------------------------------- + * Internal helpers + * -------------------------------------------------------------------------- */ + +static inline size_t arStringHeaderSize(size_t len) { + return (len <= 32767) ? 2 : 8; +} + +size_t arStringLen(const void *ptr) { + const uint8_t *p = (const uint8_t *)ptr; + if (p[0] & 0x80) { + return ((size_t)(p[0] & 0x7F) << 8) | p[1]; + } else { + size_t len = 0; + for (int i = 0; i < 8; i++) len = (len << 8) | p[i]; + return len; + } +} + +const char *arStringData(const void *ptr) { + const uint8_t *p = (const uint8_t *)ptr; + return (const char *)(p + ((p[0] & 0x80) ? 
2 : 8)); +} + +static inline size_t arSparseAllocSize(uint32_t cap) { + size_t offsets_size = cap * sizeof(uint16_t); + size_t padding = (sizeof(void *) - (offsets_size % sizeof(void *))) % sizeof(void *); + return sizeof(arSlice) + offsets_size + padding + cap * sizeof(void *); +} + +static inline size_t arDenseAllocSize(uint32_t winsize) { + return sizeof(arSlice) + winsize * sizeof(void *); +} + +static inline uint32_t arSliceMaxIdx(arSlice *s) { + if (s->encoding == AR_SLICE_DENSE) { + return s->layout.dense.max_idx; + } else { + return s->layout.sparse.offsets[s->count - 1]; + } +} + +/* ---------------------------------------------------------------------------- + * arString type + * -------------------------------------------------------------------------- */ + +/* Allocate a new arString with the given content. + * + * We use arString instead of SDS because SDS pointers are not guaranteed to + * have the low bits zero (SDS points inside an allocation, after the header). + * Our tagged pointer scheme needs tag 00 for heap strings, so we need aligned + * pointers. zmalloc guarantees sufficient alignment. + * + * arString has two header formats: + * + * 1. Short header (2 bytes): lengths up to 32767 bytes. + * The top bit of the first byte is set, and the remaining 15 bits store + * the length in big-endian form. + * + * +--------+--------+-------------------+ + * |1LLLLLLL|LLLLLLLL| payload | + * +--------+--------+-------------------+ + * byte 0 byte 1 + * + * 2. Long header (8 bytes): lengths up to 2^63-1 bytes. + * The top bit of the first byte is clear, and the remaining 63 bits store + * the length in big-endian form. 
+ * + * +--------+--------+--------+--------+--------+--------+--------+--// + * |0LLLLLLL|LLLLLLLL|LLLLLLLL|LLLLLLLL|LLLLLLLL|LLLLLLLL|LLLLLLLL| + * +--------+--------+--------+--------+--------+--------+--------+--// + * byte 0 byte 1 byte 2 byte 3 byte 4 byte 5 byte 6 + * + * //--+--------+-------------------+ + * |LLLLLLLL| payload | + * //--+--------+-------------------+ + * byte 7 + * + * For simplicity we use a 63 bit len even when Redis is compiled with a 32 + * bit target, the overhead for strings > 32k is small. + * + * So the pointer returned by arStringNew() always points to the start of the + * header, and the string data begins immediately after the 2-byte or 8-byte + * header. */ +void *arStringNew(const char *s, size_t len) { + /* Length is stored in 63 bits; reject >= 2^63 to avoid + * hypothetical header corruption. On 32 bit builds this is guaranteed + * by size_t itself, so don't compile an always-true assertion. */ +#if SIZE_MAX > UINT32_MAX + serverAssert(len < ((size_t)1 << 63)); +#endif + size_t hdr_size = arStringHeaderSize(len); + uint8_t *ptr = zmalloc(hdr_size + len); + + if (hdr_size == 2) { + /* Short header: MSB=1, 15-bit length */ + ptr[0] = 0x80 | ((len >> 8) & 0x7F); + ptr[1] = len & 0xFF; + } else { + /* Long header: MSB=0, 63-bit length in big-endian */ + for (int i = 7; i >= 0; i--) { + ptr[7 - i] = (len >> (i * 8)) & 0xFF; + } + } + + memcpy(ptr + hdr_size, s, len); + return ptr; +} + +/* Free arString pointer */ +void arStringFree(void *ptr) { + zfree(ptr); +} + +/* Duplicate an arString */ +void *arStringDup(void *ptr) { + size_t len = arStringLen(ptr); + size_t hdr_size = arStringHeaderSize(len); + size_t total = hdr_size + len; + void *dup = zmalloc(total); + memcpy(dup, ptr, total); + return dup; +} + +/* Free arString if value is pointer-tagged, otherwise nothing to + * free, the info is encoded in the pointer itself. 
*/
+void arFreePtr(void *v) {
+    if (!arIsPtr(v)) return;
+    arStringFree(v);
+}
+
+/* ----------------------------------------------------------------------------
+ * Slice allocation and management
+ * -------------------------------------------------------------------------- */
+
+/* Build an empty dense slice whose initial window (AR_SLICE_MIN_ALLOC items)
+ * starts at 'rel_idx', the index relative to the slice base. */
+arSlice *arSliceDenseNew(redisArray *ar, uint32_t rel_idx, uint32_t slice_size) {
+    const uint32_t window = AR_SLICE_MIN_ALLOC;
+
+    /* If a window starting at rel_idx would cross the end of the slice,
+     * slide it left so that it ends exactly at the slice boundary. With the
+     * default slice size of 4096, a slice created around relative index 4093
+     * takes this path. */
+    uint32_t start = (rel_idx + window > slice_size) ? slice_size - window
+                                                     : rel_idx;
+
+    arSlice *slice = arAllocAndTrack(ar, arDenseAllocSize(window));
+    void **items = (void **)(slice + 1); /* Payload follows the struct. */
+
+    slice->encoding = AR_SLICE_DENSE;
+    slice->count = 0;
+    slice->layout.dense.offset = start;
+    slice->layout.dense.winsize = window;
+    slice->layout.dense.max_idx = 0;
+    slice->layout.dense.items = items;
+    memset(items, 0, window * sizeof(void *));
+    return slice;
+}
+
+/* Sparse slices are a single allocation: the arSlice struct followed by
+ * a payload containing offsets[] and values[]. This function computes
+ * where these arrays live in the payload and sets the pointers accordingly.
+ * Must be called after zmalloc or memcpy, since copied slices have stale
+ * pointers that still reference the source allocation's memory. The values
+ * array requires pointer alignment, hence the padding after offsets[].
*/ +void arSparseSetupPointers(arSlice *s) { + char *p = (char *)(s + 1); + size_t offsets_size = s->layout.sparse.cap * sizeof(uint16_t); + size_t padding = (sizeof(void *) - (offsets_size % sizeof(void *))) % sizeof(void *); + s->layout.sparse.offsets = (uint16_t *)p; + s->layout.sparse.values = (void **)(p + offsets_size + padding); +} + +/* Create a new sparse slice */ +arSlice *arSliceSparseNew(redisArray *ar) { + uint32_t cap = (ArraySparseKMax < 4) ? ArraySparseKMax : 4; + arSlice *s = arAllocAndTrack(ar, arSparseAllocSize(cap)); + s->encoding = AR_SLICE_SPARSE; + s->count = 0; + s->layout.sparse.cap = cap; + arSparseSetupPointers(s); + return s; +} + +/* Free a slice (including all arString values inside). + * When ar is non-NULL, deducts the memory from ar->alloc_size. + * Pass NULL for ar when the entire array is being destroyed (arFree). */ +void arSliceFree(redisArray *ar, arSlice *s) { + if (!s) return; + + if (s->encoding == AR_SLICE_DENSE) { + for (uint32_t i = 0; i < s->layout.dense.winsize; i++) { + arTrackValueOut(ar, s->layout.dense.items[i]); + arFreePtr(s->layout.dense.items[i]); + } + } else { + void **values = s->layout.sparse.values; + for (uint32_t i = 0; i < s->count; i++) { + arTrackValueOut(ar, values[i]); + arFreePtr(values[i]); + } + } + arFreeAndTrack(ar, s); +} + +/* Grow dense slice to accommodate rel_idx (right growth) */ +arSlice *arSliceDenseGrowRight(redisArray *ar, arSlice *s, uint32_t rel_idx, uint32_t slice_size) { + uint32_t new_winsize = s->layout.dense.winsize; + + /* Double until rel_idx fits */ + while (rel_idx >= s->layout.dense.offset + new_winsize && new_winsize < slice_size) { + new_winsize <<= 1; + } + + uint32_t new_offset = s->layout.dense.offset; + if (new_winsize >= slice_size) { + new_winsize = slice_size; + new_offset = 0; + } else if (new_offset + new_winsize > slice_size) { + /* Window would exceed slice boundary, adjust offset */ + new_offset = slice_size - new_winsize; + } + + /* Fast path: when offset 
does not move, we can use realloc() to grow + * the dense allocation without relocating existing items ourselves. */ + if (new_offset == s->layout.dense.offset) { + uint32_t old_winsize = s->layout.dense.winsize; + arSlice *ns = arReallocAndTrack(ar, s, arDenseAllocSize(new_winsize)); + ns->layout.dense.winsize = new_winsize; + ns->layout.dense.items = (void **)(ns + 1); + + /* New tail must be explicitly zeroed for arIsEmpty() semantics. */ + memset(ns->layout.dense.items + old_winsize, 0, + (new_winsize - old_winsize) * sizeof(void *)); + return ns; + } + + /* Data copy path: offset moved, so we allocate a new slice and copy. */ + arSlice *ns = arAllocAndTrack(ar, arDenseAllocSize(new_winsize)); + ns->encoding = AR_SLICE_DENSE; + ns->count = s->count; + ns->layout.dense.offset = new_offset; + ns->layout.dense.winsize = new_winsize; + ns->layout.dense.max_idx = s->layout.dense.max_idx; + ns->layout.dense.items = (void **)(ns + 1); + + /* Zero-fill first to ensure arIsEmpty() works for new slots, then + * copy old data */ + memset(ns->layout.dense.items, 0, new_winsize * sizeof(void *)); + uint32_t shift = s->layout.dense.offset - new_offset; + serverAssert(shift + s->layout.dense.winsize <= new_winsize); + memcpy(ns->layout.dense.items + shift, s->layout.dense.items, s->layout.dense.winsize * sizeof(void *)); + + arFreeAndTrack(ar, s); + return ns; +} + +/* Grow dense slice to accommodate rel_idx (left growth with slack). + * Note that in this case no realloc() optimization is possible, still + * we can grow on the left more than needed (next power of two) so if + * there is a right-to-left access pattern we can cope. 
*/
+arSlice *arSliceDenseGrowLeft(redisArray *ar, arSlice *s, uint32_t rel_idx, uint32_t slice_size) {
+    const uint32_t old_offset = s->layout.dense.offset;
+    const uint32_t old_winsize = s->layout.dense.winsize;
+    const uint32_t old_end = old_offset + old_winsize;
+
+    /* Items needed to span from rel_idx up to the current right edge,
+     * rounded up to a power of two and clamped to
+     * [AR_SLICE_MIN_ALLOC, slice_size]. */
+    uint32_t need = old_end - rel_idx;
+    uint32_t new_winsize = nearestNextPowerOf2(need);
+    if (new_winsize < AR_SLICE_MIN_ALLOC) new_winsize = AR_SLICE_MIN_ALLOC;
+    if (new_winsize > slice_size) new_winsize = slice_size;
+
+    /* Keep the right edge where it was, so the extra room ends up on the
+     * left. A full-slice window, or one that would start before the slice
+     * base, starts at 0. */
+    int32_t new_offset = (int32_t)old_end - (int32_t)new_winsize;
+    if (new_offset < 0 || new_winsize == slice_size) new_offset = 0;
+
+    arSlice *grown = arAllocAndTrack(ar, arDenseAllocSize(new_winsize));
+    grown->encoding = AR_SLICE_DENSE;
+    grown->count = s->count;
+    grown->layout.dense.offset = (uint32_t)new_offset;
+    grown->layout.dense.winsize = new_winsize;
+    grown->layout.dense.max_idx = s->layout.dense.max_idx;
+    grown->layout.dense.items = (void **)(grown + 1);
+
+    /* Zero the window (arIsEmpty() semantics), then copy the old items at
+     * their new position inside it. */
+    memset(grown->layout.dense.items, 0, new_winsize * sizeof(void *));
+    uint32_t shift = old_offset - grown->layout.dense.offset;
+    serverAssert(shift + old_winsize <= new_winsize);
+    memcpy(grown->layout.dense.items + shift, s->layout.dense.items,
+           old_winsize * sizeof(void *));
+
+    arFreeAndTrack(ar, s);
+    return grown;
+}
+
+/* Grow dense slice if rel_idx is outside the current window. Returns a new
+ * slice, or the old pointer if the current slice can already accommodate the
+ * index.
*/ +arSlice *arSliceDenseGrowIfNeeded(redisArray *ar, arSlice *s, uint32_t rel_idx, uint32_t slice_size) { + if (rel_idx >= s->layout.dense.offset + s->layout.dense.winsize) { + return arSliceDenseGrowRight(ar, s, rel_idx, slice_size); + } else if (rel_idx < s->layout.dense.offset) { + return arSliceDenseGrowLeft(ar, s, rel_idx, slice_size); + } + return s; +} + +/* Binary search in sparse slice. + * Returns index where rel_idx is or should be (the two cases + * can be distinguished via 'found'). */ +uint32_t arSparseFindPos(arSlice *s, uint16_t rel_idx, int *found) { + uint16_t *offsets = s->layout.sparse.offsets; + uint32_t lo = 0, hi = s->count; + while (lo < hi) { + uint32_t mid = lo + (hi - lo) / 2; + if (offsets[mid] < rel_idx) { + lo = mid + 1; + } else { + hi = mid; + } + } + *found = (lo < s->count && offsets[lo] == rel_idx); + return lo; +} + +/* Promote sparse slice to dense. */ +arSlice *arSparsePromote(redisArray *ar, arSlice *s, uint32_t slice_size) { + if (s->count == 0) { + arFreeAndTrack(ar, s); + return arSliceDenseNew(ar, 0, slice_size); + } + + uint16_t *offsets = s->layout.sparse.offsets; + void **values = s->layout.sparse.values; + + uint32_t min_off = offsets[0]; + uint32_t max_off = offsets[s->count - 1]; + uint32_t need = max_off - min_off + 1; + + uint32_t winsize = nearestNextPowerOf2(need); + if (winsize < AR_SLICE_MIN_ALLOC) winsize = AR_SLICE_MIN_ALLOC; + + uint32_t offset = min_off; + if (winsize >= slice_size) { + winsize = slice_size; + offset = 0; + } else if (offset + winsize > slice_size) { + /* Window would exceed slice boundary, adjust offset */ + offset = slice_size - winsize; + } + + arSlice *d = arAllocAndTrack(ar, arDenseAllocSize(winsize)); + d->encoding = AR_SLICE_DENSE; + d->count = s->count; + d->layout.dense.offset = offset; + d->layout.dense.winsize = winsize; + d->layout.dense.max_idx = max_off; + d->layout.dense.items = (void **)(d + 1); + + /* Set the entries in the sparse representation into the + * new dense 
slice. */ + memset(d->layout.dense.items, 0, winsize * sizeof(void *)); + for (uint32_t i = 0; i < s->count; i++) { + serverAssert(offsets[i] >= offset); + serverAssert(offsets[i] - offset < winsize); + d->layout.dense.items[offsets[i] - offset] = values[i]; + } + + arFreeAndTrack(ar, s); + return d; +} + +/* Demote the provided dense slice to a sparse slice, if beneficial. + * The function returns the dense slice given in input if not demoted, + * otherwise the newly created sparse slice containing the same elements + * is returned, in this case, as a side effect, the dense slice in + * input is freed. */ +arSlice *arDenseMaybeDemote(redisArray *ar, arSlice *d) { + if (ArraySparseKMax == 0) return d; // Sparse is disabled by config. + if (d->count > ArraySparseKMin) return d; // Yet not at demotion level. + if (d->count > ArraySparseKMax) return d; // Just config sanity check. + if (d->layout.dense.winsize == AR_SLICE_MIN_ALLOC) return d; // Already small. + + /* Only demote if it actually saves memory. We require the dense slice + * to be significantly larger than sparse would be (at least 25% bigger), + * and large enough in absolute terms (4x kmin) to be worth the trouble. */ + size_t dense_bytes = arDenseAllocSize(d->layout.dense.winsize); + size_t sparse_bytes = arSparseAllocSize(ArraySparseKMin); + if (d->layout.dense.winsize < 4 * ArraySparseKMin) return d; + if (dense_bytes < sparse_bytes * 5 / 4) return d; + + /* Demote it. */ + arSlice *s = arAllocAndTrack(ar, arSparseAllocSize(ArraySparseKMin)); + s->encoding = AR_SLICE_SPARSE; + s->count = 0; + s->layout.sparse.cap = ArraySparseKMin; + arSparseSetupPointers(s); + + /* Copy every entry from dense to sparse. 
*/ + uint16_t *offsets = s->layout.sparse.offsets; + void **values = s->layout.sparse.values; + for (uint32_t i = 0; i < d->layout.dense.winsize && s->count < d->count; i++) { + if (!arIsEmpty(d->layout.dense.items[i])) { + offsets[s->count] = d->layout.dense.offset + i; + values[s->count] = d->layout.dense.items[i]; + s->count++; + } + } + + arFreeAndTrack(ar, d); + return s; +} + +/* Update max_idx after deletion in dense slice. This is O(winsize) in the worst + * case, but we only scan when we deleted the current max, which is rare. */ +void arDenseUpdateMaxIdx(arSlice *d, uint32_t deleted_off) { + /* Note that if the slice is left without elements, it will get + * deallocated so there is nothing to set. */ + if (d->count == 0 || deleted_off < d->layout.dense.max_idx) return; + + /* Scan backward from old max to find new max. */ + for (int pos = d->layout.dense.max_idx - d->layout.dense.offset; pos >= 0; pos--) { + if (!arIsEmpty(d->layout.dense.items[pos])) { + d->layout.dense.max_idx = d->layout.dense.offset + pos; + return; + } + } +} + +/* ---------------------------------------------------------------------------- + * Directory management (flat mode and superdir mode) + * + * Why two modes: + * + * - Flat mode (ar->superdir == NULL): ar->dir is indexed by slice_id + * (ar->dir[slice_id] -> arSlice*). This is very fast and compact while + * slice IDs stay relatively low. + * + * - Superdir mode (ar->superdir != NULL): there are two levels of indirection. + * Metadata (that is, pointers to actual array slices) is split into sorted + * entries by block_id; each block is a fixed table of 2048 slice pointers. + * That table uses about 8 KB on 32-bit builds and 16 KB on 64-bit builds. + * Blocks are allocated on demand. Basically this means that what was, in + * flat mode, a contiguous array of slice pointers (called the directory), + * in superdir mode becomes a sparse array of directory pieces. 
+ *
+ * The superdir avoids catastrophic metadata growth for sparse/high indices.
+ * A flat directory must be sized up to the highest slice_id, even if almost
+ * all entries are NULL. With very large index jumps, that would waste huge
+ * memory. Superdir keeps metadata proportional to the number of populated
+ * blocks/slices instead of the largest slice_id ever seen.
+ *
+ * Promotion trigger:
+ * - When slice_id >= AR_SUPER_BLOCK_SLOTS (2048), flat mode is promoted.
+ * - Practical meaning: slice_id is idx / slice_size.
+ *   With default slice_size=4096, threshold slice_id=2048 corresponds to
+ *   idx >= 2048*4096 = 8,388,608 (first index that needs block_id 1).
+ *
+ * Hint: here what we gain is not just efficiency. Also there are no security
+ * concerns with setting a very high index. No problem with a corrupted RDB
+ * file containing a very high index, and no need to configure a maximum index
+ * allowable in an array. Thanks to this design the array type of Redis is
+ * a more useful and safe type.
+ * -------------------------------------------------------------------------- */
+
+/* Lower-bound binary search for block_id among the superdir entries.
+ * Returns the position of the matching entry, or the position where such an
+ * entry would have to be inserted; '*found' tells the two cases apart. */
+uint32_t arSuperDirFind(redisArray *ar, uint64_t block_id, int *found) {
+    uint32_t left = 0;
+    uint32_t right = ar->sdir_len;
+
+    while (left < right) {
+        uint32_t middle = left + (right - left) / 2;
+        if (ar->superdir[middle].block_id >= block_id) {
+            right = middle;
+        } else {
+            left = middle + 1;
+        }
+    }
+
+    *found = (left < ar->sdir_len && ar->superdir[left].block_id == block_id);
+    return left;
+}
+
+/* Get slice pointer from superdir mode. Returns NULL if not found.
*/ +arSlice **arSuperDirGetSlot(redisArray *ar, uint64_t slice_id) { + uint64_t block_id = slice_id / AR_SUPER_BLOCK_SLOTS; + uint32_t block_off = slice_id % AR_SUPER_BLOCK_SLOTS; + + int found; + uint32_t pos = arSuperDirFind(ar, block_id, &found); + if (!found) return NULL; + + return ar->superdir[pos].slots + block_off; +} + +/* Ensure block exists in superdir, creating if needed. Returns slot pointer. */ +arSlice **arSuperDirEnsureSlot(redisArray *ar, uint64_t slice_id) { + uint64_t block_id = slice_id / AR_SUPER_BLOCK_SLOTS; + uint32_t block_off = slice_id % AR_SUPER_BLOCK_SLOTS; + + int found; + uint32_t pos = arSuperDirFind(ar, block_id, &found); + + if (!found) { + /* Need to insert new block */ + if (ar->sdir_len >= ar->sdir_cap) { + /* Grow superdir array */ + uint32_t new_cap = ar->sdir_cap ? ar->sdir_cap * 2 : 4; + ar->superdir = arReallocAndTrack(ar, ar->superdir, new_cap * sizeof(arSDirEntry)); + ar->sdir_cap = new_cap; + } + + /* Shift entries to make room */ + if (pos < ar->sdir_len) { + memmove(ar->superdir + pos + 1, ar->superdir + pos, + (ar->sdir_len - pos) * sizeof(arSDirEntry)); + } + + /* Initialize new entry */ + ar->superdir[pos].block_id = block_id; + ar->superdir[pos].count = 0; + ar->superdir[pos].slots = arCallocAndTrack(ar, AR_SUPER_BLOCK_SLOTS * sizeof(arSlice *)); + ar->sdir_len++; + } + + return ar->superdir[pos].slots + block_off; +} + +/* Look up the superdir block that contains slice_id. + * Returns a pointer to that arSDirEntry, or NULL if the block was never + * allocated (no slices currently exist in that block). */ +arSDirEntry *arSuperDirGetEntry(redisArray *ar, uint64_t slice_id) { + uint64_t block_id = slice_id / AR_SUPER_BLOCK_SLOTS; + int found; + uint32_t pos = arSuperDirFind(ar, block_id, &found); + return found ? ar->superdir + pos : NULL; +} + +/* Remove one block entry from superdir at index pos. + * We assume 'pos' is valid and the block is logically empty (count == 0). 
+ * Frees the slice-pointer table, compacts remaining entries (keeping order by + * block_id), and decrements ar->sdir_len. */ +void arSuperDirRemoveBlock(redisArray *ar, uint32_t pos) { + arFreeAndTrack(ar, ar->superdir[pos].slots); + if (pos < ar->sdir_len - 1) { + memmove(ar->superdir + pos, ar->superdir + pos + 1, + (ar->sdir_len - pos - 1) * sizeof(arSDirEntry)); + } + ar->sdir_len--; +} + +/* Promote from flat directory to superdir mode. Flat mode only ever uses + * slice_id < AR_SUPER_BLOCK_SLOTS, so all existing slices belong to block 0. */ +void arPromoteToSuperDir(redisArray *ar) { + ar->sdir_cap = 4; + ar->sdir_len = 0; + ar->superdir = arAllocAndTrack(ar, ar->sdir_cap * sizeof(arSDirEntry)); + + /* Copy existing flat dir content into block 0 */ + if (ar->dir_alloc > 0) { + ar->superdir[0].block_id = 0; + ar->superdir[0].slots = arCallocAndTrack(ar, AR_SUPER_BLOCK_SLOTS * sizeof(arSlice *)); + ar->superdir[0].count = 0; + ar->sdir_len = 1; + + /* Copy flat dir pointers to block 0, counting non-NULL */ + for (uint64_t i = 0; i < ar->dir_alloc; i++) { + ar->superdir[0].slots[i] = ar->dir[i]; + if (ar->dir[i]) ar->superdir[0].count++; + } + } + + /* Free old flat directory */ + if (ar->dir) arFreeAndTrack(ar, ar->dir); + ar->dir = NULL; + ar->dir_alloc = 0; +} + +/* Grow directory to accommodate slice_id (handles both modes, dense and + * superdir mode). */ +void arDirGrow(redisArray *ar, uint64_t slice_id) { + /* Check if promotion to superdir is needed */ + if (ar->superdir == NULL && slice_id >= AR_SUPER_BLOCK_SLOTS) { + arPromoteToSuperDir(ar); + } + + if (ar->superdir) { + /* Superdir allocates blocks on-demand in arSetSlice(), so we don't + * allocate a 2048-pointer block for ranges that end up empty. */ + return; + } + + /* Flat mode: grow directory if needed */ + if (slice_id < ar->dir_alloc) return; + + uint64_t new_alloc = ar->dir_alloc ? 
ar->dir_alloc : 1; + + /* Grow geometrically and stop at the first power-of-two size + * that can index slice_id. Note that thanks to superdir mode the + * size of this table of pointers is bound. */ + while (new_alloc <= slice_id) { + new_alloc <<= 1; + } + + arSlice **new_dir = arReallocAndTrack(ar, ar->dir, new_alloc * sizeof(arSlice *)); + + /* Zero-fill new slots */ + memset(new_dir + ar->dir_alloc, 0, (new_alloc - ar->dir_alloc) * sizeof(arSlice *)); + ar->dir = new_dir; + ar->dir_alloc = new_alloc; +} + +/* Maybe shrink directory after freeing a slice (flat mode only). + * Since dir_alloc is always a power of two, we can only shrink by halving. + * So shrinking only happens when dir_highest_used < dir_alloc/2. The 90% + * check is just a quick early-out to skip the loop in the common case. */ +void arDirMaybeShrink(redisArray *ar) { + if (ar->superdir) return; /* Superdir mode: blocks freed individually */ + if (ar->count == 0) return; /* Will be deleted anyway */ + if (ar->dir_highest_used >= ar->dir_alloc * 9 / 10) return; + + /* Find smallest power of two > dir_highest_used */ + uint64_t new_alloc = 1; + while (new_alloc <= ar->dir_highest_used) new_alloc <<= 1; + if (new_alloc >= ar->dir_alloc) return; + + ar->dir = arReallocAndTrack(ar, ar->dir, new_alloc * sizeof(arSlice *)); + ar->dir_alloc = new_alloc; +} + +/* Update dir_highest_used after freeing a slice. + * To always know the highest directory index used is useful + * for a number of reasons: + * 1. arLen() is O(1) this way. + * 2. We can start reverse scans from the rightmost populated directory entry. + * 3. We can shrink the directory (in flat mode) if needed, since we know + * the usage. 
*/
+void arDirUpdateHighest(redisArray *ar, uint64_t freed_id) {
+    if (ar->count == 0) return;
+    if (freed_id < ar->dir_highest_used) return;
+
+    if (!ar->superdir) {
+        /* Flat mode: walk left from the freed slot to the next populated
+         * directory entry. */
+        int64_t i = (int64_t)freed_id - 1;
+        while (i >= 0) {
+            if (ar->dir[i] != NULL) {
+                ar->dir_highest_used = i;
+                return;
+            }
+            i--;
+        }
+        ar->dir_highest_used = 0;
+        return;
+    }
+
+    /* Superdir mode: visit the blocks from the last one backwards, skipping
+     * the empty ones, and inside a block scan the slots right to left. */
+    for (int32_t bi = ar->sdir_len - 1; bi >= 0; bi--) {
+        arSDirEntry *block = ar->superdir + bi;
+        if (block->count == 0) continue;
+
+        for (int32_t si = AR_SUPER_BLOCK_SLOTS - 1; si >= 0; si--) {
+            if (block->slots[si] == NULL) continue;
+            ar->dir_highest_used = block->block_id * AR_SUPER_BLOCK_SLOTS + si;
+            return;
+        }
+    }
+    ar->dir_highest_used = 0;
+}
+
+/* Fetch the slice registered for slice_id (the logical array index divided
+ * by the number of elements per slice) in either directory mode. Returns
+ * NULL when no slice is allocated for that slice_id. */
+arSlice *arGetSlice(redisArray *ar, uint64_t slice_id) {
+    if (!ar->superdir) {
+        return (slice_id < ar->dir_alloc) ? ar->dir[slice_id] : NULL;
+    }
+
+    arSlice **slot = arSuperDirGetSlot(ar, slice_id);
+    if (slot == NULL) return NULL;
+    return *slot;
+}
+
+/* Set slice pointer in the directory. In superdir mode, setting to NULL
+ * decrements the block's slice count and frees the block if it becomes empty.
+ * Setting to non-NULL allocates the block if needed.
*/ +void arSetSlice(redisArray *ar, uint64_t slice_id, arSlice *s) { + if (ar->superdir) { + uint64_t block_id = slice_id / AR_SUPER_BLOCK_SLOTS; + uint32_t block_off = slice_id % AR_SUPER_BLOCK_SLOTS; + + int found; + uint32_t pos = arSuperDirFind(ar, block_id, &found); + + if (s == NULL) { + /* Setting to NULL: decrement block count, maybe remove block */ + if (!found) return; /* Block doesn't exist, nothing to do */ + arSDirEntry *entry = ar->superdir + pos; + if (entry->slots[block_off] != NULL) { + entry->slots[block_off] = NULL; + entry->count--; + ar->num_slices--; + /* Remove empty block */ + if (entry->count == 0) { + arSuperDirRemoveBlock(ar, pos); + } + } + } else { + /* Setting to non-NULL: ensure block exists */ + arSlice **slot = arSuperDirEnsureSlot(ar, slice_id); + arSDirEntry *entry = arSuperDirGetEntry(ar, slice_id); + if (*slot == NULL) { + entry->count++; + ar->num_slices++; + } + *slot = s; + } + } else { + if (s == NULL && ar->dir[slice_id] != NULL) ar->num_slices--; + else if (s != NULL && ar->dir[slice_id] == NULL) ar->num_slices++; + ar->dir[slice_id] = s; + } +} + +/* ---------------------------------------------------------------------------- + * Value encoding + * -------------------------------------------------------------------------- */ + +/* Try to encode string as immediate integer */ +int arTryEncodeInt(const char *s, size_t len, void **out) { + long long ll; + if (string2ll(s, len, &ll) && arIntFits(ll)) { + *out = arFromInt(ll); + return 1; + } + return 0; +} + +/* Try to encode string as immediate float. + * + * The local immediate float encoding clears the low 2 bits of the underlying + * floating-point payload to make room for the tag. On 64-bit builds we do it + * on the IEEE-754 double bits directly. On 32-bit builds we first quantize to + * float, then clear the low 2 bits of the float payload. We only encode if the + * later string representation matches the original input exactly. 
+ * + * There's a subtlety with whole-number floats: d2string formats 1.0 as "1" + * (without decimal point), so "1.0" wouldn't match and would be stored as + * a heap string. We fix this by appending ".0" when d2string produces an + * integer-looking result and comparing again. + * + * Note: pure integers like "1" are handled by arTryEncodeInt first, so values + * reaching here that look like integers after d2string likely had ".0". */ +int arTryEncodeFloat(const char *s, size_t len, void **out) { + /* Fast filter to discard things that obviously can't pass the later + * round-trip test: + * + * 1. Can have optional leading '-'. + * 2. Can be composed only by digits plus one mandatory '.'. + * + * This skips expensive float parsing for obvious non-candidates. */ + size_t i = 0; + int dot_seen = 0; + + if (len == 0) return 0; + if (s[0] == '-') { + if (len == 1) return 0; + i = 1; + } + for (; i < len; i++) { + char c = s[i]; + if (c == '.') { + if (dot_seen) return 0; + dot_seen = 1; + } else if (c < '0' || c > '9') { + return 0; + } + } + if (!dot_seen) return 0; + + /* Expensive round-trip path: convert to double. */ + double d; + if (!string2d(s, len, &d)) return 0; + if (isnan(d) || isinf(d)) return 0; + + uint64_t bits_trunc; + double d_trunc; +#if UINTPTR_MAX == UINT64_MAX + /* Truncate the double payload directly on 64-bit builds. */ + uint64_t bits; + memcpy(&bits, &d, sizeof(bits)); + bits_trunc = bits & ~AR_TAG_MASK; + memcpy(&d_trunc, &bits_trunc, sizeof(d_trunc)); +#else + /* 32-bit builds inline floats, not doubles. Quantize first, then clear + * the low 2 bits of the float payload. */ + float f = (float)d; + if (!isfinite(f)) return 0; // May happen after casting. 
+ uint32_t bits32; + uint32_t bits32_trunc; + float f_trunc; + + memcpy(&bits32, &f, sizeof(bits32)); + bits32_trunc = bits32 & ~(uint32_t)AR_TAG_MASK; + memcpy(&f_trunc, &bits32_trunc, sizeof(f_trunc)); + bits_trunc = bits32_trunc; + d_trunc = (double)f_trunc; // Reduced precision float here. +#endif + + /* Verify round-trip */ + char buf[AR_INLINE_BUFSIZE]; + int buflen = d2string(buf, sizeof(buf) - 2, d_trunc); + if ((size_t)buflen == len && memcmp(buf, s, len) == 0) { + *out = arFromFloatBits(bits_trunc); + return 1; + } + + /* Also try the ".0" form. d2string(1.0) returns "1", but when floats are + * later converted back to strings we restore ".0" for integer-looking + * values, so inputs like "1.0" can still round-trip exactly. */ + buf[buflen] = '.'; + buf[buflen + 1] = '0'; + buf[buflen + 2] = '\0'; + buflen += 2; + if ((size_t)buflen == len && memcmp(buf, s, len) == 0) { + *out = arFromFloatBits(bits_trunc); + return 1; + } + + return 0; +} + +/* Format a float in the canonical string form exposed by arrays. + * buf must be at least AR_INLINE_BUFSIZE bytes. We use d2string() for the + * shortest round-trippable representation, then restore ".0" for + * integer-looking finite values so decoded/replied floats match the logical + * form expected by array persistence and encoding checks. 
*/ +int arFormatFloat(double d, char *buf, size_t bufsize) { + serverAssert(bufsize >= AR_INLINE_BUFSIZE); + int len = d2string(buf, bufsize - 2, d); + if (isfinite(d) && !memchr(buf, '.', len) && !memchr(buf, 'e', len) && + !memchr(buf, 'E', len)) { + serverAssert((size_t)len + 2 < bufsize); + buf[len++] = '.'; + buf[len++] = '0'; + buf[len] = '\0'; + } + return len; +} + +/* Encode a string into a tagged value */ +void *arEncode(const char *s, size_t len) { + void *v; + + /* Try integer first */ + if (arTryEncodeInt(s, len, &v)) { + return v; + } + + /* Try float */ + if (arTryEncodeFloat(s, len, &v)) { + return v; + } + + /* Try small string (architecture-dependent inline limit). */ + if (len <= AR_SMALLSTR_MAXLEN) { + return arFromSmallStr(s, (int)len); + } + + /* Fall back to arString (8+ bytes) */ + return arStringNew(s, len); +} + +void *arValueFromRdbInt(int64_t ival) { + if (arIntFits(ival)) return arFromInt(ival); + + /* If the integer does not fit (i.e. loading into a 32 bit instance + * what was stored in the RDB by a 64 bit instance), we promote it + * to a plain string. */ + char buf[32]; + int len = ll2string(buf, sizeof(buf), ival); + return arStringNew(buf, len); +} + +void *arValueFromRdbFloat(double d) { +#if UINTPTR_MAX == UINT64_MAX + /* On 64-bit, doubles are inlined directly (low 2 bits cleared). + * No string round-trip needed: the RDB double already has clean + * low bits (from the saving side's arToDouble). */ + uint64_t bits; + memcpy(&bits, &d, sizeof(bits)); + return arFromFloatBits(bits); +#endif + + /* Loading on a 32 bit system is more complicated to do efficiently. + * + * RDB always stores array floats as doubles. On 32-bit systems we can + * only inline a float payload with the low 2 bits stolen for the tag. + * Simulate that exact quantization path and keep the value encoded only + * if it survives unchanged. 
*/ + uint32_t bits32; + uint32_t bits32_trunc; + float f_trunc; + double d_trunc; + + /* Narrow to float first, then clear the low 2 payload bits that are + * reserved for the tagged-pointer type. */ + float f = (float)d; + memcpy(&bits32, &f, sizeof(bits32)); + bits32_trunc = bits32 & ~(uint32_t)AR_TAG_MASK; + memcpy(&f_trunc, &bits32_trunc, sizeof(f_trunc)); + d_trunc = (double)f_trunc; + + /* Bitwise comparison keeps signed zero distinct and tells us whether + * the 64-bit RDB value is exactly representable by the local 30-bit + * inline-float format. */ + uint64_t bits64; + uint64_t bits64_trunc; + memcpy(&bits64, &d, sizeof(bits64)); + memcpy(&bits64_trunc, &d_trunc, sizeof(bits64_trunc)); + if (bits64 == bits64_trunc) return arFromFloatBits(bits32_trunc); + + /* Otherwise materialize the canonical string form for this float. */ + char buf[AR_INLINE_BUFSIZE]; + int len = arFormatFloat(d, buf, sizeof(buf)); + return arStringNew(buf, len); +} + +void *arValueFromRdbSmallStr(const char *s, size_t len) { + if (len <= AR_SMALLSTR_MAXLEN) return arFromSmallStr(s, (int)len); + return arStringNew(s, len); +} + +/* Decode a tagged value into raw bytes. + * For inline values, buf must point to at least AR_INLINE_BUFSIZE bytes and + * the returned pointer will be buf. For arString values, the returned pointer + * aliases the string payload directly. Returns NULL if value is empty. + * + * This is a helper function used for AOF rewriting, AROP string "MATCH" + * and DEBUG DIGEST. 
*/ +const char *arDecode(void *v, char *buf, size_t bufsize, size_t *outlen) { + serverAssert(bufsize >= AR_INLINE_BUFSIZE); + if (arIsEmpty(v)) { + if (outlen) *outlen = 0; + return NULL; + } + + if (arIsInt(v)) { + int64_t ival = arToInt(v); + int len = ll2string(buf, 32, ival); + if (outlen) *outlen = len; + return buf; + } + + if (arIsFloat(v)) { + double d = arToDouble(v); + int len = arFormatFloat(d, buf, bufsize); + if (outlen) *outlen = len; + return buf; + } + + if (arIsSmallStr(v)) { + int len = arSmallStrLen(v); + if (outlen) *outlen = len; + arToSmallStr(v, buf); + return buf; + } + + /* arString pointer */ + size_t len = arStringLen(v); + if (outlen) *outlen = len; + return arStringData(v); +} + + +/* ---------------------------------------------------------------------------- + * Array lifecycle + * -------------------------------------------------------------------------- */ + +/* Create a new empty array */ +redisArray *arNew(void) { + redisArray *ar = zmalloc(sizeof(redisArray)); + ar->count = 0; + ar->insert_idx = AR_INSERT_IDX_NONE; + ar->dir_alloc = 0; + ar->dir_highest_used = 0; + ar->num_slices = 0; + ar->alloc_size = zmalloc_size(ar); + ar->slice_size = ArraySliceSize; /* Use current config value */ + ar->sdir_len = 0; + ar->sdir_cap = 0; + ar->dir = NULL; + ar->superdir = NULL; + return ar; +} + +/* Free an array and all its contents */ +void arFree(redisArray *ar) { + if (!ar) return; + + if (ar->superdir) { + /* Superdir mode: free all blocks and their slices */ + for (uint32_t i = 0; i < ar->sdir_len; i++) { + arSDirEntry *e = ar->superdir + i; + for (uint32_t j = 0; j < AR_SUPER_BLOCK_SLOTS; j++) { + if (e->slots[j]) arSliceFree(NULL, e->slots[j]); + } + zfree(e->slots); + } + zfree(ar->superdir); + } else { + /* Flat mode */ + for (uint64_t i = 0; i < ar->dir_alloc; i++) { + if (ar->dir[i]) { + arSliceFree(NULL, ar->dir[i]); + } + } + zfree(ar->dir); + } + zfree(ar); +} + +/* Dismiss a single slice's memory back to the OS. 
*/
static void arSliceDismiss(arSlice *s, int dismiss_values) {
    /* When dismiss_values is non-zero the heap strings referenced by the
     * slice are dismissed too, before the slice allocation itself. */
    if (s->encoding == AR_SLICE_DENSE) {
        if (dismiss_values) {
            void **items = s->layout.dense.items;
            for (uint32_t i = 0; i < s->layout.dense.winsize; i++) {
                if (arIsPtr(items[i]))
                    dismissMemory(items[i], arStringLen(items[i]));
            }
        }
        dismissMemory(s, arDenseAllocSize(s->layout.dense.winsize));
    } else {
        if (dismiss_values) {
            void **values = s->layout.sparse.values;
            for (uint32_t i = 0; i < s->count; i++) {
                if (arIsPtr(values[i]))
                    dismissMemory(values[i], arStringLen(values[i]));
            }
        }
        dismissMemory(s, arSparseAllocSize(s->layout.sparse.cap));
    }
}

/* See dismissObject(). Always dismiss the directory and slices; per-value
 * dismissal only when the average element size makes it worthwhile.
 *
 * size_hint is divided by the number of stored elements and compared with
 * the page size in order to take this decision. */
void arDismiss(redisArray *ar, size_t size_hint) {
    if (!ar) return;
    uint64_t count = ar->count;
    int dismiss_values = (count != 0 && size_hint / count >= server.page_size);

    if (ar->superdir) {
        for (uint32_t bi = 0; bi < ar->sdir_len; bi++) {
            arSDirEntry *e = ar->superdir + bi;
            for (uint32_t si = 0; si < AR_SUPER_BLOCK_SLOTS; si++) {
                if (e->slots[si] == NULL) continue;
                arSliceDismiss(e->slots[si], dismiss_values);
            }
            dismissMemory(e->slots, AR_SUPER_BLOCK_SLOTS * sizeof(arSlice *));
        }
        dismissMemory(ar->superdir, ar->sdir_cap * sizeof(arSDirEntry));
    } else if (ar->dir) {
        for (uint64_t i = 0; i < ar->dir_alloc; i++) {
            if (ar->dir[i] == NULL) continue;
            arSliceDismiss(ar->dir[i], dismiss_values);
        }
        dismissMemory(ar->dir, ar->dir_alloc * sizeof(arSlice *));
    }
}

/* arDup() helper to duplicate a single slice into the duplicated array.
 * This function is responsible for tracking allocations in dup_ar
 * (hence the name of the parameter), since it has the knowledge of
 * the array slice that it is duplicating.
 *
 * The dear reader of this code may wonder why we don't just duplicate the
 * array and its slices without tracking memory, and then copy the memory
 * field of the array at the end. The problem is that the array does not
 * track the logical allocated memory, but the actual memory usage reported
 * by the allocator: there is no guarantee that the allocations of the copy
 * perfectly match the ones of the original array. */
arSlice *arSliceDup(redisArray *dup_ar, arSlice *s) {
    if (s->encoding == AR_SLICE_DENSE) {
        size_t sz = arDenseAllocSize(s->layout.dense.winsize);
        arSlice *nd = arAllocAndTrack(dup_ar, sz);
        memcpy(nd, s, sizeof(arSlice));
        /* The copied header still points into the source slice: make the
         * items pointer reference the area right after the new header. */
        nd->layout.dense.items = (void **)(nd + 1);
        memcpy(nd->layout.dense.items, s->layout.dense.items,
               s->layout.dense.winsize * sizeof(void *));

        /* Duplicate arString pointers */
        for (uint32_t j = 0; j < s->layout.dense.winsize; j++) {
            if (arIsPtr(nd->layout.dense.items[j])) {
                nd->layout.dense.items[j] = arStringDup(nd->layout.dense.items[j]);
                arTrackValueIn(dup_ar, nd->layout.dense.items[j]);
            }
        }
        return nd;
    } else {
        size_t sz = arSparseAllocSize(s->layout.sparse.cap);
        arSlice *nsp = arAllocAndTrack(dup_ar, sz);
        memcpy(nsp, s, sizeof(arSlice));
        /* Same as above: recompute the internal pointers for the copy. */
        arSparseSetupPointers(nsp);
        memcpy(nsp->layout.sparse.offsets, s->layout.sparse.offsets,
               s->layout.sparse.cap * sizeof(uint16_t));
        memcpy(nsp->layout.sparse.values, s->layout.sparse.values,
               s->layout.sparse.cap * sizeof(void *));

        /* Duplicate arString pointers */
        void **values = nsp->layout.sparse.values;
        for (uint32_t j = 0; j < s->count; j++) {
            if (arIsPtr(values[j])) {
                values[j] = arStringDup(values[j]);
                arTrackValueIn(dup_ar, values[j]);
            }
        }
        return nsp;
    }
}

/* Duplicate an array (deep copy). The memory accounting of the copy starts
 * from the size of its own header and grows as the directory, the slices and
 * the heap strings are duplicated. */
redisArray *arDup(redisArray *ar) {
    redisArray *dup = zmalloc(sizeof(redisArray));
    dup->count = ar->count;
    dup->insert_idx = ar->insert_idx;
    dup->dir_alloc = ar->dir_alloc;
    dup->dir_highest_used = ar->dir_highest_used;
    dup->num_slices = ar->num_slices;
    dup->alloc_size = zmalloc_size(dup);
    dup->slice_size = ar->slice_size;
    dup->sdir_len = ar->sdir_len;
    dup->sdir_cap = ar->sdir_cap;

    if (ar->superdir) {
        /* Superdir mode */
        dup->dir = NULL;
        dup->superdir = arAllocAndTrack(dup, ar->sdir_cap * sizeof(arSDirEntry));

        /* Only the first sdir_len entries are in use and get initialized. */
        for (uint32_t i = 0; i < ar->sdir_len; i++) {
            arSDirEntry *src = ar->superdir + i;
            arSDirEntry *dst = dup->superdir + i;

            dst->block_id = src->block_id;
            dst->count = src->count;
            dst->slots = arCallocAndTrack(dup, AR_SUPER_BLOCK_SLOTS * sizeof(arSlice *));

            for (uint32_t j = 0; j < AR_SUPER_BLOCK_SLOTS; j++) {
                if (src->slots[j]) {
                    dst->slots[j] = arSliceDup(dup, src->slots[j]);
                }
            }
        }
    } else if (ar->dir_alloc > 0) {
        /* Flat mode */
        dup->superdir = NULL;
        dup->dir = arAllocAndTrack(dup, ar->dir_alloc * sizeof(arSlice *));
        memset(dup->dir, 0, ar->dir_alloc * sizeof(arSlice *));

        for (uint64_t i = 0; i < ar->dir_alloc; i++) {
            if (ar->dir[i]) {
                dup->dir[i] = arSliceDup(dup, ar->dir[i]);
            }
        }
    } else {
        /* No directory allocated in the source array. */
        dup->dir = NULL;
        dup->superdir = NULL;
    }

    return dup;
}

/* ----------------------------------------------------------------------------
 * Core operations
 * -------------------------------------------------------------------------- */

/* Get value at index (returns NULL for empty/missing) */
void *arGet(redisArray *ar, uint64_t idx) {
    uint64_t slice_id = arSliceId(idx, ar->slice_size);
    uint32_t rel_idx = arSliceOff(idx, ar->slice_size);

    arSlice *s = arGetSlice(ar, slice_id);
    if (s == NULL) return NULL; // No slice at all for this index.

    if (s->encoding == AR_SLICE_DENSE) {
        if (rel_idx < s->layout.dense.offset ||
            rel_idx >= s->layout.dense.offset + s->layout.dense.winsize)
        {
            // The slice window does not include this index.
            return NULL;
        }
        return s->layout.dense.items[rel_idx - s->layout.dense.offset];
    } else {
        int found;
        uint32_t pos = arSparseFindPos(s, (uint16_t)rel_idx, &found);
        if (found) {
            void **values = s->layout.sparse.values;
            return values[pos];
        }
        return NULL;
    }
}

/* Set value at index. Caller must ensure idx != UINT64_MAX.
 * v must not be NULL (empty) - use arDel() to delete elements.
 *
 * If the index already holds a value, the old value is untracked and freed
 * before the new one is stored. */
void arSet(redisArray *ar, uint64_t idx, void *v) {
    serverAssert(v != NULL); /* Use arDel for deletion, not arSet(v=NULL) */
    /* UINT64_MAX can't be used for a couple of reasons: for once,
     * the array len is the max index set + 1, so we could not represent
     * that; also it is a sentinel for last set index still not being set. */
    serverAssert(idx != UINT64_MAX);
    uint64_t slice_id = arSliceId(idx, ar->slice_size);
    uint32_t rel_idx = arSliceOff(idx, ar->slice_size);

    /* Ensure directory capacity (may trigger promotion to superdir) */
    arDirGrow(ar, slice_id);

    /* Get current slice */
    arSlice *s = arGetSlice(ar, slice_id);

    /* Create slice if missing: sparse when sparse encoding is enabled
     * (kmax > 0), dense otherwise. */
    if (s == NULL) {
        if (ArraySparseKMax > 0) {
            s = arSliceSparseNew(ar);
        } else {
            s = arSliceDenseNew(ar, rel_idx, ar->slice_size);
        }
        arSetSlice(ar, slice_id, s);
    }

    if (s->encoding == AR_SLICE_DENSE) {
        /* Grow the slice window if needed */
        s = arSliceDenseGrowIfNeeded(ar, s, rel_idx, ar->slice_size);
        arSetSlice(ar, slice_id, s); // In case it changed.

        uint32_t pos = rel_idx - s->layout.dense.offset;
        void *old = s->layout.dense.items[pos];

        if (arIsEmpty(old)) {
            /* New element: both the slice and the array counters grow. */
            s->count++;
            ar->count++;
        } else {
            /* Replace existing value. */
            arTrackValueOut(ar, old);
            arFreePtr(old);
        }

        arTrackValueIn(ar, v);
        s->layout.dense.items[pos] = v;

        /* Update max_idx */
        if (rel_idx > s->layout.dense.max_idx) {
            s->layout.dense.max_idx = rel_idx;
        }
    } else {
        int found;
        uint32_t pos = arSparseFindPos(s, (uint16_t)rel_idx, &found);
        uint16_t *offsets = s->layout.sparse.offsets;
        void **values = s->layout.sparse.values;

        if (found) {
            /* Replace existing */
            arTrackValueOut(ar, values[pos]);
            arFreePtr(values[pos]);
            arTrackValueIn(ar, v);
            values[pos] = v;
        } else {
            /* Insert new */
            if (s->count >= ArraySparseKMax) {
                /* Promote to dense */
                arSlice *d = arSparsePromote(ar, s, ar->slice_size);

                /* Grow window if needed */
                d = arSliceDenseGrowIfNeeded(ar, d, rel_idx, ar->slice_size);
                arSetSlice(ar, slice_id, d);

                uint32_t dpos = rel_idx - d->layout.dense.offset;
                arTrackValueIn(ar, v);
                d->layout.dense.items[dpos] = v;
                d->count++;
                ar->count++;
                if (rel_idx > d->layout.dense.max_idx) d->layout.dense.max_idx = rel_idx;
            } else {
                /* Insert in sparse */
                if (s->count >= s->layout.sparse.cap) {
                    /* Grow capacity, we grow 2x but note that there is no
                     * point in growing more than kmax, so we clamp to kmax. */
                    uint32_t new_cap = s->layout.sparse.cap * 2;
                    if (new_cap > ArraySparseKMax) new_cap = ArraySparseKMax;
                    arSlice *ns = arAllocAndTrack(ar, arSparseAllocSize(new_cap));
                    ns->encoding = AR_SLICE_SPARSE;
                    ns->count = s->count;
                    ns->layout.sparse.cap = new_cap;
                    arSparseSetupPointers(ns);

                    /* Copy old data to new slice */
                    uint16_t *old_offsets = s->layout.sparse.offsets;
                    void **old_values = s->layout.sparse.values;
                    uint16_t *new_offsets = ns->layout.sparse.offsets;
                    void **new_values = ns->layout.sparse.values;
                    memcpy(new_offsets,old_offsets,s->count * sizeof(uint16_t));
                    memcpy(new_values,old_values,s->count * sizeof(void *));

                    /* The old slice is released: from now on work on the
                     * new one, including the cached array pointers. */
                    arFreeAndTrack(ar, s);
                    s = ns;
                    arSetSlice(ar, slice_id, s);
                    offsets = new_offsets;
                    values = new_values;
                }

                /* Shift and insert in place, so that the offsets array
                 * remains sorted. */
                memmove(offsets + pos + 1, offsets + pos,
                        (s->count - pos) * sizeof(uint16_t));
                memmove(values + pos + 1, values + pos,
                        (s->count - pos) * sizeof(void *));
                offsets[pos] = (uint16_t)rel_idx;
                arTrackValueIn(ar, v);
                values[pos] = v;
                s->count++;
                ar->count++;
            }
        }
    }

    /* Update dir_highest_used. The count==1 check handles when we just added
     * the first element to an empty array. */
    if (slice_id > ar->dir_highest_used || ar->count == 1) {
        ar->dir_highest_used = slice_id;
    }
}

/* Delete value at index. Returns 1 if deleted, 0 if was already empty.
*/
int arDel(redisArray *ar, uint64_t idx) {
    uint64_t slice_id = arSliceId(idx, ar->slice_size);
    uint32_t rel_idx = arSliceOff(idx, ar->slice_size);

    arSlice *s = arGetSlice(ar, slice_id);
    if (s == NULL) return 0;

    if (s->encoding == AR_SLICE_DENSE) {
        /* Indexes outside the allocated window are empty by definition. */
        if (rel_idx < s->layout.dense.offset || rel_idx >= s->layout.dense.offset + s->layout.dense.winsize) {
            return 0;
        }

        uint32_t pos = rel_idx - s->layout.dense.offset;
        void *old = s->layout.dense.items[pos];
        if (arIsEmpty(old)) return 0;

        arTrackValueOut(ar, old);
        arFreePtr(old);
        s->layout.dense.items[pos] = NULL;
        s->count--;
        ar->count--;

        /* Update max_idx if we deleted the max */
        arDenseUpdateMaxIdx(s, rel_idx);
        if (s->count != 0) {
            /* Maybe demote to sparse. */
            arSetSlice(ar, slice_id, arDenseMaybeDemote(ar, s));
            return 1;
        }
        /* Empty dense slice: fall through to the slice removal below. */
    } else {
        int found;
        uint32_t pos = arSparseFindPos(s, (uint16_t)rel_idx, &found);
        if (!found) return 0;

        uint16_t *offsets = s->layout.sparse.offsets;
        void **values = s->layout.sparse.values;

        arTrackValueOut(ar, values[pos]);
        arFreePtr(values[pos]);
        /* Close the gap left by the removed entry. */
        memmove(offsets + pos, offsets + pos + 1,
                (s->count - pos - 1) * sizeof(uint16_t));
        memmove(values + pos, values + pos + 1,
                (s->count - pos - 1) * sizeof(void *));
        s->count--;
        ar->count--;
    }

    /* Delete the slice if now it is empty. */
    if (s->count == 0) {
        arSliceFree(ar, s);
        /* Note that in superdir mode arSetSlice() will also free
         * empty blocks. */
        arSetSlice(ar, slice_id, NULL);
        arDirUpdateHighest(ar, slice_id);
        arDirMaybeShrink(ar);
    }
    return 1;
}

/* ============================================================================
 * GENERALIZED RANGE DELETE - arDeleteRange
 * ============================================================================
 *
 * This function provides O(N) range deletion where N is the number of stored
 * elements, NOT the numeric range length. It achieves this by:
 *
 * 1. Deleting whole fully-covered slices in the middle range.
 * 2. In superdir mode, visiting only overlapping blocks and covered slice
 *    slots within them, instead of scanning the numeric slice-id span.
 * 3. Only doing element-level deletion inside the two boundary slices.
 *
 * This is used by ARDELRANGE directly and by arTruncate as a special case.
 * -------------------------------------------------------------------------- */

/* Helper: delete elements within a single slice in offset range
 * [del_lo..del_hi]. Returns number of elements deleted. Handles both dense
 * and sparse slices.
 *
 * Dense slices delete slot-by-slot inside the window. Sparse slices identify
 * the contiguous offset span to delete, free those values, then compact the
 * tail once.
 *
 * If the slice becomes empty, it is freed and the slot is cleared.
 *
 * Note that this helper does not update dir_highest_used nor shrinks the
 * directory: in arDeleteRange() that is done by the caller. */
uint64_t arDeleteSliceRange(redisArray *ar, uint64_t slice_id,
                            uint32_t del_lo, uint32_t del_hi) {
    arSlice *s = arGetSlice(ar, slice_id);
    if (!s) return 0;

    uint64_t deleted = 0;

    if (s->encoding == AR_SLICE_DENSE) {
        /* Dense: intersect deletion range with allocated window */
        uint32_t win_lo = s->layout.dense.offset;
        uint32_t win_hi = s->layout.dense.offset + s->layout.dense.winsize - 1;

        /* Clamp to window */
        uint32_t eff_lo = (del_lo > win_lo) ? del_lo : win_lo;
        uint32_t eff_hi = (del_hi < win_hi) ? del_hi : win_hi;

        if (eff_lo <= eff_hi) {
            /* Clear every populated slot in the effective dense range. */
            for (uint32_t off = eff_lo; off <= eff_hi; off++) {
                uint32_t pos = off - s->layout.dense.offset;
                if (!arIsEmpty(s->layout.dense.items[pos])) {
                    arTrackValueOut(ar, s->layout.dense.items[pos]);
                    arFreePtr(s->layout.dense.items[pos]);
                    s->layout.dense.items[pos] = NULL;
                    s->count--;
                    ar->count--;
                    deleted++;
                }
            }

            /* Update max_idx if affected */
            if (s->count > 0 && s->layout.dense.max_idx >= eff_lo) {
                /* Scan backwards to find new max */
                s->layout.dense.max_idx = s->layout.dense.offset;
                for (int32_t i = (int32_t)win_hi; i >= (int32_t)win_lo; i--) {
                    if (!arIsEmpty(s->layout.dense.items[i - s->layout.dense.offset])) {
                        s->layout.dense.max_idx = i;
                        break;
                    }
                }
            }
        }

        /* Delete slice if empty, or demote it to sparse if we are
         * below the threshold. */
        if (s->count == 0) {
            arSliceFree(ar, s);
            arSetSlice(ar, slice_id, NULL);
        } else {
            arSetSlice(ar, slice_id, arDenseMaybeDemote(ar, s));
        }
    } else {
        /* Sparse: deleted elements form a contiguous span in the sorted
         * offsets/values arrays. Find that span, free the values in it,
         * then compact the tail once. */
        uint16_t *offsets = s->layout.sparse.offsets;
        void **values = s->layout.sparse.values;

        /* The span to delete is [first, last): when del_hi itself is stored
         * in the slice, its position must be included. */
        int found;
        uint32_t first = arSparseFindPos(s, (uint16_t)del_lo, &found);
        uint32_t last = arSparseFindPos(s, (uint16_t)del_hi, &found);
        if (found) last++;

        /* Free all values in the contiguous sparse span to delete. */
        for (uint32_t i = first; i < last; i++) {
            arTrackValueOut(ar, values[i]);
            arFreePtr(values[i]);
        }

        /* Shift the surviving tail left to close the deleted gap. */
        if (first < last) {
            uint32_t tail = s->count - last;
            if (tail > 0) {
                memmove(offsets + first, offsets + last,
                        tail * sizeof(uint16_t));
                memmove(values + first, values + last,
                        tail * sizeof(void *));
            }

            deleted = last - first;
            s->count -= deleted;
            ar->count -= deleted;
        }

        if (s->count == 0) {
            arSliceFree(ar, s);
            arSetSlice(ar, slice_id, NULL);
        }
    }

    return deleted;
}

/* Main range delete function: delete all elements in [lo..hi].
 * Returns number of elements deleted.
 *
 * Algorithm:
 * 1. Compute slice boundaries
 * 2. Handle boundary slices with element-level deletion
 * 3. Delete full slices/blocks in between (O(1) per slice)
 * 4. Update metadata (dir_highest_used, shrink directories)
 *
 * Complexity: O(S + N) where S = slices touched, N = boundary elements.
 * Note that just looping with arGetSlice() and removing the in-the-middle
 * slices one after the other would be much simpler but would have completely
 * different complexity properties, in case of big span of empty indexes. */
uint64_t arDeleteRange(redisArray *ar, uint64_t lo, uint64_t hi) {
    if (ar->count == 0 || lo > hi) return 0;

    uint32_t slice_size = ar->slice_size;
    uint64_t lo_slice = arSliceId(lo, slice_size);
    uint64_t hi_slice = arSliceId(hi, slice_size);
    uint32_t lo_off = arSliceOff(lo, slice_size);
    uint32_t hi_off = arSliceOff(hi, slice_size);

    uint64_t deleted = 0;
    int touched_highest = 0; /* Did we touch dir_highest_used? */

    if (lo_slice == hi_slice) {
        /* Range is within a single slice: element-level delete only */
        deleted = arDeleteSliceRange(ar, lo_slice, lo_off, hi_off);
        if (lo_slice >= ar->dir_highest_used) touched_highest = 1;
    } else {
        /* Multiple slices: handle boundaries and full slices separately */

        /* 1. Delete within lo_slice: [lo_off .. slice_size-1] */
        deleted += arDeleteSliceRange(ar, lo_slice, lo_off, slice_size - 1);

        /* 2. Delete within hi_slice: [0 .. hi_off] */
        deleted += arDeleteSliceRange(ar, hi_slice, 0, hi_off);
        if (hi_slice >= ar->dir_highest_used) touched_highest = 1;

        /* 3. Delete full slices in between [lo_slice+1 .. hi_slice-1] */
        if (lo_slice + 1 <= hi_slice - 1) {
            if (ar->superdir) {
                /* Superdir mode: identify only the block entries that can
                 * contain slices in the middle range, then delete the covered
                 * slot interval inside each of those blocks. Iterate from high
                 * to low so removing an empty block does not invalidate the
                 * yet-to-be-visited entries. */
                uint64_t mid_lo = lo_slice + 1;
                uint64_t mid_hi = hi_slice - 1;
                uint64_t lo_block = mid_lo / AR_SUPER_BLOCK_SLOTS;
                uint64_t hi_block = mid_hi / AR_SUPER_BLOCK_SLOTS;

                /* arSuperDirFind() is a lower-bound search on block_id.
                 * start is the first entry whose block_id is >= lo_block.
                 * end is the first entry whose block_id is > hi_block, so the
                 * blocks to visit are exactly [start, end). */
                int found;
                uint32_t start = arSuperDirFind(ar, lo_block, &found);
                uint32_t end = arSuperDirFind(ar, hi_block, &found);
                if (found) end++; /* Convert matching index to past-the-end. */

                /* Iterate backward because deleting the last slice in a block
                 * removes that block entry and compacts the superdir array. */
                for (int32_t bi = (int32_t)end - 1; bi >= (int32_t)start; bi--) {
                    arSDirEntry *e = ar->superdir + bi;
                    uint64_t block_base = e->block_id * AR_SUPER_BLOCK_SLOTS;
                    uint64_t block_end = block_base + AR_SUPER_BLOCK_SLOTS - 1;

                    /* Convert the global middle slice range to the local slot
                     * interval covered inside this block. */
                    uint32_t first_si = (mid_lo > block_base) ?
                        (uint32_t)(mid_lo - block_base) : 0;
                    uint32_t last_si = (mid_hi < block_end) ?
                        (uint32_t)(mid_hi - block_base) : AR_SUPER_BLOCK_SLOTS - 1;

                    /* Delete each covered slice slot. The block itself, if it
                     * becomes empty, is removed after this local scan. */
                    for (uint32_t si = first_si; si <= last_si; si++) {
                        if (e->slots[si]) {
                            uint64_t slice_id = block_base + si;
                            deleted += e->slots[si]->count;
                            ar->count -= e->slots[si]->count;
                            arSliceFree(ar, e->slots[si]);
                            e->slots[si] = NULL;
                            e->count--;
                            ar->num_slices--;
                            if (slice_id >= ar->dir_highest_used)
                                touched_highest = 1;
                        }
                    }

                    /* Remove the superdir block if empty. */
                    if (e->count == 0) {
                        arSuperDirRemoveBlock(ar, bi);
                    }
                }
            } else {
                /* Flat mode: delete full slices in middle range.
                 *
                 * NOTE(review): the clamp below relies on dir_alloc > 0,
                 * which presumably always holds here since the array was not
                 * empty on entry - confirm that arSetSlice(NULL) never
                 * releases the flat directory. */
                uint64_t end = hi_slice - 1;
                if (end >= ar->dir_alloc) end = ar->dir_alloc - 1;

                for (uint64_t sid = lo_slice + 1; sid <= end; sid++) {
                    if (ar->dir[sid]) {
                        deleted += ar->dir[sid]->count;
                        ar->count -= ar->dir[sid]->count;
                        arSliceFree(ar, ar->dir[sid]);
                        ar->dir[sid] = NULL;
                        ar->num_slices--;
                        if (sid >= ar->dir_highest_used) touched_highest = 1;
                    }
                }
            }
        }
    }

    /* Update dir_highest_used if we touched or deleted high slices: rescan
     * the directory from the top and stop at the first slice found. */
    if (touched_highest && ar->count > 0) {
        ar->dir_highest_used = 0;
        if (ar->superdir) {
            for (int32_t bi = ar->sdir_len - 1; bi >= 0; bi--) {
                arSDirEntry *e = ar->superdir + bi;
                if (e->count == 0) continue;
                for (int32_t si = AR_SUPER_BLOCK_SLOTS - 1; si >= 0; si--) {
                    if (e->slots[si]) {
                        ar->dir_highest_used = e->block_id * AR_SUPER_BLOCK_SLOTS + si;
                        goto found_highest;
                    }
                }
            }
        } else {
            for (int64_t i = (int64_t)ar->dir_alloc - 1; i >= 0; i--) {
                if (ar->dir[i]) {
                    ar->dir_highest_used = i;
                    goto found_highest;
                }
            }
        }
    }
found_highest:

    /* An empty array always reports slice 0 as the highest used one. */
    if (ar->count == 0) {
        ar->dir_highest_used = 0;
    }

    arDirMaybeShrink(ar);
    return deleted;
}

/* Truncate array: delete all elements with index >= limit.
 * Used by ARRING to implement ring buffer wrap-around.
 *
 * This is implemented as a special case of arDeleteRange. limit==0 means
 * "delete everything".
*/ +void arTruncate(redisArray *ar, uint64_t limit) { + if (ar->count == 0) return; + + uint64_t len = arLen(ar); + if (limit >= len) return; /* Nothing to delete */ + + arDeleteRange(ar, limit, len - 1); +} + +/* ---------------------------------------------------------------------------- + * Properties + * -------------------------------------------------------------------------- */ + +/* Get count of non-empty elements */ +uint64_t arCount(redisArray *ar) { + return ar->count; +} + +/* Get logical length (max index + 1) */ +uint64_t arLen(redisArray *ar) { + if (ar->count == 0) return 0; + + arSlice *s = arGetSlice(ar, ar->dir_highest_used); + if (s == NULL) return 0; /* Defensive: if count>0 but slice missing, corrupted state */ + uint32_t local_max = arSliceMaxIdx(s); + return arMakeIdx(ar->dir_highest_used, local_max, ar->slice_size) + 1; +} + +/* ---------------------------------------------------------------------------- + * Range set optimization + * -------------------------------------------------------------------------- */ + +/* Pre-promote sparse slices to dense if a range set would overflow them. + * + * When ARSET writes many elements to a sparse slice, each insertion + * requires a sorted insert with memmove. If the slice eventually exceeds + * kmax elements, it gets promoted to dense anyway - wasting all that work. + * + * This function checks each slice touched by [lo, hi] and promotes it to + * dense upfront if the final element count would exceed kmax. Slices that + * will stay within kmax remain sparse. This way, bulk writes either go + * into sparse (if small) or dense (if large), but never do expensive + * sparse insertions followed by promotion. 
*/ +void arMayPromoteToDenseForRangeSet(redisArray *ar, uint64_t lo, uint64_t hi) { + if (ArraySparseKMax == 0) return; /* Sparse disabled, nothing to do */ + + uint64_t slice_lo = arSliceId(lo, ar->slice_size); + uint64_t slice_hi = arSliceId(hi, ar->slice_size); + + /* Ensure directory can hold all slices we might touch */ + arDirGrow(ar, slice_hi); + + for (uint64_t sid = slice_lo; sid <= slice_hi; sid++) { + /* Compute offset range within this slice */ + uint64_t range_start = (sid == slice_lo) ? lo : (sid << arSliceBits(ar->slice_size)); + uint64_t range_end = (sid == slice_hi) ? hi : ((sid + 1) << arSliceBits(ar->slice_size)) - 1; + uint32_t start_off = arSliceOff(range_start, ar->slice_size); + uint32_t end_off = arSliceOff(range_end, ar->slice_size); + uint32_t range_size = end_off - start_off + 1; + + arSlice *s = arGetSlice(ar, sid); + + if (s == NULL) { + /* No slice yet - create dense directly if range exceeds kmax */ + if (range_size > ArraySparseKMax) { + arSetSlice(ar, sid, arSliceDenseNew(ar, start_off, ar->slice_size)); + } + continue; + } + + if (s->encoding == AR_SLICE_DENSE) continue; /* Already dense */ + + /* Sparse slice - check if we need to promote */ + if (range_size > ArraySparseKMax) { + /* Range alone exceeds kmax, must promote */ + arSetSlice(ar, sid, arSparsePromote(ar, s, ar->slice_size)); + continue; + } + + /* Count existing elements in [start_off, end_off] via linear scan. + * Sparse slices have at most kmax elements, so this is O(kmax). 
*/ + uint16_t *offsets = s->layout.sparse.offsets; + uint32_t existing = 0; + for (uint32_t i = 0; i < s->count; i++) { + if (offsets[i] >= start_off && offsets[i] <= end_off) { + existing++; + } + } + + /* New elements = range_size - existing (slots we'll fill that are empty) */ + uint32_t new_elements = range_size - existing; + if (s->count + new_elements > ArraySparseKMax) { + arSetSlice(ar, sid, arSparsePromote(ar, s, ar->slice_size)); + } + } +} + +/* ---------------------------------------------------------------------------- + * Defragmentation + * -------------------------------------------------------------------------- */ + +/* Defrag one slice, fix the slice pointers that point inside its allocation + * and defrag the heap strings as well. + * + * If work is not NULL, also account for the slice scan performed here: + * dense slices add winsize, while sparse slices add count. We update the + * active defrag scanned statistic at the same time, so callers do not need + * to duplicate that logic. */ +static arSlice *arDefragSlice(arSlice *s, unsigned long *work, + void *(*defragfn)(void *)) { + /* 1. Try to defrag the slice itself. If the pointer changed, + * we need to also change the structure pointers pointing inside + * the allocation (that now has a different base address). */ + arSlice *new_s = defragfn(s); + if (new_s) { + s = new_s; + if (s->encoding == AR_SLICE_DENSE) + s->layout.dense.items = (void **)(s + 1); + else + arSparseSetupPointers(s); + } + + /* Defrag the arString() items. All the other items are + * encoded in the pointer value itself and need no handling. 
*/ + if (s->encoding == AR_SLICE_DENSE) { + for (uint32_t j = 0; j < s->layout.dense.winsize; j++) { + if (!arIsPtr(s->layout.dense.items[j])) continue; + void *new_ptr = defragfn(s->layout.dense.items[j]); + if (new_ptr) s->layout.dense.items[j] = new_ptr; + } + if (work) { + *work += s->layout.dense.winsize; + server.stat_active_defrag_scanned += s->layout.dense.winsize; + } + } else { + void **values = s->layout.sparse.values; + for (uint32_t j = 0; j < s->count; j++) { + if (!arIsPtr(values[j])) continue; + void *new_ptr = defragfn(values[j]); + if (new_ptr) values[j] = new_ptr; + } + if (work) { + *work += s->count; + server.stat_active_defrag_scanned += s->count; + } + } + return s; +} + +/* Defrag the array header and the top-level directory object that points to + * slices. This is the cheap metadata pass done before we walk the slices + * themselves. */ +static redisArray *arDefragTopLevel(redisArray *ar, void *(*defragfn)(void *)) { + redisArray *new_ar = defragfn(ar); + if (new_ar) ar = new_ar; + + if (ar->superdir) { + arSDirEntry *new_sdir = defragfn(ar->superdir); + if (new_sdir) ar->superdir = new_sdir; + } else if (ar->dir) { + arSlice **new_dir = defragfn(ar->dir); + if (new_dir) ar->dir = new_dir; + } + return ar; +} + +/* Encode the next superdir scan position as a single cursor. + * Cursor 0 means "start from the beginning" and also "finished". + * + * On 64-bit builds we encode block_id and slot, so resume is stable even if + * blocks before the current one are inserted or removed between defrag steps. + * + * On 32-bit builds the generic defrag cursor type is only unsigned long, so + * it cannot always hold a full 64-bit block_id. In that case we fall back to + * the positional (block-index, slot) encoding. 
*/ +static inline unsigned long arDefragSuperdirCursor(redisArray *ar, uint32_t bi, uint32_t si) { + serverAssert(si < AR_SUPER_BLOCK_SLOTS); +#if ULONG_MAX >= UINT64_MAX + uint64_t block_id = ar->superdir[bi].block_id; + serverAssert(block_id <= (ULONG_MAX - 1) / AR_SUPER_BLOCK_SLOTS); + return ((unsigned long)block_id * AR_SUPER_BLOCK_SLOTS + si) + 1; +#else + UNUSED(ar); + return ((unsigned long)bi * AR_SUPER_BLOCK_SLOTS + si) + 1; +#endif +} + +/* Decode the next superdir scan position stored in the incremental defrag + * cursor. */ +static void arDefragDecodeSuperdirCursor(redisArray *ar, unsigned long cursor, + uint32_t *bi, uint32_t *si) { + serverAssert(cursor > 0); + unsigned long pos = cursor - 1; +#if ULONG_MAX >= UINT64_MAX + /* Flat-mode cursors are also encoded as "slot + 1". After promotion to + * superdir, those old cursors still decode correctly here as block_id 0 + * with the same slot index, because flat mode only ever covers block 0 + * and arPromoteToSuperDir() copies the flat directory into block 0. */ + uint64_t block_id = pos / AR_SUPER_BLOCK_SLOTS; + int found; + + *si = pos % AR_SUPER_BLOCK_SLOTS; + *bi = arSuperDirFind(ar, block_id, &found); + if (!found) *si = 0; +#else + UNUSED(ar); + *bi = pos / AR_SUPER_BLOCK_SLOTS; + *si = pos % AR_SUPER_BLOCK_SLOTS; +#endif +} + +/* Defrag an array that is small enough that we can handle it + * in a single pass. */ +redisArray *arDefrag(redisArray *ar, void *(*defragfn)(void *)) { + ar = arDefragTopLevel(ar, defragfn); + + if (ar->superdir) { + /* Defrag each block slots array, then each slice referenced by it. 
*/ + for (uint32_t bi = 0; bi < ar->sdir_len; bi++) { + arSDirEntry *e = ar->superdir + bi; + arSlice **new_slots = defragfn(e->slots); + if (new_slots) e->slots = new_slots; + + for (uint32_t si = 0; si < AR_SUPER_BLOCK_SLOTS; si++) { + if (e->slots[si] == NULL) continue; + e->slots[si] = arDefragSlice(e->slots[si], NULL, defragfn); + } + } + } else if (ar->dir) { + /* Defrag each slice referenced by the flat directory. */ + for (uint64_t i = 0; i < ar->dir_alloc; i++) { + if (ar->dir[i] == NULL) continue; + ar->dir[i] = arDefragSlice(ar->dir[i], NULL, defragfn); + } + } + + return ar; +} + +/* Incremental defrag step for arrays. Cursor 0 means "start from the + * beginning" and also "no more work". + * + * Work is counted explicitly in order to keep one call roughly aligned with + * active_defrag_max_scan_fields: + * + * 1. Visiting one flat directory entry costs 1. + * 2. In superdir mode, visiting one top-level block entry costs 1, and + * visiting one slot inside that block costs another 1. + * 3. Defragmenting a slice then adds the cost of scanning that slice: + * sparse slices add s->count, while dense slices add winsize. + * + * Slices are still defragmented as whole units. So a dense slice may cause one + * call to overshoot the configured budget, but we still stop immediately after + * that slice in order to resume from the next cursor position later. 
*/ +unsigned long arDefragIncremental(redisArray **arref, unsigned long cursor, + void *(*defragfn)(void *)) +{ + redisArray *ar = *arref; + unsigned long work = 0; + unsigned long maxwork = server.active_defrag_max_scan_fields; + if (ar == NULL) return 0; + + if (cursor == 0) { + ar = arDefragTopLevel(ar, defragfn); + *arref = ar; + } + + if (ar->superdir) { + uint32_t bi = 0, si = 0; + if (cursor != 0) arDefragDecodeSuperdirCursor(ar, cursor, &bi, &si); + + for (; bi < ar->sdir_len; bi++, si = 0) { + arSDirEntry *e = ar->superdir + bi; + /* Defrag the block slots array once when we enter the block from + * its first slot. If we later resume in the middle of the same + * block, the slots array was already handled. */ + if (si == 0) { + arSlice **new_slots = defragfn(e->slots); + if (new_slots) e->slots = new_slots; + work++; + server.stat_active_defrag_scanned++; + } + + for (; si < AR_SUPER_BLOCK_SLOTS; si++) { + arSlice *s = e->slots[si]; + work++; + server.stat_active_defrag_scanned++; + + if (s == NULL) { + if (work > maxwork) { + si++; + if (si == AR_SUPER_BLOCK_SLOTS) { + bi++; + si = 0; + } + if (bi >= ar->sdir_len) return 0; + return arDefragSuperdirCursor(ar, bi, si); + } + continue; + } + + e->slots[si] = arDefragSlice(s, &work, defragfn); + + if (work > maxwork) { + si++; + if (si == AR_SUPER_BLOCK_SLOTS) { + bi++; + si = 0; + } + if (bi >= ar->sdir_len) return 0; + return arDefragSuperdirCursor(ar, bi, si); + } + } + } + return 0; + } + + if (ar->dir == NULL) return 0; + + uint64_t i = (cursor == 0) ? 
0 : cursor - 1; + for (; i < ar->dir_alloc; i++) { + arSlice *s = ar->dir[i]; + work++; + server.stat_active_defrag_scanned++; + + if (s == NULL) { + if (work > maxwork) { + i++; + if (i >= ar->dir_alloc) return 0; + return i + 1; + } + continue; + } + + ar->dir[i] = arDefragSlice(s, &work, defragfn); + + if (work > maxwork) { + i++; + if (i >= ar->dir_alloc) return 0; + return i + 1; + } + } + return 0; +} diff --git a/src/sparsearray.h b/src/sparsearray.h new file mode 100644 index 000000000..c0444ee7b --- /dev/null +++ b/src/sparsearray.h @@ -0,0 +1,312 @@ +/* + * Copyright (c) 2026-Present, Redis Ltd. + * All rights reserved. + * + * Licensed under your choice of (a) the Redis Source Available License 2.0 + * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the + * GNU Affero General Public License v3 (AGPLv3). + * + * Sparse Array - A memory-efficient sparse array with 64-bit index space. + * + * This data structure was designed and implemented by Salvatore Sanfilippo. + */ + +#ifndef __SPARSEARRAY_H +#define __SPARSEARRAY_H + +#include +#include +#include + +/* ============================================================================ + * SPARSE ARRAY OVERVIEW + * ============================================================================ + * + * Sparse arrays are random-access sequences indexed by non-negative 64-bit + * integers. They support O(1) get/set operations and efficient iteration. + * + * MEMORY LAYOUT + * ------------- + * The array uses a two-level structure: a directory pointing to "slices", + * which contain just a range of elements. For very large/sparse arrays, a + * three-level "superdir" structure is used. + * + * SLICE TYPES + * ----------- + * Each slice holds up to slice_size elements and can be: + * + * - Sparse: Sorted array of (offset, value) pairs. Memory-efficient when + * elements are scattered within the slice. + * + * - Dense: Contiguous array with a sliding window. 
Used when the slice + * has many elements. + * + * VALUE ENCODING (Tagged Pointers) + * -------------------------------- + * Values are stored in tagged pointer-sized words, using the low 2 bits as a + * tag. The exact immediate encoding depends on pointer width: + * + * 64-bit builds: + * Tag 00: arString pointer (heap-allocated, 8+ byte strings) + * Tag 01: Immediate signed integer in the 62-bit payload + * Tag 10: Immediate double (low 2 bits of the IEEE-754 payload cleared) + * Tag 11: Inline small string (0-7 bytes) + * + * 32-bit builds: + * Tag 00: arString pointer + * Tag 01: Immediate signed integer in the 30-bit payload + * Tag 10: Immediate float (low 2 bits of the IEEE-754 payload cleared) + * Tag 11: Inline small string (0-3 bytes) + * + * RDB persistence is architecture-neutral: values are saved as logical ints, + * doubles and strings, never as raw tagged words. + * ========================================================================== */ + +/* ---------------------------------------------------------------------------- + * Configuration defaults + * -------------------------------------------------------------------------- */ + +#define AR_SLICE_SIZE_DEFAULT 4096 +#define AR_SLICE_SIZE_MIN 256 +#define AR_SLICE_SIZE_MAX 65536 +#define AR_SPARSE_KMAX_DEFAULT 10 +#define AR_SPARSE_KMIN_DEFAULT 5 + +/* Superdir: fixed-size blocks of slice pointers. Each block holds 2048 + * pointers to actual array slices, which uses about 8 KB on 32-bit builds + * and 16 KB on 64-bit builds. This keeps very large indices from forcing + * catastrophic flat-directory growth. */ +#define AR_SUPER_BLOCK_SLOTS 2048 + +/* Internal constants */ +#define AR_SLICE_MIN_ALLOC 8 /* Initial dense window allocation */ +#define AR_INSERT_IDX_NONE UINT64_MAX /* No insert performed yet */ + +/* Slice encoding types */ +#define AR_SLICE_DENSE 0 +#define AR_SLICE_SPARSE 1 + +/* Tagged value encoding (low 2 bits). NULL (0) means empty slot. 
*/
+#define AR_TAG_PTR ((uintptr_t)0) /* arString pointer (low 2 bits = 00) */
+#define AR_TAG_INT ((uintptr_t)1) /* Immediate signed integer (01) */
+#define AR_TAG_FLOAT ((uintptr_t)2) /* Immediate double on 64-bit, float on 32-bit (10) */
+#define AR_TAG_STR ((uintptr_t)3) /* Inline small string (11) */
+#define AR_TAG_MASK ((uintptr_t)3) /* Selects the two tag bits of a value */
+
+#if UINTPTR_MAX == UINT64_MAX
+#define AR_SMALLSTR_MAXLEN 7
+#define AR_SMALLSTR_LEN_MASK 0x7u
+#elif UINTPTR_MAX == UINT32_MAX
+#define AR_SMALLSTR_MAXLEN 3
+#define AR_SMALLSTR_LEN_MASK 0x3u
+#else
+#error "Unsupported pointer size"
+#endif
+
+/* RDB type tags for array elements */
+#define AR_RDB_TAG_SDS 0
+#define AR_RDB_TAG_INT 1
+#define AR_RDB_TAG_FLOAT 2
+#define AR_RDB_TAG_SMALLSTR 3
+
+/* Buffer size for inline types (int/float/smallstr). Callers pass a buffer
+ * of this size to arDecode() when turning a value into bytes. */
+#define AR_INLINE_BUFSIZE 64
+
+/* ----------------------------------------------------------------------------
+ * Data structures
+ * -------------------------------------------------------------------------- */
+
+/* Array slice: holds a range of elements. Single allocation with payload.
+ *
+ * The items / offsets / values pointers below point inside that same
+ * allocation, so they have to be recomputed whenever the slice is moved
+ * to a different address (the defrag code does this after relocating a
+ * slice). */
+typedef struct arSlice {
+    uint8_t encoding;      /* 0=dense, 1=sparse (AR_SLICE_DENSE / AR_SLICE_SPARSE) */
+    uint8_t _pad1[3];
+    uint32_t count;        /* Non-empty items in this slice */
+    union {
+        struct {
+            uint32_t offset;   /* First logical offset in window */
+            uint32_t winsize;  /* Window size (power of two) */
+            uint32_t max_idx;  /* Highest offset with a value */
+            void **items;      /* Points into payload */
+        } dense;
+        struct {
+            uint32_t cap;      /* Capacity */
+            uint16_t *offsets; /* Points into payload */
+            void **values;     /* Points into payload (aligned) */
+        } sparse;
+    } layout;
+} arSlice;
+
+/* Super-directory entry: groups slices into fixed-size pointer blocks. */
+typedef struct arSDirEntry {
+    uint64_t block_id;     /* slice_id / AR_SUPER_BLOCK_SLOTS */
+    uint32_t count;        /* Non-NULL slots in this block */
+    uint32_t _pad;
+    arSlice **slots;       /* AR_SUPER_BLOCK_SLOTS pointers to slices */
+} arSDirEntry;
+
+/* Array header */
+typedef struct redisArray {
+    uint64_t count;            /* Total non-empty items */
+    uint64_t insert_idx;       /* Last insert index, or UINT64_MAX if none */
+    uint64_t dir_alloc;        /* Flat directory length (flat mode) */
+    uint64_t dir_highest_used; /* Highest non-NULL slice index */
+    uint64_t num_slices;       /* Number of allocated slices */
+    size_t alloc_size;         /* Tracked total allocation (for slot stats) */
+    uint32_t slice_size;       /* Slice size (power of two) */
+    uint32_t sdir_len;         /* Superdir entries count */
+    uint32_t sdir_cap;         /* Superdir capacity */
+    uint32_t _pad;
+    arSlice **dir;             /* Flat directory or NULL */
+    arSDirEntry *superdir;     /* Super-directory or NULL */
+} redisArray;
+
+/* ----------------------------------------------------------------------------
+ * Inline helpers: index arithmetic
+ * -------------------------------------------------------------------------- */
+
+/* Compute bits needed to address elements within a slice.
*/ +static inline int arSliceBits(uint32_t slice_size) { + if (slice_size == 4096) return 12; /* Fast path for default */ + int bits = 0; + uint32_t x = slice_size; + while (x > 1) { x >>= 1; bits++; } + return bits; +} + +static inline uint64_t arSliceId(uint64_t idx, uint32_t slice_size) { + return idx >> arSliceBits(slice_size); +} + +static inline uint32_t arSliceOff(uint64_t idx, uint32_t slice_size) { + return (uint32_t)(idx & (slice_size - 1)); +} + +static inline uint64_t arMakeIdx(uint64_t slice_id, uint32_t off, uint32_t slice_size) { + return (slice_id << arSliceBits(slice_size)) | off; +} + +/* ---------------------------------------------------------------------------- + * Inline helpers: tagged value encoding + * -------------------------------------------------------------------------- */ + +static inline int arIsEmpty(void *v) { return v == NULL; } + +static inline int arIsPtr(void *v) { + return v != NULL && ((uintptr_t)v & AR_TAG_MASK) == AR_TAG_PTR; +} + +static inline int arIsInt(void *v) { + return ((uintptr_t)v & AR_TAG_MASK) == AR_TAG_INT; +} + +static inline int64_t arToInt(void *v) { + return (int64_t)(intptr_t)v >> 2; /* Arithmetic shift preserves sign */ +} + +static inline void *arFromInt(int64_t ival) { + return (void *)(((uintptr_t)ival << 2) | AR_TAG_INT); +} + +static inline int arIntFits(int64_t ival) { +#if UINTPTR_MAX == UINT64_MAX + return ival >= -(1LL << 61) && ival <= (1LL << 61) - 1; +#else + return ival >= -(1LL << 29) && ival <= (1LL << 29) - 1; +#endif +} + +static inline int arIsFloat(void *v) { + return ((uintptr_t)v & AR_TAG_MASK) == AR_TAG_FLOAT; +} + +static inline double arToDouble(void *v) { +#if UINTPTR_MAX == UINT64_MAX + uint64_t bits = (uintptr_t)v & ~AR_TAG_MASK; + double d; + memcpy(&d, &bits, sizeof(d)); + return d; +#else + uint32_t bits = (uint32_t)((uintptr_t)v & ~(uintptr_t)AR_TAG_MASK); + float f; + memcpy(&f, &bits, sizeof(f)); + return (double)f; +#endif +} + +static inline void 
*arFromFloatBits(uint64_t bits_trunc) { +#if UINTPTR_MAX == UINT64_MAX + return (void *)((bits_trunc & ~AR_TAG_MASK) | AR_TAG_FLOAT); +#else + uint32_t bits32 = (uint32_t)bits_trunc; + return (void *)(uintptr_t)((bits32 & ~(uint32_t)AR_TAG_MASK) | AR_TAG_FLOAT); +#endif +} + +static inline int arIsSmallStr(void *v) { + return ((uintptr_t)v & AR_TAG_MASK) == AR_TAG_STR; +} + +static inline int arSmallStrLen(void *v) { + return (int)(((uintptr_t)v >> 2) & AR_SMALLSTR_LEN_MASK); +} + +static inline int arToSmallStr(void *v, char *buf) { + int len = arSmallStrLen(v); + uintptr_t val = (uintptr_t)v; + for (int i = 0; i < len; i++) { + buf[i] = (char)((val >> (8 * (i + 1))) & 0xFF); + } + buf[len] = '\0'; + return len; +} + +static inline void *arFromSmallStr(const char *s, int len) { + uintptr_t v = AR_TAG_STR | ((uintptr_t)len << 2); + for (int i = 0; i < len; i++) { + v |= ((uintptr_t)(uint8_t)s[i]) << (8 * (i + 1)); + } + return (void *)v; +} + +/* ---------------------------------------------------------------------------- + * Public API + * -------------------------------------------------------------------------- */ + +/* Lifecycle */ +redisArray *arNew(void); +void arFree(redisArray *ar); +redisArray *arDup(redisArray *ar); +void arDismiss(redisArray *ar, size_t size_hint); + +/* Element access */ +void *arGet(redisArray *ar, uint64_t idx); +void arSet(redisArray *ar, uint64_t idx, void *v); +int arDel(redisArray *ar, uint64_t idx); + +/* Value encoding/decoding */ +void *arEncode(const char *s, size_t len); +const char *arDecode(void *v, char *buf, size_t bufsize, size_t *outlen); +int arFormatFloat(double d, char *buf, size_t bufsize); +size_t arStringLen(const void *ptr); +const char *arStringData(const void *ptr); +void *arValueFromRdbInt(int64_t ival); +void *arValueFromRdbFloat(double d); +void *arValueFromRdbSmallStr(const char *s, size_t len); + +/* Queries */ +uint64_t arCount(redisArray *ar); +uint64_t arLen(redisArray *ar); + +/* Bulk operations */ 
+uint64_t arDeleteRange(redisArray *ar, uint64_t lo, uint64_t hi); +void arTruncate(redisArray *ar, uint64_t limit); +void arMayPromoteToDenseForRangeSet(redisArray *ar, uint64_t lo, uint64_t hi); + +/* Utilities */ +uint32_t arSparseFindPos(arSlice *s, uint16_t rel_idx, int *found); +uint32_t arSuperDirFind(redisArray *ar, uint64_t block_id, int *found); +redisArray *arDefrag(redisArray *ar, void *(*defragfn)(void *)); +unsigned long arDefragIncremental(redisArray **arref, unsigned long cursor, + void *(*defragfn)(void *)); + +#endif /* __SPARSEARRAY_H */ diff --git a/src/t_array.c b/src/t_array.c new file mode 100644 index 000000000..4fb72f8da --- /dev/null +++ b/src/t_array.c @@ -0,0 +1,2021 @@ +/* + * Copyright (c) 2026-Present, Redis Ltd. + * All rights reserved. + * + * Licensed under your choice of (a) the Redis Source Available License 2.0 + * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the + * GNU Affero General Public License v3 (AGPLv3). + * + * Redis Array commands implementation. + * Originally authored by: Salvatore Sanfilippo. + * + * The core sparse array data structure is in sparsearray.c/sparsearray.h. + * This file contains Redis command handlers and Redis-specific operations. + */ + +#include "server.h" +#include "../deps/tre/local_includes/tre.h" +#include + +/****************************************************************************** + * + * ARRAY COMMANDS AND HIGHER LEVEL LOGIC + * + * This section contains all the Redis commands for the Array type, as well + * as the type operations used by COPY and other server-level functionality. 
+ * + *****************************************************************************/ + +/* ---------------------------------------------------------------------------- + * Array type operations for COPY command + * -------------------------------------------------------------------------- */ + +robj *arrayTypeDup(robj *o) { + redisArray *ar = o->ptr; + redisArray *dup = arDup(ar); + robj *newobj = createObject(OBJ_ARRAY, dup); + newobj->encoding = OBJ_ENCODING_SLICED_ARRAY; + return newobj; +} + +/* ---------------------------------------------------------------------------- + * Internal helpers + * -------------------------------------------------------------------------- */ + +#define ARGETRANGE_MAX_ITEMS 1000000 + +/* Lookup array object for write, create it if missing, or reply with + * WRONGTYPE and return NULL if the key holds a different type. */ +robj *lookupArrayForWriteOrReply(client *c, robj *key) { + robj *o = lookupKeyWrite(c->db, key); + if (o == NULL) { + o = createArrayObject(); + dbAdd(c->db, key, &o); + } else if (checkType(c, o, OBJ_ARRAY)) { + return NULL; + } + return o; +} + +/* Reply with an array value. This helper is needed because we used + * tagged pointers for inlining values like floats, integers, small + * strings directly inside the pointer. Big memory saves, but more + * work needed when there is to reply to the client. */ +void addReplyArrayValue(client *c, void *v) { + if (arIsEmpty(v)) { + addReplyNull(c); + return; + } + + char buf[AR_INLINE_BUFSIZE]; + size_t len; + const char *data = arDecode(v, buf, sizeof(buf), &len); + addReplyBulkCBuffer(c, data, len); +} + +/* Parse array index from object. Accepts 0 to 2^64-2 by default. + * If allow_max is true, also accepts UINT64_MAX. This is used by ARSEEK + * because ARSEEK UINT64_MAX sets insert_idx to UINT64_MAX-1, which is + * a valid terminal state (next ARINSERT would overflow). + * Returns C_OK/C_ERR. Does NOT send error reply - caller decides. 
*/
+int getArrayIndexFromObject(robj *o, uint64_t *idx, int allow_max) {
+    unsigned long long ull;
+    if (o->encoding == OBJ_ENCODING_INT) {
+        if ((long)o->ptr < 0) return C_ERR;
+        ull = (unsigned long long)(long)o->ptr;
+    } else {
+        if (!string2ull(o->ptr, &ull)) return C_ERR;
+    }
+    if (ull == UINT64_MAX && !allow_max) return C_ERR;
+    *idx = ull;
+    return C_OK;
+}
+
+/* Parse an array index argument and reply with an error on failure.
+ * UINT64_MAX is rejected here (allow_max is passed as 0). */
+int arrayParseIndexOrReply(client *c, robj *arg, uint64_t *idx) {
+    if (getArrayIndexFromObject(arg, idx, 0) != C_OK) {
+        addReplyError(c, "invalid array index");
+        return C_ERR;
+    }
+    return C_OK;
+}
+
+/* ----------------------------------------------------------------------------
+ * ARGET / ARMGET
+ * -------------------------------------------------------------------------- */
+
+/* ARGET key idx
+ *
+ * Returns the value at idx in O(1).
+ * Missing keys and holes both reply with NULL. */
+void argetCommand(client *c) {
+    robj *o = lookupKeyRead(c->db, c->argv[1]);
+    if (o && checkType(c, o, OBJ_ARRAY)) return;
+
+    uint64_t idx;
+    if (arrayParseIndexOrReply(c, c->argv[2], &idx) != C_OK) return;
+
+    void *v = o ? arGet(o->ptr, idx) : NULL;
+    addReplyArrayValue(c, v);
+}
+
+/* ARMGET key idx [idx ...]
+ *
+ * Returns the values at the requested indices in O(N), where N is the number
+ * of indices. Missing keys and holes reply with NULLs. All indices are
+ * validated before the reply starts, so malformed input fails atomically. */
+void armgetCommand(client *c) {
+    robj *o = lookupKeyRead(c->db, c->argv[1]);
+    if (o != NULL && checkType(c, o, OBJ_ARRAY)) return;
+
+    /* Pre-validate all indices so malformed input fails the whole command,
+     * like the other array commands. */
+    for (int i = 2; i < c->argc; i++) {
+        uint64_t idx;
+        if (arrayParseIndexOrReply(c, c->argv[i], &idx) != C_OK) return;
+    }
+
+    addReplyArrayLen(c, c->argc - 2);
+
+    for (int i = 2; i < c->argc; i++) {
+        if (o == NULL) {
+            /* Non existing keys are semantically equivalent
+             * to non existing indexes of existing arrays. */
+            addReplyNull(c);
+            continue;
+        }
+
+        uint64_t idx = 0;
+        getArrayIndexFromObject(c->argv[i], &idx, 0); /* Already validated. */
+
+        redisArray *ar = o->ptr;
+        void *v = arGet(ar, idx);
+        addReplyArrayValue(c, v);
+    }
+}
+
+/* ----------------------------------------------------------------------------
+ * ARSET / ARMSET
+ * -------------------------------------------------------------------------- */
+
+/* ARSET key idx value [value ...]
+ *
+ * Sets one or more contiguous values in O(N), where N is the number of
+ * values. The first value is stored at idx, the following ones at the
+ * next consecutive indices. Creates the array if needed and returns the
+ * number of previously empty slots that were filled. */
+void arsetCommand(client *c) {
+    uint64_t start_idx;
+    if (arrayParseIndexOrReply(c, c->argv[2], &start_idx) != C_OK) return;
+
+    int num_values = c->argc - 3;
+
+    /* Pre-validate: check for overflow and forbidden max index. */
+    uint64_t last_idx = start_idx + (uint64_t)num_values - 1;
+    if (last_idx < start_idx || last_idx == UINT64_MAX) {
+        addReplyError(c, "array index overflow");
+        return;
+    }
+
+    robj *o = lookupArrayForWriteOrReply(c, c->argv[1]);
+    if (o == NULL) return;
+
+    redisArray *ar = o->ptr;
+    uint64_t old_count = arCount(ar);
+    size_t old_alloc = 0;
+    if (server.memory_tracking_enabled) old_alloc = kvobjAllocSize(o);
+
+    /* Pre-promote sparse slices only for true bulk sets. A single-element
+     * write does not benefit from the extra range-analysis pass. */
+    if (num_values > 1)
+        arMayPromoteToDenseForRangeSet(ar, start_idx, last_idx);
+
+    /* Write all values starting at start_idx */
+    uint64_t idx = start_idx;
+    for (int i = 3; i < c->argc; i++) {
+        sds val = c->argv[i]->ptr;
+        void *v = arEncode(val, sdslen(val));
+        arSet(ar, idx, v);
+        idx++;
+    }
+
+    long long set_count = arCount(ar) - old_count;
+    updateKeysizesHist(c->db, OBJ_ARRAY, old_count, arCount(ar));
+    if (server.memory_tracking_enabled)
+        updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), o, old_alloc, kvobjAllocSize(o));
+    keyModified(c, c->db, c->argv[1], o, 1);
+    notifyKeyspaceEvent(NOTIFY_ARRAY, "arset", c->argv[1], c->db->id);
+    server.dirty += num_values;
+    addReplyLongLong(c, set_count);
+}
+
+/* ARMSET key idx value [idx value ...]
+ *
+ * Sets multiple scattered index/value pairs in O(N), where N is the number of
+ * pairs. Creates the array if needed, returns the number of newly filled
+ * slots, and validates all indices before mutating. */
+void armsetCommand(client *c) {
+    if ((c->argc - 2) % 2 != 0) {
+        addReplyErrorArity(c);
+        return;
+    }
+
+    /* Validate all indices first */
+    for (int i = 2; i < c->argc; i += 2) {
+        uint64_t idx;
+        if (arrayParseIndexOrReply(c, c->argv[i], &idx) != C_OK) return;
+    }
+
+    robj *o = lookupArrayForWriteOrReply(c, c->argv[1]);
+    if (o == NULL) return;
+
+    redisArray *ar = o->ptr;
+    uint64_t old_count = arCount(ar);
+    size_t old_alloc = 0;
+    if (server.memory_tracking_enabled) old_alloc = kvobjAllocSize(o);
+
+    for (int i = 2; i < c->argc; i += 2) {
+        uint64_t idx = 0;
+        getArrayIndexFromObject(c->argv[i], &idx, 0); /* Already validated */
+
+        sds val = c->argv[i + 1]->ptr;
+        void *v = arEncode(val, sdslen(val));
+        arSet(ar, idx, v);
+    }
+
+    int num_pairs = (c->argc - 2) / 2;
+    long long set_count = arCount(ar) - old_count;
+    updateKeysizesHist(c->db, OBJ_ARRAY, old_count, arCount(ar));
+    if (server.memory_tracking_enabled)
+        updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), o, old_alloc, kvobjAllocSize(o));
+    keyModified(c, c->db, c->argv[1], o, 1);
+    notifyKeyspaceEvent(NOTIFY_ARRAY, "armset", c->argv[1], c->db->id);
+    server.dirty += num_pairs;
+    addReplyLongLong(c, set_count);
+}
+
+/* ----------------------------------------------------------------------------
+ * ARDEL / ARDELRANGE
+ * -------------------------------------------------------------------------- */
+
+/* ARDEL key idx [idx ...]
+ *
+ * Deletes the specified indices in O(N), where N is the number of indices.
+ * All indices are validated first, and if the array becomes empty the key
+ * itself is deleted. The number of deleted (existing) items is returned. */
+void ardelCommand(client *c) {
+    /* Pre-validate all indices before mutating, to report syntax errors
+     * even if the key doesn't exist. */
+    for (int i = 2; i < c->argc; i++) {
+        uint64_t idx;
+        if (arrayParseIndexOrReply(c, c->argv[i], &idx) != C_OK) return;
+    }
+
+    robj *o = lookupKeyWrite(c->db, c->argv[1]);
+    if (o == NULL) {
+        addReplyLongLong(c, 0);
+        return;
+    }
+    if (checkType(c, o, OBJ_ARRAY)) return;
+
+    redisArray *ar = o->ptr;
+    uint64_t old_count = arCount(ar);
+    size_t old_alloc = 0;
+    if (server.memory_tracking_enabled) old_alloc = kvobjAllocSize(o);
+    long long deleted = 0;
+
+    for (int i = 2; i < c->argc; i++) {
+        uint64_t idx = 0;
+        getArrayIndexFromObject(c->argv[i], &idx, 0); /* Already validated */
+        deleted += arDel(ar, idx);
+    }
+
+    int keyremoved = (arCount(ar) == 0);
+    if (server.memory_tracking_enabled && deleted > 0 && keyremoved)
+        updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), o, old_alloc, kvobjAllocSize(o));
+    if (deleted > 0) {
+        if (keyremoved)
+            dbDeleteSkipKeysizesUpdate(c->db, c->argv[1]);
+        updateKeysizesHist(c->db, OBJ_ARRAY,
+                           old_count, keyremoved ? -1 : (int64_t)arCount(ar));
+        if (server.memory_tracking_enabled && !keyremoved)
+            updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), o, old_alloc, kvobjAllocSize(o));
+        keyModified(c, c->db, c->argv[1], keyremoved ? NULL : o, 1);
+        notifyKeyspaceEvent(NOTIFY_ARRAY, "ardel", c->argv[1], c->db->id);
+        if (keyremoved)
+            notifyKeyspaceEvent(NOTIFY_GENERIC, "del", c->argv[1], c->db->id);
+        server.dirty += deleted;
+    }
+    addReplyLongLong(c, deleted);
+}
+
+/* ARDELRANGE key start end [start end ...]
+ *
+ * Deletes one or more ranges. Complexity is proportional to the existing
+ * elements / slices touched, not to the numeric span of the requested ranges,
+ * so huge ranges do not block the server forever.
+ *
+ * Each pair may be given in either order. All ranges are validated up front,
+ * and an empty resulting array deletes the key. */
+void ardelrangeCommand(client *c) {
+    if ((c->argc - 2) % 2 != 0) {
+        addReplyErrorArity(c);
+        return;
+    }
+
+    /* Pre-validate all ranges before mutating, to avoid partial updates
+     * if a later range has invalid syntax. */
+    for (int i = 2; i < c->argc; i += 2) {
+        uint64_t start, end;
+        if (arrayParseIndexOrReply(c, c->argv[i], &start) != C_OK) return;
+        if (arrayParseIndexOrReply(c, c->argv[i + 1], &end) != C_OK) return;
+    }
+
+    robj *o = lookupKeyWrite(c->db, c->argv[1]);
+    if (o == NULL) {
+        addReplyLongLong(c, 0);
+        return;
+    }
+    if (checkType(c, o, OBJ_ARRAY)) return;
+
+    redisArray *ar = o->ptr;
+    uint64_t old_count = arCount(ar);
+    size_t old_alloc = 0;
+    if (server.memory_tracking_enabled) old_alloc = kvobjAllocSize(o);
+    uint64_t total_deleted = 0;
+
+    /* Process each range using the generalized arDeleteRange */
+    for (int i = 2; i < c->argc; i += 2) {
+        uint64_t start = 0, end = 0;
+        getArrayIndexFromObject(c->argv[i], &start, 0); /* Already validated */
+        getArrayIndexFromObject(c->argv[i + 1], &end, 0);
+
+        uint64_t lo = (start <= end) ? start : end;
+        uint64_t hi = (start <= end) ? end : start;
+
+        total_deleted += arDeleteRange(ar, lo, hi);
+    }
+
+    int keyremoved = (arCount(ar) == 0);
+    if (server.memory_tracking_enabled && total_deleted > 0 && keyremoved)
+        updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), o, old_alloc, kvobjAllocSize(o));
+    if (total_deleted > 0) {
+        if (keyremoved)
+            dbDeleteSkipKeysizesUpdate(c->db, c->argv[1]);
+        updateKeysizesHist(c->db, OBJ_ARRAY,
+                           old_count, keyremoved ? -1 : (int64_t)arCount(ar));
+        if (server.memory_tracking_enabled && !keyremoved)
+            updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), o, old_alloc, kvobjAllocSize(o));
+        keyModified(c, c->db, c->argv[1], keyremoved ? NULL : o, 1);
+        notifyKeyspaceEvent(NOTIFY_ARRAY, "ardelrange", c->argv[1], c->db->id);
+        if (keyremoved)
+            notifyKeyspaceEvent(NOTIFY_GENERIC, "del", c->argv[1], c->db->id);
+        server.dirty += total_deleted;
+    }
+    addReplyUnsignedLongLong(c, total_deleted);
+}
+
+/* ----------------------------------------------------------------------------
+ * ARLEN / ARCOUNT
+ * -------------------------------------------------------------------------- */
+
+/* ARLEN key
+ *
+ * Returns max-index-plus-one in O(1).
+ * Missing keys reply with 0. */
+void arlenCommand(client *c) {
+    robj *o = lookupKeyReadOrReply(c, c->argv[1], shared.czero);
+    if (o == NULL || checkType(c, o, OBJ_ARRAY)) return;
+
+    redisArray *ar = o->ptr;
+    addReplyUnsignedLongLong(c, arLen(ar));
+}
+
+/* ARCOUNT key
+ *
+ * Returns the number of non-empty elements in O(1).
+ * Missing keys reply with 0.
*/ +void arcountCommand(client *c) { + robj *o = lookupKeyReadOrReply(c, c->argv[1], shared.czero); + if (o == NULL || checkType(c, o, OBJ_ARRAY)) return; + + redisArray *ar = o->ptr; + addReplyUnsignedLongLong(c, arCount(ar)); +} + +/* ---------------------------------------------------------------------------- + * ARGETRANGE + * -------------------------------------------------------------------------- */ + +/* ARGETRANGE key start end + * + * Returns every position in the requested range in O(N), where N is the range + * length. Holes are returned as NULLs, and a missing key behaves like an all- + * NULL array. If start > end the reply order is reversed. + * + * To avoid giant synthetic NULL replies, the range length is hard-limited, + * otherwise the command, with a wrong range, could make the server totally + * unusable. The max range is 1 million elements and is fixed, constituting + * the user-facing semantic of the command. */ +void argetrangeCommand(client *c) { + uint64_t start, end; + if (arrayParseIndexOrReply(c, c->argv[2], &start) != C_OK) return; + if (arrayParseIndexOrReply(c, c->argv[3], &end) != C_OK) return; + + robj *o = lookupKeyRead(c->db, c->argv[1]); + if (o != NULL && checkType(c, o, OBJ_ARRAY)) return; + + int reverse = start > end; + uint64_t lo = reverse ? end : start; + uint64_t hi = reverse ? start : end; + uint64_t len = hi - lo + 1; + + /* ARGETRANGE is a special command: it can trigger a huge reply blocking + * the server (basically forever) even if there is no actual data. This + * is unlike an SMEMBERS against a very big key: somebody added so many + * elements inside, before asking for a huge amount of elements. But, in the + * case of ARGETRANGE, you can just trigger a huge amount of NULLs to be + * sent to the client. 
In ARSCAN this was optimized to be O(N) with the + * actual populated elements, but in this case it can't be done because + * of the semantic of the command, and the Redis protocol inability to reply + * with run-length ranges (a, b, c, 1293455 NULLs, d, e). + * + * Because of all that, we put an hard limit in the range size, and this + * limit must be part of the Redis culture, so it should not be tuned in + * any way: 1 million items, with an hard error if the range is bigger than + * that, not just a silent trimming at this length, that would cause hard + * to track bugs. */ + if (len > ARGETRANGE_MAX_ITEMS) { + addReplyErrorFormat(c, "range exceeds maximum of %u items", + ARGETRANGE_MAX_ITEMS); + return; + } + + addReplyArrayLen(c, len); + if (o == NULL) { + for (uint64_t i = 0; i < len; i++) addReplyNull(c); + return; + } + + redisArray *ar = o->ptr; + if (reverse) { + for (uint64_t idx = hi; ; idx--) { + void *v = arGet(ar, idx); + addReplyArrayValue(c, v); + if (idx == lo) break; + } + } else { + for (uint64_t idx = lo; idx <= hi; idx++) { + void *v = arGet(ar, idx); + addReplyArrayValue(c, v); + } + } +} + +/* ---------------------------------------------------------------------------- + * ARSCAN + * -------------------------------------------------------------------------- */ + +/* Iterate populated elements in [start..end]. + * + * This iterator is read-only and not mutation-stable: between Init() and the + * final Next() that returns 0, the caller must not write to the array. Any + * write may free or relocate the current slice, making the iterator state + * stale. The goal of this abstraction was to capture repeated code in the + * implementation of ARSCAN, ARGREP, AROP. + * + * The struct lives on the caller stack, so setup and iteration stay allocation + * free and command-local. */ +typedef struct { + redisArray *ar; + uint64_t lo; /* Normalized inclusive range start. */ + uint64_t hi; /* Normalized inclusive range end. 
*/ + uint64_t lo_slice; /* First slice touched by the range. */ + uint64_t hi_slice; /* Last slice touched by the range. */ + uint32_t slice_size; /* Cached slice size. */ + int reverse; /* Iterate from high to low. */ + int32_t step; /* +1 forward, -1 backward. */ + int done; /* No more elements to return. */ + int top_done; /* No more slices to inspect after current. */ + + uint64_t slice_id; /* Next flat-directory slice to inspect. */ + int32_t sdir_index; /* Next superdir entry to inspect. */ + int32_t slot_index; /* Next slot inside the current superdir entry. */ + + arSlice *slice; /* Slice currently being scanned. */ + uint64_t slice_base; /* Logical index of slice offset 0. */ + uint32_t off_lo; /* First in-range offset for current slice. */ + uint32_t off_hi; /* Last in-range offset for current slice. */ + int dense; /* Current slice is dense. */ + void **dense_items; /* Dense items window. */ + int32_t dense_off; /* Current dense logical offset. */ + int32_t dense_item_pos; /* Current dense window index. */ + int32_t dense_item_end; /* Final dense window index. */ + uint16_t *sparse_offsets; /* Sparse offsets array. */ + void **sparse_values; /* Sparse values array. */ + int32_t sparse_count; /* Sparse entry count. */ + int32_t sparse_pos; /* Current sparse entry position. */ + int slice_ready; /* Current slice scan state is initialized. */ +} arScanIter; + +#define AR_SCAN_ITER_SLOT_UNSET INT32_MIN + +/* Keep the per-element iterator hot path inline in the command loops. + * It helps a lot with certain targets, up to ~30-50% speed regression + * without forcing the inlining. */ +#if defined(__GNUC__) || defined(__clang__) +#define ALWAYS_INLINE __attribute__((always_inline)) inline +#else +#define ALWAYS_INLINE inline +#endif + +/* Initialize a populated-elements iterator. Empty arrays and empty clipped + * ranges are turned into a done iterator here so the first Next() is a single + * branch. 
*/
+static void arScanIterInit(redisArray *ar, uint64_t start, uint64_t end,
+                           arScanIter *it)
+{
+    memset(it, 0, sizeof(*it));
+    it->ar = ar;
+
+    if (ar == NULL || arCount(ar) == 0) {
+        it->done = 1;
+        it->top_done = 1;
+        return;
+    }
+
+    /* Note that a few things here could be taken
+     * from the array itself, as they are immutable,
+     * but after introducing this abstraction a small
+     * but measurable speed regression suggested to
+     * micro-optimize for this hot path and have
+     * iterator-side copies of often used stuff.
+     *
+     * The range is normalized so that lo <= hi; the requested direction
+     * is remembered in 'reverse' and 'step'. */
+    it->reverse = start > end;
+    it->step = it->reverse ? -1 : 1;
+    it->lo = it->reverse ? end : start;
+    it->hi = it->reverse ? start : end;
+    it->slice_size = ar->slice_size;
+    it->lo_slice = it->lo / it->slice_size;
+    it->hi_slice = it->hi / it->slice_size;
+    it->slot_index = AR_SCAN_ITER_SLOT_UNSET;
+
+    /* No intersection between the range and the array span. */
+    if (it->lo_slice > ar->dir_highest_used) {
+        it->done = 1;
+        it->top_done = 1;
+        return;
+    }
+
+    /* Clip the high end to the actual array span: 'hi' becomes the last
+     * offset of the highest used slice. */
+    if (it->hi_slice > ar->dir_highest_used) {
+        it->hi_slice = ar->dir_highest_used;
+        it->hi = arMakeIdx(it->hi_slice, it->slice_size - 1, it->slice_size);
+    }
+
+    /* Clipping made the range empty? */
+    if (it->lo_slice > it->hi_slice) {
+        it->done = 1;
+        it->top_done = 1;
+        return;
+    }
+
+    if (ar->superdir) {
+        int found;
+
+        /* Start from the first superdir block that can intersect the range.
+         * For reverse scans this is the block holding hi_slice, otherwise
+         * the one holding lo_slice. */
+        uint64_t block_id = (it->reverse ? it->hi_slice : it->lo_slice) /
+                            AR_SUPER_BLOCK_SLOTS;
+        uint32_t pos = arSuperDirFind(ar, block_id, &found);
+
+        if (it->reverse) it->sdir_index = found ? (int32_t)pos : (int32_t)pos - 1;
+        else it->sdir_index = (int32_t)pos;
+
+        /* No superdir block intersects the clipped range. */
+        if (it->sdir_index < 0 || it->sdir_index >= (int32_t)ar->sdir_len) {
+            it->done = 1;
+            it->top_done = 1;
+        }
+    } else {
+        /* Flat directory iteration starts directly from the first in-range slice.
+         * In reverse mode that is the highest slice of the clipped range. */
+        it->slice_id = it->reverse ? it->hi_slice : it->lo_slice;
+    }
+}
+
+/* Prepare the current slice-local scan state. Returns 1 if the slice may
+ * yield at least one populated element in range, otherwise 0.
+ * The function is used by arScanIterLoadNextSlice() each time a new
+ * slice should be iterated. When a new slice is selected by
+ * arScanIterLoadNextSlice(), then this function is called to setup the
+ * iteration needed by arScanIterNext().
+ *
+ * Note that the encoding-specific cursor fields are written even when 0 is
+ * returned; it->slice, the offsets and slice_ready are committed only on
+ * success. */
+static ALWAYS_INLINE int arScanIterPrepareSlice(arScanIter *it,
+                                                arSlice *s, uint64_t slice_id)
+{
+    uint64_t slice_base = slice_id * it->slice_size;
+    /* Restrict the scan to the part of this slice touched by the query.
+     * Only the first and the last slice of the range can be partial. */
+    uint32_t off_lo = (slice_id == it->lo_slice) ?
+        arSliceOff(it->lo, it->slice_size) : 0;
+    uint32_t off_hi = (slice_id == it->hi_slice) ?
+        arSliceOff(it->hi, it->slice_size) : it->slice_size - 1;
+
+    if (s->encoding == AR_SLICE_DENSE) {
+        uint32_t win_lo = s->layout.dense.offset;
+        uint32_t win_hi = s->layout.dense.offset + s->layout.dense.winsize - 1;
+
+        /* Dense slices may only have a smaller populated window allocated. */
+        if (off_lo < win_lo) off_lo = win_lo;
+        if (off_hi > win_hi) off_hi = win_hi;
+
+        /* No intersection between the range and the dense window. */
+        if (off_lo > off_hi) return 0;
+
+        it->dense = 1;
+        it->dense_items = s->layout.dense.items;
+        it->dense_off = it->reverse ? (int32_t)off_hi : (int32_t)off_lo;
+        it->dense_item_pos = it->dense_off - (int32_t)win_lo;
+        it->dense_item_end = (it->reverse ? (int32_t)off_lo :
+                              (int32_t)off_hi) - (int32_t)win_lo;
+    } else {
+        int found;
+        uint32_t pos;
+        uint16_t *offsets = s->layout.sparse.offsets;
+
+        it->dense = 0;
+        it->sparse_offsets = offsets;
+        it->sparse_values = s->layout.sparse.values;
+        it->sparse_count = (int32_t)s->count;
+        if (it->reverse) {
+            /* Start from the last sparse entry that can still be in range:
+             * the entry at off_hi if found, else the one before 'pos'. */
+            pos = arSparseFindPos(s, (uint16_t)off_hi, &found);
+            it->sparse_pos = found ? (int32_t)pos : (int32_t)pos - 1;
+
+            /* No sparse entry falls inside the requested offsets. */
+            if (it->sparse_pos < 0 || offsets[it->sparse_pos] < off_lo)
+                return 0;
+        } else {
+            /* Start from the first sparse entry that can still be in range. */
+            pos = arSparseFindPos(s, (uint16_t)off_lo, &found);
+            it->sparse_pos = (int32_t)pos;
+
+            /* No sparse entry falls inside the requested offsets. */
+            if (it->sparse_pos >= (int32_t)s->count ||
+                offsets[it->sparse_pos] > off_hi) return 0;
+        }
+    }
+
+    it->slice = s;
+    it->slice_base = slice_base;
+    it->off_lo = off_lo;
+    it->off_hi = off_hi;
+    it->slice_ready = 1;
+    return 1;
+}
+
+/* Advance top-level directory state until a non-NULL slice in range is ready
+ * for local scanning, or return 0 if the iterator is exhausted.
+ *
+ * Two directory layouts are handled: the superdir one, walked entry by entry
+ * and then slot by slot, and the flat one, walked slice by slice. */
+static ALWAYS_INLINE int arScanIterLoadNextSlice(arScanIter *it) {
+    redisArray *ar = it->ar;
+
+    if (ar->superdir) {
+        while (!it->top_done) {
+            /* No more superdir blocks to inspect. */
+            if (it->sdir_index < 0 || it->sdir_index >= (int32_t)ar->sdir_len) {
+                it->top_done = 1;
+                break;
+            }
+
+            arSDirEntry *e = ar->superdir + it->sdir_index;
+            uint64_t block_base = e->block_id * AR_SUPER_BLOCK_SLOTS;
+            uint64_t block_end = block_base + AR_SUPER_BLOCK_SLOTS - 1;
+            int32_t block_slot_lo = (block_base < it->lo_slice) ?
+                (int32_t)(it->lo_slice - block_base) : 0;
+            int32_t block_slot_hi = (block_end > it->hi_slice) ?
+                (int32_t)(it->hi_slice - block_base) : AR_SUPER_BLOCK_SLOTS - 1;
+
+            /* This block starts after the requested range. The slot bounds
+             * computed above are meaningful only for blocks that pass this
+             * check and the next one, and are not used otherwise. */
+            if (block_base > it->hi_slice) {
+                it->top_done = 1;
+                break;
+            }
+
+            /* This block ends before the requested range. A reverse scan has
+             * nothing left below it, a forward scan tries the next entry. */
+            if (block_end < it->lo_slice) {
+                if (it->reverse) it->top_done = 1;
+                else it->sdir_index++;
+                it->slot_index = AR_SCAN_ITER_SLOT_UNSET;
+                continue;
+            }
+
+            if (it->reverse) {
+                /* slot_index uses a sentinel outside the valid 0..2047 range
+                 * so reverse scans can consume slot 0 and then fall below the
+                 * block without looking like a fresh block entry. */
+                if (it->slot_index == AR_SCAN_ITER_SLOT_UNSET)
+                    it->slot_index = block_slot_hi;
+
+                while (it->slot_index >= block_slot_lo) {
+                    int32_t si = it->slot_index--;
+                    arSlice *s = e->slots[si];
+                    if (s && arScanIterPrepareSlice(it, s, block_base + si))
+                        return 1;
+                }
+
+                /* This block had no more matching slices, move to the previous block. */
+                it->sdir_index--;
+                it->slot_index = AR_SCAN_ITER_SLOT_UNSET;
+            } else {
+                /* slot_index uses a sentinel outside the valid 0..2047 range
+                 * so an exhausted block does not look like a fresh entry. */
+                if (it->slot_index == AR_SCAN_ITER_SLOT_UNSET)
+                    it->slot_index = block_slot_lo;
+
+                while (it->slot_index <= block_slot_hi) {
+                    int32_t si = it->slot_index++;
+                    arSlice *s = e->slots[si];
+                    if (s && arScanIterPrepareSlice(it, s, block_base + si))
+                        return 1;
+                }
+
+                /* This block had no more matching slices, move to the next block. */
+                it->sdir_index++;
+                it->slot_index = AR_SCAN_ITER_SLOT_UNSET;
+            }
+        }
+    } else {
+        while (!it->top_done) {
+            uint64_t slice_id = it->slice_id;
+            arSlice *s = ar->dir[slice_id];
+
+            /* Advance the top-level cursor before possibly returning this slice.
+             * top_done is set on the boundary slice, so the unsigned cursor
+             * is never moved past lo_slice or hi_slice. */
+            if (it->reverse) {
+                if (slice_id == it->lo_slice) it->top_done = 1;
+                else it->slice_id = slice_id - 1;
+            } else {
+                if (slice_id == it->hi_slice) it->top_done = 1;
+                else it->slice_id = slice_id + 1;
+            }
+
+            if (s && arScanIterPrepareSlice(it, s, slice_id))
+                return 1;
+        }
+    }
+
+    return 0;
+}
+
+/* Return the next populated element in range, or 0 when done.
*/
+static ALWAYS_INLINE int arScanIterNext(arScanIter *it,
+                                        uint64_t *idx, void **value)
+{
+    /* The iterator was already fully consumed. */
+    if (it->done) return 0;
+
+    while (1) {
+        if (it->slice_ready) {
+            /* Drain the current slice before asking for another one. */
+            if (it->dense) {
+                while ((it->step > 0 && it->dense_item_pos <= it->dense_item_end) ||
+                       (it->step < 0 && it->dense_item_pos >= it->dense_item_end)) {
+                    uint32_t off = (uint32_t)it->dense_off;
+                    void *v = it->dense_items[it->dense_item_pos];
+                    it->dense_off += it->step;
+                    it->dense_item_pos += it->step;
+
+                    /* Dense windows may contain holes. The cursor was already
+                     * advanced above, so skipping is just a continue. */
+                    if (arIsEmpty(v)) continue;
+
+                    if (idx) *idx = it->slice_base + off;
+                    *value = v;
+                    return 1;
+                }
+            } else {
+                while (it->sparse_pos >= 0 && it->sparse_pos < it->sparse_count) {
+                    int32_t pos = it->sparse_pos;
+                    uint32_t off = it->sparse_offsets[pos];
+
+                    /* Sparse entries are sorted, so leaving the window ends this slice. */
+                    if (off < it->off_lo || off > it->off_hi) break;
+
+                    it->sparse_pos += it->step;
+                    if (idx) *idx = it->slice_base + off;
+                    *value = it->sparse_values[pos];
+                    return 1;
+                }
+            }
+
+            /* The current slice has no more in-range populated elements. */
+            it->slice = NULL;
+            it->slice_ready = 0;
+        }
+
+        /* No more in-range slices are available: mark the iterator as done
+         * so that later calls return at the first check. */
+        if (!arScanIterLoadNextSlice(it)) {
+            it->done = 1;
+            return 0;
+        }
+    }
+}
+
+/* ARSCAN key start end [LIMIT count]
+ *
+ * Returns only existing elements as flat index/value pairs.
+ *
+ * Complexity is O(P), where P is visited positions in touched slices
+ * (dense scanned slots + sparse entries), with worst-case O(|end-start|+1)
+ * and typical case close to O(N), where N is the number of existing
+ * elements in range. This means that huge ranges are safe and will not
+ * block the server with a work bound to the span length.
+ *
+ * Unlike ARGETRANGE, holes are skipped rather than returned as NULLs.
+ * LIMIT caps the number of returned pairs.
*/
+void arscanCommand(client *c) {
+    uint64_t start, end;
+    if (arrayParseIndexOrReply(c, c->argv[2], &start) != C_OK) return;
+    if (arrayParseIndexOrReply(c, c->argv[3], &end) != C_OK) return;
+
+    /* Only two shapes are valid: the bare range, or the range followed by
+     * "LIMIT <count>". */
+    if (c->argc != 4 && c->argc != 6) {
+        addReplyErrorArity(c);
+        return;
+    }
+
+    /* Without LIMIT the number of returned pairs is unbounded. */
+    uint64_t budget = UINT64_MAX;
+    if (c->argc == 6) {
+        long long limit_arg;
+
+        if (strcasecmp(c->argv[4]->ptr, "LIMIT") != 0) {
+            addReplyErrorObject(c, shared.syntaxerr);
+            return;
+        }
+        if (getLongLongFromObjectOrReply(c, c->argv[5], &limit_arg, NULL) != C_OK)
+            return;
+        if (limit_arg <= 0) {
+            addReplyError(c, "LIMIT must be positive");
+            return;
+        }
+        budget = (uint64_t)limit_arg;
+    }
+
+    robj *o = lookupKeyRead(c->db, c->argv[1]);
+    if (o == NULL) {
+        /* A missing key has no populated elements at all. */
+        addReplyArrayLen(c, 0);
+        return;
+    }
+    if (checkType(c, o, OBJ_ARRAY)) return;
+
+    /* The number of pairs is known only at the end of the scan, so the
+     * outer array length is deferred. */
+    void *deferred = addReplyDeferredLen(c);
+    uint64_t emitted = 0;
+    uint64_t pos;
+    void *val;
+    arScanIter iter;
+
+    arScanIterInit(o->ptr, start, end, &iter);
+    for (; budget != 0 && arScanIterNext(&iter, &pos, &val); budget--) {
+        /* Reply with nested [idx, value] pairs. */
+        addReplyArrayLen(c, 2);
+        addReplyUnsignedLongLong(c, pos);
+        addReplyArrayValue(c, val);
+        emitted++;
+    }
+
+    setDeferredArrayLen(c, deferred, emitted);
+}
+
+/* ============================================================================
+ * ARGREP
+ * ============================================================================
+ *
+ * Search existing array elements in a range using textual predicates.
+ * Like ARSCAN, the work is bound by the visited slices, not by the raw
+ * numeric span alone: dense slices scan the touched dense window, while
+ * sparse slices only scan stored entries inside the covered offsets.
+ * -------------------------------------------------------------------------- */ + +#define ARGREP_PRED_EXACT 1 +#define ARGREP_PRED_MATCH 2 +#define ARGREP_PRED_GLOB 3 +#define ARGREP_PRED_RE 4 + +#define ARGREP_MAX_PREDICATES 250 +#define ARGREP_MAX_RE_LEN 2048 + +#define ARGREP_COMBINE_OR 1 +#define ARGREP_COMBINE_AND 2 + +#define ARGREP_BOUND_INDEX 1 +#define ARGREP_BOUND_START 2 +#define ARGREP_BOUND_END 3 + +typedef struct { + int type; /* EXACT, MATCH, GLOB, or RE. */ + sds pattern; /* Pattern argument exactly as given by the user. */ + regex_t regex; /* Compiled regex for RE predicates. */ + int regex_compiled; /* Whether regex must be freed. */ +} arGrepPredicate; + +typedef struct { + int type; /* Numeric index, logical start, or logical end. */ + uint64_t index; /* Used only for numeric bounds. */ +} arGrepBound; + +typedef struct { + arGrepPredicate *preds; /* All predicates to apply to each element. */ + int num_preds; /* Number of predicates stored in preds[]. */ + int combine; /* OR by default, AND if requested. */ + int withvalues; /* Reply with [idx value ...] instead of [idx ...]. */ + int nocase; /* Apply case-insensitive matching globally. */ +} arGrepPlan; + +/* Lowercase only ASCII letters. This keeps MATCH/EXACT deterministic and + * locale-independent even on arbitrary binary payloads. */ +static inline unsigned char arGrepLowerAscii(unsigned char c) { + return (c >= 'A' && c <= 'Z') ? (unsigned char)(c + ('a' - 'A')) : c; +} + +/* Compare two byte strings, optionally ignoring ASCII case. */ +int arGrepBytesEqual(const char *a, size_t alen, const char *b, size_t blen, + int nocase) { + if (alen != blen) return 0; + if (!nocase) return memcmp(a, b, alen) == 0; + + for (size_t i = 0; i < alen; i++) { + if (arGrepLowerAscii((unsigned char)a[i]) != + arGrepLowerAscii((unsigned char)b[i])) { + return 0; + } + } + return 1; +} + +/* Find a needle inside a byte string, optionally ignoring ASCII case. 
*/ +int arGrepBytesContains(const char *haystack, size_t haystack_len, + const char *needle, size_t needle_len, int nocase) { + if (needle_len == 0) return 1; + if (needle_len > haystack_len) return 0; + + size_t last = haystack_len - needle_len; + for (size_t i = 0; i <= last; i++) { + if (arGrepBytesEqual(haystack + i, needle_len, needle, needle_len, + nocase)) { + return 1; + } + } + return 0; +} + +/* Return the predicate type for a keyword, or 0 if it is not one. */ +int arGrepPredicateType(const char *token) { + if (!strcasecmp(token, "EXACT")) return ARGREP_PRED_EXACT; + if (!strcasecmp(token, "MATCH")) return ARGREP_PRED_MATCH; + if (!strcasecmp(token, "GLOB")) return ARGREP_PRED_GLOB; + if (!strcasecmp(token, "RE")) return ARGREP_PRED_RE; + return 0; +} + +/* Free any compiled regex state created while parsing ARGREP. */ +void arGrepFreePlan(arGrepPlan *plan) { + if (plan->preds == NULL) return; + + for (int i = 0; i < plan->num_preds; i++) { + if (plan->preds[i].regex_compiled) + tre_regfree(&plan->preds[i].regex); + } + zfree(plan->preds); + plan->preds = NULL; +} + +/* Parse a bound argument. ARGREP accepts the special tokens "-" and "+" + * in addition to normal array indexes. */ +int arGrepParseBoundOrReply(client *c, robj *arg, arGrepBound *bound) { + if (arg->encoding != OBJ_ENCODING_INT) { + sds token = arg->ptr; + if (sdslen(token) == 1 && token[0] == '-') { + bound->type = ARGREP_BOUND_START; + bound->index = 0; + return C_OK; + } + if (sdslen(token) == 1 && token[0] == '+') { + bound->type = ARGREP_BOUND_END; + bound->index = 0; + return C_OK; + } + } + + if (getArrayIndexFromObject(arg, &bound->index, 0) != C_OK) { + addReplyError(c, "invalid array index"); + return C_ERR; + } + bound->type = ARGREP_BOUND_INDEX; + return C_OK; +} + +/* Resolve a parsed bound against the current array length. 
*/
+uint64_t arGrepResolveBound(arGrepBound *bound, uint64_t max_index) {
+    switch (bound->type) {
+    case ARGREP_BOUND_START:
+        return 0;
+    case ARGREP_BOUND_END:
+        return max_index;
+    default:
+        /* Numeric bound: use the index as given by the user. */
+        return bound->index;
+    }
+}
+
+/* Compile the RE predicates of the plan. This runs only once the whole
+ * command line was parsed, so that a NOCASE option found anywhere applies
+ * to every regex. On failure an error is sent to the client and C_ERR is
+ * returned; regexes compiled so far stay flagged in the plan for
+ * arGrepFreePlan(). */
+int arGrepCompileRegexesOrReply(client *c, arGrepPlan *plan) {
+    /* The compile flags are the same for every regex of the plan. */
+    int cflags = REG_EXTENDED | REG_NOSUB | REG_USEBYTES;
+    if (plan->nocase) cflags |= REG_ICASE;
+
+    for (int i = 0; i < plan->num_preds; i++) {
+        arGrepPredicate *pred = &plan->preds[i];
+        if (pred->type != ARGREP_PRED_RE) continue;
+
+        size_t pattern_len = sdslen(pred->pattern);
+        if (pattern_len == 0) {
+            addReplyError(c, "regular expression is empty");
+            return C_ERR;
+        }
+
+        int err = tre_regncompb(&pred->regex, pred->pattern, pattern_len,
+                                cflags);
+        if (err != REG_OK) {
+            char errbuf[256];
+            tre_regerror(err, &pred->regex, errbuf, sizeof(errbuf));
+            addReplyErrorFormat(c, "invalid regular expression: %s", errbuf);
+            return C_ERR;
+        }
+
+        /* From now on the regex owns resources: flag it before any other
+         * check can fail, so that it is always released. */
+        pred->regex_compiled = 1;
+
+        if (tre_have_backrefs(&pred->regex)) {
+            addReplyError(c, "regular expression backreferences are not supported");
+            return C_ERR;
+        }
+    }
+    return C_OK;
+}
+
+/* Parse predicates and global modifiers in a single pass. This makes the
+ * command more user-friendly because predicates and options can be mixed
+ * freely. If the same global option appears multiple times, the last one
+ * wins.
*/ +int arGrepParsePlanOrReply(client *c, arGrepPlan *plan, uint64_t *limit) { + memset(plan, 0, sizeof(*plan)); + plan->combine = ARGREP_COMBINE_OR; + *limit = UINT64_MAX; + + int max_preds = c->argc - 4; + plan->preds = zcalloc(sizeof(*plan->preds) * max_preds); + + for (int arg = 4; arg < c->argc; ) { + sds token = c->argv[arg]->ptr; + int type = arGrepPredicateType(token); + + if (type != 0) { + if (arg + 1 >= c->argc) { + addReplyErrorObject(c, shared.syntaxerr); + return C_ERR; + } + if (plan->num_preds >= ARGREP_MAX_PREDICATES) { + addReplyErrorFormat(c, "too many predicates, maximum is %d", + ARGREP_MAX_PREDICATES); + return C_ERR; + } + + arGrepPredicate *pred = &plan->preds[plan->num_preds++]; + pred->type = type; + pred->pattern = c->argv[arg + 1]->ptr; + if (type == ARGREP_PRED_RE && + sdslen(pred->pattern) > ARGREP_MAX_RE_LEN) { + addReplyErrorFormat(c, + "regular expression is too long, maximum is %d bytes", + ARGREP_MAX_RE_LEN); + return C_ERR; + } + arg += 2; + continue; + } + + if (!strcasecmp(token, "LIMIT")) { + if (arg + 1 >= c->argc) { + addReplyErrorObject(c, shared.syntaxerr); + return C_ERR; + } + + long long ll; + if (getLongLongFromObjectOrReply(c, c->argv[arg + 1], &ll, NULL) + != C_OK) { + return C_ERR; + } + if (ll <= 0) { + addReplyError(c, "LIMIT must be positive"); + return C_ERR; + } + + *limit = (uint64_t)ll; + arg += 2; + continue; + } + + if (!strcasecmp(token, "WITHVALUES")) { + plan->withvalues = 1; + arg++; + continue; + } + + if (!strcasecmp(token, "NOCASE")) { + plan->nocase = 1; + arg++; + continue; + } + + if (!strcasecmp(token, "AND") || !strcasecmp(token, "OR")) { + plan->combine = !strcasecmp(token, "AND") ? 
+ ARGREP_COMBINE_AND : ARGREP_COMBINE_OR; + arg++; + continue; + } + + addReplyErrorObject(c, shared.syntaxerr); + return C_ERR; + } + + if (plan->num_preds == 0) { + addReplyErrorObject(c, shared.syntaxerr); + return C_ERR; + } + + return arGrepCompileRegexesOrReply(c, plan); +} + +/* Match one predicate against the decoded element bytes. */ +int arGrepMatchPredicate(arGrepPredicate *pred, const char *data, size_t len, + int nocase) { + size_t pattern_len = sdslen(pred->pattern); + + switch (pred->type) { + case ARGREP_PRED_EXACT: + return arGrepBytesEqual(data, len, pred->pattern, pattern_len, nocase); + case ARGREP_PRED_MATCH: + return arGrepBytesContains(data, len, pred->pattern, pattern_len, + nocase); + case ARGREP_PRED_GLOB: + return stringmatchlen(pred->pattern, pattern_len, data, len, nocase); + case ARGREP_PRED_RE: + return tre_regnexecb(&pred->regex, data, len, 0, NULL, 0) == REG_OK; + default: + serverPanic("Unknown ARGREP predicate type"); + } +} + +/* Decode one array value and apply all the predicates to it. */ +int arGrepValueMatches(arGrepPlan *plan, void *v) { + char buf[AR_INLINE_BUFSIZE]; + size_t len; + const char *data = arDecode(v, buf, sizeof(buf), &len); + + if (plan->combine == ARGREP_COMBINE_AND) { + for (int i = 0; i < plan->num_preds; i++) { + if (!arGrepMatchPredicate(&plan->preds[i], data, len, + plan->nocase)) { + return 0; + } + } + return 1; + } + + for (int i = 0; i < plan->num_preds; i++) { + if (arGrepMatchPredicate(&plan->preds[i], data, len, plan->nocase)) + return 1; + } + return 0; +} + +/* ARGREP key start end + * (EXACT string | MATCH string | GLOB pattern | RE pattern) ... + * [AND | OR] [LIMIT count] [WITHVALUES] [NOCASE] + * + * Search existing elements in a range and return matching indexes. + * + * Complexity is O(P * C), where P is the number of visited positions in the + * touched slices and C is the cost of evaluating the active predicates. 
+ * Dense slices scan the touched dense window, sparse slices only visit stored
+ * entries, and LIMIT stops as soon as enough matches were emitted.
+ *
+ * "-" and "+" mean the logical start and end of the array. WITHVALUES changes
+ * the reply from [idx ...] to [idx value ...]. */
+void argrepCommand(client *c) {
+    arGrepBound from, to;
+    if (arGrepParseBoundOrReply(c, c->argv[2], &from) != C_OK) return;
+    if (arGrepParseBoundOrReply(c, c->argv[3], &to) != C_OK) return;
+
+    /* From here on the plan may own memory and compiled regexes: every
+     * exit path goes through the cleanup label. */
+    arGrepPlan plan;
+    uint64_t budget;
+    if (arGrepParsePlanOrReply(c, &plan, &budget) != C_OK) goto cleanup;
+
+    robj *o = lookupKeyRead(c->db, c->argv[1]);
+    if (o != NULL && checkType(c, o, OBJ_ARRAY)) goto cleanup;
+
+    /* A missing key and an array without elements both produce an empty
+     * reply. */
+    redisArray *ar = (o != NULL) ? o->ptr : NULL;
+    uint64_t ar_len = (ar != NULL) ? arLen(ar) : 0;
+    if (ar_len == 0 || arCount(ar) == 0) {
+        addReplyArrayLen(c, 0);
+        goto cleanup;
+    }
+
+    /* "-" and "+" are resolved against the current last logical index. */
+    uint64_t last_index = ar_len - 1;
+    uint64_t start = arGrepResolveBound(&from, last_index);
+    uint64_t end = arGrepResolveBound(&to, last_index);
+
+    void *deferred = addReplyDeferredLen(c);
+    uint64_t matches = 0;
+    uint64_t pos;
+    void *val;
+    arScanIter iter;
+
+    arScanIterInit(ar, start, end, &iter);
+    while (budget != 0 && arScanIterNext(&iter, &pos, &val)) {
+        if (arGrepValueMatches(&plan, val)) {
+            /* With WITHVALUES, reply nested [idx, value] pairs. */
+            if (plan.withvalues) {
+                addReplyArrayLen(c, 2);
+                addReplyUnsignedLongLong(c, pos);
+                addReplyArrayValue(c, val);
+            } else {
+                addReplyUnsignedLongLong(c, pos);
+            }
+            matches++;
+            budget--;
+        }
+    }
+    setDeferredArrayLen(c, deferred, matches);
+
+cleanup:
+    arGrepFreePlan(&plan);
+}
+
+/* ============================================================================
+ * AROP
+ * ============================================================================
+ *
+ * Aggregate operations over a range.
Uses O(N) iteration where N is the
+ * number of stored elements. Dense slices scan the window intersection
+ * (bounded by dense.winsize, kept small by demotion when density drops).
+ * -------------------------------------------------------------------------- */
+
+/* Operation types for AROP. Zero is not a valid operation. */
+#define AROP_SUM    1   /* Sum of numeric elements in range. */
+#define AROP_MIN    2   /* Minimum numeric element in range. */
+#define AROP_MAX    3   /* Maximum numeric element in range. */
+#define AROP_AND    4   /* Bitwise AND of integer elements in range. */
+#define AROP_OR     5   /* Bitwise OR of integer elements in range. */
+#define AROP_XOR    6   /* Bitwise XOR of integer elements in range. */
+#define AROP_MATCH  7   /* Count elements equal to a target string. */
+#define AROP_USED   8   /* Count of non-empty (used) slots in range. */
+
+/* Accumulator state for AROP. A single structure serves every operation:
+ * each operation only uses the fields it needs, the other ones are left
+ * at their initial value. */
+typedef struct {
+    int op;                 /* Selected AROP operation. */
+    sds match_val;          /* MATCH target string. */
+    long double sum_acc;    /* Running SUM accumulator. */
+    long double minmax_acc; /* Running MIN or MAX accumulator. */
+    int64_t bitwise_acc;    /* Running AND/OR/XOR accumulator. */
+    long long match_count;  /* Number of MATCH hits. */
+    long long used_count;   /* Number of non-empty elements seen. */
+    int has_numeric;        /* Saw at least one numeric value. */
+    int has_int;            /* Saw at least one bitwise-usable integer. */
+} arOpAcc;
+
+/* Process a single value for AROP aggregation, aggregating it
+ * into the structure arOpAcc 'acc'. This helper is used
+ * directly by the AROP command implementation while scanning
+ * populated elements in the requested range.
*/
+static inline void arOpAccumulate(arOpAcc *acc, void *v) {
+    /* USED: every populated element counts, whatever its type. */
+    if (acc->op == AROP_USED) {
+        acc->used_count++;
+        return;
+    }
+
+    /* MATCH: binary-safe, case-sensitive comparison with the target. */
+    if (acc->op == AROP_MATCH) {
+        size_t vlen;
+        char vbuf[AR_INLINE_BUFSIZE];
+        const char *data = arDecode(v, vbuf, sizeof(vbuf), &vlen);
+        if (vlen == sdslen(acc->match_val) &&
+            memcmp(data, acc->match_val, vlen) == 0) {
+            acc->match_count++;
+        }
+        return;
+    }
+
+    /* Numeric operations: turn the value into a number, or skip it when
+     * it is a string that does not parse as one. */
+    long double num;
+    int is_int = 0;
+    int64_t ival = 0;
+
+    if (arIsInt(v)) {
+        ival = arToInt(v);
+        num = (long double)ival;
+        is_int = 1;
+    } else if (arIsFloat(v)) {
+        num = (long double)arToDouble(v);
+    } else {
+        const char *data;
+        size_t vlen;
+        char smallbuf[8];
+
+        if (arIsSmallStr(v)) {
+            vlen = arToSmallStr(v, smallbuf);
+            data = smallbuf;
+        } else {
+            data = arStringData(v);
+            vlen = arStringLen(v);
+        }
+
+        /* Try the integer parsing first, so that integer strings keep
+         * their exact value for the bitwise operations. */
+        long long ll;
+        if (string2ll(data, vlen, &ll)) {
+            ival = ll;
+            num = (long double)ll;
+            is_int = 1;
+        } else {
+            long double ld;
+            if (string2ld(data, vlen, &ld)) {
+                num = ld;
+            } else {
+                return;
+            }
+        }
+    }
+
+    if (acc->op == AROP_AND || acc->op == AROP_OR || acc->op == AROP_XOR) {
+        if (!is_int) {
+            /* If it is a float, we only take the integer part. */
+            if (isnan(num)) return;
+
+            /* Only values in [-2^63, 2^63) can be converted. The upper
+             * bound is written as -(long double)INT64_MIN, that is exactly
+             * 2^63 in every binary floating point format. INT64_MAX instead
+             * is not representable when long double is a 64 bit double: it
+             * rounds up to 2^63, so a "num > INT64_MAX" test would accept
+             * 2^63 itself and make the cast below undefined behavior. */
+            if (num < (long double)INT64_MIN || num >= -(long double)INT64_MIN)
+                return;
+            ival = (int64_t)num; /* Truncate toward zero. */
+        }
+        if (!acc->has_int) {
+            /* The first usable value seeds the accumulator. */
+            acc->bitwise_acc = ival;
+            acc->has_int = 1;
+        } else {
+            if (acc->op == AROP_AND) acc->bitwise_acc &= ival;
+            else if (acc->op == AROP_OR) acc->bitwise_acc |= ival;
+            else acc->bitwise_acc ^= ival;
+        }
+    } else {
+        if (!acc->has_numeric) {
+            /* Handle the first element seen for SUM, MIN, MAX. */
+            acc->sum_acc = num;
+            acc->minmax_acc = num;
+            acc->has_numeric = 1;
+        } else {
+            if (acc->op == AROP_SUM)
+                acc->sum_acc += num;
+            else if (acc->op == AROP_MIN && num < acc->minmax_acc)
+                acc->minmax_acc = num;
+            else if (acc->op == AROP_MAX && num > acc->minmax_acc)
+                acc->minmax_acc = num;
+        }
+    }
+}
+
+/* AROP key start end OP [arg]
+ *
+ * Aggregates over existing elements in the requested range, the
+ * aggregation performed depends on the "op" argument.
+ *
+ * Complexity is O(P), where P is visited positions in touched slices
+ * (dense scanned slots + sparse entries), with worst-case O(|end-start|+1)
+ * and typical case close to O(N), where N is the number of existing
+ * elements in range.
+ *
+ * MATCH and USED count hits. SUM/MIN/MAX ignore values that are not numeric.
+ * AND/OR/XOR truncate floats toward zero and ignore values that, after the
+ * truncation, cannot be represented as int64_t. */
+void aropCommand(client *c) {
+    uint64_t start, end;
+    if (arrayParseIndexOrReply(c, c->argv[2], &start) != C_OK) return;
+    if (arrayParseIndexOrReply(c, c->argv[3], &end) != C_OK) return;
+
+    const char *opstr = c->argv[4]->ptr;
+    int op = 0;
+    if (!strcasecmp(opstr, "SUM")) op = AROP_SUM;
+    else if (!strcasecmp(opstr, "MIN")) op = AROP_MIN;
+    else if (!strcasecmp(opstr, "MAX")) op = AROP_MAX;
+    else if (!strcasecmp(opstr, "AND")) op = AROP_AND;
+    else if (!strcasecmp(opstr, "OR")) op = AROP_OR;
+    else if (!strcasecmp(opstr, "XOR")) op = AROP_XOR;
+    else if (!strcasecmp(opstr, "MATCH")) op = AROP_MATCH;
+    else if (!strcasecmp(opstr, "USED")) op = AROP_USED;
+    else {
+        addReplyError(c, "unknown operation");
+        return;
+    }
+
+    /* MATCH is the only operation taking an extra argument. */
+    sds match_val = NULL;
+    if (op == AROP_MATCH) {
+        if (c->argc != 6) {
+            addReplyError(c, "MATCH requires a value argument");
+            return;
+        }
+        match_val = c->argv[5]->ptr;
+    } else if (c->argc != 5) {
+        addReplyErrorArity(c);
+        return;
+    }
+
+    /* A missing key replies like an array without elements: zero for the
+     * counting operations, NULL for the other ones. */
+    robj *o = lookupKeyRead(c->db, c->argv[1]);
+    if (o == NULL) {
+        if (op == AROP_MATCH || op == AROP_USED) {
+            addReplyLongLong(c, 0);
+        } else {
+            addReplyNull(c);
+        }
+        return;
+    }
+    if (checkType(c, o, OBJ_ARRAY)) return;
+
+    redisArray *ar = o->ptr;
+    arOpAcc acc = {
+        .op = op, .match_val = match_val,
+        .sum_acc = 0, .minmax_acc = 0, .bitwise_acc = 0,
+        .match_count = 0, .used_count = 0,
+        .has_numeric = 0, .has_int = 0
+    };
+    arScanIter it;
+    void *v;
+
+    /* All current AROP operations are order-independent, so iterating the
+     * user-provided direction is fine here. */
+    arScanIterInit(ar, start, end, &it);
+    while (arScanIterNext(&it, NULL, &v))
+        arOpAccumulate(&acc, v);
+
+    /* Reply */
+    if (op == AROP_MATCH) {
+        addReplyLongLong(c, acc.match_count);
+    } else if (op == AROP_USED) {
+        addReplyLongLong(c, acc.used_count);
+    } else if (op == AROP_AND || op == AROP_OR || op == AROP_XOR) {
+        if (!acc.has_int) addReplyNull(c);
+        else addReplyLongLong(c, acc.bitwise_acc);
+    } else {
+        if (!acc.has_numeric) {
+            addReplyNull(c);
+        } else {
+            /* SUM, MIN and MAX reply with the number formatted as a string. */
+            long double result = (op == AROP_SUM) ? acc.sum_acc : acc.minmax_acc;
+            char buf[MAX_LONG_DOUBLE_CHARS + 1];
+            int len = ld2string(buf, sizeof(buf), result, LD_STR_AUTO);
+            addReplyBulkCBuffer(c, buf, len);
+        }
+    }
+}
+
+/* ----------------------------------------------------------------------------
+ * The ring buffer family of commands:
+ *
+ * ARINSERT / ARNEXT / ARSEEK / ARLASTITEMS
+ * -------------------------------------------------------------------------- */
+
+/* ARINSERT key value [value ...]
+ *
+ * Appends one or more values at the private insert cursor in O(N), where N is
+ * the number of values. The whole batch fails on index overflow.
+ *
+ * The cursor is then advanced to the last written index, which is also
+ * returned as the command return value, and can be inspected later
+ * with ARNEXT.
*/
+void arinsertCommand(client *c) {
+    robj *o = lookupArrayForWriteOrReply(c, c->argv[1]);
+    if (o == NULL) return;
+
+    redisArray *ar = o->ptr;
+    uint64_t old_count = arCount(ar);
+    size_t old_alloc = 0;
+    if (server.memory_tracking_enabled) old_alloc = kvobjAllocSize(o);
+    int num_values = c->argc - 2;
+
+    /* Pre-validate: compute start cursor and check entire batch fits.
+     * Nothing is written to the array before both checks passed. */
+    uint64_t start_cursor;
+    if (ar->insert_idx == AR_INSERT_IDX_NONE) {
+        start_cursor = 0;
+    } else {
+        if (ar->insert_idx >= UINT64_MAX - 1) {
+            addReplyError(c, "insert index overflow");
+            return;
+        }
+        start_cursor = ar->insert_idx + 1;
+    }
+
+    /* Check last cursor won't overflow or reach forbidden index. A wrapped
+     * addition shows up as last_cursor < start_cursor. */
+    uint64_t last_cursor = start_cursor + (uint64_t)num_values - 1;
+    if (last_cursor < start_cursor || last_cursor == UINT64_MAX) {
+        addReplyError(c, "insert index overflow");
+        return;
+    }
+
+    /* Pre-promote sparse slices only for true bulk inserts. A single-element
+     * insert does not benefit from the extra range-analysis pass. */
+    if (num_values > 1)
+        arMayPromoteToDenseForRangeSet(ar, start_cursor, last_cursor);
+
+    /* Apply all values, at consecutive indexes starting from start_cursor. */
+    uint64_t cursor = start_cursor;
+    for (int i = 0; i < num_values; i++) {
+        sds val = c->argv[2 + i]->ptr;
+        void *v = arEncode(val, sdslen(val));
+        arSet(ar, cursor, v);
+        cursor++;
+    }
+    ar->insert_idx = last_cursor;
+
+    updateKeysizesHist(c->db, OBJ_ARRAY, old_count, arCount(ar));
+    if (server.memory_tracking_enabled)
+        updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), o, old_alloc, kvobjAllocSize(o));
+    keyModified(c, c->db, c->argv[1], o, 1);
+    notifyKeyspaceEvent(NOTIFY_ARRAY, "arinsert", c->argv[1], c->db->id);
+    server.dirty += num_values;
+
+    addReplyUnsignedLongLong(c, ar->insert_idx);
+}
+
+/* Duplicate one array value exactly. Immediate values can be copied as tagged
+ * words, while heap strings are re-encoded from their logical string form.
+ * This could be regarded as costly, but capturing values out of the existing + * array would break the sparsearray API isolation. */ +static void *arRingDupValue(void *v) { + if (v == NULL || !arIsPtr(v)) return v; + return arEncode(arStringData(v), arStringLen(v)); +} + +/* Return the next slot that ARRING would write to before modulo reduction. */ +static uint64_t arRingNextCursor(redisArray *ar) { + return (ar->insert_idx == AR_INSERT_IDX_NONE) ? 0 : ar->insert_idx + 1; +} + +/* Decide if ARRING needs to rebuild the retained logical ring positions before + * writing new values. + * + * We rebuild in only two cases: + * + * 1. Shrink: new size is smaller than the current inferred ring span. + * 2. Grow after wrap: the ring had already wrapped inside the old span, so + * without a rebuild the next write would overwrite old low indexes instead + * of using the newly added capacity. + * + * An explicit ARSEEK 0 is treated differently on grow: it is a direct cursor + * override saying "write next at index 0", so we honor it instead of forcing + * a grow-after-wrap repack first. + * + * keep_span is the maximum number of logical positions that may be retained. */ +static int arRingNeedsRework(redisArray *ar, uint64_t ring_size, + uint64_t *old_span, uint64_t *keep_span) { + *old_span = arLen(ar); + *keep_span = 0; + + if (*old_span == 0) return 0; + + if (ring_size < *old_span) { + *keep_span = ring_size; + return 1; + } + if (ring_size == *old_span) { + return 0; + } + if (ar->insert_idx == AR_INSERT_IDX_NONE) { + return 0; + } + if (arRingNextCursor(ar) < *old_span) { + *keep_span = *old_span; + return 1; + } + return 0; +} + +/* Rebuild the retained logical ring positions into a fresh compact array. + * + * We walk backward from the current anchor and keep at most keep_span items, + * but stop as soon as the first NULL is encountered. This makes resize keep + * the latest contiguous tail of existing items instead of crossing holes. 
+ *
+ * The retained items are replayed in chronological order, oldest to newest,
+ * so after the rebuild:
+ *
+ * - index 0 holds the oldest retained position
+ * - index retained_count-1 holds the newest retained position
+ * - insert_idx points to retained_count-1, ready for the next ARRING write
+ *
+ * We use two passes: one backward pass to count the contiguous retained tail,
+ * then one forward replay pass into the new array. This avoids any temporary
+ * retained-items buffer.
+ *
+ * The source array is only read here; freeing it is left to the caller.
+ * Values are copied with arRingDupValue(), so the new array does not share
+ * heap strings with the old one. */
+static redisArray *arRingRework(redisArray *ar, uint64_t old_span,
+                                uint64_t keep_span) {
+    serverAssert(old_span > 0);
+    serverAssert(keep_span > 0);
+    serverAssert(keep_span <= old_span);
+
+    redisArray *new_ar = arNew();
+
+    /* The rebuild operates on the inferred ring window [0..old_span-1]. If
+     * insert_idx is outside that window because of ARSEEK, fold it back into
+     * the current inferred span with modulo. If ARSEEK 0 was used and we are
+     * shrinking, anchor the walk at the current tail, just like ARLASTITEMS.
+     * Grow does not reach this path because arRingNeedsRework() skips grow
+     * rework when insert_idx is AR_INSERT_IDX_NONE. */
+    uint64_t anchor_idx = (ar->insert_idx == AR_INSERT_IDX_NONE) ?
+        (old_span - 1) : (ar->insert_idx % old_span);
+
+    uint64_t retained_count = 0;
+    uint64_t src_idx = anchor_idx;
+
+    /* Pass 1: walk backward (wrapping from 0 to old_span-1) and count how
+     * many consecutive non-NULL positions end at the anchor. */
+    while (retained_count < keep_span) {
+        void *v = arGet(ar, src_idx);
+        if (v == NULL) break; /* This makes any mix of ARSET/SEEK/RING calls
+                               * always bound to populated items, not logical
+                               * array span. */
+
+        retained_count++;
+        src_idx = (src_idx == 0) ? old_span - 1 : src_idx - 1;
+    }
+
+    /* src_idx now points to the position just before the oldest retained
+     * item, so advance once to start replaying oldest -> newest. */
+    src_idx++;
+    if (src_idx == old_span) src_idx = 0;
+
+    /* Pass 2: copy the retained tail into indexes 0..retained_count-1 of the
+     * new array. Every slot visited here was seen as non-NULL in pass 1. */
+    for (uint64_t dst_idx = 0; dst_idx < retained_count; dst_idx++) {
+        void *v = arGet(ar, src_idx);
+        serverAssert(v != NULL);
+        arSet(new_ar, dst_idx, arRingDupValue(v));
+
+        src_idx++;
+        if (src_idx == old_span) src_idx = 0;
+    }
+    /* With nothing retained (the anchor slot itself was NULL) insert_idx is
+     * left as arNew() initialized it. */
+    if (retained_count != 0) new_ar->insert_idx = retained_count - 1;
+    return new_ar;
+}
+
+/* ARRING key size value [value ...]
+ *
+ * Writes values into a logical ring buffer. May rework the array if
+ * the logical size changes across calls, so that up to `size` items
+ * are retained in the correct logical position.
+ *
+ * Complexity is O(M) normally, where M is the number of inserted values,
+ * and O(N+M) on resize, where N is the maximum of the old and new ring size.
+ * The rebuild stops at the first NULL, so holes cut the retained tail.
+ *
+ * ARSEEK 0 is still honored as a direct cursor override on grow.
+ *
+ * A size that is not an integer, or is <= 0, is rejected with an error
+ * before the key is looked up.
+ *
+ * Returns the last written slot. */
+void arringCommand(client *c) {
+    long long ll;
+    if (getLongLongFromObjectOrReply(c,c->argv[2],&ll,"invalid size") != C_OK)
+        return;
+    if (ll <= 0) {
+        addReplyError(c, "size must be positive");
+        return;
+    }
+    uint64_t ring_size = (uint64_t)ll;
+
+    robj *o = lookupArrayForWriteOrReply(c, c->argv[1]);
+    if (o == NULL) return;
+
+    redisArray *ar = o->ptr;
+    /* Snapshot count and allocation size before any modification: they feed
+     * the accounting calls at the end of the command. */
+    uint64_t old_count = arCount(ar);
+    size_t old_alloc = 0;
+    if (server.memory_tracking_enabled) old_alloc = kvobjAllocSize(o);
+    int num_values = c->argc - 3;
+    uint64_t cursor = 0;
+
+    /* If the requested size changes the logical ring shape, rebuild once
+     * before the hot insertion loop. This makes the command, when the user
+     * updates the window, no longer O(M), but O(N+M), however note that this
+     * is absolutely needed for high level sane semantics. Users will resize
+     * ring buffers, and they want to retain the latest items in a logically
+     * correct way. */
+    uint64_t old_span, keep_span;
+    if (arRingNeedsRework(ar, ring_size, &old_span, &keep_span)) {
+        /* Swap the rebuilt array into the key object and release the old
+         * one; `ar` is re-pointed so the loop below writes to the new array. */
+        redisArray *new_ar = arRingRework(ar, old_span, keep_span);
+        arFree(ar);
+        o->ptr = ar = new_ar;
+    }
+
+    /* Set the new items, modulo ring size. */
+    for (int i = 0; i < num_values; i++) {
+        /* Compute the next write position, then wrap it into the requested
+         * ring size if needed. By this point any needed resize/rework was
+         * already handled above. */
+        cursor = arRingNextCursor(ar);
+        if (cursor >= ring_size) cursor = cursor % ring_size;
+
+        /* Set the value */
+        sds val = c->argv[3 + i]->ptr;
+        void *v = arEncode(val, sdslen(val));
+        arSet(ar, cursor, v);
+        ar->insert_idx = cursor;
+    }
+
+    updateKeysizesHist(c->db, OBJ_ARRAY, old_count, arCount(ar));
+    if (server.memory_tracking_enabled)
+        updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), o, old_alloc, kvobjAllocSize(o));
+    keyModified(c, c->db, c->argv[1], o, 1);
+    notifyKeyspaceEvent(NOTIFY_ARRAY, "arring", c->argv[1], c->db->id);
+    server.dirty += num_values;
+
+    /* `cursor` holds the slot written by the last loop iteration. */
+    addReplyUnsignedLongLong(c, cursor);
+}
+
+/* ARNEXT key
+ *
+ * Returns in O(1) the next index that ARINSERT / ARRING would use.
+ *
+ * Missing keys and the pre-insert state reply with 0. If the cursor is in the
+ * terminal state where the next append would overflow, the reply is NULL.
+ * A key holding a different type is handled by checkType(). */
+void arnextCommand(client *c) {
+    robj *o = lookupKeyRead(c->db, c->argv[1]);
+    if (o == NULL) {
+        addReplyLongLong(c, 0);
+        return;
+    }
+    if (checkType(c, o, OBJ_ARRAY)) return;
+
+    redisArray *ar = o->ptr;
+    if (ar->insert_idx == AR_INSERT_IDX_NONE) {
+        addReplyLongLong(c, 0);
+    } else if (ar->insert_idx == UINT64_MAX - 1) {
+        addReplyNull(c); /* Terminal: index space exhausted */
+    } else {
+        addReplyUnsignedLongLong(c, ar->insert_idx + 1);
+    }
+}
+
+/* ARSEEK key idx
+ *
+ * Sets in O(1) the next index used by ARINSERT and ARRING.
+ *
+ * Returns 1 if the cursor was updated and 0 if the key does not exist.
+ * idx 0 resets the insert state to "next write goes to 0": in this case + * successive ARRING calls are guaranteed to don't rework the array in chase + * of logical size change. */ +void arseekCommand(client *c) { + uint64_t idx; + /* Allow UINT64_MAX because ARSEEK UINT64_MAX sets insert_idx to + * UINT64_MAX-1, which is a valid terminal state (next ARINSERT + * would overflow and fail). This is needed for AOF persistence. */ + if (getArrayIndexFromObject(c->argv[2], &idx, 1) != C_OK) { + addReplyError(c, "invalid array index"); + return; + } + + /* There aren't many good options for non existing keys: both creating + * an empty array or failing with "no such key" does not align very + * well with the Redis commands usual semantics. However we need to signal + * back that we ignored the index set if the key is not there, so zero + * is returned. */ + robj *o = lookupKeyWrite(c->db, c->argv[1]); + if (o == NULL) { + addReplyLongLong(c, 0); + return; + } + if (checkType(c, o, OBJ_ARRAY)) return; + + redisArray *ar = o->ptr; + + /* Set insert_idx so next ARINSERT writes to idx */ + if (idx == 0) { + ar->insert_idx = AR_INSERT_IDX_NONE; + } else { + ar->insert_idx = idx - 1; + } + + keyModified(c, c->db, c->argv[1], o, 1); + notifyKeyspaceEvent(NOTIFY_ARRAY, "arseek", c->argv[1], c->db->id); + server.dirty++; + addReplyLongLong(c, 1); +} + +/* ARLASTITEMS key count [REV] + * + * Returns the most recent positions from the current insert anchor in O(N), + * where N is the requested count. REV flips the reply order. + * + * This command may return NULLs because it walks positions, not only existing + * items. If ARSEEK 0 was used, the current array tail is used as the anchor. */ +void arlastitemsCommand(client *c) { + long long count; + if (getLongLongFromObjectOrReply(c, c->argv[2], &count, + "invalid COUNT") != C_OK) return; + + /* For count <= 0, nothing to return, just an empty array. 
*/ + if (count <= 0) { + addReplyArrayLen(c, 0); + return; + } + + /* Parse REV if provided. */ + int rev = 0; + if (c->argc == 4) { + if (strcasecmp(c->argv[3]->ptr, "REV") == 0) { + rev = 1; + } else { + addReplyErrorObject(c, shared.syntaxerr); + return; + } + } else if (c->argc != 3) { + addReplyErrorArity(c); + return; + } + + /* No key? Empty reply. */ + robj *o = lookupKeyRead(c->db, c->argv[1]); + if (o == NULL) { + addReplyArrayLen(c, 0); + return; + } + if (checkType(c, o, OBJ_ARRAY)) return; + + redisArray *ar = o->ptr; + uint64_t ar_len = arLen(ar); + uint64_t effective_count = + (uint64_t)count > ar->count ? ar->count : (uint64_t)count; + + /* Should never happen in practice, because we checked the COUNT before + * and the array should not be empty to be still a Redis key, so this + * is mostly a safety net. */ + if (effective_count == 0) { + addReplyArrayLen(c, 0); + return; + } + + /* Collect items walking backward from insert_idx. If ARSEEK 0 was used, + * insert_idx is AR_INSERT_IDX_NONE: in that case use the max set index as + * the anchor so ARLASTITEMS still reports the tail of the current array. + * + * Note that we use an array to collect the items: in the no-REV case + * otherwise a double scan would be needed. */ + void **collected = zmalloc(effective_count * sizeof(void *)); + uint64_t anchor_idx = + (ar->insert_idx == AR_INSERT_IDX_NONE) ? ar_len - 1 : ar->insert_idx; + uint64_t current_idx = anchor_idx; + uint64_t steps = 0; + + while(steps < effective_count) { + collected[steps] = arGet(ar, current_idx); + steps++; + + /* Decrement with wrap */ + if (current_idx == 0) { + current_idx = ar_len - 1; + } else { + current_idx--; + } + } + + /* Emit the protocol with the collected items. 
*/ + addReplyArrayLen(c, steps); + if (rev) { + /* Return in reverse chronological order (newest first) */ + for (uint64_t i = 0; i < steps; i++) + addReplyArrayValue(c, collected[i]); + } else { + /* Return in chronological order (oldest first) */ + for (int64_t i = steps - 1; i >= 0; i--) + addReplyArrayValue(c, collected[i]); + } + zfree(collected); +} + +/* ---------------------------------------------------------------------------- + * ARINFO + * -------------------------------------------------------------------------- */ + +/* ARINFO key [FULL] + * + * Returns metadata about the array in O(1), or O(N) with FULL where N is the + * number of slices. Unlike ARLEN and ARCOUNT, a missing key is an error. + * FULL adds per-encoding slice statistics by scanning the directory. */ +void arinfoCommand(client *c) { + int full = 0; + + if (c->argc > 2) { + if (c->argc == 3 && !strcasecmp(c->argv[2]->ptr, "full")) { + full = 1; + } else { + addReplyErrorObject(c, shared.syntaxerr); + return; + } + } + + robj *o = lookupKeyRead(c->db, c->argv[1]); + if (o == NULL) { + addReplyError(c, "no such key"); + return; + } + if (checkType(c, o, OBJ_ARRAY)) return; + + redisArray *ar = o->ptr; + + /* Per-encoding stats (only computed for FULL) */ + uint64_t num_dense = 0; + uint64_t num_sparse = 0; + uint64_t dense_total_winsize = 0; + uint64_t dense_total_count = 0; + uint64_t sparse_total_cap = 0; + + if (full) { + if (ar->superdir) { + for (uint32_t bi = 0; bi < ar->sdir_len; bi++) { + arSDirEntry *e = ar->superdir + bi; + for (uint32_t si = 0; si < AR_SUPER_BLOCK_SLOTS; si++) { + arSlice *s = e->slots[si]; + if (!s) continue; + if (s->encoding == AR_SLICE_DENSE) { + num_dense++; + dense_total_winsize += s->layout.dense.winsize; + dense_total_count += s->count; + } else { + num_sparse++; + sparse_total_cap += s->layout.sparse.cap; + } + } + } + } else { + for (uint64_t i = 0; i < ar->dir_alloc; i++) { + arSlice *s = ar->dir[i]; + if (!s) continue; + if (s->encoding == 
AR_SLICE_DENSE) { + num_dense++; + dense_total_winsize += s->layout.dense.winsize; + dense_total_count += s->count; + } else { + num_sparse++; + sparse_total_cap += s->layout.sparse.cap; + } + } + } + } + + if (full) { + addReplyMapLen(c, 12); + } else { + addReplyMapLen(c, 7); + } + + addReplyBulkCString(c, "count"); + addReplyUnsignedLongLong(c, ar->count); + + addReplyBulkCString(c, "len"); + addReplyUnsignedLongLong(c, arLen(ar)); + + addReplyBulkCString(c, "next-insert-index"); + if (ar->insert_idx == AR_INSERT_IDX_NONE || + ar->insert_idx == UINT64_MAX - 1) { + addReplyLongLong(c, 0); + } else { + addReplyUnsignedLongLong(c, ar->insert_idx + 1); + } + + addReplyBulkCString(c, "slices"); + addReplyLongLong(c, ar->num_slices); + + addReplyBulkCString(c, "directory-size"); + if (ar->superdir) { + /* Superdir mode: report allocated capacity */ + addReplyLongLong(c, ar->sdir_cap); + } else { + addReplyLongLong(c, ar->dir_alloc); + } + + addReplyBulkCString(c, "super-dir-entries"); + addReplyLongLong(c, ar->superdir ? 
ar->sdir_len : 0); + + addReplyBulkCString(c, "slice-size"); + addReplyLongLong(c, ar->slice_size); + + if (full) { + addReplyBulkCString(c, "dense-slices"); + addReplyLongLong(c, num_dense); + + addReplyBulkCString(c, "sparse-slices"); + addReplyLongLong(c, num_sparse); + + addReplyBulkCString(c, "avg-dense-size"); + if (num_dense > 0) { + addReplyDouble(c, (double)dense_total_winsize / num_dense); + } else { + addReplyDouble(c, 0); + } + + addReplyBulkCString(c, "avg-dense-fill"); + if (dense_total_winsize > 0) { + addReplyDouble(c, (double)dense_total_count / dense_total_winsize); + } else { + addReplyDouble(c, 0); + } + + addReplyBulkCString(c, "avg-sparse-size"); + if (num_sparse > 0) { + addReplyDouble(c, (double)sparse_total_cap / num_sparse); + } else { + addReplyDouble(c, 0); + } + } +} diff --git a/src/util.h b/src/util.h index 056ffdcf6..0c775c205 100644 --- a/src/util.h +++ b/src/util.h @@ -91,6 +91,12 @@ static inline int log2ceil(size_t x) { #endif } +/* Return the smallest power of 2 >= count (e.g. 5 -> 8, 8 -> 8). 
+ * NOTE(review): the shift below is performed on a plain int and the constant
+ * 32 assumes a 32-bit unsigned int, so for count > 2^30 the result does not
+ * fit (shift by 31 or 32) -- confirm callers stay below that. */
+static inline int nearestNextPowerOf2(unsigned int count) {
+    if (count <= 1) return 1;
+    return 1 << (32 - __builtin_clz(count-1));
+}
+
 /* Check for __builtin_add_overflow() */
 #ifndef __has_builtin
 #define __has_builtin(x) 0
diff --git a/src/zipmap.c b/src/zipmap.c
index 51c64ca81..e3981d810 100644
--- a/src/zipmap.c
+++ b/src/zipmap.c
@@ -387,6 +387,10 @@ int zipmapValidateIntegrity(unsigned char *zm, size_t size, int deep) {
         /* read the field name length */
         l = zipmapDecodeLength(p);
+        /* Sanity check: length < 254 must be encoded in 1 byte, not 5 bytes */
+        if (l < ZIPMAP_BIGLEN && s != 1)
+            return 0;
+
         p += s; /* skip the encoded field size */
         p += l; /* skip the field */
@@ -402,6 +406,9 @@ int zipmapValidateIntegrity(unsigned char *zm, size_t size, int deep) {
         /* read the value length */
         l = zipmapDecodeLength(p);
+        /* Sanity check: length < 254 must be encoded in 1 byte, not 5 bytes */
+        if (l < ZIPMAP_BIGLEN && s != 1)
+            return 0;
         p += s; /* skip the encoded value size*/
         e = *p++; /* skip the encoded free space (always encoded in one byte) */
         p += l+e; /* skip the value and free space */
diff --git a/tests/assets/array-32bit.rdb b/tests/assets/array-32bit.rdb
new file mode 100644
index 000000000..94ff98ea3
Binary files /dev/null and b/tests/assets/array-32bit.rdb differ
diff --git a/tests/integration/corrupt-dump-fuzzer.tcl b/tests/integration/corrupt-dump-fuzzer.tcl
index e69a2221b..8bd170027 100644
--- a/tests/integration/corrupt-dump-fuzzer.tcl
+++ b/tests/integration/corrupt-dump-fuzzer.tcl
@@ -15,7 +15,7 @@ if { !
[ catch { proc generate_collections {suffix elements} { set rd [redis_deferring_client] - set numcmd 7 + set numcmd 8 ;# base commands including array set has_vsets [server_has_command vadd] if {$has_vsets} {incr numcmd} @@ -29,6 +29,15 @@ proc generate_collections {suffix elements} { $rd zadd zset$suffix $j $val $rd sadd set$suffix $val $rd xadd stream$suffix * item 1 value $val + # Array with sparse indices and mixed value types (int, float, string) + set idx [expr {$j * 100 + int(rand() * 50)}] ;# sparse indices + if {$j % 3 == 0} { + $rd arset array$suffix $idx $j ;# integer value + } elseif {$j % 3 == 1} { + $rd arset array$suffix $idx [format "%.5f" [expr {rand() * 1000}]] ;# float value + } else { + $rd arset array$suffix $idx "str_$val" ;# string value + } if {$has_vsets} { $rd vadd vset$suffix VALUES 3 1 1 1 $j } @@ -59,7 +68,9 @@ proc generate_types {} { # create other non-collection types r incr int r set string str +if 0 { r gcra gcra 10 5 60000 +} # create bigger objects with 10 items (more than a single ziplist / listpack) generate_collections big 10 diff --git a/tests/integration/corrupt-dump.tcl b/tests/integration/corrupt-dump.tcl index 09e268911..c4d586462 100644 --- a/tests/integration/corrupt-dump.tcl +++ b/tests/integration/corrupt-dump.tcl @@ -1180,5 +1180,88 @@ test {corrupt payload: stream listpacks in non-ascending master order} { } } +test {corrupt payload: zipmap - element wouldn't fit in listpack} { + # Redis converts legacy zipmap encoded hashes to listpacks. + # This test creates a zipmap entry with a 1GB value which cannot + # fit into a listpack and verifies that RESTORE fails. 
+ + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no proto-max-bulk-len 2147483648 client-query-buffer-limit 2147483648]] { + proc zipmap_encode_len {len} { + if {$len < 254} { + return [binary format c $len] + } else { + return [binary format ci 254 $len] + } + } + r config set sanitize-dump-payload no + + # Generates Zipmap with 1GB value - should fail lpSafeToAdd check + set val_len [expr {1024 * 1024 * 1024 + 1}] + + # Zipmap has 1 element + set zm [binary format c 1] + # Field is 1 byte long + append zm [zipmap_encode_len 1] + append zm "k" + # Value is 1GB long + append zm [zipmap_encode_len $val_len] + append zm [binary format c 0] + append zm [string repeat "A" $val_len] + # ZIPMAP_END marker + append zm [binary format c 255] + # Prepend RDB header + set zm_len [string length $zm] + set rdb_len [binary format cI 0x80 $zm_len] + set dump [binary format c 9] + append dump $rdb_len + append dump $zm + append dump [binary format s 9] + append dump [binary format w 0] + + catch {r RESTORE _hash 0 $dump} err + assert_match "*Bad data format*" $err + } +} {} {large-memory} + +test {corrupt payload: zipmap - 5 bytes length encoding for a small field} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no]] { + catch { + r restore key 0 "\x09\x11\x01\xfe\x04\x00\x00\x00\x01\x00\xff\x00\x04\x00\x76\x61\x6c\x31\xff\x09\x00\xf9\xd5\xa4\xf7\x7d\x00\x3f\x1b" + } err + assert_match "*Bad data format*" $err + verify_log_message 0 "*integrity check failed*" 0 + } +} + +test {corrupt payload: zipmap - 5 bytes length encoding for a small value} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no]] { + catch { + r restore key 0 "\x09\x0e\x01\x01\x6b\xfe\x04\x00\x00\x00\x00\x76\x61\x6c\x31\xff\x09\x00\xd0\xf9\xe4\x1d\xe4\xfb\x11\x4c" + } err + assert_match "*Bad data format*" $err + verify_log_message 0 "*integrity check 
failed*" 0 + } +} + +test {corrupt payload: zipmap - 5 bytes length encoding and a huge field} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + catch { + r restore key 0 "\x09\x41\x15\x02\x04\x6b\x65\x79\x31\x04\x00\x76\x61\x6c\x31\xfe\x04\x00\x00\x00\xfe\xff\xff\xff\xfd\x00\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\xff\x09\x00\x54\x2f\x0a\xca\x4e\x5c\x49\x9f" + } err + assert_match "*Bad data format*" $err + verify_log_message 0 "*integrity check failed*" 0 + } +} + +test {corrupt payload: stream - duplicated consumer PEL entry} { + start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] { + catch { + r restore key 0 
"\x15\x01\x10\x00\x00\x01\x9b\x0d\x56\xa9\xb7\x00\x00\x00\x00\x00\x00\x00\x00\xc3\x39\x40\x42\x15\x42\x00\x00\x00\x11\x00\x02\x01\x00\x01\x01\x01\x86\x66\x69\x65\x6c\x64\x31\x07\x00\x01\x40\x0f\x0a\x00\x01\x86\x76\x61\x6c\x75\x65\x31\x07\x04\x20\x0b\x02\xcd\xd9\x02\xe0\x01\x22\x01\x32\x07\x80\x1a\x04\x32\x07\x06\x01\xff\x02\x81\x00\x00\x01\x9b\x0d\x56\xb7\x90\x00\x81\x00\x00\x01\x9b\x0d\x56\xa9\xb7\x00\x00\x00\x02\x01\x07\x6d\x79\x67\x72\x6f\x75\x70\x81\x00\x00\x01\x9b\x0d\x56\xb7\x90\x00\x02\x02\x00\x00\x01\x9b\x0d\x56\xa9\xb7\x00\x00\x00\x00\x00\x00\x00\x00\x80\xd9\x56\x0d\x9b\x01\x00\x00\x01\x00\x00\x01\x9b\x0d\x56\xb7\x90\x00\x00\x00\x00\x00\x00\x00\x00\x80\xd9\x56\x0d\x9b\x01\x00\x00\x01\x01\x09\x63\x6f\x6e\x73\x75\x6d\x65\x72\x31\x80\xd9\x56\x0d\x9b\x01\x00\x00\x80\xd9\x56\x0d\x9b\x01\x00\x00\x02\x00\x00\x01\x9b\x0d\x56\xa9\xb7\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x9b\x0d\x56\xa9\xb7\x00\x00\x00\x00\x00\x00\x00\x00\x0c\x00\x00\x00\x00\x00\x00\x00\x00\x00\x09\x00\x4b\xe0\x99\x30\x67\x4d\xe5\x87" + } err + assert_match "*Bad data format*" $err + verify_log_message 0 "*Stream consumer PEL entry already has a consumer assigned*" 0 + } +} + } ;# tags diff --git a/tests/integration/dismiss-mem.tcl b/tests/integration/dismiss-mem.tcl index 2b0fbb3e4..50f125762 100644 --- a/tests/integration/dismiss-mem.tcl +++ b/tests/integration/dismiss-mem.tcl @@ -46,6 +46,15 @@ start_server {tags {"dismiss external:skip needs:debug"}} { # stream r xadd bigstream * entry1 $bigstr entry2 $bigstr + # array: dense slice populated with large string values, plus a + # sparsely-populated array whose indices span multiple slices. + for {set i 0} {$i < 32} {incr i} { + r arset dense_array $i $bigstr + } + for {set i 0} {$i < 16} {incr i} { + r arset sparse_array [expr {$i * 5000}] $bigstr + } + set digest [debug_digest] # Test both RDB (yes) and AOF (no) rewrite paths. 
foreach preamble {yes no} { diff --git a/tests/integration/replication.tcl b/tests/integration/replication.tcl index b3a03a2f9..05ed71ee0 100644 --- a/tests/integration/replication.tcl +++ b/tests/integration/replication.tcl @@ -1878,3 +1878,79 @@ start_server {tags {"repl external:skip"}} { } } } + +# Fullsync should not free the functions lib ctx while the replica has +# a timed out function that is still running. +foreach type {script function} { + start_server {tags {"repl external:skip"}} { + start_server {} { + set master [srv -1 client] + set master_host [srv -1 host] + set master_port [srv -1 port] + set replica [srv 0 client] + + test "Fullsync should not free scripting engine on a replica while a $type is running" { + $master config set repl-diskless-sync yes + $master config set repl-diskless-sync-delay 0 + # Set small client output buffer limit to trigger fullsync quickly + $master config set client-output-buffer-limit "replica 1k 1k 0" + $replica config set busy-reply-threshold 1 ;# script timeout in 1 ms + + # Load function + if {$type eq "function"} { + $master function load replace {#!lua name=blocklib + redis.register_function{ + function_name='blockfunc', + callback=function() while true do end end, + flags={'no-writes'} + } + } + } + + # Start replication + $replica replicaof $master_host $master_port + wait_for_sync $replica + + # Run the blocking script on replica + set rd [redis_deferring_client] + if {$type eq "script"} { + $rd eval {while true do end} 0 + } else { + $rd fcall_ro blockfunc 0 + } + + # Verify replica replies with BUSY + wait_for_condition 50 100 { + [catch {$replica ping} e] == 1 && [string match {*BUSY*} $e] + } else { + fail "$type didn't become busy" + } + + # Fills client output buffer and triggers fullsync + populate 5 bigkey 1000000 -1 + wait_for_condition 50 100 { + [s -1 sync_full] >= 2 + } else { + fail "Fullsync was not triggered" + } + + # Verify replica is still running the function + after 1000 + catch {$replica 
ping} e + assert_match {*BUSY*} $e "replica should still reply with BUSY" + + if {$type eq "script"} { + $replica script kill + } else { + $replica function kill + } + + # Verify replica is responsive again + catch {$rd read} result + $rd close + wait_for_sync $replica + assert_equal [$replica ping] "PONG" + } + } + } +} diff --git a/tests/support/util.tcl b/tests/support/util.tcl index 0aabb1463..e46da150a 100644 --- a/tests/support/util.tcl +++ b/tests/support/util.tcl @@ -801,8 +801,12 @@ proc generate_fuzzy_traffic_on_key {key type duration} { set set_commands {SADD SCARD SDIFF SDIFFSTORE SINTER SINTERSTORE SISMEMBER SMEMBERS SMOVE SPOP SRANDMEMBER SREM SSCAN SUNION SUNIONSTORE} set stream_commands {XACK XADD XCLAIM XDEL XGROUP XINFO XLEN XPENDING XRANGE XREAD XREADGROUP XREVRANGE XTRIM XDELEX XACKDEL XNACK} set vset_commands {VADD VREM} + set array_commands {ARSET ARGET ARDEL ARCOUNT ARMSET ARMGET ARGETRANGE ARDELRANGE ARINFO} + set commands [dict create string $string_commands hash $hash_commands zset $zset_commands list $list_commands set $set_commands stream $stream_commands vectorset $vset_commands array $array_commands] +if 0 { set gcra_commands {GCRA} - set commands [dict create string $string_commands hash $hash_commands zset $zset_commands list $list_commands set $set_commands stream $stream_commands vectorset $vset_commands gcra $gcra_commands] + dict set commands gcra $gcra_commands +} set cmds [dict get $commands $type] set start_time [clock seconds] @@ -863,6 +867,49 @@ proc generate_fuzzy_traffic_on_key {key type duration} { lappend cmd [randomValue] incr i 2 } + # Array commands need integer indices + if {$cmd == "ARSET"} { + lappend cmd $key + lappend cmd [randomInt 100000] ;# index + lappend cmd [randomValue] ;# value + incr i 3 + } + if {$cmd == "ARGET" || $cmd == "ARDEL"} { + lappend cmd $key + lappend cmd [randomInt 100000] ;# index + incr i 2 + } + if {$cmd == "ARCOUNT" || $cmd == "ARINFO"} { + lappend cmd $key + incr i 1 + } + if {$cmd == 
"ARMSET"} { + lappend cmd $key + # Add 2-4 index/value pairs + set npairs [expr {int(rand() * 3) + 2}] + for {set p 0} {$p < $npairs} {incr p} { + lappend cmd [randomInt 100000] + lappend cmd [randomValue] + } + incr i [expr {1 + $npairs * 2}] + } + if {$cmd == "ARMGET"} { + lappend cmd $key + # Add 2-4 indices + set nidx [expr {int(rand() * 3) + 2}] + for {set p 0} {$p < $nidx} {incr p} { + lappend cmd [randomInt 100000] + } + incr i [expr {1 + $nidx}] + } + if {$cmd == "ARGETRANGE" || $cmd == "ARDELRANGE"} { + lappend cmd $key + set idx1 [randomInt 100000] + set idx2 [expr {$idx1 + [randomInt 1000]}] + lappend cmd $idx1 + lappend cmd $idx2 + incr i 3 + } for {} {$i < $arity} {incr i} { if {$i == $firstkey || $i == $lastkey} { diff --git a/tests/unit/aofrw.tcl b/tests/unit/aofrw.tcl index 11324e18d..8341fcdbf 100644 --- a/tests/unit/aofrw.tcl +++ b/tests/unit/aofrw.tcl @@ -204,6 +204,70 @@ start_server {tags {"aofrw external:skip debug_defrag:skip"} overrides {aof-use- r FUNCTION LIST } {{library_name test engine LUA functions {{name test description {} flags {}}}}} + # Array AOF rewrite tests + test "AOF rewrite of array with mixed value types" { + r flushall + # Create array with various value types + r arset myarray 0 12345 ;# int + r arset myarray 1 "hello" ;# small string + r arset myarray 2 3.14159 ;# float + r arset myarray 100 [string repeat x 50] ;# large string + r arset myarray 10000 "sparse" ;# sparse index + set d1 [debug_digest] + r bgrewriteaof + waitForBgrewriteaof r + r debug loadaof + set d2 [debug_digest] + if {$d1 ne $d2} { + error "assertion:$d1 is not equal to $d2" + } + } + + test "AOF rewrite of array with insert_idx (circular buffer)" { + r flushall + # Create circular buffer using ARRING + for {set i 0} {$i < 25} {incr i} { + r arring myarray 10 "v$i" + } + # insert_idx should be 4 ((25-1) % 10 = 4) + set next_before [r arnext myarray] + set d1 [debug_digest] + + r bgrewriteaof + waitForBgrewriteaof r + r debug loadaof + + set d2 
[debug_digest] + if {$d1 ne $d2} { + error "assertion:$d1 is not equal to $d2" + } + # Verify insert_idx preserved + assert_equal $next_before [r arnext myarray] + + # Continue inserting - should continue from correct position + set new_idx [r arring myarray 10 "after_aof"] + assert_equal $next_before $new_idx + } + + test "AOF rewrite of array spanning multiple slices" { + r flushall + # Create array across multiple slices (slice_size = 4096) + for {set slice 0} {$slice < 5} {incr slice} { + set base [expr {$slice * 4096}] + for {set i 0} {$i < 20} {incr i} { + r arset myarray [expr {$base + $i * 100}] "s${slice}_v$i" + } + } + set d1 [debug_digest] + r bgrewriteaof + waitForBgrewriteaof r + r debug loadaof + set d2 [debug_digest] + if {$d1 ne $d2} { + error "assertion:$d1 is not equal to $d2" + } + } + test {BGREWRITEAOF is delayed if BGSAVE is in progress} { r flushall r set k v diff --git a/tests/unit/client-eviction.tcl b/tests/unit/client-eviction.tcl index 2e08715b8..afe32e4f9 100644 --- a/tests/unit/client-eviction.tcl +++ b/tests/unit/client-eviction.tcl @@ -611,5 +611,34 @@ start_server {} { } } +start_server {} { + r flushall + r client no-evict on + r config set maxmemory-clients 0 + + test "Verify blocked client eviction during unblock does not cause use-after-free" { + # Create a deferring client that will be blocked on stream + # Use a long stream name to make client memory usage exceed 200000 bytes + set rd [redis_deferring_client] + $rd XREAD BLOCK 0 STREAMS mystream stream_[string repeat x 200000] $ $ + + # Wait for the client to be blocked + wait_for_condition 50 100 { + [s blocked_clients] eq {1} + } else { + fail "Client was not blocked" + } + + # Now lower MAXMEMORY-CLIENTS to a low value and use + # XADD to unblock the blocked client, triggering eviction. 
+ r MULTI + r CONFIG SET MAXMEMORY-CLIENTS 100000 ;# Put in MULTI to defer blocked client eviction until after EXEC + r XADD mystream * field val + r EXEC + r PING + $rd close + } +} + } ;# tags diff --git a/tests/unit/cluster/announced-endpoints.tcl b/tests/unit/cluster/announced-endpoints.tcl index a37ca58d1..58643a2a7 100644 --- a/tests/unit/cluster/announced-endpoints.tcl +++ b/tests/unit/cluster/announced-endpoints.tcl @@ -72,4 +72,97 @@ start_cluster 2 2 {tags {external:skip cluster}} { fail "Cluster announced port was not updated in cluster slots" } } + + # Tests for cluster-announce-ip validation + test "cluster-announce-ip validation" { + # Reject control characters in IP-like values + catch {R 0 config set cluster-announce-ip "192.168.1.100\nnext"} err + assert_match "*alphanumeric*" $err + + catch {R 0 config set cluster-announce-ip "10.0.0.1\ttab"} err + assert_match "*alphanumeric*" $err + + catch {R 0 config set cluster-announce-ip "1.2.3.4\r\n"} err + assert_match "*alphanumeric*" $err + + # Reject control characters in hostname-like values + catch {R 0 config set cluster-announce-ip "redis-node\nnext"} err + assert_match "*alphanumeric*" $err + + catch {R 0 config set cluster-announce-ip "redis-node\ttab"} err + assert_match "*alphanumeric*" $err + + catch {R 0 config set cluster-announce-ip "redis-node\r\n"} err + assert_match "*alphanumeric*" $err + + # Accept valid IPv4 + R 0 config set cluster-announce-ip "192.168.1.100" + assert_equal "192.168.1.100" [lindex [R 0 config get cluster-announce-ip] 1] + + # Accept valid IPv6 + R 0 config set cluster-announce-ip "2001:db8::1" + assert_equal "2001:db8::1" [lindex [R 0 config get cluster-announce-ip] 1] + + # Accept valid hostname + R 0 config set cluster-announce-ip "redis-node-1.example.com" + assert_equal "redis-node-1.example.com" [lindex [R 0 config get cluster-announce-ip] 1] + + # Can be cleared + R 0 config set cluster-announce-ip "" + assert_equal "" [lindex [R 0 config get 
cluster-announce-ip] 1] + } + + # Tests for cluster-announce-human-nodename validation + test "cluster-announce-human-nodename validation" { + # Reject control characters + catch {R 0 config set cluster-announce-human-nodename "badchar\nnext"} err + assert_match "*invalid character*" $err + + catch {R 0 config set cluster-announce-human-nodename "bad\ttab"} err + assert_match "*invalid character*" $err + + catch {R 0 config set cluster-announce-human-nodename "bad\r\nline"} err + assert_match "*invalid character*" $err + + # Reject delimiter characters (comma, equals, space) + catch {R 0 config set cluster-announce-human-nodename "bad,comma"} err + assert_match "*invalid character*" $err + + catch {R 0 config set cluster-announce-human-nodename "bad=equals"} err + assert_match "*invalid character*" $err + + catch {R 0 config set cluster-announce-human-nodename "bad space"} err + assert_match "*invalid character*" $err + + # Reject quote characters (double quote, single quote, backslash) + catch {R 0 config set cluster-announce-human-nodename "bad\"quote"} err + assert_match "*invalid character*" $err + + catch {R 0 config set cluster-announce-human-nodename "bad'quote"} err + assert_match "*invalid character*" $err + + catch {R 0 config set cluster-announce-human-nodename "bad\\slash"} err + assert_match "*invalid character*" $err + + # Accept valid names + R 0 config set cluster-announce-human-nodename "my-redis-node-1" + assert_equal "my-redis-node-1" [lindex [R 0 config get cluster-announce-human-nodename] 1] + } + + # DoS prevention test: verify server can restart after CLUSTER SAVECONFIG + test "cluster-announce-ip persists correctly with CLUSTER SAVECONFIG" { + R 0 config set cluster-announce-ip "192.168.1.100" + R 0 cluster saveconfig + + # Verify the IP appears in CLUSTER NODES output + assert_match "*192.168.1.100*" [R 0 cluster nodes] + } + + test "cluster-announce-human-nodename persists correctly with CLUSTER SAVECONFIG" { + R 0 config set 
cluster-announce-human-nodename "production-node-1" + R 0 cluster saveconfig + + # Verify the nodename is set correctly + assert_equal "production-node-1" [lindex [R 0 config get cluster-announce-human-nodename] 1] + } } diff --git a/tests/unit/dump.tcl b/tests/unit/dump.tcl index 923e391b4..1a5c01ea4 100644 --- a/tests/unit/dump.tcl +++ b/tests/unit/dump.tcl @@ -158,6 +158,19 @@ start_server {tags {"dump"}} { close_replication_stream $repl } {} {needs:repl} + test {RESTORE fail with invalid payload size} { + # Payload with mismatched size: claims 0xFFFFFFFFFFFFFFF7 bytes (max uint64 - 8) but provides no data + # \x00 = String type + # \x81 = 64-bit length marker + # \xFF\xFF\xFF\xFF\xFF\xFF\xFF\xF7 = 18446744073709551607 in big-endian + # \x0c\x00 = RDB version + # \x00... = fake CRC64 + set encoded "\x00\x81\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xF7\x0c\x00\x00\x00\x00\x00\x00\x00\x00\x00" + r del test + catch {r restore test 0 $encoded} e + set e + } {*Bad data format*} + test {DUMP of non existing key returns nil} { r dump nonexisting_key } {} diff --git a/tests/unit/gcra.tcl b/tests/unit/gcra.tcl index 1080e76f7..1721334cb 100644 --- a/tests/unit/gcra.tcl +++ b/tests/unit/gcra.tcl @@ -1,4 +1,5 @@ start_server {tags {"gcra" "external:skip"}} { +if 0 { test {GCRA - argument validation} { # Wrong number of arguments (too few) catch {r gcra} err @@ -236,8 +237,10 @@ start_server {tags {"gcra" "external:skip"}} { assert {[r pttl mykey] > 0} } } +} start_server {tags {"gcra" "external:skip"}} { +if 0 { test {GCRA - RDB save and reload preserves value} { r del mykey r gcra mykey 5 1 60 @@ -333,8 +336,10 @@ start_server {tags {"gcra" "external:skip"}} { assert_equal $digest_before $digest_after } {} {needs:debug} } +} start_server {tags {"gcra repl" "external:skip"}} { +if 0 { set replica [srv 0 client] set replica_host [srv 0 host] set replica_port [srv 0 port] @@ -368,3 +373,4 @@ start_server {tags {"gcra repl" "external:skip"}} { } {} {external:skip} } } +} diff --git 
a/tests/unit/memefficiency.tcl b/tests/unit/memefficiency.tcl index 895248606..f488ca85f 100644 --- a/tests/unit/memefficiency.tcl +++ b/tests/unit/memefficiency.tcl @@ -1113,6 +1113,97 @@ run_solo {defrag} { } ;# standalone } } + + if {[string match {*jemalloc*} [s mem_allocator]] && + [r debug mallctl arenas.page] <= 8192 && + $type eq "standalone"} { ;# skip in cluster mode and non-jemalloc + test "Active defrag arrays: $type" { + r flushdb + r config set hz 100 + r config set activedefrag no + wait_for_defrag_stop 500 100 + r config resetstat + r config set active-defrag-max-scan-fields 100 + r config set active-defrag-threshold-lower 1 + r config set active-defrag-cycle-min 65 + r config set active-defrag-cycle-max 75 + r config set active-defrag-ignore-bytes 512kb + r config set maxmemory 0 + + # Create two large arrays with interleaved allocations. Indices are + # one full slice apart so the surviving array is stored as many + # separate slices and uses superdir mode. + set rd [redis_deferring_client] + set payload [string repeat A 500] + set elements 3000 + set base 8388608 + set count 0 + for {set j 0} {$j < $elements} {incr j} { + set idx [expr {$base + $j * 4096}] + $rd arset bigarray1 $idx "a1:$j:$payload" + $rd arset bigarray2 $idx "a2:$j:$payload" + + incr count + discard_replies_every $rd $count 1000 2000 + } + set remaining [expr {($count % 1000) * 2}] + for {set j 0} {$j < $remaining} {incr j} { + $rd read + } + + assert_equal $elements [r arcount bigarray1] + assert_equal $elements [r arcount bigarray2] + assert_morethan [dict get [r arinfo bigarray1] directory-size] 0 + + # Free one full array to create fragmentation around the surviving + # array's slices and string allocations. 
+ r del bigarray2 + + after 120 ;# serverCron only updates the info once in 100ms + r config set latency-monitor-threshold 5 + r latency reset + + set digest [debug_digest] + catch {r config set activedefrag yes} e + if {[r config get activedefrag] eq "activedefrag yes"} { + wait_for_condition 50 100 { + [s total_active_defrag_time] ne 0 + } else { + after 120 ;# serverCron only updates the info once in 100ms + puts [r info memory] + puts [r info stats] + puts [r memory malloc-stats] + fail "defrag not started." + } + + # This test only needs to verify that active defrag reached the + # array and processed it without corrupting the value. We do + # not require the allocator to fully converge to a no-fragmentation + # state on every platform. + wait_for_condition 500 100 { + [s active_defrag_key_hits] + [s active_defrag_key_misses] > 0 + } else { + after 120 ;# serverCron only updates the info once in 100ms + puts [r info memory] + puts [r info stats] + puts [r memory malloc-stats] + fail "array defrag did not touch the key." + } + + r config set activedefrag no + wait_for_defrag_stop 500 100 + } + + # Verify the array stayed intact after active defrag touched it. + assert_equal $elements [r arcount bigarray1] + assert_equal "a1:0:$payload" [r arget bigarray1 $base] + assert_equal "a1:1234:$payload" [r arget bigarray1 [expr {$base + 1234 * 4096}]] + assert_equal "a1:2999:$payload" [r arget bigarray1 [expr {$base + 2999 * 4096}]] + assert_equal $digest [debug_digest] + assert_equal OK [r save] ;# Iterates all pointers again after defrag. + expr 1 + } {1} + } } test "Active defrag can't be triggered during replicaof database flush. 
See issue #14267" { diff --git a/tests/unit/pubsub.tcl b/tests/unit/pubsub.tcl index 115970a31..afcddee77 100644 --- a/tests/unit/pubsub.tcl +++ b/tests/unit/pubsub.tcl @@ -1369,5 +1369,134 @@ start_server {tags {"pubsub network"}} { assert_equal [r publish foo vaz] {1} assert_equal [r read] {message foo vaz} } {} {resp3} - +} + +start_server {tags {"pubsub network"}} { + # Helper proc for tests that subscribe multiple times until hitting OOM + proc test_subscribe_oom_loop {cmd description clients} { + test "$cmd $description fails with OOM when memory limit exceeded" { + # Set 10MB memory limit + r config set maxmemory 10485760 + r config set maxmemory-policy noeviction + + # Create clients + if {$clients == 1} { + set rd [redis_deferring_client] + } else { + set rd1 [redis_deferring_client] + set rd2 [redis_deferring_client] + } + + set base_str [string repeat "a" 2048] + set success_count 0 + set oom_occurred 0 + + # Try to subscribe until we hit OOM + for {set i 0} {$i < 5000} {incr i} { + # Select client + if {$clients == 1} { + set client $rd + } else { + set client [expr {$i % 2 ? 
$rd1 : $rd2}] + } + + # Build channel/pattern name + if {$cmd eq "psubscribe"} { + set channel_name "${base_str}${i}*" + } else { + set channel_name "${base_str}${i}" + } + + $client $cmd $channel_name + if {[catch {$client read} err]} { + if {[string match "*OOM command not allowed*" $err]} { + set oom_occurred 1 + break + } + error "Unexpected error: $err" + } + incr success_count + } + + # Verify we had at least one success and hit OOM + assert {$success_count > 10} + assert {$oom_occurred == 1} + + # Close clients + if {$clients == 1} { + $rd close + } else { + $rd1 close + $rd2 close + } + } + } + + # Helper proc for tests with single large channel that immediately fails + proc test_subscribe_large_channel_oom {cmd channel_type} { + test "$cmd with large $channel_type name fails due to OOM" { + # Set maxmemory to 2MB + r config set maxmemory 2097152 + r config set maxmemory-policy noeviction + + # Create large channel/pattern name: 2MB + set channel_name [string repeat "a" 2097152] + + # Create a single pubsub client + set rd [redis_deferring_client] + + # Subscribe should fail with OOM error + $rd $cmd $channel_name + assert_error "*OOM command not allowed when used memory > 'maxmemory'*" {$rd read} + + # Cleanup + $rd close + } + } + + # Helper proc for tests with small success then large failure + proc test_subscribe_small_then_large_oom {cmd channel_type} { + test "$cmd succeeds with small $channel_type but fails with large $channel_type due to OOM" { + # Set maxmemory to 5MB + r config set maxmemory 5242880 + r config set maxmemory-policy noeviction + + # Create channel names: first 10KB, second 5MB + set channel1 [string repeat "a" 10240] + set channel2 [string repeat "b" 5242880] + + # Create a single pubsub client + set rd [redis_deferring_client] + + # First subscribe should succeed (10KB) + $rd $cmd $channel1 + set reply1 [$rd read] + assert_equal [list $cmd] [lindex $reply1 0] + + # Second subscribe should fail with OOM error (5MB exceeds limit) + 
$rd $cmd $channel2 + assert_error "*OOM command not allowed when used memory > 'maxmemory'*" {$rd read} + + # Cleanup + $rd close + } + } + + # Multiple subscriptions until OOM tests + test_subscribe_oom_loop "subscribe" "" 1 + test_subscribe_oom_loop "ssubscribe" "" 1 + test_subscribe_oom_loop "psubscribe" "" 1 + test_subscribe_oom_loop "subscribe" "with 2 clients" 2 + test_subscribe_oom_loop "ssubscribe" "with 2 clients" 2 + test_subscribe_oom_loop "psubscribe" "with 2 clients" 2 + + # Single large channel immediate OOM tests + test_subscribe_large_channel_oom "subscribe" "channel" + test_subscribe_large_channel_oom "psubscribe" "pattern" + test_subscribe_large_channel_oom "ssubscribe" "shard channel" + + # Small success then large failure tests + test_subscribe_small_then_large_oom "subscribe" "channel" + test_subscribe_small_then_large_oom "psubscribe" "pattern" + test_subscribe_small_then_large_oom "ssubscribe" "channel" } diff --git a/tests/unit/type/array.tcl b/tests/unit/type/array.tcl new file mode 100644 index 000000000..d0f62fe3e --- /dev/null +++ b/tests/unit/type/array.tcl @@ -0,0 +1,3114 @@ +start_server { + tags {"array"} +} { + # Basic ARSET/ARGET tests + test {ARSET and ARGET basics} { + r del myarray + assert_equal 1 [r arset myarray 0 hello] + assert_equal hello [r arget myarray 0] + assert_equal {} [r arget myarray 1] + } + + test {ARSET overwrites existing value} { + r del myarray + assert_equal 1 [r arset myarray 0 hello] + assert_equal 0 [r arset myarray 0 world] + assert_equal world [r arget myarray 0] + } + + test {ARGET non-existing key} { + r del myarray + assert_equal {} [r arget myarray 0] + } + + test {ARGET validates index even on non-existing key} { + r del myarray + assert_error {*invalid array index*} {r arget myarray not-an-index} + } + + test {ARSET/ARGET with integer values} { + r del myarray + r arset myarray 0 12345 + assert_equal 12345 [r arget myarray 0] + } + + test {ARSET/ARGET with float values} { + r del myarray + r 
arset myarray 0 3.14159 + assert_equal 3.14159 [r arget myarray 0] + } + + test {ARSET/ARGET with small strings} { + r del myarray + r arset myarray 0 abc + assert_equal abc [r arget myarray 0] + } + + test {ARSET/ARGET with large string} { + r del myarray + set longstr [string repeat x 100] + r arset myarray 0 $longstr + assert_equal $longstr [r arget myarray 0] + } + + test {ARSET/ARGET with empty string} { + r del myarray + r arset myarray 0 "" + assert_equal "" [r arget myarray 0] + } + + # ARLEN and ARCOUNT tests + test {ARLEN and ARCOUNT basics} { + r del myarray + assert_equal 0 [r arlen myarray] + assert_equal 0 [r arcount myarray] + + r arset myarray 0 a + assert_equal 1 [r arlen myarray] + assert_equal 1 [r arcount myarray] + + r arset myarray 5 b + assert_equal 6 [r arlen myarray] + assert_equal 2 [r arcount myarray] + + r arset myarray 100 c + assert_equal 101 [r arlen myarray] + assert_equal 3 [r arcount myarray] + } + + # ARDEL tests + test {ARDEL basics} { + r del myarray + r arset myarray 0 a + r arset myarray 1 b + r arset myarray 2 c + + assert_equal 1 [r ardel myarray 1] + assert_equal {} [r arget myarray 1] + assert_equal 2 [r arcount myarray] + + # Delete non-existing index returns 0 + assert_equal 0 [r ardel myarray 1] + } + + test {ARDEL multiple indices} { + r del myarray + r arset myarray 0 a + r arset myarray 1 b + r arset myarray 2 c + r arset myarray 3 d + + assert_equal 3 [r ardel myarray 0 1 2] + assert_equal 1 [r arcount myarray] + } + + test {ARDEL last element deletes key} { + r del myarray + r arset myarray 0 a + r ardel myarray 0 + assert_equal 0 [r exists myarray] + } + + test {ARDEL notifies array event before del when key is removed} { + set orig_notify [lindex [r config get notify-keyspace-events] 1] + r config set notify-keyspace-events KEA + r del myarray + r arset myarray 0 a + + set rd1 [redis_deferring_client] + assert_equal {1} [psubscribe $rd1 *] + assert_equal 1 [r ardel myarray 0] + + assert_match "pmessage * 
__keyspace@*__:myarray ardel" [$rd1 read] + assert_match "pmessage * __keyevent@*__:ardel myarray" [$rd1 read] + assert_match "pmessage * __keyspace@*__:myarray del" [$rd1 read] + assert_match "pmessage * __keyevent@*__:del myarray" [$rd1 read] + + $rd1 close + r config set notify-keyspace-events $orig_notify + } + + # ARDELRANGE tests + test {ARDELRANGE basics} { + r del myarray + for {set i 0} {$i < 10} {incr i} { + r arset myarray $i [expr $i * 10] + } + assert_equal 10 [r arcount myarray] + + assert_equal 5 [r ardelrange myarray 2 6] + assert_equal 5 [r arcount myarray] + } + + test {ARDELRANGE reverse order} { + r del myarray + for {set i 0} {$i < 10} {incr i} { + r arset myarray $i [expr $i * 10] + } + + assert_equal 5 [r ardelrange myarray 6 2] + assert_equal 5 [r arcount myarray] + } + + test {ARDELRANGE notifies array event before del when key is removed} { + set orig_notify [lindex [r config get notify-keyspace-events] 1] + r config set notify-keyspace-events KEA + r del myarray + assert_equal 3 [r arset myarray 0 a b c] + + set rd1 [redis_deferring_client] + assert_equal {1} [psubscribe $rd1 *] + assert_equal 3 [r ardelrange myarray 0 2] + + assert_match "pmessage * __keyspace@*__:myarray ardelrange" [$rd1 read] + assert_match "pmessage * __keyevent@*__:ardelrange myarray" [$rd1 read] + assert_match "pmessage * __keyspace@*__:myarray del" [$rd1 read] + assert_match "pmessage * __keyevent@*__:del myarray" [$rd1 read] + + $rd1 close + r config set notify-keyspace-events $orig_notify + } + + # ARMSET and ARMGET tests + test {ARMSET basics} { + r del myarray + assert_equal 3 [r armset myarray 0 a 1 b 2 c] + assert_equal a [r arget myarray 0] + assert_equal b [r arget myarray 1] + assert_equal c [r arget myarray 2] + } + + test {ARMSET returns only newly filled slots} { + r del myarray + r arset myarray 0 a + assert_equal 1 [r armset myarray 0 aa 1 b] + assert_equal aa [r arget myarray 0] + assert_equal b [r arget myarray 1] + } + + test {ARMGET basics} { + r 
del myarray + r arset myarray 0 a + r arset myarray 1 b + r arset myarray 5 c + + set result [r armget myarray 0 1 5 3] + assert_equal a [lindex $result 0] + assert_equal b [lindex $result 1] + assert_equal c [lindex $result 2] + assert_equal {} [lindex $result 3] + } + + # ARGETRANGE and contiguous ARSET tests + test {ARGETRANGE basics} { + r del myarray + r armset myarray 0 a 1 b 2 c 3 d 4 e + + set result [r argetrange myarray 1 3] + assert_equal {b c d} $result + } + + test {ARGETRANGE reverse} { + r del myarray + r armset myarray 0 a 1 b 2 c 3 d 4 e + + set result [r argetrange myarray 3 1] + assert_equal {d c b} $result + } + + test {ARGETRANGE errors when requested range exceeds the hard limit} { + assert_error {*range exceeds maximum of 1000000 items*} {r argetrange myarray 0 1000000} + } + + test {ARGETRANGE reverse errors when requested range exceeds the hard limit} { + assert_error {*range exceeds maximum of 1000000 items*} {r argetrange myarray 1000000 0} + } + + # ARSCAN tests + test {ARSCAN returns only existing elements with indices} { + r del myarray + r arset myarray 0 a + r arset myarray 5 b + r arset myarray 9 c + + set result [r arscan myarray 0 10] + assert_equal {{0 a} {5 b} {9 c}} $result + } + + test {ARSCAN on empty range returns empty array} { + r del myarray + r arset myarray 500 x + + set result [r arscan myarray 0 100] + assert_equal {} $result + } + + test {ARSCAN reversed range} { + r del myarray + r arset myarray 0 a + r arset myarray 5 b + + set result [r arscan myarray 5 0] + assert_equal {{5 b} {0 a}} $result + } + + test {ARSCAN on non-existent key returns empty array} { + r del nokey + set result [r arscan nokey 0 100] + assert_equal {} $result + } + + test {ARSCAN with mixed value types} { + r del myarray + r arset myarray 0 string + r arset myarray 1 12345 + r arset myarray 2 3.14 + + set result [r arscan myarray 0 10] + assert_equal 3 [llength $result] + assert_equal {0 string} [lindex $result 0] + assert_equal {1 12345} 
[lindex $result 1] + assert_equal {2 3.14} [lindex $result 2] + } + + # ARGREP tests + test {ARGREP MATCH returns matching indexes} { + r del myarray + r armset myarray 0 alpha 1 beta 2 alphabet 5 gamma + + assert_equal {0 2} [r argrep myarray - + MATCH alpha] + } + + test {ARGREP supports WITHVALUES and reverse ranges} { + r del myarray + r armset myarray 0 alpha 1 beta 2 alphabet 3 delta + + assert_equal {{2 alphabet} {0 alpha}} \ + [r argrep myarray 3 0 MATCH alpha WITHVALUES] + } + + test {ARGREP supports AND, GLOB, and NOCASE} { + r del myarray + r armset myarray 0 RedisArray 1 redis-match 2 array-only 3 plain + + assert_equal {0} [r argrep myarray - + MATCH redis GLOB *array* AND NOCASE] + } + + test {ARGREP supports RE predicates} { + r del myarray + r armset myarray 0 foo123 1 bar 2 zoo999 3 Foo777 + + assert_equal {0 2 3} [r argrep myarray - + RE {^.*[0-9]{3}$}] + assert_equal {0 3} [r argrep myarray - + RE {^foo[0-9]+$} NOCASE] + } + + test {ARGREP RE literal alternation forms still match correctly} { + r del myarray + r armset myarray 0 foo 1 bar 2 baz 3 foobar 4 BAR 5 quxfoo 6 zedbar \ + 7 plain 8 ALPS 9 alphabet + + assert_equal {0 1 3 5 6} [r argrep myarray - + RE {foo|bar}] + assert_equal {0 1 3 4 5 6} [r argrep myarray - + RE {foo|bar} NOCASE] + assert_equal {0 1 4} [r argrep myarray - + RE {^(foo|bar)$} NOCASE] + assert_equal {0 1 3 4} [r argrep myarray - + RE {^(foo|bar)} NOCASE] + assert_equal {0 1 3 4 5 6} [r argrep myarray - + RE {(foo|bar)$} NOCASE] + assert_equal {8 9} [r argrep myarray - + RE {alpha|alps} NOCASE] + } + + test {ARGREP RE grouped alternation smoke test} { + r del myarray + r armset myarray 0 item-foo-123 1 ITEM-BAR-456 2 item-baz 3 plain + + assert_equal {0 1} \ + [r argrep myarray - + RE {^item-(foo|bar)-[0-9]{3}$} NOCASE] + } + + test {ARGREP enforces RE length and rejects backreferences} { + r del myarray + set re2048 [string repeat a 2048] + set re2049 [string repeat a 2049] + r arset myarray 0 $re2048 + + assert_equal {0} 
[r argrep myarray - + RE $re2048] + assert_error {*maximum is 2048 bytes*} {r argrep myarray - + RE $re2049} + assert_error {*backreferences are not supported*} {r argrep myarray - + RE {(a)\1}} + assert_error {*regular expression is empty*} {r argrep myarray - + RE {}} + } + + test {ARGREP LIMIT stops after enough matches} { + r del myarray + r armset myarray 0 hit-1 1 hit-2 2 miss 3 hit-3 + + assert_equal {0 1} [r argrep myarray - + MATCH hit LIMIT 2] + } + + test {ARGREP allows mixed predicate and option order, last wins} { + r del myarray + r armset myarray 0 RedisArray 1 redis-match 2 array-only 3 plain + + assert_equal {0} \ + [r argrep myarray - + OR MATCH redis LIMIT 3 GLOB *array* AND LIMIT 1 NOCASE] + } + + test {ARGREP enforces the predicate limit} { + r del myarray + r arset myarray 0 foo + + set cmd [list r argrep myarray - +] + for {set i 0} {$i < 250} {incr i} { + lappend cmd MATCH foo + } + assert_equal {0} [uplevel 1 $cmd] + + lappend cmd MATCH foo + assert_error {*maximum is 250*} [list uplevel 1 $cmd] + } + + test {ARGREP handles missing keys and syntax errors} { + r del nokey + assert_equal {} [r argrep nokey - + MATCH foo] + assert_error {*syntax error*} {r argrep myarray - + LIMIT 1} + assert_error {*invalid regular expression*} {r argrep myarray - + RE {(}} + } + + test {ARGREP rejects malformed braced hex regex escapes} { + r del myarray + r arset myarray 0 hello + + set invalid [format "\\%c%c1" 120 123] + assert_error {*invalid regular expression*} [list r argrep myarray - + RE $invalid] + assert_error {*invalid regular expression*} [list r argrep myarray - + RE $invalid NOCASE] + } + + test {ARSET contiguous write basics} { + r del myarray + assert_equal 3 [r arset myarray 0 a b c] + assert_equal a [r arget myarray 0] + assert_equal b [r arget myarray 1] + assert_equal c [r arget myarray 2] + } + + # ARINSERT tests + test {ARINSERT basics} { + r del myarray + assert_equal 0 [r arinsert myarray a] + assert_equal 1 [r arinsert myarray b] + 
assert_equal 2 [r arinsert myarray c] + + assert_equal a [r arget myarray 0] + assert_equal b [r arget myarray 1] + assert_equal c [r arget myarray 2] + } + + test {ARRING creates ring buffer} { + r del myarray + for {set i 0} {$i < 10} {incr i} { + r arring myarray 5 $i + } + + # After wrap, we should have indices 0-4 with values 5-9 + assert_equal 5 [r arget myarray 0] + assert_equal 6 [r arget myarray 1] + assert_equal 7 [r arget myarray 2] + assert_equal 8 [r arget myarray 3] + assert_equal 9 [r arget myarray 4] + assert_equal 5 [r arcount myarray] + } + + # ARNEXT, ARSEEK tests + test {ARNEXT tracks insert position} { + r del myarray + assert_equal 0 [r arnext myarray] + + r arinsert myarray a + assert_equal 1 [r arnext myarray] + + r arinsert myarray b + assert_equal 2 [r arnext myarray] + } + + test {ARSEEK} { + r del myarray + r arinsert myarray a + r arinsert myarray b + + assert_equal 1 [r arseek myarray 10] + r arinsert myarray c + assert_equal 11 [r arnext myarray] + assert_equal c [r arget myarray 10] + } + + test {ARNEXT returns null when insert cursor is exhausted} { + r del myarray + r arinsert myarray a + + # Move to terminal cursor state: insert_idx = UINT64_MAX-1 + r arseek myarray 18446744073709551615 + assert_equal {} [r arnext myarray] + assert_error {*insert index overflow*} {r arinsert myarray b} + } + + # ARLASTITEMS tests + test {ARLASTITEMS basics} { + r del myarray + for {set i 0} {$i < 5} {incr i} { + r arinsert myarray [expr $i * 10] + } + + set result [r arlastitems myarray 3] + assert_equal {20 30 40} $result + + set result [r arlastitems myarray 3 REV] + assert_equal {40 30 20} $result + } + + test {ARLASTITEMS after ARSEEK 0 uses array tail} { + r del myarray + for {set i 0} {$i < 5} {incr i} { + r arinsert myarray [expr $i * 10] + } + + assert_equal 1 [r arseek myarray 0] + assert_equal {20 30 40} [r arlastitems myarray 3] + assert_equal {40 30 20} [r arlastitems myarray 3 REV] + } + + # AROP tests + test {AROP SUM} { + r del 
myarray + r armset myarray 0 10 1 20 2 30 + + set result [r arop myarray 0 2 SUM] + assert_equal 60 $result + } + + test {AROP MIN} { + r del myarray + r armset myarray 0 30 1 10 2 20 + + set result [r arop myarray 0 2 MIN] + assert_equal 10 $result + } + + test {AROP MAX} { + r del myarray + r armset myarray 0 30 1 10 2 20 + + set result [r arop myarray 0 2 MAX] + assert_equal 30 $result + } + + test {AROP MATCH} { + r del myarray + r armset myarray 0 hello 1 world 2 hello 3 foo + + assert_equal 2 [r arop myarray 0 3 MATCH hello] + assert_equal 1 [r arop myarray 0 3 MATCH world] + assert_equal 0 [r arop myarray 0 3 MATCH bar] + } + + test {AROP USED} { + r del myarray + r armset myarray 0 a 2 b 5 c + + assert_equal 3 [r arop myarray 0 10 USED] + } + + test {AROP AND/OR/XOR} { + r del myarray + # Use decimal values: 255, 15, 240 + r armset myarray 0 255 1 15 2 240 + + assert_equal 0 [r arop myarray 0 2 AND] + assert_equal 255 [r arop myarray 0 2 OR] + assert_equal 0 [r arop myarray 0 2 XOR] + } + + test {AROP AND/OR/XOR truncates floats toward zero} { + r del myarray + # Truncated values: 7, 3, 1 + r armset myarray 0 7.9 1 3.2 2 1.8 + + assert_equal 1 [r arop myarray 0 2 AND] + assert_equal 7 [r arop myarray 0 2 OR] + assert_equal 5 [r arop myarray 0 2 XOR] + } + + # ARINFO tests + test {ARINFO basics} { + r del myarray + r armset myarray 0 a 1 b 100 c + + set info [r arinfo myarray] + assert_equal 3 [dict get $info count] + assert_equal 101 [dict get $info len] + } + + # Type check tests + test {Array commands on wrong type} { + r del mykey + r set mykey value + assert_error {WRONGTYPE*} {r arget mykey 0} + assert_error {WRONGTYPE*} {r arset mykey 0 foo} + assert_error {WRONGTYPE*} {r arlen mykey} + assert_error {WRONGTYPE*} {r arcount mykey} + } + + # TYPE command + test {TYPE returns array} { + r del myarray + r arset myarray 0 hello + assert_equal array [r type myarray] + } + + # OBJECT ENCODING command + test {OBJECT ENCODING returns sliced-array} { + r del 
myarray + r arset myarray 0 hello + assert_equal sliced-array [r object encoding myarray] + } + + # Sparse indices test + test {Sparse array with large gaps} { + r del myarray + r arset myarray 0 a + r arset myarray 10000 b + r arset myarray 1000000 c + + assert_equal a [r arget myarray 0] + assert_equal b [r arget myarray 10000] + assert_equal c [r arget myarray 1000000] + assert_equal 3 [r arcount myarray] + assert_equal 1000001 [r arlen myarray] + } + + # RDB persistence test + test {Array survives RDB save and load} { + r del myarray + r armset myarray 0 hello 1 world 100 test + r arseek myarray 101 + r arinsert myarray value + + r bgsave + waitForBgsave r + + r debug reload + assert_equal hello [r arget myarray 0] + assert_equal world [r arget myarray 1] + assert_equal test [r arget myarray 100] + assert_equal value [r arget myarray 101] + assert_equal 102 [r arnext myarray] + } {} {needs:debug} + + # ========================================================================= + # Edge case tests: directory resizing, slice transitions, window growth + # ========================================================================= + + # Directory resizing tests + test {Directory resize - many slices} { + r del myarray + # Default slice size is 4096, so indices 0, 4096, 8192, 12288, etc. 
+ # create new slices requiring directory growth + set slice_size 4096 + for {set i 0} {$i < 20} {incr i} { + set idx [expr {$i * $slice_size}] + r arset myarray $idx "slice$i" + } + + # Verify all values + for {set i 0} {$i < 20} {incr i} { + set idx [expr {$i * $slice_size}] + assert_equal "slice$i" [r arget myarray $idx] + } + assert_equal 20 [r arcount myarray] + } + + test {Directory resize - very large index jump} { + r del myarray + r arset myarray 0 "start" + # Jump to a very high slice index, forcing directory allocation + r arset myarray 1000000 "middle" + r arset myarray 10000000 "end" + + assert_equal "start" [r arget myarray 0] + assert_equal "middle" [r arget myarray 1000000] + assert_equal "end" [r arget myarray 10000000] + assert_equal 3 [r arcount myarray] + } + + # Dense slice window growth tests + test {Dense window growth - right expansion} { + r del myarray + # Start with element at offset 0, then add elements going right + # Initial window is small (8 elements), this forces growth + for {set i 0} {$i < 100} {incr i} { + r arset myarray $i "val$i" + } + + # Verify all values stored correctly + for {set i 0} {$i < 100} {incr i} { + assert_equal "val$i" [r arget myarray $i] + } + assert_equal 100 [r arcount myarray] + + # Verify window grew (avg-dense-size should be >= 128 to fit 100 elements) + set info [r arinfo myarray FULL] + assert_equal 1 [dict get $info dense-slices] + assert {[dict get $info avg-dense-size] >= 128} + } + + test {Dense window growth - left expansion} { + r del myarray + # Start with element at high offset, then add elements going left + # This forces window to expand leftward + r arset myarray 500 "anchor" + for {set i 499} {$i >= 400} {incr i -1} { + r arset myarray $i "val$i" + } + + assert_equal "anchor" [r arget myarray 500] + for {set i 400} {$i < 500} {incr i} { + assert_equal "val$i" [r arget myarray $i] + } + assert_equal 101 [r arcount myarray] + + # Verify window grew (avg-dense-size should be >= 128 to fit 101 
elements) + set info [r arinfo myarray FULL] + assert_equal 1 [dict get $info dense-slices] + assert {[dict get $info avg-dense-size] >= 128} + } + + test {Dense window growth - bidirectional expansion} { + r del myarray + # Start in middle, expand both directions + r arset myarray 500 "center" + for {set i 1} {$i <= 50} {incr i} { + r arset myarray [expr {500 - $i}] "left$i" + r arset myarray [expr {500 + $i}] "right$i" + } + + assert_equal "center" [r arget myarray 500] + for {set i 1} {$i <= 50} {incr i} { + assert_equal "left$i" [r arget myarray [expr {500 - $i}]] + assert_equal "right$i" [r arget myarray [expr {500 + $i}]] + } + assert_equal 101 [r arcount myarray] + + # Verify window grew (avg-dense-size should be >= 128 to fit 101 elements) + set info [r arinfo myarray FULL] + assert_equal 1 [dict get $info dense-slices] + assert {[dict get $info avg-dense-size] >= 128} + } + + # Sparse to dense promotion tests + test {Sparse to dense promotion - exceed kmax threshold} { + r del myarray + # kmax default is 10, add 11+ elements to force promotion + # Use sparse pattern (scattered offsets within one slice) + for {set i 0} {$i < 15} {incr i} { + # Scattered within first slice (0-4095) + set idx [expr {$i * 100}] + r arset myarray $idx "sparse$i" + } + + # Verify all values after promotion + for {set i 0} {$i < 15} {incr i} { + set idx [expr {$i * 100}] + assert_equal "sparse$i" [r arget myarray $idx] + } + assert_equal 15 [r arcount myarray] + + # Verify promotion actually happened using ARINFO FULL + set info [r arinfo myarray FULL] + assert_equal 1 [dict get $info dense-slices] + assert_equal 0 [dict get $info sparse-slices] + } + + test {Sparse to dense promotion - then continue adding} { + r del myarray + # First create sparse slice, then promote, then add more + for {set i 0} {$i < 5} {incr i} { + r arset myarray [expr {$i * 200}] "phase1_$i" + } + + # Verify starts as sparse + set info [r arinfo myarray FULL] + assert_equal 0 [dict get $info dense-slices] 
# Dense to sparse demotion tests

# Fills one slice with 50 contiguous elements, deletes all but three and
# checks via ARINFO FULL that the slice is reported as sparse afterwards.
test {Dense to sparse demotion - delete below kmin threshold} {
    r del myarray
    # Create dense slice with many elements
    for {set i 0} {$i < 50} {incr i} {
        r arset myarray $i "val$i"
    }
    assert_equal 50 [r arcount myarray]

    # Verify starts as dense
    set info [r arinfo myarray FULL]
    assert_equal 1 [dict get $info dense-slices]
    assert_equal 0 [dict get $info sparse-slices]

    # Delete most elements, leaving only 3 (below kmin=5)
    for {set i 3} {$i < 50} {incr i} {
        r ardel myarray $i
    }

    # Verify remaining elements
    assert_equal "val0" [r arget myarray 0]
    assert_equal "val1" [r arget myarray 1]
    assert_equal "val2" [r arget myarray 2]
    assert_equal 3 [r arcount myarray]

    # Verify demotion happened
    set info [r arinfo myarray FULL]
    assert_equal 0 [dict get $info dense-slices]
    assert_equal 1 [dict get $info sparse-slices]
}

# Same demotion as above, then writes again (indexes 100..104) to check that
# both the surviving and the newly written values are readable.
test {Dense to sparse demotion - then add again} {
    r del myarray
    # Create dense, demote to sparse, then add more
    for {set i 0} {$i < 30} {incr i} {
        r arset myarray $i "initial$i"
    }

    # Delete to demote
    for {set i 4} {$i < 30} {incr i} {
        r ardel myarray $i
    }
    assert_equal 4 [r arcount myarray]

    # Verify demotion happened
    set info [r arinfo myarray FULL]
    assert_equal 0 [dict get $info dense-slices]
    assert_equal 1 [dict get $info sparse-slices]

    # Add new elements (should work in sparse mode)
    for {set i 100} {$i < 105} {incr i} {
        r arset myarray $i "new$i"
    }

    # Verify old and new
    for {set i 0} {$i < 4} {incr i} {
        assert_equal "initial$i" [r arget myarray $i]
    }
    for {set i 100} {$i < 105} {incr i} {
        assert_equal "new$i" [r arget myarray $i]
    }
}

# Combined stress test

# Writes 20 elements (stride 50) into each of five 4096-wide index ranges,
# deletes the upper ten of each range and verifies count and survivors.
test {Stress test - mixed operations across multiple slices} {
    r del myarray
    set slice_size 4096

    # Create elements across 5 slices
    for {set slice 0} {$slice < 5} {incr slice} {
        set base [expr {$slice * $slice_size}]
        # Add 20 elements per slice
        for {set i 0} {$i < 20} {incr i} {
            r arset myarray [expr {$base + $i * 50}] "s${slice}_e$i"
        }
    }
    assert_equal 100 [r arcount myarray]

    # Delete half from each slice (should cause some demotions)
    for {set slice 0} {$slice < 5} {incr slice} {
        set base [expr {$slice * $slice_size}]
        for {set i 10} {$i < 20} {incr i} {
            r ardel myarray [expr {$base + $i * 50}]
        }
    }
    assert_equal 50 [r arcount myarray]

    # Verify remaining elements
    for {set slice 0} {$slice < 5} {incr slice} {
        set base [expr {$slice * $slice_size}]
        for {set i 0} {$i < 10} {incr i} {
            assert_equal "s${slice}_e$i" [r arget myarray [expr {$base + $i * 50}]]
        }
    }
}

# Three grow-to-100 / shrink-to-10 cycles on the same key; the ten survivors
# must carry the values written by the last cycle.
test {Stress test - rapid insert/delete cycles} {
    r del myarray

    # Multiple cycles of growth and shrinkage
    for {set cycle 0} {$cycle < 3} {incr cycle} {
        # Grow
        for {set i 0} {$i < 100} {incr i} {
            r arset myarray $i "cycle${cycle}_$i"
        }
        assert_equal 100 [r arcount myarray]

        # Shrink (but leave some)
        for {set i 10} {$i < 100} {incr i} {
            r ardel myarray $i
        }
        assert_equal 10 [r arcount myarray]
    }

    # Verify final state
    for {set i 0} {$i < 10} {incr i} {
        assert_equal "cycle2_$i" [r arget myarray $i]
    }
}

# RDB with complex state

# Builds a key holding a 50-element run, three scattered elements at
# 4096..4500 and a single element at 40960, then saves, reloads and checks
# that every value reads back unchanged.
test {RDB persistence with sparse and dense slices} {
    r del myarray

    # Create mix of sparse and dense slices
    # Slice 0: dense (many elements)
    for {set i 0} {$i < 50} {incr i} {
        r arset myarray $i "dense$i"
    }

    # Slice 1 (offset 4096): sparse (few elements)
    r arset myarray 4096 "sparse0"
    r arset myarray 4200 "sparse1"
    r arset myarray 4500 "sparse2"

    # Slice 10 (offset 40960): single element
    r arset myarray 40960 "lonely"

    r bgsave
    waitForBgsave r
    r debug reload

    # Verify all types survived
    for {set i 0} {$i < 50} {incr i} {
        assert_equal "dense$i" [r arget myarray $i]
    }
    assert_equal "sparse0" [r arget myarray 4096]
    assert_equal "sparse1" [r arget myarray 4200]
    assert_equal "sparse2" [r arget myarray 4500]
    assert_equal "lonely" [r arget myarray 40960]
} {} {needs:debug}

# Regression test for dense window boundary bug (GitHub issue)
# When a dense slice window doubles but doesn't reach ar_slice_size,
# offset + winsize could exceed the slice boundary (4096), causing crashes.
# NOTE: the write order below (2100 first, then contiguous, then stride 10)
# is what drives the window growth; do not reorder the loops.
test {Regression - dense window growth must not exceed slice boundary} {
    r del myarray
    set slice_size 4096

    # Create a dense slice with elements at high offsets within the slice.
    # Start at offset 2100 with a small window, then force growth.
    # Initial window: offset=2100, winsize=64 (or similar small power of 2)
    r arset myarray 2100 "start"

    # Add elements to grow the window to the right.
    # After several doublings, winsize might become 2048.
    # With offset=2100 and winsize=2048, end would be 4148 > 4096 (BUG!)
    # The fix adjusts offset so the window stays within bounds.
    for {set i 2101} {$i < 2200} {incr i} {
        r arset myarray $i "val$i"
    }

    # Now force further right growth that would exceed boundary without fix
    for {set i 2200} {$i < 3500} {incr i 10} {
        r arset myarray $i "val$i"
    }

    # Verify all values are accessible (would crash before the fix)
    assert_equal "start" [r arget myarray 2100]
    assert_equal "val2150" [r arget myarray 2150]
    assert_equal "val3000" [r arget myarray 3000]

    # Verify window respects slice boundary via ARINFO FULL
    set info [r arinfo myarray FULL]
    set avg_size [dict get $info avg-dense-size]
    # With the fix, window should be properly sized (at most slice_size)
    assert {$avg_size <= $slice_size}
}
# Helper to generate random values of different encoding types.
#
# Returns one random value; each of the four shapes below is picked with
# equal probability. The number and order of rand() calls is part of the
# behaviour: callers seed the generator with srand() to get reproducible runs.
proc random_value {} {
    set type [expr {int(rand() * 4)}]
    switch $type {
        0 {
            # INT encoding: small integers
            set val [expr {int(rand() * 200000) - 100000}]
        }
        1 {
            # FLOAT encoding: synthetic float with random digits
            set int_digits [expr {int(rand() * 5) + 1}] ;# 1-5 digits before dot
            set frac_digits [expr {int(rand() * 5) + 1}] ;# 1-5 digits after dot
            set int_part ""
            for {set i 0} {$i < $int_digits} {incr i} {
                append int_part [expr {int(rand() * 10)}]
            }
            set frac_part ""
            for {set i 0} {$i < $frac_digits} {incr i} {
                append frac_part [expr {int(rand() * 10)}]
            }
            # Add negative sign randomly
            set sign [expr {rand() < 0.5 ? "-" : ""}]
            set val "${sign}${int_part}.${frac_part}"
        }
        2 {
            # SMALLSTR encoding: short strings (1-6 bytes)
            set len [expr {int(rand() * 6) + 1}]
            set val ""
            for {set i 0} {$i < $len} {incr i} {
                append val [format %c [expr {int(rand() * 26) + 97}]] ;# a-z
            }
        }
        3 {
            # arString pointer: longer strings (10-30 bytes)
            set len [expr {int(rand() * 21) + 10}]
            set val ""
            for {set i 0} {$i < $len} {incr i} {
                append val [format %c [expr {int(rand() * 26) + 97}]] ;# a-z
            }
        }
    }
    return $val
}

# Returns a random array index drawn from a few fixed regions:
# 35% in [0,256), 20% in [4096,4608), 20% just above 8388608 (2^23),
# 15% just above 16777216 (2^24), and 10% anywhere in [0,30000000).
proc random_array_index {} {
    set roll [expr {int(rand() * 100)}]
    if {$roll < 35} {
        return [expr {int(rand() * 256)}]
    } elseif {$roll < 55} {
        return [expr {4096 + int(rand() * 512)}]
    } elseif {$roll < 75} {
        return [expr {8388608 + int(rand() * 8192)}]
    } elseif {$roll < 90} {
        return [expr {16777216 + int(rand() * 8192)}]
    } else {
        return [expr {int(rand() * 30000000)}]
    }
}

# Removes every index in the inclusive range [lo, hi] from the caller's Tcl
# array named arrname (bounds may be given in either order).
# Returns the number of entries removed.
proc model_array_delrange {arrname lo hi} {
    upvar 1 $arrname expected

    if {$lo > $hi} {
        set tmp $lo
        set lo $hi
        set hi $tmp
    }

    set deleted 0
    # [array names] is evaluated once up front, so unsetting inside the loop
    # does not disturb the iteration.
    foreach idx [array names expected] {
        if {$idx >= $lo && $idx <= $hi} {
            unset expected($idx)
            incr deleted
        }
    }
    return $deleted
}

# Returns the caller's array as a list of {index value} pairs sorted by
# ascending integer index.
proc model_array_scan {arrname} {
    upvar 1 $arrname expected

    set result {}
    foreach idx [lsort -integer [array names expected]] {
        lappend result [list $idx $expected($idx)]
    }
    return $result
}

# Returns a random integer in the inclusive range [lo, hi].
proc iterator_stress_rand_between {lo hi} {
    return [expr {$lo + int(rand() * ($hi - $lo + 1))}]
}

# Returns a random index for the iterator stress test.
#   slice_size - slice width configured for the scenario
#   mode       - one of "mixed", "dense" or "superdir"; each mode weights the
#                regions differently (low indexes, a few indexes around
#                multiples of slice_size, and ranges starting at 8388608,
#                16777216 and 25165824).
# Any other mode falls through to a uniform pick in [0,30000000).
# NOTE(review): the large bases look like superdir boundaries - confirm
# against the array implementation.
proc iterator_stress_random_index {slice_size mode} {
    set roll [expr {int(rand() * 100)}]
    switch -- $mode {
        mixed {
            if {$roll < 25} {
                return [expr {int(rand() * ($slice_size * 2))}]
            } elseif {$roll < 45} {
                return [expr {$slice_size - 4 + int(rand() * 9)}]
            } elseif {$roll < 60} {
                return [expr {$slice_size * 2 - 4 + int(rand() * 9)}]
            } elseif {$roll < 78} {
                return [expr {8388608 + int(rand() * ($slice_size * 2))}]
            } elseif {$roll < 92} {
                return [expr {16777216 + int(rand() * ($slice_size * 2))}]
            } else {
                return [expr {int(rand() * 30000000)}]
            }
        }
        dense {
            if {$roll < 60} {
                return [expr {int(rand() * ($slice_size * 2))}]
            } elseif {$roll < 80} {
                return [expr {$slice_size - 8 + int(rand() * 17)}]
            } else {
                return [expr {int(rand() * ($slice_size * 8))}]
            }
        }
        superdir {
            if {$roll < 20} {
                return [expr {int(rand() * 1024)}]
            } elseif {$roll < 45} {
                return [expr {8388608 + int(rand() * ($slice_size * 4))}]
            } elseif {$roll < 70} {
                return [expr {16777216 + int(rand() * ($slice_size * 4))}]
            } elseif {$roll < 90} {
                return [expr {25165824 + int(rand() * ($slice_size * 4))}]
            } else {
                return [expr {int(rand() * 40000000)}]
            }
        }
    }
    return [expr {int(rand() * 30000000)}]
}

# Returns the indexes of the caller's array sorted as integers, descending
# when reverse is true and ascending otherwise.
proc iterator_stress_sorted_indices {arrname reverse} {
    upvar 1 $arrname model
    if {$reverse} {
        return [lsort -integer -decreasing [array names model]]
    }
    return [lsort -integer [array names model]]
}

# Model of a range scan: returns {index value} pairs for every index between
# start and end (inclusive). start > end walks the range in descending order.
# limit > 0 caps the number of pairs; limit <= 0 means no cap.
proc iterator_stress_scan {arrname start end limit} {
    upvar 1 $arrname model
    set reverse [expr {$start > $end}]
    set lo [expr {$reverse ? $end : $start}]
    set hi [expr {$reverse ? $start : $end}]
    set result {}
    set emitted 0

    foreach idx [iterator_stress_sorted_indices model $reverse] {
        if {$idx < $lo || $idx > $hi} continue
        lappend result [list $idx $model($idx)]
        incr emitted
        if {$limit > 0 && $emitted >= $limit} break
    }
    return $result
}

# Model of a range grep over the caller's array.
#   start/end  - inclusive bounds; start > end walks in descending order
#   type       - "EXACT" compares whole values, anything else is a substring
#                search
#   pattern    - text to look for
#   nocase     - when true both pattern and value are lower-cased first
#   withvalues - when true the result holds {index value} pairs (with the
#                original, un-lowered value), otherwise bare indexes
#   limit      - maximum number of matches; limit <= 0 means no cap, the same
#                convention iterator_stress_scan uses
proc iterator_stress_argrep {arrname start end type pattern nocase withvalues limit} {
    upvar 1 $arrname model
    set reverse [expr {$start > $end}]
    set lo [expr {$reverse ? $end : $start}]
    set hi [expr {$reverse ? $start : $end}]
    set pattern_cmp $pattern
    if {$nocase} { set pattern_cmp [string tolower $pattern_cmp] }
    set result {}
    set emitted 0

    foreach idx [iterator_stress_sorted_indices model $reverse] {
        if {$idx < $lo || $idx > $hi} continue
        set value $model($idx)
        set cmp $value
        if {$nocase} { set cmp [string tolower $cmp] }

        if {$type eq "EXACT"} {
            set match [expr {$cmp eq $pattern_cmp}]
        } else {
            set match [expr {[string first $pattern_cmp $cmp] != -1}]
        }

        if {$match} {
            if {$withvalues} {
                lappend result [list $idx $value]
            } else {
                lappend result $idx
            }
            incr emitted
            # Only enforce a positive limit; previously limit 0 stopped after
            # the first match, unlike iterator_stress_scan.
            if {$limit > 0 && $emitted >= $limit} break
        }
    }
    return $result
}

# Returns how many indexes of the caller's array lie between start and end
# (inclusive, bounds in either order).
proc iterator_stress_arop_used {arrname start end} {
    upvar 1 $arrname model
    set lo [expr {$start > $end ? $end : $start}]
    set hi [expr {$start > $end ? $start : $end}]
    set used 0

    foreach idx [array names model] {
        if {$idx >= $lo && $idx <= $hi} { incr used }
    }
    return $used
}

# Returns how many values in the inclusive index range are string-equal to
# needle.
proc iterator_stress_arop_match {arrname start end needle} {
    upvar 1 $arrname model
    set lo [expr {$start > $end ? $end : $start}]
    set hi [expr {$start > $end ? $start : $end}]
    set matches 0

    foreach idx [array names model] {
        if {$idx >= $lo && $idx <= $hi && $model($idx) eq $needle} {
            incr matches
        }
    }
    return $matches
}

# Returns the floating point sum of every value in the inclusive index range
# that [string is double -strict] accepts, or an empty string when the range
# holds no such value.
# NOTE(review): [string is double] also accepts spellings such as "Inf" and
# "NaN"; random_value can only emit them by chance as short lowercase
# strings - confirm whether the server treats those as numeric.
proc iterator_stress_arop_sum {arrname start end} {
    upvar 1 $arrname model
    set lo [expr {$start > $end ? $end : $start}]
    set hi [expr {$start > $end ? $start : $end}]
    set sum 0.0
    set has_numeric 0

    foreach idx [array names model] {
        if {$idx < $lo || $idx > $hi} continue
        if {[string is double -strict $model($idx)]} {
            set sum [expr {$sum + ($model($idx) + 0.0)}]
            set has_numeric 1
        }
    }

    if {!$has_numeric} { return {} }
    return $sum
}

# Returns the value stored at a randomly chosen index of the caller's array,
# or a fresh random_value when the array is empty.
proc iterator_stress_pick_existing_value {arrname} {
    upvar 1 $arrname model
    set keys [array names model]
    if {[llength $keys] == 0} { return [random_value] }
    return $model([lindex $keys [expr {int(rand() * [llength $keys])}]])
}

# Returns a random non-empty substring of value (value itself when it is at
# most one character long).
proc iterator_stress_pick_match_pattern {value} {
    set len [string length $value]
    if {$len <= 1} { return $value }
    set start [expr {int(rand() * $len)}]
    set width [expr {1 + int(rand() * ($len - $start))}]
    return [string range $value $start [expr {$start + $width - 1}]]
}

# Returns value with the case of each alphabetic character flipped with
# probability 0.5; non-alphabetic characters are copied unchanged.
proc iterator_stress_flip_case {value} {
    set out ""
    foreach ch [split $value ""] {
        # rand() is only consumed for alphabetic characters (short-circuit ||).
        if {![string is alpha -strict $ch] || rand() < 0.5} {
            append out $ch
        } elseif {$ch eq [string tolower $ch]} {
            append out [string toupper $ch]
        } else {
            append out [string tolower $ch]
        }
    }
    return $out
}

# Fails the running test, naming label, when expected and got differ as
# strings.
proc iterator_stress_check_equal {label expected got} {
    if {$expected ne $got} {
        fail "$label mismatch - expected '$expected', got '$got'"
    }
}

# Like iterator_stress_check_equal but for sums: an empty string must match
# an empty string exactly, numeric values may differ by at most 1e-9.
proc iterator_stress_check_sum {label expected got} {
    if {$expected eq {} || $got eq {}} {
        if {$expected ne $got} {
            fail "$label mismatch - expected '$expected', got '$got'"
        }
        return
    }

    if {abs(($expected + 0.0) - ($got + 0.0)) > 1e-9} {
        fail "$label mismatch - expected '$expected', got '$got'"
    }
}

# Compares the server key "myarray" against the caller's model array.
#   r          - unused; server commands go through the `r` command
#   arrname    - name of the model array in the caller's frame
#   slice_size - slice width of the scenario (used to pick probe ranges)
#   mode       - index distribution, see iterator_stress_random_index
#   tag, step  - only used to label failure messages; the parity of step also
#                selects the direction of the full scan
#   full_scan  - when true the whole key is scanned and compared as well
# Checks: existence/count, optional full ARSCAN, then two random probes each
# covering ARSCAN LIMIT, ARGREP, AROP USED, AROP MATCH and AROP SUM.
proc iterator_stress_validate {r arrname slice_size mode tag step full_scan} {
    upvar 1 $arrname model
    set count [array size model]

    if {$count == 0} {
        # An empty model means the key itself must be gone.
        iterator_stress_check_equal "$tag/$step exists" 0 [r exists myarray]
        if {$full_scan} {
            iterator_stress_check_equal "$tag/$step empty-scan" {} \
                [r arscan myarray 0 50000000]
        }
        return
    }

    iterator_stress_check_equal "$tag/$step count" $count [r arcount myarray]
    if {$full_scan} {
        # Alternate between ascending and descending full scans.
        set start [expr {$step % 2 == 0 ? 0 : 50000000}]
        set end [expr {$step % 2 == 0 ? 50000000 : 0}]
        iterator_stress_check_equal "$tag/$step full-scan" \
            [iterator_stress_scan model $start $end 0] \
            [r arscan myarray $start $end]
    }

    for {set probe 0} {$probe < 2} {incr probe} {
        set start [iterator_stress_random_index $slice_size $mode]
        set end [iterator_stress_random_index $slice_size $mode]
        # Occasionally pin a bound to the extremes of the index space.
        if {rand() < 0.15} { set start 0 }
        if {rand() < 0.15} { set end 50000000 }

        set limit [iterator_stress_rand_between 1 10]
        iterator_stress_check_equal "$tag/$step scan/$probe" \
            [iterator_stress_scan model $start $end $limit] \
            [r arscan myarray $start $end LIMIT $limit]

        set grep_type [expr {rand() < 0.5 ? "EXACT" : "MATCH"}]
        if {rand() < 0.7} {
            # Mostly search for something that exists so matches are common.
            set pattern [iterator_stress_pick_existing_value model]
            if {$grep_type eq "MATCH"} {
                set pattern [iterator_stress_pick_match_pattern $pattern]
            }
        } else {
            set pattern [random_value]
        }

        set withvalues [expr {rand() < 0.5}]
        set nocase [expr {rand() < 0.5}]
        if {$nocase} { set pattern [iterator_stress_flip_case $pattern] }
        set grep_limit [iterator_stress_rand_between 1 8]
        set grep_cmd [list r argrep myarray $start $end $grep_type $pattern LIMIT $grep_limit]
        if {$withvalues} { lappend grep_cmd WITHVALUES }
        if {$nocase} { lappend grep_cmd NOCASE }

        # grep_cmd is a proper list, so expand it in place; no need to
        # evaluate it in the caller's frame.
        iterator_stress_check_equal "$tag/$step argrep/$probe" \
            [iterator_stress_argrep model $start $end $grep_type $pattern $nocase $withvalues $grep_limit] \
            [{*}$grep_cmd]

        iterator_stress_check_equal "$tag/$step used/$probe" \
            [iterator_stress_arop_used model $start $end] \
            [r arop myarray $start $end USED]

        set needle [iterator_stress_pick_existing_value model]
        iterator_stress_check_equal "$tag/$step match/$probe" \
            [iterator_stress_arop_match model $start $end $needle] \
            [r arop myarray $start $end MATCH $needle]

        iterator_stress_check_sum "$tag/$step sum/$probe" \
            [iterator_stress_arop_sum model $start $end] \
            [r arop myarray $start $end SUM]
    }
}

# Applies one random mutation to both the server key "myarray" and the
# caller's model array. Operation mix by roll:
#   <30 single ARSET, <45 multi-value ARSET at a random index, <58 ARDEL,
#   <78 ARDELRANGE with 1-3 ranges, <90 multi-value ARSET near the start of a
#   slice, otherwise ARDELRANGE near the start of a slice.
#   r - unused; server commands go through the `r` command
proc iterator_stress_apply_operation {r arrname slice_size mode} {
    upvar 1 $arrname model
    set roll [expr {int(rand() * 100)}]

    if {$roll < 30} {
        set idx [iterator_stress_random_index $slice_size $mode]
        set val [random_value]
        r arset myarray $idx $val
        set model($idx) $val
    } elseif {$roll < 45} {
        set start [iterator_stress_random_index $slice_size $mode]
        set values {}
        set len [iterator_stress_rand_between 2 8]

        for {set i 0} {$i < $len} {incr i} {
            set val [random_value]
            lappend values $val
            set model([expr {$start + $i}]) $val
        }
        r arset myarray $start {*}$values
    } elseif {$roll < 58} {
        set idx [iterator_stress_random_index $slice_size $mode]
        r ardel myarray $idx
        # The index may well be absent from the model; only that case is
        # ignored (a blanket catch would hide unrelated errors too).
        unset -nocomplain model($idx)
    } elseif {$roll < 78} {
        set args {}
        set nranges [iterator_stress_rand_between 1 3]

        for {set i 0} {$i < $nranges} {incr i} {
            set lo [iterator_stress_random_index $slice_size $mode]
            set hi [iterator_stress_random_index $slice_size $mode]
            lappend args $lo $hi
            model_array_delrange model $lo $hi
        }
        r ardelrange myarray {*}$args
    } elseif {$roll < 90} {
        # Round a random index down to its slice base, then start within the
        # first few slots of that slice.
        set base [expr {[iterator_stress_random_index $slice_size $mode] / $slice_size * $slice_size}]
        set start [expr {$base + [iterator_stress_rand_between 0 [expr {$slice_size > 16 ? 16 : $slice_size - 1}]]}]
        set values {}
        set len [iterator_stress_rand_between 4 10]

        for {set i 0} {$i < $len} {incr i} {
            set val [random_value]
            lappend values $val
            set model([expr {$start + $i}]) $val
        }
        r arset myarray $start {*}$values
    } else {
        set base [expr {[iterator_stress_random_index $slice_size $mode] / $slice_size * $slice_size}]
        set lo [expr {$base + [iterator_stress_rand_between 0 [expr {$slice_size > 24 ? 24 : $slice_size - 1}]]}]
        set hi [expr {$base + [iterator_stress_rand_between 0 [expr {$slice_size > 24 ? 24 : $slice_size - 1}]]}]
        model_array_delrange model $lo $hi
        r ardelrange myarray $lo $hi
    }
}
# Writes 2000 random values at random indexes below 100000 (fixed seed),
# reloads from RDB and checks every value against a Tcl-side model, then
# deletes half of the model's keys and checks the remainder.
test {Random testing - large scale with RDB verification} {
    r flushdb
    expr {srand(54321)} ;# Fixed seed for reproducibility
    set max_idx 100000 ;# Range to test multiple slices
    set num_writes 2000

    # Tcl-side tracking
    array set expected {}

    # Phase 1: Random writes with mixed value types
    for {set i 0} {$i < $num_writes} {incr i} {
        set idx [expr {int(rand() * $max_idx)}]
        set val [random_value]
        r arset myarray $idx $val
        set expected($idx) $val
    }

    # Random indexes may repeat, so the model size (not num_writes) is the
    # expected element count.
    set expected_count [array size expected]
    set count_before [r arcount myarray]
    assert_equal $expected_count $count_before

    # Save and reload
    r bgsave
    waitForBgsave r
    r debug reload

    # Verify count preserved
    assert_equal $count_before [r arcount myarray]

    # Verify all expected values
    foreach idx [array names expected] {
        set got [r arget myarray $idx]
        if {$got ne $expected($idx)} {
            fail "After reload: idx $idx - expected '$expected($idx)', got '$got'"
        }
    }

    # Phase 2: Random deletes (delete half)
    # keys_list is a snapshot, so unsetting model entries while walking it
    # is safe.
    set keys_list [array names expected]
    set delete_count [expr {[llength $keys_list] / 2}]
    for {set i 0} {$i < $delete_count} {incr i} {
        set idx [lindex $keys_list $i]
        r ardel myarray $idx
        unset expected($idx)
    }

    # Verify remaining
    set remaining [array size expected]
    assert_equal $remaining [r arcount myarray]

    foreach idx [array names expected] {
        assert_equal $expected($idx) [r arget myarray $idx]
    }
} {} {needs:debug}

# Runs the iterator_stress_* model against the server for several scenarios.
# Each scenario is {name slice_size kmax kmin mode steps seed}; the three
# array-* config values are saved first and restored afterwards even when a
# scenario raises an error, which is then re-raised.
test {Random testing - iterator model stress across dense sparse and superdir} {
    set orig_slice_size [lindex [r config get array-slice-size] 1]
    set orig_kmax [lindex [r config get array-sparse-kmax] 1]
    set orig_kmin [lindex [r config get array-sparse-kmin] 1]
    set scenarios {
        {mixed-default 4096 10 5 mixed 120 111}
        {small-slices 256 6 3 dense 140 333}
        {superdir-heavy 1024 8 4 superdir 160 555}
        {superdir-heavy 1024 8 4 superdir 160 666}
    }

    # Capture any failure so the configuration below is always restored.
    set err [catch {
        foreach scenario $scenarios {
            lassign $scenario name slice_size kmax kmin mode steps seed
            r flushdb
            r config set array-sparse-kmax $kmax
            r config set array-sparse-kmin $kmin
            r config set array-slice-size $slice_size
            expr {srand($seed)}
            # Start every scenario from an empty model.
            catch {array unset model}
            array set model {}

            # Start each scenario with the exact superdir shape that
            # previously exposed iterator progress bugs.
            r arset myarray 43 a
            set model(43) a
            r arset myarray [expr {$slice_size + 490}] b
            set model([expr {$slice_size + 490}]) b
            r arset myarray 19245258 c
            set model(19245258) c

            # Step -1 labels the validation of the initial shape.
            iterator_stress_validate r model $slice_size $mode "$name/$seed" -1 1

            for {set step 0} {$step < $steps} {incr step} {
                iterator_stress_apply_operation r model $slice_size $mode
                # Full scan only every 20th step; random probes every step.
                iterator_stress_validate r model $slice_size $mode \
                    "$name/$seed" $step [expr {$step % 20 == 0}]
            }
        }
    } msg opts]

    r flushdb
    r config set array-sparse-kmax $orig_kmax
    r config set array-sparse-kmin $orig_kmin
    r config set array-slice-size $orig_slice_size

    # Re-raise the original error with its original options.
    if {$err} {
        return -options $opts $msg
    }
}

# =========================================================================
# Circular buffer (ring buffer) comprehensive tests
# =========================================================================

# ARRING with size 10 must return the write position (i % 10) and keep only
# the ten most recent values.
test {Circular buffer - ARRING basic wraparound} {
    r del myarray
    # Insert 20 values with MOD 10 - should wrap around twice
    for {set i 0} {$i < 20} {incr i} {
        set result [r arring myarray 10 "val$i"]
        assert_equal [expr {$i % 10}] $result
    }
    # Should have exactly 10 elements (0-9)
    assert_equal 10 [r arcount myarray]
    # Values should be the last 10 inserted (val10-val19)
    for {set i 0} {$i < 10} {incr i} {
        assert_equal "val[expr {$i + 10}]" [r arget myarray $i]
    }
}
myarray 1 "val$i" + } + assert_equal 1 [r arcount myarray] + assert_equal "val99" [r arget myarray 0] + } + + test {Circular buffer - ARRING preserves insert_idx through RDB} { + r del myarray + # Create a circular buffer, wrap around a few times + for {set i 0} {$i < 15} {incr i} { + r arring myarray 5 "val$i" + } + # insert_idx should now be 0 (15 % 5 = 0) + set next_before [r arnext myarray] + + # Save and reload + r bgsave + waitForBgsave r + r debug reload + + # Verify insert_idx is preserved + assert_equal $next_before [r arnext myarray] + + # Continue inserting - should continue from where it left off + r arring myarray 5 "after_reload" + # The next insert should be at position 1 (since we were at 0) + assert_equal "after_reload" [r arget myarray [expr {$next_before % 5}]] + } {} {needs:debug} + + test {Circular buffer - ARLASTITEMS with wraparound} { + r del myarray + # Create circular buffer with 8 items, MOD 5 + for {set i 0} {$i < 8} {incr i} { + r arring myarray 5 $i + } + # Values: 0->3, 1->4, 2->5, 3->6, 4->7 + # insert_idx = 3 (8 % 5 = 3) + + # ARLASTITEMS should return the N most recently inserted + set result [r arlastitems myarray 3] + # Last 3 inserted: 7, 6, 5 - in chronological order: 5, 6, 7 + assert_equal {5 6 7} $result + + # With REV flag + set result [r arlastitems myarray 3 REV] + assert_equal {7 6 5} $result + + # Request more items than exist + set result [r arlastitems myarray 10] + assert_equal 5 [llength $result] + } + + test {Circular buffer - ARLASTITEMS handles empty and partial cases} { + r del myarray + # Empty array + set result [r arlastitems myarray 5] + assert_equal {} $result + + # Fewer items than requested (no wraparound yet) + r arring myarray 10 a + r arring myarray 10 b + r arring myarray 10 c + + set result [r arlastitems myarray 5] + assert_equal {a b c} $result + } + + test {Circular buffer - ARNEXT tracks correctly with ARRING} { + r del myarray + # Insert with MOD, tracking position + # MOD wraps the insert 
position but ARNEXT continues until next wrap + for {set i 0} {$i < 7} {incr i} { + set expected_idx [expr {$i % 4}] + set result [r arring myarray 4 $i] + assert_equal $expected_idx $result + # ARNEXT: after a wraparound insert, it's expected_idx+1 + # Otherwise it's the running counter+1 until it wraps + if {$i < 4} { + # Before first wrap, ARNEXT is i+1 + assert_equal [expr {$i + 1}] [r arnext myarray] + } else { + # After wrap, ARNEXT is (position+1) + assert_equal [expr {$expected_idx + 1}] [r arnext myarray] + } + } + } + + test {Circular buffer - ARSEEK followed by ARRING} { + r del myarray + # Start inserting + r arinsert myarray a + r arinsert myarray b + r arinsert myarray c + # insert_idx = 2, next = 3 + + # Seek to position 10 + r arseek myarray 10 + assert_equal 10 [r arnext myarray] + + # Now use MOD - should reset behavior + r arring myarray 5 x + # This should insert at index 0 (10 % 5 = 0) + assert_equal x [r arget myarray 0] + } + + test {Circular buffer - ARSEEK 0 is honored on ARRING grow} { + r del myarray + for {set i 0} {$i < 5} {incr i} { + r arring myarray 3 "ring$i" + } + + assert_equal 1 [r arseek myarray 0] + r arring myarray 8 "grown" + + # ARSEEK 0 is an explicit cursor override, so grow should not repack + # first: the next ARRING write still goes to index 0. + assert_equal "grown" [r arget myarray 0] + assert_equal "ring4" [r arget myarray 1] + assert_equal "ring2" [r arget myarray 2] + assert_equal 1 [r arnext myarray] + } + + test {Circular buffer - ARRING growth uses new capacity after wrap} { + r del myarray + for {set i 0} {$i < 8} {incr i} { + r arring myarray 5 "v$i" + } + # Current ring window contains the latest 5 values: + # v3 v4 v5 v6 v7, with insert_idx at position 2. + + r arring myarray 8 "grown" + + # Growing must compact the wrapped ring first, so the new value uses + # the newly added capacity instead of overwriting low indexes again. 
+ assert_equal "v3" [r arget myarray 0] + assert_equal "v4" [r arget myarray 1] + assert_equal "v5" [r arget myarray 2] + assert_equal "v6" [r arget myarray 3] + assert_equal "v7" [r arget myarray 4] + assert_equal "grown" [r arget myarray 5] + assert_equal 6 [r arnext myarray] + } + + test {Circular buffer - Mixed ARSET and ARRING immediately restores ring size} { + r del myarray + # Use MOD to create ring buffer + for {set i 0} {$i < 5} {incr i} { + r arring myarray 3 "ring$i" + } + # After 5 inserts with MOD 3: + # Position 0: ring0 -> ring3 (overwritten) + # Position 1: ring1 -> ring4 (overwritten) + # Position 2: ring2 + # insert_idx=1, next=2 + + # Now manually set a value outside the ring + r arset myarray 100 "outside" + + # Ring buffer values should still be there + assert_equal "ring3" [r arget myarray 0] + assert_equal "ring4" [r arget myarray 1] + assert_equal "ring2" [r arget myarray 2] + assert_equal "outside" [r arget myarray 100] + + # Continue ring buffer. The ring size should be re-established + # immediately, so values outside the 0..2 window disappear at once. 
+ r arring myarray 3 "ring5" + assert_equal 3 [r arcount myarray] + assert_equal {} [r arget myarray 100] + assert_equal "ring5" [r arget myarray 0] + } + + test {Circular buffer - insert_idx survives RDB with complex state} { + r del myarray + # Create circular buffer across multiple slices + for {set i 0} {$i < 100} {incr i} { + # Use large MOD to spread across slices + r arring myarray 50 "v$i" + } + + set info_before [r arinfo myarray] + set next_before [r arnext myarray] + set count_before [r arcount myarray] + + # Also set some values outside the ring + r arset myarray 10000 "far_away" + + # Save and reload + r bgsave + waitForBgsave r + r debug reload + + # Verify state preserved + assert_equal $count_before [expr {[r arcount myarray] - 1}] ;# -1 for far_away + assert_equal $next_before [r arnext myarray] + assert_equal "far_away" [r arget myarray 10000] + + # Verify ring buffer content - last 50 values should be v50-v99 + for {set i 0} {$i < 50} {incr i} { + assert_equal "v[expr {$i + 50}]" [r arget myarray $i] + } + } {} {needs:debug} + + test {Circular buffer - ARLASTITEMS reverse order} { + r del myarray + # Create ring with wraparound + for {set i 0} {$i < 12} {incr i} { + r arring myarray 8 "v$i" + } + # After 12 inserts MOD 8: + # insert_idx = 12 % 8 = 4 - 1 = 3 (last inserted at position 3) + # Values: positions 0-7 contain v4-v11 + + # ARLASTITEMS returns most recent items in chronological order + set result [r arlastitems myarray 4] + # Last 4 inserted were v11, v10, v9, v8 - returned oldest to newest + assert_equal {v8 v9 v10 v11} $result + + # With REV flag - returned newest to oldest + set result [r arlastitems myarray 4 REV] + assert_equal {v11 v10 v9 v8} $result + + # Request all items + set result [r arlastitems myarray 100] + assert_equal 8 [llength $result] + } + + test {Circular buffer - ARRING truncation when size decreases} { + r del myarray + # Create ring buffer with MOD 10 + for {set i 0} {$i < 15} {incr i} { + r arring myarray 10 
"v$i" + } + # Now have 10 elements at positions 0-9 + # After 15 inserts: 0->v10, 1->v11, ..., 4->v14, 5->v5, ..., 9->v9 + assert_equal 10 [r arcount myarray] + + # Use smaller MOD - this truncates to positions 0-4 AND inserts new value + # The new insert goes to position (15 % 5) = 0, replacing v10 + r arring myarray 5 "truncated" + # Now have only 5 elements (positions 0-4), with position 0 = "truncated" + assert_equal 5 [r arcount myarray] + + # Verify values + assert_equal "truncated" [r arget myarray 0] ;# new value + assert_equal "v11" [r arget myarray 1] + assert_equal "v12" [r arget myarray 2] + assert_equal "v13" [r arget myarray 3] + assert_equal "v14" [r arget myarray 4] + + # Positions 5-9 should be empty (truncated) + assert_equal {} [r arget myarray 5] + assert_equal {} [r arget myarray 9] + } + + test {Circular buffer - ARRING shrink stops at first hole} { + r del myarray + for {set i 0} {$i < 5} {incr i} { + r arring myarray 5 "v$i" + } + + r ardel myarray 3 + r arring myarray 3 "new" + + assert_equal 2 [r arcount myarray] + assert_equal "v4" [r arget myarray 0] + assert_equal "new" [r arget myarray 1] + assert_equal {} [r arget myarray 2] + } + + test {Circular buffer - ARRING grow stops at first hole} { + r del myarray + for {set i 0} {$i < 8} {incr i} { + r arring myarray 5 "v$i" + } + + r ardel myarray 1 + r arring myarray 8 "grown" + + assert_equal 2 [r arcount myarray] + assert_equal "v7" [r arget myarray 0] + assert_equal "grown" [r arget myarray 1] + assert_equal {} [r arget myarray 2] + } + + test {Circular buffer - ARLASTITEMS with various counts and REV} { + r del myarray + # Create simple ring buffer + for {set i 0} {$i < 20} {incr i} { + r arring myarray 10 "item$i" + } + # Contains item10-item19 at positions 0-9 + + # Get exactly 1 item + assert_equal {item19} [r arlastitems myarray 1] + assert_equal {item19} [r arlastitems myarray 1 REV] + + # Get 3 items + set result [r arlastitems myarray 3] + assert_equal {item17 item18 item19} 
$result + set result [r arlastitems myarray 3 REV] + assert_equal {item19 item18 item17} $result + + # Get all 10 items + set result [r arlastitems myarray 10] + assert_equal 10 [llength $result] + assert_equal "item10" [lindex $result 0] + assert_equal "item19" [lindex $result end] + + # REV order for all items + set result [r arlastitems myarray 10 REV] + assert_equal "item19" [lindex $result 0] + assert_equal "item10" [lindex $result end] + } + + test {Circular buffer - ARLASTITEMS edge cases} { + r del myarray + # Empty array + assert_equal {} [r arlastitems myarray 5] + assert_equal {} [r arlastitems myarray 5 REV] + + # Single element + r arinsert myarray "only" + assert_equal {only} [r arlastitems myarray 1] + assert_equal {only} [r arlastitems myarray 10] + assert_equal {only} [r arlastitems myarray 1 REV] + + # Two elements - no wraparound yet + r arinsert myarray "second" + assert_equal {only second} [r arlastitems myarray 5] + assert_equal {second only} [r arlastitems myarray 5 REV] + } + + # ============================================================ + # Regression tests for bugs found during code review + # ============================================================ + + test {Regression #3 - arTruncate must decrement count correctly} { + r del myarray + # Fill array with 20 elements + for {set i 0} {$i < 20} {incr i} { + r arset myarray $i "val$i" + } + assert_equal 20 [r arcount myarray] + + # Use ARRING to trigger truncation + # ARSEEK to 16 first (presumably insert_idx 15 = last-written slot - confirm), then insert with MOD 10 + r arseek myarray 16 + r arring myarray 10 "wrap" + + # After MOD 10 truncation, only indices 0-9 should exist + # The count should be <= 10 (some original values + new one) + set count [r arcount myarray] + assert_lessthan $count 11 ;# count <= 10 + + # Verify elements >= 10 are gone + assert_equal {} [r arget myarray 10] + assert_equal {} [r arget myarray 15] + assert_equal {} [r arget myarray 19] + } + + test {Regression #5 - AROP MATCH with large strings (>256 bytes)}
{ + r del myarray + # Create a string larger than 256 bytes + set largestr [string repeat "x" 300] + set largestr2 [string repeat "y" 300] + + r arset myarray 0 $largestr + r arset myarray 1 "small" + r arset myarray 2 $largestr + r arset myarray 3 $largestr2 + + # MATCH should find exactly 2 occurrences of largestr + assert_equal 2 [r arop myarray 0 3 MATCH $largestr] + assert_equal 1 [r arop myarray 0 3 MATCH $largestr2] + assert_equal 1 [r arop myarray 0 3 MATCH "small"] + assert_equal 0 [r arop myarray 0 3 MATCH "notfound"] + } + + test {Regression #6 - DEBUG DIGEST with large strings (>256 bytes)} { + r del myarray + set largestr [string repeat "z" 500] + r arset myarray 0 $largestr + r arset myarray 1 "small" + r arset myarray 100 [string repeat "w" 1000] + + # Get digest - should not crash and should be deterministic + set d1 [r debug digest-value myarray] + set d2 [r debug digest-value myarray] + assert_equal $d1 $d2 "Digest should be deterministic" + + # Modify and verify digest changes + r arset myarray 0 "changed" + set d3 [r debug digest-value myarray] + if {$d1 eq $d3} { + fail "Digest should change after modification" + } + } {} {needs:debug} + + test {Regression #7 - RDB with negative integers including -1} { + r flushdb + # -1 was problematic because it became UINT64_MAX which was RDB_LENERR + r arset myarray 0 -1 + r arset myarray 1 -100 + r arset myarray 2 -9223372036854775808 ;# INT64_MIN as string + r arset myarray 3 0 + r arset myarray 4 1 + r arset myarray 5 9223372036854775807 ;# INT64_MAX as string + + set d1 [r debug digest-value myarray] + + # Save and reload + r bgsave + waitForBgsave r + r debug reload + + # Verify values survived + assert_equal -1 [r arget myarray 0] + assert_equal -100 [r arget myarray 1] + # Note: very large integers may be stored as strings + assert_equal 0 [r arget myarray 3] + assert_equal 1 [r arget myarray 4] + + set d2 [r debug digest-value myarray] + assert_equal $d1 $d2 "Digest should match after RDB reload" + 
} {} {needs:debug} + + test {Regression #10 - ARSEEK on non-existent key should not create it} { + r del myarray + # ARSEEK on non-existent key + assert_equal 0 [r arseek myarray 100] + + # Key should NOT exist + assert_equal 0 [r exists myarray] + + # Now create the array and verify ARSEEK works + r arinsert myarray "first" + assert_equal 1 [r exists myarray] + + # ARSEEK on existing key should work + assert_equal 1 [r arseek myarray 50] + r arinsert myarray "second" + assert_equal 51 [r arnext myarray] + } + + test {Regression #12 - ARMGET/ARGETRANGE return WRONGTYPE on wrong type} { + r del myarray + r set myarray "string_value" + + # ARMGET should return WRONGTYPE error + assert_error {WRONGTYPE*} {r armget myarray 0 1 2} + + # ARGETRANGE should return WRONGTYPE error + assert_error {WRONGTYPE*} {r argetrange myarray 0 10} + + # Cleanup + r del myarray + } + + test {Regression - RDB preserves exact numeric string forms} { + r flushdb + set values [list \ + 0 "3.141592653589793" \ + 1 "-2.718281828459045" \ + 2 "1.0e-10" \ + 3 "1.0e+100"] + + foreach {idx val} $values { + r arset myarray $idx $val + } + + foreach {idx val} $values { + assert_equal $val [r arget myarray $idx] + } + + # Save and reload + r bgsave + waitForBgsave r + r debug reload + + foreach {idx val} $values { + assert_equal $val [r arget myarray $idx] + } + } {} {needs:debug} + + test {Whole-number floats with .0 suffix encode as inline floats} { + # Values like "1.0" should be encoded as inline floats, not heap strings. + # This tests the ".0" suffix optimization in arTryEncodeFloat. 
+ r del myarray + + # Various whole-number floats that should round-trip with ".0" + r arset myarray 0 1.0 + r arset myarray 1 -1.0 + r arset myarray 2 0.0 + r arset myarray 3 42.0 + r arset myarray 4 -42.0 + r arset myarray 5 1000000.0 + r arset myarray 6 -9999999.0 + + # Verify exact round-trip (the ".0" must be preserved) + assert_equal "1.0" [r arget myarray 0] + assert_equal "-1.0" [r arget myarray 1] + assert_equal "0.0" [r arget myarray 2] + assert_equal "42.0" [r arget myarray 3] + assert_equal "-42.0" [r arget myarray 4] + assert_equal "1000000.0" [r arget myarray 5] + assert_equal "-9999999.0" [r arget myarray 6] + + # Verify these survive RDB save/reload (confirms they're properly encoded) + r bgsave + waitForBgsave r + r debug reload + + assert_equal "1.0" [r arget myarray 0] + assert_equal "-1.0" [r arget myarray 1] + assert_equal "0.0" [r arget myarray 2] + assert_equal "42.0" [r arget myarray 3] + assert_equal "-42.0" [r arget myarray 4] + assert_equal "1000000.0" [r arget myarray 5] + assert_equal "-9999999.0" [r arget myarray 6] + } {} {needs:debug} + + test {Integer values without .0 still encode as integers, not floats} { + # Ensure "1" (without decimal) is encoded as integer, not float + r del myarray + + r arset myarray 0 1 + r arset myarray 1 -1 + r arset myarray 2 0 + r arset myarray 3 42 + r arset myarray 4 9999999 + + # Values without ".0" should stay as integers + assert_equal "1" [r arget myarray 0] + assert_equal "-1" [r arget myarray 1] + assert_equal "0" [r arget myarray 2] + assert_equal "42" [r arget myarray 3] + assert_equal "9999999" [r arget myarray 4] + + # Verify RDB round-trip preserves them as integers + r bgsave + waitForBgsave r + r debug reload + + assert_equal "1" [r arget myarray 0] + assert_equal "-1" [r arget myarray 1] + assert_equal "0" [r arget myarray 2] + assert_equal "42" [r arget myarray 3] + assert_equal "9999999" [r arget myarray 4] + } {} {needs:debug} + + test {AROP on whole-number floats works correctly} { + 
# Verify AROP aggregation works on values encoded with the .0 optimization + r del myarray + + r arset myarray 0 10.0 + r arset myarray 1 20.0 + r arset myarray 2 30.0 + + # SUM should work on whole-number floats (AROP returns computed values) + assert_equal 60 [r arop myarray 0 2 SUM] + + # MIN/MAX should work + assert_equal 10 [r arop myarray 0 2 MIN] + assert_equal 30 [r arop myarray 0 2 MAX] + + # MATCH should find the encoded values + assert_equal 1 [r arop myarray 0 2 MATCH 10.0] + assert_equal 1 [r arop myarray 0 2 MATCH 20.0] + } + + test {Exact string recovery survives AOF rewrite} { + r flushdb + set longstr [string repeat x 100] + set values [list \ + 0 "1.0" \ + 1 "-1.0" \ + 2 "42.0" \ + 3 "hello" \ + 4 "12345" \ + 5 "-0.0" \ + 6 "0.00" \ + 7 "10.500" \ + 8 "001.25" \ + 9 "1.0e-10" \ + 10 "1.0e+100" \ + 11 $longstr \ + 12 ""] + + foreach {idx val} $values { + r arset myarray $idx $val + } + + foreach {idx val} $values { + assert_equal $val [r arget myarray $idx] + } + + # Trigger AOF rewrite and reload + r bgrewriteaof + waitForBgrewriteaof r + r debug loadaof + + foreach {idx val} $values { + assert_equal $val [r arget myarray $idx] + } + } {} {needs:debug} + + test {Regression - CONFIG GET/SET for array settings} { + # Verify config options exist and are readable + set slice_size [lindex [r config get array-slice-size] 1] + set sparse_kmax [lindex [r config get array-sparse-kmax] 1] + set sparse_kmin [lindex [r config get array-sparse-kmin] 1] + + # Verify defaults + assert_equal 4096 $slice_size + assert_equal 10 $sparse_kmax + assert_equal 5 $sparse_kmin + + # sparse-kmax and sparse-kmin should be modifiable + r config set array-sparse-kmax 20 + assert_equal 20 [lindex [r config get array-sparse-kmax] 1] + r config set array-sparse-kmax $sparse_kmax ;# restore + + r config set array-sparse-kmin 8 + assert_equal 8 [lindex [r config get array-sparse-kmin] 1] + r config set array-sparse-kmin $sparse_kmin ;# restore + + # slice-size is modifiable but 
must be a power of two + r config set array-slice-size 8192 + assert_equal 8192 [lindex [r config get array-slice-size] 1] + r config set array-slice-size $slice_size ;# restore + + # Non-power-of-two should error + assert_error {*power of two*} {r config set array-slice-size 5000} + } + + test {Arrays created with different slice sizes work after config change} { + # Create an array with current slice size + r del myarray + set orig_size [lindex [r config get array-slice-size] 1] + + # Create array and populate it + for {set i 0} {$i < 10000} {incr i 1000} { + r arset myarray $i "value_$i" + } + set orig_count [r arcount myarray] + + # Change slice size - existing arrays should keep working + r config set array-slice-size 8192 + + # Verify old array still works + assert_equal $orig_count [r arcount myarray] + assert_equal "value_0" [r arget myarray 0] + assert_equal "value_5000" [r arget myarray 5000] + assert_equal "value_9000" [r arget myarray 9000] + + # Create new array with new slice size + r del newarray + r arset newarray 0 "new_value" + assert_equal "new_value" [r arget newarray 0] + + # Restore config + r config set array-slice-size $orig_size + r del myarray + r del newarray + } + + test {Regression - AOF rewrite with superdir mode (high indices)} { + # This tests the fix for AOF rewrite not iterating superdir blocks. + # With slice_size=4096, slice_id 2048 starts at index 8388608. + # Indices >= 8388608 trigger superdir mode. 
+ + r del aoftest + + # Create array with elements that trigger superdir mode + r arset aoftest 0 base + r arset aoftest 8388608 triggers_superdir + r arset aoftest 50000000 high + r arset aoftest 100000000 very_high + + assert_equal 4 [r arcount aoftest] + + # Verify superdir mode is active (directory-size shows number of blocks) + set info [r arinfo aoftest] + set dir_size [dict get $info directory-size] + # These indices span multiple superdir blocks, so dir_size should be > 1; NOTE(review): the assert below only enforces >= 1 + assert {$dir_size >= 1} + + # Trigger AOF rewrite and reload (same pattern as other AOF tests) + r bgrewriteaof + waitForBgrewriteaof r + r debug loadaof + + # Verify data survived AOF rewrite and reload + assert_equal 4 [r arcount aoftest] + assert_equal "base" [r arget aoftest 0] + assert_equal "triggers_superdir" [r arget aoftest 8388608] + assert_equal "high" [r arget aoftest 50000000] + assert_equal "very_high" [r arget aoftest 100000000] + + assert_equal 1 [r del aoftest] + } {} {needs:debug} + + # ========================================================================= + # Superdir command coverage + # ========================================================================= + + test {ARGETRANGE works across a superdir slice boundary} { + r del myarray + + # Cross slice 2047 -> 2048. Inserting the high index forces the array + # into superdir mode, but the range itself is still short. + r arset myarray 8388607 "left" + r arset myarray 8388608 "mid" + r arset myarray 8388609 "right" + + assert_equal {left mid right} [r argetrange myarray 8388607 8388609] + assert_equal {right mid left} [r argetrange myarray 8388609 8388607] + } + + test {ARSET pre-promotes sparse slice in superdir mode} { + r del myarray + set kmax [lindex [r config get array-sparse-kmax] 1] + assert {$kmax >= 4} + + # Build a sparse slice with kmax-1 existing elements at even offsets. + # The later range write covers offsets 0..kmax-1, so some of these + # positions are already filled and some are new.
+ for {set i 0} {$i < $kmax - 1} {incr i} { + set off [expr {$i * 2}] + r arset myarray [expr {8388608 + $off}] "old$off" + } + + set info [r arinfo myarray FULL] + assert_equal 0 [dict get $info dense-slices] + assert_equal 1 [dict get $info sparse-slices] + + # The range has kmax slots, while the slice already contains kmax-1 + # elements spread across the slice. This keeps range_size <= kmax, so + # the helper must take the count+new_elements path in order to decide + # the promotion. + set values {} + set existing_in_range 0 + for {set off 0} {$off < $kmax} {incr off} { + lappend values "n$off" + if {$off % 2 == 0 && $off <= 2 * ($kmax - 2)} { + incr existing_in_range + } + } + set expected_new [expr {$kmax - $existing_in_range}] + assert_equal $expected_new [r arset myarray 8388608 {*}$values] + + set info [r arinfo myarray FULL] + assert_equal 1 [dict get $info dense-slices] + assert_equal 0 [dict get $info sparse-slices] + assert_equal $values [r argetrange myarray 8388608 [expr {8388608 + $kmax - 1}]] + assert_equal "old[expr {2 * ($kmax - 2)}]" [r arget myarray [expr {8388608 + 2 * ($kmax - 2)}]] + } + + # ========================================================================= + # Range delete + iterator tests (dense→sparse demotion, superdir, sparse) + # ========================================================================= + + test {ARDELRANGE triggers dense to sparse demotion} { + r del myarray + # Pin config to ensure test doesn't break if defaults change + set orig_kmin [lindex [r config get array-sparse-kmin] 1] + r config set array-sparse-kmin 5 + + # Create a dense slice with 50 elements + for {set i 0} {$i < 50} {incr i} { + r arset myarray $i "val$i" + } + assert_equal 50 [r arcount myarray] + + # Verify it's dense + set info [r arinfo myarray FULL] + assert_equal 1 [dict get $info dense-slices] + assert_equal 0 [dict get $info sparse-slices] + + # Delete most elements with ARDELRANGE, leaving only 3 (below kmin=5) + assert_equal 47 [r 
ardelrange myarray 3 49] + assert_equal 3 [r arcount myarray] + + # Verify demotion to sparse + set info [r arinfo myarray FULL] + assert_equal 0 [dict get $info dense-slices] + assert_equal 1 [dict get $info sparse-slices] + + # Verify remaining elements + assert_equal "val0" [r arget myarray 0] + assert_equal "val1" [r arget myarray 1] + assert_equal "val2" [r arget myarray 2] + + r config set array-sparse-kmin $orig_kmin + } + + test {ARDELRANGE partial delete preserves dense then demotes} { + r del myarray + # Pin config + set orig_kmin [lindex [r config get array-sparse-kmin] 1] + r config set array-sparse-kmin 5 + + # Create dense slice + for {set i 0} {$i < 40} {incr i} { + r arset myarray $i $i + } + + # Delete some but not enough to trigger demotion (keep 10 > kmin=5) + assert_equal 30 [r ardelrange myarray 10 39] + assert_equal 10 [r arcount myarray] + + set info [r arinfo myarray FULL] + assert_equal 1 [dict get $info dense-slices] + + # Now delete more to trigger demotion + assert_equal 6 [r ardelrange myarray 4 9] + assert_equal 4 [r arcount myarray] + + set info [r arinfo myarray FULL] + assert_equal 0 [dict get $info dense-slices] + assert_equal 1 [dict get $info sparse-slices] + + r config set array-sparse-kmin $orig_kmin + } + + test {ARDELRANGE deletes full slices within superdir block} { + r del myarray + # With slice_size=4096: + # - Slice 2048 starts at index 8388608 + # - Slice 2049 starts at index 8392704 + # - Both are in superdir block 1 + + # Create elements in two adjacent slices within same superdir block + r arset myarray 8388608 "slice2048_a" + r arset myarray 8388700 "slice2048_b" + r arset myarray 8392704 "slice2049_a" + r arset myarray 8392800 "slice2049_b" + # And one element in a different block for reference + r arset myarray 0 "slice0" + + assert_equal 5 [r arcount myarray] + + # Delete range that fully covers both slices 2048 and 2049 + # This should trigger full-slice deletion (not element-by-element) + assert_equal 4 [r 
ardelrange myarray 8388608 8396799] + assert_equal 1 [r arcount myarray] + + # Verify only slice0 element remains + assert_equal "slice0" [r arget myarray 0] + assert_equal {} [r arget myarray 8388608] + assert_equal {} [r arget myarray 8392704] + + r del myarray + } + + test {ARDELRANGE spanning multiple superdir blocks} { + r del myarray + # Superdir block boundaries with slice_size=4096: + # - Block 0: slices 0-2047 (indices 0 - 8388607) + # - Block 1: slices 2048-4095 (indices 8388608 - 16777215) + # - Block 2: slices 4096+ (indices 16777216+) + + # Create elements across three blocks + r arset myarray 100 "block0" + r arset myarray 8388608 "block1_start" + r arset myarray 12000000 "block1_mid" + r arset myarray 16777200 "block1_end" + r arset myarray 16777216 "block2_start" + r arset myarray 20000000 "block2_mid" + + assert_equal 6 [r arcount myarray] + + # Delete range spanning from block1 into block2 + # This exercises cross-block deletion + assert_equal 4 [r ardelrange myarray 8388608 18000000] + assert_equal 2 [r arcount myarray] + + # Verify block0 and remaining block2 element + assert_equal "block0" [r arget myarray 100] + assert_equal "block2_mid" [r arget myarray 20000000] + assert_equal {} [r arget myarray 8388608] + assert_equal {} [r arget myarray 16777216] + + r del myarray + } + + test {ARDELRANGE superdir middle range with missing upper block} { + r del myarray + # Occupied blocks: + # - block 0: boundary lo_slice + # - block 1: middle full slices to delete + # - block 3: boundary hi_slice + # block 2 is intentionally empty, so the upper lower-bound search + # must stop at the insertion point rather than on an exact match. 
+ r arset myarray 8388590 "block0_keep" + r arset myarray 8388608 "block1_a" + r arset myarray 8392704 "block1_b" + r arset myarray 25165825 "block3_keep" + + assert_equal 4 [r arcount myarray] + assert_equal 2 [r ardelrange myarray 8388595 25165824] + assert_equal 2 [r arcount myarray] + + assert_equal "block0_keep" [r arget myarray 8388590] + assert_equal {} [r arget myarray 8388608] + assert_equal {} [r arget myarray 8392704] + assert_equal "block3_keep" [r arget myarray 25165825] + } + + test {ARDELRANGE superdir with empty middle block interval} { + r del myarray + # Only the boundary slices are populated. The superdir middle interval + # is empty, so the block loop must resolve to [start, end) = empty. + r arset myarray 8388590 "block0_keep" + r arset myarray 8388607 "block0_del" + r arset myarray 25165824 "block3_del" + r arset myarray 25165825 "block3_keep" + + assert_equal 4 [r arcount myarray] + assert_equal 2 [r ardelrange myarray 8388600 25165824] + assert_equal 2 [r arcount myarray] + + assert_equal "block0_keep" [r arget myarray 8388590] + assert_equal {} [r arget myarray 8388607] + assert_equal {} [r arget myarray 25165824] + assert_equal "block3_keep" [r arget myarray 25165825] + } + + test {ARDELRANGE with multiple ranges in single call} { + r del myarray + for {set i 0} {$i < 20} {incr i} { + r arset myarray $i "val$i" + } + assert_equal 20 [r arcount myarray] + + # Delete two separate ranges in one command + # Ranges: [2,4] and [10,14] + assert_equal 8 [r ardelrange myarray 2 4 10 14] + assert_equal 12 [r arcount myarray] + + # Verify correct elements deleted + assert_equal "val0" [r arget myarray 0] + assert_equal "val1" [r arget myarray 1] + assert_equal {} [r arget myarray 2] + assert_equal {} [r arget myarray 3] + assert_equal {} [r arget myarray 4] + assert_equal "val5" [r arget myarray 5] + assert_equal "val9" [r arget myarray 9] + assert_equal {} [r arget myarray 10] + assert_equal {} [r arget myarray 14] + assert_equal "val15" [r arget 
myarray 15] + } + + test {ARDELRANGE with overlapping ranges} { + r del myarray + for {set i 0} {$i < 20} {incr i} { + r arset myarray $i "val$i" + } + + # Overlapping ranges: [5,12] and [8,15] + # Should delete [5,15] total = 11 elements + # But second range re-deletes already-deleted [8,12], so still 11 unique + assert_equal 11 [r ardelrange myarray 5 12 8 15] + assert_equal 9 [r arcount myarray] + + assert_equal "val4" [r arget myarray 4] + assert_equal {} [r arget myarray 5] + assert_equal {} [r arget myarray 12] + assert_equal {} [r arget myarray 15] + assert_equal "val16" [r arget myarray 16] + } + + test {ARDELRANGE sparse slice middle-span deletion} { + r del myarray + # Create sparse slice with specific offsets + r arset myarray 10 "a" + r arset myarray 20 "b" + r arset myarray 30 "c" + r arset myarray 40 "d" + r arset myarray 50 "e" + + assert_equal 5 [r arcount myarray] + + # Delete a middle contiguous sparse span. + assert_equal 3 [r ardelrange myarray 20 40] + assert_equal 2 [r arcount myarray] + + # Verify correct elements remain + assert_equal "a" [r arget myarray 10] + assert_equal {} [r arget myarray 20] + assert_equal {} [r arget myarray 30] + assert_equal {} [r arget myarray 40] + assert_equal "e" [r arget myarray 50] + } + + test {ARDELRANGE sparse with non-contiguous deletions} { + r del myarray + # Sparse elements at various offsets + r arset myarray 5 "v5" + r arset myarray 15 "v15" + r arset myarray 25 "v25" + r arset myarray 35 "v35" + r arset myarray 45 "v45" + + # Delete range that only hits some elements + assert_equal 2 [r ardelrange myarray 10 30] + assert_equal 3 [r arcount myarray] + + assert_equal "v5" [r arget myarray 5] + assert_equal {} [r arget myarray 15] + assert_equal {} [r arget myarray 25] + assert_equal "v35" [r arget myarray 35] + assert_equal "v45" [r arget myarray 45] + } + + test {ARDELRANGE sparse prefix span deletion} { + r del myarray + r arset myarray 10 "a" + r arset myarray 20 "b" + r arset myarray 30 "c" + r 
arset myarray 40 "d" + r arset myarray 50 "e" + + # Delete the sparse prefix span: first == 0, last in the middle. + assert_equal 2 [r ardelrange myarray 0 25] + assert_equal 3 [r arcount myarray] + + assert_equal {} [r arget myarray 10] + assert_equal {} [r arget myarray 20] + assert_equal "c" [r arget myarray 30] + assert_equal "d" [r arget myarray 40] + assert_equal "e" [r arget myarray 50] + } + + test {ARDELRANGE sparse suffix span deletion} { + r del myarray + r arset myarray 10 "a" + r arset myarray 20 "b" + r arset myarray 30 "c" + r arset myarray 40 "d" + r arset myarray 50 "e" + + # Delete the sparse suffix span: first in the middle, last == count. + assert_equal 2 [r ardelrange myarray 35 100] + assert_equal 3 [r arcount myarray] + + assert_equal "a" [r arget myarray 10] + assert_equal "b" [r arget myarray 20] + assert_equal "c" [r arget myarray 30] + assert_equal {} [r arget myarray 40] + assert_equal {} [r arget myarray 50] + } + + test {ARDELRANGE sparse whole-slice deletion} { + r del myarray + r arset myarray 10 "a" + r arset myarray 20 "b" + r arset myarray 30 "c" + r arset myarray 40 "d" + r arset myarray 50 "e" + + # Delete the whole sparse slice: first == 0, last == count. + assert_equal 5 [r ardelrange myarray 0 100] + assert_equal 0 [r exists myarray] + } + + test {ARDELRANGE sparse no-hit range} { + r del myarray + r arset myarray 10 "a" + r arset myarray 20 "b" + r arset myarray 30 "c" + r arset myarray 40 "d" + r arset myarray 50 "e" + + # Delete a range that falls strictly between two sparse offsets. 
+ assert_equal 0 [r ardelrange myarray 11 19] + assert_equal 5 [r arcount myarray] + + assert_equal "a" [r arget myarray 10] + assert_equal "b" [r arget myarray 20] + assert_equal "c" [r arget myarray 30] + assert_equal "d" [r arget myarray 40] + assert_equal "e" [r arget myarray 50] + } + + test {ARDELRANGE sparse single edge deletions} { + r del myarray + r arset myarray 10 "a" + r arset myarray 20 "b" + r arset myarray 30 "c" + r arset myarray 40 "d" + r arset myarray 50 "e" + + # Delete exactly the first sparse element, then exactly the last one. + assert_equal 1 [r ardelrange myarray 10 10] + assert_equal 4 [r arcount myarray] + assert_equal {} [r arget myarray 10] + assert_equal "b" [r arget myarray 20] + assert_equal "c" [r arget myarray 30] + assert_equal "d" [r arget myarray 40] + assert_equal "e" [r arget myarray 50] + + assert_equal 1 [r ardelrange myarray 50 50] + assert_equal 3 [r arcount myarray] + assert_equal "b" [r arget myarray 20] + assert_equal "c" [r arget myarray 30] + assert_equal "d" [r arget myarray 40] + assert_equal {} [r arget myarray 50] + } + + test {Random testing - blackbox ARDELRANGE model stress} { + r flushdb + expr {srand(24680)} + array set model_state {} + + for {set step 0} {$step < 400} {incr step} { + set roll [expr {int(rand() * 100)}] + + if {$roll < 50} { + set idx [random_array_index] + set val [random_value] + r arset myarray $idx $val + set model_state($idx) $val + } elseif {$roll < 70} { + set idx [random_array_index] + set expected_deleted 0 + if {[info exists model_state($idx)]} { + unset model_state($idx) + set expected_deleted 1 + } + assert_equal $expected_deleted [r ardel myarray $idx] + } else { + set args {} + set expected_deleted 0 + set nranges [expr {int(rand() * 3) + 1}] + + for {set i 0} {$i < $nranges} {incr i} { + set lo [random_array_index] + set hi [random_array_index] + lappend args $lo $hi + incr expected_deleted [model_array_delrange model_state $lo $hi] + } + + assert_equal $expected_deleted [r 
ardelrange myarray {*}$args] + } + + if {$step % 25 == 0 || $step == 399} { + set expected_scan [model_array_scan model_state] + set expected_count [array size model_state] + + if {$expected_count == 0} { + assert_equal 0 [r exists myarray] + assert_equal {} [r arscan myarray 0 30000000] + } else { + assert_equal $expected_count [r arcount myarray] + assert_equal $expected_scan [r arscan myarray 0 30000000] + } + + for {set probe 0} {$probe < 20} {incr probe} { + set idx [random_array_index] + if {[info exists model_state($idx)]} { + assert_equal $model_state($idx) [r arget myarray $idx] + } else { + assert_equal {} [r arget myarray $idx] + } + } + } + } + } + + test {ARSCAN after ARDELRANGE with demotion} { + r del myarray + # Create dense + for {set i 0} {$i < 30} {incr i} { + r arset myarray $i "val$i" + } + + # Delete most, triggering demotion + r ardelrange myarray 4 29 + + # ARSCAN should find remaining elements + set result [r arscan myarray 0 100] + assert_equal 4 [llength $result] + assert_equal {{0 val0} {1 val1} {2 val2} {3 val3}} $result + + # Reverse scan + set result [r arscan myarray 100 0] + assert_equal {{3 val3} {2 val2} {1 val1} {0 val0}} $result + } + + test {ARSCAN with LIMIT after range delete} { + r del myarray + for {set i 0} {$i < 20} {incr i} { + r arset myarray $i $i + } + + # Delete some in the middle + r ardelrange myarray 5 14 + + # Scan with limit + set result [r arscan myarray 0 100 LIMIT 3] + assert_equal 3 [llength $result] + assert_equal {{0 0} {1 1} {2 2}} $result + } + + test {AROP after ARDELRANGE across multiple slices} { + r del myarray + # Create elements across slice boundaries (slice_size=4096) + for {set i 0} {$i < 10} {incr i} { + r arset myarray $i $i + } + for {set i 4096} {$i < 4106} {incr i} { + r arset myarray $i $i + } + + assert_equal 20 [r arcount myarray] + + # Delete first slice partially + r ardelrange myarray 5 9 + + # AROP SUM should work across slices + # Remaining: 0+1+2+3+4 + 4096..4105 = 10 + 
sum(4096..4105) + # sum(4096..4105) = (4096+4105)*10/2 = 41005 + set sum [r arop myarray 0 5000 SUM] + assert_equal 41015 $sum + + # AROP USED + assert_equal 15 [r arop myarray 0 5000 USED] + + # AROP MIN/MAX + assert_equal 0 [r arop myarray 0 5000 MIN] + assert_equal 4105 [r arop myarray 0 5000 MAX] + } + + test {AROP MATCH after dense demotion} { + r del myarray + # Create dense with repeated values + for {set i 0} {$i < 30} {incr i} { + r arset myarray $i "target" + } + r arset myarray 2 "other" + + # Delete most to trigger demotion, keep indices 0-3 + # After delete: 0=target, 1=target, 2=other, 3=target + r ardelrange myarray 4 29 + + # Verify demotion happened + set info [r arinfo myarray FULL] + assert_equal 0 [dict get $info dense-slices] + assert_equal 1 [dict get $info sparse-slices] + + # Count matches in sparse slice (3 "target" values) + assert_equal 3 [r arop myarray 0 100 MATCH target] + } + + test {ARSCAN over superdir blocks} { + r del myarray + # Elements in different superdir blocks + r arset myarray 0 "first" + r arset myarray 8388608 "second" + r arset myarray 16777216 "third" + + # Scan entire range + set result [r arscan myarray 0 20000000] + assert_equal 3 [llength $result] + assert_equal {0 first} [lindex $result 0] + assert_equal {8388608 second} [lindex $result 1] + assert_equal {16777216 third} [lindex $result 2] + + # Reverse scan + set result [r arscan myarray 20000000 0] + assert_equal {16777216 third} [lindex $result 0] + assert_equal {8388608 second} [lindex $result 1] + assert_equal {0 first} [lindex $result 2] + + r del myarray + } + + test {Iterator commands do not rescan exhausted superdir blocks} { + r del myarray + r arset myarray 43 "a" + r arset myarray 4586 "b" + r arset myarray 19245258 "c" + + assert_equal {{43 a} {4586 b} {19245258 c}} \ + [r arscan myarray 0 30000000 LIMIT 8] + assert_equal {{19245258 c}} \ + [r argrep myarray 0 30000000 EXACT c WITHVALUES LIMIT 4] + assert_equal 3 [r arop myarray 0 30000000 USED] + } + 
+ test {AROP over superdir with partial range} { + r del myarray + r arset myarray 0 10 + r arset myarray 100 20 + r arset myarray 8388608 30 + r arset myarray 8388700 40 + r arset myarray 16777216 50 + + # SUM only in first block + assert_equal 30 [r arop myarray 0 1000 SUM] + + # SUM spanning blocks + assert_equal 150 [r arop myarray 0 20000000 SUM] + + # USED in specific range + assert_equal 2 [r arop myarray 8388600 8388800 USED] + + r del myarray + } + + test {ARDELRANGE delete entire slice then verify iteration} { + r del myarray + # Two slices + for {set i 0} {$i < 10} {incr i} { + r arset myarray $i "slice0_$i" + } + for {set i 4096} {$i < 4106} {incr i} { + r arset myarray $i "slice1_$i" + } + + # Delete entire first slice + assert_equal 10 [r ardelrange myarray 0 4095] + assert_equal 10 [r arcount myarray] + + # ARSCAN should only find second slice elements + set result [r arscan myarray 0 5000] + assert_equal 10 [llength $result] + assert_equal {4096 slice1_4096} [lindex $result 0] + } + +} + +# Test loading a 32-bit generated RDB on the current architecture. +# The RDB file contains arrays exercising all tagged pointer encodings: +# immediate ints (including 30-bit boundary values), inline floats, +# small strings, arString heap strings, mixed types, sparse indices, +# and insert_idx preservation. 
+set server_path [tmpdir "server.array-32bit-rdb-test"] +exec cp [file join [pwd] tests/assets/array-32bit.rdb] $server_path + +start_server [list overrides [list "dir" $server_path "dbfilename" "array-32bit.rdb"] tags {"array external:skip"}] { + + test {Load 32-bit RDB - integer encodings} { + r select 0 + # Inline ints and boundary values + assert_equal 0 [r arget ints 0] + assert_equal 1 [r arget ints 1] + assert_equal -1 [r arget ints 2] + assert_equal 42 [r arget ints 3] + assert_equal -42 [r arget ints 4] + # 30-bit int boundary (max/min for 32-bit tagged ints) + assert_equal 536870911 [r arget ints 5] + assert_equal -536870912 [r arget ints 6] + # Values beyond 30-bit range (arString on 32-bit, re-encoded on load) + assert_equal 536870912 [r arget ints 7] + assert_equal -536870913 [r arget ints 8] + assert_equal 2147483647 [r arget ints 9] + assert_equal -2147483648 [r arget ints 10] + assert_equal 1000000000 [r arget ints 11] + assert_equal 999999999 [r arget ints 12] + assert_equal 100 [r arget ints 13] + assert_equal 14 [r arcount ints] + } + + test {Load 32-bit RDB - float encodings} { + r select 0 + assert_equal 1.0 [r arget floats 0] + assert_equal -1.0 [r arget floats 1] + assert_equal 3.14 [r arget floats 2] + assert_equal 0.5 [r arget floats 3] + assert_equal -0.5 [r arget floats 4] + assert_equal 0.25 [r arget floats 5] + assert_equal 100.0 [r arget floats 6] + assert_equal -100.0 [r arget floats 7] + assert_equal 1.5 [r arget floats 8] + assert_equal 1.75 [r arget floats 9] + assert_equal 0.1 [r arget floats 10] + assert_equal 1234.5 [r arget floats 11] + assert_equal 0.0625 [r arget floats 12] + assert_equal 999999.0 [r arget floats 13] + assert_equal 1.23456789012 [r arget floats 14] + assert_equal 15 [r arcount floats] + } + + test {Load 32-bit RDB - string encodings} { + r select 0 + # Empty string, 1-3 byte inline (smallstr on 32-bit), + # 4-7 byte (smallstr on 64-bit only, arString on 32-bit), + # 8+ byte (always arString) + assert_equal {} 
[r arget strs 0] + assert_equal a [r arget strs 1] + assert_equal ab [r arget strs 2] + assert_equal abc [r arget strs 3] + assert_equal abcd [r arget strs 4] + assert_equal abcde [r arget strs 5] + assert_equal abcdef [r arget strs 6] + assert_equal abcdefg [r arget strs 7] + assert_equal abcdefgh [r arget strs 8] + assert_equal {hello world} [r arget strs 9] + assert_equal {this is a longer string for testing} [r arget strs 10] + assert_equal x [r arget strs 11] + assert_equal xy [r arget strs 12] + assert_equal xyz [r arget strs 13] + assert_equal 14 [r arcount strs] + } + + test {Load 32-bit RDB - mixed type encodings} { + r select 0 + assert_equal 42 [r arget mixed 0] + assert_equal 3.14 [r arget mixed 1] + assert_equal hi [r arget mixed 2] + assert_equal -536870912 [r arget mixed 3] + assert_equal 0.5 [r arget mixed 4] + assert_equal abcdefghij [r arget mixed 5] + assert_equal 536870911 [r arget mixed 6] + assert_equal -1.5 [r arget mixed 7] + assert_equal ab [r arget mixed 8] + assert_equal 0 [r arget mixed 9] + assert_equal 1.0 [r arget mixed 10] + assert_equal hello [r arget mixed 11] + assert_equal 2147483647 [r arget mixed 12] + assert_equal 0.25 [r arget mixed 13] + assert_equal xyz [r arget mixed 14] + assert_equal 15 [r arcount mixed] + } + + test {Load 32-bit RDB - sparse indices across slices} { + r select 0 + assert_equal first [r arget sparse 0] + assert_equal slice0end [r arget sparse 4095] + assert_equal slice1start [r arget sparse 4096] + assert_equal slice1end [r arget sparse 8191] + assert_equal 42 [r arget sparse 10000] + assert_equal 3.14 [r arget sparse 50000] + assert_equal hello [r arget sparse 100000] + assert_equal 7 [r arcount sparse] + } + + test {Load 32-bit RDB - insert_idx preservation} { + r select 0 + assert_equal one [r arget withinsert 0] + assert_equal two [r arget withinsert 1] + assert_equal three [r arget withinsert 2] + assert_equal four [r arget withinsert 3] + assert_equal five [r arget withinsert 4] + assert_equal 5 [r 
arcount withinsert] + # Verify insert_idx was preserved: next insert should go at index 5 + r arinsert withinsert six + assert_equal six [r arget withinsert 5] + } + + test {Load 32-bit RDB - re-save and reload cycle} { + r select 0 + # Save from 64-bit, reload, verify integrity + r save + r debug reload + foreach {idx value} { + 0 0 1 1 2 -1 3 42 4 -42 + 5 536870911 6 -536870912 7 536870912 8 -536870913 + 9 2147483647 10 -2147483648 11 1000000000 12 999999999 13 100 + } { + assert_equal $value [r arget ints $idx] + } + assert_equal 14 [r arcount ints] + + foreach {idx value} { + 0 1.0 1 -1.0 2 3.14 3 0.5 4 -0.5 + 5 0.25 6 100.0 7 -100.0 8 1.5 9 1.75 + 10 0.1 11 1234.5 12 0.0625 13 999999.0 14 1.23456789012 + } { + assert_equal $value [r arget floats $idx] + } + assert_equal 15 [r arcount floats] + + foreach {idx value} { + 0 {} 1 a 2 ab 3 abc 4 abcd 5 abcde 6 abcdef 7 abcdefg + 8 abcdefgh 9 {hello world} 10 {this is a longer string for testing} + 11 x 12 xy 13 xyz + } { + assert_equal $value [r arget strs $idx] + } + assert_equal 14 [r arcount strs] + + foreach {idx value} { + 0 42 1 3.14 2 hi 3 -536870912 4 0.5 + 5 abcdefghij 6 536870911 7 -1.5 8 ab 9 0 + 10 1.0 11 hello 12 2147483647 13 0.25 14 xyz + } { + assert_equal $value [r arget mixed $idx] + } + assert_equal 15 [r arcount mixed] + + foreach {idx value} { + 0 first 4095 slice0end 4096 slice1start 8191 slice1end + 10000 42 50000 3.14 100000 hello + } { + assert_equal $value [r arget sparse $idx] + } + assert_equal 7 [r arcount sparse] + + foreach {idx value} { + 0 one 1 two 2 three 3 four 4 five 5 six + } { + assert_equal $value [r arget withinsert $idx] + } + assert_equal 6 [r arcount withinsert] + r arinsert withinsert seven + assert_equal seven [r arget withinsert 6] + } {} {needs:debug} +} diff --git a/tools/array-bench.py b/tools/array-bench.py new file mode 100755 index 000000000..959e12961 --- /dev/null +++ b/tools/array-bench.py @@ -0,0 +1,431 @@ +#!/usr/bin/env python3 +import argparse +import json 
+import os +import re +import signal +import subprocess +import sys +import tempfile +import time +from dataclasses import dataclass, asdict +from pathlib import Path +from typing import Optional + + +QPS_RE = re.compile(r"([0-9]+(?:\.[0-9]+)?)\s+requests per second") + + +@dataclass +class Workload: + name: str + description: str + command: list[str] + requests: int + clients: int + pipeline: int + rand_range: int = 0 + warmup_requests: int = 2000 + setup: Optional[str] = None + + +@dataclass +class Result: + name: str + description: str + qps: float + requests: int + clients: int + pipeline: int + rand_range: int + command: list[str] + raw_output: str + + +class BenchError(RuntimeError): + pass + + +class RedisArrayBench: + def __init__(self, args: argparse.Namespace): + self.args = args + self.base_dir = Path(__file__).resolve().parent + repo_root = self.base_dir.parent + src_dir = Path(args.src_dir) if args.src_dir else repo_root / "src" + self.redis_server = str(src_dir / "redis-server") + self.redis_cli = str(src_dir / "redis-cli") + self.redis_benchmark = str(src_dir / "redis-benchmark") + self.server_proc: Optional[subprocess.Popen[str]] = None + self.server_dir: Optional[tempfile.TemporaryDirectory[str]] = None + self.host = args.host + self.port = args.port + self.db = args.db + self.results: list[Result] = [] + + for binary in (self.redis_server, self.redis_cli, self.redis_benchmark): + if not os.path.exists(binary): + raise BenchError(f"missing binary: {binary}") + + def run(self) -> int: + try: + if self.args.start_server: + self.start_server() + self.prepare_data() + self.print_dataset_summary() + for workload in self.selected_workloads(): + result = self.run_workload(workload) + self.results.append(result) + print(f"{result.name:28s} {result.qps:12.2f} req/s") + self.print_summary() + if self.args.json_out: + with open(self.args.json_out, "w", encoding="utf-8") as fp: + json.dump({ + "host": self.host, + "port": self.port, + "db": self.db, + 
"results": [asdict(r) for r in self.results], + }, fp, indent=2) + print(f"json written to {self.args.json_out}") + return 0 + finally: + if self.args.start_server and not self.args.keep_server: + self.stop_server() + + def start_server(self) -> None: + self.server_dir = tempfile.TemporaryDirectory(prefix="array-bench-") + cmd = [ + self.redis_server, + "--port", str(self.port), + "--save", "", + "--appendonly", "no", + "--dir", self.server_dir.name, + "--loglevel", "warning", + "--daemonize", "no", + ] + self.server_proc = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + ) + self.wait_for_ping(timeout=10.0) + + def stop_server(self) -> None: + if self.server_proc is not None and self.server_proc.poll() is None: + self.server_proc.send_signal(signal.SIGTERM) + try: + self.server_proc.wait(timeout=5) + except subprocess.TimeoutExpired: + self.server_proc.kill() + self.server_proc.wait(timeout=5) + if self.server_dir is not None: + self.server_dir.cleanup() + self.server_proc = None + self.server_dir = None + + def wait_for_ping(self, timeout: float) -> None: + deadline = time.time() + timeout + last_error = None + while time.time() < deadline: + if self.server_proc is not None and self.server_proc.poll() is not None: + raise BenchError( + "server exited before becoming ready:\n" + f"{self.read_server_output().strip()}" + ) + try: + cmd = [ + self.redis_cli, + "-h", self.host, + "-p", str(self.port), + "-n", str(self.db), + "--raw", + "PING", + ] + probe = subprocess.run( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + if probe.returncode != 0: + raise BenchError(probe.stderr.strip() or probe.stdout.strip()) + out = probe.stdout.strip() + if out == "PONG": + return + except Exception as exc: # pragma: no cover - startup race handling + last_error = exc + time.sleep(0.05) + raise BenchError( + f"server did not start on {self.host}:{self.port}: {last_error}\n" + 
f"{self.read_server_output().strip()}" + ) + + def read_server_output(self) -> str: + if self.server_proc is None or self.server_proc.stdout is None: + return "" + try: + return self.server_proc.stdout.read() + except Exception: # pragma: no cover - best effort diagnostics + return "" + + def cli(self, command: list[str], raw: bool = False) -> str: + cmd = [self.redis_cli, "-h", self.host, "-p", str(self.port), "-n", str(self.db)] + if raw: + cmd.append("--raw") + cmd.extend(command) + return subprocess.check_output(cmd, text=True) + + def pipe(self, payload: bytes) -> None: + cmd = [self.redis_cli, "-h", self.host, "-p", str(self.port), "-n", str(self.db), "--pipe"] + proc = subprocess.run(cmd, input=payload, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + if proc.returncode != 0: + raise BenchError(f"redis-cli --pipe failed:\n{proc.stdout.decode('utf-8', 'replace')}") + out = proc.stdout.decode("utf-8", "replace") + if "errors: 0, replies:" not in out: + raise BenchError(f"unexpected --pipe output:\n{out}") + + @staticmethod + def resp(parts: list[str]) -> bytes: + out = [f"*{len(parts)}\r\n".encode()] + for part in parts: + data = part.encode("utf-8") + out.append(f"${len(data)}\r\n".encode()) + out.append(data) + out.append(b"\r\n") + return b"".join(out) + + def prepare_data(self) -> None: + print("preparing datasets...", file=sys.stderr) + self.cli(["FLUSHDB"]) + payload = bytearray() + payload += self.resp(["DEL", "bench:array:dense:num", "bench:array:dense:text", "bench:array:sparse:text", "bench:array:append", "bench:array:ring"]) + payload += self.build_dense_numeric() + payload += self.build_dense_text() + payload += self.build_sparse_text() + self.pipe(bytes(payload)) + + def build_dense_numeric(self) -> bytes: + key = "bench:array:dense:num" + total = self.args.dense_len + batch = 256 + payload = bytearray() + for start in range(0, total, batch): + values = [str(start + i) for i in range(min(batch, total - start))] + payload += self.resp(["ARSET", 
key, str(start), *values]) + return bytes(payload) + + def build_dense_text(self) -> bytes: + key = "bench:array:dense:text" + total = self.args.dense_len + batch = 128 + payload = bytearray() + for start in range(0, total, batch): + values = [] + for i in range(start, min(start + batch, total)): + mod = i % 4 + if mod == 0: + values.append(f"row:{i} alpha encoding complexity") + elif mod == 1: + values.append(f"row:{i} beta sparse vector") + elif mod == 2: + values.append(f"row:{i} gamma dense matcher") + else: + values.append(f"row:{i} delta encoding helper") + payload += self.resp(["ARSET", key, str(start), *values]) + return bytes(payload) + + def build_sparse_text(self) -> bytes: + key = "bench:array:sparse:text" + clusters = [ + (0, 97, 384), + (8_388_608, 113, 640), + (16_777_216, 127, 896), + (25_165_824, 151, 896), + ] + batch_pairs = 64 + pairs: list[str] = [] + payload = bytearray() + nth = 0 + for base, stride, count in clusters: + for i in range(count): + idx = base + i * stride + mod = nth % 4 + if mod == 0: + value = f"slot:{idx} alpha encoding complexity" + elif mod == 1: + value = f"slot:{idx} beta sparse needle" + elif mod == 2: + value = f"slot:{idx} gamma dense helper" + else: + value = f"slot:{idx} delta complexity marker" + pairs.extend([str(idx), value]) + nth += 1 + if len(pairs) >= batch_pairs * 2: + payload += self.resp(["ARMSET", key, *pairs]) + pairs.clear() + if pairs: + payload += self.resp(["ARMSET", key, *pairs]) + return bytes(payload) + + def print_dataset_summary(self) -> None: + summary = { + "bench:array:dense:num": { + "count": self.cli(["ARCOUNT", "bench:array:dense:num"], raw=True).strip(), + "len": self.cli(["ARLEN", "bench:array:dense:num"], raw=True).strip(), + }, + "bench:array:dense:text": { + "count": self.cli(["ARCOUNT", "bench:array:dense:text"], raw=True).strip(), + "len": self.cli(["ARLEN", "bench:array:dense:text"], raw=True).strip(), + }, + "bench:array:sparse:text": { + "count": self.cli(["ARCOUNT", 
"bench:array:sparse:text"], raw=True).strip(), + "len": self.cli(["ARLEN", "bench:array:sparse:text"], raw=True).strip(), + }, + } + print("dataset:") + for key, info in summary.items(): + print(f" {key}: count={info['count']} len={info['len']}") + + def selected_workloads(self) -> list[Workload]: + workloads = self.workloads() + if not self.args.only: + return workloads + wanted = {name.strip() for name in self.args.only.split(",") if name.strip()} + unknown = wanted - {w.name for w in workloads} + if unknown: + raise BenchError(f"unknown workload(s): {', '.join(sorted(unknown))}") + return [w for w in workloads if w.name in wanted] + + def workloads(self) -> list[Workload]: + dense_range_end = min(8192 + 31, self.args.dense_len - 1) + return [ + Workload("arget_dense_rand", "ARGET dense random hit", ["ARGET", "bench:array:dense:num", "__rand_int__"], 200_000, 50, 16, rand_range=self.args.dense_len), + Workload("armget_dense_4_rand", "ARMGET dense 4 random hits", ["ARMGET", "bench:array:dense:num", "__rand_int__", "__rand_int__", "__rand_int__", "__rand_int__"], 100_000, 50, 16, rand_range=self.args.dense_len), + Workload("argetrange_dense_32", "ARGETRANGE dense 32 hot", ["ARGETRANGE", "bench:array:dense:num", "8192", str(dense_range_end)], 50_000, 32, 8), + Workload("arscan_dense_limit_100", "ARSCAN dense LIMIT 100", ["ARSCAN", "bench:array:dense:text", "0", str(self.args.dense_len - 1), "LIMIT", "100"], 50_000, 24, 4), + Workload("argrep_match_dense", "ARGREP MATCH dense", ["ARGREP", "bench:array:dense:text", "0", str(self.args.dense_len - 1), "MATCH", "encoding", "LIMIT", "20", "WITHVALUES"], 20_000, 20, 2), + Workload("argrep_re_dense_nocase", "ARGREP RE dense nocase", ["ARGREP", "bench:array:dense:text", "0", str(self.args.dense_len - 1), "RE", "encoding|complexity|helper", "NOCASE", "LIMIT", "20", "WITHVALUES"], 20_000, 20, 2), + Workload("arop_sum_dense_4096", "AROP SUM dense 4096", ["AROP", "bench:array:dense:num", "0", "4095", "SUM"], 50_000, 24, 4), + 
Workload("arget_sparse_rand", "ARGET sparse random mostly miss", ["ARGET", "bench:array:sparse:text", "__rand_int__"], 200_000, 50, 16, rand_range=self.args.sparse_space), + Workload("arscan_sparse_limit_100", "ARSCAN sparse LIMIT 100", ["ARSCAN", "bench:array:sparse:text", "0", str(self.args.sparse_space - 1), "LIMIT", "100"], 25_000, 20, 2), + Workload("argrep_match_sparse", "ARGREP MATCH sparse", ["ARGREP", "bench:array:sparse:text", "0", str(self.args.sparse_space - 1), "MATCH", "encoding", "LIMIT", "20", "WITHVALUES"], 10_000, 16, 1), + Workload("arop_used_sparse", "AROP USED sparse", ["AROP", "bench:array:sparse:text", "0", str(self.args.sparse_space - 1), "USED"], 25_000, 20, 2), + Workload("arset_dense_rand", "ARSET dense random update", ["ARSET", "bench:array:dense:num", "__rand_int__", "42"], 150_000, 50, 16, rand_range=self.args.dense_len), + Workload("armset_dense_4_rand", "ARMSET dense 4 random updates", ["ARMSET", "bench:array:dense:num", "__rand_int__", "11", "__rand_int__", "22", "__rand_int__", "33", "__rand_int__", "44"], 100_000, 50, 16, rand_range=self.args.dense_len), + Workload("arinsert_append_hot", "ARINSERT append hot path", ["ARINSERT", "bench:array:append", "x"], 50_000, 24, 8, setup="reset_append"), + Workload("arring_hot_1024", "ARRING size 1024 hot path", ["ARRING", "bench:array:ring", "1024", "x"], 100_000, 50, 16, setup="reset_ring"), + ] + + def run_workload(self, workload: Workload) -> Result: + if workload.setup: + getattr(self, workload.setup)() + if self.args.warmup and workload.warmup_requests > 0: + self.invoke_benchmark(workload, workload.warmup_requests, quiet=True) + raw = self.invoke_benchmark(workload, self.scale_requests(workload.requests), quiet=True) + qps = self.parse_qps(raw) + return Result( + name=workload.name, + description=workload.description, + qps=qps, + requests=self.scale_requests(workload.requests), + clients=workload.clients, + pipeline=workload.pipeline, + rand_range=workload.rand_range, + 
command=workload.command, + raw_output=raw.strip(), + ) + + def invoke_benchmark(self, workload: Workload, requests: int, quiet: bool) -> str: + cmd = [ + self.redis_benchmark, + "-h", self.host, + "-p", str(self.port), + "--dbnum", str(self.db), + "-n", str(requests), + "-c", str(workload.clients), + "-P", str(workload.pipeline), + "--seed", str(self.args.seed), + ] + if quiet: + cmd.append("-q") + if workload.rand_range: + cmd.extend(["-r", str(workload.rand_range)]) + cmd.extend(workload.command) + return subprocess.check_output(cmd, text=True, stderr=subprocess.STDOUT) + + def parse_qps(self, raw: str) -> float: + m = QPS_RE.search(raw) + if not m: + raise BenchError(f"could not parse qps from redis-benchmark output:\n{raw}") + return float(m.group(1)) + + def scale_requests(self, requests: int) -> int: + scaled = int(requests * self.args.request_scale) + return max(1000, scaled) + + def reset_append(self) -> None: + self.cli(["DEL", "bench:array:append"]) + + def reset_ring(self) -> None: + self.cli(["DEL", "bench:array:ring"]) + + def print_summary(self) -> None: + print("\nsummary:") + print("| workload | qps | req | c | P | notes |") + print("|---|---:|---:|---:|---:|---|") + for r in self.results: + notes = r.description + if r.rand_range: + notes += f", rand=0..{r.rand_range - 1}" + print(f"| {r.name} | {r.qps:.2f} | {r.requests} | {r.clients} | {r.pipeline} | {notes} |") + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description=( + "Standalone Array benchmark harness. It uses DB 9 by default, " + "flushes that DB, loads deterministic Array datasets, and runs " + "custom redis-benchmark workloads." 
+ ) + ) + parser.add_argument("--src-dir", help="Path to the src directory containing redis-server, redis-cli, and redis-benchmark") + parser.add_argument("--host", default="127.0.0.1") + parser.add_argument("--port", type=int, default=6395) + parser.add_argument("--db", type=int, default=9) + parser.add_argument("--start-server", action="store_true", default=True, + help="Start an ephemeral redis-server on --port (default: enabled)") + parser.add_argument("--no-start-server", dest="start_server", action="store_false", + help="Use an already running server instead of starting one") + parser.add_argument("--keep-server", action="store_true", + help="Do not stop the ephemeral server after the run") + parser.add_argument("--only", help="Comma-separated workload names to run") + parser.add_argument("--seed", type=int, default=12345) + parser.add_argument("--request-scale", type=float, default=1.0, + help="Scale factor applied to all workload request counts") + parser.add_argument("--warmup", action="store_true", default=True, + help="Run a short warmup before each benchmark (default: enabled)") + parser.add_argument("--no-warmup", dest="warmup", action="store_false") + parser.add_argument("--json-out", help="Optional path for machine-readable results") + parser.add_argument("--dense-len", type=int, default=16_384, + help="Number of contiguous dense elements to preload") + parser.add_argument("--sparse-space", type=int, default=30_000_000, + help="Logical range used by sparse benchmarks") + return parser.parse_args() + + +def main() -> int: + args = parse_args() + try: + bench = RedisArrayBench(args) + return bench.run() + except BenchError as exc: + print(f"error: {exc}", file=sys.stderr) + return 1 + except subprocess.CalledProcessError as exc: + output = exc.output if isinstance(exc.output, str) else exc.output.decode("utf-8", "replace") + print(output, file=sys.stderr) + return exc.returncode or 1 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git 
a/utils/generate-command-code.py b/utils/generate-command-code.py index 8a25039ad..fcd676df5 100755 --- a/utils/generate-command-code.py +++ b/utils/generate-command-code.py @@ -34,6 +34,7 @@ GROUPS = { "geo": "COMMAND_GROUP_GEO", "stream": "COMMAND_GROUP_STREAM", "bitmap": "COMMAND_GROUP_BITMAP", + "array": "COMMAND_GROUP_ARRAY", "rate_limit": "COMMAND_GROUP_RATE_LIMIT", } @@ -603,8 +604,11 @@ const char *COMMAND_GROUP_STR[] = { "geo", "stream", "bitmap", + "array", "module", +#ifdef ENABLE_GCRA "rate_limit" +#endif }; const char *commandGroupStr(int index) {