mirror of
https://github.com/redis/redis.git
synced 2026-05-28 04:02:46 -04:00
[Vector Sets] IN operator for string/string operands (#14122)
This PR introduces "IN" overloading for strings in Vector Sets VSIM
FILTER expressions.
Now it is possible to do something like:
"foo" IN "foobar"
IN continues to work as usual when the second operand is an array,
checking whether the left operand is a member of it.
Ping @rowantrollope, who requested this feature. I'm evaluating whether to
add glob matching functionality via the `=~` operator, but I probably need
to do an optimization round on our glob matching function first. Glob
matching can be slower; at the same time, the complexity of the greedy
search in the graph remains unchanged, so it may be a good idea to have
it.
Case-insensitive search will likely not be added, however, since it
would require handling Unicode, which is outside the scope of Redis
filters. The user is still able to perform `"foo" in "foobar" || "FOO"
in "foobar"` at least.
This commit is contained in:
parent
a25f0a715e
commit
8948a5d2b2
3 changed files with 120 additions and 26 deletions
|
|
@ -365,6 +365,18 @@ JSON attributes are converted in this way:
|
|||
|
||||
Any other type is ignored, and accessing it will make the expression evaluate to false.
|
||||
|
||||
### The IN operator
|
||||
|
||||
The `IN` operator works in two ways. It can test for membership in an array, as in:
|
||||
|
||||
5 in [1, 2, 3]
|
||||
"foo" in [1, "foo", "bar"]
|
||||
|
||||
It can also check for substrings, when both operands (A and B) are strings.
|
||||
|
||||
"foo" in "barfoobar" # Will evaluate to true
|
||||
"zap" in "foobar" # Will evaluate to false
|
||||
|
||||
### Examples
|
||||
|
||||
```
|
||||
|
|
|
|||
|
|
@ -20,7 +20,6 @@
|
|||
#define RedisModule_Assert assert
|
||||
#define _DEFAULT_SOURCE
|
||||
#define _USE_MATH_DEFINES
|
||||
#define _POSIX_C_SOURCE 200809L
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#endif
|
||||
|
|
@ -620,11 +619,12 @@ exprstate *exprCompile(char *expr, int *errpos) {
|
|||
|
||||
if (token->token_type == EXPR_TOKEN_EOF) break;
|
||||
|
||||
/* Handle values (numbers, strings, selectors). */
|
||||
/* Handle values (numbers, strings, selectors, null). */
|
||||
if (token->token_type == EXPR_TOKEN_NUM ||
|
||||
token->token_type == EXPR_TOKEN_STR ||
|
||||
token->token_type == EXPR_TOKEN_TUPLE ||
|
||||
token->token_type == EXPR_TOKEN_SELECTOR)
|
||||
token->token_type == EXPR_TOKEN_SELECTOR ||
|
||||
token->token_type == EXPR_TOKEN_NULL)
|
||||
{
|
||||
exprStackPush(&es->program, token);
|
||||
exprTokenRetain(token);
|
||||
|
|
@ -734,6 +734,17 @@ int exprTokensEqual(exprtoken *a, exprtoken *b) {
|
|||
return exprTokenToNum(a) == exprTokenToNum(b);
|
||||
}
|
||||
|
||||
/* Return true if the string a is a substring of b.
 *
 * Both tokens must be of type EXPR_TOKEN_STR (this is asserted). The
 * comparison is byte-wise: a is compared with memcmp() against every
 * offset of b where it could still fit, so matching is case sensitive.
 *
 * Returns 1 if a occurs somewhere inside b, 0 otherwise. An empty a is
 * found at offset 0 of any b (including an empty b), because a zero
 * length memcmp() compares equal. */
|
||||
int exprTokensStringIn(exprtoken *a, exprtoken *b) {
|
||||
RedisModule_Assert(a->token_type == EXPR_TOKEN_STR &&
|
||||
b->token_type == EXPR_TOKEN_STR);
|
||||
if (a->str.len > b->str.len) return 0; // A is bigger, can't be a substring.
|
||||
/* Thanks to the length check above, b->str.len - a->str.len can't
 * underflow: it is the last offset at which a still fits inside b. */
for (size_t i = 0; i <= b->str.len - a->str.len; i++) {
|
||||
if (memcmp(b->str.start+i,a->str.start,a->str.len) == 0) return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "fastjson.c" // JSON parser implementation used by exprRun().
|
||||
|
||||
/* Execute the compiled expression program. Returns 1 if the final stack value
|
||||
|
|
@ -823,7 +834,9 @@ int exprRun(exprstate *es, char *json, size_t json_len) {
|
|||
result->num = !exprTokensEqual(a, b) ? 1 : 0;
|
||||
break;
|
||||
case EXPR_OP_IN: {
|
||||
// For 'in' operator, b must be a tuple.
|
||||
/* For 'in' operator, b must be a tuple, and we check for
|
||||
* membership. Otherwise both a and b must be strings, and
|
||||
* in this case we check if a is a substring of b. */
|
||||
result->num = 0; // Default to false.
|
||||
if (b->token_type == EXPR_TOKEN_TUPLE) {
|
||||
for (size_t j = 0; j < b->tuple.len; j++) {
|
||||
|
|
@ -832,6 +845,10 @@ int exprRun(exprstate *es, char *json, size_t json_len) {
|
|||
break;
|
||||
}
|
||||
}
|
||||
} else if (a->token_type == EXPR_TOKEN_STR &&
|
||||
b->token_type == EXPR_TOKEN_STR)
|
||||
{
|
||||
result->num = exprTokensStringIn(a,b);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -39,124 +39,189 @@ class VSIMFilterExpressions(TestCase):
|
|||
self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:5',
|
||||
'invalid json') # Intentionally malformed JSON
|
||||
|
||||
# Test 1: Basic equality with numbers
|
||||
# Basic equality with numbers
|
||||
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec1],
|
||||
'FILTER', '.age == 25')
|
||||
assert len(result) == 1, "Expected 1 result for age == 25"
|
||||
assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1 for age == 25"
|
||||
|
||||
# Test 2: Greater than
|
||||
# Greater than
|
||||
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec1],
|
||||
'FILTER', '.age > 25')
|
||||
assert len(result) == 2, "Expected 2 results for age > 25"
|
||||
|
||||
# Test 3: Less than or equal
|
||||
# Less than or equal
|
||||
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec1],
|
||||
'FILTER', '.age <= 30')
|
||||
assert len(result) == 2, "Expected 2 results for age <= 30"
|
||||
|
||||
# Test 4: String equality
|
||||
# String equality
|
||||
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec1],
|
||||
'FILTER', '.name == "Alice"')
|
||||
assert len(result) == 1, "Expected 1 result for name == Alice"
|
||||
assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1 for name == Alice"
|
||||
|
||||
# Test 5: String inequality
|
||||
# String inequality
|
||||
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec1],
|
||||
'FILTER', '.name != "Alice"')
|
||||
assert len(result) == 2, "Expected 2 results for name != Alice"
|
||||
|
||||
# Test 6: Boolean value
|
||||
# Boolean value
|
||||
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec1],
|
||||
'FILTER', '.active')
|
||||
assert len(result) == 1, "Expected 1 result for .active being true"
|
||||
|
||||
# Test 7: Logical AND
|
||||
# Logical AND
|
||||
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec1],
|
||||
'FILTER', '.age > 20 and .age < 30')
|
||||
assert len(result) == 1, "Expected 1 result for 20 < age < 30"
|
||||
assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1 for 20 < age < 30"
|
||||
|
||||
# Test 8: Logical OR
|
||||
# Logical OR
|
||||
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec1],
|
||||
'FILTER', '.age < 30 or .age > 35')
|
||||
assert len(result) == 1, "Expected 1 result for age < 30 or age > 35"
|
||||
|
||||
# Test 9: Logical NOT
|
||||
# Logical NOT
|
||||
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec1],
|
||||
'FILTER', '!(.age == 25)')
|
||||
assert len(result) == 2, "Expected 2 results for NOT(age == 25)"
|
||||
|
||||
# Test 10: The "in" operator with array
|
||||
# The "in" operator with array
|
||||
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec1],
|
||||
'FILTER', '.age in [25, 35]')
|
||||
assert len(result) == 2, "Expected 2 results for age in [25, 35]"
|
||||
|
||||
# Test 11: The "in" operator with strings in array
|
||||
# The "in" operator with strings in array
|
||||
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec1],
|
||||
'FILTER', '.name in ["Alice", "David"]')
|
||||
assert len(result) == 1, "Expected 1 result for name in [Alice, David]"
|
||||
|
||||
# Test 12: Arithmetic operations - addition
|
||||
# The "in" operator for substring matching
|
||||
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec1],
|
||||
'FILTER', '"lic" in .name')
|
||||
assert len(result) == 1, "Expected 1 result for 'lic' in name"
|
||||
assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1 (Alice)"
|
||||
|
||||
# The "in" operator with city substring
|
||||
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec1],
|
||||
'FILTER', '"ork" in .city')
|
||||
assert len(result) == 1, "Expected 1 result for 'ork' in city"
|
||||
assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1 (New York)"
|
||||
|
||||
# The "in" operator with no matches
|
||||
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec1],
|
||||
'FILTER', '"xyz" in .name')
|
||||
assert len(result) == 0, "Expected 0 results for 'xyz' in name"
|
||||
|
||||
# Off-by-one tests - substring at the beginning
|
||||
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec1],
|
||||
'FILTER', '"Ali" in .name')
|
||||
assert len(result) == 1, "Expected 1 result for 'Ali' at beginning of 'Alice'"
|
||||
assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1"
|
||||
|
||||
# Off-by-one tests - substring at the end
|
||||
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec1],
|
||||
'FILTER', '"ice" in .name')
|
||||
assert len(result) == 1, "Expected 1 result for 'ice' at end of 'Alice'"
|
||||
assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1"
|
||||
|
||||
# Off-by-one tests - exact match (entire string)
|
||||
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec1],
|
||||
'FILTER', '"Alice" in .name')
|
||||
assert len(result) == 1, "Expected 1 result for exact match 'Alice' in 'Alice'"
|
||||
assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1"
|
||||
|
||||
# Off-by-one tests - single character
|
||||
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec1],
|
||||
'FILTER', '"A" in .name')
|
||||
assert len(result) == 1, "Expected 1 result for single char 'A' in 'Alice'"
|
||||
|
||||
# Off-by-one tests - empty string (should match all strings)
|
||||
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec1],
|
||||
'FILTER', '"" in .name')
|
||||
assert len(result) == 3, "Expected 3 results for empty string (matches all strings)"
|
||||
|
||||
# Off-by-one tests - non-empty strings are never substrings of ""
|
||||
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec1],
|
||||
'FILTER', '.name in ""')
|
||||
assert len(result) == 0, "Expected 0 results for empty string on the right of IN operator"
|
||||
|
||||
# Off-by-one tests - empty string match empty string.
|
||||
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec1],
|
||||
'FILTER', '"" in .name && "" in ""')
|
||||
assert len(result) == 3, "Expected empty string matching empty string"
|
||||
|
||||
# Arithmetic operations - addition
|
||||
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec1],
|
||||
'FILTER', '.age + 10 > 40')
|
||||
assert len(result) == 1, "Expected 1 result for age + 10 > 40"
|
||||
|
||||
# Test 13: Arithmetic operations - multiplication
|
||||
# Arithmetic operations - multiplication
|
||||
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec1],
|
||||
'FILTER', '.age * 2 > 60')
|
||||
assert len(result) == 1, "Expected 1 result for age * 2 > 60"
|
||||
|
||||
# Test 14: Arithmetic operations - division
|
||||
# Arithmetic operations - division
|
||||
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec1],
|
||||
'FILTER', '.age / 5 == 5')
|
||||
assert len(result) == 1, "Expected 1 result for age / 5 == 5"
|
||||
|
||||
# Test 15: Arithmetic operations - modulo
|
||||
# Arithmetic operations - modulo
|
||||
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec1],
|
||||
'FILTER', '.age % 2 == 0')
|
||||
assert len(result) == 1, "Expected 1 result for age % 2 == 0"
|
||||
|
||||
# Test 16: Power operator
|
||||
# Power operator
|
||||
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec1],
|
||||
'FILTER', '.age ** 2 > 900')
|
||||
assert len(result) == 1, "Expected 1 result for age^2 > 900"
|
||||
|
||||
# Test 17: Missing attribute (should exclude items missing that attribute)
|
||||
# Missing attribute (should exclude items missing that attribute)
|
||||
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec1],
|
||||
'FILTER', '.missing_field == "value"')
|
||||
assert len(result) == 0, "Expected 0 results for missing_field == value"
|
||||
|
||||
# Test 18: No attribute set at all
|
||||
# No attribute set at all
|
||||
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec1],
|
||||
'FILTER', '.any_field')
|
||||
assert f'{self.test_key}:item:4' not in [item.decode() for item in result], "Item with no attribute should be excluded"
|
||||
|
||||
# Test 19: Malformed JSON
|
||||
# Malformed JSON
|
||||
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec1],
|
||||
'FILTER', '.any_field')
|
||||
assert f'{self.test_key}:item:5' not in [item.decode() for item in result], "Item with malformed JSON should be excluded"
|
||||
|
||||
# Test 20: Complex expression combining multiple operators
|
||||
# Complex expression combining multiple operators
|
||||
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec1],
|
||||
'FILTER', '(.age > 20 and .age < 40) and (.city == "Boston" or .city == "New York")')
|
||||
|
|
@ -164,13 +229,13 @@ class VSIMFilterExpressions(TestCase):
|
|||
expected_items = [f'{self.test_key}:item:1', f'{self.test_key}:item:2']
|
||||
assert set([item.decode() for item in result]) == set(expected_items), "Expected item:1 and item:2 for the complex expression"
|
||||
|
||||
# Test 21: Parentheses to control operator precedence
|
||||
# Parentheses to control operator precedence
|
||||
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec1],
|
||||
'FILTER', '.age > (20 + 10)')
|
||||
assert len(result) == 1, "Expected 1 result for age > (20 + 10)"
|
||||
|
||||
# Test 22: Array access (arrays evaluate to true)
|
||||
# Array access (arrays evaluate to true)
|
||||
result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec1],
|
||||
'FILTER', '.scores')
|
||||
|
|
|
|||
Loading…
Reference in a new issue