diff --git a/modules/vector-sets/README.md b/modules/vector-sets/README.md index 07180c1c6..cbc95a1b2 100644 --- a/modules/vector-sets/README.md +++ b/modules/vector-sets/README.md @@ -365,6 +365,18 @@ JSON attributes are converted in this way: Any other type is ignored, and accessig it will make the expression evaluate to false. +### The IN operator + +The `IN` operator works in two ways. It can test for membership in an array, as in: + + 5 in [1, 2, 3] + "foo" in [1, "foo", "bar"] + +It can also check for substrings, when both the A and B operands are strings. + + "foo" in "barfoobar" # Will evaluate to true + "zap" in "foobar" # Will evaluate to false + ### Examples ``` diff --git a/modules/vector-sets/expr.c b/modules/vector-sets/expr.c index 9be54eb26..4f3a1ccf2 100644 --- a/modules/vector-sets/expr.c +++ b/modules/vector-sets/expr.c @@ -20,7 +20,6 @@ #define RedisModule_Assert assert #define _DEFAULT_SOURCE #define _USE_MATH_DEFINES -#define _POSIX_C_SOURCE 200809L #include #include #endif @@ -620,11 +619,12 @@ exprstate *exprCompile(char *expr, int *errpos) { if (token->token_type == EXPR_TOKEN_EOF) break; - /* Handle values (numbers, strings, selectors). */ + /* Handle values (numbers, strings, selectors, null). */ if (token->token_type == EXPR_TOKEN_NUM || token->token_type == EXPR_TOKEN_STR || token->token_type == EXPR_TOKEN_TUPLE || - token->token_type == EXPR_TOKEN_SELECTOR) + token->token_type == EXPR_TOKEN_SELECTOR || + token->token_type == EXPR_TOKEN_NULL) { exprStackPush(&es->program, token); exprTokenRetain(token); @@ -734,6 +734,17 @@ int exprTokensEqual(exprtoken *a, exprtoken *b) { return exprTokenToNum(a) == exprTokenToNum(b); } +/* Return true if the string a is a substring of b. */ +int exprTokensStringIn(exprtoken *a, exprtoken *b) { + RedisModule_Assert(a->token_type == EXPR_TOKEN_STR && + b->token_type == EXPR_TOKEN_STR); + if (a->str.len > b->str.len) return 0; // A is bigger, can't be a substring. 
+ for (size_t i = 0; i <= b->str.len - a->str.len; i++) { + if (memcmp(b->str.start+i,a->str.start,a->str.len) == 0) return 1; + } + return 0; +} + #include "fastjson.c" // JSON parser implementation used by exprRun(). /* Execute the compiled expression program. Returns 1 if the final stack value @@ -823,7 +834,9 @@ int exprRun(exprstate *es, char *json, size_t json_len) { result->num = !exprTokensEqual(a, b) ? 1 : 0; break; case EXPR_OP_IN: { - // For 'in' operator, b must be a tuple. + /* For 'in' operator, b must be a tuple, and we check for + * membership. Otherwise both a and b must be strings, and + * in this case we check if a is a substring of b. */ result->num = 0; // Default to false. if (b->token_type == EXPR_TOKEN_TUPLE) { for (size_t j = 0; j < b->tuple.len; j++) { @@ -832,6 +845,10 @@ int exprRun(exprstate *es, char *json, size_t json_len) { break; } } + } else if (a->token_type == EXPR_TOKEN_STR && + b->token_type == EXPR_TOKEN_STR) + { + result->num = exprTokensStringIn(a,b); } break; } diff --git a/modules/vector-sets/tests/filter_expr.py b/modules/vector-sets/tests/filter_expr.py index 13abf7b65..364915d09 100644 --- a/modules/vector-sets/tests/filter_expr.py +++ b/modules/vector-sets/tests/filter_expr.py @@ -39,124 +39,189 @@ class VSIMFilterExpressions(TestCase): self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:5', 'invalid json') # Intentionally malformed JSON - # Test 1: Basic equality with numbers + # Basic equality with numbers result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, *[str(x) for x in vec1], 'FILTER', '.age == 25') assert len(result) == 1, "Expected 1 result for age == 25" assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1 for age == 25" - # Test 2: Greater than + # Greater than result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, *[str(x) for x in vec1], 'FILTER', '.age > 25') assert len(result) == 2, "Expected 2 results for age > 25" - # 
Test 3: Less than or equal + # Less than or equal result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, *[str(x) for x in vec1], 'FILTER', '.age <= 30') assert len(result) == 2, "Expected 2 results for age <= 30" - # Test 4: String equality + # String equality result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, *[str(x) for x in vec1], 'FILTER', '.name == "Alice"') assert len(result) == 1, "Expected 1 result for name == Alice" assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1 for name == Alice" - # Test 5: String inequality + # String inequality result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, *[str(x) for x in vec1], 'FILTER', '.name != "Alice"') assert len(result) == 2, "Expected 2 results for name != Alice" - # Test 6: Boolean value + # Boolean value result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, *[str(x) for x in vec1], 'FILTER', '.active') assert len(result) == 1, "Expected 1 result for .active being true" - # Test 7: Logical AND + # Logical AND result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, *[str(x) for x in vec1], 'FILTER', '.age > 20 and .age < 30') assert len(result) == 1, "Expected 1 result for 20 < age < 30" assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1 for 20 < age < 30" - # Test 8: Logical OR + # Logical OR result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, *[str(x) for x in vec1], 'FILTER', '.age < 30 or .age > 35') assert len(result) == 1, "Expected 1 result for age < 30 or age > 35" - # Test 9: Logical NOT + # Logical NOT result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, *[str(x) for x in vec1], 'FILTER', '!(.age == 25)') assert len(result) == 2, "Expected 2 results for NOT(age == 25)" - # Test 10: The "in" operator with array + # The "in" operator with array result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, *[str(x) for x in 
vec1], 'FILTER', '.age in [25, 35]') assert len(result) == 2, "Expected 2 results for age in [25, 35]" - # Test 11: The "in" operator with strings in array + # The "in" operator with strings in array result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, *[str(x) for x in vec1], 'FILTER', '.name in ["Alice", "David"]') assert len(result) == 1, "Expected 1 result for name in [Alice, David]" - # Test 12: Arithmetic operations - addition + # The "in" operator for substring matching + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '"lic" in .name') + assert len(result) == 1, "Expected 1 result for 'lic' in name" + assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1 (Alice)" + + # The "in" operator with city substring + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '"ork" in .city') + assert len(result) == 1, "Expected 1 result for 'ork' in city" + assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1 (New York)" + + # The "in" operator with no matches + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '"xyz" in .name') + assert len(result) == 0, "Expected 0 results for 'xyz' in name" + + # Off-by-one tests - substring at the beginning + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '"Ali" in .name') + assert len(result) == 1, "Expected 1 result for 'Ali' at beginning of 'Alice'" + assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1" + + # Off-by-one tests - substring at the end + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '"ice" in .name') + assert len(result) == 1, "Expected 1 result for 'ice' at end of 'Alice'" + assert result[0].decode() == f'{self.test_key}:item:1', 
"Expected item:1" + + # Off-by-one tests - exact match (entire string) + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '"Alice" in .name') + assert len(result) == 1, "Expected 1 result for exact match 'Alice' in 'Alice'" + assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1" + + # Off-by-one tests - single character + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '"A" in .name') + assert len(result) == 1, "Expected 1 result for single char 'A' in 'Alice'" + + # Off-by-one tests - empty string (should match all strings) + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '"" in .name') + assert len(result) == 3, "Expected 3 results for empty string (matches all strings)" + + # Off-by-one tests - non-empty strings are never substrings of "" + result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '.name in ""') + assert len(result) == 0, "Expected 0 results for empty string on the right of IN operator" + + # Off-by-one tests - empty string match empty string. 
+ result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, + *[str(x) for x in vec1], + 'FILTER', '"" in .name && "" in ""') + assert len(result) == 3, "Expected empty string matching empty string" + + # Arithmetic operations - addition result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, *[str(x) for x in vec1], 'FILTER', '.age + 10 > 40') assert len(result) == 1, "Expected 1 result for age + 10 > 40" - # Test 13: Arithmetic operations - multiplication + # Arithmetic operations - multiplication result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, *[str(x) for x in vec1], 'FILTER', '.age * 2 > 60') assert len(result) == 1, "Expected 1 result for age * 2 > 60" - # Test 14: Arithmetic operations - division + # Arithmetic operations - division result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, *[str(x) for x in vec1], 'FILTER', '.age / 5 == 5') assert len(result) == 1, "Expected 1 result for age / 5 == 5" - # Test 15: Arithmetic operations - modulo + # Arithmetic operations - modulo result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, *[str(x) for x in vec1], 'FILTER', '.age % 2 == 0') assert len(result) == 1, "Expected 1 result for age % 2 == 0" - # Test 16: Power operator + # Power operator result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, *[str(x) for x in vec1], 'FILTER', '.age ** 2 > 900') assert len(result) == 1, "Expected 1 result for age^2 > 900" - # Test 17: Missing attribute (should exclude items missing that attribute) + # Missing attribute (should exclude items missing that attribute) result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, *[str(x) for x in vec1], 'FILTER', '.missing_field == "value"') assert len(result) == 0, "Expected 0 results for missing_field == value" - # Test 18: No attribute set at all + # No attribute set at all result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, *[str(x) for x in 
vec1], 'FILTER', '.any_field') assert f'{self.test_key}:item:4' not in [item.decode() for item in result], "Item with no attribute should be excluded" - # Test 19: Malformed JSON + # Malformed JSON result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, *[str(x) for x in vec1], 'FILTER', '.any_field') assert f'{self.test_key}:item:5' not in [item.decode() for item in result], "Item with malformed JSON should be excluded" - # Test 20: Complex expression combining multiple operators + # Complex expression combining multiple operators result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, *[str(x) for x in vec1], 'FILTER', '(.age > 20 and .age < 40) and (.city == "Boston" or .city == "New York")') @@ -164,13 +229,13 @@ class VSIMFilterExpressions(TestCase): expected_items = [f'{self.test_key}:item:1', f'{self.test_key}:item:2'] assert set([item.decode() for item in result]) == set(expected_items), "Expected item:1 and item:2 for the complex expression" - # Test 21: Parentheses to control operator precedence + # Parentheses to control operator precedence result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, *[str(x) for x in vec1], 'FILTER', '.age > (20 + 10)') assert len(result) == 1, "Expected 1 result for age > (20 + 10)" - # Test 22: Array access (arrays evaluate to true) + # Array access (arrays evaluate to true) result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4, *[str(x) for x in vec1], 'FILTER', '.scores')