
Commit 9b498ba

WL10973: Allow JSON types as operands for IN operator
This patch updates the parsing of JSON arrays and JSON objects in the expression parser and adds the two new X operators `cont_in` and `not_cont_in` to support these operations. The IN operator is parsed as before when its right-hand side is a parenthesized expression list; the right-hand side may now also be a JSON array, a JSON document, or a document path, in which case the parser emits `cont_in` to check whether the single element on the left-hand side is contained in it. Regression tests have been added.
1 parent 113bca9 commit 9b498ba
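
For context, the new behavior can be exercised end to end through the X DevAPI roughly as follows. This is a minimal sketch: the connection settings and the schema/collection names are placeholders, and a MySQL server 8.0.2 or later is assumed (the `cont_in` operator is unavailable before that).

import mysqlx

# Placeholder connection settings -- adjust for your server.
session = mysqlx.get_session(host="localhost", port=33060,
                             user="root", password="")
schema = session.get_schema("test")
collection = schema.create_collection("movies")
collection.add({"title": "AFRICAN EGG",
                "actors": [{"name": "MILLA PECK", "country": "Mexico"}]}).execute()

# Parenthesized RHS: the plain IN operator, parsed as before.
doc = collection.find().fields(
    "title IN ('AFRICAN EGG', 'ATOMIC FIREFIGHTER') as res").execute().fetch_one()
print(doc["res"])  # True

# JSON array or document path on the RHS: now parsed as cont_in.
doc = collection.find().fields(
    "'Mexico' IN actors[*].country as res").execute().fetch_one()
print(doc["res"])  # True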

File tree

lib/mysqlx/expr.py
tests/test_mysqlx_crud.py

2 files changed: +169 -8 lines changed


lib/mysqlx/expr.py

Lines changed: 73 additions & 8 deletions
@@ -227,7 +227,9 @@ class TokenType:
     "*": "*",
     "/": "/",
     "~": "~",
-    "%": "%"
+    "%": "%",
+    "cast": "cast",
+    "cont_in": "cont_in"
 }

 unary_operators = {
@@ -243,7 +245,8 @@ class TokenType:
     "between": "not_between",
     "regexp": "not_regexp",
     "like": "not_like",
-    "in": "not_in"
+    "in": "not_in",
+    "cont_in": "not_cont_in"
 }


@@ -602,6 +605,7 @@ def docpath_member(self):
     def docpath_array_loc(self):
         self.consume_token(TokenType.LSQBRACKET)
         if self.cur_token_type_is(TokenType.MUL):
+            self.consume_token(TokenType.MUL)
             self.consume_token(TokenType.RSQBRACKET)
             doc_path_item = Message("Mysqlx.Expr.DocumentPathItem")
             doc_path_item["type"] = mysqlxpb_enum(
@@ -688,15 +692,30 @@ def column_identifier(self):
                 col_id["table_name"] = parts[1]
             elif i == 2:
                 col_id["schema_name"] = parts[2]
+
+        is_doc = False
         if self.cur_token_type_is(TokenType.DOLLAR):
+            is_doc = True
             self.consume_token(TokenType.DOLLAR)
             col_id["document_path"] = self.document_path()
         elif self.cur_token_type_is(TokenType.ARROW):
+            is_doc = True
             self.consume_token(TokenType.ARROW)
-            self.consume_token(TokenType.QUOTE)
+            is_quoted = False
+            if self.cur_token_type_is(TokenType.QUOTE):
+                is_quoted = True
+                self.consume_token(TokenType.QUOTE)
             self.consume_token(TokenType.DOLLAR)
             col_id["document_path"] = self.document_path()
-            self.consume_token(TokenType.QUOTE)
+            if is_quoted:
+                self.consume_token(TokenType.QUOTE)
+
+        if is_doc and len(col_id["document_path"]) == 0:
+            doc_path_item = Message("Mysqlx.Expr.DocumentPathItem")
+            doc_path_item["type"] = mysqlxpb_enum(
+                "Mysqlx.Expr.DocumentPathItem.Type.MEMBER")
+            doc_path_item["value"] = ""
+            col_id["document_path"].extend([doc_path_item.get_message()])

         msg_expr = Message("Mysqlx.Expr.Expr")
         msg_expr["type"] = mysqlxpb_enum("Mysqlx.Expr.Expr.Type.IDENT")
@@ -723,8 +742,46 @@ def consume_any_token(self):
         self.pos += 1
         return value

+    def parse_json_array(self):
+        """
+        jsonArray ::= "[" [ expr ("," expr)* ] "]"
+        """
+        msg = Message("Mysqlx.Expr.Array")
+        while self.pos < len(self.tokens) and \
+                not self.cur_token_type_is(TokenType.RSQBRACKET):
+            msg["value"].extend([self.expr().get_message()])
+            if not self.cur_token_type_is(TokenType.COMMA):
+                break
+            self.consume_token(TokenType.COMMA)
+        self.consume_token(TokenType.RSQBRACKET)
+
+        expr = Message("Mysqlx.Expr.Expr")
+        expr["type"] = mysqlxpb_enum("Mysqlx.Expr.Expr.Type.ARRAY")
+        expr["array"] = msg.get_message()
+        return expr
+
     def parse_json_doc(self):
-        o = Object()
+        """
+        jsonDoc ::= "{" [jsonKeyValue ("," jsonKeyValue)*] "}"
+        jsonKeyValue ::= STRING_DQ ":" expr
+        """
+        msg = Message("Mysqlx.Expr.Object")
+        while self.pos < len(self.tokens) and \
+                not self.cur_token_type_is(TokenType.RCURLY):
+            item = Message("Mysqlx.Expr.Object.ObjectField")
+            item["key"] = self.consume_token(TokenType.LSTRING)
+            self.consume_token(TokenType.COLON)
+            item["value"] = self.expr().get_message()
+            msg["fld"].extend([item.get_message()])
+            if not self.cur_token_type_is(TokenType.COMMA):
+                break
+            self.consume_token(TokenType.COMMA)
+        self.consume_token(TokenType.RCURLY)
+
+        expr = Message("Mysqlx.Expr.Expr")
+        expr["type"] = mysqlxpb_enum("Mysqlx.Expr.Expr.Type.OBJECT")
+        expr["object"] = msg.get_message()
+        return expr

     def parse_place_holder(self, token):
         place_holder_name = ""
@@ -829,6 +886,8 @@ def atomic_expr(self):
             return self.parse_place_holder(token)
         elif token.type == TokenType.LCURLY:
             return self.parse_json_doc()
+        elif token.type == TokenType.LSQBRACKET:
+            return self.parse_json_array()
         elif token.type == TokenType.CAST:
             return self.cast()
         elif token.type == TokenType.LPAREN:
@@ -978,7 +1037,11 @@ def ilri_expr(self):
             params.append(self.comp_expr().get_message())
         elif self.cur_token_type_is(TokenType.IN):
             self.consume_token(TokenType.IN)
-            params.extend(self.paren_expr_list())
+            if self.cur_token_type_is(TokenType.LPAREN):
+                params.extend(self.paren_expr_list())
+            else:
+                op_name = "cont_in"
+                params.append(self.comp_expr().get_message())
         elif self.cur_token_type_is(TokenType.LIKE):
             self.consume_token(TokenType.LIKE)
             params.append(self.comp_expr().get_message())
@@ -1037,8 +1100,10 @@ def _table_fields(self):
         temp.reverse()
         while temp:
             field = temp.pop()
-            while field.count("(") != field.count(")"):
-                field = "{0}{1}".format(temp.pop(), field)
+            while field.count("(") != field.count(")") or \
+                    field.count("[") != field.count("]") or \
+                    field.count("{") != field.count("}"):
+                field = "{1},{0}".format(temp.pop(), field)
             fields.append(field.strip())
         return fields

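The `_table_fields` change above extends the bracket-balancing check from parentheses to square and curly brackets and re-inserts the comma that the field split consumed, so a JSON array or document literal used in a projection stays together as one field. A standalone sketch of that balancing loop (the `split_fields` name and the sample input are illustrative, not the library code):

def split_fields(field_spec):
    # Split a comma-separated projection list while keeping bracketed
    # groups -- (), [] and {} -- intact; assumes the spec is balanced.
    parts = field_spec.split(",")
    parts.reverse()
    fields = []
    while parts:
        field = parts.pop()
        # Pull in further pieces until every bracket type is balanced,
        # restoring the comma removed by the split.
        while (field.count("(") != field.count(")") or
               field.count("[") != field.count("]") or
               field.count("{") != field.count("}")):
            field = "{0},{1}".format(field, parts.pop())
        fields.append(field.strip())
    return fields

print(split_fields("name, {'k': [1, 2]} as doc"))
# ['name', "{'k': [1, 2]} as doc"]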
tests/test_mysqlx_crud.py

Lines changed: 96 additions & 0 deletions
@@ -474,6 +474,102 @@ def test_add(self):

         self.schema.drop_collection(collection_name)

+    @unittest.skipIf(tests.MYSQL_VERSION < (8, 0, 2),
+                     "CONT_IN operator unavailable")
+    def test_cont_in_operator(self):
+        collection_name = "{0}.test".format(self.schema_name)
+        collection = self.schema.create_collection(collection_name)
+        collection.add({
+            "_id": "a6f4b93e1a264a108393524f29546a8c",
+            "title": "AFRICAN EGG",
+            "description": "A Fast-Paced Documentary of a Pastry Chef And a "
+                           "Dentist who must Pursue a Forensic Psychologist in "
+                           "The Gulf of Mexico",
+            "releaseyear": 2006,
+            "language": "English",
+            "duration": 130,
+            "rating": "G",
+            "genre": "Science fiction",
+            "actors": [{
+                "name": "MILLA PECK",
+                "country": "Mexico",
+                "birthdate": "12 Jan 1984"
+            }, {
+                "name": "VAL BOLGER",
+                "country": "Botswana",
+                "birthdate": "26 Jul 1975"
+            }, {
+                "name": "SCARLETT BENING",
+                "country": "Syria",
+                "birthdate": "16 Mar 1978"
+            }],
+            "additionalinfo": {
+                "director": "Sharice Legaspi",
+                "writers": ["Rusty Couturier", "Angelic Orduno", "Carin Postell"],
+                "productioncompanies": ["Qvodrill", "Indigoholdings"]
+            }
+        }).execute()
+
+        tests = [
+            ("(1+5) in (1, 2, 3, 4, 5)", False),
+            ("(1>5) in (true, false)", True),
+            ("('a'>'b') in (true, false)", True),
+            ("(1>5) in [true, false]", None),
+            ("(1+5) in [1, 2, 3, 4, 5]", None),
+            ("('a'>'b') in [true, false]", None),
+            ("true IN [(1>5), !(false), (true || false), (false && true)]",
+             True),
+            ("true IN ((1>5), !(false), (true || false), (false && true))",
+             True),
+            ("{ 'name' : 'MILLA PECK' } IN actors", True),
+            ("{\"field\":true} IN (\"mystring\", 124, myvar, othervar.jsonobj)",
+             None),
+            ("actor.name IN ['a name', null, (1<5-4), myvar.jsonobj.name]",
+             None),
+            ("!false && true IN [true]", True),
+            ("1-5/2*2 > 3-2/1*2 IN [true, false]", None),
+            ("true IN [1-5/2*2 > 3-2/1*2]", False),
+            ("'African Egg' IN ('African Egg', 1, true, NULL, [0,1,2], "
+             "{ 'title' : 'Atomic Firefighter' })", True),
+            ("1 IN ('African Egg', 1, true, NULL, [0,1,2], "
+             "{ 'title' : 'Atomic Firefighter' })", True),
+            ("false IN ('African Egg', 1, true, NULL, [0,1,2], "
+             "{ 'title' : 'Atomic Firefighter' })", True),
+            ("[0,1,2] IN ('African Egg', 1, true, NULL, [0,1,2], "
+             "{ 'title' : 'Atomic Firefighter' })", True),
+            ("{ 'title' : 'Atomic Firefighter' } IN ('African Egg', 1, true, "
+             "NULL, [0,1,2], { 'title' : 'Atomic Firefighter' })", True),
+            ("title IN ('African Egg', 'The Witcher', 'Jurassic Perk')", False),
+            ("releaseyear IN (2006, 2010, 2017)", True),
+            ("'African Egg' in movietitle", None),
+            ("0 NOT IN [1,2,3]", True),
+            ("1 NOT IN [1,2,3]", False),
+            ("'' IN title", False),
+            ("title IN ('', ' ')", False),
+            ("title IN ['', ' ']", False),
+            ("[\"Rusty Couturier\", \"Angelic Orduno\", \"Carin Postell\"] IN "
+             "additionalinfo.writers", True),
+            ("{ \"name\" : \"MILLA PECK\", \"country\" : \"Mexico\", "
+             "\"birthdate\": \"12 Jan 1984\"} IN actors", True),
+            ("releaseyear IN [2006, 2007, 2008]", True),
+            ("true IN title", False),
+            ("false IN genre", False),
+            ("'Sharice Legaspi' IN additionalinfo.director", True),
+            ("'Mexico' IN actors[*].country", True),
+            ("'Angelic Orduno' IN additionalinfo.writers", True),
+        ]
+
+        for test in tests:
+            try:
+                result = collection.find() \
+                    .fields("{0} as res".format(test[0])) \
+                    .execute().fetch_one()
+            except:
+                self.assertEqual(None, test[1])
+            else:
+                self.assertEqual(result['res'], test[1])
+        self.schema.drop_collection(collection_name)
+
     def test_ilri_expressions(self):
         collection_name = "{0}.test".format(self.schema_name)
         collection = self.schema.create_collection(collection_name)

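The negated form follows the same path: when the right-hand side of NOT IN is a JSON array, document, or document path, the parser maps the operator to `not_cont_in` rather than `not_in`. A small follow-on to the earlier sketch, mirroring the "0 NOT IN [1,2,3]" row in the test above (same placeholder session, schema, and collection):

# 0 is not contained in the JSON array [1, 2, 3], so res is True.
doc = collection.find().fields("0 NOT IN [1, 2, 3] as res").execute().fetch_one()
print(doc["res"])  # True

schema.drop_collection("movies")
session.close()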