
Commit 9b498ba

WL10973: Allow JSON types as operands for IN operator
This patch updates the parsing of JSON arrays and JSON objects in the expression parser and adds the two new X operators `cont_in` and `not_cont_in` to support these operations. The IN operator is parsed as before when its right-hand side is a parenthesized expression list; the right-hand side may now also be a JSON array, a JSON document, or a document path, in which case the parser emits `cont_in` to check whether the single element on the left-hand side is contained in it. Regression tests have been added.
1 parent 113bca9 commit 9b498ba
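
For context, the new behavior can be exercised end to end through the X DevAPI roughly as follows. This is a minimal sketch: the connection settings and the schema/collection names are placeholders, and a MySQL server 8.0.2 or later is assumed (the `cont_in` operator is unavailable before that).

import mysqlx

# Placeholder connection settings -- adjust for your server.
session = mysqlx.get_session(host="localhost", port=33060,
                             user="root", password="")
schema = session.get_schema("test")
collection = schema.create_collection("movies")
collection.add({"title": "AFRICAN EGG",
                "actors": [{"name": "MILLA PECK", "country": "Mexico"}]}).execute()

# Parenthesized RHS: the plain IN operator, parsed as before.
doc = collection.find().fields(
    "title IN ('AFRICAN EGG', 'ATOMIC FIREFIGHTER') as res").execute().fetch_one()
print(doc["res"])  # True

# JSON array or document path on the RHS: now parsed as cont_in.
doc = collection.find().fields(
    "'Mexico' IN actors[*].country as res").execute().fetch_one()
print(doc["res"])  # True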

File tree

lib/mysqlx/expr.py
tests/test_mysqlx_crud.py

2 files changed: +169 -8 lines changed


lib/mysqlx/expr.py

Lines changed: 73 additions & 8 deletions
@@ -227,7 +227,9 @@ class TokenType:
     "*": "*",
     "/": "/",
     "~": "~",
-    "%": "%"
+    "%": "%",
+    "cast": "cast",
+    "cont_in": "cont_in"
 }

 unary_operators = {
@@ -243,7 +245,8 @@ class TokenType:
     "between": "not_between",
     "regexp": "not_regexp",
     "like": "not_like",
-    "in": "not_in"
+    "in": "not_in",
+    "cont_in": "not_cont_in"
 }


@@ -602,6 +605,7 @@ def docpath_member(self):
     def docpath_array_loc(self):
         self.consume_token(TokenType.LSQBRACKET)
         if self.cur_token_type_is(TokenType.MUL):
+            self.consume_token(TokenType.MUL)
             self.consume_token(TokenType.RSQBRACKET)
             doc_path_item = Message("Mysqlx.Expr.DocumentPathItem")
             doc_path_item["type"] = mysqlxpb_enum(
@@ -688,15 +692,30 @@ def column_identifier(self):
                 col_id["table_name"] = parts[1]
             elif i == 2:
                 col_id["schema_name"] = parts[2]
+
+        is_doc = False
         if self.cur_token_type_is(TokenType.DOLLAR):
+            is_doc = True
             self.consume_token(TokenType.DOLLAR)
             col_id["document_path"] = self.document_path()
         elif self.cur_token_type_is(TokenType.ARROW):
+            is_doc = True
             self.consume_token(TokenType.ARROW)
-            self.consume_token(TokenType.QUOTE)
+            is_quoted = False
+            if self.cur_token_type_is(TokenType.QUOTE):
+                is_quoted = True
+                self.consume_token(TokenType.QUOTE)
             self.consume_token(TokenType.DOLLAR)
             col_id["document_path"] = self.document_path()
-            self.consume_token(TokenType.QUOTE)
+            if is_quoted:
+                self.consume_token(TokenType.QUOTE)
+
+        if is_doc and len(col_id["document_path"]) == 0:
+            doc_path_item = Message("Mysqlx.Expr.DocumentPathItem")
+            doc_path_item["type"] = mysqlxpb_enum(
+                "Mysqlx.Expr.DocumentPathItem.Type.MEMBER")
+            doc_path_item["value"] = ""
+            col_id["document_path"].extend([doc_path_item.get_message()])

         msg_expr = Message("Mysqlx.Expr.Expr")
         msg_expr["type"] = mysqlxpb_enum("Mysqlx.Expr.Expr.Type.IDENT")
@@ -723,8 +742,46 @@ def consume_any_token(self):
         self.pos += 1
         return value

+    def parse_json_array(self):
+        """
+        jsonArray ::= "[" [ expr ("," expr)* ] "]"
+        """
+        msg = Message("Mysqlx.Expr.Array")
+        while self.pos < len(self.tokens) and \
+                not self.cur_token_type_is(TokenType.RSQBRACKET):
+            msg["value"].extend([self.expr().get_message()])
+            if not self.cur_token_type_is(TokenType.COMMA):
+                break
+            self.consume_token(TokenType.COMMA)
+        self.consume_token(TokenType.RSQBRACKET)
+
+        expr = Message("Mysqlx.Expr.Expr")
+        expr["type"] = mysqlxpb_enum("Mysqlx.Expr.Expr.Type.ARRAY")
+        expr["array"] = msg.get_message()
+        return expr
+
     def parse_json_doc(self):
-        o = Object()
+        """
+        jsonDoc ::= "{" [jsonKeyValue ("," jsonKeyValue)*] "}"
+        jsonKeyValue ::= STRING_DQ ":" expr
+        """
+        msg = Message("Mysqlx.Expr.Object")
+        while self.pos < len(self.tokens) and \
+                not self.cur_token_type_is(TokenType.RCURLY):
+            item = Message("Mysqlx.Expr.Object.ObjectField")
+            item["key"] = self.consume_token(TokenType.LSTRING)
+            self.consume_token(TokenType.COLON)
+            item["value"] = self.expr().get_message()
+            msg["fld"].extend([item.get_message()])
+            if not self.cur_token_type_is(TokenType.COMMA):
+                break
+            self.consume_token(TokenType.COMMA)
+        self.consume_token(TokenType.RCURLY)
+
+        expr = Message("Mysqlx.Expr.Expr")
+        expr["type"] = mysqlxpb_enum("Mysqlx.Expr.Expr.Type.OBJECT")
+        expr["object"] = msg.get_message()
+        return expr

     def parse_place_holder(self, token):
         place_holder_name = ""
@@ -829,6 +886,8 @@ def atomic_expr(self):
             return self.parse_place_holder(token)
         elif token.type == TokenType.LCURLY:
             return self.parse_json_doc()
+        elif token.type == TokenType.LSQBRACKET:
+            return self.parse_json_array()
         elif token.type == TokenType.CAST:
             return self.cast()
         elif token.type == TokenType.LPAREN:
@@ -978,7 +1037,11 @@ def ilri_expr(self):
             params.append(self.comp_expr().get_message())
         elif self.cur_token_type_is(TokenType.IN):
             self.consume_token(TokenType.IN)
-            params.extend(self.paren_expr_list())
+            if self.cur_token_type_is(TokenType.LPAREN):
+                params.extend(self.paren_expr_list())
+            else:
+                op_name = "cont_in"
+                params.append(self.comp_expr().get_message())
         elif self.cur_token_type_is(TokenType.LIKE):
             self.consume_token(TokenType.LIKE)
             params.append(self.comp_expr().get_message())
@@ -1037,8 +1100,10 @@ def _table_fields(self):
         temp.reverse()
         while temp:
             field = temp.pop()
-            while field.count("(") != field.count(")"):
-                field = "{0}{1}".format(temp.pop(), field)
+            while field.count("(") != field.count(")") or \
+                    field.count("[") != field.count("]") or \
+                    field.count("{") != field.count("}"):
+                field = "{1},{0}".format(temp.pop(), field)
             fields.append(field.strip())
         return fields

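The `_table_fields` change above extends the bracket-balancing check from parentheses to square and curly brackets and re-inserts the comma that the field split consumed, so a JSON array or document literal used in a projection stays together as one field. A standalone sketch of that balancing loop (the `split_fields` name and the sample input are illustrative, not the library code):

def split_fields(field_spec):
    # Split a comma-separated projection list while keeping bracketed
    # groups -- (), [] and {} -- intact; assumes the spec is balanced.
    parts = field_spec.split(",")
    parts.reverse()
    fields = []
    while parts:
        field = parts.pop()
        # Pull in further pieces until every bracket type is balanced,
        # restoring the comma removed by the split.
        while (field.count("(") != field.count(")") or
               field.count("[") != field.count("]") or
               field.count("{") != field.count("}")):
            field = "{0},{1}".format(field, parts.pop())
        fields.append(field.strip())
    return fields

print(split_fields("name, {'k': [1, 2]} as doc"))
# ['name', "{'k': [1, 2]} as doc"]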
tests/test_mysqlx_crud.py

Lines changed: 96 additions & 0 deletions
@@ -474,6 +474,102 @@ def test_add(self):

         self.schema.drop_collection(collection_name)

+    @unittest.skipIf(tests.MYSQL_VERSION < (8, 0, 2),
+                     "CONT_IN operator unavailable")
+    def test_cont_in_operator(self):
+        collection_name = "{0}.test".format(self.schema_name)
+        collection = self.schema.create_collection(collection_name)
+        collection.add({
+            "_id": "a6f4b93e1a264a108393524f29546a8c",
+            "title": "AFRICAN EGG",
+            "description": "A Fast-Paced Documentary of a Pastry Chef And a "
+                           "Dentist who must Pursue a Forensic Psychologist in "
+                           "The Gulf of Mexico",
+            "releaseyear": 2006,
+            "language": "English",
+            "duration": 130,
+            "rating": "G",
+            "genre": "Science fiction",
+            "actors": [{
+                "name": "MILLA PECK",
+                "country": "Mexico",
+                "birthdate": "12 Jan 1984"
+            }, {
+                "name": "VAL BOLGER",
+                "country": "Botswana",
+                "birthdate": "26 Jul 1975"
+            }, {
+                "name": "SCARLETT BENING",
+                "country": "Syria",
+                "birthdate": "16 Mar 1978"
+            }],
+            "additionalinfo": {
+                "director": "Sharice Legaspi",
+                "writers": ["Rusty Couturier", "Angelic Orduno", "Carin Postell"],
+                "productioncompanies": ["Qvodrill", "Indigoholdings"]
+            }
+        }).execute()
+
+        tests = [
+            ("(1+5) in (1, 2, 3, 4, 5)", False),
+            ("(1>5) in (true, false)", True),
+            ("('a'>'b') in (true, false)", True),
+            ("(1>5) in [true, false]", None),
+            ("(1+5) in [1, 2, 3, 4, 5]", None),
+            ("('a'>'b') in [true, false]", None),
+            ("true IN [(1>5), !(false), (true || false), (false && true)]",
+             True),
+            ("true IN ((1>5), !(false), (true || false), (false && true))",
+             True),
+            ("{ 'name' : 'MILLA PECK' } IN actors", True),
+            ("{\"field\":true} IN (\"mystring\", 124, myvar, othervar.jsonobj)",
+             None),
+            ("actor.name IN ['a name', null, (1<5-4), myvar.jsonobj.name]",
+             None),
+            ("!false && true IN [true]", True),
+            ("1-5/2*2 > 3-2/1*2 IN [true, false]", None),
+            ("true IN [1-5/2*2 > 3-2/1*2]", False),
+            ("'African Egg' IN ('African Egg', 1, true, NULL, [0,1,2], "
+             "{ 'title' : 'Atomic Firefighter' })", True),
+            ("1 IN ('African Egg', 1, true, NULL, [0,1,2], "
+             "{ 'title' : 'Atomic Firefighter' })", True),
+            ("false IN ('African Egg', 1, true, NULL, [0,1,2], "
+             "{ 'title' : 'Atomic Firefighter' })", True),
+            ("[0,1,2] IN ('African Egg', 1, true, NULL, [0,1,2], "
+             "{ 'title' : 'Atomic Firefighter' })", True),
+            ("{ 'title' : 'Atomic Firefighter' } IN ('African Egg', 1, true, "
+             "NULL, [0,1,2], { 'title' : 'Atomic Firefighter' })", True),
+            ("title IN ('African Egg', 'The Witcher', 'Jurassic Perk')", False),
+            ("releaseyear IN (2006, 2010, 2017)", True),
+            ("'African Egg' in movietitle", None),
+            ("0 NOT IN [1,2,3]", True),
+            ("1 NOT IN [1,2,3]", False),
+            ("'' IN title", False),
+            ("title IN ('', ' ')", False),
+            ("title IN ['', ' ']", False),
+            ("[\"Rusty Couturier\", \"Angelic Orduno\", \"Carin Postell\"] IN "
+             "additionalinfo.writers", True),
+            ("{ \"name\" : \"MILLA PECK\", \"country\" : \"Mexico\", "
+             "\"birthdate\": \"12 Jan 1984\"} IN actors", True),
+            ("releaseyear IN [2006, 2007, 2008]", True),
+            ("true IN title", False),
+            ("false IN genre", False),
+            ("'Sharice Legaspi' IN additionalinfo.director", True),
+            ("'Mexico' IN actors[*].country", True),
+            ("'Angelic Orduno' IN additionalinfo.writers", True),
+        ]
+
+        for test in tests:
+            try:
+                result = collection.find() \
+                    .fields("{0} as res".format(test[0])) \
+                    .execute().fetch_one()
+            except:
+                self.assertEqual(None, test[1])
+            else:
+                self.assertEqual(result['res'], test[1])
+        self.schema.drop_collection(collection_name)
+
     def test_ilri_expressions(self):
         collection_name = "{0}.test".format(self.schema_name)
         collection = self.schema.create_collection(collection_name)

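The negated form follows the same path: when the right-hand side of NOT IN is a JSON array, document, or document path, the parser maps the operator to `not_cont_in` rather than `not_in`. A small follow-on to the earlier sketch, mirroring the "0 NOT IN [1,2,3]" row in the test above (same placeholder session, schema, and collection):

# 0 is not contained in the JSON array [1, 2, 3], so res is True.
doc = collection.find().fields("0 NOT IN [1, 2, 3] as res").execute().fetch_one()
print(doc["res"])  # True

schema.drop_collection("movies")
session.close()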