|
32 | 32 | from __future__ import annotations
|
33 | 33 |
|
34 | 34 | import re
|
| 35 | +import unicodedata |
35 | 36 | import warnings
|
36 | 37 | import weakref
|
37 | 38 |
|
38 |
| -from collections import namedtuple |
| 39 | +from collections import deque, namedtuple |
39 | 40 | from decimal import Decimal
|
40 | 41 | from typing import (
|
41 | 42 | TYPE_CHECKING,
|
42 | 43 | Any,
|
| 44 | + Deque, |
43 | 45 | Dict,
|
44 | 46 | Generator,
|
45 | 47 | Iterator,
|
|
115 | 117 | MAX_RESULTS = 4294967295
|
116 | 118 |
|
117 | 119 |
|
def is_eol_comment(stmt: bytes) -> bool:
    """Tell whether a statement is an end-of-line (EOL) comment.

    Two comment styles are recognised:

    * Double-dash: the statement starts with `--` and the byte that follows
      the second dash maps to a code point whose Unicode general category is
      a separator (Z) or an "other"/control one (C) - e.g., a space, a tab
      or a newline.
    * Hash: the statement starts with `#` and is at least two bytes long.

    Args:
        stmt: MySQL statement.

    Returns:
        `True` when the statement is an EOL comment, `False` otherwise.

    References:
        [1]: https://dev.mysql.com/doc/refman/8.0/en/comments.html
    """
    size = len(stmt)

    # Double-dash style: the third byte decides whether this is a comment.
    if size >= 3 and stmt.startswith(b"--"):
        if unicodedata.category(chr(stmt[2]))[0] in ("Z", "C"):
            return True

    # Hash style: only the leading `#` (plus a minimum length) matters.
    return size >= 2 and stmt.startswith(b"#")
| 146 | + |
| 147 | + |
def parse_multi_statement_query(multi_stmt: bytes) -> Deque[bytes]:
    """Split a multi-statement query into the statements worth tracking.

    The query is split with `RE_SQL_SPLIT_STMTS`; each piece has its leading
    whitespace stripped. Pieces that end up empty are discarded, and so are
    EOL (end-of-line) comments.

    There is one exception: when the last non-empty statement of the stream
    is an EOL comment, it is kept. EOL comments do not generate results,
    however, when the last statement is an EOL comment the server returns an
    empty result. So, in order to match statements and results correctly the
    trailing EOL comment has to stay.

    Args:
        multi_stmt: Query representing multi-statement operations separated
                    by semicolons.

    Returns:
        A deque of statements that aren't empty and don't contain leading
        whitespace. None of them is an EOL comment, except perhaps for the
        last one.
    """
    kept: Deque[bytes] = deque()
    last_seen = b""

    for piece in RE_SQL_SPLIT_STMTS.split(multi_stmt):
        candidate = piece.lstrip()
        if not candidate:
            continue
        # Remember the most recent non-empty statement, comment or not.
        last_seen = candidate
        if not is_eol_comment(candidate):
            kept.append(candidate)

    # A trailing EOL comment still yields an (empty) result on the server.
    if is_eol_comment(last_seen):
        kept.append(last_seen)

    return kept
| 183 | + |
| 184 | + |
118 | 185 | class _ParamSubstitutor:
|
119 | 186 | """
|
120 | 187 | Substitutes parameters into SQL statement.
|
@@ -554,27 +621,62 @@ def _execute_iter(
|
554 | 621 | """Generator returns MySQLCursor objects for multiple statements
|
555 | 622 |
|
556 | 623 | This method is only used when multiple statements are executed
|
557 |
| - by the execute() method. It uses zip() to make an iterator from the |
558 |
| - given query_iter (result of MySQLConnection.cmd_query_iter()) and |
559 |
| - the list of statements that were executed. |
560 |
| - """ |
561 |
| - executed_list = RE_SQL_SPLIT_STMTS.split(self._executed) |
562 |
| - |
563 |
| - i = 0 |
564 |
| - while True: |
565 |
| - try: |
566 |
| - result = next(query_iter) |
567 |
| - self._reset_result() |
568 |
| - self._handle_result(result) |
569 |
| - try: |
570 |
| - self._executed = executed_list[i].strip() |
571 |
| - i += 1 |
572 |
| - except IndexError: |
573 |
| - self._executed = executed_list[0] |
574 |
| - |
575 |
| - yield self |
576 |
| - except StopIteration: |
577 |
| - return |
| 624 | + by the `cursor.execute(multi_stmt_query, multi=True)` method. |
| 625 | +
|
| 626 | + It matches the given `query_iter` (result of `MySQLConnection.cmd_query_iter()`) |
| 627 | + and the list of statements that were executed. |
| 628 | +
|
| 629 | + How does this method work? To properly map each statement (stmt) to a result, |
| 630 | + the following facts must be considered: |
| 631 | +
|
| 632 | + 1. Read operations such as `SELECT` produce a non-empty result |
| 633 | + (calling `next(query_iter)` gets a result that includes at least one column). |
| 634 | + 2. Write operations such as `INSERT` produce an empty result |
| 635 | + (calling `next(query_iter)` gets a result with no columns - aka empty). |
| 636 | + 3. End-of-line (EOL) comments do not produce a result, unless is the last stmt |
| 637 | + in which case produces an empty result. |
| 638 | + 4. Calling procedures such as `CALL my_proc` produce a sequence `(1)*0` which |
| 639 | + means it may produce zero or more non-empty results followed by just one |
| 640 | + empty result. In other words, a callproc stmt always terminates with an |
| 641 | + empty result. E.g., `my_proc` includes an update + select + select + update, |
| 642 | + then the result sequence will be `110` - note how the write ops results get |
| 643 | + annulated, just the read ops results are produced. Other examples: |
| 644 | + * insert + insert -> 0 |
| 645 | + * select + select + insert + select -> 1110 |
| 646 | + * select -> 10 |
| 647 | + Observe how 0 indicates the end of the result sequence. This property is |
| 648 | + vital to know what result corresponds to what callproc stmt. |
| 649 | +
|
| 650 | + In this regard, the implementation is composed of: |
| 651 | + 1. Parsing: the multi-statement is broken down into single statements, and then |
| 652 | + for each of these, leading white spaces are removed (including |
| 653 | + jumping line, vertical line, tab, etc.). Also, EOL comments are removed from |
| 654 | + the stream, except when the comment is the last statement of the |
| 655 | + multi-statement string. |
| 656 | + 2. Mapping: the facts described above are used as "game rules" to properly match |
| 657 | + statements and results. In case, if we run out of statements before running out |
| 658 | + of results we use a sentinel named "stmt_overflow!" to indicate that the mapping |
| 659 | + went wrong. |
| 660 | +
|
| 661 | + Acronyms |
| 662 | + 1: a non-empty result |
| 663 | + 0: an empty result |
| 664 | + """ |
| 665 | + executed_list = parse_multi_statement_query(multi_stmt=self._executed) |
| 666 | + self._executed = None |
| 667 | + stmt = executed_list.popleft() if executed_list else b"stmt_overflow!" |
| 668 | + for result in query_iter: |
| 669 | + self._reset_result() |
| 670 | + self._handle_result(result) |
| 671 | + |
| 672 | + if is_eol_comment(stmt): |
| 673 | + continue |
| 674 | + |
| 675 | + self._executed = stmt.rstrip() |
| 676 | + yield self |
| 677 | + |
| 678 | + if not stmt.upper().startswith(b"CALL") or "columns" not in result: |
| 679 | + stmt = executed_list.popleft() if executed_list else b"stmt_overflow!" |
578 | 680 |
|
579 | 681 | def execute(
|
580 | 682 | self,
|
|
0 commit comments