Skip to content

Commit eeefa7f

Browse files
bpo-43833: Emit warnings for numeric literals followed by keyword (GH-25466)
Emit a deprecation warning if the numeric literal is immediately followed by one of keywords: and, else, for, if, in, is, or. Raise a syntax error with more informative message if it is immediately followed by other keyword or identifier. Automerge-Triggered-By: GH:pablogsal (cherry picked from commit 2ea6d89) Co-authored-by: Serhiy Storchaka <[email protected]>
1 parent 2cfe0e7 commit eeefa7f

File tree

5 files changed

+246
-3
lines changed

5 files changed

+246
-3
lines changed

Doc/whatsnew/3.10.rst

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1451,6 +1451,17 @@ Optimizations
14511451
Deprecated
14521452
==========
14531453
1454+
* Currently Python accepts numeric literals immediately followed by keywords,
1455+
for example ``0in x``, ``1or x``, ``0if 1else 2``. It allows confusing
1456+
and ambiguous expressions like ``[0x1for x in y]`` (which can be
1457+
interpreted as ``[0x1 for x in y]`` or ``[0x1f or x in y]``). Starting in
1458+
this release, a deprecation warning is raised if the numeric literal is
1459+
immediately followed by one of the keywords :keyword:`and`, :keyword:`else`,
1460+
:keyword:`for`, :keyword:`if`, :keyword:`in`, :keyword:`is` and :keyword:`or`.
1461+
In future releases it will be changed to a syntax warning, and finally to a
1462+
syntax error.
1463+
(Contributed by Serhiy Storchaka in :issue:`43833`).
1464+
14541465
* Starting in this release, there will be a concerted effort to begin
14551466
cleaning up old import semantics that were kept for Python 2.7
14561467
compatibility. Specifically,
@@ -1677,6 +1688,18 @@ This section lists previously described changes and other bugfixes
16771688
that may require changes to your code.
16781689
16791690
1691+
Changes in the Python syntax
1692+
----------------------------
1693+
1694+
* A deprecation warning is now emitted when compiling previously valid syntax
1695+
if the numeric literal is immediately followed by a keyword (like in ``0in x``).
1696+
In future releases it will be changed to a syntax warning, and finally to a
1697+
syntax error. To get rid of the warning and make the code compatible with
1698+
future releases just add a space between the numeric literal and the
1699+
following keyword.
1700+
(Contributed by Serhiy Storchaka in :issue:`43833`).
1701+
1702+
16801703
Changes in the Python API
16811704
-------------------------
16821705

Lib/test/test_compile.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ def test_literals_with_leading_zeroes(self):
162162
for arg in ["077787", "0xj", "0x.", "0e", "090000000000000",
163163
"080000000000000", "000000000000009", "000000000000008",
164164
"0b42", "0BADCAFE", "0o123456789", "0b1.1", "0o4.2",
165-
"0b101j2", "0o153j2", "0b100e1", "0o777e1", "0777",
165+
"0b101j", "0o153j", "0b100e1", "0o777e1", "0777",
166166
"000777", "000000000000007"]:
167167
self.assertRaises(SyntaxError, eval, arg)
168168

Lib/test/test_grammar.py

Lines changed: 90 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -177,8 +177,10 @@ def test_floats(self):
177177

178178
def test_float_exponent_tokenization(self):
179179
# See issue 21642.
180-
self.assertEqual(1 if 1else 0, 1)
181-
self.assertEqual(1 if 0else 0, 0)
180+
with warnings.catch_warnings():
181+
warnings.simplefilter('ignore', DeprecationWarning)
182+
self.assertEqual(eval("1 if 1else 0"), 1)
183+
self.assertEqual(eval("1 if 0else 0"), 0)
182184
self.assertRaises(SyntaxError, eval, "0 if 1Else 0")
183185

184186
def test_underscore_literals(self):
@@ -211,6 +213,92 @@ def test_bad_numerical_literals(self):
211213
check("1e2_", "invalid decimal literal")
212214
check("1e+", "invalid decimal literal")
213215

216+
def test_end_of_numerical_literals(self):
217+
def check(test):
218+
with self.assertWarns(DeprecationWarning):
219+
compile(test, "<testcase>", "eval")
220+
221+
def check_error(test):
222+
with warnings.catch_warnings(record=True) as w:
223+
with self.assertRaises(SyntaxError):
224+
compile(test, "<testcase>", "eval")
225+
self.assertEqual(w, [])
226+
227+
check_error("0xfand x")
228+
check("0o7and x")
229+
check("0b1and x")
230+
check("9and x")
231+
check("0and x")
232+
check("1.and x")
233+
check("1e3and x")
234+
check("1jand x")
235+
236+
check("0xfor x")
237+
check("0o7or x")
238+
check("0b1or x")
239+
check("9or x")
240+
check_error("0or x")
241+
check("1.or x")
242+
check("1e3or x")
243+
check("1jor x")
244+
245+
check("0xfin x")
246+
check("0o7in x")
247+
check("0b1in x")
248+
check("9in x")
249+
check("0in x")
250+
check("1.in x")
251+
check("1e3in x")
252+
check("1jin x")
253+
254+
with warnings.catch_warnings():
255+
warnings.simplefilter('ignore', SyntaxWarning)
256+
check("0xfis x")
257+
check("0o7is x")
258+
check("0b1is x")
259+
check("9is x")
260+
check("0is x")
261+
check("1.is x")
262+
check("1e3is x")
263+
check("1jis x")
264+
265+
check("0xfif x else y")
266+
check("0o7if x else y")
267+
check("0b1if x else y")
268+
check("9if x else y")
269+
check("0if x else y")
270+
check("1.if x else y")
271+
check("1e3if x else y")
272+
check("1jif x else y")
273+
274+
check_error("x if 0xfelse y")
275+
check("x if 0o7else y")
276+
check("x if 0b1else y")
277+
check("x if 9else y")
278+
check("x if 0else y")
279+
check("x if 1.else y")
280+
check("x if 1e3else y")
281+
check("x if 1jelse y")
282+
283+
check("[0x1ffor x in ()]")
284+
check("[0x1for x in ()]")
285+
check("[0xfor x in ()]")
286+
check("[0o7for x in ()]")
287+
check("[0b1for x in ()]")
288+
check("[9for x in ()]")
289+
check("[1.for x in ()]")
290+
check("[1e3for x in ()]")
291+
check("[1jfor x in ()]")
292+
293+
check_error("0xfspam")
294+
check_error("0o7spam")
295+
check_error("0b1spam")
296+
check_error("9spam")
297+
check_error("0spam")
298+
check_error("1.spam")
299+
check_error("1e3spam")
300+
check_error("1jspam")
301+
214302
def test_string_literals(self):
215303
x = ''; y = ""; self.assertTrue(len(x) == 0 and x == y)
216304
x = '\''; y = "'"; self.assertTrue(len(x) == 1 and x == y and ord(x) == 39)
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Emit a deprecation warning if the numeric literal is immediately followed by
2+
one of keywords: and, else, for, if, in, is, or. Raise a syntax error with
3+
more informative message if it is immediately followed by other keyword or
4+
identifier.

Parser/tokenizer.c

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1121,6 +1121,113 @@ indenterror(struct tok_state *tok)
11211121
return ERRORTOKEN;
11221122
}
11231123

1124+
static int
1125+
parser_warn(struct tok_state *tok, const char *format, ...)
1126+
{
1127+
PyObject *errmsg;
1128+
va_list vargs;
1129+
#ifdef HAVE_STDARG_PROTOTYPES
1130+
va_start(vargs, format);
1131+
#else
1132+
va_start(vargs);
1133+
#endif
1134+
errmsg = PyUnicode_FromFormatV(format, vargs);
1135+
va_end(vargs);
1136+
if (!errmsg) {
1137+
goto error;
1138+
}
1139+
1140+
if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, errmsg, tok->filename,
1141+
tok->lineno, NULL, NULL) < 0) {
1142+
if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) {
1143+
/* Replace the DeprecationWarning exception with a SyntaxError
1144+
to get a more accurate error report */
1145+
PyErr_Clear();
1146+
syntaxerror(tok, "%U", errmsg);
1147+
}
1148+
goto error;
1149+
}
1150+
Py_DECREF(errmsg);
1151+
return 0;
1152+
1153+
error:
1154+
Py_XDECREF(errmsg);
1155+
tok->done = E_ERROR;
1156+
return -1;
1157+
}
1158+
1159+
static int
1160+
lookahead(struct tok_state *tok, const char *test)
1161+
{
1162+
const char *s = test;
1163+
int res = 0;
1164+
while (1) {
1165+
int c = tok_nextc(tok);
1166+
if (*s == 0) {
1167+
res = !is_potential_identifier_char(c);
1168+
}
1169+
else if (c == *s) {
1170+
s++;
1171+
continue;
1172+
}
1173+
1174+
tok_backup(tok, c);
1175+
while (s != test) {
1176+
tok_backup(tok, *--s);
1177+
}
1178+
return res;
1179+
}
1180+
}
1181+
1182+
static int
1183+
verify_end_of_number(struct tok_state *tok, int c, const char *kind)
1184+
{
1185+
/* Emit a deprecation warning only if the numeric literal is immediately
1186+
* followed by one of keywords which can occur after a numeric literal
1187+
* in valid code: "and", "else", "for", "if", "in", "is" and "or".
1188+
* It allows to gradually deprecate existing valid code without adding
1189+
* warning before error in most cases of invalid numeric literal (which
1190+
* would be confusing and break existing tests).
1191+
* Raise a syntax error with a slightly better message than plain
1192+
* "invalid syntax" if the numeric literal is immediately followed by
1193+
* other keyword or identifier.
1194+
*/
1195+
int r = 0;
1196+
if (c == 'a') {
1197+
r = lookahead(tok, "nd");
1198+
}
1199+
else if (c == 'e') {
1200+
r = lookahead(tok, "lse");
1201+
}
1202+
else if (c == 'f') {
1203+
r = lookahead(tok, "or");
1204+
}
1205+
else if (c == 'i') {
1206+
int c2 = tok_nextc(tok);
1207+
if (c2 == 'f' || c2 == 'n' || c2 == 's') {
1208+
r = 1;
1209+
}
1210+
tok_backup(tok, c2);
1211+
}
1212+
else if (c == 'o') {
1213+
r = lookahead(tok, "r");
1214+
}
1215+
if (r) {
1216+
tok_backup(tok, c);
1217+
if (parser_warn(tok, "invalid %s literal", kind)) {
1218+
return 0;
1219+
}
1220+
tok_nextc(tok);
1221+
}
1222+
else /* In future releases, only error will remain. */
1223+
if (is_potential_identifier_char(c)) {
1224+
tok_backup(tok, c);
1225+
syntaxerror(tok, "invalid %s literal", kind);
1226+
return 0;
1227+
}
1228+
return 1;
1229+
}
1230+
11241231
/* Verify that the identifier follows PEP 3131.
11251232
All identifier strings are guaranteed to be "ready" unicode objects.
11261233
*/
@@ -1569,6 +1676,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
15691676
c = tok_nextc(tok);
15701677
} while (isxdigit(c));
15711678
} while (c == '_');
1679+
if (!verify_end_of_number(tok, c, "hexadecimal")) {
1680+
return ERRORTOKEN;
1681+
}
15721682
}
15731683
else if (c == 'o' || c == 'O') {
15741684
/* Octal */
@@ -1595,6 +1705,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
15951705
return syntaxerror(tok,
15961706
"invalid digit '%c' in octal literal", c);
15971707
}
1708+
if (!verify_end_of_number(tok, c, "octal")) {
1709+
return ERRORTOKEN;
1710+
}
15981711
}
15991712
else if (c == 'b' || c == 'B') {
16001713
/* Binary */
@@ -1621,6 +1734,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
16211734
return syntaxerror(tok,
16221735
"invalid digit '%c' in binary literal", c);
16231736
}
1737+
if (!verify_end_of_number(tok, c, "binary")) {
1738+
return ERRORTOKEN;
1739+
}
16241740
}
16251741
else {
16261742
int nonzero = 0;
@@ -1664,6 +1780,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
16641780
"literals are not permitted; "
16651781
"use an 0o prefix for octal integers");
16661782
}
1783+
if (!verify_end_of_number(tok, c, "decimal")) {
1784+
return ERRORTOKEN;
1785+
}
16671786
}
16681787
}
16691788
else {
@@ -1699,6 +1818,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
16991818
}
17001819
} else if (!isdigit(c)) {
17011820
tok_backup(tok, c);
1821+
if (!verify_end_of_number(tok, e, "decimal")) {
1822+
return ERRORTOKEN;
1823+
}
17021824
tok_backup(tok, e);
17031825
*p_start = tok->start;
17041826
*p_end = tok->cur;
@@ -1713,6 +1835,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
17131835
/* Imaginary part */
17141836
imaginary:
17151837
c = tok_nextc(tok);
1838+
if (!verify_end_of_number(tok, c, "imaginary")) {
1839+
return ERRORTOKEN;
1840+
}
1841+
}
1842+
else if (!verify_end_of_number(tok, c, "decimal")) {
1843+
return ERRORTOKEN;
17161844
}
17171845
}
17181846
}

0 commit comments

Comments
 (0)