Skip to content

Commit 56eda25

Browse files
authored
gh-116042: Fix location for SyntaxErrors of invalid escapes in the tokenizer (#116049)
1 parent 49b1103 commit 56eda25

File tree

5 files changed

+80
-17
lines changed

5 files changed

+80
-17
lines changed

Lib/test/test_cmd_line_script.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -660,7 +660,7 @@ def test_syntaxerror_invalid_escape_sequence_multi_line(self):
660660
self.assertEqual(
661661
stderr.splitlines()[-3:],
662662
[ b' foo = """\\q"""',
663-
b' ^^^^^^^^',
663+
b' ^^',
664664
b'SyntaxError: "\\q" is an invalid escape sequence. '
665665
b'Did you mean "\\\\q"? A raw string is also an option.'
666666
],

Lib/test/test_string_literals.py

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ def test_eval_str_invalid_escape(self):
120120
r'Such sequences will not work in the future. '
121121
r'Did you mean "\\z"? A raw string is also an option.')
122122
self.assertEqual(w[0].filename, '<string>')
123-
self.assertEqual(w[0].lineno, 1)
123+
self.assertEqual(w[0].lineno, 2)
124124

125125
with warnings.catch_warnings(record=True) as w:
126126
warnings.simplefilter('error', category=SyntaxWarning)
@@ -131,7 +131,7 @@ def test_eval_str_invalid_escape(self):
131131
self.assertEqual(exc.msg, r'"\z" is an invalid escape sequence. '
132132
r'Did you mean "\\z"? A raw string is also an option.')
133133
self.assertEqual(exc.filename, '<string>')
134-
self.assertEqual(exc.lineno, 1)
134+
self.assertEqual(exc.lineno, 2)
135135
self.assertEqual(exc.offset, 1)
136136

137137
# Check that the warning is raised only once if there are syntax errors
@@ -160,7 +160,7 @@ def test_eval_str_invalid_octal_escape(self):
160160
r'Such sequences will not work in the future. '
161161
r'Did you mean "\\407"? A raw string is also an option.')
162162
self.assertEqual(w[0].filename, '<string>')
163-
self.assertEqual(w[0].lineno, 1)
163+
self.assertEqual(w[0].lineno, 2)
164164

165165
with warnings.catch_warnings(record=True) as w:
166166
warnings.simplefilter('error', category=SyntaxWarning)
@@ -171,9 +171,32 @@ def test_eval_str_invalid_octal_escape(self):
171171
self.assertEqual(exc.msg, r'"\407" is an invalid octal escape sequence. '
172172
r'Did you mean "\\407"? A raw string is also an option.')
173173
self.assertEqual(exc.filename, '<string>')
174-
self.assertEqual(exc.lineno, 1)
174+
self.assertEqual(exc.lineno, 2)
175175
self.assertEqual(exc.offset, 1)
176176

177+
def test_invalid_escape_locations_with_offset(self):
178+
with warnings.catch_warnings(record=True) as w:
179+
warnings.simplefilter('always', category=SyntaxWarning)
180+
eval("\"'''''''''''''''''''''invalid\ Escape\"")
181+
self.assertEqual(len(w), 1)
182+
self.assertEqual(str(w[0].message),
183+
r'"\ " is an invalid escape sequence. Such sequences '
184+
r'will not work in the future. Did you mean "\\ "? '
185+
r'A raw string is also an option.')
186+
self.assertEqual(w[0].filename, '<string>')
187+
self.assertEqual(w[0].lineno, 1)
188+
189+
with warnings.catch_warnings(record=True) as w:
190+
warnings.simplefilter('always', category=SyntaxWarning)
191+
eval("\"''Incorrect \ logic?\"")
192+
self.assertEqual(len(w), 1)
193+
self.assertEqual(str(w[0].message),
194+
r'"\ " is an invalid escape sequence. Such sequences '
195+
r'will not work in the future. Did you mean "\\ "? '
196+
r'A raw string is also an option.')
197+
self.assertEqual(w[0].filename, '<string>')
198+
self.assertEqual(w[0].lineno, 1)
199+
177200
def test_eval_str_raw(self):
178201
self.assertEqual(eval(""" r'x' """), 'x')
179202
self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01')
@@ -215,7 +238,7 @@ def test_eval_bytes_invalid_escape(self):
215238
r'Such sequences will not work in the future. '
216239
r'Did you mean "\\z"? A raw string is also an option.')
217240
self.assertEqual(w[0].filename, '<string>')
218-
self.assertEqual(w[0].lineno, 1)
241+
self.assertEqual(w[0].lineno, 2)
219242

220243
with warnings.catch_warnings(record=True) as w:
221244
warnings.simplefilter('error', category=SyntaxWarning)
@@ -226,7 +249,7 @@ def test_eval_bytes_invalid_escape(self):
226249
self.assertEqual(exc.msg, r'"\z" is an invalid escape sequence. '
227250
r'Did you mean "\\z"? A raw string is also an option.')
228251
self.assertEqual(exc.filename, '<string>')
229-
self.assertEqual(exc.lineno, 1)
252+
self.assertEqual(exc.lineno, 2)
230253

231254
def test_eval_bytes_invalid_octal_escape(self):
232255
for i in range(0o400, 0o1000):
@@ -241,7 +264,7 @@ def test_eval_bytes_invalid_octal_escape(self):
241264
r'Such sequences will not work in the future. '
242265
r'Did you mean "\\407"? A raw string is also an option.')
243266
self.assertEqual(w[0].filename, '<string>')
244-
self.assertEqual(w[0].lineno, 1)
267+
self.assertEqual(w[0].lineno, 2)
245268

246269
with warnings.catch_warnings(record=True) as w:
247270
warnings.simplefilter('error', category=SyntaxWarning)
@@ -252,7 +275,7 @@ def test_eval_bytes_invalid_octal_escape(self):
252275
self.assertEqual(exc.msg, r'"\407" is an invalid octal escape sequence. '
253276
r'Did you mean "\\407"? A raw string is also an option.')
254277
self.assertEqual(exc.filename, '<string>')
255-
self.assertEqual(exc.lineno, 1)
278+
self.assertEqual(exc.lineno, 2)
256279

257280
def test_eval_bytes_raw(self):
258281
self.assertEqual(eval(""" br'x' """), b'x')
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix location for SyntaxErrors of invalid escapes in the tokenizer. Patch by
2+
Pablo Galindo.

Parser/pegen_errors.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -352,8 +352,8 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
352352
assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF);
353353

354354
if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) {
355-
Py_ssize_t size = p->tok->inp - p->tok->buf;
356-
error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
355+
Py_ssize_t size = p->tok->inp - p->tok->line_start;
356+
error_line = PyUnicode_DecodeUTF8(p->tok->line_start, size, "replace");
357357
}
358358
else if (p->tok->fp == NULL || p->tok->fp == stdin) {
359359
error_line = get_error_line_from_tokenizer_buffers(p, lineno);

Parser/string_parser.c

Lines changed: 44 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
//// STRING HANDLING FUNCTIONS ////
1212

1313
static int
14-
warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token *t)
14+
warn_invalid_escape_sequence(Parser *p, const char* buffer, const char *first_invalid_escape, Token *t)
1515
{
1616
if (p->call_invalid_rules) {
1717
// Do not report warnings if we are in the second pass of the parser
@@ -48,8 +48,46 @@ warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token
4848
else {
4949
category = PyExc_DeprecationWarning;
5050
}
51+
52+
// Calculate the lineno and the col_offset of the invalid escape sequence
53+
const char *start = buffer;
54+
const char *end = first_invalid_escape;
55+
int lineno = t->lineno;
56+
int col_offset = t->col_offset;
57+
while (start < end) {
58+
if (*start == '\n') {
59+
lineno++;
60+
col_offset = 0;
61+
}
62+
else {
63+
col_offset++;
64+
}
65+
start++;
66+
}
67+
68+
// Count the number of quotes in the token
69+
char first_quote = 0;
70+
if (lineno == t->lineno) {
71+
int quote_count = 0;
72+
char* tok = PyBytes_AsString(t->bytes);
73+
for (int i = 0; i < PyBytes_Size(t->bytes); i++) {
74+
if (tok[i] == '\'' || tok[i] == '\"') {
75+
if (quote_count == 0) {
76+
first_quote = tok[i];
77+
}
78+
if (tok[i] == first_quote) {
79+
quote_count++;
80+
}
81+
} else {
82+
break;
83+
}
84+
}
85+
86+
col_offset += quote_count;
87+
}
88+
5189
if (PyErr_WarnExplicitObject(category, msg, p->tok->filename,
52-
t->lineno, NULL, NULL) < 0) {
90+
lineno, NULL, NULL) < 0) {
5391
if (PyErr_ExceptionMatches(category)) {
5492
/* Replace the Syntax/DeprecationWarning exception with a SyntaxError
5593
to get a more accurate error report */
@@ -60,13 +98,13 @@ warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token
6098
error location, if p->known_err_token is not set. */
6199
p->known_err_token = t;
62100
if (octal) {
63-
RAISE_SYNTAX_ERROR(
101+
RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, lineno, col_offset-1, lineno, col_offset+1,
64102
"\"\\%.3s\" is an invalid octal escape sequence. "
65103
"Did you mean \"\\\\%.3s\"? A raw string is also an option.",
66104
first_invalid_escape, first_invalid_escape);
67105
}
68106
else {
69-
RAISE_SYNTAX_ERROR(
107+
RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, lineno, col_offset-1, lineno, col_offset+1,
70108
"\"\\%c\" is an invalid escape sequence. "
71109
"Did you mean \"\\\\%c\"? A raw string is also an option.",
72110
c, c);
@@ -163,7 +201,7 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t)
163201
// HACK: later we can simply pass the line no, since we don't preserve the tokens
164202
// when we are decoding the string but we preserve the line numbers.
165203
if (v != NULL && first_invalid_escape != NULL && t != NULL) {
166-
if (warn_invalid_escape_sequence(parser, first_invalid_escape, t) < 0) {
204+
if (warn_invalid_escape_sequence(parser, s, first_invalid_escape, t) < 0) {
167205
/* We have not decref u before because first_invalid_escape points
168206
inside u. */
169207
Py_XDECREF(u);
@@ -185,7 +223,7 @@ decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t)
185223
}
186224

187225
if (first_invalid_escape != NULL) {
188-
if (warn_invalid_escape_sequence(p, first_invalid_escape, t) < 0) {
226+
if (warn_invalid_escape_sequence(p, s, first_invalid_escape, t) < 0) {
189227
Py_DECREF(result);
190228
return NULL;
191229
}

0 commit comments

Comments
 (0)