Skip to content

Commit 7e8f341

Browse files
committed
co_lnotab supports negative line number delta
Issue #26107: The format of the co_lnotab attribute of code objects changes to support negative line number delta. Changes: * assemble_lnotab(): if line number delta is less than -128 or greater than 127, emit multiple (offset_delta, lineno_delta) in co_lnotab * update functions decoding co_lnotab to use signed 8-bit integers - dis.findlinestarts() - PyCode_Addr2Line() - _PyCode_CheckLineNumber() - frame_setlineno() * update lnotab_notes.txt * increase importlib MAGIC_NUMBER to 3361 * document the change in What's New in Python 3.6 * cleanup also PyCode_Optimize() to use better variable names
1 parent 24bab92 commit 7e8f341

File tree

11 files changed

+203
-161
lines changed

11 files changed

+203
-161
lines changed

Doc/whatsnew/3.6.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,16 @@ that may require changes to your code.
244244
Changes in the Python API
245245
-------------------------
246246

247+
* The format of the ``co_lnotab`` attribute of code objects changed to support
248+
negative line number delta. By default, Python does not emit bytecode with
249+
negative line number delta. Functions using ``frame.f_lineno``,
250+
``PyFrame_GetLineNumber()`` or ``PyCode_Addr2Line()`` are not affected.
251+
Functions that decode ``co_lnotab`` directly should be updated to use a signed
252+
8-bit integer type for the line number delta, but it's only required to
253+
support applications using negative line number delta. See
254+
``Objects/lnotab_notes.txt`` for the ``co_lnotab`` format and how to decode
255+
it, and see :pep:`511` for the rationale.
256+
247257
* The functions in the :mod:`compileall` module now return booleans instead
248258
of ``1`` or ``0`` to represent success or failure, respectively. Thanks to
249259
booleans being a subclass of integers, this should only be an issue if you

Include/code.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ PyAPI_FUNC(int) _PyCode_CheckLineNumber(PyCodeObject* co,
117117
#endif
118118

119119
PyAPI_FUNC(PyObject*) PyCode_Optimize(PyObject *code, PyObject* consts,
120-
PyObject *names, PyObject *lineno_obj);
120+
PyObject *names, PyObject *lnotab);
121121

122122
#ifdef __cplusplus
123123
}

Lib/dis.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -397,8 +397,8 @@ def findlinestarts(code):
397397
Generate pairs (offset, lineno) as described in Python/compile.c.
398398
399399
"""
400-
byte_increments = list(code.co_lnotab[0::2])
401-
line_increments = list(code.co_lnotab[1::2])
400+
byte_increments = code.co_lnotab[0::2]
401+
line_increments = code.co_lnotab[1::2]
402402

403403
lastlineno = None
404404
lineno = code.co_firstlineno
@@ -409,6 +409,9 @@ def findlinestarts(code):
409409
yield (addr, lineno)
410410
lastlineno = lineno
411411
addr += byte_incr
412+
if line_incr >= 0x80:
413+
# line_increments is an array of 8-bit signed integers
414+
line_incr -= 0x100
412415
lineno += line_incr
413416
if lineno != lastlineno:
414417
yield (addr, lineno)

Lib/importlib/_bootstrap_external.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -223,13 +223,14 @@ def _write_atomic(path, data, mode=0o666):
223223
# Python 3.5b1 3330 (PEP 448: Additional Unpacking Generalizations)
224224
# Python 3.5b2 3340 (fix dictionary display evaluation order #11205)
225225
# Python 3.5b2 3350 (add GET_YIELD_FROM_ITER opcode #24400)
226-
# Python 3.6a0 3360 (add FORMAT_VALUE opcode #25483)
226+
# Python 3.6a0 3360 (add FORMAT_VALUE opcode #25483)
227+
# Python 3.6a0 3361 (lineno delta of code.co_lnotab becomes signed)
227228
#
228229
# MAGIC must change whenever the bytecode emitted by the compiler may no
229230
# longer be understood by older implementations of the eval loop (usually
230231
# due to the addition of new opcodes).
231232

232-
MAGIC_NUMBER = (3360).to_bytes(2, 'little') + b'\r\n'
233+
MAGIC_NUMBER = (3361).to_bytes(2, 'little') + b'\r\n'
233234
_RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c
234235

235236
_PYCACHE = '__pycache__'

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ Release date: tba
1010
Core and Builtins
1111
-----------------
1212

13+
- Issue #26107: The format of the ``co_lnotab`` attribute of code objects
14+
changes to support negative line number delta.
15+
1316
- Issue #26154: Add a new private _PyThreadState_UncheckedGet() function to get
1417
the current Python thread state, but don't issue a fatal error if it is NULL.
1518
This new function must be used instead of accessing directly the

Objects/codeobject.c

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -557,7 +557,8 @@ PyCode_Addr2Line(PyCodeObject *co, int addrq)
557557
addr += *p++;
558558
if (addr > addrq)
559559
break;
560-
line += *p++;
560+
line += (signed char)*p;
561+
p++;
561562
}
562563
return line;
563564
}
@@ -592,17 +593,19 @@ _PyCode_CheckLineNumber(PyCodeObject* co, int lasti, PyAddrPair *bounds)
592593
if (addr + *p > lasti)
593594
break;
594595
addr += *p++;
595-
if (*p)
596+
if ((signed char)*p)
596597
bounds->ap_lower = addr;
597-
line += *p++;
598+
line += (signed char)*p;
599+
p++;
598600
--size;
599601
}
600602

601603
if (size > 0) {
602604
while (--size >= 0) {
603605
addr += *p++;
604-
if (*p++)
606+
if ((signed char)*p)
605607
break;
608+
p++;
606609
}
607610
bounds->ap_upper = addr;
608611
}

Objects/frameobject.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ frame_setlineno(PyFrameObject *f, PyObject* p_new_lineno)
137137
new_lasti = -1;
138138
for (offset = 0; offset < lnotab_len; offset += 2) {
139139
addr += lnotab[offset];
140-
line += lnotab[offset+1];
140+
line += (signed char)lnotab[offset+1];
141141
if (line >= new_lineno) {
142142
new_lasti = addr;
143143
new_lineno = line;

Objects/lnotab_notes.txt

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -12,42 +12,47 @@ pairs. The details are important and delicate, best illustrated by example:
1212
0 1
1313
6 2
1414
50 7
15-
350 307
16-
361 308
15+
350 207
16+
361 208
1717

1818
Instead of storing these numbers literally, we compress the list by storing only
19-
the increments from one row to the next. Conceptually, the stored list might
19+
the difference from one row to the next. Conceptually, the stored list might
2020
look like:
2121

22-
0, 1, 6, 1, 44, 5, 300, 300, 11, 1
22+
0, 1, 6, 1, 44, 5, 300, 200, 11, 1
2323

24-
The above doesn't really work, but it's a start. Note that an unsigned byte
25-
can't hold negative values, or values larger than 255, and the above example
26-
contains two such values. So we make two tweaks:
24+
The above doesn't really work, but it's a start. An unsigned byte (byte code
25+
offset) can't hold negative values, or values larger than 255; a signed byte
26+
(line number) can't hold values larger than 127 or less than -128, and the
27+
above example contains two such values. So we make two tweaks:
2728

28-
(a) there's a deep assumption that byte code offsets and their corresponding
29-
line #s both increase monotonically, and
30-
(b) if at least one column jumps by more than 255 from one row to the next,
31-
more than one pair is written to the table. In case #b, there's no way to know
32-
from looking at the table later how many were written. That's the delicate
33-
part. A user of co_lnotab desiring to find the source line number
34-
corresponding to a bytecode address A should do something like this
29+
(a) there's a deep assumption that byte code offsets increase monotonically,
30+
and
31+
(b) if byte code offset jumps by more than 255 from one row to the next, or if
32+
source code line number jumps by more than 127 or less than -128 from one row
33+
to the next, more than one pair is written to the table. In case #b,
34+
there's no way to know from looking at the table later how many were written.
35+
That's the delicate part. A user of co_lnotab desiring to find the source
36+
line number corresponding to a bytecode address A should do something like
37+
this:
3538

3639
lineno = addr = 0
3740
for addr_incr, line_incr in co_lnotab:
3841
addr += addr_incr
3942
if addr > A:
4043
return lineno
44+
if line_incr >= 0x80:
45+
line_incr -= 0x100
4146
lineno += line_incr
4247

4348
(In C, this is implemented by PyCode_Addr2Line().) In order for this to work,
4449
when the addr field increments by more than 255, the line # increment in each
4550
pair generated must be 0 until the remaining addr increment is < 256. So, in
4651
the example above, assemble_lnotab in compile.c should not (as was actually done
47-
until 2.2) expand 300, 300 to
52+
until 2.2) expand 300, 200 to
4853
255, 255, 45, 45,
4954
but to
50-
255, 0, 45, 255, 0, 45.
55+
255, 0, 45, 127, 0, 73.
5156

5257
The above is sufficient to reconstruct line numbers for tracebacks, but not for
5358
line tracing. Tracing is handled by PyCode_CheckLineNumber() in codeobject.c

Python/compile.c

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4452,7 +4452,6 @@ assemble_lnotab(struct assembler *a, struct instr *i)
44524452
d_lineno = i->i_lineno - a->a_lineno;
44534453

44544454
assert(d_bytecode >= 0);
4455-
assert(d_lineno >= 0);
44564455

44574456
if(d_bytecode == 0 && d_lineno == 0)
44584457
return 1;
@@ -4482,9 +4481,21 @@ assemble_lnotab(struct assembler *a, struct instr *i)
44824481
d_bytecode -= ncodes * 255;
44834482
a->a_lnotab_off += ncodes * 2;
44844483
}
4485-
assert(d_bytecode <= 255);
4486-
if (d_lineno > 255) {
4487-
int j, nbytes, ncodes = d_lineno / 255;
4484+
assert(0 <= d_bytecode && d_bytecode <= 255);
4485+
4486+
if (d_lineno < -128 || 127 < d_lineno) {
4487+
int j, nbytes, ncodes, k;
4488+
if (d_lineno < 0) {
4489+
k = -128;
4490+
/* use division on positive numbers */
4491+
ncodes = (-d_lineno) / 128;
4492+
}
4493+
else {
4494+
k = 127;
4495+
ncodes = d_lineno / 127;
4496+
}
4497+
d_lineno -= ncodes * k;
4498+
assert(ncodes >= 1);
44884499
nbytes = a->a_lnotab_off + 2 * ncodes;
44894500
len = PyBytes_GET_SIZE(a->a_lnotab);
44904501
if (nbytes >= len) {
@@ -4502,15 +4513,15 @@ assemble_lnotab(struct assembler *a, struct instr *i)
45024513
lnotab = (unsigned char *)
45034514
PyBytes_AS_STRING(a->a_lnotab) + a->a_lnotab_off;
45044515
*lnotab++ = d_bytecode;
4505-
*lnotab++ = 255;
4516+
*lnotab++ = k;
45064517
d_bytecode = 0;
45074518
for (j = 1; j < ncodes; j++) {
45084519
*lnotab++ = 0;
4509-
*lnotab++ = 255;
4520+
*lnotab++ = k;
45104521
}
4511-
d_lineno -= ncodes * 255;
45124522
a->a_lnotab_off += ncodes * 2;
45134523
}
4524+
assert(-128 <= d_lineno && d_lineno <= 127);
45144525

45154526
len = PyBytes_GET_SIZE(a->a_lnotab);
45164527
if (a->a_lnotab_off + 2 >= len) {

0 commit comments

Comments
 (0)