From ae7dbca8b224a73206a330d0e6819d43ec7c9c8d Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Thu, 7 Dec 2023 12:55:28 +0000 Subject: [PATCH 1/9] gh-112962: in dis module, put cache information in the Instruction instead of creating fake Instructions to represent it --- Doc/library/dis.rst | 5 +++- Lib/dis.py | 62 +++++++++++++++++++++++--------------------- Lib/test/test_dis.py | 37 ++++++++++++++++++++++---- 3 files changed, 69 insertions(+), 35 deletions(-) diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst index 0d93bc9f5da774..e43801484170f3 100644 --- a/Doc/library/dis.rst +++ b/Doc/library/dis.rst @@ -328,13 +328,16 @@ operation is being performed, so the intermediate analysis object isn't useful: source line information (if any) is taken directly from the disassembled code object. - The *show_caches* and *adaptive* parameters work as they do in :func:`dis`. + The *adaptive* parameter works as it does in :func:`dis`. .. versionadded:: 3.4 .. versionchanged:: 3.11 Added the *show_caches* and *adaptive* parameters. + .. versionchanged:: 3.13 + The *show_caches* parameter is deprecated and has no effect. + .. function:: findlinestarts(code) diff --git a/Lib/dis.py b/Lib/dis.py index efa935c5a6a0b6..71bf40bd4823e0 100644 --- a/Lib/dis.py +++ b/Lib/dis.py @@ -570,10 +570,10 @@ def get_instructions(x, *, first_line=None, show_caches=False, adaptive=False): linestarts=linestarts, line_offset=line_offset, co_positions=co.co_positions(), - show_caches=show_caches, original_code=original_code, arg_resolver=arg_resolver) + def _get_const_value(op, arg, co_consts): """Helper to get the value of the const in a hasconst op. @@ -645,8 +645,7 @@ def _is_backward_jump(op): 'ENTER_EXECUTOR') def _get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=None, - show_caches=False, original_code=None, labels_map=None, - arg_resolver=None): + original_code=None, labels_map=None, arg_resolver=None): """Iterate over the instructions in a bytecode string. 
Generates a sequence of Instruction namedtuples giving the details of each @@ -682,32 +681,22 @@ def _get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=N else: argval, argrepr = arg, repr(arg) - yield Instruction(_all_opname[op], op, arg, argval, argrepr, - offset, start_offset, starts_line, line_number, - labels_map.get(offset, None), positions) + instr = Instruction(_all_opname[op], op, arg, argval, argrepr, + offset, start_offset, starts_line, line_number, + labels_map.get(offset, None), positions) caches = _get_cache_size(_all_opname[deop]) - if not caches: - continue - if not show_caches: - # We still need to advance the co_positions iterator: - for _ in range(caches): - next(co_positions, ()) - continue - for name, size in _cache_format[opname[deop]].items(): - for i in range(size): - offset += 2 - # Only show the fancy argrepr for a CACHE instruction when it's - # the first entry for a particular cache value: - if i == 0: - data = code[offset: offset + 2 * size] - argrepr = f"{name}: {int.from_bytes(data, sys.byteorder)}" - else: - argrepr = "" - yield Instruction( - "CACHE", CACHE, 0, None, argrepr, offset, offset, False, None, None, - Positions(*next(co_positions, ())) - ) + # Advance the co_positions iterator: + for _ in range(caches): + next(co_positions, ()) + cache_format = [] + if caches: + for name, size in _cache_format[opname[deop]].items(): + data = code[offset + 2: offset + 2 + 2 * size] + cache_format.append((name, size, data)) + instr.cache_format = cache_format + yield instr + def disassemble(co, lasti=-1, *, file=None, show_caches=False, adaptive=False, show_offsets=False): @@ -787,7 +776,6 @@ def _disassemble_bytes(code, lasti=-1, varname_from_oparg=None, instrs = _get_instructions_bytes(code, linestarts=linestarts, line_offset=line_offset, co_positions=co_positions, - show_caches=show_caches, original_code=original_code, labels_map=labels_map, arg_resolver=arg_resolver) @@ -805,6 +793,23 @@ def 
print_instructions(instrs, exception_entries, formatter, show_caches=False, is_current_instr = instr.offset <= lasti \ <= instr.offset + 2 * _get_cache_size(_all_opname[_deoptop(instr.opcode)]) formatter.print_instruction(instr, is_current_instr) + deop = _deoptop(instr.opcode) + if show_caches: + offset = instr.offset + for name, size, data in getattr(instr, 'cache_format', ()): + for i in range(size): + offset += 2 + # Only show the fancy argrepr for a CACHE instruction when it's + # the first entry for a particular cache value: + if i == 0: + argrepr = f"{name}: {int.from_bytes(data, sys.byteorder)}" + else: + argrepr = "" + formatter.print_instruction( + Instruction("CACHE", CACHE, 0, None, argrepr, offset, offset, + False, None, None, instr.positions), + is_current_instr) + formatter.print_exception_table(exception_entries) def _disassemble_str(source, **kwargs): @@ -952,7 +957,6 @@ def __iter__(self): linestarts=self._linestarts, line_offset=self._line_offset, co_positions=co.co_positions(), - show_caches=self.show_caches, original_code=original_code, labels_map=labels_map, arg_resolver=arg_resolver) diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 0ea4dc4566a4a4..82b883a7fd9309 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -13,6 +13,7 @@ import opcode +CACHE = dis.opmap["CACHE"] def get_tb(): def _error(): @@ -1227,9 +1228,9 @@ def f(): else: # "copy" the code to un-quicken it: f.__code__ = f.__code__.replace() - for instruction in dis.get_instructions( + for instruction in _unroll_caches_as_Instructions(dis.get_instructions( f, show_caches=True, adaptive=adaptive - ): + ), show_caches=True): if instruction.opname == "CACHE": yield instruction.argrepr @@ -1262,7 +1263,8 @@ def f(): # However, this might change in the future. So we explicitly try to find # a CACHE entry in the instructions. 
If we can't do that, fail the test - for inst in dis.get_instructions(f, show_caches=True): + for inst in _unroll_caches_as_Instructions( + dis.get_instructions(f, show_caches=True), show_caches=True): if inst.opname == "CACHE": op_offset = inst.offset - 2 cache_offset = inst.offset @@ -1890,9 +1892,9 @@ def roots(a, b, c): instruction.positions.col_offset, instruction.positions.end_col_offset, ) - for instruction in dis.get_instructions( + for instruction in _unroll_caches_as_Instructions(dis.get_instructions( code, adaptive=adaptive, show_caches=show_caches - ) + ), show_caches=show_caches) ] self.assertEqual(co_positions, dis_positions) @@ -2233,6 +2235,31 @@ def get_disassembly(self, tb): dis.distb(tb, file=output) return output.getvalue() +def _unroll_caches_as_Instructions(instrs, show_caches=False): + # Cache entries are no longer reported by dis as fake instructions, + # but some tests assume that they are. We should rewrite the tests to assume + # the new API, but it will be clearer to keep the tests working as + # before and do that in a separate PR. 
+ + for instr in instrs: + yield instr + if not show_caches: + continue + + offset = instr.offset + for name, size, data in instr.cache_format: + for i in range(size): + offset += 2 + # Only show the fancy argrepr for a CACHE instruction when it's + # the first entry for a particular cache value: + if i == 0: + argrepr = f"{name}: {int.from_bytes(data, sys.byteorder)}" + else: + argrepr = "" + + yield Instruction("CACHE", CACHE, 0, None, argrepr, offset, offset, + False, None, None, instr.positions) + if __name__ == "__main__": unittest.main() From 4f0f36038cc945469336049ef5a64fee5e2983f2 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Tue, 12 Dec 2023 16:33:01 +0000 Subject: [PATCH 2/9] add news --- .../Library/2023-12-12-16-32-55.gh-issue-112962.ZZWXZn.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2023-12-12-16-32-55.gh-issue-112962.ZZWXZn.rst diff --git a/Misc/NEWS.d/next/Library/2023-12-12-16-32-55.gh-issue-112962.ZZWXZn.rst b/Misc/NEWS.d/next/Library/2023-12-12-16-32-55.gh-issue-112962.ZZWXZn.rst new file mode 100644 index 00000000000000..b99e6bc90ae791 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-12-12-16-32-55.gh-issue-112962.ZZWXZn.rst @@ -0,0 +1,3 @@ +:mod:`dis` module functions add cache information to the +:class:`~dis.Instruction` instance rather than creating fake +:class:`~dis.Instruction` instances to represent the cache entries. From f7812358c79aabbf14fab9bbbfd25feeab884b39 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Tue, 12 Dec 2023 17:04:17 +0000 Subject: [PATCH 3/9] rename cache_format to cache_info and make it a field of _Instruction. Document it. 
--- Doc/library/dis.rst | 12 ++++++++++-- Lib/dis.py | 26 ++++++++++++++++++-------- Lib/test/test_dis.py | 6 +++--- 3 files changed, 31 insertions(+), 13 deletions(-) diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst index e43801484170f3..f9dac72c762c3d 100644 --- a/Doc/library/dis.rst +++ b/Doc/library/dis.rst @@ -485,6 +485,14 @@ details of bytecode instructions as :class:`Instruction` instances: :class:`dis.Positions` object holding the start and end locations that are covered by this instruction. + .. data:: cache_info + + Information about the cache entries of this instruction, as + triplets of the form ``(name, size, data)``, where the ``name`` + and ``size`` describe the cache format and data is the contents + of the cache. It is ``None`` if the instruction does not have + caches. + .. versionadded:: 3.4 .. versionchanged:: 3.11 @@ -496,8 +504,8 @@ details of bytecode instructions as :class:`Instruction` instances: Changed field ``starts_line``. Added fields ``start_offset``, ``cache_offset``, ``end_offset``, - ``baseopname``, ``baseopcode``, ``jump_target``, ``oparg``, and - ``line_number``. + ``baseopname``, ``baseopcode``, ``jump_target``, ``oparg``, + ``line_number`` and ``cache_info``. .. 
class:: Positions diff --git a/Lib/dis.py b/Lib/dis.py index 71bf40bd4823e0..8867fc0516e9e1 100644 --- a/Lib/dis.py +++ b/Lib/dis.py @@ -267,9 +267,10 @@ def show_code(co, *, file=None): 'starts_line', 'line_number', 'label', - 'positions' + 'positions', + 'cache_info', ], - defaults=[None, None] + defaults=[None, None, None] ) _Instruction.opname.__doc__ = "Human readable name for operation" @@ -286,6 +287,7 @@ def show_code(co, *, file=None): _Instruction.line_number.__doc__ = "source line number associated with this opcode (if any), otherwise None" _Instruction.label.__doc__ = "A label (int > 0) if this instruction is a jump target, otherwise None" _Instruction.positions.__doc__ = "dis.Positions object holding the span of source code covered by this instruction" +_Instruction.cache_info.__doc__ = "list of (name, size, data), one for each cache entry of the instruction" _ExceptionTableEntryBase = collections.namedtuple("_ExceptionTableEntryBase", "start end target depth lasti") @@ -334,6 +336,8 @@ class Instruction(_Instruction): label - A label if this instruction is a jump target, otherwise None positions - Optional dis.Positions object holding the span of source code covered by this instruction + cache_info - information about the format and content of the instruction's cache + entries (if any) """ @property @@ -689,13 +693,19 @@ def _get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=N # Advance the co_positions iterator: for _ in range(caches): next(co_positions, ()) - cache_format = [] + if caches: + cache_info = [] for name, size in _cache_format[opname[deop]].items(): data = code[offset + 2: offset + 2 + 2 * size] - cache_format.append((name, size, data)) - instr.cache_format = cache_format - yield instr + cache_info.append((name, size, data)) + else: + cache_info = None + + yield Instruction(_all_opname[op], op, arg, argval, argrepr, + offset, start_offset, starts_line, line_number, + labels_map.get(offset, None), positions, 
cache_info) + def disassemble(co, lasti=-1, *, file=None, show_caches=False, adaptive=False, @@ -794,9 +804,9 @@ def print_instructions(instrs, exception_entries, formatter, show_caches=False, <= instr.offset + 2 * _get_cache_size(_all_opname[_deoptop(instr.opcode)]) formatter.print_instruction(instr, is_current_instr) deop = _deoptop(instr.opcode) - if show_caches: + if show_caches and instr.cache_info: offset = instr.offset - for name, size, data in getattr(instr, 'cache_format', ()): + for name, size, data in instr.cache_info: for i in range(size): offset += 2 # Only show the fancy argrepr for a CACHE instruction when it's diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 82b883a7fd9309..12e2c57e50b0ba 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -1777,8 +1777,8 @@ def simple(): pass class InstructionTestCase(BytecodeTestCase): def assertInstructionsEqual(self, instrs_1, instrs_2, /): - instrs_1 = [instr_1._replace(positions=None) for instr_1 in instrs_1] - instrs_2 = [instr_2._replace(positions=None) for instr_2 in instrs_2] + instrs_1 = [instr_1._replace(positions=None, cache_info=None) for instr_1 in instrs_1] + instrs_2 = [instr_2._replace(positions=None, cache_info=None) for instr_2 in instrs_2] self.assertEqual(instrs_1, instrs_2) class InstructionTests(InstructionTestCase): @@ -2247,7 +2247,7 @@ def _unroll_caches_as_Instructions(instrs, show_caches=False): continue offset = instr.offset - for name, size, data in instr.cache_format: + for name, size, data in (instr.cache_info or ()): for i in range(size): offset += 2 # Only show the fancy argrepr for a CACHE instruction when it's From 744f4038382c063f3ab631a67285f0057147f152 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Tue, 12 Dec 2023 19:57:23 +0000 Subject: [PATCH 4/9] fix test_code and test_compile --- Lib/test/support/bytecode_helper.py | 11 +++++++++++ Lib/test/test_code.py | 8 ++++---- Lib/test/test_compile.py | 5 +++-- 3 files changed, 18 insertions(+), 6 
deletions(-) diff --git a/Lib/test/support/bytecode_helper.py b/Lib/test/support/bytecode_helper.py index 388d1266773c8a..ce24969dd6a78e 100644 --- a/Lib/test/support/bytecode_helper.py +++ b/Lib/test/support/bytecode_helper.py @@ -7,6 +7,17 @@ _UNSPECIFIED = object() +def instructions_with_positions(instrs, co_positions): + # Return (instr, positions) pairs from the instrs list and co_positions + # iterator. The latter contains items for cache lines and the former + # doesn't, so those need to be skipped. + + co_positions = co_positions or iter(()) + for instr in instrs: + yield instr, next(co_positions, ()) + for _ in (instr.cache_info or ()): + next(co_positions, ()) + class BytecodeTestCase(unittest.TestCase): """Custom assertion methods for inspecting bytecode.""" diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py index a961ddbe17a3d3..d8fb826edeb681 100644 --- a/Lib/test/test_code.py +++ b/Lib/test/test_code.py @@ -144,6 +144,8 @@ gc_collect) from test.support.script_helper import assert_python_ok from test.support import threading_helper +from test.support.bytecode_helper import (BytecodeTestCase, + instructions_with_positions) from opcode import opmap, opname COPY_FREE_VARS = opmap['COPY_FREE_VARS'] @@ -384,10 +386,8 @@ def test_co_positions_artificial_instructions(self): code = traceback.tb_frame.f_code artificial_instructions = [] - for instr, positions in zip( - dis.get_instructions(code, show_caches=True), - code.co_positions(), - strict=True + for instr, positions in instructions_with_positions( + dis.get_instructions(code), code.co_positions() ): # If any of the positions is None, then all have to # be None as well for the case above. 
There are still diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index df6e5e4b55f728..f681d125db7d7a 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -12,6 +12,7 @@ from test import support from test.support import (script_helper, requires_debug_ranges, requires_specialization, Py_C_RECURSION_LIMIT) +from test.support.bytecode_helper import instructions_with_positions from test.support.os_helper import FakePath class TestSpecifics(unittest.TestCase): @@ -1346,8 +1347,8 @@ def generic_visit(self, node): def assertOpcodeSourcePositionIs(self, code, opcode, line, end_line, column, end_column, occurrence=1): - for instr, position in zip( - dis.Bytecode(code, show_caches=True), code.co_positions(), strict=True + for instr, position in instructions_with_positions( + dis.Bytecode(code), code.co_positions() ): if instr.opname == opcode: occurrence -= 1 From a42ec5b41332dae985b7f11f172ecfe055d7cca9 Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Tue, 12 Dec 2023 20:24:17 +0000 Subject: [PATCH 5/9] tweaks --- Doc/library/dis.rst | 2 +- Lib/dis.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst index f9dac72c762c3d..1c17f44ebf1ce7 100644 --- a/Doc/library/dis.rst +++ b/Doc/library/dis.rst @@ -490,7 +490,7 @@ details of bytecode instructions as :class:`Instruction` instances: Information about the cache entries of this instruction, as triplets of the form ``(name, size, data)``, where the ``name`` and ``size`` describe the cache format and data is the contents - of the cache. It is ``None`` if the instruction does not have + of the cache. ``cache_info`` is ``None`` if the instruction does not have caches. .. 
versionadded:: 3.4 diff --git a/Lib/dis.py b/Lib/dis.py index 8867fc0516e9e1..183091cb0d6098 100644 --- a/Lib/dis.py +++ b/Lib/dis.py @@ -577,7 +577,6 @@ def get_instructions(x, *, first_line=None, show_caches=False, adaptive=False): original_code=original_code, arg_resolver=arg_resolver) - def _get_const_value(op, arg, co_consts): """Helper to get the value of the const in a hasconst op. @@ -818,7 +817,7 @@ def print_instructions(instrs, exception_entries, formatter, show_caches=False, formatter.print_instruction( Instruction("CACHE", CACHE, 0, None, argrepr, offset, offset, False, None, None, instr.positions), - is_current_instr) + is_current_instr) formatter.print_exception_table(exception_entries) From 12008a208a926b18badaf992ec6a7c185acfa1d3 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Tue, 12 Dec 2023 20:46:35 +0000 Subject: [PATCH 6/9] fix test util --- Lib/test/support/bytecode_helper.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Lib/test/support/bytecode_helper.py b/Lib/test/support/bytecode_helper.py index ce24969dd6a78e..a4845065a5322e 100644 --- a/Lib/test/support/bytecode_helper.py +++ b/Lib/test/support/bytecode_helper.py @@ -15,8 +15,9 @@ def instructions_with_positions(instrs, co_positions): co_positions = co_positions or iter(()) for instr in instrs: yield instr, next(co_positions, ()) - for _ in (instr.cache_info or ()): - next(co_positions, ()) + for _, size, _ in (instr.cache_info or ()): + for i in range(size): + next(co_positions, ()) class BytecodeTestCase(unittest.TestCase): """Custom assertion methods for inspecting bytecode.""" From 730c435997a3b8ad8d8595ee554ef901d30f8bdc Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Wed, 13 Dec 2023 11:29:28 +0000 Subject: [PATCH 7/9] clarify change in get_instructions --- Doc/library/dis.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst index 1c17f44ebf1ce7..5647021d6a9ba6 100644 --- 
a/Doc/library/dis.rst +++ b/Doc/library/dis.rst @@ -336,7 +336,8 @@ operation is being performed, so the intermediate analysis object isn't useful: Added the *show_caches* and *adaptive* parameters. .. versionchanged:: 3.13 - The *show_caches* parameter is deprecated and has no effect. + The *show_caches* parameter is deprecated and has no effect. The *cache_info* + field of each instruction is populated regardless of its value. .. function:: findlinestarts(code) From 867e3de7002073ed9228b5cb937f2d93de46047a Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Wed, 13 Dec 2023 19:56:26 +0000 Subject: [PATCH 8/9] Pass labels_map and arg_resolver to most functions. Update tests that access internals. --- Lib/dis.py | 157 ++++++++++++++++++++++++------------------- Lib/test/test_dis.py | 18 +++-- 2 files changed, 102 insertions(+), 73 deletions(-) diff --git a/Lib/dis.py b/Lib/dis.py index 183091cb0d6098..da9a493123e2e4 100644 --- a/Lib/dis.py +++ b/Lib/dis.py @@ -113,7 +113,14 @@ def dis(x=None, *, file=None, depth=None, show_caches=False, adaptive=False, elif hasattr(x, 'co_code'): # Code object _disassemble_recursive(x, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets) elif isinstance(x, (bytes, bytearray)): # Raw bytecode - _disassemble_bytes(x, file=file, show_caches=show_caches, show_offsets=show_offsets) + labels_map = _make_labels_map(x) + label_width = 4 + len(str(len(labels_map))) + formatter = Formatter(file=file, + offset_width=len(str(max(len(x) - 2, 9999))) if show_offsets else 0, + label_width=label_width, + show_caches=show_caches) + arg_resolver = ArgResolver(labels_map=labels_map) + _disassemble_bytes(x, labels_map=labels_map, arg_resolver=arg_resolver, formatter=formatter) elif isinstance(x, str): # Source code _disassemble_str(x, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets) else: @@ -394,23 +401,41 @@ def __str__(self): class Formatter: def __init__(self, 
file=None, lineno_width=0, offset_width=0, label_width=0, - line_offset=0): + line_offset=0, show_caches=False): """Create a Formatter *file* where to write the output *lineno_width* sets the width of the line number field (0 omits it) *offset_width* sets the width of the instruction offset field *label_width* sets the width of the label field + *show_caches* is a boolean indicating whether to display cache lines - *line_offset* the line number (within the code unit) """ self.file = file self.lineno_width = lineno_width self.offset_width = offset_width self.label_width = label_width - + self.show_caches = show_caches def print_instruction(self, instr, mark_as_current=False): + self.print_instruction_line(instr, mark_as_current) + if self.show_caches and instr.cache_info: + offset = instr.offset + for name, size, data in instr.cache_info: + for i in range(size): + offset += 2 + # Only show the fancy argrepr for a CACHE instruction when it's + # the first entry for a particular cache value: + if i == 0: + argrepr = f"{name}: {int.from_bytes(data, sys.byteorder)}" + else: + argrepr = "" + self.print_instruction_line( + Instruction("CACHE", CACHE, 0, None, argrepr, offset, offset, + False, None, None, instr.positions), + False) + + def print_instruction_line(self, instr, mark_as_current): """Format instruction details for inclusion in disassembly output.""" lineno_width = self.lineno_width offset_width = self.offset_width @@ -474,7 +499,7 @@ def print_exception_table(self, exception_entries): class ArgResolver: - def __init__(self, co_consts, names, varname_from_oparg, labels_map): + def __init__(self, co_consts=None, names=None, varname_from_oparg=None, labels_map=None): self.co_consts = co_consts self.names = names self.varname_from_oparg = varname_from_oparg @@ -547,8 +572,7 @@ def get_argval_argrepr(self, op, arg, offset): argrepr = _intrinsic_2_descs[arg] return argval, argrepr - -def get_instructions(x, *, first_line=None, show_caches=False, adaptive=False): +def 
get_instructions(x, *, first_line=None, show_caches=None, adaptive=False): """Iterator for the opcodes in methods, functions or code Generates a series of Instruction named tuples giving the details of @@ -568,13 +592,16 @@ def get_instructions(x, *, first_line=None, show_caches=False, adaptive=False): original_code = co.co_code labels_map = _make_labels_map(original_code) - arg_resolver = ArgResolver(co.co_consts, co.co_names, co._varname_from_oparg, - labels_map) + arg_resolver = ArgResolver(co_consts=co.co_consts, + names=co.co_names, + varname_from_oparg=co._varname_from_oparg, + labels_map=labels_map) return _get_instructions_bytes(_get_code_array(co, adaptive), linestarts=linestarts, line_offset=line_offset, co_positions=co.co_positions(), original_code=original_code, + labels_map=labels_map, arg_resolver=arg_resolver) def _get_const_value(op, arg, co_consts): @@ -661,7 +688,7 @@ def _get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=N original_code = original_code or code co_positions = co_positions or iter(()) - labels_map = labels_map or _make_labels_map(original_code) + assert labels_map is not None starts_line = False local_line_number = None @@ -684,10 +711,6 @@ def _get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=N else: argval, argrepr = arg, repr(arg) - instr = Instruction(_all_opname[op], op, arg, argval, argrepr, - offset, start_offset, starts_line, line_number, - labels_map.get(offset, None), positions) - caches = _get_cache_size(_all_opname[deop]) # Advance the co_positions iterator: for _ in range(caches): @@ -712,12 +735,20 @@ def disassemble(co, lasti=-1, *, file=None, show_caches=False, adaptive=False, """Disassemble a code object.""" linestarts = dict(findlinestarts(co)) exception_entries = _parse_exception_table(co) - _disassemble_bytes(_get_code_array(co, adaptive), - lasti, co._varname_from_oparg, - co.co_names, co.co_consts, linestarts, file=file, - exception_entries=exception_entries, - 
co_positions=co.co_positions(), show_caches=show_caches, - original_code=co.co_code, show_offsets=show_offsets) + labels_map = _make_labels_map(co.co_code, exception_entries=exception_entries) + label_width = 4 + len(str(len(labels_map))) + formatter = Formatter(file=file, + lineno_width=_get_lineno_width(linestarts), + offset_width=len(str(max(len(co.co_code) - 2, 9999))) if show_offsets else 0, + label_width=label_width, + show_caches=show_caches) + arg_resolver = ArgResolver(co_consts=co.co_consts, + names=co.co_names, + varname_from_oparg=co._varname_from_oparg, + labels_map=labels_map) + _disassemble_bytes(_get_code_array(co, adaptive), lasti, linestarts, + exception_entries=exception_entries, co_positions=co.co_positions(), + original_code=co.co_code, labels_map=labels_map, arg_resolver=arg_resolver, formatter=formatter) def _disassemble_recursive(co, *, file=None, depth=None, show_caches=False, adaptive=False, show_offsets=False): disassemble(co, file=file, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets) @@ -764,24 +795,15 @@ def _get_lineno_width(linestarts): return lineno_width -def _disassemble_bytes(code, lasti=-1, varname_from_oparg=None, - names=None, co_consts=None, linestarts=None, - *, file=None, line_offset=0, exception_entries=(), - co_positions=None, show_caches=False, original_code=None, - show_offsets=False): - - offset_width = len(str(max(len(code) - 2, 9999))) if show_offsets else 0 +def _disassemble_bytes(code, lasti=-1, linestarts=None, + *, line_offset=0, exception_entries=(), + co_positions=None, original_code=None, + labels_map=None, arg_resolver=None, formatter=None): - labels_map = _make_labels_map(original_code or code, exception_entries) - label_width = 4 + len(str(len(labels_map))) + assert labels_map is not None + assert formatter is not None + assert arg_resolver is not None - formatter = Formatter(file=file, - lineno_width=_get_lineno_width(linestarts), - offset_width=offset_width, - 
label_width=label_width, - line_offset=line_offset) - - arg_resolver = ArgResolver(co_consts, names, varname_from_oparg, labels_map) instrs = _get_instructions_bytes(code, linestarts=linestarts, line_offset=line_offset, co_positions=co_positions, @@ -789,35 +811,15 @@ def _disassemble_bytes(code, lasti=-1, varname_from_oparg=None, labels_map=labels_map, arg_resolver=arg_resolver) - print_instructions(instrs, exception_entries, formatter, - show_caches=show_caches, lasti=lasti) + print_instructions(instrs, exception_entries, formatter, lasti=lasti) -def print_instructions(instrs, exception_entries, formatter, show_caches=False, lasti=-1): +def print_instructions(instrs, exception_entries, formatter, lasti=-1): for instr in instrs: - if show_caches: - is_current_instr = instr.offset == lasti - else: - # Each CACHE takes 2 bytes - is_current_instr = instr.offset <= lasti \ - <= instr.offset + 2 * _get_cache_size(_all_opname[_deoptop(instr.opcode)]) + # Each CACHE takes 2 bytes + is_current_instr = instr.offset <= lasti \ + <= instr.offset + 2 * _get_cache_size(_all_opname[_deoptop(instr.opcode)]) formatter.print_instruction(instr, is_current_instr) - deop = _deoptop(instr.opcode) - if show_caches and instr.cache_info: - offset = instr.offset - for name, size, data in instr.cache_info: - for i in range(size): - offset += 2 - # Only show the fancy argrepr for a CACHE instruction when it's - # the first entry for a particular cache value: - if i == 0: - argrepr = f"{name}: {int.from_bytes(data, sys.byteorder)}" - else: - argrepr = "" - formatter.print_instruction( - Instruction("CACHE", CACHE, 0, None, argrepr, offset, offset, - False, None, None, instr.positions), - is_current_instr) formatter.print_exception_table(exception_entries) @@ -960,8 +962,10 @@ def __iter__(self): co = self.codeobj original_code = co.co_code labels_map = _make_labels_map(original_code, self.exception_entries) - arg_resolver = ArgResolver(co.co_consts, co.co_names, co._varname_from_oparg, - 
labels_map) + arg_resolver = ArgResolver(co_consts=co.co_consts, + names=co.co_names, + varname_from_oparg=co._varname_from_oparg, + labels_map=labels_map) return _get_instructions_bytes(_get_code_array(co, self.adaptive), linestarts=self._linestarts, line_offset=self._line_offset, @@ -995,18 +999,33 @@ def dis(self): else: offset = -1 with io.StringIO() as output: - _disassemble_bytes(_get_code_array(co, self.adaptive), - varname_from_oparg=co._varname_from_oparg, - names=co.co_names, co_consts=co.co_consts, + code = _get_code_array(co, self.adaptive) + offset_width = len(str(max(len(code) - 2, 9999))) if self.show_offsets else 0 + + + labels_map = _make_labels_map(co.co_code, self.exception_entries) + label_width = 4 + len(str(len(labels_map))) + formatter = Formatter(file=output, + lineno_width=_get_lineno_width(self._linestarts), + offset_width=offset_width, + label_width=label_width, + line_offset=self._line_offset, + show_caches=self.show_caches) + + arg_resolver = ArgResolver(co_consts=co.co_consts, + names=co.co_names, + varname_from_oparg=co._varname_from_oparg, + labels_map=labels_map) + _disassemble_bytes(code, linestarts=self._linestarts, line_offset=self._line_offset, - file=output, lasti=offset, exception_entries=self.exception_entries, co_positions=co.co_positions(), - show_caches=self.show_caches, original_code=co.co_code, - show_offsets=self.show_offsets) + labels_map=labels_map, + arg_resolver=arg_resolver, + formatter=formatter) return output.getvalue() diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 12e2c57e50b0ba..e78f2460cfee0e 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -2,6 +2,7 @@ import contextlib import dis +import functools import io import re import sys @@ -1982,19 +1983,28 @@ def f(opcode, oparg, offset, *init_args): self.assertEqual(f(opcode.opmap["BINARY_OP"], 3, *args), (3, '<<')) self.assertEqual(f(opcode.opmap["CALL_INTRINSIC_1"], 2, *args), (2, 'INTRINSIC_IMPORT_STAR')) + def 
get_instructions(self, code): + labels_map = dis._make_labels_map(code) + return dis._get_instructions_bytes(code, labels_map=labels_map) + def test_start_offset(self): # When no extended args are present, # start_offset should be equal to offset + instructions = list(dis.Bytecode(_f)) for instruction in instructions: self.assertEqual(instruction.offset, instruction.start_offset) + def last_item(iterable): + return functools.reduce(lambda a, b : b, iterable) + code = bytes([ opcode.opmap["LOAD_FAST"], 0x00, opcode.opmap["EXTENDED_ARG"], 0x01, opcode.opmap["POP_JUMP_IF_TRUE"], 0xFF, ]) - jump = list(dis._get_instructions_bytes(code))[-1] + labels_map = dis._make_labels_map(code) + jump = last_item(self.get_instructions(code)) self.assertEqual(4, jump.offset) self.assertEqual(2, jump.start_offset) @@ -2006,7 +2016,7 @@ def test_start_offset(self): opcode.opmap["POP_JUMP_IF_TRUE"], 0xFF, opcode.opmap["CACHE"], 0x00, ]) - jump = list(dis._get_instructions_bytes(code))[-1] + jump = last_item(self.get_instructions(code)) self.assertEqual(8, jump.offset) self.assertEqual(2, jump.start_offset) @@ -2021,7 +2031,7 @@ def test_start_offset(self): opcode.opmap["POP_JUMP_IF_TRUE"], 0xFF, opcode.opmap["CACHE"], 0x00, ]) - instructions = list(dis._get_instructions_bytes(code)) + instructions = list(self.get_instructions(code)) # 1st jump self.assertEqual(4, instructions[2].offset) self.assertEqual(2, instructions[2].start_offset) @@ -2042,7 +2052,7 @@ def test_cache_offset_and_end_offset(self): opcode.opmap["CACHE"], 0x00, opcode.opmap["CACHE"], 0x00 ]) - instructions = list(dis._get_instructions_bytes(code)) + instructions = list(self.get_instructions(code)) self.assertEqual(2, instructions[0].cache_offset) self.assertEqual(10, instructions[0].end_offset) self.assertEqual(12, instructions[1].cache_offset) From 8374d416b4b4658fe53dad98b27d6828bab7545a Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Wed, 13 Dec 2023 21:32:53 +0000 Subject: [PATCH 9/9] arg_resolver has labels_map 
so we don't need to pass both around everywhere --- Lib/dis.py | 26 +++++++++++--------------- Lib/test/test_dis.py | 3 +-- 2 files changed, 12 insertions(+), 17 deletions(-) diff --git a/Lib/dis.py b/Lib/dis.py index da9a493123e2e4..bb59d69f6f0e49 100644 --- a/Lib/dis.py +++ b/Lib/dis.py @@ -120,7 +120,7 @@ def dis(x=None, *, file=None, depth=None, show_caches=False, adaptive=False, label_width=label_width, show_caches=show_caches) arg_resolver = ArgResolver(labels_map=labels_map) - _disassemble_bytes(x, labels_map=labels_map, arg_resolver=arg_resolver, formatter=formatter) + _disassemble_bytes(x, arg_resolver=arg_resolver, formatter=formatter) elif isinstance(x, str): # Source code _disassemble_str(x, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets) else: @@ -503,7 +503,10 @@ def __init__(self, co_consts=None, names=None, varname_from_oparg=None, labels_m self.co_consts = co_consts self.names = names self.varname_from_oparg = varname_from_oparg - self.labels_map = labels_map + self.labels_map = labels_map or {} + + def get_label_for_offset(self, offset): + return self.labels_map.get(offset, None) def get_argval_argrepr(self, op, arg, offset): get_name = None if self.names is None else self.names.__getitem__ @@ -591,17 +594,15 @@ def get_instructions(x, *, first_line=None, show_caches=None, adaptive=False): line_offset = 0 original_code = co.co_code - labels_map = _make_labels_map(original_code) arg_resolver = ArgResolver(co_consts=co.co_consts, names=co.co_names, varname_from_oparg=co._varname_from_oparg, - labels_map=labels_map) + labels_map=_make_labels_map(original_code)) return _get_instructions_bytes(_get_code_array(co, adaptive), linestarts=linestarts, line_offset=line_offset, co_positions=co.co_positions(), original_code=original_code, - labels_map=labels_map, arg_resolver=arg_resolver) def _get_const_value(op, arg, co_consts): @@ -675,7 +676,7 @@ def _is_backward_jump(op): 'ENTER_EXECUTOR') def 
_get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=None, - original_code=None, labels_map=None, arg_resolver=None): + original_code=None, arg_resolver=None): """Iterate over the instructions in a bytecode string. Generates a sequence of Instruction namedtuples giving the details of each @@ -688,8 +689,6 @@ def _get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=N original_code = original_code or code co_positions = co_positions or iter(()) - assert labels_map is not None - starts_line = False local_line_number = None line_number = None @@ -724,9 +723,10 @@ def _get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=N else: cache_info = None + label = arg_resolver.get_label_for_offset(offset) if arg_resolver else None yield Instruction(_all_opname[op], op, arg, argval, argrepr, offset, start_offset, starts_line, line_number, - labels_map.get(offset, None), positions, cache_info) + label, positions, cache_info) @@ -748,7 +748,7 @@ def disassemble(co, lasti=-1, *, file=None, show_caches=False, adaptive=False, labels_map=labels_map) _disassemble_bytes(_get_code_array(co, adaptive), lasti, linestarts, exception_entries=exception_entries, co_positions=co.co_positions(), - original_code=co.co_code, labels_map=labels_map, arg_resolver=arg_resolver, formatter=formatter) + original_code=co.co_code, arg_resolver=arg_resolver, formatter=formatter) def _disassemble_recursive(co, *, file=None, depth=None, show_caches=False, adaptive=False, show_offsets=False): disassemble(co, file=file, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets) @@ -798,9 +798,8 @@ def _get_lineno_width(linestarts): def _disassemble_bytes(code, lasti=-1, linestarts=None, *, line_offset=0, exception_entries=(), co_positions=None, original_code=None, - labels_map=None, arg_resolver=None, formatter=None): + arg_resolver=None, formatter=None): - assert labels_map is not None assert formatter is not None assert arg_resolver 
is not None @@ -808,7 +807,6 @@ def _disassemble_bytes(code, lasti=-1, linestarts=None, line_offset=line_offset, co_positions=co_positions, original_code=original_code, - labels_map=labels_map, arg_resolver=arg_resolver) print_instructions(instrs, exception_entries, formatter, lasti=lasti) @@ -971,7 +969,6 @@ def __iter__(self): line_offset=self._line_offset, co_positions=co.co_positions(), original_code=original_code, - labels_map=labels_map, arg_resolver=arg_resolver) def __repr__(self): @@ -1023,7 +1020,6 @@ def dis(self): exception_entries=self.exception_entries, co_positions=co.co_positions(), original_code=co.co_code, - labels_map=labels_map, arg_resolver=arg_resolver, formatter=formatter) return output.getvalue() diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index e78f2460cfee0e..0c7fd60f640854 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -1984,8 +1984,7 @@ def f(opcode, oparg, offset, *init_args): self.assertEqual(f(opcode.opmap["CALL_INTRINSIC_1"], 2, *args), (2, 'INTRINSIC_IMPORT_STAR')) def get_instructions(self, code): - labels_map = dis._make_labels_map(code) - return dis._get_instructions_bytes(code, labels_map=labels_map) + return dis._get_instructions_bytes(code) def test_start_offset(self): # When no extended args are present,