"""Bytecode analysis helpers.

Provides dead-code elimination, line-number bookkeeping, live-variable
analysis and stack-size estimation over a list of instruction objects.

The instruction objects are expected to expose (depending on the function)
``opcode``, ``opname``, ``arg``, ``argval``, ``target``, ``starts_line`` and
``exn_tab_entry`` attributes, and to be hashable, since they are used as
dictionary keys.
"""
import bisect
import dataclasses
import dis
import sys
from typing import Any, Set, Union

# Opcodes after which control never falls through to the next instruction.
TERMINAL_OPCODES = {
    dis.opmap["RETURN_VALUE"],
    dis.opmap["JUMP_FORWARD"],
    dis.opmap["RAISE_VARARGS"],
    # TODO(jansel): double check exception handling
}
if sys.version_info >= (3, 9):
    TERMINAL_OPCODES.add(dis.opmap["RERAISE"])
if sys.version_info >= (3, 11):
    # JUMP_ABSOLUTE no longer exists; JUMP_BACKWARD is the unconditional
    # backward jump (JUMP_FORWARD is already in the set above).
    TERMINAL_OPCODES.add(dis.opmap["JUMP_BACKWARD"])
else:
    TERMINAL_OPCODES.add(dis.opmap["JUMP_ABSOLUTE"])
if "RETURN_CONST" in dis.opmap:
    # RETURN_CONST only exists in some CPython versions, so probe the opcode
    # table instead of hard-coding a version range. It returns from the frame
    # exactly like RETURN_VALUE, so nothing falls through it.
    TERMINAL_OPCODES.add(dis.opmap["RETURN_CONST"])

JUMP_OPCODES = set(dis.hasjrel + dis.hasjabs)
JUMP_OPNAMES = {dis.opname[opcode] for opcode in JUMP_OPCODES}
HASLOCAL = set(dis.haslocal)
HASFREE = set(dis.hasfree)

stack_effect = dis.stack_effect


def get_indexof(insts):
    """
    Get a mapping from instruction object to its index in the instruction list.
    Additionally checks that each instruction only appears once in the list.
    """
    indexof = {}
    for position, inst in enumerate(insts):
        assert inst not in indexof
        indexof[inst] = position
    return indexof


def remove_dead_code(instructions):
    """Dead code elimination.

    Returns a new list holding only the instructions reachable from the first
    instruction by fall-through, jump targets and exception-table targets.
    On Python 3.11+ the ``start``/``end`` fields of the exception table entries
    attached to live instructions are updated in place so that they refer to
    live instructions.
    """
    indexof = get_indexof(instructions)
    live_code = set()

    # Explicit worklist instead of recursion: one pending entry per jump or
    # handler edge, so long branch chains cannot exhaust the recursion limit.
    pending = [0]
    while pending:
        start = pending.pop()
        for i in range(start, len(instructions)):
            if i in live_code:
                # Everything reachable from here has been (or will be) scanned.
                break
            live_code.add(i)
            inst = instructions[i]
            if inst.exn_tab_entry:
                pending.append(indexof[inst.exn_tab_entry.target])
            if inst.opcode in JUMP_OPCODES:
                pending.append(indexof[inst.target])
            if inst.opcode in TERMINAL_OPCODES:
                break

    # change exception table entries if start/end instructions are dead
    # assumes that exception table entries have been propagated,
    # e.g. with bytecode_transformation.propagate_inst_exn_table_entries,
    # and that instructions with an exn_tab_entry lies within its start/end.
    if sys.version_info >= (3, 11):
        live_idx = sorted(live_code)
        for i, inst in enumerate(instructions):
            if i in live_code and inst.exn_tab_entry:
                # find leftmost live instruction >= start
                start_idx = bisect.bisect_left(
                    live_idx, indexof[inst.exn_tab_entry.start]
                )
                assert start_idx < len(live_idx)
                # find rightmost live instruction <= end
                end_idx = (
                    bisect.bisect_right(live_idx, indexof[inst.exn_tab_entry.end])
                    - 1
                )
                assert end_idx >= 0
                assert live_idx[start_idx] <= i <= live_idx[end_idx]
                inst.exn_tab_entry.start = instructions[live_idx[start_idx]]
                inst.exn_tab_entry.end = instructions[live_idx[end_idx]]

    return [inst for i, inst in enumerate(instructions) if i in live_code]


def remove_pointless_jumps(instructions):
    """Eliminate JUMP_ABSOLUTE instructions whose target is the next instruction."""
    pointless_jumps = {
        id(jump)
        for jump, successor in zip(instructions, instructions[1:])
        if jump.opname == "JUMP_ABSOLUTE" and jump.target is successor
    }
    return [inst for inst in instructions if id(inst) not in pointless_jumps]


def propagate_line_nums(instructions):
    """Ensure every instruction has line number set in case some are removed.

    Each instruction without a (truthy) ``starts_line`` inherits the most
    recent one seen earlier in the list; instructions before the first line
    marker end up with ``None``. Mutates the instructions in place.
    """
    cur_line_no = None
    for inst in instructions:
        if inst.starts_line:
            cur_line_no = inst.starts_line
        inst.starts_line = cur_line_no


def remove_extra_line_nums(instructions):
    """Remove extra starts line properties before packing bytecode.

    Clears ``starts_line`` on every instruction that repeats the line number
    of the closest preceding instruction that had one, so only the first
    instruction of each run keeps it. Mutates the instructions in place.
    """
    cur_line_no = None
    for inst in instructions:
        if inst.starts_line is None:
            continue
        if inst.starts_line == cur_line_no:
            inst.starts_line = None
        else:
            cur_line_no = inst.starts_line


@dataclasses.dataclass
class ReadsWrites:
    # Variable names (instruction argvals) read, names written, and the
    # instruction indices at which a walk has already been started.
    reads: Set[Any]
    writes: Set[Any]
    visited: Set[Any]


def livevars_analysis(instructions, instruction):
    """Collect the local/free variable names that may be read from ``instruction`` on.

    Walks the instruction list starting at ``instruction``. Reads (LOAD* and
    DELETE* on local/free variable opcodes) are recorded unless the name has
    already been written on the "must" path, i.e. the straight-line path from
    the start up to the first jump instruction. Anything reached through a
    jump target, an exception-table target, or by falling through a jump is
    tracked in the "may" state instead.

    Returns the union of the names read in both states.
    Raises NotImplementedError for an unrecognised local/free variable opcode.
    """
    indexof = get_indexof(instructions)
    must = ReadsWrites(set(), set(), set())
    may = ReadsWrites(set(), set(), set())

    def walk(state, start):
        if start in state.visited:
            return
        state.visited.add(start)

        for i in range(start, len(instructions)):
            inst = instructions[i]
            if inst.opcode in HASLOCAL or inst.opcode in HASFREE:
                if "LOAD" in inst.opname or "DELETE" in inst.opname:
                    # Only definite ("must") writes can hide a later read.
                    if inst.argval not in must.writes:
                        state.reads.add(inst.argval)
                elif "STORE" in inst.opname:
                    state.writes.add(inst.argval)
                elif inst.opname == "MAKE_CELL":
                    pass
                else:
                    raise NotImplementedError(f"unhandled {inst.opname}")
            if inst.exn_tab_entry:
                walk(may, indexof[inst.exn_tab_entry.target])
            if inst.opcode in JUMP_OPCODES:
                walk(may, indexof[inst.target])
                # Past a jump, the fall-through path is no longer certain.
                state = may
            if inst.opcode in TERMINAL_OPCODES:
                return

    walk(must, indexof[instruction])
    return must.reads | may.reads


@dataclasses.dataclass
class FixedPointBox:
    # Shared mutable flag: set to False whenever any StackSize bound changes.
    value: bool = True


@dataclasses.dataclass
class StackSize:
    # Lowest and highest stack depth observed on entry to an instruction, plus
    # the shared flag used to detect when the analysis has converged.
    low: Union[int, float]
    high: Union[int, float]
    fixed_point: FixedPointBox

    def zero(self):
        """Pin both bounds to 0 and mark the analysis as not yet converged."""
        self.low = 0
        self.high = 0
        self.fixed_point.value = False

    def _widen(self, low, high):
        """Widen the bounds to include [low, high]; flag any change."""
        prior = (self.low, self.high)
        self.low = min(self.low, low)
        self.high = max(self.high, high)
        if (self.low, self.high) != prior:
            self.fixed_point.value = False

    def offset_of(self, other, n):
        """Merge in ``other``'s bounds shifted by the stack effect ``n``."""
        self._widen(other.low + n, other.high + n)

    def exn_tab_jump(self, depth):
        """Merge in the fixed stack ``depth`` used on entry to a handler."""
        self._widen(depth, depth)


def stacksize_analysis(instructions) -> Union[int, float]:
    """Return the highest stack depth reached by ``instructions``.

    Iterates a data-flow propagation of per-instruction stack-depth bounds,
    for at most 100 rounds, until no bound changes.

    Asserts that ``instructions`` is non-empty, that every non-terminal
    instruction has a successor, that a fixed point was reached, and that the
    stack depth never goes negative.
    """
    assert instructions
    fixed_point = FixedPointBox()
    stack_sizes = {
        inst: StackSize(float("inf"), float("-inf"), fixed_point)
        for inst in instructions
    }
    stack_sizes[instructions[0]].zero()

    for _ in range(100):
        if fixed_point.value:
            break
        fixed_point.value = True

        for inst, next_inst in zip(instructions, instructions[1:] + [None]):
            stack_size = stack_sizes[inst]
            # CALL_FINALLY in Python 3.8 is handled differently when determining stack depth.
            # See https://github.com/python/cpython/blob/3.8/Python/compile.c#L5450.
            # Essentially, the stack effect of CALL_FINALLY is computed with jump=True,
            # but the resulting stack depth is propagated to the next instruction, not the
            # jump target.
            is_call_finally = (
                sys.version_info < (3, 9) and inst.opcode == dis.opmap["CALL_FINALLY"]
            )
            if inst.opcode not in TERMINAL_OPCODES:
                assert next_inst is not None, f"missing next inst: {inst}"
                stack_sizes[next_inst].offset_of(
                    stack_size,
                    stack_effect(inst.opcode, inst.arg, jump=is_call_finally),
                )
            if inst.opcode in JUMP_OPCODES and not is_call_finally:
                stack_sizes[inst.target].offset_of(
                    stack_size, stack_effect(inst.opcode, inst.arg, jump=True)
                )
            if inst.exn_tab_entry:
                # see https://github.com/python/cpython/blob/3.11/Objects/exception_handling_notes.txt
                # on why depth is computed this way.
                depth = inst.exn_tab_entry.depth + int(inst.exn_tab_entry.lasti) + 1
                stack_sizes[inst.exn_tab_entry.target].exn_tab_jump(depth)

    low = min(x.low for x in stack_sizes.values())
    high = max(x.high for x in stack_sizes.values())

    assert fixed_point.value, "failed to reach fixed point"
    assert low >= 0
    return high