009-目标代码生成与链接

最新推荐文章于 2026-04-07 10:05:17 发布

原创最新推荐文章于 2026-04-07 10:05:17 发布 · 1.1k 阅读

24 ·

本内容遵循CC 4.0 BY-SA版权协议

GEO检测

收录于

程序设计语言

009-目标代码生成与链接

9.1 目标代码生成概述

9.1.1 目标代码生成的作用

目标代码生成是编译器的最后阶段，负责将中间代码转换为目标机器可以直接执行的机器代码。这个过程包括：

指令选择：选择合适的机器指令来实现中间代码的操作
寄存器分配：为变量和临时值分配寄存器或内存位置
指令调度：重新排列指令以提高执行效率
代码优化：在目标代码级别进行进一步优化

9.1.2 目标代码的特点

from enum import Enum
from typing import List, Dict, Optional, Union
from dataclasses import dataclass

class TargetArchitecture(Enum):
    """Identifiers of the target instruction-set architectures."""
    X86_32 = "x86_32"
    X86_64 = "x86_64"
    ARM_32 = "arm_32"
    ARM_64 = "arm_64"
    MIPS = "mips"
    RISC_V = "risc_v"

class InstructionType(Enum):
    """Coarse category of a machine instruction."""
    ARITHMETIC = "arithmetic"    # arithmetic instructions
    LOGICAL = "logical"          # logical / bitwise instructions
    MEMORY = "memory"            # memory access instructions
    CONTROL = "control"          # control-flow instructions
    COMPARISON = "comparison"    # comparison instructions
    MOVE = "move"                # data movement instructions

@dataclass
class Register:
    """A machine register, printed as its bare name."""
    name: str
    size: int  # width in bits
    # False marks a register with a dedicated role (the x86-64 table uses it
    # for rbp and rsp), which keeps it out of the allocatable pool.
    is_general: bool = True
    # Registers with this flag cleared are filtered out together with the
    # non-general ones when the allocatable pool is queried.
    is_available: bool = True
    
    def __str__(self) -> str:
        return self.name

@dataclass
class MemoryOperand:
    """A memory operand rendered as ``[base + index*scale + offset]``.

    Every component is optional. Components that are absent are left out of
    the rendered text; the offset is always printed.
    """
    base_register: Optional[Register] = None
    offset: int = 0
    index_register: Optional[Register] = None
    scale: int = 1  # 1, 2, 4, 8

    def __str__(self) -> str:
        """Return the bracketed addressing expression.

        An index register is rendered even when there is no base register,
        and a negative offset is rendered as ``- n`` rather than ``+ -n``.
        """
        terms = []
        if self.base_register is not None:
            terms.append(str(self.base_register))
        if self.index_register is not None:
            terms.append(f"{self.index_register}*{self.scale}")

        if not terms:
            # Absolute address: keep the sign attached to the number.
            return f"[{self.offset}]"

        sign = '-' if self.offset < 0 else '+'
        return f"[{' + '.join(terms)} {sign} {abs(self.offset)}]"

@dataclass
class Instruction:
    """A single machine instruction: an opcode plus its operand list."""
    opcode: str
    operands: List[Union[Register, MemoryOperand, int, str]]
    instruction_type: InstructionType
    size: int = 4  # instruction size in bytes

    def __str__(self) -> str:
        """Return ``opcode op1, op2, ...``, or just the opcode when there
        are no operands (no trailing space is emitted)."""
        if not self.operands:
            return str(self.opcode)
        rendered = ', '.join(str(operand) for operand in self.operands)
        return f"{self.opcode} {rendered}"

class TargetMachine:
    """Description of the machine that code is generated for.

    Holds three tables derived from the chosen architecture: the register
    file, the opcode -> instruction-type map and the calling convention.
    """

    def __init__(self, architecture: TargetArchitecture):
        self.architecture = architecture
        self.registers = self._init_registers()
        self.instruction_set = self._init_instruction_set()
        self.calling_convention = self._init_calling_convention()

    def _init_registers(self) -> Dict[str, Register]:
        """Build the name -> Register table for the architecture."""
        if self.architecture != TargetArchitecture.X86_64:
            # Simplified register file shared by every other architecture:
            # sixteen 32-bit registers named r0..r15.
            generic_names = [f'r{i}' for i in range(16)]
            return {name: Register(name, 32) for name in generic_names}

        legacy_names = ('rax', 'rbx', 'rcx', 'rdx', 'rsi', 'rdi', 'rbp', 'rsp')
        numbered_names = tuple(f'r{i}' for i in range(8, 16))
        reserved = ('rbp', 'rsp')  # base pointer and stack pointer

        table = {}
        for name in legacy_names + numbered_names:
            if name in reserved:
                table[name] = Register(name, 64, is_general=False)
            else:
                table[name] = Register(name, 64)
        return table

    def _init_instruction_set(self) -> Dict[str, InstructionType]:
        """Build the opcode -> InstructionType table."""
        grouped = (
            (InstructionType.ARITHMETIC,
             ('add', 'sub', 'mul', 'div', 'inc', 'dec')),
            (InstructionType.LOGICAL,
             ('and', 'or', 'xor', 'not', 'shl', 'shr')),
            (InstructionType.MOVE, ('mov',)),
            # lea = Load Effective Address
            (InstructionType.MEMORY, ('load', 'store', 'lea')),
            (InstructionType.COMPARISON, ('cmp', 'test')),
            (InstructionType.CONTROL,
             ('jmp', 'je', 'jne', 'jl', 'jg', 'call', 'ret')),
        )
        return {opcode: kind for kind, opcodes in grouped for opcode in opcodes}

    def _init_calling_convention(self) -> Dict[str, List[str]]:
        """Build the calling-convention table (role -> register names)."""
        if self.architecture == TargetArchitecture.X86_64:
            convention = {
                'parameter_registers': ['rdi', 'rsi', 'rdx', 'rcx', 'r8', 'r9'],
                'return_registers': ['rax', 'rdx'],
                'caller_saved': ['rax', 'rcx', 'rdx', 'rsi', 'rdi',
                                 'r8', 'r9', 'r10', 'r11'],
                'callee_saved': ['rbx', 'rbp', 'r12', 'r13', 'r14', 'r15'],
            }
        else:
            convention = {
                'parameter_registers': ['r0', 'r1', 'r2', 'r3'],
                'return_registers': ['r0'],
                'caller_saved': ['r0', 'r1', 'r2', 'r3'],
                'callee_saved': ['r4', 'r5', 'r6', 'r7'],
            }
        return convention

    def get_available_registers(self) -> List[Register]:
        """Return the registers that are both general-purpose and available."""
        usable = []
        for reg in self.registers.values():
            if reg.is_general and reg.is_available:
                usable.append(reg)
        return usable

# 测试目标机器描述
def test_target_machine():
    """Demo: build an x86-64 machine description and print a summary of
    its registers, calling convention and a few sample instructions."""
    print("\n=== 目标机器描述测试 ===")

    machine = TargetMachine(TargetArchitecture.X86_64)

    print(f"架构: {machine.architecture.value}")
    print(f"寄存器数量: {len(machine.registers)}")
    # instruction_set maps opcode -> type, so its length is the number of
    # opcodes, not the number of instruction types; the label says so.
    print(f"指令数量: {len(machine.instruction_set)}")

    print("\n可用通用寄存器:")
    for reg in machine.get_available_registers():
        print(f"  {reg.name} ({reg.size}位)")

    print("\n调用约定:")
    for key, value in machine.calling_convention.items():
        print(f"  {key}: {value}")

    # Build a few sample instructions.
    rax = machine.registers['rax']
    rbx = machine.registers['rbx']

    instructions = [
        Instruction('mov', [rax, 42], InstructionType.MOVE),
        Instruction('add', [rax, rbx], InstructionType.ARITHMETIC),
        Instruction('cmp', [rax, 0], InstructionType.COMPARISON),
        Instruction('je', ['label1'], InstructionType.CONTROL),
    ]

    print("\n示例指令:")
    for instr in instructions:
        print(f"  {instr}")

# Run the target-machine demo only when this file is executed as a script.
if __name__ == "__main__":
    test_target_machine()

9.2 指令选择

9.2.1 指令选择算法

指令选择是将中间代码操作映射到目标机器指令的过程。主要方法包括：

简单映射：一对一的直接映射
模式匹配：使用模式匹配技术选择最优指令序列
动态规划：考虑指令成本的最优选择
树/DAG覆盖：将中间代码表示为表达式树或DAG，用指令模式“瓦片”对其进行覆盖（图着色属于寄存器分配技术，见9.3节）

from typing import Dict, List, Tuple, Optional
from abc import ABC, abstractmethod

class InstructionSelector(ABC):
    """Abstract base class for instruction selectors.

    Subclasses provide the pattern table and the translation from
    intermediate-code lines to machine instructions.
    """

    def __init__(self, target_machine: TargetMachine):
        self.target_machine = target_machine
        # The table is supplied by the concrete subclass; it is requested
        # only after target_machine has been stored.
        pattern_table = self._init_patterns()
        self.instruction_patterns = pattern_table

    @abstractmethod
    def _init_patterns(self) -> Dict[str, List[str]]:
        """Return the instruction pattern table of this selector."""

    @abstractmethod
    def select_instructions(self, intermediate_code: List[str]) -> List[Instruction]:
        """Translate intermediate-code lines into machine instructions."""

class SimpleInstructionSelector(InstructionSelector):
    """Instruction selector that expands each three-address statement
    through a fixed table of textual templates.

    All templates use ``op source, destination`` operand order.

    NOTE: label definitions such as ``L1:`` are not recognised and produce
    no output.
    """

    # Binary operator symbol -> operation name, in the order they are probed.
    _BINARY_OPERATORS = {'+': 'ADD', '-': 'SUB', '*': 'MUL', '/': 'DIV'}

    def _init_patterns(self) -> Dict[str, List[str]]:
        """Return the operation -> template-list table.

        Placeholders in braces are replaced by the fields of the parsed
        statement.
        """
        return {
            # three-address operation -> target instruction templates.
            # The ALU templates put the destination last so that they agree
            # with the ``mov source, destination`` line that precedes them.
            'ADD': ['mov {src1}, {dst}', 'add {src2}, {dst}'],
            'SUB': ['mov {src1}, {dst}', 'sub {src2}, {dst}'],
            'MUL': ['mov {src1}, {dst}', 'imul {src2}, {dst}'],
            'DIV': ['mov {src1}, rax', 'cqo', 'idiv {src2}', 'mov rax, {dst}'],
            'ASSIGN': ['mov {src}, {dst}'],
            'LOAD': ['mov [{src}], {dst}'],
            'STORE': ['mov {src}, [{dst}]'],
            'CMP': ['cmp {src1}, {src2}'],
            'JMP': ['jmp {label}'],
            'JE': ['je {label}'],
            'JNE': ['jne {label}'],
            'JL': ['jl {label}'],
            'JG': ['jg {label}'],
            'CALL': ['call {function}'],
            'RET': ['ret'],
        }

    def select_instructions(self, intermediate_code: List[str]) -> List[Instruction]:
        """Translate intermediate-code lines into instructions.

        Blank lines, lines starting with ``#`` and lines that cannot be
        parsed are skipped.
        """
        instructions = []

        for raw_line in intermediate_code:
            line = raw_line.strip()
            if not line or line.startswith('#'):
                continue

            parsed = self._parse_intermediate_instruction(line)
            if parsed:
                instructions.extend(self._select_for_operation(parsed))

        return instructions

    def _parse_intermediate_instruction(self, instruction: str) -> Optional[Dict[str, str]]:
        """Parse one three-address statement into a field dictionary.

        Returns a dict with an ``'op'`` key plus the operands of that
        operation, or ``None`` when the statement is not recognised.

        Keyword statements are recognised by their first token *before*
        looking for ``=``, so conditions such as ``a == b`` or ``a >= b``
        are not mistaken for assignments.
        """
        tokens = instruction.split()
        if not tokens:
            return None
        keyword = tokens[0]

        if keyword == 'if':
            # "if <condition> goto <label>": the condition may span several
            # tokens, so locate 'goto' instead of assuming its position.
            if 'goto' in tokens[1:-1]:
                label = tokens[tokens.index('goto') + 1]
                # Simplification kept from the original design: every
                # conditional jump is emitted as a jump-if-equal.
                return {'op': 'JE', 'label': label}
            return None

        if keyword == 'goto':
            if len(tokens) >= 2:
                return {'op': 'JMP', 'label': tokens[1]}
            return None

        if keyword == 'call':
            if len(tokens) >= 2:
                return {'op': 'CALL', 'function': tokens[1]}
            return None

        if keyword == 'return':
            return {'op': 'RET'}

        if '=' not in instruction:
            return None

        dst, _, expr = instruction.partition('=')
        dst, expr = dst.strip(), expr.strip()
        if not dst or not expr:
            return None
        return self._parse_expression(dst, expr)

    def _parse_expression(self, dst: str, expr: str) -> Dict[str, str]:
        """Classify the right-hand side of an assignment as a binary
        operation or a plain copy."""
        # Preferred form: "lhs op rhs" separated by whitespace. This keeps
        # signed operands such as "a * -2" intact.
        pieces = expr.split()
        if len(pieces) == 3 and pieces[1] in self._BINARY_OPERATORS:
            return {
                'op': self._BINARY_OPERATORS[pieces[1]],
                'dst': dst,
                'src1': pieces[0],
                'src2': pieces[2],
            }

        # Fallback for unspaced input such as "a+b". Searching from index 1
        # keeps a leading sign ("-5") from being read as an operator.
        for symbol, operation in self._BINARY_OPERATORS.items():
            position = expr.find(symbol, 1)
            if position == -1:
                continue
            left = expr[:position].strip()
            right = expr[position + 1:].strip()
            if left and right:
                return {
                    'op': operation,
                    'dst': dst,
                    'src1': left,
                    'src2': right,
                }

        return {'op': 'ASSIGN', 'dst': dst, 'src': expr}

    def _select_for_operation(self, parsed: Dict[str, str]) -> List[Instruction]:
        """Instantiate every template registered for ``parsed['op']``.

        Returns an empty list when the operation has no templates.
        """
        templates = self.instruction_patterns.get(parsed['op'])
        if not templates:
            return []

        instructions = []
        for template in templates:
            text = template
            for key, value in parsed.items():
                if key != 'op':
                    text = text.replace(f'{{{key}}}', value)

            instr = self._parse_instruction_string(text)
            if instr:
                instructions.append(instr)

        return instructions

    def _parse_instruction_string(self, instr_str: str) -> Optional[Instruction]:
        """Turn ``"opcode op1, op2"`` text into an Instruction.

        Operands are separated on commas, so a memory operand that contains
        spaces (``[a + b]``) stays in one piece. Returns ``None`` for empty
        text.
        """
        head_and_rest = instr_str.split(None, 1)
        if not head_and_rest:
            return None

        opcode = head_and_rest[0]
        operand_text = head_and_rest[1] if len(head_and_rest) > 1 else ''
        operands = [piece.strip() for piece in operand_text.split(',') if piece.strip()]

        # Opcodes missing from the machine's table default to ARITHMETIC.
        instr_type = self.target_machine.instruction_set.get(
            opcode, InstructionType.ARITHMETIC
        )

        return Instruction(opcode, operands, instr_type)

class OptimizedInstructionSelector(InstructionSelector):
    """Instruction selector that strength-reduces multiplication by a
    power of two and delegates everything else to the simple selector."""

    def __init__(self, target_machine: TargetMachine):
        super().__init__(target_machine)
        self.instruction_costs = self._init_instruction_costs()
        # One shared fallback selector instead of a new one per statement.
        self._fallback = SimpleInstructionSelector(target_machine)

    def _init_patterns(self) -> Dict[str, List[List[str]]]:
        """Return the table of alternative template sequences.

        Each operation maps to a list of alternatives; each alternative is
        a list of instruction templates.
        """
        return {
            'ADD': [
                # alternative 1: plain addition
                ['mov {src1}, {dst}', 'add {dst}, {src2}'],
                # alternative 2: LEA (for a simple addition)
                ['lea [{src1} + {src2}], {dst}'],
            ],
            'MUL_CONST': [
                # multiplication by a constant
                ['shl {src}, {shift_amount}'],  # power of two
                ['lea [{src} + {src}*{scale}], {dst}'],  # via LEA
                ['imul {src}, {const}, {dst}'],  # plain multiply
            ],
            'ARRAY_ACCESS': [
                # array element address computation
                ['lea [{base} + {index}*{scale} + {offset}], {dst}'],
            ],
        }

    def _init_instruction_costs(self) -> Dict[str, int]:
        """Return the opcode -> cost table."""
        return {
            'mov': 1,
            'add': 1,
            'sub': 1,
            'lea': 1,
            'shl': 1,
            'shr': 1,
            'imul': 3,
            'idiv': 20,
            'call': 5,
            'jmp': 2,
            'je': 2,
            'jne': 2,
        }

    def select_instructions(self, intermediate_code: List[str]) -> List[Instruction]:
        """Translate intermediate-code lines, applying the optimised
        lowering where one exists.

        Blank lines, lines starting with ``#`` and unparseable lines are
        skipped.
        """
        instructions = []

        for raw_line in intermediate_code:
            line = raw_line.strip()
            if not line or line.startswith('#'):
                continue

            parsed = self._parse_intermediate_instruction(line)
            if parsed:
                instructions.extend(self._select_optimal_instructions(parsed))

        return instructions

    def _parse_intermediate_instruction(self, instruction: str) -> Optional[Dict[str, str]]:
        """Parse a statement with the simple selector's parser, then tag a
        multiplication by a power-of-two literal as ``MUL_CONST``.

        For such a statement the result carries ``shift_amount``, the
        base-2 logarithm of the constant, as a string.
        """
        parsed = self._fallback._parse_intermediate_instruction(instruction)

        if parsed and parsed['op'] == 'MUL' and parsed['src2'].isdigit():
            const_val = int(parsed['src2'])
            # A positive integer is a power of two iff it has one bit set.
            if const_val > 0 and (const_val & (const_val - 1)) == 0:
                parsed['op'] = 'MUL_CONST'
                parsed['shift_amount'] = str(const_val.bit_length() - 1)

        return parsed

    def _select_optimal_instructions(self, parsed: Dict[str, str]) -> List[Instruction]:
        """Return the instruction sequence for one parsed statement.

        ``MUL_CONST`` becomes a copy of the source into the destination
        followed by a left shift of the destination, so the source operand
        is left untouched and the destination really receives the product.
        Operands are in ``source, destination`` order, matching the simple
        selector's ``mov`` templates.
        """
        if parsed['op'] == 'MUL_CONST':
            src, dst = parsed['src1'], parsed['dst']
            shift_amount = parsed.get('shift_amount', '1')

            lowered = []
            if src != dst:
                lowered.append(Instruction('mov', [src, dst], InstructionType.MOVE))
            if shift_amount != '0':
                # Multiplying by 1 needs no shift at all.
                lowered.append(
                    Instruction('shl', [shift_amount, dst], InstructionType.LOGICAL)
                )
            return lowered

        # Everything else goes through the simple selector.
        return self._fallback._select_for_operation(parsed)

# 测试指令选择
def test_instruction_selection():
    """Demo: run both instruction selectors over a small three-address
    program and print what each one produces."""

    def print_indented(rows):
        # Shared two-space-indented listing used for every section below.
        for row in rows:
            print(f"  {row}")

    print("\n=== 指令选择测试 ===")

    machine = TargetMachine(TargetArchitecture.X86_64)
    simple_selector = SimpleInstructionSelector(machine)

    intermediate_code = [
        "t1 = a + b",
        "t2 = t1 * 2",
        "result = t2 - c",
        "if t1 > 0 goto L1",
        "goto L2",
        "L1:",
        "call print",
        "L2:",
        "return result"
    ]

    print("中间代码:")
    print_indented(intermediate_code)

    # Simple selector first ...
    print("\n简单指令选择结果:")
    print_indented(simple_selector.select_instructions(intermediate_code))

    # ... then the optimising one on the same input.
    optimized_selector = OptimizedInstructionSelector(machine)

    print("\n优化指令选择结果:")
    print_indented(optimized_selector.select_instructions(intermediate_code))

# Run the instruction-selection demo only when executed as a script.
if __name__ == "__main__":
    test_instruction_selection()

9.3 寄存器分配

9.3.1 寄存器分配问题

寄存器分配是将程序中的变量和临时值分配到有限的寄存器中的过程。主要挑战包括：

寄存器数量有限：现代处理器通常只有16-32个通用寄存器
变量生命周期重叠：多个变量可能同时需要寄存器
调用约定约束：某些寄存器有特殊用途
溢出处理：当寄存器不够时需要将变量存储到内存

from typing import Set, Dict, List, Optional, Tuple
from dataclasses import dataclass, field
from collections import defaultdict
import heapq

@dataclass
class LiveInterval:
    """Closed range of instruction positions over which a variable lives,
    together with the outcome of register allocation for it."""
    variable: str
    start: int
    end: int
    register: Optional[Register] = None
    spilled: bool = False
    spill_location: Optional[int] = None  # stack offset

    def overlaps(self, other: 'LiveInterval') -> bool:
        """Return True when the two closed intervals share a position."""
        starts_in_time = self.start <= other.end
        ends_late_enough = other.start <= self.end
        return ends_late_enough and starts_in_time

    def __lt__(self, other):
        # Ordering by end position: the interval that ends first is smaller.
        return other.end > self.end

class InterferenceGraph:
    """Undirected interference graph over variable names.

    ``nodes`` holds the variables, ``edges`` the sorted name pairs and
    ``adjacency`` the neighbour sets. The query methods never insert into
    ``adjacency`` and keep working when it has been replaced by a plain
    ``dict`` that lacks entries for isolated nodes.
    """

    def __init__(self):
        self.nodes: Set[str] = set()
        self.edges: Set[Tuple[str, str]] = set()
        self.adjacency: Dict[str, Set[str]] = defaultdict(set)

    def add_node(self, variable: str):
        """Add a variable to the graph."""
        self.nodes.add(variable)

    def add_edge(self, var1: str, var2: str):
        """Record that two variables interfere; self-edges are ignored."""
        if var1 == var2:
            return
        self.edges.add(tuple(sorted((var1, var2))))
        # setdefault works for both a defaultdict and a plain dict.
        self.adjacency.setdefault(var1, set()).add(var2)
        self.adjacency.setdefault(var2, set()).add(var1)

    def get_neighbors(self, variable: str) -> Set[str]:
        """Return the neighbours of a variable (empty set if it has none)."""
        return self.adjacency.get(variable, set())

    def remove_node(self, variable: str):
        """Remove a variable and every edge touching it; unknown variables
        are ignored."""
        if variable not in self.nodes:
            return
        self.nodes.remove(variable)
        # pop() returns the neighbour set and drops the entry in one step.
        for neighbor in self.adjacency.pop(variable, set()):
            peers = self.adjacency.get(neighbor)
            if peers is not None:
                peers.discard(variable)
            self.edges.discard(tuple(sorted((variable, neighbor))))

    def degree(self, variable: str) -> int:
        """Return the number of neighbours of a variable (0 if it has none)."""
        return len(self.adjacency.get(variable, ()))

class RegisterAllocator(ABC):
    """Abstract base class for register allocators.

    Captures the machine's allocatable registers once at construction.
    """

    def __init__(self, target_machine: TargetMachine):
        self.target_machine = target_machine
        usable = target_machine.get_available_registers()
        self.available_registers = usable
        self.num_registers = len(usable)

    @abstractmethod
    def allocate(self, live_intervals: List[LiveInterval]) -> Dict[str, Register]:
        """Map variable names to registers for the given live intervals."""

class LinearScanAllocator(RegisterAllocator):
    """Linear-scan register allocator."""

    def allocate(self, live_intervals: List[LiveInterval]) -> Dict[str, Register]:
        """Assign registers by scanning the intervals in start order.

        When no register is free, the interval that ends *last* among the
        active ones and the current one is spilled.

        The intervals are updated in place: ``register`` is set for
        allocated intervals, and spilled intervals get ``spilled = True``
        and ``register = None``. ``spill_location`` is not assigned here.
        The names of spilled variables are printed.

        Returns:
            Mapping from variable name to register for every interval that
            still holds a register at the end of the scan.
        """
        intervals = sorted(live_intervals, key=lambda iv: iv.start)

        # Min-heap ordered by end position (see LiveInterval.__lt__), so
        # active[0] is always the interval that expires first.
        active = []
        free_registers = list(self.available_registers)
        allocation = {}
        spilled_vars = []

        for interval in intervals:
            # Give back the registers of intervals that ended before this
            # one starts.
            while active and active[0].end < interval.start:
                expired = heapq.heappop(active)
                if expired.register is not None:
                    free_registers.append(expired.register)

            if free_registers:
                interval.register = free_registers.pop(0)
                allocation[interval.variable] = interval.register
                heapq.heappush(active, interval)
                continue

            # No register left. The spill candidate is the active interval
            # with the furthest end; the heap root is the nearest end, so
            # the candidate has to be searched for.
            victim_index = None
            if active:
                victim_index = max(range(len(active)), key=lambda i: active[i].end)

            if victim_index is not None and active[victim_index].end > interval.end:
                victim = active[victim_index]
                # Remove by position, then restore the heap invariant.
                active[victim_index] = active[-1]
                active.pop()
                heapq.heapify(active)

                # The current interval takes over the victim's register.
                interval.register = victim.register
                allocation[interval.variable] = victim.register
                heapq.heappush(active, interval)

                victim.register = None
                victim.spilled = True
                spilled_vars.append(victim.variable)
                allocation.pop(victim.variable, None)
            else:
                # The current interval ends last: spill it instead.
                interval.spilled = True
                spilled_vars.append(interval.variable)

        print(f"溢出变量: {spilled_vars}")
        return allocation

class GraphColoringAllocator(RegisterAllocator):
    """Graph-colouring register allocator."""

    def allocate(self, live_intervals: List[LiveInterval]) -> Dict[str, Register]:
        """Assign registers by colouring the interference graph.

        Returns:
            Mapping from variable name to register. A variable whose colour
            index has no matching register is left out of the mapping.
        """
        interference_graph = self._build_interference_graph(live_intervals)
        coloring = self._color_graph(interference_graph)

        # Colour i stands for the i-th allocatable register.
        allocation = {}
        for variable, color in coloring.items():
            if color < len(self.available_registers):
                allocation[variable] = self.available_registers[color]

        return allocation

    def _build_interference_graph(self, live_intervals: List[LiveInterval]) -> InterferenceGraph:
        """Build a graph with one node per interval and an edge between
        every pair of overlapping intervals."""
        graph = InterferenceGraph()

        for interval in live_intervals:
            graph.add_node(interval.variable)

        for i, first in enumerate(live_intervals):
            for second in live_intervals[i + 1:]:
                if first.overlaps(second):
                    graph.add_edge(first.variable, second.variable)

        return graph

    def _color_graph(self, graph: InterferenceGraph) -> Dict[str, int]:
        """Colour the graph with the simplify / select scheme.

        Simplify: repeatedly remove a node with fewer than
        ``num_registers`` remaining neighbours; when there is none, remove
        the node of highest remaining degree. Select: re-insert the nodes
        in reverse removal order, giving each the lowest colour not used by
        an already coloured neighbour.

        The input graph is not modified structurally. Nodes are visited in
        sorted order so the result is reproducible.
        """
        # Private working copy of the neighbour sets. Every node gets an
        # entry, including nodes without any interference edge.
        remaining = {
            node: {nb for nb in graph.get_neighbors(node) if nb in graph.nodes}
            for node in sorted(graph.nodes)
        }

        removal_order = []
        while remaining:
            pick = next(
                (node for node, nbrs in remaining.items()
                 if len(nbrs) < self.num_registers),
                None,
            )
            if pick is None:
                # No trivially colourable node: take the most constrained.
                pick = max(remaining, key=lambda node: len(remaining[node]))

            removal_order.append(pick)
            for neighbor in remaining.pop(pick):
                peers = remaining.get(neighbor)
                if peers is not None:
                    peers.discard(pick)

        coloring = {}
        while removal_order:
            node = removal_order.pop()

            taken = {
                coloring[neighbor]
                for neighbor in graph.get_neighbors(node)
                if neighbor in coloring
            }

            color = 0
            while color in taken:
                color += 1

            coloring[node] = color

        return coloring

class LivenessAnalyzer:
    """Computes one live interval per variable from a linear instruction
    sequence."""

    # Register names that must never be mistaken for variables.
    _NAMED_REGISTERS = frozenset(
        ['rax', 'rbx', 'rcx', 'rdx', 'rsi', 'rdi', 'rbp', 'rsp']
    )

    def __init__(self):
        self.variable_uses = defaultdict(list)
        self.variable_defs = defaultdict(list)

    def analyze(self, instructions: List[Instruction]) -> List[LiveInterval]:
        """Return the live intervals of all variables in ``instructions``.

        Positions recorded by a previous call are discarded first, so the
        analyzer can be reused. The result is sorted by start position,
        then end position, then variable name.
        """
        self.variable_uses.clear()
        self.variable_defs.clear()

        for position, instr in enumerate(instructions):
            self._analyze_instruction(instr, position)

        intervals = []
        for variable in set(self.variable_uses) | set(self.variable_defs):
            interval = self._compute_live_interval(variable)
            if interval is not None:
                intervals.append(interval)

        intervals.sort(key=lambda iv: (iv.start, iv.end, iv.variable))
        return intervals

    def _analyze_instruction(self, instr: Instruction, position: int):
        """Record the variables that one instruction defines and uses.

        Simplification: the first operand is recorded as a definition and
        all others as uses. The interval only depends on the union of both,
        so the split does not affect the computed range.
        """
        if not instr.operands:
            return
        # Operands of control-flow instructions are labels or function
        # names, not values that need a register.
        if instr.instruction_type == InstructionType.CONTROL:
            return

        target = str(instr.operands[0])
        if self._is_variable(target):
            self.variable_defs[target].append(position)

        for operand in instr.operands[1:]:
            source = str(operand)
            if self._is_variable(source):
                self.variable_uses[source].append(position)

    def _is_variable(self, operand: str) -> bool:
        """Return True when the operand text names a program variable.

        Integer literals (optionally signed), bracketed memory operands,
        the named x86-64 registers and numbered registers (``r0``, ``r8``,
        ...) are not variables. Ordinary names that merely start with ``r``
        (for example ``result``) are.
        """
        if not operand or operand.startswith('['):
            return False
        if operand.lstrip('+-').isdigit():
            return False
        if operand in self._NAMED_REGISTERS:
            return False
        if operand[0] == 'r' and operand[1:].isdigit():
            return False
        return True

    def _compute_live_interval(self, variable: str) -> Optional[LiveInterval]:
        """Return the interval from the first to the last recorded position
        of the variable, or ``None`` when nothing was recorded for it."""
        positions = self.variable_uses.get(variable, []) + self.variable_defs.get(variable, [])
        if not positions:
            return None
        return LiveInterval(variable, min(positions), max(positions))

# 测试寄存器分配
def test_register_allocation():
    """Demo: run liveness analysis on a short instruction sequence and
    print the result of both register allocators."""

    def print_assignment(mapping):
        # One "variable -> register" line per allocated variable.
        for name, reg in mapping.items():
            print(f"  {name} -> {reg.name}")

    print("\n=== 寄存器分配测试 ===")

    machine = TargetMachine(TargetArchitecture.X86_64)

    # Sample instruction sequence given as (opcode, operands, type).
    specs = (
        ('mov', ['a', 'rax'], InstructionType.MOVE),
        ('add', ['b', 'rax'], InstructionType.ARITHMETIC),
        ('mov', ['rax', 'c'], InstructionType.MOVE),
        ('mul', ['c', 'd'], InstructionType.ARITHMETIC),
        ('mov', ['d', 'result'], InstructionType.MOVE),
    )
    instructions = [Instruction(opcode, operands, kind)
                    for opcode, operands, kind in specs]

    print("指令序列:")
    for index, instr in enumerate(instructions):
        print(f"  {index}: {instr}")

    # Liveness analysis.
    live_intervals = LivenessAnalyzer().analyze(instructions)

    print("\n生命周期区间:")
    for interval in live_intervals:
        print(f"  {interval.variable}: [{interval.start}, {interval.end}]")

    # Linear scan; the allocation runs before its heading is printed.
    linear_allocation = LinearScanAllocator(machine).allocate(live_intervals)

    print("\n线性扫描分配结果:")
    print_assignment(linear_allocation)

    # Graph colouring.
    graph_allocation = GraphColoringAllocator(machine).allocate(live_intervals)

    print("\n图着色分配结果:")
    print_assignment(graph_allocation)

# Run the register-allocation demo only when executed as a script.
if __name__ == "__main__":
    test_register_allocation()

9.4 指令调度

9.4.1 指令调度的目标

指令调度是重新排列指令执行顺序以提高性能的技术。主要目标包括：

减少流水线停顿：避免数据相关导致的流水线气泡
提高指令级并行性：充分利用超标量处理器的多个执行单元
优化缓存访问：改善内存访问的局部性
减少分支预测失败：优化分支指令的布局

from typing import List, Dict, Set, Optional, Tuple
from dataclasses import dataclass
from enum import Enum
import copy

class DependencyType(Enum):
    """Kind of dependency between two instructions."""
    RAW = "read_after_write"    # true dependency
    WAR = "write_after_read"    # anti-dependency
    WAW = "write_after_write"   # output dependency
    CONTROL = "control"         # control dependency

@dataclass
class Dependency:
    """A dependency edge between two instructions, identified by their
    indices in the instruction sequence."""
    from_instr: int  # index of the instruction depended upon
    to_instr: int    # index of the dependent instruction
    dep_type: DependencyType
    latency: int = 1  # latency in cycles

    def __str__(self) -> str:
        """Return ``from -> to (type, latency)``."""
        return f"{self.from_instr} -> {self.to_instr} ({self.dep_type.value}, {self.latency})"

class ExecutionUnit(Enum):
    """Kind of execution unit in the processor model."""
    ALU = "alu"              # arithmetic logic unit
    MULTIPLIER = "multiplier" # multiplier
    DIVIDER = "divider"       # divider
    LOAD_STORE = "load_store" # load/store unit
    BRANCH = "branch"         # branch unit

@dataclass
class ProcessorModel:
    """Processor description used for scheduling.

    An empty (falsy) latency or unit table is replaced by built-in defaults
    right after construction.
    """
    execution_units: Dict[ExecutionUnit, int]   # number of units per kind
    instruction_latencies: Dict[str, int]       # opcode -> latency
    instruction_units: Dict[str, ExecutionUnit]  # opcode -> executing unit

    def __post_init__(self):
        if not self.instruction_latencies:
            # Single-cycle opcodes first, then the slower ones.
            latencies = dict.fromkeys(('mov', 'add', 'sub', 'and', 'or'), 1)
            latencies.update({
                'mul': 3, 'imul': 3,
                'div': 20, 'idiv': 20,
                'load': 3, 'store': 1,
                'jmp': 1, 'call': 2,
            })
            self.instruction_latencies = latencies

        if not self.instruction_units:
            # Default opcode -> unit table, written grouped by unit.
            opcodes_by_unit = (
                (ExecutionUnit.ALU, ('mov', 'add', 'sub', 'and', 'or')),
                (ExecutionUnit.MULTIPLIER, ('mul', 'imul')),
                (ExecutionUnit.DIVIDER, ('div', 'idiv')),
                (ExecutionUnit.LOAD_STORE, ('load', 'store')),
                (ExecutionUnit.BRANCH, ('jmp', 'call')),
            )
            self.instruction_units = {
                opcode: unit
                for unit, opcodes in opcodes_by_unit
                for opcode in opcodes
            }

class DependencyAnalyzer:
    """Finds data and control dependencies inside one instruction sequence.

    Operand roles follow the ``op source, destination`` convention.
    Operands are compared by their text, so two different spellings of the
    same location are not recognised as aliases.
    """

    def analyze(self, instructions: List[Instruction]) -> List[Dependency]:
        """Return the dependencies between every ordered pair of
        instructions (earlier -> later)."""
        dependencies = []

        for i, earlier in enumerate(instructions):
            for j in range(i + 1, len(instructions)):
                dependencies.extend(
                    self._check_dependency(earlier, i, instructions[j], j)
                )

        return dependencies

    def _check_dependency(self, instr1: Instruction, idx1: int,
                         instr2: Instruction, idx2: int) -> List[Dependency]:
        """Return the dependencies of ``instr2`` on the earlier ``instr1``.

        At most one dependency of each type is reported per pair.
        """
        dependencies = []

        reads1, writes1 = self._get_operands(instr1)
        reads2, writes2 = self._get_operands(instr2)

        # RAW: instr1 writes what instr2 reads.
        if writes1 & reads2:
            dependencies.append(Dependency(idx1, idx2, DependencyType.RAW))

        # WAR: instr1 reads what instr2 writes.
        if reads1 & writes2:
            dependencies.append(Dependency(idx1, idx2, DependencyType.WAR))

        # WAW: both write the same operand.
        if writes1 & writes2:
            dependencies.append(Dependency(idx1, idx2, DependencyType.WAW))

        # Control: nothing may move across a control-flow instruction, in
        # either direction. Later instructions depend on an earlier branch,
        # and a later branch depends on every earlier instruction.
        control = InstructionType.CONTROL
        if control in (instr1.instruction_type, instr2.instruction_type):
            dependencies.append(Dependency(idx1, idx2, DependencyType.CONTROL))

        return dependencies

    def _get_operands(self, instr: Instruction) -> Tuple[Set[str], Set[str]]:
        """Return ``(reads, writes)``: the operand texts an instruction
        reads and writes.

        Control-flow instructions report no operands; they are ordered
        through control dependencies instead.
        """
        reads = set()
        writes = set()

        operands = [str(operand) for operand in instr.operands]
        if not operands:
            return reads, writes

        kind = instr.instruction_type
        two_or_more = len(operands) >= 2

        if kind == InstructionType.MOVE:
            # mov src, dst
            if two_or_more:
                reads.add(operands[0])
                writes.add(operands[1])
        elif kind in (InstructionType.ARITHMETIC, InstructionType.LOGICAL):
            if two_or_more:
                # op src, dst  (dst = dst op src)
                reads.update(operands[:2])
                writes.add(operands[1])
            else:
                # Single-operand forms (inc, dec, not, ...) are treated as
                # read-modify-write, which is the conservative choice.
                reads.add(operands[0])
                writes.add(operands[0])
        elif kind == InstructionType.COMPARISON:
            # cmp / test only read their operands.
            reads.update(operands)
        elif kind == InstructionType.MEMORY:
            if instr.opcode in ('load', 'lea'):
                # load [src], dst / lea [address], dst
                if two_or_more:
                    reads.add(operands[0])
                    writes.add(operands[1])
            elif instr.opcode == 'store':
                # store src, [dst]
                if two_or_more:
                    reads.update(operands[:2])

        return reads, writes

class ListScheduler:
    """List scheduler that reorders instructions without breaking dependencies.

    The processor model supplies, per opcode, the execution unit an
    instruction occupies and for how many cycles. Opcodes missing from the
    model default to the ALU with a latency of one cycle.
    """

    def __init__(self, processor_model: ProcessorModel):
        self.processor_model = processor_model

    def schedule(self, instructions: List[Instruction]) -> List[Instruction]:
        """Return ``instructions`` reordered by the list-scheduling algorithm.

        Args:
            instructions: Straight-line instruction sequence to schedule.

        Returns:
            A new list with the scheduled instructions.
        """
        analyzer = DependencyAnalyzer()
        dependencies = analyzer.analyze(instructions)
        dep_graph = self._build_dependency_graph(instructions, dependencies)
        return self._list_schedule(instructions, dep_graph)

    def _build_dependency_graph(self, instructions: List[Instruction],
                               dependencies: List[Dependency]) -> Dict[int, List[int]]:
        """Build the successor lists used by the scheduler.

        Every dependency kind constrains the order: true (RAW) and control
        dependencies obviously, but also anti (WAR) and output (WAW)
        dependencies, because no register renaming happens here. Dropping
        them would let a later write overtake an earlier read or write of the
        same operand.

        Returns:
            Mapping from instruction index to the indices that must come
            after it. An edge may appear more than once; in-degree counting
            in ``_list_schedule`` handles duplicates consistently.
        """
        graph: Dict[int, List[int]] = {i: [] for i in range(len(instructions))}
        ordering_kinds = (
            DependencyType.RAW,
            DependencyType.WAR,
            DependencyType.WAW,
            DependencyType.CONTROL,
        )

        for dep in dependencies:
            if dep.dep_type not in ordering_kinds:
                continue
            if dep.from_instr == dep.to_instr:
                # A self-edge could never be satisfied and would silently
                # drop the instruction from the schedule.
                continue
            graph[dep.from_instr].append(dep.to_instr)

        return graph

    def _list_schedule(self, instructions: List[Instruction],
                      dep_graph: Dict[int, List[int]]) -> List[Instruction]:
        """Issue at most one ready instruction per simulated cycle.

        An instruction is ready once all its predecessors have been issued;
        it can issue when its execution unit is free. Among the issuable
        instructions the one that became ready first is chosen.
        """
        scheduled: List[Instruction] = []
        in_degree = [0] * len(instructions)

        for successors in dep_graph.values():
            for succ in successors:
                in_degree[succ] += 1

        # Instructions with no predecessors are ready from the start.
        ready_queue = [i for i, degree in enumerate(in_degree) if degree == 0]

        # Cycle at which each execution unit becomes free again.
        unit_busy_until = {unit: 0 for unit in ExecutionUnit}
        current_cycle = 0
        units = self.processor_model.instruction_units
        latencies = self.processor_model.instruction_latencies

        while ready_queue or any(busy > current_cycle for busy in unit_busy_until.values()):
            # First ready instruction whose unit is free this cycle, if any.
            selected = next(
                (idx for idx in ready_queue
                 if unit_busy_until[units.get(instructions[idx].opcode, ExecutionUnit.ALU)]
                 <= current_cycle),
                None,
            )

            if selected is not None:
                ready_queue.remove(selected)

                instr = instructions[selected]
                unit = units.get(instr.opcode, ExecutionUnit.ALU)
                latency = latencies.get(instr.opcode, 1)

                scheduled.append(instr)
                unit_busy_until[unit] = current_cycle + latency

                # Release successors whose last predecessor was just issued.
                for succ in dep_graph[selected]:
                    in_degree[succ] -= 1
                    if in_degree[succ] == 0:
                        ready_queue.append(succ)

            current_cycle += 1

        return scheduled

class BasicBlockScheduler:
    """Schedules each basic block of an instruction sequence independently."""

    def __init__(self, processor_model: ProcessorModel):
        self.processor_model = processor_model
        self.list_scheduler = ListScheduler(processor_model)

    def schedule_basic_blocks(self, instructions: List[Instruction]) -> List[Instruction]:
        """Split ``instructions`` into basic blocks and schedule each one.

        Args:
            instructions: The whole instruction sequence.

        Returns:
            The concatenation of the scheduled blocks, in block order.
        """
        scheduled_instructions: List[Instruction] = []
        for block in self._identify_basic_blocks(instructions):
            block_instructions = [instructions[i] for i in block]
            scheduled_instructions.extend(self.list_scheduler.schedule(block_instructions))

        return scheduled_instructions

    def _identify_basic_blocks(self, instructions: List[Instruction]) -> List[List[int]]:
        """Partition instruction indices into basic blocks.

        A block ends *after* a control-flow instruction and a new block
        starts *at* a jump target, so a labelled instruction is always the
        first instruction of its block and is never scheduled together with
        the code that precedes the label.

        Returns:
            A list of blocks, each a list of consecutive instruction indices.
        """
        blocks: List[List[int]] = []
        current_block: List[int] = []

        for i, instr in enumerate(instructions):
            # A jump target is a leader: close whatever came before it.
            if current_block and self._is_jump_target(i, instructions):
                blocks.append(current_block)
                current_block = []

            current_block.append(i)

            # A control-flow instruction is the last one of its block.
            if instr.instruction_type == InstructionType.CONTROL:
                blocks.append(current_block)
                current_block = []

        if current_block:
            blocks.append(current_block)

        return blocks

    def _is_jump_target(self, index: int, instructions: List[Instruction]) -> bool:
        """Report whether the instruction at ``index`` carries a label.

        Simplified detection: a label is assumed to be an operand whose text
        ends with ``':'``.
        """
        if index < len(instructions):
            instr = instructions[index]
            return any(str(op).rstrip().endswith(':') for op in instr.operands)
        return False

# 测试指令调度
def test_instruction_scheduling():
    """Demo: print a small program, its dependencies and its schedule."""

    def show_listing(title, listing):
        # Numbered dump shared by the "before" and "after" listings.
        print(title)
        for position, entry in enumerate(listing):
            print(f"  {position}: {entry}")

    print("\n=== 指令调度测试 ===")

    # Processor model: unit counts only, no per-opcode latencies or units.
    unit_counts = {
        ExecutionUnit.ALU: 2,
        ExecutionUnit.MULTIPLIER: 1,
        ExecutionUnit.LOAD_STORE: 1,
        ExecutionUnit.BRANCH: 1,
    }
    processor = ProcessorModel(
        execution_units=unit_counts,
        instruction_latencies={},
        instruction_units={},
    )

    # Sample program with dependencies between its instructions.
    program = (
        ('load', ['[a]', 'r1'], InstructionType.MEMORY),
        ('load', ['[b]', 'r2'], InstructionType.MEMORY),
        ('add', ['r1', 'r2'], InstructionType.ARITHMETIC),
        ('mul', ['r2', 'r3'], InstructionType.ARITHMETIC),
        ('store', ['r3', '[c]'], InstructionType.MEMORY),
    )
    instructions = [Instruction(opcode, operands, kind) for opcode, operands, kind in program]

    show_listing("原始指令序列:", instructions)

    # Dependency analysis.
    found = DependencyAnalyzer().analyze(instructions)
    print("\n依赖关系:")
    for dependency in found:
        print(f"  {dependency}")

    # List scheduling.
    result = ListScheduler(processor).schedule(instructions)
    show_listing("\n调度后的指令序列:", result)

# Run the instruction-scheduling demo only when executed as a script.
if __name__ == "__main__":
    test_instruction_scheduling()

9.5 代码优化

9.5.1 目标代码级优化

在目标代码生成阶段，可以进行多种优化来提高代码质量：

窥孔优化：在小的指令窗口内进行局部优化
指令合并：将多条简单指令合并为一条复杂指令
地址模式优化：利用复杂寻址模式减少指令数量
分支优化：优化分支指令的布局和预测

from typing import List, Dict, Optional, Tuple, Pattern
import re
from abc import ABC, abstractmethod

class OptimizationRule(ABC):
    """Interface implemented by every peephole rewrite rule."""

    @property
    @abstractmethod
    def name(self) -> str:
        """Name of the rule."""

    @abstractmethod
    def match(self, instructions: List[Instruction], start: int) -> Optional[int]:
        """Try to match the rule's pattern at ``start``; return the matched length."""

    @abstractmethod
    def transform(self, instructions: List[Instruction], start: int, length: int) -> List[Instruction]:
        """Return the replacement for the ``length`` instructions at ``start``."""

class RedundantMoveElimination(OptimizationRule):
    """Removes the second move of a ``mov a, b; mov b, a`` pair."""

    @property
    def name(self) -> str:
        return "Redundant Move Elimination"

    def match(self, instructions: List[Instruction], start: int) -> Optional[int]:
        """Return 2 when two consecutive moves copy back and forth, else None."""
        if start + 1 >= len(instructions):
            return None

        first, second = instructions[start], instructions[start + 1]

        if first.opcode != 'mov' or second.opcode != 'mov':
            return None
        if len(first.operands) < 2 or len(second.operands) < 2:
            return None

        # The pair is redundant when the operands are exactly swapped.
        swapped = (str(first.operands[0]) == str(second.operands[1]) and
                   str(first.operands[1]) == str(second.operands[0]))
        return 2 if swapped else None

    def transform(self, instructions: List[Instruction], start: int, length: int) -> List[Instruction]:
        """Keep the first move and drop the second."""
        kept = instructions[start]
        return [kept]

class ConstantFolding(OptimizationRule):
    """Folds ``add``/``sub``/``mul`` on two integer constants into a ``mov``."""

    @property
    def name(self) -> str:
        return "Constant Folding"

    def match(self, instructions: List[Instruction], start: int) -> Optional[int]:
        """Return 1 when the instruction at ``start`` operates on two integers."""
        if start >= len(instructions):
            return None

        candidate = instructions[start]
        if candidate.opcode not in ('add', 'sub', 'mul') or len(candidate.operands) < 2:
            return None

        # Both of the first two operands must parse as integers.
        try:
            int(str(candidate.operands[0]))
            int(str(candidate.operands[1]))
        except ValueError:
            return None
        return 1

    def transform(self, instructions: List[Instruction], start: int, length: int) -> List[Instruction]:
        """Replace the instruction with a ``mov`` of the computed value.

        The instruction is returned unchanged when its operands are not
        integers or its opcode is not one of the foldable ones.

        NOTE(review): the second operand is reused as the destination of the
        generated ``mov`` even though it is itself a constant; confirm the
        intended destination operand.
        """
        original = instructions[start]

        try:
            lhs = int(str(original.operands[0]))
            rhs = int(str(original.operands[1]))
        except (ValueError, IndexError):
            return [original]

        evaluators = {
            'add': lambda a, b: a + b,
            'sub': lambda a, b: a - b,
            'mul': lambda a, b: a * b,
        }
        evaluate = evaluators.get(original.opcode)
        if evaluate is None:
            return [original]

        folded = str(evaluate(lhs, rhs))
        return [Instruction('mov', [folded, original.operands[1]], InstructionType.MOVE)]

class StrengthReduction(OptimizationRule):
    """Rewrites a multiplication by a power of two as a left shift."""

    @property
    def name(self) -> str:
        return "Strength Reduction"

    def match(self, instructions: List[Instruction], start: int) -> Optional[int]:
        """Return 1 for ``mul x, c`` where ``c`` is a positive power of two."""
        if start >= len(instructions):
            return None

        candidate = instructions[start]
        if candidate.opcode != 'mul' or len(candidate.operands) < 2:
            return None

        try:
            factor = int(str(candidate.operands[1]))
        except ValueError:
            return None

        # A positive power of two has exactly one bit set.
        single_bit = factor > 0 and (factor & (factor - 1)) == 0
        return 1 if single_bit else None

    def transform(self, instructions: List[Instruction], start: int, length: int) -> List[Instruction]:
        """Return ``shl x, log2(c)`` in place of ``mul x, c``.

        The instruction is returned unchanged when the second operand is
        missing or is not an integer.
        """
        original = instructions[start]

        try:
            factor = int(str(original.operands[1]))
        except (ValueError, IndexError):
            return [original]

        # For a power of two, bit_length() - 1 is the exponent.
        exponent = factor.bit_length() - 1
        shifted = Instruction('shl', [original.operands[0], str(exponent)],
                              InstructionType.LOGICAL)
        return [shifted]

class AddressingModeOptimization(OptimizationRule):
    """Folds ``add reg, const; mov [reg], dst`` into one displaced access.

    NOTE(review): the rewrite removes the ``add``, so ``reg`` no longer holds
    the incremented value afterwards; this looks valid only when ``reg`` is
    not used again - confirm with the callers.
    """

    @property
    def name(self) -> str:
        return "Addressing Mode Optimization"

    def match(self, instructions: List[Instruction], start: int) -> Optional[int]:
        """Return 2 when an ``add reg, const`` is followed by ``mov [reg], dst``."""
        if start + 1 >= len(instructions):
            return None

        adder, mover = instructions[start], instructions[start + 1]

        if adder.opcode != 'add' or mover.opcode != 'mov':
            return None
        if len(adder.operands) < 2 or len(mover.operands) < 2:
            return None

        try:
            base = str(adder.operands[0])
            int(str(adder.operands[1]))  # the addend must be an integer constant
        except ValueError:
            return None

        # The move must dereference exactly the register that was incremented.
        uses_base_as_address = str(mover.operands[0]) == f'[{base}]'
        return 2 if uses_base_as_address else None

    def transform(self, instructions: List[Instruction], start: int, length: int) -> List[Instruction]:
        """Return a single ``mov [reg + const], dst``.

        The matched instructions are returned unchanged when the operands
        cannot be decoded.
        """
        adder = instructions[start]
        mover = instructions[start + 1]

        try:
            base = str(adder.operands[0])
            displacement = int(str(adder.operands[1]))
            destination = mover.operands[1]
        except (ValueError, IndexError):
            return instructions[start:start + length]

        address = f'[{base} + {displacement}]'
        return [Instruction('mov', [address, destination], InstructionType.MEMORY)]

class PeepholeOptimizer:
    """Applies the registered peephole rules until nothing changes."""

    def __init__(self):
        self.rules = [
            RedundantMoveElimination(),
            ConstantFolding(),
            StrengthReduction(),
            AddressingModeOptimization(),
        ]
        # Number of times each rule fired, keyed by rule name.
        self.optimization_stats = dict.fromkeys((rule.name for rule in self.rules), 0)

    def optimize(self, instructions: List[Instruction]) -> List[Instruction]:
        """Return an optimized copy of ``instructions``.

        Passes are repeated until one leaves the code unchanged, with an
        upper bound of ten passes.
        """
        current = instructions.copy()

        for _ in range(10):  # hard cap on the number of passes
            rewritten = []
            modified = False
            cursor = 0

            while cursor < len(current):
                # The first rule that matches at this position wins.
                for rule in self.rules:
                    span = rule.match(current, cursor)
                    if not span:
                        continue
                    rewritten.extend(rule.transform(current, cursor, span))
                    cursor += span
                    modified = True
                    self.optimization_stats[rule.name] += 1
                    break
                else:
                    # No rule applied: copy the instruction through.
                    rewritten.append(current[cursor])
                    cursor += 1

            current = rewritten
            if not modified:
                break

        return current

    def get_optimization_stats(self) -> Dict[str, int]:
        """Return a copy of the per-rule application counters."""
        return dict(self.optimization_stats)

class BranchOptimizer:
    """Removes unconditional jumps whose target is the very next instruction."""

    def optimize_branches(self, instructions: List[Instruction]) -> List[Instruction]:
        """Return ``instructions`` with redundant jumps removed.

        Each input instruction is visited exactly once, so eliminating a
        jump (which contributes no output) still advances to the next
        instruction.

        Args:
            instructions: Instruction sequence to optimize.

        Returns:
            A new list; non-control instructions are copied through as-is.
        """
        optimized: List[Instruction] = []

        for index, instr in enumerate(instructions):
            if instr.instruction_type == InstructionType.CONTROL:
                optimized.extend(self._optimize_single_branch(instructions, index))
            else:
                optimized.append(instr)

        return optimized

    def _optimize_single_branch(self, instructions: List[Instruction],
                               start: int) -> List[Instruction]:
        """Return the replacement for the control instruction at ``start``.

        A ``jmp`` is dropped (empty list) only when the following instruction
        defines the jump's target label, i.e. carries an operand of the form
        ``<target>:``. Matching the whole label name avoids treating
        ``label1`` as a match for ``label10:`` or for an instruction that
        merely mentions the label.
        """
        instr = instructions[start]

        if instr.opcode == 'jmp' and len(instr.operands) > 0 and start + 1 < len(instructions):
            target = str(instr.operands[0]).strip()
            next_instr = instructions[start + 1]
            if target and any(self._defines_label(op, target) for op in next_instr.operands):
                # Falling through reaches the same place: the jump is redundant.
                return []

        return [instr]

    @staticmethod
    def _defines_label(operand, target: str) -> bool:
        """Report whether ``operand`` is the label definition ``<target>:``."""
        text = str(operand).strip()
        return text.endswith(':') and text[:-1].strip() == target

# 测试代码优化
def test_code_optimization():
    """Demo: run the peephole and branch optimizers on sample code."""

    def show_listing(title, listing):
        # Numbered dump used for every listing printed below.
        print(title)
        for position, entry in enumerate(listing):
            print(f"  {position}: {entry}")

    print("\n=== 代码优化测试 ===")

    # Sample code containing one instance of each optimizable pattern.
    sample = (
        ('mov', ['5', 'r1'], InstructionType.MOVE),
        ('mov', ['3', 'r2'], InstructionType.MOVE),
        ('add', ['5', '3'], InstructionType.ARITHMETIC),   # constant folding
        ('mul', ['r1', '8'], InstructionType.ARITHMETIC),  # strength reduction
        ('mov', ['r1', 'r2'], InstructionType.MOVE),
        ('mov', ['r2', 'r1'], InstructionType.MOVE),       # redundant move
        ('add', ['r3', '4'], InstructionType.ARITHMETIC),
        ('mov', ['[r3]', 'r4'], InstructionType.MEMORY),   # addressing mode
    )
    instructions = [Instruction(opcode, operands, kind) for opcode, operands, kind in sample]

    show_listing("原始指令序列:", instructions)

    # Peephole optimization.
    peephole_optimizer = PeepholeOptimizer()
    optimized = peephole_optimizer.optimize(instructions)
    show_listing("\n窥孔优化后:", optimized)

    print("\n优化统计:")
    stats = peephole_optimizer.get_optimization_stats()
    for rule_name in stats:
        count = stats[rule_name]
        if count > 0:
            print(f"  {rule_name}: {count} 次")

    # Branch optimization.
    branch_sample = (
        ('cmp', ['r1', '0'], InstructionType.COMPARISON),
        ('je', ['label1'], InstructionType.CONTROL),
        ('jmp', ['label2'], InstructionType.CONTROL),
        ('mov', ['1', 'r2'], InstructionType.MOVE),  # label1
    )
    branch_instructions = [Instruction(opcode, operands, kind)
                           for opcode, operands, kind in branch_sample]

    show_listing("\n分支指令序列:", branch_instructions)

    branch_optimizer = BranchOptimizer()
    optimized_branches = branch_optimizer.optimize_branches(branch_instructions)
    show_listing("\n分支优化后:", optimized_branches)

# Run the code-optimization demo only when executed as a script.
if __name__ == "__main__":
    test_code_optimization()

9.6 链接过程

9.6.1 链接的基本概念

链接是将多个目标文件和库文件组合成一个可执行文件的过程。链接器的主要任务包括：

符号解析：解析外部符号引用
重定位：调整代码和数据的地址
库链接：链接静态库和动态库
段合并：合并相同类型的段

from typing import Dict, List, Set, Optional, Tuple
from dataclasses import dataclass, field
from enum import Enum
import copy

class SymbolType(Enum):
    """Kind of entity a symbol names."""
    FUNCTION = "function"
    VARIABLE = "variable"
    LABEL = "label"
    SECTION = "section"

class SymbolBinding(Enum):
    """Binding (linkage visibility) of a symbol."""
    LOCAL = "local"      # local symbol
    GLOBAL = "global"    # global symbol
    WEAK = "weak"        # weak symbol
    EXTERN = "extern"    # external symbol

class SectionType(Enum):
    """Kind of section in an object file."""
    TEXT = "text"        # code section
    DATA = "data"        # initialized data section
    BSS = "bss"          # uninitialized data section
    RODATA = "rodata"    # read-only data section
    DEBUG = "debug"      # debug information section

@dataclass
class Symbol:
    """One entry of a symbol table."""
    name: str
    symbol_type: SymbolType
    binding: SymbolBinding
    section: str                  # name of the section the symbol lives in
    offset: int                   # offset inside that section
    size: int = 0
    value: Optional[int] = None

    def __str__(self):
        location = f"{self.name}@{self.section}+{self.offset}"
        attributes = f"{self.binding.value} {self.symbol_type.value}"
        return f"{location} ({attributes})"

@dataclass
class Relocation:
    """One relocation entry."""
    offset: int          # location to patch
    symbol: str          # name of the referenced symbol
    reloc_type: str      # relocation type
    addend: int = 0      # constant added to the symbol address

    def __str__(self):
        where = f"@{self.offset}"
        what = f"{self.symbol} ({self.reloc_type})"
        return f"{where}: {what} + {self.addend}"

@dataclass
class Section:
    """A named section with its contents and placement."""
    name: str
    section_type: SectionType
    data: bytearray = field(default_factory=bytearray)
    address: int = 0
    size: int = 0
    alignment: int = 1
    flags: Set[str] = field(default_factory=set)
    relocations: List[Relocation] = field(default_factory=list)

    def __post_init__(self):
        # An explicit non-zero size is kept; otherwise derive it from the data.
        if self.size != 0:
            return
        self.size = len(self.data)

    def __str__(self):
        kind = self.section_type.value
        placement = f"{self.size} bytes @ 0x{self.address:08x}"
        return f"{self.name} ({kind}): {placement}"

@dataclass
class ObjectFile:
    """In-memory model of one relocatable object file."""
    filename: str
    sections: Dict[str, Section] = field(default_factory=dict)
    symbols: Dict[str, Symbol] = field(default_factory=dict)
    relocations: List[Relocation] = field(default_factory=list)
    dependencies: Set[str] = field(default_factory=set)

    def add_section(self, section: Section):
        """Register ``section`` under its own name."""
        self.sections[section.name] = section

    def add_symbol(self, symbol: Symbol):
        """Register ``symbol`` under its own name."""
        self.symbols[symbol.name] = symbol

    def get_global_symbols(self) -> Dict[str, Symbol]:
        """Return the symbols whose binding is GLOBAL."""
        exported = {}
        for name, sym in self.symbols.items():
            if sym.binding == SymbolBinding.GLOBAL:
                exported[name] = sym
        return exported

    def get_undefined_symbols(self) -> Set[str]:
        """Return names referenced by relocations but absent from ``symbols``."""
        return {reloc.symbol for reloc in self.relocations
                if reloc.symbol not in self.symbols}

    def __str__(self):
        section_count = len(self.sections)
        symbol_count = len(self.symbols)
        return f"ObjectFile({self.filename}): {section_count} sections, {symbol_count} symbols"

class SymbolTable:
    """Global symbol table built while linking.

    Attributes:
        symbols: Winning definition for each symbol name.
        conflicts: ``(name, existing, rejected)`` triples for definitions
            that could not be reconciled.
    """

    def __init__(self):
        self.symbols: Dict[str, Symbol] = {}
        self.conflicts: List[Tuple[str, Symbol, Symbol]] = []

    def add_symbol(self, symbol: Symbol, source_file: str) -> bool:
        """Add ``symbol`` to the table, resolving duplicates by binding.

        A strong (GLOBAL) definition replaces a weak one. A weak definition
        that arrives after a strong one is simply ignored: that is a
        successful resolution, not an error. Two strong definitions of the
        same name are a conflict.

        Args:
            symbol: The definition to add.
            source_file: Name of the object file providing it (currently
                informational only).

        Returns:
            True when the table is consistent after the call, False when the
            definition conflicts with an existing one (the conflict is also
            recorded in ``conflicts``).
        """
        existing = self.symbols.get(symbol.name)
        if existing is not None:
            if not self._resolve_symbol_conflict(existing, symbol):
                self.conflicts.append((symbol.name, existing, symbol))
                return False

            if (existing.binding == SymbolBinding.GLOBAL and
                    symbol.binding == SymbolBinding.WEAK):
                # Strong beats weak: keep the existing definition.
                return True

        self.symbols[symbol.name] = symbol
        return True

    def _resolve_symbol_conflict(self, existing: Symbol, new: Symbol) -> bool:
        """Report whether two definitions of one name can be reconciled.

        Only two strong (GLOBAL) definitions are irreconcilable; every
        combination involving a weak symbol has a well-defined winner.
        """
        if existing.binding == new.binding == SymbolBinding.GLOBAL:
            return False  # multiple definition error
        return True

    def resolve_symbol(self, name: str) -> Optional[Symbol]:
        """Return the definition registered for ``name``, or None."""
        return self.symbols.get(name)

    def get_undefined_symbols(self, object_files: List[ObjectFile]) -> Set[str]:
        """Return names undefined in some object file and absent from the table."""
        undefined: Set[str] = set()
        for obj_file in object_files:
            undefined.update(obj_file.get_undefined_symbols())

        # Drop everything some object file defines globally.
        undefined -= set(self.symbols.keys())
        return undefined

class AddressAllocator:
    """Hands out consecutive, aligned addresses to sections.

    NOTE(review): ``section_addresses`` is keyed by section name, so a later
    section with the same name replaces the earlier entry - confirm this is
    what callers of ``get_section_address`` expect.
    """

    def __init__(self, base_address: int = 0x400000):
        self.base_address = base_address
        self.current_address = base_address
        self.section_addresses: Dict[str, int] = {}

    def allocate_section(self, section: Section) -> int:
        """Place ``section`` at the next aligned address and return that address.

        The section's ``address`` attribute is updated, the address is
        recorded under the section's name, and the allocation cursor moves
        past the section.
        """
        start = self._align_address(self.current_address, section.alignment)

        section.address = start
        self.section_addresses[section.name] = start
        self.current_address = start + section.size

        return start

    def _align_address(self, address: int, alignment: int) -> int:
        """Round ``address`` up to a multiple of ``alignment``.

        An alignment of 1 or less leaves the address unchanged.
        """
        if alignment <= 1:
            return address
        excess = address % alignment
        if excess == 0:
            return address
        return address + (alignment - excess)

    def get_section_address(self, section_name: str) -> Optional[int]:
        """Return the address recorded for ``section_name``, or None."""
        return self.section_addresses.get(section_name)

class Relocator:
    """Applies relocation entries once section addresses are known.

    Simplification kept from the original design: every relocation is
    patched as a 32-bit little-endian absolute address.
    """

    # Width in bytes of the field every relocation patches.
    _PATCH_WIDTH = 4

    def __init__(self, symbol_table: SymbolTable, address_allocator: AddressAllocator):
        self.symbol_table = symbol_table
        self.address_allocator = address_allocator

    def relocate_object_file(self, obj_file: ObjectFile) -> bool:
        """Apply every relocation of ``obj_file``.

        All relocations are attempted even after a failure so that every
        problem is reported.

        Returns:
            True when all relocations were applied.
        """
        success = True

        for reloc in obj_file.relocations:
            if not self._apply_relocation(obj_file, reloc):
                success = False

        return success

    def _apply_relocation(self, obj_file: ObjectFile, reloc: Relocation) -> bool:
        """Resolve the referenced symbol and patch its address into the code."""
        symbol = self._resolve_symbol(obj_file, reloc.symbol)
        if symbol is None:
            print(f"错误：未定义符号 '{reloc.symbol}'")
            return False

        target_address = self._calculate_target_address(symbol, obj_file)
        if target_address is None:
            return False

        return self._patch_instruction(obj_file, reloc, target_address)

    def _resolve_symbol(self, obj_file: ObjectFile, name: str) -> Optional[Symbol]:
        """Look ``name`` up globally, then among the object's own definitions.

        The fallback makes references to symbols that never enter the global
        table (for example LOCAL ones) resolvable. An EXTERN entry is only a
        declaration and therefore never satisfies a reference.
        """
        symbol = self.symbol_table.resolve_symbol(name)
        if symbol is not None:
            return symbol

        local = obj_file.symbols.get(name)
        if local is not None and local.binding != SymbolBinding.EXTERN:
            return local
        return None

    def _calculate_target_address(self, symbol: Symbol,
                                  obj_file: Optional[ObjectFile] = None) -> Optional[int]:
        """Return the absolute address of ``symbol``, or None if unknown.

        When ``symbol`` is defined by ``obj_file`` itself, the address of
        that object's own section is used. Otherwise the allocator's
        per-name section address is the only information available.
        """
        if obj_file is not None and obj_file.symbols.get(symbol.name) is symbol:
            own_section = obj_file.sections.get(symbol.section)
            if own_section is not None:
                return own_section.address + symbol.offset

        section_addr = self.address_allocator.get_section_address(symbol.section)
        if section_addr is None:
            return None

        return section_addr + symbol.offset

    def _patch_instruction(self, obj_file: ObjectFile, reloc: Relocation,
                          target_address: int) -> bool:
        """Write ``target_address + addend`` at the relocation's location.

        Returns:
            False when the location lies outside every section or the value
            does not fit in 32 bits; True once the bytes have been written.
        """
        located = self._locate(obj_file, reloc.offset)
        if located is None:
            print(f"错误：重定位偏移 {reloc.offset} 超出段范围")
            return False
        section, section_offset = located

        final_address = target_address + reloc.addend
        try:
            encoded = final_address.to_bytes(self._PATCH_WIDTH, 'little')
        except OverflowError:
            # Negative or wider than 32 bits: cannot be encoded.
            print(f"错误：重定位地址 {final_address} 超出32位范围")
            return False

        section.data[section_offset:section_offset + self._PATCH_WIDTH] = encoded
        return True

    def _locate(self, obj_file: ObjectFile, offset: int) -> Optional[Tuple[Section, int]]:
        """Map a relocation offset to ``(section, offset inside its data)``.

        Offsets stored in an object file are relative to the start of a
        section, so the first section (in insertion order) whose data can
        hold the patched field at that offset is chosen. For compatibility
        an offset that is an absolute address inside an allocated section is
        accepted as well.
        """
        width = self._PATCH_WIDTH
        sections = list(obj_file.sections.values())

        for section in sections:
            if 0 <= offset and offset + width <= len(section.data):
                return section, offset

        for section in sections:
            relative = offset - section.address
            if 0 <= relative and relative + width <= len(section.data):
                return section, relative

        return None

    def _find_section_for_offset(self, obj_file: ObjectFile, offset: int) -> Optional[Section]:
        """Return the section containing ``offset``, or None."""
        located = self._locate(obj_file, offset)
        return located[0] if located is not None else None

class Linker:
    """Links object files into an ``ExecutableFile``.

    The link runs in fixed phases: collect global symbols, check for
    undefined references, allocate section addresses, apply relocations,
    merge same-named sections and finally build the executable.
    """

    def __init__(self, base_address: int = 0x400000):
        self.symbol_table = SymbolTable()
        self.address_allocator = AddressAllocator(base_address)
        self.relocator = Relocator(self.symbol_table, self.address_allocator)
        self.linked_sections: Dict[str, Section] = {}
        # Section object that actually contains each global symbol. Several
        # object files may own a section with the same name, so the name
        # alone does not identify where a symbol lives.
        self._symbol_sections: Dict[str, Section] = {}

    def link(self, object_files: List[ObjectFile]) -> Optional['ExecutableFile']:
        """Link ``object_files``.

        Returns:
            The executable, or None when symbol collection fails, a symbol
            stays undefined or a relocation cannot be applied.
        """
        print(f"开始链接 {len(object_files)} 个目标文件...")

        # Pass 1: collect symbols.
        if not self._collect_symbols(object_files):
            return None

        # Every referenced symbol must be defined somewhere.
        undefined = self.symbol_table.get_undefined_symbols(object_files)
        if undefined:
            print(f"错误：未定义符号 {undefined}")
            return None

        # Pass 2: allocate addresses.
        self._allocate_addresses(object_files)

        # Pass 3: relocate. This patches the sections in place, so it must
        # run before their data is copied by the merge step.
        if not self._perform_relocations(object_files):
            return None

        self._merge_sections(object_files)

        executable = self._create_executable()

        print("链接完成")
        return executable

    def _collect_symbols(self, object_files: List[ObjectFile]) -> bool:
        """Enter every global symbol into the symbol table.

        Returns:
            False as soon as a symbol conflict is reported.
        """
        print("收集符号...")

        for obj_file in object_files:
            for symbol in obj_file.get_global_symbols().values():
                if not self.symbol_table.add_symbol(symbol, obj_file.filename):
                    print(f"符号冲突：{symbol.name}")
                    return False

                # Remember the defining section only if this definition is
                # the one the table kept.
                if self.symbol_table.symbols.get(symbol.name) is symbol:
                    section = obj_file.sections.get(symbol.section)
                    if section is not None:
                        self._symbol_sections[symbol.name] = section
                    else:
                        self._symbol_sections.pop(symbol.name, None)

        print(f"收集到 {len(self.symbol_table.symbols)} 个全局符号")
        return True

    def _allocate_addresses(self, object_files: List[ObjectFile]):
        """Assign addresses to sections, grouped by section type."""
        print("分配地址...")

        # Layout order: code, read-only data, data, BSS.
        section_order = [SectionType.TEXT, SectionType.RODATA, SectionType.DATA, SectionType.BSS]

        for section_type in section_order:
            for obj_file in object_files:
                for section in obj_file.sections.values():
                    if section.section_type == section_type:
                        address = self.address_allocator.allocate_section(section)
                        print(f"  {section.name}: 0x{address:08x} - 0x{address + section.size:08x}")

    def _perform_relocations(self, object_files: List[ObjectFile]) -> bool:
        """Relocate every object file; stop at the first file that fails."""
        print("执行重定位...")

        for obj_file in object_files:
            if not self.relocator.relocate_object_file(obj_file):
                return False

        return True

    def _merge_sections(self, object_files: List[ObjectFile]):
        """Merge sections that share a name into ``linked_sections``."""
        print("合并段...")

        section_groups: Dict[str, List[Section]] = {}

        # Group sections by name, preserving first-seen order.
        for obj_file in object_files:
            for section in obj_file.sections.values():
                section_groups.setdefault(section.name, []).append(section)

        for section_name, sections in section_groups.items():
            self.linked_sections[section_name] = self._merge_section_group(section_name, sections)

    def _merge_section_group(self, name: str, sections: List[Section]) -> Section:
        """Concatenate ``sections`` in address order into one section.

        Gaps between consecutive sections are filled with zero bytes so that
        offsets inside the merged data match the allocated addresses.
        """
        if not sections:
            return Section(name, SectionType.TEXT)

        # The first section supplies the type; address and alignment are the
        # extremes over the group.
        merged = Section(
            name=name,
            section_type=sections[0].section_type,
            address=min(s.address for s in sections),
            alignment=max(s.alignment for s in sections)
        )

        total_size = 0
        for section in sorted(sections, key=lambda s: s.address):
            # Zero-fill up to this section's start address.
            padding = section.address - (merged.address + total_size)
            if padding > 0:
                merged.data.extend(b'\x00' * padding)
                total_size += padding

            merged.data.extend(section.data)
            total_size += len(section.data)

        merged.size = total_size
        return merged

    def _create_executable(self) -> 'ExecutableFile':
        """Build the executable; the entry point is the address of ``_start``.

        The entry address is computed from the section that actually defines
        ``_start``. It stays 0 when ``_start`` is missing or its section was
        never allocated. An allocated address of 0 is a valid base.
        """
        entry_address = 0
        entry_point = self.symbol_table.resolve_symbol('_start')

        if entry_point is not None:
            allocated = self.address_allocator.get_section_address(entry_point.section)
            if allocated is not None:
                defining = self._symbol_sections.get(entry_point.name)
                base = defining.address if defining is not None else allocated
                entry_address = base + entry_point.offset

        return ExecutableFile(
            sections=self.linked_sections,
            entry_point=entry_address,
            symbol_table=self.symbol_table.symbols
        )

@dataclass
class ExecutableFile:
    """Result of a link: merged sections, entry point and symbol table."""
    sections: Dict[str, Section]
    entry_point: int
    symbol_table: Dict[str, Symbol]

    def save_to_file(self, filename: str):
        """Write the executable to ``filename`` in a simplified format.

        Layout: magic ``EXEC``, entry point and section count (32-bit
        little-endian each), then one 24-byte record per section (name padded
        or cut to 16 bytes, address, size), then the raw data of every
        section in the same order.
        """
        print(f"保存可执行文件到 {filename}")

        def u32(number: int) -> bytes:
            return number.to_bytes(4, 'little')

        ordered = list(self.sections.values())

        with open(filename, 'wb') as out:
            # File header.
            out.write(b'EXEC')
            out.write(u32(self.entry_point))
            out.write(u32(len(ordered)))

            # Section table.
            for entry in ordered:
                out.write(entry.name.encode('utf-8')[:16].ljust(16, b'\x00'))
                out.write(u32(entry.address))
                out.write(u32(entry.size))

            # Section contents.
            for entry in ordered:
                out.write(entry.data)

    def __str__(self):
        count = len(self.sections)
        return f"ExecutableFile: entry=0x{self.entry_point:08x}, {count} sections"

# 测试链接过程
def test_linking_process():
    """Link two small object files and dump the resulting executable.

    Builds ``main.o`` (symbols ``main`` and ``_start``, one relocation that
    names ``printf``) and ``lib.o`` (symbol ``printf``), links them with a
    linker created for base address 0x400000, prints the resulting sections
    and symbols, and saves the image as ``output.exe``. Prints a failure
    message instead when the linker returns a falsy result.
    """
    print("\n=== 链接过程测试 ===")

    # --- main.o: one code section, two global function symbols ---
    main_obj = ObjectFile("main.o")

    main_text = Section(".text", SectionType.TEXT)
    main_text.data = bytearray(b'\x48\x89\xe5')  # sample machine code
    main_text.size = len(main_text.data)
    main_obj.add_section(main_text)

    for sym in (
        Symbol("main", SymbolType.FUNCTION, SymbolBinding.GLOBAL, ".text", 0, 10),
        Symbol("_start", SymbolType.FUNCTION, SymbolBinding.GLOBAL, ".text", 10, 5),
    ):
        main_obj.add_symbol(sym)

    # Relocation that refers to printf, which lib.o provides.
    main_obj.relocations.append(Relocation(5, "printf", "R_X86_64_PLT32", -4))

    # --- lib.o: one code section that carries printf ---
    lib_obj = ObjectFile("lib.o")

    lib_text = Section(".text", SectionType.TEXT)
    lib_text.data = bytearray(b'\x55\x48\x89\xe5')  # sample machine code
    lib_text.size = len(lib_text.data)
    lib_obj.add_section(lib_text)

    lib_obj.add_symbol(
        Symbol("printf", SymbolType.FUNCTION, SymbolBinding.GLOBAL, ".text", 0, 20)
    )

    # --- link both object files ---
    executable = Linker(0x400000).link([main_obj, lib_obj])

    if not executable:
        print("链接失败")
        return

    print(f"\n链接成功：{executable}")

    print("\n段信息：")
    for section in executable.sections.values():
        print(f"  {section}")

    print("\n符号表：")
    for symbol in executable.symbol_table.values():
        print(f"  {symbol}")

    # Persist the linked image.
    executable.save_to_file("output.exe")

# Run the linking demo only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    test_linking_process()

9.7 实践练习

9.7.1 扩展目标代码生成器

练习1：支持更多指令类型

# 扩展指令类型
class ExtendedInstructionType(Enum):
    """Instruction categories: the six base kinds plus three extensions.

    The first six members repeat the names and values of InstructionType;
    an Enum that already has members cannot be subclassed, so they are
    redeclared here. Members of the two enums do not compare equal.
    """
    MOVE = "move"
    ARITHMETIC = "arithmetic"
    LOGICAL = "logical"
    COMPARISON = "comparison"
    CONTROL = "control"
    MEMORY = "memory"
    FLOATING_POINT = "floating_point"  # new
    VECTOR = "vector"                   # new
    SYSTEM = "system"                   # new

# 扩展目标机器模型
class ExtendedTargetMachine(TargetMachine):
    """Target machine model extended with XMM registers and vector opcodes.

    Attributes added on top of TargetMachine:
        fp_registers: sixteen 128-bit registers named xmm0 .. xmm15.
        vector_instructions: maps packed-float opcodes to their
            ExtendedInstructionType category.
    """

    def __init__(self, architecture: TargetArchitecture):
        super().__init__(architecture)

        # Register's fields are (name, size, is_general, is_available).
        # Pass the bit width as `size` and mark the XMM registers as not
        # general-purpose; the previous positional call put "float" into
        # `size` and 128 into `is_general`.
        self.fp_registers = [
            Register(f"xmm{i}", size=128, is_general=False) for i in range(16)
        ]

        # Packed single-precision opcodes known to this model.
        self.vector_instructions = {
            'addps': ExtendedInstructionType.VECTOR,
            'mulps': ExtendedInstructionType.VECTOR,
            'movaps': ExtendedInstructionType.VECTOR,
        }

练习2：实现SIMD指令生成

class SIMDCodeGenerator:
    """Emit packed-float (SSE) instruction sequences for a target machine."""

    # Number of loop-body instructions grouped into one vectorization batch.
    LANES = 4

    def __init__(self, target_machine: ExtendedTargetMachine):
        self.target_machine = target_machine

    def generate_vector_add(self, src1: str, src2: str, dst: str) -> List[Instruction]:
        """Return the instruction sequence computing ``dst = src1 + src2``.

        All three instructions list their operands destination-first, so the
        sequence loads src1 into xmm0, adds src2 to it and stores xmm0 to
        dst. The original mixed both orders (source-first for the two
        ``movaps``, destination-first for ``addps``), which could not yield
        the sum under either convention.

        NOTE(review): destination-first order is assumed from the
        bracketed memory-operand syntax used by MemoryOperand -- confirm
        against the instruction selector's convention.

        xmm0 is used as the scratch register and is overwritten.
        """
        vector = ExtendedInstructionType.VECTOR
        return [
            Instruction('movaps', ['xmm0', src1], vector),  # xmm0 <- src1
            Instruction('addps', ['xmm0', src2], vector),   # xmm0 += src2
            Instruction('movaps', [dst, 'xmm0'], vector),   # dst <- xmm0
        ]

    def generate_parallel_loop(self, loop_body: List[Instruction]) -> List[Instruction]:
        """Vectorize a loop body in consecutive batches of LANES instructions.

        Each batch is passed to ``_vectorize_batch`` and the results are
        concatenated in order. An empty body yields an empty list.
        """
        vectorized = []
        for start in range(0, len(loop_body), self.LANES):
            batch = loop_body[start:start + self.LANES]
            vectorized.extend(self._vectorize_batch(batch))
        return vectorized

    def _vectorize_batch(self, instructions: List[Instruction]) -> List[Instruction]:
        """Vectorize one batch; simplified placeholder returning it unchanged."""
        return instructions

9.7.2 优化算法实现

练习3：实现图着色寄存器分配

class ImprovedGraphColoringAllocator(RegisterAllocator):
    """Graph-coloring register allocator that also computes spill costs."""

    def __init__(self, available_registers: List[Register]):
        super().__init__(available_registers)
        self.spill_cost_threshold = 10

    def allocate(self, live_intervals: List[LiveInterval]) -> Dict[str, Register]:
        """Assign registers to the variables described by ``live_intervals``.

        Runs four steps in order: build the interference graph, compute
        spill costs, simplify the graph, then color it. The graph helpers
        are not defined in this class (presumably inherited from
        RegisterAllocator -- verify).

        Returns:
            Whatever ``_color_graph`` produces, annotated as a mapping from
            variable name to register.
        """
        graph = self._build_interference_graph(live_intervals)
        costs = self._calculate_spill_costs(live_intervals)
        ordered_nodes = self._simplify_graph(graph, costs)
        return self._color_graph(ordered_nodes, graph)

    def _calculate_spill_costs(self, live_intervals: List[LiveInterval]) -> Dict[str, float]:
        """Return a spill cost per variable.

        cost = number of use positions * 10 ** estimated loop depth, so a
        variable estimated to live in deeper loops costs more to spill.
        """
        return {
            iv.variable: len(iv.use_positions) * (10 ** self._estimate_loop_depth(iv))
            for iv in live_intervals
        }

    def _estimate_loop_depth(self, interval: LiveInterval) -> int:
        """Estimate loop depth from the interval length (simplified heuristic).

        Length above 100 -> 2, above 50 -> 1, otherwise 0.
        """
        span = interval.end - interval.start
        for depth, threshold in ((2, 100), (1, 50)):
            if span > threshold:
                return depth
        return 0

练习4：实现高级指令调度

class AdvancedScheduler:
    """Window-based instruction scheduler with a simple speculation model."""

    def __init__(self, processor_model: ProcessorModel):
        self.processor_model = processor_model
        self.scheduling_window = 32  # instructions handled per scheduling step

    def schedule_with_speculation(self, instructions: List[Instruction]) -> List[Instruction]:
        """Schedule ``instructions`` window by window.

        The input is cut into consecutive windows of ``scheduling_window``
        instructions; each window is analysed for control dependencies and
        reordered by ``_speculative_schedule``. Results are concatenated.

        Raises:
            ValueError: if there is work to do but ``scheduling_window`` is
                smaller than 1. Such a window is always empty, and the
                previous loop never advanced (it hung forever).
        """
        if not instructions:
            return []

        window_size = self.scheduling_window
        if window_size < 1:
            raise ValueError(
                f"scheduling_window must be at least 1, got {window_size}"
            )

        scheduled = []
        # Step by the window size so progress never depends on the length
        # of what the per-window scheduler returns.
        for start in range(0, len(instructions), window_size):
            window = instructions[start:start + window_size]
            control_deps = self._analyze_control_dependencies(window)
            scheduled.extend(self._speculative_schedule(window, control_deps))

        return scheduled

    def _analyze_control_dependencies(self, instructions: List[Instruction]) -> Dict[int, List[int]]:
        """Map each control instruction's index to the indices it governs.

        Every instruction whose type is InstructionType.CONTROL is treated
        as affecting all instructions that follow it in the given list.
        """
        count = len(instructions)
        return {
            idx: list(range(idx + 1, count))
            for idx, instr in enumerate(instructions)
            if instr.instruction_type == InstructionType.CONTROL
        }

    def _speculative_schedule(self, instructions: List[Instruction],
                            control_deps: Dict[int, List[int]]) -> List[Instruction]:
        """Place control-independent instructions before dependent ones.

        Simplified model: an instruction is "dependent" when its index is
        listed under any entry of ``control_deps``. Relative order inside
        each group is preserved.
        """
        # Collect the dependent indices once rather than rescanning every
        # dependency list for every instruction.
        blocked = set()
        for successors in control_deps.values():
            blocked.update(successors)

        independent = []
        dependent = []
        for idx, instr in enumerate(instructions):
            (dependent if idx in blocked else independent).append(instr)

        return independent + dependent

9.7.3 链接器扩展

练习5：实现动态链接支持

class DynamicLinker(Linker):
    """Linker variant that reserves PLT/GOT slots for shared-library symbols."""

    PLT_ENTRY_SIZE = 16  # bytes per procedure linkage table entry
    GOT_ENTRY_SIZE = 8   # bytes per global offset table entry

    def __init__(self, base_address: int = 0x400000):
        super().__init__(base_address)
        self.dynamic_symbols: Dict[str, Symbol] = {}
        self.plt_entries: Dict[str, int] = {}  # symbol name -> offset in the PLT
        self.got_entries: Dict[str, int] = {}  # symbol name -> offset in the GOT

    def link_dynamic(self, object_files: List[ObjectFile],
                    shared_libraries: List[str]) -> Optional[ExecutableFile]:
        """Link ``object_files`` against ``shared_libraries``.

        Loads the symbols of the shared libraries, reserves PLT/GOT entries
        for every relocation that names one of them, then runs the regular
        ``link`` step and returns its result.
        """
        print(f"动态链接：{len(object_files)} 个目标文件，{len(shared_libraries)} 个共享库")

        self._load_shared_library_symbols(shared_libraries)
        self._create_plt_got(object_files)

        return self.link(object_files)

    def _load_shared_library_symbols(self, libraries: List[str]):
        """Register the symbols exported by each library (simplified).

        Only libraries whose name contains ``libc`` contribute symbols: a
        fixed set of printf, malloc and free is added to
        ``dynamic_symbols``.
        """
        for lib in libraries:
            print(f"加载共享库：{lib}")

            # Simplified implementation: simulate loading libc's symbols.
            if 'libc' in lib:
                libc_symbols = [
                    Symbol('printf', SymbolType.FUNCTION, SymbolBinding.GLOBAL, '.text', 0),
                    Symbol('malloc', SymbolType.FUNCTION, SymbolBinding.GLOBAL, '.text', 0),
                    Symbol('free', SymbolType.FUNCTION, SymbolBinding.GLOBAL, '.text', 0),
                ]

                for sym in libc_symbols:
                    self.dynamic_symbols[sym.name] = sym

    def _create_plt_got(self, object_files: List[ObjectFile]):
        """Reserve one PLT and one GOT entry per referenced dynamic symbol.

        Entries are assigned in the order the symbols are first met while
        walking the relocations, so the layout is the same on every run.
        The previous version iterated a ``set`` of names, whose order
        changes with string hash randomization. Symbols that already own an
        entry keep it, and new offsets continue after the existing entries,
        so a repeated call never hands out an offset twice.
        """
        for obj_file in object_files:
            for reloc in obj_file.relocations:
                name = reloc.symbol
                if name not in self.dynamic_symbols or name in self.plt_entries:
                    continue

                # The table length before insertion is the index of the new slot.
                self.plt_entries[name] = len(self.plt_entries) * self.PLT_ENTRY_SIZE
                self.got_entries[name] = len(self.got_entries) * self.GOT_ENTRY_SIZE

        print(f"创建了 {len(self.plt_entries)} 个PLT条目和 {len(self.got_entries)} 个GOT条目")