DAY13 启发式算法-CSDN博客

1.启发式算法

import lightgbm as lgb  # 核心替换：导入LightGBM库
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import classification_report, confusion_matrix
import warnings
import time  # 保留时间统计库
warnings.filterwarnings("ignore")  # 忽略所有警告信息

# --- 1. LightGBM with default parameters ---
# Baseline model: fit on the training split and score on the test split;
# per the original note, no validation split is needed for this step.
# NOTE(review): X_train / y_train / X_test / y_test are not defined in this
# snippet; presumably they come from an earlier train/test split — confirm.
print("--- 1. 默认参数LightGBM (训练集 -> 测试集) ---")

start_time = time.time()  # start of the timed fit + predict section
# LightGBM classifier with library defaults apart from the seed and verbosity
lgb_model = lgb.LGBMClassifier(
    random_state=42,  # fixed seed for reproducibility (original note: same seed as the earlier random-forest version)
    verbose=-1  # silence LightGBM's training log output
)

lgb_model.fit(X_train, y_train)  # train on the training split
lgb_pred = lgb_model.predict(X_test)  # predict labels for the test split
end_time = time.time()  # end of the timed section

# Report elapsed time, then the per-class report and the confusion matrix
print(f"训练与预测耗时: {end_time - start_time:.4f} 秒")
print("\n默认LightGBM 在测试集上的分类报告：")
print(classification_report(y_test, lgb_pred))
print("默认LightGBM 在测试集上的混淆矩阵：")
print(confusion_matrix(y_test, lgb_pred))

2.遗传算法

import lightgbm as lgb
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import classification_report, confusion_matrix
import warnings
import time
from deap import base, creator, tools, algorithms
import random
import numpy as np

warnings.filterwarnings("ignore")

# --- 2. Genetic-algorithm tuning of LightGBM (train split -> test split) ---
print("\n--- 2. 遗传算法优化LightGBM (训练集 -> 测试集) ---")

# DEAP types: a single-objective fitness with weight +1.0 (i.e. maximised)
# and a list-based individual that carries that fitness.
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

# ======================================
# Search space: (low, high) bounds for each tuned LightGBM hyperparameter
# ======================================
# NOTE(review): the original comment says num_leaves should stay below
# 2^max_depth, but nothing here enforces it — e.g. max_depth=3 allows at most
# 8 leaves while num_leaves starts at 31. Confirm this is acceptable.
num_leaves_range = (31, 127)        # num_leaves bounds (integer gene)
max_depth_range = (3, 15)           # max_depth bounds (integer gene)
learning_rate_range = (0.01, 0.3)   # learning_rate bounds (float gene, handled separately)
min_child_samples_range = (5, 50)   # min_child_samples bounds (integer gene)

# ======================================
# Gene generators (the float learning_rate gene gets its own generator)
# ======================================
# Integer genes: drawn with random.randint over the bounds above
toolbox = base.Toolbox()
toolbox.register("attr_num_leaves", random.randint, *num_leaves_range)
toolbox.register("attr_max_depth", random.randint, *max_depth_range)
toolbox.register("attr_min_child_samples", random.randint, *min_child_samples_range)

# 浮点型参数：随机浮点数生成（learning_rate）
def attr_learning_rate():
    """Draw a random learning-rate gene from the configured search interval.

    Returns:
        A float sampled with ``random.uniform`` between the two endpoints of
        the module-level ``learning_rate_range``.
    """
    lower, upper = learning_rate_range
    return random.uniform(lower, upper)
toolbox.register("attr_learning_rate", attr_learning_rate)

# ======================================
# Individual factory: one pass over the four gene generators (n=1), giving
# [num_leaves, max_depth, learning_rate, min_child_samples]
# ======================================
toolbox.register("individual", tools.initCycle, creator.Individual,
                 (toolbox.attr_num_leaves, toolbox.attr_max_depth,
                  toolbox.attr_learning_rate, toolbox.attr_min_child_samples), n=1)

# Population factory: a plain list of individuals built by repeated calls
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# ======================================
# 关键修改4：重写评估函数，替换为LightGBM模型
# ======================================
def evaluate(individual):
    """Score one GA individual by training LightGBM with its genes.

    Args:
        individual: Four-gene sequence ordered as
            ``[num_leaves, max_depth, learning_rate, min_child_samples]``.

    Returns:
        A one-element tuple ``(accuracy,)`` holding the accuracy of the
        fitted model's predictions on ``X_test`` against ``y_test``.

    NOTE(review): the fitness is measured on the test split, so the test
    metrics reported after the search are optimistically biased.
    """
    leaves_gene, depth_gene, rate_gene, child_gene = individual

    # The tree-shape parameters must be integers; the learning rate is used
    # exactly as stored in the individual.
    tuned = {
        "num_leaves": int(leaves_gene),
        "max_depth": int(depth_gene),
        "learning_rate": rate_gene,
        "min_child_samples": int(child_gene),
    }
    classifier = lgb.LGBMClassifier(
        n_estimators=200,  # boosting rounds are fixed, not searched
        objective="binary",
        random_state=42,
        n_jobs=-1,
        verbose=-1,  # silence LightGBM's training log output
        **tuned,
    )

    classifier.fit(X_train, y_train)
    predictions = classifier.predict(X_test)
    return (accuracy_score(y_test, predictions),)

# Register the fitness evaluation function on the toolbox
toolbox.register("evaluate", evaluate)

# ======================================
# 关键修改5：更新变异操作，适配LightGBM的参数范围和浮点型参数
# ======================================
def mutate_individual(individual):
    """Mutate an individual in place, gene by gene, respecting gene types.

    Each gene is independently resampled from its own search range with
    probability 0.1: the integer genes (positions 0, 1 and 3) via
    ``random.randint`` and the float learning-rate gene (position 2) via
    ``random.uniform``.

    Args:
        individual: Mutable four-gene sequence ordered as
            ``[num_leaves, max_depth, learning_rate, min_child_samples]``.

    Returns:
        A one-element tuple containing the same (mutated) individual.
    """
    # (gene position, search range) for every integer-valued gene
    integer_genes = (
        (0, num_leaves_range),
        (1, max_depth_range),
        (3, min_child_samples_range),
    )
    for position, bounds in integer_genes:
        if random.random() >= 0.1:
            continue  # this gene is left untouched
        individual[position] = random.randint(*bounds)

    # The learning rate is handled last and resampled as a float
    if random.random() < 0.1:
        individual[2] = random.uniform(*learning_rate_range)

    return (individual,)

# Register the custom mutation operator (original note: replaces mutUniformInt)
toolbox.register("mutate", mutate_individual)

# Crossover and selection operators: two-point crossover, tournaments of 3
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("select", tools.selTournament, tournsize=3)

# ======================================
# Genetic-algorithm main loop
# ======================================
# Initial population of 20 individuals
pop = toolbox.population(n=20)

# GA settings
NGEN = 10  # number of generations
CXPB = 0.5 # crossover probability passed to varAnd
MUTPB = 0.2 # mutation probability passed to varAnd

start_time = time.time()

# Run the search: vary the population, score every offspring, then select the
# next generation (same size as the current one) from the offspring.
# NOTE(review): there is no elitism or hall of fame here, so a good individual
# from an earlier generation is not guaranteed to reach the final population.
for gen in range(NGEN):
    offspring = algorithms.varAnd(pop, toolbox, cxpb=CXPB, mutpb=MUTPB)
    fits = toolbox.map(toolbox.evaluate, offspring)
    for fit, ind in zip(fits, offspring):
        ind.fitness.values = fit
    pop = toolbox.select(offspring, k=len(pop))

end_time = time.time()

# ======================================
# Decode the best individual and report its hyperparameters
# ======================================
# Best individual of the final population
best_ind = tools.selBest(pop, k=1)[0]
best_num_leaves, best_max_depth, best_learning_rate, best_min_child_samples = best_ind

# Cast the integer-valued hyperparameters to int before use
best_num_leaves = int(best_num_leaves)
best_max_depth = int(best_max_depth)
best_min_child_samples = int(best_min_child_samples)

print(f"遗传算法优化耗时: {end_time - start_time:.4f} 秒")
print("最佳参数: ", {
    'num_leaves': best_num_leaves,
    'max_depth': best_max_depth,
    'learning_rate': round(best_learning_rate, 4),  # rounded to 4 decimals for display only
    'min_child_samples': best_min_child_samples
})

# ======================================
# Refit LightGBM with the best hyperparameters and evaluate on the test split
# ======================================
# Same fixed settings as in evaluate(); the unrounded learning rate is used
best_model = lgb.LGBMClassifier(
    num_leaves=best_num_leaves,
    max_depth=best_max_depth,
    learning_rate=best_learning_rate,
    min_child_samples=best_min_child_samples,
    n_estimators=200,
    objective="binary",
    random_state=42,
    n_jobs=-1,
    verbose=-1
)
best_model.fit(X_train, y_train)
best_pred = best_model.predict(X_test)

print("\n遗传算法优化后的LightGBM 在测试集上的分类报告：")
print(classification_report(y_test, best_pred))
print("遗传算法优化后的LightGBM 在测试集上的混淆矩阵：")
print(confusion_matrix(y_test, best_pred))

3.粒子群算法

import lightgbm as lgb
import numpy as np
import random
import time
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import warnings
warnings.filterwarnings("ignore")

# --- Particle-swarm tuning of LightGBM (train split -> test split) ---
# (the printed banner below keeps the original "2." numbering)
print("\n--- 2. 粒子群优化算法优化LightGBM (训练集 -> 测试集) ---")

# ======================================
# 关键修改1：重写适应度函数，替换为LightGBM模型
# ======================================
def fitness_function(params): 
    """Score one PSO particle by training LightGBM and measuring accuracy.

    Args:
        params: Four-element sequence ordered as
            ``[num_leaves, max_depth, learning_rate, min_child_samples]``.

    Returns:
        Accuracy of the fitted model's predictions on ``X_test`` against
        ``y_test``.

    NOTE(review): the fitness is measured on the test split, so the test
    metrics reported after the search are optimistically biased.
    """
    leaves_raw, depth_raw, step_size, child_raw = params

    # Particle coordinates are continuous; the tree-shape parameters are
    # truncated to int, the learning rate is used as-is.
    settings = {
        "num_leaves": int(leaves_raw),
        "max_depth": int(depth_raw),
        "learning_rate": step_size,
        "min_child_samples": int(child_raw),
    }
    classifier = lgb.LGBMClassifier(
        n_estimators=200,  # boosting rounds are fixed, not searched
        objective="binary",
        random_state=42,
        n_jobs=-1,
        verbose=-1,  # silence LightGBM's training log output
        **settings,
    )

    classifier.fit(X_train, y_train)
    predicted = classifier.predict(X_test)
    return accuracy_score(y_test, predicted)

# ======================================
# 粒子群优化算法实现（框架不变，无需修改）
# ======================================
def pso(num_particles, num_iterations, c1, c2, w, bounds, fitness_fn=None):
    """Maximise a fitness function with a basic global-best particle swarm.

    Args:
        num_particles: Number of particles in the swarm (must be >= 1).
        num_iterations: Number of velocity/position update rounds.
        c1: Cognitive coefficient (pull towards each particle's own best).
        c2: Social coefficient (pull towards the swarm-wide best).
        w: Inertia weight applied to the previous velocity.
        bounds: Sequence of ``(low, high)`` pairs, one per parameter.
        fitness_fn: Optional callable that takes a 1-D parameter array and
            returns the score to maximise. When omitted, the module-level
            ``fitness_function`` is used.

    Returns:
        ``(global_best, global_best_fitness)``: the best position found (a
        1-D NumPy array, one entry per parameter) and its fitness.

    Raises:
        ValueError: If ``num_particles`` is smaller than 1.
    """
    if num_particles < 1:
        raise ValueError("num_particles must be at least 1")
    if fitness_fn is None:
        fitness_fn = fitness_function

    num_params = len(bounds)
    lower = np.array([low for low, _ in bounds], dtype=float)
    upper = np.array([high for _, high in bounds], dtype=float)

    # Random start positions inside the bounds; velocities start at zero
    # (as floats, so later updates never mix dtypes).
    particles = np.array(
        [[random.uniform(low, high) for low, high in bounds]
         for _ in range(num_particles)],
        dtype=float,
    )
    velocities = np.zeros((num_particles, num_params))

    # Per-particle bests
    personal_best = particles.copy()
    personal_best_fitness = np.array(
        [fitness_fn(p) for p in particles], dtype=float
    )

    # Swarm-wide best. Copy the row: a plain index would be a view into
    # personal_best and would change silently when that row is overwritten.
    best_index = np.argmax(personal_best_fitness)
    global_best = personal_best[best_index].copy()
    global_best_fitness = personal_best_fitness[best_index]

    for _ in range(num_iterations):
        r1 = np.array([[random.random() for _ in range(num_params)]
                       for _ in range(num_particles)])
        r2 = np.array([[random.random() for _ in range(num_params)]
                       for _ in range(num_particles)])

        # Velocity update: inertia + cognitive pull + social pull
        velocities = (
            w * velocities
            + c1 * r1 * (personal_best - particles)
            + c2 * r2 * (global_best - particles)
        )
        # Move, then clamp every coordinate back into its bounds
        particles = np.clip(particles + velocities, lower, upper)

        # Refresh per-particle bests where the new position scores higher
        fitness_values = np.array(
            [fitness_fn(p) for p in particles], dtype=float
        )
        improved = fitness_values > personal_best_fitness
        personal_best[improved] = particles[improved]
        personal_best_fitness[improved] = fitness_values[improved]

        # Refresh the swarm-wide best from the per-particle bests
        best_index = np.argmax(personal_best_fitness)
        if personal_best_fitness[best_index] > global_best_fitness:
            global_best = personal_best[best_index].copy()
            global_best_fitness = personal_best_fitness[best_index]

    return global_best, global_best_fitness

# ======================================
# Search space for the LightGBM hyperparameters
# ======================================
# One (low, high) pair per dimension, ordered as
# [num_leaves, max_depth, learning_rate, min_child_samples]
# NOTE(review): the original comment says num_leaves should stay below
# 2^max_depth, but these bounds do not enforce it — confirm this is acceptable.
bounds = [
    (31, 127),    # num_leaves
    (3, 15),      # max_depth
    (0.01, 0.3),  # learning_rate (float)
    (5, 50)       # min_child_samples
]

# ======================================
# Particle-swarm settings
# ======================================
num_particles = 20
num_iterations = 10
c1 = 1.5
c2 = 1.5
w = 0.5

# ======================================
# Run the PSO search and time it
# ======================================
start_time = time.time()
best_params, best_fitness = pso(num_particles, num_iterations, c1, c2, w, bounds)
end_time = time.time()

# ======================================
# Decode and report the best hyperparameters
# ======================================
print(f"粒子群优化算法优化耗时: {end_time - start_time:.4f} 秒")
print("最佳参数: ", {
    'num_leaves': int(best_params[0]),
    'max_depth': int(best_params[1]),
    'learning_rate': round(best_params[2], 4),  # rounded to 4 decimals for display only
    'min_child_samples': int(best_params[3])
})

# ======================================
# Refit LightGBM with the best hyperparameters and evaluate on the test split
# ======================================
# Same fixed settings as in fitness_function(); integer parameters are cast
# with int(), the unrounded learning rate is used
best_model = lgb.LGBMClassifier(
    num_leaves=int(best_params[0]),
    max_depth=int(best_params[1]),
    learning_rate=best_params[2],
    min_child_samples=int(best_params[3]),
    n_estimators=200,
    objective="binary",
    random_state=42,
    n_jobs=-1,
    verbose=-1
)
best_model.fit(X_train, y_train)
best_pred = best_model.predict(X_test)

# ======================================
# Print the evaluation results
# ======================================
print("\n粒子群优化算法优化后的LightGBM 在测试集上的分类报告：")
print(classification_report(y_test, best_pred))
print("粒子群优化算法优化后的LightGBM 在测试集上的混淆矩阵：")
print(confusion_matrix(y_test, best_pred))

@浙大疏锦行