DAY13 启发式算法

1.启发式算法

import lightgbm as lgb  # 核心替换:导入LightGBM库
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import classification_report, confusion_matrix
import warnings
import time  # 保留时间统计库
warnings.filterwarnings("ignore")  # 忽略所有警告信息

# --- 1. LightGBM with default parameters ---
# Baseline run: fit on the training split and score directly on the test split
# (no validation split is used at this stage).
# X_train / y_train / X_test / y_test are expected to exist already; they are
# not defined in this part of the file.
print("--- 1. 默认参数LightGBM (训练集 -> 测试集) ---")

start_time = time.time()  # the timer covers both training and prediction

# Library defaults only; the seed is pinned and verbose=-1 is passed to keep
# the training output quiet.
lgb_model = lgb.LGBMClassifier(random_state=42, verbose=-1)

# fit() returns the estimator itself (scikit-learn convention), so the
# prediction can be chained onto it.
lgb_pred = lgb_model.fit(X_train, y_train).predict(X_test)

end_time = time.time()

# Report: elapsed time, then the per-class metrics and the confusion matrix.
print(f"训练与预测耗时: {end_time - start_time:.4f} 秒")
print("\n默认LightGBM 在测试集上的分类报告:", classification_report(y_test, lgb_pred), sep="\n")
print("默认LightGBM 在测试集上的混淆矩阵:", confusion_matrix(y_test, lgb_pred), sep="\n")

2.遗传算法

import lightgbm as lgb
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import classification_report, confusion_matrix
import warnings
import time
from deap import base, creator, tools, algorithms
import random
import numpy as np

warnings.filterwarnings("ignore")

# --- 2. Genetic-algorithm tuning of LightGBM (train set -> test set) ---
print("\n--- 2. 遗传算法优化LightGBM (训练集 -> 测试集) ---")

# Fitness and individual types: a single objective with weight +1.0 (i.e. it is
# maximised), and an individual that is a plain list carrying that fitness.
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

# ======================================
# Key change 1: LightGBM hyper-parameters and their (low, high) search ranges
# ======================================
# NOTE(review): the original note says num_leaves should stay below
# 2^max_depth; these ranges do not enforce that (max_depth can be as low as 3
# while num_leaves starts at 31).
num_leaves_range = (31, 127)        # number of leaves per tree
max_depth_range = (3, 15)           # maximum tree depth
learning_rate_range = (0.01, 0.3)   # learning rate (float; handled separately from the integer genes)
min_child_samples_range = (5, 50)   # minimum number of samples in a leaf

# ======================================
# Key change 2: gene generators (integer genes here; the float gene is
# registered separately)
# ======================================
# Integer hyper-parameters: random.randint draws from the inclusive range.
toolbox = base.Toolbox()
toolbox.register("attr_num_leaves", random.randint, *num_leaves_range)
toolbox.register("attr_max_depth", random.randint, *max_depth_range)
toolbox.register("attr_min_child_samples", random.randint, *min_child_samples_range)

# Float hyper-parameter: the learning rate is sampled from a continuous range.
def attr_learning_rate():
    """Return a learning rate drawn uniformly from ``learning_rate_range``."""
    lower, upper = learning_rate_range
    return random.uniform(lower, upper)


toolbox.register("attr_learning_rate", attr_learning_rate)

# ======================================
# Key change 3: individual generator combining the LightGBM hyper-parameters
# ======================================
# The generators are listed in the order
# [num_leaves, max_depth, learning_rate, min_child_samples]; the evaluation,
# mutation and reporting code all rely on this gene order.
toolbox.register("individual", tools.initCycle, creator.Individual,
                 (toolbox.attr_num_leaves, toolbox.attr_max_depth,
                  toolbox.attr_learning_rate, toolbox.attr_min_child_samples), n=1)

# Population generator: builds a list of individuals; the size is given at call time.
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# ======================================
# Key change 4: evaluation function built around a LightGBM model
# ======================================
def evaluate(individual):
    """Train LightGBM with one individual's hyper-parameters and score it.

    Args:
        individual: Four genes in the order
            ``[num_leaves, max_depth, learning_rate, min_child_samples]``.

    Returns:
        A one-element tuple ``(accuracy,)``, the shape DEAP expects for a
        single-objective fitness.

    NOTE(review): the accuracy is measured on ``X_test``/``y_test``, so the
    search is steered by the same split that is later used for the final
    report.
    """
    raw_leaves, raw_depth, learning_rate, raw_min_samples = individual

    # Genes may arrive as floats; LightGBM needs real integers for these three.
    classifier = lgb.LGBMClassifier(
        num_leaves=int(raw_leaves),
        max_depth=int(raw_depth),
        learning_rate=learning_rate,
        min_child_samples=int(raw_min_samples),
        n_estimators=200,      # fixed number of boosting rounds (not part of the search)
        objective="binary",    # binary classification task
        random_state=42,
        n_jobs=-1,
        verbose=-1,            # keep training output quiet
    )
    classifier.fit(X_train, y_train)

    # Trailing comma: the fitness must be returned as a tuple.
    return (accuracy_score(y_test, classifier.predict(X_test)),)


# Make the function available to the algorithm as toolbox.evaluate.
toolbox.register("evaluate", evaluate)

# ======================================
# Key change 5: mutation operator aware of the mixed int/float genes
# ======================================
def mutate_individual(individual):
    """Mutate an individual in place, gene by gene.

    Each gene is independently resampled from its own range with probability
    0.1. The integer genes (indices 0, 1, 3) are handled first, then the
    float learning-rate gene (index 2).

    Args:
        individual: Mutable sequence
            ``[num_leaves, max_depth, learning_rate, min_child_samples]``.

    Returns:
        A one-element tuple containing the same (mutated) individual, as DEAP
        mutation operators are expected to return.
    """
    integer_genes = (
        (0, num_leaves_range),
        (1, max_depth_range),
        (3, min_child_samples_range),
    )
    for position, gene_range in integer_genes:
        if random.random() >= 0.1:
            continue  # this gene is left untouched
        individual[position] = random.randint(*gene_range)

    # The learning rate is continuous, so it is redrawn with uniform().
    if random.random() < 0.1:
        lower, upper = learning_rate_range
        individual[2] = random.uniform(lower, upper)

    return (individual,)


# Register the custom operator as toolbox.mutate.
toolbox.register("mutate", mutate_individual)

# Crossover and selection operators: two-point crossover swaps the same gene
# positions between parents; selection is a tournament of size 3.
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("select", tools.selTournament, tournsize=3)

# ======================================
# Genetic algorithm main loop
# ======================================
# Initial population of 20 individuals (their fitness is not computed yet).
pop = toolbox.population(n=20)

# Algorithm settings
NGEN = 10  # number of generations
CXPB = 0.5 # crossover probability
MUTPB = 0.2 # mutation probability

start_time = time.time()

for gen in range(NGEN):
    # varAnd clones the population, applies crossover/mutation, and drops the
    # fitness of every individual it touched.
    offspring = algorithms.varAnd(pop, toolbox, cxpb=CXPB, mutpb=MUTPB)

    # Only individuals without a valid fitness need a (costly) model fit.
    # Untouched clones keep the fitness computed in an earlier generation; in
    # the first generation nothing has been scored yet, so all are evaluated.
    invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
    fits = toolbox.map(toolbox.evaluate, invalid_ind)
    for fit, ind in zip(fits, invalid_ind):
        ind.fitness.values = fit

    # The next generation is drawn from the offspring, same size as before.
    pop = toolbox.select(offspring, k=len(pop))

end_time = time.time()

# ======================================
# Key change 6: extract and print the best LightGBM hyper-parameters
# ======================================
# Best individual of the final population.
best_ind = tools.selBest(pop, k=1)[0]
best_num_leaves, best_max_depth, best_learning_rate, best_min_child_samples = best_ind

# The three tree-shape genes must be integers before they reach LightGBM.
best_num_leaves, best_max_depth, best_min_child_samples = map(
    int, (best_num_leaves, best_max_depth, best_min_child_samples)
)

print(f"遗传算法优化耗时: {end_time - start_time:.4f} 秒")
ga_best_summary = {
    'num_leaves': best_num_leaves,
    'max_depth': best_max_depth,
    'learning_rate': round(best_learning_rate, 4),  # rounded for display only
    'min_child_samples': best_min_child_samples
}
print("最佳参数: ", ga_best_summary)

# ======================================
# Key change 7: retrain with the best hyper-parameters and evaluate
# ======================================
# Same settings as the printed summary, but with the unrounded learning rate.
ga_best_kwargs = dict(ga_best_summary, learning_rate=best_learning_rate)
best_model = lgb.LGBMClassifier(
    n_estimators=200,
    objective="binary",
    random_state=42,
    n_jobs=-1,
    verbose=-1,
    **ga_best_kwargs
)
best_pred = best_model.fit(X_train, y_train).predict(X_test)

print("\n遗传算法优化后的LightGBM 在测试集上的分类报告:", classification_report(y_test, best_pred), sep="\n")
print("遗传算法优化后的LightGBM 在测试集上的混淆矩阵:", confusion_matrix(y_test, best_pred), sep="\n")

3.粒子群算法

import lightgbm as lgb
import numpy as np
import random
import time
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import warnings
warnings.filterwarnings("ignore")

# --- Particle swarm optimisation of LightGBM (train set -> test set) ---
# (The printed banner carries the number 2, as in the original.)
print("\n--- 2. 粒子群优化算法优化LightGBM (训练集 -> 测试集) ---")

# ======================================
# Key change 1: fitness function built around a LightGBM model
# ======================================
def fitness_function(params):
    """Return the test-set accuracy of LightGBM trained with ``params``.

    Args:
        params: Four values in the order
            ``[num_leaves, max_depth, learning_rate, min_child_samples]``
            (one particle position).

    Returns:
        The accuracy of the fitted model on ``X_test``/``y_test``.

    NOTE(review): the score comes from the test split, so the search is
    steered by the same data that is later used for the final report.
    """
    raw_leaves, raw_depth, learning_rate, raw_min_samples = params

    # Particle coordinates are continuous; truncate the integer-valued ones.
    classifier = lgb.LGBMClassifier(
        num_leaves=int(raw_leaves),
        max_depth=int(raw_depth),
        learning_rate=learning_rate,
        min_child_samples=int(raw_min_samples),
        n_estimators=200,      # fixed number of boosting rounds (not part of the search)
        objective="binary",    # binary classification task
        random_state=42,
        n_jobs=-1,
        verbose=-1,            # keep training output quiet
    )
    classifier.fit(X_train, y_train)

    return accuracy_score(y_test, classifier.predict(X_test))

# ======================================
# Particle swarm optimisation (global-best variant)
# ======================================
def pso(num_particles, num_iterations, c1, c2, w, bounds, fitness_fn=None):
    """Search a box-constrained space for the position with maximum fitness.

    Args:
        num_particles: Number of particles in the swarm (must be >= 1).
        num_iterations: Number of velocity/position update rounds.
        c1: Weight of the pull towards each particle's own best position.
        c2: Weight of the pull towards the swarm's best position.
        w: Inertia weight applied to the previous velocity.
        bounds: Sequence of ``(low, high)`` pairs, one per parameter.
        fitness_fn: Optional callable mapping a position array to a score to
            maximise. Defaults to the module-level ``fitness_function``.

    Returns:
        ``(global_best, global_best_fitness)``: the best position found (a
        1-D array independent of the swarm's internal state) and its score.

    Raises:
        ValueError: If ``num_particles`` is smaller than 1.
    """
    if num_particles < 1:
        raise ValueError("num_particles must be at least 1")
    if fitness_fn is None:
        # Looked up at call time so the module-level objective stays the default.
        fitness_fn = fitness_function

    num_params = len(bounds)
    lower = np.array([b[0] for b in bounds], dtype=float)
    upper = np.array([b[1] for b in bounds], dtype=float)

    # Random start positions inside the box; velocities start at zero.
    # The ``random`` module is used throughout so random.seed() controls the run.
    particles = np.array(
        [[random.uniform(bounds[j][0], bounds[j][1]) for j in range(num_params)]
         for _ in range(num_particles)],
        dtype=float,
    )
    velocities = np.zeros_like(particles)

    # Per-particle memory of the best position seen so far and its score.
    personal_best = particles.copy()
    personal_best_fitness = np.array([fitness_fn(p) for p in particles], dtype=float)

    # Swarm-wide best. Copied so later in-place updates of personal_best
    # cannot silently change it.
    best_index = int(np.argmax(personal_best_fitness))
    global_best = personal_best[best_index].copy()
    global_best_fitness = personal_best_fitness[best_index]

    for _ in range(num_iterations):
        # Fresh random factors per particle and per dimension.
        r1 = np.array([[random.random() for _ in range(num_params)]
                       for _ in range(num_particles)])
        r2 = np.array([[random.random() for _ in range(num_params)]
                       for _ in range(num_particles)])

        # Inertia + attraction to own best + attraction to swarm best.
        velocities = (
            w * velocities
            + c1 * r1 * (personal_best - particles)
            + c2 * r2 * (global_best - particles)
        )
        # Move, then clamp every coordinate back into its [low, high] range.
        particles = np.clip(particles + velocities, lower, upper)

        # Update each particle's memory where the new position scores higher.
        fitness_values = np.array([fitness_fn(p) for p in particles], dtype=float)
        improved_indices = fitness_values > personal_best_fitness
        personal_best[improved_indices] = particles[improved_indices]
        personal_best_fitness[improved_indices] = fitness_values[improved_indices]

        # Promote the best personal record to swarm best when it beats it.
        current_best_index = int(np.argmax(personal_best_fitness))
        if personal_best_fitness[current_best_index] > global_best_fitness:
            global_best = personal_best[current_best_index].copy()
            global_best_fitness = personal_best_fitness[current_best_index]

    return global_best, global_best_fitness

# ======================================
# Key change 2: search bounds for the LightGBM hyper-parameters
# ======================================
# One (low, high) pair per parameter, in the order
# [num_leaves, max_depth, learning_rate, min_child_samples].
# NOTE(review): the original note says num_leaves should stay below
# 2^max_depth; these bounds do not enforce that.
bounds = [
    (31, 127),    # num_leaves: number of leaves per tree
    (3, 15),      # max_depth: maximum tree depth
    (0.01, 0.3),  # learning_rate: float-valued step size
    (5, 50)       # min_child_samples: minimum number of samples in a leaf
]

# ======================================
# PSO settings
# ======================================
num_particles = 20   # swarm size
num_iterations = 10  # number of update rounds
c1 = 1.5             # weight of the pull towards a particle's own best
c2 = 1.5             # weight of the pull towards the swarm's best
w = 0.5              # inertia weight on the previous velocity

# ======================================
# Run the optimisation and time it
# ======================================
start_time = time.time()
best_params, best_fitness = pso(num_particles, num_iterations, c1, c2, w, bounds)
end_time = time.time()

# ======================================
# Key change 3: print the best LightGBM hyper-parameters found by PSO
# ======================================
print(f"粒子群优化算法优化耗时: {end_time - start_time:.4f} 秒")

# Positions are continuous, so the integer-valued parameters are truncated.
pso_best_summary = {
    'num_leaves': int(best_params[0]),
    'max_depth': int(best_params[1]),
    'learning_rate': round(best_params[2], 4),  # rounded for display only
    'min_child_samples': int(best_params[3])
}
print("最佳参数: ", pso_best_summary)

# ======================================
# Key change 4: retrain with the best hyper-parameters and evaluate
# ======================================
# Same settings as the printed summary, but with the unrounded learning rate.
pso_best_kwargs = dict(pso_best_summary, learning_rate=best_params[2])
best_model = lgb.LGBMClassifier(
    n_estimators=200,
    objective="binary",
    random_state=42,
    n_jobs=-1,
    verbose=-1,
    **pso_best_kwargs
)
best_pred = best_model.fit(X_train, y_train).predict(X_test)

# ======================================
# Evaluation report
# ======================================
print("\n粒子群优化算法优化后的LightGBM 在测试集上的分类报告:", classification_report(y_test, best_pred), sep="\n")
print("粒子群优化算法优化后的LightGBM 在测试集上的混淆矩阵:", confusion_matrix(y_test, best_pred), sep="\n")

@浙大疏锦行

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值