1.启发式算法
import lightgbm as lgb # 核心替换:导入LightGBM库
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import classification_report, confusion_matrix
import warnings
import time # 保留时间统计库
warnings.filterwarnings("ignore") # 忽略所有警告信息
# --- 1. LightGBM with default parameters ---
# Baseline run: fit on the training set and score directly on the test set;
# no validation split is used in this section.
# X_train, y_train, X_test and y_test are not defined in this excerpt and must
# already exist when this code runs.
print("--- 1. 默认参数LightGBM (训练集 -> 测试集) ---")
start_time = time.time() # start of the timed section (covers both fit and predict)
# LightGBM classifier left at its defaults apart from the seed and log level
lgb_model = lgb.LGBMClassifier(
random_state=42, # fixed seed for reproducibility (original note: same seed as the earlier random-forest version)
verbose=-1 # keep LightGBM's training log quiet so the output stays readable
)
lgb_model.fit(X_train, y_train) # train on the training split
lgb_pred = lgb_model.predict(X_test) # predicted labels for the test split
end_time = time.time() # end of the timed section
# Report elapsed time, then the per-class report and the confusion matrix
print(f"训练与预测耗时: {end_time - start_time:.4f} 秒")
print("\n默认LightGBM 在测试集上的分类报告:")
print(classification_report(y_test, lgb_pred))
print("默认LightGBM 在测试集上的混淆矩阵:")
print(confusion_matrix(y_test, lgb_pred))
2.遗传算法
import lightgbm as lgb
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import classification_report, confusion_matrix
import warnings
import time
from deap import base, creator, tools, algorithms
import random
import numpy as np
warnings.filterwarnings("ignore")
# --- 2. Genetic-algorithm tuning of LightGBM (train set -> test set) ---
print("\n--- 2. 遗传算法优化LightGBM (训练集 -> 测试集) ---")
# DEAP types: a single-objective fitness with weight +1.0 (i.e. to be maximised)
# and a list-based individual that carries that fitness.
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)
# ======================================
# Search space: one (low, high) range per tuned LightGBM hyper-parameter
# ======================================
num_leaves_range = (31, 127) # NOTE(review): the intended constraint num_leaves < 2^max_depth is not enforced by any visible code
max_depth_range = (3, 15) # tree depth limit
learning_rate_range = (0.01, 0.3) # float-valued, so it is sampled with random.uniform rather than randint
min_child_samples_range = (5, 50) # minimum number of samples in a leaf
# ======================================
# Gene generators (one per hyper-parameter)
# ======================================
# Integer genes: random.randint over the inclusive range
toolbox = base.Toolbox()
toolbox.register("attr_num_leaves", random.randint, *num_leaves_range)
toolbox.register("attr_max_depth", random.randint, *max_depth_range)
toolbox.register("attr_min_child_samples", random.randint, *min_child_samples_range)
# Float gene (learning_rate): generated by the function defined next
def attr_learning_rate():
    """Draw one random learning-rate gene from ``learning_rate_range``."""
    low, high = learning_rate_range
    return random.uniform(low, high)
toolbox.register("attr_learning_rate", attr_learning_rate)
# ======================================
# Individual layout: one gene per hyper-parameter, in the fixed order
# [num_leaves, max_depth, learning_rate, min_child_samples]
# ======================================
# initCycle with n=1 calls each generator once, in the order listed below
toolbox.register("individual", tools.initCycle, creator.Individual,
(toolbox.attr_num_leaves, toolbox.attr_max_depth,
toolbox.attr_learning_rate, toolbox.attr_min_child_samples), n=1)
# A population is a plain list of individuals
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
# ======================================
# 关键修改4:重写评估函数,替换为LightGBM模型
# ======================================
def evaluate(individual):
    """Fitness of one GA individual: accuracy of the LightGBM model it encodes.

    ``individual`` holds four genes in the order
    ``[num_leaves, max_depth, learning_rate, min_child_samples]``.  The three
    integer hyper-parameters are truncated with ``int``; the learning rate is
    passed through unchanged.  The model is fitted on the module-level
    ``X_train``/``y_train`` and scored on ``X_test``/``y_test``.

    Returns:
        A 1-tuple ``(accuracy,)``, the shape DEAP expects for fitness values.

    NOTE(review): the score comes from the test set, so the test set steers
    the search; a separate validation split would avoid that.
    """
    leaves_gene, depth_gene, rate_gene, min_child_gene = individual
    hyper_params = {
        "num_leaves": int(leaves_gene),
        "max_depth": int(depth_gene),
        "learning_rate": rate_gene,
        "min_child_samples": int(min_child_gene),
        "n_estimators": 200,    # number of boosting rounds is fixed, not tuned
        "objective": "binary",
        "random_state": 42,
        "n_jobs": -1,
        "verbose": -1,          # keep LightGBM's training log quiet
    }
    classifier = lgb.LGBMClassifier(**hyper_params)
    classifier.fit(X_train, y_train)
    return (accuracy_score(y_test, classifier.predict(X_test)),)
# Make evaluate() available to the GA loop as toolbox.evaluate
toolbox.register("evaluate", evaluate)
# ======================================
# 关键修改5:更新变异操作,适配LightGBM的参数范围和浮点型参数
# ======================================
def mutate_individual(individual):
    """Mutate ``individual`` in place, resampling each gene with probability 0.1.

    Integer genes (indices 0, 1, 3: num_leaves, max_depth, min_child_samples)
    are redrawn with ``random.randint`` from their module-level ranges; the
    float gene (index 2: learning_rate) is redrawn with ``random.uniform``.

    Returns:
        A 1-tuple containing the same (possibly modified) individual.
    """
    gene_probability = 0.1
    # (gene index, resampler), visited in this order: the three integer genes
    # first, then the float gene.
    resamplers = (
        (0, lambda: random.randint(*num_leaves_range)),
        (1, lambda: random.randint(*max_depth_range)),
        (3, lambda: random.randint(*min_child_samples_range)),
        (2, lambda: random.uniform(*learning_rate_range)),
    )
    for gene_index, resample in resamplers:
        if random.random() < gene_probability:
            individual[gene_index] = resample()
    return (individual,)
# Use the custom mutation operator (original note: it replaces mutUniformInt,
# which cannot handle the float learning_rate gene)
toolbox.register("mutate", mutate_individual)
# Two-point crossover and tournament selection with tournament size 3
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("select", tools.selTournament, tournsize=3)
# ======================================
# 遗传算法执行流程(与原代码一致,无需修改)
# ======================================
# Initial population of 20 random individuals
pop = toolbox.population(n=20)
# Genetic-algorithm settings
NGEN = 10    # number of generations
CXPB = 0.5   # crossover probability
MUTPB = 0.2  # mutation probability
start_time = time.time()
# Main loop: vary -> evaluate -> select, repeated NGEN times
for gen in range(NGEN):
    # varAnd works on clones of the population and deletes the fitness of
    # every individual it crosses or mutates.
    offspring = algorithms.varAnd(pop, toolbox, cxpb=CXPB, mutpb=MUTPB)
    # Each evaluation trains a LightGBM model, so only individuals without a
    # valid fitness are scored; untouched clones keep the fitness they
    # already have (evaluate uses a fixed seed, so re-scoring them would only
    # repeat the same training).
    invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
    fits = toolbox.map(toolbox.evaluate, invalid_ind)
    for fit, ind in zip(fits, invalid_ind):
        ind.fitness.values = fit
    # The selected offspring become the next generation (size unchanged)
    pop = toolbox.select(offspring, k=len(pop))
end_time = time.time()
# ======================================
# Report the best hyper-parameters found by the GA
# ======================================
# Best individual of the FINAL population.
# NOTE(review): the GA loop above this block keeps no hall of fame, so this is
# not necessarily the best individual seen over all generations.
best_ind = tools.selBest(pop, k=1)[0]
best_num_leaves, best_max_depth, best_learning_rate, best_min_child_samples = best_ind
# Same int() truncation that evaluate() applies to the integer genes
best_num_leaves = int(best_num_leaves)
best_max_depth = int(best_max_depth)
best_min_child_samples = int(best_min_child_samples)
print(f"遗传算法优化耗时: {end_time - start_time:.4f} 秒")
print("最佳参数: ", {
'num_leaves': best_num_leaves,
'max_depth': best_max_depth,
'learning_rate': round(best_learning_rate, 4), # rounded for display only; the model below gets the unrounded value
'min_child_samples': best_min_child_samples
})
# ======================================
# Refit a model with the best hyper-parameters and evaluate it on the test set
# ======================================
# The fixed arguments (n_estimators, objective, seed, ...) mirror evaluate()
best_model = lgb.LGBMClassifier(
num_leaves=best_num_leaves,
max_depth=best_max_depth,
learning_rate=best_learning_rate,
min_child_samples=best_min_child_samples,
n_estimators=200,
objective="binary",
random_state=42,
n_jobs=-1,
verbose=-1
)
best_model.fit(X_train, y_train)
best_pred = best_model.predict(X_test)
print("\n遗传算法优化后的LightGBM 在测试集上的分类报告:")
print(classification_report(y_test, best_pred))
print("遗传算法优化后的LightGBM 在测试集上的混淆矩阵:")
print(confusion_matrix(y_test, best_pred))
3.粒子群算法
import lightgbm as lgb
import numpy as np
import random
import time
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import warnings
warnings.filterwarnings("ignore")
# --- Particle swarm optimisation (PSO) tuning of LightGBM (train set -> test set) ---
print("\n--- 2. 粒子群优化算法优化LightGBM (训练集 -> 测试集) ---")
# ======================================
# 关键修改1:重写适应度函数,替换为LightGBM模型
# ======================================
def fitness_function(params):
    """Score one PSO particle: test-set accuracy of the LightGBM model it encodes.

    Args:
        params: four values in the order
            ``[num_leaves, max_depth, learning_rate, min_child_samples]``.
            The integer hyper-parameters are truncated with ``int``; the
            learning rate is used as given.

    Returns:
        The accuracy of the fitted model on ``X_test``/``y_test``.

    NOTE(review): the score comes from the test set, so the test set steers
    the search; a separate validation split would avoid that.
    """
    leaves_raw, depth_raw, rate, min_child_raw = params
    classifier = lgb.LGBMClassifier(
        num_leaves=int(leaves_raw),
        max_depth=int(depth_raw),
        learning_rate=rate,
        min_child_samples=int(min_child_raw),
        n_estimators=200,      # number of boosting rounds is fixed, not tuned
        objective="binary",
        random_state=42,
        n_jobs=-1,
        verbose=-1,            # keep LightGBM's training log quiet
    )
    classifier.fit(X_train, y_train)
    predictions = classifier.predict(X_test)
    return accuracy_score(y_test, predictions)
# ======================================
# 粒子群优化算法实现(框架不变,无需修改)
# ======================================
def pso(num_particles, num_iterations, c1, c2, w, bounds, fitness_fn=None):
    """Maximise a fitness function with a basic particle-swarm optimiser.

    Args:
        num_particles: Swarm size; must be at least 1.
        num_iterations: Number of velocity/position update rounds.
        c1: Cognitive coefficient (pull towards each particle's own best).
        c2: Social coefficient (pull towards the swarm-wide best).
        w: Inertia weight applied to the previous velocity.
        bounds: Sequence of ``(low, high)`` pairs, one per search dimension.
            Positions are clamped to these limits after every move.
        fitness_fn: Callable that takes one position (1-D array) and returns
            the score to maximise.  Defaults to the module-level
            ``fitness_function``.

    Returns:
        ``(global_best, global_best_fitness)``: the best position found (an
        independent 1-D array) and its fitness.

    Raises:
        ValueError: If ``num_particles`` is less than 1.
    """
    if fitness_fn is None:
        fitness_fn = fitness_function
    if num_particles < 1:
        raise ValueError("num_particles must be at least 1")

    num_params = len(bounds)
    lower = np.array([low for low, _ in bounds], dtype=float)
    upper = np.array([high for _, high in bounds], dtype=float)

    # Random start positions inside the bounds; the swarm starts at rest.
    particles = np.array(
        [[random.uniform(low, high) for low, high in bounds]
         for _ in range(num_particles)],
        dtype=float,
    )
    velocities = np.zeros_like(particles)

    # Per-particle best and swarm-wide best seen so far.
    personal_best = particles.copy()
    personal_best_fitness = np.array([fitness_fn(p) for p in particles])
    best_index = int(np.argmax(personal_best_fitness))
    # Copy so later in-place updates of personal_best cannot silently change
    # the stored global best.
    global_best = personal_best[best_index].copy()
    global_best_fitness = personal_best_fitness[best_index]

    for _ in range(num_iterations):
        # Fresh random factors for every particle and dimension.
        r1 = np.array([[random.random() for _ in range(num_params)]
                       for _ in range(num_particles)])
        r2 = np.array([[random.random() for _ in range(num_params)]
                       for _ in range(num_particles)])

        velocities = (w * velocities
                      + c1 * r1 * (personal_best - particles)
                      + c2 * r2 * (global_best - particles))
        # Move, then clamp every coordinate back into its (low, high) range.
        particles = np.clip(particles + velocities, lower, upper)

        fitness_values = np.array([fitness_fn(p) for p in particles])
        improved = fitness_values > personal_best_fitness
        personal_best[improved] = particles[improved]
        personal_best_fitness[improved] = fitness_values[improved]

        best_index = int(np.argmax(personal_best_fitness))
        if personal_best_fitness[best_index] > global_best_fitness:
            global_best = personal_best[best_index].copy()
            # Fix: the original indexed personal_best with an undefined name
            # here, which raised NameError on the first improvement.
            global_best_fitness = personal_best_fitness[best_index]

    return global_best, global_best_fitness
# ======================================
# Search bounds for the tuned LightGBM hyper-parameters
# ======================================
# One (low, high) pair per dimension, in the order
# [num_leaves, max_depth, learning_rate, min_child_samples];
# this must match the unpacking order in fitness_function.
bounds = [
(31, 127), # num_leaves; NOTE(review): the intended constraint num_leaves < 2^max_depth is not enforced by any visible code
(3, 15), # max_depth
(0.01, 0.3), # learning_rate (float-valued)
(5, 50) # min_child_samples: minimum number of samples in a leaf
]
# ======================================
# PSO settings
# ======================================
num_particles = 20
num_iterations = 10
c1 = 1.5
c2 = 1.5
w = 0.5
# ======================================
# Run the optimiser and time it
# ======================================
start_time = time.time()
best_params, best_fitness = pso(num_particles, num_iterations, c1, c2, w, bounds)
end_time = time.time()
# ======================================
# Report the best hyper-parameters found by PSO
# ======================================
print(f"粒子群优化算法优化耗时: {end_time - start_time:.4f} 秒")
print("最佳参数: ", {
'num_leaves': int(best_params[0]),
'max_depth': int(best_params[1]),
'learning_rate': round(best_params[2], 4), # rounded for display only; the model below gets the unrounded value
'min_child_samples': int(best_params[3])
})
# ======================================
# Refit a model with the best hyper-parameters and evaluate it on the test set
# ======================================
# Same int() truncation and fixed arguments that fitness_function uses
best_model = lgb.LGBMClassifier(
num_leaves=int(best_params[0]),
max_depth=int(best_params[1]),
learning_rate=best_params[2],
min_child_samples=int(best_params[3]),
n_estimators=200,
objective="binary",
random_state=42,
n_jobs=-1,
verbose=-1
)
best_model.fit(X_train, y_train)
best_pred = best_model.predict(X_test)
# ======================================
# Print the classification report and the confusion matrix
# ======================================
print("\n粒子群优化算法优化后的LightGBM 在测试集上的分类报告:")
print(classification_report(y_test, best_pred))
print("粒子群优化算法优化后的LightGBM 在测试集上的混淆矩阵:")
print(confusion_matrix(y_test, best_pred))
804

被折叠的 条评论
为什么被折叠?



