def objective(trial):
    """Optuna objective: mean 5-fold ROC AUC of a LightGBM pipeline.

    Samples one LightGBM configuration from ``trial``, wraps the classifier
    in a pipeline behind the module-level ``preprocessor``, and scores it
    with cross-validation on the module-level ``X_train`` / ``y_train``.

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        The mean ROC AUC across the 5 cross-validation folds.
    """
    # Hyperparameter search space.
    #
    # Each LightGBM parameter is sampled under exactly one name.  The
    # scikit-learn style names `subsample` and `colsample_bytree` are aliases
    # of `bagging_fraction` and `feature_fraction`; when both an alias and
    # the canonical name are passed, LightGBM keeps the canonical one and
    # ignores the alias (the warning is hidden by verbose=-1).  Sampling both
    # only added dead dimensions to the search, so only the canonical names
    # (the values that were actually in effect) are tuned here.
    #
    # NOTE(review): with max_depth <= 5 a tree can hold at most 2**max_depth
    # leaves, so much of the num_leaves range (20-60) is likely unreachable --
    # confirm whether the ranges should be revisited.
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 250, 350),
        'max_depth': trial.suggest_int('max_depth', 1, 5),
        'learning_rate': trial.suggest_float('learning_rate', 0.06, 0.1),
        'num_leaves': trial.suggest_int('num_leaves', 20, 60),
        # Bagging frequency (alias of `bagging_freq`); the sampled fraction
        # itself is `bagging_fraction` below.
        'subsample_freq': trial.suggest_int('subsample_freq', 1, 3),
        'lambda_l1': trial.suggest_float('lambda_l1', 0.1, 1),
        'lambda_l2': trial.suggest_float('lambda_l2', 0.1, 1),
        'min_gain_to_split': trial.suggest_float('min_gain_to_split', 0.1, 2),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 5, 70),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.5, 0.9),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.1, 0.3),
    }

    # Build the LightGBM model; the fixed seed keeps trials comparable and
    # verbose=-1 silences LightGBM's logging.
    lgbm = LGBMClassifier(
        **params,
        random_state=94,
        verbose=-1,
    )

    # Pipeline combining preprocessing and the model, so the preprocessor is
    # re-fitted inside every cross-validation fold.
    clf = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('classifier', lgbm),
    ])

    # 5-fold cross-validation scored by ROC AUC, using all available cores.
    scores = cross_val_score(
        clf, X_train, y_train, cv=5, scoring='roc_auc', n_jobs=-1
    )
    return scores.mean()