1
import xgboost as xgb
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import swanlab
from swanlab.integration.xgboost import SwanLabCallback

# Train an XGBoost binary classifier on the scikit-learn breast-cancer dataset,
# stream the per-round evaluation metrics to SwanLab, then report accuracy on
# the held-out split and save the trained booster.

# Hyperparameters are declared exactly once. Both the SwanLab run config and
# the XGBoost call below read from this dict, so the values that get logged
# cannot drift from the values that are actually used for training.
hyperparams = {
    "learning_rate": 0.1,
    "max_depth": 3,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "num_round": 100,
}

# Initialise the SwanLab run.
swanlab.init(project="xgboost-breast-cancer", config=hyperparams)

# Load the dataset.
data = load_breast_cancer()
X = data.data
y = data.target

# Split into training and test sets (80/20, fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Convert to DMatrix, XGBoost's internal data format.
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

# Booster parameters; the tunable ones come from `hyperparams`.
params = {
    "objective": "binary:logistic",  # binary classification
    "max_depth": hyperparams["max_depth"],  # maximum tree depth
    "eta": hyperparams["learning_rate"],  # learning rate
    "subsample": hyperparams["subsample"],  # row sampling ratio
    "colsample_bytree": hyperparams["colsample_bytree"],  # feature sampling ratio
    "eval_metric": "logloss",  # evaluation metric
}

# Train the model; the callback forwards the metrics of both eval sets to SwanLab.
num_round = hyperparams["num_round"]  # number of boosting rounds
bst = xgb.train(
    params,
    dtrain,
    num_round,
    evals=[(dtrain, "train"), (dtest, "test")],
    callbacks=[SwanLabCallback()],
)

# Predict probabilities, then threshold them into class labels in one
# vectorised step. This matches rounding each probability to the nearest
# integer (a value of exactly 0.5 maps to 0 in both forms).
y_pred = bst.predict(dtest)
y_pred_binary = (y_pred > 0.5).astype(int)

# Evaluate the model.
accuracy = accuracy_score(y_test, y_pred_binary)
print(f"Accuracy: {accuracy:.4f} ")

# Print the classification report.
print("Classification Report:")
print(classification_report(y_test, y_pred_binary, target_names=data.target_names))

# Save the model.
# NOTE(review): XGBoost's model I/O documentation recommends a ".json" or
# ".ubj" extension; the original path is kept so existing consumers of this
# file are unaffected -- confirm before renaming.
bst.save_model("xgboost_model.model")

# End the SwanLab session.
swanlab.finish()
0 commit comments