决策树分类及可视化

# Classify the breast cancer dataset with a decision tree.

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

cancer = load_breast_cancer()
# Stratify on the target so both splits keep the same class proportions;
# the fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data, cancer.target, stratify=cancer.target, random_state=42
)
# No max_depth is given here, so the tree depth is not capped.
tree = DecisionTreeClassifier(random_state=0)
tree.fit(X_train, y_train)
# NOTE: "{:3f}" means a minimum field width of 3 with the default six decimal
# places (it is not "{:.3f}"), so the scores are printed like 1.000000.
print("Accuracy on training set:{:3f}".format(tree.score(X_train, y_train)))

print("Accuracy on testing set:{:3f}".format(tree.score(X_test, y_test)))

Accuracy on training set:1.000000
Accuracy on testing set:0.937063


# Overfitting was observed, so apply a pre-pruning strategy: cap the tree
# depth at 4 and refit on the same training split.

tree = DecisionTreeClassifier(max_depth=4, random_state=0).fit(X_train, y_train)

# Score both splits first, then report them with three decimal places.
train_accuracy = tree.score(X_train, y_train)
test_accuracy = tree.score(X_test, y_test)
print("Accuracy on training set:{:.3f}".format(train_accuracy))
print("Accuracy on testing set:{:.3f}".format(test_accuracy))

Accuracy on training set:0.988
Accuracy on testing set:0.951


# Visualize the result.

from sklearn.tree import export_graphviz

# Dump the fitted tree to "tree.dot" in Graphviz format, labelling the two
# classes and using the dataset's own feature names for the split nodes.
export_graphviz(
    tree,
    out_file="tree.dot",
    class_names=["malignant", "benign"],
    feature_names=cancer.feature_names,
    impurity=False,
    filled=True,
)

import graphviz

# Read the generated .dot text back from disk.
with open("tree.dot") as dot_file:
    dot_graph = dot_file.read()

# Bare expression: the Source object is built but not bound to a name.
# NOTE(review): presumably this was run as the last line of a notebook cell,
# where the object is rendered inline; confirm before using it in a script.
graphviz.Source(dot_graph)

决策树分类及可视化


#特征重要性可视化

决策树分类及可视化