# Decision tree classification and visualization (决策树分类及可视化)
# Classify the breast-cancer dataset with a decision tree.
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

cancer = load_breast_cancer()
# Stratify on the target so the train and test splits keep the same class
# proportions; the fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data, cancer.target, stratify=cancer.target, random_state=42
)

# Baseline: a tree with no depth limit.
tree = DecisionTreeClassifier(random_state=0)
tree.fit(X_train, y_train)
# "{:.3f}" (three decimals) matches the format used for the pruned tree;
# the previous "{:3f}" only set a minimum width of 3 and printed six decimals.
print("Accuracy on training set:{:.3f}".format(tree.score(X_train, y_train)))
print("Accuracy on testing set:{:.3f}".format(tree.score(X_test, y_test)))
# Output:
# Accuracy on training set:1.000
# Accuracy on testing set:0.937
# The previous fit shows overfitting, so apply pre-pruning by capping the
# tree depth at 4.
tree = DecisionTreeClassifier(random_state=0, max_depth=4).fit(X_train, y_train)
# One print call, one report line per split.
print(
    "Accuracy on training set:{:.3f}".format(tree.score(X_train, y_train)),
    "Accuracy on testing set:{:.3f}".format(tree.score(X_test, y_test)),
    sep="\n",
)
# Output:
# Accuracy on training set:0.988
# Accuracy on testing set:0.951
# Visualize the fitted tree.
from sklearn.tree import export_graphviz

# Write the tree in Graphviz DOT format; nodes are colour-filled and the
# impurity value is left out of each node label.
export_graphviz(
    tree,
    out_file="tree.dot",
    class_names=["malignant", "benign"],
    feature_names=cancer.feature_names,
    impurity=False,
    filled=True,
)

import graphviz

# export_graphviz writes the file as UTF-8, so read it back with the same
# encoding rather than the platform default.
with open("tree.dot", encoding="utf-8") as f:
    dot_graph = f.read()
# NOTE(review): as a bare expression this presumably renders only when it is
# the last statement of a notebook cell; in a plain script nothing is shown.
graphviz.Source(dot_graph)
#特征重要性可视化