# 当先锋百科网
#
# 首页 1 2 3 4 5 6 7
#
# 决策树 (decision tree)

from math import log
import pandas as pd
import numpy as np
from sklearn import tree
from six import StringIO
from sklearn.model_selection import train_test_split
from sklearn.metrics import auc, roc_curve
from numpy.lib.function_base import interp
from matplotlib import pyplot as plt
from itertools import cycle

 
# Build the data set (returns a DataFrame)
def createdata(path='datac.csv'):
    """Load the training table from a CSV file.

    Only the first eleven columns (positions 0-10) are read; any further
    columns present in the file are ignored.

    Args:
        path: Location of the CSV file. Defaults to ``'datac.csv'`` (resolved
            against the current working directory), which is the file the
            function originally had hard-coded.

    Returns:
        A pandas DataFrame holding the selected columns.
    """
    return pd.read_csv(path, usecols=list(range(11)))

# Draw the decision tree as a PDF (takes a DataFrame)

def showtree_pdf(data, out_file="d3.pdf", n_features=9):
    """Fit a decision-tree classifier on *data* and save its diagram as a PDF.

    The first ``n_features`` columns of *data* are used as the feature
    matrix and the last column as the target. The fitted tree's feature
    importances are printed to stdout, then the tree is exported in
    Graphviz DOT format and rendered to ``out_file``.

    Args:
        data: DataFrame whose leading columns are features and whose last
            column is the class label.
        out_file: Path of the PDF to write. Defaults to ``"d3.pdf"``, the
            name that was previously hard-coded.
        n_features: Number of leading columns used as features. Defaults
            to 9, the value that was previously hard-coded.

    Returns:
        None.

    Note:
        ``pydotplus`` renders through Graphviz, so Graphviz must be
        installed for the PDF to be written.
    """
    import pydotplus    # Python interface to Graphviz's DOT language

    features = data.iloc[:, :n_features]    # feature matrix
    target = data.iloc[:, -1]               # target variable

    # The 1% held-out part is never evaluated here; the split is kept so
    # the tree is trained on exactly the same rows as before.
    X_train, _, y_train, _ = train_test_split(
        features, target, test_size=.01, random_state=4)

    clf = tree.DecisionTreeClassifier()     # classification tree
    clf.fit(X_train, y_train)
    print(clf.feature_importances_)

    # Export to DOT with real column names so nodes are labelled with the
    # feature they split on instead of the anonymous "X[i]".
    dot_data = tree.export_graphviz(
        clf,
        out_file=None,
        feature_names=[str(name) for name in features.columns],
    )
    graph = pydotplus.graph_from_dot_data(dot_data)
    graph.write_pdf(out_file)   # save the rendered tree to disk

if __name__ == "__main__":
    # Script entry point: load the table, then fit and export the tree.
    showtree_pdf(createdata())