From 4a80eee31c04381aeb8fa6f363272ef03a55068a Mon Sep 17 00:00:00 2001
From: AnnaGe <40264376+AnnaXJGe@users.noreply.github.com>
Date: Sat, 11 Aug 2018 12:20:16 +0800
Subject: [PATCH] Update Day 33 Random Forests

---
 Code/Day 33 Random Forests | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/Code/Day 33 Random Forests b/Code/Day 33 Random Forests
index 1daa6db..190828b 100644
--- a/Code/Day 33 Random Forests
+++ b/Code/Day 33 Random Forests
@@ -2,23 +2,27 @@

+
 ### 导入库
 ```python
 import numpy as np
 import matplotlib.pyplot as plt
 import pandas as pd
 ```
+
 ### 导入数据集
 ```python
 dataset = pd.read_csv('Social_Network_Ads.csv')
 X = dataset.iloc[:, [2, 3]].values
 y = dataset.iloc[:, 4].values
 ```
+
 ### 将数据集拆分成训练集和测试集
 ```python
 from sklearn.cross_validation import train_test_split
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)
 ```
+
 ### 特征缩放
 ```python
 from sklearn.preprocessing import StandardScaler
@@ -26,21 +30,25 @@ sc = StandardScaler()
 X_train = sc.fit_transform(X_train)
 X_test = sc.transform(X_test)
 ```
+
 ### 调试训练集的随机森林
 ```python
 from sklearn.ensemble import RandomForestClassifier
 classifier = RandomForestClassifier(n_estimators = 10, criterion = 'entropy', random_state = 0)
 classifier.fit(X_train, y_train)
 ```
+
 ### 预测测试集结果
 ```python
 y_pred = classifier.predict(X_test)
 ```
+
 ### 生成混淆矩阵,也称作误差矩阵
 ```python
 from sklearn.metrics import confusion_matrix
 cm = confusion_matrix(y_test, y_pred)
 ```
+
 ### 将训练集结果可视化
 ```python
 from matplotlib.colors import ListedColormap
@@ -60,6 +68,7 @@ plt.ylabel('Estimated Salary')
 plt.legend()
 plt.show()
 ```
+
 ### 将数据集结果可视化
 ```python
 from matplotlib.colors import ListedColormap
@@ -78,3 +87,4 @@ plt.xlabel('Age')
 plt.ylabel('Estimated Salary')
 plt.legend()
 plt.show()
+```