Update Day 33 Random Forests

2018-08-11 12:20:16 +08:00
parent 371ce26259
commit 4a80eee31c
1 changed files with 10 additions and 0 deletions
--- a/Code/Day
+++ b/Code/Day
@ -2,23 +2,27 @@
 <p align="center">
  <img src="https://github.com/MachineLearning100/100-Days-Of-ML-Code/blob/master/Info-graphs/Day%2033.png">
 </p>
 ### 导入库
 ```python
 import numpy as np
 import matplotlib.pyplot as plt
 import pandas as pd
 ```
 ### 导入数据集
 ```python
 # Load the dataset from the CSV file in the working directory.
 dataset = pd.read_csv('Social_Network_Ads.csv')
 # Columns are picked by position: 2 and 3 are the inputs, 4 is the label.
 # NOTE(review): the plot axis labels suggest columns 2/3 are Age and
 # Estimated Salary -- confirm against the CSV header.
 feature_cols, target_col = [2, 3], 4
 X = dataset.iloc[:, feature_cols].values
 y = dataset.iloc[:, target_col].values
 ```
 ### 将数据集拆分成训练集和测试集
 ```python
 # sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in
 # 0.20; train_test_split is provided by sklearn.model_selection.
 from sklearn.model_selection import train_test_split
 # Hold out 25% of the samples for testing; the fixed seed keeps the split reproducible.
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)
 ```
 ### 特征缩放
 ```python
 from sklearn.preprocessing import StandardScaler
@ -26,21 +30,25 @@ sc = StandardScaler()
 # Fit the scaler on the training set only, then apply that same fitted
 # transformation to the test set.
 X_train = sc.fit_transform(X_train)
 X_test = sc.transform(X_test)
 ```
 ### 在训练集上训练随机森林
 ```python
 from sklearn.ensemble import RandomForestClassifier
 # Forest of 10 trees using the entropy split criterion; the fixed seed makes
 # repeated runs produce the same model.
 classifier = RandomForestClassifier(
     n_estimators=10,
     criterion='entropy',
     random_state=0,
 )
 classifier.fit(X_train, y_train)
 ```
 ### 预测测试集结果
 ```python
 # Predict labels for the (already scaled) test-set features.
 y_pred = classifier.predict(X_test)
 ```
 ### 生成混淆矩阵，也称作误差矩阵
 ```python
 from sklearn.metrics import confusion_matrix
 # Compare true test labels (first argument) with predictions (second argument).
 # In scikit-learn's convention rows are true classes and columns are predicted classes.
 cm = confusion_matrix(y_test, y_pred)
 ```
 ### 将训练集结果可视化
 ```python
 from matplotlib.colors import ListedColormap
@ -60,6 +68,7 @@ plt.ylabel('Estimated Salary')
 plt.legend()
 plt.show()
 ```
 ### 将测试集结果可视化
 ```python
 from matplotlib.colors import ListedColormap
@ -78,3 +87,4 @@ plt.xlabel('Age')
 plt.ylabel('Estimated Salary')
 plt.legend()
 plt.show()
 ```