diff --git a/Code/Day 1_Data_Preprocessing.ipynb b/Code/Day 1_Data_Preprocessing.ipynb index 8bd9e38..6ddcbda 100644 --- a/Code/Day 1_Data_Preprocessing.ipynb +++ b/Code/Day 1_Data_Preprocessing.ipynb @@ -11,6 +11,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "搭建anaconda环境,参考 https://zhuanlan.zhihu.com/p/33358809\n", + "\n", "## 第一步:导入需要的库\n", "这两个是我们每次都需要导入的库。NumPy包含数学计算函数。Pandas用于导入和管理数据集。" ] @@ -18,9 +20,7 @@ { "cell_type": "code", "execution_count": 1, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", @@ -63,7 +63,9 @@ ], "source": [ "dataset = pd.read_csv('../datasets/Data.csv')\n", + "# 不包括最后一列的所有列\n", "X = dataset.iloc[ : , :-1].values\n", + "#取最后一列\n", "Y = dataset.iloc[ : , 3].values\n", "print(\"Step 2: Importing dataset\")\n", "print(\"X\")\n", @@ -108,6 +110,7 @@ ], "source": [ "from sklearn.preprocessing import Imputer\n", + "# axis=0表示按列进行\n", "imputer = Imputer(missing_values = \"NaN\", strategy = \"mean\", axis = 0)\n", "imputer = imputer.fit(X[ : , 1:3])\n", "X[ : , 1:3] = imputer.transform(X[ : , 1:3])\n", @@ -138,26 +141,26 @@ "---------------------\n", "Step 4: Encoding categorical data\n", "X\n", - "[[ 1.00000000e+00 0.00000000e+00 0.00000000e+00 4.40000000e+01\n", - " 7.20000000e+04]\n", - " [ 0.00000000e+00 0.00000000e+00 1.00000000e+00 2.70000000e+01\n", - " 4.80000000e+04]\n", - " [ 0.00000000e+00 1.00000000e+00 0.00000000e+00 3.00000000e+01\n", - " 5.40000000e+04]\n", - " [ 0.00000000e+00 0.00000000e+00 1.00000000e+00 3.80000000e+01\n", - " 6.10000000e+04]\n", - " [ 0.00000000e+00 1.00000000e+00 0.00000000e+00 4.00000000e+01\n", - " 6.37777778e+04]\n", - " [ 1.00000000e+00 0.00000000e+00 0.00000000e+00 3.50000000e+01\n", - " 5.80000000e+04]\n", - " [ 0.00000000e+00 0.00000000e+00 1.00000000e+00 3.87777778e+01\n", - " 5.20000000e+04]\n", - " [ 1.00000000e+00 0.00000000e+00 0.00000000e+00 4.80000000e+01\n", - " 7.90000000e+04]\n", - " [ 0.00000000e+00 1.00000000e+00 0.00000000e+00 5.00000000e+01\n", - " 8.30000000e+04]\n", - " [ 1.00000000e+00 0.00000000e+00 0.00000000e+00 3.70000000e+01\n", - " 6.70000000e+04]]\n", + "[[1.00000000e+00 0.00000000e+00 0.00000000e+00 4.40000000e+01\n", + " 7.20000000e+04]\n", + " [0.00000000e+00 0.00000000e+00 1.00000000e+00 2.70000000e+01\n", + " 4.80000000e+04]\n", + " [0.00000000e+00 1.00000000e+00 0.00000000e+00 3.00000000e+01\n", + " 5.40000000e+04]\n", + " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.80000000e+01\n", + " 6.10000000e+04]\n", + " [0.00000000e+00 1.00000000e+00 0.00000000e+00 4.00000000e+01\n", + " 6.37777778e+04]\n", + " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.50000000e+01\n", + " 5.80000000e+04]\n", + " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.87777778e+01\n", + " 5.20000000e+04]\n", + " [1.00000000e+00 0.00000000e+00 0.00000000e+00 4.80000000e+01\n", + " 7.90000000e+04]\n", + " [0.00000000e+00 1.00000000e+00 0.00000000e+00 5.00000000e+01\n", + " 8.30000000e+04]\n", + " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.70000000e+01\n", + " 6.70000000e+04]]\n", "Y\n", "[0 1 0 0 1 1 0 1 0 1]\n" ] @@ -323,7 +326,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.2" + "version": "3.6.5" } }, "nbformat": 4,