From 85b143e2d62571cf48ec05eb471cb5e309a61aa7 Mon Sep 17 00:00:00 2001 From: yx-xyc <60683403+yx-xyc@users.noreply.github.com> Date: Wed, 13 Jan 2021 17:47:06 +0800 Subject: [PATCH 1/3] Update Day 1_Data_Preprocessing.ipynb --- Code/Day 1_Data_Preprocessing.ipynb | 210 ++++++++++++---------------- 1 file changed, 90 insertions(+), 120 deletions(-) diff --git a/Code/Day 1_Data_Preprocessing.ipynb b/Code/Day 1_Data_Preprocessing.ipynb index 9f26c9c..ee873ee 100644 --- a/Code/Day 1_Data_Preprocessing.ipynb +++ b/Code/Day 1_Data_Preprocessing.ipynb @@ -19,7 +19,55 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[[ 7. 2. 3. ]\n [ 4. 3.5 6. ]\n [10. 3.5 9. ]]\nSklearn verion is 0.23.1\n" + ] + } + ], + "source": [ + "import sklearn\n", + "from sklearn.impute import SimpleImputer\n", + "#This block is an example used to learn SimpleImputer\n", + "imp_mean = SimpleImputer(missing_values=np.nan, strategy='mean')\n", + "imp_mean.fit([[7, 2, 3], [4, np.nan, 6], [10, 5, 9]])\n", + "X = [[np.nan, 2, 3], [4, np.nan, 6], [10, np.nan, 9]]\n", + "print(imp_mean.transform(X))\n", + "print(\"Sklearn verion is {}\".format(sklearn.__version__))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import OneHotEncoder\n", + "enc = OneHotEncoder(handle_unknown='ignore')\n", + "X = [['Male', 1], ['Female', 3], ['Female', 2]]\n", + ">>> enc.fit(X)\n", + "OneHotEncoder(handle_unknown='ignore')\n", + ">>> enc.categories_\n", + "[array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)]\n", + ">>> enc.transform([['Female', 1], ['Male', 4]]).toarray()\n", + "array([[1., 0., 1., 0., 0.],\n", + " [0., 1., 0., 0., 0.]])\n", + ">>> enc.inverse_transform([[0, 1, 1, 0, 0], [0, 0, 0, 1, 0]])\n", + "array([['Male', 1],\n", + " [None, 2]], dtype=object)\n", + ">>> enc.get_feature_names(['gender', 'group'])\n", + "array(['gender_Female', 'gender_Male', 'group_1', 'group_2', 'group_3'],\n", + " dtype=object)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -37,27 +85,14 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 57, "metadata": {}, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ - "Step 2: Importing dataset\n", - "X\n", - "[['France' 44.0 72000.0]\n", - " ['Spain' 27.0 48000.0]\n", - " ['Germany' 30.0 54000.0]\n", - " ['Spain' 38.0 61000.0]\n", - " ['Germany' 40.0 nan]\n", - " ['France' 35.0 58000.0]\n", - " ['Spain' nan 52000.0]\n", - " ['France' 48.0 79000.0]\n", - " ['Germany' 50.0 83000.0]\n", - " ['France' 37.0 67000.0]]\n", - "Y\n", - "['No' 'Yes' 'No' 'No' 'Yes' 'Yes' 'No' 'Yes' 'No' 'Yes']\n" + "Step 2: Importing dataset\nX\n[['France' 44.0 72000.0]\n ['Spain' 27.0 48000.0]\n ['Germany' 30.0 54000.0]\n ['Spain' 38.0 61000.0]\n ['Germany' 40.0 nan]\n ['France' 35.0 58000.0]\n ['Spain' nan 52000.0]\n ['France' 48.0 79000.0]\n ['Germany' 50.0 83000.0]\n ['France' 37.0 67000.0]]\nY\n['No' 'Yes' 'No' 'No' 'Yes' 'Yes' 'No' 'Yes' 'No' 'Yes']\n[[44.0 72000.0]\n [27.0 48000.0]\n [30.0 54000.0]\n [38.0 61000.0]\n [40.0 nan]\n [35.0 58000.0]\n [nan 52000.0]\n [48.0 79000.0]\n [50.0 83000.0]\n [37.0 67000.0]]\n" ] } ], @@ -71,7 +106,8 @@ "print(\"X\")\n", "print(X)\n", "print(\"Y\")\n", - "print(Y)" + "print(Y)\n", + "print(X[ : , 1:3])" ] }, { @@ -84,39 +120,30 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 58, "metadata": {}, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ - "---------------------\n", - "Step 3: Handling the missing data\n", - "step2\n", - "X\n", - "[['France' 44.0 72000.0]\n", - " ['Spain' 27.0 48000.0]\n", - " ['Germany' 30.0 54000.0]\n", - " ['Spain' 38.0 61000.0]\n", - " ['Germany' 40.0 63777.77777777778]\n", - " ['France' 35.0 58000.0]\n", - " ['Spain' 38.77777777777778 52000.0]\n", - " ['France' 48.0 79000.0]\n", - " ['Germany' 50.0 83000.0]\n", - " ['France' 37.0 67000.0]]\n" + "---------------------\nStep 3: Handling the missing data\nstep2\nX\n[['France' 44.0 72000.0]\n ['Spain' 27.0 48000.0]\n ['Germany' 30.0 54000.0]\n ['Spain' 38.0 61000.0]\n ['Germany' 40.0 63777.77777777778]\n ['France' 35.0 58000.0]\n ['Spain' 38.77777777777778 52000.0]\n ['France' 48.0 79000.0]\n ['Germany' 50.0 83000.0]\n ['France' 37.0 67000.0]]\n" ] } ], "source": [ - "# If you use the newest version of sklearn, use the lines of code commented out", - "# from sklearn.impute import SimpleImputer", - "# imputer = SimpleImputer(missing_values=\"NaN\", strategy=\"mean\")", - "from sklearn.preprocessing import Imputer\n", + "# If you use the newest version of sklearn, use the lines of code commented out\n", + "from sklearn.impute import SimpleImputer\n", + "imputer = SimpleImputer(missing_values=np.nan, strategy=\"mean\")\n", + "#from sklearn.preprocessing import Imputer\n", "# axis=0表示按列进行\n", - "imputer = Imputer(missing_values = \"NaN\", strategy = \"mean\", axis = 0)\n", - "imputer = imputer.fit(X[ : , 1:3])\n", - "X[ : , 1:3] = imputer.transform(X[ : , 1:3])\n", + "#imputer = Imputer(missing_values = \"NaN\", strategy = \"mean\", axis = 0)\n", + "#print(imputer)\n", + "#\n", + "# print(X[ : , 1:3])\n", + "imputer = imputer.fit(X[ : , 1:3]) #put the data we want to process in to this imputer\n", + "X[ : , 1:3] = imputer.transform(X[ : , 1:3]) #replace the np.nan with mean\n", + "#print(X[ : , 1:3])\n", "print(\"---------------------\")\n", "print(\"Step 3: Handling the missing data\")\n", "print(\"step2\")\n", @@ -134,47 +161,26 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 59, "metadata": {}, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ - "---------------------\n", - "Step 4: Encoding categorical data\n", - "X\n", - "[[1.00000000e+00 0.00000000e+00 0.00000000e+00 4.40000000e+01\n", - " 7.20000000e+04]\n", - " [0.00000000e+00 0.00000000e+00 1.00000000e+00 2.70000000e+01\n", - " 4.80000000e+04]\n", - " [0.00000000e+00 1.00000000e+00 0.00000000e+00 3.00000000e+01\n", - " 5.40000000e+04]\n", - " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.80000000e+01\n", - " 6.10000000e+04]\n", - " [0.00000000e+00 1.00000000e+00 0.00000000e+00 4.00000000e+01\n", - " 6.37777778e+04]\n", - " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.50000000e+01\n", - " 5.80000000e+04]\n", - " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.87777778e+01\n", - " 5.20000000e+04]\n", - " [1.00000000e+00 0.00000000e+00 0.00000000e+00 4.80000000e+01\n", - " 7.90000000e+04]\n", - " [0.00000000e+00 1.00000000e+00 0.00000000e+00 5.00000000e+01\n", - " 8.30000000e+04]\n", - " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.70000000e+01\n", - " 6.70000000e+04]]\n", - "Y\n", - "[0 1 0 0 1 1 0 1 0 1]\n" + "['France' 'Spain' 'Germany' 'Spain' 'Germany' 'France' 'Spain' 'France'\n 'Germany' 'France']\n[0 2 1 2 1 0 2 0 1 0]\n[[0 44.0 72000.0]\n [2 27.0 48000.0]\n [1 30.0 54000.0]\n [2 38.0 61000.0]\n [1 40.0 63777.77777777778]\n [0 35.0 58000.0]\n [2 38.77777777777778 52000.0]\n [0 48.0 79000.0]\n [1 50.0 83000.0]\n [0 37.0 67000.0]]\n---------------------\nStep 4: Encoding categorical data\nX\n[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]\n [0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n [0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]\n [0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]\n [0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]\n [1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]\n [0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]\n [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]\n [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]\n [1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]]\nY\n[0 1 0 0 1 1 0 1 0 1]\n" ] } ], "source": [ "from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n", "labelencoder_X = LabelEncoder()\n", + "print(X[ : , 0])\n", "X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])\n", + "print(X[ : , 0])\n", "#Creating a dummy variable\n", - "onehotencoder = OneHotEncoder(categorical_features = [0])\n", + "onehotencoder = OneHotEncoder('auto')\n", + "print(X)\n", "X = onehotencoder.fit_transform(X).toarray()\n", "labelencoder_Y = LabelEncoder()\n", "Y = labelencoder_Y.fit_transform(Y)\n", @@ -196,41 +202,14 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 60, "metadata": {}, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ - "---------------------\n", - "Step 5: Splitting the datasets into training sets and Test sets\n", - "X_train\n", - "[[ 0.00000000e+00 1.00000000e+00 0.00000000e+00 4.00000000e+01\n", - " 6.37777778e+04]\n", - " [ 1.00000000e+00 0.00000000e+00 0.00000000e+00 3.70000000e+01\n", - " 6.70000000e+04]\n", - " [ 0.00000000e+00 0.00000000e+00 1.00000000e+00 2.70000000e+01\n", - " 4.80000000e+04]\n", - " [ 0.00000000e+00 0.00000000e+00 1.00000000e+00 3.87777778e+01\n", - " 5.20000000e+04]\n", - " [ 1.00000000e+00 0.00000000e+00 0.00000000e+00 4.80000000e+01\n", - " 7.90000000e+04]\n", - " [ 0.00000000e+00 0.00000000e+00 1.00000000e+00 3.80000000e+01\n", - " 6.10000000e+04]\n", - " [ 1.00000000e+00 0.00000000e+00 0.00000000e+00 4.40000000e+01\n", - " 7.20000000e+04]\n", - " [ 1.00000000e+00 0.00000000e+00 0.00000000e+00 3.50000000e+01\n", - " 5.80000000e+04]]\n", - "X_test\n", - "[[ 0.00000000e+00 1.00000000e+00 0.00000000e+00 3.00000000e+01\n", - " 5.40000000e+04]\n", - " [ 0.00000000e+00 1.00000000e+00 0.00000000e+00 5.00000000e+01\n", - " 8.30000000e+04]]\n", - "Y_train\n", - "[1 1 1 0 1 0 0 1]\n", - "Y_test\n", - "[0 0]\n" + "---------------------\nStep 5: Splitting the datasets into training sets and Test sets\nX_train\n[[0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]\n [1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]\n [0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n [0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]\n [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]\n [0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]\n [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]\n [1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]]\nX_test\n[[0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]\n [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]]\nY_train\n[1 1 1 0 1 0 0 1]\nY_test\n[0 0]\n" ] } ], @@ -259,27 +238,14 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 44, "metadata": {}, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ - "---------------------\n", - "Step 6: Feature Scaling\n", - "X_train\n", - "[[-1. 2.64575131 -0.77459667 0.26306757 0.12381479]\n", - " [ 1. -0.37796447 -0.77459667 -0.25350148 0.46175632]\n", - " [-1. -0.37796447 1.29099445 -1.97539832 -1.53093341]\n", - " [-1. -0.37796447 1.29099445 0.05261351 -1.11141978]\n", - " [ 1. -0.37796447 -0.77459667 1.64058505 1.7202972 ]\n", - " [-1. -0.37796447 1.29099445 -0.0813118 -0.16751412]\n", - " [ 1. -0.37796447 -0.77459667 0.95182631 0.98614835]\n", - " [ 1. -0.37796447 -0.77459667 -0.59788085 -0.48214934]]\n", - "X_test\n", - "[[ 0. 0. 0. -1. -1.]\n", - " [ 0. 0. 0. 1. 1.]]\n" + "---------------------\nStep 6: Feature Scaling\nX_train\n[[-1. 2.64575131 -0.77459667 -0.37796447 0. -0.37796447\n -0.37796447 -0.37796447 -0.37796447 2.64575131 -0.37796447 -0.37796447\n 0. -0.37796447 -0.37796447 0. -0.37796447 -0.37796447\n 2.64575131 -0.37796447 -0.37796447 -0.37796447 0. ]\n [ 1. -0.37796447 -0.77459667 -0.37796447 0. -0.37796447\n 2.64575131 -0.37796447 -0.37796447 -0.37796447 -0.37796447 -0.37796447\n 0. -0.37796447 -0.37796447 0. -0.37796447 -0.37796447\n -0.37796447 2.64575131 -0.37796447 -0.37796447 0. ]\n [-1. -0.37796447 1.29099445 2.64575131 0. -0.37796447\n -0.37796447 -0.37796447 -0.37796447 -0.37796447 -0.37796447 -0.37796447\n 0. 2.64575131 -0.37796447 0. -0.37796447 -0.37796447\n -0.37796447 -0.37796447 -0.37796447 -0.37796447 0. ]\n [-1. -0.37796447 1.29099445 -0.37796447 0. -0.37796447\n -0.37796447 -0.37796447 2.64575131 -0.37796447 -0.37796447 -0.37796447\n 0. -0.37796447 2.64575131 0. -0.37796447 -0.37796447\n -0.37796447 -0.37796447 -0.37796447 -0.37796447 0. ]\n [ 1. -0.37796447 -0.77459667 -0.37796447 0. -0.37796447\n -0.37796447 -0.37796447 -0.37796447 -0.37796447 -0.37796447 2.64575131\n 0. -0.37796447 -0.37796447 0. -0.37796447 -0.37796447\n -0.37796447 -0.37796447 -0.37796447 2.64575131 0. ]\n [-1. -0.37796447 1.29099445 -0.37796447 0. -0.37796447\n -0.37796447 2.64575131 -0.37796447 -0.37796447 -0.37796447 -0.37796447\n 0. -0.37796447 -0.37796447 0. -0.37796447 2.64575131\n -0.37796447 -0.37796447 -0.37796447 -0.37796447 0. ]\n [ 1. -0.37796447 -0.77459667 -0.37796447 0. -0.37796447\n -0.37796447 -0.37796447 -0.37796447 -0.37796447 2.64575131 -0.37796447\n 0. -0.37796447 -0.37796447 0. -0.37796447 -0.37796447\n -0.37796447 -0.37796447 2.64575131 -0.37796447 0. ]\n [ 1. -0.37796447 -0.77459667 -0.37796447 0. 2.64575131\n -0.37796447 -0.37796447 -0.37796447 -0.37796447 -0.37796447 -0.37796447\n 0. -0.37796447 -0.37796447 0. 2.64575131 -0.37796447\n -0.37796447 -0.37796447 -0.37796447 -0.37796447 0. ]]\nX_test\n[[-1. 2.64575131 -0.77459667 -0.37796447 1. -0.37796447\n -0.37796447 -0.37796447 -0.37796447 -0.37796447 -0.37796447 -0.37796447\n 0. -0.37796447 -0.37796447 1. -0.37796447 -0.37796447\n -0.37796447 -0.37796447 -0.37796447 -0.37796447 0. ]\n [-1. 2.64575131 -0.77459667 -0.37796447 0. -0.37796447\n -0.37796447 -0.37796447 -0.37796447 -0.37796447 -0.37796447 -0.37796447\n 1. -0.37796447 -0.37796447 0. -0.37796447 -0.37796447\n -0.37796447 -0.37796447 -0.37796447 -0.37796447 1. ]]\n" ] } ], @@ -315,9 +281,13 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" + "name": "python3", + "display_name": "Python 3.8.3 64-bit (conda)", + "metadata": { + "interpreter": { + "hash": "1b78ff499ec469310b6a6795c4effbbfc85eb20a6ba0cf828a15721670711b2c" + } + } }, "language_info": { "codemirror_mode": { @@ -329,9 +299,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.5" + "version": "3.8.3-final" } }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file From c428d085a6e7a62a5e17ca2c58b1478f7c5e8437 Mon Sep 17 00:00:00 2001 From: yx-xyc <60683403+yx-xyc@users.noreply.github.com> Date: Wed, 13 Jan 2021 18:10:43 +0800 Subject: [PATCH 2/3] Update Day 1_Data_Preprocessing.ipynb --- Code/Day 1_Data_Preprocessing.ipynb | 54 ++++++++++++++--------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/Code/Day 1_Data_Preprocessing.ipynb b/Code/Day 1_Data_Preprocessing.ipynb index ee873ee..be22f60 100644 --- a/Code/Day 1_Data_Preprocessing.ipynb +++ b/Code/Day 1_Data_Preprocessing.ipynb @@ -19,7 +19,17 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -42,10 +52,6 @@ ] }, { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], "source": [ "from sklearn.preprocessing import OneHotEncoder\n", "enc = OneHotEncoder(handle_unknown='ignore')\n", @@ -63,16 +69,19 @@ ">>> enc.get_feature_names(['gender', 'group'])\n", "array(['gender_Female', 'gender_Male', 'group_1', 'group_2', 'group_3'],\n", " dtype=object)" - ] - }, - { + ], "cell_type": "code", - "execution_count": 4, "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import pandas as pd" + "execution_count": 11, + "outputs": [ + { + "output_type": "error", + "ename": "SyntaxError", + "evalue": "invalid syntax (, line 4)", + "traceback": [ + "\u001b[1;36m File \u001b[1;32m\"\"\u001b[1;36m, line \u001b[1;32m4\u001b[0m\n\u001b[1;33m >>> enc.fit(X)\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax\n" + ] + } ] }, { @@ -85,7 +94,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -120,7 +129,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -161,7 +170,7 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -202,7 +211,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -238,7 +247,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -268,15 +277,6 @@ "source": [ "完整的项目请前往Github项目100-Days-Of-ML-Code查看。有任何的建议或者意见欢迎在issue中提出~" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] } ], "metadata": { From 719200ee62cb9e467352c3bb0f95962fdb43afa0 Mon Sep 17 00:00:00 2001 From: yx-xyc <60683403+yx-xyc@users.noreply.github.com> Date: Wed, 13 Jan 2021 18:13:31 +0800 Subject: [PATCH 3/3] Update Day 1_Data_Preprocessing.ipynb --- Code/Day 1_Data_Preprocessing.ipynb | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/Code/Day 1_Data_Preprocessing.ipynb b/Code/Day 1_Data_Preprocessing.ipynb index be22f60..df7a288 100644 --- a/Code/Day 1_Data_Preprocessing.ipynb +++ b/Code/Day 1_Data_Preprocessing.ipynb @@ -19,7 +19,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -29,7 +29,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -72,14 +72,14 @@ ], "cell_type": "code", "metadata": {}, - "execution_count": 11, + "execution_count": 4, "outputs": [ { "output_type": "error", "ename": "SyntaxError", - "evalue": "invalid syntax (, line 4)", + "evalue": "invalid syntax (, line 4)", "traceback": [ - "\u001b[1;36m File \u001b[1;32m\"\"\u001b[1;36m, line \u001b[1;32m4\u001b[0m\n\u001b[1;33m >>> enc.fit(X)\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax\n" + "\u001b[1;36m File \u001b[1;32m\"\"\u001b[1;36m, line \u001b[1;32m4\u001b[0m\n\u001b[1;33m >>> enc.fit(X)\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax\n" ] } ] @@ -94,7 +94,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -129,7 +129,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -170,7 +170,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -211,7 +211,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -247,7 +247,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -263,12 +263,13 @@ "sc_X = StandardScaler()\n", "X_train = sc_X.fit_transform(X_train)\n", "X_test = sc_X.transform(X_test)\n", + "\n", "print(\"---------------------\")\n", "print(\"Step 6: Feature Scaling\")\n", "print(\"X_train\")\n", "print(X_train)\n", "print(\"X_test\")\n", - "print(X_test)" + "print(X_test)\n" ] }, {