diff --git a/Code/Day 1_Data_Preprocessing.ipynb b/Code/Day 1_Data_Preprocessing.ipynb index 7ebf597..d19db6a 100644 --- a/Code/Day 1_Data_Preprocessing.ipynb +++ b/Code/Day 1_Data_Preprocessing.ipynb @@ -19,11 +19,7 @@ }, { "cell_type": "code", -<<<<<<< Updated upstream "execution_count": 4, -======= - "execution_count": 2, ->>>>>>> Stashed changes "metadata": {}, "outputs": [], "source": [ @@ -41,44 +37,15 @@ }, { "cell_type": "code", -<<<<<<< Updated upstream "execution_count": 7, -======= - "execution_count": 6, ->>>>>>> Stashed changes "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ -<<<<<<< Updated upstream "Step 2: Importing dataset\nX\n[['France' 44.0 72000.0]\n ['Spain' 27.0 48000.0]\n ['Germany' 30.0 54000.0]\n ['Spain' 38.0 61000.0]\n ['Germany' 40.0 nan]\n ['France' 35.0 58000.0]\n ['Spain' nan 52000.0]\n ['France' 48.0 79000.0]\n ['Germany' 50.0 83000.0]\n ['France' 37.0 67000.0]]\nY\n['No' 'Yes' 'No' 'No' 'Yes' 'Yes' 'No' 'Yes' 'No' 'Yes']\n" -======= - "Step 2: Importing dataset\nX:\n[['France' 44.0 72000.0]\n ['Spain' 27.0 48000.0]\n ['Germany' 30.0 54000.0]\n ['Spain' 38.0 61000.0]\n ['Germany' 40.0 nan]\n ['France' 35.0 58000.0]\n ['Spain' nan 52000.0]\n ['France' 48.0 79000.0]\n ['Germany' 50.0 83000.0]\n ['France' 37.0 67000.0]]\nY:\n['No' 'Yes' 'No' 'No' 'Yes' 'Yes' 'No' 'Yes' 'No' 'Yes']\n" ->>>>>>> Stashed changes ] - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " Country Age Salary Purchased\n", - "0 France 44.0 72000.0 No\n", - "1 Spain 27.0 48000.0 Yes\n", - "2 Germany 30.0 54000.0 No\n", - "3 Spain 38.0 61000.0 No\n", - "4 Germany 40.0 NaN Yes\n", - "5 France 35.0 58000.0 Yes\n", - "6 Spain NaN 52000.0 No\n", - "7 France 48.0 79000.0 Yes\n", - "8 Germany 50.0 83000.0 No\n", - "9 France 37.0 67000.0 Yes" - ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
CountryAgeSalaryPurchased
0France44.072000.0No
1Spain27.048000.0Yes
2Germany30.054000.0No
3Spain38.061000.0No
4Germany40.0NaNYes
5France35.058000.0Yes
6SpainNaN52000.0No
7France48.079000.0Yes
8Germany50.083000.0No
9France37.067000.0Yes
\n
" - }, - "metadata": {}, - "execution_count": 6 } ], "source": [ @@ -88,11 +55,10 @@ "#取最后一列\n", "Y = dataset.iloc[ : , 3].values\n", "print(\"Step 2: Importing dataset\")\n", - "print(\"X:\")\n", + "print(\"X\")\n", "print(X)\n", - "print(\"Y:\")\n", - "print(Y)\n", - "dataset.head(100)" + "print(\"Y\")\n", + "print(Y)" ] }, { @@ -132,7 +98,6 @@ ], "source": [ "# If you use the newest version of sklearn, use the lines of code commented out\n", -<<<<<<< Updated upstream "from sklearn.impute import SimpleImputer\n", "imputer = SimpleImputer(missing_values=np.nan, strategy=\"mean\")\n", "#from sklearn.preprocessing import Imputer\n", @@ -140,15 +105,6 @@ "#imputer = Imputer(missing_values = \"NaN\", strategy = \"mean\", axis = 0)\n", "print(X[ 0:4 , 1:3])\n", "imputer = imputer.fit(X[ 0:4 , 1:3])\n", -======= - "# from sklearn.impute import SimpleImputer\n", - "# imputer = SimpleImputer(missing_values=\"NaN\", strategy=\"mean\")\n", - "from sklearn.preprocessing import Imputer\n", - "# axis=0表示按列进行\n", - "imputer = Imputer(missing_values = \"NaN\", strategy = \"mean\", axis = 0)\n", - "print(imputer)\n", - "imputer = imputer.fit(X[ : , 1:3])\n", ->>>>>>> Stashed changes "X[ : , 1:3] = imputer.transform(X[ : , 1:3])\n", "print(\"---------------------\")\n", "print(\"Step 3: Handling the missing data\")\n",