From d4ec8ddb9957e101c29a9639ce8b689aac46a585 Mon Sep 17 00:00:00 2001
From: yx-xyc <60683403+yx-xyc@users.noreply.github.com>
Date: Wed, 13 Jan 2021 17:34:20 +0800
Subject: [PATCH] Update Day 1_Data_Preprocessing.ipynb

---
 Code/Day 1_Data_Preprocessing.ipynb | 50 +++++++++++++++++++++++++++--
 1 file changed, 47 insertions(+), 3 deletions(-)
diff --git a/Code/Day 1_Data_Preprocessing.ipynb b/Code/Day 1_Data_Preprocessing.ipynb
index d19db6a..7ebf597 100644
--- a/Code/Day 1_Data_Preprocessing.ipynb	
+++ b/Code/Day 1_Data_Preprocessing.ipynb	
@@ -19,7 +19,11 @@
   },
   {
    "cell_type": "code",
+<<<<<<< Updated upstream
    "execution_count": 4,
+=======
+   "execution_count": 2,
+>>>>>>> Stashed changes
    "metadata": {},
    "outputs": [],
    "source": [
@@ -37,15 +41,44 @@
   },
   {
    "cell_type": "code",
+<<<<<<< Updated upstream
    "execution_count": 7,
+=======
+   "execution_count": 6,
+>>>>>>> Stashed changes
    "metadata": {},
    "outputs": [
     {
      "output_type": "stream",
      "name": "stdout",
      "text": [
+<<<<<<< Updated upstream
       "Step 2: Importing dataset\nX\n[['France' 44.0 72000.0]\n ['Spain' 27.0 48000.0]\n ['Germany' 30.0 54000.0]\n ['Spain' 38.0 61000.0]\n ['Germany' 40.0 nan]\n ['France' 35.0 58000.0]\n ['Spain' nan 52000.0]\n ['France' 48.0 79000.0]\n ['Germany' 50.0 83000.0]\n ['France' 37.0 67000.0]]\nY\n['No' 'Yes' 'No' 'No' 'Yes' 'Yes' 'No' 'Yes' 'No' 'Yes']\n"
+=======
+      "Step 2: Importing dataset\nX:\n[['France' 44.0 72000.0]\n ['Spain' 27.0 48000.0]\n ['Germany' 30.0 54000.0]\n ['Spain' 38.0 61000.0]\n ['Germany' 40.0 nan]\n ['France' 35.0 58000.0]\n ['Spain' nan 52000.0]\n ['France' 48.0 79000.0]\n ['Germany' 50.0 83000.0]\n ['France' 37.0 67000.0]]\nY:\n['No' 'Yes' 'No' 'No' 'Yes' 'Yes' 'No' 'Yes' 'No' 'Yes']\n"
+>>>>>>> Stashed changes
      ]
+    },
+    {
+     "output_type": "execute_result",
+     "data": {
+      "text/plain": [
+       "   Country   Age   Salary Purchased\n",
+       "0   France  44.0  72000.0        No\n",
+       "1    Spain  27.0  48000.0       Yes\n",
+       "2  Germany  30.0  54000.0        No\n",
+       "3    Spain  38.0  61000.0        No\n",
+       "4  Germany  40.0      NaN       Yes\n",
+       "5   France  35.0  58000.0       Yes\n",
+       "6    Spain   NaN  52000.0        No\n",
+       "7   France  48.0  79000.0       Yes\n",
+       "8  Germany  50.0  83000.0        No\n",
+       "9   France  37.0  67000.0       Yes"
+      ],
+      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Country</th>\n      <th>Age</th>\n      <th>Salary</th>\n      <th>Purchased</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>France</td>\n      <td>44.0</td>\n      <td>72000.0</td>\n      <td>No</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Spain</td>\n      <td>27.0</td>\n      <td>48000.0</td>\n      <td>Yes</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Germany</td>\n      <td>30.0</td>\n      <td>54000.0</td>\n      <td>No</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Spain</td>\n      <td>38.0</td>\n      <td>61000.0</td>\n      <td>No</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Germany</td>\n      <td>40.0</td>\n      <td>NaN</td>\n      <td>Yes</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>France</td>\n      <td>35.0</td>\n      <td>58000.0</td>\n      <td>Yes</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>Spain</td>\n      <td>NaN</td>\n      <td>52000.0</td>\n      <td>No</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>France</td>\n      <td>48.0</td>\n      <td>79000.0</td>\n      <td>Yes</td>\n    </tr>\n    <tr>\n      <th>8</th>\n      <td>Germany</td>\n      <td>50.0</td>\n      <td>83000.0</td>\n      <td>No</td>\n    </tr>\n    <tr>\n      <th>9</th>\n      <td>France</td>\n      <td>37.0</td>\n      <td>67000.0</td>\n      <td>Yes</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
+     },
+     "metadata": {},
+     "execution_count": 6
     }
    ],
    "source": [
@@ -55,10 +88,11 @@
     "#取最后一列\n",
     "Y = dataset.iloc[ : , 3].values\n",
     "print(\"Step 2: Importing dataset\")\n",
-    "print(\"X\")\n",
+    "print(\"X:\")\n",
     "print(X)\n",
-    "print(\"Y\")\n",
-    "print(Y)"
+    "print(\"Y:\")\n",
+    "print(Y)\n",
+    "dataset.head(100)"
    ]
   },
   {
@@ -98,6 +132,7 @@
    ],
    "source": [
     "# If you use the newest version of sklearn, use the lines of code commented out\n",
+<<<<<<< Updated upstream
     "from sklearn.impute import SimpleImputer\n",
     "imputer = SimpleImputer(missing_values=np.nan, strategy=\"mean\")\n",
     "#from sklearn.preprocessing import Imputer\n",
@@ -105,6 +140,15 @@
     "#imputer = Imputer(missing_values = \"NaN\", strategy = \"mean\", axis = 0)\n",
     "print(X[ 0:4 , 1:3])\n",
     "imputer = imputer.fit(X[ 0:4 , 1:3])\n",
+=======
+    "# from sklearn.impute import SimpleImputer\n",
+    "# imputer = SimpleImputer(missing_values=np.nan, strategy=\"mean\")\n",
+    "from sklearn.preprocessing import Imputer\n",
+    "# axis=0 means the imputation is computed column-wise\n",
+    "imputer = Imputer(missing_values = \"NaN\", strategy = \"mean\", axis = 0)\n",
+    "print(imputer)\n",
+    "imputer = imputer.fit(X[ : , 1:3])\n",
+>>>>>>> Stashed changes
     "X[ : , 1:3] = imputer.transform(X[ : , 1:3])\n",
     "print(\"---------------------\")\n",
     "print(\"Step 3: Handling the missing data\")\n",

	Country	Age	Salary	Purchased
0	France	44.0	72000.0	No
1	Spain	27.0	48000.0	Yes
2	Germany	30.0	54000.0	No
3	Spain	38.0	61000.0	No
4	Germany	40.0	NaN	Yes
5	France	35.0	58000.0	Yes
6	Spain	NaN	52000.0	No
7	France	48.0	79000.0	Yes
8	Germany	50.0	83000.0	No
9	France	37.0	67000.0	Yes