Update Day 3_Multiple_Linear_Regression.ipynb

This commit is contained in:
yx-xyc
2021-01-18 19:24:18 +08:00
parent aa869023d1
commit a7f2d63764

View File

@ -40,40 +40,44 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 30, "execution_count": 33,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout",
"output_type": "stream", "output_type": "stream",
"name": "stdout",
"text": [ "text": [
"[[165349.2 136897.8 471784.1 'New York']\n", "[[165349.2 136897.8 471784.1 'New York']\n [162597.7 151377.59 443898.53 'California']\n [153441.51 101145.55 407934.54 'Florida']\n [144372.41 118671.85 383199.62 'New York']\n [142107.34 91391.77 366168.42 'Florida']\n [131876.9 99814.71 362861.36 'New York']\n [134615.46 147198.87 127716.82 'California']\n [130298.13 145530.06 323876.68 'Florida']\n [120542.52 148718.95 311613.29 'New York']\n [123334.88 108679.17 304981.62 'California']]\n[192261.83 191792.06 191050.39 182901.99 166187.94 156991.12 156122.51\n 155752.6 152211.77 149759.96 146121.95 144259.4 141585.52 134307.35\n 132602.65 129917.04 126992.93 125370.37 124266.9 122776.86 118474.03\n 111313.02 110352.25 108733.99 108552.04 107404.34 105733.54 105008.31\n 103282.38 101004.64 99937.59 97483.56 97427.84 96778.92 96712.8\n 96479.51 90708.19 89949.14 81229.06 81005.76 78239.91 77798.83\n 71498.49 69758.98 65200.33 64926.08 49490.75 42559.73 35673.41\n 14681.4 ]\n['New York' 'California' 'Florida' 'New York' 'Florida' 'New York'\n 'California' 'Florida' 'New York' 'California' 'Florida' 'California'\n 'Florida' 'California' 'Florida' 'New York' 'California' 'New York'\n 'Florida' 'New York' 'California' 'New York' 'Florida' 'Florida'\n 'New York' 'California' 'Florida' 'New York' 'Florida' 'New York'\n 'Florida' 'New York' 'California' 'Florida' 'California' 'New York'\n 'Florida' 'California' 'New York' 'California' 'California' 'Florida'\n 'California' 'New York' 'California' 'New York' 'Florida' 'California'\n 'New York' 'California']\n"
" [162597.7 151377.59 443898.53 'California']\n",
" [153441.51 101145.55 407934.54 'Florida']\n",
" [144372.41 118671.85 383199.62 'New York']\n",
" [142107.34 91391.77 366168.42 'Florida']\n",
" [131876.9 99814.71 362861.36 'New York']\n",
" [134615.46 147198.87 127716.82 'California']\n",
" [130298.13 145530.06 323876.68 'Florida']\n",
" [120542.52 148718.95 311613.29 'New York']\n",
" [123334.88 108679.17 304981.62 'California']]\n",
"[192261.83 191792.06 191050.39 182901.99 166187.94 156991.12 156122.51\n",
" 155752.6 152211.77 149759.96 146121.95 144259.4 141585.52 134307.35\n",
" 132602.65 129917.04 126992.93 125370.37 124266.9 122776.86 118474.03\n",
" 111313.02 110352.25 108733.99 108552.04 107404.34 105733.54 105008.31\n",
" 103282.38 101004.64 99937.59 97483.56 97427.84 96778.92 96712.8\n",
" 96479.51 90708.19 89949.14 81229.06 81005.76 78239.91 77798.83\n",
" 71498.49 69758.98 65200.33 64926.08 49490.75 42559.73 35673.41\n",
" 14681.4 ]\n"
] ]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
" R&D Spend Administration Marketing Spend State Profit\n",
"0 165349.20 136897.80 471784.10 New York 192261.83\n",
"1 162597.70 151377.59 443898.53 California 191792.06\n",
"2 153441.51 101145.55 407934.54 Florida 191050.39\n",
"3 144372.41 118671.85 383199.62 New York 182901.99\n",
"4 142107.34 91391.77 366168.42 Florida 166187.94"
],
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>R&amp;D Spend</th>\n <th>Administration</th>\n <th>Marketing Spend</th>\n <th>State</th>\n <th>Profit</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>165349.20</td>\n <td>136897.80</td>\n <td>471784.10</td>\n <td>New York</td>\n <td>192261.83</td>\n </tr>\n <tr>\n <th>1</th>\n <td>162597.70</td>\n <td>151377.59</td>\n <td>443898.53</td>\n <td>California</td>\n <td>191792.06</td>\n </tr>\n <tr>\n <th>2</th>\n <td>153441.51</td>\n <td>101145.55</td>\n <td>407934.54</td>\n <td>Florida</td>\n <td>191050.39</td>\n </tr>\n <tr>\n <th>3</th>\n <td>144372.41</td>\n <td>118671.85</td>\n <td>383199.62</td>\n <td>New York</td>\n <td>182901.99</td>\n </tr>\n <tr>\n <th>4</th>\n <td>142107.34</td>\n <td>91391.77</td>\n <td>366168.42</td>\n <td>Florida</td>\n <td>166187.94</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {},
"execution_count": 33
} }
], ],
"source": [ "source": [
"dataset = pd.read_csv('../datasets/50_Startups.csv')\n", "dataset = pd.read_csv('../datasets/50_Startups.csv')\n",
"X = dataset.iloc[ : , :-1].values\n", "X = dataset.iloc[ : , :-1].values\n",
"Y = dataset.iloc[ : , 4 ].values\n", "Y = dataset.iloc[ : , 4 ].values\n",
"Z = dataset.iloc[ : , 3 ].values\n",
"print(\"X:\")\n",
"print(X[:10])\n", "print(X[:10])\n",
"print(Y)" "print(Y)\n",
"print(\"Y:\")\n",
"print(Z)\n",
"dataset.head(5)"
] ]
}, },
{ {
@ -85,56 +89,31 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 31, "execution_count": 35,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout",
"output_type": "stream", "output_type": "stream",
"name": "stdout",
"text": [ "text": [
"labelencoder:\n", "[[165349.2 136897.8 471784.1 'New York']\n [162597.7 151377.59 443898.53 'California']\n [153441.51 101145.55 407934.54 'Florida']\n [144372.41 118671.85 383199.62 'New York']\n [142107.34 91391.77 366168.42 'Florida']\n [131876.9 99814.71 362861.36 'New York']\n [134615.46 147198.87 127716.82 'California']\n [130298.13 145530.06 323876.68 'Florida']\n [120542.52 148718.95 311613.29 'New York']\n [123334.88 108679.17 304981.62 'California']]\nlabelencoder:\n[[165349.2 136897.8 471784.1 2]\n [162597.7 151377.59 443898.53 0]\n [153441.51 101145.55 407934.54 1]\n [144372.41 118671.85 383199.62 2]\n [142107.34 91391.77 366168.42 1]\n [131876.9 99814.71 362861.36 2]\n [134615.46 147198.87 127716.82 0]\n [130298.13 145530.06 323876.68 1]\n [120542.52 148718.95 311613.29 2]\n [123334.88 108679.17 304981.62 0]]\nonehot:\n[[0.0 0.0 1.0 165349.2 136897.8 471784.1]\n [1.0 0.0 0.0 162597.7 151377.59 443898.53]\n [0.0 1.0 0.0 153441.51 101145.55 407934.54]\n [0.0 0.0 1.0 144372.41 118671.85 383199.62]\n [0.0 1.0 0.0 142107.34 91391.77 366168.42]\n [0.0 0.0 1.0 131876.9 99814.71 362861.36]\n [1.0 0.0 0.0 134615.46 147198.87 127716.82]\n [0.0 1.0 0.0 130298.13 145530.06 323876.68]\n [0.0 0.0 1.0 120542.52 148718.95 311613.29]\n [1.0 0.0 0.0 123334.88 108679.17 304981.62]]\n"
"[[165349.2 136897.8 471784.1 2]\n",
" [162597.7 151377.59 443898.53 0]\n",
" [153441.51 101145.55 407934.54 1]\n",
" [144372.41 118671.85 383199.62 2]\n",
" [142107.34 91391.77 366168.42 1]\n",
" [131876.9 99814.71 362861.36 2]\n",
" [134615.46 147198.87 127716.82 0]\n",
" [130298.13 145530.06 323876.68 1]\n",
" [120542.52 148718.95 311613.29 2]\n",
" [123334.88 108679.17 304981.62 0]]\n",
"onehot:\n",
"[[0.0000000e+00 0.0000000e+00 1.0000000e+00 1.6534920e+05 1.3689780e+05\n",
" 4.7178410e+05]\n",
" [1.0000000e+00 0.0000000e+00 0.0000000e+00 1.6259770e+05 1.5137759e+05\n",
" 4.4389853e+05]\n",
" [0.0000000e+00 1.0000000e+00 0.0000000e+00 1.5344151e+05 1.0114555e+05\n",
" 4.0793454e+05]\n",
" [0.0000000e+00 0.0000000e+00 1.0000000e+00 1.4437241e+05 1.1867185e+05\n",
" 3.8319962e+05]\n",
" [0.0000000e+00 1.0000000e+00 0.0000000e+00 1.4210734e+05 9.1391770e+04\n",
" 3.6616842e+05]\n",
" [0.0000000e+00 0.0000000e+00 1.0000000e+00 1.3187690e+05 9.9814710e+04\n",
" 3.6286136e+05]\n",
" [1.0000000e+00 0.0000000e+00 0.0000000e+00 1.3461546e+05 1.4719887e+05\n",
" 1.2771682e+05]\n",
" [0.0000000e+00 1.0000000e+00 0.0000000e+00 1.3029813e+05 1.4553006e+05\n",
" 3.2387668e+05]\n",
" [0.0000000e+00 0.0000000e+00 1.0000000e+00 1.2054252e+05 1.4871895e+05\n",
" 3.1161329e+05]\n",
" [1.0000000e+00 0.0000000e+00 0.0000000e+00 1.2333488e+05 1.0867917e+05\n",
" 3.0498162e+05]]\n"
] ]
} }
], ],
"source": [ "source": [
"from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n", "from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n",
"from sklearn.compose import ColumnTransformer \n",
"labelencoder = LabelEncoder()\n", "labelencoder = LabelEncoder()\n",
"print(X[:10])\n",
"#print(X[: , 3])\n",
"X[: , 3] = labelencoder.fit_transform(X[ : , 3])\n", "X[: , 3] = labelencoder.fit_transform(X[ : , 3])\n",
"#print(X[: , 3])\n",
"print(\"labelencoder:\")\n", "print(\"labelencoder:\")\n",
"print(X[:10])\n", "print(X[:10])\n",
"onehotencoder = OneHotEncoder(categorical_features = [3])\n", "ct = ColumnTransformer([(\"\", OneHotEncoder(), [3])], remainder = 'passthrough')\n",
"X = onehotencoder.fit_transform(X).toarray()\n", "X = ct.fit_transform(X)\n",
"#onehotencoder = OneHotEncoder(categorical_features = [3])\n",
"#X = onehotencoder.fit_transform(X).toarray()\n",
"print(\"onehot:\")\n", "print(\"onehot:\")\n",
"print(X[:10])" "print(X[:10])"
] ]
@ -156,13 +135,30 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 32, "execution_count": 27,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"X1 = X[: , 1:]" "X1 = X[: , 1:]"
] ]
}, },
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[[0.0 1.0 165349.2 136897.8 471784.1]\n [0.0 0.0 162597.7 151377.59 443898.53]\n [1.0 0.0 153441.51 101145.55 407934.54]\n [0.0 1.0 144372.41 118671.85 383199.62]\n [1.0 0.0 142107.34 91391.77 366168.42]\n [0.0 1.0 131876.9 99814.71 362861.36]\n [0.0 0.0 134615.46 147198.87 127716.82]\n [1.0 0.0 130298.13 145530.06 323876.68]\n [0.0 1.0 120542.52 148718.95 311613.29]\n [0.0 0.0 123334.88 108679.17 304981.62]\n [1.0 0.0 101913.08 110594.11 229160.95]\n [0.0 0.0 100671.96 91790.61 249744.55]\n [1.0 0.0 93863.75 127320.38 249839.44]\n [0.0 0.0 91992.39 135495.07 252664.93]\n [1.0 0.0 119943.24 156547.42 256512.92]\n [0.0 1.0 114523.61 122616.84 261776.23]\n [0.0 0.0 78013.11 121597.55 264346.06]\n [0.0 1.0 94657.16 145077.58 282574.31]\n [1.0 0.0 91749.16 114175.79 294919.57]\n [0.0 1.0 86419.7 153514.11 0.0]\n [0.0 0.0 76253.86 113867.3 298664.47]\n [0.0 1.0 78389.47 153773.43 299737.29]\n [1.0 0.0 73994.56 122782.75 303319.26]\n [1.0 0.0 67532.53 105751.03 304768.73]\n [0.0 1.0 77044.01 99281.34 140574.81]\n [0.0 0.0 64664.71 139553.16 137962.62]\n [1.0 0.0 75328.87 144135.98 134050.07]\n [0.0 1.0 72107.6 127864.55 353183.81]\n [1.0 0.0 66051.52 182645.56 118148.2]\n [0.0 1.0 65605.48 153032.06 107138.38]\n [1.0 0.0 61994.48 115641.28 91131.24]\n [0.0 1.0 61136.38 152701.92 88218.23]\n [0.0 0.0 63408.86 129219.61 46085.25]\n [1.0 0.0 55493.95 103057.49 214634.81]\n [0.0 0.0 46426.07 157693.92 210797.67]\n [0.0 1.0 46014.02 85047.44 205517.64]\n [1.0 0.0 28663.76 127056.21 201126.82]\n [0.0 0.0 44069.95 51283.14 197029.42]\n [0.0 1.0 20229.59 65947.93 185265.1]\n [0.0 0.0 38558.51 82982.09 174999.3]\n [0.0 0.0 28754.33 118546.05 172795.67]\n [1.0 0.0 27892.92 84710.77 164470.71]\n [0.0 0.0 23640.93 96189.63 148001.11]\n [0.0 1.0 15505.73 127382.3 35534.17]\n [0.0 0.0 22177.74 154806.14 28334.72]\n [0.0 1.0 1000.23 124153.04 1903.93]\n [1.0 0.0 1315.46 115816.21 297114.46]\n [0.0 0.0 0.0 135426.92 0.0]\n [0.0 1.0 542.05 51743.15 0.0]\n [0.0 0.0 0.0 116983.8 45173.06]]\n"
]
}
],
"source": [
"print(X1)"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
@ -172,47 +168,14 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 39, "execution_count": 29,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout",
"output_type": "stream", "output_type": "stream",
"name": "stdout",
"text": [ "text": [
"[[0.0000000e+00 1.0000000e+00 0.0000000e+00 6.6051520e+04 1.8264556e+05\n", "[[0.0 1.0 0.0 66051.52 182645.56 118148.2]\n [1.0 0.0 0.0 100671.96 91790.61 249744.55]\n [0.0 1.0 0.0 101913.08 110594.11 229160.95]\n [0.0 1.0 0.0 27892.92 84710.77 164470.71]\n [0.0 1.0 0.0 153441.51 101145.55 407934.54]\n [0.0 0.0 1.0 72107.6 127864.55 353183.81]\n [0.0 0.0 1.0 20229.59 65947.93 185265.1]\n [0.0 0.0 1.0 61136.38 152701.92 88218.23]\n [0.0 1.0 0.0 73994.56 122782.75 303319.26]\n [0.0 1.0 0.0 142107.34 91391.77 366168.42]]\n[103282.38 144259.4 146121.95 77798.83 191050.39 105008.31 81229.06\n 97483.56 110352.25 166187.94]\n[[1.0 0.0 66051.52 182645.56 118148.2]\n [0.0 0.0 100671.96 91790.61 249744.55]\n [1.0 0.0 101913.08 110594.11 229160.95]\n [1.0 0.0 27892.92 84710.77 164470.71]\n [1.0 0.0 153441.51 101145.55 407934.54]\n [0.0 1.0 72107.6 127864.55 353183.81]\n [0.0 1.0 20229.59 65947.93 185265.1]\n [0.0 1.0 61136.38 152701.92 88218.23]\n [1.0 0.0 73994.56 122782.75 303319.26]\n [1.0 0.0 142107.34 91391.77 366168.42]]\n[103282.38 144259.4 146121.95 77798.83 191050.39 105008.31 81229.06\n 97483.56 110352.25 166187.94]\n"
" 1.1814820e+05]\n",
" [1.0000000e+00 0.0000000e+00 0.0000000e+00 1.0067196e+05 9.1790610e+04\n",
" 2.4974455e+05]\n",
" [0.0000000e+00 1.0000000e+00 0.0000000e+00 1.0191308e+05 1.1059411e+05\n",
" 2.2916095e+05]\n",
" [0.0000000e+00 1.0000000e+00 0.0000000e+00 2.7892920e+04 8.4710770e+04\n",
" 1.6447071e+05]\n",
" [0.0000000e+00 1.0000000e+00 0.0000000e+00 1.5344151e+05 1.0114555e+05\n",
" 4.0793454e+05]\n",
" [0.0000000e+00 0.0000000e+00 1.0000000e+00 7.2107600e+04 1.2786455e+05\n",
" 3.5318381e+05]\n",
" [0.0000000e+00 0.0000000e+00 1.0000000e+00 2.0229590e+04 6.5947930e+04\n",
" 1.8526510e+05]\n",
" [0.0000000e+00 0.0000000e+00 1.0000000e+00 6.1136380e+04 1.5270192e+05\n",
" 8.8218230e+04]\n",
" [0.0000000e+00 1.0000000e+00 0.0000000e+00 7.3994560e+04 1.2278275e+05\n",
" 3.0331926e+05]\n",
" [0.0000000e+00 1.0000000e+00 0.0000000e+00 1.4210734e+05 9.1391770e+04\n",
" 3.6616842e+05]]\n",
"[103282.38 144259.4 146121.95 77798.83 191050.39 105008.31 81229.06\n",
" 97483.56 110352.25 166187.94]\n",
"[[1.0000000e+00 0.0000000e+00 6.6051520e+04 1.8264556e+05 1.1814820e+05]\n",
" [0.0000000e+00 0.0000000e+00 1.0067196e+05 9.1790610e+04 2.4974455e+05]\n",
" [1.0000000e+00 0.0000000e+00 1.0191308e+05 1.1059411e+05 2.2916095e+05]\n",
" [1.0000000e+00 0.0000000e+00 2.7892920e+04 8.4710770e+04 1.6447071e+05]\n",
" [1.0000000e+00 0.0000000e+00 1.5344151e+05 1.0114555e+05 4.0793454e+05]\n",
" [0.0000000e+00 1.0000000e+00 7.2107600e+04 1.2786455e+05 3.5318381e+05]\n",
" [0.0000000e+00 1.0000000e+00 2.0229590e+04 6.5947930e+04 1.8526510e+05]\n",
" [0.0000000e+00 1.0000000e+00 6.1136380e+04 1.5270192e+05 8.8218230e+04]\n",
" [1.0000000e+00 0.0000000e+00 7.3994560e+04 1.2278275e+05 3.0331926e+05]\n",
" [1.0000000e+00 0.0000000e+00 1.4210734e+05 9.1391770e+04 3.6616842e+05]]\n",
"[103282.38 144259.4 146121.95 77798.83 191050.39 105008.31 81229.06\n",
" 97483.56 110352.25 166187.94]\n"
] ]
} }
], ],
@ -235,18 +198,18 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 40, "execution_count": 30,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"output_type": "execute_result",
"data": { "data": {
"text/plain": [ "text/plain": [
"LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)" "LinearRegression()"
] ]
}, },
"execution_count": 40,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "execution_count": 30
} }
], ],
"source": [ "source": [
@ -266,7 +229,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 41, "execution_count": 31,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -276,19 +239,14 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 42, "execution_count": 32,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout",
"output_type": "stream", "output_type": "stream",
"name": "stdout",
"text": [ "text": [
"[103015.20159796 132582.27760815 132447.73845173 71976.09851258\n", "[103015.20159796 132582.27760816 132447.73845174 71976.09851258\n 178537.48221055 116161.24230166 67851.69209676 98791.73374686\n 113969.43533013 167921.06569551]\n[103015.20159796 132582.27760815 132447.73845175 71976.09851258\n 178537.48221056 116161.24230166 67851.69209676 98791.73374687\n 113969.43533013 167921.06569551]\n"
" 178537.48221051 116161.24230163 67851.69209676 98791.73374689\n",
" 113969.43533011 167921.06569547]\n",
"[103015.20159795 132582.27760817 132447.73845176 71976.09851257\n",
" 178537.48221058 116161.24230165 67851.69209675 98791.73374686\n",
" 113969.43533013 167921.06569553]\n"
] ]
} }
], ],
@ -303,13 +261,6 @@
"source": [ "source": [
"**完整的项目请前往Github项目100-Days-Of-ML-Code查看。有任何的建议或者意见欢迎在issue中提出~**" "**完整的项目请前往Github项目100-Days-Of-ML-Code查看。有任何的建议或者意见欢迎在issue中提出~**"
] ]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
} }
], ],
"metadata": { "metadata": {
@ -328,7 +279,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.6.5" "version": "3.8.3-final"
} }
}, },
"nbformat": 4, "nbformat": 4,