@ -19,7 +19,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 42,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -27,6 +27,63 @@
|
||||
"import pandas as pd"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"[[ 7. 2. 3. ]\n [ 4. 3.5 6. ]\n [10. 3.5 9. ]]\nSklearn verion is 0.23.1\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import sklearn\n",
|
||||
"from sklearn.impute import SimpleImputer\n",
|
||||
"#This block is an example used to learn SimpleImputer\n",
|
||||
"imp_mean = SimpleImputer(missing_values=np.nan, strategy='mean')\n",
|
||||
"imp_mean.fit([[7, 2, 3], [4, np.nan, 6], [10, 5, 9]])\n",
|
||||
"X = [[np.nan, 2, 3], [4, np.nan, 6], [10, np.nan, 9]]\n",
|
||||
"print(imp_mean.transform(X))\n",
|
||||
"print(\"Sklearn verion is {}\".format(sklearn.__version__))"
|
||||
]
|
||||
},
|
||||
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
  "# OneHotEncoder walkthrough. The interactive '>>>' prompts and the pasted\n",
  "# result lines were removed so that this cell is valid Python and can be run.\n",
  "from sklearn.preprocessing import OneHotEncoder\n",
  "enc = OneHotEncoder(handle_unknown='ignore')\n",
  "X = [['Male', 1], ['Female', 3], ['Female', 2]]\n",
  "enc.fit(X)\n",
  "# Categories learned for each input column.\n",
  "print(enc.categories_)\n",
  "# The value 4 was not seen during fit; with handle_unknown='ignore' its\n",
  "# one-hot columns are all zeros instead of raising an error.\n",
  "print(enc.transform([['Female', 1], ['Male', 4]]).toarray())\n",
  "# A row whose columns for one feature are all zeros decodes to None.\n",
  "print(enc.inverse_transform([[0, 1, 1, 0, 0], [0, 0, 0, 1, 0]]))\n",
  "# get_feature_names matches the scikit-learn 0.23 API this notebook was run with;\n",
  "# newer releases provide get_feature_names_out instead.\n",
  "print(enc.get_feature_names(['gender', 'group']))"
 ]
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@ -37,27 +94,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 52,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"Step 2: Importing dataset\n",
|
||||
"X\n",
|
||||
"[['France' 44.0 72000.0]\n",
|
||||
" ['Spain' 27.0 48000.0]\n",
|
||||
" ['Germany' 30.0 54000.0]\n",
|
||||
" ['Spain' 38.0 61000.0]\n",
|
||||
" ['Germany' 40.0 nan]\n",
|
||||
" ['France' 35.0 58000.0]\n",
|
||||
" ['Spain' nan 52000.0]\n",
|
||||
" ['France' 48.0 79000.0]\n",
|
||||
" ['Germany' 50.0 83000.0]\n",
|
||||
" ['France' 37.0 67000.0]]\n",
|
||||
"Y\n",
|
||||
"['No' 'Yes' 'No' 'No' 'Yes' 'Yes' 'No' 'Yes' 'No' 'Yes']\n"
|
||||
"Step 2: Importing dataset\nX\n[['France' 44.0 72000.0]\n ['Spain' 27.0 48000.0]\n ['Germany' 30.0 54000.0]\n ['Spain' 38.0 61000.0]\n ['Germany' 40.0 nan]\n ['France' 35.0 58000.0]\n ['Spain' nan 52000.0]\n ['France' 48.0 79000.0]\n ['Germany' 50.0 83000.0]\n ['France' 37.0 67000.0]]\nY\n['No' 'Yes' 'No' 'No' 'Yes' 'Yes' 'No' 'Yes' 'No' 'Yes']\n[[44.0 72000.0]\n [27.0 48000.0]\n [30.0 54000.0]\n [38.0 61000.0]\n [40.0 nan]\n [35.0 58000.0]\n [nan 52000.0]\n [48.0 79000.0]\n [50.0 83000.0]\n [37.0 67000.0]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -71,7 +115,8 @@
|
||||
"print(\"X\")\n",
|
||||
"print(X)\n",
|
||||
"print(\"Y\")\n",
|
||||
"print(Y)"
|
||||
"print(Y)\n",
|
||||
"print(X[ : , 1:3])"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -84,39 +129,31 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 53,
|
||||
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"---------------------\n",
|
||||
"Step 3: Handling the missing data\n",
|
||||
"step2\n",
|
||||
"X\n",
|
||||
"[['France' 44.0 72000.0]\n",
|
||||
" ['Spain' 27.0 48000.0]\n",
|
||||
" ['Germany' 30.0 54000.0]\n",
|
||||
" ['Spain' 38.0 61000.0]\n",
|
||||
" ['Germany' 40.0 63777.77777777778]\n",
|
||||
" ['France' 35.0 58000.0]\n",
|
||||
" ['Spain' 38.77777777777778 52000.0]\n",
|
||||
" ['France' 48.0 79000.0]\n",
|
||||
" ['Germany' 50.0 83000.0]\n",
|
||||
" ['France' 37.0 67000.0]]\n"
|
||||
"---------------------\nStep 3: Handling the missing data\nstep2\nX\n[['France' 44.0 72000.0]\n ['Spain' 27.0 48000.0]\n ['Germany' 30.0 54000.0]\n ['Spain' 38.0 61000.0]\n ['Germany' 40.0 63777.77777777778]\n ['France' 35.0 58000.0]\n ['Spain' 38.77777777777778 52000.0]\n ['France' 48.0 79000.0]\n ['Germany' 50.0 83000.0]\n ['France' 37.0 67000.0]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# If you use the newest version of sklearn, use the lines of code commented out",
|
||||
"# from sklearn.impute import SimpleImputer",
|
||||
"# imputer = SimpleImputer(missing_values=\"NaN\", strategy=\"mean\")",
|
||||
"from sklearn.preprocessing import Imputer\n",
|
||||
"# If you use the newest version of sklearn, use the lines of code commented out\n",
|
||||
"from sklearn.impute import SimpleImputer\n",
|
||||
"imputer = SimpleImputer(missing_values=np.nan, strategy=\"mean\")\n",
|
||||
"#from sklearn.preprocessing import Imputer\n",
|
||||
"# axis=0表示按列进行\n",
|
||||
"imputer = Imputer(missing_values = \"NaN\", strategy = \"mean\", axis = 0)\n",
|
||||
"imputer = imputer.fit(X[ : , 1:3])\n",
|
||||
"X[ : , 1:3] = imputer.transform(X[ : , 1:3])\n",
|
||||
"#imputer = Imputer(missing_values = \"NaN\", strategy = \"mean\", axis = 0)\n",
|
||||
"#print(imputer)\n",
|
||||
"#\n",
|
||||
"# print(X[ : , 1:3])\n",
|
||||
"imputer = imputer.fit(X[ : , 1:3]) #put the data we want to process in to this imputer\n",
|
||||
"X[ : , 1:3] = imputer.transform(X[ : , 1:3]) #replace the np.nan with mean\n",
|
||||
"#print(X[ : , 1:3])\n",
|
||||
"print(\"---------------------\")\n",
|
||||
"print(\"Step 3: Handling the missing data\")\n",
|
||||
"print(\"step2\")\n",
|
||||
@ -134,48 +171,30 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 54,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"---------------------\n",
|
||||
"Step 4: Encoding categorical data\n",
|
||||
"X\n",
|
||||
"[[1.00000000e+00 0.00000000e+00 0.00000000e+00 4.40000000e+01\n",
|
||||
" 7.20000000e+04]\n",
|
||||
" [0.00000000e+00 0.00000000e+00 1.00000000e+00 2.70000000e+01\n",
|
||||
" 4.80000000e+04]\n",
|
||||
" [0.00000000e+00 1.00000000e+00 0.00000000e+00 3.00000000e+01\n",
|
||||
" 5.40000000e+04]\n",
|
||||
" [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.80000000e+01\n",
|
||||
" 6.10000000e+04]\n",
|
||||
" [0.00000000e+00 1.00000000e+00 0.00000000e+00 4.00000000e+01\n",
|
||||
" 6.37777778e+04]\n",
|
||||
" [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.50000000e+01\n",
|
||||
" 5.80000000e+04]\n",
|
||||
" [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.87777778e+01\n",
|
||||
" 5.20000000e+04]\n",
|
||||
" [1.00000000e+00 0.00000000e+00 0.00000000e+00 4.80000000e+01\n",
|
||||
" 7.90000000e+04]\n",
|
||||
" [0.00000000e+00 1.00000000e+00 0.00000000e+00 5.00000000e+01\n",
|
||||
" 8.30000000e+04]\n",
|
||||
" [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.70000000e+01\n",
|
||||
" 6.70000000e+04]]\n",
|
||||
"Y\n",
|
||||
"[0 1 0 0 1 1 0 1 0 1]\n"
|
||||
"---------------------\nStep 4: Encoding categorical data\nX\n[[1.0 0.0 0.0 44.0 72000.0]\n [0.0 0.0 1.0 27.0 48000.0]\n [0.0 1.0 0.0 30.0 54000.0]\n [0.0 0.0 1.0 38.0 61000.0]\n [0.0 1.0 0.0 40.0 63777.77777777778]\n [1.0 0.0 0.0 35.0 58000.0]\n [0.0 0.0 1.0 38.77777777777778 52000.0]\n [1.0 0.0 0.0 48.0 79000.0]\n [0.0 1.0 0.0 50.0 83000.0]\n [1.0 0.0 0.0 37.0 67000.0]]\nY\n[0 1 0 0 1 1 0 1 0 1]\n"
|
||||
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n",
|
||||
"labelencoder_X = LabelEncoder()\n",
|
||||
"X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])\n",
|
||||
|
||||
"from sklearn.compose import ColumnTransformer \n",
|
||||
"#labelencoder_X = LabelEncoder()\n",
|
||||
"#X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])\n",
|
||||
"#Creating a dummy variable\n",
|
||||
"onehotencoder = OneHotEncoder(categorical_features = [0])\n",
|
||||
"X = onehotencoder.fit_transform(X).toarray()\n",
|
||||
"#print(X)\n",
|
||||
"ct = ColumnTransformer([(\"\", OneHotEncoder(), [0])], remainder = 'passthrough')\n",
|
||||
"X = ct.fit_transform(X)\n",
|
||||
"#onehotencoder = OneHotEncoder(categorical_features = [0])\n",
|
||||
"#X = onehotencoder.fit_transform(X).toarray()\n",
|
||||
"labelencoder_Y = LabelEncoder()\n",
|
||||
"Y = labelencoder_Y.fit_transform(Y)\n",
|
||||
"print(\"---------------------\")\n",
|
||||
@ -196,41 +215,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 55,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"---------------------\n",
|
||||
"Step 5: Splitting the datasets into training sets and Test sets\n",
|
||||
"X_train\n",
|
||||
"[[ 0.00000000e+00 1.00000000e+00 0.00000000e+00 4.00000000e+01\n",
|
||||
" 6.37777778e+04]\n",
|
||||
" [ 1.00000000e+00 0.00000000e+00 0.00000000e+00 3.70000000e+01\n",
|
||||
" 6.70000000e+04]\n",
|
||||
" [ 0.00000000e+00 0.00000000e+00 1.00000000e+00 2.70000000e+01\n",
|
||||
" 4.80000000e+04]\n",
|
||||
" [ 0.00000000e+00 0.00000000e+00 1.00000000e+00 3.87777778e+01\n",
|
||||
" 5.20000000e+04]\n",
|
||||
" [ 1.00000000e+00 0.00000000e+00 0.00000000e+00 4.80000000e+01\n",
|
||||
" 7.90000000e+04]\n",
|
||||
" [ 0.00000000e+00 0.00000000e+00 1.00000000e+00 3.80000000e+01\n",
|
||||
" 6.10000000e+04]\n",
|
||||
" [ 1.00000000e+00 0.00000000e+00 0.00000000e+00 4.40000000e+01\n",
|
||||
" 7.20000000e+04]\n",
|
||||
" [ 1.00000000e+00 0.00000000e+00 0.00000000e+00 3.50000000e+01\n",
|
||||
" 5.80000000e+04]]\n",
|
||||
"X_test\n",
|
||||
"[[ 0.00000000e+00 1.00000000e+00 0.00000000e+00 3.00000000e+01\n",
|
||||
" 5.40000000e+04]\n",
|
||||
" [ 0.00000000e+00 1.00000000e+00 0.00000000e+00 5.00000000e+01\n",
|
||||
" 8.30000000e+04]]\n",
|
||||
"Y_train\n",
|
||||
"[1 1 1 0 1 0 0 1]\n",
|
||||
"Y_test\n",
|
||||
"[0 0]\n"
|
||||
"---------------------\nStep 5: Splitting the datasets into training sets and Test sets\nX_train\n[[0.0 1.0 0.0 40.0 63777.77777777778]\n [1.0 0.0 0.0 37.0 67000.0]\n [0.0 0.0 1.0 27.0 48000.0]\n [0.0 0.0 1.0 38.77777777777778 52000.0]\n [1.0 0.0 0.0 48.0 79000.0]\n [0.0 0.0 1.0 38.0 61000.0]\n [1.0 0.0 0.0 44.0 72000.0]\n [1.0 0.0 0.0 35.0 58000.0]]\nX_test\n[[0.0 1.0 0.0 30.0 54000.0]\n [0.0 1.0 0.0 50.0 83000.0]]\nY_train\n[1 1 1 0 1 0 0 1]\nY_test\n[0 0]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -259,27 +251,15 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 57,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"---------------------\n",
|
||||
"Step 6: Feature Scaling\n",
|
||||
"X_train\n",
|
||||
"[[-1. 2.64575131 -0.77459667 0.26306757 0.12381479]\n",
|
||||
" [ 1. -0.37796447 -0.77459667 -0.25350148 0.46175632]\n",
|
||||
" [-1. -0.37796447 1.29099445 -1.97539832 -1.53093341]\n",
|
||||
" [-1. -0.37796447 1.29099445 0.05261351 -1.11141978]\n",
|
||||
" [ 1. -0.37796447 -0.77459667 1.64058505 1.7202972 ]\n",
|
||||
" [-1. -0.37796447 1.29099445 -0.0813118 -0.16751412]\n",
|
||||
" [ 1. -0.37796447 -0.77459667 0.95182631 0.98614835]\n",
|
||||
" [ 1. -0.37796447 -0.77459667 -0.59788085 -0.48214934]]\n",
|
||||
"X_test\n",
|
||||
"[[ 0. 0. 0. -1. -1.]\n",
|
||||
" [ 0. 0. 0. 1. 1.]]\n"
|
||||
"---------------------\nStep 6: Feature Scaling\nX_train\n[[-1. 2.64575131 -0.77459667 0.26306757 0.12381479]\n [ 1. -0.37796447 -0.77459667 -0.25350148 0.46175632]\n [-1. -0.37796447 1.29099445 -1.97539832 -1.53093341]\n [-1. -0.37796447 1.29099445 0.05261351 -1.11141978]\n [ 1. -0.37796447 -0.77459667 1.64058505 1.7202972 ]\n [-1. -0.37796447 1.29099445 -0.0813118 -0.16751412]\n [ 1. -0.37796447 -0.77459667 0.95182631 0.98614835]\n [ 1. -0.37796447 -0.77459667 -0.59788085 -0.48214934]]\nX_test\n[[-1. 2.64575131 -0.77459667 -1.45882927 -0.90166297]\n [-1. 2.64575131 -0.77459667 1.98496442 2.13981082]]\n"
|
||||
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -287,13 +267,13 @@
|
||||
"from sklearn.preprocessing import StandardScaler\n",
|
||||
"sc_X = StandardScaler()\n",
|
||||
"X_train = sc_X.fit_transform(X_train)\n",
|
||||
"X_test = sc_X.transform(X_test)\n",
|
||||
"X_test = sc_X.transform(X_test) #we should not use fit_transfer cause the u and z is determined from x_train\n",
|
||||
"print(\"---------------------\")\n",
|
||||
"print(\"Step 6: Feature Scaling\")\n",
|
||||
"print(\"X_train\")\n",
|
||||
"print(X_train)\n",
|
||||
"print(\"X_test\")\n",
|
||||
"print(X_test)"
|
||||
"print(X_test)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -302,22 +282,17 @@
|
||||
"source": [
|
||||
"<b>完整的项目请前往Github项目<a href=\"https://github.com/MachineLearning100/100-Days-Of-ML-Code\">100-Days-Of-ML-Code</a>查看。有任何的建议或者意见欢迎在issue中提出~</b>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
"name": "python3",
|
||||
"display_name": "Python 3.8.3 64-bit (conda)",
|
||||
"metadata": {
|
||||
"interpreter": {
|
||||
"hash": "1b78ff499ec469310b6a6795c4effbbfc85eb20a6ba0cf828a15721670711b2c"
|
||||
}
|
||||
}
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
@ -329,9 +304,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.5"
|
||||
"version": "3.8.3-final"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
}
|
||||
@ -1,71 +1,76 @@
|
||||
# Day 1: Data Preprocessing
#
# Walkthrough of a basic preprocessing pipeline on ../datasets/Data.csv:
# load -> impute missing values -> encode categoricals -> split -> scale.
# Every step prints its result so the intermediate state can be inspected.

# Step 1: Importing the libraries
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler

# Step 2: Importing dataset
# X holds every column except the last one; Y holds the column at index 3.
dataset = pd.read_csv('../datasets/Data.csv')
X = dataset.iloc[ : , :-1].values
Y = dataset.iloc[ : , 3].values
print("Step 2: Importing dataset")
print("X")
print(X)
print("Y")
print(Y)

# Step 3: Handling the missing data
# sklearn.preprocessing.Imputer no longer exists in current scikit-learn;
# SimpleImputer replaces it. It always works column by column (the old
# axis=0 behaviour) and expects the np.nan marker, not the string "NaN".
imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
imputer = imputer.fit(X[ : , 1:3])
# Replace the missing entries of columns 1 and 2 with the column means.
X[ : , 1:3] = imputer.transform(X[ : , 1:3])
print("---------------------")
print("Step 3: Handling the missing data")
print("step2")
print("X")
print(X)

# Step 4: Encoding categorical data
# Creating a dummy variable
# OneHotEncoder(categorical_features=[0]) was removed from scikit-learn, so
# ColumnTransformer picks column 0 for one-hot encoding and passes the other
# columns through unchanged. OneHotEncoder accepts string categories directly,
# which makes the former LabelEncoder pass over column 0 unnecessary.
# sparse_threshold=0 forces a dense result; the cast restores the float array
# that the old `.toarray()` call produced.
ct = ColumnTransformer(
    [("onehot", OneHotEncoder(), [0])],
    remainder='passthrough',
    sparse_threshold=0,
)
X = np.asarray(ct.fit_transform(X), dtype=float)
labelencoder_Y = LabelEncoder()
Y = labelencoder_Y.fit_transform(Y)
print("---------------------")
print("Step 4: Encoding categorical data")
print("X")
print(X)
print("Y")
print(Y)

# Step 5: Splitting the datasets into training sets and Test sets
# random_state is fixed so the split is reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)
print("---------------------")
print("Step 5: Splitting the datasets into training sets and Test sets")
print("X_train")
print(X_train)
print("X_test")
print(X_test)
print("Y_train")
print(Y_train)
print("Y_test")
print(Y_test)

# Step 6: Feature Scaling
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
# Only transform the test set: the scaling statistics must come from X_train.
X_test = sc_X.transform(X_test)
print("---------------------")
print("Step 6: Feature Scaling")
print("X_train")
print(X_train)
print("X_test")
print(X_test)
|
||||
# Day 1: Data Preprocessing
#
# Step-by-step preprocessing of ../datasets/Data.csv: load the data, fill in
# missing values, encode the categorical column and the label, split into
# train/test sets and standardise the features. Each step prints its result.

# Step 1: Importing the libraries
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler

# Step 2: Importing dataset
# X is every column but the last; Y is the column at index 3.
dataset = pd.read_csv('../datasets/Data.csv')
X = dataset.iloc[ : , :-1].values
Y = dataset.iloc[ : , 3].values
print("Step 2: Importing dataset")
print("X")
print(X)
print("Y")
print(Y)

# Step 3: Handling the missing data
# SimpleImputer is the current scikit-learn API (the older
# sklearn.preprocessing.Imputer with axis=0 did the same column-wise imputation).
imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
# Learn the column means of columns 1 and 2, then overwrite the np.nan entries.
imputer = imputer.fit(X[ : , 1:3])
X[ : , 1:3] = imputer.transform(X[ : , 1:3])
print("---------------------")
print("Step 3: Handling the missing data")
print("step2")
print("X")
print(X)

# Step 4: Encoding categorical data
# Creating a dummy variable
# Column 0 is one-hot encoded; the remaining columns are passed through as-is.
# The step gets a real name instead of an empty string, and sparse_threshold=0
# guarantees a dense array regardless of how many categories the column has
# (StandardScaler in Step 6 cannot centre a sparse matrix).
ct = ColumnTransformer(
    [("encoder", OneHotEncoder(), [0])],
    remainder='passthrough',
    sparse_threshold=0,
)
X = ct.fit_transform(X)
labelencoder_Y = LabelEncoder()
Y = labelencoder_Y.fit_transform(Y)
print("---------------------")
print("Step 4: Encoding categorical data")
print("X")
print(X)
print("Y")
print(Y)

# Step 5: Splitting the datasets into training sets and Test sets
# random_state is fixed so the split is reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)
print("---------------------")
print("Step 5: Splitting the datasets into training sets and Test sets")
print("X_train")
print(X_train)
print("X_test")
print(X_test)
print("Y_train")
print(Y_train)
print("Y_test")
print(Y_test)

# Step 6: Feature Scaling
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
# Use transform (not fit_transform) so the test set is scaled with the
# statistics learned from the training set.
X_test = sc_X.transform(X_test)
print("---------------------")
print("Step 6: Feature Scaling")
print("X_train")
print(X_train)
print("X_test")
print(X_test)
|
||||
|
||||
File diff suppressed because one or more lines are too long
@ -23,7 +23,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 45,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -40,40 +40,64 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"execution_count": 57,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"[[165349.2 136897.8 471784.1 'New York']\n",
|
||||
" [162597.7 151377.59 443898.53 'California']\n",
|
||||
" [153441.51 101145.55 407934.54 'Florida']\n",
|
||||
" [144372.41 118671.85 383199.62 'New York']\n",
|
||||
" [142107.34 91391.77 366168.42 'Florida']\n",
|
||||
" [131876.9 99814.71 362861.36 'New York']\n",
|
||||
" [134615.46 147198.87 127716.82 'California']\n",
|
||||
" [130298.13 145530.06 323876.68 'Florida']\n",
|
||||
" [120542.52 148718.95 311613.29 'New York']\n",
|
||||
" [123334.88 108679.17 304981.62 'California']]\n",
|
||||
"[192261.83 191792.06 191050.39 182901.99 166187.94 156991.12 156122.51\n",
|
||||
" 155752.6 152211.77 149759.96 146121.95 144259.4 141585.52 134307.35\n",
|
||||
" 132602.65 129917.04 126992.93 125370.37 124266.9 122776.86 118474.03\n",
|
||||
" 111313.02 110352.25 108733.99 108552.04 107404.34 105733.54 105008.31\n",
|
||||
" 103282.38 101004.64 99937.59 97483.56 97427.84 96778.92 96712.8\n",
|
||||
" 96479.51 90708.19 89949.14 81229.06 81005.76 78239.91 77798.83\n",
|
||||
" 71498.49 69758.98 65200.33 64926.08 49490.75 42559.73 35673.41\n",
|
||||
" 14681.4 ]\n"
|
||||
"X:\n[[165349.2 136897.8 471784.1 'New York']\n [162597.7 151377.59 443898.53 'California']\n [153441.51 101145.55 407934.54 'Florida']\n [144372.41 118671.85 383199.62 'New York']\n [142107.34 91391.77 366168.42 'Florida']\n [131876.9 99814.71 362861.36 'New York']\n [134615.46 147198.87 127716.82 'California']\n [130298.13 145530.06 323876.68 'Florida']\n [120542.52 148718.95 311613.29 'New York']\n [123334.88 108679.17 304981.62 'California']]\nY:\n[192261.83 191792.06 191050.39 182901.99 166187.94 156991.12 156122.51\n 155752.6 152211.77 149759.96 146121.95 144259.4 141585.52 134307.35\n 132602.65 129917.04 126992.93 125370.37 124266.9 122776.86 118474.03\n 111313.02 110352.25 108733.99 108552.04 107404.34 105733.54 105008.31\n 103282.38 101004.64 99937.59 97483.56 97427.84 96778.92 96712.8\n 96479.51 90708.19 89949.14 81229.06 81005.76 78239.91 77798.83\n 71498.49 69758.98 65200.33 64926.08 49490.75 42559.73 35673.41\n 14681.4 ]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"output_type": "execute_result",
|
||||
"data": {
|
||||
"text/plain": [
|
||||
" R&D Spend Administration Marketing Spend State Profit\n",
|
||||
"0 165349.20 136897.80 471784.10 New York 192261.83\n",
|
||||
"1 162597.70 151377.59 443898.53 California 191792.06\n",
|
||||
"2 153441.51 101145.55 407934.54 Florida 191050.39\n",
|
||||
"3 144372.41 118671.85 383199.62 New York 182901.99\n",
|
||||
"4 142107.34 91391.77 366168.42 Florida 166187.94"
|
||||
],
|
||||
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>R&D Spend</th>\n <th>Administration</th>\n <th>Marketing Spend</th>\n <th>State</th>\n <th>Profit</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>165349.20</td>\n <td>136897.80</td>\n <td>471784.10</td>\n <td>New York</td>\n <td>192261.83</td>\n </tr>\n <tr>\n <th>1</th>\n <td>162597.70</td>\n <td>151377.59</td>\n <td>443898.53</td>\n <td>California</td>\n <td>191792.06</td>\n </tr>\n <tr>\n <th>2</th>\n <td>153441.51</td>\n <td>101145.55</td>\n <td>407934.54</td>\n <td>Florida</td>\n <td>191050.39</td>\n </tr>\n <tr>\n <th>3</th>\n <td>144372.41</td>\n <td>118671.85</td>\n <td>383199.62</td>\n <td>New York</td>\n <td>182901.99</td>\n </tr>\n <tr>\n <th>4</th>\n <td>142107.34</td>\n <td>91391.77</td>\n <td>366168.42</td>\n <td>Florida</td>\n <td>166187.94</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
||||
},
|
||||
"metadata": {},
|
||||
"execution_count": 57
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"dataset = pd.read_csv('../datasets/50_Startups.csv')\n",
|
||||
"X = dataset.iloc[ : , :-1].values\n",
|
||||
"Y = dataset.iloc[ : , 4 ].values\n",
|
||||
"Z = dataset.iloc[ : , 0 ].values\n",
|
||||
"print(\"X:\")\n",
|
||||
"print(X[:10])\n",
|
||||
"print(Y)"
|
||||
"print(\"Y:\")\n",
|
||||
"print(Y)\n",
|
||||
"dataset.head(5)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 59,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"[[165349.2 136897.8 471784.1 'New York']\n [162597.7 151377.59 443898.53 'California']\n [153441.51 101145.55 407934.54 'Florida']\n [144372.41 118671.85 383199.62 'New York']\n [142107.34 91391.77 366168.42 'Florida']\n [131876.9 99814.71 362861.36 'New York']\n [134615.46 147198.87 127716.82 'California']\n [130298.13 145530.06 323876.68 'Florida']\n [120542.52 148718.95 311613.29 'New York']\n [123334.88 108679.17 304981.62 'California']\n [101913.08 110594.11 229160.95 'Florida']\n [100671.96 91790.61 249744.55 'California']\n [93863.75 127320.38 249839.44 'Florida']\n [91992.39 135495.07 252664.93 'California']\n [119943.24 156547.42 256512.92 'Florida']\n [114523.61 122616.84 261776.23 'New York']\n [78013.11 121597.55 264346.06 'California']\n [94657.16 145077.58 282574.31 'New York']\n [91749.16 114175.79 294919.57 'Florida']\n [86419.7 153514.11 224494.78489361703 'New York']\n [76253.86 113867.3 298664.47 'California']\n [78389.47 153773.43 299737.29 'New York']\n [73994.56 122782.75 303319.26 'Florida']\n [67532.53 105751.03 304768.73 'Florida']\n [77044.01 99281.34 140574.81 'New York']\n [64664.71 139553.16 137962.62 'California']\n [75328.87 144135.98 134050.07 'Florida']\n [72107.6 127864.55 353183.81 'New York']\n [66051.52 182645.56 118148.2 'Florida']\n [65605.48 153032.06 107138.38 'New York']\n [61994.48 115641.28 91131.24 'Florida']\n [61136.38 152701.92 88218.23 'New York']\n [63408.86 129219.61 46085.25 'California']\n [55493.95 103057.49 214634.81 'Florida']\n [46426.07 157693.92 210797.67 'California']\n [46014.02 85047.44 205517.64 'New York']\n [28663.76 127056.21 201126.82 'Florida']\n [44069.95 51283.14 197029.42 'California']\n [20229.59 65947.93 185265.1 'New York']\n [38558.51 82982.09 174999.3 'California']\n [28754.33 118546.05 172795.67 'California']\n [27892.92 84710.77 164470.71 'Florida']\n [23640.93 96189.63 148001.11 'California']\n [15505.73 127382.3 35534.17 'New York']\n [22177.74 154806.14 28334.72 'California']\n 
[1000.23 124153.04 1903.93 'New York']\n [1315.46 115816.21 297114.46 'Florida']\n [76793.34958333334 135426.92 224494.78489361703 'California']\n [542.05 51743.15 224494.78489361703 'New York']\n [76793.34958333334 116983.8 45173.06 'California']]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from sklearn.impute import SimpleImputer\n",
|
||||
"imputer = SimpleImputer(missing_values=0.0, strategy=\"mean\")\n",
|
||||
"imputer = imputer.fit(X[ : , 0:3])\n",
|
||||
"X[ : , 0:3] = imputer.transform(X[ : , 0:3])\n",
|
||||
"print(X)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -85,56 +109,32 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"execution_count": 60,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"labelencoder:\n",
|
||||
"[[165349.2 136897.8 471784.1 2]\n",
|
||||
" [162597.7 151377.59 443898.53 0]\n",
|
||||
" [153441.51 101145.55 407934.54 1]\n",
|
||||
" [144372.41 118671.85 383199.62 2]\n",
|
||||
" [142107.34 91391.77 366168.42 1]\n",
|
||||
" [131876.9 99814.71 362861.36 2]\n",
|
||||
" [134615.46 147198.87 127716.82 0]\n",
|
||||
" [130298.13 145530.06 323876.68 1]\n",
|
||||
" [120542.52 148718.95 311613.29 2]\n",
|
||||
" [123334.88 108679.17 304981.62 0]]\n",
|
||||
"onehot:\n",
|
||||
"[[0.0000000e+00 0.0000000e+00 1.0000000e+00 1.6534920e+05 1.3689780e+05\n",
|
||||
" 4.7178410e+05]\n",
|
||||
" [1.0000000e+00 0.0000000e+00 0.0000000e+00 1.6259770e+05 1.5137759e+05\n",
|
||||
" 4.4389853e+05]\n",
|
||||
" [0.0000000e+00 1.0000000e+00 0.0000000e+00 1.5344151e+05 1.0114555e+05\n",
|
||||
" 4.0793454e+05]\n",
|
||||
" [0.0000000e+00 0.0000000e+00 1.0000000e+00 1.4437241e+05 1.1867185e+05\n",
|
||||
" 3.8319962e+05]\n",
|
||||
" [0.0000000e+00 1.0000000e+00 0.0000000e+00 1.4210734e+05 9.1391770e+04\n",
|
||||
" 3.6616842e+05]\n",
|
||||
" [0.0000000e+00 0.0000000e+00 1.0000000e+00 1.3187690e+05 9.9814710e+04\n",
|
||||
" 3.6286136e+05]\n",
|
||||
" [1.0000000e+00 0.0000000e+00 0.0000000e+00 1.3461546e+05 1.4719887e+05\n",
|
||||
" 1.2771682e+05]\n",
|
||||
" [0.0000000e+00 1.0000000e+00 0.0000000e+00 1.3029813e+05 1.4553006e+05\n",
|
||||
" 3.2387668e+05]\n",
|
||||
" [0.0000000e+00 0.0000000e+00 1.0000000e+00 1.2054252e+05 1.4871895e+05\n",
|
||||
" 3.1161329e+05]\n",
|
||||
" [1.0000000e+00 0.0000000e+00 0.0000000e+00 1.2333488e+05 1.0867917e+05\n",
|
||||
" 3.0498162e+05]]\n"
|
||||
"original:\n[[165349.2 136897.8 471784.1 'New York']\n [162597.7 151377.59 443898.53 'California']\n [153441.51 101145.55 407934.54 'Florida']\n [144372.41 118671.85 383199.62 'New York']\n [142107.34 91391.77 366168.42 'Florida']\n [131876.9 99814.71 362861.36 'New York']\n [134615.46 147198.87 127716.82 'California']\n [130298.13 145530.06 323876.68 'Florida']\n [120542.52 148718.95 311613.29 'New York']\n [123334.88 108679.17 304981.62 'California']]\nlabelencoder:\n[[165349.2 136897.8 471784.1 2]\n [162597.7 151377.59 443898.53 0]\n [153441.51 101145.55 407934.54 1]\n [144372.41 118671.85 383199.62 2]\n [142107.34 91391.77 366168.42 1]\n [131876.9 99814.71 362861.36 2]\n [134615.46 147198.87 127716.82 0]\n [130298.13 145530.06 323876.68 1]\n [120542.52 148718.95 311613.29 2]\n [123334.88 108679.17 304981.62 0]]\nonehot:\n[[0.0 0.0 1.0 165349.2 136897.8 471784.1]\n [1.0 0.0 0.0 162597.7 151377.59 443898.53]\n [0.0 1.0 0.0 153441.51 101145.55 407934.54]\n [0.0 0.0 1.0 144372.41 118671.85 383199.62]\n [0.0 1.0 0.0 142107.34 91391.77 366168.42]\n [0.0 0.0 1.0 131876.9 99814.71 362861.36]\n [1.0 0.0 0.0 134615.46 147198.87 127716.82]\n [0.0 1.0 0.0 130298.13 145530.06 323876.68]\n [0.0 0.0 1.0 120542.52 148718.95 311613.29]\n [1.0 0.0 0.0 123334.88 108679.17 304981.62]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n",
|
||||
"from sklearn.compose import ColumnTransformer \n",
|
||||
"labelencoder = LabelEncoder()\n",
|
||||
"print(\"original:\")\n",
|
||||
"print(X[:10])\n",
|
||||
"#print(X[: , 3])\n",
|
||||
"X[: , 3] = labelencoder.fit_transform(X[ : , 3])\n",
|
||||
"#print(X[: , 3])\n",
|
||||
"print(\"labelencoder:\")\n",
|
||||
"print(X[:10])\n",
|
||||
"onehotencoder = OneHotEncoder(categorical_features = [3])\n",
|
||||
"X = onehotencoder.fit_transform(X).toarray()\n",
|
||||
"ct = ColumnTransformer([( \"encoder\", OneHotEncoder(), [3])], remainder = 'passthrough')\n",
|
||||
"X = ct.fit_transform(X)\n",
|
||||
"#onehotencoder = OneHotEncoder(categorical_features = [3])\n",
|
||||
"#X = onehotencoder.fit_transform(X).toarray()\n",
|
||||
"print(\"onehot:\")\n",
|
||||
"print(X[:10])"
|
||||
]
|
||||
@ -156,13 +156,31 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"execution_count": 61,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X1 = X[: , 1:]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 62,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"[[0.0 1.0 165349.2 136897.8 471784.1]\n [0.0 0.0 162597.7 151377.59 443898.53]\n [1.0 0.0 153441.51 101145.55 407934.54]\n [0.0 1.0 144372.41 118671.85 383199.62]\n [1.0 0.0 142107.34 91391.77 366168.42]\n [0.0 1.0 131876.9 99814.71 362861.36]\n [0.0 0.0 134615.46 147198.87 127716.82]\n [1.0 0.0 130298.13 145530.06 323876.68]\n [0.0 1.0 120542.52 148718.95 311613.29]\n [0.0 0.0 123334.88 108679.17 304981.62]\n [1.0 0.0 101913.08 110594.11 229160.95]\n [0.0 0.0 100671.96 91790.61 249744.55]\n [1.0 0.0 93863.75 127320.38 249839.44]\n [0.0 0.0 91992.39 135495.07 252664.93]\n [1.0 0.0 119943.24 156547.42 256512.92]\n [0.0 1.0 114523.61 122616.84 261776.23]\n [0.0 0.0 78013.11 121597.55 264346.06]\n [0.0 1.0 94657.16 145077.58 282574.31]\n [1.0 0.0 91749.16 114175.79 294919.57]\n [0.0 1.0 86419.7 153514.11 224494.78489361703]\n [0.0 0.0 76253.86 113867.3 298664.47]\n [0.0 1.0 78389.47 153773.43 299737.29]\n [1.0 0.0 73994.56 122782.75 303319.26]\n [1.0 0.0 67532.53 105751.03 304768.73]\n [0.0 1.0 77044.01 99281.34 140574.81]\n [0.0 0.0 64664.71 139553.16 137962.62]\n [1.0 0.0 75328.87 144135.98 134050.07]\n [0.0 1.0 72107.6 127864.55 353183.81]\n [1.0 0.0 66051.52 182645.56 118148.2]\n [0.0 1.0 65605.48 153032.06 107138.38]\n [1.0 0.0 61994.48 115641.28 91131.24]\n [0.0 1.0 61136.38 152701.92 88218.23]\n [0.0 0.0 63408.86 129219.61 46085.25]\n [1.0 0.0 55493.95 103057.49 214634.81]\n [0.0 0.0 46426.07 157693.92 210797.67]\n [0.0 1.0 46014.02 85047.44 205517.64]\n [1.0 0.0 28663.76 127056.21 201126.82]\n [0.0 0.0 44069.95 51283.14 197029.42]\n [0.0 1.0 20229.59 65947.93 185265.1]\n [0.0 0.0 38558.51 82982.09 174999.3]\n [0.0 0.0 28754.33 118546.05 172795.67]\n [1.0 0.0 27892.92 84710.77 164470.71]\n [0.0 0.0 23640.93 96189.63 148001.11]\n [0.0 1.0 15505.73 127382.3 35534.17]\n [0.0 0.0 22177.74 154806.14 28334.72]\n [0.0 1.0 1000.23 124153.04 1903.93]\n [1.0 0.0 1315.46 115816.21 297114.46]\n [0.0 0.0 76793.34958333334 135426.92 224494.78489361703]\n [0.0 1.0 542.05 
51743.15 224494.78489361703]\n [0.0 0.0 76793.34958333334 116983.8 45173.06]]\n[[0.0 0.0 1.0 165349.2 136897.8 471784.1]\n [1.0 0.0 0.0 162597.7 151377.59 443898.53]\n [0.0 1.0 0.0 153441.51 101145.55 407934.54]\n [0.0 0.0 1.0 144372.41 118671.85 383199.62]\n [0.0 1.0 0.0 142107.34 91391.77 366168.42]\n [0.0 0.0 1.0 131876.9 99814.71 362861.36]\n [1.0 0.0 0.0 134615.46 147198.87 127716.82]\n [0.0 1.0 0.0 130298.13 145530.06 323876.68]\n [0.0 0.0 1.0 120542.52 148718.95 311613.29]\n [1.0 0.0 0.0 123334.88 108679.17 304981.62]\n [0.0 1.0 0.0 101913.08 110594.11 229160.95]\n [1.0 0.0 0.0 100671.96 91790.61 249744.55]\n [0.0 1.0 0.0 93863.75 127320.38 249839.44]\n [1.0 0.0 0.0 91992.39 135495.07 252664.93]\n [0.0 1.0 0.0 119943.24 156547.42 256512.92]\n [0.0 0.0 1.0 114523.61 122616.84 261776.23]\n [1.0 0.0 0.0 78013.11 121597.55 264346.06]\n [0.0 0.0 1.0 94657.16 145077.58 282574.31]\n [0.0 1.0 0.0 91749.16 114175.79 294919.57]\n [0.0 0.0 1.0 86419.7 153514.11 224494.78489361703]\n [1.0 0.0 0.0 76253.86 113867.3 298664.47]\n [0.0 0.0 1.0 78389.47 153773.43 299737.29]\n [0.0 1.0 0.0 73994.56 122782.75 303319.26]\n [0.0 1.0 0.0 67532.53 105751.03 304768.73]\n [0.0 0.0 1.0 77044.01 99281.34 140574.81]\n [1.0 0.0 0.0 64664.71 139553.16 137962.62]\n [0.0 1.0 0.0 75328.87 144135.98 134050.07]\n [0.0 0.0 1.0 72107.6 127864.55 353183.81]\n [0.0 1.0 0.0 66051.52 182645.56 118148.2]\n [0.0 0.0 1.0 65605.48 153032.06 107138.38]\n [0.0 1.0 0.0 61994.48 115641.28 91131.24]\n [0.0 0.0 1.0 61136.38 152701.92 88218.23]\n [1.0 0.0 0.0 63408.86 129219.61 46085.25]\n [0.0 1.0 0.0 55493.95 103057.49 214634.81]\n [1.0 0.0 0.0 46426.07 157693.92 210797.67]\n [0.0 0.0 1.0 46014.02 85047.44 205517.64]\n [0.0 1.0 0.0 28663.76 127056.21 201126.82]\n [1.0 0.0 0.0 44069.95 51283.14 197029.42]\n [0.0 0.0 1.0 20229.59 65947.93 185265.1]\n [1.0 0.0 0.0 38558.51 82982.09 174999.3]\n [1.0 0.0 0.0 28754.33 118546.05 172795.67]\n [0.0 1.0 0.0 27892.92 84710.77 164470.71]\n [1.0 0.0 0.0 23640.93 
96189.63 148001.11]\n [0.0 0.0 1.0 15505.73 127382.3 35534.17]\n [1.0 0.0 0.0 22177.74 154806.14 28334.72]\n [0.0 0.0 1.0 1000.23 124153.04 1903.93]\n [0.0 1.0 0.0 1315.46 115816.21 297114.46]\n [1.0 0.0 0.0 76793.34958333334 135426.92 224494.78489361703]\n [0.0 0.0 1.0 542.05 51743.15 224494.78489361703]\n [1.0 0.0 0.0 76793.34958333334 116983.8 45173.06]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(X1)\n",
|
||||
"print(X)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@ -172,47 +190,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 39,
|
||||
"execution_count": 63,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"[[0.0000000e+00 1.0000000e+00 0.0000000e+00 6.6051520e+04 1.8264556e+05\n",
|
||||
" 1.1814820e+05]\n",
|
||||
" [1.0000000e+00 0.0000000e+00 0.0000000e+00 1.0067196e+05 9.1790610e+04\n",
|
||||
" 2.4974455e+05]\n",
|
||||
" [0.0000000e+00 1.0000000e+00 0.0000000e+00 1.0191308e+05 1.1059411e+05\n",
|
||||
" 2.2916095e+05]\n",
|
||||
" [0.0000000e+00 1.0000000e+00 0.0000000e+00 2.7892920e+04 8.4710770e+04\n",
|
||||
" 1.6447071e+05]\n",
|
||||
" [0.0000000e+00 1.0000000e+00 0.0000000e+00 1.5344151e+05 1.0114555e+05\n",
|
||||
" 4.0793454e+05]\n",
|
||||
" [0.0000000e+00 0.0000000e+00 1.0000000e+00 7.2107600e+04 1.2786455e+05\n",
|
||||
" 3.5318381e+05]\n",
|
||||
" [0.0000000e+00 0.0000000e+00 1.0000000e+00 2.0229590e+04 6.5947930e+04\n",
|
||||
" 1.8526510e+05]\n",
|
||||
" [0.0000000e+00 0.0000000e+00 1.0000000e+00 6.1136380e+04 1.5270192e+05\n",
|
||||
" 8.8218230e+04]\n",
|
||||
" [0.0000000e+00 1.0000000e+00 0.0000000e+00 7.3994560e+04 1.2278275e+05\n",
|
||||
" 3.0331926e+05]\n",
|
||||
" [0.0000000e+00 1.0000000e+00 0.0000000e+00 1.4210734e+05 9.1391770e+04\n",
|
||||
" 3.6616842e+05]]\n",
|
||||
"[103282.38 144259.4 146121.95 77798.83 191050.39 105008.31 81229.06\n",
|
||||
" 97483.56 110352.25 166187.94]\n",
|
||||
"[[1.0000000e+00 0.0000000e+00 6.6051520e+04 1.8264556e+05 1.1814820e+05]\n",
|
||||
" [0.0000000e+00 0.0000000e+00 1.0067196e+05 9.1790610e+04 2.4974455e+05]\n",
|
||||
" [1.0000000e+00 0.0000000e+00 1.0191308e+05 1.1059411e+05 2.2916095e+05]\n",
|
||||
" [1.0000000e+00 0.0000000e+00 2.7892920e+04 8.4710770e+04 1.6447071e+05]\n",
|
||||
" [1.0000000e+00 0.0000000e+00 1.5344151e+05 1.0114555e+05 4.0793454e+05]\n",
|
||||
" [0.0000000e+00 1.0000000e+00 7.2107600e+04 1.2786455e+05 3.5318381e+05]\n",
|
||||
" [0.0000000e+00 1.0000000e+00 2.0229590e+04 6.5947930e+04 1.8526510e+05]\n",
|
||||
" [0.0000000e+00 1.0000000e+00 6.1136380e+04 1.5270192e+05 8.8218230e+04]\n",
|
||||
" [1.0000000e+00 0.0000000e+00 7.3994560e+04 1.2278275e+05 3.0331926e+05]\n",
|
||||
" [1.0000000e+00 0.0000000e+00 1.4210734e+05 9.1391770e+04 3.6616842e+05]]\n",
|
||||
"[103282.38 144259.4 146121.95 77798.83 191050.39 105008.31 81229.06\n",
|
||||
" 97483.56 110352.25 166187.94]\n"
|
||||
"[[0.0 1.0 0.0 66051.52 182645.56 118148.2]\n [1.0 0.0 0.0 100671.96 91790.61 249744.55]\n [0.0 1.0 0.0 101913.08 110594.11 229160.95]\n [0.0 1.0 0.0 27892.92 84710.77 164470.71]\n [0.0 1.0 0.0 153441.51 101145.55 407934.54]\n [0.0 0.0 1.0 72107.6 127864.55 353183.81]\n [0.0 0.0 1.0 20229.59 65947.93 185265.1]\n [0.0 0.0 1.0 61136.38 152701.92 88218.23]\n [0.0 1.0 0.0 73994.56 122782.75 303319.26]\n [0.0 1.0 0.0 142107.34 91391.77 366168.42]]\n[103282.38 144259.4 146121.95 77798.83 191050.39 105008.31 81229.06\n 97483.56 110352.25 166187.94]\n[[1.0 0.0 66051.52 182645.56 118148.2]\n [0.0 0.0 100671.96 91790.61 249744.55]\n [1.0 0.0 101913.08 110594.11 229160.95]\n [1.0 0.0 27892.92 84710.77 164470.71]\n [1.0 0.0 153441.51 101145.55 407934.54]\n [0.0 1.0 72107.6 127864.55 353183.81]\n [0.0 1.0 20229.59 65947.93 185265.1]\n [0.0 1.0 61136.38 152701.92 88218.23]\n [1.0 0.0 73994.56 122782.75 303319.26]\n [1.0 0.0 142107.34 91391.77 366168.42]]\n[103282.38 144259.4 146121.95 77798.83 191050.39 105008.31 81229.06\n 97483.56 110352.25 166187.94]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -235,18 +220,18 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 40,
|
||||
"execution_count": 64,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "execute_result",
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)"
|
||||
"LinearRegression()"
|
||||
]
|
||||
},
|
||||
"execution_count": 40,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
"execution_count": 64
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
@ -266,7 +251,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 41,
|
||||
"execution_count": 65,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -276,19 +261,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 42,
|
||||
"execution_count": 66,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"[103015.20159796 132582.27760815 132447.73845173 71976.09851258\n",
|
||||
" 178537.48221051 116161.24230163 67851.69209676 98791.73374689\n",
|
||||
" 113969.43533011 167921.06569547]\n",
|
||||
"[103015.20159795 132582.27760817 132447.73845176 71976.09851257\n",
|
||||
" 178537.48221058 116161.24230165 67851.69209675 98791.73374686\n",
|
||||
" 113969.43533013 167921.06569553]\n"
|
||||
"[102388.94113041 121465.72713517 127340.57708619 71709.47538912\n 174211.0848 121771.65061494 68393.54360668 95588.5313349\n 116596.3467699 162514.07218551]\n[102388.94113046 121465.72713518 127340.57708619 71709.47538916\n 174211.08479987 121771.65061482 68393.5436067 95588.53133498\n 116596.34676982 162514.07218541]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -303,13 +283,6 @@
|
||||
"source": [
|
||||
"**完整的项目请前往Github项目100-Days-Of-ML-Code查看。有任何的建议或者意见欢迎在issue中提出~**"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@ -328,9 +301,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.5"
|
||||
"version": "3.8.3-final"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user