Merge branch 'master' into mybranch
This commit is contained in:
@ -27,6 +27,63 @@
|
||||
"import pandas as pd"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"[[ 7. 2. 3. ]\n [ 4. 3.5 6. ]\n [10. 3.5 9. ]]\nSklearn verion is 0.23.1\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import sklearn\n",
|
||||
"from sklearn.impute import SimpleImputer\n",
|
||||
"#This block is an example used to learn SimpleImputer\n",
|
||||
"imp_mean = SimpleImputer(missing_values=np.nan, strategy='mean')\n",
|
||||
"imp_mean.fit([[7, 2, 3], [4, np.nan, 6], [10, 5, 9]])\n",
|
||||
"X = [[np.nan, 2, 3], [4, np.nan, 6], [10, np.nan, 9]]\n",
|
||||
"print(imp_mean.transform(X))\n",
|
||||
"print(\"Sklearn verion is {}\".format(sklearn.__version__))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"source": [
|
||||
"from sklearn.preprocessing import OneHotEncoder\n",
|
||||
"enc = OneHotEncoder(handle_unknown='ignore')\n",
|
||||
"X = [['Male', 1], ['Female', 3], ['Female', 2]]\n",
|
||||
">>> enc.fit(X)\n",
|
||||
"OneHotEncoder(handle_unknown='ignore')\n",
|
||||
">>> enc.categories_\n",
|
||||
"[array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)]\n",
|
||||
">>> enc.transform([['Female', 1], ['Male', 4]]).toarray()\n",
|
||||
"array([[1., 0., 1., 0., 0.],\n",
|
||||
" [0., 1., 0., 0., 0.]])\n",
|
||||
">>> enc.inverse_transform([[0, 1, 1, 0, 0], [0, 0, 0, 1, 0]])\n",
|
||||
"array([['Male', 1],\n",
|
||||
" [None, 2]], dtype=object)\n",
|
||||
">>> enc.get_feature_names(['gender', 'group'])\n",
|
||||
"array(['gender_Female', 'gender_Male', 'group_1', 'group_2', 'group_3'],\n",
|
||||
" dtype=object)"
|
||||
],
|
||||
"cell_type": "code",
|
||||
"metadata": {},
|
||||
"execution_count": 4,
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "error",
|
||||
"ename": "SyntaxError",
|
||||
"evalue": "invalid syntax (<ipython-input-4-44f585aeb41d>, line 4)",
|
||||
"traceback": [
|
||||
"\u001b[1;36m File \u001b[1;32m\"<ipython-input-4-44f585aeb41d>\"\u001b[1;36m, line \u001b[1;32m4\u001b[0m\n\u001b[1;33m >>> enc.fit(X)\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax\n"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@ -44,7 +101,7 @@
|
||||
"output_type": "stream",
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"Step 2: Importing dataset\nX\n[['France' 44.0 72000.0]\n ['Spain' 27.0 48000.0]\n ['Germany' 30.0 54000.0]\n ['Spain' 38.0 61000.0]\n ['Germany' 40.0 nan]\n ['France' 35.0 58000.0]\n ['Spain' nan 52000.0]\n ['France' 48.0 79000.0]\n ['Germany' 50.0 83000.0]\n ['France' 37.0 67000.0]]\nY\n['No' 'Yes' 'No' 'No' 'Yes' 'Yes' 'No' 'Yes' 'No' 'Yes']\n"
|
||||
"Step 2: Importing dataset\nX\n[['France' 44.0 72000.0]\n ['Spain' 27.0 48000.0]\n ['Germany' 30.0 54000.0]\n ['Spain' 38.0 61000.0]\n ['Germany' 40.0 nan]\n ['France' 35.0 58000.0]\n ['Spain' nan 52000.0]\n ['France' 48.0 79000.0]\n ['Germany' 50.0 83000.0]\n ['France' 37.0 67000.0]]\nY\n['No' 'Yes' 'No' 'No' 'Yes' 'Yes' 'No' 'Yes' 'No' 'Yes']\n[[44.0 72000.0]\n [27.0 48000.0]\n [30.0 54000.0]\n [38.0 61000.0]\n [40.0 nan]\n [35.0 58000.0]\n [nan 52000.0]\n [48.0 79000.0]\n [50.0 83000.0]\n [37.0 67000.0]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -58,7 +115,8 @@
|
||||
"print(\"X\")\n",
|
||||
"print(X)\n",
|
||||
"print(\"Y\")\n",
|
||||
"print(Y)"
|
||||
"print(Y)\n",
|
||||
"print(X[ : , 1:3])"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -72,6 +130,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 53,
|
||||
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -89,8 +148,12 @@
|
||||
"#from sklearn.preprocessing import Imputer\n",
|
||||
"# axis=0 means the operation is applied column by column\n",
|
||||
"#imputer = Imputer(missing_values = \"NaN\", strategy = \"mean\", axis = 0)\n",
|
||||
"imputer = imputer.fit(X[ : , 1:3])\n",
|
||||
"X[ : , 1:3] = imputer.transform(X[ : , 1:3])\n",
|
||||
"#print(imputer)\n",
|
||||
"#\n",
|
||||
"# print(X[ : , 1:3])\n",
|
||||
"imputer = imputer.fit(X[ : , 1:3]) # fit the imputer on the data we want to process\n",
|
||||
"X[ : , 1:3] = imputer.transform(X[ : , 1:3]) # replace the np.nan values with the mean\n",
|
||||
"#print(X[ : , 1:3])\n",
|
||||
"print(\"---------------------\")\n",
|
||||
"print(\"Step 3: Handling the missing data\")\n",
|
||||
"print(\"step2\")\n",
|
||||
@ -116,11 +179,13 @@
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"---------------------\nStep 4: Encoding categorical data\nX\n[[1.0 0.0 0.0 44.0 72000.0]\n [0.0 0.0 1.0 27.0 48000.0]\n [0.0 1.0 0.0 30.0 54000.0]\n [0.0 0.0 1.0 38.0 61000.0]\n [0.0 1.0 0.0 40.0 63777.77777777778]\n [1.0 0.0 0.0 35.0 58000.0]\n [0.0 0.0 1.0 38.77777777777778 52000.0]\n [1.0 0.0 0.0 48.0 79000.0]\n [0.0 1.0 0.0 50.0 83000.0]\n [1.0 0.0 0.0 37.0 67000.0]]\nY\n[0 1 0 0 1 1 0 1 0 1]\n"
|
||||
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n",
|
||||
|
||||
"from sklearn.compose import ColumnTransformer \n",
|
||||
"#labelencoder_X = LabelEncoder()\n",
|
||||
"#X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])\n",
|
||||
@ -194,6 +259,7 @@
|
||||
"name": "stdout",
|
||||
"text": [
|
||||
"---------------------\nStep 6: Feature Scaling\nX_train\n[[-1. 2.64575131 -0.77459667 0.26306757 0.12381479]\n [ 1. -0.37796447 -0.77459667 -0.25350148 0.46175632]\n [-1. -0.37796447 1.29099445 -1.97539832 -1.53093341]\n [-1. -0.37796447 1.29099445 0.05261351 -1.11141978]\n [ 1. -0.37796447 -0.77459667 1.64058505 1.7202972 ]\n [-1. -0.37796447 1.29099445 -0.0813118 -0.16751412]\n [ 1. -0.37796447 -0.77459667 0.95182631 0.98614835]\n [ 1. -0.37796447 -0.77459667 -0.59788085 -0.48214934]]\nX_test\n[[-1. 2.64575131 -0.77459667 -1.45882927 -0.90166297]\n [-1. 2.64575131 -0.77459667 1.98496442 2.13981082]]\n"
|
||||
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -207,7 +273,7 @@
|
||||
"print(\"X_train\")\n",
|
||||
"print(X_train)\n",
|
||||
"print(\"X_test\")\n",
|
||||
"print(X_test)"
|
||||
"print(X_test)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -220,9 +286,13 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
"name": "python3",
|
||||
"display_name": "Python 3.8.3 64-bit (conda)",
|
||||
"metadata": {
|
||||
"interpreter": {
|
||||
"hash": "1b78ff499ec469310b6a6795c4effbbfc85eb20a6ba0cf828a15721670711b2c"
|
||||
}
|
||||
}
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
|
||||
Reference in New Issue
Block a user