Update Day 1_Data_Preprocessing.py

This commit is contained in:
yx-xyc
2021-01-14 12:42:31 +08:00
parent 4cd3341120
commit fac63b9c7c

View File

@ -16,10 +16,11 @@ print(Y)
#Step 3: Handling the missing data
# NOTE(review): this listing looks like a rendered diff with removed and added
# lines interleaved -- `imputer` is assigned from both `Imputer` and
# `SimpleImputer` below. Confirm which pair belongs to the current file.
# If you use the newest version of sklearn, use the lines of code commented out
# from sklearn.impute import SimpleImputer
# imputer = SimpleImputer(missing_values="NaN", strategy="mean")
from sklearn.preprocessing import Imputer
imputer = Imputer(missing_values = np.nan, strategy = "mean", axis = 0)
# SimpleImputer variant: np.nan entries are the missing values, and the
# "mean" strategy is used to fill them.
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
#from sklearn.preprocessing import Imputer
# axis=0 means the imputation is performed column-wise
#imputer = Imputer(missing_values = "NaN", strategy = "mean", axis = 0)
# Fit on columns 1 and 2 of X (slice 1:3), then overwrite those same columns
# in place with the imputed values.
imputer = imputer.fit(X[ : , 1:3])
X[ : , 1:3] = imputer.transform(X[ : , 1:3])
print("---------------------")
@ -30,11 +31,15 @@ print(X)
#Step 4: Encoding categorical data
# NOTE(review): this listing looks like a rendered diff with removed and added
# lines interleaved -- both the LabelEncoder/OneHotEncoder path and the
# ColumnTransformer path for X are present. Confirm which one is current.
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
# Replace column 0 of X with integer labels.
labelencoder_X = LabelEncoder()
X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])
from sklearn.compose import ColumnTransformer
#labelencoder_X = LabelEncoder()
#X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])
#Creating a dummy variable
# One-hot encodes all of X and converts the result to a dense array.
onehotencoder = OneHotEncoder('auto')
X = onehotencoder.fit_transform(X).toarray()
#print(X)
# Apply OneHotEncoder to column 0 only; remainder='passthrough' keeps the
# other columns of X instead of dropping them. The transformer is registered
# under an empty-string name.
ct = ColumnTransformer([("", OneHotEncoder(), [0])], remainder = 'passthrough')
X = ct.fit_transform(X)
#onehotencoder = OneHotEncoder(categorical_features = [0])
#X = onehotencoder.fit_transform(X).toarray()
# Encode the target Y as integer labels.
labelencoder_Y = LabelEncoder()
Y = labelencoder_Y.fit_transform(Y)
print("---------------------")