Update Day 1_Data_Preprocessing.py
This commit is contained in:
@ -16,10 +16,11 @@ print(Y)
|
|||||||
|
|
||||||
#Step 3: Handling the missing data
|
#Step 3: Handling the missing data
|
||||||
# If you use the newest version of sklearn, use the lines of code commented out
|
# If you use an older version of sklearn (before 0.20, which has no SimpleImputer), use the commented-out Imputer lines instead
|
||||||
# from sklearn.impute import SimpleImputer
|
from sklearn.impute import SimpleImputer
|
||||||
# imputer = SimpleImputer(missing_values="NaN", strategy="mean")
|
imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
|
||||||
from sklearn.preprocessing import Imputer
|
#from sklearn.preprocessing import Imputer
|
||||||
imputer = Imputer(missing_values = np.nan, strategy = "mean", axis = 0)
|
# axis=0 means imputing column by column (applies to the legacy Imputer call commented out below)
|
||||||
|
#imputer = Imputer(missing_values = "NaN", strategy = "mean", axis = 0)
|
||||||
imputer = imputer.fit(X[ : , 1:3])
|
imputer = imputer.fit(X[ : , 1:3])
|
||||||
X[ : , 1:3] = imputer.transform(X[ : , 1:3])
|
X[ : , 1:3] = imputer.transform(X[ : , 1:3])
|
||||||
print("---------------------")
|
print("---------------------")
|
||||||
@ -30,11 +31,15 @@ print(X)
|
|||||||
|
|
||||||
#Step 4: Encoding categorical data
|
#Step 4: Encoding categorical data
|
||||||
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
|
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
|
||||||
labelencoder_X = LabelEncoder()
|
from sklearn.compose import ColumnTransformer
|
||||||
X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])
|
#labelencoder_X = LabelEncoder()
|
||||||
|
#X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])
|
||||||
#Creating a dummy variable
|
#Creating a dummy variable
|
||||||
onehotencoder = OneHotEncoder('auto')
|
#print(X)
|
||||||
X = onehotencoder.fit_transform(X).toarray()
|
ct = ColumnTransformer([("", OneHotEncoder(), [0])], remainder = 'passthrough')
|
||||||
|
X = ct.fit_transform(X)
|
||||||
|
#onehotencoder = OneHotEncoder(categorical_features = [0])
|
||||||
|
#X = onehotencoder.fit_transform(X).toarray()
|
||||||
labelencoder_Y = LabelEncoder()
|
labelencoder_Y = LabelEncoder()
|
||||||
Y = labelencoder_Y.fit_transform(Y)
|
Y = labelencoder_Y.fit_transform(Y)
|
||||||
print("---------------------")
|
print("---------------------")
|
||||||
|
|||||||
Reference in New Issue
Block a user