Merge branch 'master' into mybranch

This commit is contained in:
Yanchong Xu
2021-01-18 19:49:16 +08:00
committed by GitHub

View File

@ -1,76 +1,76 @@
# Day 1: Data Preprocessing
#
# Walks through a basic preprocessing pipeline on a small CSV dataset:
# load -> impute missing values -> encode categoricals -> split -> scale.
# Intermediate results are printed after every step.

# Step 1: Importing the libraries
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler


def _show(label, value):
    """Print *label* on its own line, followed by *value* on the next."""
    print(label)
    print(value)


# Step 2: Importing dataset
dataset = pd.read_csv('../datasets/Data.csv')
# Every column except the last is a feature; the last column is the label.
# Using -1 (rather than a hard-coded index) keeps Y consistent with the
# ":-1" slice used for X, so the label can never leak into the features.
X = dataset.iloc[:, :-1].values
Y = dataset.iloc[:, -1].values
print("Step 2: Importing dataset")
_show("X", X)
_show("Y", Y)

# Step 3: Handling the missing data
# SimpleImputer is the current scikit-learn API; it replaces the old
# sklearn.preprocessing.Imputer class. The mean is computed per column
# (the equivalent of the old "axis=0" option).
imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
# Only columns 1 and 2 are imputed; column 0 is encoded in Step 4.
X[:, 1:3] = imputer.fit_transform(X[:, 1:3])
print("---------------------")
print("Step 3: Handling the missing data")
# NOTE(review): the "step2" label looks like a leftover; kept so the
# script's output is unchanged.
print("step2")
_show("X", X)

# Step 4: Encoding categorical data
# One-hot encode column 0 and pass the remaining columns through untouched.
# sparse_threshold=0 forces a dense result, which the StandardScaler in
# Step 6 requires (it cannot centre sparse input with its default settings).
ct = ColumnTransformer(
    [("onehot", OneHotEncoder(), [0])],
    remainder='passthrough',
    sparse_threshold=0,
)
X = ct.fit_transform(X)
# The label column is mapped to integer class codes.
labelencoder_Y = LabelEncoder()
Y = labelencoder_Y.fit_transform(Y)
print("---------------------")
print("Step 4: Encoding categorical data")
_show("X", X)
_show("Y", Y)

# Step 5: Splitting the datasets into training sets and Test sets
# random_state is fixed so the 80/20 split is reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0
)
print("---------------------")
print("Step 5: Splitting the datasets into training sets and Test sets")
_show("X_train", X_train)
_show("X_test", X_test)
_show("Y_train", Y_train)
_show("Y_test", Y_test)

# Step 6: Feature Scaling
# Fit the scaler on the training set only, then apply the same statistics
# to the test set so no information from the test data leaks into training.
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
print("---------------------")
print("Step 6: Feature Scaling")
_show("X_train", X_train)
_show("X_test", X_test)