diff --git a/Code/Day 1_Data Prepocessing.ipynb b/Code/Day 1_Data Prepocessing.ipynb index d5ea181..ba886da 100644 --- a/Code/Day 1_Data Prepocessing.ipynb +++ b/Code/Day 1_Data Prepocessing.ipynb @@ -41,21 +41,23 @@ "metadata": {}, "outputs": [ { - "ename": "FileNotFoundError", - "evalue": "File b'Data.csv' does not exist", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdataset\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Data.csv'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mX\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdataset\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miloc\u001b[0m\u001b[0;34m[\u001b[0m \u001b[0;34m:\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mY\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdataset\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miloc\u001b[0m\u001b[0;34m[\u001b[0m \u001b[0;34m:\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Step 2: Importing dataset\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"X\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/usr/miniconda/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36mparser_f\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)\u001b[0m\n\u001b[1;32m 653\u001b[0m skip_blank_lines=skip_blank_lines)\n\u001b[1;32m 654\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 655\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 656\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 657\u001b[0m \u001b[0mparser_f\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/usr/miniconda/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 403\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 404\u001b[0m \u001b[0;31m# Create the parser.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 405\u001b[0;31m \u001b[0mparser\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mTextFileReader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 406\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 407\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mchunksize\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0miterator\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/usr/miniconda/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[1;32m 762\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'has_index_names'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'has_index_names'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 763\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 764\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_make_engine\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mengine\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 765\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 766\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/usr/miniconda/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m_make_engine\u001b[0;34m(self, engine)\u001b[0m\n\u001b[1;32m 983\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_make_engine\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mengine\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'c'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 984\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mengine\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'c'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 985\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mCParserWrapper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 986\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 987\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mengine\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'python'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/usr/miniconda/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, src, **kwds)\u001b[0m\n\u001b[1;32m 1603\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'allow_leading_cols'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex_col\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1604\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1605\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reader\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mparsers\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTextReader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msrc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1606\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1607\u001b[0m \u001b[0;31m# XXX\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader.__cinit__ (pandas/_libs/parsers.c:4209)\u001b[0;34m()\u001b[0m\n", - "\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._setup_parser_source (pandas/_libs/parsers.c:8873)\u001b[0;34m()\u001b[0m\n", - "\u001b[0;31mFileNotFoundError\u001b[0m: File b'Data.csv' does not exist" + "name": "stdout", + "output_type": "stream", + "text": [ + "Step 2: Importing dataset\n", + "X\n", + "[['France' 44.0 72000.0]\n", + " ['Spain' 27.0 48000.0]\n", + " ['Germany' 30.0 54000.0]\n", + " ['Spain' 38.0 61000.0]\n", + " ['Germany' 40.0 nan]\n", + " ['France' 35.0 58000.0]\n", + " ['Spain' nan 52000.0]\n", + " ['France' 48.0 79000.0]\n", + " ['Germany' 50.0 83000.0]\n", + " ['France' 37.0 67000.0]]\n", + "Y\n", + "['No' 'Yes' 'No' 'No' 'Yes' 'Yes' 'No' 'Yes' 'No' 'Yes']\n" ] } ], @@ -84,14 +86,23 @@ "metadata": {}, "outputs": [ { - "ename": "NameError", - "evalue": "name 'X' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpreprocessing\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mImputer\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mimputer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mImputer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmissing_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"NaN\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstrategy\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"mean\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mimputer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mimputer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m[\u001b[0m \u001b[0;34m:\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m[\u001b[0m \u001b[0;34m:\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mimputer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m[\u001b[0m \u001b[0;34m:\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"---------------------\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mNameError\u001b[0m: name 'X' is not defined" + "name": "stdout", + "output_type": "stream", + "text": [ + "---------------------\n", + "Step 3: Handling the missing data\n", + "step2\n", + "X\n", + "[['France' 44.0 72000.0]\n", + " ['Spain' 27.0 48000.0]\n", + " ['Germany' 30.0 54000.0]\n", + " ['Spain' 38.0 61000.0]\n", + " ['Germany' 40.0 63777.77777777778]\n", + " ['France' 35.0 58000.0]\n", + " ['Spain' 38.77777777777778 52000.0]\n", + " ['France' 48.0 79000.0]\n", + " ['Germany' 50.0 83000.0]\n", + " ['France' 37.0 67000.0]]\n" ] } ], @@ -121,14 +132,34 @@ "metadata": {}, "outputs": [ { - "ename": "NameError", - "evalue": "name 'X' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpreprocessing\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mLabelEncoder\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mOneHotEncoder\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mlabelencoder_X\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mLabelEncoder\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mX\u001b[0m\u001b[0;34m[\u001b[0m \u001b[0;34m:\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlabelencoder_X\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit_transform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m[\u001b[0m \u001b[0;34m:\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0;31m#Creating a dummy variable\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0monehotencoder\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mOneHotEncoder\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcategorical_features\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mNameError\u001b[0m: name 'X' is not defined" + "name": "stdout", + "output_type": "stream", + "text": [ + "---------------------\n", + "Step 4: Encoding categorical data\n", + "X\n", + "[[ 1.00000000e+00 0.00000000e+00 0.00000000e+00 4.40000000e+01\n", + " 7.20000000e+04]\n", + " [ 0.00000000e+00 0.00000000e+00 1.00000000e+00 2.70000000e+01\n", + " 4.80000000e+04]\n", + " [ 0.00000000e+00 1.00000000e+00 0.00000000e+00 3.00000000e+01\n", + " 5.40000000e+04]\n", + " [ 0.00000000e+00 0.00000000e+00 1.00000000e+00 3.80000000e+01\n", + " 6.10000000e+04]\n", + " [ 0.00000000e+00 1.00000000e+00 0.00000000e+00 4.00000000e+01\n", + " 6.37777778e+04]\n", + " [ 1.00000000e+00 0.00000000e+00 0.00000000e+00 3.50000000e+01\n", + " 5.80000000e+04]\n", + " [ 0.00000000e+00 0.00000000e+00 1.00000000e+00 3.87777778e+01\n", + " 5.20000000e+04]\n", + " [ 1.00000000e+00 0.00000000e+00 0.00000000e+00 4.80000000e+01\n", + " 7.90000000e+04]\n", + " [ 0.00000000e+00 1.00000000e+00 0.00000000e+00 5.00000000e+01\n", + " 8.30000000e+04]\n", + " [ 1.00000000e+00 0.00000000e+00 0.00000000e+00 3.70000000e+01\n", + " 6.70000000e+04]]\n", + "Y\n", + "[0 1 0 0 1 1 0 1 0 1]\n" ] } ], @@ -162,6 +193,40 @@ "execution_count": 5, "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "---------------------\n", + "Step 5: Splitting the datasets into training sets and Test sets\n", + "X_train\n", + "[[ 0.00000000e+00 1.00000000e+00 0.00000000e+00 4.00000000e+01\n", + " 6.37777778e+04]\n", + " [ 1.00000000e+00 0.00000000e+00 0.00000000e+00 3.70000000e+01\n", + " 6.70000000e+04]\n", + " [ 0.00000000e+00 0.00000000e+00 1.00000000e+00 2.70000000e+01\n", + " 4.80000000e+04]\n", + " [ 0.00000000e+00 0.00000000e+00 1.00000000e+00 3.87777778e+01\n", + " 5.20000000e+04]\n", + " [ 1.00000000e+00 0.00000000e+00 0.00000000e+00 4.80000000e+01\n", + " 7.90000000e+04]\n", + " [ 0.00000000e+00 0.00000000e+00 1.00000000e+00 3.80000000e+01\n", + " 6.10000000e+04]\n", + " [ 1.00000000e+00 0.00000000e+00 0.00000000e+00 4.40000000e+01\n", + " 7.20000000e+04]\n", + " [ 1.00000000e+00 0.00000000e+00 0.00000000e+00 3.50000000e+01\n", + " 5.80000000e+04]]\n", + "X_test\n", + "[[ 0.00000000e+00 1.00000000e+00 0.00000000e+00 3.00000000e+01\n", + " 5.40000000e+04]\n", + " [ 0.00000000e+00 1.00000000e+00 0.00000000e+00 5.00000000e+01\n", + " 8.30000000e+04]]\n", + "Y_train\n", + "[1 1 1 0 1 0 0 1]\n", + "Y_test\n", + "[0 0]\n" + ] + }, { "name": "stderr", "output_type": "stream", @@ -169,17 +234,6 @@ "/home/ymao/usr/miniconda/lib/python3.6/site-packages/sklearn/cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n", " \"This module will be removed in 0.20.\", DeprecationWarning)\n" ] - }, - { - "ename": "NameError", - "evalue": "name 'X' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcross_validation\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtrain_test_split\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY_test\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtrain_test_split\u001b[0m\u001b[0;34m(\u001b[0m \u001b[0mX\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mY\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mtest_size\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0.2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrandom_state\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"---------------------\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Step 5: Splitting the datasets into training sets and Test sets\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"X_train\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mNameError\u001b[0m: name 'X' is not defined" - ] } ], "source": [ @@ -211,14 +265,23 @@ "metadata": {}, "outputs": [ { - "ename": "NameError", - "evalue": "name 'X_train' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpreprocessing\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mStandardScaler\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0msc_X\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mStandardScaler\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mX_train\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msc_X\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit_transform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0mX_test\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msc_X\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit_transform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"---------------------\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mNameError\u001b[0m: name 'X_train' is not defined" + "name": "stdout", + "output_type": "stream", + "text": [ + "---------------------\n", + "Step 6: Feature Scaling\n", + "X_train\n", + "[[-1. 2.64575131 -0.77459667 0.26306757 0.12381479]\n", + " [ 1. -0.37796447 -0.77459667 -0.25350148 0.46175632]\n", + " [-1. -0.37796447 1.29099445 -1.97539832 -1.53093341]\n", + " [-1. -0.37796447 1.29099445 0.05261351 -1.11141978]\n", + " [ 1. -0.37796447 -0.77459667 1.64058505 1.7202972 ]\n", + " [-1. -0.37796447 1.29099445 -0.0813118 -0.16751412]\n", + " [ 1. -0.37796447 -0.77459667 0.95182631 0.98614835]\n", + " [ 1. -0.37796447 -0.77459667 -0.59788085 -0.48214934]]\n", + "X_test\n", + "[[ 0. 0. 0. -1. -1.]\n", + " [ 0. 0. 0. 1. 1.]]\n" ] } ],