{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "5849e5b1", "metadata": {}, "outputs": [], "source": [ "# import required packages\n", "\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib as plt\n", "import seaborn as sns\n", "\n", "from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, train_test_split\n", "from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor\n", "from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error\n", "from sklearn.preprocessing import LabelEncoder\n", "\n", "import warnings\n", "warnings.filterwarnings('ignore')" ] }, { "cell_type": "markdown", "id": "6b77e2c3", "metadata": {}, "source": [ "## Preprocessing" ] }, { "cell_type": "code", "execution_count": 2, "id": "3725e933", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Unnamed: 0 | \n", "Channel | \n", "Week Day | \n", "TimeBand | \n", "Share | \n", "AMA | \n", "rate | \n", "daily reach | \n", "cume reach | \n", "ATS | \n", "Unrolled | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "7'23 | \n", "Aaj Tak | \n", "Saturday | \n", "02:00:00 - 02:30:00 | \n", "0.081305 | \n", "0.123363 | \n", "0.000433 | \n", "3.70 | \n", "3.700893 | \n", "00:01:00 | \n", "0.000000 | \n", "
1 | \n", "7'23 | \n", "Aaj Tak | \n", "Saturday | \n", "02:30:00 - 03:00:00 | \n", "0.469995 | \n", "0.394070 | \n", "0.001383 | \n", "11.82 | \n", "11.822103 | \n", "00:01:00 | \n", "0.000000 | \n", "
2 | \n", "7'23 | \n", "Aaj Tak | \n", "Saturday | \n", "03:00:00 - 03:30:00 | \n", "1.723084 | \n", "0.361537 | \n", "0.001269 | \n", "10.85 | \n", "10.846120 | \n", "00:01:00 | \n", "0.000000 | \n", "
3 | \n", "7'23 | \n", "Aaj Tak | \n", "Saturday | \n", "03:30:00 - 04:00:00 | \n", "2.019206 | \n", "0.251790 | \n", "0.000884 | \n", "7.55 | \n", "7.553692 | \n", "00:01:00 | \n", "0.000000 | \n", "
4 | \n", "7'23 | \n", "Aaj Tak | \n", "Saturday | \n", "04:00:00 - 04:30:00 | \n", "1.163916 | \n", "0.333603 | \n", "0.001171 | \n", "10.01 | \n", "10.008100 | \n", "00:01:00 | \n", "0.000000 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
12091 | \n", "15'23 | \n", "Aaj Tak | \n", "Friday | \n", "23:30:00 - 24:00:00 | \n", "0.315975 | \n", "6.315608 | \n", "0.028382 | \n", "52.33 | \n", "52.334241 | \n", "00:03:37 | \n", "1.870176 | \n", "
12092 | \n", "15'23 | \n", "Aaj Tak | \n", "Friday | \n", "24:00:00 - 24:30:00 | \n", "0.690376 | \n", "8.010992 | \n", "0.036001 | \n", "33.65 | \n", "33.651447 | \n", "00:07:09 | \n", "6.204409 | \n", "
12093 | \n", "15'23 | \n", "Aaj Tak | \n", "Friday | \n", "24:30:00 - 25:00:00 | \n", "1.313761 | \n", "8.575085 | \n", "0.038536 | \n", "26.97 | \n", "26.974041 | \n", "00:09:32 | \n", "6.526442 | \n", "
12094 | \n", "15'23 | \n", "Aaj Tak | \n", "Friday | \n", "25:00:00 - 25:30:00 | \n", "1.141046 | \n", "4.483507 | \n", "0.020149 | \n", "37.21 | \n", "37.214790 | \n", "00:03:37 | \n", "5.011646 | \n", "
12095 | \n", "15'23 | \n", "Aaj Tak | \n", "Friday | \n", "25:30:00 - 26:00:00 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.00 | \n", "0.000000 | \n", "0 | \n", "0.000000 | \n", "
12096 rows × 11 columns
\n", "\n", " | Week number | \n", "Channel | \n", "Week Day | \n", "TimeBand | \n", "Share | \n", "AMA | \n", "rate | \n", "daily reach | \n", "cume reach | \n", "ATS | \n", "Unrolled | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "7'23 | \n", "Aaj Tak | \n", "Saturday | \n", "02:00:00 - 02:30:00 | \n", "0.081305 | \n", "0.123363 | \n", "0.000433 | \n", "3.70 | \n", "3.700893 | \n", "00:01:00 | \n", "0.0 | \n", "
1 | \n", "7'23 | \n", "Aaj Tak | \n", "Saturday | \n", "02:30:00 - 03:00:00 | \n", "0.469995 | \n", "0.394070 | \n", "0.001383 | \n", "11.82 | \n", "11.822103 | \n", "00:01:00 | \n", "0.0 | \n", "
2 | \n", "7'23 | \n", "Aaj Tak | \n", "Saturday | \n", "03:00:00 - 03:30:00 | \n", "1.723084 | \n", "0.361537 | \n", "0.001269 | \n", "10.85 | \n", "10.846120 | \n", "00:01:00 | \n", "0.0 | \n", "
3 | \n", "7'23 | \n", "Aaj Tak | \n", "Saturday | \n", "03:30:00 - 04:00:00 | \n", "2.019206 | \n", "0.251790 | \n", "0.000884 | \n", "7.55 | \n", "7.553692 | \n", "00:01:00 | \n", "0.0 | \n", "
4 | \n", "7'23 | \n", "Aaj Tak | \n", "Saturday | \n", "04:00:00 - 04:30:00 | \n", "1.163916 | \n", "0.333603 | \n", "0.001171 | \n", "10.01 | \n", "10.008100 | \n", "00:01:00 | \n", "0.0 | \n", "
\n", " | Share | \n", "AMA | \n", "rate | \n", "daily reach | \n", "cume reach | \n", "Unrolled | \n", "
---|---|---|---|---|---|---|
count | \n", "12096.000000 | \n", "12096.000000 | \n", "12096.000000 | \n", "12096.000000 | \n", "12096.000000 | \n", "12096.000000 | \n", "
mean | \n", "0.904877 | \n", "3.638381 | \n", "0.031671 | \n", "30.726294 | \n", "30.726317 | \n", "3.487959 | \n", "
std | \n", "3.773260 | \n", "4.987969 | \n", "0.074512 | \n", "33.505783 | \n", "33.505793 | \n", "5.746293 | \n", "
min | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "
25% | \n", "0.089353 | \n", "0.122776 | \n", "0.003831 | \n", "3.000000 | \n", "3.002531 | \n", "0.000000 | \n", "
50% | \n", "0.199747 | \n", "2.192741 | \n", "0.015068 | \n", "22.730000 | \n", "22.732177 | \n", "0.974788 | \n", "
75% | \n", "0.482635 | \n", "5.174398 | \n", "0.029070 | \n", "46.930000 | \n", "46.932208 | \n", "4.620285 | \n", "
max | \n", "100.000000 | \n", "42.072407 | \n", "1.356598 | \n", "229.330000 | \n", "229.334577 | \n", "60.765814 | \n", "
\n", " | Week number | \n", "Channel | \n", "Week Day | \n", "TimeBand | \n", "Share | \n", "AMA | \n", "rate | \n", "daily reach | \n", "cume reach | \n", "ATS | \n", "Unrolled | \n", "Week_Day_Encoded | \n", "Time_Band_Encoded | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "7'23 | \n", "Aaj Tak | \n", "Saturday | \n", "02:00:00 - 02:30:00 | \n", "0.081305 | \n", "0.123363 | \n", "0.000433 | \n", "3.70 | \n", "3.700893 | \n", "00:01:00 | \n", "0.0 | \n", "2 | \n", "0 | \n", "
1 | \n", "7'23 | \n", "Aaj Tak | \n", "Saturday | \n", "02:30:00 - 03:00:00 | \n", "0.469995 | \n", "0.394070 | \n", "0.001383 | \n", "11.82 | \n", "11.822103 | \n", "00:01:00 | \n", "0.0 | \n", "2 | \n", "1 | \n", "
2 | \n", "7'23 | \n", "Aaj Tak | \n", "Saturday | \n", "03:00:00 - 03:30:00 | \n", "1.723084 | \n", "0.361537 | \n", "0.001269 | \n", "10.85 | \n", "10.846120 | \n", "00:01:00 | \n", "0.0 | \n", "2 | \n", "2 | \n", "
3 | \n", "7'23 | \n", "Aaj Tak | \n", "Saturday | \n", "03:30:00 - 04:00:00 | \n", "2.019206 | \n", "0.251790 | \n", "0.000884 | \n", "7.55 | \n", "7.553692 | \n", "00:01:00 | \n", "0.0 | \n", "2 | \n", "3 | \n", "
4 | \n", "7'23 | \n", "Aaj Tak | \n", "Saturday | \n", "04:00:00 - 04:30:00 | \n", "1.163916 | \n", "0.333603 | \n", "0.001171 | \n", "10.01 | \n", "10.008100 | \n", "00:01:00 | \n", "0.0 | \n", "2 | \n", "4 | \n", "
\n", " | Share | \n", "AMA | \n", "rate | \n", "daily reach | \n", "cume reach | \n", "Week_Day_Encoded | \n", "Time_Band_Encoded | \n", "
---|---|---|---|---|---|---|---|
11232 | \n", "0.043364 | \n", "0.080953 | \n", "0.000357 | \n", "2.43 | \n", "2.428586 | \n", "5 | \n", "0 | \n", "
11118 | \n", "0.319280 | \n", "7.050287 | \n", "0.031111 | \n", "45.37 | \n", "45.372124 | \n", "2 | \n", "30 | \n", "
9301 | \n", "0.090855 | \n", "5.284389 | \n", "0.023781 | \n", "60.32 | \n", "60.317940 | \n", "6 | \n", "37 | \n", "
3222 | \n", "0.402614 | \n", "0.207835 | \n", "0.000917 | \n", "4.82 | \n", "4.815343 | \n", "6 | \n", "6 | \n", "
10322 | \n", "12.873856 | \n", "0.064336 | \n", "0.015220 | \n", "1.93 | \n", "1.930081 | \n", "4 | \n", "2 | \n", "
\n", " | Unrolled | \n", "
---|---|
11232 | \n", "0.000000 | \n", "
11118 | \n", "0.000000 | \n", "
9301 | \n", "6.285889 | \n", "
3222 | \n", "0.473240 | \n", "
10322 | \n", "0.000000 | \n", "
\n", " | Share | \n", "AMA | \n", "rate | \n", "daily reach | \n", "cume reach | \n", "Week_Day_Encoded | \n", "Time_Band_Encoded | \n", "
---|---|---|---|---|---|---|---|
468 | \n", "0.152596 | \n", "9.820626 | \n", "0.043337 | \n", "94.61 | \n", "94.614234 | \n", "1 | \n", "36 | \n", "
11620 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.00 | \n", "0.000000 | \n", "6 | \n", "4 | \n", "
538 | \n", "0.969294 | \n", "3.181874 | \n", "0.014043 | \n", "34.30 | \n", "34.298911 | \n", "6 | \n", "10 | \n", "
5265 | \n", "0.064741 | \n", "2.991051 | \n", "0.013427 | \n", "41.62 | \n", "41.619074 | \n", "6 | \n", "33 | \n", "
7484 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.00 | \n", "0.000000 | \n", "3 | \n", "44 | \n", "
\n", " | Unrolled | \n", "
---|---|
468 | \n", "12.150886 | \n", "
11620 | \n", "0.000000 | \n", "
538 | \n", "1.480424 | \n", "
5265 | \n", "5.781056 | \n", "
7484 | \n", "0.000000 | \n", "
RandomForestRegressor(random_state=42)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
RandomForestRegressor(random_state=42)
RandomizedSearchCV(cv=5, estimator=RandomForestRegressor(),\n", " param_distributions={'criterion': ['squared_error',\n", " 'absolute_error'],\n", " 'max_depth': array([ 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,\n", " 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,\n", " 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]),\n", " 'n_estimators': array([10, 20, 30, 40]),\n", " 'random_state': array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,\n", " 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,\n", " 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,\n", " 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,\n", " 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])})In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
RandomizedSearchCV(cv=5, estimator=RandomForestRegressor(),\n", " param_distributions={'criterion': ['squared_error',\n", " 'absolute_error'],\n", " 'max_depth': array([ 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,\n", " 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,\n", " 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]),\n", " 'n_estimators': array([10, 20, 30, 40]),\n", " 'random_state': array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,\n", " 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,\n", " 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,\n", " 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,\n", " 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])})
RandomForestRegressor()
RandomForestRegressor()
RandomForestRegressor(criterion='absolute_error', max_depth=39, n_estimators=20,\n", " random_state=49)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
RandomForestRegressor(criterion='absolute_error', max_depth=39, n_estimators=20,\n", " random_state=49)