{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "5849e5b1", "metadata": {}, "outputs": [], "source": [ "# import required packages\n", "\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib as plt\n", "import seaborn as sns\n", "\n", "from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, train_test_split\n", "from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor\n", "from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error\n", "from sklearn.preprocessing import LabelEncoder\n", "\n", "import warnings\n", "warnings.filterwarnings('ignore')" ] }, { "cell_type": "markdown", "id": "6b77e2c3", "metadata": {}, "source": [ "## Preporcessing" ] }, { "cell_type": "code", "execution_count": 2, "id": "3725e933", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0ChannelWeek DayTimeBandShareAMAratedaily reachcume reachATSUnrolled
07'23Aaj TakSaturday02:00:00 - 02:30:000.0813050.1233630.0004333.703.70089300:01:000.000000
17'23Aaj TakSaturday02:30:00 - 03:00:000.4699950.3940700.00138311.8211.82210300:01:000.000000
27'23Aaj TakSaturday03:00:00 - 03:30:001.7230840.3615370.00126910.8510.84612000:01:000.000000
37'23Aaj TakSaturday03:30:00 - 04:00:002.0192060.2517900.0008847.557.55369200:01:000.000000
47'23Aaj TakSaturday04:00:00 - 04:30:001.1639160.3336030.00117110.0110.00810000:01:000.000000
....................................
1209115'23Aaj TakFriday23:30:00 - 24:00:000.3159756.3156080.02838252.3352.33424100:03:371.870176
1209215'23Aaj TakFriday24:00:00 - 24:30:000.6903768.0109920.03600133.6533.65144700:07:096.204409
1209315'23Aaj TakFriday24:30:00 - 25:00:001.3137618.5750850.03853626.9726.97404100:09:326.526442
1209415'23Aaj TakFriday25:00:00 - 25:30:001.1410464.4835070.02014937.2137.21479000:03:375.011646
1209515'23Aaj TakFriday25:30:00 - 26:00:000.0000000.0000000.0000000.000.00000000.000000
\n", "

12096 rows × 11 columns

\n", "
" ], "text/plain": [ " Unnamed: 0 Channel Week Day TimeBand Share AMA \\\n", "0 7'23 Aaj Tak Saturday 02:00:00 - 02:30:00 0.081305 0.123363 \n", "1 7'23 Aaj Tak Saturday 02:30:00 - 03:00:00 0.469995 0.394070 \n", "2 7'23 Aaj Tak Saturday 03:00:00 - 03:30:00 1.723084 0.361537 \n", "3 7'23 Aaj Tak Saturday 03:30:00 - 04:00:00 2.019206 0.251790 \n", "4 7'23 Aaj Tak Saturday 04:00:00 - 04:30:00 1.163916 0.333603 \n", "... ... ... ... ... ... ... \n", "12091 15'23 Aaj Tak Friday 23:30:00 - 24:00:00 0.315975 6.315608 \n", "12092 15'23 Aaj Tak Friday 24:00:00 - 24:30:00 0.690376 8.010992 \n", "12093 15'23 Aaj Tak Friday 24:30:00 - 25:00:00 1.313761 8.575085 \n", "12094 15'23 Aaj Tak Friday 25:00:00 - 25:30:00 1.141046 4.483507 \n", "12095 15'23 Aaj Tak Friday 25:30:00 - 26:00:00 0.000000 0.000000 \n", "\n", " rate daily reach cume reach ATS Unrolled \n", "0 0.000433 3.70 3.700893 00:01:00 0.000000 \n", "1 0.001383 11.82 11.822103 00:01:00 0.000000 \n", "2 0.001269 10.85 10.846120 00:01:00 0.000000 \n", "3 0.000884 7.55 7.553692 00:01:00 0.000000 \n", "4 0.001171 10.01 10.008100 00:01:00 0.000000 \n", "... ... ... ... ... ... \n", "12091 0.028382 52.33 52.334241 00:03:37 1.870176 \n", "12092 0.036001 33.65 33.651447 00:07:09 6.204409 \n", "12093 0.038536 26.97 26.974041 00:09:32 6.526442 \n", "12094 0.020149 37.21 37.214790 00:03:37 5.011646 \n", "12095 0.000000 0.00 0.000000 0 0.000000 \n", "\n", "[12096 rows x 11 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# read the dataset\n", "\n", "df = pd.read_excel(\"input_raw_data.xlsx\")\n", "df" ] }, { "cell_type": "code", "execution_count": 3, "id": "cc260fc7", "metadata": {}, "outputs": [], "source": [ "df.rename(columns={'Unnamed: 0':'Week number'}, inplace=True)" ] }, { "cell_type": "code", "execution_count": 4, "id": "bfee3282", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Week numberChannelWeek DayTimeBandShareAMAratedaily reachcume reachATSUnrolled
07'23Aaj TakSaturday02:00:00 - 02:30:000.0813050.1233630.0004333.703.70089300:01:000.0
17'23Aaj TakSaturday02:30:00 - 03:00:000.4699950.3940700.00138311.8211.82210300:01:000.0
27'23Aaj TakSaturday03:00:00 - 03:30:001.7230840.3615370.00126910.8510.84612000:01:000.0
37'23Aaj TakSaturday03:30:00 - 04:00:002.0192060.2517900.0008847.557.55369200:01:000.0
47'23Aaj TakSaturday04:00:00 - 04:30:001.1639160.3336030.00117110.0110.00810000:01:000.0
\n", "
" ], "text/plain": [ " Week number Channel Week Day TimeBand Share AMA \\\n", "0 7'23 Aaj Tak Saturday 02:00:00 - 02:30:00 0.081305 0.123363 \n", "1 7'23 Aaj Tak Saturday 02:30:00 - 03:00:00 0.469995 0.394070 \n", "2 7'23 Aaj Tak Saturday 03:00:00 - 03:30:00 1.723084 0.361537 \n", "3 7'23 Aaj Tak Saturday 03:30:00 - 04:00:00 2.019206 0.251790 \n", "4 7'23 Aaj Tak Saturday 04:00:00 - 04:30:00 1.163916 0.333603 \n", "\n", " rate daily reach cume reach ATS Unrolled \n", "0 0.000433 3.70 3.700893 00:01:00 0.0 \n", "1 0.001383 11.82 11.822103 00:01:00 0.0 \n", "2 0.001269 10.85 10.846120 00:01:00 0.0 \n", "3 0.000884 7.55 7.553692 00:01:00 0.0 \n", "4 0.001171 10.01 10.008100 00:01:00 0.0 " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 5, "id": "e53ee7c9", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 12096 entries, 0 to 12095\n", "Data columns (total 11 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 Week number 12096 non-null object \n", " 1 Channel 12096 non-null object \n", " 2 Week Day 12096 non-null object \n", " 3 TimeBand 12096 non-null object \n", " 4 Share 12096 non-null float64\n", " 5 AMA 12096 non-null float64\n", " 6 rate 12096 non-null float64\n", " 7 daily reach 12096 non-null float64\n", " 8 cume reach 12096 non-null float64\n", " 9 ATS 12096 non-null object \n", " 10 Unrolled 12096 non-null float64\n", "dtypes: float64(6), object(5)\n", "memory usage: 1.0+ MB\n" ] } ], "source": [ "df.info()" ] }, { "cell_type": "code", "execution_count": 6, "id": "31fd40e9", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ShareAMAratedaily reachcume reachUnrolled
count12096.00000012096.00000012096.00000012096.00000012096.00000012096.000000
mean0.9048773.6383810.03167130.72629430.7263173.487959
std3.7732604.9879690.07451233.50578333.5057935.746293
min0.0000000.0000000.0000000.0000000.0000000.000000
25%0.0893530.1227760.0038313.0000003.0025310.000000
50%0.1997472.1927410.01506822.73000022.7321770.974788
75%0.4826355.1743980.02907046.93000046.9322084.620285
max100.00000042.0724071.356598229.330000229.33457760.765814
\n", "
" ], "text/plain": [ " Share AMA rate daily reach cume reach \\\n", "count 12096.000000 12096.000000 12096.000000 12096.000000 12096.000000 \n", "mean 0.904877 3.638381 0.031671 30.726294 30.726317 \n", "std 3.773260 4.987969 0.074512 33.505783 33.505793 \n", "min 0.000000 0.000000 0.000000 0.000000 0.000000 \n", "25% 0.089353 0.122776 0.003831 3.000000 3.002531 \n", "50% 0.199747 2.192741 0.015068 22.730000 22.732177 \n", "75% 0.482635 5.174398 0.029070 46.930000 46.932208 \n", "max 100.000000 42.072407 1.356598 229.330000 229.334577 \n", "\n", " Unrolled \n", "count 12096.000000 \n", "mean 3.487959 \n", "std 5.746293 \n", "min 0.000000 \n", "25% 0.000000 \n", "50% 0.974788 \n", "75% 4.620285 \n", "max 60.765814 " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.describe()" ] }, { "cell_type": "code", "execution_count": 7, "id": "741765e3", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Week number\n", "7'23 1344\n", "8'23 1344\n", "9'23 1344\n", "10'23 1344\n", "11'23 1344\n", "12'23 1344\n", "13'23 1344\n", "14'23 1344\n", "15'23 1344\n", "Name: count, dtype: int64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Count values of Week number\n", "df['Week number'].value_counts() # we have records of from 7 to 15" ] }, { "cell_type": "code", "execution_count": 8, "id": "894d2430", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Channel\n", "Aaj Tak 12096\n", "Name: count, dtype: int64" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Count values of Channel\n", "df['Channel'].value_counts()" ] }, { "cell_type": "code", "execution_count": 9, "id": "abbc65aa", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Week Day\n", "Saturday 1728\n", "Sunday 1728\n", "Monday 1728\n", "Tuesday 1728\n", "Wednesday 1728\n", "Thursday 1728\n", "Friday 1728\n", "Name: count, dtype: int64" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Count values of Week Day\n", "df['Week Day'].value_counts() # from Sunday to Monday" ] }, { "cell_type": "code", "execution_count": 10, "id": "24a0ea3a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "TimeBand\n", "02:00:00 - 02:30:00 252\n", "02:30:00 - 03:00:00 252\n", "15:00:00 - 15:30:00 252\n", "15:30:00 - 16:00:00 252\n", "16:00:00 - 16:30:00 252\n", "16:30:00 - 17:00:00 252\n", "17:00:00 - 17:30:00 252\n", "17:30:00 - 18:00:00 252\n", "18:00:00 - 18:30:00 252\n", "18:30:00 - 19:00:00 252\n", "19:00:00 - 19:30:00 252\n", "19:30:00 - 20:00:00 252\n", "20:00:00 - 20:30:00 252\n", "20:30:00 - 21:00:00 252\n", "21:00:00 - 21:30:00 252\n", "21:30:00 - 22:00:00 252\n", "22:00:00 - 22:30:00 252\n", "22:30:00 - 23:00:00 252\n", "23:00:00 - 23:30:00 252\n", "23:30:00 - 24:00:00 252\n", "24:00:00 - 24:30:00 252\n", "24:30:00 - 25:00:00 252\n", "25:00:00 - 25:30:00 252\n", "14:30:00 - 15:00:00 252\n", "14:00:00 - 14:30:00 252\n", "13:30:00 - 14:00:00 252\n", "07:30:00 - 08:00:00 252\n", "03:00:00 - 03:30:00 252\n", "03:30:00 - 04:00:00 252\n", "04:00:00 - 04:30:00 252\n", "04:30:00 - 05:00:00 252\n", "05:00:00 - 05:30:00 252\n", "05:30:00 - 06:00:00 252\n", "06:00:00 - 06:30:00 252\n", "06:30:00 - 07:00:00 252\n", "07:00:00 - 07:30:00 252\n", "08:00:00 - 08:30:00 252\n", "13:00:00 - 13:30:00 252\n", "08:30:00 - 09:00:00 252\n", "09:00:00 - 09:30:00 252\n", "09:30:00 - 10:00:00 252\n", "10:00:00 - 10:30:00 252\n", "10:30:00 - 11:00:00 252\n", "11:00:00 - 11:30:00 252\n", "11:30:00 - 12:00:00 252\n", "12:00:00 - 12:30:00 252\n", "12:30:00 - 13:00:00 252\n", "25:30:00 - 26:00:00 252\n", "Name: count, dtype: int64" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# count values of TimeBand\n", "df['TimeBand'].value_counts()" ] }, { "cell_type": "markdown", "id": "be8183bd", "metadata": {}, "source": [ "## Label Encoding" ] }, { "cell_type": "code", "execution_count": 11, "id": "877e32b9", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['Week number', 'Channel', 'Week Day', 'TimeBand', 'Share', 'AMA',\n", " 'rate', 'daily reach', 'cume reach', 'ATS', 'Unrolled'],\n", " dtype='object')" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.columns" ] }, { "cell_type": "code", "execution_count": 12, "id": "9f922296", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 12096 entries, 0 to 12095\n", "Data columns (total 11 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 Week number 12096 non-null object \n", " 1 Channel 12096 non-null object \n", " 2 Week Day 12096 non-null object \n", " 3 TimeBand 12096 non-null object \n", " 4 Share 12096 non-null float64\n", " 5 AMA 12096 non-null float64\n", " 6 rate 12096 non-null float64\n", " 7 daily reach 12096 non-null float64\n", " 8 cume reach 12096 non-null float64\n", " 9 ATS 12096 non-null object \n", " 10 Unrolled 12096 non-null float64\n", "dtypes: float64(6), object(5)\n", "memory usage: 1.0+ MB\n" ] } ], "source": [ "df.info()" ] }, { "cell_type": "code", "execution_count": 13, "id": "109ffb8d", "metadata": {}, "outputs": [], "source": [ "# Need to Label Encode columns like: \n", "# As of now Channel is not needed to encode as we are checking with AajTak only\n", "# 1: Week Day\n", "# 2: TimeBand" ] }, { "cell_type": "code", "execution_count": 14, "id": "e4fd0b0b", "metadata": {}, "outputs": [], "source": [ "# 1: Week Day\n", "\n", "weekDay_le = LabelEncoder()\n", "df['Week_Day_Encoded'] = weekDay_le.fit_transform(df['Week Day'])" ] }, { "cell_type": "code", "execution_count": 15, "id": "9b10dc13", "metadata": {}, "outputs": [], "source": [ "# L1 = list(weekDay_le.inverse_transform(df['Week_Day_Encoded']))\n", "# d1 = dict(zip(weekDay_le.classes_, weekDay_le.transform(weekDay_le.classes_)))\n", "# print (d1)\n", "\n", "# # Output: {'Friday': 0, 'Monday': 1, 'Saturday': 2, 'Sunday': 3, 'Thursday': 4, 'Tuesday': 5, 'Wednesday': 6}" ] }, { "cell_type": "code", "execution_count": 16, "id": "bc705800", "metadata": {}, "outputs": [], "source": [ "# 2: TimeBand\n", "\n", "timeBand_le = LabelEncoder()\n", "df['Time_Band_Encoded'] = timeBand_le.fit_transform(df['TimeBand'])" ] }, { "cell_type": "code", "execution_count": 17, "id": "16ac2be3", "metadata": {}, "outputs": [], "source": [ "# L2 = list(timeBand_le.inverse_transform(df['Time_Band_Encoded']))\n", "# d2 = dict(zip(timeBand_le.classes_, timeBand_le.transform(timeBand_le.classes_)))\n", "# print(d2)\n", "\n", "# # # Output: {'02:00:00 - 02:30:00': 0, '02:30:00 - 03:00:00': 1, '03:00:00 - 03:30:00': 2, '03:30:00 - 04:00:00': 3, \n", "# '04:00:00 - 04:30:00': 4, '04:30:00 - 05:00:00': 5, '05:00:00 - 05:30:00': 6, '05:30:00 - 06:00:00': 7, \n", "# '06:00:00 - 06:30:00': 8, '06:30:00 - 07:00:00': 9, '07:00:00 - 07:30:00': 10, '07:30:00 - 08:00:00': 11, \n", "# '08:00:00 - 08:30:00': 12, '08:30:00 - 09:00:00': 13, '09:00:00 - 09:30:00': 14, '09:30:00 - 10:00:00': 15, \n", "# '10:00:00 - 10:30:00': 16, '10:30:00 - 11:00:00': 17, '11:00:00 - 11:30:00': 18, '11:30:00 - 12:00:00': 19, \n", "# '12:00:00 - 12:30:00': 20, '12:30:00 - 13:00:00': 21, '13:00:00 - 13:30:00': 22, '13:30:00 - 14:00:00': 23, \n", "# '14:00:00 - 14:30:00': 24, '14:30:00 - 15:00:00': 25, '15:00:00 - 15:30:00': 26, '15:30:00 - 16:00:00': 27, \n", "# '16:00:00 - 16:30:00': 28, '16:30:00 - 17:00:00': 29, '17:00:00 - 17:30:00': 30, '17:30:00 - 18:00:00': 31, \n", "# '18:00:00 - 18:30:00': 32, '18:30:00 - 19:00:00': 33, '19:00:00 - 19:30:00': 34, '19:30:00 - 20:00:00': 35, \n", "# '20:00:00 - 20:30:00': 36, '20:30:00 - 21:00:00': 37, '21:00:00 - 21:30:00': 38, '21:30:00 - 22:00:00': 39, \n", "# '22:00:00 - 22:30:00': 40, '22:30:00 - 23:00:00': 41, '23:00:00 - 23:30:00': 42, '23:30:00 - 24:00:00': 43, \n", "# '24:00:00 - 24:30:00': 44, '24:30:00 - 25:00:00': 45, '25:00:00 - 25:30:00': 46, '25:30:00 - 26:00:00': 47}" ] }, { "cell_type": "code", "execution_count": 18, "id": "e65f3a9b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Week numberChannelWeek DayTimeBandShareAMAratedaily reachcume reachATSUnrolledWeek_Day_EncodedTime_Band_Encoded
07'23Aaj TakSaturday02:00:00 - 02:30:000.0813050.1233630.0004333.703.70089300:01:000.020
17'23Aaj TakSaturday02:30:00 - 03:00:000.4699950.3940700.00138311.8211.82210300:01:000.021
27'23Aaj TakSaturday03:00:00 - 03:30:001.7230840.3615370.00126910.8510.84612000:01:000.022
37'23Aaj TakSaturday03:30:00 - 04:00:002.0192060.2517900.0008847.557.55369200:01:000.023
47'23Aaj TakSaturday04:00:00 - 04:30:001.1639160.3336030.00117110.0110.00810000:01:000.024
\n", "
" ], "text/plain": [ " Week number Channel Week Day TimeBand Share AMA \\\n", "0 7'23 Aaj Tak Saturday 02:00:00 - 02:30:00 0.081305 0.123363 \n", "1 7'23 Aaj Tak Saturday 02:30:00 - 03:00:00 0.469995 0.394070 \n", "2 7'23 Aaj Tak Saturday 03:00:00 - 03:30:00 1.723084 0.361537 \n", "3 7'23 Aaj Tak Saturday 03:30:00 - 04:00:00 2.019206 0.251790 \n", "4 7'23 Aaj Tak Saturday 04:00:00 - 04:30:00 1.163916 0.333603 \n", "\n", " rate daily reach cume reach ATS Unrolled Week_Day_Encoded \\\n", "0 0.000433 3.70 3.700893 00:01:00 0.0 2 \n", "1 0.001383 11.82 11.822103 00:01:00 0.0 2 \n", "2 0.001269 10.85 10.846120 00:01:00 0.0 2 \n", "3 0.000884 7.55 7.553692 00:01:00 0.0 2 \n", "4 0.001171 10.01 10.008100 00:01:00 0.0 2 \n", "\n", " Time_Band_Encoded \n", "0 0 \n", "1 1 \n", "2 2 \n", "3 3 \n", "4 4 " ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 19, "id": "e604dbc6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 12096 entries, 0 to 12095\n", "Data columns (total 13 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 Week number 12096 non-null object \n", " 1 Channel 12096 non-null object \n", " 2 Week Day 12096 non-null object \n", " 3 TimeBand 12096 non-null object \n", " 4 Share 12096 non-null float64\n", " 5 AMA 12096 non-null float64\n", " 6 rate 12096 non-null float64\n", " 7 daily reach 12096 non-null float64\n", " 8 cume reach 12096 non-null float64\n", " 9 ATS 12096 non-null object \n", " 10 Unrolled 12096 non-null float64\n", " 11 Week_Day_Encoded 12096 non-null int32 \n", " 12 Time_Band_Encoded 12096 non-null int32 \n", "dtypes: float64(6), int32(2), object(5)\n", "memory usage: 1.1+ MB\n" ] } ], "source": [ "df.info()" ] }, { "cell_type": "markdown", "id": "fcb0b705", "metadata": {}, "source": [ "## Model Development : RandomForestRegressor" ] }, { "cell_type": "code", "execution_count": 20, "id": "f5af473f", "metadata": {}, "outputs": [], "source": [ "# Splitting into X and y \n", "\n", "X = df[['Share', 'AMA', 'rate','daily reach', 'cume reach','Week_Day_Encoded','Time_Band_Encoded']]\n", "y = df[['Unrolled']]" ] }, { "cell_type": "code", "execution_count": 33, "id": "8b74a5b8", "metadata": {}, "outputs": [], "source": [ "# Splitting into training and testing datasets\n", "\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state = 42)" ] }, { "cell_type": "code", "execution_count": 34, "id": "306b52f8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "((9676, 7), (2420, 7), (9676, 1), (2420, 1))" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_train.shape, X_test.shape, y_train.shape, y_test.shape" ] }, { "cell_type": "code", "execution_count": 35, "id": "0d6b3c6e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ShareAMAratedaily reachcume reachWeek_Day_EncodedTime_Band_Encoded
112320.0433640.0809530.0003572.432.42858650
111180.3192807.0502870.03111145.3745.372124230
93010.0908555.2843890.02378160.3260.317940637
32220.4026140.2078350.0009174.824.81534366
1032212.8738560.0643360.0152201.931.93008142
\n", "
" ], "text/plain": [ " Share AMA rate daily reach cume reach \\\n", "11232 0.043364 0.080953 0.000357 2.43 2.428586 \n", "11118 0.319280 7.050287 0.031111 45.37 45.372124 \n", "9301 0.090855 5.284389 0.023781 60.32 60.317940 \n", "3222 0.402614 0.207835 0.000917 4.82 4.815343 \n", "10322 12.873856 0.064336 0.015220 1.93 1.930081 \n", "\n", " Week_Day_Encoded Time_Band_Encoded \n", "11232 5 0 \n", "11118 2 30 \n", "9301 6 37 \n", "3222 6 6 \n", "10322 4 2 " ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_train.head()" ] }, { "cell_type": "code", "execution_count": 36, "id": "38e2d59b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unrolled
112320.000000
111180.000000
93016.285889
32220.473240
103220.000000
\n", "
" ], "text/plain": [ " Unrolled\n", "11232 0.000000\n", "11118 0.000000\n", "9301 6.285889\n", "3222 0.473240\n", "10322 0.000000" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y_train[:5]" ] }, { "cell_type": "code", "execution_count": 37, "id": "0fc7342f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ShareAMAratedaily reachcume reachWeek_Day_EncodedTime_Band_Encoded
4680.1525969.8206260.04333794.6194.614234136
116200.0000000.0000000.0000000.000.00000064
5380.9692943.1818740.01404334.3034.298911610
52650.0647412.9910510.01342741.6241.619074633
74840.0000000.0000000.0000000.000.000000344
\n", "
" ], "text/plain": [ " Share AMA rate daily reach cume reach \\\n", "468 0.152596 9.820626 0.043337 94.61 94.614234 \n", "11620 0.000000 0.000000 0.000000 0.00 0.000000 \n", "538 0.969294 3.181874 0.014043 34.30 34.298911 \n", "5265 0.064741 2.991051 0.013427 41.62 41.619074 \n", "7484 0.000000 0.000000 0.000000 0.00 0.000000 \n", "\n", " Week_Day_Encoded Time_Band_Encoded \n", "468 1 36 \n", "11620 6 4 \n", "538 6 10 \n", "5265 6 33 \n", "7484 3 44 " ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_test.head()" ] }, { "cell_type": "code", "execution_count": 38, "id": "af5394e9", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unrolled
46812.150886
116200.000000
5381.480424
52655.781056
74840.000000
\n", "
" ], "text/plain": [ " Unrolled\n", "468 12.150886\n", "11620 0.000000\n", "538 1.480424\n", "5265 5.781056\n", "7484 0.000000" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y_test[:5]" ] }, { "cell_type": "code", "execution_count": 39, "id": "ad1452db", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
RandomForestRegressor(random_state=42)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "RandomForestRegressor(random_state=42)" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Train Random Forest Regression model\n", "\n", "model = RandomForestRegressor(random_state = 42)\n", "model.fit(X_train, y_train)" ] }, { "cell_type": "code", "execution_count": 40, "id": "58a025a8", "metadata": {}, "outputs": [], "source": [ "# Make predictions on train data\n", "\n", "y_pred_train = model.predict(X_train)" ] }, { "cell_type": "code", "execution_count": 72, "id": "403259f6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The Accuracy of Training Dataset is : 95.65798927048185\n" ] } ], "source": [ "acc_train = r2_score(y_train, y_pred_train)\n", "print(\"The Accuracy of Training Dataset is : \",acc_train*100)" ] }, { "cell_type": "code", "execution_count": 42, "id": "ac553b1e", "metadata": {}, "outputs": [], "source": [ "# Make predictions on test data\n", "\n", "y_pred_test = model.predict(X_test)" ] }, { "cell_type": "code", "execution_count": 71, "id": "bc359944", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The Accuracy of Test Dataset is : 71.01332045918515\n" ] } ], "source": [ "acc_test = r2_score(y_test, y_pred_test)\n", "print(\"The Accuracy of Test Dataset is : \",acc_test*100)" ] }, { "cell_type": "code", "execution_count": 70, "id": "fa33faec", "metadata": {}, "outputs": [], "source": [ "# # Saving Model\n", "\n", "# import pickle\n", "\n", "# with open('aajTak_model.pkl','wb') as file1:\n", "# pickle.dump(model,file1) " ] }, { "cell_type": "markdown", "id": "6f30a678", "metadata": {}, "source": [ "## Hyperparameter Tuning for Random Forest Regression" ] }, { "cell_type": "code", "execution_count": 45, "id": "44bd53a2", "metadata": {}, "outputs": [], "source": [ "# Hyperparameter Tuning\n", "\n", "hyp_model = RandomForestRegressor()\n", "\n", "hyp = {\n", "\"n_estimators\": np.arange(10,50,10),\n", "'criterion':[\"squared_error\", \"absolute_error\"],\n", "'max_depth':np.arange(3,50),\n", "# 'min_samples_split':np.arange(2,5),\n", "# 'min_samples_leaf':np.arange(1,5),\n", "'random_state':np.arange(0,100)\n", "}" ] }, { "cell_type": "code", "execution_count": 46, "id": "b7c9e0ab", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
RandomizedSearchCV(cv=5, estimator=RandomForestRegressor(),\n",
       "                   param_distributions={'criterion': ['squared_error',\n",
       "                                                      'absolute_error'],\n",
       "                                        'max_depth': array([ 3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,\n",
       "       20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,\n",
       "       37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]),\n",
       "                                        'n_estimators': array([10, 20, 30, 40]),\n",
       "                                        'random_state': array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,\n",
       "       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,\n",
       "       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,\n",
       "       51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,\n",
       "       68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,\n",
       "       85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])})
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "RandomizedSearchCV(cv=5, estimator=RandomForestRegressor(),\n", " param_distributions={'criterion': ['squared_error',\n", " 'absolute_error'],\n", " 'max_depth': array([ 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,\n", " 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,\n", " 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]),\n", " 'n_estimators': array([10, 20, 30, 40]),\n", " 'random_state': array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", " 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,\n", " 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,\n", " 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,\n", " 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,\n", " 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])})" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rscv = RandomizedSearchCV(hyp_model, hyp, cv=5)\n", "rscv.fit(X_train,y_train)" ] }, { "cell_type": "code", "execution_count": 47, "id": "f0b0d172", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'random_state': 49,\n", " 'n_estimators': 20,\n", " 'max_depth': 39,\n", " 'criterion': 'absolute_error'}" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rscv.best_params_" ] }, { "cell_type": "code", "execution_count": 48, "id": "0252bdea", "metadata": {}, "outputs": [], "source": [ "best_model = rscv.best_estimator_" ] }, { "cell_type": "code", "execution_count": 49, "id": "b23a1e56", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
RandomForestRegressor(criterion='absolute_error', max_depth=39, n_estimators=20,\n",
       "                      random_state=49)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "RandomForestRegressor(criterion='absolute_error', max_depth=39, n_estimators=20,\n", " random_state=49)" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "best_model.fit(X_train, y_train)" ] }, { "cell_type": "code", "execution_count": 50, "id": "c2d2e731", "metadata": {}, "outputs": [], "source": [ "ypredtn = best_model.predict(X_train)" ] }, { "cell_type": "code", "execution_count": 51, "id": "9308b1d8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The Accuracy of Training Dataset after hyperparameter tuning is : 94.41670975802535\n" ] } ], "source": [ "acctn = r2_score(y_train, ypredtn)\n", "print(\"The Accuracy of Training Dataset after hyperparameter tuning is : \",acctn*100)" ] }, { "cell_type": "code", "execution_count": 52, "id": "23cf5580", "metadata": {}, "outputs": [], "source": [ "ypredts = best_model.predict(X_test)" ] }, { "cell_type": "code", "execution_count": 54, "id": "d88fdedb", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The Accuracy of Testing Dataset after hyperparameter tuning is : 69.97941529616791\n" ] } ], "source": [ "accts = r2_score(y_test, ypredts)\n", "print(\"The Accuracy of Testing Dataset after hyperparameter tuning is : \",accts*100)" ] }, { "cell_type": "code", "execution_count": 73, "id": "e5298c37", "metadata": {}, "outputs": [], "source": [ "# # Saving Model\n", "\n", "# import pickle\n", "\n", "# with open('aajTak_fineTune_model.pkl','wb') as file:\n", "# pickle.dump(best_model,file) " ] }, { "cell_type": "code", "execution_count": 74, "id": "7a5d25ac", "metadata": {}, "outputs": [], "source": [ "# # Saving the LabelEncoders for weekDay\n", "\n", "# with open('weekDay_le.pkl','wb') as f1:\n", "# pickle.dump(weekDay_le,f1)" ] }, { "cell_type": "code", "execution_count": 75, "id": "6a268e27", "metadata": {}, "outputs": [], "source": [ "# # Saving the LabelEncoders for timeBand\n", "\n", "# with open('timeBand_le.pkl','wb') as f2:\n", "# pickle.dump(timeBand_le,f2)" ] }, { "cell_type": "markdown", "id": "57557ac1", "metadata": {}, "source": [ "## UserTest Function - Prediction Script" ] }, { "cell_type": "code", "execution_count": 1, "id": "8cf621c3", "metadata": {}, "outputs": [], "source": [ "# import required packages\n", "\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib as plt\n", "import seaborn as sns\n", "\n", "from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, train_test_split\n", "from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor\n", "from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error\n", "from sklearn.preprocessing import LabelEncoder\n", "\n", "import warnings\n", "warnings.filterwarnings('ignore')\n", "\n", "import pickle" ] }, { "cell_type": "code", "execution_count": 2, "id": "62be1870", "metadata": {}, "outputs": [], "source": [ "# load the saved model using pickle\n", "with open('aajTak_model.pkl', 'rb') as f1:\n", " model1 = pickle.load(f1)" ] }, { "cell_type": "code", "execution_count": 3, "id": "0b4e2a7c", "metadata": {}, "outputs": [], "source": [ "# # load the saved model using pickle\n", "# with open('aajTak_fineTune_model.pkl', 'rb') as file:\n", "# model = pickle.load(file)\n", "\n", "# Load the saved weekDay label encoder object using pickle\n", "with open('weekDay_le.pkl','rb') as file1:\n", " weekDay_le = pickle.load(file1)\n", "\n", "# Load the saved timeBand label encoder object using pickle\n", "with open('timeBand_le.pkl','rb') as file2:\n", " timeBand_le = pickle.load(file2)" ] }, { "cell_type": "code", "execution_count": 4, "id": "e3a13c4e", "metadata": {}, "outputs": [], "source": [ "# define the prediction function\n", "# X = df[['Share', 'AMA', 'rate','daily reach', 'cume reach','Week_Day_Encoded','Time_Band_Encoded']]\n", "# y = df[['Unrolled']]\n", "\n", "\n", "def predict_unrolled_value(Share, AMA, rate, daily_reach, cume_reach, Week_Day, Time_Band):\n", " \n", " # create a DataFrame with the input variables\n", " \n", " # encode the Week_Day using the loaded LabelEncoder object\n", " weekDay_encoded = weekDay_le.transform([Week_Day])[0]\n", " \n", " # encode the Time_Band using the loaded LabelEncoder object\n", " Time_Band_encoded = timeBand_le.transform([Time_Band])[0]\n", " \n", " input_data = pd.DataFrame({'Share': [Share], \n", " 'AMA': [AMA], \n", " 'rate': [rate],\n", " 'daily reach': [daily_reach], \n", " 'cume reach': [cume_reach], \n", " 'Week_Day_Encoded': [weekDay_encoded], \n", " 'Time_Band_Encoded': [Time_Band_encoded]})\n", " \n", " # make the prediction using the loaded model and input data\n", " predicted_unrolled_value = model1.predict(input_data)\n", " \n", " # return the predicted unrolled value as output\n", " return predicted_unrolled_value[0]" ] }, { "cell_type": "code", "execution_count": 5, "id": "df4390e9", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "4.123954" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Function calling\n", "# 0.064741\t2.991051\t0.013427\t41.62\t41.619074\t'Wednesday'\t'18:30:00 - 19:00:00' --> test input data\n", "# 5.781056 --> unrolled actual value\n", "\n", "predict_unrolled_value(0.064741, 2.991051, 0.013427, 41.62, 41.619074, 'Wednesday', '18:30:00 - 19:00:00')" ] }, { "cell_type": "code", "execution_count": 6, "id": "5fadb125", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "9.738856000000002" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 0.152596\t9.820626\t0.043337\t94.61\t94.614234\t1\t'20:00:00 - 20:30:00'\n", "# 12.150886\n", "predict_unrolled_value(0.152596, 9.820626, 0.043337, 94.61, 94.614234, 'Monday', '20:00:00 - 20:30:00')" ] }, { "cell_type": "code", "execution_count": 7, "id": "3ec5b3e0", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3.3215619" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 0.611246\t4.196084\t0.018516\t36.23\t36.231006\t'Saturday'\t''08:00:00 - 08:30:00''\n", "# 3.711884\n", "predict_unrolled_value(0.611246, 4.196084, 0.018516, 36.23, 36.23, 'Saturday', '08:00:00 - 08:30:00')" ] }, { "cell_type": "code", "execution_count": null, "id": "83a75023", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "1799f490", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.10" } }, "nbformat": 4, "nbformat_minor": 5 }