diff --git "a/core/bicep_model/4.evaluation.ipynb" "b/core/bicep_model/4.evaluation.ipynb" new file mode 100644--- /dev/null +++ "b/core/bicep_model/4.evaluation.ipynb" @@ -0,0 +1,912 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "objc[61196]: Class CaptureDelegate is implemented in both /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/mediapipe/.dylibs/libopencv_videoio.3.4.16.dylib (0x10ab48860) and /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/cv2/cv2.abi3.so (0x15eece480). One of the two will be used. Which one is undefined.\n", + "objc[61196]: Class CVWindow is implemented in both /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/mediapipe/.dylibs/libopencv_highgui.3.4.16.dylib (0x105918a68) and /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/cv2/cv2.abi3.so (0x15eece4d0). One of the two will be used. Which one is undefined.\n", + "objc[61196]: Class CVView is implemented in both /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/mediapipe/.dylibs/libopencv_highgui.3.4.16.dylib (0x105918a90) and /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/cv2/cv2.abi3.so (0x15eece4f8). One of the two will be used. Which one is undefined.\n", + "objc[61196]: Class CVSlider is implemented in both /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/mediapipe/.dylibs/libopencv_highgui.3.4.16.dylib (0x105918ab8) and /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/cv2/cv2.abi3.so (0x15eece520). One of the two will be used. 
Which one is undefined.\n" + ] + } + ], + "source": [ + "import mediapipe as mp\n", + "import numpy as np\n", + "import pandas as pd\n", + "import pickle\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "\n", + "from sklearn.metrics import precision_score, accuracy_score, f1_score, recall_score, confusion_matrix, roc_curve, auc\n", + "\n", + "from keras.utils.np_utils import to_categorical\n", + "\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "\n", + "# Drawing helpers\n", + "mp_drawing = mp.solutions.drawing_utils\n", + "mp_pose = mp.solutions.pose" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Set ups" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1.1. Load models & scaler" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Metal device set to: Apple M1\n", + "\n", + "systemMemory: 16.00 GB\n", + "maxCacheSize: 5.33 GB\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2022-11-28 10:16:34.709228: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. 
# 1.1. Load models & scaler
#
# NOTE(review): pickle.load can execute arbitrary code while unpickling. Only
# point these paths at artifacts produced by this project's own training
# notebooks, never at files from an untrusted source.

# Load all sklearn models (iterated later as a name -> model mapping)
with open("./model/all_sklearn.pkl", "rb") as f:
    sklearn_models = pickle.load(f)

# Load all deep learning models (iterated later as a name -> model mapping)
with open("./model/all_dp.pkl", "rb") as f:
    dp_models = pickle.load(f)

# Load input scaler (used later through sc.transform)
with open("./model/input_scaler.pkl", "rb") as f:
    sc = pickle.load(f)


# 1.2. Important functions
def describe_dataset(dataset_path: str):
    '''
    Load a CSV dataset and print a short summary of it.

    The summary lists the column headers, the number of rows and columns, the
    per-value counts of the ``label`` column, whether any value is missing and
    how many rows are exact duplicates of an earlier row.

    Args:
        dataset_path: path of the CSV file to read; it must contain a
            ``label`` column.

    Returns:
        The loaded ``pandas.DataFrame``, unmodified.

    Raises:
        KeyError: if the file has no ``label`` column.
    '''

    data = pd.read_csv(dataset_path)
    print(f"Headers: {list(data.columns.values)}")
    print(f'Number of rows: {data.shape[0]} \nNumber of columns: {data.shape[1]}\n')
    print(f"Labels: \n{data['label'].value_counts()}\n")
    print(f"Missing values: {data.isnull().values.any()}\n")

    # Count duplicates straight from the boolean mask. Summing the row values
    # is unnecessary, and a row-wise sum that mixes the string label with the
    # numeric columns is an error on pandas 2.x.
    duplicate_count = int(data.duplicated().sum())
    print(f"Duplicate Rows : {duplicate_count}")

    return data


def round_up_metric_results(results) -> list:
    '''
    Round metric results (precision score, recall score, ...) to 3 decimals.

    Despite the name, values are rounded to the nearest value with the
    built-in ``round``; they are not rounded up.

    Args:
        results: iterable of numbers.

    Returns:
        A list with every element rounded to 3 decimal places.
    '''
    return [round(el, 3) for el in results]
# 2. Process Test set

# Read the held-out test split (this also prints its summary)
test_df = describe_dataset("./test.csv")

# Encode the string labels as integers: "C" -> 0, "L" -> 1.
# Rows with any other label are left untouched and make astype('int') below fail.
for raw_label, encoded_label in (("C", 0), ("L", 1)):
    test_df.loc[test_df["label"] == raw_label, "label"] = encoded_label

# Scale the features with the scaler loaded earlier. The result is wrapped in
# a fresh DataFrame, so its columns are positional rather than named.
test_x = pd.DataFrame(sc.transform(test_df.drop("label", axis = 1)))

# Integer labels for the sklearn models
test_y = test_df["label"].astype('int')

# Categorical encoding of the labels for the deep learning models
test_y_cat = to_categorical(test_y)
# 2.1. Sklearn models evaluation
#
# Score every loaded sklearn model on the test set. Precision, recall and F1
# are macro-averaged over the two classes; the confusion matrix uses the fixed
# label order [0, 1].
result_columns = ["Model", "Precision Score", "Recall Score", "Accuracy Score", "F1 Score", "Confusion Matrix"]

testset_final_results = []

for name, model in sklearn_models.items():
    model_results = model.predict(test_x)

    # One row per model, in the same order as result_columns
    testset_final_results.append((
        name,
        precision_score(test_y, model_results, average="macro"),
        recall_score(test_y, model_results, average="macro"),
        accuracy_score(test_y, model_results),
        f1_score(test_y, model_results, average="macro"),
        confusion_matrix(test_y, model_results, labels=[0, 1]),
    ))

sklearn_eval = pd.DataFrame(testset_final_results, columns=result_columns)

sklearn_eval
# 2.2. Deep learning models
#
# Score every loaded deep learning model on the test set with the same metrics
# as the sklearn models (macro-averaged precision / recall / F1, accuracy and
# a confusion matrix with label order [0, 1]).
test_set_results = []

# The ground-truth class indices do not depend on the model, so decode the
# categorical labels once instead of on every iteration.
y_test_class = np.argmax(test_y_cat, axis=1)

for name, model in dp_models.items():
    # Predicted class = index of the highest score in each output row
    predict_x = model.predict(test_x, verbose=0)
    y_pred_class = np.argmax(predict_x, axis=1)

    cm = confusion_matrix(y_test_class, y_pred_class, labels=[0, 1])
    p_score = precision_score(y_test_class, y_pred_class, average="macro")
    a_score = accuracy_score(y_test_class, y_pred_class)
    r_score = recall_score(y_test_class, y_pred_class, average="macro")
    f1_score_result = f1_score(y_test_class, y_pred_class, average="macro")

    test_set_results.append((name, p_score, r_score, a_score, f1_score_result, cm))

dp_eval = pd.DataFrame(test_set_results, columns=["Model", "Precision Score", "Recall Score", "Accuracy Score", "F1 Score", "Confusion Matrix"])

dp_eval
# 2.3. Final Results
#
# Stack the sklearn and deep learning result tables, rank every model by
# F1 score (best first) and renumber the rows from 0.
eval_df = (
    pd.concat([sklearn_eval, dp_eval])
    .sort_values(by=['F1 Score'], ascending=False)
    .reset_index(drop=True)
)

# Persist the ranking next to the notebook
eval_df.to_csv("evaluation.csv", sep=',', encoding='utf-8', index=False)

eval_df
# 3. Best model - ROC - Confusion Matrix
#
# The model is picked by name; the notebook text states that KNN has the best
# F1 score in the ranking above.
best_model = sklearn_models["KNN"]
y_predictions = best_model.predict(test_x)

# Per-class (not averaged) scores, in label order [0, 1]
p_score = precision_score(test_y, y_predictions, labels=[0, 1], average=None)
r_score = recall_score(test_y, y_predictions, labels=[0, 1], average=None)
f1_score_result = f1_score(test_y, y_predictions, labels=[0, 1], average=None)

p_score, r_score, f1_score_result

# Macro F1 as the mean of the per-class F1 scores. Computed from the values
# above instead of retyping them by hand, so it cannot go stale.
round(float(np.mean(f1_score_result)), 3)


# 3.1. Confusion Matrix
KNN_cm = eval_df[ eval_df["Model"] == 'KNN' ]["Confusion Matrix"].values[0]

cm_array_df = pd.DataFrame(KNN_cm, index=["C", "L"], columns=["C", "L"])

fig, ax = plt.subplots(figsize=(8,4))
sns.heatmap(cm_array_df, linewidths=1, annot=True, ax=ax, fmt='g')
# sklearn's confusion_matrix puts the true classes on rows and the predicted
# classes on columns.
ax.set_xlabel("Predicted label")
ax.set_ylabel("True label")
plt.show()


# 3.2. F1 Score and Confidence correlation
def to_labels(y_pred, y_pred_proba, threshold):
    '''
    Return predictions taking a confidence threshold into account.

    Args:
        y_pred: predicted class for every sample.
        y_pred_proba: per-sample class probabilities, indexable by the same
            position as ``y_pred``.
        threshold: minimum confidence required to keep a prediction.

    Returns:
        A list with one entry per element of ``y_pred``: the predicted class
        when the highest class probability (rounded to 2 decimals) is at
        least ``threshold``, otherwise ``-1``.
    '''
    results = []

    for index, predicted_class in enumerate(y_pred):
        # Confidence of a prediction = its highest class probability
        class_prediction_probability = round(np.max(y_pred_proba[index]), 2)

        results.append(predicted_class if class_prediction_probability >= threshold else -1)

    return results


def calculate_correlation_score_confidence(test_x, test_y, model=None):
    '''
    Compute F1 scores for confidence thresholds 0.00, 0.01, ..., 1.00.

    For every threshold, predictions whose confidence is below the threshold
    are replaced by ``-1`` (see ``to_labels``) and F1 is computed for class 0,
    for class 1 and as the support-weighted average of both.

    Args:
        test_x: features passed to ``predict`` and ``predict_proba``.
        test_y: true labels.
        model: classifier exposing ``predict`` and ``predict_proba``. When
            omitted, the notebook-level ``best_model`` is used.

    Returns:
        ``(thresholds, f1_score_results)``: the list of thresholds, and for
        each threshold a list ``[f1_class_0, f1_class_1, f1_weighted]``.
    '''
    if model is None:
        model = best_model

    y_predictions = model.predict(test_x)
    y_predict_proba = model.predict_proba(test_x)

    # Build the grid from integers: a float step accumulates representation
    # error (35 * 0.01 == 0.35000000000000003), which would reject a
    # confidence that rounds to exactly 0.35 at the 0.35 threshold.
    thresholds = list(np.arange(101) / 100)

    f1_score_results = []

    for threshold in thresholds:
        true_predictions = to_labels(y_predictions, y_predict_proba, threshold)
        f1_s = list(f1_score(test_y, true_predictions, labels=[0, 1], average=None))
        # Only classes 0 and 1 exist; the rejection marker -1 is deliberately
        # left out of the label list.
        all_class_f1 = f1_score(test_y, true_predictions, labels=[0, 1], average="weighted")
        f1_s.append(all_class_f1)
        f1_score_results.append(f1_s)

    return thresholds, f1_score_results


thresholds, f1_scores = calculate_correlation_score_confidence(test_x, test_y)

first_class = [ el[0] for el in f1_scores ]
second_class = [ el[1] for el in f1_scores ]
all_classes = [ el[2] for el in f1_scores ]

fig, ax = plt.subplots(figsize=(8,6))
ax.plot(thresholds, first_class, label = "F1 Score - Correct class")
ax.plot(thresholds, second_class, label = "F1 Score - Incorrect class")
ax.plot(thresholds, all_classes, label = "F1 Score - All classes", linewidth=2.0, color="blue")
ax.legend(loc = 'lower left')
ax.set_ylim([0.8, 1])
ax.set_xlim([0.025, 1])
ax.set_xlabel("Threshold", fontsize = 12)
ax.set_ylabel("Score", fontsize = 12)
plt.show()


# 3.3. ROC curve

# calculate the fpr and tpr for all thresholds of the classification
probs = best_model.predict_proba(test_x)
# NOTE(review): assumes column 1 of predict_proba is the probability of
# label 1 - confirm against best_model.classes_.
preds = probs[:,1]
# `threshold` holds the array of all decision thresholds returned by roc_curve
fpr, tpr, threshold = roc_curve(test_y, preds)
roc_auc = auc(fpr, tpr)

# Pick the threshold where the gap between TPR and FPR is largest
optimal_idx = np.argmax(tpr - fpr)
optimal_threshold = threshold[optimal_idx]
print(f"Optimal Threshold: {optimal_threshold}")

roc_fig, roc_ax = plt.subplots()
roc_ax.plot(fpr, tpr, 'b', label = 'AUC = %0.2f' % roc_auc)
roc_ax.plot([0, 1], [0, 1],'r--', label="Random Classifier")
roc_ax.legend(loc=4)
roc_ax.set_ylabel('True Positive Rate')
roc_ax.set_xlabel('False Positive Rate')
plt.show()