Files
PacketBreeze/assets/Deepfence_ML_flowmeter.ipynb
Unic-X 3f83936e6e Init
2023-06-18 14:03:27 +05:30

2710 lines
195 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 1;\n",
" var nbb_unformatted_code = \"%load_ext nb_black\";\n",
" var nbb_formatted_code = \"%load_ext nb_black\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%load_ext nb_black"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"code_folding": []
},
"outputs": [
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 2;\n",
" var nbb_unformatted_code = \"# Import libraries\\nfrom __future__ import division\\n\\nimport pandas as pd\\nimport matplotlib.pyplot as plt # plotting\\nimport numpy as np # linear algebra\\nimport math\\n\\nfrom sklearn.linear_model import LogisticRegression\\nfrom sklearn.svm import SVC\\n\\nfrom sklearn.pipeline import make_pipeline\\n\\nfrom sklearn.preprocessing import StandardScaler, MinMaxScaler\\n\\nfrom sklearn.model_selection import (\\n train_test_split,\\n GridSearchCV,\\n cross_val_score,\\n RepeatedStratifiedKFold,\\n StratifiedKFold,\\n)\\n\\nfrom sklearn.metrics import (\\n accuracy_score,\\n confusion_matrix,\\n roc_curve,\\n roc_auc_score,\\n auc,\\n precision_score,\\n recall_score,\\n precision_recall_curve,\\n plot_confusion_matrix,\\n f1_score,\\n)\\n\\n\\nfrom collections import Counter\\n\\nfrom sklearn.datasets import make_classification\\n\\nimport copy\\n\\nimport warnings\";\n",
" var nbb_formatted_code = \"# Import libraries\\nfrom __future__ import division\\n\\nimport pandas as pd\\nimport matplotlib.pyplot as plt # plotting\\nimport numpy as np # linear algebra\\nimport math\\n\\nfrom sklearn.linear_model import LogisticRegression\\nfrom sklearn.svm import SVC\\n\\nfrom sklearn.pipeline import make_pipeline\\n\\nfrom sklearn.preprocessing import StandardScaler, MinMaxScaler\\n\\nfrom sklearn.model_selection import (\\n train_test_split,\\n GridSearchCV,\\n cross_val_score,\\n RepeatedStratifiedKFold,\\n StratifiedKFold,\\n)\\n\\nfrom sklearn.metrics import (\\n accuracy_score,\\n confusion_matrix,\\n roc_curve,\\n roc_auc_score,\\n auc,\\n precision_score,\\n recall_score,\\n precision_recall_curve,\\n plot_confusion_matrix,\\n f1_score,\\n)\\n\\n\\nfrom collections import Counter\\n\\nfrom sklearn.datasets import make_classification\\n\\nimport copy\\n\\nimport warnings\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Import libraries\n",
"from __future__ import division\n",
"\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt # plotting\n",
"import numpy as np # linear algebra\n",
"import math\n",
"\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.svm import SVC\n",
"\n",
"from sklearn.pipeline import make_pipeline\n",
"\n",
"from sklearn.preprocessing import StandardScaler, MinMaxScaler\n",
"\n",
"from sklearn.model_selection import (\n",
" train_test_split,\n",
" GridSearchCV,\n",
" cross_val_score,\n",
" RepeatedStratifiedKFold,\n",
" StratifiedKFold,\n",
")\n",
"\n",
"from sklearn.metrics import (\n",
" accuracy_score,\n",
" confusion_matrix,\n",
" roc_curve,\n",
" roc_auc_score,\n",
" auc,\n",
" precision_score,\n",
" recall_score,\n",
" precision_recall_curve,\n",
" plot_confusion_matrix,\n",
" f1_score,\n",
")\n",
"\n",
"\n",
"from collections import Counter\n",
"\n",
"from sklearn.datasets import make_classification\n",
"\n",
"import copy\n",
"\n",
"import warnings"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### File path"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 3;\n",
" var nbb_unformatted_code = \"folder = \\\"../pkg/flowOutput/\\\"\\nfname_benign = \\\"2017-05-02_kali-normal22_flow_stats.csv\\\"\\nfname_malicious = \\\"webgoat_flow_stats.csv\\\"\";\n",
" var nbb_formatted_code = \"folder = \\\"../pkg/flowOutput/\\\"\\nfname_benign = \\\"2017-05-02_kali-normal22_flow_stats.csv\\\"\\nfname_malicious = \\\"webgoat_flow_stats.csv\\\"\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"folder = \"../pkg/flowOutput/\"\n",
"fname_benign = \"2017-05-02_kali-normal22_flow_stats.csv\"\n",
"fname_malicious = \"webgoat_flow_stats.csv\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Malicious: Webgoat"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"code_folding": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(248, 40)\n"
]
},
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 4;\n",
" var nbb_unformatted_code = \"# Malicious flows\\npd_malicious = pd.read_csv(folder + fname_malicious)\\npd_malicious.drop(pd_malicious.tail(1).index, inplace=True)\\npd_malicious[\\\"Type\\\"] = \\\"Malicious\\\"\\n\\nprint(pd_malicious.shape)\";\n",
" var nbb_formatted_code = \"# Malicious flows\\npd_malicious = pd.read_csv(folder + fname_malicious)\\npd_malicious.drop(pd_malicious.tail(1).index, inplace=True)\\npd_malicious[\\\"Type\\\"] = \\\"Malicious\\\"\\n\\nprint(pd_malicious.shape)\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Malicious flows\n",
"pd_malicious = pd.read_csv(folder + fname_malicious)\n",
"pd_malicious.drop(pd_malicious.tail(1).index, inplace=True)\n",
"pd_malicious[\"Type\"] = \"Malicious\"\n",
"\n",
"print(pd_malicious.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Benign"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"code_folding": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(28714, 40)\n"
]
},
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 5;\n",
" var nbb_unformatted_code = \"# Benign flows\\npd_benign = pd.read_csv(folder + fname_benign)\\n# pd_benign.drop(pd_webgoat.tail(1).index, inplace=True)\\npd_benign[\\\"Type\\\"] = \\\"Benign\\\"\\n\\nprint(pd_benign.shape)\";\n",
" var nbb_formatted_code = \"# Benign flows\\npd_benign = pd.read_csv(folder + fname_benign)\\n# pd_benign.drop(pd_webgoat.tail(1).index, inplace=True)\\npd_benign[\\\"Type\\\"] = \\\"Benign\\\"\\n\\nprint(pd_benign.shape)\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Benign flows\n",
"pd_benign = pd.read_csv(folder + fname_benign)\n",
"# pd_benign.drop(pd_webgoat.tail(1).index, inplace=True)\n",
"pd_benign[\"Type\"] = \"Benign\"\n",
"\n",
"print(pd_benign.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Combined dataframe - Benign + malicious"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"code_folding": []
},
"outputs": [
{
"data": {
"text/plain": [
"(28962, 40)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 6;\n",
" var nbb_unformatted_code = \"## Combine malicous and benign dataframes.\\npd_comb = pd.concat([pd_malicious, pd_benign])\\n\\n## Random shuffle of rows\\npd_comb = pd_comb.sample(frac=1)\\n\\npd_comb.shape\";\n",
" var nbb_formatted_code = \"## Combine malicous and benign dataframes.\\npd_comb = pd.concat([pd_malicious, pd_benign])\\n\\n## Random shuffle of rows\\npd_comb = pd_comb.sample(frac=1)\\n\\npd_comb.shape\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"## Combine malicous and benign dataframes.\n",
"pd_comb = pd.concat([pd_malicious, pd_benign])\n",
"\n",
"## Random shuffle of rows\n",
"pd_comb = pd_comb.sample(frac=1)\n",
"\n",
"pd_comb.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fiveTuple</th>\n",
" <th>srcIP</th>\n",
" <th>dstIP</th>\n",
" <th>protocol</th>\n",
" <th>srcPort</th>\n",
" <th>dstPort</th>\n",
" <th>flowDuration</th>\n",
" <th>flowLength</th>\n",
" <th>fwdFlowLength</th>\n",
" <th>bwdFlowLength</th>\n",
" <th>...</th>\n",
" <th>bwdIATTotal</th>\n",
" <th>fwdIATMean</th>\n",
" <th>bwdIATMean</th>\n",
" <th>fwdIATStd</th>\n",
" <th>bwdIATStd</th>\n",
" <th>fwdIATMin</th>\n",
" <th>bwdIATMin</th>\n",
" <th>fwdIATMax</th>\n",
" <th>bwdIATMax</th>\n",
" <th>Type</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>22705</th>\n",
" <td>216.137.61.254--192.168.1.191--TCP--443(https)...</td>\n",
" <td>192.168.1.191</td>\n",
" <td>216.137.61.254</td>\n",
" <td>TCP</td>\n",
" <td>39338</td>\n",
" <td>443(https)</td>\n",
" <td>5.189759e+09</td>\n",
" <td>15.0</td>\n",
" <td>0.0</td>\n",
" <td>15.0</td>\n",
" <td>...</td>\n",
" <td>5.189759e+09</td>\n",
" <td>0.0</td>\n",
" <td>3.706970e+08</td>\n",
" <td>0.0</td>\n",
" <td>1.209897e+09</td>\n",
" <td>0.0</td>\n",
" <td>9735.0</td>\n",
" <td>0.0</td>\n",
" <td>4.741535e+09</td>\n",
" <td>Benign</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24032</th>\n",
" <td>192.168.1.191--216.137.61.62--TCP--52706--80(h...</td>\n",
" <td>216.137.61.62</td>\n",
" <td>192.168.1.191</td>\n",
" <td>TCP</td>\n",
" <td>80(http)</td>\n",
" <td>52706</td>\n",
" <td>1.095918e+11</td>\n",
" <td>23.0</td>\n",
" <td>0.0</td>\n",
" <td>23.0</td>\n",
" <td>...</td>\n",
" <td>1.095918e+11</td>\n",
" <td>0.0</td>\n",
" <td>4.981446e+09</td>\n",
" <td>0.0</td>\n",
" <td>4.866784e+09</td>\n",
" <td>0.0</td>\n",
" <td>6304.0</td>\n",
" <td>0.0</td>\n",
" <td>1.138703e+10</td>\n",
" <td>Benign</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4936</th>\n",
" <td>173.241.240.220--192.168.1.191--TCP--80(http)-...</td>\n",
" <td>192.168.1.191</td>\n",
" <td>173.241.240.220</td>\n",
" <td>TCP</td>\n",
" <td>47724</td>\n",
" <td>80(http)</td>\n",
" <td>1.667897e+09</td>\n",
" <td>33.0</td>\n",
" <td>0.0</td>\n",
" <td>33.0</td>\n",
" <td>...</td>\n",
" <td>1.667897e+09</td>\n",
" <td>0.0</td>\n",
" <td>5.212100e+07</td>\n",
" <td>0.0</td>\n",
" <td>2.502940e+08</td>\n",
" <td>0.0</td>\n",
" <td>5642.0</td>\n",
" <td>0.0</td>\n",
" <td>1.441531e+09</td>\n",
" <td>Benign</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3 rows × 40 columns</p>\n",
"</div>"
],
"text/plain": [
" fiveTuple srcIP \\\n",
"22705 216.137.61.254--192.168.1.191--TCP--443(https)... 192.168.1.191 \n",
"24032 192.168.1.191--216.137.61.62--TCP--52706--80(h... 216.137.61.62 \n",
"4936 173.241.240.220--192.168.1.191--TCP--80(http)-... 192.168.1.191 \n",
"\n",
" dstIP protocol srcPort dstPort flowDuration \\\n",
"22705 216.137.61.254 TCP 39338 443(https) 5.189759e+09 \n",
"24032 192.168.1.191 TCP 80(http) 52706 1.095918e+11 \n",
"4936 173.241.240.220 TCP 47724 80(http) 1.667897e+09 \n",
"\n",
" flowLength fwdFlowLength bwdFlowLength ... bwdIATTotal \\\n",
"22705 15.0 0.0 15.0 ... 5.189759e+09 \n",
"24032 23.0 0.0 23.0 ... 1.095918e+11 \n",
"4936 33.0 0.0 33.0 ... 1.667897e+09 \n",
"\n",
" fwdIATMean bwdIATMean fwdIATStd bwdIATStd fwdIATMin \\\n",
"22705 0.0 3.706970e+08 0.0 1.209897e+09 0.0 \n",
"24032 0.0 4.981446e+09 0.0 4.866784e+09 0.0 \n",
"4936 0.0 5.212100e+07 0.0 2.502940e+08 0.0 \n",
"\n",
" bwdIATMin fwdIATMax bwdIATMax Type \n",
"22705 9735.0 0.0 4.741535e+09 Benign \n",
"24032 6304.0 0.0 1.138703e+10 Benign \n",
"4936 5642.0 0.0 1.441531e+09 Benign \n",
"\n",
"[3 rows x 40 columns]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 7;\n",
" var nbb_unformatted_code = \"pd_comb.head(3)\";\n",
" var nbb_formatted_code = \"pd_comb.head(3)\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"pd_comb.head(3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Added throughput columns."
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"code_folding": [
0
]
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"flowLengthPerTime\n",
"fwdFlowLengthPerTime\n",
"bwdFlowLengthPerTime\n",
"packetSizeTotalPerTime\n",
"fwdPacketSizeTotalPerTime\n",
"bwdPacketSizeTotalPerTime\n"
]
},
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 8;\n",
" var nbb_unformatted_code = \"## Add throughput columns.\\ncolsPerTime = [\\n \\\"flowLength\\\",\\n \\\"fwdFlowLength\\\",\\n \\\"bwdFlowLength\\\",\\n \\\"packetSizeTotal\\\",\\n \\\"fwdPacketSizeTotal\\\",\\n \\\"bwdPacketSizeTotal\\\",\\n]\\n\\nfor feature in colsPerTime:\\n pd_comb[feature + \\\"PerTime\\\"] = pd_comb[feature] / pd_comb[\\\"flowDuration\\\"]\\n print(feature + \\\"PerTime\\\")\";\n",
" var nbb_formatted_code = \"## Add throughput columns.\\ncolsPerTime = [\\n \\\"flowLength\\\",\\n \\\"fwdFlowLength\\\",\\n \\\"bwdFlowLength\\\",\\n \\\"packetSizeTotal\\\",\\n \\\"fwdPacketSizeTotal\\\",\\n \\\"bwdPacketSizeTotal\\\",\\n]\\n\\nfor feature in colsPerTime:\\n pd_comb[feature + \\\"PerTime\\\"] = pd_comb[feature] / pd_comb[\\\"flowDuration\\\"]\\n print(feature + \\\"PerTime\\\")\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"## Add throughput columns.\n",
"colsPerTime = [\n",
" \"flowLength\",\n",
" \"fwdFlowLength\",\n",
" \"bwdFlowLength\",\n",
" \"packetSizeTotal\",\n",
" \"fwdPacketSizeTotal\",\n",
" \"bwdPacketSizeTotal\",\n",
"]\n",
"\n",
"for feature in colsPerTime:\n",
" pd_comb[feature + \"PerTime\"] = pd_comb[feature] / pd_comb[\"flowDuration\"]\n",
" print(feature + \"PerTime\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Features"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"code_folding": [
0
]
},
"outputs": [
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 9;\n",
" var nbb_unformatted_code = \"# Feature columns.\\nfeature_cols = [\\n \\\"flowDuration\\\",\\n \\\"flowLength\\\",\\n \\\"fwdFlowLength\\\",\\n \\\"bwdFlowLength\\\",\\n \\\"packetSizeTotal\\\",\\n \\\"packetSizeMean\\\",\\n \\\"packetSizeStd\\\",\\n \\\"packetSizeMin\\\",\\n \\\"packetSizeMax\\\",\\n \\\"fwdPacketSizeTotal\\\",\\n \\\"bwdPacketSizeTotal\\\",\\n \\\"fwdPacketSizeMean\\\",\\n \\\"bwdPacketSizeMean\\\",\\n \\\"fwdPacketSizeStd\\\",\\n \\\"bwdPacketSizeStd\\\",\\n \\\"fwdPacketSizeMin\\\",\\n \\\"bwdPacketSizeMin\\\",\\n \\\"fwdPacketSizeMax\\\",\\n \\\"bwdPacketSizeMax\\\",\\n \\\"IATMean\\\",\\n \\\"IATStd\\\",\\n \\\"IATMin\\\",\\n \\\"IATMax\\\",\\n \\\"fwdIATTotal\\\",\\n \\\"bwdIATTotal\\\",\\n \\\"fwdIATMean\\\",\\n \\\"bwdIATMean\\\",\\n \\\"fwdIATStd\\\",\\n \\\"bwdIATStd\\\",\\n \\\"fwdIATMin\\\",\\n \\\"bwdIATMin\\\",\\n \\\"fwdIATMax\\\",\\n \\\"bwdIATMax\\\",\\n \\\"flowLengthPerTime\\\",\\n \\\"fwdFlowLengthPerTime\\\",\\n \\\"bwdFlowLengthPerTime\\\",\\n \\\"packetSizeTotalPerTime\\\",\\n \\\"fwdPacketSizeTotalPerTime\\\",\\n \\\"bwdPacketSizeTotalPerTime\\\",\\n \\\"Type\\\",\\n]\";\n",
" var nbb_formatted_code = \"# Feature columns.\\nfeature_cols = [\\n \\\"flowDuration\\\",\\n \\\"flowLength\\\",\\n \\\"fwdFlowLength\\\",\\n \\\"bwdFlowLength\\\",\\n \\\"packetSizeTotal\\\",\\n \\\"packetSizeMean\\\",\\n \\\"packetSizeStd\\\",\\n \\\"packetSizeMin\\\",\\n \\\"packetSizeMax\\\",\\n \\\"fwdPacketSizeTotal\\\",\\n \\\"bwdPacketSizeTotal\\\",\\n \\\"fwdPacketSizeMean\\\",\\n \\\"bwdPacketSizeMean\\\",\\n \\\"fwdPacketSizeStd\\\",\\n \\\"bwdPacketSizeStd\\\",\\n \\\"fwdPacketSizeMin\\\",\\n \\\"bwdPacketSizeMin\\\",\\n \\\"fwdPacketSizeMax\\\",\\n \\\"bwdPacketSizeMax\\\",\\n \\\"IATMean\\\",\\n \\\"IATStd\\\",\\n \\\"IATMin\\\",\\n \\\"IATMax\\\",\\n \\\"fwdIATTotal\\\",\\n \\\"bwdIATTotal\\\",\\n \\\"fwdIATMean\\\",\\n \\\"bwdIATMean\\\",\\n \\\"fwdIATStd\\\",\\n \\\"bwdIATStd\\\",\\n \\\"fwdIATMin\\\",\\n \\\"bwdIATMin\\\",\\n \\\"fwdIATMax\\\",\\n \\\"bwdIATMax\\\",\\n \\\"flowLengthPerTime\\\",\\n \\\"fwdFlowLengthPerTime\\\",\\n \\\"bwdFlowLengthPerTime\\\",\\n \\\"packetSizeTotalPerTime\\\",\\n \\\"fwdPacketSizeTotalPerTime\\\",\\n \\\"bwdPacketSizeTotalPerTime\\\",\\n \\\"Type\\\",\\n]\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Feature columns.\n",
"feature_cols = [\n",
" \"flowDuration\",\n",
" \"flowLength\",\n",
" \"fwdFlowLength\",\n",
" \"bwdFlowLength\",\n",
" \"packetSizeTotal\",\n",
" \"packetSizeMean\",\n",
" \"packetSizeStd\",\n",
" \"packetSizeMin\",\n",
" \"packetSizeMax\",\n",
" \"fwdPacketSizeTotal\",\n",
" \"bwdPacketSizeTotal\",\n",
" \"fwdPacketSizeMean\",\n",
" \"bwdPacketSizeMean\",\n",
" \"fwdPacketSizeStd\",\n",
" \"bwdPacketSizeStd\",\n",
" \"fwdPacketSizeMin\",\n",
" \"bwdPacketSizeMin\",\n",
" \"fwdPacketSizeMax\",\n",
" \"bwdPacketSizeMax\",\n",
" \"IATMean\",\n",
" \"IATStd\",\n",
" \"IATMin\",\n",
" \"IATMax\",\n",
" \"fwdIATTotal\",\n",
" \"bwdIATTotal\",\n",
" \"fwdIATMean\",\n",
" \"bwdIATMean\",\n",
" \"fwdIATStd\",\n",
" \"bwdIATStd\",\n",
" \"fwdIATMin\",\n",
" \"bwdIATMin\",\n",
" \"fwdIATMax\",\n",
" \"bwdIATMax\",\n",
" \"flowLengthPerTime\",\n",
" \"fwdFlowLengthPerTime\",\n",
" \"bwdFlowLengthPerTime\",\n",
" \"packetSizeTotalPerTime\",\n",
" \"fwdPacketSizeTotalPerTime\",\n",
" \"bwdPacketSizeTotalPerTime\",\n",
" \"Type\",\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Dataframe with chosen features"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"code_folding": [
0
]
},
"outputs": [
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 10;\n",
" var nbb_unformatted_code = \"## Select feature columns in datasets.\\npd_comb_features = pd_comb[feature_cols]\";\n",
" var nbb_formatted_code = \"## Select feature columns in datasets.\\npd_comb_features = pd_comb[feature_cols]\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"## Select feature columns in datasets.\n",
"pd_comb_features = pd_comb[feature_cols]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Feature importance plots"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"code_folding": [
0
]
},
"outputs": [
{
"data": {
"text/plain": [
"Text(0, 0.5, 'Density')"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 10;\n",
" var nbb_unformatted_code = \"#feature = \\\"fwdPacketSizeMax\\\"\\nfeature = \\\"fwdPacketSizeMax\\\"\\n\\ndf_benign = pd_benign\\ndf_malicious = pd_malicious\\n\\n\\nMIN1, MAX1 = (\\n df_benign[feature].min(),\\n df_benign[feature].max(),\\n)\\n\\n\\nMIN2, MAX2 = (\\n df_malicious[feature].min(),\\n df_malicious[feature].max(),\\n)\\n\\n\\nNUM_BINS = 50\\n\\nIF_NORM = True\\n\\nylabel_dict = {True: \\\"Density\\\", False: \\\"Frequency\\\"}\\n\\nplt.hist(\\n x=df_benign[feature].values,\\n # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\\n bins=np.linspace(MIN1, MAX1, NUM_BINS),\\n density=IF_NORM,\\n label=\\\"Benign\\\", zorder=2\\n)\\n\\nplt.hist(\\n x=df_malicious[feature].values,\\n # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\\n bins=np.linspace(MIN2, MAX2, NUM_BINS),\\n density=IF_NORM,\\n label=\\\"Malicious\\\", zorder=3\\n)\\n\\n\\nplt.gca().set_xscale(\\\"log\\\")\\nplt.gca().set_yscale(\\\"log\\\")\\n\\nplt.legend()\\nplt.xlabel(feature)\\nplt.ylabel(ylabel_dict[IF_NORM])\";\n",
" var nbb_formatted_code = \"# feature = \\\"fwdPacketSizeMax\\\"\\nfeature = \\\"fwdPacketSizeMax\\\"\\n\\ndf_benign = pd_benign\\ndf_malicious = pd_malicious\\n\\n\\nMIN1, MAX1 = (\\n df_benign[feature].min(),\\n df_benign[feature].max(),\\n)\\n\\n\\nMIN2, MAX2 = (\\n df_malicious[feature].min(),\\n df_malicious[feature].max(),\\n)\\n\\n\\nNUM_BINS = 50\\n\\nIF_NORM = True\\n\\nylabel_dict = {True: \\\"Density\\\", False: \\\"Frequency\\\"}\\n\\nplt.hist(\\n x=df_benign[feature].values,\\n # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\\n bins=np.linspace(MIN1, MAX1, NUM_BINS),\\n density=IF_NORM,\\n label=\\\"Benign\\\",\\n zorder=2,\\n)\\n\\nplt.hist(\\n x=df_malicious[feature].values,\\n # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\\n bins=np.linspace(MIN2, MAX2, NUM_BINS),\\n density=IF_NORM,\\n label=\\\"Malicious\\\",\\n zorder=3,\\n)\\n\\n\\nplt.gca().set_xscale(\\\"log\\\")\\nplt.gca().set_yscale(\\\"log\\\")\\n\\nplt.legend()\\nplt.xlabel(feature)\\nplt.ylabel(ylabel_dict[IF_NORM])\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#feature = \"fwdPacketSizeMax\"\n",
"feature = \"fwdPacketSizeMax\"\n",
"\n",
"df_benign = pd_benign\n",
"df_malicious = pd_malicious\n",
"\n",
"\n",
"MIN1, MAX1 = (\n",
" df_benign[feature].min(),\n",
" df_benign[feature].max(),\n",
")\n",
"\n",
"\n",
"MIN2, MAX2 = (\n",
" df_malicious[feature].min(),\n",
" df_malicious[feature].max(),\n",
")\n",
"\n",
"\n",
"NUM_BINS = 50\n",
"\n",
"IF_NORM = True\n",
"\n",
"ylabel_dict = {True: \"Density\", False: \"Frequency\"}\n",
"\n",
"plt.hist(\n",
" x=df_benign[feature].values,\n",
" # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\n",
" bins=np.linspace(MIN1, MAX1, NUM_BINS),\n",
" density=IF_NORM,\n",
" label=\"Benign\", zorder=2\n",
")\n",
"\n",
"plt.hist(\n",
" x=df_malicious[feature].values,\n",
" # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\n",
" bins=np.linspace(MIN2, MAX2, NUM_BINS),\n",
" density=IF_NORM,\n",
" label=\"Malicious\", zorder=3\n",
")\n",
"\n",
"\n",
"plt.gca().set_xscale(\"log\")\n",
"plt.gca().set_yscale(\"log\")\n",
"\n",
"plt.legend()\n",
"plt.xlabel(feature)\n",
"plt.ylabel(ylabel_dict[IF_NORM])\n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"code_folding": [
0
]
},
"outputs": [
{
"data": {
"text/plain": [
"Text(0, 0.5, 'Density')"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 12;\n",
" var nbb_unformatted_code = \"#feature = \\\"flowDuration\\\"\\nfeature = \\\"flowDuration\\\"\\n\\ndf_benign = pd_benign\\ndf_malicious = pd_malicious\\n\\n\\nMIN1, MAX1 = (\\n df_benign[feature].min(),\\n df_benign[feature].max(),\\n)\\n\\n\\nMIN2, MAX2 = (\\n df_malicious[feature].min(),\\n df_malicious[feature].max(),\\n)\\n\\n\\nNUM_BINS = 50\\n\\nIF_NORM = True\\n\\nylabel_dict = {True: \\\"Density\\\", False: \\\"Frequency\\\"}\\n\\nplt.hist(\\n x=df_benign[feature].values,\\n # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\\n bins=np.linspace(MIN1, MAX1, NUM_BINS),\\n density=IF_NORM,\\n label=\\\"Benign\\\", zorder=2\\n)\\n\\nplt.hist(\\n x=df_malicious[feature].values,\\n # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\\n bins=np.linspace(MIN2, MAX2, NUM_BINS),\\n density=IF_NORM,\\n label=\\\"Malicious\\\", zorder=3\\n)\\n\\n\\nplt.gca().set_xscale(\\\"log\\\")\\nplt.gca().set_yscale(\\\"log\\\")\\n\\nplt.legend()\\nplt.xlabel(feature)\\nplt.ylabel(ylabel_dict[IF_NORM])\";\n",
" var nbb_formatted_code = \"# feature = \\\"flowDuration\\\"\\nfeature = \\\"flowDuration\\\"\\n\\ndf_benign = pd_benign\\ndf_malicious = pd_malicious\\n\\n\\nMIN1, MAX1 = (\\n df_benign[feature].min(),\\n df_benign[feature].max(),\\n)\\n\\n\\nMIN2, MAX2 = (\\n df_malicious[feature].min(),\\n df_malicious[feature].max(),\\n)\\n\\n\\nNUM_BINS = 50\\n\\nIF_NORM = True\\n\\nylabel_dict = {True: \\\"Density\\\", False: \\\"Frequency\\\"}\\n\\nplt.hist(\\n x=df_benign[feature].values,\\n # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\\n bins=np.linspace(MIN1, MAX1, NUM_BINS),\\n density=IF_NORM,\\n label=\\\"Benign\\\",\\n zorder=2,\\n)\\n\\nplt.hist(\\n x=df_malicious[feature].values,\\n # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\\n bins=np.linspace(MIN2, MAX2, NUM_BINS),\\n density=IF_NORM,\\n label=\\\"Malicious\\\",\\n zorder=3,\\n)\\n\\n\\nplt.gca().set_xscale(\\\"log\\\")\\nplt.gca().set_yscale(\\\"log\\\")\\n\\nplt.legend()\\nplt.xlabel(feature)\\nplt.ylabel(ylabel_dict[IF_NORM])\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#feature = \"flowDuration\"\n",
"feature = \"flowDuration\"\n",
"\n",
"df_benign = pd_benign\n",
"df_malicious = pd_malicious\n",
"\n",
"\n",
"MIN1, MAX1 = (\n",
" df_benign[feature].min(),\n",
" df_benign[feature].max(),\n",
")\n",
"\n",
"\n",
"MIN2, MAX2 = (\n",
" df_malicious[feature].min(),\n",
" df_malicious[feature].max(),\n",
")\n",
"\n",
"\n",
"NUM_BINS = 50\n",
"\n",
"IF_NORM = True\n",
"\n",
"ylabel_dict = {True: \"Density\", False: \"Frequency\"}\n",
"\n",
"plt.hist(\n",
" x=df_benign[feature].values,\n",
" # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\n",
" bins=np.linspace(MIN1, MAX1, NUM_BINS),\n",
" density=IF_NORM,\n",
" label=\"Benign\", zorder=2\n",
")\n",
"\n",
"plt.hist(\n",
" x=df_malicious[feature].values,\n",
" # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\n",
" bins=np.linspace(MIN2, MAX2, NUM_BINS),\n",
" density=IF_NORM,\n",
" label=\"Malicious\", zorder=3\n",
")\n",
"\n",
"\n",
"plt.gca().set_xscale(\"log\")\n",
"plt.gca().set_yscale(\"log\")\n",
"\n",
"plt.legend()\n",
"plt.xlabel(feature)\n",
"plt.ylabel(ylabel_dict[IF_NORM])\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"code_folding": [
0
]
},
"outputs": [
{
"data": {
"text/plain": [
"Text(0, 0.5, 'Density')"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 13;\n",
" var nbb_unformatted_code = \"#feature = \\\"fwdPacketSizeTotal\\\"\\nfeature = \\\"fwdPacketSizeTotal\\\"\\n\\ndf_benign = pd_benign\\ndf_malicious = pd_malicious\\n\\n\\nMIN1, MAX1 = (\\n df_benign[feature].min(),\\n df_benign[feature].max(),\\n)\\n\\n\\nMIN2, MAX2 = (\\n df_malicious[feature].min(),\\n df_malicious[feature].max(),\\n)\\n\\n\\nNUM_BINS = 50\\n\\nIF_NORM = True\\n\\nylabel_dict = {True: \\\"Density\\\", False: \\\"Frequency\\\"}\\n\\nplt.hist(\\n x=df_benign[feature].values,\\n # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\\n bins=np.linspace(MIN1, MAX1, NUM_BINS),\\n density=IF_NORM,\\n label=\\\"Benign\\\", zorder=2\\n)\\n\\nplt.hist(\\n x=df_malicious[feature].values,\\n # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\\n bins=np.linspace(MIN2, MAX2, NUM_BINS),\\n density=IF_NORM,\\n label=\\\"Malicious\\\", zorder=3\\n)\\n\\n\\nplt.gca().set_xscale(\\\"log\\\")\\nplt.gca().set_yscale(\\\"log\\\")\\n\\nplt.legend()\\nplt.xlabel(feature)\\nplt.ylabel(ylabel_dict[IF_NORM])\";\n",
" var nbb_formatted_code = \"# feature = \\\"fwdPacketSizeTotal\\\"\\nfeature = \\\"fwdPacketSizeTotal\\\"\\n\\ndf_benign = pd_benign\\ndf_malicious = pd_malicious\\n\\n\\nMIN1, MAX1 = (\\n df_benign[feature].min(),\\n df_benign[feature].max(),\\n)\\n\\n\\nMIN2, MAX2 = (\\n df_malicious[feature].min(),\\n df_malicious[feature].max(),\\n)\\n\\n\\nNUM_BINS = 50\\n\\nIF_NORM = True\\n\\nylabel_dict = {True: \\\"Density\\\", False: \\\"Frequency\\\"}\\n\\nplt.hist(\\n x=df_benign[feature].values,\\n # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\\n bins=np.linspace(MIN1, MAX1, NUM_BINS),\\n density=IF_NORM,\\n label=\\\"Benign\\\",\\n zorder=2,\\n)\\n\\nplt.hist(\\n x=df_malicious[feature].values,\\n # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\\n bins=np.linspace(MIN2, MAX2, NUM_BINS),\\n density=IF_NORM,\\n label=\\\"Malicious\\\",\\n zorder=3,\\n)\\n\\n\\nplt.gca().set_xscale(\\\"log\\\")\\nplt.gca().set_yscale(\\\"log\\\")\\n\\nplt.legend()\\nplt.xlabel(feature)\\nplt.ylabel(ylabel_dict[IF_NORM])\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#feature = \"fwdPacketSizeTotal\"\n",
"feature = \"fwdPacketSizeTotal\"\n",
"\n",
"df_benign = pd_benign\n",
"df_malicious = pd_malicious\n",
"\n",
"\n",
"MIN1, MAX1 = (\n",
" df_benign[feature].min(),\n",
" df_benign[feature].max(),\n",
")\n",
"\n",
"\n",
"MIN2, MAX2 = (\n",
" df_malicious[feature].min(),\n",
" df_malicious[feature].max(),\n",
")\n",
"\n",
"\n",
"NUM_BINS = 50\n",
"\n",
"IF_NORM = True\n",
"\n",
"ylabel_dict = {True: \"Density\", False: \"Frequency\"}\n",
"\n",
"plt.hist(\n",
" x=df_benign[feature].values,\n",
" # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\n",
" bins=np.linspace(MIN1, MAX1, NUM_BINS),\n",
" density=IF_NORM,\n",
" label=\"Benign\", zorder=2\n",
")\n",
"\n",
"plt.hist(\n",
" x=df_malicious[feature].values,\n",
" # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\n",
" bins=np.linspace(MIN2, MAX2, NUM_BINS),\n",
" density=IF_NORM,\n",
" label=\"Malicious\", zorder=3\n",
")\n",
"\n",
"\n",
"plt.gca().set_xscale(\"log\")\n",
"plt.gca().set_yscale(\"log\")\n",
"\n",
"plt.legend()\n",
"plt.xlabel(feature)\n",
"plt.ylabel(ylabel_dict[IF_NORM])\n"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"code_folding": [
0
]
},
"outputs": [
{
"data": {
"text/plain": [
"Text(0, 0.5, 'Density')"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 14;\n",
" var nbb_unformatted_code = \"#feature = \\\"fwdFlowLength\\\"\\nfeature = \\\"fwdFlowLength\\\"\\n\\ndf_benign = pd_benign\\ndf_malicious = pd_malicious\\n\\n\\nMIN1, MAX1 = (\\n df_benign[feature].min(),\\n df_benign[feature].max(),\\n)\\n\\n\\nMIN2, MAX2 = (\\n df_malicious[feature].min(),\\n df_malicious[feature].max(),\\n)\\n\\n\\nNUM_BINS = 50\\n\\nIF_NORM = True\\n\\nylabel_dict = {True: \\\"Density\\\", False: \\\"Frequency\\\"}\\n\\nplt.hist(\\n x=df_benign[feature].values,\\n # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\\n bins=np.linspace(MIN1, MAX1, NUM_BINS),\\n density=IF_NORM,\\n label=\\\"Benign\\\", zorder=2\\n)\\n\\nplt.hist(\\n x=df_malicious[feature].values,\\n # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\\n bins=np.linspace(MIN2, MAX2, NUM_BINS),\\n density=IF_NORM,\\n label=\\\"Malicious\\\", zorder=3\\n)\\n\\n\\n#plt.gca().set_xscale(\\\"log\\\")\\nplt.gca().set_yscale(\\\"log\\\")\\n\\nplt.legend()\\nplt.xlabel(feature)\\nplt.ylabel(ylabel_dict[IF_NORM])\";\n",
" var nbb_formatted_code = \"# feature = \\\"fwdFlowLength\\\"\\nfeature = \\\"fwdFlowLength\\\"\\n\\ndf_benign = pd_benign\\ndf_malicious = pd_malicious\\n\\n\\nMIN1, MAX1 = (\\n df_benign[feature].min(),\\n df_benign[feature].max(),\\n)\\n\\n\\nMIN2, MAX2 = (\\n df_malicious[feature].min(),\\n df_malicious[feature].max(),\\n)\\n\\n\\nNUM_BINS = 50\\n\\nIF_NORM = True\\n\\nylabel_dict = {True: \\\"Density\\\", False: \\\"Frequency\\\"}\\n\\nplt.hist(\\n x=df_benign[feature].values,\\n # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\\n bins=np.linspace(MIN1, MAX1, NUM_BINS),\\n density=IF_NORM,\\n label=\\\"Benign\\\",\\n zorder=2,\\n)\\n\\nplt.hist(\\n x=df_malicious[feature].values,\\n # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\\n bins=np.linspace(MIN2, MAX2, NUM_BINS),\\n density=IF_NORM,\\n label=\\\"Malicious\\\",\\n zorder=3,\\n)\\n\\n\\n# plt.gca().set_xscale(\\\"log\\\")\\nplt.gca().set_yscale(\\\"log\\\")\\n\\nplt.legend()\\nplt.xlabel(feature)\\nplt.ylabel(ylabel_dict[IF_NORM])\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#feature = \"fwdFlowLength\"\n",
"feature = \"fwdFlowLength\"\n",
"\n",
"df_benign = pd_benign\n",
"df_malicious = pd_malicious\n",
"\n",
"\n",
"MIN1, MAX1 = (\n",
" df_benign[feature].min(),\n",
" df_benign[feature].max(),\n",
")\n",
"\n",
"\n",
"MIN2, MAX2 = (\n",
" df_malicious[feature].min(),\n",
" df_malicious[feature].max(),\n",
")\n",
"\n",
"\n",
"NUM_BINS = 50\n",
"\n",
"IF_NORM = True\n",
"\n",
"ylabel_dict = {True: \"Density\", False: \"Frequency\"}\n",
"\n",
"plt.hist(\n",
" x=df_benign[feature].values,\n",
" # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\n",
" bins=np.linspace(MIN1, MAX1, NUM_BINS),\n",
" density=IF_NORM,\n",
" label=\"Benign\", zorder=2\n",
")\n",
"\n",
"plt.hist(\n",
" x=df_malicious[feature].values,\n",
" # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\n",
" bins=np.linspace(MIN2, MAX2, NUM_BINS),\n",
" density=IF_NORM,\n",
" label=\"Malicious\", zorder=3\n",
")\n",
"\n",
"\n",
"#plt.gca().set_xscale(\"log\")\n",
"plt.gca().set_yscale(\"log\")\n",
"\n",
"plt.legend()\n",
"plt.xlabel(feature)\n",
"plt.ylabel(ylabel_dict[IF_NORM])\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"code_folding": [
0
]
},
"outputs": [
{
"data": {
"text/plain": [
"Text(0, 0.5, 'Density')"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 15;\n",
" var nbb_unformatted_code = \"#feature = \\\"fwdIATMean\\\"\\nfeature = \\\"fwdIATMean\\\"\\n\\ndf_benign = pd_benign\\ndf_malicious = pd_malicious\\n\\n\\nMIN1, MAX1 = (\\n df_benign[feature].min(),\\n df_benign[feature].max(),\\n)\\n\\n\\nMIN2, MAX2 = (\\n df_malicious[feature].min(),\\n df_malicious[feature].max(),\\n)\\n\\n\\nNUM_BINS = 50\\n\\nIF_NORM = True\\n\\nylabel_dict = {True: \\\"Density\\\", False: \\\"Frequency\\\"}\\n\\nplt.hist(\\n x=df_benign[feature].values,\\n # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\\n bins=np.linspace(MIN1, MAX1, NUM_BINS),\\n density=IF_NORM,\\n label=\\\"Benign\\\", zorder=2\\n)\\n\\nplt.hist(\\n x=df_malicious[feature].values,\\n # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\\n bins=np.linspace(MIN2, MAX2, NUM_BINS),\\n density=IF_NORM,\\n label=\\\"Malicious\\\", zorder=3\\n)\\n\\n\\nplt.gca().set_xscale(\\\"log\\\")\\nplt.gca().set_yscale(\\\"log\\\")\\n\\nplt.legend()\\nplt.xlabel(feature)\\nplt.ylabel(ylabel_dict[IF_NORM])\";\n",
" var nbb_formatted_code = \"# feature = \\\"fwdIATMean\\\"\\nfeature = \\\"fwdIATMean\\\"\\n\\ndf_benign = pd_benign\\ndf_malicious = pd_malicious\\n\\n\\nMIN1, MAX1 = (\\n df_benign[feature].min(),\\n df_benign[feature].max(),\\n)\\n\\n\\nMIN2, MAX2 = (\\n df_malicious[feature].min(),\\n df_malicious[feature].max(),\\n)\\n\\n\\nNUM_BINS = 50\\n\\nIF_NORM = True\\n\\nylabel_dict = {True: \\\"Density\\\", False: \\\"Frequency\\\"}\\n\\nplt.hist(\\n x=df_benign[feature].values,\\n # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\\n bins=np.linspace(MIN1, MAX1, NUM_BINS),\\n density=IF_NORM,\\n label=\\\"Benign\\\",\\n zorder=2,\\n)\\n\\nplt.hist(\\n x=df_malicious[feature].values,\\n # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\\n bins=np.linspace(MIN2, MAX2, NUM_BINS),\\n density=IF_NORM,\\n label=\\\"Malicious\\\",\\n zorder=3,\\n)\\n\\n\\nplt.gca().set_xscale(\\\"log\\\")\\nplt.gca().set_yscale(\\\"log\\\")\\n\\nplt.legend()\\nplt.xlabel(feature)\\nplt.ylabel(ylabel_dict[IF_NORM])\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#feature = \"fwdIATMean\"\n",
"feature = \"fwdIATMean\"\n",
"\n",
"df_benign = pd_benign\n",
"df_malicious = pd_malicious\n",
"\n",
"\n",
"MIN1, MAX1 = (\n",
" df_benign[feature].min(),\n",
" df_benign[feature].max(),\n",
")\n",
"\n",
"\n",
"MIN2, MAX2 = (\n",
" df_malicious[feature].min(),\n",
" df_malicious[feature].max(),\n",
")\n",
"\n",
"\n",
"NUM_BINS = 50\n",
"\n",
"IF_NORM = True\n",
"\n",
"ylabel_dict = {True: \"Density\", False: \"Frequency\"}\n",
"\n",
"plt.hist(\n",
" x=df_benign[feature].values,\n",
" # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\n",
" bins=np.linspace(MIN1, MAX1, NUM_BINS),\n",
" density=IF_NORM,\n",
" label=\"Benign\", zorder=2\n",
")\n",
"\n",
"plt.hist(\n",
" x=df_malicious[feature].values,\n",
" # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\n",
" bins=np.linspace(MIN2, MAX2, NUM_BINS),\n",
" density=IF_NORM,\n",
" label=\"Malicious\", zorder=3\n",
")\n",
"\n",
"\n",
"plt.gca().set_xscale(\"log\")\n",
"plt.gca().set_yscale(\"log\")\n",
"\n",
"plt.legend()\n",
"plt.xlabel(feature)\n",
"plt.ylabel(ylabel_dict[IF_NORM])\n"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"code_folding": [
0
]
},
"outputs": [
{
"data": {
"text/plain": [
"Text(0, 0.5, 'Density')"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 16;\n",
" var nbb_unformatted_code = \"#feature = \\\"bwdIATMean\\\"\\nfeature = \\\"bwdIATMean\\\"\\n\\ndf_benign = pd_benign\\ndf_malicious = pd_malicious\\n\\n\\nMIN1, MAX1 = (\\n df_benign[feature].min(),\\n df_benign[feature].max(),\\n)\\n\\n\\nMIN2, MAX2 = (\\n df_malicious[feature].min(),\\n df_malicious[feature].max(),\\n)\\n\\n\\nNUM_BINS = 50\\n\\nIF_NORM = True\\n\\nylabel_dict = {True: \\\"Density\\\", False: \\\"Frequency\\\"}\\n\\nplt.hist(\\n x=df_benign[feature].values,\\n # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\\n bins=np.linspace(MIN1, MAX1, NUM_BINS),\\n density=IF_NORM,\\n label=\\\"Benign\\\", zorder=2\\n)\\n\\nplt.hist(\\n x=df_malicious[feature].values,\\n # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\\n bins=np.linspace(MIN2, MAX2, NUM_BINS),\\n density=IF_NORM,\\n label=\\\"Malicious\\\", zorder=3\\n)\\n\\n\\nplt.gca().set_xscale(\\\"log\\\")\\nplt.gca().set_yscale(\\\"log\\\")\\n\\nplt.legend()\\nplt.xlabel(feature)\\nplt.ylabel(ylabel_dict[IF_NORM])\";\n",
" var nbb_formatted_code = \"# feature = \\\"bwdIATMean\\\"\\nfeature = \\\"bwdIATMean\\\"\\n\\ndf_benign = pd_benign\\ndf_malicious = pd_malicious\\n\\n\\nMIN1, MAX1 = (\\n df_benign[feature].min(),\\n df_benign[feature].max(),\\n)\\n\\n\\nMIN2, MAX2 = (\\n df_malicious[feature].min(),\\n df_malicious[feature].max(),\\n)\\n\\n\\nNUM_BINS = 50\\n\\nIF_NORM = True\\n\\nylabel_dict = {True: \\\"Density\\\", False: \\\"Frequency\\\"}\\n\\nplt.hist(\\n x=df_benign[feature].values,\\n # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\\n bins=np.linspace(MIN1, MAX1, NUM_BINS),\\n density=IF_NORM,\\n label=\\\"Benign\\\",\\n zorder=2,\\n)\\n\\nplt.hist(\\n x=df_malicious[feature].values,\\n # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\\n bins=np.linspace(MIN2, MAX2, NUM_BINS),\\n density=IF_NORM,\\n label=\\\"Malicious\\\",\\n zorder=3,\\n)\\n\\n\\nplt.gca().set_xscale(\\\"log\\\")\\nplt.gca().set_yscale(\\\"log\\\")\\n\\nplt.legend()\\nplt.xlabel(feature)\\nplt.ylabel(ylabel_dict[IF_NORM])\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#feature = \"bwdIATMean\"\n",
"feature = \"bwdIATMean\"\n",
"\n",
"df_benign = pd_benign\n",
"df_malicious = pd_malicious\n",
"\n",
"\n",
"MIN1, MAX1 = (\n",
" df_benign[feature].min(),\n",
" df_benign[feature].max(),\n",
")\n",
"\n",
"\n",
"MIN2, MAX2 = (\n",
" df_malicious[feature].min(),\n",
" df_malicious[feature].max(),\n",
")\n",
"\n",
"\n",
"NUM_BINS = 50\n",
"\n",
"IF_NORM = True\n",
"\n",
"ylabel_dict = {True: \"Density\", False: \"Frequency\"}\n",
"\n",
"plt.hist(\n",
" x=df_benign[feature].values,\n",
" # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\n",
" bins=np.linspace(MIN1, MAX1, NUM_BINS),\n",
" density=IF_NORM,\n",
" label=\"Benign\", zorder=2\n",
")\n",
"\n",
"plt.hist(\n",
" x=df_malicious[feature].values,\n",
" # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\n",
" bins=np.linspace(MIN2, MAX2, NUM_BINS),\n",
" density=IF_NORM,\n",
" label=\"Malicious\", zorder=3\n",
")\n",
"\n",
"\n",
"plt.gca().set_xscale(\"log\")\n",
"plt.gca().set_yscale(\"log\")\n",
"\n",
"plt.legend()\n",
"plt.xlabel(feature)\n",
"plt.ylabel(ylabel_dict[IF_NORM])\n"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"code_folding": [
0
]
},
"outputs": [
{
"data": {
"text/plain": [
"Text(0, 0.5, 'Density')"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 17;\n",
" var nbb_unformatted_code = \"#feature = \\\"fwdPacketSizeMean\\\"\\nfeature = \\\"fwdPacketSizeMean\\\"\\n\\ndf_benign = pd_benign\\ndf_malicious = pd_malicious\\n\\n\\nMIN1, MAX1 = (\\n df_benign[feature].min(),\\n df_benign[feature].max(),\\n)\\n\\n\\nMIN2, MAX2 = (\\n df_malicious[feature].min(),\\n df_malicious[feature].max(),\\n)\\n\\n\\nNUM_BINS = 50\\n\\nIF_NORM = True\\n\\nylabel_dict = {True: \\\"Density\\\", False: \\\"Frequency\\\"}\\n\\nplt.hist(\\n x=df_benign[feature].values,\\n # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\\n bins=np.linspace(MIN1, MAX1, NUM_BINS),\\n density=IF_NORM,\\n label=\\\"Benign\\\", zorder=2\\n)\\n\\nplt.hist(\\n x=df_malicious[feature].values,\\n # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\\n bins=np.linspace(MIN2, MAX2, NUM_BINS),\\n density=IF_NORM,\\n label=\\\"Malicious\\\", zorder=3\\n)\\n\\n\\nplt.gca().set_xscale(\\\"log\\\")\\nplt.gca().set_yscale(\\\"log\\\")\\n\\nplt.legend()\\nplt.xlabel(feature)\\nplt.ylabel(ylabel_dict[IF_NORM])\";\n",
" var nbb_formatted_code = \"# feature = \\\"fwdPacketSizeMean\\\"\\nfeature = \\\"fwdPacketSizeMean\\\"\\n\\ndf_benign = pd_benign\\ndf_malicious = pd_malicious\\n\\n\\nMIN1, MAX1 = (\\n df_benign[feature].min(),\\n df_benign[feature].max(),\\n)\\n\\n\\nMIN2, MAX2 = (\\n df_malicious[feature].min(),\\n df_malicious[feature].max(),\\n)\\n\\n\\nNUM_BINS = 50\\n\\nIF_NORM = True\\n\\nylabel_dict = {True: \\\"Density\\\", False: \\\"Frequency\\\"}\\n\\nplt.hist(\\n x=df_benign[feature].values,\\n # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\\n bins=np.linspace(MIN1, MAX1, NUM_BINS),\\n density=IF_NORM,\\n label=\\\"Benign\\\",\\n zorder=2,\\n)\\n\\nplt.hist(\\n x=df_malicious[feature].values,\\n # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\\n bins=np.linspace(MIN2, MAX2, NUM_BINS),\\n density=IF_NORM,\\n label=\\\"Malicious\\\",\\n zorder=3,\\n)\\n\\n\\nplt.gca().set_xscale(\\\"log\\\")\\nplt.gca().set_yscale(\\\"log\\\")\\n\\nplt.legend()\\nplt.xlabel(feature)\\nplt.ylabel(ylabel_dict[IF_NORM])\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#feature = \"fwdPacketSizeMean\"\n",
"feature = \"fwdPacketSizeMean\"\n",
"\n",
"df_benign = pd_benign\n",
"df_malicious = pd_malicious\n",
"\n",
"\n",
"MIN1, MAX1 = (\n",
" df_benign[feature].min(),\n",
" df_benign[feature].max(),\n",
")\n",
"\n",
"\n",
"MIN2, MAX2 = (\n",
" df_malicious[feature].min(),\n",
" df_malicious[feature].max(),\n",
")\n",
"\n",
"\n",
"NUM_BINS = 50\n",
"\n",
"IF_NORM = True\n",
"\n",
"ylabel_dict = {True: \"Density\", False: \"Frequency\"}\n",
"\n",
"plt.hist(\n",
" x=df_benign[feature].values,\n",
" # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\n",
" bins=np.linspace(MIN1, MAX1, NUM_BINS),\n",
" density=IF_NORM,\n",
" label=\"Benign\", zorder=2\n",
")\n",
"\n",
"plt.hist(\n",
" x=df_malicious[feature].values,\n",
" # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\n",
" bins=np.linspace(MIN2, MAX2, NUM_BINS),\n",
" density=IF_NORM,\n",
" label=\"Malicious\", zorder=3\n",
")\n",
"\n",
"\n",
"plt.gca().set_xscale(\"log\")\n",
"plt.gca().set_yscale(\"log\")\n",
"\n",
"plt.legend()\n",
"plt.xlabel(feature)\n",
"plt.ylabel(ylabel_dict[IF_NORM])\n"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"code_folding": [
0
]
},
"outputs": [
{
"data": {
"text/plain": [
"Text(0, 0.5, 'Density')"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 18;\n",
" var nbb_unformatted_code = \"#feature = \\\"packetSizeMax\\\"\\nfeature = \\\"packetSizeMax\\\"\\n\\ndf_benign = pd_benign\\ndf_malicious = pd_malicious\\n\\n\\nMIN1, MAX1 = (\\n df_benign[feature].min(),\\n df_benign[feature].max(),\\n)\\n\\n\\nMIN2, MAX2 = (\\n df_malicious[feature].min(),\\n df_malicious[feature].max(),\\n)\\n\\n\\nNUM_BINS = 50\\n\\nIF_NORM = True\\n\\nylabel_dict = {True: \\\"Density\\\", False: \\\"Frequency\\\"}\\n\\nplt.hist(\\n x=df_benign[feature].values,\\n # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\\n bins=np.linspace(MIN1, MAX1, NUM_BINS),\\n density=IF_NORM,\\n label=\\\"Benign\\\", zorder=2\\n)\\n\\nplt.hist(\\n x=df_malicious[feature].values,\\n # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\\n bins=np.linspace(MIN2, MAX2, NUM_BINS),\\n density=IF_NORM,\\n label=\\\"Malicious\\\", zorder=3\\n)\\n\\n\\nplt.gca().set_xscale(\\\"log\\\")\\nplt.gca().set_yscale(\\\"log\\\")\\n\\nplt.legend()\\nplt.xlabel(feature)\\nplt.ylabel(ylabel_dict[IF_NORM])\";\n",
" var nbb_formatted_code = \"# feature = \\\"packetSizeMax\\\"\\nfeature = \\\"packetSizeMax\\\"\\n\\ndf_benign = pd_benign\\ndf_malicious = pd_malicious\\n\\n\\nMIN1, MAX1 = (\\n df_benign[feature].min(),\\n df_benign[feature].max(),\\n)\\n\\n\\nMIN2, MAX2 = (\\n df_malicious[feature].min(),\\n df_malicious[feature].max(),\\n)\\n\\n\\nNUM_BINS = 50\\n\\nIF_NORM = True\\n\\nylabel_dict = {True: \\\"Density\\\", False: \\\"Frequency\\\"}\\n\\nplt.hist(\\n x=df_benign[feature].values,\\n # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\\n bins=np.linspace(MIN1, MAX1, NUM_BINS),\\n density=IF_NORM,\\n label=\\\"Benign\\\",\\n zorder=2,\\n)\\n\\nplt.hist(\\n x=df_malicious[feature].values,\\n # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\\n bins=np.linspace(MIN2, MAX2, NUM_BINS),\\n density=IF_NORM,\\n label=\\\"Malicious\\\",\\n zorder=3,\\n)\\n\\n\\nplt.gca().set_xscale(\\\"log\\\")\\nplt.gca().set_yscale(\\\"log\\\")\\n\\nplt.legend()\\nplt.xlabel(feature)\\nplt.ylabel(ylabel_dict[IF_NORM])\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"#feature = \"packetSizeMax\"\n",
"feature = \"packetSizeMax\"\n",
"\n",
"df_benign = pd_benign\n",
"df_malicious = pd_malicious\n",
"\n",
"\n",
"MIN1, MAX1 = (\n",
" df_benign[feature].min(),\n",
" df_benign[feature].max(),\n",
")\n",
"\n",
"\n",
"MIN2, MAX2 = (\n",
" df_malicious[feature].min(),\n",
" df_malicious[feature].max(),\n",
")\n",
"\n",
"\n",
"NUM_BINS = 50\n",
"\n",
"IF_NORM = True\n",
"\n",
"ylabel_dict = {True: \"Density\", False: \"Frequency\"}\n",
"\n",
"plt.hist(\n",
" x=df_benign[feature].values,\n",
" # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\n",
" bins=np.linspace(MIN1, MAX1, NUM_BINS),\n",
" density=IF_NORM,\n",
" label=\"Benign\", zorder=2\n",
")\n",
"\n",
"plt.hist(\n",
" x=df_malicious[feature].values,\n",
" # bins=10 ** np.linspace(np.log10(MIN1), np.log10(MAX1), NUM_BINS),\n",
" bins=np.linspace(MIN2, MAX2, NUM_BINS),\n",
" density=IF_NORM,\n",
" label=\"Malicious\", zorder=3\n",
")\n",
"\n",
"\n",
"plt.gca().set_xscale(\"log\")\n",
"plt.gca().set_yscale(\"log\")\n",
"\n",
"plt.legend()\n",
"plt.xlabel(feature)\n",
"plt.ylabel(ylabel_dict[IF_NORM])\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Machine learning - feature importance"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Clean dataset"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"code_folding": [
0
]
},
"outputs": [
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 11;\n",
" var nbb_unformatted_code = \"# Remove spurious entries from dataset.\\ndef clean_dataset(df):\\n assert isinstance(df, pd.DataFrame), \\\"df needs to be a pd.DataFrame\\\"\\n df.dropna(inplace=True)\\n\\n df_X = df.iloc[:, :-1]\\n df_Y = df.iloc[:, -1]\\n\\n print(df.shape, df_X.shape, df_Y.shape)\\n indices_to_keep = ~df_X.isin([np.nan, np.inf, -np.inf]).any(1)\\n return df_X[indices_to_keep].astype(np.float64).values, df_Y[indices_to_keep].values\";\n",
" var nbb_formatted_code = \"# Remove spurious entries from dataset.\\ndef clean_dataset(df):\\n assert isinstance(df, pd.DataFrame), \\\"df needs to be a pd.DataFrame\\\"\\n df.dropna(inplace=True)\\n\\n df_X = df.iloc[:, :-1]\\n df_Y = df.iloc[:, -1]\\n\\n print(df.shape, df_X.shape, df_Y.shape)\\n indices_to_keep = ~df_X.isin([np.nan, np.inf, -np.inf]).any(1)\\n return df_X[indices_to_keep].astype(np.float64).values, df_Y[indices_to_keep].values\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Remove spurious entries from dataset.\n",
"def clean_dataset(df):\n",
" assert isinstance(df, pd.DataFrame), \"df needs to be a pd.DataFrame\"\n",
" df.dropna(inplace=True)\n",
"\n",
" df_X = df.iloc[:, :-1]\n",
" df_Y = df.iloc[:, -1]\n",
"\n",
" print(df.shape, df_X.shape, df_Y.shape)\n",
" indices_to_keep = ~df_X.isin([np.nan, np.inf, -np.inf]).any(1)\n",
" return df_X[indices_to_keep].astype(np.float64).values, df_Y[indices_to_keep].values"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Get feature and class arrays (X and y.)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"code_folding": [
0
]
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(28962, 40) (28962, 39) (28962,)\n"
]
},
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 12;\n",
" var nbb_unformatted_code = \"# Get feature and class arrays (X and y.)\\npd_comb_features_cp = pd_comb_features.copy(deep=True)\\n\\nX, y = clean_dataset(pd_comb_features_cp)\";\n",
" var nbb_formatted_code = \"# Get feature and class arrays (X and y.)\\npd_comb_features_cp = pd_comb_features.copy(deep=True)\\n\\nX, y = clean_dataset(pd_comb_features_cp)\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Get feature and class arrays (X and y.)\n",
"pd_comb_features_cp = pd_comb_features.copy(deep=True)\n",
"\n",
"X, y = clean_dataset(pd_comb_features_cp)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Train test split"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(20273, 39) (8689, 39)\n"
]
},
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 13;\n",
" var nbb_unformatted_code = \"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)\\n\\nprint(X_train.shape, X_test.shape)\";\n",
" var nbb_formatted_code = \"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)\\n\\nprint(X_train.shape, X_test.shape)\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)\n",
"\n",
"print(X_train.shape, X_test.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Scale data"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 14;\n",
" var nbb_unformatted_code = \"scaler = StandardScaler() # MinMaxScaler\\nX_train_scale = scaler.fit_transform(X_train)\\nX_test_scale = scaler.transform(X_test)\";\n",
" var nbb_formatted_code = \"scaler = StandardScaler() # MinMaxScaler\\nX_train_scale = scaler.fit_transform(X_train)\\nX_test_scale = scaler.transform(X_test)\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"scaler = StandardScaler() # MinMaxScaler\n",
"X_train_scale = scaler.fit_transform(X_train)\n",
"X_test_scale = scaler.transform(X_test)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Weighted Logistic Regression"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Hyperparameter grid search."
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"code_folding": [
0
]
},
"outputs": [
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 15;\n",
" var nbb_unformatted_code = \"# Class weights.\\nw = [\\n {0: 0.10, 1: 99.90},\\n {0: 0.25, 1: 99.75},\\n {0: 0.50, 1: 99.50},\\n {0: 0.75, 1: 99.25},\\n {0: 1.00, 1: 99.00},\\n {\\n 0: 100\\n * np.sum(y == \\\"Malicious\\\")\\n / (np.sum(y == \\\"Benign\\\") + np.sum(y == \\\"Malicious\\\")),\\n 1: 100\\n * np.sum(y == \\\"Benign\\\")\\n / (np.sum(y == \\\"Benign\\\") + np.sum(y == \\\"Malicious\\\")),\\n },\\n]\\n\\n# Inverse of regularization strength.\\ncrange = np.arange(0.1, 1.0, 0.2)\\n\\n\\n# Hyperparameter grid.\\nhyperparam_grid = {\\n \\\"class_weight\\\": w,\\n \\\"penalty\\\": [\\\"l1\\\", \\\"l2\\\"],\\n \\\"C\\\": crange,\\n \\\"fit_intercept\\\": [True, False],\\n}\";\n",
" var nbb_formatted_code = \"# Class weights.\\nw = [\\n {0: 0.10, 1: 99.90},\\n {0: 0.25, 1: 99.75},\\n {0: 0.50, 1: 99.50},\\n {0: 0.75, 1: 99.25},\\n {0: 1.00, 1: 99.00},\\n {\\n 0: 100\\n * np.sum(y == \\\"Malicious\\\")\\n / (np.sum(y == \\\"Benign\\\") + np.sum(y == \\\"Malicious\\\")),\\n 1: 100\\n * np.sum(y == \\\"Benign\\\")\\n / (np.sum(y == \\\"Benign\\\") + np.sum(y == \\\"Malicious\\\")),\\n },\\n]\\n\\n# Inverse of regularization strength.\\ncrange = np.arange(0.1, 1.0, 0.2)\\n\\n\\n# Hyperparameter grid.\\nhyperparam_grid = {\\n \\\"class_weight\\\": w,\\n \\\"penalty\\\": [\\\"l1\\\", \\\"l2\\\"],\\n \\\"C\\\": crange,\\n \\\"fit_intercept\\\": [True, False],\\n}\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Class weights.\n",
"w = [\n",
" {0: 0.10, 1: 99.90},\n",
" {0: 0.25, 1: 99.75},\n",
" {0: 0.50, 1: 99.50},\n",
" {0: 0.75, 1: 99.25},\n",
" {0: 1.00, 1: 99.00},\n",
" {\n",
" 0: 100\n",
" * np.sum(y == \"Malicious\")\n",
" / (np.sum(y == \"Benign\") + np.sum(y == \"Malicious\")),\n",
" 1: 100\n",
" * np.sum(y == \"Benign\")\n",
" / (np.sum(y == \"Benign\") + np.sum(y == \"Malicious\")),\n",
" },\n",
"]\n",
"\n",
"# Inverse of regularization strength.\n",
"crange = np.arange(0.1, 1.0, 0.2)\n",
"\n",
"\n",
"# Hyperparameter grid.\n",
"hyperparam_grid = {\n",
" \"class_weight\": w,\n",
" \"penalty\": [\"l1\", \"l2\"],\n",
" \"C\": crange,\n",
" \"fit_intercept\": [True, False],\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Prepare samples."
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"code_folding": [
0
]
},
"outputs": [
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 16;\n",
" var nbb_unformatted_code = \"# String to int for classes\\ny_train2 = np.copy(y_train)\\ny_train2[np.where(y_train == \\\"Benign\\\")[0]] = 0\\ny_train2[np.where(y_train == \\\"Malicious\\\")[0]] = 1\";\n",
" var nbb_formatted_code = \"# String to int for classes\\ny_train2 = np.copy(y_train)\\ny_train2[np.where(y_train == \\\"Benign\\\")[0]] = 0\\ny_train2[np.where(y_train == \\\"Malicious\\\")[0]] = 1\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# String to int for classes\n",
"y_train2 = np.copy(y_train)\n",
"y_train2[np.where(y_train == \"Benign\")[0]] = 0\n",
"y_train2[np.where(y_train == \"Malicious\")[0]] = 1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Model fitting."
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"code_folding": [
0
]
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/siddharthsatpathy/opt/anaconda3/lib/python3.8/site-packages/scikit_learn-1.0.dev0-py3.8-macosx-10.9-x86_64.egg/sklearn/model_selection/_search.py:890: UserWarning: One or more of the test scores are non-finite: [ nan 0.9880426 nan 0.98095783 nan 0.99057238\n",
" nan 0.97893785 nan 0.99171251 nan 0.97658718\n",
" nan 0.99194833 nan 0.97491777 nan 0.99202053\n",
" nan 0.97368655 nan 0.99198866 nan 0.97435697\n",
" nan 0.98991559 nan 0.98147736 nan 0.99206716\n",
" nan 0.97907415 nan 0.99294926 nan 0.97651238\n",
" nan 0.99310424 nan 0.97479531 nan 0.99329743\n",
" nan 0.97358621 nan 0.99321499 nan 0.97423402\n",
" nan 0.9905359 nan 0.98155072 nan 0.99265558\n",
" nan 0.97911204 nan 0.99344045 nan 0.97649611\n",
" nan 0.99376651 nan 0.97475562 nan 0.99400771\n",
" nan 0.97355775 nan 0.99385109 nan 0.97419971\n",
" nan 0.99099631 nan 0.98158927 nan 0.99312902\n",
" nan 0.97911352 nan 0.99379647 nan 0.97649009\n",
" nan 0.99421899 nan 0.97475513 nan 0.99447011\n",
" nan 0.97355238 nan 0.99435966 nan 0.97419141\n",
" nan 0.99141737 nan 0.98158634 nan 0.99345803\n",
" nan 0.97914409 nan 0.99406938 nan 0.9764844\n",
" nan 0.99454865 nan 0.97473854 nan 0.99480042\n",
" nan 0.97354408 nan 0.99466787 nan 0.97418263]\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Best score: 0.9948004205120142 with param: {'C': 0.9000000000000001, 'class_weight': {0: 1.0, 1: 99.0}, 'fit_intercept': True, 'penalty': 'l2'}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/siddharthsatpathy/opt/anaconda3/lib/python3.8/site-packages/scikit_learn-1.0.dev0-py3.8-macosx-10.9-x86_64.egg/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
"STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
"\n",
"Increase the number of iterations (max_iter) or scale the data as shown in:\n",
" https://scikit-learn.org/stable/modules/preprocessing.html\n",
"Please also refer to the documentation for alternative solver options:\n",
" https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
" n_iter_i = _check_optimize_result(\n"
]
},
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 17;\n",
" var nbb_unformatted_code = \"# logistic model classifier.\\nlg = LogisticRegression(random_state=13)\\n\\n# define evaluation procedure\\ngrid = GridSearchCV(\\n lg, hyperparam_grid, scoring=\\\"roc_auc\\\", cv=10, n_jobs=-1, refit=True\\n)\\ngrid.fit(X_train_scale, y_train2.astype(\\\"int32\\\"))\\n\\nprint(f\\\"Best score: {grid.best_score_} with param: {grid.best_params_}\\\")\";\n",
" var nbb_formatted_code = \"# logistic model classifier.\\nlg = LogisticRegression(random_state=13)\\n\\n# define evaluation procedure\\ngrid = GridSearchCV(\\n lg, hyperparam_grid, scoring=\\\"roc_auc\\\", cv=10, n_jobs=-1, refit=True\\n)\\ngrid.fit(X_train_scale, y_train2.astype(\\\"int32\\\"))\\n\\nprint(f\\\"Best score: {grid.best_score_} with param: {grid.best_params_}\\\")\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# logistic model classifier.\n",
"lg = LogisticRegression(random_state=13)\n",
"\n",
"# define evaluation procedure\n",
"grid = GridSearchCV(\n",
" lg, hyperparam_grid, scoring=\"roc_auc\", cv=10, n_jobs=-1, refit=True\n",
")\n",
"grid.fit(X_train_scale, y_train2.astype(\"int32\"))\n",
"\n",
"print(f\"Best score: {grid.best_score_} with param: {grid.best_params_}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Test perfomance."
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy Score: 0.9766371274024629\n",
"Confusion Matrix: \n",
"[[8418 202]\n",
" [ 1 68]]\n",
"Area Under Curve: 0.9810366858334173\n",
"Recall score (Pct of true malicious detected): 98.55072463768117\n",
"Data reduction: 3.11 percent\n",
"Pct malicious in data sent to console: 25.19 percent\n",
"F1 score: 0.9834254890707712\n"
]
},
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 18;\n",
" var nbb_unformatted_code = \"# test\\ny_pred_wt = grid.predict(X_test_scale)\\n\\ny_test2 = np.copy(y_test)\\ny_test2[np.where(y_test == \\\"Benign\\\")[0]] = 0\\ny_test2[np.where(y_test == \\\"Malicious\\\")[0]] = 1\\n\\n\\n# performance\\nconf_mat = confusion_matrix(y_test2.astype(\\\"int32\\\"), y_pred_wt)\\n\\nprint(f\\\"Accuracy Score: {accuracy_score(y_test2.astype('int32'),y_pred_wt)}\\\")\\nprint(f\\\"Confusion Matrix: \\\\n{confusion_matrix(y_test2.astype('int32'), y_pred_wt)}\\\")\\nprint(f\\\"Area Under Curve: {roc_auc_score(y_test2.astype('int32'), y_pred_wt)}\\\")\\nprint(\\n f\\\"Recall score (Pct of true malicious detected): {100*recall_score(y_test2.astype('int32'), y_pred_wt)}\\\"\\n)\\nprint(\\n f\\\"Data reduction: { np.round( 100.0 * conf_mat.T[1].sum() / conf_mat.sum() , 2 )} percent\\\"\\n)\\n\\nprint(\\n f\\\"Pct malicious in data sent to console: { np.round( 100.0 * conf_mat.T[1][1] / conf_mat.T[1].sum() , 2 )} percent\\\"\\n)\\n\\nprint(\\\"F1 score: \\\", f1_score(y_test2.astype(\\\"int32\\\"), y_pred_wt, average=\\\"weighted\\\"))\";\n",
" var nbb_formatted_code = \"# test\\ny_pred_wt = grid.predict(X_test_scale)\\n\\ny_test2 = np.copy(y_test)\\ny_test2[np.where(y_test == \\\"Benign\\\")[0]] = 0\\ny_test2[np.where(y_test == \\\"Malicious\\\")[0]] = 1\\n\\n\\n# performance\\nconf_mat = confusion_matrix(y_test2.astype(\\\"int32\\\"), y_pred_wt)\\n\\nprint(f\\\"Accuracy Score: {accuracy_score(y_test2.astype('int32'),y_pred_wt)}\\\")\\nprint(f\\\"Confusion Matrix: \\\\n{confusion_matrix(y_test2.astype('int32'), y_pred_wt)}\\\")\\nprint(f\\\"Area Under Curve: {roc_auc_score(y_test2.astype('int32'), y_pred_wt)}\\\")\\nprint(\\n f\\\"Recall score (Pct of true malicious detected): {100*recall_score(y_test2.astype('int32'), y_pred_wt)}\\\"\\n)\\nprint(\\n f\\\"Data reduction: { np.round( 100.0 * conf_mat.T[1].sum() / conf_mat.sum() , 2 )} percent\\\"\\n)\\n\\nprint(\\n f\\\"Pct malicious in data sent to console: { np.round( 100.0 * conf_mat.T[1][1] / conf_mat.T[1].sum() , 2 )} percent\\\"\\n)\\n\\nprint(\\\"F1 score: \\\", f1_score(y_test2.astype(\\\"int32\\\"), y_pred_wt, average=\\\"weighted\\\"))\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# test\n",
"y_pred_wt = grid.predict(X_test_scale)\n",
"\n",
"y_test2 = np.copy(y_test)\n",
"y_test2[np.where(y_test == \"Benign\")[0]] = 0\n",
"y_test2[np.where(y_test == \"Malicious\")[0]] = 1\n",
"\n",
"\n",
"# performance\n",
"conf_mat = confusion_matrix(y_test2.astype(\"int32\"), y_pred_wt)\n",
"\n",
"print(f\"Accuracy Score: {accuracy_score(y_test2.astype('int32'),y_pred_wt)}\")\n",
"print(f\"Confusion Matrix: \\n{confusion_matrix(y_test2.astype('int32'), y_pred_wt)}\")\n",
"print(f\"Area Under Curve: {roc_auc_score(y_test2.astype('int32'), y_pred_wt)}\")\n",
"print(\n",
" f\"Recall score (Pct of true malicious detected): {100*recall_score(y_test2.astype('int32'), y_pred_wt)}\"\n",
")\n",
"print(\n",
" f\"Data reduction: { np.round( 100.0 * conf_mat.T[1].sum() / conf_mat.sum() , 2 )} percent\"\n",
")\n",
"\n",
"print(\n",
" f\"Pct malicious in data sent to console: { np.round( 100.0 * conf_mat.T[1][1] / conf_mat.T[1].sum() , 2 )} percent\"\n",
")\n",
"\n",
"print(\"F1 score: \", f1_score(y_test2.astype(\"int32\"), y_pred_wt, average=\"weighted\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Visualization of results"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"code_folding": [
0
]
},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 19;\n",
" var nbb_unformatted_code = \"# Confusion matrix\\nxarray = [\\\"Benign\\\", \\\"Malicious\\\"]\\nyarray = [\\\"Benign\\\", \\\"Malicious\\\"]\\n\\nconf_matrix = confusion_matrix(y_test2.astype(\\\"int32\\\"), y_pred_wt)\\n\\n\\nfig, ax = plt.subplots()\\nim = ax.imshow(conf_matrix, cmap=plt.cm.jet, clim=(0, 1000))\\n\\n# Show all ticks and label them with the respective list entries\\nax.set_xticks(np.arange(len(xarray)))\\nax.set_xticklabels(xarray)\\n\\nax.set_yticks(np.arange(len(yarray)))\\nax.set_yticklabels(yarray)\\n\\n# Rotate the tick labels and set their alignment.\\nplt.setp(ax.get_xticklabels(), rotation=45, ha=\\\"right\\\", rotation_mode=\\\"anchor\\\")\\n\\n# Loop over data dimensions and create text annotations.\\nfor i in range(len(xarray)):\\n for j in range(len(yarray)):\\n text = ax.text(j, i, conf_matrix[i, j], ha=\\\"center\\\", va=\\\"center\\\", color=\\\"w\\\")\\n\\nax.set_title(\\\"Confusion matrix\\\")\\nfig.tight_layout()\\nplt.show()\";\n",
" var nbb_formatted_code = \"# Confusion matrix\\nxarray = [\\\"Benign\\\", \\\"Malicious\\\"]\\nyarray = [\\\"Benign\\\", \\\"Malicious\\\"]\\n\\nconf_matrix = confusion_matrix(y_test2.astype(\\\"int32\\\"), y_pred_wt)\\n\\n\\nfig, ax = plt.subplots()\\nim = ax.imshow(conf_matrix, cmap=plt.cm.jet, clim=(0, 1000))\\n\\n# Show all ticks and label them with the respective list entries\\nax.set_xticks(np.arange(len(xarray)))\\nax.set_xticklabels(xarray)\\n\\nax.set_yticks(np.arange(len(yarray)))\\nax.set_yticklabels(yarray)\\n\\n# Rotate the tick labels and set their alignment.\\nplt.setp(ax.get_xticklabels(), rotation=45, ha=\\\"right\\\", rotation_mode=\\\"anchor\\\")\\n\\n# Loop over data dimensions and create text annotations.\\nfor i in range(len(xarray)):\\n for j in range(len(yarray)):\\n text = ax.text(j, i, conf_matrix[i, j], ha=\\\"center\\\", va=\\\"center\\\", color=\\\"w\\\")\\n\\nax.set_title(\\\"Confusion matrix\\\")\\nfig.tight_layout()\\nplt.show()\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Confusion matrix\n",
"xarray = [\"Benign\", \"Malicious\"]\n",
"yarray = [\"Benign\", \"Malicious\"]\n",
"\n",
"conf_matrix = confusion_matrix(y_test2.astype(\"int32\"), y_pred_wt)\n",
"\n",
"\n",
"fig, ax = plt.subplots()\n",
"im = ax.imshow(conf_matrix, cmap=plt.cm.jet, clim=(0, 1000))\n",
"\n",
"# Show all ticks and label them with the respective list entries\n",
"ax.set_xticks(np.arange(len(xarray)))\n",
"ax.set_xticklabels(xarray)\n",
"\n",
"ax.set_yticks(np.arange(len(yarray)))\n",
"ax.set_yticklabels(yarray)\n",
"\n",
"# Rotate the tick labels and set their alignment.\n",
"plt.setp(ax.get_xticklabels(), rotation=45, ha=\"right\", rotation_mode=\"anchor\")\n",
"\n",
"# Loop over data dimensions and create text annotations.\n",
"for i in range(len(xarray)):\n",
" for j in range(len(yarray)):\n",
" text = ax.text(j, i, conf_matrix[i, j], ha=\"center\", va=\"center\", color=\"w\")\n",
"\n",
"ax.set_title(\"Confusion matrix\")\n",
"fig.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Best fit parameters."
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/siddharthsatpathy/opt/anaconda3/lib/python3.8/site-packages/scikit_learn-1.0.dev0-py3.8-macosx-10.9-x86_64.egg/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
"STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
"\n",
"Increase the number of iterations (max_iter) or scale the data as shown in:\n",
" https://scikit-learn.org/stable/modules/preprocessing.html\n",
"Please also refer to the documentation for alternative solver options:\n",
" https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
" n_iter_i = _check_optimize_result(\n"
]
},
{
"data": {
"text/plain": [
"LogisticRegression(C=0.9000000000000001, class_weight={0: 1.0, 1: 99.0},\n",
" max_iter=5, random_state=13)"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 20;\n",
" var nbb_unformatted_code = \"# define model\\nbest_fit_model = LogisticRegression(\\n class_weight=grid.best_params_[\\\"class_weight\\\"],\\n penalty=grid.best_params_[\\\"penalty\\\"],\\n C=grid.best_params_[\\\"C\\\"],\\n fit_intercept=grid.best_params_[\\\"fit_intercept\\\"],\\n random_state=13,\\n max_iter=5,\\n)\\n\\n# fit it\\nbest_fit_model.fit(X_train_scale, y_train2.astype(\\\"int32\\\"))\";\n",
" var nbb_formatted_code = \"# define model\\nbest_fit_model = LogisticRegression(\\n class_weight=grid.best_params_[\\\"class_weight\\\"],\\n penalty=grid.best_params_[\\\"penalty\\\"],\\n C=grid.best_params_[\\\"C\\\"],\\n fit_intercept=grid.best_params_[\\\"fit_intercept\\\"],\\n random_state=13,\\n max_iter=5,\\n)\\n\\n# fit it\\nbest_fit_model.fit(X_train_scale, y_train2.astype(\\\"int32\\\"))\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# define model\n",
"best_fit_model = LogisticRegression(\n",
" class_weight=grid.best_params_[\"class_weight\"],\n",
" penalty=grid.best_params_[\"penalty\"],\n",
" C=grid.best_params_[\"C\"],\n",
" fit_intercept=grid.best_params_[\"fit_intercept\"],\n",
" random_state=13,\n",
" max_iter=5,\n",
")\n",
"\n",
"# fit it\n",
"best_fit_model.fit(X_train_scale, y_train2.astype(\"int32\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Save parameters."
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 30;\n",
" var nbb_unformatted_code = \"np.savetxt(\\\"mean.txt\\\", scaler.mean_, delimiter=\\\",\\\")\\nnp.savetxt(\\\"std.txt\\\", scaler.scale_, delimiter=\\\",\\\")\\nnp.savetxt(\\\"weights.txt\\\", best_fit_model.coef_[0], delimiter=\\\",\\\")\\nnp.savetxt(\\\"intercepts.txt\\\", best_fit_model.intercept_, delimiter=\\\",\\\")\";\n",
" var nbb_formatted_code = \"np.savetxt(\\\"mean.txt\\\", scaler.mean_, delimiter=\\\",\\\")\\nnp.savetxt(\\\"std.txt\\\", scaler.scale_, delimiter=\\\",\\\")\\nnp.savetxt(\\\"weights.txt\\\", best_fit_model.coef_[0], delimiter=\\\",\\\")\\nnp.savetxt(\\\"intercepts.txt\\\", best_fit_model.intercept_, delimiter=\\\",\\\")\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"np.savetxt(\"mean.txt\", scaler.mean_, delimiter=\",\")\n",
"np.savetxt(\"std.txt\", scaler.scale_, delimiter=\",\")\n",
"np.savetxt(\"weights.txt\", best_fit_model.coef_[0], delimiter=\",\")\n",
"np.savetxt(\"intercept.txt\", best_fit_model.intercept_, delimiter=\",\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Feature importance scores"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 21;\n",
" var nbb_unformatted_code = \"important_features = pd_comb_features_cp.iloc[:, :-1].columns.values[\\n np.argsort(-1 * np.abs(best_fit_model.coef_[0]))\\n]\";\n",
" var nbb_formatted_code = \"important_features = pd_comb_features_cp.iloc[:, :-1].columns.values[\\n np.argsort(-1 * np.abs(best_fit_model.coef_[0]))\\n]\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"important_features = pd_comb_features_cp.iloc[:, :-1].columns.values[\n",
" np.argsort(-1 * np.abs(best_fit_model.coef_[0]))\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['flowLength', 'bwdFlowLength', 'packetSizeMin', 'bwdPacketSizeMin',\n",
" 'bwdPacketSizeTotalPerTime', 'packetSizeTotalPerTime',\n",
" 'packetSizeMax', 'bwdPacketSizeMax', 'bwdFlowLengthPerTime',\n",
" 'flowLengthPerTime', 'bwdIATTotal', 'flowDuration', 'IATMean',\n",
" 'bwdIATMean', 'packetSizeTotal', 'bwdPacketSizeTotal', 'bwdIATStd',\n",
" 'IATStd', 'bwdPacketSizeStd', 'packetSizeStd', 'bwdIATMax',\n",
" 'IATMax', 'packetSizeMean', 'fwdPacketSizeMean',\n",
" 'bwdPacketSizeMean', 'IATMin', 'bwdIATMin',\n",
" 'fwdPacketSizeTotalPerTime', 'fwdIATStd', 'fwdPacketSizeTotal',\n",
" 'fwdIATMin', 'fwdIATMax', 'fwdPacketSizeMax', 'fwdPacketSizeMin',\n",
" 'fwdFlowLengthPerTime', 'fwdPacketSizeStd', 'fwdFlowLength',\n",
" 'fwdIATTotal', 'fwdIATMean'], dtype=object)"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"application/javascript": [
"\n",
" setTimeout(function() {\n",
" var nbb_cell_id = 22;\n",
" var nbb_unformatted_code = \"important_features\";\n",
" var nbb_formatted_code = \"important_features\";\n",
" var nbb_cells = Jupyter.notebook.get_cells();\n",
" for (var i = 0; i < nbb_cells.length; ++i) {\n",
" if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n",
" if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n",
" nbb_cells[i].set_text(nbb_formatted_code);\n",
" }\n",
" break;\n",
" }\n",
" }\n",
" }, 500);\n",
" "
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"important_features"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}