{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Multiclass Classification"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Data Scientist: Dr. Eddy Giusepe Chirinos Isidro"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Link de estudo:\n",
"\n",
"* [pycaret 3.2.0](https://pypi.org/project/pycaret/)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" sepal_length | \n",
" sepal_width | \n",
" petal_length | \n",
" petal_width | \n",
" species | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 5.1 | \n",
" 3.5 | \n",
" 1.4 | \n",
" 0.2 | \n",
" Iris-setosa | \n",
"
\n",
" \n",
" | 1 | \n",
" 4.9 | \n",
" 3.0 | \n",
" 1.4 | \n",
" 0.2 | \n",
" Iris-setosa | \n",
"
\n",
" \n",
" | 2 | \n",
" 4.7 | \n",
" 3.2 | \n",
" 1.3 | \n",
" 0.2 | \n",
" Iris-setosa | \n",
"
\n",
" \n",
" | 3 | \n",
" 4.6 | \n",
" 3.1 | \n",
" 1.5 | \n",
" 0.2 | \n",
" Iris-setosa | \n",
"
\n",
" \n",
" | 4 | \n",
" 5.0 | \n",
" 3.6 | \n",
" 1.4 | \n",
" 0.2 | \n",
" Iris-setosa | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" sepal_length sepal_width petal_length petal_width species\n",
"0 5.1 3.5 1.4 0.2 Iris-setosa\n",
"1 4.9 3.0 1.4 0.2 Iris-setosa\n",
"2 4.7 3.2 1.3 0.2 Iris-setosa\n",
"3 4.6 3.1 1.5 0.2 Iris-setosa\n",
"4 5.0 3.6 1.4 0.2 Iris-setosa"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from pycaret.datasets import get_data\n",
"\n",
"\n",
"dataset= get_data(\"iris\")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(150, 5)"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset.shape"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset[\"species\"].unique()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" sepal_length | \n",
" sepal_width | \n",
" petal_length | \n",
" petal_width | \n",
" species | \n",
"
\n",
" \n",
" \n",
" \n",
" | 145 | \n",
" 6.7 | \n",
" 3.0 | \n",
" 5.2 | \n",
" 2.3 | \n",
" Iris-virginica | \n",
"
\n",
" \n",
" | 146 | \n",
" 6.3 | \n",
" 2.5 | \n",
" 5.0 | \n",
" 1.9 | \n",
" Iris-virginica | \n",
"
\n",
" \n",
" | 147 | \n",
" 6.5 | \n",
" 3.0 | \n",
" 5.2 | \n",
" 2.0 | \n",
" Iris-virginica | \n",
"
\n",
" \n",
" | 148 | \n",
" 6.2 | \n",
" 3.4 | \n",
" 5.4 | \n",
" 2.3 | \n",
" Iris-virginica | \n",
"
\n",
" \n",
" | 149 | \n",
" 5.9 | \n",
" 3.0 | \n",
" 5.1 | \n",
" 1.8 | \n",
" Iris-virginica | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" sepal_length sepal_width petal_length petal_width species\n",
"145 6.7 3.0 5.2 2.3 Iris-virginica\n",
"146 6.3 2.5 5.0 1.9 Iris-virginica\n",
"147 6.5 3.0 5.2 2.0 Iris-virginica\n",
"148 6.2 3.4 5.4 2.3 Iris-virginica\n",
"149 5.9 3.0 5.1 1.8 Iris-virginica"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Mostra as 5 últimas linhas:\n",
"dataset.tail()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" sepal_length | \n",
" sepal_width | \n",
" petal_length | \n",
" petal_width | \n",
"
\n",
" \n",
" \n",
" \n",
" | count | \n",
" 150.000000 | \n",
" 150.000000 | \n",
" 150.000000 | \n",
" 150.000000 | \n",
"
\n",
" \n",
" | mean | \n",
" 5.843333 | \n",
" 3.054000 | \n",
" 3.758667 | \n",
" 1.198667 | \n",
"
\n",
" \n",
" | std | \n",
" 0.828066 | \n",
" 0.433594 | \n",
" 1.764420 | \n",
" 0.763161 | \n",
"
\n",
" \n",
" | min | \n",
" 4.300000 | \n",
" 2.000000 | \n",
" 1.000000 | \n",
" 0.100000 | \n",
"
\n",
" \n",
" | 25% | \n",
" 5.100000 | \n",
" 2.800000 | \n",
" 1.600000 | \n",
" 0.300000 | \n",
"
\n",
" \n",
" | 50% | \n",
" 5.800000 | \n",
" 3.000000 | \n",
" 4.350000 | \n",
" 1.300000 | \n",
"
\n",
" \n",
" | 75% | \n",
" 6.400000 | \n",
" 3.300000 | \n",
" 5.100000 | \n",
" 1.800000 | \n",
"
\n",
" \n",
" | max | \n",
" 7.900000 | \n",
" 4.400000 | \n",
" 6.900000 | \n",
" 2.500000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" sepal_length sepal_width petal_length petal_width\n",
"count 150.000000 150.000000 150.000000 150.000000\n",
"mean 5.843333 3.054000 3.758667 1.198667\n",
"std 0.828066 0.433594 1.764420 0.763161\n",
"min 4.300000 2.000000 1.000000 0.100000\n",
"25% 5.100000 2.800000 1.600000 0.300000\n",
"50% 5.800000 3.000000 4.350000 1.300000\n",
"75% 6.400000 3.300000 5.100000 1.800000\n",
"max 7.900000 4.400000 6.900000 2.500000"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset.describe()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" sepal_length | \n",
" sepal_width | \n",
" petal_length | \n",
" petal_width | \n",
" species | \n",
"
\n",
" \n",
" \n",
" \n",
" | 73 | \n",
" 6.1 | \n",
" 2.8 | \n",
" 4.7 | \n",
" 1.2 | \n",
" Iris-versicolor | \n",
"
\n",
" \n",
" | 18 | \n",
" 5.7 | \n",
" 3.8 | \n",
" 1.7 | \n",
" 0.3 | \n",
" Iris-setosa | \n",
"
\n",
" \n",
" | 118 | \n",
" 7.7 | \n",
" 2.6 | \n",
" 6.9 | \n",
" 2.3 | \n",
" Iris-virginica | \n",
"
\n",
" \n",
" | 78 | \n",
" 6.0 | \n",
" 2.9 | \n",
" 4.5 | \n",
" 1.5 | \n",
" Iris-versicolor | \n",
"
\n",
" \n",
" | 76 | \n",
" 6.8 | \n",
" 2.8 | \n",
" 4.8 | \n",
" 1.4 | \n",
" Iris-versicolor | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" sepal_length sepal_width petal_length petal_width species\n",
"73 6.1 2.8 4.7 1.2 Iris-versicolor\n",
"18 5.7 3.8 1.7 0.3 Iris-setosa\n",
"118 7.7 2.6 6.9 2.3 Iris-virginica\n",
"78 6.0 2.9 4.5 1.5 Iris-versicolor\n",
"76 6.8 2.8 4.8 1.4 Iris-versicolor"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Dados de Treinamento:\n",
"\n",
"data_train = dataset.sample(frac=0.9, random_state=42)\n",
"\n",
"data_train.head()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(135, 5)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data_train.shape"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" sepal_length | \n",
" sepal_width | \n",
" petal_length | \n",
" petal_width | \n",
" species | \n",
"
\n",
" \n",
" \n",
" \n",
" | 14 | \n",
" 5.8 | \n",
" 4.0 | \n",
" 1.2 | \n",
" 0.2 | \n",
" Iris-setosa | \n",
"
\n",
" \n",
" | 20 | \n",
" 5.4 | \n",
" 3.4 | \n",
" 1.7 | \n",
" 0.2 | \n",
" Iris-setosa | \n",
"
\n",
" \n",
" | 52 | \n",
" 6.9 | \n",
" 3.1 | \n",
" 4.9 | \n",
" 1.5 | \n",
" Iris-versicolor | \n",
"
\n",
" \n",
" | 71 | \n",
" 6.1 | \n",
" 2.8 | \n",
" 4.0 | \n",
" 1.3 | \n",
" Iris-versicolor | \n",
"
\n",
" \n",
" | 74 | \n",
" 6.4 | \n",
" 2.9 | \n",
" 4.3 | \n",
" 1.3 | \n",
" Iris-versicolor | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" sepal_length sepal_width petal_length petal_width species\n",
"14 5.8 4.0 1.2 0.2 Iris-setosa\n",
"20 5.4 3.4 1.7 0.2 Iris-setosa\n",
"52 6.9 3.1 4.9 1.5 Iris-versicolor\n",
"71 6.1 2.8 4.0 1.3 Iris-versicolor\n",
"74 6.4 2.9 4.3 1.3 Iris-versicolor"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Dados de Teste:\n",
"\n",
"data_test = dataset.drop(data_train.index)\n",
"\n",
"data_test.head()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(15, 5)"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data_test.shape"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" sepal_length | \n",
" sepal_width | \n",
" petal_length | \n",
" petal_width | \n",
" species | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 6.1 | \n",
" 2.8 | \n",
" 4.7 | \n",
" 1.2 | \n",
" Iris-versicolor | \n",
"
\n",
" \n",
" | 1 | \n",
" 5.7 | \n",
" 3.8 | \n",
" 1.7 | \n",
" 0.3 | \n",
" Iris-setosa | \n",
"
\n",
" \n",
" | 2 | \n",
" 7.7 | \n",
" 2.6 | \n",
" 6.9 | \n",
" 2.3 | \n",
" Iris-virginica | \n",
"
\n",
" \n",
" | 3 | \n",
" 6.0 | \n",
" 2.9 | \n",
" 4.5 | \n",
" 1.5 | \n",
" Iris-versicolor | \n",
"
\n",
" \n",
" | 4 | \n",
" 6.8 | \n",
" 2.8 | \n",
" 4.8 | \n",
" 1.4 | \n",
" Iris-versicolor | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" sepal_length sepal_width petal_length petal_width species\n",
"0 6.1 2.8 4.7 1.2 Iris-versicolor\n",
"1 5.7 3.8 1.7 0.3 Iris-setosa\n",
"2 7.7 2.6 6.9 2.3 Iris-virginica\n",
"3 6.0 2.9 4.5 1.5 Iris-versicolor\n",
"4 6.8 2.8 4.8 1.4 Iris-versicolor"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Reset the training split's index to 0..n-1, discarding the original row labels.\n",
"# NOTE: `data_test` is built from `data_train.index` in an earlier cell, so this\n",
"# cell must only run after the test split has been created.\n",
"# The result is rebound to the name instead of mutating the frame with `inplace=True`.\n",
"\n",
"data_train = data_train.reset_index(drop=True)\n",
"\n",
"data_train.head()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" sepal_length | \n",
" sepal_width | \n",
" petal_length | \n",
" petal_width | \n",
" species | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 5.8 | \n",
" 4.0 | \n",
" 1.2 | \n",
" 0.2 | \n",
" Iris-setosa | \n",
"
\n",
" \n",
" | 1 | \n",
" 5.4 | \n",
" 3.4 | \n",
" 1.7 | \n",
" 0.2 | \n",
" Iris-setosa | \n",
"
\n",
" \n",
" | 2 | \n",
" 6.9 | \n",
" 3.1 | \n",
" 4.9 | \n",
" 1.5 | \n",
" Iris-versicolor | \n",
"
\n",
" \n",
" | 3 | \n",
" 6.1 | \n",
" 2.8 | \n",
" 4.0 | \n",
" 1.3 | \n",
" Iris-versicolor | \n",
"
\n",
" \n",
" | 4 | \n",
" 6.4 | \n",
" 2.9 | \n",
" 4.3 | \n",
" 1.3 | \n",
" Iris-versicolor | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" sepal_length sepal_width petal_length petal_width species\n",
"0 5.8 4.0 1.2 0.2 Iris-setosa\n",
"1 5.4 3.4 1.7 0.2 Iris-setosa\n",
"2 6.9 3.1 4.9 1.5 Iris-versicolor\n",
"3 6.1 2.8 4.0 1.3 Iris-versicolor\n",
"4 6.4 2.9 4.3 1.3 Iris-versicolor"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Reset the test split's index to 0..n-1, discarding the original row labels.\n",
"# The result is rebound to the name instead of mutating the frame with `inplace=True`.\n",
"\n",
"data_test = data_test.reset_index(drop=True)\n",
"\n",
"data_test.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Importando o módulo de `Classificação` do PyCaret e configurando o experimento:"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" | | \n",
" Description | \n",
" Value | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" Session id | \n",
" 123 | \n",
"
\n",
" \n",
" | 1 | \n",
" Target | \n",
" species | \n",
"
\n",
" \n",
" | 2 | \n",
" Target type | \n",
" Multiclass | \n",
"
\n",
" \n",
" | 3 | \n",
" Target mapping | \n",
" Iris-setosa: 0, Iris-versicolor: 1, Iris-virginica: 2 | \n",
"
\n",
" \n",
" | 4 | \n",
" Original data shape | \n",
" (135, 5) | \n",
"
\n",
" \n",
" | 5 | \n",
" Transformed data shape | \n",
" (135, 5) | \n",
"
\n",
" \n",
" | 6 | \n",
" Transformed train set shape | \n",
" (94, 5) | \n",
"
\n",
" \n",
" | 7 | \n",
" Transformed test set shape | \n",
" (41, 5) | \n",
"
\n",
" \n",
" | 8 | \n",
" Numeric features | \n",
" 4 | \n",
"
\n",
" \n",
" | 9 | \n",
" Preprocess | \n",
" True | \n",
"
\n",
" \n",
" | 10 | \n",
" Imputation type | \n",
" simple | \n",
"
\n",
" \n",
" | 11 | \n",
" Numeric imputation | \n",
" mean | \n",
"
\n",
" \n",
" | 12 | \n",
" Categorical imputation | \n",
" mode | \n",
"
\n",
" \n",
" | 13 | \n",
" Fold Generator | \n",
" StratifiedKFold | \n",
"
\n",
" \n",
" | 14 | \n",
" Fold Number | \n",
" 10 | \n",
"
\n",
" \n",
" | 15 | \n",
" CPU Jobs | \n",
" -1 | \n",
"
\n",
" \n",
" | 16 | \n",
" Use GPU | \n",
" False | \n",
"
\n",
" \n",
" | 17 | \n",
" Log Experiment | \n",
" False | \n",
"
\n",
" \n",
" | 18 | \n",
" Experiment Name | \n",
" clf-default-name | \n",
"
\n",
" \n",
" | 19 | \n",
" USI | \n",
" c071 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Import only the PyCaret functions this notebook uses instead of a wildcard import,\n",
"# so it is clear where `setup` and `compare_models` come from.\n",
"from pycaret.classification import compare_models, setup\n",
"\n",
"\n",
"# Configure the classification experiment on the training split.\n",
"# `species` is the target column; `session_id` fixes the random seed so the run is reproducible.\n",
"Mult_clf = setup(\n",
"    data=data_train,\n",
"    target=\"species\",\n",
"    session_id=123,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"best_Model = compare_models()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"NOTA:\n",
"\n",
"Tive que executar `compare_models()` no Google Colab, porque localmente a execução demorou muito para retornar os resultados.\n",
"\n",
"Para ver os resultados, peça permissão de acesso ao meu arquivo compartilhado [aqui](https://colab.research.google.com/drive/1bELHHfpLFZ7SyRifMpn_4h34Nir1y37D#scrollTo=NelzMSccoXnI)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "venv_pycaret",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}