In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
Used the Obesity dataset from UCI Repository accessible on Python.¶
In [2]:
from ucimlrepo import fetch_ucirepo
# fetch dataset
estimation_of_obesity_levels_based_on_eating_habits_and_physical_condition = fetch_ucirepo(id=544)
# data (as pandas dataframes)
X = estimation_of_obesity_levels_based_on_eating_habits_and_physical_condition.data.features
y = estimation_of_obesity_levels_based_on_eating_habits_and_physical_condition.data.targets
# metadata
print(estimation_of_obesity_levels_based_on_eating_habits_and_physical_condition.metadata)
# variable information
print(estimation_of_obesity_levels_based_on_eating_habits_and_physical_condition.variables)
{'uci_id': 544, 'name': 'Estimation of Obesity Levels Based On Eating Habits and Physical Condition ', 'repository_url': 'https://archive.ics.uci.edu/dataset/544/estimation+of+obesity+levels+based+on+eating+habits+and+physical+condition', 'data_url': 'https://archive.ics.uci.edu/static/public/544/data.csv', 'abstract': 'This dataset include data for the estimation of obesity levels in individuals from the countries of Mexico, Peru and Colombia, based on their eating habits and physical condition. ', 'area': 'Health and Medicine', 'tasks': ['Classification', 'Regression', 'Clustering'], 'characteristics': ['Multivariate'], 'num_instances': 2111, 'num_features': 16, 'feature_types': ['Integer'], 'demographics': ['Gender', 'Age'], 'target_col': ['NObeyesdad'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 2019, 'last_updated': 'Tue Sep 10 2024', 'dataset_doi': '10.24432/C5H31Z', 'creators': [], 'intro_paper': {'ID': 358, 'type': 'NATIVE', 'title': 'Dataset for estimation of obesity levels based on eating habits and physical condition in individuals from Colombia, Peru and Mexico', 'authors': 'Fabio Mendoza Palechor, Alexis De la Hoz Manotas', 'venue': 'Data in Brief', 'year': 2019, 'journal': None, 'DOI': '10.1016/j.dib.2019.104344', 'URL': 'https://www.semanticscholar.org/paper/35b40bacd2ffa9370885b7a3004d88995fd1d011', 'sha': None, 'corpus': None, 'arxiv': None, 'mag': None, 'acl': None, 'pmid': None, 'pmcid': None}, 'additional_info': {'summary': 'This dataset include data for the estimation of obesity levels in individuals from the countries of Mexico, Peru and Colombia, based on their eating habits and physical condition. The data contains 17 attributes and 2111 records, the records are labeled with the class variable NObesity (Obesity Level), that allows classification of the data using the values of Insufficient Weight, Normal Weight, Overweight Level I, Overweight Level II, Obesity Type I, Obesity Type II and Obesity Type III. 77% of the data was generated synthetically using the Weka tool and the SMOTE filter, 23% of the data was collected directly from users through a web platform.', 'purpose': None, 'funded_by': None, 'instances_represent': None, 'recommended_data_splits': None, 'sensitive_data': None, 'preprocessing_description': None, 'variable_info': 'Read the article (https://doi.org/10.1016/j.dib.2019.104344) to see the description of the attributes.', 'citation': None}}
name role type demographic \
0 Gender Feature Categorical Gender
1 Age Feature Continuous Age
2 Height Feature Continuous None
3 Weight Feature Continuous None
4 family_history_with_overweight Feature Binary None
5 FAVC Feature Binary None
6 FCVC Feature Integer None
7 NCP Feature Continuous None
8 CAEC Feature Categorical None
9 SMOKE Feature Binary None
10 CH2O Feature Continuous None
11 SCC Feature Binary None
12 FAF Feature Continuous None
13 TUE Feature Integer None
14 CALC Feature Categorical None
15 MTRANS Feature Categorical None
16 NObeyesdad Target Categorical None
description units missing_values
0 None None no
1 None None no
2 None None no
3 None None no
4 Has a family member suffered or suffers from o... None no
5 Do you eat high caloric food frequently? None no
6 Do you usually eat vegetables in your meals? None no
7 How many main meals do you have daily? None no
8 Do you eat any food between meals? None no
9 Do you smoke? None no
10 How much water do you drink daily? None no
11 Do you monitor the calories you eat daily? None no
12 How often do you have physical activity? None no
13 How much time do you use technological devices... None no
14 How often do you drink alcohol? None no
15 Which transportation do you usually use? None no
16 Obesity level None no
General setup for Data preprocessing¶
In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, accuracy_score
# 1. Define feature lists
# Based on your dataset image
numeric_features = ['Age', 'Height', 'Weight', 'FCVC', 'NCP', 'CH2O', 'FAF', 'TUE']
categorical_features = ['Gender', 'family_history_with_overweight', 'FAVC', 'CAEC', 'SMOKE', 'SCC', 'CALC', 'MTRANS']
# 2. Create the preprocessing transformers
# StandardScaler for numerical features
numeric_transformer = StandardScaler()
# OneHotEncoder for categorical features
categorical_transformer = OneHotEncoder(handle_unknown='ignore')
# 3. Create the ColumnTransformer to apply different transforms to different columns
preprocessor = ColumnTransformer(
transformers=[
('num', numeric_transformer, numeric_features),
('cat', categorical_transformer, categorical_features)
])
# 4. Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
print(f"Training set shape: X={X_train.shape}, y={y_train.shape}")
print(f"Test set shape: X={X_test.shape}, y={y_test.shape}")
Training set shape: X=(1688, 16), y=(1688, 1) Test set shape: X=(423, 16), y=(423, 1)
Neural Network Setup¶
In [5]:
pip install tensorflow
Collecting tensorflow
Downloading tensorflow-2.20.0-cp312-cp312-win_amd64.whl.metadata (4.6 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
Downloading absl_py-2.3.1-py3-none-any.whl.metadata (3.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow)
Downloading flatbuffers-25.9.23-py2.py3-none-any.whl.metadata (875 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)
Downloading gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google_pasta>=0.1.1 (from tensorflow)
Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow)
Downloading libclang-18.1.1-py2.py3-none-win_amd64.whl.metadata (5.3 kB)
Collecting opt_einsum>=2.3.2 (from tensorflow)
Downloading opt_einsum-3.4.0-py3-none-any.whl.metadata (6.3 kB)
Requirement already satisfied: packaging in c:\users\vikas\anaconda3\lib\site-packages (from tensorflow) (24.1)
Collecting protobuf>=5.28.0 (from tensorflow)
Downloading protobuf-6.33.0-cp310-abi3-win_amd64.whl.metadata (593 bytes)
Requirement already satisfied: requests<3,>=2.21.0 in c:\users\vikas\anaconda3\lib\site-packages (from tensorflow) (2.32.3)
Requirement already satisfied: setuptools in c:\users\vikas\anaconda3\lib\site-packages (from tensorflow) (75.1.0)
Requirement already satisfied: six>=1.12.0 in c:\users\vikas\anaconda3\lib\site-packages (from tensorflow) (1.16.0)
Collecting termcolor>=1.1.0 (from tensorflow)
Downloading termcolor-3.1.0-py3-none-any.whl.metadata (6.4 kB)
Requirement already satisfied: typing_extensions>=3.6.6 in c:\users\vikas\anaconda3\lib\site-packages (from tensorflow) (4.11.0)
Requirement already satisfied: wrapt>=1.11.0 in c:\users\vikas\anaconda3\lib\site-packages (from tensorflow) (1.14.1)
Collecting grpcio<2.0,>=1.24.3 (from tensorflow)
Downloading grpcio-1.76.0-cp312-cp312-win_amd64.whl.metadata (3.8 kB)
Collecting tensorboard~=2.20.0 (from tensorflow)
Downloading tensorboard-2.20.0-py3-none-any.whl.metadata (1.8 kB)
Collecting keras>=3.10.0 (from tensorflow)
Downloading keras-3.11.3-py3-none-any.whl.metadata (5.9 kB)
Requirement already satisfied: numpy>=1.26.0 in c:\users\vikas\anaconda3\lib\site-packages (from tensorflow) (1.26.4)
Requirement already satisfied: h5py>=3.11.0 in c:\users\vikas\anaconda3\lib\site-packages (from tensorflow) (3.11.0)
Collecting ml_dtypes<1.0.0,>=0.5.1 (from tensorflow)
Downloading ml_dtypes-0.5.3-cp312-cp312-win_amd64.whl.metadata (9.2 kB)
Requirement already satisfied: wheel<1.0,>=0.23.0 in c:\users\vikas\anaconda3\lib\site-packages (from astunparse>=1.6.0->tensorflow) (0.44.0)
Collecting typing_extensions>=3.6.6 (from tensorflow)
Downloading typing_extensions-4.15.0-py3-none-any.whl.metadata (3.3 kB)
Requirement already satisfied: rich in c:\users\vikas\anaconda3\lib\site-packages (from keras>=3.10.0->tensorflow) (13.7.1)
Collecting namex (from keras>=3.10.0->tensorflow)
Downloading namex-0.1.0-py3-none-any.whl.metadata (322 bytes)
Collecting optree (from keras>=3.10.0->tensorflow)
Downloading optree-0.17.0-cp312-cp312-win_amd64.whl.metadata (34 kB)
Requirement already satisfied: charset-normalizer<4,>=2 in c:\users\vikas\anaconda3\lib\site-packages (from requests<3,>=2.21.0->tensorflow) (3.3.2)
Requirement already satisfied: idna<4,>=2.5 in c:\users\vikas\anaconda3\lib\site-packages (from requests<3,>=2.21.0->tensorflow) (3.7)
Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\vikas\anaconda3\lib\site-packages (from requests<3,>=2.21.0->tensorflow) (2.2.3)
Requirement already satisfied: certifi>=2017.4.17 in c:\users\vikas\anaconda3\lib\site-packages (from requests<3,>=2.21.0->tensorflow) (2024.8.30)
Requirement already satisfied: markdown>=2.6.8 in c:\users\vikas\anaconda3\lib\site-packages (from tensorboard~=2.20.0->tensorflow) (3.4.1)
Requirement already satisfied: pillow in c:\users\vikas\anaconda3\lib\site-packages (from tensorboard~=2.20.0->tensorflow) (10.4.0)
Collecting tensorboard-data-server<0.8.0,>=0.7.0 (from tensorboard~=2.20.0->tensorflow)
Downloading tensorboard_data_server-0.7.2-py3-none-any.whl.metadata (1.1 kB)
Requirement already satisfied: werkzeug>=1.0.1 in c:\users\vikas\anaconda3\lib\site-packages (from tensorboard~=2.20.0->tensorflow) (3.0.3)
Requirement already satisfied: MarkupSafe>=2.1.1 in c:\users\vikas\anaconda3\lib\site-packages (from werkzeug>=1.0.1->tensorboard~=2.20.0->tensorflow) (2.1.3)
Requirement already satisfied: markdown-it-py>=2.2.0 in c:\users\vikas\anaconda3\lib\site-packages (from rich->keras>=3.10.0->tensorflow) (2.2.0)
Requirement already satisfied: pygments<3.0.0,>=2.13.0 in c:\users\vikas\anaconda3\lib\site-packages (from rich->keras>=3.10.0->tensorflow) (2.15.1)
Requirement already satisfied: mdurl~=0.1 in c:\users\vikas\anaconda3\lib\site-packages (from markdown-it-py>=2.2.0->rich->keras>=3.10.0->tensorflow) (0.1.0)
Downloading tensorflow-2.20.0-cp312-cp312-win_amd64.whl (331.9 MB)
---------------------------------------- 0.0/331.9 MB ? eta -:--:--
--------------------------------------- 6.0/331.9 MB 30.8 MB/s eta 0:00:11
- -------------------------------------- 14.2/331.9 MB 34.2 MB/s eta 0:00:10
-- ------------------------------------- 21.5/331.9 MB 34.9 MB/s eta 0:00:09
--- ------------------------------------ 29.1/331.9 MB 35.5 MB/s eta 0:00:09
---- ----------------------------------- 37.0/331.9 MB 35.6 MB/s eta 0:00:09
----- ---------------------------------- 45.6/331.9 MB 36.8 MB/s eta 0:00:08
------ --------------------------------- 54.3/331.9 MB 37.2 MB/s eta 0:00:08
------- -------------------------------- 62.7/331.9 MB 37.3 MB/s eta 0:00:08
-------- ------------------------------- 71.0/331.9 MB 37.4 MB/s eta 0:00:07
--------- ------------------------------ 79.4/331.9 MB 37.5 MB/s eta 0:00:07
---------- ----------------------------- 87.8/331.9 MB 37.9 MB/s eta 0:00:07
----------- ---------------------------- 95.9/331.9 MB 37.8 MB/s eta 0:00:07
------------ -------------------------- 103.8/331.9 MB 38.1 MB/s eta 0:00:06
------------- ------------------------- 112.2/331.9 MB 38.1 MB/s eta 0:00:06
-------------- ------------------------ 120.1/331.9 MB 38.0 MB/s eta 0:00:06
--------------- ----------------------- 128.2/331.9 MB 37.9 MB/s eta 0:00:06
---------------- ---------------------- 136.8/331.9 MB 38.0 MB/s eta 0:00:06
----------------- --------------------- 145.0/331.9 MB 38.0 MB/s eta 0:00:05
------------------ -------------------- 153.6/331.9 MB 38.0 MB/s eta 0:00:05
------------------- ------------------- 162.0/331.9 MB 38.1 MB/s eta 0:00:05
------------------- ------------------- 168.3/331.9 MB 38.1 MB/s eta 0:00:05
-------------------- ------------------ 175.1/331.9 MB 37.4 MB/s eta 0:00:05
--------------------- ----------------- 183.2/331.9 MB 37.5 MB/s eta 0:00:04
---------------------- ---------------- 190.8/331.9 MB 37.5 MB/s eta 0:00:04
----------------------- --------------- 198.7/331.9 MB 37.5 MB/s eta 0:00:04
------------------------ -------------- 206.3/331.9 MB 37.5 MB/s eta 0:00:04
------------------------- ------------- 213.9/331.9 MB 37.5 MB/s eta 0:00:04
-------------------------- ------------ 221.8/331.9 MB 37.4 MB/s eta 0:00:03
-------------------------- ------------ 229.4/331.9 MB 37.3 MB/s eta 0:00:03
--------------------------- ----------- 237.0/331.9 MB 37.4 MB/s eta 0:00:03
---------------------------- ---------- 245.1/331.9 MB 37.3 MB/s eta 0:00:03
----------------------------- --------- 253.5/331.9 MB 37.3 MB/s eta 0:00:03
------------------------------ -------- 260.6/331.9 MB 37.3 MB/s eta 0:00:02
------------------------------- ------- 268.4/331.9 MB 37.5 MB/s eta 0:00:02
-------------------------------- ------ 276.0/331.9 MB 37.5 MB/s eta 0:00:02
--------------------------------- ----- 283.9/331.9 MB 37.5 MB/s eta 0:00:02
---------------------------------- ---- 291.8/331.9 MB 37.6 MB/s eta 0:00:02
----------------------------------- --- 300.2/331.9 MB 37.5 MB/s eta 0:00:01
------------------------------------ -- 308.0/331.9 MB 37.4 MB/s eta 0:00:01
------------------------------------- - 316.1/331.9 MB 37.3 MB/s eta 0:00:01
-------------------------------------- 324.0/331.9 MB 37.2 MB/s eta 0:00:01
-------------------------------------- 331.9/331.9 MB 37.3 MB/s eta 0:00:01
-------------------------------------- 331.9/331.9 MB 37.3 MB/s eta 0:00:01
-------------------------------------- 331.9/331.9 MB 37.3 MB/s eta 0:00:01
-------------------------------------- 331.9/331.9 MB 37.3 MB/s eta 0:00:01
--------------------------------------- 331.9/331.9 MB 34.0 MB/s eta 0:00:00
Downloading absl_py-2.3.1-py3-none-any.whl (135 kB)
Downloading astunparse-1.6.3-py2.py3-none-any.whl (12 kB)
Downloading flatbuffers-25.9.23-py2.py3-none-any.whl (30 kB)
Downloading gast-0.6.0-py3-none-any.whl (21 kB)
Downloading google_pasta-0.2.0-py3-none-any.whl (57 kB)
Downloading grpcio-1.76.0-cp312-cp312-win_amd64.whl (4.7 MB)
---------------------------------------- 0.0/4.7 MB ? eta -:--:--
---------------------------------------- 4.7/4.7 MB 35.5 MB/s eta 0:00:00
Downloading keras-3.11.3-py3-none-any.whl (1.4 MB)
---------------------------------------- 0.0/1.4 MB ? eta -:--:--
---------------------------------------- 1.4/1.4 MB 35.8 MB/s eta 0:00:00
Downloading libclang-18.1.1-py2.py3-none-win_amd64.whl (26.4 MB)
---------------------------------------- 0.0/26.4 MB ? eta -:--:--
--------- ------------------------------ 6.6/26.4 MB 33.6 MB/s eta 0:00:01
----------------------- ---------------- 15.2/26.4 MB 36.8 MB/s eta 0:00:01
----------------------------------- ---- 23.6/26.4 MB 38.3 MB/s eta 0:00:01
---------------------------------------- 26.4/26.4 MB 34.9 MB/s eta 0:00:00
Downloading ml_dtypes-0.5.3-cp312-cp312-win_amd64.whl (208 kB)
Downloading opt_einsum-3.4.0-py3-none-any.whl (71 kB)
Downloading protobuf-6.33.0-cp310-abi3-win_amd64.whl (436 kB)
Downloading tensorboard-2.20.0-py3-none-any.whl (5.5 MB)
---------------------------------------- 0.0/5.5 MB ? eta -:--:--
---------------------------------------- 5.5/5.5 MB 33.7 MB/s eta 0:00:00
Downloading termcolor-3.1.0-py3-none-any.whl (7.7 kB)
Downloading typing_extensions-4.15.0-py3-none-any.whl (44 kB)
Downloading tensorboard_data_server-0.7.2-py3-none-any.whl (2.4 kB)
Downloading namex-0.1.0-py3-none-any.whl (5.9 kB)
Downloading optree-0.17.0-cp312-cp312-win_amd64.whl (314 kB)
Installing collected packages: namex, libclang, flatbuffers, typing_extensions, termcolor, tensorboard-data-server, protobuf, opt_einsum, ml_dtypes, google_pasta, gast, astunparse, absl-py, optree, grpcio, tensorboard, keras, tensorflow
Attempting uninstall: typing_extensions
Found existing installation: typing_extensions 4.11.0
Uninstalling typing_extensions-4.11.0:
Successfully uninstalled typing_extensions-4.11.0
Attempting uninstall: protobuf
Found existing installation: protobuf 4.25.3
Uninstalling protobuf-4.25.3:
Successfully uninstalled protobuf-4.25.3
Successfully installed absl-py-2.3.1 astunparse-1.6.3 flatbuffers-25.9.23 gast-0.6.0 google_pasta-0.2.0 grpcio-1.76.0 keras-3.11.3 libclang-18.1.1 ml_dtypes-0.5.3 namex-0.1.0 opt_einsum-3.4.0 optree-0.17.0 protobuf-6.33.0 tensorboard-2.20.0 tensorboard-data-server-0.7.2 tensorflow-2.20.0 termcolor-3.1.0 typing_extensions-4.15.0
Note: you may need to restart the kernel to use updated packages.
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. streamlit 1.37.1 requires protobuf<6,>=3.20, but you have protobuf 6.33.0 which is incompatible.
In [6]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input, Dropout
from sklearn.preprocessing import OneHotEncoder
import numpy as np
# --- 1. Additional Data Preprocessing for Neural Networks ---
# Note: This assumes 'preprocessor', 'X_train', 'X_test', 'y_train',
# and 'y_test' already exist from the previous step.
print("Applying preprocessing (scaling/encoding) to X data...")
# Fit the preprocessor on the training data and transform both train and test sets
X_train_processed = preprocessor.fit_transform(X_train)
X_test_processed = preprocessor.transform(X_test)
# Keras models can't work with sparse matrices from the preprocessor
if hasattr(X_train_processed, "toarray"):
X_train_processed = X_train_processed.toarray()
X_test_processed = X_test_processed.toarray()
# One-hot encode the target variable 'y'
y_encoder = OneHotEncoder(sparse_output=False)
# Reshape y to be a 2D array, as required by OneHotEncoder
y_train_encoded = y_encoder.fit_transform(np.array(y_train).reshape(-1, 1))
y_test_encoded = y_encoder.transform(np.array(y_test).reshape(-1, 1))
# Get feature and class counts for the model
n_features = X_train_processed.shape[1]
n_classes = y_train_encoded.shape[1]
print(f"Number of input features: {n_features}")
print(f"Number of output classes: {n_classes}")
Applying preprocessing (scaling/encoding) to X data... Number of input features: 31 Number of output classes: 7
In [7]:
# SIMPLE NEURAL NETWORK
print("\n--- Building Simple Neural Network ---")
simple_model = Sequential([
# Define the input shape based on our preprocessed data
Input(shape=(n_features,)),
# A single hidden layer with 64 neurons and 'relu' activation
Dense(64, activation='relu'),
# The output layer:
# - 'n_classes' neurons (one for each obesity type)
# - 'softmax' activation to get class probabilities
Dense(n_classes, activation='softmax')
])
# Compile the model
simple_model.compile(
optimizer='adam',
loss='categorical_crossentropy', # Use for one-hot encoded labels
metrics=['accuracy']
)
# Print a summary of the model architecture
simple_model.summary()
# Train the model
print("\nTraining Simple Model...")
# We use a validation_split to monitor performance on unseen data during training
history_simple = simple_model.fit(
X_train_processed,
y_train_encoded,
epochs=20, # Number of times to see the entire dataset
batch_size=32,
validation_split=0.2 # Use 20% of training data for validation
)
# Evaluate the final model on the actual test set
print("\nEvaluating Simple Model on Test Data:")
loss, accuracy = simple_model.evaluate(X_test_processed, y_test_encoded)
print(f"Test Accuracy: {accuracy:.4f}")
--- Building Simple Neural Network ---
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ │ dense (Dense) │ (None, 64) │ 2,048 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dense_1 (Dense) │ (None, 7) │ 455 │ └─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 2,503 (9.78 KB)
Trainable params: 2,503 (9.78 KB)
Non-trainable params: 0 (0.00 B)
Training Simple Model... Epoch 1/20 43/43 ━━━━━━━━━━━━━━━━━━━━ 2s 17ms/step - accuracy: 0.2859 - loss: 1.8723 - val_accuracy: 0.3846 - val_loss: 1.6881 Epoch 2/20 43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.4667 - loss: 1.5142 - val_accuracy: 0.5237 - val_loss: 1.4124 Epoch 3/20 43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.5563 - loss: 1.2615 - val_accuracy: 0.5976 - val_loss: 1.2172 Epoch 4/20 43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.6533 - loss: 1.0880 - val_accuracy: 0.6450 - val_loss: 1.0758 Epoch 5/20 43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.6859 - loss: 0.9635 - val_accuracy: 0.6864 - val_loss: 0.9811 Epoch 6/20 43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.7193 - loss: 0.8720 - val_accuracy: 0.6893 - val_loss: 0.8948 Epoch 7/20 43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.7444 - loss: 0.7967 - val_accuracy: 0.7485 - val_loss: 0.8240 Epoch 8/20 43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.7793 - loss: 0.7364 - val_accuracy: 0.7574 - val_loss: 0.7705 Epoch 9/20 43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.7926 - loss: 0.6856 - val_accuracy: 0.7870 - val_loss: 0.7264 Epoch 10/20 43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.8059 - loss: 0.6417 - val_accuracy: 0.7988 - val_loss: 0.6806 Epoch 11/20 43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step - accuracy: 0.8185 - loss: 0.6032 - val_accuracy: 0.8166 - val_loss: 0.6506 Epoch 12/20 43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.8304 - loss: 0.5684 - val_accuracy: 0.8284 - val_loss: 0.6117 Epoch 13/20 43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.8370 - loss: 0.5389 - val_accuracy: 0.8195 - val_loss: 0.5833 Epoch 14/20 43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.8452 - loss: 0.5084 - val_accuracy: 0.8373 - val_loss: 0.5607 Epoch 15/20 43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.8607 - loss: 0.4833 - val_accuracy: 0.8402 - val_loss: 0.5362 Epoch 16/20 43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.8741 - loss: 0.4594 - val_accuracy: 0.8580 - val_loss: 0.5144 Epoch 17/20 43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.8822 - loss: 0.4382 - val_accuracy: 0.8609 - val_loss: 0.5023 Epoch 18/20 43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.8822 - loss: 0.4196 - val_accuracy: 0.8580 - val_loss: 0.4771 Epoch 19/20 43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.8926 - loss: 0.3983 - val_accuracy: 0.8669 - val_loss: 0.4641 Epoch 20/20 43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.8948 - loss: 0.3822 - val_accuracy: 0.8609 - val_loss: 0.4485 Evaluating Simple Model on Test Data: 14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - accuracy: 0.8629 - loss: 0.4644 Test Accuracy: 0.8629
Neural Network with Dropout and more layers¶
In [8]:
# --- 3. Advanced Neural Network (Deeper, with Dropout) ---
print("\n--- Building Advanced Neural Network ---")
advanced_model = Sequential([
Input(shape=(n_features,)),
# First hidden layer
Dense(128, activation='relu'),
# Dropout layer: randomly "turns off" 30% of neurons during
# training to prevent the model from relying too much on any one neuron
Dropout(0.3),
# Second hidden layer
Dense(128, activation='relu'),
Dropout(0.3),
# Third hidden layer
Dense(64, activation='relu'),
# Output layer (same as before)
Dense(n_classes, activation='softmax')
])
# Compile the model
advanced_model.compile(
optimizer='adam',
loss='categorical_crossentropy',
metrics=['accuracy']
)
# Print a summary
advanced_model.summary()
# Train the model
# This time, we'll train for more epochs (as it's a more complex model)
# and pass the test set directly as validation data.
print("\nTraining Advanced Model...")
print("(This may take longer, especially if running on CPU)")
history_advanced = advanced_model.fit(
X_train_processed,
y_train_encoded,
epochs=50,
batch_size=32,
validation_data=(X_test_processed, y_test_encoded) # Use test set for validation
)
# Evaluate the final model
print("\nEvaluating Advanced Model on Test Data:")
loss_adv, accuracy_adv = advanced_model.evaluate(X_test_processed, y_test_encoded)
print(f"Test Accuracy: {accuracy_adv:.4f}")
--- Building Advanced Neural Network ---
Model: "sequential_1"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ │ dense_2 (Dense) │ (None, 128) │ 4,096 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dropout (Dropout) │ (None, 128) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dense_3 (Dense) │ (None, 128) │ 16,512 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dropout_1 (Dropout) │ (None, 128) │ 0 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dense_4 (Dense) │ (None, 64) │ 8,256 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dense_5 (Dense) │ (None, 7) │ 455 │ └─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 29,319 (114.53 KB)
Trainable params: 29,319 (114.53 KB)
Non-trainable params: 0 (0.00 B)
Training Advanced Model... (This may take longer, especially if running on CPU) Epoch 1/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 3s 14ms/step - accuracy: 0.3750 - loss: 1.6817 - val_accuracy: 0.6076 - val_loss: 1.2214 Epoch 2/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.5966 - loss: 1.0359 - val_accuracy: 0.7400 - val_loss: 0.7269 Epoch 3/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.6866 - loss: 0.7689 - val_accuracy: 0.7967 - val_loss: 0.5380 Epoch 4/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.7684 - loss: 0.5970 - val_accuracy: 0.8227 - val_loss: 0.4362 Epoch 5/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - accuracy: 0.8098 - loss: 0.5062 - val_accuracy: 0.8629 - val_loss: 0.3604 Epoch 6/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.8128 - loss: 0.4511 - val_accuracy: 0.8889 - val_loss: 0.3022 Epoch 7/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step - accuracy: 0.8353 - loss: 0.4064 - val_accuracy: 0.8865 - val_loss: 0.2729 Epoch 8/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.8602 - loss: 0.3561 - val_accuracy: 0.9267 - val_loss: 0.2292 Epoch 9/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - accuracy: 0.8720 - loss: 0.3262 - val_accuracy: 0.9196 - val_loss: 0.2219 Epoch 10/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.8957 - loss: 0.2936 - val_accuracy: 0.9338 - val_loss: 0.1914 Epoch 11/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - accuracy: 0.8803 - loss: 0.2905 - val_accuracy: 0.9362 - val_loss: 0.1794 Epoch 12/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.8987 - loss: 0.2610 - val_accuracy: 0.9385 - val_loss: 0.1704 Epoch 13/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.9028 - loss: 0.2524 - val_accuracy: 0.9433 - val_loss: 0.1609 Epoch 14/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - accuracy: 0.9159 - loss: 0.2209 - val_accuracy: 0.9433 - val_loss: 0.1567 Epoch 15/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.9254 - loss: 0.2044 - val_accuracy: 0.9338 - val_loss: 0.1607 Epoch 16/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.9182 - loss: 0.2117 - val_accuracy: 0.9385 - val_loss: 0.1681 Epoch 17/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9070 - loss: 0.2195 - val_accuracy: 0.9433 - val_loss: 0.1471 Epoch 18/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.9313 - loss: 0.1884 - val_accuracy: 0.9433 - val_loss: 0.1493 Epoch 19/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9242 - loss: 0.1997 - val_accuracy: 0.9574 - val_loss: 0.1289 Epoch 20/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.9325 - loss: 0.1800 - val_accuracy: 0.9504 - val_loss: 0.1440 Epoch 21/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9277 - loss: 0.1960 - val_accuracy: 0.9527 - val_loss: 0.1319 Epoch 22/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9378 - loss: 0.1738 - val_accuracy: 0.9598 - val_loss: 0.1158 Epoch 23/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9437 - loss: 0.1561 - val_accuracy: 0.9480 - val_loss: 0.1642 Epoch 24/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - accuracy: 0.9502 - loss: 0.1553 - val_accuracy: 0.9480 - val_loss: 0.1410 Epoch 25/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9443 - loss: 0.1472 - val_accuracy: 0.9433 - val_loss: 0.1685 Epoch 26/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9508 - loss: 0.1355 - val_accuracy: 0.9456 - val_loss: 0.1382 Epoch 27/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9514 - loss: 0.1374 - val_accuracy: 0.9480 - val_loss: 0.1209 Epoch 28/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9573 - loss: 0.1168 - val_accuracy: 0.9504 - val_loss: 0.1352 Epoch 29/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9556 - loss: 0.1055 - val_accuracy: 0.9551 - val_loss: 0.1177 Epoch 30/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - accuracy: 0.9532 - loss: 0.1139 - val_accuracy: 0.9574 - val_loss: 0.1340 Epoch 31/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9573 - loss: 0.1155 - val_accuracy: 0.9504 - val_loss: 0.1568 Epoch 32/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - accuracy: 0.9609 - loss: 0.0995 - val_accuracy: 0.9456 - val_loss: 0.1433 Epoch 33/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - accuracy: 0.9615 - loss: 0.1009 - val_accuracy: 0.9551 - val_loss: 0.1145 Epoch 34/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9609 - loss: 0.1039 - val_accuracy: 0.9574 - val_loss: 0.1226 Epoch 35/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9609 - loss: 0.1025 - val_accuracy: 0.9527 - val_loss: 0.1431 Epoch 36/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9639 - loss: 0.0917 - val_accuracy: 0.9551 - val_loss: 0.1127 Epoch 37/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9692 - loss: 0.0963 - val_accuracy: 0.9574 - val_loss: 0.1116 Epoch 38/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9727 - loss: 0.0813 - val_accuracy: 0.9551 - val_loss: 0.1348 Epoch 39/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - accuracy: 0.9639 - loss: 0.0953 - val_accuracy: 0.9504 - val_loss: 0.1393 Epoch 40/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9656 - loss: 0.1012 - val_accuracy: 0.9574 - val_loss: 0.1200 Epoch 41/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - accuracy: 0.9639 - loss: 0.1103 - val_accuracy: 0.9598 - val_loss: 0.1143 Epoch 42/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - accuracy: 0.9692 - loss: 0.0946 - val_accuracy: 0.9527 - val_loss: 0.1289 Epoch 43/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.9698 - loss: 0.0838 - val_accuracy: 0.9551 - val_loss: 0.1214 Epoch 44/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - accuracy: 0.9656 - loss: 0.0848 - val_accuracy: 0.9527 - val_loss: 0.1328 Epoch 45/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - accuracy: 0.9686 - loss: 0.0862 - val_accuracy: 0.9598 - val_loss: 0.1224 Epoch 46/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - accuracy: 0.9668 - loss: 0.0954 - val_accuracy: 0.9504 - val_loss: 0.1250 Epoch 47/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - accuracy: 0.9698 - loss: 0.0807 - val_accuracy: 0.9598 - val_loss: 0.1214 Epoch 48/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9680 - loss: 0.0801 - val_accuracy: 0.9645 - val_loss: 0.1139 Epoch 49/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - accuracy: 0.9686 - loss: 0.0880 - val_accuracy: 0.9598 - val_loss: 0.1157 Epoch 50/50 53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9710 - loss: 0.0814 - val_accuracy: 0.9669 - val_loss: 0.1127 Evaluating Advanced Model on Test Data: 14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9669 - loss: 0.1127 Test Accuracy: 0.9669
Per class variable breakdown
In [9]:
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
# --- Setup: Get Class Labels ---
# We need the original string labels that the y_encoder learned
# (This assumes 'y_encoder' was created in the previous step)
class_labels = y_encoder.categories_[0]
# --- 1. Simple Neural Network: Get Full Report ---
print("\n--- Simple Neural Network: Detailed Report ---")
# Get probabilities for each class
y_prob_simple = simple_model.predict(X_test_processed)
# Get the index of the highest probability class
y_pred_indices_simple = np.argmax(y_prob_simple, axis=1)
# Convert indices back to original string labels
y_pred_simple_labels = class_labels[y_pred_indices_simple]
# Now we can use sklearn's classification_report and confusion_matrix
print(classification_report(y_test, y_pred_simple_labels))
print("Confusion Matrix (Simple NN):")
cm_simple_nn = confusion_matrix(y_test, y_pred_simple_labels, labels=class_labels)
print(cm_simple_nn)
# --- 2. Advanced Neural Network: Get Full Report ---
print("\n--- Advanced Neural Network: Detailed Report ---")
# Get probabilities
y_prob_advanced = advanced_model.predict(X_test_processed)
# Get indices
y_pred_indices_advanced = np.argmax(y_prob_advanced, axis=1)
# Convert indices to labels
y_pred_advanced_labels = class_labels[y_pred_indices_advanced]
# Print the report
print(classification_report(y_test, y_pred_advanced_labels))
print("Confusion Matrix (Advanced NN):")
cm_advanced_nn = confusion_matrix(y_test, y_pred_advanced_labels, labels=class_labels)
print(cm_advanced_nn)
# --- 3. (Optional) Plot the NN Confusion Matrices ---
fig, axes = plt.subplots(1, 2, figsize=(18, 7))
fig.suptitle('Neural Network Confusion Matrices', fontsize=16)
# Plot for Simple NN
disp_simple = ConfusionMatrixDisplay(confusion_matrix=cm_simple_nn, display_labels=class_labels)
disp_simple.plot(ax=axes[0], xticks_rotation='vertical')
axes[0].set_title('Simple NN')
# Plot for Advanced NN
disp_advanced = ConfusionMatrixDisplay(confusion_matrix=cm_advanced_nn, display_labels=class_labels)
disp_advanced.plot(ax=axes[1], xticks_rotation='vertical')
axes[1].set_title('Advanced NN')
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
#plt.savefig('nn_confusion_matrices.png')
#print("\nSaved plot of NN confusion matrices to 'nn_confusion_matrices.png'")
--- Simple Neural Network: Detailed Report --- 14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step precision recall f1-score support Insufficient_Weight 0.88 0.94 0.91 54 Normal_Weight 0.75 0.66 0.70 58 Obesity_Type_I 0.89 0.96 0.92 70 Obesity_Type_II 0.98 0.98 0.98 60 Obesity_Type_III 1.00 0.98 0.99 65 Overweight_Level_I 0.71 0.76 0.73 58 Overweight_Level_II 0.79 0.72 0.76 58 accuracy 0.86 423 macro avg 0.86 0.86 0.86 423 weighted avg 0.86 0.86 0.86 423 Confusion Matrix (Simple NN): [[51 3 0 0 0 0 0] [ 7 38 0 0 0 11 2] [ 0 0 67 0 0 1 2] [ 0 0 1 59 0 0 0] [ 0 0 0 1 64 0 0] [ 0 7 0 0 0 44 7] [ 0 3 7 0 0 6 42]] --- Advanced Neural Network: Detailed Report --- 14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step precision recall f1-score support Insufficient_Weight 0.96 1.00 0.98 54 Normal_Weight 0.93 0.88 0.90 58 Obesity_Type_I 0.97 1.00 0.99 70 Obesity_Type_II 1.00 0.98 0.99 60 Obesity_Type_III 1.00 1.00 1.00 65 Overweight_Level_I 0.91 0.91 0.91 58 Overweight_Level_II 0.98 0.98 0.98 58 accuracy 0.97 423 macro avg 0.97 0.97 0.97 423 weighted avg 0.97 0.97 0.97 423 Confusion Matrix (Advanced NN): [[54 0 0 0 0 0 0] [ 2 51 0 0 0 5 0] [ 0 0 70 0 0 0 0] [ 0 0 1 59 0 0 0] [ 0 0 0 0 65 0 0] [ 0 4 0 0 0 53 1] [ 0 0 1 0 0 0 57]]
Comparison of all the models¶
In [11]:
from sklearn.neighbors import KNeighborsClassifier
# 1. Create the full pipeline with the preprocessor and the classifier
knn_pipeline = Pipeline(steps=[
('preprocessor', preprocessor),
('classifier', KNeighborsClassifier(n_neighbors=7))
])
# 2. Train the KNN model
print("Training KNN model...")
knn_pipeline.fit(X_train, y_train)
# 3. Make predictions on the test set
y_pred_knn = knn_pipeline.predict(X_test)
# 4. Evaluate the model
print("\n--- K-Nearest Neighbors (KNN) Results ---")
print(f"Accuracy: {accuracy_score(y_test, y_pred_knn):.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred_knn))
from sklearn.svm import SVC
# 1. Create the full pipeline
svm_pipeline = Pipeline(steps=[
('preprocessor', preprocessor),
('classifier', SVC(kernel='linear', C=1.0, random_state=42)) # 'C=1.0' is the regularization parameter
])
# 2. Train the SVM model
print("Training SVM model...")
svm_pipeline.fit(X_train, y_train)
# 3. Make predictions on the test set
y_pred_svm = svm_pipeline.predict(X_test)
# 4. Evaluate the model
print("\n--- Support Vector Machine (SVM) Results ---")
print(f"Accuracy: {accuracy_score(y_test, y_pred_svm):.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred_svm))
from sklearn.ensemble import RandomForestClassifier
# 1. Create the full pipeline
rf_pipeline = Pipeline(steps=[
('preprocessor', preprocessor),
('classifier', RandomForestClassifier(n_estimators=200, random_state=42, n_jobs=-1))
])
# 2. Train the Random Forest model
print("Training Random Forest model...")
rf_pipeline.fit(X_train, y_train)
# 3. Make predictions on the test set
y_pred_rf = rf_pipeline.predict(X_test)
# 4. Evaluate the model
print("\n--- Random Forest Results ---")
print(f"Accuracy: {accuracy_score(y_test, y_pred_rf):.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred_rf))
Training KNN model...
c:\Users\Vikas\anaconda3\Lib\site-packages\sklearn\neighbors\_classification.py:238: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel(). return self._fit(X, y)
--- K-Nearest Neighbors (KNN) Results ---
Accuracy: 0.8109
Classification Report:
precision recall f1-score support
Insufficient_Weight 0.73 0.94 0.82 54
Normal_Weight 0.79 0.40 0.53 58
Obesity_Type_I 0.76 0.94 0.84 70
Obesity_Type_II 0.91 0.97 0.94 60
Obesity_Type_III 0.98 1.00 0.99 65
Overweight_Level_I 0.73 0.69 0.71 58
Overweight_Level_II 0.77 0.69 0.73 58
accuracy 0.81 423
macro avg 0.81 0.80 0.79 423
weighted avg 0.81 0.81 0.80 423
Training SVM model...
--- Support Vector Machine (SVM) Results ---
Accuracy: 0.9551
Classification Report:
precision recall f1-score support
Insufficient_Weight 0.95 1.00 0.97 54
Normal_Weight 0.93 0.86 0.89 58
Obesity_Type_I 0.99 0.97 0.98 70
Obesity_Type_II 0.98 0.98 0.98 60
Obesity_Type_III 1.00 0.98 0.99 65
Overweight_Level_I 0.88 0.91 0.90 58
Overweight_Level_II 0.95 0.97 0.96 58
accuracy 0.96 423
macro avg 0.95 0.95 0.95 423
weighted avg 0.96 0.96 0.95 423
c:\Users\Vikas\anaconda3\Lib\site-packages\sklearn\utils\validation.py:1339: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel(). y = column_or_1d(y, warn=True)
Training Random Forest model...
c:\Users\Vikas\anaconda3\Lib\site-packages\sklearn\base.py:1473: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel(). return fit_method(estimator, *args, **kwargs)
--- Random Forest Results ---
Accuracy: 0.9433
Classification Report:
precision recall f1-score support
Insufficient_Weight 1.00 0.94 0.97 54
Normal_Weight 0.78 0.93 0.85 58
Obesity_Type_I 0.97 0.96 0.96 70
Obesity_Type_II 1.00 0.98 0.99 60
Obesity_Type_III 1.00 0.98 0.99 65
Overweight_Level_I 0.91 0.86 0.88 58
Overweight_Level_II 0.96 0.93 0.95 58
accuracy 0.94 423
macro avg 0.95 0.94 0.94 423
weighted avg 0.95 0.94 0.94 423
In [14]:
# --- 1. Get Predictions for all models ---
# (We already have y_pred_simple_labels and y_pred_advanced_labels from above)
# Get predictions from the sklearn pipelines
y_pred_knn = knn_pipeline.predict(X_test)
y_pred_svm = svm_pipeline.predict(X_test)
y_pred_rf = rf_pipeline.predict(X_test)
# --- 2. Store reports in a dictionary ---
model_predictions = {
'KNN': y_pred_knn,
'SVM': y_pred_svm,
'Random Forest': y_pred_rf,
'Simple NN': y_pred_simple_labels,
'Advanced NN': y_pred_advanced_labels
}
# --- 3. Build a DataFrame of F1-Scores ---
f1_scores = {}
for model_name, y_pred in model_predictions.items():
# Get the classification report as a dictionary
report = classification_report(y_test, y_pred, output_dict=True)
# Extract the f1-score for each class (excluding averages)
class_f1_scores = {}
for label, metrics in report.items():
if label in class_labels: # Only include our target classes
class_f1_scores[label] = metrics['f1-score']
f1_scores[model_name] = class_f1_scores
# Convert the dictionary to a DataFrame
f1_df = pd.DataFrame(f1_scores).T # Transpose to get models as rows, classes as columns
# --- 4. Plot the Heatmap ---
plt.figure(figsize=(14, 8))
sns.heatmap(f1_df, annot=True, cmap='YlGnBu', fmt='.3f', linewidths=.5)
plt.title('Model Comparison: F1-Scores per Class', fontsize=16)
plt.xlabel('Obesity Type (Class)', fontsize=12)
plt.ylabel('Model', fontsize=12)
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)
plt.tight_layout()
#plt.savefig('model_comparison_heatmap.png')
print("\nSaved model comparison heatmap to 'model_comparison_heatmap.png'")
f1_df.head()
Saved model comparison heatmap to 'model_comparison_heatmap.png'
Out[14]:
| Insufficient_Weight | Normal_Weight | Obesity_Type_I | Obesity_Type_II | Obesity_Type_III | Overweight_Level_I | Overweight_Level_II | |
|---|---|---|---|---|---|---|---|
| KNN | 0.822581 | 0.528736 | 0.840764 | 0.935484 | 0.992366 | 0.707965 | 0.727273 |
| SVM | 0.972973 | 0.892857 | 0.978417 | 0.983333 | 0.992248 | 0.898305 | 0.957265 |
| Random Forest | 0.971429 | 0.850394 | 0.964029 | 0.991597 | 0.992248 | 0.884956 | 0.947368 |
| Simple NN | 0.910714 | 0.697248 | 0.924138 | 0.983333 | 0.992248 | 0.733333 | 0.756757 |
| Advanced NN | 0.981818 | 0.902655 | 0.985915 | 0.991597 | 1.000000 | 0.913793 | 0.982759 |