Classifying Obesity on 5 categories using 5 different classifier models from simple to complex¶

Used KNN, SVM, RF, Simple NN, Advanced NN (w dropout)¶

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

Used the Obesity dataset from UCI Repository accessible on Python.¶

In [2]:
from ucimlrepo import fetch_ucirepo 
  
# fetch dataset 
estimation_of_obesity_levels_based_on_eating_habits_and_physical_condition = fetch_ucirepo(id=544) 
  
# data (as pandas dataframes) 
X = estimation_of_obesity_levels_based_on_eating_habits_and_physical_condition.data.features 
y = estimation_of_obesity_levels_based_on_eating_habits_and_physical_condition.data.targets 
  
# metadata 
print(estimation_of_obesity_levels_based_on_eating_habits_and_physical_condition.metadata) 
  
# variable information 
print(estimation_of_obesity_levels_based_on_eating_habits_and_physical_condition.variables) 
{'uci_id': 544, 'name': 'Estimation of Obesity Levels Based On Eating Habits and Physical Condition ', 'repository_url': 'https://archive.ics.uci.edu/dataset/544/estimation+of+obesity+levels+based+on+eating+habits+and+physical+condition', 'data_url': 'https://archive.ics.uci.edu/static/public/544/data.csv', 'abstract': 'This dataset include data for the estimation of obesity levels in individuals from the countries of Mexico, Peru and Colombia, based on their eating habits and physical condition. ', 'area': 'Health and Medicine', 'tasks': ['Classification', 'Regression', 'Clustering'], 'characteristics': ['Multivariate'], 'num_instances': 2111, 'num_features': 16, 'feature_types': ['Integer'], 'demographics': ['Gender', 'Age'], 'target_col': ['NObeyesdad'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 2019, 'last_updated': 'Tue Sep 10 2024', 'dataset_doi': '10.24432/C5H31Z', 'creators': [], 'intro_paper': {'ID': 358, 'type': 'NATIVE', 'title': 'Dataset for estimation of obesity levels based on eating habits and physical condition in individuals from Colombia, Peru and Mexico', 'authors': 'Fabio Mendoza Palechor, Alexis De la Hoz Manotas', 'venue': 'Data in Brief', 'year': 2019, 'journal': None, 'DOI': '10.1016/j.dib.2019.104344', 'URL': 'https://www.semanticscholar.org/paper/35b40bacd2ffa9370885b7a3004d88995fd1d011', 'sha': None, 'corpus': None, 'arxiv': None, 'mag': None, 'acl': None, 'pmid': None, 'pmcid': None}, 'additional_info': {'summary': 'This dataset include data for the estimation of obesity levels in individuals from the countries of Mexico, Peru and Colombia, based on their eating habits and physical condition. The data contains 17 attributes and 2111 records, the records are labeled with the class variable NObesity (Obesity Level), that allows classification of the data using the values of Insufficient Weight, Normal Weight, Overweight Level I, Overweight Level II, Obesity Type I, Obesity Type II and Obesity Type III. 77% of the data was generated synthetically using the Weka tool and the SMOTE filter, 23% of the data was collected directly from users through a web platform.', 'purpose': None, 'funded_by': None, 'instances_represent': None, 'recommended_data_splits': None, 'sensitive_data': None, 'preprocessing_description': None, 'variable_info': 'Read the article (https://doi.org/10.1016/j.dib.2019.104344) to see the description of the attributes.', 'citation': None}}
                              name     role         type demographic  \
0                           Gender  Feature  Categorical      Gender   
1                              Age  Feature   Continuous         Age   
2                           Height  Feature   Continuous        None   
3                           Weight  Feature   Continuous        None   
4   family_history_with_overweight  Feature       Binary        None   
5                             FAVC  Feature       Binary        None   
6                             FCVC  Feature      Integer        None   
7                              NCP  Feature   Continuous        None   
8                             CAEC  Feature  Categorical        None   
9                            SMOKE  Feature       Binary        None   
10                            CH2O  Feature   Continuous        None   
11                             SCC  Feature       Binary        None   
12                             FAF  Feature   Continuous        None   
13                             TUE  Feature      Integer        None   
14                            CALC  Feature  Categorical        None   
15                          MTRANS  Feature  Categorical        None   
16                      NObeyesdad   Target  Categorical        None   

                                          description units missing_values  
0                                                None  None             no  
1                                                None  None             no  
2                                                None  None             no  
3                                                None  None             no  
4   Has a family member suffered or suffers from o...  None             no  
5            Do you eat high caloric food frequently?  None             no  
6        Do you usually eat vegetables in your meals?  None             no  
7              How many main meals do you have daily?  None             no  
8                  Do you eat any food between meals?  None             no  
9                                       Do you smoke?  None             no  
10                 How much water do you drink daily?  None             no  
11         Do you monitor the calories you eat daily?  None             no  
12           How often do you have physical activity?  None             no  
13  How much time do you use technological devices...  None             no  
14                    How often do you drink alcohol?  None             no  
15           Which transportation do you usually use?  None             no  
16                                      Obesity level  None             no  

General setup for Data preprocessing¶

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, accuracy_score



# 1. Define feature lists
# Based on your dataset image
numeric_features = ['Age', 'Height', 'Weight', 'FCVC', 'NCP', 'CH2O', 'FAF', 'TUE']
categorical_features = ['Gender', 'family_history_with_overweight', 'FAVC', 'CAEC', 'SMOKE', 'SCC', 'CALC', 'MTRANS']

# 2. Create the preprocessing transformers
# StandardScaler for numerical features
numeric_transformer = StandardScaler()

# OneHotEncoder for categorical features
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

# 3. Create the ColumnTransformer to apply different transforms to different columns
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# 4. Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

print(f"Training set shape: X={X_train.shape}, y={y_train.shape}")
print(f"Test set shape: X={X_test.shape}, y={y_test.shape}")
Training set shape: X=(1688, 16), y=(1688, 1)
Test set shape: X=(423, 16), y=(423, 1)

Neural Network Setup¶

In [5]:
pip install tensorflow
Collecting tensorflow
  Downloading tensorflow-2.20.0-cp312-cp312-win_amd64.whl.metadata (4.6 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
  Downloading absl_py-2.3.1-py3-none-any.whl.metadata (3.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow)
  Downloading flatbuffers-25.9.23-py2.py3-none-any.whl.metadata (875 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)
  Downloading gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google_pasta>=0.1.1 (from tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow)
  Downloading libclang-18.1.1-py2.py3-none-win_amd64.whl.metadata (5.3 kB)
Collecting opt_einsum>=2.3.2 (from tensorflow)
  Downloading opt_einsum-3.4.0-py3-none-any.whl.metadata (6.3 kB)
Requirement already satisfied: packaging in c:\users\vikas\anaconda3\lib\site-packages (from tensorflow) (24.1)
Collecting protobuf>=5.28.0 (from tensorflow)
  Downloading protobuf-6.33.0-cp310-abi3-win_amd64.whl.metadata (593 bytes)
Requirement already satisfied: requests<3,>=2.21.0 in c:\users\vikas\anaconda3\lib\site-packages (from tensorflow) (2.32.3)
Requirement already satisfied: setuptools in c:\users\vikas\anaconda3\lib\site-packages (from tensorflow) (75.1.0)
Requirement already satisfied: six>=1.12.0 in c:\users\vikas\anaconda3\lib\site-packages (from tensorflow) (1.16.0)
Collecting termcolor>=1.1.0 (from tensorflow)
  Downloading termcolor-3.1.0-py3-none-any.whl.metadata (6.4 kB)
Requirement already satisfied: typing_extensions>=3.6.6 in c:\users\vikas\anaconda3\lib\site-packages (from tensorflow) (4.11.0)
Requirement already satisfied: wrapt>=1.11.0 in c:\users\vikas\anaconda3\lib\site-packages (from tensorflow) (1.14.1)
Collecting grpcio<2.0,>=1.24.3 (from tensorflow)
  Downloading grpcio-1.76.0-cp312-cp312-win_amd64.whl.metadata (3.8 kB)
Collecting tensorboard~=2.20.0 (from tensorflow)
  Downloading tensorboard-2.20.0-py3-none-any.whl.metadata (1.8 kB)
Collecting keras>=3.10.0 (from tensorflow)
  Downloading keras-3.11.3-py3-none-any.whl.metadata (5.9 kB)
Requirement already satisfied: numpy>=1.26.0 in c:\users\vikas\anaconda3\lib\site-packages (from tensorflow) (1.26.4)
Requirement already satisfied: h5py>=3.11.0 in c:\users\vikas\anaconda3\lib\site-packages (from tensorflow) (3.11.0)
Collecting ml_dtypes<1.0.0,>=0.5.1 (from tensorflow)
  Downloading ml_dtypes-0.5.3-cp312-cp312-win_amd64.whl.metadata (9.2 kB)
Requirement already satisfied: wheel<1.0,>=0.23.0 in c:\users\vikas\anaconda3\lib\site-packages (from astunparse>=1.6.0->tensorflow) (0.44.0)
Collecting typing_extensions>=3.6.6 (from tensorflow)
  Downloading typing_extensions-4.15.0-py3-none-any.whl.metadata (3.3 kB)
Requirement already satisfied: rich in c:\users\vikas\anaconda3\lib\site-packages (from keras>=3.10.0->tensorflow) (13.7.1)
Collecting namex (from keras>=3.10.0->tensorflow)
  Downloading namex-0.1.0-py3-none-any.whl.metadata (322 bytes)
Collecting optree (from keras>=3.10.0->tensorflow)
  Downloading optree-0.17.0-cp312-cp312-win_amd64.whl.metadata (34 kB)
Requirement already satisfied: charset-normalizer<4,>=2 in c:\users\vikas\anaconda3\lib\site-packages (from requests<3,>=2.21.0->tensorflow) (3.3.2)
Requirement already satisfied: idna<4,>=2.5 in c:\users\vikas\anaconda3\lib\site-packages (from requests<3,>=2.21.0->tensorflow) (3.7)
Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\vikas\anaconda3\lib\site-packages (from requests<3,>=2.21.0->tensorflow) (2.2.3)
Requirement already satisfied: certifi>=2017.4.17 in c:\users\vikas\anaconda3\lib\site-packages (from requests<3,>=2.21.0->tensorflow) (2024.8.30)
Requirement already satisfied: markdown>=2.6.8 in c:\users\vikas\anaconda3\lib\site-packages (from tensorboard~=2.20.0->tensorflow) (3.4.1)
Requirement already satisfied: pillow in c:\users\vikas\anaconda3\lib\site-packages (from tensorboard~=2.20.0->tensorflow) (10.4.0)
Collecting tensorboard-data-server<0.8.0,>=0.7.0 (from tensorboard~=2.20.0->tensorflow)
  Downloading tensorboard_data_server-0.7.2-py3-none-any.whl.metadata (1.1 kB)
Requirement already satisfied: werkzeug>=1.0.1 in c:\users\vikas\anaconda3\lib\site-packages (from tensorboard~=2.20.0->tensorflow) (3.0.3)
Requirement already satisfied: MarkupSafe>=2.1.1 in c:\users\vikas\anaconda3\lib\site-packages (from werkzeug>=1.0.1->tensorboard~=2.20.0->tensorflow) (2.1.3)
Requirement already satisfied: markdown-it-py>=2.2.0 in c:\users\vikas\anaconda3\lib\site-packages (from rich->keras>=3.10.0->tensorflow) (2.2.0)
Requirement already satisfied: pygments<3.0.0,>=2.13.0 in c:\users\vikas\anaconda3\lib\site-packages (from rich->keras>=3.10.0->tensorflow) (2.15.1)
Requirement already satisfied: mdurl~=0.1 in c:\users\vikas\anaconda3\lib\site-packages (from markdown-it-py>=2.2.0->rich->keras>=3.10.0->tensorflow) (0.1.0)
Downloading tensorflow-2.20.0-cp312-cp312-win_amd64.whl (331.9 MB)
   ---------------------------------------- 0.0/331.9 MB ? eta -:--:--
    --------------------------------------- 6.0/331.9 MB 30.8 MB/s eta 0:00:11
   - -------------------------------------- 14.2/331.9 MB 34.2 MB/s eta 0:00:10
   -- ------------------------------------- 21.5/331.9 MB 34.9 MB/s eta 0:00:09
   --- ------------------------------------ 29.1/331.9 MB 35.5 MB/s eta 0:00:09
   ---- ----------------------------------- 37.0/331.9 MB 35.6 MB/s eta 0:00:09
   ----- ---------------------------------- 45.6/331.9 MB 36.8 MB/s eta 0:00:08
   ------ --------------------------------- 54.3/331.9 MB 37.2 MB/s eta 0:00:08
   ------- -------------------------------- 62.7/331.9 MB 37.3 MB/s eta 0:00:08
   -------- ------------------------------- 71.0/331.9 MB 37.4 MB/s eta 0:00:07
   --------- ------------------------------ 79.4/331.9 MB 37.5 MB/s eta 0:00:07
   ---------- ----------------------------- 87.8/331.9 MB 37.9 MB/s eta 0:00:07
   ----------- ---------------------------- 95.9/331.9 MB 37.8 MB/s eta 0:00:07
   ------------ -------------------------- 103.8/331.9 MB 38.1 MB/s eta 0:00:06
   ------------- ------------------------- 112.2/331.9 MB 38.1 MB/s eta 0:00:06
   -------------- ------------------------ 120.1/331.9 MB 38.0 MB/s eta 0:00:06
   --------------- ----------------------- 128.2/331.9 MB 37.9 MB/s eta 0:00:06
   ---------------- ---------------------- 136.8/331.9 MB 38.0 MB/s eta 0:00:06
   ----------------- --------------------- 145.0/331.9 MB 38.0 MB/s eta 0:00:05
   ------------------ -------------------- 153.6/331.9 MB 38.0 MB/s eta 0:00:05
   ------------------- ------------------- 162.0/331.9 MB 38.1 MB/s eta 0:00:05
   ------------------- ------------------- 168.3/331.9 MB 38.1 MB/s eta 0:00:05
   -------------------- ------------------ 175.1/331.9 MB 37.4 MB/s eta 0:00:05
   --------------------- ----------------- 183.2/331.9 MB 37.5 MB/s eta 0:00:04
   ---------------------- ---------------- 190.8/331.9 MB 37.5 MB/s eta 0:00:04
   ----------------------- --------------- 198.7/331.9 MB 37.5 MB/s eta 0:00:04
   ------------------------ -------------- 206.3/331.9 MB 37.5 MB/s eta 0:00:04
   ------------------------- ------------- 213.9/331.9 MB 37.5 MB/s eta 0:00:04
   -------------------------- ------------ 221.8/331.9 MB 37.4 MB/s eta 0:00:03
   -------------------------- ------------ 229.4/331.9 MB 37.3 MB/s eta 0:00:03
   --------------------------- ----------- 237.0/331.9 MB 37.4 MB/s eta 0:00:03
   ---------------------------- ---------- 245.1/331.9 MB 37.3 MB/s eta 0:00:03
   ----------------------------- --------- 253.5/331.9 MB 37.3 MB/s eta 0:00:03
   ------------------------------ -------- 260.6/331.9 MB 37.3 MB/s eta 0:00:02
   ------------------------------- ------- 268.4/331.9 MB 37.5 MB/s eta 0:00:02
   -------------------------------- ------ 276.0/331.9 MB 37.5 MB/s eta 0:00:02
   --------------------------------- ----- 283.9/331.9 MB 37.5 MB/s eta 0:00:02
   ---------------------------------- ---- 291.8/331.9 MB 37.6 MB/s eta 0:00:02
   ----------------------------------- --- 300.2/331.9 MB 37.5 MB/s eta 0:00:01
   ------------------------------------ -- 308.0/331.9 MB 37.4 MB/s eta 0:00:01
   ------------------------------------- - 316.1/331.9 MB 37.3 MB/s eta 0:00:01
   --------------------------------------  324.0/331.9 MB 37.2 MB/s eta 0:00:01
   --------------------------------------  331.9/331.9 MB 37.3 MB/s eta 0:00:01
   --------------------------------------  331.9/331.9 MB 37.3 MB/s eta 0:00:01
   --------------------------------------  331.9/331.9 MB 37.3 MB/s eta 0:00:01
   --------------------------------------  331.9/331.9 MB 37.3 MB/s eta 0:00:01
   --------------------------------------- 331.9/331.9 MB 34.0 MB/s eta 0:00:00
Downloading absl_py-2.3.1-py3-none-any.whl (135 kB)
Downloading astunparse-1.6.3-py2.py3-none-any.whl (12 kB)
Downloading flatbuffers-25.9.23-py2.py3-none-any.whl (30 kB)
Downloading gast-0.6.0-py3-none-any.whl (21 kB)
Downloading google_pasta-0.2.0-py3-none-any.whl (57 kB)
Downloading grpcio-1.76.0-cp312-cp312-win_amd64.whl (4.7 MB)
   ---------------------------------------- 0.0/4.7 MB ? eta -:--:--
   ---------------------------------------- 4.7/4.7 MB 35.5 MB/s eta 0:00:00
Downloading keras-3.11.3-py3-none-any.whl (1.4 MB)
   ---------------------------------------- 0.0/1.4 MB ? eta -:--:--
   ---------------------------------------- 1.4/1.4 MB 35.8 MB/s eta 0:00:00
Downloading libclang-18.1.1-py2.py3-none-win_amd64.whl (26.4 MB)
   ---------------------------------------- 0.0/26.4 MB ? eta -:--:--
   --------- ------------------------------ 6.6/26.4 MB 33.6 MB/s eta 0:00:01
   ----------------------- ---------------- 15.2/26.4 MB 36.8 MB/s eta 0:00:01
   ----------------------------------- ---- 23.6/26.4 MB 38.3 MB/s eta 0:00:01
   ---------------------------------------- 26.4/26.4 MB 34.9 MB/s eta 0:00:00
Downloading ml_dtypes-0.5.3-cp312-cp312-win_amd64.whl (208 kB)
Downloading opt_einsum-3.4.0-py3-none-any.whl (71 kB)
Downloading protobuf-6.33.0-cp310-abi3-win_amd64.whl (436 kB)
Downloading tensorboard-2.20.0-py3-none-any.whl (5.5 MB)
   ---------------------------------------- 0.0/5.5 MB ? eta -:--:--
   ---------------------------------------- 5.5/5.5 MB 33.7 MB/s eta 0:00:00
Downloading termcolor-3.1.0-py3-none-any.whl (7.7 kB)
Downloading typing_extensions-4.15.0-py3-none-any.whl (44 kB)
Downloading tensorboard_data_server-0.7.2-py3-none-any.whl (2.4 kB)
Downloading namex-0.1.0-py3-none-any.whl (5.9 kB)
Downloading optree-0.17.0-cp312-cp312-win_amd64.whl (314 kB)
Installing collected packages: namex, libclang, flatbuffers, typing_extensions, termcolor, tensorboard-data-server, protobuf, opt_einsum, ml_dtypes, google_pasta, gast, astunparse, absl-py, optree, grpcio, tensorboard, keras, tensorflow
  Attempting uninstall: typing_extensions
    Found existing installation: typing_extensions 4.11.0
    Uninstalling typing_extensions-4.11.0:
      Successfully uninstalled typing_extensions-4.11.0
  Attempting uninstall: protobuf
    Found existing installation: protobuf 4.25.3
    Uninstalling protobuf-4.25.3:
      Successfully uninstalled protobuf-4.25.3
Successfully installed absl-py-2.3.1 astunparse-1.6.3 flatbuffers-25.9.23 gast-0.6.0 google_pasta-0.2.0 grpcio-1.76.0 keras-3.11.3 libclang-18.1.1 ml_dtypes-0.5.3 namex-0.1.0 opt_einsum-3.4.0 optree-0.17.0 protobuf-6.33.0 tensorboard-2.20.0 tensorboard-data-server-0.7.2 tensorflow-2.20.0 termcolor-3.1.0 typing_extensions-4.15.0
Note: you may need to restart the kernel to use updated packages.
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
streamlit 1.37.1 requires protobuf<6,>=3.20, but you have protobuf 6.33.0 which is incompatible.
In [6]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input, Dropout
from sklearn.preprocessing import OneHotEncoder
import numpy as np

# --- 1. Additional Data Preprocessing for Neural Networks ---

# Note: This assumes 'preprocessor', 'X_train', 'X_test', 'y_train', 
# and 'y_test' already exist from the previous step.

print("Applying preprocessing (scaling/encoding) to X data...")
# Fit the preprocessor on the training data and transform both train and test sets
X_train_processed = preprocessor.fit_transform(X_train)
X_test_processed = preprocessor.transform(X_test)

# Keras models can't work with sparse matrices from the preprocessor
if hasattr(X_train_processed, "toarray"):
    X_train_processed = X_train_processed.toarray()
    X_test_processed = X_test_processed.toarray()

# One-hot encode the target variable 'y'
y_encoder = OneHotEncoder(sparse_output=False)
# Reshape y to be a 2D array, as required by OneHotEncoder
y_train_encoded = y_encoder.fit_transform(np.array(y_train).reshape(-1, 1))
y_test_encoded = y_encoder.transform(np.array(y_test).reshape(-1, 1))

# Get feature and class counts for the model
n_features = X_train_processed.shape[1]
n_classes = y_train_encoded.shape[1]

print(f"Number of input features: {n_features}")
print(f"Number of output classes: {n_classes}")
Applying preprocessing (scaling/encoding) to X data...
Number of input features: 31
Number of output classes: 7
In [7]:
# SIMPLE NEURAL NETWORK

print("\n--- Building Simple Neural Network ---")

simple_model = Sequential([
    # Define the input shape based on our preprocessed data
    Input(shape=(n_features,)),
    
    # A single hidden layer with 64 neurons and 'relu' activation
    Dense(64, activation='relu'),
    
    # The output layer: 
    # - 'n_classes' neurons (one for each obesity type)
    # - 'softmax' activation to get class probabilities
    Dense(n_classes, activation='softmax')
])

# Compile the model
simple_model.compile(
    optimizer='adam', 
    loss='categorical_crossentropy',  # Use for one-hot encoded labels
    metrics=['accuracy']
)

# Print a summary of the model architecture
simple_model.summary()

# Train the model
print("\nTraining Simple Model...")
# We use a validation_split to monitor performance on unseen data during training
history_simple = simple_model.fit(
    X_train_processed, 
    y_train_encoded,
    epochs=20,  # Number of times to see the entire dataset
    batch_size=32,
    validation_split=0.2  # Use 20% of training data for validation
)

# Evaluate the final model on the actual test set
print("\nEvaluating Simple Model on Test Data:")
loss, accuracy = simple_model.evaluate(X_test_processed, y_test_encoded)
print(f"Test Accuracy: {accuracy:.4f}")
--- Building Simple Neural Network ---
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ dense (Dense)                   │ (None, 64)             │         2,048 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_1 (Dense)                 │ (None, 7)              │           455 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
 Total params: 2,503 (9.78 KB)
 Trainable params: 2,503 (9.78 KB)
 Non-trainable params: 0 (0.00 B)
Training Simple Model...
Epoch 1/20
43/43 ━━━━━━━━━━━━━━━━━━━━ 2s 17ms/step - accuracy: 0.2859 - loss: 1.8723 - val_accuracy: 0.3846 - val_loss: 1.6881
Epoch 2/20
43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.4667 - loss: 1.5142 - val_accuracy: 0.5237 - val_loss: 1.4124
Epoch 3/20
43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.5563 - loss: 1.2615 - val_accuracy: 0.5976 - val_loss: 1.2172
Epoch 4/20
43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.6533 - loss: 1.0880 - val_accuracy: 0.6450 - val_loss: 1.0758
Epoch 5/20
43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.6859 - loss: 0.9635 - val_accuracy: 0.6864 - val_loss: 0.9811
Epoch 6/20
43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.7193 - loss: 0.8720 - val_accuracy: 0.6893 - val_loss: 0.8948
Epoch 7/20
43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.7444 - loss: 0.7967 - val_accuracy: 0.7485 - val_loss: 0.8240
Epoch 8/20
43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.7793 - loss: 0.7364 - val_accuracy: 0.7574 - val_loss: 0.7705
Epoch 9/20
43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.7926 - loss: 0.6856 - val_accuracy: 0.7870 - val_loss: 0.7264
Epoch 10/20
43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.8059 - loss: 0.6417 - val_accuracy: 0.7988 - val_loss: 0.6806
Epoch 11/20
43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step - accuracy: 0.8185 - loss: 0.6032 - val_accuracy: 0.8166 - val_loss: 0.6506
Epoch 12/20
43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.8304 - loss: 0.5684 - val_accuracy: 0.8284 - val_loss: 0.6117
Epoch 13/20
43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.8370 - loss: 0.5389 - val_accuracy: 0.8195 - val_loss: 0.5833
Epoch 14/20
43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.8452 - loss: 0.5084 - val_accuracy: 0.8373 - val_loss: 0.5607
Epoch 15/20
43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.8607 - loss: 0.4833 - val_accuracy: 0.8402 - val_loss: 0.5362
Epoch 16/20
43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.8741 - loss: 0.4594 - val_accuracy: 0.8580 - val_loss: 0.5144
Epoch 17/20
43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.8822 - loss: 0.4382 - val_accuracy: 0.8609 - val_loss: 0.5023
Epoch 18/20
43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.8822 - loss: 0.4196 - val_accuracy: 0.8580 - val_loss: 0.4771
Epoch 19/20
43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.8926 - loss: 0.3983 - val_accuracy: 0.8669 - val_loss: 0.4641
Epoch 20/20
43/43 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.8948 - loss: 0.3822 - val_accuracy: 0.8609 - val_loss: 0.4485

Evaluating Simple Model on Test Data:
14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - accuracy: 0.8629 - loss: 0.4644 
Test Accuracy: 0.8629

Neural Network with Dropout and more layers¶

In [8]:
# --- 3. Advanced Neural Network (Deeper, with Dropout) ---

print("\n--- Building Advanced Neural Network ---")

advanced_model = Sequential([
    Input(shape=(n_features,)),
    
    # First hidden layer
    Dense(128, activation='relu'),
    # Dropout layer: randomly "turns off" 30% of neurons during
    # training to prevent the model from relying too much on any one neuron
    Dropout(0.3),
    
    # Second hidden layer
    Dense(128, activation='relu'),
    Dropout(0.3),
    
    # Third hidden layer
    Dense(64, activation='relu'),
    
    # Output layer (same as before)
    Dense(n_classes, activation='softmax')
])

# Compile the model
advanced_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Print a summary
advanced_model.summary()

# Train the model
# This time, we'll train for more epochs (as it's a more complex model)
# and pass the test set directly as validation data.
print("\nTraining Advanced Model...")
print("(This may take longer, especially if running on CPU)")
history_advanced = advanced_model.fit(
    X_train_processed,
    y_train_encoded,
    epochs=50, 
    batch_size=32,
    validation_data=(X_test_processed, y_test_encoded) # Use test set for validation
)

# Evaluate the final model
print("\nEvaluating Advanced Model on Test Data:")
loss_adv, accuracy_adv = advanced_model.evaluate(X_test_processed, y_test_encoded)
print(f"Test Accuracy: {accuracy_adv:.4f}")
--- Building Advanced Neural Network ---
Model: "sequential_1"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ dense_2 (Dense)                 │ (None, 128)            │         4,096 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout (Dropout)               │ (None, 128)            │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_3 (Dense)                 │ (None, 128)            │        16,512 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout_1 (Dropout)             │ (None, 128)            │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_4 (Dense)                 │ (None, 64)             │         8,256 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_5 (Dense)                 │ (None, 7)              │           455 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
 Total params: 29,319 (114.53 KB)
 Trainable params: 29,319 (114.53 KB)
 Non-trainable params: 0 (0.00 B)
Training Advanced Model...
(This may take longer, especially if running on CPU)
Epoch 1/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 3s 14ms/step - accuracy: 0.3750 - loss: 1.6817 - val_accuracy: 0.6076 - val_loss: 1.2214
Epoch 2/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.5966 - loss: 1.0359 - val_accuracy: 0.7400 - val_loss: 0.7269
Epoch 3/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.6866 - loss: 0.7689 - val_accuracy: 0.7967 - val_loss: 0.5380
Epoch 4/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.7684 - loss: 0.5970 - val_accuracy: 0.8227 - val_loss: 0.4362
Epoch 5/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - accuracy: 0.8098 - loss: 0.5062 - val_accuracy: 0.8629 - val_loss: 0.3604
Epoch 6/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.8128 - loss: 0.4511 - val_accuracy: 0.8889 - val_loss: 0.3022
Epoch 7/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step - accuracy: 0.8353 - loss: 0.4064 - val_accuracy: 0.8865 - val_loss: 0.2729
Epoch 8/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.8602 - loss: 0.3561 - val_accuracy: 0.9267 - val_loss: 0.2292
Epoch 9/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - accuracy: 0.8720 - loss: 0.3262 - val_accuracy: 0.9196 - val_loss: 0.2219
Epoch 10/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.8957 - loss: 0.2936 - val_accuracy: 0.9338 - val_loss: 0.1914
Epoch 11/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - accuracy: 0.8803 - loss: 0.2905 - val_accuracy: 0.9362 - val_loss: 0.1794
Epoch 12/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.8987 - loss: 0.2610 - val_accuracy: 0.9385 - val_loss: 0.1704
Epoch 13/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.9028 - loss: 0.2524 - val_accuracy: 0.9433 - val_loss: 0.1609
Epoch 14/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - accuracy: 0.9159 - loss: 0.2209 - val_accuracy: 0.9433 - val_loss: 0.1567
Epoch 15/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.9254 - loss: 0.2044 - val_accuracy: 0.9338 - val_loss: 0.1607
Epoch 16/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.9182 - loss: 0.2117 - val_accuracy: 0.9385 - val_loss: 0.1681
Epoch 17/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9070 - loss: 0.2195 - val_accuracy: 0.9433 - val_loss: 0.1471
Epoch 18/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.9313 - loss: 0.1884 - val_accuracy: 0.9433 - val_loss: 0.1493
Epoch 19/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9242 - loss: 0.1997 - val_accuracy: 0.9574 - val_loss: 0.1289
Epoch 20/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.9325 - loss: 0.1800 - val_accuracy: 0.9504 - val_loss: 0.1440
Epoch 21/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9277 - loss: 0.1960 - val_accuracy: 0.9527 - val_loss: 0.1319
Epoch 22/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9378 - loss: 0.1738 - val_accuracy: 0.9598 - val_loss: 0.1158
Epoch 23/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9437 - loss: 0.1561 - val_accuracy: 0.9480 - val_loss: 0.1642
Epoch 24/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - accuracy: 0.9502 - loss: 0.1553 - val_accuracy: 0.9480 - val_loss: 0.1410
Epoch 25/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9443 - loss: 0.1472 - val_accuracy: 0.9433 - val_loss: 0.1685
Epoch 26/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9508 - loss: 0.1355 - val_accuracy: 0.9456 - val_loss: 0.1382
Epoch 27/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9514 - loss: 0.1374 - val_accuracy: 0.9480 - val_loss: 0.1209
Epoch 28/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9573 - loss: 0.1168 - val_accuracy: 0.9504 - val_loss: 0.1352
Epoch 29/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9556 - loss: 0.1055 - val_accuracy: 0.9551 - val_loss: 0.1177
Epoch 30/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - accuracy: 0.9532 - loss: 0.1139 - val_accuracy: 0.9574 - val_loss: 0.1340
Epoch 31/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9573 - loss: 0.1155 - val_accuracy: 0.9504 - val_loss: 0.1568
Epoch 32/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - accuracy: 0.9609 - loss: 0.0995 - val_accuracy: 0.9456 - val_loss: 0.1433
Epoch 33/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - accuracy: 0.9615 - loss: 0.1009 - val_accuracy: 0.9551 - val_loss: 0.1145
Epoch 34/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9609 - loss: 0.1039 - val_accuracy: 0.9574 - val_loss: 0.1226
Epoch 35/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9609 - loss: 0.1025 - val_accuracy: 0.9527 - val_loss: 0.1431
Epoch 36/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9639 - loss: 0.0917 - val_accuracy: 0.9551 - val_loss: 0.1127
Epoch 37/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9692 - loss: 0.0963 - val_accuracy: 0.9574 - val_loss: 0.1116
Epoch 38/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9727 - loss: 0.0813 - val_accuracy: 0.9551 - val_loss: 0.1348
Epoch 39/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - accuracy: 0.9639 - loss: 0.0953 - val_accuracy: 0.9504 - val_loss: 0.1393
Epoch 40/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9656 - loss: 0.1012 - val_accuracy: 0.9574 - val_loss: 0.1200
Epoch 41/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - accuracy: 0.9639 - loss: 0.1103 - val_accuracy: 0.9598 - val_loss: 0.1143
Epoch 42/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - accuracy: 0.9692 - loss: 0.0946 - val_accuracy: 0.9527 - val_loss: 0.1289
Epoch 43/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - accuracy: 0.9698 - loss: 0.0838 - val_accuracy: 0.9551 - val_loss: 0.1214
Epoch 44/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - accuracy: 0.9656 - loss: 0.0848 - val_accuracy: 0.9527 - val_loss: 0.1328
Epoch 45/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - accuracy: 0.9686 - loss: 0.0862 - val_accuracy: 0.9598 - val_loss: 0.1224
Epoch 46/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - accuracy: 0.9668 - loss: 0.0954 - val_accuracy: 0.9504 - val_loss: 0.1250
Epoch 47/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - accuracy: 0.9698 - loss: 0.0807 - val_accuracy: 0.9598 - val_loss: 0.1214
Epoch 48/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9680 - loss: 0.0801 - val_accuracy: 0.9645 - val_loss: 0.1139
Epoch 49/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - accuracy: 0.9686 - loss: 0.0880 - val_accuracy: 0.9598 - val_loss: 0.1157
Epoch 50/50
53/53 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9710 - loss: 0.0814 - val_accuracy: 0.9669 - val_loss: 0.1127

Evaluating Advanced Model on Test Data:
14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - accuracy: 0.9669 - loss: 0.1127
Test Accuracy: 0.9669

Per class variable breakdown

In [9]:
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay

# --- Setup: Get Class Labels ---
# We need the original string labels that the y_encoder learned
# (This assumes 'y_encoder' was created in the previous step)
class_labels = y_encoder.categories_[0]

# --- 1. Simple Neural Network: Get Full Report ---
print("\n--- Simple Neural Network: Detailed Report ---")

# Get probabilities for each class
y_prob_simple = simple_model.predict(X_test_processed)
# Get the index of the highest probability class
y_pred_indices_simple = np.argmax(y_prob_simple, axis=1)
# Convert indices back to original string labels
y_pred_simple_labels = class_labels[y_pred_indices_simple]

# Now we can use sklearn's classification_report and confusion_matrix
print(classification_report(y_test, y_pred_simple_labels))

print("Confusion Matrix (Simple NN):")
cm_simple_nn = confusion_matrix(y_test, y_pred_simple_labels, labels=class_labels)
print(cm_simple_nn)


# --- 2. Advanced Neural Network: Get Full Report ---
print("\n--- Advanced Neural Network: Detailed Report ---")

# Get probabilities
y_prob_advanced = advanced_model.predict(X_test_processed)
# Get indices
y_pred_indices_advanced = np.argmax(y_prob_advanced, axis=1)
# Convert indices to labels
y_pred_advanced_labels = class_labels[y_pred_indices_advanced]

# Print the report
print(classification_report(y_test, y_pred_advanced_labels))

print("Confusion Matrix (Advanced NN):")
cm_advanced_nn = confusion_matrix(y_test, y_pred_advanced_labels, labels=class_labels)
print(cm_advanced_nn)

# --- 3. (Optional) Plot the NN Confusion Matrices ---

fig, axes = plt.subplots(1, 2, figsize=(18, 7))
fig.suptitle('Neural Network Confusion Matrices', fontsize=16)

# Plot for Simple NN
disp_simple = ConfusionMatrixDisplay(confusion_matrix=cm_simple_nn, display_labels=class_labels)
disp_simple.plot(ax=axes[0], xticks_rotation='vertical')
axes[0].set_title('Simple NN')

# Plot for Advanced NN
disp_advanced = ConfusionMatrixDisplay(confusion_matrix=cm_advanced_nn, display_labels=class_labels)
disp_advanced.plot(ax=axes[1], xticks_rotation='vertical')
axes[1].set_title('Advanced NN')

plt.tight_layout(rect=[0, 0.03, 1, 0.95])
#plt.savefig('nn_confusion_matrices.png')
#print("\nSaved plot of NN confusion matrices to 'nn_confusion_matrices.png'")
--- Simple Neural Network: Detailed Report ---
14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step
                     precision    recall  f1-score   support

Insufficient_Weight       0.88      0.94      0.91        54
      Normal_Weight       0.75      0.66      0.70        58
     Obesity_Type_I       0.89      0.96      0.92        70
    Obesity_Type_II       0.98      0.98      0.98        60
   Obesity_Type_III       1.00      0.98      0.99        65
 Overweight_Level_I       0.71      0.76      0.73        58
Overweight_Level_II       0.79      0.72      0.76        58

           accuracy                           0.86       423
          macro avg       0.86      0.86      0.86       423
       weighted avg       0.86      0.86      0.86       423

Confusion Matrix (Simple NN):
[[51  3  0  0  0  0  0]
 [ 7 38  0  0  0 11  2]
 [ 0  0 67  0  0  1  2]
 [ 0  0  1 59  0  0  0]
 [ 0  0  0  1 64  0  0]
 [ 0  7  0  0  0 44  7]
 [ 0  3  7  0  0  6 42]]

--- Advanced Neural Network: Detailed Report ---
14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step
                     precision    recall  f1-score   support

Insufficient_Weight       0.96      1.00      0.98        54
      Normal_Weight       0.93      0.88      0.90        58
     Obesity_Type_I       0.97      1.00      0.99        70
    Obesity_Type_II       1.00      0.98      0.99        60
   Obesity_Type_III       1.00      1.00      1.00        65
 Overweight_Level_I       0.91      0.91      0.91        58
Overweight_Level_II       0.98      0.98      0.98        58

           accuracy                           0.97       423
          macro avg       0.97      0.97      0.97       423
       weighted avg       0.97      0.97      0.97       423

Confusion Matrix (Advanced NN):
[[54  0  0  0  0  0  0]
 [ 2 51  0  0  0  5  0]
 [ 0  0 70  0  0  0  0]
 [ 0  0  1 59  0  0  0]
 [ 0  0  0  0 65  0  0]
 [ 0  4  0  0  0 53  1]
 [ 0  0  1  0  0  0 57]]
No description has been provided for this image

Comparison of all the models¶

In [11]:
from sklearn.neighbors import KNeighborsClassifier

# 1. Create the full pipeline with the preprocessor and the classifier
knn_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', KNeighborsClassifier(n_neighbors=7))
])

# 2. Train the KNN model
print("Training KNN model...")
knn_pipeline.fit(X_train, y_train)

# 3. Make predictions on the test set
y_pred_knn = knn_pipeline.predict(X_test)

# 4. Evaluate the model
print("\n--- K-Nearest Neighbors (KNN) Results ---")
print(f"Accuracy: {accuracy_score(y_test, y_pred_knn):.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred_knn))


from sklearn.svm import SVC

# 1. Create the full pipeline
svm_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', SVC(kernel='linear', C=1.0, random_state=42)) # 'C=1.0' is the regularization parameter
])

# 2. Train the SVM model
print("Training SVM model...")
svm_pipeline.fit(X_train, y_train)

# 3. Make predictions on the test set
y_pred_svm = svm_pipeline.predict(X_test)

# 4. Evaluate the model
print("\n--- Support Vector Machine (SVM) Results ---")
print(f"Accuracy: {accuracy_score(y_test, y_pred_svm):.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred_svm))


from sklearn.ensemble import RandomForestClassifier

# 1. Create the full pipeline
rf_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(n_estimators=200, random_state=42, n_jobs=-1))
])

# 2. Train the Random Forest model
print("Training Random Forest model...")
rf_pipeline.fit(X_train, y_train)

# 3. Make predictions on the test set
y_pred_rf = rf_pipeline.predict(X_test)

# 4. Evaluate the model
print("\n--- Random Forest Results ---")
print(f"Accuracy: {accuracy_score(y_test, y_pred_rf):.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred_rf))
Training KNN model...
c:\Users\Vikas\anaconda3\Lib\site-packages\sklearn\neighbors\_classification.py:238: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
--- K-Nearest Neighbors (KNN) Results ---
Accuracy: 0.8109

Classification Report:
                     precision    recall  f1-score   support

Insufficient_Weight       0.73      0.94      0.82        54
      Normal_Weight       0.79      0.40      0.53        58
     Obesity_Type_I       0.76      0.94      0.84        70
    Obesity_Type_II       0.91      0.97      0.94        60
   Obesity_Type_III       0.98      1.00      0.99        65
 Overweight_Level_I       0.73      0.69      0.71        58
Overweight_Level_II       0.77      0.69      0.73        58

           accuracy                           0.81       423
          macro avg       0.81      0.80      0.79       423
       weighted avg       0.81      0.81      0.80       423

Training SVM model...

--- Support Vector Machine (SVM) Results ---
Accuracy: 0.9551

Classification Report:
                     precision    recall  f1-score   support

Insufficient_Weight       0.95      1.00      0.97        54
      Normal_Weight       0.93      0.86      0.89        58
     Obesity_Type_I       0.99      0.97      0.98        70
    Obesity_Type_II       0.98      0.98      0.98        60
   Obesity_Type_III       1.00      0.98      0.99        65
 Overweight_Level_I       0.88      0.91      0.90        58
Overweight_Level_II       0.95      0.97      0.96        58

           accuracy                           0.96       423
          macro avg       0.95      0.95      0.95       423
       weighted avg       0.96      0.96      0.95       423

c:\Users\Vikas\anaconda3\Lib\site-packages\sklearn\utils\validation.py:1339: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
Training Random Forest model...
c:\Users\Vikas\anaconda3\Lib\site-packages\sklearn\base.py:1473: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return fit_method(estimator, *args, **kwargs)
--- Random Forest Results ---
Accuracy: 0.9433

Classification Report:
                     precision    recall  f1-score   support

Insufficient_Weight       1.00      0.94      0.97        54
      Normal_Weight       0.78      0.93      0.85        58
     Obesity_Type_I       0.97      0.96      0.96        70
    Obesity_Type_II       1.00      0.98      0.99        60
   Obesity_Type_III       1.00      0.98      0.99        65
 Overweight_Level_I       0.91      0.86      0.88        58
Overweight_Level_II       0.96      0.93      0.95        58

           accuracy                           0.94       423
          macro avg       0.95      0.94      0.94       423
       weighted avg       0.95      0.94      0.94       423

In [14]:
# --- 1. Get Predictions for all models ---
# (We already have y_pred_simple_labels and y_pred_advanced_labels from above)

# Get predictions from the sklearn pipelines
y_pred_knn = knn_pipeline.predict(X_test)
y_pred_svm = svm_pipeline.predict(X_test)
y_pred_rf = rf_pipeline.predict(X_test)

# --- 2. Store reports in a dictionary ---
model_predictions = {
    'KNN': y_pred_knn,
    'SVM': y_pred_svm,
    'Random Forest': y_pred_rf,
    'Simple NN': y_pred_simple_labels,
    'Advanced NN': y_pred_advanced_labels
}

# --- 3. Build a DataFrame of F1-Scores ---
f1_scores = {}
for model_name, y_pred in model_predictions.items():
    # Get the classification report as a dictionary
    report = classification_report(y_test, y_pred, output_dict=True)
    
    # Extract the f1-score for each class (excluding averages)
    class_f1_scores = {}
    for label, metrics in report.items():
        if label in class_labels: # Only include our target classes
            class_f1_scores[label] = metrics['f1-score']
            
    f1_scores[model_name] = class_f1_scores

# Convert the dictionary to a DataFrame
f1_df = pd.DataFrame(f1_scores).T  # Transpose to get models as rows, classes as columns

# --- 4. Plot the Heatmap ---
plt.figure(figsize=(14, 8))
sns.heatmap(f1_df, annot=True, cmap='YlGnBu', fmt='.3f', linewidths=.5)
plt.title('Model Comparison: F1-Scores per Class', fontsize=16)
plt.xlabel('Obesity Type (Class)', fontsize=12)
plt.ylabel('Model', fontsize=12)
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)
plt.tight_layout()
#plt.savefig('model_comparison_heatmap.png')

print("\nSaved model comparison heatmap to 'model_comparison_heatmap.png'")
f1_df.head()
Saved model comparison heatmap to 'model_comparison_heatmap.png'
Out[14]:
Insufficient_Weight Normal_Weight Obesity_Type_I Obesity_Type_II Obesity_Type_III Overweight_Level_I Overweight_Level_II
KNN 0.822581 0.528736 0.840764 0.935484 0.992366 0.707965 0.727273
SVM 0.972973 0.892857 0.978417 0.983333 0.992248 0.898305 0.957265
Random Forest 0.971429 0.850394 0.964029 0.991597 0.992248 0.884956 0.947368
Simple NN 0.910714 0.697248 0.924138 0.983333 0.992248 0.733333 0.756757
Advanced NN 0.981818 0.902655 0.985915 0.991597 1.000000 0.913793 0.982759
No description has been provided for this image