📖 Prediction¶
- ⚙️ Linear Regression
- 🧊 Ridge Regression
- 🔥 Lasso Regression
- 🧃 ElasticNet
- 🌳 Decision Trees
- 🪵 Random Forests
- 🚀 Gradient Boosting (XGBoost, LightGBM)
- ⚙️ Support Vector Machines (SVR)
- 🧠 K-Nearest Neighbors (KNN)
- 🤖 Neural Networks (MLP Regressor)
🧪 Data Setup¶
📥 Load Dataset¶
📖 Click to Expand
- MedInc: Median income in block group
- HouseAge: Median house age in block group
- AveRooms: Average number of rooms per household
- AveBedrms: Average number of bedrooms per household
- Population: Block group population
- AveOccup: Average number of household members
- Latitude: Block group latitude
- Longitude: Block group longitude
- Target: Median house value (in $100,000s)
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
from sklearn.datasets import fetch_california_housing
import pandas as pd
# Load dataset
df = fetch_california_housing(as_frame=True).frame
target_col = 'MedHouseVal'
# Preview dataset
df.head()
 | MedInc | HouseAge | AveRooms | AveBedrms | Population | AveOccup | Latitude | Longitude | MedHouseVal |
---|---|---|---|---|---|---|---|---|---|
0 | 8.3252 | 41.0 | 6.984127 | 1.023810 | 322.0 | 2.555556 | 37.88 | -122.23 | 4.526 |
1 | 8.3014 | 21.0 | 6.238137 | 0.971880 | 2401.0 | 2.109842 | 37.86 | -122.22 | 3.585 |
2 | 7.2574 | 52.0 | 8.288136 | 1.073446 | 496.0 | 2.802260 | 37.85 | -122.24 | 3.521 |
3 | 5.6431 | 52.0 | 5.817352 | 1.073059 | 558.0 | 2.547945 | 37.85 | -122.25 | 3.413 |
4 | 3.8462 | 52.0 | 6.281853 | 1.081081 | 565.0 | 2.181467 | 37.85 | -122.25 | 3.422 |
📊 Data Characteristics Dictionary
📖 Click to Expand
This section initializes the data characteristics dictionary, which will store various metadata about the dataset, including details about the target variable, features, data size, and relationships between features and the target.
The dictionary contains the following key sections:
- 🎯 Target Variable:
- Distribution: Specifies whether the target variable has a normal or skewed distribution.
- Outliers: Flag indicating if outliers are detected in the target variable (True or False).
- 🔧 Features:
- Type: A dictionary specifying whether each feature is categorical or continuous.
- Correlation: A dictionary indicating whether each feature is significantly correlated with the target variable (e.g., {"AveRooms": True, "HouseAge": False}).
- Multicollinearity: Flag indicating whether there is high multicollinearity among features (True or False).
- Outliers: A dictionary showing the number of outliers detected in each feature (e.g., {"AveRooms": 3, "HouseAge": 0}).
- Missing Data: A dictionary indicating the percentage of missing data for each feature (e.g., {"AveRooms": 5.0, "Population": 2.5}).
- 📈 Data Size:
- Samples: The number of rows (samples) in the dataset.
- Features: The number of columns (features) in the dataset.
- 🔍 Relationships:
- Linear Relationships: A dictionary that flags features with significant linear relationships (Pearson correlation > 0.7) with the target variable.
- Non-Linear Relationships: A dictionary that flags features with non-linear relationships with the target variable.
This dictionary will be updated dynamically as we analyze the dataset. It serves as a summary of key dataset properties to guide further analysis and model selection decisions.
# Initialize the data characteristics dictionary for prediction
data_characteristics = {
"target_variable": {
"distribution": None, # Possible values: "normal", "skewed"
"outliers": None # Possible values: True (outliers detected in target variable), False (no outliers in target variable)
},
"features": {
"type": None, # Dictionary with feature names as keys and types as values ("categorical", "continuous")
"correlation": None, # Dictionary with feature names as keys and boolean values indicating whether they are significantly correlated with target (e.g., {"AveRooms": True, "HouseAge": False})
"multicollinearity": None, # Possible values: True (high multicollinearity detected), False (no multicollinearity)
"outliers": None, # Dictionary with feature names as keys and the number of outliers as values (e.g., {"AveRooms": 3, "HouseAge": 0})
"missing_data": None # Dictionary with feature names as keys and the percentage of missing data as values (e.g., {"AveRooms": 5.0, "Population": 2.5})
},
"data_size": {
"samples": None, # Number of rows (samples) in the dataset
"features": None # Number of columns (features) in the dataset
},
"relationships": {
"feature_target_linear_relationships": {
# Example: "AveRooms": True means there is a linear relationship between the feature 'AveRooms' and the target variable
},
"feature_target_non_linear_relationships": {
# Example: "HouseAge": True means there is a non-linear relationship between the feature 'HouseAge' and the target variable
}
}
}
🔎 EDA¶
# Import necessary libraries
import numpy as np
import pandas as pd
from scipy import stats
from sklearn.preprocessing import LabelEncoder
# Assuming df is your dataset and target_col is your target column
# For example: target_col = 'MedHouseVal'
# 1. Populate Target Variable Characteristics
# ===========================================
# Distribution check (Target)
target_dist = df[target_col].describe()
target_skew = df[target_col].skew()
if abs(target_skew) > 1:
    data_characteristics["target_variable"]["distribution"] = "skewed"
else:
    data_characteristics["target_variable"]["distribution"] = "normal"
# Check for outliers in the Target
z_scores_target = np.abs(stats.zscore(df[target_col]))
outliers_target = np.where(z_scores_target > 3)
data_characteristics["target_variable"]["outliers"] = len(outliers_target[0]) > 0 # True if outliers are present
# 2. Populate Features Characteristics
# ====================================
# Initialize dictionaries for features
feature_types = {}
feature_correlations = {}
outliers_features = {}
missing_data_features = {}
# Feature Types and Correlations
for col in df.columns:
if df[col].dtype == 'object': # Categorical feature
feature_types[col] = "categorical"
# Encoding categorical variables
le = LabelEncoder()
df[col] = le.fit_transform(df[col])
else: # Continuous feature
feature_types[col] = "continuous"
# Correlation with the target (Pearson)
if col != target_col: # Don't check correlation for the target column itself
corr_value = df[target_col].corr(df[col])
feature_correlations[col] = abs(corr_value) > 0.3 # Flag if correlation is significant
# Outliers detection (Z-score method)
z_scores_feature = np.abs(stats.zscore(df[col]))
feature_outliers = np.where(z_scores_feature > 3) # Points with Z-score > 3 are outliers
outliers_features[col] = len(feature_outliers[0]) # Store the number of outliers in the feature
# Missing data calculation
missing_data = df[col].isnull().sum() / len(df) * 100
if missing_data > 0:
missing_data_features[col] = missing_data # Store percentage of missing data for each feature
# Update the dictionary with feature-related information
data_characteristics["features"]["type"] = feature_types
data_characteristics["features"]["correlation"] = feature_correlations
data_characteristics["features"]["outliers"] = outliers_features
data_characteristics["features"]["missing_data"] = missing_data_features
# 3. Check for Multicollinearity
# =======================================
correlation_matrix = df.drop(columns=[target_col]).corr()  # feature-to-feature correlations only
multicollinearity_flag = False
for i in range(len(correlation_matrix.columns)):
    for j in range(i):
        if abs(correlation_matrix.iloc[i, j]) > 0.9:
            multicollinearity_flag = True
            break
    if multicollinearity_flag:
        break
data_characteristics["features"]["multicollinearity"] = multicollinearity_flag
# 4. Populate Data Size Characteristics
# ====================================
data_characteristics["data_size"]["samples"] = df.shape[0]
data_characteristics["data_size"]["features"] = df.shape[1]
# 5. Populate Relationships between Features and Target
# =======================================================
linear_relationships = {}
non_linear_relationships = {}
for col in df.columns:
if col != target_col:
        # Flag as linear if |Pearson correlation| >= 0.7; anything below is treated here as "potentially non-linear" — a crude heuristic, not a formal test
corr_value = df[target_col].corr(df[col])
if abs(corr_value) >= 0.7:
linear_relationships[col] = True
non_linear_relationships[col] = False
else:
linear_relationships[col] = False
non_linear_relationships[col] = True
# Update the dictionary with the relationships
data_characteristics["relationships"]["feature_target_linear_relationships"] = linear_relationships
data_characteristics["relationships"]["feature_target_non_linear_relationships"] = non_linear_relationships
from pprint import pprint
pprint(data_characteristics)
{'data_size': {'features': 9, 'samples': 20640},
 'features': {'correlation': {'AveBedrms': False, 'AveOccup': False, 'AveRooms': False,
                              'HouseAge': False, 'Latitude': False, 'Longitude': False,
                              'MedInc': True, 'Population': False},
              'missing_data': {},
              'multicollinearity': True,
              'outliers': {'AveBedrms': 145, 'AveOccup': 8, 'AveRooms': 133, 'HouseAge': 0,
                           'Latitude': 0, 'Longitude': 0, 'MedHouseVal': 0, 'MedInc': 345,
                           'Population': 342},
              'type': {'AveBedrms': 'continuous', 'AveOccup': 'continuous', 'AveRooms': 'continuous',
                       'HouseAge': 'continuous', 'Latitude': 'continuous', 'Longitude': 'continuous',
                       'MedHouseVal': 'continuous', 'MedInc': 'continuous', 'Population': 'continuous'}},
 'relationships': {'feature_target_linear_relationships': {'AveBedrms': False, 'AveOccup': False,
                                                           'AveRooms': False, 'HouseAge': False,
                                                           'Latitude': False, 'Longitude': False,
                                                           'MedInc': False, 'Population': False},
                   'feature_target_non_linear_relationships': {'AveBedrms': True, 'AveOccup': True,
                                                               'AveRooms': True, 'HouseAge': True,
                                                               'Latitude': True, 'Longitude': True,
                                                               'MedInc': True, 'Population': True}},
 'target_variable': {'distribution': 'normal', 'outliers': False}}
📐 Linearity - LR Assumption¶
📖 Click to Expand
- The linearity assumption states that the relationship between each predictor and the outcome is linear.
- Violations mean the model may underfit or misestimate relationships.
- A common diagnostic is a scatter plot of predicted values vs residuals — if the pattern is random (no curve), linearity holds.
- You can also use partial regression plots or added variable plots to assess individual feature relationships.
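Below is a hedged sketch of the added-variable (partial regression) view using statsmodels' plot_partregress_grid. It assumes the df and target_col objects defined earlier; the helper name X_lin is introduced here for illustration. It complements the raw feature-vs-target scatter plots in the next cell.
# Hedged sketch: added-variable (partial regression) plots via statsmodels,
# assuming `df` and `target_col` are already defined as above.
import matplotlib.pyplot as plt
import statsmodels.api as sm

X_lin = df.drop(columns=[target_col])
ols_fit = sm.OLS(df[target_col], sm.add_constant(X_lin)).fit()

fig = plt.figure(figsize=(12, 10))
sm.graphics.plot_partregress_grid(ols_fit, fig=fig)  # one added-variable plot per feature
plt.tight_layout()
plt.show()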
import matplotlib.pyplot as plt
import seaborn as sns
# Define X and y using target_col
X = df.drop(columns=[target_col])
y = df[target_col]
# Plot each feature vs target
fig, axes = plt.subplots(nrows=3, ncols=3, figsize=(15, 12));
axes = axes.flatten();
for idx, col in enumerate(X.columns):
sns.scatterplot(x=df[col], y=y, ax=axes[idx], alpha=0.6);
axes[idx].set_title(f'{target_col} vs {col}');
axes[idx].set_xlabel(col);
axes[idx].set_ylabel(target_col);
plt.tight_layout();
plt.suptitle('Feature-wise Linearity Check', fontsize=16, y=1.02);
plt.show();
📉 Homoscedasticity - LR Assumption¶
📖 Click to Expand
- Homoscedasticity means residuals have constant variance across all levels of predicted values.
- If variance increases or decreases with prediction size, you have heteroscedasticity.
- This violates OLS assumptions and leads to inefficient estimates.
- To detect it, plot residuals vs predicted values after model fit. A fan or cone shape indicates heteroscedasticity.
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
# Redefine X and y in case not done yet
X = df.drop(columns=[target_col])
y = df[target_col]
# Split and fit model
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
lr = LinearRegression()
lr.fit(X_train, y_train)
# Get residuals
y_pred = lr.predict(X_train)
residuals = y_train - y_pred
# Plot residuals vs predicted
plt.figure(figsize=(8, 5));
sns.scatterplot(x=y_pred, y=residuals, alpha=0.6);
plt.axhline(0, linestyle='--', color='red');
plt.xlabel("Predicted Values");
plt.ylabel("Residuals");
plt.title("Homoscedasticity Check: Residuals vs Predicted");
plt.show();
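As a formal complement to the visual check above, a Breusch-Pagan test can be run on the same residuals. This is a hedged sketch reusing the residuals and X_train objects from the cell above.
# Hedged sketch: Breusch-Pagan test for heteroscedasticity,
# reusing `residuals` and `X_train` from the cell above.
import statsmodels.api as sm
from statsmodels.stats.diagnostic import het_breuschpagan

bp_lm, bp_pvalue, _, _ = het_breuschpagan(residuals, sm.add_constant(X_train))
print(f"Breusch-Pagan LM statistic: {bp_lm:.2f}, p-value: {bp_pvalue:.4f}")
if bp_pvalue < 0.05:
    print("⚠️ Evidence of heteroscedasticity — residual variance changes with the predictors.")
else:
    print("✅ No strong evidence of heteroscedasticity.")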
📊 Normality of Residuals - LR Assumption¶
📖 Click to Expand
- This assumption says that residuals (errors) should be normally distributed.
- It’s especially important when constructing confidence intervals or conducting hypothesis tests on coefficients.
- While model fit may still work with non-normal residuals, statistical inference will be less valid.
- We check this using:
- A histogram or KDE plot of residuals
- A Q-Q plot to compare against a theoretical normal distribution
import scipy.stats as stats
# Residuals already computed as: residuals = y_train - y_pred
# Plot histogram + KDE of residuals
plt.figure(figsize=(12, 5));
plt.subplot(1, 2, 1);
sns.histplot(residuals, kde=True, bins=30);
plt.title("Histogram of Residuals");
# Q-Q plot
plt.subplot(1, 2, 2);
stats.probplot(residuals, dist="norm", plot=plt);
plt.title("Q-Q Plot");
plt.tight_layout();
plt.show();
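As a formal complement to the plots, a normality test can be run on the same residuals. Note that with roughly 15k training rows, such tests flag even tiny departures from normality, so read the p-value alongside the Q-Q plot rather than in isolation. A hedged sketch:
# Hedged sketch: D'Agostino–Pearson normality test on the residuals from above.
from scipy import stats

k2_stat, k2_pvalue = stats.normaltest(residuals)
print(f"D'Agostino–Pearson K²: {k2_stat:.2f}, p-value: {k2_pvalue:.4f}")
if k2_pvalue < 0.05:
    print("⚠️ Residuals deviate from normality — coefficient inference may be less reliable.")
else:
    print("✅ No strong evidence against normality of residuals.")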
🔗 Multicollinearity - LR Assumption¶
📖 Click to Expand
- Multicollinearity refers to high correlation between predictor variables.
- It doesn't affect predictions but destabilizes coefficient estimates, making interpretation unreliable.
- Common detection methods:
- Variance Inflation Factor (VIF): Values > 5 (or 10) signal multicollinearity
- Correlation matrix: For quick pairwise checks
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor
import numpy as np
# Compute VIF for each feature
X_with_const = sm.add_constant(X) # statsmodels needs constant for intercept
vif_data = pd.DataFrame()
vif_data["Feature"] = X.columns
vif_data["VIF"] = [variance_inflation_factor(X_with_const.values, i + 1) for i in range(X.shape[1])]
vif_data.sort_values("VIF", ascending=False)
 | Feature | VIF |
---|---|---|
6 | Latitude | 9.297624 |
7 | Longitude | 8.962263 |
2 | AveRooms | 8.342786 |
3 | AveBedrms | 6.994995 |
0 | MedInc | 2.501295 |
1 | HouseAge | 1.241254 |
4 | Population | 1.138125 |
5 | AveOccup | 1.008324 |
corr = df[X.columns].corr()
plt.figure(figsize=(10, 8));
sns.heatmap(corr, annot=True, fmt=".2f", cmap="coolwarm");
plt.title("Feature Correlation Matrix");
plt.show();
from termcolor import colored
# identify features with high VIF (e.g., > 5)
high_vif_features = vif_data[vif_data["VIF"] > 5]["Feature"].tolist()
drop_features = False
if drop_features and high_vif_features:
    # Drop features with high VIF (e.g., > 5)
    X_train = X_train.drop(columns=high_vif_features)
    X_test = X_test.drop(columns=high_vif_features)
    # Notify the user with colored text
    print(colored(f"Dropped features due to high VIF (> 5): {', '.join(high_vif_features)}", 'red'))
elif high_vif_features:
    print(colored(f"High-VIF features (> 5) identified but kept: {', '.join(high_vif_features)}", 'yellow'))
else:
    print(colored("No features were dropped due to high VIF.", 'green'))
🔄 Independence of Residuals (optional)¶
📖 Click to Expand
- The independence assumption means that residuals should not be correlated with each other.
- This mainly applies to time series or clustered data — it's usually not a concern for cross-sectional datasets like California Housing.
- If applicable, test with:
- Durbin-Watson test (values close to 2 indicate no autocorrelation)
- Plot residuals over observation index to visually inspect patterns
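For reference, the Durbin-Watson statistic computed below is the standard ratio of successive residual differences to total residual variation:
$DW = \frac{\sum_{t=2}^{n} (e_t - e_{t-1})^2}{\sum_{t=1}^{n} e_t^2}$
where $e_t$ is the residual for observation $t$; values near 2 indicate no first-order autocorrelation.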
from statsmodels.stats.stattools import durbin_watson
# Run Durbin-Watson test on residuals
dw_stat = durbin_watson(residuals)
print(f"Durbin-Watson statistic: {dw_stat:.3f}")
# Simple interpretation for business context
if 1.5 <= dw_stat <= 2.5:
print("✅ Residuals appear to be independent — no patterns over observations.")
else:
print("⚠️ Residuals may not be independent — investigate possible autocorrelation.")
# Rule of thumb:
# ~2.0 → No autocorrelation
# <1.5 or >2.5 → Possible autocorrelation
Durbin-Watson statistic: 1.963
✅ Residuals appear to be independent — no patterns over observations.
🛠️ Feature Engineering¶
📖 Click to Expand
- Why engineer features?
- Raw features may not capture real-world relationships well.
- Ratios, interactions, or log transforms can reveal hidden patterns.
- Good feature engineering often matters more than model choice.
- Omitted here
📖 Click to Expand - Scaling
- Why scale features?
- Many models (like regression, SVM, KNN) assume features are on similar scales.
- Scaling avoids one large-valued feature dominating others.
- It's essential for gradient-based optimization and distance-based models.
# from sklearn.preprocessing import StandardScaler
# Initialize the scaler
# scaler = StandardScaler()
# Fit on training data and transform
# X_scaled = scaler.fit_transform(X)
# If you want a DataFrame back:
# X_scaled = pd.DataFrame(X_scaled, columns=X.columns, index=X.index)
# Example: create ratio or interaction features
# df["RoomsPerPerson"] = df["AveRooms"] / (df["Population"] + 1)
# df["BedToRoomRatio"] = df["AveBedrms"] / (df["AveRooms"] + 1)
# Always inspect new features:
# df[["RoomsPerPerson", "BedToRoomRatio"]].describe()
🧹 Preprocessing¶
✂️ Train-Test Split¶
from sklearn.model_selection import train_test_split
# Define features and target
X = df.drop(columns=[target_col])
y = df[target_col]
# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print("X_train: \n", X_train)
print("X_test: \n", X_test)
print("y_train: \n", y_train)
print("y_test: \n", y_test)
X_train: MedInc HouseAge AveRooms AveBedrms Population AveOccup Latitude \ 14196 3.2596 33.0 5.017657 1.006421 2300.0 3.691814 32.71 8267 3.8125 49.0 4.473545 1.041005 1314.0 1.738095 33.77 17445 4.1563 4.0 5.645833 0.985119 915.0 2.723214 34.66 14265 1.9425 36.0 4.002817 1.033803 1418.0 3.994366 32.69 2271 3.5542 43.0 6.268421 1.134211 874.0 2.300000 36.78 ... ... ... ... ... ... ... ... 11284 6.3700 35.0 6.129032 0.926267 658.0 3.032258 33.78 11964 3.0500 33.0 6.868597 1.269488 1753.0 3.904232 34.02 5390 2.9344 36.0 3.986717 1.079696 1756.0 3.332068 34.03 860 5.7192 15.0 6.395349 1.067979 1777.0 3.178891 37.58 15795 2.5755 52.0 3.402576 1.058776 2619.0 2.108696 37.77 Longitude 14196 -117.03 8267 -118.16 17445 -120.48 14265 -117.11 2271 -119.80 ... ... 11284 -117.96 11964 -117.43 5390 -118.38 860 -121.96 15795 -122.42 [16512 rows x 8 columns] X_test: MedInc HouseAge AveRooms AveBedrms Population AveOccup Latitude \ 20046 1.6812 25.0 4.192201 1.022284 1392.0 3.877437 36.06 3024 2.5313 30.0 5.039384 1.193493 1565.0 2.679795 35.14 15663 3.4801 52.0 3.977155 1.185877 1310.0 1.360332 37.80 20484 5.7376 17.0 6.163636 1.020202 1705.0 3.444444 34.28 9814 3.7250 34.0 5.492991 1.028037 1063.0 2.483645 36.62 ... ... ... ... ... ... ... ... 15362 4.6050 16.0 7.002212 1.066372 1351.0 2.988938 33.36 16623 2.7266 28.0 6.131915 1.256738 1650.0 2.340426 35.36 18086 9.2298 25.0 7.237676 0.947183 1585.0 2.790493 37.31 2144 2.7850 36.0 5.289030 0.983122 1227.0 2.588608 36.77 3665 3.5521 17.0 3.988839 1.033482 1671.0 3.729911 34.22 Longitude 20046 -119.01 3024 -119.46 15663 -122.44 20484 -118.72 9814 -121.93 ... ... 15362 -117.22 16623 -120.83 18086 -122.05 2144 -119.76 3665 -118.37 [4128 rows x 8 columns] y_train: 14196 1.030 8267 3.821 17445 1.726 14265 0.934 2271 0.965 ... 11284 2.292 11964 0.978 5390 2.221 860 2.835 15795 3.250 Name: MedHouseVal, Length: 16512, dtype: float64 y_test: 20046 0.47700 3024 0.45800 15663 5.00001 20484 2.18600 9814 2.78000 ... 15362 2.63300 16623 2.66800 18086 5.00001 2144 0.72300 3665 1.51500 Name: MedHouseVal, Length: 4128, dtype: float64
🧪 Baseline Model¶
📖 Click to Expand
🧠 Why Track best_model_info?
- In real-world pipelines, it's critical to:
- Compare models not just by accuracy, but a full suite of metrics.
- Store the actual model object, hyperparameters, and diagnostics in one place.
- Ensure only the best-performing model (based on chosen metrics) is promoted forward.
import numpy as np
# Initialize Central tracker dictionary to track best model details upon iterations
best_model_info = {
"name": None,
"model": None,
"metrics": {
"train": {
"r2": -np.inf, # R² (higher is better)
"adjusted_r2": -np.inf, # Adjusted R² (higher is better)
"mape": np.inf, # Mean Absolute Percentage Error (lower is better)
"rmse": np.inf, # Root Mean Squared Error (lower is better)
"mse": np.inf, # Mean Squared Error (lower is better)
"mae": np.inf, # Mean Absolute Error (lower is better)
},
"test": {
"r2": -np.inf,
"adjusted_r2": -np.inf,
"mape": np.inf,
"rmse": np.inf,
"mse": np.inf,
"mae": np.inf,
}
},
"hyperparameters": None
}
# Dictionary to store all model performance results for comparison
model_results = {}
📖 Click to Expand - Success Metric
Metric to decide which model is "best"
Metric | Description | When to Use | Example |
---|---|---|---|
r2 | R² score (higher is better, reflects model fit) | Understand how well the model explains the variance in the target variable | Predicting house prices where you want to know how well the features explain the price variance |
adjusted_r2 | Adjusted R² (corrects R² for number of predictors in model) | Compare models while accounting for model complexity | Real estate pricing models with multiple features (e.g., square footage, neighborhood) |
mape | Mean Absolute Percentage Error (lower is better, interpretable as % error) | When % error is meaningful and easy to communicate | Forecasting sales where a 5% error matters for decision-making |
rmse | Root Mean Squared Error (lower is better, penalizes large errors) | When large errors are especially costly and need stronger penalty | Energy consumption forecasting where spikes matter more than small fluctuations |
mse | Mean Squared Error (lower is better, penalizes large errors) | Focus on reducing all squared errors, though less interpretable | Stock price prediction where minimizing overall error magnitude is critical |
mae | Mean Absolute Error (lower is better, interpretable in original units) | When you want error in same units as target and easy to explain | Predicting monthly household spending (e.g., error in dollars) |
# Success metric used to select the best model
success_metric = "r2" # or "rmse", depending on your goals
# success_split = "test" # "train" or "test"
from sklearn.dummy import DummyRegressor
# Fit a dummy regressor as a baseline (predicting the mean)
dummy_regressor = DummyRegressor(strategy="mean") # You can also try "median"
dummy_regressor.fit(X_train, y_train)
DummyRegressor()
# Predict on both train and test
y_train_pred = dummy_regressor.predict(X_train)
y_test_pred = dummy_regressor.predict(X_test)
residuals_test = y_test - y_test_pred
residuals_train = y_train - y_train_pred
🆚 Predicted vs Actual¶
import matplotlib.pyplot as plt
import seaborn as sns
def plot_predicted_vs_actual(y_true, y_pred, title="Predicted vs Actual", data_type="Test Data"):
"""
Plots the predicted values vs the actual values.
Parameters:
y_true : array-like
Actual target values.
y_pred : array-like
Predicted target values.
title : str
Title of the plot (default is "Predicted vs Actual").
data_type : str
Label for the dataset type (default is "Test Data").
"""
plt.figure(figsize=(8, 6))
# Scatter plot for actual vs predicted values
sns.scatterplot(x=y_true, y=y_pred, alpha=0.6, label=f"{data_type} Predictions")
# Plot labels and title
plt.xlabel("Actual Values", fontsize=12)
plt.ylabel("Predicted Values", fontsize=12)
plt.title(title, fontsize=14)
# Add a legend
plt.legend(loc="upper left", fontsize=10)
# Add gridlines for better visualization
plt.grid(True)
# Tight layout
plt.tight_layout()
# Show the plot
plt.show()
plot_predicted_vs_actual(y_test, y_test_pred, title="Baseline Model - Test Predicted vs Actual", data_type="Test Data")
plot_predicted_vs_actual(y_train, y_train_pred, title="Baseline Model - Train Predicted vs Actual", data_type="Train Data")
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
def plot_residuals_vs_predicted(y_true, y_pred, label="Data", plot_qq=True):
"""
Plots residuals vs predicted values and optionally a Q-Q plot of residuals.
Parameters:
y_true : array-like
Actual target values.
y_pred : array-like
Predicted target values.
label : str
Label for the plot (e.g., "Train Data", "Test Data").
plot_qq : bool
Whether to plot the Q-Q plot of residuals (default is True).
"""
# Calculate residuals
residuals = y_true - y_pred
# Plot: Residuals vs Predicted
plt.figure(figsize=(8, 5))
sns.scatterplot(x=y_pred, y=residuals, alpha=0.6)
plt.axhline(0, linestyle='--', color='red')
plt.xlabel("Predicted Values")
plt.ylabel("Residuals")
plt.title(f"Residuals vs Predicted - {label} (Homoscedasticity Check)")
plt.tight_layout()
plt.show()
# Optionally, Q-Q plot of residuals
if plot_qq:
plt.figure(figsize=(6, 6))
stats.probplot(residuals, dist="norm", plot=plt)
plt.title(f"Q-Q Plot of Residuals - {label}")
plt.tight_layout()
plt.show()
plot_residuals_vs_predicted(y_test, y_test_pred, label="Test Data", plot_qq=True)
plot_residuals_vs_predicted(y_train, y_train_pred, label="Train Data", plot_qq=True)
📏 Evaluation Metrics¶
📖 Click to Expand
- R² (R-squared): Measures how well the model explains the variance in the target variable. A higher R² indicates better model fit. A value of 1 indicates perfect fit, while 0 indicates the model doesn't explain any variance.
- Adjusted R²: Similar to R² but adjusts for the number of predictors in the model. Useful when comparing models with different numbers of features.
- Mean Absolute Error (MAE): The average of the absolute differences between predicted and actual values. It provides a straightforward interpretation of model performance in the same units as the target variable.
- Mean Squared Error (MSE): The average of the squared differences between predicted and actual values. MSE penalizes larger errors more than MAE.
- Root Mean Squared Error (RMSE): The square root of MSE. It provides a more interpretable value by returning the error in the same units as the target variable, while still penalizing large errors.
- Mean Absolute Percentage Error (MAPE): Expresses the error as a percentage, making it easier to understand in business terms. A lower MAPE indicates better model accuracy.
MAE, MSE, and RMSE are the primary regression metrics:
- MAE: Easier to interpret in the same units as the target variable. It’s useful when you want a simple error metric.
- MSE: Penalizes larger errors more heavily than MAE. Useful when large errors are especially undesirable.
- RMSE: More interpretable than MSE, since it’s in the same units as the target variable. It’s widely used when you want to penalize large errors more.
Business Perspective:
- MAPE is often preferred when you need an easily interpretable error metric in percentage terms, especially for business use cases like sales forecasting or demand prediction.
- MAE is useful when you care about errors in the **same unit** as your target, and when you don’t want to penalize large errors disproportionately.
- RMSE is important when large errors have a significant impact on the business and you want to **penalize** them more heavily than smaller ones.
- R² is key when you want to understand **how well the model fits** the data, but may not always be reliable in the presence of outliers or non-linear relationships.
These metrics provide a more comprehensive understanding of model performance, especially when accuracy alone is not sufficient in regression tasks.
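For reference, the standard formulas behind the metrics computed in the evaluation function below (with $n$ samples, $p$ features, actuals $y_i$, predictions $\hat{y}_i$, and mean $\bar{y}$):
$R^2 = 1 - \frac{\sum_i (y_i - \hat{y}_i)^2}{\sum_i (y_i - \bar{y})^2}, \quad R^2_{adj} = 1 - (1 - R^2)\,\frac{n - 1}{n - p - 1}$
$MAE = \frac{1}{n}\sum_i |y_i - \hat{y}_i|, \quad MSE = \frac{1}{n}\sum_i (y_i - \hat{y}_i)^2, \quad RMSE = \sqrt{MSE}, \quad MAPE = \frac{100}{n}\sum_i \left|\frac{y_i - \hat{y}_i}{y_i}\right|$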
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np
def evaluate_model_performance(
X_train, y_train, y_train_pred, X_test, y_test, y_test_pred, label="Model"
):
"""
Evaluate model performance for both training and testing data, including core metrics and business interpretation.
"""
# --- Train Performance ---
print(f"\n🔧 {label} - Train Performance")
# Ensure shapes match for multi-output models
if len(y_train.shape) == 2 and len(y_train_pred.shape) == 2: # Multi-output
r2_train = [r2_score(y_train[:, i], y_train_pred[:, i]) for i in range(y_train.shape[1])]
else: # Single output
r2_train = r2_score(y_train, y_train_pred)
adjusted_r2_train = 1 - (1 - r2_train) * (len(y_train) - 1) / (len(y_train) - X_train.shape[1] - 1)
mae_train = mean_absolute_error(y_train, y_train_pred)
rmse_train = np.sqrt(mean_squared_error(y_train, y_train_pred))
mape_train = np.mean(np.abs((y_train - y_train_pred) / y_train)) * 100
mse_train = mean_squared_error(y_train, y_train_pred)
print(f"R²: {r2_train:.3f} → Explains {r2_train*100:.0f}% of variation in target")
print(f"Adjusted R²: {adjusted_r2_train:.3f}")
print(f"MAPE: {mape_train:.2f}% → On average, we're off by this percentage")
print(f"MAE: {mae_train:.2f} → On average, we're off by ±${mae_train*100000:.0f}")
print(f"RMSE: {rmse_train:.2f} → Penalizes big misses more heavily")
print(f"MSE: {mse_train:.2f} → Mean squared error")
# --- Test Performance ---
print(f"\n🧪 {label} - Test Performance")
if len(y_test.shape) == 2 and len(y_test_pred.shape) == 2: # Multi-output
r2_test = [r2_score(y_test[:, i], y_test_pred[:, i]) for i in range(y_test.shape[1])]
else: # Single output
r2_test = r2_score(y_test, y_test_pred)
adjusted_r2_test = 1 - (1 - r2_test) * (len(y_test) - 1) / (len(y_test) - X_test.shape[1] - 1)
mae_test = mean_absolute_error(y_test, y_test_pred)
rmse_test = np.sqrt(mean_squared_error(y_test, y_test_pred))
mape_test = np.mean(np.abs((y_test - y_test_pred) / y_test)) * 100
mse_test = mean_squared_error(y_test, y_test_pred)
print(f"R²: {r2_test:.3f} → Explains {r2_test*100:.0f}% of variation in target")
print(f"Adjusted R²: {adjusted_r2_test:.3f}")
print(f"MAPE: {mape_test:.2f}% → On average, we're off by this percentage")
print(f"MAE: {mae_test:.2f} → On average, we're off by ±${mae_test*100000:.0f}")
print(f"RMSE: {rmse_test:.2f} → Penalizes big misses more heavily")
print(f"MSE: {mse_test:.2f} → Mean squared error")
# --- Business Interpretation ---
print("\n📌 Interpretation:")
if mape_test > 10:
print("- High MAPE → model might need improvement.")
else:
print("- MAPE is acceptable; model is performing well.")
if mae_test > 1:
print("- High MAE → model might need improvement.")
else:
print("- MAE is acceptable; the model is performing well.")
if rmse_test > 1:
print("- High RMSE → model is not handling large errors well.")
else:
print("- RMSE is acceptable; model is not penalizing large errors excessively.")
print(f"- R² indicates model fit: {r2_test:.2f}")
print(f"- Adjusted R² accounts for complexity: {adjusted_r2_test:.2f}")
evaluate_model_performance(
X_train, y_train, y_train_pred, X_test, y_test, y_test_pred, label="Baseline_Model"
)
🔧 Baseline_Model - Train Performance
R²: 0.000 → Explains 0% of variation in target
Adjusted R²: -0.000
MAPE: 62.08% → On average, we're off by this percentage
MAE: 0.91 → On average, we're off by ±$91391
RMSE: 1.16 → Penalizes big misses more heavily
MSE: 1.34 → Mean squared error

🧪 Baseline_Model - Test Performance
R²: -0.000 → Explains -0% of variation in target
Adjusted R²: -0.002
MAPE: 62.89% → On average, we're off by this percentage
MAE: 0.91 → On average, we're off by ±$90607
RMSE: 1.14 → Penalizes big misses more heavily
MSE: 1.31 → Mean squared error

📌 Interpretation:
- High MAPE → model might need improvement.
- MAE is acceptable; the model is performing well.
- High RMSE → model is not handling large errors well.
- R² indicates model fit: -0.00
- Adjusted R² accounts for complexity: -0.00
🧮 Update Best Model Info¶
import numpy as np
from termcolor import colored
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
def update_best_model_info(model_name, model_obj, y_train, y_test, y_train_pred, y_test_pred, hyperparameters=None, mask=False):
"""
Computes prediction metrics internally, updates best_model_info if model outperforms current best.
Also logs all model results.
"""
# Evaluate performance
metrics = {
"train": {
"r2": r2_score(y_train, y_train_pred),
"adjusted_r2": 1 - (1 - r2_score(y_train, y_train_pred)) * (len(y_train) - 1) / (len(y_train) - X_train.shape[1] - 1),
"mape": np.mean(np.abs((y_train - y_train_pred) / y_train)) * 100,
"rmse": np.sqrt(mean_squared_error(y_train, y_train_pred)),
"mse": mean_squared_error(y_train, y_train_pred),
"mae": mean_absolute_error(y_train, y_train_pred)
},
"test": {
"r2": r2_score(y_test, y_test_pred),
"adjusted_r2": 1 - (1 - r2_score(y_test, y_test_pred)) * (len(y_test) - 1) / (len(y_test) - X_test.shape[1] - 1),
"mape": np.mean(np.abs((y_test - y_test_pred) / y_test)) * 100,
"rmse": np.sqrt(mean_squared_error(y_test, y_test_pred)),
"mse": mean_squared_error(y_test, y_test_pred),
"mae": mean_absolute_error(y_test, y_test_pred)
}
}
# Compare with current best
current_score = metrics["test"][success_metric]
best_score = best_model_info["metrics"]["test"].get(success_metric, -np.inf)
previous_best = best_model_info["name"] or "None"
if current_score > best_score:
if not mask:
best_model_info.update({
"name": model_name,
"model": model_obj,
"metrics": metrics,
"hyperparameters": hyperparameters or {}
})
print(colored(
f"✅ {model_name} just beat previous best ({previous_best}) → "
f"{success_metric}: {best_score:.4f} → {current_score:.4f}", "green"))
else:
print(f"⚠️ {model_name} did not improve the best score.")
# Log all model results
model_results[model_name] = {
"model": model_obj,
"metrics": metrics,
"hyperparameters": hyperparameters or {}
}
# Example usage with the existing Dummy Regressor (Baseline Model)
update_best_model_info(
model_name="Baseline Model",
model_obj=dummy_regressor, # Using the existing baseline model
y_train=y_train,
y_test=y_test,
y_train_pred=y_train_pred,
y_test_pred=y_test_pred,
hyperparameters={"strategy": "mean"}
)
✅ Baseline Model just beat previous best (None) → r2: -inf → -0.0002
# from pprint import pprint
# pprint(best_model_info)
# pprint(model_results)
# import json
# print(json.dumps(best_model_info, indent=2, default=str))
# print(json.dumps(model_results, indent=2, default=str))
🔍 Algorithms¶
📖 Click to Expand - Suitability Checklist (All Models)
Criterion | ⚙️ Linear Regression | 🧊 Ridge Regression | 🔥 Lasso Regression | 🧃 ElasticNet | 🌳 Decision Trees | 🪵 Random Forests | 🚀 Gradient Boosting (XGBoost, LightGBM) | ⚙️ Support Vector Machines (SVR) | 🧠 K-Nearest Neighbors (KNN) |
---|---|---|---|---|---|---|---|---|---|
Interpretability | ✅ Excellent – coefficients are directly interpretable | ⚠️ Moderate – coefficients interpretable, but shrunk due to L2 penalty | ⚠️ Moderate – less interpretable due to variable shrinkage, but aids feature selection | ⚠️ Moderate – coefficients are shrunk and partially zeroed; harder to interpret | ✅ Good – tree structure is easy to visualize and explain | ❌ Low – individual trees are interpretable, but the ensemble is not | ❌ Low – ensembles are complex; interpretability requires SHAP or similar tools | ❌ Low – lacks clear coefficients or rule-based structure | ⚠️ Moderate – intuitive to explain, but no learned model or coefficients |
Linearity Expectation | ✅ Yes – assumes a linear relationship between features and target | ✅ Yes – assumes linear relationship between features and target | ✅ Yes – assumes linear relationship between features and target | ✅ Yes – assumes linear relationship between features and target | ✅ No – captures complex non-linear relationships naturally | ✅ No – models complex non-linear relationships naturally | ✅ No – captures highly non-linear relationships | ⚠️ Depends – linear SVR assumes linearity; kernel SVR handles non-linear patterns | ✅ No – models non-linear patterns based on local neighborhoods |
High dimensionality | ⚠️ Moderate – can overfit without regularization (use Ridge/Lasso) | ✅ Good – handles many features with L2 regularization | ✅ Very good – performs implicit feature selection via L1 penalty | ✅ Excellent – handles many features well, especially when correlated | ⚠️ Moderate – prone to overfitting with many features unless pruned | ✅ Good – handles many features well via random feature selection | ✅ Excellent – handles large feature sets with built-in feature importance and regularization | ✅ Very good – effective in high-dimensional spaces, especially with appropriate kernels | ❌ Poor – suffers from the curse of dimensionality; distance metrics lose meaning |
Handling of multicollinearity | ❌ Poor – highly sensitive unless regularized | ✅ Good – L2 penalty reduces impact of correlated features | ⚠️ Partial – tends to pick one feature among correlated ones | ✅ Better than Lasso – distributes weight across correlated features | ✅ Handles – splits on strongest feature; ignores others | ✅ Handles – less sensitive due to randomized feature selection in each tree | ✅ Robust – tends to pick one feature among correlated ones; not harmed significantly | ⚠️ Somewhat handles – regularization helps, but not designed to resolve multicollinearity explicitly | ❌ Problematic – redundant features distort distance-based similarity |
Handling of categorical features | ❌ Not supported natively – requires one-hot or ordinal encoding | ❌ Requires encoding – use one-hot or ordinal encoding | ❌ Requires encoding – one-hot or ordinal encoding needed | ❌ Requires encoding – one-hot or ordinal encoding needed | ⚠️ Requires encoding – label encoding preferred; not ideal with high-cardinality | ⚠️ Requires encoding – typically label encoding; not optimal with high-cardinality | ⚠️ Requires encoding – label encoding usually sufficient; LightGBM supports native categorical handling | ❌ Not supported natively – requires proper encoding and kernel compatibility | ❌ Not natively supported – requires encoding and careful distance handling |
Handling of outliers | ❌ Sensitive – predictions and coefficients can be skewed | ❌ Sensitive – does not explicitly handle outliers | ❌ Sensitive – does not explicitly handle outliers | ❌ Sensitive – still affected by extreme values | ✅ Robust – splits based on thresholds, not influenced by extreme values | ✅ Robust – ensemble effect reduces outlier influence | ✅ Robust – tree splits reduce outlier influence significantly | ❌ Sensitive – margin-based loss can be distorted by outliers | ❌ Sensitive – local predictions easily skewed by nearby outliers |
Handling of missing values | ❌ Not supported – needs imputation before modeling | ❌ Not supported – requires imputation | ❌ Not supported – requires imputation | ❌ Not supported – requires imputation | ⚠️ Partial – some implementations handle them natively, others require imputation | ⚠️ Partial – some implementations support it; others need imputation | ✅ Supported – both XGBoost and LightGBM handle missing values natively during tree construction | ❌ Not supported – must impute missing data beforehand | ❌ Not supported – requires complete cases or imputation beforehand |
Scaling of features needed | ⚠️ Sometimes – needed with regularization or interaction terms | ✅ Yes – regularization depends on feature magnitude | ✅ Yes – essential due to L1 penalty sensitivity | ✅ Yes – critical due to combined penalties | ✅ Not needed – trees are scale-invariant | ✅ Not needed – tree-based and scale-invariant | ✅ Not needed – models are scale-invariant | ✅ Yes – essential for stable and effective optimization (especially with RBF kernel) | ✅ Yes – crucial, since all predictions are based on distance |
Handling of sparseness in data | ⚠️ Limited – better with regularization and preprocessing | ⚠️ Moderate – doesn't zero out coefficients, better with dense data | ✅ Good – promotes sparsity in coefficients, useful for sparse signals | ✅ Good – promotes sparsity but more stable than Lasso alone | ⚠️ Limited – not ideal for very sparse matrices like bag-of-words | ⚠️ Moderate – performs okay, but not best for highly sparse data | ✅ Excellent – built-in support for sparse data (e.g., in XGBoost and LightGBM) | ✅ Good – works well in sparse high-dimensional cases (e.g., text with linear kernel) | ❌ Poor – sparse vectors make nearest neighbor distances unreliable |
Skewed target distribution | ⚠️ Assumes homoscedasticity – may underperform on skewed targets | ⚠️ Assumes homoscedasticity – performance may drop on skewed targets | ⚠️ Assumes homoscedasticity – not ideal for highly skewed targets | ⚠️ Assumes homoscedasticity – not ideal for skewed distributions | ✅ Handles well – no distributional assumptions | ✅ Handles – no assumption about target distribution | ✅ Handles well – flexible enough to model skewed targets without assumptions | ⚠️ Handles better than OLS, but predictions are bounded by ε-margin; tuning required | ⚠️ Affected – highly skewed targets can lead to biased local averages |
Assumes normality of residuals? | ✅ Yes – important for inference and prediction intervals | ✅ Yes – for valid inference and confidence intervals | ✅ Yes – for statistical inference, if needed post-selection | ✅ Yes – standard inference assumptions apply post-selection | ❌ No – non-parametric, no residual distribution assumption | ❌ No – fully non-parametric model | ❌ No – fully non-parametric, no assumptions on residual distribution | ❌ No – non-parametric and does not rely on residual distribution assumptions | ❌ No – non-parametric, no assumptions on residuals or distributions |
Accuracy | ✅ Strong when assumptions hold; good as a baseline | ✅ Often better than linear regression when multicollinearity is present | ✅ Strong when only a subset of features are useful; acts as model + selector | ✅ Flexible – performs well when features are correlated and some sparsity is expected | ⚠️ Can overfit – strong on small to medium data, weaker alone on complex patterns | ✅ Strong – often top-tier performance on structured/tabular data | ✅ Top-tier – often best-in-class for structured/tabular regression problems | ✅ High – strong performance with well-tuned kernel and hyperparameters | ⚠️ Variable – performs well with clean, low-dimensional, dense data; poor elsewhere |
Training speed | ✅ Very fast – efficient even on large datasets | ✅ Fast – nearly as efficient as linear regression | ✅ Fast for small to medium datasets; slower with very high dimensionality | ⚠️ Slower than Ridge/Lasso – due to tuning two hyperparameters (`alpha`, `l1_ratio`) | ✅ Fast – efficient even on large datasets with moderate depth | ⚠️ Slower than single models – but parallelizable and scalable | ⚠️ Slower than simpler models – but optimized and scalable (especially with LightGBM and GPU support) | ❌ Slow – especially with large datasets or non-linear kernels | ✅ Fast training (lazy learner), ❌ Slow inference – computes distances at prediction time |
⚙️ Algorithms - Linear Regression¶
📖 Click to Expand
A linear regression model estimates the outcome as a weighted sum of input features:
Equation:
$\hat{y} = \beta_0 + \beta_1 x_1 + \beta_2 x_2 + \dots + \beta_n x_n$
Where:
- $\hat{y}$: Predicted value
- $\beta_0$: Intercept
- $x_i$: Input feature
- $\beta_i$: Weight of each feature
The model finds the best-fitting line by minimizing the sum of squared errors:
Objective:
$\text{Minimize} \quad \sum (y_i - \hat{y}_i)^2$
In business terms:
Each feature has a consistent “price tag” — the model tells you how much a change in input pushes the prediction up or down.
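To make the "price tag" intuition concrete, here is a tiny illustrative sketch with made-up coefficients (hypothetical numbers, not fitted to the housing data): bumping one feature by a unit moves the prediction by exactly that feature's weight.
# Tiny illustrative sketch with hypothetical coefficients (not fitted to any data):
# increasing one feature by 1 unit moves the prediction by exactly that feature's weight.
import numpy as np

beta_0 = 0.5                      # hypothetical intercept
beta = np.array([0.4, -0.1])      # hypothetical weights for two features
x = np.array([3.0, 2.0])          # one observation

y_hat = beta_0 + x @ beta
y_hat_bumped = beta_0 + (x + np.array([1.0, 0.0])) @ beta  # +1 unit on feature 1

print(round(y_hat, 3), round(y_hat_bumped, 3), round(y_hat_bumped - y_hat, 3))  # difference equals beta[0] = 0.4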
📖 Click to Expand - Suitability Checklist
Criterion | Comment |
---|---|
Interpretability | ✅ Excellent – coefficients are directly interpretable |
Linearity Expectation | ✅ Yes – assumes a linear relationship between features and target |
High dimensionality | ⚠️ Moderate – can overfit without regularization (use Ridge/Lasso) |
Handling of multicollinearity | ❌ Poor – highly sensitive unless regularized |
Handling of categorical features | ❌ Not supported natively – requires one-hot or ordinal encoding |
Handling of outliers | ❌ Sensitive – predictions and coefficients can be skewed |
Handling of missing values | ❌ Not supported – needs imputation before modeling |
Scaling of features needed | ⚠️ Sometimes – needed with regularization or interaction terms |
Handling of sparseness in data | ⚠️ Limited – better with regularization and preprocessing |
Skewed target distribution | ⚠️ Assumes homoscedasticity – may underperform on skewed targets |
Assumes normality of residuals? | ✅ Yes – important for inference and prediction intervals |
- General comment on accuracy: ✅ Strong when assumptions hold; good as a baseline
- General comment on training speed: ✅ Very fast – efficient even on large datasets
🏗️ Model Fit¶
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
# Recreate X and y if needed
X = df.drop(columns=[target_col])
y = df[target_col]
# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
# Fit base linear regression model
lr = LinearRegression()
lr.fit(X_train, y_train)
# Predict
y_train_pred = lr.predict(X_train)
y_test_pred = lr.predict(X_test)
LinearRegression()
🔍 Leverage - Outlier Detection¶
📖 Click to Expand - Leverage
🧠 What is Leverage?
- Leverage measures how far an observation's feature values are from the mean of all observations.
- It identifies points that sit on the edge of the predictor space — even if they don’t have large residuals.
- In matrix terms, leverage comes from the hat matrix $H = X(X^TX)^{-1}X^T$.
📐 Math:
- The leverage score for point $i$ is $h_i = H_{ii}$ (the diagonal of the hat matrix).
- $h_i$ ranges from $0$ to $1$, and high values mean the observation has strong influence on its own predicted value.
💡 Intuition:
- High leverage points may not be outliers in $y$, but they have unusual $X$ values.
- Think of them as data points in weird neighborhoods — the model leans heavily on them to make predictions there.
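To connect the hat-matrix formula to code, here is a hedged sketch that computes leverage directly with NumPy on a small random subsample of the training data (500 rows and the random seed are arbitrary choices), since the full $n \times n$ hat matrix is expensive. The statsmodels-based computation in the cells below is the one actually used.
# Hedged sketch: leverage as the diagonal of H = X (X^T X)^{-1} X^T, computed on a
# small subsample of X_train (assumed to exist from the split above) to keep it cheap.
import numpy as np

X_sub = X_train.sample(500, random_state=0)              # arbitrary subsample size
X_mat = np.column_stack([np.ones(len(X_sub)), X_sub])    # prepend an intercept column
H = X_mat @ np.linalg.inv(X_mat.T @ X_mat) @ X_mat.T
leverage_manual = np.diag(H)

print(leverage_manual[:5])       # leverage scores for the first 5 sampled rows
print(leverage_manual.mean())    # averages to (number of parameters) / (subsample size)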
📖 Click to Expand - What if high leverage?
🚨 What It Means
- High leverage points have unusual input values — they sit far from the average data pattern.
- The model relies heavily on them to define the shape of the regression line.
- They're not necessarily outliers in the target, but they’re structurally extreme.
🧠 What You Can Do
- Inspect: Are they legit or data glitches?
- Compare: Refit the model without them — if results shift a lot, they're too influential.
- Mitigate: Use robust models (Ridge, Trees), cap feature extremes, or reweight carefully.
💡 Key Insight
High leverage = “this point is setting the rules.” Sometimes that’s okay. Sometimes it’s dangerous.
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
# Add intercept to X for statsmodels
X_with_const = sm.add_constant(X_train)
# Fit OLS model using statsmodels
model = sm.OLS(y_train, X_with_const).fit()
# Get leverage scores from hat matrix
influence = model.get_influence()
leverage = influence.hat_matrix_diag
# Plot leverage scores
plt.figure(figsize=(8, 4));
plt.stem(np.arange(len(leverage)), leverage, markerfmt=" ", basefmt=" ");
plt.axhline(2 * X_with_const.shape[1] / X_with_const.shape[0], color='red', linestyle='--', label='Common threshold');
plt.xlabel("Observation Index");
plt.ylabel("Leverage");
plt.title("Leverage Scores");
plt.legend();
plt.tight_layout();
plt.show();
# Show top N leverage observations with full feature + target + leverage score
top_n = 5
top_idx = np.argsort(leverage)[-top_n:][::-1]
# Build DataFrame
high_leverage_df = X_train.iloc[top_idx].copy()
high_leverage_df[y_train.name] = y_train.iloc[top_idx].values
high_leverage_df["Leverage"] = leverage[top_idx]
print(f"🔍 Top {top_n} High-Leverage Observations:")
display(high_leverage_df)
🔍 Top 5 High-Leverage Observations:
 | MedInc | HouseAge | AveRooms | AveBedrms | Population | AveOccup | Latitude | Longitude | MedHouseVal | Leverage |
---|---|---|---|---|---|---|---|---|---|---|
19006 | 10.2264 | 45.0 | 3.166667 | 0.833333 | 7460.0 | 1243.333333 | 38.32 | -121.98 | 1.37500 | 0.696212 |
1914 | 1.8750 | 33.0 | 141.909091 | 25.636364 | 30.0 | 2.727273 | 38.91 | -120.10 | 5.00001 | 0.255724 |
3364 | 5.5179 | 36.0 | 5.142857 | 1.142857 | 4198.0 | 599.714286 | 40.41 | -120.51 | 0.67500 | 0.162325 |
16669 | 4.2639 | 46.0 | 9.076923 | 1.307692 | 6532.0 | 502.461538 | 35.32 | -120.70 | 3.50000 | 0.115033 |
11862 | 2.6250 | 25.0 | 59.875000 | 15.312500 | 28.0 | 1.750000 | 40.27 | -121.25 | 0.67500 | 0.082439 |
🔥 Cook’s Distance - Outlier Detection¶
📖 Click to Expand
🔥 What It Measures
- Cook’s Distance combines both leverage and residual size.
- It tells you how much the model's predictions would change if you removed a specific observation.
- In short: “How much does this point bend the model?”
📐 Math (Simplified)
For each point $i$:
$D_i = \frac{(y_i - \hat{y}_i)^2}{p \cdot MSE} \cdot \frac{h_i}{(1 - h_i)^2}$
where:
- $h_i$ = leverage
- $p$ = number of parameters
- $MSE$ = mean squared error
🧠 What To Do With It
- Points with high Cook’s D have both high leverage and large residuals → red flags.
- They may be real edge cases or data issues — investigate.
- Use a threshold like 4/n, or compare to the top 5% of values.
💡 Intuition
Leverage = weird position.
Residual = big error.
Cook’s Distance = both.
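To tie the formula to the output, here is a hedged sketch that recomputes Cook's Distance by hand and checks it against statsmodels, reusing the model, influence, and leverage objects fitted in the Leverage section above.
# Hedged sketch: Cook's D from its formula, reusing `model`, `influence`, and
# `leverage` computed in the Leverage section above.
import numpy as np

resid = model.resid.values
p = int(model.df_model) + 1            # number of parameters, including the intercept
mse = model.mse_resid                  # residual mean squared error
cooks_manual = (resid**2 / (p * mse)) * (leverage / (1 - leverage)**2)

print(np.allclose(cooks_manual, influence.cooks_distance[0]))  # should print True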
import statsmodels.api as sm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Add constant and fit OLS model
X_with_const = sm.add_constant(X_train)
model = sm.OLS(y_train, X_with_const).fit()
# Compute influence
influence = model.get_influence()
cooks_d = influence.cooks_distance[0] # First element is the array of distances
# Plot Cook's Distance
plt.figure(figsize=(10, 4));
plt.stem(np.arange(len(cooks_d)), cooks_d, markerfmt=" ", basefmt=" ");
plt.axhline(4 / len(X_train), color='red', linestyle='--', label="Common threshold (4/n)");
plt.title("Cook's Distance");
plt.xlabel("Observation Index");
plt.ylabel("Cook's D");
plt.legend();
plt.tight_layout();
plt.show();
# Print top N influential points
top_n = 5
top_idx = np.argsort(cooks_d)[-top_n:][::-1]
cooks_df = X_train.iloc[top_idx].copy()
cooks_df[y_train.name] = y_train.iloc[top_idx].values
cooks_df["Cook's D"] = cooks_d[top_idx]
print(f"🔥 Top {top_n} Most Influential Observations (Cook’s Distance):")
display(cooks_df)
🔥 Top 5 Most Influential Observations (Cook’s Distance):
 | MedInc | HouseAge | AveRooms | AveBedrms | Population | AveOccup | Latitude | Longitude | MedHouseVal | Cook's D |
---|---|---|---|---|---|---|---|---|---|---|
19006 | 10.2264 | 45.0 | 3.166667 | 0.833333 | 7460.0 | 1243.333333 | 38.32 | -121.98 | 1.37500 | 0.773982 |
1914 | 1.8750 | 33.0 | 141.909091 | 25.636364 | 30.0 | 2.727273 | 38.91 | -120.10 | 5.00001 | 0.607112 |
11862 | 2.6250 | 25.0 | 59.875000 | 15.312500 | 28.0 | 1.750000 | 40.27 | -121.25 | 0.67500 | 0.316914 |
16669 | 4.2639 | 46.0 | 9.076923 | 1.307692 | 6532.0 | 502.461538 | 35.32 | -120.70 | 3.50000 | 0.199847 |
3364 | 5.5179 | 36.0 | 5.142857 | 1.142857 | 4198.0 | 599.714286 | 40.41 | -120.51 | 0.67500 | 0.101675 |
🔢 Coefficients¶
# View model coefficients
coeff_df = pd.DataFrame({
"Feature": X.columns,
"Coefficient": lr.coef_
}).sort_values(by="Coefficient", key=abs, ascending=False)
# Identify the most impactful positive and negative drivers (by signed coefficient)
top_positive = coeff_df.loc[coeff_df["Coefficient"].idxmax()]
top_negative = coeff_df.loc[coeff_df["Coefficient"].idxmin()]
print("🧠 Interpretation Summary:")
print(f"- 💡 '{top_positive['Feature']}' has the strongest positive effect on {target_col} (for a unit increase).")
print(f"- 🔻 '{top_negative['Feature']}' has the strongest negative effect on {target_col} (for a unit increase).")
# Optional threshold for low-impact features
low_impact = coeff_df[coeff_df["Coefficient"].abs() < 0.01]["Feature"].tolist()
if low_impact:
print(f"- 💤 Features like {', '.join(low_impact)} have **minimal predictive power** in this model.")
coeff_df
🧠 Interpretation Summary:
- 💡 'AveBedrms' has the strongest positive effect on MedHouseVal (for a unit increase).
- 🔻 'Longitude' has the strongest negative effect on MedHouseVal (for a unit increase).
- 💤 Features like HouseAge, AveOccup, Population have **minimal predictive power** in this model.
 | Feature | Coefficient |
---|---|---|
3 | AveBedrms | 0.794471 |
0 | MedInc | 0.447600 |
7 | Longitude | -0.433405 |
6 | Latitude | -0.418555 |
2 | AveRooms | -0.124756 |
1 | HouseAge | 0.009568 |
5 | AveOccup | -0.003443 |
4 | Population | -0.000001 |
🧮 VIF (optional)¶
📖 Click to Expand
📐 What is VIF?¶
- Variance Inflation Factor (VIF) quantifies how much the variance of a regression coefficient is inflated due to multicollinearity among the independent variables.
- It helps detect redundant features in a linear regression model.
🧮 Mathematical Formula¶
For a given feature $X_j$:
$\text{VIF}(X_j) = \frac{1}{1 - R_j^2}$
Where:
- $R_j^2$ is the coefficient of determination obtained by regressing $X_j$ on all the other predictors.
🔍 Interpretation¶
VIF Value | Meaning |
---|---|
1 | No multicollinearity |
1 – 5 | Moderate multicollinearity (usually fine) |
> 5 | Concerning – consider dropping or combining features |
> 10 | Problematic – strong multicollinearity |
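To connect the formula to code, here is a hedged sketch that computes the VIF of a single feature from the $R^2$ of its auxiliary regression, assuming the X features DataFrame from earlier; it should match the statsmodels table below.
# Hedged sketch: VIF of one feature via the R² of regressing it on the other features,
# assuming `X` (all features, no target) is defined as above.
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

feature = "AveRooms"                               # pick any column of X
others = X.drop(columns=[feature])
aux = LinearRegression().fit(others, X[feature])   # auxiliary regression: X_j ~ other features
r2_aux = r2_score(X[feature], aux.predict(others))

print(f"VIF({feature}) = {1 / (1 - r2_aux):.2f}")  # compare with the statsmodels VIF table below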
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor
# Recompute VIF for final X
X_with_const = sm.add_constant(X)
vif_data = pd.DataFrame()
vif_data["Feature"] = X.columns
vif_data["VIF"] = [variance_inflation_factor(X_with_const.values, i + 1) for i in range(X.shape[1])]
vif_data = vif_data.sort_values("VIF", ascending=False)
# Show table
display(vif_data)
# Interpretation
vif_threshold = 5
high_vif = vif_data[vif_data["VIF"] > vif_threshold]["Feature"].tolist()
print("\n📉 Multicollinearity Interpretation:")
if high_vif:
print(f"- ⚠️ The following features may be strongly correlated with others: {', '.join(high_vif)}")
print("- This could make individual feature effects harder to trust.")
else:
print("- ✅ No problematic multicollinearity — all features are within acceptable VIF range.")
 | Feature | VIF |
---|---|---|
6 | Latitude | 9.297624 |
7 | Longitude | 8.962263 |
2 | AveRooms | 8.342786 |
3 | AveBedrms | 6.994995 |
0 | MedInc | 2.501295 |
1 | HouseAge | 1.241254 |
4 | Population | 1.138125 |
5 | AveOccup | 1.008324 |
📉 Multicollinearity Interpretation:
- ⚠️ The following features may be strongly correlated with others: Latitude, Longitude, AveRooms, AveBedrms
- This could make individual feature effects harder to trust.
📊 What impacts R²¶
# Bar plot of absolute coefficients to show feature impact on variance explained
plt.figure(figsize=(8, 5))
sns.barplot(
x=coeff_df["Coefficient"].abs(),
y=coeff_df["Feature"],
palette="viridis"
)
plt.title("Feature Contribution Magnitude (Absolute Coefficients)")
plt.xlabel("Impact on Prediction (|β|)")
plt.ylabel("Feature")
plt.tight_layout()
plt.show()
# Business interpretation
print("🧠 Interpretation:")
print("- The higher the margitude, the more influence that feature has on the prediction — regardless of direction.")
print("- This helps identify which features the model is leaning on most heavily to make predictions.")
print("- Note: High influence doesn’t mean causation — just strong correlation within this dataset.")
🧠 Interpretation:
- The higher the magnitude, the more influence that feature has on the prediction — regardless of direction.
- This helps identify which features the model is leaning on most heavily to make predictions.
- Note: High influence doesn’t mean causation — just strong correlation within this dataset.
🚨 Overfitting indicators¶
# Quick check: Train vs Test R²
print(f"Train R²: {r2_score(y_train, y_train_pred):.3f}")
print(f"Test R²: {r2_score(y_test, y_test_pred):.3f}")
gap = r2_score(y_train, y_train_pred) - r2_score(y_test, y_test_pred)
print(f"Gap: {gap:.3f}")
if gap > 0.1:
print("⚠️ Likely overfitting — model performs much better on training data.")
else:
print("✅ No strong overfitting signal — train/test performance is similar.")
Train R²: 0.610
Test R²: 0.591
Gap: 0.019
✅ No strong overfitting signal — train/test performance is similar.
🔬 SHAP¶
📖 Click to Expand
- SHAP (SHapley Additive exPlanations) helps explain how much each feature contributed to a specific prediction.
- Unlike coefficients, SHAP accounts for interactions and context — making it more reliable for understanding why the model made a decision.
- Think of it like splitting the credit (or blame) for each prediction across features.
- PDP (Partial Dependence Plots) show how a single feature affects predictions while holding others constant.
- Useful to answer: “As this feature increases, does the predicted outcome go up or down — and by how much?”
Together, SHAP and PDP bridge the gap between complex models and human intuition.
import shap
# Fit model on full data for SHAP
lr.fit(X, y)
explainer = shap.Explainer(lr, X)
shap_values = explainer(X)
# SHAP summary plot
shap.summary_plot(shap_values, X, plot_type="bar")
# Business interpretation
print("🧠 Interpretation:")
print("- This shows which features contribute most to the model’s predictions across all data points.")
print("- The top features are the biggest drivers of predicted changes in the target.")
print("- Unlike raw coefficients, SHAP values account for interactions and provide a more reliable ranking.")
🧠 Interpretation: - This shows which features contribute most to the model’s predictions across all data points. - The top features are the biggest drivers of predicted changes in the target. - Unlike raw coefficients, SHAP values account for interactions and provide a more reliable ranking.
📊 Partial Dependence Plot¶
📖 Click to Expand
- A Partial Dependence Plot (PDP) shows how the model’s prediction changes as one feature changes, while keeping all other features constant.
- It answers questions like:
- “If income increases, how does predicted price change — assuming everything else stays the same?”
- This helps identify:
- Whether the relationship is linear or non-linear
- If there are threshold effects or diminishing returns
- PDPs are useful for detecting effects that coefficients alone may hide — especially in more complex models.
from sklearn.inspection import PartialDependenceDisplay
# Pick top 2–3 features from SHAP or coefficients
top_features = coeff_df["Feature"].head(3).tolist()
# Plot PDP
PartialDependenceDisplay.from_estimator(
lr, X, features=top_features, kind="average", grid_resolution=50
)
plt.suptitle("Partial Dependence Plots (Top Features)", fontsize=14)
plt.tight_layout()
plt.show()
# Business interpretation — tailored to dataset
print("🧠 Interpretation:")
print(f"- These plots show how the predicted value of '{target_col}' changes when one feature increases, while holding others fixed.")
for feature in top_features:
print(f" • As '{feature}' increases, observe whether '{target_col}' rises, falls, or stays flat — this shows its individual effect.")
print("- A flat curve → minimal impact. A steep or curved slope → strong or non-linear influence.")
🧠 Interpretation: - These plots show how the predicted value of 'MedHouseVal' changes when one feature increases, while holding others fixed. • As 'AveBedrms' increases, observe whether 'MedHouseVal' rises, falls, or stays flat — this shows its individual effect. • As 'MedInc' increases, observe whether 'MedHouseVal' rises, falls, or stays flat — this shows its individual effect. • As 'Longitude' increases, observe whether 'MedHouseVal' rises, falls, or stays flat — this shows its individual effect. - A flat curve → minimal impact. A steep or curved slope → strong or non-linear influence.
🧊 Algorithms - Ridge Regression¶
📖 Click to Expand - Regularized Regression
- Regularized regression is like linear regression with guardrails — it helps prevent overfitting and improves generalization.
- It’s especially useful when:
- You have many features, some of which may be noisy or redundant.
- There's multicollinearity (features are correlated).
- The model performs too well on training but poorly on test data.
- There are 3 main types:
- Ridge: Shrinks coefficients, but keeps all features
- Lasso: Shrinks some coefficients all the way to zero → does feature selection
- ElasticNet: A mix of both
- Use regularization when your model has high variance or you want to simplify the feature set.
📖 Click to Expand - Ridge Regression
- Ridge regression adds a penalty that discourages large coefficients.
- It’s useful when all features are important but may be correlated.
Objective function:
$\text{Loss} = \sum (y_i - \hat{y}_i)^2 + \lambda \sum \beta_j^2$
- The penalty term $\lambda \sum \beta_j^2$ shrinks coefficients without zeroing them out.
- This reduces overfitting but keeps all features in the model.
- Works well when you want to retain information while controlling variance.
📖 Click to Expand - Suitability Checklist
Criterion | Comment |
---|---|
Interpretability | ⚠️ Moderate – coefficients interpretable, but shrunk due to L2 penalty |
Linearity Expectation | ✅ Yes – assumes linear relationship between features and target |
High dimensionality | ✅ Good – handles many features with L2 regularization |
Handling of multicollinearity | ✅ Good – L2 penalty reduces impact of correlated features |
Handling of categorical features | ❌ Requires encoding – use one-hot or ordinal encoding |
Handling of outliers | ❌ Sensitive – does not explicitly handle outliers |
Handling of missing values | ❌ Not supported – requires imputation |
Scaling of features needed | ✅ Yes – regularization depends on feature magnitude |
Handling of sparseness in data | ⚠️ Moderate – doesn't zero out coefficients, better with dense data |
Skewed target distribution | ⚠️ Assumes homoscedasticity – performance may drop on skewed targets |
Assumes normality of residuals? | ✅ Yes – for valid inference and confidence intervals |
- General comment on accuracy: ✅ Often better than linear regression when multicollinearity is present
- General comment on training speed: ✅ Fast – nearly as efficient as linear regression
from sklearn.linear_model import Ridge
ridge = Ridge(alpha=1.0)
ridge.fit(X_train, y_train)
y_ridge_train = ridge.predict(X_train)
y_ridge_test = ridge.predict(X_test)
print("🔧 Ridge Regression")
print(f"Train R²: {r2_score(y_train, y_ridge_train):.3f}")
print(f"Test R²: {r2_score(y_test, y_ridge_test):.3f}")
print(f"Test MAE: {mean_absolute_error(y_test, y_ridge_test):.2f}")
🔧 Ridge Regression Train R²: 0.610 Test R²: 0.591 Test MAE: 0.53
🔥 Algorithms - Lasso Regression¶
📖 Click to Expand
- Lasso regression adds a penalty on the absolute values of coefficients.
- It helps with feature selection by shrinking some coefficients to exactly zero.
Objective function:
$\text{Loss} = \sum (y_i - \hat{y}_i)^2 + \lambda \sum |\beta_j|$
- The $L1$ penalty encourages sparsity — only the most important features survive.
- Use Lasso when you suspect some features don’t matter or want a simpler model.
📖 Click to Expand - Suitability Checklist
Criterion | Comment |
---|---|
Interpretability | ⚠️ Moderate – less interpretable due to variable shrinkage, but aids feature selection |
Linearity Expectation | ✅ Yes – assumes linear relationship between features and target |
High dimensionality | ✅ Very good – performs implicit feature selection via L1 penalty |
Handling of multicollinearity | ⚠️ Partial – tends to pick one feature among correlated ones |
Handling of categorical features | ❌ Requires encoding – one-hot or ordinal encoding needed |
Handling of outliers | ❌ Sensitive – does not explicitly handle outliers |
Handling of missing values | ❌ Not supported – requires imputation |
Scaling of features needed | ✅ Yes – essential due to L1 penalty sensitivity |
Handling of sparseness in data | ✅ Good – promotes sparsity in coefficients, useful for sparse signals |
Skewed target distribution | ⚠️ Assumes homoscedasticity – not ideal for highly skewed targets |
Assumes normality of residuals? | ✅ Yes – for statistical inference, if needed post-selection |
- General comment on accuracy: ✅ Strong when only a subset of features are useful; acts as model + selector
- General comment on training speed: ✅ Fast for small to medium datasets; slower with very high dimensionality
from sklearn.linear_model import Lasso
lasso = Lasso(alpha=0.1)
lasso.fit(X_train, y_train)
y_lasso_train = lasso.predict(X_train)
y_lasso_test = lasso.predict(X_test)
print("🔥 Lasso Regression")
print(f"Train R²: {r2_score(y_train, y_lasso_train):.3f}")
print(f"Test R²: {r2_score(y_test, y_lasso_test):.3f}")
print(f"Test MAE: {mean_absolute_error(y_test, y_lasso_test):.2f}")
🔥 Lasso Regression Train R²: 0.546 Test R²: 0.544 Test MAE: 0.58
🧃 Algorithms - ElasticNet¶
📖 Click to Expand
- ElasticNet blends Ridge (L2) and Lasso (L1) penalties.
- It balances stability and sparsity — best when you're not sure which to use.
Objective function:
$\text{Loss} = \sum (y_i - \hat{y}_i)^2 + \lambda_1 \sum |\beta_j| + \lambda_2 \sum \beta_j^2$
- The L1 part helps with feature selection, while L2 improves stability.
- ElasticNet is often used as a default choice when working with many features.
📖 Click to Expand - Suitability Checklist
Criterion | Comment |
---|---|
Interpretability | ⚠️ Moderate – coefficients are shrunk and partially zeroed; harder to interpret |
Linearity Expectation | ✅ Yes – assumes linear relationship between features and target |
High dimensionality | ✅ Excellent – handles many features well, especially when correlated |
Handling of multicollinearity | ✅ Better than Lasso – distributes weight across correlated features |
Handling of categorical features | ❌ Requires encoding – one-hot or ordinal encoding needed |
Handling of outliers | ❌ Sensitive – still affected by extreme values |
Handling of missing values | ❌ Not supported – requires imputation |
Scaling of features needed | ✅ Yes – critical due to combined penalties |
Handling of sparseness in data | ✅ Good – promotes sparsity but more stable than Lasso alone |
Skewed target distribution | ⚠️ Assumes homoscedasticity – not ideal for skewed distributions |
Assumes normality of residuals? | ✅ Yes – standard inference assumptions apply post-selection |
- General comment on accuracy: ✅ Flexible – performs well when features are correlated and some sparsity is expected
- General comment on training speed: ⚠️ Slower than Ridge/Lasso – due to tuning two hyperparameters (`alpha`, `l1_ratio`)
from sklearn.linear_model import ElasticNet
elastic = ElasticNet(alpha=0.1, l1_ratio=0.5)
elastic.fit(X_train, y_train)
y_elastic_train = elastic.predict(X_train)
y_elastic_test = elastic.predict(X_test)
print("🧃 ElasticNet Regression")
print(f"Train R²: {r2_score(y_train, y_elastic_train):.3f}")
print(f"Test R²: {r2_score(y_test, y_elastic_test):.3f}")
print(f"Test MAE: {mean_absolute_error(y_test, y_elastic_test):.2f}")
🧃 ElasticNet Regression Train R²: 0.576 Test R²: 0.575 Test MAE: 0.55
🌳 Algorithms - Decision Trees¶
📖 Click to Expand
- A Decision Tree is a non-linear model that splits the data into subsets based on feature values. It recursively divides the dataset into branches, making decisions at each node based on a feature threshold.
How it works:
- Splitting: The tree splits data into two groups based on a feature and its value. The objective is to find splits that result in the most homogeneous subsets.
- Node: Each node represents a decision rule (e.g., "Is `AveRooms` > 6?").
- Leaf: A leaf node represents the predicted value (for regression) or class label (for classification).
- Tree Construction: The tree is built by recursively finding the best feature and value to split the data, using metrics like Gini impurity, information gain, or mean squared error (MSE).
Equation (Regression Tree):
For regression trees, the predicted value at a leaf node is the mean of the target values in that subset.
$\hat{y} = \frac{1}{n} \sum_{i=1}^{n} y_i$
Objective:
Minimize the impurity at each split. For regression, the goal is to minimize the sum of squared errors within the branches.
In business terms:
A decision tree breaks down a complex problem into simple, easy-to-understand decisions. It’s like following a series of “yes/no” questions that guide you to the right answer.
📖 Click to Expand - Suitability Checklist
Criterion | Comment |
---|---|
Interpretability | ✅ Good – tree structure is easy to visualize and explain |
Linearity Expectation | ✅ No – captures complex non-linear relationships naturally |
High dimensionality | ⚠️ Moderate – prone to overfitting with many features unless pruned |
Handling of multicollinearity | ✅ Handles – splits on strongest feature; ignores others |
Handling of categorical features | ⚠️ Requires encoding – label encoding preferred; not ideal with high-cardinality |
Handling of outliers | ✅ Robust – splits based on thresholds, not influenced by extreme values |
Handling of missing values | ⚠️ Partial – some implementations handle them natively, others require imputation |
Scaling of features needed | ✅ Not needed – trees are scale-invariant |
Handling of sparseness in data | ⚠️ Limited – not ideal for very sparse matrices like bag-of-words |
Skewed target distribution | ✅ Handles well – no distributional assumptions |
Assumes normality of residuals? | ❌ No – non-parametric, no residual distribution assumption |
- General comment on accuracy: ⚠️ Can overfit – strong on small to medium data, weaker alone on complex patterns
- General comment on training speed: ✅ Fast – efficient even on large datasets with moderate depth
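As a minimal sketch (following the same pattern as the Ridge/Lasso cells above, and assuming `X_train`, `y_train`, `X_test`, `y_test` from the earlier split are in scope), a Decision Tree can be fit and scored as below; `max_depth=6` is an illustrative, untuned cap chosen only to limit overfitting.
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score, mean_absolute_error
# max_depth=6 is an illustrative cap to curb overfitting; tune it via cross-validation
dt = DecisionTreeRegressor(max_depth=6, random_state=42)
dt.fit(X_train, y_train)
y_dt_train = dt.predict(X_train)
y_dt_test = dt.predict(X_test)
print("🌳 Decision Tree Regression")
print(f"Train R²: {r2_score(y_train, y_dt_train):.3f}")
print(f"Test R²: {r2_score(y_test, y_dt_test):.3f}")
print(f"Test MAE: {mean_absolute_error(y_test, y_dt_test):.2f}")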
🪵 Algorithms - Random Forests¶
📖 Click to Expand
A Random Forest is an ensemble learning method that creates multiple decision trees and combines their predictions to improve accuracy and reduce overfitting.
How it works:
- Bootstrap Aggregating (Bagging): Random Forests use bootstrapping to create multiple subsets of the data. Each tree is trained on a random subset (with replacement) of the data, which introduces diversity in the models.
- Tree Construction: Each tree in the forest is trained independently, using a random subset of features for each split. This randomness helps prevent overfitting and ensures that trees do not correlate too much with each other.
- Voting/Averaging: For regression tasks, the final prediction is the average of all tree predictions. For classification tasks, it’s the mode (most frequent class) predicted by the trees.
Equation (for Regression):
For regression, the predicted value is the average of the predictions from all trees.
$\hat{y} = \frac{1}{M} \sum_{i=1}^{M} \hat{y}_i$
Where:
- $\hat{y}_i$: Prediction of the i-th tree
- $M$: Total number of trees in the forest
**Objective:**
Minimize the variance of the model’s predictions by averaging over many trees, each trained on a different subset of the data.
**In business terms:**
Random Forests can be thought of as having a “crowd” of decision makers, where each one gives their opinion, and the final decision is made by taking the average or majority vote. This helps reduce the bias of individual decision trees.
📖 Click to Expand - Suitability Checklist
Criterion | Comment |
---|---|
Interpretability | ❌ Low – individual trees are interpretable, but the ensemble is not |
Linearity Expectation | ✅ No – models complex non-linear relationships naturally |
High dimensionality | ✅ Good – handles many features well via random feature selection |
Handling of multicollinearity | ✅ Handles – less sensitive due to randomized feature selection in each tree |
Handling of categorical features | ⚠️ Requires encoding – typically label encoding; not optimal with high-cardinality |
Handling of outliers | ✅ Robust – ensemble effect reduces outlier influence |
Handling of missing values | ⚠️ Partial – some implementations support it; others need imputation |
Scaling of features needed | ✅ Not needed – tree-based and scale-invariant |
Handling of sparseness in data | ⚠️ Moderate – performs okay, but not best for highly sparse data |
Skewed target distribution | ✅ Handles – no assumption about target distribution |
Assumes normality of residuals? | ❌ No – fully non-parametric model |
- General comment on accuracy: ✅ Strong – often top-tier performance on structured/tabular data
- General comment on training speed: ⚠️ Slower than single models – but parallelizable and scalable
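A minimal sketch of the same fit-and-score pattern with a Random Forest, assuming the earlier train/test split; `n_estimators=200` is an illustrative forest size, not a tuned value.
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error
# n_estimators=200 is an illustrative forest size; n_jobs=-1 parallelizes across CPU cores
rf = RandomForestRegressor(n_estimators=200, random_state=42, n_jobs=-1)
rf.fit(X_train, y_train)
y_rf_train = rf.predict(X_train)
y_rf_test = rf.predict(X_test)
print("🪵 Random Forest Regression")
print(f"Train R²: {r2_score(y_train, y_rf_train):.3f}")
print(f"Test R²: {r2_score(y_test, y_rf_test):.3f}")
print(f"Test MAE: {mean_absolute_error(y_test, y_rf_test):.2f}")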
🚀 Algorithms - Gradient Boosting (XGBoost, LightGBM)¶
📖 Click to Expand
- Gradient Boosting is an ensemble learning technique that builds models sequentially, where each model corrects the errors made by the previous one. The algorithm focuses on minimizing the loss function by adding trees that improve the model’s performance in areas where it’s struggling.
How it works:
- Boosting: Unlike Random Forests, which train trees independently, Gradient Boosting trains trees sequentially. Each tree corrects the residual errors of the previous one by focusing on the examples that were misclassified or poorly predicted.
- Gradient Descent: The model uses gradient descent to minimize the loss function, optimizing the model step-by-step. The error of the current model is propagated back through the model to update the next tree.
- Learning Rate: A key parameter in Gradient Boosting is the learning rate. It controls how much each new tree is allowed to influence the final model. A lower learning rate often results in better generalization but requires more trees to achieve the same level of performance.
- XGBoost/LightGBM: These are highly optimized versions of Gradient Boosting. XGBoost (Extreme Gradient Boosting) and LightGBM (Light Gradient Boosting Machine) offer faster training, better regularization, and enhanced performance, especially on large datasets.
Objective:
Minimize the loss function (such as mean squared error for regression) through iterative corrections. Each new tree learns from the residuals (errors) of the previous tree.
In business terms:
Gradient Boosting works like hiring a new team member who focuses on fixing the mistakes made by the previous team member. Over time, as more team members are added, the overall performance improves dramatically by addressing areas of weakness.
📖 Click to Expand - Suitability Checklist
Criterion | Comment |
---|---|
Interpretability | ❌ Low – ensembles are complex; interpretability requires SHAP or similar tools |
Linearity Expectation | ✅ No – captures highly non-linear relationships |
High dimensionality | ✅ Excellent – handles large feature sets with built-in feature importance and regularization |
Handling of multicollinearity | ✅ Robust – tends to pick one feature among correlated ones; not harmed significantly |
Handling of categorical features | ⚠️ Requires encoding – label encoding usually sufficient; LightGBM supports native categorical handling |
Handling of outliers | ✅ Robust – tree splits reduce outlier influence significantly |
Handling of missing values | ✅ Supported – both XGBoost and LightGBM handle missing values natively during tree construction |
Scaling of features needed | ✅ Not needed – models are scale-invariant |
Handling of sparseness in data | ✅ Excellent – built-in support for sparse data (e.g., in XGBoost and LightGBM) |
Skewed target distribution | ✅ Handles well – flexible enough to model skewed targets without assumptions |
Assumes normality of residuals? | ❌ No – fully non-parametric, no assumptions on residual distribution |
- General comment on accuracy: ✅ Top-tier – often best-in-class for structured/tabular regression problems
- General comment on training speed: ⚠️ Slower than simpler models – but optimized and scalable (especially with LightGBM and GPU support)
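A minimal sketch of the boosting workflow described above, assuming the earlier train/test split and that the `xgboost` package is installed; `n_estimators=200` and `learning_rate=0.1` are illustrative settings (a lower learning rate typically needs more trees).
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import r2_score, mean_absolute_error
import xgboost as xgb  # assumes the xgboost package is installed
# n_estimators and learning_rate are illustrative, untuned values
gbr = GradientBoostingRegressor(n_estimators=200, learning_rate=0.1, random_state=42)
gbr.fit(X_train, y_train)
xgbr = xgb.XGBRegressor(n_estimators=200, learning_rate=0.1, random_state=42)
xgbr.fit(X_train, y_train)
print("🚀 Gradient Boosting (sklearn)")
print(f"Test R²: {r2_score(y_test, gbr.predict(X_test)):.3f}")
print(f"Test MAE: {mean_absolute_error(y_test, gbr.predict(X_test)):.2f}")
print("🚀 XGBoost")
print(f"Test R²: {r2_score(y_test, xgbr.predict(X_test)):.3f}")
print(f"Test MAE: {mean_absolute_error(y_test, xgbr.predict(X_test)):.2f}")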
⚙️ Algorithms - Support Vector Machines (SVR)¶
📖 Click to Expand
- Support Vector Machines (SVM) for regression (SVR) is a powerful algorithm that attempts to find a hyperplane that best fits the data while allowing for some margin of error. The idea is to maximize the margin between the data points and the hyperplane while keeping the model complexity low.
How it works:
- Maximizing the Margin: The SVR model searches for a hyperplane that not only separates the data points but also ensures that the margin between the data points and the hyperplane is maximized.
- Epsilon-Insensitive Loss: In SVR, the margin of error is controlled by a parameter called epsilon. Points that lie within this margin are considered as correctly predicted, while points outside the margin are penalized.
- Kernel Trick: SVR can be extended to non-linear regression by using the kernel trick. This allows SVR to operate in a high-dimensional space, making it capable of handling complex, non-linear relationships.
- Common kernels include the linear, polynomial, and radial basis function (RBF) kernels.
Objective:
Minimize the regularized loss function, which penalizes data points that lie outside the epsilon margin and tries to find the optimal hyperplane.
In business terms:
SVR works like finding the best-fit line (or hyperplane) that approximates the data while allowing some flexibility for errors. The goal is to minimize prediction errors while ensuring that the model is not overly sensitive to small variations.
📖 Click to Expand - Suitability Checklist
Criterion | Comment |
---|---|
Interpretability | ❌ Low – lacks clear coefficients or rule-based structure |
Linearity Expectation | ⚠️ Depends – linear SVR assumes linearity; kernel SVR handles non-linear patterns |
High dimensionality | ✅ Very good – effective in high-dimensional spaces, especially with appropriate kernels |
Handling of multicollinearity | ⚠️ Somewhat handles – regularization helps, but not designed to resolve multicollinearity explicitly |
Handling of categorical features | ❌ Not supported natively – requires proper encoding and kernel compatibility |
Handling of outliers | ❌ Sensitive – margin-based loss can be distorted by outliers |
Handling of missing values | ❌ Not supported – must impute missing data beforehand |
Scaling of features needed | ✅ Yes – essential for stable and effective optimization (especially with RBF kernel) |
Handling of sparseness in data | ✅ Good – works well in sparse high-dimensional cases (e.g., text with linear kernel) |
Skewed target distribution | ⚠️ Handles better than OLS, but predictions are bounded by ε-margin; tuning required |
Assumes normality of residuals? | ❌ No – non-parametric and does not rely on residual distribution assumptions |
- General comment on accuracy: ✅ High – strong performance with well-tuned kernel and hyperparameters
- General comment on training speed: ❌ Slow – especially with large datasets or non-linear kernels
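A minimal sketch with feature scaling (which the checklist above flags as essential), assuming the earlier train/test split; the RBF kernel, `C=1.0`, and `epsilon=0.1` are illustrative defaults, and training may be slow on the full training set — subsampling is a common workaround for quick experiments.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import r2_score, mean_absolute_error
# Scaling is essential for SVR; kernel, C, and epsilon are illustrative, not tuned
svr = make_pipeline(StandardScaler(), SVR(kernel="rbf", C=1.0, epsilon=0.1))
svr.fit(X_train, y_train)
y_svr_test = svr.predict(X_test)
print("⚙️ Support Vector Regression (RBF)")
print(f"Test R²: {r2_score(y_test, y_svr_test):.3f}")
print(f"Test MAE: {mean_absolute_error(y_test, y_svr_test):.2f}")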
🧠 Algorithms - K-Nearest Neighbors (KNN)¶
📖 Click to Expand
- K-Nearest Neighbors (KNN) is a simple, non-parametric algorithm used for both classification and regression. The algorithm makes predictions based on the k closest training samples in the feature space. The idea is that similar data points are likely to have similar target values.
How it works:
- Distance Metric: KNN calculates the distance between the query point and all the points in the dataset. Common distance metrics include Euclidean, Manhattan, and Minkowski distances.
- Prediction for Regression: In KNN regression, the prediction is the average of the target values of the k nearest neighbors.
- For example, if `k=3`, the prediction for a query point is the average of the target values of the three nearest points.
- Prediction for Classification: In KNN classification, the prediction is determined by the majority vote from the k nearest neighbors.
- Choosing k: The value of k determines how many neighbors are considered. A small value of k can make the model sensitive to noise, while a larger k can smooth the prediction but may miss fine details.
Objective:
Minimize the prediction error by considering the closest data points and averaging or voting to generate predictions.
In business terms:
KNN works like asking your closest neighbors for advice. The final decision is based on the majority opinion or average of your nearest peers. It's simple but effective when the data is well-structured and doesn't have too many dimensions (features).
📖 Click to Expand - Suitability Checklist
Criterion | Comment |
---|---|
Interpretability | ⚠️ Moderate – intuitive to explain, but no learned model or coefficients |
Linearity Expectation | ✅ No – models non-linear patterns based on local neighborhoods |
High dimensionality | ❌ Poor – suffers from the curse of dimensionality; distance metrics lose meaning |
Handling of multicollinearity | ❌ Problematic – redundant features distort distance-based similarity |
Handling of categorical features | ❌ Not natively supported – requires encoding and careful distance handling |
Handling of outliers | ❌ Sensitive – local predictions easily skewed by nearby outliers |
Handling of missing values | ❌ Not supported – requires complete cases or imputation beforehand |
Scaling of features needed | ✅ Yes – crucial, since all predictions are based on distance |
Handling of sparseness in data | ❌ Poor – sparse vectors make nearest neighbor distances unreliable |
Skewed target distribution | ⚠️ Affected – highly skewed targets can lead to biased local averages |
Assumes normality of residuals? | ❌ No – non-parametric, no assumptions on residuals or distributions |
- General comment on accuracy: ⚠️ Variable – performs well with clean, low-dimensional, dense data; poor elsewhere
- General comment on training speed: ✅ Fast training (lazy learner), ❌ Slow inference – computes distances at prediction time
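A minimal sketch of KNN regression on the earlier train/test split, with scaling in a pipeline since all predictions are distance-based; `n_neighbors=5` is an illustrative choice of k.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import r2_score, mean_absolute_error
# Distance-based model → scaling is crucial; k=5 is an illustrative neighborhood size
knn = make_pipeline(StandardScaler(), KNeighborsRegressor(n_neighbors=5))
knn.fit(X_train, y_train)
y_knn_test = knn.predict(X_test)
print("🧠 K-Nearest Neighbors Regression")
print(f"Test R²: {r2_score(y_test, y_knn_test):.3f}")
print(f"Test MAE: {mean_absolute_error(y_test, y_knn_test):.2f}")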
🤖 Algorithms - Neural Networks (MLP Regressor)¶
📖 Click to Expand
- Neural Networks (MLP Regressor) are a class of machine learning algorithms inspired by the structure and functioning of the human brain. The Multilayer Perceptron (MLP) is a feedforward neural network that consists of multiple layers of neurons, including an input layer, one or more hidden layers, and an output layer.
How it works:
- Neurons: Each neuron receives input from previous layers, applies a weighted sum of the inputs, adds a bias, and then passes the result through an activation function (e.g., ReLU, Sigmoid, Tanh).
- Forward Propagation: During training, data passes through the network from input to output. The predicted output is compared to the true target, and the error is computed.
- Backpropagation: The error is propagated back through the network to adjust the weights and biases of the neurons, using an optimization algorithm like Stochastic Gradient Descent (SGD) or Adam.
- Layers: The hidden layers allow the network to model complex, non-linear relationships between input features and the target variable.
Objective:
Minimize the loss function (e.g., Mean Squared Error for regression) by adjusting the weights and biases of the neurons through backpropagation, using optimization techniques.
In business terms:
Neural Networks can be thought of as a highly flexible system capable of learning complex patterns. The deeper the network (i.e., the more layers), the better it can capture intricate relationships in the data, much like how the human brain processes and learns from experience.
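A minimal sketch of an MLP regressor on the earlier train/test split; the hidden layer sizes and `max_iter` are illustrative, and scaling is included because it generally helps the optimizer converge.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import r2_score, mean_absolute_error
# hidden_layer_sizes and max_iter are illustrative, untuned settings
mlp = make_pipeline(StandardScaler(), MLPRegressor(hidden_layer_sizes=(64, 32), max_iter=1000, random_state=42))
mlp.fit(X_train, y_train)
y_mlp_test = mlp.predict(X_test)
print("🤖 MLP Regression")
print(f"Test R²: {r2_score(y_test, y_mlp_test):.3f}")
print(f"Test MAE: {mean_absolute_error(y_test, y_mlp_test):.2f}")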
📊 Model Selection¶
📖 Click to Expand - Model Choice
🧠 Regression Model Selection Table (Based on Data Characteristics)
Linearly Separable | Correlation | Multicollinearity | Outliers | Recommended Models | Notes |
---|---|---|---|---|---|
✅ True | Low | ✅ True | ✅ True | XGBoost > Random Forest > Gradient Boosting | Strong with multicollinearity, handles outliers well. |
✅ True | Low | ✅ True | ✅ False | Ridge > Lasso > ElasticNet | Regularization helps with collinearity, robust against outliers. |
✅ True | Low | ❌ False | ✅ True | Random Forest > Decision Tree | Non-linear, robust to outliers. |
✅ True | High | ✅ True | ✅ True | XGBoost > Random Forest > Gradient Boosting | Handles correlation, outliers effectively. |
✅ True | High | ✅ True | ✅ False | Ridge > Lasso > ElasticNet | Regularization works well with high correlation and collinearity. |
✅ True | High | ❌ False | ✅ True | Random Forest > Decision Tree | Non-linear trees handle outliers well. |
❌ False | Low | ✅ True | ✅ True | XGBoost > Random Forest > Gradient Boosting | Boosting trees handle complex relationships and outliers. |
❌ False | Low | ✅ True | ✅ False | Ridge > Lasso > ElasticNet | Regularization and feature selection work well for complex data. |
❌ False | Low | ❌ False | ✅ True | Decision Tree > Random Forest | Handles non-linearity and outliers without heavy tuning. |
❌ False | High | ✅ True | ✅ True | XGBoost > Random Forest > Gradient Boosting | Best for complex relationships and outliers. |
❌ False | High | ✅ True | ✅ False | Random Forest > Decision Tree | Non-linear, handles collinearity and outliers well. |
❌ False | High | ❌ False | ✅ True | Random Forest > Decision Tree | Handles non-linear relationships, robust to outliers. |
❌ False | High | ❌ False | ✅ False | Decision Tree > Random Forest | Simple trees work well for non-linear relationships. |
📖 Click to Expand - Model Choice FlowChart
🧠 Model Selection Flowchart (Based on Data Characteristics)
🎯 Linearly Separable?
├── ✅ Yes
│   ├── 📉 Correlation = Low?
│   │   ├── ✅ Yes
│   │   │   ├── ✅ Multicollinearity Present?
│   │   │   │   ├── ✅ Yes → 🧬 Ridge > Lasso > ElasticNet
│   │   │   │   └── ❌ No → Ridge > Lasso > ElasticNet
│   │   │   └── ❌ No → Random Forest > Decision Tree
│   │   └── ❌ No → Random Forest > Decision Tree
│   └── 📉 Correlation = High?
│       ├── ✅ Yes
│       │   ├── ✅ Multicollinearity Present?
│       │   │   ├── ✅ Yes → XGBoost > Random Forest > Gradient Boosting
│       │   │   └── ❌ No → XGBoost > Random Forest > Gradient Boosting
│       │   └── ❌ No → XGBoost > Random Forest > Gradient Boosting
│       └── ❌ No
│           ├── ✅ Outliers Present?
│           │   ├── ✅ Yes → XGBoost > Random Forest > Gradient Boosting
│           │   └── ❌ No → Random Forest > Decision Tree
└── ❌ No
    ├── 📉 Correlation = Low?
    │   ├── ✅ Yes
    │   │   ├── ✅ Multicollinearity Present?
    │   │   │   ├── ✅ Yes → XGBoost > Random Forest > Gradient Boosting
    │   │   │   └── ❌ No → XGBoost > Random Forest > Gradient Boosting
    │   │   └── ❌ No → Decision Tree > Random Forest
    │   └── ❌ No → Random Forest > Decision Tree
    └── 📉 Correlation = High?
        ├── ✅ Yes → XGBoost > Random Forest > Gradient Boosting
        └── ❌ No
            ├── ✅ Outliers Present?
            │   ├── ✅ Yes → XGBoost > Random Forest > Gradient Boosting
            │   └── ❌ No → Random Forest > Decision Tree
🧠 Recommend Models¶
# Import necessary libraries for regression models
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
import xgboost as xgb
model_registry = {
"LinearRegression": LinearRegression(),
"Ridge": Ridge(),
"Lasso": Lasso(),
"ElasticNet": ElasticNet(),
"DecisionTreeRegressor": DecisionTreeRegressor(),
"RandomForestRegressor": RandomForestRegressor(),
"GradientBoostingRegressor": GradientBoostingRegressor(),
"XGBRegressor": xgb.XGBRegressor(),
"KNeighborsRegressor": KNeighborsRegressor(), # K-Nearest Neighbors (KNN)
"SVR": SVR(), # Support Vector Regression
"MLPRegressor": MLPRegressor(max_iter=1000) # Neural Network
}
def recommend_models_prediction(data_characteristics, verbose=True):
"""
Scores and ranks prediction models (for regression or other prediction tasks) based on data characteristics.
Prints the recommended order and rationales.
"""
from termcolor import colored
score = {}
rationale = {}
# Extract characteristics
target_info = data_characteristics.get("target_variable", {})
feature_info = data_characteristics.get("features", {})
is_linear = data_characteristics.get("relationships", {}).get("feature_target_linear_relationships", {})
imbalance = target_info.get("imbalance", False)
feature_type = feature_info.get("type", "")
corr = feature_info.get("correlation", "")
outliers = feature_info.get("outliers", False)
multicollinearity = feature_info.get("multicollinearity", False)
# --- Linear Regression ---
score["LinearRegression"] = 2
rationale["LinearRegression"] = ["Good for linear relationships"]
if all(is_linear.values()):
score["LinearRegression"] += 3
rationale["LinearRegression"].append("Linear relationships between features and target")
if feature_type == "continuous":
score["LinearRegression"] += 1
rationale["LinearRegression"].append("Features are continuous")
if outliers:
score["LinearRegression"] -= 1
rationale["LinearRegression"].append("Sensitive to outliers")
# --- Ridge Regression ---
score["Ridge"] = 3
rationale["Ridge"] = ["Handles collinearity well"]
if multicollinearity:
score["Ridge"] += 2
rationale["Ridge"].append("Features are correlated (multicollinearity)")
if outliers:
score["Ridge"] -= 1
rationale["Ridge"].append("Sensitive to outliers")
# --- Lasso Regression ---
score["Lasso"] = 2
rationale["Lasso"] = ["Performs feature selection"]
if multicollinearity:
score["Lasso"] += 1
rationale["Lasso"].append("Can handle correlated features with L1 regularization")
if outliers:
score["Lasso"] -= 1
rationale["Lasso"].append("Sensitive to outliers")
# --- ElasticNet ---
score["ElasticNet"] = 3
rationale["ElasticNet"] = ["Good balance between Ridge and Lasso"]
if multicollinearity:
score["ElasticNet"] += 2
rationale["ElasticNet"].append("Can handle collinearity well with both L1 and L2 regularization")
if outliers:
score["ElasticNet"] -= 1
rationale["ElasticNet"].append("Sensitive to outliers")
# --- Decision Tree Regressor ---
score["DecisionTreeRegressor"] = 3
rationale["DecisionTreeRegressor"] = ["Handles non-linear relationships"]
if outliers:
score["DecisionTreeRegressor"] += 1
rationale["DecisionTreeRegressor"].append("Robust to outliers")
if multicollinearity:
score["DecisionTreeRegressor"] += 1
rationale["DecisionTreeRegressor"].append("Can handle correlated features")
# --- Random Forest Regressor ---
score["RandomForestRegressor"] = 4
rationale["RandomForestRegressor"] = ["Handles non-linear relationships, robust to outliers"]
if outliers:
score["RandomForestRegressor"] += 1
rationale["RandomForestRegressor"].append("Robust to outliers")
if imbalance:
score["RandomForestRegressor"] += 1
rationale["RandomForestRegressor"].append("Bootstrap helps with imbalance")
# --- Gradient Boosting Regressor ---
score["GradientBoostingRegressor"] = 4
rationale["GradientBoostingRegressor"] = ["Strong performance for non-linear relationships"]
if outliers:
score["GradientBoostingRegressor"] += 1
rationale["GradientBoostingRegressor"].append("Robust to outliers")
if imbalance:
score["GradientBoostingRegressor"] += 1
rationale["GradientBoostingRegressor"].append("Boosting handles imbalance well")
# --- XGBoost ---
score["XGBRegressor"] = 5
rationale["XGBRegressor"] = ["Strong general-purpose model, robust to outliers and imbalance"]
if outliers:
score["XGBRegressor"] += 1
rationale["XGBRegressor"].append("Robust to outliers")
if imbalance:
score["XGBRegressor"] += 1
rationale["XGBRegressor"].append("scale_pos_weight helps with imbalance")
if multicollinearity:
score["XGBRegressor"] += 1
rationale["XGBRegressor"].append("Handles redundant features well")
# --- K-Nearest Neighbors (KNN) ---
score["KNeighborsRegressor"] = 2
rationale["KNeighborsRegressor"] = ["Simple, distance-based"]
if feature_type == "continuous":
score["KNeighborsRegressor"] += 1
rationale["KNeighborsRegressor"].append("Distance-based → works better on continuous features")
if outliers:
score["KNeighborsRegressor"] -= 2
rationale["KNeighborsRegressor"].append("Sensitive to outliers")
# --- Neural Network (MLP Regressor) ---
score["MLPRegressor"] = 3
rationale["MLPRegressor"] = ["Flexible model for complex relationships"]
if imbalance:
score["MLPRegressor"] += 1
rationale["MLPRegressor"].append("Can learn from imbalance with tuning")
if outliers:
score["MLPRegressor"] -= 1
rationale["MLPRegressor"].append("Sensitive to outliers")
# Sort by descending score
ranked_models = sorted(score.items(), key=lambda x: x[1], reverse=True)
ranked_model_names = [model for model, _ in ranked_models]
# Filter and reorder model_registry based on ranking
ranked_registry = {name: model_registry[name] for name in ranked_model_names if name in model_registry}
if verbose:
print("🚀 Recommended Model Evaluation Order:\n")
for i, name in enumerate(ranked_model_names, 1):
if name in model_registry:
prefix = colored(f"{i}. {name} (Score: {score[name]})", "green") if i <= 3 else f"{i}. {name} (Score: {score[name]})"
print(prefix)
for reason in rationale[name]:
print(f" ↪ {reason}")
print()
return ranked_model_names, ranked_registry
ranked_model_names, model_registry = recommend_models_prediction(data_characteristics, verbose=True)
" > ".join(ranked_model_names)
🚀 Recommended Model Evaluation Order: 1. XGBRegressor (Score: 7) ↪ Strong general-purpose model, robust to outliers and imbalance ↪ Robust to outliers ↪ Handles redundant features well 2. DecisionTreeRegressor (Score: 5) ↪ Handles non-linear relationships ↪ Robust to outliers ↪ Can handle correlated features 3. RandomForestRegressor (Score: 5) ↪ Handles non-linear relationships, robust to outliers ↪ Robust to outliers 4. GradientBoostingRegressor (Score: 5) ↪ Strong performance for non-linear relationships ↪ Robust to outliers 5. Ridge (Score: 4) ↪ Handles collinearity well ↪ Features are correlated (multicollinearity) ↪ Sensitive to outliers 6. ElasticNet (Score: 4) ↪ Good balance between Ridge and Lasso ↪ Can handle collinearity well with both L1 and L2 regularization ↪ Sensitive to outliers 7. Lasso (Score: 2) ↪ Performs feature selection ↪ Can handle correlated features with L1 regularization ↪ Sensitive to outliers 8. MLPRegressor (Score: 2) ↪ Flexible model for complex relationships ↪ Sensitive to outliers 9. LinearRegression (Score: 1) ↪ Good for linear relationships ↪ Sensitive to outliers 10. KNeighborsRegressor (Score: 0) ↪ Simple, distance-based ↪ Sensitive to outliers
'XGBRegressor > DecisionTreeRegressor > RandomForestRegressor > GradientBoostingRegressor > Ridge > ElasticNet > Lasso > MLPRegressor > LinearRegression > KNeighborsRegressor'
📈 Model Comparison¶
top_k = 5
for name in list(model_registry.keys())[:top_k][::-1]:
# Evaluate the top_k recommended models (ranked earlier) for focused comparison, in reverse rank order.
print(f"\n🔧 Training: {name}")
# Fit and predict
model = model_registry[name]
model.fit(X_train, y_train)
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)
# Evaluation summary using existing functions
print(f"Evaluation for {name}:")
# Evaluation Metrics
evaluate_model_performance(X_train, y_train, y_train_pred, X_test, y_test, y_test_pred, label=name)
# Predicted vs Actual
# plot_predicted_vs_actual(y_test, y_test_pred, title=f"{name} - Test Predicted vs Actual", data_type="Test Data")
# plot_predicted_vs_actual(y_train, y_train_pred, title=f"{name} - Train Predicted vs Actual", data_type="Train Data")
# Residual Plot
# plot_residuals_vs_predicted(y_test, y_test_pred, label="Test Data", plot_qq=True)
# plot_residuals_vs_predicted(y_train, y_train_pred, label="Train Data", plot_qq=True)
# Track and log best model
update_best_model_info(
model_name=name,
model_obj=model,
y_train=y_train,
y_test=y_test,
y_train_pred=y_train_pred,
y_test_pred=y_test_pred
)
print("—" * 80) # horizontal line
🔧 Training: Ridge
Evaluation for Ridge:
🔧 Ridge - Train Performance
R²: 0.610 → Explains 61% of variation in target
Adjusted R²: 0.610
MAPE: 31.49% → On average, we're off by this percentage
MAE: 0.53 → On average, we're off by ±$52953
RMSE: 0.72 → Penalizes big misses more heavily
MSE: 0.52 → Mean squared error
🧪 Ridge - Test Performance
R²: 0.591 → Explains 59% of variation in target
Adjusted R²: 0.590
MAPE: 31.98% → On average, we're off by this percentage
MAE: 0.53 → On average, we're off by ±$52970
RMSE: 0.74 → Penalizes big misses more heavily
MSE: 0.54 → Mean squared error
📌 Interpretation:
- High MAPE → model might need improvement.
- MAE is acceptable; the model is performing well.
- RMSE is acceptable; model is not penalizing large errors excessively.
- R² indicates model fit: 0.59
- Adjusted R² accounts for complexity: 0.59
✅ Ridge just beat previous best (Baseline Model) → r2: -0.0002 → 0.5911
————————————————————————————————————————————————————————————————————————————————
🔧 Training: GradientBoostingRegressor
Evaluation for GradientBoostingRegressor:
🔧 GradientBoostingRegressor - Train Performance
R²: 0.806 → Explains 81% of variation in target
Adjusted R²: 0.806
MAPE: 20.32% → On average, we're off by this percentage
MAE: 0.36 → On average, we're off by ±$35626
RMSE: 0.51 → Penalizes big misses more heavily
MSE: 0.26 → Mean squared error
🧪 GradientBoostingRegressor - Test Performance
R²: 0.781 → Explains 78% of variation in target
Adjusted R²: 0.781
MAPE: 21.68% → On average, we're off by this percentage
MAE: 0.37 → On average, we're off by ±$37125
RMSE: 0.54 → Penalizes big misses more heavily
MSE: 0.29 → Mean squared error
📌 Interpretation:
- High MAPE → model might need improvement.
- MAE is acceptable; the model is performing well.
- RMSE is acceptable; model is not penalizing large errors excessively.
- R² indicates model fit: 0.78
- Adjusted R² accounts for complexity: 0.78
✅ GradientBoostingRegressor just beat previous best (Ridge) → r2: 0.5911 → 0.7809
————————————————————————————————————————————————————————————————————————————————
🔧 Training: RandomForestRegressor
Evaluation for RandomForestRegressor:
🔧 RandomForestRegressor - Train Performance
R²: 0.973 → Explains 97% of variation in target
Adjusted R²: 0.973
MAPE: 6.91% → On average, we're off by this percentage
MAE: 0.12 → On average, we're off by ±$12382
RMSE: 0.19 → Penalizes big misses more heavily
MSE: 0.04 → Mean squared error
🧪 RandomForestRegressor - Test Performance
R²: 0.808 → Explains 81% of variation in target
Adjusted R²: 0.807
MAPE: 19.10% → On average, we're off by this percentage
MAE: 0.33 → On average, we're off by ±$32840
RMSE: 0.50 → Penalizes big misses more heavily
MSE: 0.25 → Mean squared error
📌 Interpretation:
- High MAPE → model might need improvement.
- MAE is acceptable; the model is performing well.
- RMSE is acceptable; model is not penalizing large errors excessively.
- R² indicates model fit: 0.81
- Adjusted R² accounts for complexity: 0.81
✅ RandomForestRegressor just beat previous best (GradientBoostingRegressor) → r2: 0.7809 → 0.8077
————————————————————————————————————————————————————————————————————————————————
🔧 Training: DecisionTreeRegressor
Evaluation for DecisionTreeRegressor:
🔧 DecisionTreeRegressor - Train Performance
R²: 1.000 → Explains 100% of variation in target
Adjusted R²: 1.000
MAPE: 0.00% → On average, we're off by this percentage
MAE: 0.00 → On average, we're off by ±$0
RMSE: 0.00 → Penalizes big misses more heavily
MSE: 0.00 → Mean squared error
🧪 DecisionTreeRegressor - Test Performance
R²: 0.602 → Explains 60% of variation in target
Adjusted R²: 0.602
MAPE: 26.05% → On average, we're off by this percentage
MAE: 0.47 → On average, we're off by ±$46693
RMSE: 0.73 → Penalizes big misses more heavily
MSE: 0.53 → Mean squared error
📌 Interpretation:
- High MAPE → model might need improvement.
- MAE is acceptable; the model is performing well.
- RMSE is acceptable; model is not penalizing large errors excessively.
- R² indicates model fit: 0.60
- Adjusted R² accounts for complexity: 0.60
⚠️ DecisionTreeRegressor did not improve the best score.
————————————————————————————————————————————————————————————————————————————————
🔧 Training: XGBRegressor
Evaluation for XGBRegressor:
🔧 XGBRegressor - Train Performance
R²: 0.947 → Explains 95% of variation in target
Adjusted R²: 0.947
MAPE: 10.80% → On average, we're off by this percentage
MAE: 0.19 → On average, we're off by ±$18888
RMSE: 0.27 → Penalizes big misses more heavily
MSE: 0.07 → Mean squared error
🧪 XGBRegressor - Test Performance
R²: 0.837 → Explains 84% of variation in target
Adjusted R²: 0.836
MAPE: 17.66% → On average, we're off by this percentage
MAE: 0.31 → On average, we're off by ±$30543
RMSE: 0.46 → Penalizes big misses more heavily
MSE: 0.22 → Mean squared error
📌 Interpretation:
- High MAPE → model might need improvement.
- MAE is acceptable; the model is performing well.
- RMSE is acceptable; model is not penalizing large errors excessively.
- R² indicates model fit: 0.84
- Adjusted R² accounts for complexity: 0.84
✅ XGBRegressor just beat previous best (RandomForestRegressor) → r2: 0.8077 → 0.8367
————————————————————————————————————————————————————————————————————————————————
# Print current best model based on regression metric (e.g., R², MSE, MAE)
print(f"\n🏆 Best model so far: {best_model_info['name']} "
f"({success_metric.upper()} = {best_model_info['metrics']['test'][success_metric]:.4f})")
print(f"\n📊 Model Ranking by {success_metric.upper()}:\n")
ranked = sorted(
model_results.items(),
key=lambda x: x[1]["metrics"]["test"][success_metric],
reverse=True
)
for i, (name, result) in enumerate(ranked, 1):
score = result["metrics"]["test"][success_metric]
print(f"{i}. {name:<30} {success_metric}: {score:.4f}")
🏆 Best model so far: XGBRegressor (R2 = 0.8367) 📊 Model Ranking by R2: 1. XGBRegressor r2: 0.8367 2. RandomForestRegressor r2: 0.8077 3. GradientBoostingRegressor r2: 0.7809 4. DecisionTreeRegressor r2: 0.6023 5. Ridge r2: 0.5911 6. Baseline Model r2: -0.0002
import plotly.graph_objects as go
import plotly.subplots as sp
import pandas as pd
# Extract test metrics for regression
df_results = pd.DataFrame({
model_name: data["metrics"]["test"]
for model_name, data in model_results.items()
}).T
# Metrics you'd like to plot for regression
desired_metrics = ['r2', 'adjusted_r2', 'mse', 'rmse', 'mae', 'mape']
# Filter only those that exist in df_results
metrics = [m for m in desired_metrics if m in df_results.columns]
# Create subplot layout
rows = (len(metrics) + 1) // 2
fig = sp.make_subplots(rows=rows, cols=2, subplot_titles=[m.upper() for m in metrics])
# Plot each available metric
for i, metric in enumerate(metrics):
row, col = divmod(i, 2)
fig.add_trace(
go.Bar(
x=df_results.index,
y=df_results[metric],
name=metric,
text=pd.to_numeric(df_results[metric], errors="coerce").round(3),
textposition="auto"
),
row=row+1, col=col+1
)
fig.update_layout(
height=300 * rows,
width=1000,
title_text="Model Comparison by Regression Metrics",
showlegend=False
)
fig.show();
📊 Feature Importance¶
import pandas as pd
import matplotlib.pyplot as plt
def plot_feature_importance(model=None, feature_names=None, top_n=10, model_name=None):
"""
Plots top N feature importances for regression models.
Defaults to best_model_info['model'] unless overridden.
Optionally takes a model_name for the plot title.
"""
if model is None:
model = best_model_info["model"]
model_name = best_model_info.get("name", "Best Model") if model_name is None else model_name
else:
model_name = model_name or "Selected Model"
if feature_names is None:
feature_names = X_train.columns
if hasattr(model, "feature_importances_"): # For tree-based models like Random Forest or XGBoost
importance = model.feature_importances_
elif hasattr(model, "coef_"): # For linear models like Linear Regression, Lasso, Ridge
importance = model.coef_
else:
raise ValueError("Model does not support feature importances or coefficients")
importance_df = pd.DataFrame({
"Feature": feature_names,
"Importance": importance
}).sort_values(by="Importance", ascending=False).head(top_n)
plt.figure(figsize=(8, 5))
plt.barh(importance_df["Feature"][::-1], importance_df["Importance"][::-1])
for i, (feature, importance) in enumerate(zip(importance_df["Feature"][::-1], importance_df["Importance"][::-1])):
plt.text(importance + 0.005, i, f"{importance:.3f}", va='center')
plt.title(f"Top Feature Importances ({model_name})")
plt.xlabel("Importance Score")
plt.tight_layout()
plt.show()
return list(importance_df["Feature"])
# ✅ Default: plot for best model
imp_ranked = plot_feature_importance()
# 🛠️ Optional: override model + title
# alt_model = model_results["Random Forest"]["model"]
# imp_ranked = plot_feature_importance(model=alt_model, model_name="Random Forest")
🧬 SHAP Values¶
import shap
import numpy as np
import pandas as pd
def plot_shap_summary_tree(model=None, X=None, model_name=None):
"""
Plot SHAP summary for tree-based regression models (RandomForestRegressor, XGBoost).
Defaults to best_model_info['model'] and X_test.
"""
if model is None:
model = best_model_info["model"]
model_name = model_name or best_model_info.get("name", "Best Model")
else:
model_name = model_name or "Selected Model"
if X is None:
X = X_test
# Use SHAP TreeExplainer for tree-based models
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X)
# SHAP values for regression are continuous, so no need to index for binary classification
shap.summary_plot(shap_values, X)
print(f"\n📌 SHAP Summary for {model_name}:")
print("- Each bar shows how much that feature influences the model’s prediction.")
print("- Features at the top are the most impactful across all predictions.")
print("- Blue/red indicate direction: does the feature push the prediction up or down?")
print("- Helps us understand *why* the model predicts a certain value — not just *what* it predicts.")
# Calculate the mean absolute SHAP values for each feature to rank them
shap_df = pd.DataFrame(np.abs(shap_values), columns=X.columns).mean().sort_values(ascending=False)
# Return the ranked feature names based on SHAP importance
return list(shap_df.index)
# ✅ Default: SHAP for best model
shap_ranked = plot_shap_summary_tree()
# 🛠️ Optional: SHAP for any other model
# alt_model = model_results["Random Forest Regressor"]["model"]
# shap_ranked = plot_shap_summary_tree_regression(model=alt_model, model_name="Random Forest Regressor")
📌 SHAP Summary for XGBRegressor: - Each bar shows how much that feature influences the model’s prediction. - Features at the top are the most impactful across all predictions. - Blue/red indicate direction: does the feature push the prediction up or down? - Helps us understand *why* the model predicts a certain value — not just *what* it predicts.
🎯 Fine Tune¶
📖 Click to Expand
- Hyperparameters are settings you choose before training a model (e.g., how much regularization to apply).
- Unlike model coefficients, hyperparameters aren’t learned from the data — they need to be tuned manually or via search.
- In regularized models, `alpha` controls how strong the penalty is:
  - A small alpha behaves like linear regression
  - A large alpha shrinks everything too much → underfit
Why tune?
- Tuning finds the best tradeoff between bias and variance
- Prevents both underfitting and overfitting
- Improves model generalization on unseen data
You typically use:
- GridSearchCV: Tries every combo of values (good for small grids)
- RandomizedSearchCV: Tries random combos (faster for large grids)
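As a minimal sketch of the GridSearchCV workflow described above (assuming the earlier train/test split is in scope), the snippet below tunes `alpha` for Ridge; the grid values are illustrative. For larger grids, swapping in RandomizedSearchCV with a fixed `n_iter` keeps the search tractable.
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
# Illustrative grid of alpha values; cv=5 runs 5-fold cross-validation on the training set
param_grid = {"alpha": [0.01, 0.1, 1.0, 10.0, 100.0]}
grid = GridSearchCV(Ridge(), param_grid, scoring="r2", cv=5)
grid.fit(X_train, y_train)
print(f"Best alpha: {grid.best_params_['alpha']}")
print(f"Best CV R²: {grid.best_score_:.3f}")
print(f"Test R²: {grid.best_estimator_.score(X_test, y_test):.3f}")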
🧪 Feature Selection – RFE¶
📖 Click to Expand
📊 What is RFE (Recursive Feature Elimination)?
RFE is a feature selection technique that helps in identifying the most important features for model performance. It recursively removes the least important features, evaluating model performance at each step.
🔄 How Does RFE Work?
- Model Training: Initially, the model is trained on all features.
- Feature Ranking: Features are ranked based on their importance to the model’s performance. The least important ones are eliminated.
- Recursive Process: This process continues by recursively removing the least important features and retraining the model.
🔧 Performance Evaluation
- The model's performance is evaluated at each step using R² or another suitable metric.
- If performance begins to drop significantly, the process stops. The number of selected features can be controlled by setting `n_features_to_select`.
🚀 Why Use RFE?
- Improves Model Generalization: By removing irrelevant or redundant features, RFE helps prevent overfitting.
- Better Interpretability: It makes the model more interpretable by focusing on the most important features.
- Increased Efficiency: Reducing the number of features helps in building a simpler, faster model.
from sklearn.feature_selection import RFE
from sklearn.metrics import r2_score
import numpy as np
def perform_rfe_with_evaluation(model, X_train, y_train, X_test, y_test, n_features_to_select_range=None):
"""
Perform Recursive Feature Elimination (RFE) for feature selection and evaluate performance.
Parameters:
- model: The regression model to use (e.g., RandomForestRegressor, LinearRegression).
- X_train: Training data (features).
- y_train: Training data (target variable).
- X_test: Test data (features).
- y_test: Test data (target variable).
- n_features_to_select_range: Range of number of features to select. If None, it will try selecting all features.
Returns:
- best_selected_features: List of features that result in the best performance.
"""
if n_features_to_select_range is None:
n_features_to_select_range = range(1, X_train.shape[1] + 1)
best_score = -np.inf
best_selected_features = None
score_history = []
# Loop through different feature counts to find the best number of features
for n_features in n_features_to_select_range:
rfe = RFE(estimator=model, n_features_to_select=n_features)
rfe.fit(X_train, y_train)
# Get selected features
selected_features = X_train.columns[rfe.support_]
# Drop unselected features from the dataset
X_train_selected = X_train[selected_features]
X_test_selected = X_test[selected_features]
# Train the model with the selected features
model.fit(X_train_selected, y_train)
y_test_pred = model.predict(X_test_selected)
# Evaluate model performance (using R² or other metrics)
current_score = r2_score(y_test, y_test_pred)
score_history.append((n_features, current_score))
# If the current score is better, update the best model info
if current_score > best_score:
best_score = current_score
best_selected_features = selected_features
print(f"Selected {n_features} features → R²: {current_score:.4f}")
# Store the best model with the optimal number of features
print(f"\n🎯 Best features selection: {len(best_selected_features)} features → R²: {best_score:.4f}")
# Return the best selected features and the score history
return best_selected_features, score_history
# Assuming the best model is stored in best_model_info["model"]
best_model = best_model_info["model"]
# Perform RFE with evaluation to find the best number of features
best_selected_features, score_history = perform_rfe_with_evaluation(
best_model, X_train, y_train, X_test, y_test, n_features_to_select_range=range(1, 11)
)
Selected 1 features → R²: 0.4765
Selected 2 features → R²: 0.5590
Selected 3 features → R²: 0.7033
Selected 4 features → R²: 0.8300
Selected 5 features → R²: 0.8356
Selected 6 features → R²: 0.8401
Selected 7 features → R²: 0.8359
Selected 8 features → R²: 0.8367
Selected 9 features → R²: 0.8367
Selected 10 features → R²: 0.8367
🎯 Best features selection: 6 features → R²: 0.8401
# Ensure that we use only the best selected features for both train and test data
X_train_selected = X_train[best_selected_features]
X_test_selected = X_test[best_selected_features]
# Now train and evaluate the model using the selected features
best_model.fit(X_train_selected, y_train)
y_train_pred = best_model.predict(X_train_selected)
y_test_pred = best_model.predict(X_test_selected)
evaluate_model_performance(X_train_selected, y_train, y_train_pred, X_test_selected, y_test, y_test_pred, label="Best Model After RFE")
# plot_predicted_vs_actual(y_test, y_test_pred, title="Best Model After RFE - Test Predicted vs Actual", data_type="Test Data")
# plot_residuals_vs_predicted(y_test, y_test_pred, label="Test Data", plot_qq=True)
update_best_model_info(
model_name="Best Model After RFE",
model_obj=best_model,
y_train=y_train,
y_test=y_test,
y_train_pred=y_train_pred,
y_test_pred=y_test_pred,
mask=True
)
🔧 Best Model After RFE - Train Performance
R²: 0.941 → Explains 94% of variation in target
Adjusted R²: 0.941
MAPE: 11.23% → On average, we're off by this percentage
MAE: 0.20 → On average, we're off by ±$19613
RMSE: 0.28 → Penalizes big misses more heavily
MSE: 0.08 → Mean squared error
🧪 Best Model After RFE - Test Performance
R²: 0.840 → Explains 84% of variation in target
Adjusted R²: 0.840
MAPE: 17.49% → On average, we're off by this percentage
MAE: 0.30 → On average, we're off by ±$30477
RMSE: 0.46 → Penalizes big misses more heavily
MSE: 0.21 → Mean squared error
📌 Interpretation:
- High MAPE → model might need improvement.
- MAE is acceptable; the model is performing well.
- RMSE is acceptable; model is not penalizing large errors excessively.
- R² indicates model fit: 0.84
- Adjusted R² accounts for complexity: 0.84
✅ Best Model After RFE just beat previous best (XGBRegressor) → r2: 0.8367 → 0.8401
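The loop above picks the feature count by re-scoring on the test set at every step. As a lighter-weight alternative (a sketch, assuming best_model_info and the X_train / y_train objects from earlier cells), scikit-learn's RFECV chooses the number of features by cross-validation on the training data only:
from sklearn.feature_selection import RFECV

# RFECV cross-validates every candidate feature count and picks the best one itself
rfecv = RFECV(
    estimator=best_model_info["model"],  # any estimator exposing feature_importances_ or coef_
    step=1,                              # drop one feature per iteration
    cv=5,
    scoring="r2",
)
rfecv.fit(X_train, y_train)

print("Optimal number of features:", rfecv.n_features_)
print("Selected features:", list(X_train.columns[rfecv.support_]))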
🧪 Feature Selection – RFE + SHAP¶
📖 Click to Expand
📊 What is RFE + SHAP?
Recursive Feature Elimination (RFE) combined with SHAP (SHapley Additive exPlanations) is an advanced feature selection method. It combines two techniques:
- RFE recursively eliminates the least important features based on model performance.
- SHAP provides feature importance scores that explain how each feature affects the model's predictions.
By integrating RFE with SHAP, we can prioritize features based on both performance (from RFE) and explanation (from SHAP), selecting features that are both important and informative for the model.
🔄 How Does RFE + SHAP Work?
- SHAP Ranking:
- First, SHAP values are computed for the dataset. This gives a ranking of the features based on their importance in driving the model's predictions.
- RFE with SHAP:
- RFE uses the SHAP ranking to iteratively remove the least important features. At each step, it evaluates the performance of the model using only the selected features.
- Early Stopping:
- The elimination process stops when performance drops significantly (e.g., based on R² or other metrics), or when the minimum number of features is reached.
🚀 Why Combine RFE and SHAP?
- RFE alone can be too simplistic, often removing important features without considering their true impact.
- SHAP alone can explain feature importance but doesn’t directly help in eliminating features iteratively.
- Combining RFE and SHAP ensures that we not only select important features but also maintain model performance, allowing us to optimize the feature set.
🔧 What Happens in the Code?
- SHAP Calculation:
- We first use SHAP to calculate the importance of each feature.
- RFE:
- We then use RFE to iteratively remove features based on SHAP ranking.
- Performance Tracking:
- We evaluate the model's performance after each feature elimination using a metric like R².
- If performance drops significantly, we stop the feature elimination process early.
- Final Selected Features:
- The features that lead to the best model performance are selected, and the final set is used to retrain the model.
from sklearn.base import clone
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import shap
import numpy as np
def shap_guided_backward_elimination(
model=None, X=None, y=None,
shap_ranked=None,
metric_name=None,
drop_threshold=0.005,
min_features=1,
verbose=True
):
"""
SHAP-guided backward elimination with early stopping for prediction tasks.
Drops features by SHAP rank until performance drops significantly or hits min_features.
"""
# Use the best model stored programmatically by default
model_base = clone(model or best_model_info["model"])
X_curr = X.copy()
# Determine the evaluation metric
metric_name = metric_name or "r2" # Default to R²
# Note: the early-stopping and "best config" logic below assumes higher-is-better scores,
# so error metrics like "mse"/"mae" would need to be negated before use
metric_func = {
"r2": r2_score,
"mse": mean_squared_error,
"mae": mean_absolute_error
}.get(metric_name)
if metric_func is None:
raise ValueError(f"Unsupported metric: {metric_name}")
# Get SHAP-ranked features if not provided
if shap_ranked is None:
explainer = shap.TreeExplainer(model_base.fit(X_curr, y))
shap_values = explainer.shap_values(X_curr)
shap_importance = np.abs(shap_values).mean(axis=0)
shap_ranked = X_curr.columns[np.argsort(shap_importance)[::-1]].tolist()
else:
shap_ranked = shap_ranked.copy()
# Initialize tracking variables
score_history = []
previous_score = None
# Backward feature elimination loop
while len(shap_ranked) >= min_features:
model = clone(model_base)
model.fit(X_curr[shap_ranked], y)
y_train_pred = model.predict(X_curr[shap_ranked])
y_test_pred = model.predict(X_test[shap_ranked]) # X_test / y_test come from the earlier split; only the test score drives elimination
score = metric_func(y_test, y_test_pred)
score_history.append((len(shap_ranked), score, shap_ranked.copy()))
# Visualize performance
if verbose:
feat_list = ", ".join(shap_ranked)
print(f"✅ {len(shap_ranked)} features → {metric_name}: {score:.4f} → [{feat_list}]")
# Early stop if score drops significantly
if previous_score is not None and (previous_score - score) > drop_threshold:
if verbose:
print(f"🛑 Stopping early: {metric_name} dropped from {previous_score:.4f} to {score:.4f}")
break
previous_score = score
shap_ranked.pop() # Drop the least important SHAP feature
if not score_history:
raise ValueError("No elimination steps executed — shap_ranked too short or invalid inputs.")
# Best configuration
tolerance = 0.01 # Accept configurations within 0.01 (absolute) of the best score
best_score = max(score_history, key=lambda x: x[1])[1]
# Keep all configurations that are within tolerance
candidates = [cfg for cfg in score_history if (best_score - cfg[1]) <= tolerance]
# Pick the configuration with the fewest features
best_config = min(candidates, key=lambda x: x[0])
print(f"\n🎯 Best config: {len(best_config[2])} features → {metric_name}: {best_config[1]:.4f}")
# Return the best selected features and the score history
return best_config[2], score_history
# Example usage:
# Use the best model from the programmatically stored best_model_info
best_model = best_model_info["model"] # This gets the best model programmatically stored
# Optionally, pass an alternative model by uncommenting the following line
# alt_model = model_results["Random Forest Regressor"]["model"] # Use a different model if desired
# Run the SHAP-guided backward elimination for feature selection
best_selected_features, score_history = shap_guided_backward_elimination(
model=best_model, X=X_train, y=y_train, metric_name=success_metric, drop_threshold=0.01, min_features=1
)
✅ 8 features → r2: 0.8365 → [Latitude, Longitude, MedInc, AveOccup, AveRooms, HouseAge, Population, AveBedrms]
✅ 7 features → r2: 0.8378 → [Latitude, Longitude, MedInc, AveOccup, AveRooms, HouseAge, Population]
✅ 6 features → r2: 0.8393 → [Latitude, Longitude, MedInc, AveOccup, AveRooms, HouseAge]
✅ 5 features → r2: 0.8352 → [Latitude, Longitude, MedInc, AveOccup, AveRooms]
✅ 4 features → r2: 0.8306 → [Latitude, Longitude, MedInc, AveOccup]
✅ 3 features → r2: 0.8305 → [Latitude, Longitude, MedInc]
✅ 2 features → r2: 0.7673 → [Latitude, Longitude]
🛑 Stopping early: r2 dropped from 0.8305 to 0.7673
🎯 Best config: 3 features → r2: 0.8305
# Ensure that we use only the best selected features for both train and test data
X_train_selected = X_train[best_selected_features]
X_test_selected = X_test[best_selected_features]
# Now train and evaluate the model using the selected features
best_model.fit(X_train_selected, y_train)
y_train_pred = best_model.predict(X_train_selected)
y_test_pred = best_model.predict(X_test_selected)
# Visual evaluation (reuse the functions for evaluation)
evaluate_model_performance(X_train_selected, y_train, y_train_pred, X_test_selected, y_test, y_test_pred, label="Best Model After SHAP-guided RFE")
# plot_predicted_vs_actual(y_test, y_test_pred, title="Best Model After SHAP-guided RFE - Test Predicted vs Actual", data_type="Test Data")
# plot_residuals_vs_predicted(y_test, y_test_pred, label="Test Data", plot_qq=True)
# Update the best model info after feature selection (if it improves performance)
update_best_model_info(
model_name="Best Model + SHAP-guided RFE",
model_obj=best_model,
y_train=y_train,
y_test=y_test,
y_train_pred=y_train_pred,
y_test_pred=y_test_pred,
mask=True
)
🔧 Best Model After SHAP-guided RFE - Train Performance
R²: 0.907 → Explains 91% of variation in target
Adjusted R²: 0.907
MAPE: 13.53% → On average, we're off by this percentage
MAE: 0.24 → On average, we're off by ±$23961
RMSE: 0.35 → Penalizes big misses more heavily
MSE: 0.12 → Mean squared error
🧪 Best Model After SHAP-guided RFE - Test Performance
R²: 0.830 → Explains 83% of variation in target
Adjusted R²: 0.830
MAPE: 17.74% → On average, we're off by this percentage
MAE: 0.31 → On average, we're off by ±$31099
RMSE: 0.47 → Penalizes big misses more heavily
MSE: 0.22 → Mean squared error
📌 Interpretation:
- High MAPE → model might need improvement.
- MAE is acceptable; the model is performing well.
- RMSE is acceptable; model is not penalizing large errors excessively.
- R² indicates model fit: 0.83
- Adjusted R² accounts for complexity: 0.83
⚠️ Best Model + SHAP-guided RFE did not improve the best score.
🧪 Grid Search¶
📖 Click to Expand
🔄 What is Grid Search?
Grid Search is a method used to tune hyperparameters of a machine learning model by searching through a specified set of hyperparameters. It evaluates the model's performance on each combination of hyperparameters and selects the one that performs the best based on a defined performance metric.
🔧 Steps Taken Internally in Grid Search:
- Define Hyperparameter Grid:
- First, we define a grid of hyperparameters that we want to tune. These hyperparameters are typically the ones that influence model training, such as:
- Number of estimators (e.g., n_estimators for tree-based models)
- Maximum depth (e.g., max_depth for decision trees)
- Regularization strength (e.g., alpha for Ridge/Lasso)
- In this step, you also specify the possible values for each parameter.
- Model Initialization:
- The model to be optimized (e.g., Linear Regression, RandomForest, XGBoost, etc.) is selected.
- A base model is created using the chosen hyperparameters, which is the starting point for the search.
- Exhaustive Search:
- Grid Search iterates over all combinations of the specified hyperparameters in the grid.
- For example, if you're tuning 2 hyperparameters with 3 possible values each, Grid Search will evaluate 3 × 3 = 9 combinations.
- It fits the model on the training data for each combination of hyperparameters and evaluates performance using cross-validation (CV).
- Cross-validation splits the training data into multiple folds, training the model on some folds and validating on the others, to reduce the risk of overfitting.
- The performance is measured using a scoring metric (like R², MSE, or accuracy depending on the task). The score is calculated for each combination.
- Model Evaluation:
- After fitting and evaluating all possible combinations, Grid Search identifies the combination of hyperparameters that results in the best performance (e.g., highest R² or lowest MSE).
- This step ensures that the model with the optimal parameters is selected.
- Best Model Selection:
- Grid Search selects the best-performing model from all evaluated models (i.e., the one with the best hyperparameters).
- The best parameters found by Grid Search are used to retrain the model on the entire training dataset.
- Final Model:
- The final model, with the best hyperparameters, is returned as the result.
- This model is now ready for further evaluation or deployment.
🧑🔬 Why Use Grid Search?
- Exhaustive Search: Unlike random search methods, Grid Search checks every possible combination of hyperparameters, ensuring that the best option is found (although this can be computationally expensive).
- Improved Model: By fine-tuning the hyperparameters, Grid Search helps to optimize model performance and increase accuracy.
- Scalability: Grid Search is scalable, meaning it can handle multiple hyperparameters and their respective value ranges.
⚠️ Limitations:
- Computation Time: The main downside of Grid Search is its computational cost, especially if the grid is large or the model is slow to train.
- Overfitting Risk: While Grid Search helps optimize the model, it can still overfit if tuning is not carefully managed (especially if the test set, rather than cross-validation, is used to pick hyperparameters).
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import shap
import numpy as np
# Define param grid for prediction models (similar to classification)
param_grids = {
"LinearRegression": {
"fit_intercept": [True, False],
"normalize": [True, False] # deprecated in newer versions of scikit-learn, but included for older compatibility
},
"Ridge": {
"alpha": [0.1, 1, 10], # regularization strength
"solver": ["auto", "svd", "cholesky", "lsqr"]
},
"Lasso": {
"alpha": [0.1, 1, 10],
"max_iter": [1000, 5000]
},
"ElasticNet": {
"alpha": [0.1, 1, 10],
"l1_ratio": [0.1, 0.5, 0.9]
},
"RandomForestRegressor": {
"n_estimators": [100, 200],
"max_depth": [None, 5, 10],
"min_samples_split": [2, 5],
"min_samples_leaf": [1, 2]
},
"DecisionTreeRegressor": {
"max_depth": [None, 5, 10],
"min_samples_split": [2, 5],
"min_samples_leaf": [1, 2],
"criterion": ["mse", "friedman_mse"]
},
"XGBRegressor": {
"n_estimators": [100, 200],
"max_depth": [3, 5, 7],
"learning_rate": [0.01, 0.1],
"subsample": [0.8, 1.0],
"colsample_bytree": [0.8, 1.0]
},
"KNeighborsRegressor": {
"n_neighbors": [3, 5, 7],
"weights": ["uniform", "distance"],
"metric": ["euclidean", "manhattan"]
},
"SVR": {
"C": [0.1, 1, 10],
"kernel": ["linear", "rbf"], # For support vector regression
"gamma": ["scale", "auto"]
},
"MLPRegressor": {
"hidden_layer_sizes": [(50,), (100,)], # default: (100,)
"activation": ["relu", "tanh"], # default: "relu"
"alpha": [0.0001, 0.001], # default: 0.0001
"learning_rate": ["constant", "adaptive"], # default: "constant"
"max_iter": [200, 500] # default: 200
}
}
# ⚙️ Resolve model name and corresponding grid
model_name = best_model_info["name"] # best_model_info["model"].__class__.__name__
param_grid = param_grids.get(model_name)
if param_grid is None:
raise ValueError(f"No param grid defined for model: {model_name}")
print(f"\n🔧 Running Grid Search for: {model_name}")
# 🧪 Run Grid Search
model_instance = best_model_info["model"].__class__()
grid_search = GridSearchCV(
estimator=model_instance,
param_grid=param_grid,
scoring="neg_mean_squared_error", # Use MSE for regression scoring
cv=5,
n_jobs=-1,
verbose=1
)
grid_search.fit(X_train, y_train)
best_tuned_model = grid_search.best_estimator_
print("✅ Best Parameters Found:")
print(grid_search.best_params_)
# 📈 Evaluate tuned model using the existing functions
# Predictions
y_test_pred = best_tuned_model.predict(X_test)
y_train_pred = best_tuned_model.predict(X_train)
# Use the existing `evaluate_model_performance` function for model evaluation
evaluate_model_performance(X_train, y_train, y_train_pred, X_test, y_test, y_test_pred, label="Best Model + Grid Search")
update_best_model_info(
model_name=f"{model_name} + GridSearch",
model_obj=best_tuned_model,
y_train=y_train,
y_test=y_test,
y_train_pred=best_tuned_model.predict(X_train),
y_test_pred=best_tuned_model.predict(X_test),
mask=True
)
# Plot Predicted vs Actual (reuse the existing function for this)
# plot_predicted_vs_actual(y_test, y_test_pred, title=f"{model_name} (Tuned) - Test Predicted vs Actual", data_type="Test Data")
# plot_predicted_vs_actual(y_train, y_train_pred, title=f"{model_name} (Tuned) - Train Predicted vs Actual", data_type="Train Data")
# Plot Residuals vs Predicted (reuse your existing function for residuals vs predicted)
# plot_residuals_vs_predicted(y_test, y_test_pred, label="Test Data", plot_qq=True)
# plot_residuals_vs_predicted(y_train, y_train_pred, label="Train Data", plot_qq=True)
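The computational cost called out above grows quickly with the grid size. One way to tame it (a sketch reusing the model_instance and param_grid variables from the cell above) is successive halving, where cheap low-budget rounds eliminate weak candidates before the expensive full fits:
# Successive halving: evaluate all candidates on a small sample first,
# then promote only the best-performing fraction to larger samples
from sklearn.experimental import enable_halving_search_cv  # noqa: F401 (activates HalvingGridSearchCV)
from sklearn.model_selection import HalvingGridSearchCV

halving_search = HalvingGridSearchCV(
    estimator=model_instance,         # the search clones this internally
    param_grid=param_grid,            # same grid as the exhaustive search above
    factor=3,                         # keep roughly the top 1/3 of candidates each round
    scoring="neg_mean_squared_error",
    cv=5,
    n_jobs=-1,
)
halving_search.fit(X_train, y_train)
print("✅ Best Parameters Found:", halving_search.best_params_)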
🎲 Random Search¶
📖 Click to Expand
What is Random Search?
Random Search is a hyperparameter optimization technique that randomly samples combinations of hyperparameters from a predefined search space. Unlike Grid Search, which tests all possible combinations exhaustively, Random Search tests a fixed number of random combinations.
How Does Random Search Work?
- Define Hyperparameter Space:
- A search space is defined for each hyperparameter. This space can include continuous values (e.g., learning rate from 0.001 to 0.1) or discrete choices (e.g., the number of estimators: 100, 200, 300).
- Unlike Grid Search, Random Search doesn’t test all combinations. Instead, it samples from the space randomly.
- Random Sampling:
- Random Search samples n_iter (the number of iterations you define) random combinations of hyperparameters from the predefined search space.
- For each iteration, it randomly selects a combination of hyperparameter values and uses them to train the model.
- Model Training and Evaluation:
- For each random combination of hyperparameters, the model is trained on the training data and evaluated using cross-validation or a specified metric (e.g., R², MSE, etc.).
- The evaluation of the model’s performance helps determine how well the chosen hyperparameters work.
- Best Hyperparameter Selection:
- After testing a predefined number of random combinations, Random Search selects the best combination based on the evaluation metric.
- It returns the best performing hyperparameter set, along with the trained model that achieved the best performance.
- Retraining with Best Parameters:
- The model is retrained using the best hyperparameters found during the random search on the entire training data.
Why Use Random Search?
- Faster: Since it doesn't exhaustively search the entire hyperparameter space like Grid Search, it can be much faster, especially when the hyperparameter space is large.
- Better Coverage: It’s useful for exploring large hyperparameter spaces without needing to test every single combination.
- Computationally Efficient: Random Search can be more computationally efficient compared to Grid Search, as it doesn’t test all combinations.
Limitations:
- Doesn’t guarantee optimal solution: Random Search may miss the optimal combination because it doesn't explore all possible options.
- Fixed number of iterations: The performance can depend on the number of iterations; too few iterations might lead to suboptimal results.
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint, uniform
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
# 🔁 Use same param grid as defined earlier
model_name = best_model_info["name"] # best_model_info["model"].__class__.__name__
param_dist = param_grids.get(model_name)
if param_dist is None:
raise ValueError(f"No param distribution defined for model: {model_name}")
print(f"\n🎲 Running Randomized Search for: {model_name}")
# Create a new instance of the model
model_instance = best_model_info["model"].__class__()
# 🔍 Run randomized search
random_search = RandomizedSearchCV(
estimator=model_instance,
param_distributions=param_dist,
n_iter=15,
scoring="neg_mean_squared_error", # Use MSE or any other regression metric
cv=5,
n_jobs=-1,
verbose=1,
random_state=42
)
random_search.fit(X_train, y_train)
best_random_model = random_search.best_estimator_
print("✅ Best Parameters Found:")
print(random_search.best_params_)
# 🔎 Evaluate tuned model using existing functions
# Predictions
y_test_pred = best_random_model.predict(X_test)
y_train_pred = best_random_model.predict(X_train)
# Visual evaluation using existing functions
evaluate_model_performance(X_train, y_train, y_train_pred, X_test, y_test, y_test_pred, label="Best Model + RandomizedSearch")
# Store results (This can be done via update_best_model_info to avoid duplicating the storage process)
update_best_model_info(
model_name=f"{model_name} + RandomSearch",
model_obj=best_random_model,
y_train=y_train,
y_test=y_test,
y_train_pred=y_train_pred,
y_test_pred=y_test_pred,
mask=True
)
# Plot Predicted vs Actual
# plot_predicted_vs_actual(y_test, y_test_pred, title=f"{model_name} (RandomSearch) - Test Predicted vs Actual", data_type="Test Data")
# plot_predicted_vs_actual(y_train, y_train_pred, title=f"{model_name} (RandomSearch) - Train Predicted vs Actual", data_type="Train Data")
# Plot Residuals vs Predicted
# plot_residuals_vs_predicted(y_test, y_test_pred, label="Test Data", plot_qq=True)
# plot_residuals_vs_predicted(y_train, y_train_pred, label="Train Data", plot_qq=True)
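Note that the cell above reuses the discrete param_grids lists, so every sampled combination still comes from those fixed values. Where Random Search really differs from Grid Search is with continuous distributions; a minimal sketch (assuming the same X_train / y_train split, with XGBRegressor used purely as an illustration):
from scipy.stats import randint, uniform
from sklearn.model_selection import RandomizedSearchCV
from xgboost import XGBRegressor

# Distributions instead of fixed lists: each iteration draws a fresh value
xgb_param_dist = {
    "n_estimators": randint(100, 500),     # any integer in [100, 500)
    "max_depth": randint(3, 10),           # any integer in [3, 10)
    "learning_rate": uniform(0.01, 0.19),  # uniform on [0.01, 0.20]
    "subsample": uniform(0.7, 0.3),        # uniform on [0.7, 1.0]
}

xgb_random = RandomizedSearchCV(
    XGBRegressor(),
    param_distributions=xgb_param_dist,
    n_iter=20,                             # number of sampled combinations
    scoring="neg_mean_squared_error",
    cv=5,
    random_state=42,
    n_jobs=-1,
)
xgb_random.fit(X_train, y_train)
print(xgb_random.best_params_)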
📦 Ensemble Methods¶
📖 Click to Expand
- Ensemble methods combine multiple models to make more robust and accurate predictions than any single model could alone.
💡 Why ensemble?
- Single models can be biased, noisy, or fragile.
- Ensembling reduces variance, balances out individual model weaknesses, and improves generalization.
🧰 Types of Ensembling:
- Bagging (Bootstrap Aggregating):
- Trains multiple models on different random subsets of the data.
- Predictions are averaged (for regression) or voted (for classification).
- Example: Random Forest.
- Boosting:
- Trains models sequentially, where each new model tries to fix the errors of the previous ones.
- Final prediction is a weighted sum of all models.
- Examples: AdaBoost, Gradient Boosting, XGBoost.
- Stacking:
- Trains multiple different models and combines their outputs using a meta-model.
- The meta-model learns to weigh or blend base model predictions.
- Often more powerful, but also more complex.
📈 Bottom Line:
Ensemble models trade off interpretability for performance — they usually win on accuracy.
🪵 Bagging¶
📖 Click to Expand
🔍 What is Bagging?
- Bagging (Bootstrap Aggregating) is an ensemble method that trains multiple models on random subsets of the data (with replacement).
- Each model is trained independently.
- The final prediction is the average of all predictions (for regression).
🧠 Why use it?
- Bagging reduces variance by averaging out model noise.
- It helps stabilize models that tend to overfit — like decision trees.
✅ Good candidates for base models
Bagging works with any regression model that supports .fit(X, y) and .predict(X). Common choices include:
- DecisionTreeRegressor: High variance → Bagging helps stabilize it.
- KNeighborsRegressor: Very sensitive to noise and local patterns — Bagging improves robustness (see the sketch after the code cell below).
- LinearRegression: Works, but since it's low variance already, Bagging often adds little.
- SVR (Support Vector Regressor): Can be used if tuned well. Bagging may reduce sensitivity to outliers.
- Ridge / Lasso / BayesianRidge: Technically allowed, but already regularized — Bagging rarely adds value.
- MLPRegressor (Neural Networks): Possible but computationally heavy. Use with caution.
🌲 Random Forest as an Example
- A Random Forest Regressor is a specific type of bagging:
- Uses decision trees as base models
- Adds extra randomness by selecting a random subset of features at each split
- But bagging isn’t limited to trees — it can be applied to any model type.
✅ Benefits
- More stable than a single model
- Less sensitive to noise and overfitting
- Easy to parallelize
from sklearn.ensemble import BaggingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score, mean_absolute_error
# Define base estimator — can be swapped out (e.g., DecisionTreeRegressor, LinearRegression, etc.)
base_model = DecisionTreeRegressor()
# Wrap it in a Bagging ensemble
bagging = BaggingRegressor(estimator=base_model, n_estimators=100, random_state=42) # "estimator=" since scikit-learn 1.2; older versions used "base_estimator="
bagging.fit(X_train, y_train)
# Predictions
y_bag_train = bagging.predict(X_train)
y_bag_test = bagging.predict(X_test)
# Evaluation
print("🧺 Bagging Regressor (with Decision Tree)")
print(f"Train R²: {r2_score(y_train, y_bag_train):.3f}")
print(f"Test R²: {r2_score(y_test, y_bag_test):.3f}")
print(f"Test MAE: {mean_absolute_error(y_test, y_bag_test):.2f}")
🚀 Boosting¶
📖 Click to Expand
🚀 What is Boosting?
- Boosting builds models sequentially, not in parallel like Bagging.
- Each new model is trained to fix the mistakes made by the previous model.
- The final prediction is a weighted sum of all the individual models.
🔁 How it works (simplified):
- Train the first model on the data.
- Look at where it did poorly.
- Train the second model to focus more on those mistakes.
- Repeat for several rounds.
- Combine all models (usually by weighted average).
🧠 Why Boosting Works
- It focuses model capacity on the hardest-to-predict cases.
- Often gives better performance than bagging, especially on structured/tabular data.
- Reduces both bias and variance.
🔧 Popular Boosting Algorithms
- AdaBoost: Adjusts weights of training points.
- Gradient Boosting: Fits on the residuals (errors) of the previous model.
- XGBoost / LightGBM / CatBoost: Faster, more scalable variants of gradient boosting.
⚠️ Caution
- Boosting is powerful but more prone to overfitting if not tuned carefully.
- Models are sequential — slower to train and harder to parallelize than Bagging.
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import r2_score, mean_absolute_error
# Train Gradient Boosting Regressor
gbr = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
gbr.fit(X_train, y_train)
# Predictions
y_gbr_train = gbr.predict(X_train)
y_gbr_test = gbr.predict(X_test)
# Evaluation
print("🚀 Gradient Boosting Regressor")
print(f"Train R²: {r2_score(y_train, y_gbr_train):.3f}")
print(f"Test R²: {r2_score(y_test, y_gbr_test):.3f}")
print(f"Test MAE: {mean_absolute_error(y_test, y_gbr_test):.2f}")
🧠 Stacking (Meta-modeling)¶
📖 Click to Expand
🔁 How it works:
- Stacking combines multiple models by training a meta-model to learn how to best combine their predictions.
- Instead of averaging like Bagging or Boosting, it lets a new model decide how much to trust each base model.
- Train several base models (e.g., linear regression, tree, SVR).
- Use their predictions as features to train a new model (the "meta-model").
- The meta-model learns patterns in the predictions — which base models work better under which conditions.
🧩 Why use Stacking?
- Leverages strengths of diverse models (e.g., tree + linear + neighbors).
- Usually outperforms any single base model — especially when the base models have different biases.
📌 Key Points:
- Base models should be diverse — combining 3 similar models adds little.
- Cross-validation is usually needed to avoid overfitting from using predicted outputs during training.
- Common meta-models: LinearRegression, Ridge, LogisticRegression (for classification), GradientBoosting.
⚠️ Tradeoff:
High performance, but slower, harder to debug, and trickier to tune than Bagging/Boosting.
from sklearn.ensemble import StackingRegressor
from sklearn.linear_model import RidgeCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.metrics import r2_score, mean_absolute_error
# Define base models (can mix different types)
base_models = [
("tree", DecisionTreeRegressor(max_depth=5)),
("svr", SVR()),
("linear", LinearRegression())
]
# Define meta-model (also tunable — RidgeCV here adds internal regularization)
meta_model = RidgeCV()
# Build stacking regressor
stacking = StackingRegressor(estimators=base_models, final_estimator=meta_model, cv=5)
stacking.fit(X_train, y_train)
# Predictions
y_stack_train = stacking.predict(X_train)
y_stack_test = stacking.predict(X_test)
# Evaluation
print("🧠 Stacking Regressor")
print(f"Train R²: {r2_score(y_train, y_stack_train):.3f}")
print(f"Test R²: {r2_score(y_test, y_stack_test):.3f}")
print(f"Test MAE: {mean_absolute_error(y_test, y_stack_test):.2f}")