
📖 Time Series Forecasting

  • 🧱 Data Setup
    • 📌 Simulate a univariate time series
    • 📈 Visualize the time series
    • 🧮 Create train/test split
    • 🌊 Optional: Add second seasonality/trend
  • 📊 Exploratory Analysis
    • 📉 Stationarity checks
    • 📐 Seasonality & trend decomposition
    • 📏 Autocorrelation & partial autocorrelation
  • 🧪 Forecasting Baselines
    • 📉 Naive Forecast
    • 📊 Moving Average
    • 📈 Simple Exponential Smoothing
    • 📋 Baseline Evaluation
  • ⚙️ ARIMA Family
    • 🧩 AR, MA, ARMA
    • 🛠️ ARIMA
    • 🔁 SARIMA
    • 🔍 Residual diagnostics
    • 📋 ARIMA Evaluation
  • 🌊 State Space Models
    • 🧠 Intro to Kalman Filters
    • 📦 Statsmodels or Prophet
    • 📋 State Space Forecast Evaluation
  • 🧠 ML/Hybrid Approaches (Optional)
    • 🧮 Feature engineering
    • ⚡ XGBoost or LightGBM
    • 📋 ML Model Evaluation
  • 🧮 Evaluation Summary
    • 📊 Metric comparison table
    • 📈 Actual vs predicted plots
  • 📦 Deployment Considerations
    • 🧾 What to save
    • 🔄 Re-forecasting workflows
  • 🔚 Closing Notes
    • 🧠 Strengths/limits recap
    • 🚀 Next steps

🧱 Data Setup

📌 Simulate a univariate time series

In [1]:
import numpy as np
import pandas as pd

# Set random seed for reproducibility
np.random.seed(42)

# Simulate time series components
n_periods = 200
trend = np.linspace(0, 10, n_periods)
seasonality = 5 * np.sin(2 * np.pi * np.arange(n_periods) / 12)
noise = np.random.normal(0, 1, n_periods)

# Final series
y = trend + seasonality + noise
dates = pd.date_range(start="2020-01-01", periods=n_periods, freq="ME")  # "ME" (month-end) replaces the deprecated "M"
ts = pd.DataFrame({"date": dates, "value": y})

ts.head()
Out[1]:
        date     value
0 2020-01-31  0.496714
1 2020-02-29  2.411987
2 2020-03-31  5.078318
3 2020-04-30  6.673784
4 2020-05-31  4.296979

📈 Visualize the time series

In [2]:
# 📈 Visualize the time series

import matplotlib.pyplot as plt

plt.figure(figsize=(12, 4))
plt.plot(ts["date"], ts["value"], label="Simulated Time Series")
plt.title("Simulated Time Series")
plt.xlabel("Date")
plt.ylabel("Value")
plt.grid(True)
plt.tight_layout()
plt.legend()
plt.show()
[Figure: line plot of the simulated time series]

🧮 Create train/test split

📖 Click to Expand
🧮 Why It's Different

In time series, you can't randomly split rows into train/test like traditional ML. Why?

  • Time flows sequentially, so the past should predict the future.
  • Shuffling would leak future information into the past, a cardinal sin in forecasting.
⚠️ Common Time Series Splits
  • Use a chronological cutoff, e.g., first 80% as train, last 20% as test.
  • Use a rolling forecast origin if you want multiple test windows (sketched after this cell).
  • For production-style setups, use a fixed origin test set and predict forward.
In [3]:
train_ratio = 0.8
cutoff = int(len(ts) * train_ratio)

train = ts.iloc[:cutoff].copy()
test = ts.iloc[cutoff:].copy()

print(f"Train range: {train['date'].min().date()} to {train['date'].max().date()}")
print(f"Test range:  {test['date'].min().date()} to {test['date'].max().date()}")

print(f"Train shape: {train.shape}")
print(f"Test shape: {test.shape}")
Train range: 2020-01-31 to 2033-04-30
Test range:  2033-05-31 to 2036-08-31
Train shape: (160, 2)
Test shape: (40, 2)
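
For the rolling forecast origin mentioned above, scikit-learn's TimeSeriesSplit produces chronological folds. A minimal sketch (the fold count and test window size here are arbitrary choices):

from sklearn.model_selection import TimeSeriesSplit

# Five chronological folds, each with a 12-step test window
tscv = TimeSeriesSplit(n_splits=5, test_size=12)
for fold, (train_idx, test_idx) in enumerate(tscv.split(ts)):
    fold_train, fold_test = ts.iloc[train_idx], ts.iloc[test_idx]
    print(f"Fold {fold}: train ends {fold_train['date'].max().date()}, "
          f"test ends {fold_test['date'].max().date()}")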

🌊 Optional: Add second seasonality/trend
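
A minimal sketch of one way to layer a second, longer seasonal cycle and a mild quadratic trend onto the simulated series (the amplitudes and periods below are arbitrary choices for illustration):

# Layer a ~4-year cycle and a gentle quadratic trend onto the base series
second_season = 2 * np.sin(2 * np.pi * np.arange(n_periods) / 48)
quad_trend = 0.0002 * np.arange(n_periods) ** 2

ts2 = ts.copy()
ts2["value"] = ts2["value"] + second_season + quad_trend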

Back to the top


📊 Exploratory Analysis

📉 Stationarity checks

📖 Click to Expand
🧪 What Does "Stationary" Mean?

Most forecasting models assume that the statistical properties of a time series stay the same over time; specifically:

  • The mean doesn't change (no trend),
  • The variance stays constant (no wild swings), and
  • The relationships with its own past values remain stable.

This is called stationarity.

Why does it matter?

  • Stationary series are predictable.
  • Non-stationary data needs to be transformed (differenced, detrended, etc.) before modeling.

We'll use:

  • Rolling mean / std to visually check for stability
  • ADF test (Augmented Dickey-Fuller) to statistically test for stationarity
In [4]:
# 📉 Stationarity checks

from statsmodels.tsa.stattools import adfuller
import plotly.graph_objects as go

# Rolling statistics
ts["rolling_mean"] = ts["value"].rolling(window=12).mean()
ts["rolling_std"] = ts["value"].rolling(window=12).std()

fig = go.Figure()
fig.add_trace(go.Scatter(x=ts["date"], y=ts["value"], mode="lines", name="Original"))
fig.add_trace(go.Scatter(x=ts["date"], y=ts["rolling_mean"], mode="lines", name="Rolling Mean (12)"))
fig.add_trace(go.Scatter(x=ts["date"], y=ts["rolling_std"], mode="lines", name="Rolling Std (12)"))

fig.update_layout(
    title="Rolling Mean & Standard Deviation",
    xaxis_title="Date",
    yaxis_title="Value",
    showlegend=True,
    template="plotly_white"
)
fig.show()

# ADF Test
adf_result = adfuller(ts["value"])
print(f"ADF Statistic: {adf_result[0]:.4f}")
print(f"p-value: {adf_result[1]:.4f}")
for key, value in adf_result[4].items():
    print(f"Critical Value ({key}): {value:.4f}")
ADF Statistic: -0.1689
p-value: 0.9421
Critical Value (1%): -3.4658
Critical Value (5%): -2.8771
Critical Value (10%): -2.5751
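
As expected for a trending series, the ADF test fails to reject non-stationarity (p ≈ 0.94). A common remedy is first differencing; a minimal sketch re-running the test on the differenced series (on data like this, the p-value typically drops well below 0.05):

# First difference removes the linear trend; re-test stationarity
diff = ts["value"].diff().dropna()
adf_diff = adfuller(diff)
print(f"ADF on differenced series: statistic={adf_diff[0]:.4f}, p-value={adf_diff[1]:.4f}")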

๐Ÿ“ Seasonality & trend decompositionยถ

๐Ÿ“– Click to Expand
๐Ÿ“ What Is Decomposition?ยถ

Decomposition splits a time series into:

  • Trend: Long-term progression (upward or downward movement)
  • Seasonality: Regular, repeating patterns (e.g., monthly cycles)
  • Residual: The leftover noise after removing trend & seasonality

Why care?

  • It reveals structure in your data.
  • Helps you decide whether to model trend, seasonality, or both.
In [5]:
# 📐 Seasonality & trend decomposition

from statsmodels.tsa.seasonal import seasonal_decompose

# Additive decomposition: appropriate when the seasonal amplitude stays roughly constant over time
decomp = seasonal_decompose(ts["value"], model="additive", period=12)

# Plot using Plotly
import plotly.subplots as sp

fig = sp.make_subplots(rows=4, cols=1, shared_xaxes=True, subplot_titles=["Original", "Trend", "Seasonality", "Residual"])

fig.add_trace(go.Scatter(x=ts["date"], y=decomp.observed, name="Original"), row=1, col=1)
fig.add_trace(go.Scatter(x=ts["date"], y=decomp.trend, name="Trend"), row=2, col=1)
fig.add_trace(go.Scatter(x=ts["date"], y=decomp.seasonal, name="Seasonality"), row=3, col=1)
fig.add_trace(go.Scatter(x=ts["date"], y=decomp.resid, name="Residual"), row=4, col=1)

fig.update_layout(
    height=800,
    title="Seasonal Decomposition (Additive)",
    showlegend=False,
    template="plotly_white"
)

fig.update_xaxes(title_text="Date", row=4, col=1)
fig.update_yaxes(title_text="Value", row=1, col=1)
fig.show()

๐Ÿ“ Autocorrelation & partial autocorrelationยถ

๐Ÿ“– Click to Expand
๐Ÿ“ Why These Plots?ยถ
  • ACF (Autocorrelation Function) shows how a time series is correlated with its past values (lags).
  • PACF (Partial ACF) isolates the direct relationship between the series and a specific lag.

Why use them?

  • To identify lag structures in your data.
  • To choose parameters for models like ARIMA (p, q):
    • ACF tailing off = use differencing
    • PACF cutting off at lag k = potential AR(k) model
Inย [6]:
# ๐Ÿ“ Autocorrelation & partial autocorrelation

from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

fig, ax = plt.subplots(2, 1, figsize=(12, 6))

plot_acf(ts["value"], ax=ax[0], lags=40)
ax[0].set_title("Autocorrelation (ACF)")
ax[0].set_xlabel("Lag")
ax[0].set_ylabel("Correlation")
ax[0].grid(False)

plot_pacf(ts["value"], ax=ax[1], lags=40, method='ywm')
ax[1].set_title("Partial Autocorrelation (PACF)")
ax[1].set_xlabel("Lag")
ax[1].set_ylabel("Correlation")
ax[1].grid(False)

plt.tight_layout()
plt.show()
[Figure: ACF and PACF plots over 40 lags]

Back to the top


🧪 Forecasting Baselines

📖 Click to Expand
🧪 What Are Forecasting Baselines?

Before jumping into complex models like ARIMA or ML, it's smart to try simple, interpretable methods. They:

  • Serve as reference points for evaluating more advanced models
  • Are fast to run and surprisingly competitive for some time series

This section covers:

  • Naive Forecast (last observed value)
  • Moving Average
  • Simple Exponential Smoothing (SES)

All are univariate and make point forecasts only (no uncertainty intervals yet).

📉 Naive Forecast

📖 Click to Expand
📉 Naive Forecast

This is the simplest possible model:

  • It assumes the next value is equal to the last known value from the training set.
  • Great sanity check: if complex models can't beat this, they're adding complexity without value.
In [7]:
naive_forecast = test.copy()
naive_forecast["prediction"] = train["value"].iloc[-1]

import plotly.express as px

fig = px.line()
fig.add_scatter(x=train["date"], y=train["value"], name="Train")
fig.add_scatter(x=test["date"], y=test["value"], name="Actual")
fig.add_scatter(x=naive_forecast["date"], y=naive_forecast["prediction"], name="Naive Forecast")
fig.update_layout(
    title="Naive Forecast",
    xaxis_title="Date",
    yaxis_title="Value",
    template="plotly_white"
)
fig.show()

📊 Moving Average

📖 Click to Expand
📊 Moving Average

Forecast is the mean of the last N observations.

  • Smoother than naive forecast.
  • Assumes short-term average is a good guess for next value.

Good when:

  • There's no strong trend or seasonality.
  • You want a simple but less noisy baseline.
In [8]:
window = 12
moving_avg = test.copy()
moving_avg["prediction"] = train["value"].rolling(window).mean().iloc[-1]

fig = px.line()
fig.add_scatter(x=train["date"], y=train["value"], name="Train")
fig.add_scatter(x=test["date"], y=test["value"], name="Actual")
fig.add_scatter(x=moving_avg["date"], y=moving_avg["prediction"], name=f"Moving Average ({window})")
fig.update_layout(
    title="Moving Average Forecast",
    xaxis_title="Date",
    yaxis_title="Value",
    template="plotly_white"
)
fig.show()

📈 Simple Exponential Smoothing

📖 Click to Expand
📈 Simple Exponential Smoothing (SES)

Like a moving average, but gives more weight to recent observations:

  • Controlled by smoothing factor alpha ∈ [0, 1]
  • Higher alpha = more recent points dominate

Ideal when:

  • Data has no clear trend or seasonality
  • You want more reactive forecasts
In [9]:
# 📈 Simple Exponential Smoothing

from statsmodels.tsa.holtwinters import SimpleExpSmoothing

model_ses = SimpleExpSmoothing(train["value"]).fit()
ses_forecast = test.copy()
ses_forecast["prediction"] = model_ses.forecast(len(test))

fig = px.line()
fig.add_scatter(x=train["date"], y=train["value"], name="Train")
fig.add_scatter(x=test["date"], y=test["value"], name="Actual")
fig.add_scatter(x=ses_forecast["date"], y=ses_forecast["prediction"], name="SES Forecast")
fig.update_layout(
    title="Simple Exponential Smoothing Forecast",
    xaxis_title="Date",
    yaxis_title="Value",
    template="plotly_white"
)
fig.show()
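
The fit() call above estimates alpha from the data. To pin alpha manually, or to inspect the estimated value, something like the following should work (smoothing_level is the statsmodels name for alpha; 0.2 is an arbitrary choice):

# Fix alpha explicitly instead of letting the optimizer choose it
model_ses_fixed = SimpleExpSmoothing(train["value"]).fit(smoothing_level=0.2, optimized=False)

# Inspect the alpha the earlier fit selected
print(model_ses.params["smoothing_level"])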

📋 Baseline Evaluation

📖 Click to Expand
📋 Why Evaluate Even Simple Models?

Even "dumb" models like the Naive Forecast can be surprisingly hard to beat.

Common error metrics:

  • MAE (Mean Absolute Error): Easy to interpret, less sensitive to outliers than RMSE
  • RMSE (Root Mean Square Error): Penalizes large errors more
  • MAPE (Mean Absolute Percentage Error): Good for scale-free comparison, but fails when values are at or near 0 (an sMAPE alternative is sketched below)

A baseline that performs well sets the bar for any complex model.
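
Since MAPE breaks near zero, a useful companion metric is sMAPE, which bounds the denominator; a minimal sketch:

# Symmetric MAPE: the bounded denominator avoids blow-ups when y_true is near 0
def smape(y_true, y_pred):
    y_true, y_pred = np.asarray(y_true, dtype=float), np.asarray(y_pred, dtype=float)
    return np.mean(2 * np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred))) * 100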

In [10]:
# 📋 Baseline Evaluation

from sklearn.metrics import mean_absolute_error, mean_squared_error

def mape(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

baseline_results = pd.DataFrame({
    "Model": ["Naive", "Moving Avg", "SES"],
    "MAE": [
        mean_absolute_error(test["value"], naive_forecast["prediction"]),
        mean_absolute_error(test["value"], moving_avg["prediction"]),
        mean_absolute_error(test["value"], ses_forecast["prediction"])
    ],
    "RMSE": [
        np.sqrt(mean_squared_error(test["value"], naive_forecast["prediction"])),
        np.sqrt(mean_squared_error(test["value"], moving_avg["prediction"])),
        np.sqrt(mean_squared_error(test["value"], ses_forecast["prediction"]))
    ],
    "MAPE (%)": [
        mape(test["value"], naive_forecast["prediction"]),
        mape(test["value"], moving_avg["prediction"]),
        mape(test["value"], ses_forecast["prediction"])
    ]
})

# Show table directly
print(baseline_results.round(3))

import plotly.express as px

# Melt to long format
melted = baseline_results.melt(id_vars="Model", var_name="Metric", value_name="Score")

fig = px.bar(melted, x="Model", y="Score", color="Metric", barmode="group",
             title="📊 Baseline Model Errors (Lower is Better)",
             text_auto=".2f")

fig.update_layout(template="plotly_white", yaxis_title="Error")
fig.show()
        Model    MAE   RMSE  MAPE (%)
0       Naive  4.610  5.613    79.707
1  Moving Avg  3.191  3.652    40.232
2         SES  4.610  5.613    79.707

Back to the top


⚙️ ARIMA Family

🧩 AR, MA, ARMA

📖 Click to Expand
🧩 What Are AR, MA, ARMA?

These are the building blocks of classical time series forecasting:

  • AR (Auto-Regressive):
    Predicts future values from past values.
    e.g., $y_t = c + \phi_1 y_{t-1} + \phi_2 y_{t-2} + \dots + \epsilon_t$

  • MA (Moving Average):
    Predicts from past forecast errors, not past values.
    e.g., $y_t = \mu + \epsilon_t + \theta_1 \epsilon_{t-1} + \theta_2 \epsilon_{t-2} + \dots$

  • ARMA: Combines both. Works well for stationary data.

But most real-world series aren't stationary, so we'll move to ARIMA next.
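
Modern statsmodels has no standalone ARMA class; an ARMA(p, q) is simply ARIMA(p, 0, q). A minimal sketch, purely to show the interface (the (2, 0, 1) order is an arbitrary choice, and the raw series here is not actually stationary):

# ARMA(2,1) fit via ARIMA with d=0
from statsmodels.tsa.arima.model import ARIMA

arma = ARIMA(train["value"], order=(2, 0, 1)).fit()
print(arma.summary().tables[1])  # coefficient table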

🛠️ ARIMA

📖 Click to Expand
🛠️ What Is ARIMA?

ARIMA = AutoRegressive Integrated Moving Average

  • AR (p) → past values
  • I (d) → differencing to make the series stationary
  • MA (q) → past errors

It's written as: ARIMA(p, d, q)
Example: ARIMA(1, 1, 1) → 1 autoregressive term, 1 order of differencing, 1 moving average term.

We'll start with a manual config, then optionally try auto-arima (sketched after this cell).

In [11]:
# 🛠️ ARIMA

from statsmodels.tsa.arima.model import ARIMA

# We'll try ARIMA(1,1,1), a classic starting point
model_arima = ARIMA(train["value"], order=(1,1,1))
fitted_arima = model_arima.fit()

# Forecast on test window
arima_forecast = test.copy()
arima_forecast["prediction"] = fitted_arima.forecast(steps=len(test))

# Plot
import plotly.express as px

fig = px.line()
fig.add_scatter(x=train["date"], y=train["value"], name="Train")
fig.add_scatter(x=test["date"], y=test["value"], name="Actual")
fig.add_scatter(x=arima_forecast["date"], y=arima_forecast["prediction"], name="ARIMA Forecast")
fig.update_layout(
    title="ARIMA(1,1,1) Forecast",
    xaxis_title="Date",
    yaxis_title="Value",
    template="plotly_white"
)
fig.show()
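
The auto-arima search mentioned above lives in the pmdarima package (a separate install, assumed available here); a minimal sketch:

# Stepwise search over (p,d,q)x(P,D,Q,m) orders
import pmdarima as pm

auto_model = pm.auto_arima(train["value"], seasonal=True, m=12,
                           stepwise=True, suppress_warnings=True)
print(auto_model.order, auto_model.seasonal_order)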

๐Ÿ” SARIMAยถ

๐Ÿ“– Click to Expand
๐Ÿ” What Is SARIMA?ยถ

SARIMA adds seasonality to ARIMA:

  • Seasonal ARIMA(p, d, q) x (P, D, Q, s)
    • P, D, Q: seasonal AR/MA terms
    • s: period (e.g., 12 for monthly, 4 for quarterly)

SARIMA handles repeating seasonal patterns, like:

  • Annual product demand
  • Monthly revenue cycles
In [12]:
# 🔁 SARIMA

from statsmodels.tsa.statespace.sarimax import SARIMAX

# Let's try SARIMA(1,1,1)x(1,1,1,12)
model_sarima = SARIMAX(train["value"], order=(1,1,1), seasonal_order=(1,1,1,12))
fitted_sarima = model_sarima.fit(disp=False)

sarima_forecast = test.copy()
sarima_forecast["prediction"] = fitted_sarima.forecast(steps=len(test))

# Plot
fig = px.line()
fig.add_scatter(x=train["date"], y=train["value"], name="Train")
fig.add_scatter(x=test["date"], y=test["value"], name="Actual")
fig.add_scatter(x=sarima_forecast["date"], y=sarima_forecast["prediction"], name="SARIMA Forecast")
fig.update_layout(
    title="SARIMA(1,1,1)(1,1,1)[12] Forecast",
    xaxis_title="Date",
    yaxis_title="Value",
    template="plotly_white"
)
fig.show()

๐Ÿ” Residual diagnosticsยถ

๐Ÿ“– Click to Expand
๐Ÿ” Why Check Residuals?ยถ

Residuals = prediction errors = actual - predicted

We want residuals to be:

  • Centered around 0
  • Random-looking (white noise)
  • No obvious autocorrelation left

If residuals are structured โ†’ model missed something (trend, seasonality, etc.)

Inย [13]:
# ๐Ÿ” Residual diagnostics

resid = fitted_sarima.resid

fig, ax = plt.subplots(1, 2, figsize=(12, 4))
ax[0].plot(resid)
ax[0].set_title("Residuals Over Time")
ax[0].set_xlabel("Index")
ax[0].set_ylabel("Residual")
ax[0].grid(False)

plot_acf(resid, ax=ax[1], lags=40)
ax[1].set_title("ACF of Residuals")
ax[1].set_xlabel("Lag")
ax[1].grid(False)

plt.tight_layout()
plt.show()
[Figure: SARIMA residuals over time and ACF of residuals]

📋 ARIMA Evaluation

📖 Click to Expand
📋 How Did ARIMA Models Perform?

We're reusing the same evaluation framework:

  • MAE: average absolute error
  • RMSE: square root of MSE
  • MAPE: percentage error

Compare them to the baseline results; if they don't improve on them, they're not worth the added complexity.

In [14]:
# 📋 ARIMA Evaluation

baseline_results.loc[len(baseline_results)] = [
    "ARIMA",
    mean_absolute_error(test["value"], arima_forecast["prediction"]),
    np.sqrt(mean_squared_error(test["value"], arima_forecast["prediction"])),
    mape(test["value"], arima_forecast["prediction"])
]

baseline_results.loc[len(baseline_results)] = [
    "SARIMA",
    mean_absolute_error(test["value"], sarima_forecast["prediction"]),
    np.sqrt(mean_squared_error(test["value"], sarima_forecast["prediction"])),
    mape(test["value"], sarima_forecast["prediction"])
]

print(baseline_results.round(3))
        Model    MAE   RMSE  MAPE (%)
0       Naive  4.610  5.613    79.707
1  Moving Avg  3.191  3.652    40.232
2         SES  4.610  5.613    79.707
3       ARIMA  6.426  7.289   104.616
4      SARIMA  0.654  0.846     9.469

Back to the top


🌊 State Space Models

🧠 Intro to Kalman Filters

📖 Click to Expand
🧠 What's a Kalman Filter?

Kalman Filters are recursive estimation algorithms, well suited to time series whose underlying state evolves over time.

Think of them as:

  • Dynamic generalizations of ARIMA
  • Built to learn patterns as they go, including time-varying behavior
  • The machinery underpinning state space models like SARIMA, Dynamic Linear Models, etc.

They separate the world into:

  • State Equation: how the system evolves
  • Observation Equation: what we measure from the system

Why useful?

  • They support missing data, time-varying parameters, and multi-input systems
  • They are more flexible and expressive than traditional ARIMA
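
To make the state/observation split concrete, here is a minimal scalar local-level Kalman filter, written from the standard equations rather than any particular library (the noise variances q and r and the initialization are arbitrary assumptions):

import numpy as np

def local_level_kalman(y, q=0.1, r=1.0):
    # State equation:       level_t = level_{t-1} + w_t,  w_t ~ N(0, q)
    # Observation equation: y_t     = level_t + v_t,      v_t ~ N(0, r)
    level, p = y[0], 1.0                   # initial state estimate and variance
    filtered = []
    for obs in y:
        p = p + q                          # predict: variance grows by process noise
        k = p / (p + r)                    # Kalman gain
        level = level + k * (obs - level)  # update toward the new observation
        p = (1 - k) * p
        filtered.append(level)
    return np.array(filtered)

filtered_level = local_level_kalman(ts["value"].to_numpy())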

📦 Statsmodels or Prophet

📖 Click to Expand
📦 What Model Are We Using?

We'll use statsmodels' UnobservedComponents to fit a dynamic linear model (DLM). This is a generic state space implementation.

We'll model:

  • Local Level (trend)
  • Seasonal component (with specified period)

Optionally, Prophet (by Meta) is another friendly, plug-and-play option; we'll skip running it here, but a minimal sketch follows.
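
Prophet expects a two-column frame named ds/y; a sketch of the interface, assuming the prophet package is installed (the freq string follows pandas conventions):

# Prophet sketch: rename columns to the ds/y schema it expects
from prophet import Prophet

prophet_df = train.rename(columns={"date": "ds", "value": "y"})[["ds", "y"]]
m = Prophet()
m.fit(prophet_df)
future = m.make_future_dataframe(periods=len(test), freq="ME")
prophet_forecast = m.predict(future)[["ds", "yhat"]]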

In [15]:
# 📦 Fit a State Space Model using UnobservedComponents

from statsmodels.tsa.statespace.structural import UnobservedComponents

model_ucm = UnobservedComponents(train["value"],
                                 level="local level",
                                 seasonal=12)
fitted_ucm = model_ucm.fit(disp=False)  # disp=False silences the optimizer log

ucm_forecast = test.copy()
ucm_forecast["prediction"] = fitted_ucm.forecast(steps=len(test))

# Plot
import plotly.express as px

fig = px.line()
fig.add_scatter(x=train["date"], y=train["value"], name="Train")
fig.add_scatter(x=test["date"], y=test["value"], name="Actual")
fig.add_scatter(x=ucm_forecast["date"], y=ucm_forecast["prediction"], name="State Space Forecast")
fig.update_layout(
    title="Unobserved Components Model (Local Level + Seasonal)",
    xaxis_title="Date",
    yaxis_title="Value",
    template="plotly_white"
)
fig.show()

📋 State Space Forecast Evaluation

📖 Click to Expand
📋 Evaluation Same as Before

Same metrics:

  • MAE
  • RMSE
  • MAPE

If this model handles trend or seasonality better, it may outperform ARIMA. It is also more interpretable, since you can extract the trend and seasonal components separately.

In [16]:
# 📋 State Space Forecast Evaluation

from sklearn.metrics import mean_absolute_error, mean_squared_error

baseline_results.loc[len(baseline_results)] = [
    "State Space",
    mean_absolute_error(test["value"], ucm_forecast["prediction"]),
    np.sqrt(mean_squared_error(test["value"], ucm_forecast["prediction"])),
    mape(test["value"], ucm_forecast["prediction"])
]

print(baseline_results.round(3))
         Model    MAE   RMSE  MAPE (%)
0        Naive  4.610  5.613    79.707
1   Moving Avg  3.191  3.652    40.232
2          SES  4.610  5.613    79.707
3        ARIMA  6.426  7.289   104.616
4       SARIMA  0.654  0.846     9.469
5  State Space  1.154  1.397    14.044

Back to the top


🧠 ML/Hybrid Approaches (Optional)

🧮 Feature engineering

📖 Click to Expand
🧮 Why Do Feature Engineering for Time Series?

Unlike ARIMA, machine learning models need tabular input, with explicit features for prediction. So we create:

  • Lag features (previous values)
  • Rolling stats (moving avg, std)
  • Date parts (month, year, etc.)

This allows tree-based models like XGBoost to learn patterns from sequences, without any built-in awareness of time.

In [17]:
# 🧮 Feature engineering

df_ml = ts.copy()
# Caution: ts still carries the rolling_mean/rolling_std columns added during the
# stationarity check. Those windows include the current value, so they leak the
# target into the features; ts[["date", "value"]].copy() would avoid this.

# Create lag features
for lag in [1, 2, 3, 6, 12]:
    df_ml[f"lag_{lag}"] = df_ml["value"].shift(lag)

# Create rolling features
df_ml["rolling_mean_3"] = df_ml["value"].shift(1).rolling(window=3).mean()
df_ml["rolling_std_3"] = df_ml["value"].shift(1).rolling(window=3).std()

# Add time-based features
df_ml["month"] = df_ml["date"].dt.month
df_ml["year"] = df_ml["date"].dt.year

# Drop early NA rows
df_ml.dropna(inplace=True)

# Redefine train/test from this ML-friendly df
train_ml = df_ml[df_ml["date"] < test["date"].min()]
test_ml = df_ml[df_ml["date"] >= test["date"].min()]

X_train = train_ml.drop(columns=["date", "value"])
y_train = train_ml["value"]

X_test = test_ml.drop(columns=["date", "value"])
y_test = test_ml["value"]
In [25]:
# Preview transformed feature set
print("✅ Feature Matrix (X_train):")
print(X_train.head())

print("\n✅ Target Vector (y_train):")
print(y_train.head())

print(f"\n🔢 Train shape: X={X_train.shape}, y={y_train.shape}")
print(f"🔢 Test shape:  X={X_test.shape}, y={y_test.shape}")
✅ Feature Matrix (X_train):
    rolling_mean  rolling_std     lag_1     lag_2     lag_3     lag_6  \
12      0.601359     3.842112 -2.412966 -4.291032 -4.005179  1.880720   
13      0.503692     3.806634  0.844977 -2.412966 -4.291032 -1.380806   
14      0.356226     3.644113  1.239986  0.844977 -2.412966 -4.397591   
15      0.232701     3.429288  3.308727  1.239986  0.844977 -4.005179   
16      0.218063     3.410686  5.191481  3.308727  1.239986 -4.291032   

      lag_12  rolling_mean_3  rolling_std_3  month  year  
12  0.496714       -3.569726       1.011928      1  2021  
13  2.411987       -1.953007       2.598715      2  2021  
14  5.078318       -0.109334       2.004756      3  2021  
15  6.673784        1.797897       1.323240      4  2021  
16  4.296979        3.246731       1.976477      5  2021  

✅ Target Vector (y_train):
12    0.844977
13    1.239986
14    3.308727
15    5.191481
16    4.121316
Name: value, dtype: float64

🔢 Train shape: X=(148, 11), y=(148,)
🔢 Test shape:  X=(40, 11), y=(40,)

⚡ XGBoost or LightGBM

📖 Click to Expand
⚡ Why Use XGBoost for Time Series?

Even though tree models aren't aware of time:

  • They handle nonlinearities, interactions, and feature drift
  • They are robust to outliers
  • They don't assume stationarity or linearity the way ARIMA does

They're great when:

  • Seasonality/trend can be learned via lags
  • You need to scale to many series
In [21]:
# ⚡ XGBoost Regressor

from xgboost import XGBRegressor

model_xgb = XGBRegressor(n_estimators=100, random_state=42)
model_xgb.fit(X_train, y_train)

xgb_forecast = test_ml.copy()
xgb_forecast["prediction"] = model_xgb.predict(X_test)

# Plot
import plotly.express as px

fig = px.line()
fig.add_scatter(x=train["date"], y=train["value"], name="Train")
fig.add_scatter(x=test["date"], y=test["value"], name="Actual")
fig.add_scatter(x=xgb_forecast["date"], y=xgb_forecast["prediction"], name="XGBoost Forecast")
fig.update_layout(
    title="XGBoost Forecast (Lag + Rolling Features)",
    xaxis_title="Date",
    yaxis_title="Value",
    template="plotly_white"
)
fig.show()

📋 ML Model Evaluation

In [22]:
# 📋 ML Model Evaluation

baseline_results.loc[len(baseline_results)] = [
    "XGBoost",
    mean_absolute_error(y_test, xgb_forecast["prediction"]),
    np.sqrt(mean_squared_error(y_test, xgb_forecast["prediction"])),
    mape(y_test, xgb_forecast["prediction"])
]

print(baseline_results.round(3))
         Model    MAE   RMSE  MAPE (%)
0        Naive  4.610  5.613    79.707
1   Moving Avg  3.191  3.652    40.232
2          SES  4.610  5.613    79.707
3        ARIMA  6.426  7.289   104.616
4       SARIMA  0.654  0.846     9.469
5  State Space  1.154  1.397    14.044
6      XGBoost  1.774  2.140    22.329

Back to the top


🧮 Evaluation Summary

📊 Metric comparison table

In [23]:
# 📊 Metric comparison table

print(baseline_results.round(3))
         Model    MAE   RMSE  MAPE (%)
0        Naive  4.610  5.613    79.707
1   Moving Avg  3.191  3.652    40.232
2          SES  4.610  5.613    79.707
3        ARIMA  6.426  7.289   104.616
4       SARIMA  0.654  0.846     9.469
5  State Space  1.154  1.397    14.044
6      XGBoost  1.774  2.140    22.329

📈 Actual vs predicted plots

In [24]:
# 📈 Actual vs predicted plots

import plotly.graph_objects as go

fig = go.Figure()

# Overlay all model predictions on test set
fig.add_trace(go.Scatter(x=test["date"], y=test["value"], name="Actual", mode="lines"))

fig.add_trace(go.Scatter(x=naive_forecast["date"], y=naive_forecast["prediction"], name="Naive"))
fig.add_trace(go.Scatter(x=moving_avg["date"], y=moving_avg["prediction"], name="Moving Avg"))
fig.add_trace(go.Scatter(x=ses_forecast["date"], y=ses_forecast["prediction"], name="SES"))
fig.add_trace(go.Scatter(x=arima_forecast["date"], y=arima_forecast["prediction"], name="ARIMA"))
fig.add_trace(go.Scatter(x=sarima_forecast["date"], y=sarima_forecast["prediction"], name="SARIMA"))
fig.add_trace(go.Scatter(x=ucm_forecast["date"], y=ucm_forecast["prediction"], name="State Space"))
fig.add_trace(go.Scatter(x=xgb_forecast["date"], y=xgb_forecast["prediction"], name="XGBoost"))

fig.update_layout(
    title="📈 Actual vs Predicted (All Models)",
    xaxis_title="Date",
    yaxis_title="Value",
    template="plotly_white"
)

fig.show()

Back to the top


📦 Deployment Considerations

🧾 What to save

📖 Click to Expand
🧾 What Should You Save After Training?

For any time series model, you typically want to persist the following:

  • Trained model object (ARIMA, XGBoost, etc.)
  • Model config & hyperparameters (like order, lag selection)
  • Scaler, if any normalization was applied
  • Feature engineering pipeline (lag/rolling stats code)
  • Train/test cutoff timestamp (where the model last trained up to)
In [27]:
# 💾 Minimal model saving example (XGBoost)
import joblib
joblib.dump(model_xgb, "xgb_forecast_model.pkl")
Out[27]:
['xgb_forecast_model.pkl']
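
The checklist above also calls for persisting config and the training cutoff; a minimal sketch using json (the file name and keys are illustrative):

# Persist config + training cutoff alongside the model artifact
import json

metadata = {
    "model_type": "XGBRegressor",
    "n_estimators": 100,
    "features": list(X_train.columns),
    "train_cutoff": str(train["date"].max().date()),
}
with open("xgb_forecast_metadata.json", "w") as f:
    json.dump(metadata, f, indent=2)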

🔄 Re-forecasting workflows

📖 Click to Expand
🔄 How Do You Forecast Over Time?

Forecasting in production is an ongoing process, not a one-time event. Two common approaches:

  • Expanding Window: Always include all past data up to now when re-training or forecasting.
  • Rolling Window: Only use the most recent fixed window (e.g., the last 12 or 24 months), which helps models adapt to recent trends.

Both approaches can be used for retraining or just for generating rolling forecasts (an expanding-window variant is sketched after the loop below).

In [29]:
# 🧪 Example: Rolling window retraining loop (demo only)

# Let's assume 'ts' is our full time series dataframe
from statsmodels.tsa.holtwinters import SimpleExpSmoothing

lookback = 60  # e.g., use last 60 points for each training window
start_point = lookback
end_point = len(ts)

for i in range(start_point, end_point):
    train_window = ts.iloc[i - lookback:i].copy()

    # Simulate retraining (SES as a lightweight placeholder model)
    model = SimpleExpSmoothing(train_window["value"]).fit()

    # Predict the next time point
    forecast = model.forecast(1).values[0]

    print(f"t={i} | Forecast: {forecast:.2f} | Actual: {ts['value'].iloc[i]:.2f}")
t=60 | Forecast: 1.44 | Actual: 2.54
t=61 | Forecast: 2.54 | Actual: 5.38
t=62 | Forecast: 5.38 | Actual: 6.34
t=63 | Forecast: 6.34 | Actual: 6.97
t=64 | Forecast: 6.97 | Actual: 8.36
... (output truncated; one forecast/actual line per step from t=65 through t=198) ...
t=199 | Forecast: 10.01 | Actual: 6.36
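
The expanding-window variant of the same loop simply anchors each training slice at the start of the series (a sketch, reusing SES as the placeholder model):

# Expanding window: train on everything observed so far
for i in range(start_point, end_point):
    train_window = ts.iloc[:i]  # anchored at 0 instead of i - lookback
    model = SimpleExpSmoothing(train_window["value"]).fit()
    forecast = model.forecast(1).values[0]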

Back to the top


🔚 Closing Notes

🧠 Strengths/limits recap

📖 Click to Expand
🧠 Strengths of Time Series Forecasting (What You Now Know)
  • Understand time-dependent structure (trend, seasonality, autocorrelation)
  • Model both statistical (ARIMA, SARIMA) and ML-based approaches (XGBoost)
  • Use evaluation metrics that matter (MAE, RMSE, MAPE)
  • Apply proper train/test splitting and avoid data leakage
  • Deploy with awareness of saving models, updating forecasts, and windowing strategies
🧨 Limits / Cautions
  • ARIMA assumes stationarity; you often need to transform the data first
  • Tree-based models need carefully engineered lags
  • Real-world deployment needs error monitoring, model retraining, and seasonal drift handling
  • Metrics like MAPE break when values are near 0

🚀 Next steps

📖 Click to Expand
🚀 Where to Go from Here
  • Try Prophet (Meta): robust to missing data, holidays, changepoints
  • Explore multivariate forecasting: add external regressors (e.g., promotions, weather)
  • Learn DeepAR / N-BEATS / Transformer models (via GluonTS or PyTorch Forecasting)
  • Build a rolling forecasting pipeline (weekly/monthly auto-refresh)
  • Add prediction intervals for uncertainty quantification

This notebook gave you the core playbook; now expand it based on the forecasting needs in your domain.

Back to the top