from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"


import numpy as np

# Define the range [a, b]
a, b = 0, 10
size = 10000  

# Generate random samples from the Uniform distribution
samples = np.random.uniform(a, b, size)

# Print the first 10 samples as a quick check
print(np.round(samples[:10], 2))

[8.18 0.23 1.94 3.45 5.33 0.25 0.24 6.23 8.53 1.07]


import scipy.stats as stats

# 1. Probability Density Function (PDF)
x = np.linspace(min(samples), max(samples), 100)
pdf_values = stats.uniform.pdf(x, a, b-a)

# 2. Cumulative Distribution Function (CDF)
cdf_values = stats.uniform.cdf(x, a, b-a)

# 3. Print the values
np.set_printoptions(edgeitems=10, threshold=20, precision=6)
print("x values:", x, "\n")
print("PDF values:", pdf_values, "\n")
print("CDF values:", cdf_values, "\n")

x values: [1.319591e-03 1.022937e-01 2.032678e-01 3.042419e-01 4.052160e-01
 5.061901e-01 6.071642e-01 7.081382e-01 8.091123e-01 9.100864e-01 ...
 9.088988e+00 9.189962e+00 9.290936e+00 9.391910e+00 9.492884e+00
 9.593858e+00 9.694833e+00 9.795807e+00 9.896781e+00 9.997755e+00] 

PDF values: [0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 ... 0.1 0.1 0.1 0.1 0.1 0.1 0.1
 0.1 0.1 0.1] 

CDF values: [1.319591e-04 1.022937e-02 2.032678e-02 3.042419e-02 4.052160e-02
 5.061901e-02 6.071642e-02 7.081382e-02 8.091123e-02 9.100864e-02 ...
 9.088988e-01 9.189962e-01 9.290936e-01 9.391910e-01 9.492884e-01
 9.593858e-01 9.694833e-01 9.795807e-01 9.896781e-01 9.997755e-01]


def summarize_distribution(samples):
    """
    Compute and print basic statistics of a given sample.
    
    Parameters:
    - samples (array-like): Sample data from a distribution.
    """
    stats_dict = {
        "📌 Mean": round(np.mean(samples), 4),
        "📈 Variance": round(np.var(samples), 4),
        "📏 Std Dev": round(np.std(samples), 4),
        "🔀 Skewness": round(stats.skew(samples), 4),
        "🎯 Kurtosis": round(stats.kurtosis(samples), 4),
    }

    for label, value in stats_dict.items():
        print(f"{label:<12}: {value:>8.4f}")


summarize_distribution(samples)

📌 Mean      :   4.9603
📈 Variance  :   8.4569
📏 Std Dev   :   2.9081
🔀 Skewness  :   0.0189
🎯 Kurtosis  :  -1.2182


def summarize_moments(samples, raw=True):
    """
    Compute and print the 1st through 4th moments of a sample.

    Parameters:
    - samples (array-like): Sample data from a distribution.
    - raw (bool): If True, compute raw moments (E[X^k]); if False, compute central moments.
    """
    if raw:
        moments = [np.mean(samples**i) for i in range(1, 5)]
        label = "Raw"
    else:
        moments = [stats.moment(samples, moment=i) for i in range(1, 5)]
        label = "Central"

    print(f"📊 {label} Moments:")
    for i, val in enumerate(moments, start=1):
        print(f"  {i}ᵗʰ Moment: {val:.4f}")


summarize_moments(samples)

📊 Raw Moments:
  1ᵗʰ Moment: 4.9603
  2ᵗʰ Moment: 33.0619
  3ᵗʰ Moment: 248.3614
  4ᵗʰ Moment: 1990.5557


import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import seaborn as sns

def plot_distribution(samples, distribution_name, dist_func=None, params=None, discrete=False):
    """
    General function to visualize a distribution.
    
    Args:
        samples (np.array): The generated random samples.
        distribution_name (str): Name of the distribution (for title/labels).
        dist_func (scipy.stats distribution, optional): Distribution function from scipy.stats (e.g., stats.norm).
        params (tuple, optional): Parameters for the distribution function (e.g., (mean, std) for normal).
        discrete (bool): Set to True for discrete distributions.
    """

    fig, axs = plt.subplots(1, 3, figsize=(18, 5))
    
    # 1️⃣ Histogram vs. Theoretical Distribution (PMF/PDF)
    axs[0].hist(samples, bins=50 if not discrete else np.arange(min(samples), max(samples) + 1.5) - 0.5,
                density=True, alpha=0.6, color='blue', edgecolor='black', label="Sampled Data (Observed)")
    
    if dist_func and params:
        x = np.linspace(min(samples), max(samples), 100) if not discrete else np.arange(min(samples), max(samples) + 1)
        if discrete:
            y = dist_func.pmf(x, *params)
        else:
            y = dist_func.pdf(x, *params)
        axs[0].plot(x, y, 'r-', label="Theoretical")
    
    axs[0].set_title(f"{distribution_name} - Actual vs. Theoretical")
    axs[0].set_xlabel("Value")
    axs[0].set_ylabel("Probability Density/Mass")
    axs[0].legend()

    # 2️⃣ CDF Plot
    sorted_samples = np.sort(samples)
    empirical_cdf = np.arange(1, len(sorted_samples) + 1) / len(sorted_samples)
    axs[1].plot(sorted_samples, empirical_cdf, marker="o", linestyle="none", label="Empirical CDF")
    
    if dist_func and params:
        theoretical_cdf = dist_func.cdf(sorted_samples, *params)
        axs[1].plot(sorted_samples, theoretical_cdf, 'r-', label="Theoretical CDF")
    
    axs[1].set_title(f"{distribution_name} - CDF")
    axs[1].set_xlabel("Value of Random Variable X")
    axs[1].set_ylabel("Cumulative Probability")
    axs[1].legend()

    # 3️⃣ QQ-Plot (for normality check)
    stats.probplot(samples, dist="norm", plot=axs[2])
    axs[2].set_title(f"{distribution_name} - QQ-Plot")

    plt.tight_layout()
    plt.show()


plot_distribution(samples, "Uniform Distribution", stats.uniform, (0, 10))


def mle_uniform(samples):
    """ MLE for Uniform Distribution: Estimates a (min) and b (max) """
    estimated_a = np.min(samples)
    estimated_b = np.max(samples)
    return estimated_a, estimated_b

# Example usage
estimated_a, estimated_b = mle_uniform(samples)
print(f"MLE Estimated a: {estimated_a:.3f}, b: {estimated_b:.3f}")

MLE Estimated a: 0.001, b: 9.998


def mom_uniform(samples):
    """ MoM for Uniform Distribution: Estimates a (min) and b (max) """
    sample_mean = np.mean(samples)
    sample_var = np.var(samples)

    estimated_a = sample_mean - np.sqrt(3 * sample_var)
    estimated_b = sample_mean + np.sqrt(3 * sample_var)
    
    return estimated_a, estimated_b

# Example usage
estimated_a, estimated_b = mom_uniform(samples)
print(f"MoM Estimated a: {estimated_a:.3f}, b: {estimated_b:.3f}")

MoM Estimated a: -0.077, b: 9.997


import scipy.stats as stats

def bayesian_uniform(samples, prior_range=(0, 20)):
    """ Bayesian estimation for Uniform Distribution using weakly informative priors """
    prior_a = stats.uniform(0, prior_range[1])  # Prior for a
    prior_b = stats.uniform(0, prior_range[1])  # Prior for b

    estimated_a = np.min(samples)  # Approximate MAP estimate
    estimated_b = np.max(samples)

    return estimated_a, estimated_b

# Example usage
estimated_a, estimated_b = bayesian_uniform(samples)
print(f"Bayesian Estimated a: {estimated_a:.3f}, b: {estimated_b:.3f}")

Bayesian Estimated a: 0.001, b: 9.998


import numpy as np

# Define parameters
mean = 0    
std_dev = 1  
size = 10000  

# Generate samples
samples = np.random.normal(mean, std_dev, size)

# Print first 10 samples
print(np.round(samples[:10], 2))

[-0.75 -0.19 -0.77 -0.39  0.82  1.13  1.94 -0.77  0.    0.74]


# Define x range
x = np.linspace(min(samples), max(samples), 100)

# 1. Probability Density Function (PDF)
pdf_values = stats.norm.pdf(x, np.mean(samples), np.std(samples))

# 2. Cumulative Distribution Function (CDF)
cdf_values = stats.norm.cdf(x, np.mean(samples), np.std(samples))


summarize_distribution(samples)

📌 Mean      :  -0.0096
📈 Variance  :   1.0011
📏 Std Dev   :   1.0006
🔀 Skewness  :   0.0194
🎯 Kurtosis  :   0.0262


summarize_moments(samples)

📊 Raw Moments:
  1ᵗʰ Moment: -0.0096
  2ᵗʰ Moment: 1.0012
  3ᵗʰ Moment: -0.0094
  4ᵗʰ Moment: 3.0328


plot_distribution(samples, "Normal Distribution", stats.norm, (np.mean(samples), np.std(samples)))


from scipy.optimize import minimize

def mle_normal(samples):
    """ MLE for Normal Distribution: Estimates mean and std deviation """
    def neg_log_likelihood(params):
        mu, sigma = params
        return -np.sum(stats.norm.logpdf(samples, mu, sigma))
    
    init_params = [np.mean(samples), np.std(samples)]
    result = minimize(neg_log_likelihood, init_params, method="L-BFGS-B")
    return result.x  # [estimated_mu, estimated_sigma]

# Example usage
estimated_mu, estimated_sigma = mle_normal(samples)
print(f"MLE Estimated μ: {estimated_mu:.3f}, σ: {estimated_sigma:.3f}")

MLE Estimated μ: -0.010, σ: 1.001


def mom_normal(samples):
    """ MoM for Normal Distribution: Estimates mean and std deviation """
    estimated_mu = np.mean(samples)
    estimated_sigma = np.std(samples)
    return estimated_mu, estimated_sigma

# Example usage
estimated_mu, estimated_sigma = mom_normal(samples)
print(f"MoM Estimated μ: {estimated_mu:.3f}, σ: {estimated_sigma:.3f}")

MoM Estimated μ: -0.010, σ: 1.001


def bayesian_normal(samples, prior_mu=0, prior_sigma=10):
    """ Bayesian estimation for Normal Distribution using Normal-Gamma prior """
    n = len(samples)
    sample_mean = np.mean(samples)
    sample_var = np.var(samples)

    posterior_mu = (prior_mu + n * sample_mean) / (n + 1)
    posterior_sigma = np.sqrt(sample_var / (n + 1))

    return posterior_mu, posterior_sigma

# Example usage
estimated_mu, estimated_sigma = bayesian_normal(samples)
print(f"Bayesian Estimated μ: {estimated_mu:.3f}, σ: {estimated_sigma:.3f}")

Bayesian Estimated μ: -0.010, σ: 0.010


import numpy as np

# Define parameter
lambda_ = 1.0  # Rate parameter (1/mean)
size = 10000  

# Generate samples
samples = np.random.exponential(1/lambda_, size)

# Print first 10 samples
print(np.round(samples[:10], 2))

[0.67 1.44 3.28 0.51 0.47 2.16 0.34 1.54 5.14 3.37]


# Define x range
x = np.linspace(min(samples), max(samples), 100)

# 1. Probability Density Function (PDF)
pdf_values = stats.expon.pdf(x, scale=np.mean(samples))

# 2. Cumulative Distribution Function (CDF)
cdf_values = stats.expon.cdf(x, scale=np.mean(samples))

# 3. Print the values
np.set_printoptions(edgeitems=10, threshold=20, precision=6)
print("x values:", x, "\n")
print("PDF values:", pdf_values, "\n")
print("CDF values:", cdf_values, "\n")

x values: [3.260167e-05 7.579197e-02 1.515513e-01 2.273107e-01 3.030701e-01
 3.788294e-01 4.545888e-01 5.303482e-01 6.061075e-01 6.818669e-01 ...
 6.818376e+00 6.894135e+00 6.969894e+00 7.045654e+00 7.121413e+00
 7.197172e+00 7.272932e+00 7.348691e+00 7.424451e+00 7.500210e+00] 

PDF values: [1.009983e+00 9.355839e-01 8.666651e-01 8.028231e-01 7.436840e-01
 6.889013e-01 6.381541e-01 5.911451e-01 5.475990e-01 5.072607e-01 ...
 1.031537e-03 9.555500e-04 8.851604e-04 8.199560e-04 7.595548e-04
 7.036030e-04 6.517728e-04 6.037606e-04 5.592852e-04 5.180861e-04] 

CDF values: [3.292769e-05 7.369446e-02 1.419298e-01 2.051386e-01 2.636913e-01
 3.179307e-01 3.681746e-01 4.147174e-01 4.578316e-01 4.977699e-01 ...
 9.989787e-01 9.990539e-01 9.991236e-01 9.991882e-01 9.992480e-01
 9.993034e-01 9.993547e-01 9.994022e-01 9.994463e-01 9.994871e-01]


summarize_distribution(samples)

📌 Mean      :   0.9901
📈 Variance  :   0.9956
📏 Std Dev   :   0.9978
🔀 Skewness  :   1.9406
🎯 Kurtosis  :   4.9710


summarize_moments(samples)

📊 Raw Moments:
  1ᵗʰ Moment: 0.9901
  2ᵗʰ Moment: 1.9759
  3ᵗʰ Moment: 5.8557
  4ᵗʰ Moment: 22.3536


plot_distribution(samples, "Exponential Distribution", stats.expon, (0, np.mean(samples)))


def mle_exponential(samples):
    """ MLE for Exponential Distribution: Estimates lambda (rate parameter) """
    estimated_lambda = 1 / np.mean(samples)
    return estimated_lambda

# Example usage
estimated_lambda = mle_exponential(samples)
print(f"MLE Estimated λ: {estimated_lambda:.3f}")

MLE Estimated λ: 1.010


def mom_exponential(samples):
    """ MoM for Exponential Distribution: Estimates lambda (rate parameter) """
    sample_mean = np.mean(samples)
    estimated_lambda = 1 / sample_mean
    return estimated_lambda

# Example usage
estimated_lambda = mom_exponential(samples)
print(f"MoM Estimated λ: {estimated_lambda:.3f}")

MoM Estimated λ: 1.010


from scipy.stats import gamma
import numpy as np

def summarize_gamma(alpha, beta_val):
    mean = alpha / beta_val
    ci = gamma.ppf([0.025, 0.975], a=alpha, scale=1/beta_val)
    return mean, ci

def bayesian_exponential(samples, alpha_prior=1, beta_prior=1):
    """ Bayesian estimation for Exponential Distribution using Gamma prior """
    n = len(samples)
    posterior_alpha = alpha_prior + n
    posterior_beta = beta_prior + np.sum(samples)

    return posterior_alpha, posterior_beta

# Example usage
posterior_alpha, posterior_beta = bayesian_exponential(samples)
mean, ci = summarize_gamma(posterior_alpha, posterior_beta)
print(f"Posterior estimate for Exponential rate λ:\n- Distribution: Gamma(shape={posterior_alpha}, rate={posterior_beta:.2f})\n- Mean rate: {mean:.3f} per unit\n- 95% credible interval: [{ci[0]:.3f}, {ci[1]:.3f}]")

Posterior estimate for Exponential rate λ:
- Distribution: Gamma(shape=10001, rate=9901.83)
- Mean rate: 1.010 per unit
- 95% credible interval: [0.990, 1.030]


import numpy as np

# Define degrees of freedom
df = 4  
size = 10000  

# Generate samples
samples = np.random.chisquare(df, size)

# Print first 10 samples
print(np.round(samples[:10], 2))

[ 3.87 12.99  4.83  8.65  7.87  5.4   7.03  2.93  5.42  1.56]


# Define x range
x = np.linspace(min(samples), max(samples), 100)

# Degrees of freedom (df) estimated from sample mean
df = np.mean(samples)

# 1. Probability Density Function (PDF)
pdf_values = stats.chi2.pdf(x, df)

# 2. Cumulative Distribution Function (CDF)
cdf_values = stats.chi2.cdf(x, df)

# 3. Print the values
np.set_printoptions(edgeitems=10, threshold=20, precision=6)
print("x values:", x, "\n")
print("PDF values:", pdf_values, "\n")
print("CDF values:", cdf_values, "\n")

x values: [ 0.040802  0.268104  0.495405  0.722707  0.950008  1.17731   1.404611
  1.631913  1.859214  2.086516 ... 20.49794  20.725242 20.952543 21.179845
 21.407146 21.634448 21.861749 22.089051 22.316352 22.543654] 

PDF values: [1.071029e-02 6.094558e-02 9.953194e-02 1.288172e-01 1.504788e-01
 1.658768e-01 1.761419e-01 1.822217e-01 1.849123e-01 1.848826e-01 ...
 1.759012e-04 1.587170e-04 1.431946e-04 1.291753e-04 1.165154e-04
 1.050845e-04 9.476481e-05 8.544944e-05 7.704175e-05 6.945423e-05] 

CDF values: [2.217819e-04 8.618740e-03 2.705184e-02 5.316285e-02 8.503569e-02
 1.210970e-01 1.600553e-01 2.008549e-01 2.426374e-01 2.847107e-01 ...
 9.996145e-01 9.996525e-01 9.996868e-01 9.997177e-01 9.997456e-01
 9.997707e-01 9.997934e-01 9.998139e-01 9.998323e-01 9.998490e-01]


summarize_distribution(samples)

📌 Mean      :   3.9679
📈 Variance  :   7.5040
📏 Std Dev   :   2.7393
🔀 Skewness  :   1.3363
🎯 Kurtosis  :   2.5909


summarize_moments(samples)

📊 Raw Moments:
  1ᵗʰ Moment: 3.9679
  2ᵗʰ Moment: 23.2482
  3ᵗʰ Moment: 179.2662
  4ᵗʰ Moment: 1707.5563


plot_distribution(samples, "Chi-Square Distribution", stats.chi2, (np.mean(samples),))


def mle_chi_square(samples):
    """ MLE for Chi-Square Distribution: Estimates degrees of freedom (df) """
    estimated_df = np.mean(samples)
    return estimated_df

# Example usage
estimated_df = mle_chi_square(samples)
print(f"MLE Estimated df: {estimated_df:.3f}")

MLE Estimated df: 3.968


def mom_chi_square(samples):
    """ MoM for Chi-Square Distribution: Estimates degrees of freedom (df) """
    estimated_df = np.mean(samples)
    return estimated_df

# Example usage
estimated_df = mom_chi_square(samples)
print(f"MoM Estimated df: {estimated_df:.3f}")

MoM Estimated df: 3.968


from scipy.stats import gamma
import numpy as np

def summarize_gamma(alpha, beta_val):
    mean = alpha / beta_val
    ci = gamma.ppf([0.025, 0.975], a=alpha, scale=1/beta_val)
    return mean, ci

def bayesian_chi_square(samples, alpha_prior=1, beta_prior=1):
    """ Bayesian estimation for Chi-Square Distribution using Gamma prior """
    n = len(samples)
    posterior_alpha = alpha_prior + n / 2
    posterior_beta = beta_prior + np.sum(samples) / 2

    return posterior_alpha, posterior_beta

# Example usage
posterior_alpha, posterior_beta = bayesian_chi_square(samples)
mean, ci = summarize_gamma(posterior_alpha, posterior_beta)
print(f"Posterior estimate for Chi-Square degrees of freedom:\n"
      f"- Distribution: Gamma(shape={posterior_alpha:.1f}, rate={posterior_beta:.1f})\n"
      f"- Mean: {mean:.2f}\n"
      f"- 95% credible interval: [{ci[0]:.2f}, {ci[1]:.2f}]")

Posterior estimate for Chi-Square degrees of freedom:
- Distribution: Gamma(shape=5001.0, rate=19840.5)
- Mean: 0.25
- 95% credible interval: [0.25, 0.26]


import numpy as np

# Define degrees of freedom
df = 10  
size = 10000  

# Generate samples
samples = np.random.standard_t(df, size)

# Print first 10 samples
print(np.round(samples[:10], 2))

[-1.14 -0.4  -0.61  1.68  0.63 -0.8  -1.01 -0.56 -0.79  2.56]


# Define x range
x = np.linspace(min(samples), max(samples), 100)

# Degrees of freedom (df) estimated from sample variance
df = len(samples) - 1

# 1. Probability Density Function (PDF)
pdf_values = stats.t.pdf(x, df)

# 2. Cumulative Distribution Function (CDF)
cdf_values = stats.t.cdf(x, df)

# 3. Print the values
np.set_printoptions(edgeitems=10, threshold=20, precision=6)
print("x values:", x, "\n")
print("PDF values:", pdf_values, "\n")
print("CDF values:", cdf_values, "\n")

x values: [-6.264922 -6.137964 -6.011006 -5.884048 -5.75709  -5.630132 -5.503174
 -5.376216 -5.249258 -5.1223   ...  5.161292  5.28825   5.415208  5.542166
  5.669124  5.796082  5.923039  6.049997  6.176955  6.303913] 

PDF values: [1.241274e-09 2.719701e-09 5.864793e-09 1.244683e-08 2.599778e-08
 5.344196e-08 1.081170e-07 2.152622e-07 4.217950e-07 8.133769e-07 ...
 6.659509e-07 3.436522e-07 1.745234e-07 8.722654e-08 4.290492e-08
 2.076978e-08 9.895235e-09 4.639730e-09 2.141090e-09 9.724251e-10] 

CDF values: [1.941868e-10 4.337952e-10 9.540927e-10 2.066045e-09 4.404897e-09
 9.246595e-09 1.911095e-08 3.889031e-08 7.792276e-08 1.537293e-07 ...
 9.999999e-01 9.999999e-01 1.000000e+00 1.000000e+00 1.000000e+00
 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00]


summarize_distribution(samples)

📌 Mean      :  -0.0023
📈 Variance  :   1.2554
📏 Std Dev   :   1.1205
🔀 Skewness  :   0.0416
🎯 Kurtosis  :   1.1310


summarize_moments(samples)

📊 Raw Moments:
  1ᵗʰ Moment: -0.0023
  2ᵗʰ Moment: 1.2554
  3ᵗʰ Moment: 0.0498
  4ᵗʰ Moment: 6.5104


plot_distribution(samples, "Student's t-Distribution", stats.t, (len(samples) - 1,))


def mle_student_t(samples):
    """ MLE for Student's t-Distribution: Estimates degrees of freedom (df) """
    def neg_log_likelihood(df):
        return -np.sum(stats.t.logpdf(samples, df))
    
    result = minimize(neg_log_likelihood, x0=[10], method="L-BFGS-B", bounds=[(1, None)])
    return result.x[0]

# Example usage
estimated_df = mle_student_t(samples)
print(f"MLE Estimated df: {estimated_df:.3f}")

MLE Estimated df: 9.536


def mom_student_t(samples):
    """ MoM for Student's t-Distribution: Estimates degrees of freedom (df) """
    sample_var = np.var(samples)
    
    def solve_df(df):
        return df / (df - 2) - sample_var  # Equation to solve

    from scipy.optimize import fsolve
    estimated_df = fsolve(solve_df, x0=10)[0]
    return estimated_df

# Example usage
estimated_df = mom_student_t(samples)
print(f"MoM Estimated df: {estimated_df:.3f}")

MoM Estimated df: 9.830


if False:
    # NOTE: Avoid using Bayesian inference with a Gamma prior to estimate degrees of freedom (df)
    # for Student's t-distribution. This approach is statistically inappropriate for several reasons:
    #
    # 1. The df parameter controls the heaviness of the tails — it's not a scale or rate parameter,
    #    so modeling it with a Gamma distribution lacks theoretical justification.
    #
    # 2. The df is inherently non-linear and non-conjugate in the likelihood function of the t-distribution.
    #    Using a conjugate prior (like Gamma) does not yield a valid posterior.
    #
    # 3. Resulting estimates can be misleading and orders of magnitude off (e.g., posterior mean ≈ 0.5 
    #    while MLE/MoM ≈ 9), especially for small-to-moderate sample sizes.
    #
    # 4. Proper Bayesian treatment requires a custom likelihood function and non-conjugate priors,
    #    typically handled via MCMC sampling (e.g., using PyMC or Stan), not closed-form posteriors.
    #
    # Use MLE (maximum likelihood) instead, or resort to full Bayesian sampling with a valid model
    # if you have strong prior beliefs about df.

    from scipy.stats import gamma
    import numpy as np

    def summarize_gamma(alpha, beta_val):
        mean = alpha / beta_val
        ci = gamma.ppf([0.025, 0.975], a=alpha, scale=1/beta_val)
        return mean, ci

    def bayesian_student_t(samples, alpha_prior=1, beta_prior=1):
        """ Bayesian estimation for Student's t-Distribution using Gamma prior """
        n = len(samples)
        posterior_alpha = alpha_prior + n / 2
        posterior_beta = beta_prior + np.sum(samples**2) / 2

        return posterior_alpha, posterior_beta

    # Example usage
    posterior_alpha, posterior_beta = bayesian_student_t(samples)
    mean, ci = summarize_gamma(posterior_alpha, posterior_beta)
    print(f"Posterior estimate for Student’s t degrees of freedom:\n"
        f"- Distribution: Gamma(shape={posterior_alpha:.1f}, rate={posterior_beta:.1f})\n"
        f"- Mean: {mean:.2f}\n"
        f"- 95% credible interval: [{ci[0]:.2f}, {ci[1]:.2f}]")


import numpy as np

# Define parameter
lambda_ = 3  # Average number of events per interval
size = 10000  

# Generate samples
samples = np.random.poisson(lambda_, size)

# Print first 10 samples
print(np.round(samples[:10], 2))

[3 3 0 0 4 2 0 3 5 6]


# Define x range (only integer values for discrete distribution)
x = np.arange(min(samples), max(samples)+1)

# Lambda estimated as sample mean
lambda_ = np.mean(samples)

# 1. Probability Mass Function (PMF)
pmf_values = stats.poisson.pmf(x, lambda_)

# 2. Cumulative Distribution Function (CDF)
cdf_values = stats.poisson.cdf(x, lambda_)

# 3. Print the values
np.set_printoptions(edgeitems=10, threshold=20, precision=6)
print("x values:", x, "\n")
print("PDF values:", pdf_values, "\n")
print("CDF values:", cdf_values, "\n")

x values: [ 0  1  2  3  4  5  6  7  8  9 10 11 12] 

PDF values: [1.241274e-09 2.719701e-09 5.864793e-09 1.244683e-08 2.599778e-08
 5.344196e-08 1.081170e-07 2.152622e-07 4.217950e-07 8.133769e-07 ...
 6.659509e-07 3.436522e-07 1.745234e-07 8.722654e-08 4.290492e-08
 2.076978e-08 9.895235e-09 4.639730e-09 2.141090e-09 9.724251e-10] 

CDF values: [0.04794  0.193573 0.414775 0.638764 0.808872 0.912223 0.96455  0.987258
 0.995881 0.998792 0.999676 0.99992  0.999982]


summarize_distribution(samples)

📌 Mean      :   3.0378
📈 Variance  :   3.0498
📏 Std Dev   :   1.7464
🔀 Skewness  :   0.5627
🎯 Kurtosis  :   0.2867


summarize_moments(samples)

📊 Raw Moments:
  1ᵗʰ Moment: 3.0378
  2ᵗʰ Moment: 12.2780
  3ᵗʰ Moment: 58.8240
  4ᵗʰ Moment: 321.0080


plot_distribution(samples, "Poisson Distribution", stats.poisson, (np.mean(samples),), discrete=True)


def mle_poisson(samples):
    """ MLE for Poisson Distribution: Estimates lambda (mean rate of occurrences) """
    estimated_lambda = np.mean(samples)
    return estimated_lambda

# Example usage
estimated_lambda = mle_poisson(samples)
print(f"MLE Estimated λ: {estimated_lambda:.3f}")

MLE Estimated λ: 3.038


def mom_poisson(samples):
    """ MoM for Poisson Distribution: Estimates lambda (mean rate of occurrences) """
    estimated_lambda = np.mean(samples)
    return estimated_lambda

# Example usage
estimated_lambda = mom_poisson(samples)
print(f"MoM Estimated λ: {estimated_lambda:.3f}")

MoM Estimated λ: 3.038


from scipy.stats import gamma
import numpy as np

def summarize_gamma(alpha, beta_val):
    mean = alpha / beta_val
    ci = gamma.ppf([0.025, 0.975], a=alpha, scale=1 / beta_val)
    return mean, ci

def bayesian_poisson(samples, alpha_prior=1, beta_prior=1):
    """
    Bayesian estimation for Poisson rate λ using a Gamma prior.
    Assumes 'samples' contains raw Poisson-distributed count data.
    """
    total_events = np.sum(samples)
    total_observations = len(samples)

    posterior_alpha = alpha_prior + total_events
    posterior_beta = beta_prior + total_observations

    return posterior_alpha, posterior_beta

# DEBUG: Inspect your input sample
print(f"Sample mean: {np.mean(samples):.3f}")
print(f"Sample sum: {np.sum(samples):.0f} | Sample size: {len(samples)}")

# Run Bayesian estimation
posterior_alpha, posterior_beta = bayesian_poisson(samples)
mean, ci = summarize_gamma(posterior_alpha, posterior_beta)

# Output
print(f"\nPosterior estimate for Poisson rate λ:")
print(f"- Distribution: Gamma(shape={posterior_alpha:.0f}, rate={posterior_beta:.0f})")
print(f"- Mean rate: {mean:.3f} per unit")
print(f"- 95% credible interval: [{ci[0]:.3f}, {ci[1]:.3f}]")

Sample mean: 3.038
Sample sum: 30378 | Sample size: 10000

Posterior estimate for Poisson rate λ:
- Distribution: Gamma(shape=30379, rate=10001)
- Mean rate: 3.038 per unit
- 95% credible interval: [3.004, 3.072]


import numpy as np

# Define probability of success
p = 0.5  
size = 10000  

# Generate samples (0 or 1 outcomes)
samples = np.random.binomial(1, p, size)

# Print first 10 samples
print(np.round(samples[:10], 2))

[1 1 0 1 1 1 1 0 1 1]


# Define x values (0 and 1 only for Bernoulli)
x = np.array([0, 1])

# Probability of success (p) estimated from sample mean
p = np.mean(samples)

# 1. Probability Mass Function (PMF)
pmf_values = stats.bernoulli.pmf(x, p)

# 2. Cumulative Distribution Function (CDF)
cdf_values = stats.bernoulli.cdf(x, p)

# 3. Print the values
np.set_printoptions(edgeitems=10, threshold=20, precision=6)
print("x values:", x, "\n")
print("PDF values:", pdf_values, "\n")
print("CDF values:", cdf_values, "\n")

x values: [0 1] 

PDF values: [1.241274e-09 2.719701e-09 5.864793e-09 1.244683e-08 2.599778e-08
 5.344196e-08 1.081170e-07 2.152622e-07 4.217950e-07 8.133769e-07 ...
 6.659509e-07 3.436522e-07 1.745234e-07 8.722654e-08 4.290492e-08
 2.076978e-08 9.895235e-09 4.639730e-09 2.141090e-09 9.724251e-10] 

CDF values: [0.5035 1.    ]


summarize_distribution(samples)

📌 Mean      :   0.4965
📈 Variance  :   0.2500
📏 Std Dev   :   0.5000
🔀 Skewness  :   0.0140
🎯 Kurtosis  :  -1.9998


summarize_moments(samples)

📊 Raw Moments:
  1ᵗʰ Moment: 0.4965
  2ᵗʰ Moment: 0.4965
  3ᵗʰ Moment: 0.4965
  4ᵗʰ Moment: 0.4965


plot_distribution(samples, "Bernoulli Distribution", stats.bernoulli, (np.mean(samples),), discrete=True)


def mle_bernoulli(samples):
    """ MLE for Bernoulli Distribution: Estimates probability of success (p) """
    estimated_p = np.mean(samples)
    return estimated_p

# Example usage
estimated_p = mle_bernoulli(samples)
print(f"MLE Estimated p: {estimated_p:.3f}")

MLE Estimated p: 0.496


def mom_bernoulli(samples):
    """ MoM for Bernoulli Distribution: Estimates probability of success (p) """
    estimated_p = np.mean(samples)
    return estimated_p

# Example usage
estimated_p = mom_bernoulli(samples)
print(f"MoM Estimated p: {estimated_p:.3f}")

MoM Estimated p: 0.496


from scipy.stats import beta
import numpy as np

def summarize_beta(alpha, beta_val):
    mean = alpha / (alpha + beta_val)
    ci = beta.ppf([0.025, 0.975], a=alpha, b=beta_val)
    return mean, ci

def bayesian_bernoulli(samples, alpha_prior=1, beta_prior=1):
    """ Bayesian estimation for Bernoulli Distribution using Beta prior """
    successes = np.sum(samples)
    failures = len(samples) - successes

    posterior_alpha = alpha_prior + successes
    posterior_beta = beta_prior + failures

    return posterior_alpha, posterior_beta

# Example usage
posterior_alpha, posterior_beta = bayesian_bernoulli(samples)
mean, ci = summarize_beta(posterior_alpha, posterior_beta)

print(f"Posterior estimate for Bernoulli probability p:")
print(f"- Distribution: Beta(α={posterior_alpha}, β={posterior_beta})")
print(f"- Mean p: {mean:.3f}")
print(f"- 95% credible interval: [{ci[0]:.3f}, {ci[1]:.3f}]")

Posterior estimate for Bernoulli probability p:
- Distribution: Beta(α=4966, β=5036)
- Mean p: 0.497
- 95% credible interval: [0.487, 0.506]


import numpy as np

# Define parameters
n = 10   # Number of trials
p = 0.5  # Probability of success
size = 10000  

# Generate samples
samples = np.random.binomial(n, p, size)

# Print first 10 samples
print(np.round(samples[:10], 2))

[6 6 1 6 5 4 2 8 6 6]


# Define x range (integer values from 0 to max observed trials)
x = np.arange(min(samples), max(samples)+1)

# Number of trials (n) estimated as max observed value
n = max(samples)

# Probability of success (p) estimated from sample mean divided by n
p = np.mean(samples) / n

# 1. Probability Mass Function (PMF)
pmf_values = stats.binom.pmf(x, n, p)

# 2. Cumulative Distribution Function (CDF)
cdf_values = stats.binom.cdf(x, n, p)

# 3. Print the values
np.set_printoptions(edgeitems=10, threshold=20, precision=6)
print("x values:", x, "\n")
print("PDF values:", pdf_values, "\n")
print("CDF values:", cdf_values, "\n")

x values: [ 0  1  2  3  4  5  6  7  8  9 10] 

PDF values: [1.241274e-09 2.719701e-09 5.864793e-09 1.244683e-08 2.599778e-08
 5.344196e-08 1.081170e-07 2.152622e-07 4.217950e-07 8.133769e-07 ...
 6.659509e-07 3.436522e-07 1.745234e-07 8.722654e-08 4.290492e-08
 2.076978e-08 9.895235e-09 4.639730e-09 2.141090e-09 9.724251e-10] 

CDF values: [0.001029 0.011213 0.056561 0.176224 0.383442 0.629502 0.832406 0.947138
 0.989712 0.999074 1.      ]


summarize_distribution(samples)

📌 Mean      :   4.9737
📈 Variance  :   2.5954
📏 Std Dev   :   1.6110
🔀 Skewness  :  -0.0242
🎯 Kurtosis  :  -0.2114


summarize_moments(samples)

📊 Raw Moments:
  1ᵗʰ Moment: 4.9737
  2ᵗʰ Moment: 27.3331
  3ᵗʰ Moment: 161.6631
  4ᵗʰ Moment: 1013.9527


plot_distribution(samples, "Binomial Distribution", stats.binom, (max(samples), np.mean(samples) / max(samples)), discrete=True)


def mle_binomial(samples, n):
    """ MLE for Binomial Distribution: Estimates probability of success (p) given n trials """
    estimated_p = np.mean(samples) / n
    return estimated_p

# Example usage
n = max(samples)  # Assuming n is the max observed value
estimated_p = mle_binomial(samples, n)
print(f"MLE Estimated p: {estimated_p:.3f} (given n={n})")

MLE Estimated p: 0.497 (given n=10)


def mom_binomial(samples, n):
    """ MoM for Binomial Distribution: Estimates probability of success (p) given n trials """
    sample_mean = np.mean(samples)
    sample_var = np.var(samples)

    estimated_p = sample_mean / n
    return estimated_p

# Example usage
n = max(samples)  # Assuming n is the max observed value
estimated_p = mom_binomial(samples, n)
print(f"MoM Estimated p: {estimated_p:.3f} (given n={n})")

MoM Estimated p: 0.497 (given n=10)


from scipy.stats import beta
import numpy as np

def summarize_beta(alpha, beta_val):
    mean = alpha / (alpha + beta_val)
    ci = beta.ppf([0.025, 0.975], a=alpha, b=beta_val)
    return mean, ci

def bayesian_binomial(samples, n, alpha_prior=1, beta_prior=1):
    """ Bayesian estimation for Binomial Distribution using Beta prior """
    successes = np.sum(samples)
    total_trials = n * len(samples)

    posterior_alpha = alpha_prior + successes
    posterior_beta = beta_prior + (total_trials - successes)

    return posterior_alpha, posterior_beta

# Example usage
n = max(samples)  # Assuming n is the known number of trials per sample
posterior_alpha, posterior_beta = bayesian_binomial(samples, n)
mean, ci = summarize_beta(posterior_alpha, posterior_beta)

print(f"Posterior estimate for Binomial probability p:")
print(f"- Distribution: Beta(α={posterior_alpha}, β={posterior_beta})")
print(f"- Mean p: {mean:.3f}")
print(f"- 95% credible interval: [{ci[0]:.3f}, {ci[1]:.3f}]")

Posterior estimate for Binomial probability p:
- Distribution: Beta(α=49738, β=50264)
- Mean p: 0.497
- 95% credible interval: [0.494, 0.500]

📖 Statistical Distributions¶

📌 Glossary

🧠 Basic Concepts

🎲 Types of Random Variables

🎯 Probability

🧠 Estimation & Inference

📊 Distributions Cheatsheet

📐 Continuous Distributions¶

📊 Uniform

🎲 What is a Uniform Distribution?

📌 Parameters

🧠 Real-Life Examples

🧾 When to Use

Sample from Distribution¶

Key Properties¶

Summarize Distribution¶

Visualizing Distributions¶

🔍 Purpose

🧠 Interpretation

⚙️ What does the function do?

Parameter Estimation

📈 Normal(AKA Gaussian)

📈 What is a Normal Distribution?

📌 Parameters

🧠 Real-Life Examples

🧾 When to Use

Sample from Distribution¶

Key Properties¶

Visualizing Distributions¶

Parameter Estimation¶

⏱️ Exponential

⏱️ What is an Exponential Distribution?

📌 Parameter

🧠 Real-Life Examples

🧾 When to Use

Sample from Distribution¶

Key Properties¶

Summarize Distribution¶

Visualizing Distributions¶

Parameter Estimation¶

📦 Chi Square (χ²)

📦 What is a Chi-Square (χ²) Distribution?

📌 Parameter

🧠 Real-Life Examples

🧾 When to Use

Sample from Distribution¶

Key Properties¶

Summarize Distribution¶

Visualizing Distributions¶

Parameter Estimation¶

📘 Student t

📘 What is a Student’s t-Distribution?

📌 Parameter

🧠 Real-Life Examples

🧾 When to Use

Sample from Distribution¶

Key Properties¶

Summarize Distribution¶

Visualizing Distributions¶

Parameter Estimation¶

🎲 Discrete Distributions¶

🔔 Poisson

🔔 What is a Poisson Distribution?

📌 Parameter

🧠 Real-Life Examples

🧾 When to Use

Sample from Distribution¶

Key Properties¶

Summarize Distribution¶

Visualizing Distributions¶

Parameter Estimation¶

⚪ Bernoulli

⚪ What is a Bernoulli Distribution?

📌 Parameter

🧠 Real-Life Examples

🧾 When to Use

Sample from Distribution¶

Key Properties¶

Summarize Distribution¶

Visualizing Distributions¶

📈 Normal
(AKA Gaussian)