
📖 Statistical Similarity¶

🧭 Overview

📏 Distance Metrics for Numeric Data

  • 📌 Euclidean Distance
  • 📌 Manhattan Distance
  • 📌 Minkowski Distance
  • 📌 Mahalanobis Distance

🧮 Distance Metrics for Vectors and Angles

  • 📌 Cosine Similarity / Distance

🔤 Distance Metrics for Categorical or Binary Data

  • 📌 Hamming Distance
  • 📌 Jaccard Similarity / Distance

📊 Similarity Measures for Continuous Data

  • 📌 Pearson Correlation
  • 📌 Spearman Rank Correlation

🧭 Overview¶


This notebook covers a wide range of distance and similarity metrics that are foundational to machine learning and statistical analysis.

  • 📏 Distance Metrics for Numeric Data: Euclidean, Manhattan, Minkowski, and Mahalanobis distances—core to algorithms like KNN, clustering, and anomaly detection.
  • 🧮 Vector-Based Measures: Cosine similarity, useful in high-dimensional spaces like NLP and recommender systems.
  • 🔤 Distance Metrics for Categorical/Binary Data: Hamming and Jaccard distances, often used in matching and similarity scoring for categorical features.
  • 📊 Similarity Measures for Continuous Data: Pearson and Spearman correlations, essential for understanding relationships and dependencies between numeric variables.

Each section contains:

  • Clear explanation + intuition
  • Mathematical formula
  • Clean, reproducible code implementation

Back to the top

📏 Distance Metrics for Numeric Data¶


This section includes distance metrics that operate on numerical features. These metrics are used when data points are represented as vectors in a continuous feature space.

They form the backbone of many machine learning algorithms, particularly those that rely on geometric closeness, such as:

  • 📌 K-Nearest Neighbors (KNN)
  • 📌 K-Means Clustering
  • 📌 Anomaly Detection
  • 📌 Distance-based recommender systems

Each metric here differs in how it defines "closeness"—some are sensitive to scale or outliers, while others account for data correlations.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy.spatial.distance import (
    minkowski,
    mahalanobis,
    cosine,
    hamming,
    jaccard,
    euclidean,
)

from scipy.stats import pearsonr, spearmanr
from numpy.linalg import norm

📌 Euclidean Distance¶


🧠 Intuition
The straight-line (as-the-crow-flies) distance between two points in space. Think of it like using a ruler to measure distance on a map.

🧮 Formula

$$ d(x, y) = \sqrt{ \sum_{i=1}^{n} (x_i - y_i)^2 } $$

⚠️ Sensitivity

  • Sensitive to scale differences between features
  • Highly affected by outliers
  • Requires normalization when features vary in range

🧰 Use Cases + Real-World Examples

  • Used in KNN and K-Means to compute closeness
  • In image processing, for comparing pixel intensities or feature embeddings
  • Can model physical distances in geospatial analysis when units are aligned

📝 Notes

  • Assumes all features contribute equally
  • Simple, intuitive, but not always reliable without preprocessing
  • Can mislead in high-dimensional spaces or with unscaled features


In [2]:
def compute_euclidean_distance(x, y, method="both", visualize=False):
    """
    Compute the Euclidean distance between two vectors using manual and/or library methods.

    Parameters:
    - x (array-like): First vector
    - y (array-like): Second vector
    - method (str): 'manual', 'library', or 'both' (default: 'both')
    - visualize (bool): Whether to show a 2D visualization if applicable (default: False)

    Returns:
    - None (prints results directly)
    """
    x = np.array(x)
    y = np.array(y)

    lib_dist = None

    if method in ["library", "both"]:
        lib_dist = euclidean(x, y)
        print(f"⚙️  Euclidean Distance: {lib_dist:.4f}")

    if method in ["manual", "both"]:
        manual_dist = np.sqrt(np.sum((x - y) ** 2))
        print(f"📐 Euclidean Distance (Custom Code): {manual_dist:.4f}")

    if visualize and len(x) == 2 and len(y) == 2:
        if lib_dist is None:
            lib_dist = euclidean(x, y)  # fallback for plotting

        plt.figure(figsize=(5, 5))
        plt.scatter(*x, color='blue', s=100)
        plt.scatter(*y, color='green', s=100)
        plt.plot([x[0], y[0]], [x[1], y[1]], 'r--')

        # Annotate distance
        mid_x, mid_y = (x[0] + y[0]) / 2, (x[1] + y[1]) / 2
        plt.text(mid_x, mid_y, f"Distance = {lib_dist:.4f}", fontsize=12, color='red', ha='center', va='bottom')

        # Annotate points
        plt.text(*x, f'  x {tuple(x)}', fontsize=12, verticalalignment='bottom')
        plt.text(*y, f'  y {tuple(y)}', fontsize=12, verticalalignment='bottom')

        plt.title("Euclidean Distance Visualization (2D)")
        plt.axis('equal')
        plt.show()
In [3]:
# Example usage
x = [1, 2]
y = [4, 6]
compute_euclidean_distance(x, y, method="both", visualize=True)
⚙️  Euclidean Distance: 5.0000
📐 Euclidean Distance (Custom Code): 5.0000
[Plot: Euclidean Distance Visualization (2D)]
In [4]:
# 5D example (no visualization)
x_5d = [1, 3, 5, 7, 9]
y_5d = [2, 4, 6, 8, 10]
compute_euclidean_distance(x_5d, y_5d, method="both", visualize=False)
⚙️  Euclidean Distance: 2.2361
📐 Euclidean Distance (Custom Code): 2.2361

📌 Manhattan Distance¶


🧠 Intuition
Measures distance by summing absolute differences across dimensions. Like navigating a city grid—no diagonal shortcuts, only vertical and horizontal movement.

🧮 Formula

$$ d(x, y) = \sum_{i=1}^{n} |x_i - y_i| $$

⚠️ Sensitivity

  • Less sensitive to outliers than Euclidean
  • Still scale-dependent—normalization is recommended
  • Can be more robust in sparse or high-dimensional settings

🧰 Use Cases + Real-World Examples

  • Common in recommender systems where input vectors are high-dimensional and sparse
  • Used in L1-regularized models like Lasso, which induce sparsity
  • Helpful when minimizing absolute error is preferred (e.g., median-based objectives)

📝 Notes

  • Captures linear path cost better than Euclidean in some contexts
  • Useful when small differences across many features matter more than large differences in a few
  • Often performs better than Euclidean in high-dimensional, noisy data
In [5]:
import numpy as np
from scipy.spatial.distance import cityblock
import matplotlib.pyplot as plt

def compute_manhattan_distance(x, y, method="both", visualize=False):
    """
    Compute the Manhattan (L1) distance between two vectors using manual and/or library methods.

    Parameters:
    - x (array-like): First vector
    - y (array-like): Second vector
    - method (str): 'manual', 'library', or 'both' (default: 'both')
    - visualize (bool): Whether to show a 2D visualization if applicable (default: False)

    Returns:
    - None (prints results directly)
    """
    x = np.array(x)
    y = np.array(y)

    lib_dist = None

    if method in ["library", "both"]:
        lib_dist = cityblock(x, y)
        print(f"⚙️  Manhattan Distance: {lib_dist:.4f}")

    if method in ["manual", "both"]:
        manual_dist = np.sum(np.abs(x - y))
        print(f"📐 Manhattan Distance (Custom Code): {manual_dist:.4f}")

    if visualize and len(x) == 2 and len(y) == 2:
        if lib_dist is None:
            lib_dist = cityblock(x, y)

        plt.figure(figsize=(5, 5))
        plt.scatter(*x, color='blue', s=100)
        plt.scatter(*y, color='green', s=100)

        # Draw horizontal and vertical segments
        plt.plot([x[0], y[0]], [x[1], x[1]], 'r--')  # horizontal
        plt.plot([y[0], y[0]], [x[1], y[1]], 'r--')  # vertical

        # Annotate distances on the segments
        plt.text((x[0] + y[0]) / 2, x[1], f"Δx = {abs(x[0] - y[0])}", 
                 fontsize=10, color='blue', ha='center', va='bottom')
        plt.text(y[0], (x[1] + y[1]) / 2, f"Δy = {abs(x[1] - y[1])}", 
                 fontsize=10, color='green', ha='left', va='center')

        # Annotate total distance
        mid_x, mid_y = (x[0] + y[0]) / 2, (x[1] + y[1]) / 2
        plt.text(mid_x, mid_y, f"Distance = {lib_dist:.4f}", fontsize=12, color='red', ha='center', va='bottom')

        # Label points with coordinates
        plt.text(*x, f'  x {tuple(x)}', fontsize=12, verticalalignment='bottom')
        plt.text(*y, f'  y {tuple(y)}', fontsize=12, verticalalignment='bottom')

        plt.title("Manhattan Distance Visualization (2D)")
        plt.axis('equal')
        plt.show()
In [6]:
# Example usage
x = [1, 2]
y = [4, 6]
compute_manhattan_distance(x, y, method="both", visualize=True)
⚙️  Manhattan Distance: 7.0000
📐 Manhattan Distance (Custom Code): 7.0000
[Plot: Manhattan Distance Visualization (2D)]
In [7]:
# 5D usage (no plot)
x_5d = [1, 3, 5, 7, 9]
y_5d = [2, 4, 6, 8, 10]
compute_manhattan_distance(x_5d, y_5d, method="both", visualize=False)
⚙️  Manhattan Distance: 5.0000
📐 Manhattan Distance (Custom Code): 5.0000

📌 Minkowski Distance¶


🧠 Intuition
A generalization of both Euclidean and Manhattan distances. By adjusting the parameter $p$, it morphs into different distance metrics. Think of it as a flexible distance formula with a sensitivity dial.

🧮 Formula

$$ d(x, y) = \left( \sum_{i=1}^{n} |x_i - y_i|^p \right)^{1/p} $$

⚠️ Sensitivity

  • Sensitive to the choice of $p$:
    • $p = 1$: Manhattan Distance
    • $p = 2$: Euclidean Distance
  • Higher $p$ values emphasize larger deviations
  • Still scale-dependent like its special cases

🧰 Use Cases + Real-World Examples

  • Used in KNN classifiers to experiment with different notions of "closeness"
  • Helpful in model tuning, especially when testing sensitivity to distance metrics
  • Useful in feature engineering pipelines with customizable distance needs

📝 Notes

  • Acts as a bridge between L1 and L2 distances
  • Not commonly used directly, but understanding it gives you control over distance behavior
  • Can help explore robustness to outliers by adjusting $p$
In [8]:
def compute_minkowski_distance(x, y, p=3, method="both", visualize=False):
    """
    Compute the Minkowski distance between two vectors using manual and/or library methods.

    Parameters:
    - x (array-like): First vector
    - y (array-like): Second vector
    - p (int or float): Order of the norm (e.g., 1 for Manhattan, 2 for Euclidean)
    - method (str): 'manual', 'library', or 'both' (default: 'both')
    - visualize (bool): Show a 2D visualization (only works for p=1 or p=2) (default: False)

    Returns:
    - None (prints results directly)
    """
    x = np.array(x)
    y = np.array(y)

    lib_dist = None

    if method in ["library", "both"]:
        lib_dist = minkowski(x, y, p)
        print(f"⚙️  Minkowski Distance (p = {p}): {lib_dist:.4f}")

    if method in ["manual", "both"]:
        manual_dist = np.sum(np.abs(x - y) ** p) ** (1 / p)
        print(f"📐 Minkowski Distance (Custom Code, p = {p}): {manual_dist:.4f}")

    if visualize:
        if len(x) != 2 or len(y) != 2:
            print("⚠️  Visualization skipped: only supported for 2D vectors.")
        elif p not in [1, 2]:
            print(f"⚠️  Visualization skipped: p = {p} is not supported for geometric interpretation (only p = 1 or 2).")
        else:
            plt.figure(figsize=(5, 5))
            plt.scatter(*x, color='blue', s=100)
            plt.scatter(*y, color='green', s=100)
            plt.plot([x[0], y[0]], [x[1], y[1]], 'r--')

            # Annotate distance
            mid_x, mid_y = (x[0] + y[0]) / 2, (x[1] + y[1]) / 2
            plt.text(mid_x, mid_y, f"Distance = {lib_dist:.4f}", fontsize=12, color='red', ha='center', va='bottom')

            # Label points
            plt.text(*x, f'  x {tuple(x)}', fontsize=12, verticalalignment='bottom')
            plt.text(*y, f'  y {tuple(y)}', fontsize=12, verticalalignment='bottom')

            plt.title(f"Minkowski Distance Visualization (2D, p = {p})")
            plt.axis('equal')
            plt.show()
In [9]:
# Example usage (p = 3, should not visualize)
x = [1, 2]
y = [4, 6]
compute_minkowski_distance(x, y, p=3, method="both", visualize=True)
⚙️  Minkowski Distance (p = 3): 4.4979
📐 Minkowski Distance (Custom Code, p = 3): 4.4979
⚠️  Visualization skipped: p = 3 is not supported for geometric interpretation (only p = 1 or 2).
In [10]:
# Example usage (p = 2, will visualize)
compute_minkowski_distance(x, y, p=2, method="both", visualize=True)
⚙️  Minkowski Distance (p = 2): 5.0000
📐 Minkowski Distance (Custom Code, p = 2): 5.0000
[Plot: Minkowski Distance Visualization (2D, p = 2)]

📌 Mahalanobis Distance¶


🧠 Intuition
Measures distance between a point and a distribution, not just another point. It accounts for the variance and correlation in the data, effectively "whitening" the space before measuring distance.

🧮 Formula

$$ d(x, y) = \sqrt{(x - y)^T S^{-1} (x - y)} $$

Where $S$ is the covariance matrix of the data.

⚠️ Sensitivity

  • Not scale-sensitive—handles feature scaling internally via covariance
  • Sensitive to multicollinearity or singularity in the covariance matrix
  • Requires a well-estimated covariance matrix (large sample size helps)

🧰 Use Cases + Real-World Examples

  • Common in multivariate outlier detection (e.g., fraud detection in finance)
  • Used in discriminant analysis (e.g., LDA)
  • Helpful when features are correlated, unlike Euclidean/Manhattan

📝 Notes

  • Allows distance to stretch/shrink based on feature correlation structure
  • Highlights points that are far from the mean and unusual based on the data distribution
  • More reliable with large, clean datasets—can break with singular or noisy covariance
In [11]:
def compute_mahalanobis_distance(x, y, data=None, cov_matrix=None, method="both"):
    """
    Compute the Mahalanobis distance between two vectors using manual and/or library methods.

    Parameters:
    - x (array-like): First vector
    - y (array-like): Second vector
    - data (array-like, optional): Dataset to compute covariance matrix if cov_matrix is not provided
    - cov_matrix (ndarray, optional): Precomputed covariance matrix
    - method (str): 'manual', 'library', or 'both' (default: 'both')

    Returns:
    - None (prints results directly)
    """
    x = np.array(x)
    y = np.array(y)

    if cov_matrix is None:
        if data is None:
            raise ValueError("Either a covariance matrix or sample data must be provided.")
        data = np.array(data)
        cov_matrix = np.cov(data.T)

    try:
        inv_cov = np.linalg.inv(cov_matrix)
    except np.linalg.LinAlgError:
        raise ValueError("Covariance matrix is singular or not invertible.")

    lib_dist = None

    if method in ["library", "both"]:
        lib_dist = mahalanobis(x, y, inv_cov)
        print(f"⚙️  Mahalanobis Distance: {lib_dist:.4f}")

    if method in ["manual", "both"]:
        diff = x - y
        manual_dist = np.sqrt(diff.T @ inv_cov @ diff)
        print(f"📐 Mahalanobis Distance (Custom Code): {manual_dist:.4f}")
In [12]:
# Example usage
data = [
    [1, 2],
    [4, 6],
    [3, 5],
    [5, 7]
]
x = [1, 2]
y = [4, 6]
compute_mahalanobis_distance(x, y, data=data, method="both")
⚙️  Mahalanobis Distance: 2.0000
📐 Mahalanobis Distance (Custom Code): 2.0000
In [13]:
# Example with 5D
data_5d = np.random.randn(100, 5)  # unseeded: the distance below will vary run to run
x_5d = data_5d[0]
y_5d = data_5d[1]
compute_mahalanobis_distance(x_5d, y_5d, data=data_5d, method="both")
⚙️  Mahalanobis Distance: 2.6219
📐 Mahalanobis Distance (Custom Code): 2.6219

Back to the top

🧮 Distance Metrics for Vectors and Angles¶


This section focuses on metrics that measure angular relationships between vectors, rather than their raw distance.

These are especially useful in high-dimensional spaces where magnitude is less meaningful and direction matters more.

Typical scenarios include:

  • 🧠 NLP: comparing TF-IDF or embedding vectors
  • 🎧 Recommender Systems: user/item interaction vectors
  • 🧬 Similarity Scoring in sparse or normalized datasets

These metrics shine when you're more interested in alignment than absolute difference.

📌 Cosine Similarity / Distance¶


🧠 Intuition
Measures the angle between two vectors, not their magnitude. It captures how aligned two directions are—perfect for understanding similarity in high-dimensional, sparse spaces.

🧮 Formula

$$ \text{Cosine Similarity} = \frac{\vec{x} \cdot \vec{y}}{||\vec{x}|| \cdot ||\vec{y}||} $$

$$ \text{Cosine Distance} = 1 - \text{Cosine Similarity} $$

⚠️ Sensitivity

  • Ignores magnitude, focuses only on orientation
  • Not affected by vector scaling (e.g., multiplying a vector by 10 doesn’t change similarity)
  • Still sensitive to sparsity in high-dimensional data, where most features are zeros

🧰 Use Cases + Real-World Examples

  • Dominant metric in text analysis, especially with TF-IDF vectors
  • Used in recommender systems to compute user-item similarity
  • Helps detect directionally similar patterns regardless of intensity (e.g., in topic modeling)

📝 Notes

  • Works well when direction matters more than magnitude
  • Can be misleading if vectors are zero or near-zero (need to handle edge cases)
  • In practice, often used with high-dimensional embeddings (e.g., NLP, document matching)
In [14]:
def compute_cosine_similarity_distance(x, y, method="both", visualize=False):
    """
    Compute Cosine Similarity and Cosine Distance between two vectors using manual and/or library methods.

    Parameters:
    - x (array-like): First vector
    - y (array-like): Second vector
    - method (str): 'manual', 'library', or 'both' (default: 'both')
    - visualize (bool): Whether to show a 2D visualization if applicable (default: False)

    Returns:
    - None (prints results directly)
    """
    x = np.array(x)
    y = np.array(y)
    origin = np.zeros(2)

    if method in ["library", "both"]:
        cos_dist = cosine(x, y)
        cos_sim = 1 - cos_dist
        print(f"⚙️  Cosine Similarity: {cos_sim:.4f}")
        print(f"⚙️  Cosine Distance  : {cos_dist:.4f}")

    if method in ["manual", "both"]:
        manual_sim = np.dot(x, y) / (norm(x) * norm(y))
        manual_dist = 1 - manual_sim
        print(f"📐 Cosine Similarity (Custom Code): {manual_sim:.4f}")
        print(f"📐 Cosine Distance (Custom Code)  : {manual_dist:.4f}")

    if visualize and len(x) == 2 and len(y) == 2:
        angle_rad = np.arccos(np.clip(np.dot(x, y) / (norm(x) * norm(y)), -1.0, 1.0))
        angle_deg = np.degrees(angle_rad)
        angle_label = f"θ ≈ {angle_deg:.1f}°"
        angle_pos = (x + y) / 2

        fig, ax = plt.subplots(figsize=(6, 6), dpi=100)
        ax.quiver(*origin, *x, angles='xy', scale_units='xy', scale=1, color='blue', label=f"x {tuple(x)}")
        ax.quiver(*origin, *y, angles='xy', scale_units='xy', scale=1, color='green', label=f"y {tuple(y)}")
        ax.text(*angle_pos, angle_label, fontsize=12, color='red', ha='center')

        all_coords = np.array([origin, x, y])
        min_x, max_x = all_coords[:, 0].min(), all_coords[:, 0].max()
        min_y, max_y = all_coords[:, 1].min(), all_coords[:, 1].max()
        pad = 1
        ax.set_xlim(min_x - pad, max_x + pad)
        ax.set_ylim(min_y - pad, max_y + pad)

        ax.set_aspect('equal')
        ax.grid(True)
        ax.set_xlabel("X-axis")
        ax.set_ylabel("Y-axis")
        ax.legend()
        ax.set_title("Cosine Similarity Visualization (2D)")
        plt.tight_layout()
        plt.show()
In [15]:
# Example usage (2D)
x = [1, 2]
y = [4, 6]
compute_cosine_similarity_distance(x, y, method="both", visualize=True)
⚙️  Cosine Similarity: 0.9923
⚙️  Cosine Distance  : 0.0077
📐 Cosine Similarity (Custom Code): 0.9923
📐 Cosine Distance (Custom Code)  : 0.0077
[Plot: Cosine Similarity Visualization (2D)]
In [16]:
# Example usage (5D)
x_5d = [1, 3, 5, 7, 9]
y_5d = [2, 4, 6, 8, 10]
compute_cosine_similarity_distance(x_5d, y_5d, method="both", visualize=False)
⚙️  Cosine Similarity: 0.9972
⚙️  Cosine Distance  : 0.0028
📐 Cosine Similarity (Custom Code): 0.9972
📐 Cosine Distance (Custom Code)  : 0.0028

Back to the top

🔤 Distance Metrics for Categorical or Binary Data¶


This section includes metrics tailored for categorical, binary, or boolean feature spaces—where traditional numeric distances don’t make sense.

These are particularly useful when:

  • Your data is one-hot encoded
  • You're comparing sequences, strings, or sets
  • Features are non-numeric but still informative

Common applications:

  • 🧬 Genomic and text sequence comparison
  • 📦 Product recommendation based on binary attributes
  • 🏷️ Clustering with categorical features

These metrics help quantify presence/absence and set overlap, making them ideal for discrete comparisons.

📌 Hamming Distance¶


🧠 Intuition
Counts how many positions two strings (or binary vectors) differ in. Imagine comparing two passwords or binary sequences and marking the mismatches.

🧮 Formula

$$ d(x, y) = \sum_{i=1}^{n} \mathbf{1}(x_i \ne y_i) \\ \text{where } \mathbf{1}(x_i \ne y_i) = \begin{cases} 1, & \text{if } x_i \ne y_i \\ 0, & \text{otherwise} \end{cases} $$

⚠️ Sensitivity

  • Only works on equal-length vectors
  • Binary/categorical only—makes no sense for continuous values
  • Each mismatch is treated equally, no weighting

🧰 Use Cases + Real-World Examples

  • Used in error detection/correction (e.g., digital communication, QR codes)
  • Common in genomic sequence analysis
  • Helpful for comparing one-hot encoded categorical features in clustering or similarity scoring

📝 Notes

  • Simple and interpretable for binary comparisons
  • Doesn’t account for how different the values are—just whether they differ
  • Can be extended to non-binary categorical data using matching scores
In [17]:
def compute_hamming_distance(x, y, method="both"):
    """
    Compute the Hamming distance between two equal-length vectors using manual and/or library methods.

    Parameters:
    - x (array-like or string): First input
    - y (array-like or string): Second input
    - method (str): 'manual', 'library', or 'both' (default: 'both')

    Returns:
    - None (prints results directly)
    """
    if len(x) != len(y):
        raise ValueError("Inputs must be of equal length.")

    x = np.array(list(x)) if isinstance(x, str) else np.array(x)
    y = np.array(list(y)) if isinstance(y, str) else np.array(y)

    lib_dist = None

    if method in ["library", "both"]:
        lib_dist = hamming(x, y) * len(x)  # convert from proportion to raw count
        print(f"⚙️  Hamming Distance: {lib_dist:.4f}")

    if method in ["manual", "both"]:
        manual_dist = np.sum(x != y)
        print(f"📐 Hamming Distance (Custom Code): {manual_dist:.4f}")
In [18]:
# Example usage: binary lists
x = [1, 0, 1, 1, 0, 1]
y = [1, 1, 0, 1, 0, 0]
compute_hamming_distance(x, y, method="both")
⚙️  Hamming Distance: 3.0000
📐 Hamming Distance (Custom Code): 3.0000
In [19]:
# Example usage: strings
compute_hamming_distance("dancer", "danger", method="both")
⚙️  Hamming Distance: 1.0000
📐 Hamming Distance (Custom Code): 1.0000

📌 Jaccard Similarity / Distance¶


🧠 Intuition
Measures the overlap between two sets relative to their union. It tells you how similar two binary vectors or sets are, ignoring what they don't share.

🧮 Formula

$$ \text{Jaccard Similarity} = \frac{|A \cap B|}{|A \cup B|} \\ \text{Jaccard Distance} = 1 - \text{Jaccard Similarity} $$

⚠️ Sensitivity

  • Only works on binary/categorical data or sets
  • Ignores true negatives (things both sets don't have)
  • Sensitive to sparsity—more zeros → lower similarity

🧰 Use Cases + Real-World Examples

  • Common in recommender systems to compare item sets (e.g., users with similar purchase histories)
  • Used in clustering binary data (e.g., one-hot encoded attributes)
  • Applied in text mining to compare sets of words (bag-of-words or shingled phrases)

📝 Notes

  • Especially useful when presence is more important than absence
  • Performs well when comparing sparse or asymmetric binary vectors
  • Jaccard Distance is a proper metric (satisfies triangle inequality)
In [20]:
def compute_jaccard_distance(x, y, method="both"):
    """
    Compute Jaccard Similarity and Jaccard Distance between two inputs.

    Parameters:
    - x (array-like, set, or string): First input
    - y (array-like, set, or string): Second input
    - method (str): 'manual', 'library', or 'both' (default: 'both')

    Returns:
    - None (prints results directly)
    """
    # Convert string to set of characters
    if isinstance(x, str) and isinstance(y, str):
        x = set(x)
        y = set(y)

    # Convert binary vectors to numpy arrays
    elif isinstance(x, (list, tuple, np.ndarray)) and isinstance(y, (list, tuple, np.ndarray)):
        x = np.array(x)
        y = np.array(y)

    lib_dist = None

    if method in ["library", "both"] and isinstance(x, np.ndarray):
        # scipy's jaccard expects numeric/boolean arrays; set inputs (e.g., strings) use the manual path only
        lib_dist = jaccard(x, y)
        lib_sim = 1 - lib_dist
        print(f"⚙️  Jaccard Similarity: {lib_sim:.4f}")
        print(f"⚙️  Jaccard Distance  : {lib_dist:.4f}")

    if method in ["manual", "both"]:
        if isinstance(x, np.ndarray):
            intersection = np.sum(np.logical_and(x, y))
            union = np.sum(np.logical_or(x, y))
        else:  # assumes sets
            intersection = len(x & y)
            union = len(x | y)

        manual_sim = intersection / union if union != 0 else 0
        manual_dist = 1 - manual_sim

        print(f"📐 Jaccard Similarity (Custom Code): {manual_sim:.4f}")
        print(f"📐 Jaccard Distance (Custom Code)  : {manual_dist:.4f}")
In [21]:
# Example: binary vectors
x_bin = [1, 0, 1, 1, 0]
y_bin = [0, 1, 1, 1, 0]
compute_jaccard_distance(x_bin, y_bin, method="both")
⚙️  Jaccard Similarity: 0.5000
⚙️  Jaccard Distance  : 0.5000
📐 Jaccard Similarity (Custom Code): 0.5000
📐 Jaccard Distance (Custom Code)  : 0.5000
In [22]:
# Example: English word character sets
compute_jaccard_distance("night", "thing", method="both")
📐 Jaccard Similarity (Custom Code): 1.0000
📐 Jaccard Distance (Custom Code)  : 0.0000

Back to the top

📊 Similarity Measures for Continuous Data¶


This section covers correlation-based similarity measures for continuous variables. Instead of measuring distance, these metrics quantify the strength and direction of relationships between variables.

Use cases typically involve:

  • 📈 Exploratory Data Analysis (EDA)
  • 🧪 Feature selection in modeling pipelines
  • 💰 Financial modeling (e.g., correlation between asset returns)

These measures are:

  • Scale-invariant
  • Useful for spotting patterns in paired continuous variables
  • Sensitive to relationship type—linear vs. monotonic

These metrics are key to understanding how variables move together, whether for modeling or diagnostics.

📌 Pearson Correlation¶


🧠 Intuition
Measures the strength and direction of a linear relationship between two continuous variables. A value of +1 means perfect positive linear correlation, -1 means perfect negative, and 0 means no linear relationship.

🧮 Formula

$$ r = \frac{\sum_{i=1}^{n} (x_i - \bar{x})(y_i - \bar{y})} {\sqrt{\sum_{i=1}^{n} (x_i - \bar{x})^2} \cdot \sqrt{\sum_{i=1}^{n} (y_i - \bar{y})^2}} $$

⚠️ Sensitivity

  • Extremely sensitive to outliers
  • Assumes linearity
  • Affected by non-normal distributions or non-constant variance

🧰 Use Cases + Real-World Examples

  • Used in feature selection (e.g., removing highly correlated variables)
  • Helps in exploratory data analysis to understand relationships
  • Common in finance (e.g., correlation between stock returns)

📝 Notes

  • Does not imply causation—only association
  • Works best when both variables are continuous, normally distributed, and linearly related
  • For non-linear relationships, consider Spearman instead
In [23]:
def compute_pearson_correlation(x, y, method="both"):
    """
    Compute Pearson Correlation between two vectors.

    Parameters:
    - x (array-like): First variable
    - y (array-like): Second variable
    - method (str): 'manual', 'library', or 'both' (default: 'both')

    Returns:
    - None (prints results directly)
    """
    x = np.array(x)
    y = np.array(y)

    if len(x) != len(y):
        raise ValueError("x and y must be of equal length.")

    if method in ["library", "both"]:
        lib_corr, _ = pearsonr(x, y)
        print(f"⚙️  Pearson Correlation: {lib_corr:.4f}")

    if method in ["manual", "both"]:
        x_mean = np.mean(x)
        y_mean = np.mean(y)
        numerator = np.sum((x - x_mean) * (y - y_mean))
        denominator = np.sqrt(np.sum((x - x_mean)**2)) * np.sqrt(np.sum((y - y_mean)**2))
        manual_corr = numerator / denominator if denominator != 0 else 0
        print(f"📐 Pearson Correlation (Custom Code): {manual_corr:.4f}")
In [24]:
# Example usage
x = [10, 20, 30, 40, 50]
y = [15, 25, 35, 45, 60]
compute_pearson_correlation(x, y, method="both")
⚙️  Pearson Correlation: 0.9959
📐 Pearson Correlation (Custom Code): 0.9959
In [25]:
# Strong but imperfect negative correlation
x_neg = [10, 20, 30, 40, 50]
y_neg = [92, 69, 48, 33, 13]  # slightly perturbed from a perfect linear drop
compute_pearson_correlation(x_neg, y_neg, method="both")
⚙️  Pearson Correlation: -0.9976
📐 Pearson Correlation (Custom Code): -0.9976
In [26]:
# x and y are unrelated → correlation close to 0
x_rand = [1, 2, 3, 4, 5]
y_rand = [42, 5, 67, 18, 33]
compute_pearson_correlation(x_rand, y_rand, method="both")
⚙️  Pearson Correlation: -0.0334
📐 Pearson Correlation (Custom Code): -0.0334

📌 Spearman Rank Correlation¶


🧠 Intuition
Measures the monotonic relationship between two variables using their ranks instead of raw values. It tells you whether the relationship is consistently increasing or decreasing, even if not linear.

🧮 Formula

If there are no tied ranks:

$$ \rho = 1 - \frac{6 \sum d_i^2}{n(n^2 - 1)} \\ \text{where } d_i = \text{rank}(x_i) - \text{rank}(y_i) $$

⚠️ Sensitivity

  • Robust to outliers
  • Captures monotonic (not just linear) trends
  • Still assumes ordinal or continuous variables

🧰 Use Cases + Real-World Examples

  • Great for ordinal data (e.g., survey rankings, Likert scales)
  • Used when variables don’t meet normality assumptions
  • Common in bioinformatics or psychometrics for measuring association strength

📝 Notes

  • Doesn’t assume linearity or equal spacing between values
  • Less powerful than Pearson when linearity holds
  • Ideal fallback when data violates Pearson’s assumptions
In [27]:
def compute_spearman_correlation(x, y, method="both"):
    """
    Compute Spearman Rank Correlation between two vectors.

    Parameters:
    - x (array-like): First variable
    - y (array-like): Second variable
    - method (str): 'manual', 'library', or 'both' (default: 'both')

    Returns:
    - None (prints results directly)
    """
    x = np.array(x)
    y = np.array(y)

    if len(x) != len(y):
        raise ValueError("x and y must be of equal length.")

    if method in ["library", "both"]:
        lib_corr, _ = spearmanr(x, y)
        print(f"⚙️  Spearman Correlation: {lib_corr:.4f}")

    if method in ["manual", "both"]:
        rx = pd.Series(x).rank(method='average').values
        ry = pd.Series(y).rank(method='average').values

        rx_mean = np.mean(rx)
        ry_mean = np.mean(ry)
        numerator = np.sum((rx - rx_mean) * (ry - ry_mean))
        denominator = np.sqrt(np.sum((rx - rx_mean)**2)) * np.sqrt(np.sum((ry - ry_mean)**2))
        manual_corr = numerator / denominator if denominator != 0 else 0
        print(f"📐 Spearman Correlation (Custom Code): {manual_corr:.4f}")
In [28]:
# Example 1: Monotonic but non-linear (Spearman = 1, Pearson < 1)
x = [1, 2, 3, 4, 5]
y = [2, 4, 8, 16, 32]  # exponential
compute_spearman_correlation(x, y, method="both")
⚙️  Spearman Correlation: 1.0000
📐 Spearman Correlation (Custom Code): 1.0000
In [29]:
# Example 2: Tied ranks
x_tied = [1, 2, 2, 3, 4]
y_tied = [10, 20, 20, 30, 40]
compute_spearman_correlation(x_tied, y_tied, method="both")
⚙️  Spearman Correlation: 1.0000
📐 Spearman Correlation (Custom Code): 1.0000

Back to the top