Source code for pyLOM.NN.stats

import numpy as np
from .. import pprint
from ..utils import raiseError


[docs]
class RegressionEvaluator():
    r"""
    Evaluator class for regression tasks. 
    Includes methods to calculate the mean squared error (MSE), mean absolute error (MAE), 
    mean relative error (MRE), quantiles of the absolute errors, L2 error, and R-squared.

    Args:
        tolerance (float): Tolerance level to consider values close to zero for MRE calculation (default: ``1e-4``).
    """

    def __init__(
        self,
        tolerance: float = 1e-4,
    ) -> None:
        self.tolerance = tolerance

    @property
    def tolerance(self) -> float:
        return self._tolerance
    
    @tolerance.setter
    def tolerance(self, value: float) -> None:
        if value < 0:
            raise raiseError("Tolerance must be a positive value.")
        if value == 0:
            raise raiseError("Tolerance cannot be zero.")
        if value > 1e-2:
            raise raiseError("Tolerance should be less than 1e-2.")
        self._tolerance = value


[docs]
    def mean_squared_error(
        self,
        y_true: np.ndarray,
        y_pred: np.ndarray,
    ) -> float:
        """
        Compute the mean squared error (MSE) between the true values and the predicted values.

        Args:
            y_true (numpy.ndarray): The true values.
            y_pred (numpy.ndarray): The predicted values.

        Returns:
            float: The mean squared error.
        """
        return np.mean((y_true - y_pred) ** 2)

    

[docs]
    def mean_absolute_error(self, y_true, y_pred):
        """
        Compute the mean absolute error (MAE) between the true values and the predicted values.

        Args:
            y_true (numpy.ndarray): The true values.
            y_pred (numpy.ndarray): The predicted values.

        Returns:
            float: The mean absolute error.
        """
        return np.mean(np.abs(y_true - y_pred))

    

[docs]
    def mean_relative_error(
        self,
        y_pred: np.ndarray,
        y_true: np.ndarray,
    ) -> float:
        """
        Compute the mean relative error (MRE) between the true values and the predicted values,
        adding a tolerance level to consider values close to zero.

        Args:
            y_true (numpy.ndarray): The true values.
            y_pred (numpy.ndarray): The predicted values.
            tolerance (float): Tolerance level to consider values close to zero. Default is 1e-4.

        Returns:
            float: The mean relative error excluding cases where y_true is close to zero.
        """
        relative_errors = np.abs((y_true - y_pred) / (y_true + self.tolerance))
        return np.mean(relative_errors) * 100

    

[docs]
    def ae_q(
        self,
        y_pred: np.ndarray,
        y_true: np.ndarray,
        quantile: int,
    ) -> float:
        """
        Calculate the quantile of the absolute errors between the true and predicted values.

        Args:
            y_true (numpy.ndarray): The true values.
            y_pred (numpy.ndarray): The predicted values.
            quantile (int): The quantile to calculate. Must be between 0 and 100.

        Returns:
            float: The quantile of the absolute errors.
        """

        absolute_errors = np.abs(y_true - y_pred)
        return np.percentile(absolute_errors, quantile)    



[docs]
    def l2_error(
        self,
        y_pred: np.ndarray,
        y_true: np.ndarray,
    ) -> float:
        """
        Calculate the L2 error between the true and predicted values.

        Args:
            y_true (numpy.ndarray): The true values.
            y_pred (numpy.ndarray): The predicted values.

        Returns:
            float: The L2 error.
        """

        return np.linalg.norm(y_true - y_pred) / np.linalg.norm(y_true)

    

[docs]
    def R2(
        self,
        y_true: np.ndarray,
        y_pred: np.ndarray,
    ) -> float:
        """
        Calculate the R-squared (coefficient of determination) for a set of true and predicted values.

        Args:
            y_true (numpy.ndarray): The true values.
            y_pred (numpy.ndarray): The predicted values.

        Returns:
            float: The R-squared value.
        """
        y_mean = np.mean(y_true)
        total_sum_of_squares = np.sum((y_true - y_mean) ** 2)
        residual_sum_of_squares = np.sum((y_true - y_pred) ** 2)
        r_squared = 1 - (residual_sum_of_squares / total_sum_of_squares)
        return r_squared



[docs]
    def print_metrics(self):
        """
        Print the calculated regression metrics.
        """
        if self._metrics is None:
            raise raiseError("No metrics have been calculated yet.")
        
        pprint(0, "\nRegression evaluator metrics:")
        
        for key, value in self._metrics.items():
            if key == "mre":
                pprint(0, f"{key}: {value:.4f}%")
            elif key == "r2":
                pprint(0, f"{key}: {value:.4f}")
            else:
                if value < 1e-3 or value > 1e3:
                    pprint(0, f"{key}: {value:.4e}")
                else:
                    pprint(0, f"{key}: {value:.4f}")




[docs]
    def __call__(
        self,
        y_true: np.ndarray,
        y_pred: np.ndarray,
    ) -> dict:
        """
        Calculate multiple regression metrics between the true and predicted values.

        Args:
            y_true (numpy.ndarray): An array-like object containing the true values.
            y_pred (numpy.ndarray): An array-like object containing the predicted values.

        Returns:
            dict: A dictionary containing the calculated regression metrics.
        """
        try:
            y_true = np.array(y_true)
            y_pred = np.array(y_pred)
        except AttributeError:
            raise raiseError(f"could not create numpy arrays from object with type {type(y_true)}")
        
        mse = self.mean_squared_error(y_true, y_pred)
        rmse = np.sqrt(mse)
        mae = self.mean_absolute_error(y_true, y_pred)
        mre = self.mean_relative_error(y_true, y_pred)
        aq_95 = self.ae_q(y_true, y_pred, 95)
        aq_99 = self.ae_q(y_true, y_pred, 99)
        r2 = self.R2(y_true, y_pred)
        l2_error = self.l2_error(y_true, y_pred)
        self._metrics = {
            "mse": mse,
            "rmse": rmse,
            "mae": mae, 
            "mre": mre, 
            "ae_95": aq_95,
            "ae_99": aq_99,
            "r2": r2,
            "l2_error": l2_error 
        }
        return self._metrics