Source code for pyLOM.NN.pipeline
#!/usr/bin/env python
#
# pyLOM - Python Low Order Modeling.
#
# NN pipeline routines.
#
# Last rev: 02/10/2024
from typing import List, Dict, Any
from .optimizer import OptunaOptimizer
from ..utils.errors import raiseWarning
from .. import pprint
[docs]
class Pipeline:
    r"""
    Pipeline class to train and evaluate models.

    To optimize a model, provide an optimizer and model class.
    To train a model with fixed parameters, provide a model and training parameters.

    Args:
        train_dataset: The training dataset.
        valid_dataset (optional): The validation dataset. Default is ``None``.
        test_dataset (optional): The test dataset. Default is ``None``.
        model (Model, optional): The model to train. Default is ``None``.
            If optimizer and model_class are provided, this is not used.
        training_params (Dict, optional): The parameters for training the model. Default is ``None``.
            If optimizer and model_class are provided, this is not used.
        optimizer (OptunaOptimizer, optional): The optimizer to use for optimization. Default is ``None``.
        model_class (Model, optional): The model class to use for optimization. Default is ``None``.
        evaluators (List, optional): The evaluators to use for evaluating the model.
            Default is ``None``, in which case a new empty list is stored on the instance.

    Raises:
        AssertionError: If neither model and training_params nor optimizer and model_class are provided.
    """

    def __init__(
        self,
        train_dataset,
        valid_dataset=None,
        test_dataset=None,
        model=None,
        training_params: Dict = None,
        optimizer: "OptunaOptimizer" = None,
        model_class=None,
        evaluators: List = None,
    ):
        self._model = model
        self.train_dataset = train_dataset
        self.test_dataset = test_dataset
        self.valid_dataset = valid_dataset
        self.optimizer = optimizer
        self.training_params = training_params
        self.model_class = model_class
        # A fresh list per instance: a ``[]`` default in the signature would be
        # one shared object mutated by every pipeline that omits the argument.
        self.evaluators = [] if evaluators is None else evaluators

        can_optimize = optimizer is not None and model_class is not None
        can_train = model is not None and training_params is not None
        if not (can_optimize or can_train):
            # Raised explicitly (instead of using ``assert``) so the check is
            # not stripped when Python runs with ``-O``; the exception type and
            # message are unchanged for existing callers.
            raise AssertionError(
                "Either model and training_params or optimizer and model_class must be provided"
            )

    @property
    def model(self):
        """
        Get the model held by the pipeline.

        This is the model passed to the constructor, or the one created by the
        optimization step once ``run`` has been called with an optimizer.
        """
        return self._model

    def run(self) -> Any:
        """
        Run the pipeline, this will train the model and return the output of the model's fit method.
        If optuna is used, the model will be trained with the optimized parameters.

        Returns:
            model_output (Any): The output of the model's fit method.
        """
        # Optimization needs both the optimizer and the model class. The
        # constructor also accepts an optimizer alone when a model and
        # training parameters are given; in that case train with the fixed
        # parameters instead of failing on a ``None`` model class.
        if self.optimizer is not None and self.model_class is not None:
            if self.valid_dataset is None:
                # No validation split: the optimizer evaluates on the training data.
                self.valid_dataset = self.train_dataset
                raiseWarning(
                    "Validation dataset not provided, using train dataset for evaluation on optimization"
                )
            self._model, self.training_params = self.model_class.create_optimized_model(
                train_dataset=self.train_dataset,
                eval_dataset=self.valid_dataset,
                optuna_optimizer=self.optimizer,
            )
            pprint(0, "Training now a model with optimized parameters")

        # The final fit is evaluated against the test dataset (may be ``None``).
        model_output = self._model.fit(
            self.train_dataset, eval_dataset=self.test_dataset, **self.training_params
        )
        return model_output