Source code for skfeaturellm.transformations.binary.arithmetic

"""
Binary arithmetic transformations for feature engineering.
"""

from abc import abstractmethod
from typing import Any, Dict, List, Optional, Set, Union

import numpy as np
import pandas as pd

from skfeaturellm.transformations.base import (
    BaseTransformation,
    TransformationError,
)
from skfeaturellm.transformations.pipeline import register_transformation


class DivisionByZeroError(TransformationError):
    """Signal that a division transformation encountered a zero divisor."""
class BinaryArithmeticTransformation(BaseTransformation):
    """
    Base class for binary arithmetic transformations.

    Supports operations between two columns or between a column and a
    constant. Subclasses provide the actual arithmetic by implementing
    ``_apply_operation``.

    Parameters
    ----------
    feature_name : str
        Name for the resulting feature
    columns : List[str]
        List of column names (1 or 2 columns). The first entry is always the
        left operand; a second entry, when present, is the right operand.
    parameters : Optional[Dict[str, Any]]
        Optional parameters dict with 'constant' key for column-constant
        operations

    Raises
    ------
    ValueError
        If ``columns`` does not contain exactly 1 or 2 entries, if a single
        column is given without a usable ``'constant'`` parameter, or if two
        columns are given together with a ``'constant'`` parameter.
    """

    def __init__(
        self,
        feature_name: str,
        columns: List[str],
        parameters: Optional[Dict[str, Any]] = None,
    ):
        if len(columns) == 1:
            # Column-constant operation: must have constant in parameters
            if parameters is None or "constant" not in parameters:
                raise ValueError(
                    "Binary operation with 1 column requires 'constant' in parameters"
                )
            # A present-but-null constant would leave the operation without a
            # right operand and only fail later, inside transform(), with an
            # unrelated arithmetic error. Reject it here instead.
            if parameters["constant"] is None:
                raise ValueError(
                    "Binary operation with 1 column requires a non-null 'constant'"
                )
            self._left_column = columns[0]
            self._right_column = None
            self._right_constant = parameters["constant"]
        elif len(columns) == 2:
            # Column-column operation
            if parameters is not None and "constant" in parameters:
                raise ValueError(
                    "Binary operation with 2 columns should not have 'constant' in parameters"
                )
            self._left_column = columns[0]
            self._right_column = columns[1]
            self._right_constant = None
        else:
            raise ValueError(
                f"Binary operation requires 1 or 2 columns, got {len(columns)}"
            )

        self._feature_name = feature_name

    @property
    def feature_name(self) -> str:
        """Name assigned to the Series returned by ``transform``."""
        return self._feature_name

    def get_required_columns(self) -> Set[str]:
        """Return the set of DataFrame column names this transformation reads."""
        columns = {self._left_column}
        if self._right_column is not None:
            columns.add(self._right_column)
        return columns

    def _get_operands(
        self, df: pd.DataFrame
    ) -> tuple[pd.Series, Union[pd.Series, float]]:
        """Get left and right operands from the DataFrame.

        The right operand is the second column when one was configured,
        otherwise the stored constant.
        """
        left = df[self._left_column]
        if self._right_column is not None:
            right: Union[pd.Series, float] = df[self._right_column]
        else:
            right = self._right_constant  # type: ignore
        return left, right

    @abstractmethod
    def _apply_operation(
        self, left: pd.Series, right: Union[pd.Series, float]
    ) -> pd.Series:
        """Apply the specific arithmetic operation.

        Parameters
        ----------
        left : pd.Series
            Values of the left-hand column.
        right : Union[pd.Series, float]
            Values of the right-hand column, or the configured constant.

        Returns
        -------
        pd.Series
            Result of combining ``left`` and ``right``.
        """
        pass

    def transform(self, df: pd.DataFrame) -> pd.Series:
        """Apply the transformation.

        Parameters
        ----------
        df : pd.DataFrame
            Input data holding the required column(s).

        Returns
        -------
        pd.Series
            Result of the operation, named ``feature_name``.
        """
        # validate_columns is inherited and not visible here; presumably it
        # checks that the required columns exist in df -- confirm in the base.
        self.validate_columns(df)
        left, right = self._get_operands(df)
        result = self._apply_operation(left, right)
        result.name = self._feature_name
        return result
@register_transformation("add")
class AddTransformation(BinaryArithmeticTransformation):
    """
    Sum of the left operand and the right operand (``left + right``).

    The right operand is either a second column or the configured constant.

    Examples
    --------
    >>> t = AddTransformation("total", columns=["a", "b"])
    >>> t = AddTransformation("plus_ten", columns=["a"], parameters={"constant": 10.0})
    """

    @classmethod
    def get_prompt_description(cls) -> str:
        """Return a one-line, human-readable description of the operation."""
        description = "Addition of two columns or a column and a constant"
        return description

    def _apply_operation(
        self, left: pd.Series, right: Union[pd.Series, float]
    ) -> pd.Series:
        """Return ``left + right``."""
        total = left + right
        return total
@register_transformation("sub")
class SubTransformation(BinaryArithmeticTransformation):
    """
    Difference of the left operand and the right operand (``left - right``).

    The right operand is either a second column or the configured constant.

    Examples
    --------
    >>> t = SubTransformation("difference", columns=["a", "b"])
    >>> t = SubTransformation("minus_ten", columns=["a"], parameters={"constant": 10.0})
    """

    @classmethod
    def get_prompt_description(cls) -> str:
        """Return a one-line, human-readable description of the operation."""
        description = "Subtraction of two columns or a column and a constant"
        return description

    def _apply_operation(
        self, left: pd.Series, right: Union[pd.Series, float]
    ) -> pd.Series:
        """Return ``left - right``."""
        difference = left - right
        return difference
@register_transformation("mul")
class MulTransformation(BinaryArithmeticTransformation):
    """
    Product of the left operand and the right operand (``left * right``).

    The right operand is either a second column or the configured constant.

    Examples
    --------
    >>> t = MulTransformation("product", columns=["a", "b"])
    >>> t = MulTransformation("doubled", columns=["a"], parameters={"constant": 2.0})
    """

    @classmethod
    def get_prompt_description(cls) -> str:
        """Return a one-line, human-readable description of the operation."""
        description = "Multiplication of two columns or a column and a constant"
        return description

    def _apply_operation(
        self, left: pd.Series, right: Union[pd.Series, float]
    ) -> pd.Series:
        """Return ``left * right``."""
        product = left * right
        return product
@register_transformation("div")
class DivTransformation(BinaryArithmeticTransformation):
    """
    Quotient of the left operand and the right operand (``left / right``).

    The divisor is checked before dividing: a ``DivisionByZeroError`` is
    raised when the constant is 0 or when any value of the right-hand column
    equals 0.

    Examples
    --------
    >>> t = DivTransformation("ratio", columns=["a", "b"])
    >>> t = DivTransformation("halved", columns=["a"], parameters={"constant": 2.0})
    """

    @classmethod
    def get_prompt_description(cls) -> str:
        """Return a one-line, human-readable description of the operation."""
        description = "Division of two columns or a column and a constant"
        return description

    def _apply_operation(
        self, left: pd.Series, right: Union[pd.Series, float]
    ) -> pd.Series:
        """Return ``left / right`` after rejecting any zero divisor."""
        self._check_division_by_zero(right)
        quotient = left / right
        return quotient

    def _check_division_by_zero(self, right: Union[pd.Series, float]) -> None:
        """Raise ``DivisionByZeroError`` when the divisor contains a zero.

        Parameters
        ----------
        right : Union[pd.Series, float]
            The divisor: a column of values or a single constant.
        """
        if isinstance(right, pd.Series):
            # Column divisor: a single zero anywhere is enough to reject.
            has_zero = (right == 0).any()
            if has_zero:
                raise DivisionByZeroError(
                    f"Division by zero detected in column '{self._right_column}'"
                )
            return

        # Constant divisor.
        if right == 0:
            raise DivisionByZeroError("Division by zero: constant is 0")
@register_transformation("max")
class MaxTransformation(BinaryArithmeticTransformation):
    """
    Element-wise larger value of the two operands (``max(left, right)``).

    The right operand is either a second column or the configured constant.

    Examples
    --------
    >>> t = MaxTransformation("max_ab", columns=["a", "b"])
    >>> t = MaxTransformation("at_least_zero", columns=["a"], parameters={"constant": 0.0})
    """

    @classmethod
    def get_prompt_description(cls) -> str:
        """Return a one-line, human-readable description of the operation."""
        description = "Element-wise maximum of two columns or a column and a constant"
        return description

    def _apply_operation(
        self, left: pd.Series, right: Union[pd.Series, float]
    ) -> pd.Series:
        """Return ``np.maximum(left, right)``."""
        larger = np.maximum(left, right)
        return larger
@register_transformation("min")
class MinTransformation(BinaryArithmeticTransformation):
    """
    Element-wise smaller value of the two operands (``min(left, right)``).

    The right operand is either a second column or the configured constant.

    Examples
    --------
    >>> t = MinTransformation("min_ab", columns=["a", "b"])
    >>> t = MinTransformation("at_most_100", columns=["a"], parameters={"constant": 100.0})
    """

    @classmethod
    def get_prompt_description(cls) -> str:
        """Return a one-line, human-readable description of the operation."""
        description = "Element-wise minimum of two columns or a column and a constant"
        return description

    def _apply_operation(
        self, left: pd.Series, right: Union[pd.Series, float]
    ) -> pd.Series:
        """Return ``np.minimum(left, right)``."""
        smaller = np.minimum(left, right)
        return smaller