Source code for skfeaturellm.transformations.unary.arithmetic

"""
Unary arithmetic transformations for feature engineering.
"""

from typing import Any, Dict, List, Optional, Set

import numpy as np
import pandas as pd

from skfeaturellm.transformations.base import BaseTransformation, TransformationError
from skfeaturellm.transformations.pipeline import register_transformation


class InvalidValueError(TransformationError):
    """
    Signal that input data lies outside a transformation's valid domain.

    Raised, for example, when a logarithm is requested for a negative value.
    """
class UnaryTransformation(BaseTransformation):
    """
    Common scaffolding for transformations that read a single column.

    Subclasses provide the actual computation by overriding
    ``_apply_operation``; this class stores the configuration, selects the
    input column and names the resulting series.

    Parameters
    ----------
    feature_name : str
        Name given to the produced feature.
    columns : List[str]
        Must contain exactly one column name.
    parameters : Optional[Dict[str, Any]]
        Extra options. They are stored but not consulted by this base class.

    Raises
    ------
    ValueError
        If ``columns`` does not hold exactly one entry.
    """

    def __init__(
        self,
        feature_name: str,
        columns: List[str],
        parameters: Optional[Dict[str, Any]] = None,
    ):
        n_columns = len(columns)
        if n_columns != 1:
            raise ValueError(
                f"Unary operation requires exactly 1 column, got {n_columns}"
            )

        self._column = columns[0]
        self._feature_name = feature_name
        # Fall back to a fresh dict so each instance owns its own mapping.
        self._parameters = parameters if parameters else {}

    @property
    def feature_name(self) -> str:
        """Name assigned to the series returned by ``transform``."""
        return self._feature_name

    def get_required_columns(self) -> Set[str]:
        """Return a one-element set holding the input column name."""
        required = {self._column}
        return required

    def transform(self, df: pd.DataFrame) -> pd.Series:
        """
        Compute the feature from ``df``.

        The frame is first passed to ``validate_columns``; the configured
        column is then handed to ``_apply_operation`` and the outcome is
        labelled with the feature name before being returned.
        """
        self.validate_columns(df)
        source = df[self._column]
        transformed = self._apply_operation(source)
        transformed.name = self._feature_name
        return transformed

    def _apply_operation(self, values: pd.Series) -> pd.Series:
        """Hook for the concrete unary operation; subclasses must override it."""
        raise NotImplementedError
@register_transformation("log")
class LogTransformation(UnaryTransformation):
    """
    Natural logarithm of a column: log(column).

    Any value <= 0 in the input causes an InvalidValueError.

    Examples
    --------
    >>> t = LogTransformation("log_income", columns=["income"])
    """

    @classmethod
    def get_prompt_description(cls) -> str:
        """Return a one-line, human-readable summary of this transformation."""
        description = (
            "Natural logarithm (log(column)) - useful for right-skewed distributions"
        )
        return description

    def _apply_operation(self, values: pd.Series) -> pd.Series:
        """Return ``np.log(values)`` after rejecting non-positive entries."""
        non_positive = values <= 0
        if non_positive.any():
            message = (
                f"Log transformation requires all values > 0 in column '{self._column}'"
            )
            raise InvalidValueError(message)
        return np.log(values)
@register_transformation("log1p")
class Log1pTransformation(UnaryTransformation):
    """
    Shifted logarithm of a column: log(1 + column).

    Unlike the plain logarithm this accepts zeros. Any value < 0 in the
    input causes an InvalidValueError.

    Examples
    --------
    >>> t = Log1pTransformation("log1p_count", columns=["count"])
    """

    @classmethod
    def get_prompt_description(cls) -> str:
        """Return a one-line, human-readable summary of this transformation."""
        description = "Log(1+x) transformation (log(1 + column)) - handles zero values"
        return description

    def _apply_operation(self, values: pd.Series) -> pd.Series:
        """Return ``np.log1p(values)`` after rejecting negative entries."""
        negative = values < 0
        if negative.any():
            message = (
                f"Log1p transformation requires all values >= 0 in column '{self._column}'"
            )
            raise InvalidValueError(message)
        return np.log1p(values)
@register_transformation("abs")
class AbsTransformation(UnaryTransformation):
    """
    Absolute value of a column: abs(column).

    Examples
    --------
    >>> t = AbsTransformation("abs_diff", columns=["difference"])
    """

    @classmethod
    def get_prompt_description(cls) -> str:
        """Return a one-line, human-readable summary of this transformation."""
        description = "Absolute value (abs(column)) - magnitude regardless of sign"
        return description

    def _apply_operation(self, values: pd.Series) -> pd.Series:
        """Return the element-wise absolute value of ``values``."""
        # np.abs is an alias of np.absolute, so this is the same ufunc call.
        magnitudes = np.absolute(values)
        return magnitudes
@register_transformation("exp")
class ExpTransformation(UnaryTransformation):
    """
    Exponential of a column: exp(column).

    Examples
    --------
    >>> t = ExpTransformation("exp_log_price", columns=["log_price"])
    """

    @classmethod
    def get_prompt_description(cls) -> str:
        """Return a one-line, human-readable summary of this transformation."""
        description = "Exponential (exp(column)) - inverse of log"
        return description

    def _apply_operation(self, values: pd.Series) -> pd.Series:
        """Return ``np.exp(values)``; no input validation is performed."""
        exponentiated = np.exp(values)
        return exponentiated
@register_transformation("sqrt")
class SqrtTransformation(UnaryTransformation):
    """
    Square root of a column: sqrt(column).

    Any value < 0 in the input causes an InvalidValueError.

    Examples
    --------
    >>> t = SqrtTransformation("sqrt_area", columns=["area"])
    """

    @classmethod
    def get_prompt_description(cls) -> str:
        """Return a one-line, human-readable summary of this transformation."""
        description = (
            "Square root (sqrt(column)) - useful for right-skewed non-negative data"
        )
        return description

    def _apply_operation(self, values: pd.Series) -> pd.Series:
        """Return ``np.sqrt(values)`` after rejecting negative entries."""
        negative = values < 0
        if negative.any():
            message = (
                f"Sqrt transformation requires all values >= 0 in column '{self._column}'"
            )
            raise InvalidValueError(message)
        return np.sqrt(values)
@register_transformation("pow")
class PowTransformation(UnaryTransformation):
    """
    Power transformation: column ** power.

    The exponent is read from ``parameters["power"]``. The column is cast to
    float before exponentiation.

    Raises InvalidValueError for invalid operations:

    * a negative exponent applied to a column containing zeros;
    * a non-integral (fractional) exponent applied to a column containing
      negative values.

    Whether an exponent is integral is decided by its *value*, so ``2``,
    ``2.0`` and ``numpy.int64(2)`` are all accepted for negative inputs.

    Parameters
    ----------
    feature_name : str
        Name for the resulting feature
    columns : List[str]
        List with exactly one column name
    parameters : Optional[Dict[str, Any]]
        Must contain the key ``"power"``.

    Raises
    ------
    ValueError
        If ``parameters`` is missing or has no ``"power"`` entry.

    Examples
    --------
    >>> t = PowTransformation("age_squared", columns=["age"], parameters={"power": 2})
    >>> t = PowTransformation("sqrt_area", columns=["area"], parameters={"power": 0.5})
    >>> t = PowTransformation("inverse_distance", columns=["distance"], parameters={"power": -1})
    """

    def __init__(
        self,
        feature_name: str,
        columns: List[str],
        parameters: Optional[Dict[str, Any]] = None,
    ):
        super().__init__(feature_name, columns, parameters)
        if parameters is None or "power" not in parameters:
            raise ValueError("PowTransformation requires 'power' in parameters")
        self._power = parameters["power"]

    @classmethod
    def get_prompt_description(cls) -> str:
        """Return a one-line, human-readable summary of this transformation."""
        return "Power (column ** power) - flexible exponentiation for various transformations"

    def _has_integral_power(self) -> bool:
        """Return True when the exponent is a whole number, whatever its type."""
        power = self._power
        if isinstance(power, int):
            # Fast path; also avoids OverflowError from float() on huge ints.
            return True
        try:
            # Covers integral floats (2.0) and NumPy integer scalars, which
            # are not instances of the built-in ``int``.
            return float(power).is_integer()
        except (TypeError, ValueError, OverflowError):
            # Unknown exponent type: stay conservative and treat it as
            # fractional, which is what a plain isinstance check would do.
            return False

    def _apply_operation(self, values: pd.Series) -> pd.Series:
        """
        Raise ``values`` to the configured power after domain checks.

        Raises
        ------
        InvalidValueError
            If the exponent is negative and ``values`` contains zeros, or if
            the exponent is not a whole number and ``values`` contains
            negative entries.
        """
        # A negative exponent would divide by zero for zero entries.
        if self._power < 0 and (values == 0).any():
            raise InvalidValueError(
                f"Power transformation with negative exponent requires all values != 0 in column '{self._column}'"
            )

        # A fractional power of a negative number is not real-valued.
        if not self._has_integral_power() and (values < 0).any():
            raise InvalidValueError(
                f"Power transformation with fractional exponent requires all values >= 0 in column '{self._column}'"
            )

        return values.astype(float).pow(self._power)