# Source code for skfeaturellm.transformations.base

"""
Base classes and protocols for feature transformations.
"""

from abc import ABC, abstractmethod
from typing import List, Set

import pandas as pd


class TransformationError(Exception):
    """Common base class for errors raised by feature transformations."""
class ColumnNotFoundError(TransformationError):
    """Signal that a column required by a transformation is absent from the DataFrame."""
class BaseTransformation(ABC):
    """
    Abstract foundation shared by every feature transformation.

    Concrete subclasses are required to provide:

    - ``transform()``: compute the feature from a DataFrame
      (replaces ``execute()``)
    - ``get_required_columns()``: report which input columns are needed
    - ``feature_name`` (property): name of the produced feature
    - ``get_prompt_description()``: text describing the transformation
      for LLM prompts

    The API follows the scikit-learn fit/transform convention:

    - ``fit(df)`` learns stateful parameters from training data; the
      default implementation only validates columns, so stateless
      transformations can inherit it unchanged.
    - ``transform(df)`` applies the transformation using fitted state.
    - ``fit_transform(df)`` chains ``fit`` and ``transform``.
    """

    @classmethod
    @abstractmethod
    def get_prompt_description(cls) -> str:
        """
        Describe this transformation for inclusion in an LLM prompt.

        Returns
        -------
        str
            Human-readable explanation of what the transformation does
        """

    @property
    @abstractmethod
    def feature_name(self) -> str:
        """Name given to the feature this transformation produces."""

    @abstractmethod
    def get_required_columns(self) -> Set[str]:
        """
        Report the input columns this transformation depends on.

        Returns
        -------
        Set[str]
            Names of the columns that must be present in the DataFrame
        """

    def fit(self, df: pd.DataFrame) -> "BaseTransformation":
        """
        Fit the transformation on training data.

        This default only checks that the required columns are present
        and hands back the instance. Subclasses that carry state should
        override it to learn their parameters from ``df``.

        Parameters
        ----------
        df : pd.DataFrame
            The training DataFrame

        Returns
        -------
        BaseTransformation
            self

        Raises
        ------
        ColumnNotFoundError
            If a required column is missing (raised by
            ``validate_columns``)
        """
        self.validate_columns(df)
        return self

    @abstractmethod
    def transform(self, df: pd.DataFrame) -> pd.Series:
        """
        Compute the feature from a DataFrame.

        Parameters
        ----------
        df : pd.DataFrame
            The input DataFrame

        Returns
        -------
        pd.Series
            Feature values, with the Series name set to ``feature_name``

        Raises
        ------
        TransformationError
            If the transformation fails
        """

    def fit_transform(self, df: pd.DataFrame) -> pd.Series:
        """
        Run ``fit`` followed by ``transform`` on the same DataFrame.

        Parameters
        ----------
        df : pd.DataFrame
            The input DataFrame

        Returns
        -------
        pd.Series
            The resulting feature values
        """
        # transform() is invoked on whatever fit() returns, which may in
        # principle be a different object if a subclass overrides fit().
        fitted = self.fit(df)
        return fitted.transform(df)

    def validate_columns(self, df: pd.DataFrame) -> None:
        """
        Check that the DataFrame contains every required column.

        Parameters
        ----------
        df : pd.DataFrame
            The input DataFrame

        Raises
        ------
        ColumnNotFoundError
            If one or more required columns are absent
        """
        needed = self.get_required_columns()
        missing = needed - set(df.columns)
        if not missing:
            return

        raise ColumnNotFoundError(
            f"Columns not found in DataFrame: {missing}. "
            f"Available columns: {list(df.columns)}"
        )