Source code for breos.load_profiles

"""
Load profile management module.

This module handles residential and commercial load profiles,
including loading from CSV files, scaling to annual consumption,
and resampling between hourly and 15-minute intervals.
"""

from contextlib import nullcontext
from importlib.resources import as_file
from pathlib import Path
from typing import Dict, Optional, Union

import numpy as np
import pandas as pd

from breos.resources import rlp_resource
from breos.utils import count_leap_years, get_hours_per_step, is_leap_year
from breos.weather import resample_to_15min

# Profile type mappings
#
# Hourly CSV filename for each profile key. Keys "4", "5" and "6" share one
# E-Redes file and differ only in the column that is read (see EREDES_COLUMNS).
# Key "7" has no hourly file: its entry points at the 15-minute file, which is
# why PROFILE_FILE_NATIVE_FREQ overrides its native frequency below.
PROFILE_FILES = {
    "1": "h0SLP_demandlib_1000kwh_hourly.csv",
    "4": "EREDES_2025_BTN_1000kwh_hourly.csv",
    "5": "EREDES_2025_BTN_1000kwh_hourly.csv",
    "6": "EREDES_2025_BTN_1000kwh_hourly.csv",
    "7": "bdew_h0_2025_15min.csv",
    "8": "REE_2026_2.0TD_1000kwh_hourly.csv",
}

# 15-minute CSV filename for each profile key; preferred by load_profile when
# 15-minute output is requested and the file exists.
PROFILE_FILES_15MIN = {
    "1": "h0SLP_demandlib_1000kwh_15min.csv",
    "4": "EREDES_2025_BTN_1000kwh_15min.csv",
    "5": "EREDES_2025_BTN_1000kwh_15min.csv",
    "6": "EREDES_2025_BTN_1000kwh_15min.csv",
    "7": "bdew_h0_2025_15min.csv",
    "8": "REE_2026_2.0TD_1000kwh_15min.csv",
}

# Native frequency of the file listed in PROFILE_FILES, for the keys whose
# "hourly" entry is not actually hourly. Keys not listed here are read as "h".
PROFILE_FILE_NATIVE_FREQ = {
    "7": "15min",
}

# Human-readable profile names; used in the error raised when a profile is
# requested without rlp_directory and is not available as a packaged resource.
PROFILE_NAMES = {
    "1": "H0SLP (demandlib)",
    "4": "E-Redes 2025 - BTN A (external file required)",
    "5": "E-Redes 2025 - BTN B (external file required)",
    "6": "E-Redes 2025 - BTN C (external file required)",
    "7": "BDEW H0 2025 (external file required)",
    "8": "REE 2026 - 2.0TD (external file required)",
}

# Alias -> profile key. load_profile lower-cases the requested name before the
# lookup, so aliases are matched case-insensitively and must be lower-case here.
#
# Note: "bdew_h0" maps to the bundled demandlib profile "1", which
# implements the BDEW H0 standard shape. Profile "7" is the externally
# published BDEW H0 2025 file and must be supplied via rlp_directory —
# request it explicitly as "7" if that exact dataset is needed.
PROFILE_ALIASES = {
    "default": "1",
    "demandlib_h0": "1",
    "h0": "1",
    "bdew_h0": "1",
    "crest": "1",
    "eredes_btn_c": "6",
    "ree_2.0td": "8",
}

# Column mappings for E-Redes profiles: the CSV column read for each of the
# three profile keys that share the E-Redes file.
EREDES_COLUMNS = {
    "4": "BTN A - Wh",
    "5": "BTN B - Wh",
    "6": "BTN C - Wh",
}


def load_profile(
    profile_type: str,
    annual_consumption_kwh: float,
    start_date: str = "2025-01-01",
    freq: str = "h",
    num_years: int = 1,
    rlp_directory: Optional[str] = None,
    timezone: Optional[str] = "UTC",
) -> pd.DataFrame:
    """
    Load and scale a residential/commercial load profile.

    This is the main function for loading load profiles. It supports the
    bundled H0SLP demandlib profile, user-supplied external CSVs through
    ``rlp_directory``, scaling to target annual consumption, multi-year
    extension, and hourly or 15-minute output.

    Args:
        profile_type: Profile type key (see PROFILE_NAMES) or name
        annual_consumption_kwh: Target annual consumption in kWh
        start_date: Start date for the profile (YYYY-MM-DD)
        freq: Time frequency ('h' for hourly, '15min' for 15-minute).
            String aliases are matched case-insensitively, so the legacy
            spellings 'H' and '15T' are accepted as well. Any other value
            returns the profile at the native frequency of the file used.
        num_years: Number of years to generate
        rlp_directory: Directory containing RLP files. When omitted, BREOS uses
            only redistributable packaged profiles.
        timezone: Timezone for the index. Profile rows are wall-clock local
            behavior (H0 morning/evening peaks), so pass the location's
            timezone to pin them to local time; the simulation aligns load
            and PV by UTC instant. The 'UTC' default keeps the legacy
            UTC-clock convention for callers without a location.

    Returns:
        DataFrame with 'Electrical Consumption [W]' column and DatetimeIndex

    Raises:
        ValueError: If profile_type is not recognized, if the profile is not
            bundled and no rlp_directory was given, or if the profile file
            contains no data rows
        FileNotFoundError: If rlp_directory was given but does not contain
            the profile file
    """
    profile_type = PROFILE_ALIASES.get(str(profile_type).lower(), str(profile_type))

    if profile_type not in PROFILE_FILES:
        raise ValueError(f"Unknown profile type: {profile_type}. Valid types: {list(PROFILE_FILES.keys())}")

    # Normalise the requested frequency once so every later decision agrees on
    # it. Previously '15T' was recognised but 'H' was not, so asking for 'H'
    # on a native 15-minute profile silently returned 15-minute data.
    freq_key = freq.lower() if isinstance(freq, str) else freq
    wants_15min = freq_key in ("15min", "15t")
    wants_hourly = freq_key == "h"

    packaged = rlp_directory is None
    rlp_path = None if packaged else Path(rlp_directory)

    def _candidate(filename: str):
        # Packaged profiles are importlib resources; external ones are plain paths.
        return rlp_resource(filename) if packaged else rlp_path / filename

    def _exists(candidate) -> bool:
        return candidate.is_file() if packaged else candidate.exists()

    # Prefer native 15-minute files when requested. If only a native
    # 15-minute external profile is available, load it and downsample later.
    has_15min_entry = profile_type in PROFILE_FILES_15MIN
    native_candidate = _candidate(PROFILE_FILES_15MIN[profile_type]) if has_15min_entry else None
    use_native_15min = wants_15min and has_15min_entry and _exists(native_candidate)

    if use_native_15min:
        csv_resource = native_candidate
        native_freq = "15min"
    else:
        hourly_candidate = _candidate(PROFILE_FILES[profile_type])
        if _exists(hourly_candidate):
            csv_resource = hourly_candidate
            native_freq = PROFILE_FILE_NATIVE_FREQ.get(profile_type, "h")
        elif has_15min_entry and _exists(native_candidate):
            csv_resource = native_candidate
            native_freq = "15min"
        else:
            # Nothing found; keep the hourly candidate so the error below names it.
            csv_resource = hourly_candidate
            native_freq = "h"

    if not _exists(csv_resource):
        if packaged:
            profile_name = PROFILE_NAMES.get(profile_type, profile_type)
            raise ValueError(
                f"Profile type {profile_type!r} ({profile_name}) is not bundled with BREOS. "
                "Its upstream redistribution terms are not confirmed for package release. "
                "Pass rlp_directory with a licensed local copy or use profile_type='1'."
            )
        raise FileNotFoundError(f"Load profile file not found: {csv_resource}")

    # Load the profile (as_file materialises a packaged resource on disk if needed)
    path_context = as_file(csv_resource) if packaged else nullcontext(csv_resource)
    with path_context as csv_file:
        df = _load_profile_csv(Path(csv_file), profile_type)

    if df.empty:
        # Without this guard the repeat-to-fill step below divides by zero.
        raise ValueError(f"Load profile file contains no data rows: {csv_resource}")

    # Create a naive wall-clock index for one year; rows describe household
    # behavior at local clock time and are pinned to the timezone afterwards.
    # NOTE: the canonical year is always 365 days (8760 h), so a single-year
    # request that starts on Jan 1 of a leap year ends on Dec 30.
    start_year = int(start_date[:4])
    steps_per_hour = 4 if native_freq == "15min" else 1
    new_index = pd.date_range(start=start_date, periods=8760 * steps_per_hour, freq=native_freq)

    # Adjust if profile has different length
    if len(df) < len(new_index):
        # Repeat to fill
        repeats = (len(new_index) // len(df)) + 1
        df = pd.concat([df] * repeats, ignore_index=True).iloc[: len(new_index)]
    elif len(df) > len(new_index):
        df = df.iloc[: len(new_index)]

    df.index = new_index
    df.index.name = "DateTime"

    # Scale to target consumption (in place)
    scale_to_annual_consumption(df, annual_consumption_kwh)

    # Extend to multiple years if needed (on the naive wall-clock index, so
    # each year is localized at its own DST transition dates below)
    if num_years > 1:
        df = _extend_to_years(df, start_year, num_years)

    df = _localize_wall_clock_index(df, timezone, native_freq)

    # Resample if the requested frequency differs from the file's native one
    if wants_15min and native_freq == "h":
        df = _resample_load_to_15min(df)
    elif wants_hourly and native_freq == "15min":
        df = df.resample("h").mean()

    return df
def _localize_wall_clock_index(df: pd.DataFrame, timezone: Optional[str], freq: str) -> pd.DataFrame:
    """Pin naive wall-clock profile rows to a timezone's legal time.

    Household behavior follows the legal clock, so each row keeps its
    wall-clock label. In a DST-observing timezone the spring-forward hour
    does not exist (its rows are dropped) and the fall-back hour occurs
    twice (the rows cover the standard-time occurrence; the DST instants
    are forward-filled), keeping the result evenly spaced in absolute time.

    Args:
        df: Frame with a naive DatetimeIndex; it is copied, not modified
        timezone: Target timezone name; ``None`` and ``"UTC"`` both localize
            the index to UTC without any DST handling
        freq: Step of ``df``'s index, used to rebuild the evenly spaced index

    Returns:
        A copy of ``df`` with a timezone-aware index
    """
    localized = df.copy()
    if timezone is None or timezone == "UTC":
        localized.index = localized.index.tz_localize("UTC")
        return localized

    # Non-existent wall-clock times are moved to the first valid instant after
    # the gap; ambiguous ones are taken as the non-DST (standard-time) instant.
    localized.index = localized.index.tz_localize(timezone, nonexistent="shift_forward", ambiguous=False)

    # Rows shifted out of the spring-forward gap collide with the real row at
    # the first valid instant; keeping the last one keeps that real row.
    localized = localized[~localized.index.duplicated(keep="last")]

    # Rebuild an evenly spaced index; the instants that gained no row (the DST
    # occurrence of the fall-back hour) are forward-filled.
    full = pd.date_range(localized.index[0], localized.index[-1], freq=freq)
    localized = localized.reindex(full).ffill()
    localized.index.name = "DateTime"
    return localized


def _load_profile_csv(csv_file: Path, profile_type: str) -> pd.DataFrame:
    """Load a profile CSV file and standardize column names.

    Args:
        csv_file: Path of the CSV file to read
        profile_type: Resolved profile key; selects the expected file layout

    Returns:
        A new single-column frame named 'Electrical Consumption [W]'

    Raises:
        ValueError: If the file cannot be read or no consumption column is
            found. The underlying exception is attached as ``__cause__``.
    """
    target = "Electrical Consumption [W]"
    try:
        if profile_type == "1":
            # H0SLP demandlib format (hourly has 'Electrical Consumption [kW]',
            # 15min has 'h0_dyn' in kW); both are converted from kW to W.
            df = pd.read_csv(csv_file, index_col=0)
            for kw_column in ("Electrical Consumption [kW]", "h0_dyn"):
                if kw_column in df.columns:
                    df[kw_column] *= 1000
                    df = df.rename(columns={kw_column: target})
                    break

        elif profile_type in ("4", "5", "6"):
            # E-Redes format: prefer the column mapped to this profile key,
            # otherwise fall back to the first column mentioning "BTN".
            df = pd.read_csv(csv_file)
            preferred = EREDES_COLUMNS[profile_type]
            if preferred in df.columns:
                source_column = preferred
            else:
                source_column = next((col for col in df.columns if "BTN" in col), None)
            if source_column is not None:
                df = df[[source_column]].rename(columns={source_column: target})

        else:
            # Generic layout: first column is the index, the single remaining
            # column is the consumption.
            df = pd.read_csv(csv_file, index_col=0)
            df.columns = [target]

        if target not in df.columns:
            # Explicit message instead of pandas' opaque "not in index" KeyError.
            raise ValueError(f"no consumption column found (columns: {list(df.columns)})")

        return df[[target]].copy()

    except Exception as e:
        # Keep the single ValueError contract callers rely on, but chain the
        # original exception so the root cause is not lost.
        raise ValueError(f"Error loading profile from {csv_file}: {e}") from e
def scale_to_annual_consumption(
    load_df: pd.DataFrame, annual_consumption_kwh: float, column: str = "Electrical Consumption [W]"
) -> None:
    """
    Rescale a load column so its energy equals a target annual consumption.

    The frame is modified in place and nothing is returned. The column is left
    untouched when its current energy is not positive.

    Args:
        load_df: DataFrame with load data (in W)
        annual_consumption_kwh: Target annual consumption in kWh
        column: Name of the consumption column
    """
    index = load_df.index

    # Step length in hours: taken from the first two timestamps when the index
    # is a DatetimeIndex with more than one row, otherwise assumed hourly.
    step_hours = 1.0
    if isinstance(index, pd.DatetimeIndex) and len(load_df) > 1:
        step_hours = (index[1] - index[0]).total_seconds() / 3600

    # Energy currently represented by the column (power * step length), in Wh
    present_wh = load_df[column].sum() * step_hours
    wanted_wh = annual_consumption_kwh * 1000

    if present_wh > 0:
        load_df[column] *= wanted_wh / present_wh
def _extend_to_years(df: pd.DataFrame, start_year: int, num_years: int) -> pd.DataFrame:
    """
    Extend a 1-year profile to multiple years by repeating data.

    Generates a fresh index for each calendar year (starting at Jan 1 of
    ``start_year``) and fills it by calendar lookup, so leap years are handled
    without duplicate timestamps: Feb. 29 repeats the values of Feb. 28.

    A 365-day source whose index happens to span a Feb. 29 (for example 8760
    hourly rows starting on 2024-01-01, which end on Dec. 30) leaves one
    calendar day without a label. In that case the rows on or after the leap
    day are treated positionally, i.e. re-keyed one calendar day later, so
    the source again covers a full non-leap calendar.

    Args:
        df: Source profile with a DatetimeIndex (naive or timezone-aware)
        start_year: First calendar year to generate
        num_years: Number of consecutive calendar years to generate

    Returns:
        Concatenated frame covering ``num_years`` full calendar years

    Raises:
        KeyError: If a generated timestamp has no matching source row
    """

    def _calendar_key(ts: pd.Timestamp, month: Optional[int] = None, day: Optional[int] = None):
        # Year-independent slot identity; the UTC offset is part of the key so
        # timezone-aware sources only match rows with the same offset.
        offset = ts.utcoffset()
        return (
            ts.month if month is None else month,
            ts.day if day is None else day,
            ts.hour,
            ts.minute,
            None if offset is None else int(offset.total_seconds()),
        )

    timestamps = list(df.index)
    values = df.to_numpy()

    # Build a calendar lookup from the canonical source year. Feb. 29 is excluded
    # so leap years can duplicate Feb. 28 without shifting the rest of the year.
    source_rows = {}
    first_leap_pos = None
    for pos, ts in enumerate(timestamps):
        if ts.month == 2 and ts.day == 29:
            if first_leap_pos is None:
                first_leap_pos = pos
            continue
        source_rows[_calendar_key(ts)] = values[pos]

    freq = pd.infer_freq(df.index) or "h"
    tz = df.index.tz

    # Plan every year up front: its index and the lookup key of each timestamp.
    year_plans = []
    for current_year in range(start_year, start_year + num_years):
        year_index = pd.date_range(
            start=f"{current_year}-01-01 00:00",
            end=f"{current_year}-12-31 23:45",  # Cover max potential range
            freq=freq,
            tz=tz,
        )
        # Cap at end of year exactly
        year_index = year_index[year_index.year == current_year]
        keys = [
            _calendar_key(ts, day=28 if (ts.month == 2 and ts.day == 29) else None)
            for ts in year_index
        ]
        year_plans.append((year_index, keys))

    def _first_missing(lookup):
        # First generated timestamp without a source row, or None if complete.
        for year_index, keys in year_plans:
            for ts, key in zip(year_index, keys):
                if key not in lookup:
                    return ts
        return None

    missing = _first_missing(source_rows)
    if missing is not None and first_leap_pos is not None:
        # The labels do not cover the calendar and the source spans a leap day:
        # re-key the rows from the leap day onwards one calendar day later.
        # Only reached for inputs that previously failed with KeyError.
        relabelled = dict(source_rows)
        one_day = pd.Timedelta(days=1)
        for pos in range(first_leap_pos, len(timestamps)):
            ts = timestamps[pos]
            moved = ts.tz_localize(None) + one_day  # wall-clock date arithmetic
            relabelled[_calendar_key(ts, month=moved.month, day=moved.day)] = values[pos]
        source_rows = relabelled
        missing = _first_missing(source_rows)

    if missing is not None:
        raise KeyError(f"Missing canonical load value for {missing}")

    dfs = [
        pd.DataFrame(
            data=np.vstack([source_rows[key] for key in keys]),
            index=year_index,
            columns=df.columns,
        )
        for year_index, keys in year_plans
    ]

    return pd.concat(dfs)


def _resample_load_to_15min(df: pd.DataFrame) -> pd.DataFrame:
    """Resample hourly load to 15-minute using interpolation.

    Delegates to ``resample_to_15min`` with ``method="makima"`` and then clips
    every column at zero so the interpolation cannot produce negative load.
    """
    # For load, we typically want to interpolate (not sum)
    # because the values represent average power in W
    df_15min = resample_to_15min(df, method="makima")

    # Ensure no negative values
    for col in df_15min.columns:
        df_15min[col] = df_15min[col].clip(lower=0)

    return df_15min
def align_load_to_pv(load_df: pd.DataFrame, pv_series: pd.Series, freq: str = "h") -> pd.DataFrame:
    """
    Re-index a load profile onto the time span of a PV production series.

    Useful when the load profile and the PV/TMY data were generated for
    different years: the load values are kept in order and simply laid over
    a fresh index running from the first to the last PV timestamp.

    Args:
        load_df: Load profile DataFrame; only its first column is used
        pv_series: PV production Series with DatetimeIndex
        freq: Time frequency of the generated index

    Returns:
        Single-column DataFrame (named after the first load column) whose
        index, named "DateTime", spans the PV data
    """
    # Target index: first to last PV timestamp at the requested step
    target_index = pd.date_range(start=pv_series.index[0], end=pv_series.index[-1], freq=freq)
    wanted = len(target_index)

    # Load values by position; their original timestamps are ignored
    values = load_df.iloc[:, 0].values
    available = len(values)

    if available < wanted:
        # Too short: cycle the profile until it fills the span
        values = np.tile(values, wanted // available + 1)[:wanted]
    elif available > wanted:
        # Too long: keep the leading part
        values = values[:wanted]

    aligned = pd.DataFrame({load_df.columns[0]: values}, index=target_index)
    aligned.index.name = "DateTime"
    return aligned