"""
Load profile management module.
This module handles residential and commercial load profiles,
including loading from CSV files, scaling to annual consumption,
and resampling between hourly and 15-minute intervals.
"""
from contextlib import nullcontext
from importlib.resources import as_file
from pathlib import Path
from typing import Dict, Optional, Union
import numpy as np
import pandas as pd
from breos.resources import rlp_resource
from breos.utils import count_leap_years, get_hours_per_step, is_leap_year
from breos.weather import resample_to_15min
# ---------------------------------------------------------------------------
# Profile catalogue
# ---------------------------------------------------------------------------

# Profile key -> CSV file name used for hourly requests.
# Key "7" points at a 15-minute file; see PROFILE_FILE_NATIVE_FREQ.
PROFILE_FILES = {
    "1": "h0SLP_demandlib_1000kwh_hourly.csv",
    "4": "EREDES_2025_BTN_1000kwh_hourly.csv",
    "5": "EREDES_2025_BTN_1000kwh_hourly.csv",
    "6": "EREDES_2025_BTN_1000kwh_hourly.csv",
    "7": "bdew_h0_2025_15min.csv",
    "8": "REE_2026_2.0TD_1000kwh_hourly.csv",
}

# Profile key -> CSV file name holding native 15-minute data.
PROFILE_FILES_15MIN = {
    "1": "h0SLP_demandlib_1000kwh_15min.csv",
    "4": "EREDES_2025_BTN_1000kwh_15min.csv",
    "5": "EREDES_2025_BTN_1000kwh_15min.csv",
    "6": "EREDES_2025_BTN_1000kwh_15min.csv",
    "7": "bdew_h0_2025_15min.csv",
    "8": "REE_2026_2.0TD_1000kwh_15min.csv",
}

# Time step of the PROFILE_FILES entry when it is not hourly.
# Keys that are absent here are treated as hourly ("h").
PROFILE_FILE_NATIVE_FREQ = {
    "7": "15min",
}

# Human-readable label for each profile key.
PROFILE_NAMES = {
    "1": "H0SLP (demandlib)",
    "4": "E-Redes 2025 - BTN A (external file required)",
    "5": "E-Redes 2025 - BTN B (external file required)",
    "6": "E-Redes 2025 - BTN C (external file required)",
    "7": "BDEW H0 2025 (external file required)",
    "8": "REE 2026 - 2.0TD (external file required)",
}

# Friendly names -> profile key.
#
# "bdew_h0" deliberately resolves to the bundled demandlib profile "1",
# which implements the BDEW H0 standard shape. Profile "7" is the
# externally published BDEW H0 2025 file and has to be supplied through
# rlp_directory; ask for "7" explicitly when that exact dataset is needed.
PROFILE_ALIASES = {
    "default": "1",
    "demandlib_h0": "1",
    "h0": "1",
    "bdew_h0": "1",
    "crest": "1",
    "eredes_btn_c": "6",
    "ree_2.0td": "8",
}

# E-Redes profile key -> name of the CSV column carrying that customer class.
EREDES_COLUMNS = {
    "4": "BTN A - Wh",
    "5": "BTN B - Wh",
    "6": "BTN C - Wh",
}
[docs]
def load_profile(
    profile_type: str,
    annual_consumption_kwh: float,
    start_date: str = "2025-01-01",
    freq: str = "h",
    num_years: int = 1,
    rlp_directory: Optional[str] = None,
    timezone: Optional[str] = "UTC",
) -> pd.DataFrame:
    """
    Load and scale a residential/commercial load profile.

    This is the main function for loading load profiles. It supports the
    bundled H0SLP demandlib profile, user-supplied external CSVs through
    ``rlp_directory``, scaling to target annual consumption, multi-year
    extension, and hourly or 15-minute output.

    Args:
        profile_type: Profile type key (see PROFILE_NAMES) or alias (see
            PROFILE_ALIASES; aliases are matched case-insensitively)
        annual_consumption_kwh: Target annual consumption in kWh
        start_date: Start date for the profile (YYYY-MM-DD). The first four
            characters are parsed as the start year.
        freq: Time frequency ('h' for hourly, '15min' for 15-minute). The
            legacy pandas aliases 'H' and '15T' are accepted as well.
        num_years: Number of years to generate
        rlp_directory: Directory containing RLP files. When omitted, BREOS
            uses only redistributable packaged profiles.
        timezone: Timezone for the index. Profile rows are wall-clock local
            behavior (H0 morning/evening peaks), so pass the location's
            timezone to pin them to local time; the simulation aligns load
            and PV by UTC instant. The 'UTC' default keeps the legacy
            UTC-clock convention for callers without a location.

    Returns:
        DataFrame with 'Electrical Consumption [W]' column and DatetimeIndex

    Raises:
        ValueError: If profile_type is not recognized, if the profile is not
            bundled and no rlp_directory was given, or if the CSV cannot be
            parsed
        FileNotFoundError: If rlp_directory was given but does not contain
            the profile file

    Note:
        With ``num_years == 1`` the result is built from the 8760-hour
        canonical window that starts at ``start_date``; no leap day is
        inserted, so a window starting on Jan 1 of a leap year ends on
        Dec 30. With ``num_years > 1`` full calendar years are generated and
        Feb 29 repeats the Feb 28 values.
    """
    profile_type = PROFILE_ALIASES.get(str(profile_type).lower(), str(profile_type))
    if profile_type not in PROFILE_FILES:
        raise ValueError(f"Unknown profile type: {profile_type}. Valid types: {list(PROFILE_FILES.keys())}")

    packaged = rlp_directory is None
    rlp_path = None if packaged else Path(rlp_directory)

    def _candidate(filename: str):
        # Packaged resources and plain paths expose different objects;
        # both are only touched through _exists() and the loader below.
        return rlp_resource(filename) if packaged else rlp_path / filename

    def _exists(candidate) -> bool:
        return candidate.is_file() if packaged else candidate.exists()

    wants_15min = freq in ("15min", "15T")
    # "H" is the legacy pandas spelling of "h"; accept it the same way the
    # legacy "15T" is accepted for 15-minute output.
    wants_hourly = freq in ("h", "H")

    # Prefer native 15-minute files when requested. If only a native
    # 15-minute external profile is available, load it and downsample later.
    has_15min_file = profile_type in PROFILE_FILES_15MIN
    native_candidate = _candidate(PROFILE_FILES_15MIN[profile_type]) if has_15min_file else None
    use_native_15min = wants_15min and has_15min_file and _exists(native_candidate)

    if use_native_15min:
        csv_resource = native_candidate
        native_freq = "15min"
    else:
        hourly_candidate = _candidate(PROFILE_FILES[profile_type])
        if _exists(hourly_candidate):
            csv_resource = hourly_candidate
            native_freq = PROFILE_FILE_NATIVE_FREQ.get(profile_type, "h")
        elif has_15min_file and _exists(native_candidate):
            csv_resource = native_candidate
            native_freq = "15min"
        else:
            # Nothing found: keep the hourly candidate so the error below
            # names the file the caller is expected to provide.
            csv_resource = hourly_candidate
            native_freq = "h"

    if not _exists(csv_resource):
        if packaged:
            profile_name = PROFILE_NAMES.get(profile_type, profile_type)
            raise ValueError(
                f"Profile type {profile_type!r} ({profile_name}) is not bundled with BREOS. "
                "Its upstream redistribution terms are not confirmed for package release. "
                "Pass rlp_directory with a licensed local copy or use profile_type='1'."
            )
        raise FileNotFoundError(f"Load profile file not found: {csv_resource}")

    # Load the profile. Packaged resources may need to be materialized as a
    # real file first; user-supplied paths are used directly.
    path_context = as_file(csv_resource) if packaged else nullcontext(csv_resource)
    with path_context as csv_file:
        df = _load_profile_csv(Path(csv_file), profile_type)

    # Create a naive wall-clock index for one year; rows describe household
    # behavior at local clock time and are pinned to the timezone afterwards
    start_year = int(start_date[:4])
    hours_in_year = 8760  # Non-leap year
    steps_per_hour = 4 if native_freq == "15min" else 1
    new_index = pd.date_range(start=start_date, periods=hours_in_year * steps_per_hour, freq=native_freq)

    # Adjust if profile has different length
    if len(df) < len(new_index):
        # Repeat to fill
        repeats = (len(new_index) // len(df)) + 1
        df = pd.concat([df] * repeats, ignore_index=True).iloc[: len(new_index)]
    elif len(df) > len(new_index):
        df = df.iloc[: len(new_index)]
    df.index = new_index
    df.index.name = "DateTime"

    # Scale to target consumption
    scale_to_annual_consumption(df, annual_consumption_kwh)

    # Extend to multiple years if needed (on the naive wall-clock index, so
    # each year is localized at its own DST transition dates below)
    if num_years > 1:
        # _extend_to_years looks rows up by (month, day, hour, minute) and
        # ignores Feb 29 labels. A 365-day window that contains a leap day is
        # therefore one calendar day short (Dec 31 for a Jan 1 start in a leap
        # year) and the lookup fails with KeyError. Relabel such a window onto
        # a leap-free reference span (2001-2002) so every calendar slot is
        # covered; windows without a leap day are left untouched.
        crosses_leap_day = ((df.index.month == 2) & (df.index.day == 29)).any()
        if crosses_leap_day:
            anchor = df.index[0]
            # Feb 29 has no counterpart in the reference year; start on Feb 28.
            anchor_day = 28 if (anchor.month == 2 and anchor.day == 29) else anchor.day
            reference_start = anchor.replace(year=2001, day=anchor_day)
            df.index = pd.date_range(start=reference_start, periods=len(df), freq=native_freq)
        df = _extend_to_years(df, start_year, num_years)
        # The extension rebuilds the index; restore the name used elsewhere.
        df.index.name = "DateTime"

    df = _localize_wall_clock_index(df, timezone, native_freq)

    # Resample if needed (hourly to 15-min, or native 15-min down to hourly)
    if wants_15min and native_freq == "h":
        df = _resample_load_to_15min(df)
    elif wants_hourly and native_freq == "15min":
        df = df.resample("h").mean()
    return df
def _localize_wall_clock_index(df: pd.DataFrame, timezone: Optional[str], freq: str) -> pd.DataFrame:
    """Attach a timezone to a naive wall-clock index without moving the labels.

    The input frame is not modified; a copy is returned.

    * ``timezone`` of ``None`` or ``"UTC"``: the labels are declared to be UTC
      and nothing else changes.
    * Any other timezone: labels are localized with
      ``nonexistent="shift_forward"`` and ``ambiguous=False``. Rows that were
      shifted onto an existing label are discarded in favour of the row that
      carried that label originally (the last duplicate). The frame is then
      reindexed onto an evenly spaced range at ``freq`` between its first and
      last instant, and instants without a row are forward-filled.

    Args:
        df: Frame with a timezone-naive DatetimeIndex.
        timezone: Target timezone name, or ``None``/``"UTC"`` for UTC.
        freq: Step of ``df``'s index, used to rebuild the even grid.

    Returns:
        A timezone-aware copy of ``df``.
    """
    result = df.copy()
    keep_utc_clock = timezone is None or timezone == "UTC"

    if not keep_utc_clock:
        result.index = result.index.tz_localize(
            timezone,
            nonexistent="shift_forward",
            ambiguous=False,
        )
        # Shifted spring-forward rows collide with the first real label after
        # the gap; the real row comes last, so keep that one.
        superseded = result.index.duplicated(keep="last")
        result = result[~superseded]

        # Rebuild a gap-free grid in absolute time and fill the missing
        # instants from the preceding row.
        even_grid = pd.date_range(result.index[0], result.index[-1], freq=freq)
        result = result.reindex(even_grid).ffill()
        result.index.name = "DateTime"
        return result

    result.index = result.index.tz_localize("UTC")
    return result
def _load_profile_csv(csv_file: Path, profile_type: str) -> pd.DataFrame:
    """Load a profile CSV file and standardize column names.

    The result always has exactly one column, 'Electrical Consumption [W]'.

    * Profile "1" (H0SLP demandlib): the first CSV column is the index. A
      'Electrical Consumption [kW]' column (hourly file) or an 'h0_dyn'
      column (15-minute file) is multiplied by 1000 and renamed.
    * Profiles "4"/"5"/"6" (E-Redes): the column named in EREDES_COLUMNS is
      used. If it is missing, a column naming the same customer class
      (e.g. "BTN B") is preferred, and only then any column containing
      "BTN". The values are renamed without unit conversion.
    * Any other profile: the first CSV column is the index and the single
      remaining column is taken as the load.

    Args:
        csv_file: Location of the CSV file.
        profile_type: Resolved profile key.

    Returns:
        Single-column DataFrame named 'Electrical Consumption [W]'.

    Raises:
        ValueError: If the file cannot be read or no load column can be
            identified. The underlying error is attached as ``__cause__``.
    """
    target = "Electrical Consumption [W]"
    try:
        if profile_type == "1":
            # H0SLP demandlib format (hourly has 'Electrical Consumption [kW]', 15min has 'h0_dyn' in kW)
            df = pd.read_csv(csv_file, index_col=0)
            for kw_column in ("Electrical Consumption [kW]", "h0_dyn"):
                if kw_column in df.columns:
                    df[kw_column] *= 1000
                    df.rename(columns={kw_column: target}, inplace=True)
                    break
        elif profile_type in ("4", "5", "6"):
            # E-Redes format
            df = pd.read_csv(csv_file)
            expected = EREDES_COLUMNS[profile_type]
            if expected in df.columns:
                source = expected
            else:
                # Look for the requested customer class first so that a file
                # with differently suffixed headers does not silently yield
                # another class; fall back to any BTN column as before.
                class_label = expected.split(" - ")[0]
                source = next((col for col in df.columns if class_label in col), None)
                if source is None:
                    source = next((col for col in df.columns if "BTN" in col), None)
            if source is not None:
                df = df[[source]].copy()
                df.rename(columns={source: target}, inplace=True)
        else:
            df = pd.read_csv(csv_file, index_col=0)
            df.columns = [target]

        if target not in df.columns:
            raise ValueError(
                f"no load column found for profile type {profile_type!r}; "
                f"available columns: {list(df.columns)}"
            )
        return df[[target]].copy()
    except Exception as e:
        # Keep the single ValueError contract callers rely on, but chain the
        # original exception so the root cause stays visible in tracebacks.
        raise ValueError(f"Error loading profile from {csv_file}: {e}") from e
[docs]
def scale_to_annual_consumption(
    load_df: pd.DataFrame, annual_consumption_kwh: float, column: str = "Electrical Consumption [W]"
) -> None:
    """
    Scale load profile to match target annual consumption.

    Modifies the DataFrame in place. The energy of the profile is computed as
    the sum of the power values times the step length; the step length is
    taken from the first two index entries when the index is a DatetimeIndex
    with more than one row, and assumed to be one hour otherwise. A profile
    whose energy is not positive is left unchanged.

    Args:
        load_df: DataFrame with load data (in W)
        annual_consumption_kwh: Target annual consumption in kWh
        column: Name of the consumption column
    """
    index = load_df.index
    if isinstance(index, pd.DatetimeIndex) and len(index) > 1:
        # Infer the step from the first interval; the profile is evenly spaced.
        hours_per_step = (index[1] - index[0]).total_seconds() / 3600
    else:
        hours_per_step = 1.0  # Assume hourly

    # Energy currently represented by the profile, in Wh (power * hours).
    current_annual_wh = load_df[column].sum() * hours_per_step
    target_annual_wh = annual_consumption_kwh * 1000

    # Guard against dividing by zero (or flipping signs) on an empty profile.
    if current_annual_wh > 0:
        scaling_factor = target_annual_wh / current_annual_wh
        load_df[column] *= scaling_factor
def _extend_to_years(df: pd.DataFrame, start_year: int, num_years: int) -> pd.DataFrame:
    """
    Tile a one-year profile over ``num_years`` consecutive calendar years.

    Every source row is filed under its calendar slot (month, day, hour,
    minute, UTC offset in seconds or ``None`` for naive stamps); rows labelled
    Feb 29 are not filed. For each target year a fresh index from Jan 1 to
    Dec 31 is generated at the source's inferred frequency (hourly when it
    cannot be inferred) and in the source's timezone, and each stamp takes
    the values of its slot. Feb 29 of a target year reuses the Feb 28 slot,
    so the rest of a leap year is not shifted.

    Args:
        df: One-year profile with a DatetimeIndex.
        start_year: First calendar year to generate.
        num_years: Number of consecutive years to generate.

    Returns:
        Concatenation of the generated yearly frames, with ``df``'s columns.

    Raises:
        KeyError: If a target stamp has no matching slot in the source.
    """

    def _slot(stamp: pd.Timestamp, fold_leap_day: bool = False):
        utc_offset = stamp.utcoffset()
        on_leap_day = stamp.month == 2 and stamp.day == 29
        return (
            stamp.month,
            28 if (fold_leap_day and on_leap_day) else stamp.day,
            stamp.hour,
            stamp.minute,
            None if utc_offset is None else int(utc_offset.total_seconds()),
        )

    # Calendar lookup built from the source year, leaving out Feb 29 rows.
    canonical = {
        _slot(stamp): row.to_numpy(copy=True)
        for stamp, row in df.iterrows()
        if not (stamp.month == 2 and stamp.day == 29)
    }

    step = pd.infer_freq(df.index) or "h"
    zone = df.index.tz

    def _values_at(stamp: pd.Timestamp):
        values = canonical.get(_slot(stamp, fold_leap_day=True))
        if values is None:
            raise KeyError(f"Missing canonical load value for {stamp}")
        return values

    yearly_frames = []
    for year in range(start_year, start_year + num_years):
        # 23:45 on Dec 31 is late enough for any step down to 15 minutes;
        # the year filter then trims the range to this calendar year.
        stamps = pd.date_range(
            start=f"{year}-01-01 00:00",
            end=f"{year}-12-31 23:45",
            freq=step,
            tz=zone,
        )
        stamps = stamps[stamps.year == year]
        stacked = np.vstack([_values_at(stamp) for stamp in stamps])
        yearly_frames.append(pd.DataFrame(data=stacked, index=stamps, columns=df.columns))

    return pd.concat(yearly_frames)
def _resample_load_to_15min(df: pd.DataFrame) -> pd.DataFrame:
    """Upsample an hourly load frame to 15-minute steps.

    The values are average power in W, so they are interpolated (method
    "makima") rather than summed. Every column of the interpolated frame is
    then clipped at zero so the load never goes negative.
    """
    upsampled = resample_to_15min(df, method="makima")
    for name in upsampled.columns:
        non_negative = upsampled[name].clip(lower=0)
        upsampled[name] = non_negative
    return upsampled
[docs]
def align_load_to_pv(load_df: pd.DataFrame, pv_series: pd.Series, freq: str = "h") -> pd.DataFrame:
    """
    Re-label a load profile so it shares the time range of PV production data.

    Load profiles and PV/TMY data often carry different years. This function
    ignores the load's own timestamps: it builds a new index running from the
    first to the last PV timestamp at ``freq`` and assigns the values of the
    first load column to it by position. A load that is too short is repeated
    from its beginning; a load that is too long is cut off.

    Args:
        load_df: Load profile DataFrame with DatetimeIndex
        pv_series: PV production Series with DatetimeIndex
        freq: Time frequency of the generated index

    Returns:
        Single-column DataFrame (named after the first load column) whose
        index, named "DateTime", spans the PV data's time range
    """
    pv_index = pv_series.index
    target_index = pd.date_range(start=pv_index[0], end=pv_index[-1], freq=freq)
    target_len = len(target_index)

    # Position-based values of the first column; the load's year is ignored.
    values = load_df.iloc[:, 0].values
    if len(values) < target_len:
        # One copy more than the integer quotient always covers the target.
        copies = target_len // len(values) + 1
        values = np.tile(values, copies)
    values = values[:target_len]

    aligned = pd.DataFrame({load_df.columns[0]: values}, index=target_index)
    aligned.index.name = "DateTime"
    return aligned