Source code for breos.utils

"""
Utility functions for the breos library.
"""

import multiprocessing
import os
import re

import numpy as np
import pandas as pd

# One lower-case ASCII alphanumeric character followed by at most 63
# characters drawn from a-z, 0-9, "_" and "-" (64 characters in total).
_SAFE_SLUG_RE = re.compile(r"^[a-z0-9][a-z0-9_-]{0,63}$")


def safe_path_slug(name: str) -> str:
    """Validate *name* as a safe filename component and return it lower-cased.

    The input is lower-cased first and then validated as a whole: the
    lower-cased value may only contain ``a-z``, ``0-9``, ``_`` and ``-``,
    must start with an alphanumeric character and be at most 64 characters
    long. Anything else (path separators, ``..``, NUL bytes, spaces, dots,
    a trailing newline, the empty string) is rejected so the value cannot be
    used to escape an intended output directory when interpolated into a
    filename.

    Args:
        name: Candidate filename component.

    Returns:
        The lower-cased form of *name*.

    Raises:
        TypeError: If *name* is not a ``str``.
        ValueError: If the lower-cased value does not satisfy the rules above.
    """
    if not isinstance(name, str):
        raise TypeError(f"safe_path_slug: expected str, got {type(name).__name__}")
    lowered = name.lower()
    # fullmatch() rather than match(): with match() the pattern's final "$"
    # also succeeds just before a trailing newline, which would let a value
    # ending in a line break through validation and into a filename.
    if not _SAFE_SLUG_RE.fullmatch(lowered):
        raise ValueError(
            f"Cannot use {name!r} as a filename component "
            "(allowed: a-z, 0-9, _, -; must start alphanumeric; max 64 chars)"
        )
    return lowered
def is_leap_year(year: int) -> bool:
    """
    Tell whether *year* is a leap year under the Gregorian rule.

    Args:
        year: Year to check

    Returns:
        True when the year is divisible by 4 and is either not divisible
        by 100 or is divisible by 400; False otherwise
    """
    divisible_by_4 = year % 4 == 0
    not_a_century = year % 100 != 0
    divisible_by_400 = year % 400 == 0
    return divisible_by_4 and (not_a_century or divisible_by_400)
def count_leap_years(start_year: int, num_years: int) -> int:
    """
    Count the leap years among ``num_years`` consecutive years.

    Args:
        start_year: First year of the range (inclusive)
        num_years: How many consecutive years to examine

    Returns:
        Number of years in ``start_year .. start_year + num_years - 1``
        for which ``is_leap_year`` is true
    """
    end_year = start_year + num_years  # exclusive upper bound
    leap_total = 0
    for candidate in range(start_year, end_year):
        if is_leap_year(candidate):
            leap_total += 1
    return leap_total
def remap_datetime_index_years(obj, year_offset: int):
    """Shift a DatetimeIndex-bearing object by whole years without Feb. 29 crashes.

    Feb. 29 entries whose target year is not a leap year are dropped. That
    avoids both ``Timestamp.replace`` failures and duplicate Feb. 28 labels
    that would later make ``reindex`` fail.

    Args:
        obj: A ``pd.DatetimeIndex``, or an object whose ``index`` attribute is
            a ``pd.DatetimeIndex`` and that supports ``.iloc`` and ``.copy()``.
        year_offset: Number of years to add to every timestamp (may be
            negative).

    Returns:
        *obj* itself when ``year_offset`` is 0 or when no ``DatetimeIndex``
        can be found on it. Otherwise a new ``DatetimeIndex`` (if *obj* is
        one) or a copy of *obj* restricted to the surviving rows and carrying
        the shifted index. The index name is preserved.
    """
    if year_offset == 0:
        return obj

    index = obj if isinstance(obj, pd.DatetimeIndex) else getattr(obj, "index", None)
    if not isinstance(index, pd.DatetimeIndex):
        return obj

    # keep[i] stays True for every row that survives the shift.
    keep = np.ones(len(index), dtype=bool)
    remapped = []
    for pos, ts in enumerate(index):
        target_year = ts.year + year_offset
        if ts.month == 2 and ts.day == 29 and not is_leap_year(target_year):
            # Feb. 29 has no counterpart in a non-leap target year.
            keep[pos] = False
            continue
        remapped.append(ts.replace(year=target_year))

    if remapped:
        # Carry the index name over; building from a bare list would drop it.
        new_index = pd.DatetimeIndex(remapped, name=index.name)
    else:
        # Nothing survived (or the input was empty): an empty slice of the
        # original keeps its name, timezone and dtype, which a DatetimeIndex
        # built from an empty list would not.
        new_index = index[:0]

    if isinstance(obj, pd.DatetimeIndex):
        return new_index

    out = obj.iloc[keep].copy()
    out.index = new_index
    return out
def number_of_cores() -> int:
    """
    Get the number of available CPU cores for parallel processing.

    Where the platform exposes it, the CPU affinity of the current process
    is used, so limits imposed by ``taskset`` or a cpuset-pinned container
    are respected. Otherwise the machine-wide CPU count is used.

    Returns:
        Number of CPU cores (leaves 1 core free for system), never less
        than 1
    """
    usable_cores = None
    if hasattr(os, "sched_getaffinity"):
        try:
            # CPUs this process is actually allowed to run on.
            usable_cores = len(os.sched_getaffinity(0))
        except OSError:
            usable_cores = None
    if not usable_cores:
        try:
            usable_cores = multiprocessing.cpu_count()
        except NotImplementedError:
            # CPU count cannot be determined: fall back to a single worker.
            usable_cores = 1
    # Leave at least 1 core for system, use at least 1 for computation
    return max(1, usable_cores - 1)
def get_hours_per_step(freq: str) -> float:
    """
    Translate a frequency string into the length of one timestep in hours.

    Args:
        freq: Frequency string; accepted spellings are 'h', 'H', '1h', '1H'
            (hourly) and '15min', '15T', '15m' (15-minute)

    Returns:
        Hours per timestep (1.0 for hourly, 0.25 for 15-min)

    Raises:
        ValueError: If freq is not one of the accepted spellings
    """
    hourly_aliases = ("h", "H", "1h", "1H")
    quarter_hour_aliases = ("15min", "15T", "15m")
    hours_by_alias = {
        **dict.fromkeys(hourly_aliases, 1.0),
        **dict.fromkeys(quarter_hour_aliases, 0.25),
    }

    hours = hours_by_alias.get(freq)
    if hours is None:
        raise ValueError(f"Unsupported frequency: {freq}. Use 'h' or '15min'.")
    return hours
def get_steps_per_day(freq: str) -> int:
    """
    Work out how many timesteps make up one day at the given frequency.

    Args:
        freq: Frequency string ('h' for hourly, '15min' for 15-minute)

    Returns:
        Steps per day (24 for hourly, 96 for 15-min)
    """
    hours_in_day = 24
    # The division yields a float; int() turns it into a step count.
    return int(hours_in_day / get_hours_per_step(freq))
def get_steps_per_year(freq: str, leap_year: bool = False) -> int:
    """
    Work out how many timesteps make up one year at the given frequency.

    Args:
        freq: Frequency string ('h' for hourly, '15min' for 15-minute)
        leap_year: When true, the year is taken to have 366 days instead
            of 365

    Returns:
        Steps per year (8760/8784 for hourly, 35040/35136 for 15-min)
    """
    if leap_year:
        days_in_year = 366
    else:
        days_in_year = 365
    steps_in_day = get_steps_per_day(freq)
    return steps_in_day * days_in_year