# Source code for climagrid.sources.noaa_hrrr

"""
NOAA HRRR adapter: High-Resolution Rapid Refresh NWP model.

Fetches 3-km CONUS hourly analysis fields (temperature, wind, precipitation,
humidity, solar irradiance) via the Herbie package from NOAA NOMADS or
cloud archives (AWS NODD, Google, Azure).

Requires optional dependency: pip install climagrid[noaa-nwp]

Docs: https://herbie.readthedocs.io/
"""

from __future__ import annotations

import warnings
from datetime import datetime, timedelta

import numpy as np
import pandas as pd

from climagrid.sources.base import BaseEnvironmentalSource, BoundingBox

try:
    from herbie import Herbie
    _HERBIE_AVAILABLE = True
except ImportError:
    _HERBIE_AVAILABLE = False


# Herbie search strings for the variables we need, mapped to the output column
# that will hold each field. The keys are patterns matched against the GRIB2
# "parameter:level" description of each message.
#
# The adapter iterates this mapping in insertion order, fetching one field per
# entry. Column names starting with an underscore are intermediates: the U/V
# wind components are combined into wind speed/direction and then dropped.
#
# NOTE(review): APCP is normally an accumulated precipitation amount rather
# than a rate — confirm the "hrrr_precipitation_rate" name and its units.
_HRRR_FIELDS: dict[str, str] = {
    "TMP:2 m": "hrrr_temperature_2m",  # converted from K to °C after fetching
    "UGRD:10 m": "_hrrr_u_wind",          # combined with VGRD for speed/direction
    "VGRD:10 m": "_hrrr_v_wind",  # combined with UGRD for speed/direction
    "RH:2 m": "hrrr_relative_humidity_2m",
    "APCP:surface": "hrrr_precipitation_rate",
    "DSWRF:surface": "hrrr_solar_irradiance_ghi",
    "SNOD:surface": "hrrr_snow_depth",
}


class HrrrAdapter(BaseEnvironmentalSource):
    """
    Fetches NOAA HRRR NWP data at 3 km CONUS resolution.

    Each call to :meth:`fetch` requests the configured forecast hour
    (``fxx``; 0 is the analysis) for every UTC hour in the requested window,
    subsets each field to the bounding box, and returns a long-form
    DataFrame with one row per (lat, lon, timestamp).

    The window start is floored to the top of the hour, so a start of 10:30
    still requests the 10:00 cycle; the end of the window is exclusive.

    Parameters
    ----------
    product:
        HRRR product type. "sfc" (surface fields) covers all variables
        needed for grid asset stress analysis.
    fxx:
        Forecast hour. Use 0 for analysis (best accuracy for past dates),
        1-18 for near-real-time forecasting.
    save_dir:
        Local directory for GRIB2 files. When ``None`` the argument is not
        forwarded and Herbie uses its own default location.

    Raises
    ------
    ImportError
        If the optional Herbie dependency is not installed.
    """

    def __init__(
        self,
        product: str = "sfc",
        fxx: int = 0,
        save_dir: str | None = None,
    ):
        if not _HERBIE_AVAILABLE:
            raise ImportError(
                "Herbie is required for HrrrAdapter. "
                "Install with: pip install climagrid[noaa-nwp]"
            )
        self._product = product
        self._fxx = fxx
        self._save_dir = save_dir

    @property
    def source_name(self) -> str:
        """Identifier of this data source."""
        return "noaa_hrrr"

    def fetch(
        self,
        bbox: BoundingBox,
        start_dt: datetime,
        end_dt: datetime,
    ) -> pd.DataFrame:
        """
        Fetch all HRRR fields for every hour in the requested window.

        Hours that fail are reported with a ``RuntimeWarning`` and skipped,
        so one bad cycle does not abort the whole request.

        Parameters
        ----------
        bbox:
            Spatial extent to keep.
        start_dt, end_dt:
            Time window; the start is floored to the hour and the end is
            exclusive.

        Returns
        -------
        pd.DataFrame
            Concatenation of the per-hour frames, or an empty DataFrame when
            no hour could be fetched.
        """
        start_dt = self._ensure_utc(start_dt)
        end_dt = self._ensure_utc(end_dt)
        self._validate_time_range(start_dt, end_dt)

        frames = []
        for dt in self._hour_range(start_dt, end_dt):
            try:
                df = self._fetch_one_hour(dt, bbox)
            except Exception as exc:
                # Best effort: keep going with the remaining hours.
                warnings.warn(
                    f"HRRR fetch failed for {dt}: {exc}",
                    RuntimeWarning,
                    stacklevel=2,
                )
            else:
                frames.append(df)

        if not frames:
            return pd.DataFrame()
        return pd.concat(frames, ignore_index=True)

    def _fetch_one_hour(self, dt: datetime, bbox: BoundingBox) -> pd.DataFrame:
        """
        Fetch every field in ``_HRRR_FIELDS`` for one model cycle.

        Each field is requested separately, cut to ``bbox`` and outer-merged
        on (lat, lon, timestamp). Fields that cannot be read are reported
        with a ``RuntimeWarning`` and left out of the result.

        Returns
        -------
        pd.DataFrame
            One row per grid point, with wind speed/direction derived from
            the U/V components and temperature converted to °C; an empty
            DataFrame when no field could be read.
        """
        # Only pass save_dir when explicitly set; Path(None) raises TypeError.
        herbie_kwargs: dict = dict(
            model="hrrr",
            product=self._product,
            fxx=self._fxx,
            verbose=False,
        )
        if self._save_dir is not None:
            herbie_kwargs["save_dir"] = self._save_dir

        # Herbie requires a naive UTC datetime: strip tzinfo before passing.
        H = Herbie(dt.replace(tzinfo=None), **herbie_kwargs)

        # Fetch each variable separately and merge.
        frames: list[pd.DataFrame] = []
        for search_str, col_name in _HRRR_FIELDS.items():
            try:
                ds = H.xarray(search_str, remove_grib=True)
                df_var = self._extract_bbox(ds, bbox, col_name, dt)
            except Exception as exc:
                # Best effort per variable, but make the gap visible instead
                # of silently returning a frame with missing columns.
                warnings.warn(
                    f"HRRR variable {search_str!r} unavailable for {dt}: {exc}",
                    RuntimeWarning,
                    stacklevel=2,
                )
                continue
            if df_var.columns.empty:
                # _extract_bbox found no data variable; a column-less frame
                # has no merge keys and would break the merge below.
                continue
            frames.append(df_var)

        if not frames:
            return pd.DataFrame()

        # Every retained frame carries the same key columns.
        merge_keys = ["lat", "lon", "timestamp"]
        result = frames[0]
        for df_var in frames[1:]:
            result = result.merge(df_var, on=merge_keys, how="outer")

        # Derive wind speed and direction from U/V components.
        # NOTE(review): HRRR winds may be grid-relative rather than
        # earth-relative; confirm whether a rotation is needed before
        # relying on the direction.
        if "_hrrr_u_wind" in result.columns and "_hrrr_v_wind" in result.columns:
            u = result["_hrrr_u_wind"]
            v = result["_hrrr_v_wind"]
            result["hrrr_wind_speed_10m"] = np.sqrt(u**2 + v**2)
            # Direction the wind blows FROM, in degrees clockwise from north.
            result["hrrr_wind_direction_10m"] = (
                np.degrees(np.arctan2(-u, -v)) % 360
            )
            result = result.drop(columns=["_hrrr_u_wind", "_hrrr_v_wind"])

        # Convert temperature from K to °C.
        if "hrrr_temperature_2m" in result.columns:
            result["hrrr_temperature_2m"] = result["hrrr_temperature_2m"] - 273.15

        return result

    @staticmethod
    def _extract_bbox(
        ds, bbox: BoundingBox, col_name: str, dt: datetime
    ) -> pd.DataFrame:
        """
        Subset an xarray Dataset to the bounding box and return a DataFrame.

        Parameters
        ----------
        ds:
            Dataset holding one field plus latitude/longitude coordinates.
        bbox:
            Spatial extent to keep (inclusive on all four sides).
        col_name:
            Name of the output column for the field values.
        dt:
            Timestamp assigned to every row; converted to UTC when aware,
            otherwise interpreted as UTC.

        Returns
        -------
        pd.DataFrame
            Columns ``lat``, ``lon``, ``col_name`` and ``timestamp``, or a
            column-less empty DataFrame when ``ds`` has no data variables.
        """
        # Check for data variables before accessing coordinates.
        data_vars = list(ds.data_vars)
        if not data_vars:
            return pd.DataFrame()

        # Accept either long or short coordinate names.
        lat_name = "latitude" if "latitude" in ds.coords else "lat"
        lon_name = "longitude" if "longitude" in ds.coords else "lon"
        lat = ds[lat_name].values
        lon = ds[lon_name].values

        # HRRR uses 0-360 longitude convention; normalize to -180 to 180.
        lon = np.where(lon > 180, lon - 360, lon)

        # Spatial mask.
        mask = (
            (lat >= bbox.min_lat)
            & (lat <= bbox.max_lat)
            & (lon >= bbox.min_lon)
            & (lon <= bbox.max_lon)
        )

        # Only the first data variable is used.
        values = ds[data_vars[0]].values
        if values.shape == lat.shape:
            flat_val = values[mask]
        else:
            # Shape differs from the coordinate grid; index the flattened
            # arrays instead.
            flat_val = values.flatten()[mask.flatten()]

        if dt.tzinfo:
            timestamp = pd.Timestamp(dt).tz_convert("UTC")
        else:
            timestamp = pd.Timestamp(dt, tz="UTC")

        return pd.DataFrame(
            {
                "lat": lat[mask],
                "lon": lon[mask],
                col_name: flat_val,
                "timestamp": timestamp,
            }
        )

    @staticmethod
    def _hour_range(start_dt: datetime, end_dt: datetime) -> list[datetime]:
        """
        List the hourly steps covering ``[start_dt, end_dt)``.

        The first step is ``start_dt`` floored to the top of the hour;
        ``end_dt`` itself is excluded.
        """
        hours = []
        current = start_dt.replace(minute=0, second=0, microsecond=0)
        while current < end_dt:
            hours.append(current)
            current += timedelta(hours=1)
        return hours