Download API¶

Weather file downloading and caching.

WeatherDownloader¶

`idfkit.weather.download.WeatherDownloader` ¶

Download and cache weather files from climate.onebuilding.org.

Downloaded ZIP archives are extracted and cached locally so that subsequent requests for the same station and dataset are served from disk without a network call.

Examples:

from idfkit.weather import StationIndex, WeatherDownloader

station = StationIndex.load().search("chicago ohare")[0].station
downloader = WeatherDownloader()
files = downloader.download(station)
print(files.epw)

Parameters:

Name	Type	Description	Default
`cache_dir`	`Path \| None`	Override the default cache directory.	`None`
`max_age`	`timedelta \| float \| None`	Maximum age of cached files before re-downloading. Can be a timedelta or a number of seconds. If `None` (default), cached files never expire.	`None`

Note

The cache has no size limit. For CI/CD environments with limited disk space, consider calling `clear_cache()` periodically or setting `max_age` to force re-downloads of stale files.

Source code in src/idfkit/weather/download.py

class WeatherDownloader:
    """Download and cache weather files from climate.onebuilding.org.

    Downloaded ZIP archives are extracted and cached locally so that
    subsequent requests for the same station and dataset are served from
    disk without a network call.

    Examples:
        ```python
        from idfkit.weather import StationIndex, WeatherDownloader

        station = StationIndex.load().search("chicago ohare")[0].station
        downloader = WeatherDownloader()
        files = downloader.download(station)
        print(files.epw)
        ```

    Args:
        cache_dir: Override the default cache directory.
        max_age: Maximum age of cached files before re-downloading.
            Can be a [timedelta][datetime.timedelta] or a number of seconds.
            If ``None`` (default), cached files never expire.

    Note:
        The cache has no size limit. For CI/CD environments with limited disk
        space, consider using [clear_cache][idfkit.weather.download.WeatherDownloader.clear_cache] periodically or setting
        a ``max_age`` to force re-downloads of stale files.
    """

    __slots__ = ("_cache_dir", "_max_age_seconds")

    def __init__(
        self,
        cache_dir: Path | None = None,
        max_age: timedelta | float | None = None,
    ) -> None:
        self._cache_dir = cache_dir or default_cache_dir()
        # Normalise ``max_age`` to plain seconds (or ``None`` for "never
        # expires") so the staleness check is a simple float comparison.
        if max_age is None:
            self._max_age_seconds: float | None = None
        elif isinstance(max_age, timedelta):
            self._max_age_seconds = max_age.total_seconds()
        else:
            self._max_age_seconds = float(max_age)

    def _is_stale(self, path: Path) -> bool:
        """Check if a cached file is older than max_age.

        Args:
            path: The cached file to inspect.

        Returns:
            ``False`` when no ``max_age`` is configured; ``True`` when *path*
            does not exist or its modification time is more than ``max_age``
            seconds in the past; ``False`` otherwise.
        """
        if self._max_age_seconds is None:
            return False
        try:
            mtime = path.stat().st_mtime
        except FileNotFoundError:
            # A missing file is treated as stale so the caller re-fetches it.
            return True
        return time.time() - mtime > self._max_age_seconds

    def download(self, station: WeatherStation) -> WeatherFiles:
        """Download and extract weather files for *station*.

        If the files are already cached and not stale, no network request is made.

        Args:
            station: The weather station to download files for.

        Returns:
            A [WeatherFiles][idfkit.weather.download.WeatherFiles] with paths to the extracted files.

        Raises:
            RuntimeError: If the download or extraction fails, or if the
                archive does not contain an ``.epw`` or ``.ddy`` file.
        """
        # Imported locally so this method does not depend on a new
        # module-level import.  ``IncompleteRead`` (raised by ``resp.read()``
        # when the connection drops mid-body) derives from ``HTTPException``,
        # not from ``OSError``.
        from http.client import HTTPException

        # Derive a cache subdirectory from the ZIP filename
        zip_filename = station.url.rsplit("/", maxsplit=1)[-1]
        stem = zip_filename.removesuffix(".zip")
        station_dir = self._cache_dir / "files" / str(station.wmo) / stem
        zip_path = station_dir / zip_filename

        # Download if not cached or if stale
        if not zip_path.exists() or self._is_stale(zip_path):
            station_dir.mkdir(parents=True, exist_ok=True)
            logger.info("Downloading weather data for %s (WMO %s)", station.display_name, station.wmo)
            # Stage the payload next to its final location and rename it into
            # place only once it is fully written, so a failed or interrupted
            # write can never leave a truncated archive that later calls
            # would treat as a valid cache hit.
            partial_path = station_dir / (zip_filename + ".part")
            try:
                req = Request(station.url, headers={"User-Agent": _USER_AGENT})  # noqa: S310
                with urlopen(req, timeout=120) as resp:  # noqa: S310
                    partial_path.write_bytes(resp.read())
                partial_path.replace(zip_path)
            except (HTTPError, URLError, TimeoutError, OSError, HTTPException) as exc:
                try:
                    partial_path.unlink(missing_ok=True)
                except OSError:
                    # Best-effort cleanup; the download error below is the
                    # one worth reporting.
                    pass
                msg = f"Failed to download weather data from {station.url}: {exc}"
                raise RuntimeError(msg) from exc
        else:
            logger.debug("Cache hit for station %s (WMO %s)", station.display_name, station.wmo)

        # Extract if the EPW doesn't already exist or if the ZIP is newer than
        # the EPW (i.e. we just re-downloaded).  Freshness is judged against
        # the ZIP's mtime rather than ``_is_stale(epw_path)`` so that the age
        # of the extracted file itself never triggers a re-extraction.
        epw_path = self._find_file(station_dir, ".epw")
        needs_extract = epw_path is None or (zip_path.exists() and epw_path.stat().st_mtime < zip_path.stat().st_mtime)
        if needs_extract:
            try:
                with zipfile.ZipFile(zip_path) as zf:
                    zf.extractall(station_dir)
            except zipfile.BadZipFile as exc:
                # Drop the corrupt archive so the next call downloads it again
                # instead of failing on the same cached file forever.
                try:
                    zip_path.unlink(missing_ok=True)
                except OSError:
                    pass
                msg = f"Downloaded file is not a valid ZIP archive: {zip_path}"
                raise RuntimeError(msg) from exc
            epw_path = self._find_file(station_dir, ".epw")

        if epw_path is None:
            msg = f"No .epw file found in downloaded archive for {station.display_name}"
            raise RuntimeError(msg)

        ddy_path = self._find_file(station_dir, ".ddy")
        if ddy_path is None:
            msg = f"No .ddy file found in downloaded archive for {station.display_name}"
            raise RuntimeError(msg)

        # The STAT file is optional: ``None`` is passed through when absent.
        stat_path = self._find_file(station_dir, ".stat")

        return WeatherFiles(
            epw=epw_path,
            ddy=ddy_path,
            stat=stat_path,
            zip_path=zip_path,
            station=station,
        )

    def get_epw(self, station: WeatherStation) -> Path:
        """Download and return the path to the EPW file."""
        return self.download(station).epw

    def get_ddy(self, station: WeatherStation) -> Path:
        """Download and return the path to the DDY file."""
        return self.download(station).ddy

    def clear_cache(self) -> None:
        """Remove all cached weather files.

        This removes the entire ``files/`` subdirectory within the cache,
        which contains all downloaded ZIP archives and extracted files.
        Does nothing if that directory does not exist.
        """
        files_dir = self._cache_dir / "files"
        if files_dir.exists():
            shutil.rmtree(files_dir)

    @staticmethod
    def _find_file(directory: Path, suffix: str) -> Path | None:
        """Find the first file with the given suffix in *directory*.

        The suffix comparison is case-insensitive and only regular files
        directly inside *directory* are considered.  Entries are examined in
        sorted order so the result does not depend on the order in which the
        filesystem happens to list them.

        Args:
            directory: The directory to scan (not recursive).
            suffix: The file extension to look for, including the dot.

        Returns:
            The first matching path in sorted order, or ``None`` if no file
            matches.
        """
        wanted = suffix.lower()
        for candidate in sorted(directory.iterdir()):
            if candidate.suffix.lower() == wanted and candidate.is_file():
                return candidate
        return None

`download(station)` ¶

Download and extract weather files for `station`.

If the files are already cached and not stale, no network request is made.

Parameters:

Name	Type	Description	Default
`station`	`WeatherStation`	The weather station to download files for.	required

Returns:

Type	Description
`WeatherFiles`	A WeatherFiles with paths to the extracted files.

Raises:

Type	Description
`RuntimeError`	If the download or extraction fails.

Source code in src/idfkit/weather/download.py

def download(self, station: WeatherStation) -> WeatherFiles:
    """Download and extract weather files for *station*.

    If the files are already cached and not stale, no network request is made.

    Args:
        station: The weather station to download files for.

    Returns:
        A [WeatherFiles][idfkit.weather.download.WeatherFiles] with paths to the extracted files.

    Raises:
        RuntimeError: If the download or extraction fails, or if the
            archive does not contain an ``.epw`` or ``.ddy`` file.
    """
    # Imported locally so this method does not depend on a new module-level
    # import.  ``IncompleteRead`` (raised by ``resp.read()`` when the
    # connection drops mid-body) derives from ``HTTPException``, not from
    # ``OSError``.
    from http.client import HTTPException

    # Derive a cache subdirectory from the ZIP filename
    zip_filename = station.url.rsplit("/", maxsplit=1)[-1]
    stem = zip_filename.removesuffix(".zip")
    station_dir = self._cache_dir / "files" / str(station.wmo) / stem
    zip_path = station_dir / zip_filename

    # Download if not cached or if stale
    if not zip_path.exists() or self._is_stale(zip_path):
        station_dir.mkdir(parents=True, exist_ok=True)
        logger.info("Downloading weather data for %s (WMO %s)", station.display_name, station.wmo)
        # Stage the payload next to its final location and rename it into
        # place only once it is fully written, so a failed or interrupted
        # write can never leave a truncated archive that later calls would
        # treat as a valid cache hit.
        partial_path = station_dir / (zip_filename + ".part")
        try:
            req = Request(station.url, headers={"User-Agent": _USER_AGENT})  # noqa: S310
            with urlopen(req, timeout=120) as resp:  # noqa: S310
                partial_path.write_bytes(resp.read())
            partial_path.replace(zip_path)
        except (HTTPError, URLError, TimeoutError, OSError, HTTPException) as exc:
            try:
                partial_path.unlink(missing_ok=True)
            except OSError:
                # Best-effort cleanup; the download error below is the one
                # worth reporting.
                pass
            msg = f"Failed to download weather data from {station.url}: {exc}"
            raise RuntimeError(msg) from exc
    else:
        logger.debug("Cache hit for station %s (WMO %s)", station.display_name, station.wmo)

    # Extract if the EPW doesn't already exist or if the ZIP is newer than
    # the EPW (i.e. we just re-downloaded).  Freshness is judged against the
    # ZIP's mtime rather than ``_is_stale(epw_path)`` so that the age of the
    # extracted file itself never triggers a re-extraction.
    epw_path = self._find_file(station_dir, ".epw")
    needs_extract = epw_path is None or (zip_path.exists() and epw_path.stat().st_mtime < zip_path.stat().st_mtime)
    if needs_extract:
        try:
            with zipfile.ZipFile(zip_path) as zf:
                zf.extractall(station_dir)
        except zipfile.BadZipFile as exc:
            # Drop the corrupt archive so the next call downloads it again
            # instead of failing on the same cached file forever.
            try:
                zip_path.unlink(missing_ok=True)
            except OSError:
                pass
            msg = f"Downloaded file is not a valid ZIP archive: {zip_path}"
            raise RuntimeError(msg) from exc
        epw_path = self._find_file(station_dir, ".epw")

    if epw_path is None:
        msg = f"No .epw file found in downloaded archive for {station.display_name}"
        raise RuntimeError(msg)

    ddy_path = self._find_file(station_dir, ".ddy")
    if ddy_path is None:
        msg = f"No .ddy file found in downloaded archive for {station.display_name}"
        raise RuntimeError(msg)

    # The STAT file is optional: ``None`` is passed through when absent.
    stat_path = self._find_file(station_dir, ".stat")

    return WeatherFiles(
        epw=epw_path,
        ddy=ddy_path,
        stat=stat_path,
        zip_path=zip_path,
        station=station,
    )

WeatherFiles¶

`idfkit.weather.download.WeatherFiles` `dataclass` ¶

Paths to downloaded and extracted weather files.

Attributes:

Name	Type	Description
`epw`	`Path`	Path to the `.epw` file (always present after extraction).
`ddy`	`Path`	Path to the `.ddy` file (always present after extraction).
`stat`	`Path \| None`	Path to the `.stat` file, or `None` if not included.
`zip_path`	`Path`	Path to the original downloaded ZIP archive.
`station`	`WeatherStation`	The station this download corresponds to.

Source code in src/idfkit/weather/download.py

@dataclass(frozen=True)
class WeatherFiles:
    """Immutable record of the file paths produced by one weather download.

    Attributes:
        epw: Location of the extracted ``.epw`` file; always set.
        ddy: Location of the extracted ``.ddy`` file; always set.
        stat: Location of the extracted ``.stat`` file, or ``None`` when
            no such file was found.
        zip_path: Location of the downloaded ZIP archive.
        station: The station these files were downloaded for.
    """

    # Field order is part of the positional constructor signature.
    epw: Path
    ddy: Path
    stat: Path | None
    zip_path: Path
    station: WeatherStation

Download API¶

WeatherDownloader¶

`idfkit.weather.download.WeatherDownloader` ¶

`download(station)` ¶

WeatherFiles¶

`idfkit.weather.download.WeatherFiles` `dataclass` ¶

`epw` `instance-attribute` ¶

`ddy` `instance-attribute` ¶

Download API¶

WeatherDownloader¶

idfkit.weather.download.WeatherDownloader ¶

download(station) ¶

WeatherFiles¶

idfkit.weather.download.WeatherFiles dataclass ¶

epw instance-attribute ¶

ddy instance-attribute ¶

`idfkit.weather.download.WeatherDownloader` ¶

`download(station)` ¶

`idfkit.weather.download.WeatherFiles` `dataclass` ¶

`epw` `instance-attribute` ¶

`ddy` `instance-attribute` ¶