Skip to content

Cache

cxg.cache

CacheState dataclass

Snapshot of a cache file's current state.

Source code in src/cxg/cache.py
@dataclass(slots=True)
class CacheState:
    """Snapshot of a cache file's current state.

    This is the return type of ``get_cache_state`` and
    ``get_collections_cache_state``.

    NOTE(review): the fields are populated by ``_build_cache_state``; the
    per-field notes below are inferred from the field names and from the
    keys the cache writers store ("fetched_at", "count", "response_bytes").
    Confirm them against that helper.
    """

    # Cache file this snapshot describes.
    path: Path
    # Presumably whether the file was present when the snapshot was taken.
    exists: bool
    # Presumably the parsed "fetched_at" timestamp; None when unavailable.
    fetched_at: datetime | None
    # Presumably the stored "count" of cached records.
    count: int
    # Presumably the stored "response_bytes" value passed to the cache writer.
    response_bytes: int
    # Presumably the on-disk size of the cache file -- TODO confirm.
    size_bytes: int
    # Presumably the result of the TTL check for ``fetched_at``.
    is_expired: bool

get_cache_dir()

Return the cache directory path, respecting CXG_CACHE_DIR, then config.

Source code in src/cxg/cache.py
def get_cache_dir() -> Path:
    """Resolve the directory that holds the cache files.

    Lookup order:

    1. A non-empty ``CXG_CACHE_DIR`` environment variable.
    2. A truthy ``cache.dir`` value from the loaded configuration.
    3. ``user_cache_dir("cxg")``.

    The first two candidates have ``~`` expanded; the fallback is used as
    returned.
    """
    env_dir = os.environ.get("CXG_CACHE_DIR")
    if env_dir:
        return Path(env_dir).expanduser()

    # Imported lazily, and only when the environment override is absent.
    # NOTE(review): presumably this avoids an import cycle -- confirm.
    from cxg.config import load_config

    configured_dir = load_config().cache.dir
    if not configured_dir:
        return Path(user_cache_dir("cxg"))
    return Path(configured_dir).expanduser()

get_cache_ttl()

Return the cache TTL in seconds, respecting CXG_CACHE_TTL, then config.

Source code in src/cxg/cache.py
def get_cache_ttl() -> int:
    """Resolve the cache time-to-live in seconds.

    ``CXG_CACHE_TTL`` wins whenever it is set: an integer value is clamped
    to be non-negative, and a non-integer value yields ``DEFAULT_CACHE_TTL``
    without consulting the configuration. When the variable is unset, the
    configured ``cache.ttl`` is used (clamped the same way), falling back to
    ``DEFAULT_CACHE_TTL`` if it is ``None``.
    """
    env_ttl = os.environ.get("CXG_CACHE_TTL")

    if env_ttl is None:
        # Configuration is only loaded when there is no environment override.
        from cxg.config import load_config

        configured_ttl = load_config().cache.ttl
        if configured_ttl is None:
            return DEFAULT_CACHE_TTL
        return max(0, configured_ttl)

    try:
        seconds = int(env_ttl)
    except ValueError:
        return DEFAULT_CACHE_TTL
    return max(0, seconds)

datasets_cache_path()

Return the path to the datasets cache file.

Source code in src/cxg/cache.py
def datasets_cache_path() -> Path:
    """Locate the datasets cache file inside the resolved cache directory."""
    cache_root = get_cache_dir()
    return cache_root / DATASETS_CACHE_NAME

collections_cache_path()

Return the path to the collections cache file.

Source code in src/cxg/cache.py
def collections_cache_path() -> Path:
    """Locate the collections cache file inside the resolved cache directory."""
    cache_root = get_cache_dir()
    return cache_root / COLLECTIONS_CACHE_NAME

load_json(path)

Load and parse a JSON file, returning None on failure.

Source code in src/cxg/cache.py
def load_json(path: Path) -> dict[str, Any] | None:
    """Load and parse a JSON file, returning None on failure."""
    if not path.exists():
        return None
    try:
        with path.open("r", encoding="utf-8") as handle:
            return json.load(handle)
    except (json.JSONDecodeError, PermissionError):
        return None

load_datasets_cache()

Load the datasets cache file.

Source code in src/cxg/cache.py
def load_datasets_cache() -> dict[str, Any] | None:
    """Read the datasets cache payload via ``load_json``."""
    cache_file = datasets_cache_path()
    return load_json(cache_file)

load_collections_cache()

Load the collections cache file.

Source code in src/cxg/cache.py
def load_collections_cache() -> dict[str, Any] | None:
    """Read the collections cache payload via ``load_json``."""
    cache_file = collections_cache_path()
    return load_json(cache_file)

write_json_atomic(path, payload)

Atomically write a JSON payload using a temp file and rename.

Source code in src/cxg/cache.py
def write_json_atomic(path: Path, payload: dict[str, Any]) -> None:
    """Write *payload* as JSON to *path* via a temp file and rename.

    The parent directory is created if needed. The payload is serialized
    (ASCII-escaped) into a hidden ``.<name>.*.tmp`` file in that same
    directory, flushed and fsynced, and then moved over *path*. Whatever
    happens, no temporary file is left behind.
    """
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)

    # The scratch file lives next to the destination so the final move stays
    # within one directory.
    descriptor, scratch_name = tempfile.mkstemp(
        dir=target_dir,
        prefix=f".{path.name}.",
        suffix=".tmp",
    )
    scratch = Path(scratch_name)
    try:
        with os.fdopen(descriptor, "w", encoding="utf-8") as stream:
            serialized = json.dumps(payload, ensure_ascii=True)
            stream.write(serialized)
            stream.flush()
            os.fsync(stream.fileno())
        os.replace(scratch, path)
    finally:
        # After a successful move the scratch file is gone and this is a
        # no-op; after a failure it removes the leftover.
        scratch.unlink(missing_ok=True)

write_datasets_cache(datasets, response_bytes)

Sort, index, and write datasets to the cache file.

Source code in src/cxg/cache.py
def write_datasets_cache(
    datasets: list[dict[str, Any]], response_bytes: int
) -> list[dict[str, Any]]:
    """Order, index, and persist *datasets*; return the ordered list.

    The input list itself is not reordered; a sorted copy is indexed with
    ``_assign_indices``, written to the datasets cache file together with a
    UTC ``fetched_at`` timestamp, the record count and *response_bytes*, and
    then returned.
    """

    def newest_first_key(entry: dict[str, Any]) -> tuple[Any, Any]:
        # Missing or falsy values sort as the empty string.
        return (entry.get("published_at") or "", entry.get("dataset_id") or "")

    # Descending by published_at (the default list ordering), with dataset_id
    # breaking ties, so that assigned indices run contiguously in that view.
    ordered = list(datasets)
    ordered.sort(key=newest_first_key, reverse=True)
    _assign_indices(ordered)

    document = {
        "fetched_at": datetime.now(tz=UTC).isoformat(),
        "count": len(ordered),
        "response_bytes": response_bytes,
        "datasets": ordered,
    }
    write_json_atomic(datasets_cache_path(), document)
    return ordered

write_collections_cache(collections, response_bytes)

Sort, index, and write collections to the cache file.

Source code in src/cxg/cache.py
def write_collections_cache(
    collections: list[dict[str, Any]], response_bytes: int
) -> list[dict[str, Any]]:
    """Order, index, and persist *collections*; return the ordered list.

    The input list itself is not reordered; a sorted copy is indexed with
    ``_assign_indices``, written to the collections cache file together with
    a UTC ``fetched_at`` timestamp, the record count and *response_bytes*,
    and then returned.
    """

    def by_name_key(entry: dict[str, Any]) -> tuple[str, Any]:
        # Case-insensitive name first, collection_id as the tiebreaker;
        # missing or falsy values sort as the empty string.
        folded_name = (entry.get("name") or "").casefold()
        return (folded_name, entry.get("collection_id") or "")

    ordered = list(collections)
    ordered.sort(key=by_name_key)
    _assign_indices(ordered)

    document = {
        "fetched_at": datetime.now(tz=UTC).isoformat(),
        "count": len(ordered),
        "response_bytes": response_bytes,
        "collections": ordered,
    }
    write_json_atomic(collections_cache_path(), document)
    return ordered

clear_cache()

Delete all cache files and return a list of removed paths.

Source code in src/cxg/cache.py
def clear_cache() -> list[Path]:
    """Remove the datasets and collections cache files.

    Returns the paths that were actually deleted, datasets first; files that
    were not present are skipped and left out of the result.
    """
    deleted: list[Path] = []
    candidates = (datasets_cache_path(), collections_cache_path())
    for cache_file in candidates:
        if not cache_file.exists():
            continue
        cache_file.unlink()
        deleted.append(cache_file)
    return deleted

is_expired(fetched_at, ttl=None)

Check if a cache timestamp has expired.

A TTL of 0 or a missing timestamp is always considered expired.

Source code in src/cxg/cache.py
def is_expired(fetched_at: datetime | None, ttl: int | None = None) -> bool:
    """Check if a cache timestamp has expired.

    A TTL of 0 or a missing timestamp is always considered expired.
    """
    if ttl is None:
        ttl = get_cache_ttl()
    if ttl == 0:
        return True
    if fetched_at is None:
        return True
    age = (datetime.now(tz=UTC) - fetched_at).total_seconds()
    return age >= ttl

current_fetched_at(path)

Read the fetched_at timestamp from a cache file (the whole file is parsed to obtain it).

Source code in src/cxg/cache.py
def current_fetched_at(path: Path) -> str | None:
    """Return the ``fetched_at`` value stored in the cache file at *path*.

    The whole file is parsed with ``load_json``; only the timestamp is
    returned, still in its stored string form.

    Args:
        path: Cache file to inspect.

    Returns:
        The stored timestamp string, or None when the file cannot be loaded,
        its top-level value is not a JSON object, or ``fetched_at`` is
        missing or not a string.
    """
    payload = load_json(path)
    # Guard against a non-object JSON root, which has no ``.get``.
    if not isinstance(payload, dict):
        return None
    stamp = payload.get("fetched_at")
    # Honour the declared return type: anything but a string is unusable.
    return stamp if isinstance(stamp, str) else None

get_cache_state()

Build a CacheState for the datasets cache.

Source code in src/cxg/cache.py
def get_cache_state() -> CacheState:
    """Describe the datasets cache file as a ``CacheState``."""
    cache_file = datasets_cache_path()
    return _build_cache_state(cache_file)

get_collections_cache_state()

Build a CacheState for the collections cache.

Source code in src/cxg/cache.py
def get_collections_cache_state() -> CacheState:
    """Describe the collections cache file as a ``CacheState``."""
    cache_file = collections_cache_path()
    return _build_cache_state(cache_file)