Skip to content

state

napt.state

State tracking and version management for NAPT.

This module provides state persistence for tracking discovered application versions, ETags, and file metadata between runs. This enables:

  • Efficient conditional downloads (HTTP 304 Not Modified)
  • Version change detection
  • Bandwidth optimization for scheduled workflows

The state file is a JSON file that stores:

  • Discovered versions from vendors
  • HTTP ETags and Last-Modified headers for conditional requests
  • File paths and SHA-256 hashes for cached installers
  • Last checked timestamps for monitoring

State tracking is enabled by default and can be disabled with the `--stateless` flag.

Example

Basic usage:

from pathlib import Path
from napt.state import load_state, save_state

state = load_state(Path("state/versions.json"))

app_id = "napt-chrome"
cache = state.get("apps", {}).get(app_id)

state["apps"][app_id] = {
    "url": "https://dl.google.com/chrome.msi",
    "etag": 'W/"abc123"',
    "sha256": "abc123...",
    "known_version": "130.0.0"
}

save_state(state, Path("state/versions.json"))

StateTracker

Manages application state tracking with automatic persistence.

This class provides a high-level interface for loading, querying, and updating the state file. It handles file I/O, error recovery, and provides convenience methods for common operations.

Attributes:

Name Type Description
state_file Path

Path to the JSON state file.

state dict[str, Any]

In-memory state dictionary.

Example

Basic usage:

from pathlib import Path

tracker = StateTracker(Path("state/versions.json"))
tracker.load()
cache = tracker.get_cache("napt-chrome")
tracker.update_cache(
    "napt-chrome",
    url="https://...",
    sha256="...",
    known_version="130.0.0"
)
tracker.save()

Source code in napt/state/tracker.py
class StateTracker:
    """Manages application state tracking with automatic persistence.

    This class provides a high-level interface for loading, querying, and
    updating the state file. It handles file I/O, error recovery, and
    provides convenience methods for common operations.

    Attributes:
        state_file: Path to the JSON state file.
        state: In-memory state dictionary.

    Example:
        Basic usage:
            ```python
            from pathlib import Path

            tracker = StateTracker(Path("state/versions.json"))
            tracker.load()
            cache = tracker.get_cache("napt-chrome")
            tracker.update_cache(
                "napt-chrome",
                url="https://...",
                sha256="...",
                known_version="130.0.0"
            )
            tracker.save()
            ```

    """

    def __init__(self, state_file: Path):
        """Initialize state tracker.

        Args:
            state_file: Path to JSON state file. Created if doesn't exist.

        """
        self.state_file = state_file
        self.state: dict[str, Any] = {}

    def load(self) -> dict[str, Any]:
        """Load state from file.

        Creates default state structure if file doesn't exist.
        Handles corrupted files by creating backup and starting fresh.

        Returns:
            Loaded state dictionary.

        Raises:
            OSError: If file permissions prevent reading.

        """
        try:
            self.state = load_state(self.state_file)
        except FileNotFoundError:
            # First run, create default state
            self.state = create_default_state()
            self.state_file.parent.mkdir(parents=True, exist_ok=True)
            self.save()
        except json.JSONDecodeError as err:
            # Corrupted file, backup and create new
            backup = self.state_file.with_suffix(".json.backup")
            self.state_file.rename(backup)
            self.state = create_default_state()
            self.save()
            raise PackagingError(
                f"Corrupted state file backed up to {backup}. "
                f"Created fresh state file."
            ) from err

        return self.state

    def save(self) -> None:
        """Save current state to file.

        Updates metadata.last_updated timestamp automatically.
        Creates parent directories if needed.

        Raises:
            OSError: If file permissions prevent writing.

        """
        # Update metadata
        self.state.setdefault("metadata", {})
        self.state["metadata"]["last_updated"] = datetime.now(UTC).isoformat()

        # Ensure parent directory exists
        self.state_file.parent.mkdir(parents=True, exist_ok=True)

        save_state(self.state, self.state_file)

    def get_cache(self, recipe_id: str) -> dict[str, Any] | None:
        """Get cached information for a recipe.

        Args:
            recipe_id: Recipe identifier (from recipe's 'id' field).

        Returns:
            Cached data if available, None otherwise.

        Example:
            Retrieve cached information:
                ```python
                cache = tracker.get_cache("napt-chrome")
                if cache:
                    etag = cache.get('etag')
                    known_version = cache.get('known_version')
                ```

        """
        return self.state.get("apps", {}).get(recipe_id)

    def update_cache(
        self,
        recipe_id: str,
        url: str,
        sha256: str,
        etag: str | None = None,
        last_modified: str | None = None,
        known_version: str | None = None,
        strategy: str | None = None,
    ) -> None:
        """Update cached information for a recipe.

        Args:
            recipe_id: Recipe identifier.
            url: Download URL for provenance tracking. For version-first strategies
                (url_pattern, api_github, api_json), this is the actual download URL
                from version_info. For file-first (url_download), this is source.url.
            sha256: SHA-256 hash of file (for integrity checks).
            etag: ETag header from download response. Used by url_download for HTTP 304
                conditional requests. Saved but unused by version-first strategies.
            last_modified: Last-Modified header from download response.
                Used by url_download as fallback for conditional requests.
                Saved but unused by version-first.
            known_version: Version string. PRIMARY cache key for
                version-first strategies (compared to skip downloads).
                Informational only for url_download.
            strategy: Discovery strategy used (for debugging).

        Example:
            Update cache entry:
                ```python
                tracker.update_cache(
                    "napt-chrome",
                    url="https://dl.google.com/chrome.msi",
                    sha256="abc123...",
                    etag='W/"def456"',
                    known_version="130.0.0"
                )
                ```

        Note:
            Schema v2: Removed file_path, last_checked, and renamed
            version -> known_version.

            Field usage differs by strategy type:

            - Version-first: known_version is PRIMARY cache key,
                etag/last_modified unused
            - File-first: etag/last_modified are PRIMARY cache keys,
                known_version informational

            Filesystem is the source of truth; state is for optimization only.

        """
        if "apps" not in self.state:
            self.state["apps"] = {}

        cache_entry = {
            "url": url,
            "etag": etag,
            "last_modified": last_modified,
            "sha256": sha256,
        }

        # Optional fields (only add if provided)
        if known_version is not None:
            cache_entry["known_version"] = known_version
        if strategy is not None:
            cache_entry["strategy"] = strategy

        self.state["apps"][recipe_id] = cache_entry

    def has_version_changed(self, recipe_id: str, new_version: str) -> bool:
        """Check if discovered version differs from cached known_version.

        Args:
            recipe_id: Recipe identifier.
            new_version: Newly discovered version.

        Returns:
            True if version changed or no cached version exists.

        Example:
            Check if version has changed:
                ```python
                if tracker.has_version_changed("napt-chrome", "130.0.0"):
                    print("New version available!")
                ```

        Note:
            Uses 'known_version' field which is informational only.
            Real version should be extracted from filesystem during build.

        """
        cache = self.get_cache(recipe_id)
        if not cache:
            return True  # No cache, treat as changed

        return cache.get("known_version") != new_version

__init__

__init__(state_file: Path)

Initialize state tracker.

Parameters:

Name Type Description Default
state_file Path

Path to JSON state file. Created if it doesn't exist.

required
Source code in napt/state/tracker.py
def __init__(self, state_file: Path):
    """Set up a tracker bound to one state file.

    Only attributes are assigned here; nothing is read from or written
    to disk.

    Args:
        state_file: Location of the JSON state file.

    """
    # Start with an empty in-memory state
    self.state: dict[str, Any] = {}
    self.state_file = state_file

load

load() -> dict[str, Any]

Load state from file.

Creates default state structure if file doesn't exist. Handles corrupted files by creating backup and starting fresh.

Returns:

Type Description
dict[str, Any]

Loaded state dictionary.

Raises:

Type Description
OSError

If file permissions prevent reading.

Source code in napt/state/tracker.py
def load(self) -> dict[str, Any]:
    """Load state from file.

    Creates and saves a default state structure if the file doesn't
    exist. If the file contains invalid JSON, it is moved aside to a
    ``.json.backup`` sibling, a fresh default state is saved in its
    place, and PackagingError is raised so the caller learns about
    the reset.

    Returns:
        Loaded state dictionary.

    Raises:
        PackagingError: If the state file contained invalid JSON. The
            corrupted file has already been backed up and a fresh
            state file written by the time this is raised.
        OSError: If file permissions prevent reading.

    """
    try:
        self.state = load_state(self.state_file)
    except FileNotFoundError:
        # First run: persist a default state. save() creates the
        # parent directory, so no separate mkdir is needed here.
        self.state = create_default_state()
        self.save()
    except json.JSONDecodeError as err:
        # Corrupted file, backup and create new
        backup = self.state_file.with_suffix(".json.backup")
        # replace() rather than rename(): on Windows rename() raises
        # FileExistsError when a backup from an earlier corruption is
        # still present, which would hide the real problem.
        self.state_file.replace(backup)
        self.state = create_default_state()
        self.save()
        raise PackagingError(
            f"Corrupted state file backed up to {backup}. "
            f"Created fresh state file."
        ) from err

    return self.state

save

save() -> None

Save current state to file.

Updates metadata.last_updated timestamp automatically. Creates parent directories if needed.

Raises:

Type Description
OSError

If file permissions prevent writing.

Source code in napt/state/tracker.py
def save(self) -> None:
    """Persist the in-memory state to the state file.

    Stamps ``metadata.last_updated`` with the current UTC time in ISO
    format, makes sure the parent directory exists, and then delegates
    the write to save_state().

    Raises:
        OSError: If file permissions prevent writing.

    """
    target = self.state_file
    stamp = datetime.now(UTC).isoformat()

    # Create the metadata section on demand, then record the write time
    metadata = self.state.setdefault("metadata", {})
    metadata["last_updated"] = stamp

    target.parent.mkdir(parents=True, exist_ok=True)
    save_state(self.state, target)

get_cache

get_cache(recipe_id: str) -> dict[str, Any] | None

Get cached information for a recipe.

Parameters:

Name Type Description Default
recipe_id str

Recipe identifier (from recipe's 'id' field).

required

Returns:

Type Description
dict[str, Any] | None

Cached data if available, None otherwise.

Example

Retrieve cached information:

cache = tracker.get_cache("napt-chrome")
if cache:
    etag = cache.get('etag')
    known_version = cache.get('known_version')

Source code in napt/state/tracker.py
def get_cache(self, recipe_id: str) -> dict[str, Any] | None:
    """Get cached information for a recipe.

    Args:
        recipe_id: Recipe identifier (from recipe's 'id' field).

    Returns:
        Cached data if available, None otherwise.

    Example:
        Retrieve cached information:
            ```python
            cache = tracker.get_cache("napt-chrome")
            if cache:
                etag = cache.get('etag')
                known_version = cache.get('known_version')
            ```

    """
    return self.state.get("apps", {}).get(recipe_id)

update_cache

update_cache(recipe_id: str, url: str, sha256: str, etag: str | None = None, last_modified: str | None = None, known_version: str | None = None, strategy: str | None = None) -> None

Update cached information for a recipe.

Parameters:

Name Type Description Default
recipe_id str

Recipe identifier.

required
url str

Download URL for provenance tracking. For version-first strategies (url_pattern, api_github, api_json), this is the actual download URL from version_info. For file-first (url_download), this is source.url.

required
sha256 str

SHA-256 hash of file (for integrity checks).

required
etag str | None

ETag header from download response. Used by url_download for HTTP 304 conditional requests. Saved but unused by version-first strategies.

None
last_modified str | None

Last-Modified header from download response. Used by url_download as fallback for conditional requests. Saved but unused by version-first.

None
known_version str | None

Version string. PRIMARY cache key for version-first strategies (compared to skip downloads). Informational only for url_download.

None
strategy str | None

Discovery strategy used (for debugging).

None
Example

Update cache entry:

tracker.update_cache(
    "napt-chrome",
    url="https://dl.google.com/chrome.msi",
    sha256="abc123...",
    etag='W/"def456"',
    known_version="130.0.0"
)

Note

Schema v2: Removed file_path, last_checked, and renamed version -> known_version.

Field usage differs by strategy type:

  • Version-first: known_version is PRIMARY cache key, etag/last_modified unused
  • File-first: etag/last_modified are PRIMARY cache keys, known_version informational

Filesystem is the source of truth; state is for optimization only.

Source code in napt/state/tracker.py
def update_cache(
    self,
    recipe_id: str,
    url: str,
    sha256: str,
    etag: str | None = None,
    last_modified: str | None = None,
    known_version: str | None = None,
    strategy: str | None = None,
) -> None:
    """Update cached information for a recipe.

    Args:
        recipe_id: Recipe identifier.
        url: Download URL for provenance tracking. For version-first strategies
            (url_pattern, api_github, api_json), this is the actual download URL
            from version_info. For file-first (url_download), this is source.url.
        sha256: SHA-256 hash of file (for integrity checks).
        etag: ETag header from download response. Used by url_download for HTTP 304
            conditional requests. Saved but unused by version-first strategies.
        last_modified: Last-Modified header from download response.
            Used by url_download as fallback for conditional requests.
            Saved but unused by version-first.
        known_version: Version string. PRIMARY cache key for
            version-first strategies (compared to skip downloads).
            Informational only for url_download.
        strategy: Discovery strategy used (for debugging).

    Example:
        Update cache entry:
            ```python
            tracker.update_cache(
                "napt-chrome",
                url="https://dl.google.com/chrome.msi",
                sha256="abc123...",
                etag='W/"def456"',
                known_version="130.0.0"
            )
            ```

    Note:
        Schema v2: Removed file_path, last_checked, and renamed
        version -> known_version.

        Field usage differs by strategy type:

        - Version-first: known_version is PRIMARY cache key,
            etag/last_modified unused
        - File-first: etag/last_modified are PRIMARY cache keys,
            known_version informational

        Filesystem is the source of truth; state is for optimization only.

    """
    if "apps" not in self.state:
        self.state["apps"] = {}

    cache_entry = {
        "url": url,
        "etag": etag,
        "last_modified": last_modified,
        "sha256": sha256,
    }

    # Optional fields (only add if provided)
    if known_version is not None:
        cache_entry["known_version"] = known_version
    if strategy is not None:
        cache_entry["strategy"] = strategy

    self.state["apps"][recipe_id] = cache_entry

has_version_changed

has_version_changed(recipe_id: str, new_version: str) -> bool

Check if discovered version differs from cached known_version.

Parameters:

Name Type Description Default
recipe_id str

Recipe identifier.

required
new_version str

Newly discovered version.

required

Returns:

Type Description
bool

True if version changed or no cached version exists.

Example

Check if version has changed:

if tracker.has_version_changed("napt-chrome", "130.0.0"):
    print("New version available!")

Note

Uses 'known_version' field which is informational only. Real version should be extracted from filesystem during build.

Source code in napt/state/tracker.py
def has_version_changed(self, recipe_id: str, new_version: str) -> bool:
    """Report whether a discovered version differs from the cached one.

    Args:
        recipe_id: Recipe identifier.
        new_version: Newly discovered version.

    Returns:
        True when the recipe has no (or an empty) cache entry, or when
        the entry's 'known_version' is not equal to new_version;
        False when they match.

    Example:
        Check if version has changed:
            ```python
            if tracker.has_version_changed("napt-chrome", "130.0.0"):
                print("New version available!")
            ```

    Note:
        Uses 'known_version' field which is informational only.
        Real version should be extracted from filesystem during build.

    """
    entry = self.get_cache(recipe_id)
    # Nothing cached counts as "changed"; otherwise compare versions
    return not entry or entry.get("known_version") != new_version

load_state

load_state(state_file: Path) -> dict[str, Any]

Load state from JSON file.

Parameters:

Name Type Description Default
state_file Path

Path to JSON state file.

required

Returns:

Type Description
dict[str, Any]

Loaded state dictionary.

Raises:

Type Description
FileNotFoundError

If state file doesn't exist.

JSONDecodeError

If file contains invalid JSON.

OSError

If file cannot be read due to permissions.

Example

Load state from file:

from pathlib import Path

state = load_state(Path("state/versions.json"))
apps = state.get("apps", {})

Source code in napt/state/tracker.py
def load_state(state_file: Path) -> dict[str, Any]:
    """Read the state file and parse it as JSON.

    The file is read as UTF-8 text. No validation or default-filling is
    applied; the parsed JSON value is returned as-is.

    Args:
        state_file: Path to JSON state file.

    Returns:
        Loaded state dictionary.

    Raises:
        FileNotFoundError: If state file doesn't exist.
        json.JSONDecodeError: If file contains invalid JSON.
        OSError: If file cannot be read due to permissions.

    Example:
        Load state from file:
            ```python
            from pathlib import Path

            state = load_state(Path("state/versions.json"))
            apps = state.get("apps", {})
            ```

    """
    with open(state_file, encoding="utf-8") as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

save_state

save_state(state: dict[str, Any], state_file: Path) -> None

Save state to JSON file with pretty-printing.

Creates parent directories if needed. Uses 2-space indentation and sorted keys for consistent diffs in version control.

Parameters:

Name Type Description Default
state dict[str, Any]

State dictionary to save.

required
state_file Path

Path to JSON state file.

required

Raises:

Type Description
OSError

If file cannot be written due to permissions.

Example

Save state to file:

from pathlib import Path

state = {"metadata": {}, "apps": {}}
save_state(state, Path("state/versions.json"))

Note
  • Uses 2-space indentation for readability
  • Sorts keys alphabetically for consistent diffs
  • Adds trailing newline for git compatibility
Source code in napt/state/tracker.py
def save_state(state: dict[str, Any], state_file: Path) -> None:
    """Save state to JSON file with pretty-printing.

    Creates parent directories if needed. Uses 2-space indentation
    and sorted keys for consistent diffs in version control.

    The state is serialized in memory before the file is opened, so a
    state that cannot be serialized leaves any existing file untouched.

    Args:
        state: State dictionary to save.
        state_file: Path to JSON state file.

    Raises:
        TypeError: If state contains a value that is not JSON
            serializable. The existing file is not modified.
        OSError: If file cannot be written due to permissions.

    Example:
        Save state to file:
            ```python
            from pathlib import Path

            state = {"metadata": {}, "apps": {}}
            save_state(state, Path("state/versions.json"))
            ```

    Note:
        - Uses 2-space indentation for readability
        - Sorts keys alphabetically for consistent diffs
        - Adds trailing newline for git compatibility

    """
    # Serialize first: opening with "w" truncates the file immediately,
    # so encoding straight into the handle would destroy the previous
    # state (and leave partial JSON) if serialization failed midway.
    payload = json.dumps(state, indent=2, sort_keys=True) + "\n"  # newline for git

    state_file.parent.mkdir(parents=True, exist_ok=True)

    with open(state_file, "w", encoding="utf-8") as f:
        f.write(payload)