# Copyright © The Debusine Developers
# See the AUTHORS file at the top-level directory of this distribution
#
# This file is part of Debusine. It is subject to the license terms
# in the LICENSE file found in the top-level directory of this
# distribution. No part of Debusine, including this file, may be copied,
# modified, propagated, or distributed except according to the terms
# contained in the LICENSE file.

"""Server-side task to mirror an external APT suite into a debusine."""

import binascii
import functools
import logging
import os
import shlex
import shutil
import subprocess
import tempfile
from collections import defaultdict
from collections.abc import Generator, Sequence
from contextlib import ExitStack, contextmanager
from dataclasses import dataclass, field
from enum import Enum, auto
from operator import itemgetter
from pathlib import Path, PurePath
from textwrap import dedent
from typing import TYPE_CHECKING, override
from urllib.parse import ParseResult, urlparse, urlunparse

import requests
from debian.deb822 import Deb822, Packages, Release, Sources
from django.db import transaction
from django_pglocks import advisory_lock

from debusine.artifacts.local_artifact import (
    BinaryPackage,
    RepositoryIndex,
    SourcePackage,
)
from debusine.artifacts.models import (
    ArtifactCategory,
    CollectionCategory,
    WorkRequestResults,
)
from debusine.assets.models import AssetCategory, BasicAPTAuthenticationData
from debusine.client.client_utils import download_file
from debusine.db.locks import LockError, LockType
from debusine.db.models import (
    Artifact,
    ArtifactRelation,
    Asset,
    AssetUsage,
    Collection,
    CollectionItem,
)
from debusine.db.models.tasks import DefaultDynamicData
from debusine.server.collections import DebianSuiteManager
from debusine.server.tasks import BaseServerTask
from debusine.server.tasks.models import APTMirrorData
from debusine.tasks import TaskConfigError
from debusine.tasks.models import BaseDynamicTaskData
from debusine.utils import calculate_hash
from debusine.utils.urls import URLPath

if TYPE_CHECKING:
    from requests.sessions import _Auth


@dataclass(frozen=True, kw_only=True)
class IndexFile:
    """Details of an index file found in a ``Release`` file."""

    component: str
    architecture: str | None = None
    size: int
    sha256: str
    must_exist: bool


class IndexFiles(dict[str, IndexFile]):
    """
    Alternatives for a single index file, indexed by compression suffix.

    Keys are file name suffixes such as ``".xz"``; the empty string is the
    key for the uncompressed variant.
    """


@dataclass(frozen=True, kw_only=True, slots=True)
class IndexedPackage[T: Deb822]:
    """Details of a source or binary package found in an index."""

    #: The parsed index paragraph describing the package.
    contents: T
    #: Component of the index file in which the package was found.
    component: str
    #: Whether the filter that selected this package asked for
    #: authorization failures to be ignored when downloading it.
    ignore_unauthorized: bool


class FilterMatchResult(Enum):
    """Outcome of checking a package against the configured filters."""

    #: None of the filters accepted this package.
    NO_MATCH = auto()

    #: At least one filter accepted this package.
    MATCH = auto()

    #: A filter accepted this package and additionally requested that
    #: failures indicating insufficient authorization (HTTP 401 or 403) be
    #: ignored when downloading it.
    MATCH_IGNORE_UNAUTHORIZED = auto()

    @property
    def matched(self) -> bool:
        """Tell whether any filter accepted this package."""
        # NO_MATCH is the only member that does not represent a match.
        return self is not FilterMatchResult.NO_MATCH


@dataclass(frozen=True, kw_only=True, slots=True)
class PlanAdd[T: Deb822]:
    """A plan for adding a single item to a collection."""

    #: Name of the item to add (package name, version and, for binaries,
    #: architecture joined with underscores).
    name: str
    #: The parsed index paragraph describing the package.
    contents: T
    #: Component of the index in which the package was found.
    component: str
    #: Whether HTTP 401/403 failures while downloading should be ignored.
    ignore_unauthorized: bool = False


@dataclass(frozen=True, kw_only=True, slots=True)
class PlanReplace[T: Deb822](PlanAdd[T]):
    """A plan for replacing a single item in a collection."""

    #: The existing collection item that is to be replaced.
    item: CollectionItem


@dataclass(frozen=True, kw_only=True, slots=True)
class Plan[T: Deb822]:
    """A plan for updating items in a collection."""

    #: Packages present in the remote indexes but not in the collection.
    add: list[PlanAdd[T]] = field(default_factory=list)
    #: Packages present on both sides whose file checksums differ.
    replace: list[PlanReplace[T]] = field(default_factory=list)
    #: Collection items that no longer appear in the remote indexes.
    remove: list[CollectionItem] = field(default_factory=list)


class InconsistentMirrorError(Exception):
    """
    The remote mirror is inconsistent.

    Raised, for example, when no ``Release`` file is available, when a
    downloaded file does not have the expected size or checksum, or when
    none of the alternatives for an index file can be fetched.
    """


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


def _run_and_log_errors(
    args: Sequence[str],
    cwd: Path | None = None,
    env: dict[str, str] | None = None,
) -> subprocess.CompletedProcess[str]:
    """Run a subprocess, logging stderr on failure."""
    try:
        return subprocess.run(
            args, cwd=cwd, env=env, text=True, check=True, capture_output=True
        )
    except subprocess.CalledProcessError as e:
        logger.error("Error output from %s:\n%s", shlex.join(e.cmd), e.stderr)
        raise


class APTMirror(
    BaseServerTask[APTMirrorData, BaseDynamicTaskData],
    DefaultDynamicData[APTMirrorData],
):
    """Task that mirrors an external APT suite into a debusine collection."""

    TASK_VERSION = 1
    TASK_MANAGES_TRANSACTIONS = True

    @functools.cached_property
    def collection(self) -> Collection:
        """
        Look up the `debian:suite` collection this task is updating.

        :raises TaskConfigError: if the task's workspace has no suite
          collection with the configured name.
        """
        wanted_name = self.data.collection
        try:
            suite = Collection.objects.get(
                name=wanted_name,
                category=CollectionCategory.SUITE,
                workspace=self.workspace,
            )
        except Collection.DoesNotExist:
            raise TaskConfigError(
                f"Collection '{wanted_name}' in '{self.workspace}' "
                f"with category '{CollectionCategory.SUITE}' not found"
            )
        return suite

    @functools.cached_property
    def collection_manager(self) -> DebianSuiteManager:
        """Return the suite manager of the collection being updated."""
        suite_manager = self.collection.manager
        # The collection was looked up by suite category, so anything other
        # than a suite manager here is a programming error.
        assert isinstance(suite_manager, DebianSuiteManager)
        return suite_manager

    @functools.cached_property
    def authentication_data(self) -> BasicAPTAuthenticationData | None:
        """
        Return the APT credentials configured for this task, if any.

        :raises InconsistentMirrorError: if the user who created the work
          request may not use the configured authentication asset.
        :return: the asset's data model, or None if the task does not
          configure any authentication.
        """
        slug = self.data.authentication
        if slug is None:
            return None

        asset = Asset.objects.get_by_slug(
            category=AssetCategory.APT_AUTHENTICATION,
            slug=slug,
            workspace=self.workspace,
        )
        requester = self.work_request.created_by
        try:
            usage = asset.usage.get(workspace=self.workspace)
        except AssetUsage.DoesNotExist:
            # No usage record for this workspace: nobody may use the asset.
            allowed = False
        else:
            allowed = usage.can_use_apt_authentication_with(requester)
        if not allowed:
            raise InconsistentMirrorError(
                f"{requester} cannot use asset"
                f" {slug!r} for APT authentication"
            )
        credentials = asset.data_model
        assert isinstance(credentials, BasicAPTAuthenticationData)
        return credentials

    def make_apt_environment(self, temp_path: Path) -> dict[str, str]:
        """
        Build a process environment for running apt commands.

        The result is a copy of the current environment with ``APT_CONFIG``
        pointing at the ``apt.conf`` below ``temp_path``.
        """
        apt_conf = temp_path / "etc/apt/apt.conf"
        return {**os.environ, "APT_CONFIG": str(apt_conf)}

    def write_apt_config(
        self, path: Path, options: list[tuple[str, str | None]]
    ) -> None:
        """Write APT configuration options out to a file."""
        apt_config = [
            f'#clear {key};' if value is None else f'{key} "{value}";'
            for key, value in options
        ]
        path.write_text("\n".join(apt_config) + "\n")

    def get_apt_lists_path(self, temp_path: Path) -> Path:
        """Return the directory where APT stores downloaded index files."""
        # Deferred import: importing apt_pkg at module level makes
        # mypy_django_plugin try to import it during early initialization,
        # which fails because it can't be installed from PyPI and so isn't
        # present in mypy's virtual environment.
        import apt_pkg

        with self.apt_config(temp_path):
            lists_dir = apt_pkg.config.find_dir("Dir::State::lists")
        return Path(lists_dir)

    @functools.cache
    def get_apt_base_uri(self, temp_path: Path) -> str:
        """
        Return the base repository download URI as APT itself sees it.

        The single configured source is read back through ``apt_pkg`` so
        that the URI matches whatever APT uses internally.
        """
        # NOTE(review): functools.cache on a method keys on (self,
        # temp_path) and keeps both alive for the lifetime of the cache;
        # confirm that this is acceptable for long-running workers.

        # Deferred import: importing apt_pkg at module level makes
        # mypy_django_plugin try to import it during early initialization,
        # which fails because it can't be installed from PyPI and so isn't
        # present in mypy's virtual environment.
        import apt_pkg

        with self.apt_config(temp_path):
            sources = apt_pkg.SourceList()
            sources.read_main_list()
            entries = sources.list
            # Exactly one source is expected in this configuration.
            assert len(entries) == 1
            base_uri = entries[0].uri
            assert isinstance(base_uri, str)
            return base_uri

    @property
    def suite_prefix(self) -> str:
        """Return the URL prefix for the suite."""
        if self.data.suite.endswith("/"):
            return self.data.suite
        else:
            return f"dists/{self.data.suite}/"

    def get_apt_release_file_name(
        self, temp_path: Path, release_name: str
    ) -> str:
        """
        Return the file name under which APT stores a ``Release`` file.

        :param temp_path: root of the temporary APT directory structure.
        :param release_name: name of the meta-index, appended to the suite
          prefix to form its URI.
        """
        # Deferred import: importing apt_pkg at module level makes
        # mypy_django_plugin try to import it during early initialization,
        # which fails because it can't be installed from PyPI and so isn't
        # present in mypy's virtual environment.
        import apt_pkg

        release_uri = (
            self.get_apt_base_uri(temp_path) + self.suite_prefix + release_name
        )
        file_name = apt_pkg.uri_to_filename(release_uri)
        assert isinstance(file_name, str)
        return file_name

    def fetch_meta_indexes(self, temp_path: Path) -> None:
        """
        Fetch meta-indexes (``Release`` files) for this suite.

        Sets up a private APT directory structure below ``temp_path``
        (configuration, optional credentials, and a single deb822 source)
        and then runs ``apt-get update`` against it.
        """
        # Skeleton directory structure for the private APT root.
        for skeleton_dir in (
            "etc/apt/apt.conf.d",
            "etc/apt/preferences.d",
            "etc/apt/sources.list.d",
            "var/lib/apt/lists/partial",
        ):
            (temp_path / skeleton_dir).mkdir(parents=True)

        self.write_apt_config(
            temp_path / "etc/apt/apt.conf",
            [
                ("Dir", str(temp_path)),
                ("Acquire::IndexTargets", None),
            ],
        )

        # Credentials, if configured, go into a per-asset auth.conf.d file.
        if self.authentication_data is not None:
            auth_conf = (
                temp_path
                / f"etc/apt/auth.conf.d/{self.data.authentication}.conf"
            )
            auth_conf.parent.mkdir(parents=True)
            auth_conf.write_text(
                dedent(
                    f"""\
                    machine {self.data.url}
                    login {self.authentication_data.credentials.username}
                    password {self.authentication_data.credentials.password}
                    """
                )
            )

        stanza = {
            "Types": "deb deb-src",
            "URIs": str(self.data.url),
            "Suites": self.data.suite,
        }
        components = self.data.components
        if components is not None:
            stanza["Components"] = " ".join(components)
        signing_key = self.data.signing_key
        if signing_key is not None:
            # Embed the key as a multi-line field: every line is indented
            # by one space and empty lines are written as " .".
            key_lines = [
                f" {line}" if line else " ."
                for line in signing_key.splitlines()
            ]
            stanza["Signed-By"] = "\n" + "\n".join(key_lines)
        sources_path = temp_path / "etc/apt/sources.list.d/mirror.sources"
        sources_path.write_text(str(Deb822(stanza)))

        # Acquire::IndexTargets was cleared above, so this only fetches and
        # verifies the Release files.
        logger.info(
            "Fetching meta-indexes for %s %s", self.data.url, self.data.suite
        )
        apt_env = self.make_apt_environment(temp_path)
        _run_and_log_errors(["apt-get", "update"], env=apt_env)

    def get_release_path(self, temp_path: Path) -> Path:
        """
        Get the preferred path to the fetched ``Release`` file.

        ``InRelease`` is preferred over ``Release``.

        :param temp_path: root of the temporary APT directory structure.
        :raises InconsistentMirrorError: if neither file was fetched.
        :return: path to the first of the two files that exists.
        """
        # The lists directory does not depend on the file name, so look it
        # up once rather than on every iteration.
        lists_path = self.get_apt_lists_path(temp_path)
        for release_name in ("InRelease", "Release"):
            release_path = lists_path / self.get_apt_release_file_name(
                temp_path, release_name
            )
            if release_path.exists():
                return release_path
        raise InconsistentMirrorError(
            "Cannot mirror a repository without a Release/InRelease file"
        )

    def get_index_files(
        self, temp_path: Path, only_type: str | None = None
    ) -> dict[str, IndexFiles]:
        """
        Collect the relevant index files listed in the ``Release`` file.

        :param temp_path: root of the temporary APT directory structure.
        :param only_type: if given, only consider indexes of this type
          (the part of the file stem before the first ``-``).
        :return: a mapping from the index path without its compression
          suffix (relative to the suite) to the available alternatives.
        """
        release_path = self.get_release_path(temp_path)
        with release_path.open(mode="rb") as release_file:
            release = Release(release_file)

        # An index file may be stored in multiple compression formats; from
        # our point of view, fetching any single one of them will do.
        # Uncompressed indexes may have checksums in the Release file
        # without actually existing on the server.
        # https://wiki.debian.org/DebianRepository/Format#Compression_of_indices
        # https://wiki.debian.org/DebianRepository/Format#MD5Sum.2C_SHA1.2C_SHA256
        relevant_types = frozenset(
            {"Contents", "Packages", "Sources", "Translation"}
        )
        found: dict[str, IndexFiles] = defaultdict(IndexFiles)
        for entry in release["SHA256"]:
            entry_path = PurePath(entry["name"])
            stem = entry_path.stem
            index_type = stem.split("-", 1)[0]
            if only_type is not None and only_type != index_type:
                continue
            if index_type not in relevant_types:
                continue

            parts = entry_path.parts
            in_subdirectory = len(parts) > 1
            # Entries directly at the top level are attributed to "main".
            component = parts[0] if in_subdirectory else "main"

            architecture: str | None = None
            if (
                index_type == "Packages"
                and in_subdirectory
                and parts[-2].startswith("binary-")
            ):
                architecture = parts[-2].removeprefix("binary-")
            elif stem.startswith("Contents-"):
                architecture = stem.rsplit("-", 1)[1]

            suffix = entry_path.suffix
            alternatives = found[str(entry_path.parent / stem)]
            alternatives[suffix] = IndexFile(
                component=component,
                architecture=architecture,
                size=int(entry["size"]),
                sha256=entry["sha256"],
                # Only compressed variants are required to exist.
                must_exist=bool(suffix),
            )
        return found

    @functools.cached_property
    def base_url_parsed(self) -> ParseResult:
        """Return the archive's base URL split into its components."""
        base_url = str(self.data.url)
        return urlparse(base_url)

    def fetch_archive_file(
        self,
        root: Path,
        trailing_path: str,
        *,
        expected_size: int,
        expected_sha256: str,
        must_exist: bool = True,
        ignore_unauthorized: bool = False,
    ) -> bool:
        """
        Fetch a URL from the archive given the trailing part of its path.

        :param root: the root of the directory structure where files should
          be fetched.
        :param trailing_path: the trailing part of the URL path, appended to
          each of the archive's base URL and the ``root`` parameter.
        :param expected_size: the expected size of the fetched file.
        :param expected_sha256: the expected SHA256 hex digest of the
          fetched file.
        :param must_exist: if False, return False if fetching the URL
          returns HTTP 404; otherwise (the default), raise an exception in
          such cases.
        :param ignore_unauthorized: if True, return False if fetching the
          URL returns HTTP 401 or 403; otherwise, raise an exception in such
          cases.
        :raises requests.HTTPError: if fetching the URL failed.
        :raises InconsistentMirrorError: if ``trailing_path`` would place
          the file outside ``root``, or if the contents of the URL do not
          have the expected size and SHA256 digest.
        :return: True if the URL was successfully fetched; False if fetching
          the URL failed but the error was ignored due to
          ``must_exist=False`` or ``ignore_unauthorized=True``.
        """
        url = urlunparse(
            self.base_url_parsed._replace(
                path=str(
                    URLPath(self.base_url_parsed.path, is_file=False)
                    + trailing_path
                )
            )
        )
        target_path = root / trailing_path
        # Resolve both sides before comparing: if ``root`` itself contains a
        # symlink (or is relative), comparing the resolved target against
        # the unresolved root would wrongly report an escape.
        if not target_path.resolve().is_relative_to(root.resolve()):
            raise InconsistentMirrorError(f"{trailing_path!r} escapes {root}")
        target_path.parent.mkdir(parents=True, exist_ok=True)
        try:
            auth: _Auth | None = None
            if self.authentication_data:
                auth = (
                    self.authentication_data.credentials.username,
                    self.authentication_data.credentials.password,
                )
            stats = download_file(
                url, target_path, auth=auth, hashes=("sha256",)
            )
        except requests.HTTPError as e:
            if (
                isinstance(e.response, requests.Response)
                and e.response.status_code == requests.codes.not_found
                and not must_exist
            ):
                logger.info("%s not found; skipping", url)
                return False
            elif (
                isinstance(e.response, requests.Response)
                and e.response.status_code
                in {requests.codes.unauthorized, requests.codes.forbidden}
                and ignore_unauthorized
            ):
                logger.info("%s not authorized; skipping", url)
                return False
            logger.exception("Failed to download %s", url)
            raise
        else:
            # Verify the download against the index; remove the file on
            # mismatch so that it is not mistaken for a good download.
            assert isinstance(stats["size"], int)
            if stats["size"] != expected_size:
                target_path.unlink()
                raise InconsistentMirrorError(
                    f"Expected {trailing_path!r} to have size "
                    f"{expected_size} bytes; got {stats['size']} bytes"
                )
            assert isinstance(stats["sha256"], str)
            if stats["sha256"] != expected_sha256:
                target_path.unlink()
                raise InconsistentMirrorError(
                    f"Expected {trailing_path!r} to have SHA256 "
                    f"{expected_sha256!r}; got {stats['sha256']!r}"
                )
            return True

    def fetch_indexes(self, temp_path: Path) -> None:
        """
        Fetch indexes for this suite into ``temp_path / "indexes"``.

        For each index, compression formats are tried in a fixed order of
        preference until one is fetched.  Indexes for components or
        architectures excluded by the task data are skipped.

        :raises InconsistentMirrorError: if none of the alternatives for an
          index could be fetched.
        """
        indexes_path = temp_path / "indexes"
        indexes_path.mkdir()

        wanted_components = self.data.components
        wanted_architectures: set[str] | None = None
        if self.data.architectures is not None:
            # Architecture-independent indexes are always wanted.
            wanted_architectures = {*self.data.architectures, "all"}

        all_index_files = self.get_index_files(temp_path)
        for rel_path in sorted(all_index_files):
            alternatives = all_index_files[rel_path]
            # True once this index has been fetched or deliberately skipped.
            settled = False
            for compress_ext in (".xz", ".bz2", ".gz", ""):
                candidate = alternatives.get(compress_ext)
                if candidate is None:
                    continue

                if (
                    wanted_components is not None
                    and candidate.component not in wanted_components
                ):
                    logger.info(
                        "Skipping %s (component not in %s)",
                        rel_path + compress_ext,
                        wanted_components,
                    )
                    settled = True
                    break

                if (
                    wanted_architectures is not None
                    and candidate.architecture is not None
                    and candidate.architecture not in wanted_architectures
                ):
                    logger.info(
                        "Skipping %s (architecture not in %s)",
                        rel_path + compress_ext,
                        sorted(wanted_architectures),
                    )
                    settled = True
                    break

                if self.fetch_archive_file(
                    indexes_path,
                    self.suite_prefix + rel_path + compress_ext,
                    expected_size=candidate.size,
                    expected_sha256=candidate.sha256,
                    must_exist=candidate.must_exist,
                ):
                    settled = True
                    break

            if not settled:
                raise InconsistentMirrorError(
                    f"No alternatives found for {rel_path!r}"
                )

    def get_fetched_index_files(
        self, temp_path: Path, only_type: str | None = None
    ) -> dict[str, tuple[Path, IndexFile]]:
        """
        Find index files that were successfully fetched.

        :param temp_path: root of the temporary directory structure.
        :param only_type: if given, only consider indexes of this type.
        :return: a mapping from each index's path relative to the suite
          (including any compression suffix) to its local path and the
          details recorded for it in the ``Release`` file.
        """
        indexes_root = temp_path / "indexes"
        prefix = self.suite_prefix
        listed = self.get_index_files(temp_path, only_type=only_type)

        # Flatten to one candidate per (index, compression suffix) pair.
        candidates = (
            (base_rel_path + suffix, index_file)
            for base_rel_path, alternatives in listed.items()
            for suffix, index_file in alternatives.items()
        )
        fetched: dict[str, tuple[Path, IndexFile]] = {}
        for rel_path, index_file in candidates:
            local_path = indexes_root / (prefix + rel_path)
            if local_path.exists():
                fetched[rel_path] = (local_path, index_file)
        return fetched

    def match_filters(
        self,
        *,
        source_name: str,
        binary_name: str | None = None,
        priority: str | None = None,
        section: str | None = None,
    ) -> FilterMatchResult:
        """
        Check whether a package matches any of the configured filters.

        With no filters configured, every package matches.  Otherwise a
        filter accepts the package when each criterion it sets is met.  If
        any accepting filter sets ``ignore_unauthorized``, that takes
        precedence over a plain match.
        """
        filters = self.data.filters
        if filters is None:
            return FilterMatchResult.MATCH

        outcome = FilterMatchResult.NO_MATCH
        for filt in filters:
            # Each criterion only constrains the package if the filter
            # sets it; bail out at the first one that rejects.
            if filt.binary_name is not None and (
                binary_name is None or not filt.binary_name.match(binary_name)
            ):
                continue
            if filt.priority is not None and filt.priority != priority:
                continue
            if filt.section is not None and filt.section != section:
                continue
            if filt.source_name is not None and not filt.source_name.match(
                source_name
            ):
                continue

            if filt.ignore_unauthorized:
                # Strongest possible result; no need to look further.
                return FilterMatchResult.MATCH_IGNORE_UNAUTHORIZED
            outcome = FilterMatchResult.MATCH
        return outcome

    def plan_sources(self, temp_path: Path) -> Plan[Sources]:
        """
        Work out how to update the source packages in the collection.

        Compares the source packages listed in the fetched ``Sources``
        indexes (after filtering) with the active source package items in
        the collection, by name and by file checksums.

        :raises InconsistentMirrorError: if a source package that was
          already accepted shows up again in the indexes.
        """
        # Remote side: filtered source packages from the fetched indexes.
        remote: dict[str, IndexedPackage[Sources]] = {}
        fetched = self.get_fetched_index_files(temp_path, only_type="Sources")
        for index_path, index_file in fetched.values():
            for source in Sources.iter_paragraphs(index_path, use_apt_pkg=True):
                name = "{Package}_{Version}".format(**source)
                previous = remote.get(name)
                if previous is not None:
                    raise InconsistentMirrorError(
                        f"{name} found in multiple components: "
                        f"{previous.component} and {index_file.component}"
                    )

                verdict = self.match_filters(
                    source_name=source["Package"],
                    priority=source.get("Priority"),
                    section=source.get("Section"),
                )
                if not verdict.matched:
                    continue
                remote[name] = IndexedPackage[Sources](
                    contents=source,
                    component=index_file.component,
                    ignore_unauthorized=(
                        verdict == FilterMatchResult.MATCH_IGNORE_UNAUTHORIZED
                    ),
                )

        # Local side: active source package items with their checksums.
        local: dict[str, tuple[CollectionItem, dict[str, str]]] = {}
        local_items = (
            self.collection.child_items.active()
            .filter(
                child_type=CollectionItem.Types.ARTIFACT,
                category=ArtifactCategory.SOURCE_PACKAGE,
            )
            .only("name", "data", "artifact")
            .prefetch_related("artifact__fileinartifact_set__file")
        )
        for item in local_items:
            artifact = item.artifact
            assert artifact is not None
            local_checksums = {
                file_in_artifact.path: file_in_artifact.file.sha256.hex()
                for file_in_artifact in artifact.fileinartifact_set.all()
            }
            local[item.name] = (item, local_checksums)

        plan = Plan[Sources]()

        for name, indexed in sorted(remote.items()):
            existing = local.get(name)
            if existing is None:
                plan.add.append(
                    PlanAdd[Sources](
                        name=name,
                        contents=indexed.contents,
                        component=indexed.component,
                        ignore_unauthorized=indexed.ignore_unauthorized,
                    )
                )
                continue

            remote_checksums = {
                checksum["name"]: checksum["sha256"]
                for checksum in indexed.contents["Checksums-Sha256"]
            }
            item, local_checksums = existing
            if remote_checksums != local_checksums:
                # Same name and version, but different files.
                plan.replace.append(
                    PlanReplace[Sources](
                        name=name,
                        contents=indexed.contents,
                        component=indexed.component,
                        item=item,
                        ignore_unauthorized=indexed.ignore_unauthorized,
                    )
                )

        # Anything we hold that the remote no longer lists is removed.
        for name in sorted(local.keys() - remote.keys()):
            plan.remove.append(local[name][0])

        return plan

    def add_source(self, temp_path: Path, plan: PlanAdd[Sources]) -> None:
        """
        Download a source package and add it to the collection.

        If the suite's parent archive already has a source package with
        the same name, version, and file checksums, that artifact is reused
        rather than downloading the files again.

        :param temp_path: directory below which a scratch ``download``
          directory is created; it is removed again before returning.
        :param plan: the planned addition, carrying the ``Sources``
          paragraph and component of the package.
        """
        with ExitStack() as stack:
            (temp_download := temp_path / "download").mkdir(parents=True)
            # Make sure the scratch directory is removed however we leave.
            stack.callback(shutil.rmtree, temp_download)

            source_artifact: Artifact | None = None
            if (
                self.collection_manager.parent_archive is not None
                and (
                    source_from_archive
                    := self.collection_manager.parent_archive.manager.lookup(
                        f"source-version:{plan.name}"
                    )
                )
                is not None
            ):
                # The parent archive already has this package, so we don't
                # need to download it again as long as its checksums match.
                assert source_from_archive.artifact is not None
                # Compare (path, size, sha256) triples, ordered by path.
                if sorted(
                    source_from_archive.artifact.fileinartifact_set.values_list(
                        "path", "file__size", "file__sha256"
                    ),
                    key=itemgetter(0),
                ) == sorted(
                    [
                        (
                            checksum["name"],
                            int(checksum["size"]),
                            binascii.unhexlify(checksum["sha256"]),
                        )
                        for checksum in plan.contents["Checksums-Sha256"]
                    ],
                    key=itemgetter(0),
                ):
                    source_artifact = source_from_archive.artifact

            if source_artifact is None:
                # We don't have this package anywhere usable, so we need to
                # download it again.
                # https://wiki.debian.org/DebianRepository/Format#A.22Sources.22_Indices
                # says this is mandatory, but "apt-ftparchive sources" omits
                # it if it's "./".
                directory = PurePath(plan.contents.get("Directory", "."))
                for checksum in plan.contents["Checksums-Sha256"]:
                    if not self.fetch_archive_file(
                        temp_download,
                        (directory / checksum["name"]).as_posix(),
                        expected_size=int(checksum["size"]),
                        expected_sha256=checksum["sha256"],
                        ignore_unauthorized=plan.ignore_unauthorized,
                    ):
                        # Unauthorized, but ignored by filters.
                        return
                # The scratch directory was created fresh above, so it
                # holds only the files fetched for this package.
                source_package = SourcePackage.create(
                    name=plan.contents["Package"],
                    version=plan.contents["Version"],
                    files=list((temp_download / directory).iterdir()),
                )
                source_artifact = Artifact.objects.create_from_local_artifact(
                    source_package,
                    self.workspace,
                    created_by_work_request=self.work_request,
                )

            # NOTE(review): "Section" is read unconditionally here, whereas
            # planning treats it as optional; confirm that every Sources
            # paragraph reaching this point carries it.
            with self.collection_manager.allow_rewinding_versions():
                self.collection_manager.add_artifact(
                    source_artifact,
                    user=self.work_request.created_by,
                    variables={
                        "component": plan.component,
                        "section": plan.contents["Section"],
                    },
                    replace=self.collection_manager.may_reuse_versions,
                )

    def update_sources(self, temp_path: Path, plan: Plan[Sources]) -> None:
        """
        Apply a plan to the source packages in the collection.

        Because this may take a long time, work is committed as it goes
        instead of in one long transaction: each addition and each
        replacement gets its own transaction, and all removals share a
        final one.  If processing an item fails, the results of earlier
        processing remain visible.
        """
        for addition in plan.add:
            with transaction.atomic():
                self.add_source(temp_path, addition)

        for replacement in plan.replace:
            # Removing the old item and adding the new one is atomic.
            with transaction.atomic():
                self.collection_manager.remove_item(replacement.item)
                self.add_source(temp_path, replacement)

        with transaction.atomic():
            for stale_item in plan.remove:
                self.collection_manager.remove_item(stale_item)

    def plan_binaries(self, temp_path: Path) -> Plan[Packages]:
        """
        Plan the update of all binary packages in the collection.

        The entries of the fetched ``Packages`` indexes that pass the
        task's filters are compared, by name and by file checksum, with the
        collection's active binary-package items.  The returned plan lists
        the packages to add, the packages to replace, and the items to
        remove.

        :param temp_path: working directory holding the fetched indexes.
        :raises InconsistentMirrorError: if a name that was already
          accepted from the remote indexes shows up again in a different
          component, or with a different ``Packages`` entry.
        """
        # Accepted remote packages, keyed by Package_Version_Architecture.
        remote: dict[str, IndexedPackage[Packages]] = {}
        fetched_indexes = self.get_fetched_index_files(
            temp_path, only_type="Packages"
        )
        for index_path, index_file in fetched_indexes.values():
            for binary in Packages.iter_paragraphs(
                index_path, use_apt_pkg=True
            ):
                name = "{Package}_{Version}_{Architecture}".format(**binary)

                # A name we have already accepted must come back with the
                # same component and the same entry.
                seen = remote.get(name)
                if seen is not None:
                    if seen.component != index_file.component:
                        raise InconsistentMirrorError(
                            f"{name} found in multiple components: "
                            f"{seen.component} and "
                            f"{index_file.component}"
                        )
                    if seen.contents != binary:
                        raise InconsistentMirrorError(
                            f"{name} mismatch.  Conflicting Packages "
                            f"entries:\n\n"
                            + seen.contents.dump()
                            + "\n\n"
                            + binary.dump()
                        )

                # The source name is the first word of the Source field,
                # falling back to the binary package's own name.
                source_field = binary.get("Source", binary["Package"])
                match_result = self.match_filters(
                    source_name=source_field.split()[0],
                    binary_name=binary["Package"],
                    priority=binary.get("Priority"),
                    section=binary.get("Section"),
                )
                if not match_result.matched:
                    continue

                remote[name] = IndexedPackage[Packages](
                    contents=binary,
                    component=index_file.component,
                    ignore_unauthorized=(
                        match_result
                        == FilterMatchResult.MATCH_IGNORE_UNAUTHORIZED
                    ),
                )

        # Active binary packages already in the collection, each paired
        # with a mapping of file path to hex SHA256.
        local: dict[str, tuple[CollectionItem, dict[str, str]]] = {}
        active_binaries = (
            self.collection.child_items.active()
            .filter(
                child_type=CollectionItem.Types.ARTIFACT,
                category=ArtifactCategory.BINARY_PACKAGE,
            )
            .only("name", "data", "artifact")
            .prefetch_related("artifact__fileinartifact_set__file")
        )
        for item in active_binaries:
            artifact = item.artifact
            assert artifact is not None
            checksums: dict[str, str] = {}
            for file_in_artifact in artifact.fileinartifact_set.all():
                checksums[file_in_artifact.path] = (
                    file_in_artifact.file.sha256.hex()
                )
            local[item.name] = (item, checksums)

        plan = Plan[Packages]()

        for name in sorted(remote):
            indexed = remote[name]
            base_name = PurePath(indexed.contents["Filename"]).name
            index_checksums = {base_name: indexed.contents["SHA256"]}

            if name not in local:
                plan.add.append(
                    PlanAdd[Packages](
                        name=name,
                        contents=indexed.contents,
                        component=indexed.component,
                        ignore_unauthorized=indexed.ignore_unauthorized,
                    )
                )
                continue

            item, item_checksums = local[name]
            if index_checksums != item_checksums:
                # Same name but different file contents: replace the item.
                plan.replace.append(
                    PlanReplace[Packages](
                        name=name,
                        contents=indexed.contents,
                        component=indexed.component,
                        item=item,
                        ignore_unauthorized=indexed.ignore_unauthorized,
                    )
                )

        # Anything we hold locally that was not accepted from the remote
        # indexes goes away.
        for stale_name in sorted(local.keys() - remote.keys()):
            stale_item, _ = local[stale_name]
            plan.remove.append(stale_item)

        return plan

    def add_binary(self, temp_path: Path, plan: PlanAdd[Packages]) -> None:
        """
        Add one planned binary package to the collection.

        If the parent archive already has ``plan.name`` with a file whose
        name, size and SHA256 match the ``Packages`` entry, its artifact is
        reused.  Otherwise the package is downloaded into a scratch
        directory under ``temp_path`` and a new artifact is created from
        it.  Nothing is added when ``fetch_archive_file`` reports that
        the file was not fetched (unauthorized, but ignored by filters).

        :param temp_path: working directory for this mirror run.
        :param plan: the planned addition, including its ``Packages`` entry.
        """
        download_dir = temp_path / "download"
        download_dir.mkdir(parents=True)
        try:
            contents = plan.contents
            binary_artifact: Artifact | None = None

            parent_archive = self.collection_manager.parent_archive
            if parent_archive is not None:
                archived = parent_archive.manager.lookup(
                    f"binary-version:{plan.name}"
                )
                if archived is not None:
                    # The parent archive knows this package; reuse its
                    # artifact only if the file really is the same one.
                    assert archived.artifact is not None
                    same_file = archived.artifact.fileinartifact_set.filter(
                        path=PurePath(contents["Filename"]).name,
                        file__size=int(contents["Size"]),
                        file__sha256=binascii.unhexlify(contents["SHA256"]),
                    ).exists()
                    if same_file:
                        binary_artifact = archived.artifact

            if binary_artifact is None:
                # Nothing reusable, so fetch the package ourselves.
                fetched = self.fetch_archive_file(
                    download_dir,
                    contents["Filename"],
                    expected_size=int(contents["Size"]),
                    expected_sha256=contents["SHA256"],
                    ignore_unauthorized=plan.ignore_unauthorized,
                )
                if not fetched:
                    # Unauthorized, but ignored by filters.
                    return

                binary_package = BinaryPackage.create(
                    file=download_dir / contents["Filename"]
                )
                binary_artifact = Artifact.objects.create_from_local_artifact(
                    binary_package,
                    self.workspace,
                    created_by_work_request=self.work_request,
                )

                srcpkg_name = binary_package.data.srcpkg_name
                srcpkg_version = binary_package.data.srcpkg_version
                source_item = self.collection_manager.lookup(
                    f"source-version:{srcpkg_name}_{srcpkg_version}"
                )
                # Only record a built-using relation when the source can
                # be identified with confidence.  With
                # may_reuse_versions=True a source package may have been
                # replaced, so the name and version from the binary's
                # metadata are not enough to be sure of a match.
                if source_item is not None:
                    if not self.collection_manager.may_reuse_versions:
                        assert source_item.artifact is not None
                        ArtifactRelation.objects.create(
                            artifact=binary_artifact,
                            target=source_item.artifact,
                            type=ArtifactRelation.Relations.BUILT_USING,
                        )

            with self.collection_manager.allow_rewinding_versions():
                self.collection_manager.add_artifact(
                    binary_artifact,
                    user=self.work_request.created_by,
                    variables={
                        "component": plan.component,
                        "section": contents["Section"],
                        "priority": contents["Priority"],
                    },
                    replace=self.collection_manager.may_reuse_versions,
                )
        finally:
            # The scratch directory is per-package; always clear it out.
            shutil.rmtree(download_dir)

    def update_binaries(self, temp_path: Path, plan: Plan[Packages]) -> None:
        """
        Apply a binary-package plan to the collection.

        Because this can run for a long time, every addition and every
        replacement is committed in its own transaction, and all removals
        share one final transaction.  If an item fails, whatever was
        processed before it stays visible.

        :param temp_path: working directory for this mirror run.
        :param plan: additions, replacements and removals to carry out.
        """
        for addition in plan.add:
            with transaction.atomic():
                self.add_binary(temp_path, addition)

        for replacement in plan.replace:
            # Drop the old item and add the new one in the same transaction.
            with transaction.atomic():
                self.collection_manager.remove_item(replacement.item)
                self.add_binary(temp_path, replacement)

        with transaction.atomic():
            for stale_item in plan.remove:
                self.collection_manager.remove_item(stale_item)

    @contextmanager
    def apt_config(self, temp_path: Path) -> Generator[None]:
        """
        Temporarily use a different APT configuration.

        ``APT_CONFIG`` is pointed at the value produced by
        ``make_apt_environment(temp_path)``, every key in
        ``apt_pkg.config`` is cleared, and the configuration is
        re-initialized before yielding.  On exit the previous value of
        ``APT_CONFIG`` is put back, or the variable is removed if it was
        not set before.  The in-process ``apt_pkg.config`` is not reset on
        exit.

        :param temp_path: working directory passed to
          ``make_apt_environment``.
        """
        # We have to import this late, as otherwise mypy_django_plugin tries
        # to import it during early initialization and fails (since we can't
        # install it from PyPI and so it isn't present in mypy's virtual
        # environment).
        import apt_pkg

        # Work this out before touching the environment: if it fails there
        # is nothing to undo, and the cleanup below cannot mask the error.
        new_apt_config = self.make_apt_environment(temp_path)["APT_CONFIG"]
        previous_apt_config = os.environ.get("APT_CONFIG")

        os.environ["APT_CONFIG"] = new_apt_config
        try:
            for key in apt_pkg.config.keys():
                apt_pkg.config.clear(key)
            apt_pkg.init_config()
            yield
        finally:
            if previous_apt_config is None:
                # pop() rather than del, so cleanup never raises KeyError.
                os.environ.pop("APT_CONFIG", None)
            else:
                os.environ["APT_CONFIG"] = previous_apt_config

    def plan_indexes(self, temp_path: Path) -> Plan[Deb822]:
        """
        Plan the update of all repository indexes in the collection.

        The fetched index files, plus whichever of ``Release``,
        ``Release.gpg`` and ``InRelease`` exist under the path given by
        ``get_apt_lists_path``, are compared by SHA256 with the
        collection's active repository-index items.  The returned plan
        lists the indexes to add, the indexes to replace, and the items to
        remove.  Suites in the flat repository format get an empty plan.

        :param temp_path: working directory holding the fetched indexes.
        """
        # Flat-format suites are skipped: we have no reasonable way to
        # serve their indexes at the moment.  debusine.web.archives assumes
        # a pooled layout, and converting is not an option because the
        # Release file must agree with the layout we serve.
        if self.data.suite.endswith("/"):
            return Plan[Deb822]()

        # Indexes we want in the collection, keyed by item name.
        wanted: dict[str, Deb822] = {}
        fetched_indexes = self.get_fetched_index_files(temp_path)
        for rel_path, (index_path, index_file) in fetched_indexes.items():
            item_name = f"index:{rel_path}"
            if item_name not in wanted:
                wanted[item_name] = Deb822(
                    {
                        "MetaKey": rel_path,
                        "Component": index_file.component,
                        "Filename": str(index_path),
                    }
                )

        for release_name in ("Release", "Release.gpg", "InRelease"):
            lists_path = self.get_apt_lists_path(temp_path)
            release_path = lists_path / self.get_apt_release_file_name(
                temp_path, release_name
            )
            if not release_path.exists():
                continue
            wanted[f"index:{release_name}"] = Deb822(
                {
                    "MetaKey": release_name,
                    "Component": "",
                    "Filename": str(release_path),
                }
            )

        # Active repository indexes already in the collection, each paired
        # with a mapping of file path to hex SHA256.
        local: dict[str, tuple[CollectionItem, dict[str, str]]] = {}
        active_indexes = (
            self.collection.child_items.active()
            .filter(
                child_type=CollectionItem.Types.ARTIFACT,
                category=ArtifactCategory.REPOSITORY_INDEX,
            )
            .only("name", "data", "artifact")
            .prefetch_related("artifact__fileinartifact_set__file")
        )
        for item in active_indexes:
            artifact = item.artifact
            assert artifact is not None
            checksums: dict[str, str] = {}
            for file_in_artifact in artifact.fileinartifact_set.all():
                checksums[file_in_artifact.path] = (
                    file_in_artifact.file.sha256.hex()
                )
            local[item.name] = (item, checksums)

        plan = Plan[Deb822]()

        for item_name in sorted(wanted):
            paragraph = wanted[item_name]
            rel_path = paragraph["MetaKey"]
            component = paragraph["Component"]
            index_path = Path(paragraph["Filename"])
            digest = calculate_hash(index_path, "sha256").hex()
            index_checksums = {PurePath(rel_path).name: digest}

            if item_name not in local:
                plan.add.append(
                    PlanAdd[Deb822](
                        name=rel_path, contents=paragraph, component=component
                    )
                )
                continue

            item, item_checksums = local[item_name]
            if index_checksums != item_checksums:
                # Same path but different contents: replace the item.
                plan.replace.append(
                    PlanReplace[Deb822](
                        name=rel_path,
                        contents=paragraph,
                        component=component,
                        item=item,
                    )
                )

        # Local indexes that are no longer wanted get removed.
        for stale_name in sorted(local.keys() - wanted.keys()):
            stale_item, _ = local[stale_name]
            plan.remove.append(stale_item)

        return plan

    def add_index(self, *, name: str, paragraph: Deb822) -> None:
        """
        Create an artifact for one repository index and add it.

        :param name: path of the index; used as the ``path`` of the created
          ``RepositoryIndex`` and as the ``path`` variable when the artifact
          is added to the collection.
        :param paragraph: planned entry whose ``Filename`` field gives the
          location of the index file on disk.
        """
        index_file = Path(paragraph["Filename"])
        local_index = RepositoryIndex.create(file=index_file, path=name)
        artifact = Artifact.objects.create_from_local_artifact(
            local_index,
            self.workspace,
            created_by_work_request=self.work_request,
        )
        self.collection_manager.add_artifact(
            artifact,
            user=self.work_request.created_by,
            variables={"path": name},
        )

    def update_indexes(self, plan: Plan[Deb822]) -> None:
        """
        Apply a repository-index plan to the collection.

        Unlike packages, indexes are normally fairly small and apt has
        already downloaded them, so the whole plan is applied inside one
        transaction.

        :param plan: additions, replacements and removals to carry out.
        """
        with transaction.atomic():
            for addition in plan.add:
                self.add_index(name=addition.name, paragraph=addition.contents)

            for replacement in plan.replace:
                # Retire the old item before adding its successor.
                self.collection_manager.remove_item(replacement.item)
                self.add_index(
                    name=replacement.name, paragraph=replacement.contents
                )

            for stale_item in plan.remove:
                self.collection_manager.remove_item(stale_item)

    @override
    def _execute(self) -> WorkRequestResults:
        """
        Execute the task.

        Holding a non-blocking advisory lock for the collection, fetch the
        remote indexes into a temporary directory and then plan and apply
        the updates for sources, binaries and repository indexes, in that
        order.

        :return: ``WorkRequestResults.SUCCESS`` once all updates are applied.
        :raises LockError: if the advisory lock cannot be acquired.
        """
        # Only use the bottom 31 bits, in order that this fits into
        # PostgreSQL's int type.  In the unlikely event that we have enough
        # mirrored collections for there to be a collision, then it just
        # means that the colliding collections can't be mirrored
        # simultaneously.
        lock_key = (LockType.APT_MIRROR, self.collection.id & (2**31 - 1))

        with advisory_lock(lock_key, wait=False) as acquired:
            if not acquired:
                raise LockError(
                    f"Another APTMirror task for {self.data.collection} is "
                    f"already running"
                )

            # This task may take a long time, so it commits transactions as
            # it goes rather than taking a single long transaction.  If
            # processing an item fails, then the results of earlier
            # processing will remain visible.
            with tempfile.TemporaryDirectory(
                prefix="debusine-aptmirror-"
            ) as temp_dir:
                temp_path = Path(temp_dir)
                self.fetch_meta_indexes(temp_path)
                self.fetch_indexes(temp_path)

                # Each plan is computed only after the previous stage has
                # been applied.
                sources_plan = self.plan_sources(temp_path)
                self.update_sources(temp_path, sources_plan)

                binaries_plan = self.plan_binaries(temp_path)
                self.update_binaries(temp_path, binaries_plan)

                indexes_plan = self.plan_indexes(temp_path)
                self.update_indexes(indexes_plan)
            return WorkRequestResults.SUCCESS

    @override
    def get_label(self) -> str:
        """Return the task label."""
        return f"mirror {self.data.collection} from {self.data.url}"
