From fd03c16eca085423267c163137b28ccb60de8db0 Mon Sep 17 00:00:00 2001
From: Matthias Nott <mnott@mnsoft.org>
Date: Wed, 25 Feb 2026 00:45:13 +0100
Subject: [PATCH] feat: multi-compose rebuild (Seafile), cancel endpoint, schedule router, project descriptor

---
 app/routers/rebuild.py |  597 +++++++++++++++++++++++++++--------------------------------
 1 file changed, 277 insertions(+), 320 deletions(-)

diff --git a/app/routers/rebuild.py b/app/routers/rebuild.py
index 447979f..5ff798f 100644
--- a/app/routers/rebuild.py
+++ b/app/routers/rebuild.py
@@ -1,19 +1,16 @@
 """
-Container lifecycle operations via Coolify API + SSH.
+Container lifecycle operations via docker compose.
 
 Three operations:
-  restart  – docker restart {containers} via SSH (no Coolify, no image pruning)
-  rebuild  – Coolify stop → docker build → Coolify start
-  recreate – Coolify stop → wipe data → docker build → Coolify start → show backups banner
+  restart  - docker restart <containers> (fast, no downtime window)
+  rebuild  - docker compose down && docker compose up -d --build
+  recreate - docker compose down --volumes && docker compose up -d --build (destructive DR)
 """
 import json
-import os
-import urllib.request
-import urllib.error
+import sys
 from datetime import datetime, timezone
 from typing import AsyncGenerator
 
-import yaml
 from fastapi import APIRouter, Depends, Query
 from fastapi.responses import StreamingResponse
 
@@ -21,87 +18,121 @@
 from app.ops_runner import (
     OPS_CLI,
     _BACKUP_TIMEOUT,
+    new_op_id,
+    is_cancelled,
+    clear_cancelled,
     run_command,
     run_command_host,
     stream_command_host,
 )
 
+sys.path.insert(0, "/opt/infrastructure")
+
 router = APIRouter()
 
+
 # ---------------------------------------------------------------------------
-# Configuration
+# Descriptor helpers
 # ---------------------------------------------------------------------------
 
-_REGISTRY_PATH = os.environ.get(
-    "REGISTRY_PATH",
-    "/opt/infrastructure/servers/hetzner-vps/registry.yaml",
-)
+def _descriptor(project: str):
+    """Load the project descriptor from /opt/data/{project}/project.yaml."""
+    from toolkit.descriptor import find as find_project
+    desc = find_project(project)
+    if desc is None:
+        raise ValueError(f"Unknown project '{project}' — no project.yaml found")
+    return desc
 
-_COOLIFY_BASE = os.environ.get(
-    "COOLIFY_BASE_URL",
-    "https://cockpit.tekmidian.com/api/v1",
-)
 
-_COOLIFY_TOKEN = os.environ.get(
-    "COOLIFY_API_TOKEN",
-    "3|f1fa8ee5791440ddd37e6cecafd964c8cd734dd4a8891180c424efad6bfdb7f5",
-)
+def _compose_dir(project: str, env: str) -> str:
+    desc = _descriptor(project)
+    return desc.compose_dir(env)
 
-_COOLIFY_TIMEOUT = 30   # seconds for API calls
-_POLL_INTERVAL  = 5    # seconds between container status polls
-_POLL_MAX_WAIT  = 180  # max seconds to wait for containers to stop/start
+
+def _container_prefix(project: str, env: str) -> str:
+    """Return expanded container prefix, e.g. 'dev-mdf' or 'prod-seriousletter'."""
+    desc = _descriptor(project)
+    return desc.container_prefix_for(env)
+
+
+def _all_compose_dirs(project: str, env: str) -> list[tuple[str, str]]:
+    """Return list of (label, compose_dir) for all compose files to manage.
+
+    Always includes the main compose_dir for the env. Additionally includes
+    any subsystem compose dirs defined in the descriptor's raw config that
+    are applicable to the given env. Currently supports:
+      - seafile: prod-only extra compose at descriptor.raw['seafile']['compose_dir']
+    """
+    desc = _descriptor(project)
+    dirs = []
+    main_dir = desc.compose_dir(env)
+    if main_dir:
+        dirs.append((env, main_dir))
+    # Check for seafile subsystem (prod-only, lives in its own compose dir)
+    seafile = desc.raw.get("seafile")
+    if seafile and env == "prod" and "compose_dir" in seafile:
+        dirs.append(("seafile", seafile["compose_dir"]))
+    return dirs
+
+
+def _compose_cmd(project: str, env: str) -> list[str]:
+    """Build the base docker compose command with env-file and profile."""
+    import os
+    compose_dir = _compose_dir(project, env)
+    # Find compose file
+    compose_file = "docker-compose.yaml"
+    if not os.path.isfile(os.path.join(compose_dir, compose_file)):
+        compose_file = "docker-compose.yml"
+    cmd = ["docker", "compose", "-f", f"{compose_dir}/{compose_file}"]
+    # Find env file
+    for candidate in [f".env.{env}", ".env"]:
+        path = os.path.join(compose_dir, candidate)
+        if os.path.isfile(path):
+            cmd += ["--env-file", path]
+            break
+    cmd += ["--profile", env]
+    return cmd
+
+
+def _compose_cmd_for(compose_dir: str, env: str) -> list[str]:
+    """Build the base docker compose command for a specific compose directory.
+
+    Searches for .env.{env} first, then .env. Adds --profile {env}.
+    """
+    import os
+    compose_file = "docker-compose.yaml"
+    if not os.path.isfile(os.path.join(compose_dir, compose_file)):
+        compose_file = "docker-compose.yml"
+    cmd = ["docker", "compose", "-f", f"{compose_dir}/{compose_file}"]
+    for candidate in [f".env.{env}", ".env"]:
+        path = os.path.join(compose_dir, candidate)
+        if os.path.isfile(path):
+            cmd += ["--env-file", path]
+            break
+    cmd += ["--profile", env]
+    return cmd
 
 
 # ---------------------------------------------------------------------------
-# Registry helpers
+# Container discovery
 # ---------------------------------------------------------------------------
 
-def _load_registry() -> dict:
-    with open(_REGISTRY_PATH) as f:
-        return yaml.safe_load(f) or {}
+async def _find_containers(project: str, env: str) -> list[str]:
+    """Find all running containers matching the project/env prefix."""
+    prefix = _container_prefix(project, env)
+    pattern = f"{prefix}-"
 
-
-def _project_cfg(project: str) -> dict:
-    reg = _load_registry()
-    projects = reg.get("projects", {})
-    if project not in projects:
-        raise ValueError(f"Unknown project '{project}'")
-    return projects[project]
-
-
-def _coolify_uuid(project: str, env: str) -> str:
-    cfg = _project_cfg(project)
-    uuids = cfg.get("coolify_uuids", {})
-    uuid = uuids.get(env)
-    if not uuid:
-        raise ValueError(
-            f"No coolify_uuid configured for {project}/{env} in registry.yaml"
-        )
-    return uuid
-
-
-def _data_dir(project: str, env: str) -> str:
-    cfg = _project_cfg(project)
-    template = cfg.get("data_dir", "")
-    if not template:
-        raise ValueError(f"No data_dir configured for {project} in registry.yaml")
-    return template.replace("{env}", env)
-
-
-def _build_cfg(project: str, env: str) -> dict | None:
-    """Return build config or None if the project uses registry-only images."""
-    cfg = _project_cfg(project)
-    build = cfg.get("build", {})
-    if build.get("no_local_image"):
-        return None
-    ctx_template = build.get("build_context", "")
-    if not ctx_template:
-        return None
-    return {
-        "build_context": ctx_template.replace("{env}", env),
-        "image_name": build.get("image_name", project),
-        "env": env,
-    }
+    result = await run_command(
+        ["docker", "ps", "--filter", f"name={pattern}", "--format", "{{.Names}}"],
+        timeout=15,
+    )
+    containers = []
+    if result["success"]:
+        for name in result["output"].strip().splitlines():
+            name = name.strip()
+            if name and name.startswith(pattern):
+                containers.append(name)
+    return containers
 
 
 # ---------------------------------------------------------------------------
@@ -120,151 +151,38 @@
     return _sse({"line": text, "timestamp": _now()})
 
 
-def _done(success: bool, project: str, env: str, action: str) -> str:
-    return _sse({
+def _done(success: bool, project: str, env: str, action: str, cancelled: bool = False) -> str:
+    payload = {
         "done": True,
         "success": success,
         "project": project,
         "env": env,
         "action": action,
-    })
-
-
-# ---------------------------------------------------------------------------
-# Coolify API (synchronous — called from async context via run_in_executor)
-# ---------------------------------------------------------------------------
-
-def _coolify_request(method: str, path: str) -> dict:
-    """Make a Coolify API request. Returns parsed JSON body."""
-    url = f"{_COOLIFY_BASE}{path}"
-    req = urllib.request.Request(
-        url,
-        method=method,
-        headers={
-            "Authorization": f"Bearer {_COOLIFY_TOKEN}",
-            "Content-Type": "application/json",
-            "Accept": "application/json",
-        },
-    )
-    try:
-        with urllib.request.urlopen(req, timeout=_COOLIFY_TIMEOUT) as resp:
-            body = resp.read()
-            return json.loads(body) if body else {}
-    except urllib.error.HTTPError as exc:
-        body = exc.read()
-        raise RuntimeError(
-            f"Coolify API {method} {path} returned HTTP {exc.code}: {body.decode(errors='replace')[:500]}"
-        ) from exc
-    except Exception as exc:
-        raise RuntimeError(f"Coolify API call failed: {exc}") from exc
-
-
-async def _coolify_action(action: str, uuid: str) -> dict:
-    """Call a Coolify service action endpoint (stop/start/restart)."""
-    import asyncio
-    loop = asyncio.get_event_loop()
-    return await loop.run_in_executor(
-        None, _coolify_request, "POST", f"/services/{uuid}/{action}"
-    )
-
-
-# ---------------------------------------------------------------------------
-# Container polling helpers
-# ---------------------------------------------------------------------------
-
-async def _find_containers_for_service(project: str, env: str) -> list[str]:
-    """
-    Find all running Docker containers belonging to a project/env.
-    Uses the registry name_prefix and matches {env}-{prefix}-* pattern.
-    """
-    cfg = _project_cfg(project)
-    prefix = cfg.get("name_prefix", project)
-    name_pattern = f"{env}-{prefix}-"
-
-    result = await run_command(
-        ["docker", "ps", "--filter", f"name={name_pattern}", "--format", "{{.Names}}"],
-        timeout=15,
-    )
-    containers = []
-    if result["success"]:
-        for name in result["output"].strip().splitlines():
-            name = name.strip()
-            if name and name.startswith(name_pattern):
-                containers.append(name)
-    return containers
-
-
-async def _poll_until_stopped(
-    project: str,
-    env: str,
-    max_wait: int = _POLL_MAX_WAIT,
-) -> bool:
-    """Poll until no containers for project/env are running. Returns True if stopped."""
-    import asyncio
-    cfg = _project_cfg(project)
-    prefix = cfg.get("name_prefix", project)
-    name_pattern = f"{env}-{prefix}-"
-    waited = 0
-    while waited < max_wait:
-        result = await run_command(
-            ["docker", "ps", "--filter", f"name={name_pattern}", "--format", "{{.Names}}"],
-            timeout=15,
-        )
-        running = [
-            n.strip()
-            for n in result["output"].strip().splitlines()
-            if n.strip().startswith(name_pattern)
-        ] if result["success"] else []
-        if not running:
-            return True
-        await asyncio.sleep(_POLL_INTERVAL)
-        waited += _POLL_INTERVAL
-    return False
-
-
-async def _poll_until_running(
-    project: str,
-    env: str,
-    max_wait: int = _POLL_MAX_WAIT,
-) -> bool:
-    """Poll until at least one container for project/env is running. Returns True if up."""
-    import asyncio
-    cfg = _project_cfg(project)
-    prefix = cfg.get("name_prefix", project)
-    name_pattern = f"{env}-{prefix}-"
-    waited = 0
-    while waited < max_wait:
-        result = await run_command(
-            ["docker", "ps", "--filter", f"name={name_pattern}", "--format", "{{.Names}}"],
-            timeout=15,
-        )
-        running = [
-            n.strip()
-            for n in result["output"].strip().splitlines()
-            if n.strip().startswith(name_pattern)
-        ] if result["success"] else []
-        if running:
-            return True
-        await asyncio.sleep(_POLL_INTERVAL)
-        waited += _POLL_INTERVAL
-    return False
+    }
+    if cancelled:
+        payload["cancelled"] = True
+    return _sse(payload)
 
 
 # ---------------------------------------------------------------------------
 # Operation: Restart
 # ---------------------------------------------------------------------------
 
-async def _op_restart(project: str, env: str) -> AsyncGenerator[str, None]:
+async def _op_restart(project: str, env: str, op_id: str | None = None) -> AsyncGenerator[str, None]:
+    """Restart: docker restart <containers>. Fast, no compose cycle.
+
+    Uses _find_containers which matches all containers with the project/env
+    prefix (e.g. 'prod-mdf-'). This naturally includes any subsystem containers
+    such as prod-mdf-seafile, prod-mdf-seafile-mysql, prod-mdf-seafile-redis.
     """
-    Restart: docker restart {containers} via SSH/nsenter.
-    No Coolify involvement — avoids the image-pruning stop/start cycle.
-    """
+    if op_id:
+        yield _sse({"op_id": op_id})
     yield _line(f"[restart] Finding containers for {project}/{env}...")
 
     try:
-        containers = await _find_containers_for_service(project, env)
+        containers = await _find_containers(project, env)
     except Exception as exc:
-        yield _line(f"[error] Registry lookup failed: {exc}")
+        yield _line(f"[error] Descriptor lookup failed: {exc}")
         yield _done(False, project, env, "restart")
         return
 
@@ -275,21 +193,20 @@
 
     yield _line(f"[restart] Restarting {len(containers)} container(s): {', '.join(containers)}")
 
-    cmd = ["docker", "restart"] + containers
-    result = await run_command(cmd, timeout=120)
+    result = await run_command(["docker", "restart"] + containers, timeout=120)
 
-    if result["output"].strip():
-        for line in result["output"].strip().splitlines():
-            yield _line(line)
-    if result["error"].strip():
-        for line in result["error"].strip().splitlines():
-            yield _line(f"[stderr] {line}")
+    for output_line in result["output"].strip().splitlines():
+        if output_line.strip():
+            yield _line(output_line)
+    for err_line in result["error"].strip().splitlines():
+        if err_line.strip():
+            yield _line(f"[stderr] {err_line}")
 
     if result["success"]:
         yield _line(f"[restart] All containers restarted successfully.")
         yield _done(True, project, env, "restart")
     else:
-        yield _line(f"[error] docker restart failed (exit code non-zero)")
+        yield _line(f"[error] docker restart failed")
         yield _done(False, project, env, "restart")
 
 
@@ -297,43 +214,73 @@
 # Operation: Rebuild
 # ---------------------------------------------------------------------------
 
-async def _op_rebuild(project: str, env: str) -> AsyncGenerator[str, None]:
+async def _op_rebuild(project: str, env: str, op_id: str | None = None) -> AsyncGenerator[str, None]:
+    """Rebuild: docker compose down && docker compose up -d --build. No data loss.
+
+    Iterates over all compose dirs (main + any subsystem dirs like seafile for prod).
+    Each compose is brought down then rebuilt in sequence.
     """
-    Rebuild: docker compose down → build image → docker compose up.
-    Uses `ops rebuild` on the host which handles env files, profiles, and cd correctly.
-    No data loss. For code/Dockerfile changes.
-    """
-    yield _line(f"[rebuild] Rebuilding {project}/{env} via ops CLI...")
+    if op_id:
+        yield _sse({"op_id": op_id})
 
-    had_output = False
-    success = True
-    async for line in stream_command_host(
-        [OPS_CLI, "rebuild", project, env],
-        timeout=_BACKUP_TIMEOUT,
-    ):
-        had_output = True
-        if line.startswith("[stderr] "):
-            yield _line(line)
-        elif line.startswith("ERROR") or line.startswith("[error]"):
-            yield _line(f"[error] {line}")
-            success = False
-        else:
-            yield _line(f"[rebuild] {line}")
+    try:
+        compose_dirs = _all_compose_dirs(project, env)
+    except Exception as exc:
+        yield _line(f"[error] Descriptor lookup failed: {exc}")
+        yield _done(False, project, env, "rebuild")
+        return
 
-    if not had_output:
-        yield _line(f"[error] ops rebuild produced no output — check registry config for {project}")
-        success = False
+    if not compose_dirs:
+        yield _line(f"[error] No compose directories found for {project}/{env}")
+        yield _done(False, project, env, "rebuild")
+        return
 
-    if success:
-        # Verify containers came up
-        containers = await _find_containers_for_service(project, env)
-        if containers:
-            yield _line(f"[rebuild] {len(containers)} container(s) running: {', '.join(containers)}")
-            yield _done(True, project, env, "rebuild")
-        else:
-            yield _line(f"[warn] No containers found after rebuild — check docker compose logs")
+    for label, cdir in compose_dirs:
+        yield _line(f"[rebuild] Compose dir ({label}): {cdir}")
+
+        # Step 1: docker compose down
+        yield _line(f"[rebuild] Stopping {label} via docker compose down...")
+        result = await run_command_host(
+            _compose_cmd_for(cdir, env) + ["down"],
+            timeout=120,
+        )
+        for output_line in (result["output"] + result["error"]).strip().splitlines():
+            if output_line.strip():
+                yield _line(output_line)
+
+        if not result["success"]:
+            yield _line(f"[error] docker compose down failed for {label}")
             yield _done(False, project, env, "rebuild")
+            return
+
+        yield _line(f"[rebuild] {label} containers stopped.")
+
+        if op_id and is_cancelled(op_id):
+            yield _line(f"[rebuild] Cancelled after stop. Run docker compose up manually to recover.")
+            yield _done(False, project, env, "rebuild", cancelled=True)
+            return
+
+        # Step 2: docker compose up -d --build (streaming for real-time build output)
+        yield _line(f"[rebuild] Building and starting {label}...")
+        async for build_line in stream_command_host(
+            _compose_cmd_for(cdir, env) + ["up", "-d", "--build"],
+            timeout=_BACKUP_TIMEOUT,
+            op_id=op_id,
+        ):
+            yield _line(f"[rebuild] {build_line}")
+
+        if op_id and is_cancelled(op_id):
+            yield _line(f"[rebuild] Cancelled during build/start of {label}.")
+            yield _done(False, project, env, "rebuild", cancelled=True)
+            return
+
+    # Verify all containers came up
+    containers = await _find_containers(project, env)
+    if containers:
+        yield _line(f"[rebuild] {len(containers)} container(s) running: {', '.join(containers)}")
+        yield _done(True, project, env, "rebuild")
     else:
+        yield _line(f"[warn] No running containers detected after rebuild — check compose logs")
         yield _done(False, project, env, "rebuild")
 
 
@@ -341,105 +288,115 @@
 # Operation: Recreate (Disaster Recovery)
 # ---------------------------------------------------------------------------
 
-async def _op_recreate(project: str, env: str) -> AsyncGenerator[str, None]:
+async def _op_recreate(project: str, env: str, op_id: str | None = None) -> AsyncGenerator[str, None]:
+    """Recreate: docker compose down --volumes && up --build. DESTRUCTIVE — wipes volumes.
+
+    Iterates over all compose dirs (main + any subsystem dirs like seafile for prod).
+    A safety backup is taken first. Then each compose is wiped and rebuilt in sequence.
     """
-    Recreate: docker compose down → wipe data → docker build → docker compose up.
-    DESTRUCTIVE — wipes all data volumes. Shows "Go to Backups" banner on success.
-    """
+    if op_id:
+        yield _sse({"op_id": op_id})
+
     try:
-        data_dir = _data_dir(project, env)
-        cfg = _project_cfg(project)
-    except ValueError as exc:
-        yield _line(f"[error] Config error: {exc}")
+        compose_dirs = _all_compose_dirs(project, env)
+    except Exception as exc:
+        yield _line(f"[error] Descriptor lookup failed: {exc}")
         yield _done(False, project, env, "recreate")
         return
 
-    # Step 1: Find and stop containers via docker compose
-    code_dir = cfg.get("path", "") + f"/{env}/code"
-    yield _line(f"[recreate] Stopping {project}/{env} containers...")
-
-    stop_result = await run_command_host(
-        ["sh", "-c", f"cd {code_dir} && docker compose -p {env}-{cfg.get('name_prefix', project)} --profile {env} down 2>&1 || true"],
-        timeout=120,
-    )
-    if stop_result["output"].strip():
-        for line in stop_result["output"].strip().splitlines():
-            yield _line(line)
-
-    # Step 2: Verify containers are stopped
-    name_prefix = cfg.get("name_prefix", project)
-    verify = await run_command_host(
-        ["sh", "-c", f"docker ps --format '{{{{.Names}}}}' | grep '^{env}-{name_prefix}-' || true"],
-        timeout=30,
-    )
-    running_containers = verify["output"].strip()
-    if running_containers:
-        yield _line(f"[error] Containers still running for {project}/{env}:")
-        for line in running_containers.splitlines():
-            yield _line(f"  {line}")
-        yield _done(False, project, env, "recreate")
-        return
-    yield _line(f"[recreate] All containers stopped.")
-
-    # Step 3: Wipe data volumes
-    yield _line(f"[recreate] WARNING: Wiping data directory: {data_dir}")
-    wipe_result = await run_command_host(
-        ["sh", "-c", f"rm -r {data_dir}/* 2>&1; echo EXIT_CODE=$?"],
-        timeout=120,
-    )
-    for line in (wipe_result["output"].strip() + "\n" + wipe_result["error"].strip()).strip().splitlines():
-        if line:
-            yield _line(line)
-    if "EXIT_CODE=0" in wipe_result["output"]:
-        yield _line(f"[recreate] Data directory wiped.")
-    else:
-        yield _line(f"[error] Wipe may have failed — check output above.")
+    if not compose_dirs:
+        yield _line(f"[error] No compose directories found for {project}/{env}")
         yield _done(False, project, env, "recreate")
         return
 
-    # Step 4: Rebuild via ops CLI (handles image build + compose up)
-    yield _line(f"[recreate] Rebuilding containers...")
-    async for line in stream_command_host(
-        [OPS_CLI, "rebuild", project, env],
+    # Log all compose dirs we will operate on
+    for label, cdir in compose_dirs:
+        yield _line(f"[recreate] Compose dir ({label}): {cdir}")
+
+    # Step 1: Safety backup before destroying anything
+    yield _line(f"[recreate] Creating safety backup before wipe...")
+    async for backup_line in stream_command_host(
+        [OPS_CLI, "backup", project, env],
         timeout=_BACKUP_TIMEOUT,
+        op_id=op_id,
     ):
-        if line.startswith("[stderr] "):
-            yield _line(line)
-        else:
-            yield _line(f"[recreate] {line}")
+        yield _line(f"[recreate] {backup_line}")
 
-    # Step 5: Verify containers came up
-    containers = await _find_containers_for_service(project, env)
+    if op_id and is_cancelled(op_id):
+        yield _line(f"[recreate] Cancelled during safety backup. No data was lost.")
+        yield _done(False, project, env, "recreate", cancelled=True)
+        return
+
+    yield _line(f"[recreate] Safety backup complete.")
+
+    for label, cdir in compose_dirs:
+        # Step 2: docker compose down --volumes (removes named volumes)
+        yield _line(f"[recreate] WARNING: Running docker compose down --volumes for {label} (data will be wiped)...")
+        result = await run_command_host(
+            _compose_cmd_for(cdir, env) + ["down", "--volumes"],
+            timeout=120,
+        )
+        for output_line in (result["output"] + result["error"]).strip().splitlines():
+            if output_line.strip():
+                yield _line(output_line)
+
+        if not result["success"]:
+            yield _line(f"[error] docker compose down --volumes failed for {label}")
+            yield _done(False, project, env, "recreate")
+            return
+
+        yield _line(f"[recreate] {label} containers and volumes removed.")
+
+        if op_id and is_cancelled(op_id):
+            yield _line(f"[recreate] Cancelled after volume wipe of {label}. Restore a backup to recover.")
+            yield _done(False, project, env, "recreate", cancelled=True)
+            return
+
+        # Step 3: docker compose up -d --build
+        yield _line(f"[recreate] Building and starting fresh {label}...")
+        async for build_line in stream_command_host(
+            _compose_cmd_for(cdir, env) + ["up", "-d", "--build"],
+            timeout=_BACKUP_TIMEOUT,
+            op_id=op_id,
+        ):
+            yield _line(f"[recreate] {build_line}")
+
+        if op_id and is_cancelled(op_id):
+            yield _line(f"[recreate] Cancelled during build/start of {label}.")
+            yield _done(False, project, env, "recreate", cancelled=True)
+            return
+
+    # Verify containers came up
+    containers = await _find_containers(project, env)
     if containers:
         yield _line(f"[recreate] {len(containers)} container(s) running. Restore a backup to complete recovery.")
         yield _done(True, project, env, "recreate")
     else:
-        yield _line(f"[warn] No containers found after recreate — check docker compose logs")
-        yield _done(True, project, env, "recreate")
+        yield _line(f"[warn] No running containers after recreate — check compose logs")
+        yield _done(False, project, env, "recreate")
 
 
 # ---------------------------------------------------------------------------
-# Dispatch wrapper
+# Dispatch
 # ---------------------------------------------------------------------------
 
-async def _op_generator(
-    project: str,
-    env: str,
-    action: str,
-) -> AsyncGenerator[str, None]:
-    """Route to the correct operation generator."""
-    if action == "restart":
-        async for chunk in _op_restart(project, env):
-            yield chunk
-    elif action == "rebuild":
-        async for chunk in _op_rebuild(project, env):
-            yield chunk
-    elif action == "recreate":
-        async for chunk in _op_recreate(project, env):
-            yield chunk
-    else:
-        yield _line(f"[error] Unknown action '{action}'. Valid: restart, rebuild, recreate")
-        yield _done(False, project, env, action)
+async def _op_generator(project: str, env: str, action: str) -> AsyncGenerator[str, None]:
+    op_id = new_op_id()
+    try:
+        if action == "restart":
+            async for chunk in _op_restart(project, env, op_id=op_id):
+                yield chunk
+        elif action == "rebuild":
+            async for chunk in _op_rebuild(project, env, op_id=op_id):
+                yield chunk
+        elif action == "recreate":
+            async for chunk in _op_recreate(project, env, op_id=op_id):
+                yield chunk
+        else:
+            yield _line(f"[error] Unknown action '{action}'. Valid: restart, rebuild, recreate")
+            yield _done(False, project, env, action)
+    finally:
+        clear_cancelled(op_id)
 
 
 # ---------------------------------------------------------------------------
@@ -463,8 +420,8 @@
     Stream a container lifecycle operation via SSE.
 
     - restart:  docker restart containers (safe, fast)
-    - rebuild:  stop via Coolify, rebuild image, start via Coolify
-    - recreate: stop, wipe data, rebuild image, start (destructive — DR only)
+    - rebuild:  docker compose down && up --build (no data loss)
+    - recreate: docker compose down --volumes && up --build (destructive — DR only)
     """
     return StreamingResponse(
         _op_generator(project, env, action),

--
Gitblit v1.3.1