| .. | .. |
|---|
| 1 | 1 | """ |
|---|
| 2 | | -Container lifecycle operations via Coolify API + SSH. |
|---|
| 2 | +Container lifecycle operations via docker compose. |
|---|
| 3 | 3 | |
|---|
| 4 | 4 | Three operations: |
|---|
| 5 | | - restart – docker restart {containers} via SSH (no Coolify, no image pruning) |
|---|
| 6 | | - rebuild – Coolify stop → docker build → Coolify start |
|---|
| 7 | | - recreate – Coolify stop → wipe data → docker build → Coolify start → show backups banner |
|---|
| 5 | + restart - docker restart <containers> (fast, no downtime window) |
|---|
| 6 | + rebuild - docker compose down && docker compose up -d --build |
|---|
| 7 | + recreate - docker compose down --volumes && docker compose up -d --build (destructive DR) |
|---|
| 8 | 8 | """ |
|---|
| 9 | 9 | import json |
|---|
| 10 | | -import os |
|---|
| 11 | | -import urllib.request |
|---|
| 12 | | -import urllib.error |
|---|
| 10 | +import sys |
|---|
| 13 | 11 | from datetime import datetime, timezone |
|---|
| 14 | 12 | from typing import AsyncGenerator |
|---|
| 15 | 13 | |
|---|
| 16 | | -import yaml |
|---|
| 17 | 14 | from fastapi import APIRouter, Depends, Query |
|---|
| 18 | 15 | from fastapi.responses import StreamingResponse |
|---|
| 19 | 16 | |
|---|
| .. | .. |
|---|
| 21 | 18 | from app.ops_runner import ( |
|---|
| 22 | 19 | OPS_CLI, |
|---|
| 23 | 20 | _BACKUP_TIMEOUT, |
|---|
| 21 | + new_op_id, |
|---|
| 22 | + is_cancelled, |
|---|
| 23 | + clear_cancelled, |
|---|
| 24 | 24 | run_command, |
|---|
| 25 | 25 | run_command_host, |
|---|
| 26 | 26 | stream_command_host, |
|---|
| 27 | 27 | ) |
|---|
| 28 | 28 | |
|---|
# Make the infrastructure toolkit importable (provides toolkit.descriptor,
# which is imported lazily inside _descriptor below).
sys.path.insert(0, "/opt/infrastructure")

# Router for the container lifecycle endpoints; mounted by the app elsewhere.
router = APIRouter()
|---|
| 30 | 32 | |
|---|
| 33 | + |
|---|
| 31 | 34 | # --------------------------------------------------------------------------- |
|---|
| 32 | | -# Configuration |
|---|
| 35 | +# Descriptor helpers |
|---|
| 33 | 36 | # --------------------------------------------------------------------------- |
|---|
| 34 | 37 | |
|---|
| 35 | | -_REGISTRY_PATH = os.environ.get( |
|---|
| 36 | | - "REGISTRY_PATH", |
|---|
| 37 | | - "/opt/infrastructure/servers/hetzner-vps/registry.yaml", |
|---|
| 38 | | -) |
|---|
def _descriptor(project: str):
    """Resolve *project* to its descriptor (from /opt/data/{project}/project.yaml).

    Raises ValueError when no project.yaml can be located for the name.
    """
    # Imported lazily so this module can be imported even if the
    # sys.path hack above has not yet made the toolkit visible.
    from toolkit.descriptor import find as find_project

    found = find_project(project)
    if found is None:
        raise ValueError(f"Unknown project '{project}' — no project.yaml found")
    return found
|---|
| 39 | 45 | |
|---|
| 40 | | -_COOLIFY_BASE = os.environ.get( |
|---|
| 41 | | - "COOLIFY_BASE_URL", |
|---|
| 42 | | - "https://cockpit.tekmidian.com/api/v1", |
|---|
| 43 | | -) |
|---|
| 44 | 46 | |
|---|
| 45 | | -_COOLIFY_TOKEN = os.environ.get( |
|---|
| 46 | | - "COOLIFY_API_TOKEN", |
|---|
| 47 | | - "3|f1fa8ee5791440ddd37e6cecafd964c8cd734dd4a8891180c424efad6bfdb7f5", |
|---|
| 48 | | -) |
|---|
def _compose_dir(project: str, env: str) -> str:
    """Return the main compose directory for *project*/*env* from its descriptor."""
    return _descriptor(project).compose_dir(env)
|---|
| 49 | 50 | |
|---|
| 50 | | -_COOLIFY_TIMEOUT = 30 # seconds for API calls |
|---|
| 51 | | -_POLL_INTERVAL = 5 # seconds between container status polls |
|---|
| 52 | | -_POLL_MAX_WAIT = 180 # max seconds to wait for containers to stop/start |
|---|
| 51 | + |
|---|
def _container_prefix(project: str, env: str) -> str:
    """Return expanded container prefix, e.g. 'dev-mdf' or 'prod-seriousletter'."""
    return _descriptor(project).container_prefix_for(env)
|---|
| 56 | + |
|---|
| 57 | + |
|---|
def _all_compose_dirs(project: str, env: str) -> list[tuple[str, str]]:
    """Return list of (label, compose_dir) for all compose files to manage.

    Always includes the main compose_dir for the env. Additionally includes
    any subsystem compose dirs defined in the descriptor's raw config that
    are applicable to the given env. Currently supports:
      - seafile: prod-only extra compose at descriptor.raw['seafile']['compose_dir']
    """
    desc = _descriptor(project)
    out: list[tuple[str, str]] = []

    primary = desc.compose_dir(env)
    if primary:
        out.append((env, primary))

    # Seafile subsystem lives in its own compose dir and only exists in prod.
    seafile_cfg = desc.raw.get("seafile")
    if env == "prod" and seafile_cfg and "compose_dir" in seafile_cfg:
        out.append(("seafile", seafile_cfg["compose_dir"]))

    return out
|---|
| 76 | + |
|---|
| 77 | + |
|---|
def _compose_cmd(project: str, env: str) -> list[str]:
    """Build the base docker compose command for *project*/*env*.

    Resolves the project's main compose directory from its descriptor and
    delegates to _compose_cmd_for, which selects docker-compose.yaml/.yml,
    an env file (.env.{env} preferred over .env) and adds --profile {env}.
    """
    # Previously this duplicated _compose_cmd_for's file-discovery logic
    # verbatim; delegate instead so it lives in exactly one place.
    return _compose_cmd_for(_compose_dir(project, env), env)
|---|
| 95 | + |
|---|
| 96 | + |
|---|
| 97 | +def _compose_cmd_for(compose_dir: str, env: str) -> list[str]: |
|---|
| 98 | + """Build the base docker compose command for a specific compose directory. |
|---|
| 99 | + |
|---|
| 100 | + Searches for .env.{env} first, then .env. Adds --profile {env}. |
|---|
| 101 | + """ |
|---|
| 102 | + import os |
|---|
| 103 | + compose_file = "docker-compose.yaml" |
|---|
| 104 | + if not os.path.isfile(os.path.join(compose_dir, compose_file)): |
|---|
| 105 | + compose_file = "docker-compose.yml" |
|---|
| 106 | + cmd = ["docker", "compose", "-f", f"{compose_dir}/{compose_file}"] |
|---|
| 107 | + for candidate in [f".env.{env}", ".env"]: |
|---|
| 108 | + path = os.path.join(compose_dir, candidate) |
|---|
| 109 | + if os.path.isfile(path): |
|---|
| 110 | + cmd += ["--env-file", path] |
|---|
| 111 | + break |
|---|
| 112 | + cmd += ["--profile", env] |
|---|
| 113 | + return cmd |
|---|
| 53 | 114 | |
|---|
| 54 | 115 | |
|---|
| 55 | 116 | # --------------------------------------------------------------------------- |
|---|
| 56 | | -# Registry helpers |
|---|
| 117 | +# Container discovery |
|---|
| 57 | 118 | # --------------------------------------------------------------------------- |
|---|
| 58 | 119 | |
|---|
| 59 | | -def _load_registry() -> dict: |
|---|
| 60 | | - with open(_REGISTRY_PATH) as f: |
|---|
| 61 | | - return yaml.safe_load(f) or {} |
|---|
async def _find_containers(project: str, env: str) -> list[str]:
    """Find all running containers matching the project/env prefix."""
    # e.g. prefix 'prod-mdf' -> matches 'prod-mdf-app', 'prod-mdf-db', ...
    pattern = _container_prefix(project, env) + "-"

    result = await run_command(
        ["docker", "ps", "--filter", f"name={pattern}", "--format", "{{.Names}}"],
        timeout=15,
    )
    if not result["success"]:
        return []

    # The name filter is a substring match, so re-check the prefix strictly.
    return [
        candidate.strip()
        for candidate in result["output"].strip().splitlines()
        if candidate.strip() and candidate.strip().startswith(pattern)
    ]
|---|
| 105 | 136 | |
|---|
| 106 | 137 | |
|---|
| 107 | 138 | # --------------------------------------------------------------------------- |
|---|
| .. | .. |
|---|
| 120 | 151 | return _sse({"line": text, "timestamp": _now()}) |
|---|
| 121 | 152 | |
|---|
| 122 | 153 | |
|---|
| 123 | | -def _done(success: bool, project: str, env: str, action: str) -> str: |
|---|
| 124 | | - return _sse({ |
|---|
def _done(success: bool, project: str, env: str, action: str, cancelled: bool = False) -> str:
    """Build the terminal SSE event for an operation stream.

    The payload always carries done/success/project/env/action; a
    'cancelled': True flag is appended only when the op was cancelled.
    """
    extra = {"cancelled": True} if cancelled else {}
    return _sse({
        "done": True,
        "success": success,
        "project": project,
        "env": env,
        "action": action,
        **extra,
    })
|---|
| 251 | 165 | |
|---|
| 252 | 166 | |
|---|
| 253 | 167 | # --------------------------------------------------------------------------- |
|---|
| 254 | 168 | # Operation: Restart |
|---|
| 255 | 169 | # --------------------------------------------------------------------------- |
|---|
| 256 | 170 | |
|---|
| 257 | | -async def _op_restart(project: str, env: str) -> AsyncGenerator[str, None]: |
|---|
| 171 | +async def _op_restart(project: str, env: str, op_id: str | None = None) -> AsyncGenerator[str, None]: |
|---|
| 172 | + """Restart: docker restart <containers>. Fast, no compose cycle. |
|---|
| 173 | + |
|---|
| 174 | + Uses _find_containers which matches all containers with the project/env |
|---|
| 175 | + prefix (e.g. 'prod-mdf-'). This naturally includes any subsystem containers |
|---|
| 176 | + such as prod-mdf-seafile, prod-mdf-seafile-mysql, prod-mdf-seafile-redis. |
|---|
| 258 | 177 | """ |
|---|
| 259 | | - Restart: docker restart {containers} via SSH/nsenter. |
|---|
| 260 | | - No Coolify involvement — avoids the image-pruning stop/start cycle. |
|---|
| 261 | | - """ |
|---|
| 178 | + if op_id: |
|---|
| 179 | + yield _sse({"op_id": op_id}) |
|---|
| 262 | 180 | yield _line(f"[restart] Finding containers for {project}/{env}...") |
|---|
| 263 | 181 | |
|---|
| 264 | 182 | try: |
|---|
| 265 | | - containers = await _find_containers_for_service(project, env) |
|---|
| 183 | + containers = await _find_containers(project, env) |
|---|
| 266 | 184 | except Exception as exc: |
|---|
| 267 | | - yield _line(f"[error] Registry lookup failed: {exc}") |
|---|
| 185 | + yield _line(f"[error] Descriptor lookup failed: {exc}") |
|---|
| 268 | 186 | yield _done(False, project, env, "restart") |
|---|
| 269 | 187 | return |
|---|
| 270 | 188 | |
|---|
| .. | .. |
|---|
| 275 | 193 | |
|---|
| 276 | 194 | yield _line(f"[restart] Restarting {len(containers)} container(s): {', '.join(containers)}") |
|---|
| 277 | 195 | |
|---|
| 278 | | - cmd = ["docker", "restart"] + containers |
|---|
| 279 | | - result = await run_command(cmd, timeout=120) |
|---|
| 196 | + result = await run_command(["docker", "restart"] + containers, timeout=120) |
|---|
| 280 | 197 | |
|---|
| 281 | | - if result["output"].strip(): |
|---|
| 282 | | - for line in result["output"].strip().splitlines(): |
|---|
| 283 | | - yield _line(line) |
|---|
| 284 | | - if result["error"].strip(): |
|---|
| 285 | | - for line in result["error"].strip().splitlines(): |
|---|
| 286 | | - yield _line(f"[stderr] {line}") |
|---|
| 198 | + for output_line in result["output"].strip().splitlines(): |
|---|
| 199 | + if output_line.strip(): |
|---|
| 200 | + yield _line(output_line) |
|---|
| 201 | + for err_line in result["error"].strip().splitlines(): |
|---|
| 202 | + if err_line.strip(): |
|---|
| 203 | + yield _line(f"[stderr] {err_line}") |
|---|
| 287 | 204 | |
|---|
| 288 | 205 | if result["success"]: |
|---|
| 289 | 206 | yield _line(f"[restart] All containers restarted successfully.") |
|---|
| 290 | 207 | yield _done(True, project, env, "restart") |
|---|
| 291 | 208 | else: |
|---|
| 292 | | - yield _line(f"[error] docker restart failed (exit code non-zero)") |
|---|
| 209 | + yield _line(f"[error] docker restart failed") |
|---|
| 293 | 210 | yield _done(False, project, env, "restart") |
|---|
| 294 | 211 | |
|---|
| 295 | 212 | |
|---|
| .. | .. |
|---|
| 297 | 214 | # Operation: Rebuild |
|---|
| 298 | 215 | # --------------------------------------------------------------------------- |
|---|
| 299 | 216 | |
|---|
| 300 | | -async def _op_rebuild(project: str, env: str) -> AsyncGenerator[str, None]: |
|---|
async def _op_rebuild(project: str, env: str, op_id: str | None = None) -> AsyncGenerator[str, None]:
    """Rebuild: docker compose down && docker compose up -d --build. No data loss.

    Iterates over all compose dirs (main + any subsystem dirs like seafile for prod).
    Each compose is brought down then rebuilt in sequence. Emits SSE lines; the
    final event is _done(...). Honors cancellation between/after each phase.
    """
    if op_id:
        yield _sse({"op_id": op_id})

    try:
        compose_dirs = _all_compose_dirs(project, env)
    except Exception as exc:
        yield _line(f"[error] Descriptor lookup failed: {exc}")
        yield _done(False, project, env, "rebuild")
        return

    if not compose_dirs:
        yield _line(f"[error] No compose directories found for {project}/{env}")
        yield _done(False, project, env, "rebuild")
        return

    for label, cdir in compose_dirs:
        yield _line(f"[rebuild] Compose dir ({label}): {cdir}")

        # Step 1: docker compose down
        yield _line(f"[rebuild] Stopping {label} via docker compose down...")
        result = await run_command_host(
            _compose_cmd_for(cdir, env) + ["down"],
            timeout=120,
        )
        # BUG FIX: stdout and stderr were previously concatenated with no
        # separator, which could fuse the last stdout line with the first
        # stderr line when stdout lacked a trailing newline. Emit each
        # stream's lines separately instead.
        for output_line in result["output"].strip().splitlines():
            if output_line.strip():
                yield _line(output_line)
        for err_line in result["error"].strip().splitlines():
            if err_line.strip():
                yield _line(err_line)

        if not result["success"]:
            yield _line(f"[error] docker compose down failed for {label}")
            yield _done(False, project, env, "rebuild")
            return

        yield _line(f"[rebuild] {label} containers stopped.")

        if op_id and is_cancelled(op_id):
            yield _line("[rebuild] Cancelled after stop. Run docker compose up manually to recover.")
            yield _done(False, project, env, "rebuild", cancelled=True)
            return

        # Step 2: docker compose up -d --build (streaming for real-time build output)
        yield _line(f"[rebuild] Building and starting {label}...")
        async for build_line in stream_command_host(
            _compose_cmd_for(cdir, env) + ["up", "-d", "--build"],
            timeout=_BACKUP_TIMEOUT,
            op_id=op_id,
        ):
            yield _line(f"[rebuild] {build_line}")

        if op_id and is_cancelled(op_id):
            yield _line(f"[rebuild] Cancelled during build/start of {label}.")
            yield _done(False, project, env, "rebuild", cancelled=True)
            return

    # Verify all containers came up
    containers = await _find_containers(project, env)
    if containers:
        yield _line(f"[rebuild] {len(containers)} container(s) running: {', '.join(containers)}")
        yield _done(True, project, env, "rebuild")
    else:
        yield _line("[warn] No running containers detected after rebuild — check compose logs")
        yield _done(False, project, env, "rebuild")
|---|
| 338 | 285 | |
|---|
| 339 | 286 | |
|---|
| .. | .. |
|---|
| 341 | 288 | # Operation: Recreate (Disaster Recovery) |
|---|
| 342 | 289 | # --------------------------------------------------------------------------- |
|---|
| 343 | 290 | |
|---|
| 344 | | -async def _op_recreate(project: str, env: str) -> AsyncGenerator[str, None]: |
|---|
| 291 | +async def _op_recreate(project: str, env: str, op_id: str | None = None) -> AsyncGenerator[str, None]: |
|---|
| 292 | + """Recreate: docker compose down --volumes && up --build. DESTRUCTIVE — wipes volumes. |
|---|
| 293 | + |
|---|
| 294 | + Iterates over all compose dirs (main + any subsystem dirs like seafile for prod). |
|---|
| 295 | + A safety backup is taken first. Then each compose is wiped and rebuilt in sequence. |
|---|
| 345 | 296 | """ |
|---|
| 346 | | - Recreate: docker compose down → wipe data → docker build → docker compose up. |
|---|
| 347 | | - DESTRUCTIVE — wipes all data volumes. Shows "Go to Backups" banner on success. |
|---|
| 348 | | - """ |
|---|
| 297 | + if op_id: |
|---|
| 298 | + yield _sse({"op_id": op_id}) |
|---|
| 299 | + |
|---|
| 349 | 300 | try: |
|---|
| 350 | | - data_dir = _data_dir(project, env) |
|---|
| 351 | | - cfg = _project_cfg(project) |
|---|
| 352 | | - except ValueError as exc: |
|---|
| 353 | | - yield _line(f"[error] Config error: {exc}") |
|---|
| 301 | + compose_dirs = _all_compose_dirs(project, env) |
|---|
| 302 | + except Exception as exc: |
|---|
| 303 | + yield _line(f"[error] Descriptor lookup failed: {exc}") |
|---|
| 354 | 304 | yield _done(False, project, env, "recreate") |
|---|
| 355 | 305 | return |
|---|
| 356 | 306 | |
|---|
| 357 | | - # Step 1: Find and stop containers via docker compose |
|---|
| 358 | | - code_dir = cfg.get("path", "") + f"/{env}/code" |
|---|
| 359 | | - yield _line(f"[recreate] Stopping {project}/{env} containers...") |
|---|
| 360 | | - |
|---|
| 361 | | - stop_result = await run_command_host( |
|---|
| 362 | | - ["sh", "-c", f"cd {code_dir} && docker compose -p {env}-{cfg.get('name_prefix', project)} --profile {env} down 2>&1 || true"], |
|---|
| 363 | | - timeout=120, |
|---|
| 364 | | - ) |
|---|
| 365 | | - if stop_result["output"].strip(): |
|---|
| 366 | | - for line in stop_result["output"].strip().splitlines(): |
|---|
| 367 | | - yield _line(line) |
|---|
| 368 | | - |
|---|
| 369 | | - # Step 2: Verify containers are stopped |
|---|
| 370 | | - name_prefix = cfg.get("name_prefix", project) |
|---|
| 371 | | - verify = await run_command_host( |
|---|
| 372 | | - ["sh", "-c", f"docker ps --format '{{{{.Names}}}}' | grep '^{env}-{name_prefix}-' || true"], |
|---|
| 373 | | - timeout=30, |
|---|
| 374 | | - ) |
|---|
| 375 | | - running_containers = verify["output"].strip() |
|---|
| 376 | | - if running_containers: |
|---|
| 377 | | - yield _line(f"[error] Containers still running for {project}/{env}:") |
|---|
| 378 | | - for line in running_containers.splitlines(): |
|---|
| 379 | | - yield _line(f" {line}") |
|---|
| 380 | | - yield _done(False, project, env, "recreate") |
|---|
| 381 | | - return |
|---|
| 382 | | - yield _line(f"[recreate] All containers stopped.") |
|---|
| 383 | | - |
|---|
| 384 | | - # Step 3: Wipe data volumes |
|---|
| 385 | | - yield _line(f"[recreate] WARNING: Wiping data directory: {data_dir}") |
|---|
| 386 | | - wipe_result = await run_command_host( |
|---|
| 387 | | - ["sh", "-c", f"rm -r {data_dir}/* 2>&1; echo EXIT_CODE=$?"], |
|---|
| 388 | | - timeout=120, |
|---|
| 389 | | - ) |
|---|
| 390 | | - for line in (wipe_result["output"].strip() + "\n" + wipe_result["error"].strip()).strip().splitlines(): |
|---|
| 391 | | - if line: |
|---|
| 392 | | - yield _line(line) |
|---|
| 393 | | - if "EXIT_CODE=0" in wipe_result["output"]: |
|---|
| 394 | | - yield _line(f"[recreate] Data directory wiped.") |
|---|
| 395 | | - else: |
|---|
| 396 | | - yield _line(f"[error] Wipe may have failed — check output above.") |
|---|
| 307 | + if not compose_dirs: |
|---|
| 308 | + yield _line(f"[error] No compose directories found for {project}/{env}") |
|---|
| 397 | 309 | yield _done(False, project, env, "recreate") |
|---|
| 398 | 310 | return |
|---|
| 399 | 311 | |
|---|
| 400 | | - # Step 4: Rebuild via ops CLI (handles image build + compose up) |
|---|
| 401 | | - yield _line(f"[recreate] Rebuilding containers...") |
|---|
| 402 | | - async for line in stream_command_host( |
|---|
| 403 | | - [OPS_CLI, "rebuild", project, env], |
|---|
| 312 | + # Log all compose dirs we will operate on |
|---|
| 313 | + for label, cdir in compose_dirs: |
|---|
| 314 | + yield _line(f"[recreate] Compose dir ({label}): {cdir}") |
|---|
| 315 | + |
|---|
| 316 | + # Step 1: Safety backup before destroying anything |
|---|
| 317 | + yield _line(f"[recreate] Creating safety backup before wipe...") |
|---|
| 318 | + async for backup_line in stream_command_host( |
|---|
| 319 | + [OPS_CLI, "backup", project, env], |
|---|
| 404 | 320 | timeout=_BACKUP_TIMEOUT, |
|---|
| 321 | + op_id=op_id, |
|---|
| 405 | 322 | ): |
|---|
| 406 | | - if line.startswith("[stderr] "): |
|---|
| 407 | | - yield _line(line) |
|---|
| 408 | | - else: |
|---|
| 409 | | - yield _line(f"[recreate] {line}") |
|---|
| 323 | + yield _line(f"[recreate] {backup_line}") |
|---|
| 410 | 324 | |
|---|
| 411 | | - # Step 5: Verify containers came up |
|---|
| 412 | | - containers = await _find_containers_for_service(project, env) |
|---|
| 325 | + if op_id and is_cancelled(op_id): |
|---|
| 326 | + yield _line(f"[recreate] Cancelled during safety backup. No data was lost.") |
|---|
| 327 | + yield _done(False, project, env, "recreate", cancelled=True) |
|---|
| 328 | + return |
|---|
| 329 | + |
|---|
| 330 | + yield _line(f"[recreate] Safety backup complete.") |
|---|
| 331 | + |
|---|
| 332 | + for label, cdir in compose_dirs: |
|---|
| 333 | + # Step 2: docker compose down --volumes (removes named volumes) |
|---|
| 334 | + yield _line(f"[recreate] WARNING: Running docker compose down --volumes for {label} (data will be wiped)...") |
|---|
| 335 | + result = await run_command_host( |
|---|
| 336 | + _compose_cmd_for(cdir, env) + ["down", "--volumes"], |
|---|
| 337 | + timeout=120, |
|---|
| 338 | + ) |
|---|
| 339 | + for output_line in (result["output"] + result["error"]).strip().splitlines(): |
|---|
| 340 | + if output_line.strip(): |
|---|
| 341 | + yield _line(output_line) |
|---|
| 342 | + |
|---|
| 343 | + if not result["success"]: |
|---|
| 344 | + yield _line(f"[error] docker compose down --volumes failed for {label}") |
|---|
| 345 | + yield _done(False, project, env, "recreate") |
|---|
| 346 | + return |
|---|
| 347 | + |
|---|
| 348 | + yield _line(f"[recreate] {label} containers and volumes removed.") |
|---|
| 349 | + |
|---|
| 350 | + if op_id and is_cancelled(op_id): |
|---|
| 351 | + yield _line(f"[recreate] Cancelled after volume wipe of {label}. Restore a backup to recover.") |
|---|
| 352 | + yield _done(False, project, env, "recreate", cancelled=True) |
|---|
| 353 | + return |
|---|
| 354 | + |
|---|
| 355 | + # Step 3: docker compose up -d --build |
|---|
| 356 | + yield _line(f"[recreate] Building and starting fresh {label}...") |
|---|
| 357 | + async for build_line in stream_command_host( |
|---|
| 358 | + _compose_cmd_for(cdir, env) + ["up", "-d", "--build"], |
|---|
| 359 | + timeout=_BACKUP_TIMEOUT, |
|---|
| 360 | + op_id=op_id, |
|---|
| 361 | + ): |
|---|
| 362 | + yield _line(f"[recreate] {build_line}") |
|---|
| 363 | + |
|---|
| 364 | + if op_id and is_cancelled(op_id): |
|---|
| 365 | + yield _line(f"[recreate] Cancelled during build/start of {label}.") |
|---|
| 366 | + yield _done(False, project, env, "recreate", cancelled=True) |
|---|
| 367 | + return |
|---|
| 368 | + |
|---|
| 369 | + # Verify containers came up |
|---|
| 370 | + containers = await _find_containers(project, env) |
|---|
| 413 | 371 | if containers: |
|---|
| 414 | 372 | yield _line(f"[recreate] {len(containers)} container(s) running. Restore a backup to complete recovery.") |
|---|
| 415 | 373 | yield _done(True, project, env, "recreate") |
|---|
| 416 | 374 | else: |
|---|
| 417 | | - yield _line(f"[warn] No containers found after recreate — check docker compose logs") |
|---|
| 418 | | - yield _done(True, project, env, "recreate") |
|---|
| 375 | + yield _line(f"[warn] No running containers after recreate — check compose logs") |
|---|
| 376 | + yield _done(False, project, env, "recreate") |
|---|
| 419 | 377 | |
|---|
| 420 | 378 | |
|---|
| 421 | 379 | # --------------------------------------------------------------------------- |
|---|
| 422 | | -# Dispatch wrapper |
|---|
| 380 | +# Dispatch |
|---|
| 423 | 381 | # --------------------------------------------------------------------------- |
|---|
| 424 | 382 | |
|---|
async def _op_generator(project: str, env: str, action: str) -> AsyncGenerator[str, None]:
    """Dispatch *action* to the matching operation generator and relay its SSE chunks.

    A fresh op_id is allocated for the operation so downstream streaming helpers
    can observe cancellation; the cancellation flag is always cleared on exit via
    ``clear_cancelled``, even if the stream is abandoned mid-operation.

    Unknown actions produce a single error line followed by a failed done event.
    """
    op_id = new_op_id()
    # Map of valid actions to their operation generators.
    dispatch = {
        "restart": _op_restart,
        "rebuild": _op_rebuild,
        "recreate": _op_recreate,
    }
    try:
        operation = dispatch.get(action)
        if operation is None:
            yield _line(f"[error] Unknown action '{action}'. Valid: restart, rebuild, recreate")
            yield _done(False, project, env, action)
            return
        async for chunk in operation(project, env, op_id=op_id):
            yield chunk
    finally:
        # Always drop the cancellation flag so a reused op_id slot starts clean.
        clear_cancelled(op_id)
|---|
| 443 | 400 | |
|---|
| 444 | 401 | |
|---|
| 445 | 402 | # --------------------------------------------------------------------------- |
|---|
| .. | .. |
|---|
| 463 | 420 | Stream a container lifecycle operation via SSE. |
|---|
| 464 | 421 | |
|---|
| 465 | 422 | - restart: docker restart containers (safe, fast) |
|---|
| 466 | | - - rebuild: stop via Coolify, rebuild image, start via Coolify |
|---|
| 467 | | - - recreate: stop, wipe data, rebuild image, start (destructive — DR only) |
|---|
| 423 | + - rebuild: docker compose down && up --build (no data loss) |
|---|
| 424 | + - recreate: docker compose down --volumes && up --build (destructive — DR only) |
|---|
| 468 | 425 | """ |
|---|
| 469 | 426 | return StreamingResponse( |
|---|
| 470 | 427 | _op_generator(project, env, action), |
|---|