diff --git a/docs/OPERACAO-PRODUCAO.md b/docs/OPERACAO-PRODUCAO.md
index 541cdf5..4533411 100644
--- a/docs/OPERACAO-PRODUCAO.md
+++ b/docs/OPERACAO-PRODUCAO.md
@@ -97,6 +97,36 @@ Acesso
 - App: `https://tickets.esdrasrenan.com.br`
 - Convex: `https://convex.esdrasrenan.com.br` (o importante é o WebSocket do cliente conectar; o path `/version` responde para sanity‑check)
 
+## Zero‑downtime (sem queda durante deploy)
+
+Para evitar interrupção perceptível no deploy, habilitamos rollout "start-first" e múltiplas réplicas nos serviços web e Convex. O Traefik continua roteando para as tasks saudáveis enquanto uma task reinicia.
+
+O `stack.yml` já inclui:
+- `replicas: 2` para `web` e `convex_backend`.
+- `update_config.order: start-first` + `failure_action: rollback`.
+- `healthcheck` por porta local, garantindo que o Swarm só troque quando a nova task estiver OK.
+
+Se quiser ajustar recursos/estratégia:
+```yaml
+deploy:
+  replicas: 2
+  update_config:
+    parallelism: 1
+    order: start-first
+    failure_action: rollback
+  restart_policy:
+    condition: any
+healthcheck:
+  # web
+  test: ["CMD", "node", "-e", "fetch('http://localhost:3000').then(r=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))"]
+  interval: 10s
+  timeout: 3s
+  retries: 5
+  start_period: 30s
+```
+
+Observação: o CI já força `docker service update --force` após `docker stack deploy` e passa `RELEASE_SHA` no ambiente para variar a spec em todo commit, assegurando rollout.
+
 ### Dashboard (opcional)
 
 Você pode expor o painel do Convex para inspeção em produção.
diff --git a/stack.yml b/stack.yml
index b30f95d..9e3183e 100644
--- a/stack.yml
+++ b/stack.yml
@@ -26,7 +26,13 @@ services:
       RELEASE_SHA: "${RELEASE_SHA:-dev}"
     deploy:
       mode: replicated
-      replicas: 1
+      replicas: 2  # a second task keeps serving while the other is replaced
+      update_config:
+        parallelism: 1  # replace one task at a time
+        order: start-first  # start the new task before stopping the old one
+        failure_action: rollback  # revert to the previous spec if the update fails
+      restart_policy:
+        condition: any
       placement:
         constraints:
           - node.role == manager
@@ -40,6 +46,12 @@ services:
         - traefik.http.services.sistema_web.loadbalancer.server.port=3000
     networks:
       - traefik_public
+    healthcheck:  # probes 127.0.0.1, not "localhost", which may resolve to ::1 first
+      test: ["CMD", "node", "-e", "fetch('http://127.0.0.1:3000').then(r=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))"]
+      interval: 10s
+      timeout: 3s
+      retries: 5
+      start_period: 30s  # failures during startup do not count toward retries
 
   convex_backend:
     image: ghcr.io/get-convex/convex-backend:latest
@@ -53,7 +65,13 @@ services:
       - CONVEX_SITE_ORIGIN=https://convex.esdrasrenan.com.br
     deploy:
       mode: replicated
-      replicas: 1
+      replicas: 2  # NOTE(review): confirm the backend tolerates two concurrent tasks on its storage
+      update_config:
+        parallelism: 1  # replace one task at a time
+        order: start-first  # start the new task before stopping the old one
+        failure_action: rollback  # revert to the previous spec if the update fails
+      restart_policy:
+        condition: any
       placement:
         constraints:
           - node.role == manager
@@ -67,6 +85,12 @@ services:
         - traefik.http.services.sistema_convex.loadbalancer.server.port=3210
     networks:
       - traefik_public
+    healthcheck:  # /version is the backend's sanity-check endpoint
+      test: ["CMD-SHELL", "curl -sf http://localhost:3210/version >/dev/null || exit 1"]
+      interval: 10s
+      timeout: 3s
+      retries: 5
+      start_period: 20s  # failures during startup do not count toward retries
 
   convex_dashboard:
     image: ghcr.io/get-convex/convex-dashboard:latest