version: "3.8"

# ci: redeploy trigger (frontend)
services:
  # Web application container, exposed through Traefik on port 3000 (HTTP)
  # and port 3030 (chat WebSocket).
  web:
    image: sistema_web:node22-bun
    # Runs as root so the boot script is able to use apt-get (Node 22, toolchain).
    user: "root"
    working_dir: /app
    command: >
      bash -lc "bash /app/scripts/start-web.sh"
    volumes:
      - ${APP_DIR:-/srv/apps/sistema}:/app
      - sistema_db:/app/data
    environment:
      NODE_ENV: "production"
      BUN_INSTALL_CACHE_DIR: "/tmp/bun-cache"
      # Ensures devDependencies are installed for the build (prisma CLI).
      NPM_CONFIG_PRODUCTION: "false"
      SKIP_AUTH_SEED: "true"
      # IMPORTANT: "NEXT_PUBLIC_*" variables are consumed by the browser (client).
      # Use the public Convex URL. Do not use the internal Swarm hostname here,
      # because the browser cannot resolve it.
      NEXT_PUBLIC_CONVEX_URL: "${NEXT_PUBLIC_CONVEX_URL}"
      # URLs consumed only by the backend/SSR may use the internal hostname.
      CONVEX_INTERNAL_URL: "http://convex_backend:3210"
      # Public app URLs (avoids falling back to localhost).
      NEXT_PUBLIC_APP_URL: "${NEXT_PUBLIC_APP_URL}"
      BETTER_AUTH_URL: "${BETTER_AUTH_URL}"
      BETTER_AUTH_SECRET: "${BETTER_AUTH_SECRET}"
      REPORTS_CRON_SECRET: "${REPORTS_CRON_SECRET}"
      REPORTS_CRON_BASE_URL: "${REPORTS_CRON_BASE_URL}"
      CHAT_WS_PORT: "${CHAT_WS_PORT:-3030}"
      # Keeps the SQLite database outside the repository checkout.
      DATABASE_URL: "file:/app/data/db.sqlite"
      # Skips apt-get at startup because the image already ships the toolchain.
      SKIP_APT_BOOTSTRAP: "true"
      # Always revalidates/rebuilds better-sqlite3 for the current runtime.
      SKIP_SQLITE_REBUILD: "false"
      # Used to force a new rollout on every deploy (set by CI).
      RELEASE_SHA: "${RELEASE_SHA:-dev}"
    deploy:
      mode: replicated
      replicas: 1
      update_config:
        parallelism: 1
        # start-first avoids downtime: the new task starts before the old one stops.
        # NOTE(review): old and new tasks briefly overlap and both mount
        # sistema_db (SQLite file) - confirm concurrent access is acceptable.
        order: start-first
        failure_action: rollback
        # Delay between updates so the healthcheck has time to pass.
        delay: 10s
        # Monitor: how long Swarm watches the task after the update for stability.
        monitor: 30s
      rollback_config:
        order: start-first
      resources:
        limits:
          memory: "2G"
      restart_policy:
        condition: any
      placement:
        constraints:
          - node.role == manager
      labels:
        - traefik.enable=true
        - traefik.docker.network=traefik_public
        - traefik.http.routers.sistema_web.rule=Host(`tickets.esdrasrenan.com.br`)
        - traefik.http.routers.sistema_web.entrypoints=websecure
        - traefik.http.routers.sistema_web.tls=true
        - traefik.http.routers.sistema_web.tls.certresolver=le
        - traefik.http.services.sistema_web.loadbalancer.server.port=3000
        # Dedicated router for the chat WebSocket.
        - traefik.http.routers.sistema_web_ws.rule=Host(`tickets.esdrasrenan.com.br`) && PathPrefix(`/chat-ws`)
        - traefik.http.routers.sistema_web_ws.entrypoints=websecure
        - traefik.http.routers.sistema_web_ws.tls=true
        - traefik.http.routers.sistema_web_ws.tls.certresolver=le
        - traefik.http.routers.sistema_web_ws.service=sistema_web_ws
        - traefik.http.services.sistema_web_ws.loadbalancer.server.port=3030
    networks:
      - traefik_public
    healthcheck:
      # Checks that the server actually answers on /api/health.
      # The request is bounded with AbortSignal.timeout(2000): Node's built-in
      # fetch has no `timeout` option, so the abort signal is what enforces the
      # 2 s limit. An abort rejects the promise and the check exits with 1.
      test:
        - "CMD"
        - "node"
        - "-e"
        - "fetch('http://localhost:3000/api/health',{signal:AbortSignal.timeout(2000)}).then(r=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))"
      interval: 10s
      timeout: 5s
      retries: 3
      # start_period: startup grace time before failures start counting.
      # The new container only enters service AFTER passing the healthcheck.
      start_period: 180s

  # Convex backend, reachable internally as convex_backend:3210 and publicly
  # through Traefik at convex.esdrasrenan.com.br.
  convex_backend:
    image: sistema_convex_backend:1.29.2
    stop_grace_period: 10s
    stop_signal: SIGINT
    volumes:
      - convex_data:/convex/data
    environment:
      - RUST_LOG=info
      - CONVEX_CLOUD_ORIGIN=https://convex.esdrasrenan.com.br
      - CONVEX_SITE_ORIGIN=https://convex.esdrasrenan.com.br
      # Machine provisioning (used by the Convex functions).
      - MACHINE_PROVISIONING_SECRET=${MACHINE_PROVISIONING_SECRET}
      - MACHINE_TOKEN_TTL_MS=${MACHINE_TOKEN_TTL_MS:-2592000000}
      - FLEET_SYNC_SECRET=${FLEET_SYNC_SECRET:-}
      - REPORTS_CRON_SECRET=${REPORTS_CRON_SECRET}
      - REPORTS_CRON_BASE_URL=${REPORTS_CRON_BASE_URL}
      - REPORTS_CRON_ENABLED=${REPORTS_CRON_ENABLED:-false}
    deploy:
      mode: replicated
      replicas: 1
      update_config:
        parallelism: 1
        # NOTE(review): with start-first, old and new tasks briefly overlap and
        # both mount convex_data - confirm the backend tolerates this.
        order: start-first
        failure_action: rollback
        # Delay and monitor to ensure zero-downtime updates.
        delay: 10s
        monitor: 30s
      resources:
        limits:
          # High memory limit to avoid OOM restarts (exit code 137) under
          # report / index workloads.
          memory: "16G"
        reservations:
          memory: "4G"
      restart_policy:
        condition: any
      placement:
        constraints:
          - node.role == manager
      labels:
        - traefik.enable=true
        - traefik.docker.network=traefik_public
        - traefik.http.routers.sistema_convex.rule=Host(`convex.esdrasrenan.com.br`)
        - traefik.http.routers.sistema_convex.entrypoints=websecure
        - traefik.http.routers.sistema_convex.tls=true
        - traefik.http.routers.sistema_convex.tls.certresolver=le
        - traefik.http.services.sistema_convex.loadbalancer.server.port=3210
    networks:
      - traefik_public
    healthcheck:
      # Healthy when the backend's /version endpoint answers successfully.
      test: ["CMD-SHELL", "curl -sf http://localhost:3210/version >/dev/null || exit 1"]
      interval: 15s
      timeout: 5s
      retries: 3
      start_period: 60s

  # Convex dashboard; defined but scaled to zero replicas.
  convex_dashboard:
    # NOTE(review): ":latest" is an unpinned tag - consider pinning a version
    # for reproducible deploys.
    image: ghcr.io/get-convex/convex-dashboard:latest
    environment:
      - NEXT_PUBLIC_DEPLOYMENT_URL=https://convex.esdrasrenan.com.br
    deploy:
      mode: replicated
      replicas: 0
      placement:
        constraints:
          - node.role == manager
      labels:
        - traefik.enable=true
        - traefik.docker.network=traefik_public
        - traefik.http.routers.sistema_convex_admin.rule=Host(`convex-admin.esdrasrenan.com.br`)
        - traefik.http.routers.sistema_convex_admin.entrypoints=websecure
        - traefik.http.routers.sistema_convex_admin.tls=true
        - traefik.http.routers.sistema_convex_admin.tls.certresolver=le
        - traefik.http.services.sistema_convex_admin.loadbalancer.server.port=6791
    networks:
      - traefik_public

volumes:
  sistema_db:
  convex_data:

networks:
  traefik_public:
    external: true