From 0a6b808d99d1eabc75328e611292757316cfa1e5 Mon Sep 17 00:00:00 2001
From: rever-tecnologia
Date: Wed, 10 Dec 2025 14:43:13 -0300
Subject: [PATCH] feat: add health dashboard and local ticket archive

---
 .env.example                                  |   6 +
 apps/desktop/.env.example                     |   4 +
 apps/desktop/scripts/tauri-with-stub.mjs      |   9 +
 convex/_generated/api.d.ts                    |   2 +
 convex/machines.ts                            | 203 +++++++++++++-----
 convex/ops.ts                                 |  76 +++++++
 convex/tickets.ts                             |  65 ++++++
 docs/OPERATIONS.md                            |  10 +-
 docs/RETENTION-HEALTH.md                      |  41 ++++
 src/app/admin/health/page.tsx                 | 201 +++++++++++++++++
 .../api/admin/tickets/archive-local/route.ts  |  52 +++++
 src/lib/env.ts                                |   4 +
 src/lib/retention.ts                          |  40 ++++
 src/server/archive/local-tickets.ts           |  73 +++++++
 src/server/health.ts                          |  98 +++++++++
 15 files changed, 824 insertions(+), 60 deletions(-)
 create mode 100644 convex/ops.ts
 create mode 100644 docs/RETENTION-HEALTH.md
 create mode 100644 src/app/admin/health/page.tsx
 create mode 100644 src/app/api/admin/tickets/archive-local/route.ts
 create mode 100644 src/lib/retention.ts
 create mode 100644 src/server/archive/local-tickets.ts
 create mode 100644 src/server/health.ts

diff --git a/.env.example b/.env.example
index 7ae99f3..bee775b 100644
--- a/.env.example
+++ b/.env.example
@@ -12,6 +12,12 @@ NEXT_PUBLIC_CONVEX_URL=http://127.0.0.1:3210
 CONVEX_INTERNAL_URL=http://127.0.0.1:3210
 # Intervalo (ms) para aceitar token revogado ao sincronizar acessos remotos (opcional)
 REMOTE_ACCESS_TOKEN_GRACE_MS=900000
+# Token interno opcional para o dashboard de saude (/admin/health) e queries internas
+INTERNAL_HEALTH_TOKEN=dev-health-token
+# Segredo para crons HTTP (reutilize em prod se preferir um unico token)
+REPORTS_CRON_SECRET=reports-cron-secret
+# Diretorio para arquivamento local de tickets (JSONL/backup)
+ARCHIVE_DIR=./archives
 # SQLite database (local dev)
 DATABASE_URL=file:./prisma/db.dev.sqlite

diff --git a/apps/desktop/.env.example b/apps/desktop/.env.example
index aae00ba..ccc9808 100644
--- a/apps/desktop/.env.example
+++ b/apps/desktop/.env.example
@@ -13,6 +13,10 @@ VITE_API_BASE_URL=
 VITE_RUSTDESK_CONFIG_STRING=
 VITE_RUSTDESK_DEFAULT_PASSWORD=FMQ9MA>e73r.FI

diff --git a/convex/machines.ts b/convex/machines.ts

 function isObject(value: unknown): value is Record<string, unknown> {
   return Boolean(value) && typeof value === "object" && !Array.isArray(value)
 }
+type JsonPrimitive = string | number | boolean | null
+type JsonValue = JsonPrimitive | JsonValue[] | { [key: string]: JsonValue }
+type JsonRecord = Record<string, JsonValue>
+
+const MAX_JSON_DEPTH = 6
+const MAX_ARRAY_LENGTH = 200
+
+function sanitizeJsonValue(value: unknown, depth = 0): JsonValue | undefined {
+  if (value === null) return null
+  if (typeof value === "string") return value
+  if (typeof value === "number") return Number.isFinite(value) ? value : undefined
+  if (typeof value === "boolean") return value
+  if (depth >= MAX_JSON_DEPTH) return undefined
+  if (Array.isArray(value)) {
+    const items: JsonValue[] = []
+    for (const entry of value.slice(0, MAX_ARRAY_LENGTH)) {
+      const sanitized = sanitizeJsonValue(entry, depth + 1)
+      if (sanitized !== undefined) {
+        items.push(sanitized)
+      }
+    }
+    return items
+  }
+  if (isObject(value)) {
+    const result: JsonRecord = {}
+    for (const [key, entry] of Object.entries(value)) {
+      const sanitized = sanitizeJsonValue(entry, depth + 1)
+      if (sanitized !== undefined) {
+        result[key] = sanitized
+      }
+    }
+    return result
+  }
+  return undefined
+}
+
+function sanitizeRecord(value: unknown): JsonRecord | null {
+  const sanitized = sanitizeJsonValue(value)
+  // Apenas objetos simples contam como record; primitivos e arrays sao descartados
+  if (!sanitized || typeof sanitized !== "object" || Array.isArray(sanitized)) return null
+  return sanitized
+}
+
+function stableSerialize(value: JsonValue): string {
+  if (value === null) return "null"
+  if (typeof value !== "object") return JSON.stringify(value)
+  if (Array.isArray(value)) {
+    return `[${value.map((item) => stableSerialize(item)).join(",")}]`
+  }
+  const entries = Object.keys(value)
+    .sort()
+    .map((key) => `${JSON.stringify(key)}:${stableSerialize((value as JsonRecord)[key]!)}`)
+  return `{${entries.join(",")}}`
+}
+
+function hashJson(value: JsonRecord | null): string | null {
+  if (!value) return null
+  const serialized = stableSerialize(value)
+  return toHex(sha256(utf8(serialized)))
+}
+
+function areJsonValuesEqual(a: JsonValue | undefined, b: JsonValue | undefined): boolean {
+  if (a === b) return true
+  if (a === undefined || b === undefined) return false
+  return stableSerialize(a) === stableSerialize(b)
+}
+
 // Busca o lastHeartbeatAt da tabela machineHeartbeats (fonte de verdade)
 // Fallback para machine.lastHeartbeatAt para retrocompatibilidade durante migracao
 async function getMachineLastHeartbeat(
@@ -269,48 +335,50 @@ async function getMachineLastHeartbeat(
 // para evitar OOM no Convex (documentos de ~100KB cada)
 const INVENTORY_BLOCKLIST = new Set(["software", "extended"])

-function mergeInventory(current: unknown, patch: unknown): unknown {
-  if (!isObject(patch)) {
-    return patch
+function mergeInventory(current: JsonRecord | null | undefined, patch: Record<string, unknown>): JsonRecord {
+  const sanitizedPatch = sanitizeRecord(patch)
+  if (!sanitizedPatch) {
+    return current ? { ...current } : {}
   }
-  const base: Record<string, unknown> = isObject(current) ? { ...(current as Record<string, unknown>) } : {}
-  for (const [key, value] of Object.entries(patch)) {
-    if (value === undefined) continue
+  const base: JsonRecord = current ? { ...current } : {}
+  for (const [key, value] of Object.entries(sanitizedPatch)) {
     // Filtrar campos volumosos que causam OOM
     if (INVENTORY_BLOCKLIST.has(key)) continue
-    if (isObject(value) && isObject(base[key])) {
-      base[key] = mergeInventory(base[key], value)
-    } else {
-      base[key] = value
-    }
-  }
-  return base
-}
-
-function mergeMetadata(current: unknown, patch: Record<string, unknown>) {
-  const base: Record<string, unknown> = isObject(current) ? { ...(current as Record<string, unknown>) } : {}
-  for (const [key, value] of Object.entries(patch)) {
     if (value === undefined) continue
-    if (key === "inventory") {
-      base[key] = mergeInventory(base[key], value)
-    } else if (isObject(value) && isObject(base[key])) {
-      base[key] = mergeInventory(base[key], value)
+    if (isObject(value) && isObject(base[key])) {
+      base[key] = mergeInventory(base[key] as JsonRecord, value as Record<string, unknown>)
     } else {
-      base[key] = value
+      base[key] = value as JsonValue
     }
   }
   return base
 }

-type JsonRecord = Record<string, unknown>
+function mergeMetadata(current: unknown, patch: Record<string, unknown>): JsonRecord {
+  const base: JsonRecord = sanitizeRecord(current) ?? {}
+  const sanitizedPatch = sanitizeRecord(patch) ?? {}
+  for (const [key, value] of Object.entries(sanitizedPatch)) {
+    if (value === undefined) continue
+    if (key === "inventory" && isObject(value)) {
+      base[key] = mergeInventory(sanitizeRecord(base[key]), value as Record<string, unknown>)
+    } else if (isObject(value) && isObject(base[key])) {
+      base[key] = mergeInventory(sanitizeRecord(base[key]), value as Record<string, unknown>)
+    } else {
+      base[key] = value as JsonValue
+    }
+  }
+  return base
+}

 function ensureRecord(value: unknown): JsonRecord | null {
-  return isObject(value) ? (value as JsonRecord) : null
+  return sanitizeRecord(value)
 }

 function ensureRecordArray(value: unknown): JsonRecord[] {
   if (!Array.isArray(value)) return []
-  return value.filter(isObject) as JsonRecord[]
+  return value
+    .map((entry) => sanitizeRecord(entry))
+    .filter((entry): entry is JsonRecord => Boolean(entry))
 }

 function ensureFiniteNumber(value: unknown): number | null {
@@ -322,6 +390,19 @@ function ensureString(value: unknown): string | null {
   return typeof value === "string" ? value : null
 }

+function sanitizeInventoryPayload(value: unknown): JsonRecord | null {
+  const record = sanitizeRecord(value)
+  if (!record) return null
+  for (const blocked of INVENTORY_BLOCKLIST) {
+    delete record[blocked]
+  }
+  return record
+}
+
+function sanitizeMetricsPayload(value: unknown): JsonRecord | null {
+  return sanitizeRecord(value)
+}
+
 function getNestedRecord(root: JsonRecord | null, ...keys: string[]): JsonRecord | null {
   let current: JsonRecord | null = root
   for (const key of keys) {
@@ -833,9 +914,9 @@ export const heartbeat = mutation({
         architecture: v.optional(v.string()),
       })
     ),
-    metrics: v.optional(v.any()),
-    inventory: v.optional(v.any()),
-    metadata: v.optional(v.any()),
+    metrics: v.optional(v.record(v.string(), v.any())),
+    inventory: v.optional(v.record(v.string(), v.any())),
+    metadata: v.optional(v.record(v.string(), v.any())),
   },
   handler: async (ctx, args) => {
     const { machine, token } = await getActiveToken(ctx, args.machineToken)
@@ -857,41 +938,40 @@ export const heartbeat = mutation({
     // 2. Preparar patch de metadata (se houver mudancas REAIS)
     // IMPORTANTE: So incluimos no patch se os dados realmente mudaram
     // Isso evita criar versoes desnecessarias do documento machines
-    const metadataPatch: Record<string, unknown> = {}
-    const currentMetadata = (machine.metadata ?? {}) as Record<string, unknown>
+    const metadataPatch: JsonRecord = {}
+    const currentMetadata = ensureRecord(machine.metadata) ?? {}
+    const incomingMeta = ensureRecord(args.metadata)
+    const remoteAccessSnapshot = incomingMeta ? ensureRecord(incomingMeta["remoteAccessSnapshot"]) : null

-    if (args.metadata && typeof args.metadata === "object") {
+    if (incomingMeta) {
       // Filtrar apenas campos que realmente mudaram
-      const incomingMeta = args.metadata as Record<string, unknown>
-      for (const key of Object.keys(incomingMeta)) {
-        if (key !== "inventory" && key !== "metrics" && key !== "remoteAccessSnapshot") {
-          if (JSON.stringify(incomingMeta[key]) !== JSON.stringify(currentMetadata[key])) {
-            metadataPatch[key] = incomingMeta[key]
-          }
+      for (const [key, value] of Object.entries(incomingMeta)) {
+        if (key === "inventory" || key === "metrics" || key === "remoteAccessSnapshot" || key === "inventoryHash" || key === "metricsHash") {
+          continue
+        }
+        const currentValue = currentMetadata[key] as JsonValue | undefined
+        if (!areJsonValuesEqual(value as JsonValue, currentValue)) {
+          metadataPatch[key] = value as JsonValue
         }
       }
     }

-    const remoteAccessSnapshot = (args.metadata as Record<string, unknown> | undefined)?.["remoteAccessSnapshot"]
-
-    // Inventory: so incluir se realmente mudou
-    if (args.inventory && typeof args.inventory === "object") {
-      const currentInventory = currentMetadata.inventory as Record<string, unknown> | undefined
-      const newInventoryStr = JSON.stringify(args.inventory)
-      const currentInventoryStr = JSON.stringify(currentInventory ?? {})
-      if (newInventoryStr !== currentInventoryStr) {
-        metadataPatch.inventory = mergeInventory(currentInventory, args.inventory as Record<string, unknown>)
-      }
+    const sanitizedInventory = sanitizeInventoryPayload(args.inventory)
+    const currentInventory = ensureRecord(currentMetadata.inventory)
+    const incomingInventoryHash = hashJson(sanitizedInventory)
+    const currentInventoryHash = typeof currentMetadata["inventoryHash"] === "string" ? currentMetadata["inventoryHash"] : null
+    if (sanitizedInventory && incomingInventoryHash && incomingInventoryHash !== currentInventoryHash) {
+      metadataPatch.inventory = mergeInventory(currentInventory, sanitizedInventory)
+      metadataPatch.inventoryHash = incomingInventoryHash
     }

-    // Metrics: so incluir se realmente mudou
-    if (args.metrics && typeof args.metrics === "object") {
-      const currentMetrics = currentMetadata.metrics as Record<string, unknown> | undefined
-      const newMetricsStr = JSON.stringify(args.metrics)
-      const currentMetricsStr = JSON.stringify(currentMetrics ?? {})
-      if (newMetricsStr !== currentMetricsStr) {
-        metadataPatch.metrics = args.metrics as Record<string, unknown>
-      }
+    const sanitizedMetrics = sanitizeMetricsPayload(args.metrics)
+    const currentMetrics = ensureRecord(currentMetadata.metrics)
+    const incomingMetricsHash = hashJson(sanitizedMetrics)
+    const currentMetricsHash = typeof currentMetadata["metricsHash"] === "string" ? currentMetadata["metricsHash"] : null
+    if (sanitizedMetrics && incomingMetricsHash && incomingMetricsHash !== currentMetricsHash) {
+      metadataPatch.metrics = sanitizedMetrics
+      metadataPatch.metricsHash = incomingMetricsHash
     }

     // 3. Verificar se ha mudancas reais nos dados que justifiquem atualizar o documento machines
@@ -902,13 +982,14 @@ export const heartbeat = mutation({
       args.os.version !== machine.osVersion ||
       args.os.architecture !== machine.architecture
     )
-    const hasStatusChange = args.status && args.status !== machine.status
+    const hasStatusChange = typeof args.status === "string" && args.status !== machine.status

     const needsMachineUpdate = hasMetadataChanges || hasHostnameChange || hasOsChange || hasStatusChange

     // 4. So atualizar machines se houver mudancas reais (evita criar versoes desnecessarias)
     // NOTA: lastHeartbeatAt agora vive na tabela machineHeartbeats, nao atualizamos mais aqui
     if (needsMachineUpdate) {
       const mergedMetadata = hasMetadataChanges ? mergeMetadata(machine.metadata, metadataPatch) : machine.metadata
+      const nextStatus = args.status ?? machine.status ?? (sanitizedMetrics ? "online" : "unknown")

       await ctx.db.patch(machine._id, {
         hostname: args.hostname ?? machine.hostname,
@@ -920,7 +1001,7 @@ export const heartbeat = mutation({
         deviceType: machine.deviceType ?? "desktop",
         managementMode: machine.managementMode ?? "agent",
         updatedAt: now,
-        status: args.status ?? "online",
+        status: nextStatus,
         metadata: mergedMetadata,
       })
     }
@@ -937,7 +1018,11 @@ export const heartbeat = mutation({
     // Evaluate posture/alerts & optionally create ticket
     const fresh = needsMachineUpdate ? (await ctx.db.get(machine._id)) as Doc<"machines"> : machine
-    await evaluatePostureAndMaybeRaise(ctx, fresh, { metrics: args.metrics, inventory: args.inventory, metadata: args.metadata })
+    await evaluatePostureAndMaybeRaise(ctx, fresh, {
+      metrics: sanitizedMetrics ?? null,
+      inventory: sanitizedInventory ?? null,
+      metadata: incomingMeta ?? null,
+    })

     return {
       ok: true,
diff --git a/convex/ops.ts b/convex/ops.ts
new file mode 100644
index 0000000..c50e795
--- /dev/null
+++ b/convex/ops.ts
@@ -0,0 +1,76 @@
+import { ConvexError, v } from "convex/values"
+
+import { query } from "./_generated/server"
+import { getOfflineThresholdMs, getStaleThresholdMs } from "./machines"
+
+const MACHINE_SCAN_LIMIT = 1200
+
+export const healthSnapshot = query({
+  args: {
+    token: v.optional(v.string()),
+  },
+  handler: async (ctx, args) => {
+    const requiredToken = process.env["INTERNAL_HEALTH_TOKEN"] ?? process.env["REPORTS_CRON_SECRET"] ?? null
+    if (requiredToken && args.token !== requiredToken) {
+      throw new ConvexError("Nao autorizado")
+    }
+
+    const now = Date.now()
+    const offlineMs = getOfflineThresholdMs()
+    const staleMs = getStaleThresholdMs(offlineMs)
+
+    const machines = await ctx.db.query("machines").take(MACHINE_SCAN_LIMIT)
+    const heartbeats = await ctx.db.query("machineHeartbeats").collect()
+
+    let online = 0
+    let warning = 0
+    let offline = 0
+    let newest = 0
+    let oldest = 0
+    const withHeartbeat = new Set<string>()
+
+    for (const hb of heartbeats) {
+      const ageMs = now - hb.lastHeartbeatAt
+      withHeartbeat.add(String(hb.machineId))
+      if (newest === 0 || hb.lastHeartbeatAt > newest) {
+        newest = hb.lastHeartbeatAt
+      }
+      if (oldest === 0 || hb.lastHeartbeatAt < oldest) {
+        oldest = hb.lastHeartbeatAt
+      }
+      if (ageMs <= offlineMs) {
+        online += 1
+      } else if (ageMs <= staleMs) {
+        warning += 1
+      } else {
+        offline += 1
+      }
+    }
+
+    const withoutHeartbeat = machines.length - withHeartbeat.size
+    const totalOffline = offline + (withoutHeartbeat > 0 ? withoutHeartbeat : 0)
+
+    return {
+      totals: {
+        machines: machines.length,
+        heartbeats: heartbeats.length,
+        withoutHeartbeat: withoutHeartbeat > 0 ? withoutHeartbeat : 0,
+        truncated: machines.length === MACHINE_SCAN_LIMIT,
+      },
+      connectivity: {
+        online,
+        warning,
+        offline: totalOffline,
+      },
+      heartbeatAgeMs: {
+        newest: newest ? now - newest : null,
+        oldest: oldest ? now - oldest : null,
+      },
+      thresholds: {
+        offlineMs,
+        staleMs,
+      },
+      generatedAt: now,
+    }
+  },
+})
diff --git a/convex/tickets.ts b/convex/tickets.ts
index e8ad68e..5bc60f5 100644
--- a/convex/tickets.ts
+++ b/convex/tickets.ts
@@ -5028,3 +5028,68 @@ export const listPaginated = query({
     };
   },
 })
+
+// Exporta tickets resolvidos para arquivamento externo (somente com segredo)
+export const exportForArchive = query({
+  args: {
+    tenantId: v.string(),
+    before: v.number(), // timestamp ms
+    limit: v.optional(v.number()),
+    secret: v.optional(v.string()),
+  },
+  handler: async (ctx, args) => {
+    const allowedSecret = process.env["INTERNAL_HEALTH_TOKEN"] ?? process.env["REPORTS_CRON_SECRET"]
+    if (allowedSecret && args.secret !== allowedSecret) {
+      throw new ConvexError("Nao autorizado")
+    }
+    const cutoff = args.before
+    const limit = Math.min(args.limit ?? 50, 200)
+    const candidates = await ctx.db
+      .query("tickets")
+      .withIndex("by_tenant_resolved", (q) => q.eq("tenantId", args.tenantId).lt("resolvedAt", cutoff))
+      .order("desc")
+      .take(limit)
+
+    const result: Array<{
+      ticket: Doc<"tickets">
+      comments: Array<Doc<"ticketComments">>
+      events: Array<Doc<"ticketEvents">>
+    }> = []
+
+    for (const t of candidates) {
+      const comments = await ctx.db
+        .query("ticketComments")
+        .withIndex("by_ticket", (q) => q.eq("ticketId", t._id))
+        .take(200)
+
+      const events = await ctx.db
+        .query("ticketEvents")
+        .withIndex("by_ticket", (q) => q.eq("ticketId", t._id))
+        .order("desc")
+        .take(200)
+
+      result.push({
+        ticket: t,
+        comments,
+        events,
+      })
+    }
+
+    return {
+      total: result.length,
+      items: result.map((item) => ({
+        ticket: item.ticket,
+        comments: item.comments.map((c) => ({
+          ...c,
+          attachments: (c.attachments ?? []).map((att) => ({
+            storageId: att.storageId,
+            name: att.name,
+            size: att.size ?? null,
+            type: att.type ?? null,
+          })),
+        })),
+        events: item.events,
+      })),
+    }
+  },
+})
diff --git a/docs/OPERATIONS.md b/docs/OPERATIONS.md
index 75a4001..6cec485 100644
--- a/docs/OPERATIONS.md
+++ b/docs/OPERATIONS.md
@@ -509,6 +509,14 @@ EOF
 └── db.sqlite3.pre-vacuum-20251209  449MB (antes do primeiro vacuum)
 ```
 
+## 14) Saude e retencao (dashboard interno)
+
+- Dashboard staff: `/admin/health` mostra tickets, cadastros, estado de heartbeat e resumo de retencao. Usa Prisma + Convex; se o Convex nao responder, exibe aviso.
+- Token interno: defina `INTERNAL_HEALTH_TOKEN` (ou reutilize `REPORTS_CRON_SECRET`) no Convex e no Next para a query `ops.healthSnapshot`.
+- Politica alvo: tickets sem expiracao; telemetria inventory/metrics 90 dias; alertas 180 dias; runs/artefatos de export 30 dias. Detalhes em `docs/RETENTION-HEALTH.md`.
+- Sem cron de limpeza ligado. Monitorar tamanho do SQLite e memoria; so limpar/arquivar em janela de manutencao com backup.
+- Backup local de tickets: `POST /api/admin/tickets/archive-local` (staff) exporta tickets resolvidos mais antigos que N dias para JSONL em `ARCHIVE_DIR` (padrao `./archives`). Protegido por `INTERNAL_HEALTH_TOKEN`/`REPORTS_CRON_SECRET`.
+
 ---
 
-Ultima atualizacao: **10/12/2025** — Problema de OOM resolvido definitivamente. Sistema estavel com 395MB de memoria (1.93% do limite de 20GB)
+Ultima atualizacao: **10/12/2025** - Problema de OOM resolvido definitivamente. Sistema estavel com 395MB de memoria (1.93% do limite de 20GB)
diff --git a/docs/RETENTION-HEALTH.md b/docs/RETENTION-HEALTH.md
new file mode 100644
index 0000000..4bcd947
--- /dev/null
+++ b/docs/RETENTION-HEALTH.md
@@ -0,0 +1,41 @@
+# Retencao, observabilidade e consulta a longo prazo
+
+Este documento resume as decisoes aplicadas agora para manter o sistema saudavel sem perder dados de negocio (tickets).
+
+## Politica de retencao (alvo)
+- Tickets: **sem expiracao automatica**. Antes de mover para storage frio, exportar e manter copia integra; nao apagar tickets direto.
+- Telemetria de maquinas (inventory/metrics): manter 90 dias.
+- Alertas de postura de maquinas: manter 180 dias.
+- Runs/artefatos de export de relatorios: manter 30 dias.
+
+Estrategia: nenhuma limpeza automatica ligada. Usamos apenas monitoramento e, se necessario no futuro, GC preguicoso em handlers de alto volume ou rotinas manuais com janela de manutencao + backup.
+
+## O que foi ajustado no codigo
+- Heartbeat: inventario e metrics agora sao saneados, tem hash estavel e so geram nova versao quando o hash muda (evita JSON.stringify pesado e escrita redundante).
+- Hashes guardados em metadata para comparacao barata; campos volumosos (software/extended) continuam bloqueados.
+- Query interna `ops.healthSnapshot` (Convex) para expor contagem de maquinas/heartbeats por faixa de idade; protegida por `INTERNAL_HEALTH_TOKEN` (cai no `REPORTS_CRON_SECRET` se nao setado).
+- Dashboard `/admin/health` (Next) usa Prisma + Convex para consolidar tickets, cadastros, estado de heartbeat e a politica de retencao.
+- Export/backup local de tickets: endpoint `POST /api/admin/tickets/archive-local` (staff) grava tickets resolvidos mais antigos que N dias em JSONL dentro de `ARCHIVE_DIR` (padrao `./archives`). Usa `exportResolvedTicketsToDisk` com segredo interno (`INTERNAL_HEALTH_TOKEN`/`REPORTS_CRON_SECRET`).
+
+## Como acessar tickets antigos sem perda
+- Base quente: Prisma (SQLite) guarda todos os tickets; nenhuma rotina remove ou trunca tickets.
+- Se um dia for preciso offload (ex.: >50k tickets):
+  - Exportar em lotes (ex.: JSONL mensais) para storage frio (S3/compat).
+  - Gravar um marcador de offload no DB quente (ex.: `ticket_archived_at`, `archive_key`).
+  - Endpoint de leitura pode, ao nao encontrar o ticket no DB quente, baixar/consultar o arquivo frio com base no `archive_key` e exibir em modo somente leitura (ver esboco mais abaixo).
+- Com isso, mesmo tickets muito antigos continuam consultaveis, apenas com caminho de leitura mais lento quando estiverem fora do DB principal.
+
+## Dashboard interno de saude
+- Caminho: `/admin/health` (somente staff).
+- Mostra: contagem total/abertos de tickets, cadastros (usuarios/empresas), conectividade de dispositivos (online/atencao/offline), idade do ultimo e do mais antigo heartbeat, politica de retencao.
+- Protecao da query Convex: defina `INTERNAL_HEALTH_TOKEN` (ou reutilize `REPORTS_CRON_SECRET`) no ambiente do Convex e do Next. Se o token faltar ou o Convex nao responder, o card exibe aviso.
+
+## Checks operacionais sugeridos (manuais)
+- Tamanho do banco do Convex: `ssh -i ~/.ssh/codex_ed25519 root@154.12.253.40 "ls -lh /var/lib/docker/volumes/sistema_convex_data/_data/db.sqlite3"`
+- Memoria do Convex: `ssh -i ~/.ssh/codex_ed25519 root@154.12.253.40 "docker stats --no-stream | grep convex"`
+- Alvos: <100-200 MB para o SQLite e <5 GB de RAM. Acima disso, abrir janela curta, fazer backup e avaliar limpeza ou arquivamento pontual.
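+
+## Esboco: leitura com fallback para arquivo frio
+
+Um esboco minimo, em TypeScript, do caminho de leitura descrito em "Como acessar tickets antigos sem perda". E apenas ilustrativo: o helper `loadFromLocalArchive`, a interface `HotTicketRow` e o uso de `archive_key` como nome do arquivo JSONL em `ARCHIVE_DIR` sao suposicoes deste documento, nao a implementacao atual.
+
+```ts
+import { readFile } from "fs/promises"
+import { join } from "path"
+
+// Shape de cada linha do JSONL gravado por exportResolvedTicketsToDisk.
+type ArchivedTicket = { ticketId: string; ticket: unknown; comments: unknown[]; events: unknown[] }
+
+// Shape minimo assumido (hipotetico) para a linha do DB quente apos o offload.
+interface HotTicketRow {
+  id: string
+  archive_key: string | null
+}
+
+async function loadFromLocalArchive(archiveKey: string, ticketId: string): Promise<ArchivedTicket | null> {
+  // Suposicao: archive_key guarda o nome do arquivo JSONL dentro de ARCHIVE_DIR.
+  const raw = await readFile(join(process.env.ARCHIVE_DIR ?? "./archives", archiveKey), "utf-8")
+  for (const line of raw.split("\n")) {
+    if (!line.trim()) continue
+    const parsed = JSON.parse(line) as ArchivedTicket
+    if (parsed.ticketId === ticketId) return parsed
+  }
+  return null
+}
+
+// Leitura com fallback: tenta o DB quente primeiro; se houver marcador de
+// offload, busca no arquivo frio e devolve em modo somente leitura.
+async function getTicketReadOnly(
+  findHot: (id: string) => Promise<HotTicketRow | null>,
+  ticketId: string
+) {
+  const hot = await findHot(ticketId)
+  if (hot?.archive_key) {
+    const cold = await loadFromLocalArchive(hot.archive_key, ticketId)
+    return cold ? { ...cold, readOnly: true } : null
+  }
+  return hot
+}
+```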
+
+## Estado atual e proximos passos
+- Cron de limpeza segue desativado. Prioridade: monitorar 2-4 semanas para validar estabilidade pos-correcoes.
+- Se o volume crescer: habilitar GC preguicoso em handlers de alto volume (ex.: heartbeat) com limites pequenos, ou acionar rotina manual/HTTP para deletar apenas telemetria fora da retencao.
+- Tickets permanecem integrais; qualquer offload futuro deve ser acompanhado de export completo + marcador para leitura em modo arquivo frio.
diff --git a/src/app/admin/health/page.tsx b/src/app/admin/health/page.tsx
new file mode 100644
index 0000000..146340e
--- /dev/null
+++ b/src/app/admin/health/page.tsx
@@ -0,0 +1,201 @@
+import { Badge } from "@/components/ui/badge"
+import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card"
+import { AppShell } from "@/components/app-shell"
+import { SiteHeader } from "@/components/site-header"
+import { getHealthSnapshot } from "@/server/health"
+
+export const runtime = "nodejs"
+export const dynamic = "force-dynamic"
+
+function formatDuration(ms: number | null): string {
+  if (ms === null) return "N/A"
+  const totalSeconds = Math.floor(ms / 1000)
+  const minutes = Math.floor(totalSeconds / 60)
+  const hours = Math.floor(minutes / 60)
+  const days = Math.floor(hours / 24)
+  if (days > 0) return `${days}d ${hours % 24}h`
+  if (hours > 0) return `${hours}h ${minutes % 60}m`
+  if (minutes > 0) return `${minutes}m`
+  return `${totalSeconds}s`
+}
+
+function formatNumber(value: number) {
+  return value.toLocaleString("pt-BR")
+}
+
+export default async function AdminHealthPage() {
+  const snapshot = await getHealthSnapshot()
+  const devices = snapshot.devices
+  const retention = snapshot.retention
+  const strategy = snapshot.retentionStrategy
+
+  return (
+    <AppShell
+      header={
+        <SiteHeader />
+      }
+    >
+      <div>
+        <div>
+          <Card>
+            <CardHeader>
+              <CardTitle>Tickets</CardTitle>
+              <CardDescription>Sem expiracao automatica; acompanhar volume diario.</CardDescription>
+            </CardHeader>
+            <CardContent>
+              <div>
+                <span>Total</span>
+                <strong>{formatNumber(snapshot.tickets.total)}</strong>
+              </div>
+              <div>
+                <span>Abertos</span>
+                <strong>{formatNumber(snapshot.tickets.open)}</strong>
+              </div>
+              <div>
+                <span>Ultimos 7 dias</span>
+                <strong>{formatNumber(snapshot.tickets.last7d)}</strong>
+              </div>
+              <div>
+                <span>Ultimas 24h</span>
+                <strong>{formatNumber(snapshot.tickets.last24h)}</strong>
+              </div>
+            </CardContent>
+          </Card>
+
+          <Card>
+            <CardHeader>
+              <div>
+                <CardTitle>Dispositivos</CardTitle>
+                <CardDescription>Heartbeats e conectividade (Convex)</CardDescription>
+              </div>
+              <Badge>{devices ? "Ativo" : "Sem resposta"}</Badge>
+            </CardHeader>
+            <CardContent>
+              {devices ? (
+                <>
+                  <div>
+                    <span>Cadastrados</span>
+                    <strong>{formatNumber(devices.machines)}</strong>
+                  </div>
+                  <div>
+                    <div>
+                      <div>Online</div>
+                      <div>{formatNumber(devices.online)}</div>
+                    </div>
+                    <div>
+                      <div>Atencao</div>
+                      <div>{formatNumber(devices.warning)}</div>
+                    </div>
+                    <div>
+                      <div>Offline</div>
+                      <div>{formatNumber(devices.offline)}</div>
+                    </div>
+                  </div>
+                  <div>
+                    <span>Sem heartbeat</span>
+                    <strong>{formatNumber(devices.withoutHeartbeat)}</strong>
+                  </div>
+                  <div>
+                    <span>Ultimo heartbeat</span>
+                    <strong>{formatDuration(devices.newestHeartbeatAgeMs)}</strong>
+                  </div>
+                  <div>
+                    <span>Mais antigo</span>
+                    <strong>{formatDuration(devices.oldestHeartbeatAgeMs)}</strong>
+                  </div>
+                  <div>
+                    Offline apos {Math.round(devices.thresholds.offlineMs / 60000)} min | stale apos{" "}
+                    {Math.round(devices.thresholds.staleMs / 60000)} min
+                    {devices.truncated ? " (amostra limitada)" : ""}
+                  </div>
+                </>
+              ) : (
+                <div>
+                  Nao foi possivel ler o estado do Convex. Confirme token interno e conectividade do backend.
+                </div>
+              )}
+            </CardContent>
+          </Card>
+
+          <Card>
+            <CardHeader>
+              <CardTitle>Cadastros</CardTitle>
+              <CardDescription>Usuarios e empresas ativos na base SQL.</CardDescription>
+            </CardHeader>
+            <CardContent>
+              <div>
+                <span>Usuarios</span>
+                <strong>{formatNumber(snapshot.accounts.users)}</strong>
+              </div>
+              <div>
+                <span>Empresas</span>
+                <strong>{formatNumber(snapshot.accounts.companies)}</strong>
+              </div>
+              <div>
+                Tickets antigos continuam consultaveis; planejar arquivamento frio antes de qualquer offload massivo.
+              </div>
+            </CardContent>
+          </Card>
+        </div>
+
+        <div>
+          <Card>
+            <CardHeader>
+              <CardTitle>Retencao</CardTitle>
+              <CardDescription>Duracao alvo por tipo de dado (sem cron de exclusao ligado).</CardDescription>
+            </CardHeader>
+            <CardContent>
+              <div>
+                <span>Tickets</span>
+                <strong>Sem expiracao</strong>
+              </div>
+              <div>
+                <span>Telemetria inventory/metrics</span>
+                <strong>{retention.machineTelemetry.inventoryDays} dias</strong>
+              </div>
+              <div>
+                <span>Alertas de dispositivo</span>
+                <strong>{retention.machineTelemetry.alertsDays} dias</strong>
+              </div>
+              <div>
+                <span>Historico de exports</span>
+                <strong>{retention.reportExports.runsDays} dias</strong>
+              </div>
+              <div>
+                {strategy.archivalPlan} Sem exclusao automatica; GC preguicoso pode ser habilitado futuramente em handlers de
+                alto volume.
+              </div>
+            </CardContent>
+          </Card>
+
+          <Card>
+            <CardHeader>
+              <CardTitle>Check rapido</CardTitle>
+              <CardDescription>Comandos manuais recomendados (sem tocar em dados).</CardDescription>
+            </CardHeader>
+            <CardContent>
+              <code>
+                ssh -i ~/.ssh/codex_ed25519 root@154.12.253.40 "ls -lh /var/lib/docker/volumes/sistema_convex_data/_data/db.sqlite3"
+              </code>
+              <code>
+                ssh -i ~/.ssh/codex_ed25519 root@154.12.253.40 "docker stats --no-stream | grep convex"
+              </code>
+              <div>
+                Objetivo: acompanhar tamanho do SQLite e memoria do Convex por 2-4 semanas. Se subir alem de 200 MB / 5 GB,
+                abrir janela de manutencao com backup antes de limpar/arquivar.
+              </div>
+            </CardContent>
+          </Card>
+        </div>
+
+        <div>
+          Atualizado em {new Date(snapshot.generatedAt).toLocaleString("pt-BR")}{" "}
+          {snapshot.notes ? `- Observacao: ${snapshot.notes}` : ""}
+        </div>
+      </div>
+    </AppShell>
+ ) +} diff --git a/src/app/api/admin/tickets/archive-local/route.ts b/src/app/api/admin/tickets/archive-local/route.ts new file mode 100644 index 0000000..ee31ad2 --- /dev/null +++ b/src/app/api/admin/tickets/archive-local/route.ts @@ -0,0 +1,52 @@ +import { NextResponse } from "next/server" + +import { requireAuthenticatedSession } from "@/lib/auth-server" +import { isStaff } from "@/lib/authz" +import { env } from "@/lib/env" +import { exportResolvedTicketsToDisk } from "@/server/archive/local-tickets" + +function getCronSecret(): string | null { + return env.INTERNAL_HEALTH_TOKEN ?? env.REPORTS_CRON_SECRET ?? null +} + +export async function POST(request: Request) { + const cronSecret = getCronSecret() + const headerSecret = request.headers.get("x-cron-secret")?.trim() + + let isCron = false + if (cronSecret && headerSecret && headerSecret === cronSecret) { + isCron = true + } + + if (!isCron) { + const session = await requireAuthenticatedSession() + const role = session.user.role ?? "agent" + if (!isStaff(role)) { + return NextResponse.json({ error: "Acesso negado" }, { status: 403 }) + } + } + + let body: { days?: number; limit?: number } = {} + try { + body = (await request.json()) ?? {} + } catch { + body = {} + } + + const days = typeof body.days === "number" && body.days > 0 ? body.days : 365 + const limit = typeof body.limit === "number" && body.limit > 0 ? body.limit : 50 + + try { + const result = await exportResolvedTicketsToDisk({ days, limit }) + return NextResponse.json({ ok: true, ...result }, { status: 200 }) + } catch (error) { + console.error("[admin.tickets.archive-local] failed", error) + return NextResponse.json( + { + error: "Falha ao arquivar tickets", + details: error instanceof Error ? error.message : String(error), + }, + { status: 500 } + ) + } +} diff --git a/src/lib/env.ts b/src/lib/env.ts index aea4779..77127fc 100644 --- a/src/lib/env.ts +++ b/src/lib/env.ts @@ -32,7 +32,9 @@ const envSchema = z.object({ SMTP_TLS: z.string().optional(), MAILER_SENDER_EMAIL: z.string().optional(), REPORTS_CRON_SECRET: z.string().optional(), + INTERNAL_HEALTH_TOKEN: z.string().optional(), REPORTS_CRON_BASE_URL: urlField().or(z.literal("")).optional(), + ARCHIVE_DIR: stringField().or(z.literal("")).optional(), }) const parsed = envSchema.safeParse(process.env) @@ -67,7 +69,9 @@ export const env = { MACHINE_TOKEN_TTL_MS: parsed.data.MACHINE_TOKEN_TTL_MS, FLEET_SYNC_SECRET: parsed.data.FLEET_SYNC_SECRET, REPORTS_CRON_SECRET: parsed.data.REPORTS_CRON_SECRET, + INTERNAL_HEALTH_TOKEN: parsed.data.INTERNAL_HEALTH_TOKEN, REPORTS_CRON_BASE_URL: parsed.data.REPORTS_CRON_BASE_URL, + ARCHIVE_DIR: parsed.data.ARCHIVE_DIR ?? "./archives", SMTP: parsed.data.SMTP_ADDRESS && parsed.data.SMTP_USERNAME && parsed.data.SMTP_PASSWORD ? 
{ host: parsed.data.SMTP_ADDRESS, diff --git a/src/lib/retention.ts b/src/lib/retention.ts new file mode 100644 index 0000000..2c5c229 --- /dev/null +++ b/src/lib/retention.ts @@ -0,0 +1,40 @@ +export type RetentionPolicy = { + tickets: { + retention: "infinite" + notes: string + } + machineTelemetry: { + inventoryDays: number + metricsDays: number + alertsDays: number + } + reportExports: { + runsDays: number + artifactsDays: number + } +} + +export const RETENTION_POLICY: RetentionPolicy = { + tickets: { + retention: "infinite", + notes: "Nao apagar tickets automaticamente; arquivar antes de mover para storage frio.", + }, + machineTelemetry: { + inventoryDays: 90, + metricsDays: 90, + alertsDays: 180, + }, + reportExports: { + runsDays: 30, + artifactsDays: 30, + }, +} + +export const RETENTION_STRATEGY = { + cleanupMode: "manual-or-lazy" as const, + cleanupBatchSize: 500, + archivalPlan: + "Arquivar dados frios (telemetria/tickets antigos) em storage barato antes de qualquer remocao definitiva.", + notes: + "Sem cron de limpeza ativa; usar verificacoes manuais ou GC preguiçoso acionado por handlers de alto volume.", +} diff --git a/src/server/archive/local-tickets.ts b/src/server/archive/local-tickets.ts new file mode 100644 index 0000000..3c29bed --- /dev/null +++ b/src/server/archive/local-tickets.ts @@ -0,0 +1,73 @@ +import { mkdir, writeFile } from "fs/promises" +import { join, dirname } from "path" + +import { api } from "@/convex/_generated/api" +import { DEFAULT_TENANT_ID } from "@/lib/constants" +import { env } from "@/lib/env" +import { createConvexClient } from "@/server/convex-client" + +type ArchiveItem = { + ticket: Record + comments: Array> + events: Array> +} + +type ExportResponse = { + total: number + items: ArchiveItem[] +} + +function assertArchiveSecret(): string { + const secret = env.INTERNAL_HEALTH_TOKEN ?? env.REPORTS_CRON_SECRET + if (!secret) { + throw new Error("Defina INTERNAL_HEALTH_TOKEN ou REPORTS_CRON_SECRET para exportar tickets") + } + return secret +} + +function nowIso() { + return new Date().toISOString().replace(/[:.]/g, "-") +} + +export async function exportResolvedTicketsToDisk(options?: { + days?: number + limit?: number + tenantId?: string +}) { + const days = options?.days ?? 365 + const limit = options?.limit ?? 50 + const tenantId = options?.tenantId ?? DEFAULT_TENANT_ID + const cutoff = Date.now() - days * 24 * 60 * 60 * 1000 + const secret = assertArchiveSecret() + const client = createConvexClient() + + // @ts-expect-error - exportForArchive é adicionada manualmente sem regen do client + const res = (await client.query(api.tickets.exportForArchive, { + tenantId, + before: cutoff, + limit, + secret, + })) as ExportResponse + + const archiveDir = env.ARCHIVE_DIR ?? "./archives" + const filename = `tickets-archive-${nowIso()}-resolved-${days}d.jsonl` + const fullPath = join(archiveDir, filename) + await mkdir(dirname(fullPath), { recursive: true }) + + const lines = res.items.map((item) => + JSON.stringify({ + ticketId: item.ticket?._id ?? 
null, + tenantId, + archivedAt: Date.now(), + ticket: item.ticket, + comments: item.comments, + events: item.events, + }) + ) + await writeFile(fullPath, lines.join("\n"), { encoding: "utf-8" }) + + return { + written: res.items.length, + file: fullPath, + } +} diff --git a/src/server/health.ts b/src/server/health.ts new file mode 100644 index 0000000..54bac5a --- /dev/null +++ b/src/server/health.ts @@ -0,0 +1,98 @@ +import { api } from "@/convex/_generated/api" +import { RETENTION_POLICY, RETENTION_STRATEGY } from "@/lib/retention" +import { prisma } from "@/lib/prisma" +import { createConvexClient } from "@/server/convex-client" +import { env } from "@/lib/env" + +const OPEN_TICKET_STATUSES = ["PENDING", "AWAITING_ATTENDANCE", "PAUSED"] + +type DeviceHealth = { + machines: number + online: number + warning: number + offline: number + withoutHeartbeat: number + newestHeartbeatAgeMs: number | null + oldestHeartbeatAgeMs: number | null + thresholds: { + offlineMs: number + staleMs: number + } + truncated: boolean +} + +export type HealthSnapshot = { + generatedAt: string + tickets: { + total: number + open: number + last7d: number + last24h: number + } + accounts: { + users: number + companies: number + } + devices: DeviceHealth | null + retention: typeof RETENTION_POLICY + retentionStrategy: typeof RETENTION_STRATEGY + notes?: string +} + +function toIsoString(date: Date) { + return date.toISOString() +} + +export async function getHealthSnapshot(): Promise { + const now = new Date() + const sevenDaysAgo = new Date(now.getTime() - 7 * 24 * 60 * 60 * 1000) + const oneDayAgo = new Date(now.getTime() - 24 * 60 * 60 * 1000) + + const [totalTickets, openTickets, lastWeekTickets, lastDayTickets, userCount, companyCount] = await Promise.all([ + prisma.ticket.count(), + prisma.ticket.count({ where: { status: { in: OPEN_TICKET_STATUSES } } }), + prisma.ticket.count({ where: { createdAt: { gte: sevenDaysAgo } } }), + prisma.ticket.count({ where: { createdAt: { gte: oneDayAgo } } }), + prisma.user.count(), + prisma.company.count(), + ]) + + let devices: DeviceHealth | null = null + try { + const client = createConvexClient() + const convexHealth = await client.query(api.ops.healthSnapshot, { + token: env.INTERNAL_HEALTH_TOKEN ?? env.REPORTS_CRON_SECRET ?? undefined, + }) + devices = { + machines: convexHealth.totals.machines, + online: convexHealth.connectivity.online, + warning: convexHealth.connectivity.warning, + offline: convexHealth.connectivity.offline, + withoutHeartbeat: convexHealth.totals.withoutHeartbeat, + newestHeartbeatAgeMs: convexHealth.heartbeatAgeMs.newest, + oldestHeartbeatAgeMs: convexHealth.heartbeatAgeMs.oldest, + thresholds: convexHealth.thresholds, + truncated: convexHealth.totals.truncated, + } + } catch (error) { + console.error("[health] Falha ao carregar estado das maquinas", error) + } + + return { + generatedAt: toIsoString(now), + tickets: { + total: totalTickets, + open: openTickets, + last7d: lastWeekTickets, + last24h: lastDayTickets, + }, + accounts: { + users: userCount, + companies: companyCount, + }, + devices, + retention: RETENTION_POLICY, + retentionStrategy: RETENTION_STRATEGY, + notes: devices ? undefined : "Convex nao respondeu; verificar conectividade ou token interno.", + } +}