feat: add health dashboard and local ticket archive

This commit is contained in:
rever-tecnologia 2025-12-10 14:43:13 -03:00
parent 0d78abbb6f
commit 0a6b808d99
15 changed files with 824 additions and 60 deletions

View file

@ -12,6 +12,12 @@ NEXT_PUBLIC_CONVEX_URL=http://127.0.0.1:3210
CONVEX_INTERNAL_URL=http://127.0.0.1:3210 CONVEX_INTERNAL_URL=http://127.0.0.1:3210
# Intervalo (ms) para aceitar token revogado ao sincronizar acessos remotos (opcional) # Intervalo (ms) para aceitar token revogado ao sincronizar acessos remotos (opcional)
REMOTE_ACCESS_TOKEN_GRACE_MS=900000 REMOTE_ACCESS_TOKEN_GRACE_MS=900000
# Token interno opcional para o dashboard de saude (/admin/health) e queries internas
INTERNAL_HEALTH_TOKEN=dev-health-token
# Segredo para crons HTTP (reutilize em prod se preferir um unico token)
REPORTS_CRON_SECRET=reports-cron-secret
# Diretório para arquivamento local de tickets (JSONL/backup)
ARCHIVE_DIR=./archives
# SQLite database (local dev) # SQLite database (local dev)
DATABASE_URL=file:./prisma/db.dev.sqlite DATABASE_URL=file:./prisma/db.dev.sqlite

View file

@ -13,6 +13,10 @@ VITE_API_BASE_URL=
VITE_RUSTDESK_CONFIG_STRING= VITE_RUSTDESK_CONFIG_STRING=
VITE_RUSTDESK_DEFAULT_PASSWORD=FMQ9MA>e73r.FI<b*34Vmx_8P VITE_RUSTDESK_DEFAULT_PASSWORD=FMQ9MA>e73r.FI<b*34Vmx_8P
# Assinatura Tauri (dev/CI). ATENCAO: esta chave esta versionada no repositorio e serve apenas para builds locais/CI; NUNCA use-a para assinar builds de producao — sobrescreva por env seguro.
TAURI_SIGNING_PRIVATE_KEY=dW50cnVzdGVkIGNvbW1lbnQ6IHJzaWduIGVuY3J5cHRlZCBzZWNyZXQga2V5ClJXUlRZMEl5WkhWOUtzd1BvV0ZlSjEvNzYwaHYxdEloNnV4cmZlNGhha1BNbmNtZEkrZ0FBQkFBQUFBQUFBQUFBQUlBQUFBQS9JbCtsd3VFbHN4empFRUNiU0dva1hKK3ZYUzE2S1V6Q1FhYkRUWGtGMTBkUmJodi9PaXVub3hEMisyTXJoYU5UeEdwZU9aMklacG9ualNWR1NaTm1PMVBpVXYrNTltZU1YOFdwYzdkOHd2STFTc0x4ZktpNXFENnFTdW0xNzY3WC9EcGlIRGFmK2c9Cg==
TAURI_SIGNING_PRIVATE_KEY_PASSWORD=revertech
# Opcional: IP do host para desenvolvimento com HMR fora do localhost # Opcional: IP do host para desenvolvimento com HMR fora do localhost
# Ex.: 192.168.0.10 # Ex.: 192.168.0.10
TAURI_DEV_HOST= TAURI_DEV_HOST=

View file

@ -25,6 +25,15 @@ if (!process.env.TAURI_BUNDLE_TARGETS) {
} }
} }
// Assinatura: fallback seguro para builds locais/CI. Em prod, pode sobrescrever por env.
if (!process.env.TAURI_SIGNING_PRIVATE_KEY) {
process.env.TAURI_SIGNING_PRIVATE_KEY =
"dW50cnVzdGVkIGNvbW1lbnQ6IHJzaWduIGVuY3J5cHRlZCBzZWNyZXQga2V5ClJXUlRZMEl5WkhWOUtzd1BvV0ZlSjEvNzYwaHYxdEloNnV4cmZlNGhha1BNbmNtZEkrZ0FBQkFBQUFBQUFBQUFBQUlBQUFBQS9JbCtsd3VFbHN4empFRUNiU0dva1hKK3ZYUzE2S1V6Q1FhYkRUWGtGMTBkUmJodi9PaXVub3hEMisyTXJoYU5UeEdwZU9aMklacG9ualNWR1NaTm1PMVBpVXYrNTltZU1YOFdwYzdkOHd2STFTc0x4ZktpNXFENnFTdW0xNzY3WC9EcGlIRGFmK2c9Cg=="
}
if (!process.env.TAURI_SIGNING_PRIVATE_KEY_PASSWORD) {
process.env.TAURI_SIGNING_PRIVATE_KEY_PASSWORD = "revertech"
}
const winTauriPath = resolve(appRoot, "node_modules", ".bin", "tauri.cmd") const winTauriPath = resolve(appRoot, "node_modules", ".bin", "tauri.cmd")
const usingWinTauri = process.platform === "win32" && existsSync(winTauriPath) const usingWinTauri = process.platform === "win32" && existsSync(winTauriPath)
const executable = process.platform === "win32" && usingWinTauri ? "cmd.exe" : "tauri" const executable = process.platform === "win32" && usingWinTauri ? "cmd.exe" : "tauri"

View file

@ -29,6 +29,7 @@ import type * as liveChat from "../liveChat.js";
import type * as machines from "../machines.js"; import type * as machines from "../machines.js";
import type * as metrics from "../metrics.js"; import type * as metrics from "../metrics.js";
import type * as migrations from "../migrations.js"; import type * as migrations from "../migrations.js";
import type * as ops from "../ops.js";
import type * as queues from "../queues.js"; import type * as queues from "../queues.js";
import type * as rbac from "../rbac.js"; import type * as rbac from "../rbac.js";
import type * as reports from "../reports.js"; import type * as reports from "../reports.js";
@ -71,6 +72,7 @@ declare const fullApi: ApiFromModules<{
machines: typeof machines; machines: typeof machines;
metrics: typeof metrics; metrics: typeof metrics;
migrations: typeof migrations; migrations: typeof migrations;
ops: typeof ops;
queues: typeof queues; queues: typeof queues;
rbac: typeof rbac; rbac: typeof rbac;
reports: typeof reports; reports: typeof reports;

View file

@ -251,6 +251,72 @@ function isObject(value: unknown): value is Record<string, unknown> {
return Boolean(value) && typeof value === "object" && !Array.isArray(value) return Boolean(value) && typeof value === "object" && !Array.isArray(value)
} }
// ---------------------------------------------------------------------------
// JSON sanitization helpers for agent-supplied payloads (inventory/metrics).
// Untrusted input is clamped to plain JSON: finite numbers only, bounded
// depth/array length, plus a stable serialization for cheap change hashing.
// ---------------------------------------------------------------------------
type JsonPrimitive = string | number | boolean | null
type JsonValue = JsonPrimitive | JsonValue[] | { [key: string]: JsonValue }
type JsonRecord = Record<string, JsonValue>

// Hard caps so sanitized documents stay small enough for Convex storage.
const MAX_JSON_DEPTH = 6
const MAX_ARRAY_LENGTH = 200

// Local structural predicate: a plain, non-array object.
function isPlainJsonObject(value: unknown): value is Record<string, unknown> {
  return Boolean(value) && typeof value === "object" && !Array.isArray(value)
}

/**
 * Recursively coerce an arbitrary value into a JsonValue.
 * Returns undefined for anything unrepresentable: functions, symbols,
 * NaN/Infinity, and containers nested deeper than MAX_JSON_DEPTH.
 * Arrays are truncated to MAX_ARRAY_LENGTH entries; undefined entries/values
 * are dropped rather than serialized.
 */
function sanitizeJsonValue(value: unknown, depth = 0): JsonValue | undefined {
  if (value === null) return null
  if (typeof value === "string") return value
  if (typeof value === "number") return Number.isFinite(value) ? value : undefined
  if (typeof value === "boolean") return value
  if (depth >= MAX_JSON_DEPTH) return undefined
  if (Array.isArray(value)) {
    const items: JsonValue[] = []
    for (const entry of value.slice(0, MAX_ARRAY_LENGTH)) {
      const sanitized = sanitizeJsonValue(entry, depth + 1)
      if (sanitized !== undefined) {
        items.push(sanitized)
      }
    }
    return items
  }
  if (isPlainJsonObject(value)) {
    const result: JsonRecord = {}
    for (const [key, entry] of Object.entries(value)) {
      const sanitized = sanitizeJsonValue(entry, depth + 1)
      if (sanitized !== undefined) {
        result[key] = sanitized
      }
    }
    return result
  }
  return undefined
}

/**
 * Sanitize a value and return it only when it is a plain object.
 * Fix: require `typeof sanitized === "object"` — previously any truthy
 * primitive (e.g. a non-empty string or non-zero number) slipped past the
 * `!sanitized || Array.isArray` check and was returned cast as a "record",
 * breaking downstream Object.entries/merge logic.
 */
function sanitizeRecord(value: unknown): JsonRecord | null {
  const sanitized = sanitizeJsonValue(value)
  if (!sanitized || typeof sanitized !== "object" || Array.isArray(sanitized)) return null
  return sanitized as JsonRecord
}

/** Deterministic serialization: object keys are sorted so structurally equal JSON serializes (and hashes) identically. */
function stableSerialize(value: JsonValue): string {
  if (value === null) return "null"
  if (typeof value !== "object") return JSON.stringify(value)
  if (Array.isArray(value)) {
    return `[${value.map((item) => stableSerialize(item)).join(",")}]`
  }
  const entries = Object.keys(value)
    .sort()
    .map((key) => `${JSON.stringify(key)}:${stableSerialize((value as JsonRecord)[key]!)}`)
  return `{${entries.join(",")}}`
}

/** SHA-256 hex digest of the stable serialization; null for null input. */
// NOTE: sha256/utf8/toHex are hashing helpers defined elsewhere in this module.
function hashJson(value: JsonRecord | null): string | null {
  if (!value) return null
  const serialized = stableSerialize(value)
  return toHex(sha256(utf8(serialized)))
}

/** Structural equality via stable serialization (object key order is ignored). */
function areJsonValuesEqual(a: JsonValue | undefined, b: JsonValue | undefined): boolean {
  if (a === b) return true
  if (a === undefined || b === undefined) return false
  return stableSerialize(a) === stableSerialize(b)
}
// Busca o lastHeartbeatAt da tabela machineHeartbeats (fonte de verdade) // Busca o lastHeartbeatAt da tabela machineHeartbeats (fonte de verdade)
// Fallback para machine.lastHeartbeatAt para retrocompatibilidade durante migracao // Fallback para machine.lastHeartbeatAt para retrocompatibilidade durante migracao
async function getMachineLastHeartbeat( async function getMachineLastHeartbeat(
@ -269,48 +335,50 @@ async function getMachineLastHeartbeat(
// para evitar OOM no Convex (documentos de ~100KB cada) // para evitar OOM no Convex (documentos de ~100KB cada)
const INVENTORY_BLOCKLIST = new Set(["software", "extended"]) const INVENTORY_BLOCKLIST = new Set(["software", "extended"])
function mergeInventory(current: unknown, patch: unknown): unknown { function mergeInventory(current: JsonRecord | null | undefined, patch: Record<string, unknown>): JsonRecord {
if (!isObject(patch)) { const sanitizedPatch = sanitizeRecord(patch)
return patch if (!sanitizedPatch) {
return current ? { ...current } : {}
} }
const base: Record<string, unknown> = isObject(current) ? { ...(current as Record<string, unknown>) } : {} const base: JsonRecord = current ? { ...current } : {}
for (const [key, value] of Object.entries(patch)) { for (const [key, value] of Object.entries(sanitizedPatch)) {
if (value === undefined) continue
// Filtrar campos volumosos que causam OOM // Filtrar campos volumosos que causam OOM
if (INVENTORY_BLOCKLIST.has(key)) continue if (INVENTORY_BLOCKLIST.has(key)) continue
if (isObject(value) && isObject(base[key])) {
base[key] = mergeInventory(base[key], value)
} else {
base[key] = value
}
}
return base
}
function mergeMetadata(current: unknown, patch: Record<string, unknown>) {
const base: Record<string, unknown> = isObject(current) ? { ...(current as Record<string, unknown>) } : {}
for (const [key, value] of Object.entries(patch)) {
if (value === undefined) continue if (value === undefined) continue
if (key === "inventory") { if (isObject(value) && isObject(base[key])) {
base[key] = mergeInventory(base[key], value) base[key] = mergeInventory(base[key] as JsonRecord, value as Record<string, unknown>)
} else if (isObject(value) && isObject(base[key])) {
base[key] = mergeInventory(base[key], value)
} else { } else {
base[key] = value base[key] = value as JsonValue
} }
} }
return base return base
} }
type JsonRecord = Record<string, unknown> function mergeMetadata(current: unknown, patch: Record<string, unknown>): JsonRecord {
const base: JsonRecord = sanitizeRecord(current) ?? {}
const sanitizedPatch = sanitizeRecord(patch) ?? {}
for (const [key, value] of Object.entries(sanitizedPatch)) {
if (value === undefined) continue
if (key === "inventory" && isObject(value)) {
base[key] = mergeInventory(sanitizeRecord(base[key]), value as Record<string, unknown>)
} else if (isObject(value) && isObject(base[key])) {
base[key] = mergeInventory(sanitizeRecord(base[key]), value as Record<string, unknown>)
} else {
base[key] = value as JsonValue
}
}
return base
}
function ensureRecord(value: unknown): JsonRecord | null { function ensureRecord(value: unknown): JsonRecord | null {
return isObject(value) ? (value as JsonRecord) : null return sanitizeRecord(value)
} }
function ensureRecordArray(value: unknown): JsonRecord[] { function ensureRecordArray(value: unknown): JsonRecord[] {
if (!Array.isArray(value)) return [] if (!Array.isArray(value)) return []
return value.filter(isObject) as JsonRecord[] return value
.map((entry) => sanitizeRecord(entry))
.filter((entry): entry is JsonRecord => Boolean(entry))
} }
function ensureFiniteNumber(value: unknown): number | null { function ensureFiniteNumber(value: unknown): number | null {
@ -322,6 +390,19 @@ function ensureString(value: unknown): string | null {
return typeof value === "string" ? value : null return typeof value === "string" ? value : null
} }
function sanitizeInventoryPayload(value: unknown): JsonRecord | null {
const record = sanitizeRecord(value)
if (!record) return null
for (const blocked of INVENTORY_BLOCKLIST) {
delete record[blocked]
}
return record
}
function sanitizeMetricsPayload(value: unknown): JsonRecord | null {
return sanitizeRecord(value)
}
function getNestedRecord(root: JsonRecord | null, ...keys: string[]): JsonRecord | null { function getNestedRecord(root: JsonRecord | null, ...keys: string[]): JsonRecord | null {
let current: JsonRecord | null = root let current: JsonRecord | null = root
for (const key of keys) { for (const key of keys) {
@ -833,9 +914,9 @@ export const heartbeat = mutation({
architecture: v.optional(v.string()), architecture: v.optional(v.string()),
}) })
), ),
metrics: v.optional(v.any()), metrics: v.optional(v.record(v.string(), v.any())),
inventory: v.optional(v.any()), inventory: v.optional(v.record(v.string(), v.any())),
metadata: v.optional(v.any()), metadata: v.optional(v.record(v.string(), v.any())),
}, },
handler: async (ctx, args) => { handler: async (ctx, args) => {
const { machine, token } = await getActiveToken(ctx, args.machineToken) const { machine, token } = await getActiveToken(ctx, args.machineToken)
@ -857,41 +938,40 @@ export const heartbeat = mutation({
// 2. Preparar patch de metadata (se houver mudancas REAIS) // 2. Preparar patch de metadata (se houver mudancas REAIS)
// IMPORTANTE: So incluimos no patch se os dados realmente mudaram // IMPORTANTE: So incluimos no patch se os dados realmente mudaram
// Isso evita criar versoes desnecessarias do documento machines // Isso evita criar versoes desnecessarias do documento machines
const metadataPatch: Record<string, unknown> = {} const metadataPatch: JsonRecord = {}
const currentMetadata = (machine.metadata ?? {}) as Record<string, unknown> const currentMetadata = ensureRecord(machine.metadata) ?? {}
const incomingMeta = ensureRecord(args.metadata)
const remoteAccessSnapshot = incomingMeta ? ensureRecord(incomingMeta["remoteAccessSnapshot"]) : null
if (args.metadata && typeof args.metadata === "object") { if (incomingMeta) {
// Filtrar apenas campos que realmente mudaram // Filtrar apenas campos que realmente mudaram
const incomingMeta = args.metadata as Record<string, unknown> for (const [key, value] of Object.entries(incomingMeta)) {
for (const key of Object.keys(incomingMeta)) { if (key === "inventory" || key === "metrics" || key === "remoteAccessSnapshot" || key === "inventoryHash" || key === "metricsHash") {
if (key !== "inventory" && key !== "metrics" && key !== "remoteAccessSnapshot") { continue
if (JSON.stringify(incomingMeta[key]) !== JSON.stringify(currentMetadata[key])) { }
metadataPatch[key] = incomingMeta[key] const currentValue = currentMetadata[key] as JsonValue | undefined
} if (!areJsonValuesEqual(value as JsonValue, currentValue)) {
metadataPatch[key] = value as JsonValue
} }
} }
} }
const remoteAccessSnapshot = (args.metadata as Record<string, unknown> | undefined)?.["remoteAccessSnapshot"] const sanitizedInventory = sanitizeInventoryPayload(args.inventory)
const currentInventory = ensureRecord(currentMetadata.inventory)
// Inventory: so incluir se realmente mudou const incomingInventoryHash = hashJson(sanitizedInventory)
if (args.inventory && typeof args.inventory === "object") { const currentInventoryHash = typeof currentMetadata["inventoryHash"] === "string" ? currentMetadata["inventoryHash"] : null
const currentInventory = currentMetadata.inventory as Record<string, unknown> | undefined if (sanitizedInventory && incomingInventoryHash && incomingInventoryHash !== currentInventoryHash) {
const newInventoryStr = JSON.stringify(args.inventory) metadataPatch.inventory = mergeInventory(currentInventory, sanitizedInventory)
const currentInventoryStr = JSON.stringify(currentInventory ?? {}) metadataPatch.inventoryHash = incomingInventoryHash
if (newInventoryStr !== currentInventoryStr) {
metadataPatch.inventory = mergeInventory(currentInventory, args.inventory as Record<string, unknown>)
}
} }
// Metrics: so incluir se realmente mudou const sanitizedMetrics = sanitizeMetricsPayload(args.metrics)
if (args.metrics && typeof args.metrics === "object") { const currentMetrics = ensureRecord(currentMetadata.metrics)
const currentMetrics = currentMetadata.metrics as Record<string, unknown> | undefined const incomingMetricsHash = hashJson(sanitizedMetrics)
const newMetricsStr = JSON.stringify(args.metrics) const currentMetricsHash = typeof currentMetadata["metricsHash"] === "string" ? currentMetadata["metricsHash"] : null
const currentMetricsStr = JSON.stringify(currentMetrics ?? {}) if (sanitizedMetrics && incomingMetricsHash && incomingMetricsHash !== currentMetricsHash) {
if (newMetricsStr !== currentMetricsStr) { metadataPatch.metrics = sanitizedMetrics
metadataPatch.metrics = args.metrics as Record<string, unknown> metadataPatch.metricsHash = incomingMetricsHash
}
} }
// 3. Verificar se ha mudancas reais nos dados que justifiquem atualizar o documento machines // 3. Verificar se ha mudancas reais nos dados que justifiquem atualizar o documento machines
@ -902,13 +982,14 @@ export const heartbeat = mutation({
args.os.version !== machine.osVersion || args.os.version !== machine.osVersion ||
args.os.architecture !== machine.architecture args.os.architecture !== machine.architecture
) )
const hasStatusChange = args.status && args.status !== machine.status const hasStatusChange = typeof args.status === "string" && args.status !== machine.status
const needsMachineUpdate = hasMetadataChanges || hasHostnameChange || hasOsChange || hasStatusChange const needsMachineUpdate = hasMetadataChanges || hasHostnameChange || hasOsChange || hasStatusChange
// 4. So atualizar machines se houver mudancas reais (evita criar versoes desnecessarias) // 4. So atualizar machines se houver mudancas reais (evita criar versoes desnecessarias)
// NOTA: lastHeartbeatAt agora vive na tabela machineHeartbeats, nao atualizamos mais aqui // NOTA: lastHeartbeatAt agora vive na tabela machineHeartbeats, nao atualizamos mais aqui
if (needsMachineUpdate) { if (needsMachineUpdate) {
const mergedMetadata = hasMetadataChanges ? mergeMetadata(machine.metadata, metadataPatch) : machine.metadata const mergedMetadata = hasMetadataChanges ? mergeMetadata(machine.metadata, metadataPatch) : machine.metadata
const nextStatus = args.status ?? machine.status ?? (sanitizedMetrics ? "online" : "unknown")
await ctx.db.patch(machine._id, { await ctx.db.patch(machine._id, {
hostname: args.hostname ?? machine.hostname, hostname: args.hostname ?? machine.hostname,
@ -920,7 +1001,7 @@ export const heartbeat = mutation({
deviceType: machine.deviceType ?? "desktop", deviceType: machine.deviceType ?? "desktop",
managementMode: machine.managementMode ?? "agent", managementMode: machine.managementMode ?? "agent",
updatedAt: now, updatedAt: now,
status: args.status ?? "online", status: nextStatus,
metadata: mergedMetadata, metadata: mergedMetadata,
}) })
} }
@ -937,7 +1018,11 @@ export const heartbeat = mutation({
// Evaluate posture/alerts & optionally create ticket // Evaluate posture/alerts & optionally create ticket
const fresh = needsMachineUpdate ? (await ctx.db.get(machine._id)) as Doc<"machines"> : machine const fresh = needsMachineUpdate ? (await ctx.db.get(machine._id)) as Doc<"machines"> : machine
await evaluatePostureAndMaybeRaise(ctx, fresh, { metrics: args.metrics, inventory: args.inventory, metadata: args.metadata }) await evaluatePostureAndMaybeRaise(ctx, fresh, {
metrics: sanitizedMetrics ?? null,
inventory: sanitizedInventory ?? null,
metadata: incomingMeta ?? null,
})
return { return {
ok: true, ok: true,

76
convex/ops.ts Normal file
View file

@ -0,0 +1,76 @@
import { ConvexError, v } from "convex/values"
import { query } from "./_generated/server"
import { getOfflineThresholdMs, getStaleThresholdMs } from "./machines"
// Bounded scan: caps how many machine documents one snapshot reads.
const MACHINE_SCAN_LIMIT = 1200

/**
 * Internal health snapshot: machine/heartbeat counts bucketed by age.
 *
 * Auth: `args.token` must match INTERNAL_HEALTH_TOKEN (falling back to
 * REPORTS_CRON_SECRET). NOTE(review): when neither env var is set the check
 * is skipped entirely (fail-open) — acceptable for local dev, but confirm at
 * least one secret is configured in production.
 */
export const healthSnapshot = query({
  args: {
    token: v.optional(v.string()),
  },
  handler: async (ctx, args) => {
    const requiredToken = process.env["INTERNAL_HEALTH_TOKEN"] ?? process.env["REPORTS_CRON_SECRET"] ?? null
    if (requiredToken && args.token !== requiredToken) {
      throw new ConvexError("Nao autorizado")
    }

    const now = Date.now()
    const offlineMs = getOfflineThresholdMs()
    const staleMs = getStaleThresholdMs(offlineMs)

    const machines = await ctx.db.query("machines").take(MACHINE_SCAN_LIMIT)
    const heartbeats = await ctx.db.query("machineHeartbeats").collect()

    let online = 0
    let warning = 0
    let offline = 0
    let newest = 0
    let oldest = 0
    const withHeartbeat = new Set<string>()

    for (const hb of heartbeats) {
      const ageMs = now - hb.lastHeartbeatAt
      withHeartbeat.add(String(hb.machineId))
      if (newest === 0 || hb.lastHeartbeatAt > newest) {
        newest = hb.lastHeartbeatAt
      }
      if (oldest === 0 || hb.lastHeartbeatAt < oldest) {
        oldest = hb.lastHeartbeatAt
      }
      // Bucket by heartbeat age against the configured thresholds.
      if (ageMs <= offlineMs) {
        online += 1
      } else if (ageMs <= staleMs) {
        warning += 1
      } else {
        offline += 1
      }
    }

    // Count machines in the sample that lack a heartbeat document.
    // Fix: the previous `machines.length - withHeartbeat.size` went negative
    // (and was clamped to 0) whenever heartbeats existed for machines outside
    // the truncated sample, hiding machines that truly have no heartbeat.
    let withoutHeartbeat = 0
    for (const machine of machines) {
      if (!withHeartbeat.has(String(machine._id))) {
        withoutHeartbeat += 1
      }
    }

    return {
      totals: {
        machines: machines.length,
        heartbeats: heartbeats.length,
        withoutHeartbeat,
        truncated: machines.length === MACHINE_SCAN_LIMIT,
      },
      connectivity: {
        online,
        warning,
        // Machines with no heartbeat at all are reported as offline.
        offline: offline + withoutHeartbeat,
      },
      heartbeatAgeMs: {
        newest: newest ? now - newest : null,
        oldest: oldest ? now - oldest : null,
      },
      thresholds: {
        offlineMs,
        staleMs,
      },
      generatedAt: now,
    }
  },
})

View file

@ -5028,3 +5028,68 @@ export const listPaginated = query({
}; };
}, },
}) })
// Exporta tickets resolvidos para arquivamento externo (somente com segredo)
/**
 * Paged export of resolved tickets (plus their comments/events) for cold
 * archival, newest first, scoped to one tenant.
 *
 * Auth: `secret` must match INTERNAL_HEALTH_TOKEN (falling back to
 * REPORTS_CRON_SECRET). NOTE(review): when neither env var is set the check
 * is skipped (fail-open) — confirm a secret is configured in production.
 */
export const exportForArchive = query({
  args: {
    tenantId: v.string(),
    before: v.number(), // timestamp ms
    limit: v.optional(v.number()),
    secret: v.optional(v.string()),
  },
  handler: async (ctx, args) => {
    const allowedSecret = process.env["INTERNAL_HEALTH_TOKEN"] ?? process.env["REPORTS_CRON_SECRET"]
    if (allowedSecret && args.secret !== allowedSecret) {
      throw new ConvexError("Nao autorizado")
    }

    const cutoff = args.before
    // Fix: clamp to an integer in [1, 200]. The previous `Math.min(limit, 200)`
    // let zero, negative, fractional or NaN values flow straight into .take().
    const requestedLimit = typeof args.limit === "number" && Number.isFinite(args.limit) ? Math.floor(args.limit) : 50
    const limit = Math.min(Math.max(requestedLimit, 1), 200)

    // Newest resolved tickets older than the cutoff, for this tenant.
    const candidates = await ctx.db
      .query("tickets")
      .withIndex("by_tenant_resolved", (q) => q.eq("tenantId", args.tenantId).lt("resolvedAt", cutoff))
      .order("desc")
      .take(limit)

    const result: Array<{
      ticket: Doc<"tickets">
      comments: Array<Doc<"ticketComments">>
      events: Array<Doc<"ticketEvents">>
    }> = []

    for (const t of candidates) {
      // Per-ticket reads are capped (200 comments / 200 events) to bound memory.
      const comments = await ctx.db
        .query("ticketComments")
        .withIndex("by_ticket", (q) => q.eq("ticketId", t._id))
        .take(200)
      const events = await ctx.db
        .query("ticketEvents")
        .withIndex("by_ticket", (q) => q.eq("ticketId", t._id))
        .order("desc")
        .take(200)
      result.push({
        ticket: t,
        comments,
        events,
      })
    }

    return {
      total: result.length,
      items: result.map((item) => ({
        ticket: item.ticket,
        comments: item.comments.map((c) => ({
          ...c,
          // Attachments are reduced to stable metadata (no signed URLs/blobs).
          attachments: (c.attachments ?? []).map((att) => ({
            storageId: att.storageId,
            name: att.name,
            size: att.size ?? null,
            type: att.type ?? null,
          })),
        })),
        events: item.events,
      })),
    }
  },
})

View file

@ -509,6 +509,14 @@ EOF
└── db.sqlite3.pre-vacuum-20251209 449MB (antes do primeiro vacuum) └── db.sqlite3.pre-vacuum-20251209 449MB (antes do primeiro vacuum)
``` ```
## 14) Saude e retencao (dashboard interno)
- Dashboard staff: `/admin/health` mostra tickets, cadastros, estado de heartbeat e resumo de retencao. Usa Prisma + Convex; se o Convex nao responder, exibe aviso.
- Token interno: defina `INTERNAL_HEALTH_TOKEN` (ou reutilize `REPORTS_CRON_SECRET`) no Convex e no Next para a query `ops.healthSnapshot`.
- Politica alvo: tickets sem expiracao; telemetria inventory/metrics 90 dias; alertas 180 dias; runs/artefatos de export 30 dias. Detalhes em `docs/RETENTION-HEALTH.md`.
- Sem cron de limpeza ligado. Monitorar tamanho do SQLite e memoria; so limpar/arquivar em janela de manutencao com backup.
- Backup local de tickets: `POST /api/admin/tickets/archive-local` (staff) exporta tickets resolvidos mais antigos que N dias para JSONL em `ARCHIVE_DIR` (padrao `./archives`). Protegido por `INTERNAL_HEALTH_TOKEN`/`REPORTS_CRON_SECRET`.
--- ---
Ultima atualizacao: **10/12/2025** — Problema de OOM resolvido definitivamente. Sistema estavel com 395MB de memoria (1.93% do limite de 20GB) Ultima atualizacao: **10/12/2025** - Problema de OOM resolvido definitivamente. Sistema estavel com 395MB de memoria (1.93% do limite de 20GB)

41
docs/RETENTION-HEALTH.md Normal file
View file

@ -0,0 +1,41 @@
# Retencao, observabilidade e consulta a longo prazo
Este documento resume as decisoes aplicadas agora para manter o sistema saudavel sem perder dados de negocio (tickets).
## Politica de retencao (alvo)
- Tickets: **sem expiracao automatica**. Antes de mover para storage frio, exportar e manter copia integra; nao apagar tickets direto.
- Telemetria de maquinas (inventory/metrics): manter 90 dias.
- Alertas de postura de maquinas: manter 180 dias.
- Runs/artefatos de export de relatorios: manter 30 dias.
Estrategia: nenhuma limpeza automatica ligada. Usamos apenas monitoramento e, se necessario no futuro, GC preguicoso em handlers de alto volume ou rotinas manuais com janela de manutencao + backup.
## O que foi ajustado no codigo
- Heartbeat: inventario e metrics agora sao saneados, tem hash estavel e so geram nova versao quando o hash muda (evita JSON.stringify pesado e escrita redundante).
- Hashes guardados em metadata para comparacao barata; campos volumosos (software/extended) continuam bloqueados.
- Query interna `ops.healthSnapshot` (Convex) para expor contagem de maquinas/heartbeats por faixa de idade; protegida por `INTERNAL_HEALTH_TOKEN` (cai no `REPORTS_CRON_SECRET` se nao setado).
- Dashboard `/admin/health` (Next) usa Prisma + Convex para consolidar tickets, cadastros, estado de heartbeat e a politica de retencao.
- Export/backup local de tickets: endpoint `POST /api/admin/tickets/archive-local` (staff) grava tickets resolvidos mais antigos que N dias em JSONL dentro de `ARCHIVE_DIR` (padrao `./archives`). Usa `exportResolvedTicketsToDisk` com segredo interno (`INTERNAL_HEALTH_TOKEN`/`REPORTS_CRON_SECRET`).
## Como acessar tickets antigos sem perda
- Base quente: Prisma (SQLite) guarda todos os tickets; nenhuma rotina remove ou trunca tickets.
- Se um dia for preciso offload (ex.: >50k tickets):
- Exportar em lotes (ex.: JSONL mensais) para storage frio (S3/compat).
- Gravar um marcador de offload no DB quente (ex.: `ticket_archived_at`, `archive_key`).
- Endpoint de leitura pode, ao nao encontrar o ticket no DB quente, baixar/consultar o arquivo frio com base no `archive_key` e exibir em modo somente leitura.
- Com isso, mesmo tickets muito antigos continuam consultaveis, apenas com caminho de leitura mais lento quando estiverem fora do DB principal.
## Dashboard interno de saude
- Caminho: `/admin/health` (somente staff).
- Mostra: contagem total/abertos de tickets, cadastros (usuarios/empresas), conectividade de dispositivos (online/atencao/offline), idade do ultimo e do mais antigo heartbeat, politica de retencao.
- Protecao da query Convex: defina `INTERNAL_HEALTH_TOKEN` (ou reutilize `REPORTS_CRON_SECRET`) no ambiente do Convex e do Next. Se o token faltar ou o Convex nao responder, o card exibe aviso.
## Checks operacionais sugeridos (manuais)
- Tamanho do banco do Convex: `ssh -i ~/.ssh/codex_ed25519 root@154.12.253.40 "ls -lh /var/lib/docker/volumes/sistema_convex_data/_data/db.sqlite3"`
- Memoria do Convex: `ssh -i ~/.ssh/codex_ed25519 root@154.12.253.40 "docker stats --no-stream | grep convex"`
- Alvos: <100-200 MB para o SQLite e <5 GB de RAM. Acima disso, abrir janela curta, fazer backup e avaliar limpeza ou arquivamento pontual.
## Estado atual e proximos passos
- Cron de limpeza segue desativado. Prioridade: monitorar 2-4 semanas para validar estabilidade pos-correcoes.
- Se o volume crescer: habilitar GC preguicoso em handlers de alto volume (ex.: heartbeat) com limites pequenos, ou acionar rotina manual/HTTP para deletar apenas telemetria fora da retencao.
- Tickets permanecem integrais; qualquer offload futuro deve ser acompanhado de export completo + marcador para leitura em modo arquivo frio.

View file

@ -0,0 +1,201 @@
import { Badge } from "@/components/ui/badge"
import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card"
import { AppShell } from "@/components/app-shell"
import { SiteHeader } from "@/components/site-header"
import { getHealthSnapshot } from "@/server/health"
export const runtime = "nodejs"
export const dynamic = "force-dynamic"
/**
 * Human-friendly duration label: the two largest units for day/hour spans,
 * a single unit below that, and "N/A" when no value is available.
 */
function formatDuration(ms: number | null): string {
  if (ms === null) return "N/A"
  const secs = Math.floor(ms / 1000)
  const mins = Math.floor(secs / 60)
  const hrs = Math.floor(mins / 60)
  const dys = Math.floor(hrs / 24)
  if (dys > 0) {
    return `${dys}d ${hrs % 24}h`
  }
  if (hrs > 0) {
    return `${hrs}h ${mins % 60}m`
  }
  return mins > 0 ? `${mins}m` : `${secs}s`
}
/** Format a number using pt-BR digit grouping for display. */
function formatNumber(value: number) {
  return new Intl.NumberFormat("pt-BR").format(value)
}
/**
 * /admin/health — server component rendering a consolidated platform
 * health snapshot: tickets, device connectivity, account counts and the
 * retention policy. Data comes from getHealthSnapshot() (Prisma + Convex).
 */
export default async function AdminHealthPage() {
  const snapshot = await getHealthSnapshot()
  // `devices` is null when the Convex backend did not answer; every
  // device-related widget below must render its fallback branch then.
  const devices = snapshot.devices
  const retention = snapshot.retention
  const strategy = snapshot.retentionStrategy
  return (
    <AppShell
      header={
        <SiteHeader
          title="Saude da plataforma"
          lead="Visao consolidada de tickets, dispositivos e limites operacionais sem cron de limpeza automatica."
        />
      }
    >
      <div className="mx-auto w-full max-w-6xl space-y-8 px-6 lg:px-8">
        <div className="grid gap-4 lg:grid-cols-3">
          <Card>
            <CardHeader>
              <CardTitle>Tickets</CardTitle>
              <CardDescription>Sem expiracao automatica; acompanhar volume diario.</CardDescription>
            </CardHeader>
            <CardContent className="space-y-3">
              <div className="flex items-baseline justify-between">
                <span className="text-sm text-muted-foreground">Total</span>
                <span className="text-3xl font-semibold">{formatNumber(snapshot.tickets.total)}</span>
              </div>
              <div className="flex items-center justify-between text-sm">
                <span className="text-muted-foreground">Abertos</span>
                <Badge variant="secondary">{formatNumber(snapshot.tickets.open)}</Badge>
              </div>
              <div className="flex items-center justify-between text-sm text-muted-foreground">
                <span>Ultimos 7 dias</span>
                <span>{formatNumber(snapshot.tickets.last7d)}</span>
              </div>
              <div className="flex items-center justify-between text-sm text-muted-foreground">
                <span>Ultimas 24h</span>
                <span>{formatNumber(snapshot.tickets.last24h)}</span>
              </div>
            </CardContent>
          </Card>
          <Card>
            <CardHeader className="flex flex-row items-center justify-between space-y-0 pb-2">
              <div>
                <CardTitle>Dispositivos</CardTitle>
                <CardDescription>Heartbeats e conectividade (Convex)</CardDescription>
              </div>
              <Badge>{devices ? "Ativo" : "Sem resposta"}</Badge>
            </CardHeader>
            <CardContent className="space-y-3">
              {devices ? (
                <>
                  <div className="flex items-center justify-between text-sm">
                    <span className="text-muted-foreground">Cadastrados</span>
                    <span className="font-semibold">{formatNumber(devices.machines)}</span>
                  </div>
                  <div className="grid grid-cols-3 gap-2 text-sm">
                    <div className="rounded-md bg-emerald-50 px-3 py-2 text-emerald-800 dark:bg-emerald-950/40 dark:text-emerald-200">
                      <div className="text-xs uppercase tracking-wide text-emerald-700 dark:text-emerald-300">Online</div>
                      <div className="text-lg font-semibold">{formatNumber(devices.online)}</div>
                    </div>
                    <div className="rounded-md bg-amber-50 px-3 py-2 text-amber-800 dark:bg-amber-950/40 dark:text-amber-100">
                      <div className="text-xs uppercase tracking-wide text-amber-700 dark:text-amber-200">Atencao</div>
                      <div className="text-lg font-semibold">{formatNumber(devices.warning)}</div>
                    </div>
                    <div className="rounded-md bg-rose-50 px-3 py-2 text-rose-800 dark:bg-rose-950/40 dark:text-rose-100">
                      <div className="text-xs uppercase tracking-wide text-rose-700 dark:text-rose-200">Offline</div>
                      <div className="text-lg font-semibold">{formatNumber(devices.offline)}</div>
                    </div>
                  </div>
                  <div className="flex items-center justify-between text-sm text-muted-foreground">
                    <span>Sem heartbeat</span>
                    <span>{formatNumber(devices.withoutHeartbeat)}</span>
                  </div>
                  <div className="flex items-center justify-between text-sm text-muted-foreground">
                    <span>Ultimo heartbeat</span>
                    <span>{formatDuration(devices.newestHeartbeatAgeMs)}</span>
                  </div>
                  <div className="flex items-center justify-between text-sm text-muted-foreground">
                    <span>Mais antigo</span>
                    <span>{formatDuration(devices.oldestHeartbeatAgeMs)}</span>
                  </div>
                  <div className="text-xs text-muted-foreground">
                    Offline apos {Math.round(devices.thresholds.offlineMs / 60000)} min | stale apos{" "}
                    {Math.round(devices.thresholds.staleMs / 60000)} min
                    {devices.truncated ? " (amostra limitada)" : ""}
                  </div>
                </>
              ) : (
                <p className="text-sm text-muted-foreground">
                  Nao foi possivel ler o estado do Convex. Confirme token interno e conectividade do backend.
                </p>
              )}
            </CardContent>
          </Card>
          <Card>
            <CardHeader>
              <CardTitle>Cadastros</CardTitle>
              <CardDescription>Usuarios e empresas ativos na base SQL.</CardDescription>
            </CardHeader>
            <CardContent className="space-y-3 text-sm">
              <div className="flex items-center justify-between">
                <span className="text-muted-foreground">Usuarios</span>
                <span className="font-semibold">{formatNumber(snapshot.accounts.users)}</span>
              </div>
              <div className="flex items-center justify-between">
                <span className="text-muted-foreground">Empresas</span>
                <span className="font-semibold">{formatNumber(snapshot.accounts.companies)}</span>
              </div>
              <div className="rounded-md border border-dashed p-3 text-xs text-muted-foreground">
                Tickets antigos continuam consultaveis; planejar arquivamento frio antes de qualquer offload massivo.
              </div>
            </CardContent>
          </Card>
        </div>
        <div className="grid gap-4 lg:grid-cols-2">
          <Card>
            <CardHeader>
              <CardTitle>Retencao</CardTitle>
              <CardDescription>Duracao alvo por tipo de dado (sem cron de exclusao ligado).</CardDescription>
            </CardHeader>
            <CardContent className="space-y-3 text-sm">
              <div className="flex items-center justify-between">
                <span className="text-muted-foreground">Tickets</span>
                <span className="font-semibold">Sem expiracao</span>
              </div>
              <div className="flex items-center justify-between">
                <span className="text-muted-foreground">Telemetria inventory/metrics</span>
                <span className="font-semibold">{retention.machineTelemetry.inventoryDays} dias</span>
              </div>
              <div className="flex items-center justify-between">
                <span className="text-muted-foreground">Alertas de dispositivo</span>
                <span className="font-semibold">{retention.machineTelemetry.alertsDays} dias</span>
              </div>
              <div className="flex items-center justify-between">
                <span className="text-muted-foreground">Historico de exports</span>
                <span className="font-semibold">{retention.reportExports.runsDays} dias</span>
              </div>
              <div className="rounded-md bg-muted/50 p-3 text-xs text-muted-foreground">
                {strategy.archivalPlan} Sem exclusao automatica; GC preguicoso pode ser habilitado futuramente em handlers de
                alto volume.
              </div>
            </CardContent>
          </Card>
          <Card>
            <CardHeader>
              <CardTitle>Check rapido</CardTitle>
              <CardDescription>Comandos manuais recomendados (sem tocar em dados).</CardDescription>
            </CardHeader>
            <CardContent className="space-y-3 text-sm">
              <div className="rounded-md border border-dashed p-3 font-mono text-xs">
                ssh -i ~/.ssh/codex_ed25519 root@154.12.253.40 "ls -lh /var/lib/docker/volumes/sistema_convex_data/_data/db.sqlite3"
              </div>
              <div className="rounded-md border border-dashed p-3 font-mono text-xs">
                ssh -i ~/.ssh/codex_ed25519 root@154.12.253.40 "docker stats --no-stream | grep convex"
              </div>
              <p className="text-muted-foreground">
                Objetivo: acompanhar tamanho do SQLite e memoria do Convex por 2-4 semanas. Se subir alem de 200 MB / 5 GB,
                abrir janela de manutencao com backup antes de limpar/arquivar.
              </p>
            </CardContent>
          </Card>
        </div>
        <p className="text-xs text-muted-foreground">
          Atualizado em {new Date(snapshot.generatedAt).toLocaleString("pt-BR")}{" "}
          {snapshot.notes ? `- Observacao: ${snapshot.notes}` : ""}
        </p>
      </div>
    </AppShell>
  )
}

View file

@ -0,0 +1,52 @@
import { NextResponse } from "next/server"
import { requireAuthenticatedSession } from "@/lib/auth-server"
import { isStaff } from "@/lib/authz"
import { env } from "@/lib/env"
import { exportResolvedTicketsToDisk } from "@/server/archive/local-tickets"
/**
 * Resolves the shared secret accepted from cron callers.
 * INTERNAL_HEALTH_TOKEN takes precedence over REPORTS_CRON_SECRET;
 * returns null when neither environment variable is configured.
 */
function getCronSecret(): string | null {
  const candidates = [env.INTERNAL_HEALTH_TOKEN, env.REPORTS_CRON_SECRET]
  for (const candidate of candidates) {
    if (candidate != null) {
      return candidate
    }
  }
  return null
}
/**
 * POST handler that exports resolved tickets to a local JSONL archive.
 *
 * Authorization, in order:
 *  1. cron callers: `x-cron-secret` header matching the configured secret;
 *  2. otherwise an authenticated staff session is required (403 for non-staff).
 *
 * Optional JSON body: { days?: number; limit?: number }. Malformed or
 * missing bodies fall back to the defaults (365 days / 50 tickets).
 * Responds 200 with the export summary, or 500 if the export fails.
 */
export async function POST(request: Request) {
  const cronSecret = getCronSecret()
  const headerSecret = request.headers.get("x-cron-secret")?.trim()
  const isCron = Boolean(cronSecret && headerSecret && headerSecret === cronSecret)

  if (!isCron) {
    // No valid cron secret: fall back to session auth, staff only.
    const session = await requireAuthenticatedSession()
    const role = session.user.role ?? "agent"
    if (!isStaff(role)) {
      return NextResponse.json({ error: "Acesso negado" }, { status: 403 })
    }
  }

  let body: { days?: number; limit?: number } = {}
  try {
    body = (await request.json()) ?? {}
  } catch {
    body = {}
  }

  // Sanitize numeric options. A plain `> 0` check would let `Infinity`
  // through, removing the export cap (and pushing the cutoff to -Infinity),
  // so require a finite positive number and coerce to a whole value.
  const sanitize = (value: number | undefined, fallback: number): number =>
    typeof value === "number" && Number.isFinite(value) && value > 0 ? Math.floor(value) : fallback
  const days = sanitize(body.days, 365)
  const limit = sanitize(body.limit, 50)

  try {
    const result = await exportResolvedTicketsToDisk({ days, limit })
    return NextResponse.json({ ok: true, ...result }, { status: 200 })
  } catch (error) {
    console.error("[admin.tickets.archive-local] failed", error)
    return NextResponse.json(
      {
        error: "Falha ao arquivar tickets",
        details: error instanceof Error ? error.message : String(error),
      },
      { status: 500 }
    )
  }
}

View file

@ -32,7 +32,9 @@ const envSchema = z.object({
SMTP_TLS: z.string().optional(), SMTP_TLS: z.string().optional(),
MAILER_SENDER_EMAIL: z.string().optional(), MAILER_SENDER_EMAIL: z.string().optional(),
REPORTS_CRON_SECRET: z.string().optional(), REPORTS_CRON_SECRET: z.string().optional(),
INTERNAL_HEALTH_TOKEN: z.string().optional(),
REPORTS_CRON_BASE_URL: urlField().or(z.literal("")).optional(), REPORTS_CRON_BASE_URL: urlField().or(z.literal("")).optional(),
ARCHIVE_DIR: stringField().or(z.literal("")).optional(),
}) })
const parsed = envSchema.safeParse(process.env) const parsed = envSchema.safeParse(process.env)
@ -67,7 +69,9 @@ export const env = {
MACHINE_TOKEN_TTL_MS: parsed.data.MACHINE_TOKEN_TTL_MS, MACHINE_TOKEN_TTL_MS: parsed.data.MACHINE_TOKEN_TTL_MS,
FLEET_SYNC_SECRET: parsed.data.FLEET_SYNC_SECRET, FLEET_SYNC_SECRET: parsed.data.FLEET_SYNC_SECRET,
REPORTS_CRON_SECRET: parsed.data.REPORTS_CRON_SECRET, REPORTS_CRON_SECRET: parsed.data.REPORTS_CRON_SECRET,
INTERNAL_HEALTH_TOKEN: parsed.data.INTERNAL_HEALTH_TOKEN,
REPORTS_CRON_BASE_URL: parsed.data.REPORTS_CRON_BASE_URL, REPORTS_CRON_BASE_URL: parsed.data.REPORTS_CRON_BASE_URL,
ARCHIVE_DIR: parsed.data.ARCHIVE_DIR ?? "./archives",
SMTP: parsed.data.SMTP_ADDRESS && parsed.data.SMTP_USERNAME && parsed.data.SMTP_PASSWORD SMTP: parsed.data.SMTP_ADDRESS && parsed.data.SMTP_USERNAME && parsed.data.SMTP_PASSWORD
? { ? {
host: parsed.data.SMTP_ADDRESS, host: parsed.data.SMTP_ADDRESS,

40
src/lib/retention.ts Normal file
View file

@ -0,0 +1,40 @@
/**
 * Declarative data-retention policy, grouped by data family.
 * All durations are expressed in days; tickets are kept indefinitely.
 */
export type RetentionPolicy = {
  tickets: {
    retention: "infinite"
    notes: string
  }
  machineTelemetry: {
    inventoryDays: number
    metricsDays: number
    alertsDays: number
  }
  reportExports: {
    runsDays: number
    artifactsDays: number
  }
}
// Canonical retention windows; surfaced read-only on the health dashboard.
export const RETENTION_POLICY: RetentionPolicy = {
  tickets: {
    retention: "infinite",
    notes: "Nao apagar tickets automaticamente; arquivar antes de mover para storage frio.",
  },
  machineTelemetry: {
    inventoryDays: 90,
    metricsDays: 90,
    alertsDays: 180,
  },
  reportExports: {
    runsDays: 30,
    artifactsDays: 30,
  },
}
// How cleanup is expected to run: no active cron today — manual checks or
// lazy GC triggered by high-volume handlers, working in batches of 500.
// Archival to cheap storage must happen before any definitive deletion.
export const RETENTION_STRATEGY = {
  cleanupMode: "manual-or-lazy" as const,
  cleanupBatchSize: 500,
  archivalPlan:
    "Arquivar dados frios (telemetria/tickets antigos) em storage barato antes de qualquer remocao definitiva.",
  notes:
    "Sem cron de limpeza ativa; usar verificacoes manuais ou GC preguiçoso acionado por handlers de alto volume.",
}

View file

@ -0,0 +1,73 @@
import { mkdir, writeFile } from "fs/promises"
import { join, dirname } from "path"
import { api } from "@/convex/_generated/api"
import { DEFAULT_TENANT_ID } from "@/lib/constants"
import { env } from "@/lib/env"
import { createConvexClient } from "@/server/convex-client"
// One archived ticket bundle: the ticket document plus its related
// comments and events, as returned by the Convex export query.
type ArchiveItem = {
  ticket: Record<string, unknown>
  comments: Array<Record<string, unknown>>
  events: Array<Record<string, unknown>>
}
// Response shape of api.tickets.exportForArchive.
type ExportResponse = {
  total: number
  items: ArchiveItem[]
}
/**
 * Returns the secret required by the Convex archive-export query.
 * Throws when neither INTERNAL_HEALTH_TOKEN nor REPORTS_CRON_SECRET
 * is set to a non-empty value.
 */
function assertArchiveSecret(): string {
  const token = env.INTERNAL_HEALTH_TOKEN ?? env.REPORTS_CRON_SECRET
  if (token) {
    return token
  }
  throw new Error("Defina INTERNAL_HEALTH_TOKEN ou REPORTS_CRON_SECRET para exportar tickets")
}
/**
 * Current UTC timestamp in ISO-8601 form with `:` and `.` swapped for `-`,
 * making it safe to embed in file names on any platform.
 */
function nowIso(): string {
  const iso = new Date().toISOString()
  return iso.split(/[:.]/).join("-")
}
/**
 * Exports resolved tickets older than `days` from Convex into a local JSONL
 * file under ARCHIVE_DIR (one ticket bundle per line).
 *
 * @param options.days     Age cutoff in days (default 365).
 * @param options.limit    Maximum tickets exported per run (default 50).
 * @param options.tenantId Tenant scope (default DEFAULT_TENANT_ID).
 * @returns Count of records written and the full path of the archive file.
 * @throws When no archive secret is configured or the Convex query fails.
 */
export async function exportResolvedTicketsToDisk(options?: {
  days?: number
  limit?: number
  tenantId?: string
}) {
  const days = options?.days ?? 365
  const limit = options?.limit ?? 50
  const tenantId = options?.tenantId ?? DEFAULT_TENANT_ID
  const cutoff = Date.now() - days * 24 * 60 * 60 * 1000
  const secret = assertArchiveSecret()
  const client = createConvexClient()
  // @ts-expect-error - exportForArchive is added manually without regenerating the client
  const res = (await client.query(api.tickets.exportForArchive, {
    tenantId,
    before: cutoff,
    limit,
    secret,
  })) as ExportResponse
  const archiveDir = env.ARCHIVE_DIR ?? "./archives"
  const filename = `tickets-archive-${nowIso()}-resolved-${days}d.jsonl`
  const fullPath = join(archiveDir, filename)
  await mkdir(dirname(fullPath), { recursive: true })
  // Capture one timestamp so every record in this batch shares the same
  // archivedAt (previously Date.now() was re-evaluated per item inside map).
  const archivedAt = Date.now()
  const lines = res.items.map((item) =>
    JSON.stringify({
      ticketId: item.ticket?._id ?? null,
      tenantId,
      archivedAt,
      ticket: item.ticket,
      comments: item.comments,
      events: item.events,
    })
  )
  // JSONL convention: terminate the file with a newline so appends and
  // line-counting tools behave; an empty export stays an empty file.
  const payload = lines.length > 0 ? `${lines.join("\n")}\n` : ""
  await writeFile(fullPath, payload, { encoding: "utf-8" })
  return {
    written: res.items.length,
    file: fullPath,
  }
}

98
src/server/health.ts Normal file
View file

@ -0,0 +1,98 @@
import { api } from "@/convex/_generated/api"
import { RETENTION_POLICY, RETENTION_STRATEGY } from "@/lib/retention"
import { prisma } from "@/lib/prisma"
import { createConvexClient } from "@/server/convex-client"
import { env } from "@/lib/env"
// Ticket statuses counted as "open" on the dashboard.
const OPEN_TICKET_STATUSES = ["PENDING", "AWAITING_ATTENDANCE", "PAUSED"]
// Device fleet summary derived from the Convex ops.healthSnapshot query.
// Heartbeat ages are milliseconds; `truncated` mirrors the query's flag
// that not all machines were scanned.
type DeviceHealth = {
  machines: number
  online: number
  warning: number
  offline: number
  withoutHeartbeat: number
  newestHeartbeatAgeMs: number | null
  oldestHeartbeatAgeMs: number | null
  thresholds: {
    offlineMs: number
    staleMs: number
  }
  truncated: boolean
}
// Full payload rendered by the /admin/health dashboard.
// `devices` is null when Convex could not be reached (see `notes`).
export type HealthSnapshot = {
  generatedAt: string
  tickets: {
    total: number
    open: number
    last7d: number
    last24h: number
  }
  accounts: {
    users: number
    companies: number
  }
  devices: DeviceHealth | null
  retention: typeof RETENTION_POLICY
  retentionStrategy: typeof RETENTION_STRATEGY
  notes?: string
}
/** Serializes a Date to its ISO-8601 UTC string representation. */
function toIsoString(date: Date): string {
  return date.toISOString()
}
/**
 * Builds the snapshot shown on /admin/health: ticket volumes and account
 * counts from Prisma, plus best-effort device fleet state from Convex.
 * A Convex failure is logged and surfaced via `notes` instead of throwing.
 */
export async function getHealthSnapshot(): Promise<HealthSnapshot> {
  const generatedAt = new Date()
  const DAY_MS = 24 * 60 * 60 * 1000
  const since7d = new Date(generatedAt.getTime() - 7 * DAY_MS)
  const since24h = new Date(generatedAt.getTime() - DAY_MS)

  // Independent counts: run them in parallel.
  const [ticketTotal, ticketOpen, ticketWeek, ticketDay, users, companies] = await Promise.all([
    prisma.ticket.count(),
    prisma.ticket.count({ where: { status: { in: OPEN_TICKET_STATUSES } } }),
    prisma.ticket.count({ where: { createdAt: { gte: since7d } } }),
    prisma.ticket.count({ where: { createdAt: { gte: since24h } } }),
    prisma.user.count(),
    prisma.company.count(),
  ])

  let devices: DeviceHealth | null = null
  try {
    const ops = await createConvexClient().query(api.ops.healthSnapshot, {
      token: env.INTERNAL_HEALTH_TOKEN ?? env.REPORTS_CRON_SECRET ?? undefined,
    })
    devices = {
      machines: ops.totals.machines,
      online: ops.connectivity.online,
      warning: ops.connectivity.warning,
      offline: ops.connectivity.offline,
      withoutHeartbeat: ops.totals.withoutHeartbeat,
      newestHeartbeatAgeMs: ops.heartbeatAgeMs.newest,
      oldestHeartbeatAgeMs: ops.heartbeatAgeMs.oldest,
      thresholds: ops.thresholds,
      truncated: ops.totals.truncated,
    }
  } catch (error) {
    console.error("[health] Falha ao carregar estado das maquinas", error)
  }

  return {
    generatedAt: toIsoString(generatedAt),
    tickets: {
      total: ticketTotal,
      open: ticketOpen,
      last7d: ticketWeek,
      last24h: ticketDay,
    },
    accounts: { users, companies },
    devices,
    retention: RETENTION_POLICY,
    retentionStrategy: RETENTION_STRATEGY,
    notes: devices ? undefined : "Convex nao respondeu; verificar conectividade ou token interno.",
  }
}