diff --git a/apps/sim/lib/copilot/chat/payload.ts b/apps/sim/lib/copilot/chat/payload.ts index a3d0bb9014..e718b33cce 100644 --- a/apps/sim/lib/copilot/chat/payload.ts +++ b/apps/sim/lib/copilot/chat/payload.ts @@ -3,6 +3,7 @@ import { toError } from '@sim/utils/errors' import { LRUCache } from 'lru-cache' import { getHighestPrioritySubscription } from '@/lib/billing/core/subscription' import { isPaid } from '@/lib/billing/plan-helpers' +import type { VfsSnapshotV1 } from '@/lib/copilot/generated/vfs-snapshot-v1' import { getExposedIntegrationTools } from '@/lib/copilot/integration-tools' import { getToolEntry } from '@/lib/copilot/tool-executor/router' import { getCopilotToolDescription } from '@/lib/copilot/tools/descriptions' @@ -33,6 +34,7 @@ interface BuildPayloadParams { prefetch?: boolean implicitFeedback?: string workspaceContext?: string + vfs?: VfsSnapshotV1 userPermission?: string userTimezone?: string userMetadata?: { @@ -366,6 +368,7 @@ export async function buildCopilotRequestPayload( ...(mothershipTools.length > 0 ? { mothershipTools } : {}), ...(commands && commands.length > 0 ? { commands } : {}), ...(params.workspaceContext ? { workspaceContext: params.workspaceContext } : {}), + ...(params.vfs ? { vfs: params.vfs } : {}), ...(params.userPermission ? { userPermission: params.userPermission } : {}), ...(params.userTimezone ? { userTimezone: params.userTimezone } : {}), ...(params.userMetadata && diff --git a/apps/sim/lib/copilot/chat/post.test.ts b/apps/sim/lib/copilot/chat/post.test.ts index 118f6fee77..37e85038e3 100644 --- a/apps/sim/lib/copilot/chat/post.test.ts +++ b/apps/sim/lib/copilot/chat/post.test.ts @@ -17,7 +17,7 @@ const getUserEntityPermissions = permissionsMockFns.mockGetUserEntityPermissions const { getEffectiveDecryptedEnv, - generateWorkspaceContext, + generateWorkspaceSnapshot, processContextsServer, resolveActiveResourceContext, buildCopilotRequestPayload, @@ -31,7 +31,7 @@ const { mockPublishStatusChanged, } = vi.hoisted(() => ({ getEffectiveDecryptedEnv: vi.fn(), - generateWorkspaceContext: vi.fn(), + generateWorkspaceSnapshot: vi.fn(), processContextsServer: vi.fn(), resolveActiveResourceContext: vi.fn(), buildCopilotRequestPayload: vi.fn(), @@ -56,7 +56,7 @@ vi.mock('@/lib/environment/utils', () => ({ })) vi.mock('@/lib/copilot/chat/workspace-context', () => ({ - generateWorkspaceContext, + generateWorkspaceSnapshot, })) vi.mock('@/lib/copilot/chat/process-contents', () => ({ @@ -142,7 +142,10 @@ describe('handleUnifiedChatPost', () => { }) getUserEntityPermissions.mockResolvedValue('write') getEffectiveDecryptedEnv.mockResolvedValue({ API_KEY: 'secret' }) - generateWorkspaceContext.mockResolvedValue('workspace context') + generateWorkspaceSnapshot.mockResolvedValue({ + markdown: 'workspace context', + snapshot: { workflows: [{ id: 'wf-1', name: 'Alpha', path: 'workflows/Alpha' }] }, + }) processContextsServer.mockResolvedValue([]) resolveActiveResourceContext.mockResolvedValue(null) buildCopilotRequestPayload.mockImplementation(async (params: Record) => params) @@ -178,11 +181,13 @@ describe('handleUnifiedChatPost', () => { ) expect(response.status).toBe(200) - expect(generateWorkspaceContext).toHaveBeenCalledWith('ws-1', 'user-1') + expect(generateWorkspaceSnapshot).toHaveBeenCalledWith('ws-1', 'user-1') expect(buildCopilotRequestPayload).toHaveBeenCalledWith( expect.objectContaining({ model: 'claude-opus-4-8', workspaceContext: 'workspace context', + // Regression guard: the branch must forward the typed snapshot, not drop it. + vfs: expect.objectContaining({ workflows: expect.any(Array) }), }), { selectedModel: 'claude-opus-4-8' } ) @@ -221,6 +226,8 @@ describe('handleUnifiedChatPost', () => { expect.objectContaining({ workspaceId: 'ws-1', workspaceContext: 'workspace context', + // Regression guard: the branch must forward the typed snapshot, not drop it. + vfs: expect.objectContaining({ workflows: expect.any(Array) }), }), { selectedModel: '' } ) diff --git a/apps/sim/lib/copilot/chat/post.ts b/apps/sim/lib/copilot/chat/post.ts index e1347d0531..52fe2baef5 100644 --- a/apps/sim/lib/copilot/chat/post.ts +++ b/apps/sim/lib/copilot/chat/post.ts @@ -22,7 +22,7 @@ import { resolveActiveResourceContext, } from '@/lib/copilot/chat/process-contents' import { finalizeAssistantTurn } from '@/lib/copilot/chat/terminal-state' -import { generateWorkspaceContext } from '@/lib/copilot/chat/workspace-context' +import { generateWorkspaceSnapshot } from '@/lib/copilot/chat/workspace-context' import { chatPubSub } from '@/lib/copilot/chat-status' import { COPILOT_REQUEST_MODES } from '@/lib/copilot/constants' import { @@ -32,6 +32,7 @@ import { } from '@/lib/copilot/generated/trace-attribute-values-v1' import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1' import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1' +import type { VfsSnapshotV1 } from '@/lib/copilot/generated/vfs-snapshot-v1' import { createBadRequestResponse, createUnauthorizedResponse } from '@/lib/copilot/request/http' import { createSSEStream, SSE_RESPONSE_HEADERS } from '@/lib/copilot/request/lifecycle/start' import { startCopilotOtelRoot, withCopilotSpan } from '@/lib/copilot/request/otel' @@ -183,6 +184,7 @@ type UnifiedChatBranch = prefetch?: boolean implicitFeedback?: string workspaceContext?: string + vfs?: VfsSnapshotV1 }) => Promise> buildExecutionContext: (params: { userId: string @@ -210,6 +212,7 @@ type UnifiedChatBranch = userTimezone?: string userMetadata?: { name?: string; email?: string; timezone?: string } workspaceContext?: string + vfs?: VfsSnapshotV1 }) => Promise> buildExecutionContext: (params: { userId: string @@ -616,6 +619,7 @@ async function resolveBranch(params: { prefetch: payloadParams.prefetch, implicitFeedback: payloadParams.implicitFeedback, workspaceContext: payloadParams.workspaceContext, + vfs: payloadParams.vfs, userPermission: payloadParams.userPermission, userTimezone: payloadParams.userTimezone, userMetadata: payloadParams.userMetadata, @@ -675,6 +679,7 @@ async function resolveBranch(params: { fileAttachments: payloadParams.fileAttachments, chatId: payloadParams.chatId, workspaceContext: payloadParams.workspaceContext, + vfs: payloadParams.vfs, userPermission: payloadParams.userPermission, userTimezone: payloadParams.userTimezone, userMetadata: payloadParams.userMetadata, @@ -898,7 +903,7 @@ export async function handleUnifiedChatPost(req: NextRequest) { ? withCopilotSpan( TraceSpan.CopilotChatBuildWorkspaceContext, { [TraceAttr.WorkspaceId]: workspaceId }, - () => generateWorkspaceContext(workspaceId, authenticatedUserId), + () => generateWorkspaceSnapshot(workspaceId, authenticatedUserId), activeOtelRoot.context ) : Promise.resolve(undefined) @@ -943,7 +948,7 @@ export async function handleUnifiedChatPost(req: NextRequest) { activeOtelRoot.context ) - const [agentContexts, userPermission, workspaceContext, , executionContext] = + const [agentContexts, userPermission, workspaceSnapshot, , executionContext] = await Promise.all([ agentContextsPromise, userPermissionPromise, @@ -951,6 +956,11 @@ export async function handleUnifiedChatPost(req: NextRequest) { persistUserMessagePromise, executionContextPromise, ]) + // Both halves come from one primary-db fetch (workspace-context.ts): + // `workspaceContext` is the markdown transition fallback, `vfs` is the + // typed snapshot Go diffs into baseline+delta messages. + const workspaceContext = workspaceSnapshot?.markdown + const vfs = workspaceSnapshot?.snapshot executionContext.userPermission = userPermission ?? undefined @@ -987,6 +997,7 @@ export async function handleUnifiedChatPost(req: NextRequest) { prefetch: body.prefetch, implicitFeedback: body.implicitFeedback, workspaceContext, + vfs, }) : branch.buildPayload({ message: body.message, @@ -999,6 +1010,7 @@ export async function handleUnifiedChatPost(req: NextRequest) { userTimezone: body.userTimezone, userMetadata, workspaceContext, + vfs, }), activeOtelRoot.context ) diff --git a/apps/sim/lib/copilot/chat/workspace-context.test.ts b/apps/sim/lib/copilot/chat/workspace-context.test.ts index 0c7850da2d..bcb5398b63 100644 --- a/apps/sim/lib/copilot/chat/workspace-context.test.ts +++ b/apps/sim/lib/copilot/chat/workspace-context.test.ts @@ -116,3 +116,157 @@ describe('buildWorkspaceMd - connected integrations / credentials', () => { expect(md).toContain('## Connected Integrations\n(none)') }) }) + +describe('buildWorkspaceMd - determinism (prompt-cache stability)', () => { + it('is byte-identical regardless of input row order', () => { + const a = buildWorkspaceMd( + baseData({ + members: [ + { name: 'Bob', email: 'bob@x.com', permissionType: 'admin' }, + { name: 'Amy', email: 'amy@x.com', permissionType: 'write' }, + ], + workflows: [ + { id: 'wf-2', name: 'Zeta', isDeployed: false, folderPath: null }, + { id: 'wf-1', name: 'Alpha', isDeployed: true, folderPath: null }, + ], + tables: [ + { id: 't-2', name: 'Orders', description: null, rowCount: 5 }, + { id: 't-1', name: 'Customers', description: null, rowCount: 9 }, + ], + knowledgeBases: [ + { id: 'kb-2', name: 'Docs', connectorTypes: ['notion', 'github'] }, + { id: 'kb-1', name: 'Articles', connectorTypes: ['github', 'notion'] }, + ], + oauthIntegrations: [ + { id: 'c-2', providerId: 'slack', displayName: null, role: null }, + { id: 'c-1', providerId: 'github', displayName: null, role: null }, + ], + envVariables: ['ZED', 'API_KEY'], + customTools: [ + { id: 'ct-2', name: 'Beta Tool' }, + { id: 'ct-1', name: 'Alpha Tool' }, + ], + mcpServers: [ + { id: 'mcp-2', name: 'Zulu', url: null, enabled: false }, + { id: 'mcp-1', name: 'Mike', url: 'https://x', enabled: true }, + ], + skills: [ + { id: 'sk-2', name: 'Writer', description: 'writes' }, + { id: 'sk-1', name: 'Editor', description: 'edits' }, + ], + jobs: [ + { + id: 'j-2', + title: 'Nightly', + prompt: 'run nightly', + cronExpression: '0 0 * * *', + status: 'active', + lifecycle: 'persistent', + sourceTaskName: null, + }, + { + id: 'j-1', + title: 'Hourly', + prompt: 'run hourly', + cronExpression: '0 * * * *', + status: 'active', + lifecycle: 'persistent', + sourceTaskName: null, + }, + ], + }) + ) + const b = buildWorkspaceMd( + baseData({ + members: [ + { name: 'Amy', email: 'amy@x.com', permissionType: 'write' }, + { name: 'Bob', email: 'bob@x.com', permissionType: 'admin' }, + ], + workflows: [ + { id: 'wf-1', name: 'Alpha', isDeployed: true, folderPath: null }, + { id: 'wf-2', name: 'Zeta', isDeployed: false, folderPath: null }, + ], + tables: [ + { id: 't-1', name: 'Customers', description: null, rowCount: 9 }, + { id: 't-2', name: 'Orders', description: null, rowCount: 5 }, + ], + knowledgeBases: [ + { id: 'kb-1', name: 'Articles', connectorTypes: ['notion', 'github'] }, + { id: 'kb-2', name: 'Docs', connectorTypes: ['github', 'notion'] }, + ], + oauthIntegrations: [ + { id: 'c-1', providerId: 'github', displayName: null, role: null }, + { id: 'c-2', providerId: 'slack', displayName: null, role: null }, + ], + envVariables: ['API_KEY', 'ZED'], + customTools: [ + { id: 'ct-1', name: 'Alpha Tool' }, + { id: 'ct-2', name: 'Beta Tool' }, + ], + mcpServers: [ + { id: 'mcp-1', name: 'Mike', url: 'https://x', enabled: true }, + { id: 'mcp-2', name: 'Zulu', url: null, enabled: false }, + ], + skills: [ + { id: 'sk-1', name: 'Editor', description: 'edits' }, + { id: 'sk-2', name: 'Writer', description: 'writes' }, + ], + jobs: [ + { + id: 'j-1', + title: 'Hourly', + prompt: 'run hourly', + cronExpression: '0 * * * *', + status: 'active', + lifecycle: 'persistent', + sourceTaskName: null, + }, + { + id: 'j-2', + title: 'Nightly', + prompt: 'run nightly', + cronExpression: '0 0 * * *', + status: 'active', + lifecycle: 'persistent', + sourceTaskName: null, + }, + ], + }) + ) + expect(a).toBe(b) + }) + + it('ignores volatile workflow run timestamps', () => { + const withRun = buildWorkspaceMd( + baseData({ + workflows: [ + { + id: 'wf-1', + name: 'Alpha', + isDeployed: false, + folderPath: null, + lastRunAt: new Date('2026-06-18T12:00:00Z'), + }, + ], + }) + ) + const withoutRun = buildWorkspaceMd( + baseData({ + workflows: [{ id: 'wf-1', name: 'Alpha', isDeployed: false, folderPath: null }], + }) + ) + expect(withRun).toBe(withoutRun) + expect(withRun).not.toContain('last run') + }) + + it('ignores volatile table row counts', () => { + const a = buildWorkspaceMd( + baseData({ tables: [{ id: 't-1', name: 'Customers', description: null, rowCount: 1 }] }) + ) + const b = buildWorkspaceMd( + baseData({ tables: [{ id: 't-1', name: 'Customers', description: null, rowCount: 9999 }] }) + ) + expect(a).toBe(b) + expect(a).not.toContain('rows') + }) +}) diff --git a/apps/sim/lib/copilot/chat/workspace-context.ts b/apps/sim/lib/copilot/chat/workspace-context.ts index d1a4b4ded8..2bbf5311a8 100644 --- a/apps/sim/lib/copilot/chat/workspace-context.ts +++ b/apps/sim/lib/copilot/chat/workspace-context.ts @@ -1,17 +1,21 @@ -import { dbReplica } from '@sim/db' +import { db } from '@sim/db' import { knowledgeBase, knowledgeConnector, mcpServers, userTableDefinitions, - userTableRows, workflow, workflowFolder, workflowSchedule, } from '@sim/db/schema' import { createLogger } from '@sim/logger' import { toError } from '@sim/utils/errors' -import { and, count, eq, inArray, isNull } from 'drizzle-orm' +import { and, eq, inArray, isNull } from 'drizzle-orm' +import type { + VfsSnapshotV1, + VfsSnapshotV1Job, + VfsSnapshotV1Workflow, +} from '@/lib/copilot/generated/vfs-snapshot-v1' import { normalizeVfsSegment } from '@/lib/copilot/vfs/normalize-segment' import { canonicalWorkflowVfsDir, canonicalWorkspaceFilePath } from '@/lib/copilot/vfs/path-utils' import { getAccessibleOAuthCredentials } from '@/lib/credentials/environment' @@ -57,7 +61,11 @@ export interface WorkspaceMdData { description?: string | null connectorTypes?: string[] }> - tables: Array<{ id: string; name: string; description?: string | null; rowCount: number }> + // rowCount is no longer rendered (it is volatile and would bust the cached + // prompt prefix); kept optional so callers that still have it cheaply (the VFS + // materializer via listTables) need not change, while generateWorkspaceContext + // skips the per-table COUNT query entirely. + tables: Array<{ id: string; name: string; description?: string | null; rowCount?: number }> files: Array<{ id: string; name: string; type: string; size: number; folderPath?: string | null }> oauthIntegrations: Array<{ id: string @@ -81,9 +89,30 @@ export interface WorkspaceMdData { }> } +/** + * Deterministic string ordering. The workspace inventory is placed in the + * prompt-cache prefix (mothership), so its bytes must be identical for identical + * workspace state regardless of DB row order — otherwise the cache silently + * busts every turn. `localeCompare` with a pinned locale gives stable, readable + * ordering across Sim instances (all run the same Node/ICU build). + */ +function stableCompare(a: string, b: string): number { + return a.localeCompare(b, 'en') +} + +/** Stable order by display name, tie-broken by id, for inventory listings. */ +function byNameThenId(a: { name: string; id: string }, b: { name: string; id: string }): number { + return stableCompare(a.name, b.name) || stableCompare(a.id, b.id) +} + /** * Pure formatting: build WORKSPACE.md content from pre-fetched data. * No DB access — callers are responsible for providing the data. + * + * Output is deterministic: every collection is sorted by a stable key and + * volatile fields (run timestamps, mutable row counts) are omitted, so the + * rendered inventory only changes when the workspace structurally changes. This + * is what lets the mothership cache it in the prompt prefix across turns. */ export function buildWorkspaceMd(data: WorkspaceMdData): string { const sections: string[] = [] @@ -95,10 +124,12 @@ export function buildWorkspaceMd(data: WorkspaceMdData): string { } if (data.members.length > 0) { - const lines = data.members.map((m) => { - const display = m.name ? `${m.name} (${m.email})` : m.email - return `- ${display} — ${m.permissionType}` - }) + const lines = [...data.members] + .sort((a, b) => stableCompare(a.email, b.email)) + .map((m) => { + const display = m.name ? `${m.name} (${m.email})` : m.email + return `- ${display} — ${m.permissionType}` + }) sections.push(`## Members\n${lines.join('\n')}`) } @@ -122,10 +153,11 @@ export function buildWorkspaceMd(data: WorkspaceMdData): string { parts.push(`${indent} VFS dir: \`${workflowDir}\``) parts.push(`${indent} VFS state path: \`${workflowDir}/state.json\``) if (wf.description) parts.push(`${indent} ${wf.description}`) - const flags: string[] = [] - if (wf.isDeployed) flags.push('deployed') - if (wf.lastRunAt) flags.push(`last run: ${wf.lastRunAt.toISOString().split('T')[0]}`) - if (flags.length > 0) parts[0] += ` — ${flags.join(', ')}` + // `deployed` is a structural flag (kept); `lastRunAt` is intentionally + // omitted — it changes on every run and would bust the cached prompt + // prefix that carries this inventory. Current run data lives in + // workflows/{name}/executions.json. + if (wf.isDeployed) parts[0] += ' — deployed' return parts.join('\n') } @@ -133,13 +165,13 @@ export function buildWorkspaceMd(data: WorkspaceMdData): string { lines.push( 'Use the canonical VFS dir/state path shown under each workflow. Paths are percent-encoded per segment; copy them verbatim and do not infer paths from display names.' ) - for (const wf of rootWorkflows) { + for (const wf of [...rootWorkflows].sort(byNameThenId)) { lines.push(formatWf(wf, '')) } - const sortedFolders = [...folderWorkflows.entries()].sort((a, b) => a[0].localeCompare(b[0])) + const sortedFolders = [...folderWorkflows.entries()].sort((a, b) => stableCompare(a[0], b[0])) for (const [folder, wfs] of sortedFolders) { lines.push(`- 📁 **${folder}/**`) - for (const wf of wfs) { + for (const wf of [...wfs].sort(byNameThenId)) { lines.push(formatWf(wf, ' ')) } } @@ -149,11 +181,11 @@ export function buildWorkspaceMd(data: WorkspaceMdData): string { } if (data.knowledgeBases.length > 0) { - const lines = data.knowledgeBases.map((kb) => { + const lines = [...data.knowledgeBases].sort(byNameThenId).map((kb) => { let line = `- **${kb.name}** (${kb.id})` if (kb.description) line += ` — ${kb.description}` if (kb.connectorTypes && kb.connectorTypes.length > 0) { - line += ` | connectors: ${kb.connectorTypes.join(', ')}` + line += ` | connectors: ${[...kb.connectorTypes].sort(stableCompare).join(', ')}` } return line }) @@ -163,9 +195,11 @@ export function buildWorkspaceMd(data: WorkspaceMdData): string { } if (data.tables.length > 0) { - const lines = data.tables.map((t) => { - let line = `- **${t.name}** (${t.id}) — ${t.rowCount} rows` - if (t.description) line += `, ${t.description}` + // rowCount is omitted: it changes on every row write and would bust the + // cached prompt prefix. Live counts are in tables/{name}/meta.json. + const lines = [...data.tables].sort(byNameThenId).map((t) => { + let line = `- **${t.name}** (${t.id})` + if (t.description) line += ` — ${t.description}` return line }) sections.push(`## Tables (${data.tables.length})\n${lines.join('\n')}`) @@ -192,13 +226,13 @@ export function buildWorkspaceMd(data: WorkspaceMdData): string { const lines: string[] = [ 'Read or edit a file by the exact VFS path shown in backticks below — copy it verbatim (it is already percent-encoded) and append `/content` to read the contents. Do not retype the display name or re-encode the path.', ] - for (const f of rootFiles) { + for (const f of [...rootFiles].sort(byNameThenId)) { lines.push(fileLine(f, '')) } - const sortedFolders = [...folderFiles.entries()].sort((a, b) => a[0].localeCompare(b[0])) + const sortedFolders = [...folderFiles.entries()].sort((a, b) => stableCompare(a[0], b[0])) for (const [folder, folderFileList] of sortedFolders) { lines.push(`- 📁 **${folder}/**`) - for (const f of folderFileList) { + for (const f of [...folderFileList].sort(byNameThenId)) { lines.push(fileLine(f, ' ')) } } @@ -208,13 +242,15 @@ export function buildWorkspaceMd(data: WorkspaceMdData): string { } if (data.oauthIntegrations.length > 0) { - const lines = data.oauthIntegrations.map((c) => { - const services = PROVIDER_SERVICES[c.providerId] - const svc = services ? ` (${services.join(', ')})` : '' - const who = c.displayName ? ` — ${c.displayName}` : '' - const role = c.role ? `, ${c.role}` : '' - return `- ${c.providerId}${svc}${who}${role} — credentialId: \`${c.id}\`` - }) + const lines = [...data.oauthIntegrations] + .sort((a, b) => stableCompare(a.providerId, b.providerId) || stableCompare(a.id, b.id)) + .map((c) => { + const services = PROVIDER_SERVICES[c.providerId] + const svc = services ? ` (${services.join(', ')})` : '' + const who = c.displayName ? ` — ${c.displayName}` : '' + const role = c.role ? `, ${c.role}` : '' + return `- ${c.providerId}${svc}${who}${role} — credentialId: \`${c.id}\`` + }) sections.push( `## Connected Integrations\nPass these credentialId values directly on OAuth tool calls — no need to read environment/credentials.json for them.\n${lines.join('\n')}` ) @@ -223,17 +259,17 @@ export function buildWorkspaceMd(data: WorkspaceMdData): string { } if (data.envVariables.length > 0) { - const lines = data.envVariables.map((v) => `- ${v}`) + const lines = [...data.envVariables].sort(stableCompare).map((v) => `- ${v}`) sections.push(`## Environment Variables (${data.envVariables.length})\n${lines.join('\n')}`) } if (data.customTools && data.customTools.length > 0) { - const lines = data.customTools.map((t) => `- **${t.name}** (${t.id})`) + const lines = [...data.customTools].sort(byNameThenId).map((t) => `- **${t.name}** (${t.id})`) sections.push(`## Custom Tools (${data.customTools.length})\n${lines.join('\n')}`) } if (data.mcpServers && data.mcpServers.length > 0) { - const lines = data.mcpServers.map((s) => { + const lines = [...data.mcpServers].sort(byNameThenId).map((s) => { const status = s.enabled ? 'enabled' : 'disabled' return `- **${s.name}** (${s.id}) — ${status}${s.url ? `, ${s.url}` : ''}` }) @@ -241,7 +277,9 @@ export function buildWorkspaceMd(data: WorkspaceMdData): string { } if (data.skills && data.skills.length > 0) { - const lines = data.skills.map((s) => `- **${s.name}** (${s.id}) — ${s.description}`) + const lines = [...data.skills] + .sort(byNameThenId) + .map((s) => `- **${s.name}** (${s.id}) — ${s.description}`) sections.push( `## Skills (${data.skills.length})\n` + 'To use a skill, call the load_user_skill tool with its name to load the full instructions, then follow them. The descriptions below only say when each skill applies — they are not the instructions.\n' + @@ -250,16 +288,18 @@ export function buildWorkspaceMd(data: WorkspaceMdData): string { } if (data.jobs && data.jobs.length > 0) { - const lines = data.jobs.map((j) => { - const displayName = j.title || j.id - let line = `- **${displayName}** (${j.id}) — ${j.status}` - if (j.lifecycle !== 'persistent') line += ` [${j.lifecycle}]` - if (j.cronExpression) line += `, cron: ${j.cronExpression}` - if (j.sourceTaskName) line += `, task: ${j.sourceTaskName}` - const promptPreview = j.prompt.length > 80 ? `${j.prompt.slice(0, 77)}...` : j.prompt - line += `\n ${promptPreview}` - return line - }) + const lines = [...data.jobs] + .sort((a, b) => stableCompare(a.title || a.id, b.title || b.id) || stableCompare(a.id, b.id)) + .map((j) => { + const displayName = j.title || j.id + let line = `- **${displayName}** (${j.id}) — ${j.status}` + if (j.lifecycle !== 'persistent') line += ` [${j.lifecycle}]` + if (j.cronExpression) line += `, cron: ${j.cronExpression}` + if (j.sourceTaskName) line += `, task: ${j.sourceTaskName}` + const promptPreview = j.prompt.length > 80 ? `${j.prompt.slice(0, 77)}...` : j.prompt + line += `\n ${promptPreview}` + return line + }) sections.push(`## Jobs (${data.jobs.length})\n${lines.join('\n')}`) } @@ -276,15 +316,20 @@ export function buildWorkspaceContextMd(data: WorkspaceMdData): string { * discovery rules; the LLM reads dynamic workspace state from VFS files. * The LLM never writes this file directly. */ -export async function generateWorkspaceContext( +// Fetch + assemble the workspace inventory data once, from the PRIMARY db +// (read-your-writes: a just-edited workflow is visible immediately, so the +// injected snapshot can't lag behind a `glob`). Both the markdown inventory and +// the typed VFS snapshot are built from this single fetch. Returns null when the +// workspace is unavailable or a fetch fails. +async function buildWorkspaceMdData( workspaceId: string, userId: string -): Promise { +): Promise { try { await assertActiveWorkspaceAccess(workspaceId, userId) const wsRow = await getWorkspaceWithOwner(workspaceId) if (!wsRow) { - return '## Workspace\n(unavailable)' + return null } const [ @@ -302,7 +347,7 @@ export async function generateWorkspaceContext( ] = await Promise.all([ getUsersWithPermissions(workspaceId), - dbReplica + db .select({ id: workflow.id, name: workflow.name, @@ -314,7 +359,7 @@ export async function generateWorkspaceContext( .from(workflow) .where(and(eq(workflow.workspaceId, workspaceId), isNull(workflow.archivedAt))), - dbReplica + db .select({ id: workflowFolder.id, name: workflowFolder.name, @@ -323,7 +368,7 @@ export async function generateWorkspaceContext( .from(workflowFolder) .where(and(eq(workflowFolder.workspaceId, workspaceId), isNull(workflowFolder.archivedAt))), - dbReplica + db .select({ id: knowledgeBase.id, name: knowledgeBase.name, @@ -332,7 +377,7 @@ export async function generateWorkspaceContext( .from(knowledgeBase) .where(and(eq(knowledgeBase.workspaceId, workspaceId), isNull(knowledgeBase.deletedAt))), - dbReplica + db .select({ id: userTableDefinitions.id, name: userTableDefinitions.name, @@ -352,7 +397,7 @@ export async function generateWorkspaceContext( listCustomTools({ userId, workspaceId }), - dbReplica + db .select({ id: mcpServers.id, name: mcpServers.name, @@ -364,7 +409,7 @@ export async function generateWorkspaceContext( listSkills({ workspaceId, includeBuiltins: false }), - dbReplica + db .select({ id: workflowSchedule.id, jobTitle: workflowSchedule.jobTitle, @@ -384,23 +429,10 @@ export async function generateWorkspaceContext( ), ]) - const rowCounts = - tables.length > 0 - ? await Promise.all( - tables.map(async (t) => { - const [row] = await dbReplica - .select({ count: count() }) - .from(userTableRows) - .where(eq(userTableRows.tableId, t.id)) - return row?.count ?? 0 - }) - ) - : [] - const kbIds = kbs.map((kb) => kb.id) const connectorRows = kbIds.length > 0 - ? await dbReplica + ? await db .select({ knowledgeBaseId: knowledgeConnector.knowledgeBaseId, connectorType: knowledgeConnector.connectorType, @@ -437,7 +469,7 @@ export async function generateWorkspaceContext( return path } - return buildWorkspaceMd({ + return { workspace: wsRow, members, workflows: workflows.map((wf) => ({ @@ -446,9 +478,13 @@ export async function generateWorkspaceContext( })), knowledgeBases: kbs.map((kb) => ({ ...kb, - connectorTypes: connectorTypesByKb.get(kb.id), + // Sort connector types so the snapshot is order-stable: the DB query has + // no ORDER BY, and the Go delta engine compares item JSON byte-wise, so + // an unsorted (but unchanged) list would emit a spurious "modified" + // delta and needlessly bust the prompt cache. + connectorTypes: connectorTypesByKb.get(kb.id)?.sort(stableCompare), })), - tables: tables.map((t, i) => ({ ...t, rowCount: rowCounts[i] ?? 0 })), + tables: tables.map((t) => ({ id: t.id, name: t.name, description: t.description })), files: files.map((f) => ({ id: f.id, name: f.name, @@ -477,13 +513,128 @@ export async function generateWorkspaceContext( lifecycle: j.lifecycle, sourceTaskName: j.sourceTaskName, })), - }) + } } catch (err) { - logger.error('Failed to generate workspace context', { + logger.error('Failed to build workspace data', { workspaceId, error: toError(err).message, }) - return '## Workspace\n(unavailable)\n\n## Workflows\n(unavailable)\n\n## Knowledge Bases\n(unavailable)\n\n## Tables\n(unavailable)\n\n## Files\n(unavailable)\n\n## Connected Integrations\n(unavailable)' + return null + } +} + +const WORKSPACE_CONTEXT_UNAVAILABLE_MD = + '## Workspace\n(unavailable)\n\n## Workflows\n(unavailable)\n\n## Knowledge Bases\n(unavailable)\n\n## Tables\n(unavailable)\n\n## Files\n(unavailable)\n\n## Connected Integrations\n(unavailable)' + +/** + * Generate WORKSPACE.md markdown from current DB state (primary db). The LLM + * reads dynamic workspace state from VFS files; it never writes this file. + */ +export async function generateWorkspaceContext( + workspaceId: string, + userId: string +): Promise { + const data = await buildWorkspaceMdData(workspaceId, userId) + return data ? buildWorkspaceMd(data) : WORKSPACE_CONTEXT_UNAVAILABLE_MD +} + +/** + * Build BOTH the markdown inventory and the typed VFS snapshot from a single + * primary-db fetch. The snapshot is the structured form Go diffs into + * baseline+delta messages; the markdown is the transition fallback. Returns null + * when the workspace is unavailable. + */ +export async function generateWorkspaceSnapshot( + workspaceId: string, + userId: string +): Promise<{ markdown: string; snapshot: VfsSnapshotV1 } | null> { + const data = await buildWorkspaceMdData(workspaceId, userId) + if (!data) return null + return { markdown: buildWorkspaceMd(data), snapshot: buildVfsSnapshot(data) } +} + +/** + * Map the workspace inventory data to the typed VFS snapshot contract. Pure; + * mirrors buildWorkspaceMd's field selection. Resource order is irrelevant — Go + * diffs by stable id, not position. + */ +export function buildVfsSnapshot(data: WorkspaceMdData): VfsSnapshotV1 { + const workflows: VfsSnapshotV1Workflow[] = data.workflows.map((wf) => ({ + id: wf.id, + name: wf.name, + path: canonicalWorkflowVfsDir({ name: wf.name, folderPath: wf.folderPath }), + ...(wf.description ? { description: wf.description } : {}), + ...(wf.isDeployed ? { isDeployed: true } : {}), + ...(wf.folderPath ? { folderPath: wf.folderPath } : {}), + })) + const jobs: VfsSnapshotV1Job[] = (data.jobs ?? []) + .filter((j) => j.status !== 'completed') + .map((j) => ({ + id: j.id, + ...(j.title ? { title: j.title } : {}), + ...(j.prompt ? { prompt: j.prompt } : {}), + ...(j.cronExpression ? { cronExpression: j.cronExpression } : {}), + ...(j.status ? { status: j.status } : {}), + ...(j.lifecycle ? { lifecycle: j.lifecycle } : {}), + ...(j.sourceTaskName ? { sourceTaskName: j.sourceTaskName } : {}), + })) + return { + ...(data.workspace + ? { + workspace: { + id: data.workspace.id, + name: data.workspace.name, + ...(data.workspace.ownerId ? { ownerId: data.workspace.ownerId } : {}), + }, + } + : {}), + members: data.members.map((m) => ({ + ...(m.name ? { name: m.name } : {}), + email: m.email, + ...(m.permissionType ? { permissionType: m.permissionType } : {}), + })), + workflows, + knowledgeBases: data.knowledgeBases.map((kb) => ({ + id: kb.id, + name: kb.name, + ...(kb.description ? { description: kb.description } : {}), + ...(kb.connectorTypes && kb.connectorTypes.length > 0 + ? { connectorTypes: kb.connectorTypes } + : {}), + })), + tables: data.tables.map((t) => ({ + id: t.id, + name: t.name, + ...(t.description ? { description: t.description } : {}), + })), + files: data.files.map((f) => ({ + id: f.id, + name: f.name, + path: canonicalWorkspaceFilePath({ folderPath: f.folderPath, name: f.name }), + ...(f.type ? { type: f.type } : {}), + ...(f.size ? { size: f.size } : {}), + ...(f.folderPath ? { folderPath: f.folderPath } : {}), + })), + integrations: data.oauthIntegrations.map((c) => ({ + id: c.id, + providerId: c.providerId, + ...(c.displayName ? { displayName: c.displayName } : {}), + ...(c.role ? { role: c.role } : {}), + })), + envVars: data.envVariables, + customTools: (data.customTools ?? []).map((t) => ({ id: t.id, name: t.name })), + mcpServers: (data.mcpServers ?? []).map((s) => ({ + id: s.id, + name: s.name, + ...(s.url ? { url: s.url } : {}), + ...(s.enabled ? { enabled: true } : {}), + })), + skills: (data.skills ?? []).map((s) => ({ + id: s.id, + name: s.name, + ...(s.description ? { description: s.description } : {}), + })), + jobs, } } diff --git a/apps/sim/lib/copilot/generated/metrics-v1.ts b/apps/sim/lib/copilot/generated/metrics-v1.ts index dd8527f815..fefc4534ea 100644 --- a/apps/sim/lib/copilot/generated/metrics-v1.ts +++ b/apps/sim/lib/copilot/generated/metrics-v1.ts @@ -24,6 +24,7 @@ export const Metric = { CopilotRequestDuration: 'copilot.request.duration', CopilotToolCalls: 'copilot.tool.calls', CopilotToolDuration: 'copilot.tool.duration', + CopilotVfsDelta: 'copilot.vfs.delta', CopilotVfsMaterializeDuration: 'copilot.vfs.materialize.duration', GenAiClientCacheTokenUsage: 'gen_ai.client.cache.token.usage', GenAiClientTokenUsage: 'gen_ai.client.token.usage', @@ -48,6 +49,7 @@ export const MetricValues: readonly MetricValue[] = [ 'copilot.request.duration', 'copilot.tool.calls', 'copilot.tool.duration', + 'copilot.vfs.delta', 'copilot.vfs.materialize.duration', 'gen_ai.client.cache.token.usage', 'gen_ai.client.token.usage', diff --git a/apps/sim/lib/copilot/generated/trace-attributes-v1.ts b/apps/sim/lib/copilot/generated/trace-attributes-v1.ts index 982857673f..c5024ee357 100644 --- a/apps/sim/lib/copilot/generated/trace-attributes-v1.ts +++ b/apps/sim/lib/copilot/generated/trace-attributes-v1.ts @@ -266,6 +266,7 @@ export const TraceAttr = { CopilotTransport: 'copilot.transport', CopilotUserMessagePreview: 'copilot.user.message_preview', CopilotValidateOutcome: 'copilot.validate.outcome', + CopilotVfsDeltaOutcome: 'copilot.vfs.delta.outcome', CopilotVfsFileExtension: 'copilot.vfs.file.extension', CopilotVfsFileMediaType: 'copilot.vfs.file.media_type', CopilotVfsFileName: 'copilot.vfs.file.name', @@ -411,6 +412,8 @@ export const TraceAttr = { GenAiStreamPhaseToolArgsFirstMs: 'gen_ai.stream.phase.tool_args.first_ms', GenAiStreamPhaseToolArgsMs: 'gen_ai.stream.phase.tool_args.ms', GenAiSystem: 'gen_ai.system', + GenAiThinkingBlocksCaptured: 'gen_ai.thinking.blocks_captured', + GenAiThinkingRedactedBlocks: 'gen_ai.thinking.redacted_blocks', GenAiTokenType: 'gen_ai.token.type', GenAiToolName: 'gen_ai.tool.name', GenAiUsageCacheCreationInputTokens: 'gen_ai.usage.cache_creation.input_tokens', @@ -890,6 +893,7 @@ export const TraceAttrValues: readonly TraceAttrValue[] = [ 'copilot.transport', 'copilot.user.message_preview', 'copilot.validate.outcome', + 'copilot.vfs.delta.outcome', 'copilot.vfs.file.extension', 'copilot.vfs.file.media_type', 'copilot.vfs.file.name', @@ -1024,6 +1028,8 @@ export const TraceAttrValues: readonly TraceAttrValue[] = [ 'gen_ai.stream.phase.tool_args.first_ms', 'gen_ai.stream.phase.tool_args.ms', 'gen_ai.system', + 'gen_ai.thinking.blocks_captured', + 'gen_ai.thinking.redacted_blocks', 'gen_ai.token.type', 'gen_ai.tool.name', 'gen_ai.usage.cache_creation.input_tokens', diff --git a/apps/sim/lib/copilot/generated/vfs-snapshot-v1.ts b/apps/sim/lib/copilot/generated/vfs-snapshot-v1.ts new file mode 100644 index 0000000000..9a4df7519b --- /dev/null +++ b/apps/sim/lib/copilot/generated/vfs-snapshot-v1.ts @@ -0,0 +1,131 @@ +// AUTO-GENERATED FILE. DO NOT EDIT. +// + +/** + * Structured workspace inventory snapshot Sim sends to Go; Go diffs successive snapshots into baseline+delta messages. + */ +export interface VfsSnapshotV1 { + customTools?: VfsSnapshotV1NamedResource[] + envVars?: string[] + files?: VfsSnapshotV1File[] + integrations?: VfsSnapshotV1Integration[] + jobs?: VfsSnapshotV1Job[] + knowledgeBases?: VfsSnapshotV1KnowledgeBase[] + mcpServers?: VfsSnapshotV1McpServer[] + members?: VfsSnapshotV1Member[] + skills?: VfsSnapshotV1Skill[] + tables?: VfsSnapshotV1Table[] + workflows?: VfsSnapshotV1Workflow[] + workspace?: VfsSnapshotV1Workspace +} +/** + * This interface was referenced by `VfsSnapshotV1`'s JSON-Schema + * via the `definition` "VfsSnapshotV1NamedResource". + */ +export interface VfsSnapshotV1NamedResource { + id: string + name: string +} +/** + * This interface was referenced by `VfsSnapshotV1`'s JSON-Schema + * via the `definition` "VfsSnapshotV1File". + */ +export interface VfsSnapshotV1File { + folderPath?: string + id: string + name: string + path: string + size?: number + type?: string +} +/** + * This interface was referenced by `VfsSnapshotV1`'s JSON-Schema + * via the `definition` "VfsSnapshotV1Integration". + */ +export interface VfsSnapshotV1Integration { + displayName?: string + id: string + providerId: string + role?: string +} +/** + * This interface was referenced by `VfsSnapshotV1`'s JSON-Schema + * via the `definition` "VfsSnapshotV1Job". + */ +export interface VfsSnapshotV1Job { + cronExpression?: string + id: string + lifecycle?: string + prompt?: string + sourceTaskName?: string + status?: string + title?: string +} +/** + * This interface was referenced by `VfsSnapshotV1`'s JSON-Schema + * via the `definition` "VfsSnapshotV1KnowledgeBase". + */ +export interface VfsSnapshotV1KnowledgeBase { + connectorTypes?: string[] + description?: string + id: string + name: string +} +/** + * This interface was referenced by `VfsSnapshotV1`'s JSON-Schema + * via the `definition` "VfsSnapshotV1McpServer". + */ +export interface VfsSnapshotV1McpServer { + enabled?: boolean + id: string + name: string + url?: string +} +/** + * This interface was referenced by `VfsSnapshotV1`'s JSON-Schema + * via the `definition` "VfsSnapshotV1Member". + */ +export interface VfsSnapshotV1Member { + email: string + name?: string + permissionType?: string +} +/** + * This interface was referenced by `VfsSnapshotV1`'s JSON-Schema + * via the `definition` "VfsSnapshotV1Skill". + */ +export interface VfsSnapshotV1Skill { + description?: string + id: string + name: string +} +/** + * This interface was referenced by `VfsSnapshotV1`'s JSON-Schema + * via the `definition` "VfsSnapshotV1Table". + */ +export interface VfsSnapshotV1Table { + description?: string + id: string + name: string +} +/** + * This interface was referenced by `VfsSnapshotV1`'s JSON-Schema + * via the `definition` "VfsSnapshotV1Workflow". + */ +export interface VfsSnapshotV1Workflow { + description?: string + folderPath?: string + id: string + isDeployed?: boolean + name: string + path: string +} +/** + * This interface was referenced by `VfsSnapshotV1`'s JSON-Schema + * via the `definition` "VfsSnapshotV1Workspace". + */ +export interface VfsSnapshotV1Workspace { + id: string + name: string + ownerId?: string +} diff --git a/apps/sim/lib/copilot/tools/handlers/vfs.ts b/apps/sim/lib/copilot/tools/handlers/vfs.ts index e41c2b34b8..ca1902692e 100644 --- a/apps/sim/lib/copilot/tools/handlers/vfs.ts +++ b/apps/sim/lib/copilot/tools/handlers/vfs.ts @@ -122,7 +122,7 @@ export async function executeVfsGrep( const vfs = await getOrMaterializeVFS(workspaceId, context.userId) result = isWorkspaceFileGrepPath(rawPath) ? await vfs.grepFile(rawPath, pattern, grepOptions) - : vfs.grep(pattern, rawPath, grepOptions) + : await vfs.grep(pattern, rawPath, grepOptions) } const key = outputMode === 'files_with_matches' ? 'files' : outputMode === 'count' ? 'counts' : 'matches' @@ -324,7 +324,7 @@ export async function executeVfsRead( } } - const result = vfs.read(path, offset, limit) + const result = await vfs.read(path, offset, limit) if (!result) { const suggestions = vfs.suggestSimilar(path) logger.warn('vfs_read file not found', { path, suggestions }) diff --git a/apps/sim/lib/copilot/vfs/operations.ts b/apps/sim/lib/copilot/vfs/operations.ts index dfef44a1ae..bd7208719b 100644 --- a/apps/sim/lib/copilot/vfs/operations.ts +++ b/apps/sim/lib/copilot/vfs/operations.ts @@ -108,8 +108,11 @@ function splitLinesForGrep(content: string): string[] { * (including `[`, `{`, spaces) use directory-prefix logic so literal VFS path segments are not * parsed as glob syntax. Trailing slashes are stripped so `files/` and `files` both scope under * `files/...`. + * + * Exported so the lazy VFS can resolve exactly the lazy artifacts a scoped grep will consider, + * keeping "what we materialize" identical to "what grep filters in". */ -function pathWithinGrepScope(filePath: string, scope: string): boolean { +export function pathWithinGrepScope(filePath: string, scope: string): boolean { const scopeUsesStarOrQuestionGlob = /[*?]/.test(scope) if (scopeUsesStarOrQuestionGlob) { return micromatch.isMatch(filePath, scope, VFS_GLOB_OPTIONS) diff --git a/apps/sim/lib/copilot/vfs/workspace-vfs.ts b/apps/sim/lib/copilot/vfs/workspace-vfs.ts index 0038755e60..bc04dfbe5b 100644 --- a/apps/sim/lib/copilot/vfs/workspace-vfs.ts +++ b/apps/sim/lib/copilot/vfs/workspace-vfs.ts @@ -378,7 +378,24 @@ function getStaticComponentFiles(): Map { * components/triggers/{provider}/{id}.json (external triggers: github, slack, etc.) */ export class WorkspaceVFS { + // Eagerly-materialized, cheap content (structure + metadata): folder markers, + // per-resource meta.json, WORKSPACE.md/WORKSPACE_CONTEXT.md, static components. private files: Map = new Map() + // Lazily-materialized, expensive content keyed by VFS path. The loader runs on + // demand: a `read` resolves exactly one entry; a scoped `grep` resolves only + // the entries within its scope; an unscoped `grep` resolves all; a `glob` never + // resolves any (it matches keys only). This is why a read/glob no longer pays + // for every workflow's graph-load + lint + stringify — only grep over contents + // does, and only for what it actually scans. + private lazy: Map Promise> = new Map() + // Per-instance (per-tool-call) memo so state.json + lint.json for the same + // workflow share one normalized-table load, and deployment.json + versions.json + // share one deployment query. + private normalizedCache = new Map< + string, + Promise>> + >() + private deploymentCache = new Map>() private _workspaceId = '' private _betaEnabled = false @@ -386,6 +403,108 @@ export class WorkspaceVFS { return this._workspaceId } + /** Register a VFS path whose (expensive) content is produced on demand. */ + private registerLazy(path: string, loader: () => Promise): void { + this.lazy.set(path, loader) + } + + /** + * Load a workflow's normalized state once per instance. state.json and lint.json + * both need it, and a grep over a workflow's dir touches both — without this they + * would each re-load the full block graph. + */ + private loadNormalized( + workflowId: string + ): Promise>> { + let cached = this.normalizedCache.get(workflowId) + if (!cached) { + cached = loadWorkflowFromNormalizedTables(workflowId) + this.normalizedCache.set(workflowId, cached) + } + return cached + } + + /** Load a workflow's deployment data once per instance (deployment.json + versions.json share it). */ + private loadDeployments(wf: { + id: string + isDeployed: boolean + deployedAt: Date | null + }): Promise { + let cached = this.deploymentCache.get(wf.id) + if (!cached) { + cached = this.getWorkflowDeployments(wf.id, this._workspaceId, wf.isDeployed, wf.deployedAt) + this.deploymentCache.set(wf.id, cached) + } + return cached + } + + /** + * Resolve a single lazy artifact into {@link files}. Idempotent: once resolved + * the entry moves to `files` and the loader is dropped. A loader that returns + * null (no data) leaves nothing behind, so the path reads as "not found". + */ + private async resolveLazyPath(path: string): Promise { + const existing = this.files.get(path) + if (existing !== undefined) return existing + const loader = this.lazy.get(path) + if (!loader) return null + this.lazy.delete(path) + let content: string | null = null + try { + content = await loader() + } catch (err) { + logger.warn('Failed to resolve lazy VFS artifact', { + workspaceId: this._workspaceId, + path, + error: toError(err).message, + }) + content = null + } + if (content !== null) this.files.set(path, content) + return content + } + + /** + * Resolve every lazy artifact a grep over `scope` will scan, in parallel. An + * undefined scope (unscoped grep) resolves all — the worst case, equivalent to + * the old eager full materialize, but now only paid by an unscoped grep. + * Uses the same scope matcher as {@link ops.grep} so the materialized set is + * exactly the set grep filters in. + */ + private async resolveLazyWithinScope(scope?: string): Promise { + const targets: string[] = [] + for (const path of this.lazy.keys()) { + if (!scope || ops.pathWithinGrepScope(path, scope)) targets.push(path) + } + if (targets.length === 0) return + await Promise.all(targets.map((path) => this.resolveLazyPath(path))) + } + + /** + * `recently-deleted/` artifacts are opt-in: excluded from the active view + * unless a path/pattern explicitly scopes into them. + */ + private isRecentlyDeleted(key: string): boolean { + return key.startsWith('recently-deleted/') + } + + /** + * A keys-only view (eager values plus empty placeholders for unresolved lazy + * paths) for glob/suggestSimilar, which match on keys and never read content. + */ + private keyView(includeDeleted: boolean): Map { + const view = new Map() + for (const [key, value] of this.files) { + if (includeDeleted || !this.isRecentlyDeleted(key)) view.set(key, value) + } + for (const key of this.lazy.keys()) { + if ((includeDeleted || !this.isRecentlyDeleted(key)) && !view.has(key)) { + view.set(key, '') + } + } + return view + } + /** * Materialize workspace data into the VFS. * Uses shared service functions for all data access, then generates @@ -394,6 +513,9 @@ export class WorkspaceVFS { async materialize(workspaceId: string, userId: string): Promise { const start = Date.now() this.files = new Map() + this.lazy = new Map() + this.normalizedCache = new Map() + this.deploymentCache = new Map() this._workspaceId = workspaceId this._betaEnabled = await isFeatureEnabled('mothership-beta', { userId }) @@ -507,7 +629,7 @@ export class WorkspaceVFS { private activeFiles(): Map { const filtered = new Map() for (const [key, value] of this.files) { - if (!key.startsWith('recently-deleted/')) { + if (!this.isRecentlyDeleted(key)) { filtered.set(key, value) } } @@ -519,11 +641,14 @@ export class WorkspaceVFS { return this.activeFiles() } - grep( + async grep( pattern: string, path?: string, options?: GrepOptions - ): GrepMatch[] | string[] | ops.GrepCountEntry[] { + ): Promise { + // grep is the only op that scans contents, so it is the only op that pays to + // materialize lazy artifacts — and only those within its scope. + await this.resolveLazyWithinScope(path) return ops.grep(this.filesForPath(path), pattern, path, options) } @@ -578,16 +703,23 @@ export class WorkspaceVFS { } glob(pattern: string): string[] { - const target = pattern.startsWith('recently-deleted') ? this.files : this.activeFiles() - return ops.glob(target, pattern) + // glob matches keys only, so it resolves no lazy content — it sees the full + // path structure (eager keys + lazy placeholders) for free. + const includeDeleted = pattern.startsWith('recently-deleted') + return ops.glob(this.keyView(includeDeleted), pattern) } - read(path: string, offset?: number, limit?: number): ReadResult | null { + async read(path: string, offset?: number, limit?: number): Promise { + // Resolve the one lazy artifact being read into `files`; a no-op for eager + // paths (already present) and unknown paths (no loader). Lazy keys are always + // ASCII (built via encodeURIComponent), so no Unicode-normalized lookup is + // needed here; ops.read still does its own NFC/NFD fallback over `files`. + await this.resolveLazyPath(path) return ops.read(this.files, path, offset, limit) } suggestSimilar(missingPath: string, max?: number): string[] { - return ops.suggestSimilar(this.files, missingPath, max) + return ops.suggestSimilar(this.keyView(true), missingPath, max) } private async resolveWorkspaceFileForDynamicRead( @@ -1159,118 +1291,87 @@ export class WorkspaceVFS { ) } - let normalized: Awaited> = null - try { - normalized = await loadWorkflowFromNormalizedTables(wf.id) - if (normalized) { - const sanitized = sanitizeForCopilot({ - blocks: normalized.blocks, - edges: normalized.edges, - loops: normalized.loops, - parallels: normalized.parallels, - } as any) - this.files.set(`${prefix}state.json`, JSON.stringify(sanitized, null, 2)) - - // Dynamically-computed validation state (lint.json), derived from - // the raw normalized state so subBlock values, advancedMode, - // canonicalModes, and subflow edges are all available. - // - // CPU-only by design: tier-2 reference resolution - // (collectUnresolvedReferences) runs DB queries per selector field - // and is validated where it matters — at edit_workflow apply time. - // Running it here meant workflows × selectors sequential DB queries - // on every read/glob/grep call, which is what made `files/` reads - // take ~40s in large workspaces. - try { - const graphLint = lintEditedWorkflowState(normalized as any) - const fieldIssues = collectWorkflowFieldIssues(normalized.blocks as any) - this.files.set( - `${prefix}lint.json`, - JSON.stringify( - { - ...graphLint, - fieldIssues, - notes: [ - UNRESOLVABLE_AT_LINT_NOTE, - 'Credential/resource reference resolution is validated when editing the workflow, not in this snapshot.', - ], - }, - null, - 2 - ) - ) - } catch (lintErr) { - logger.warn('Failed to compute lint.json', { - workflowId: wf.id, - error: toError(lintErr).message, - }) - } - } else { - // loadWorkflowFromNormalizedTables returns null when the workflow has - // zero block rows. A block-less workflow still exists and must be - // readable, so emit an empty-but-valid state.json — otherwise - // read("workflows/{path}/state.json") 404s and suggestSimilar points the - // agent at a different, same-named workflow. dag/lint are derived from - // blocks and are omitted for the empty case. - this.files.set( - `${prefix}state.json`, - JSON.stringify( - sanitizeForCopilot({ blocks: {}, edges: [], loops: {}, parallels: {} } as any), - null, - 2 - ) - ) - } - } catch (err) { - logger.warn('Failed to load workflow state', { - workflowId: wf.id, - error: toError(err).message, - }) - } + // Heavy per-workflow content is LAZY: a read/glob never loads the block + // graph, runs lint, or queries executions/deployments. Only a read of the + // specific artifact — or a grep whose scope touches it — resolves it. + // state.json + lint.json share one memoized normalized-table load; + // deployment.json + versions.json share one memoized deployment query. + // This is the change that stops every read/glob from paying O(workflows) + // graph-loads + lint + stringify (what made large-workspace reads ~40s). + this.registerLazy(`${prefix}state.json`, async () => { + const normalized = await this.loadNormalized(wf.id) + // loadWorkflowFromNormalizedTables returns null for a zero-block + // workflow; it still exists and must be readable, so emit an + // empty-but-valid state.json rather than a 404. + const sanitized = normalized + ? sanitizeForCopilot({ + blocks: normalized.blocks, + edges: normalized.edges, + loops: normalized.loops, + parallels: normalized.parallels, + } as any) + : sanitizeForCopilot({ blocks: {}, edges: [], loops: {}, parallels: {} } as any) + return JSON.stringify(sanitized, null, 2) + }) - try { - const execRows = await db - .select({ - id: workflowExecutionLogs.id, - executionId: workflowExecutionLogs.executionId, - status: workflowExecutionLogs.status, - trigger: workflowExecutionLogs.trigger, - startedAt: workflowExecutionLogs.startedAt, - endedAt: workflowExecutionLogs.endedAt, - totalDurationMs: workflowExecutionLogs.totalDurationMs, - }) - .from(workflowExecutionLogs) - .where(eq(workflowExecutionLogs.workflowId, wf.id)) - .orderBy(desc(workflowExecutionLogs.startedAt)) - .limit(5) + this.registerLazy(`${prefix}lint.json`, async () => { + const normalized = await this.loadNormalized(wf.id) + // Derived from the raw normalized state (subBlock values, advancedMode, + // canonicalModes, subflow edges). CPU-only by design: tier-2 reference + // resolution runs at edit_workflow apply time, not here. A zero-block + // workflow has no lint (reads as not-found, as before). + if (!normalized) return null + const graphLint = lintEditedWorkflowState(normalized as any) + const fieldIssues = collectWorkflowFieldIssues(normalized.blocks as any) + return JSON.stringify( + { + ...graphLint, + fieldIssues, + notes: [ + UNRESOLVABLE_AT_LINT_NOTE, + 'Credential/resource reference resolution is validated when editing the workflow, not in this snapshot.', + ], + }, + null, + 2 + ) + }) - if (execRows.length > 0) { - this.files.set(`${prefix}executions.json`, serializeRecentExecutions(execRows)) - } - } catch (err) { - logger.warn('Failed to load execution logs', { - workflowId: wf.id, - error: toError(err).message, + // executions.json is advertised only when the workflow has run (cheap + // signal: lastRunAt), matching the old "set iff execRows > 0" behavior + // without the per-workflow query on every tool call. + if (wf.lastRunAt) { + this.registerLazy(`${prefix}executions.json`, async () => { + const execRows = await db + .select({ + id: workflowExecutionLogs.id, + executionId: workflowExecutionLogs.executionId, + status: workflowExecutionLogs.status, + trigger: workflowExecutionLogs.trigger, + startedAt: workflowExecutionLogs.startedAt, + endedAt: workflowExecutionLogs.endedAt, + totalDurationMs: workflowExecutionLogs.totalDurationMs, + }) + .from(workflowExecutionLogs) + .where(eq(workflowExecutionLogs.workflowId, wf.id)) + .orderBy(desc(workflowExecutionLogs.startedAt)) + .limit(5) + return execRows.length > 0 ? serializeRecentExecutions(execRows) : null }) } - try { - const deploymentData = await this.getWorkflowDeployments( - wf.id, - workspaceId, - wf.isDeployed, - wf.deployedAt - ) - if (deploymentData) { - this.files.set(`${prefix}deployment.json`, serializeDeployments(deploymentData)) - if (deploymentData.versions && deploymentData.versions.length > 0) { - this.files.set(`${prefix}versions.json`, serializeVersions(deploymentData.versions)) - } - } - } catch (err) { - logger.warn('Failed to load deployment data', { - workflowId: wf.id, - error: toError(err).message, + // deployment.json / versions.json are advertised when the workflow is + // deployed (cheap signal: isDeployed). Both share one memoized query. + if (wf.isDeployed) { + this.registerLazy(`${prefix}deployment.json`, async () => { + const deploymentData = await this.loadDeployments(wf) + return deploymentData ? serializeDeployments(deploymentData) : null + }) + this.registerLazy(`${prefix}versions.json`, async () => { + const deploymentData = await this.loadDeployments(wf) + return deploymentData?.versions && deploymentData.versions.length > 0 + ? serializeVersions(deploymentData.versions) + : null }) } }) @@ -1317,70 +1418,61 @@ export class WorkspaceVFS { }) ) - try { - const docRows = await db - .select({ - id: document.id, - filename: document.filename, - fileSize: document.fileSize, - mimeType: document.mimeType, - chunkCount: document.chunkCount, - tokenCount: document.tokenCount, - processingStatus: document.processingStatus, - enabled: document.enabled, - uploadedAt: document.uploadedAt, - }) - .from(document) - .where( - and( - eq(document.knowledgeBaseId, kb.id), - eq(document.userExcluded, false), - isNull(document.archivedAt), - isNull(document.deletedAt) + // documents.json / connectors.json are lazy, advertised only when the KB + // summary says they exist (docCount / connectorTypes) — no per-KB query on + // a read/glob, only when the artifact is read or grepped. + if (kb.docCount > 0) { + this.registerLazy(`${prefix}documents.json`, async () => { + const docRows = await db + .select({ + id: document.id, + filename: document.filename, + fileSize: document.fileSize, + mimeType: document.mimeType, + chunkCount: document.chunkCount, + tokenCount: document.tokenCount, + processingStatus: document.processingStatus, + enabled: document.enabled, + uploadedAt: document.uploadedAt, + }) + .from(document) + .where( + and( + eq(document.knowledgeBaseId, kb.id), + eq(document.userExcluded, false), + isNull(document.archivedAt), + isNull(document.deletedAt) + ) ) - ) - - if (docRows.length > 0) { - this.files.set(`${prefix}documents.json`, serializeDocuments(docRows)) - } - } catch (err) { - logger.warn('Failed to load KB documents', { - knowledgeBaseId: kb.id, - error: toError(err).message, + return docRows.length > 0 ? serializeDocuments(docRows) : null }) } - try { - const connectorRows = await db - .select({ - id: knowledgeConnector.id, - connectorType: knowledgeConnector.connectorType, - status: knowledgeConnector.status, - syncMode: knowledgeConnector.syncMode, - syncIntervalMinutes: knowledgeConnector.syncIntervalMinutes, - lastSyncAt: knowledgeConnector.lastSyncAt, - lastSyncError: knowledgeConnector.lastSyncError, - lastSyncDocCount: knowledgeConnector.lastSyncDocCount, - nextSyncAt: knowledgeConnector.nextSyncAt, - consecutiveFailures: knowledgeConnector.consecutiveFailures, - createdAt: knowledgeConnector.createdAt, - }) - .from(knowledgeConnector) - .where( - and( - eq(knowledgeConnector.knowledgeBaseId, kb.id), - isNull(knowledgeConnector.archivedAt), - isNull(knowledgeConnector.deletedAt) + if (kb.connectorTypes.length > 0) { + this.registerLazy(`${prefix}connectors.json`, async () => { + const connectorRows = await db + .select({ + id: knowledgeConnector.id, + connectorType: knowledgeConnector.connectorType, + status: knowledgeConnector.status, + syncMode: knowledgeConnector.syncMode, + syncIntervalMinutes: knowledgeConnector.syncIntervalMinutes, + lastSyncAt: knowledgeConnector.lastSyncAt, + lastSyncError: knowledgeConnector.lastSyncError, + lastSyncDocCount: knowledgeConnector.lastSyncDocCount, + nextSyncAt: knowledgeConnector.nextSyncAt, + consecutiveFailures: knowledgeConnector.consecutiveFailures, + createdAt: knowledgeConnector.createdAt, + }) + .from(knowledgeConnector) + .where( + and( + eq(knowledgeConnector.knowledgeBaseId, kb.id), + isNull(knowledgeConnector.archivedAt), + isNull(knowledgeConnector.deletedAt) + ) ) - ) - - if (connectorRows.length > 0) { - this.files.set(`${prefix}connectors.json`, serializeConnectors(connectorRows)) - } - } catch (err) { - logger.warn('Failed to load KB connectors', { - knowledgeBaseId: kb.id, - error: toError(err).message, + return connectorRows.length > 0 ? serializeConnectors(connectorRows) : null }) } }) @@ -1955,29 +2047,25 @@ export class WorkspaceVFS { this.files.set(`jobs/${safeName}/history.json`, JSON.stringify(history, null, 2)) } - try { - const execRows = await db - .select({ - id: jobExecutionLogs.id, - executionId: jobExecutionLogs.executionId, - status: jobExecutionLogs.status, - trigger: jobExecutionLogs.trigger, - startedAt: jobExecutionLogs.startedAt, - endedAt: jobExecutionLogs.endedAt, - totalDurationMs: jobExecutionLogs.totalDurationMs, - }) - .from(jobExecutionLogs) - .where(eq(jobExecutionLogs.scheduleId, job.id)) - .orderBy(desc(jobExecutionLogs.startedAt)) - .limit(5) - - if (execRows.length > 0) { - this.files.set(`jobs/${safeName}/executions.json`, serializeRecentExecutions(execRows)) - } - } catch (err) { - logger.warn('Failed to load job execution logs', { - jobId: job.id, - error: toError(err).message, + // executions.json is lazy, advertised only when the job has run (cheap + // signal: lastRanAt) — no per-job query on a read/glob. + if (job.lastRanAt) { + this.registerLazy(`jobs/${safeName}/executions.json`, async () => { + const execRows = await db + .select({ + id: jobExecutionLogs.id, + executionId: jobExecutionLogs.executionId, + status: jobExecutionLogs.status, + trigger: jobExecutionLogs.trigger, + startedAt: jobExecutionLogs.startedAt, + endedAt: jobExecutionLogs.endedAt, + totalDurationMs: jobExecutionLogs.totalDurationMs, + }) + .from(jobExecutionLogs) + .where(eq(jobExecutionLogs.scheduleId, job.id)) + .orderBy(desc(jobExecutionLogs.startedAt)) + .limit(5) + return execRows.length > 0 ? serializeRecentExecutions(execRows) : null }) } } diff --git a/package.json b/package.json index c552dbad16..ce15ab7810 100644 --- a/package.json +++ b/package.json @@ -44,6 +44,8 @@ "trace-events-contract:check": "bun run scripts/sync-trace-events-contract.ts --check", "metrics-contract:generate": "bun run scripts/sync-metrics-contract.ts", "metrics-contract:check": "bun run scripts/sync-metrics-contract.ts --check", + "vfs-snapshot-contract:generate": "bun run scripts/sync-vfs-snapshot-contract.ts", + "vfs-snapshot-contract:check": "bun run scripts/sync-vfs-snapshot-contract.ts --check", "mship:generate": "bun run scripts/generate-mship-contracts.ts", "mship:check": "bun run scripts/generate-mship-contracts.ts --check", "prepare": "bun husky", diff --git a/scripts/generate-mship-contracts.ts b/scripts/generate-mship-contracts.ts index a789f0825f..f0cdd9439b 100644 --- a/scripts/generate-mship-contracts.ts +++ b/scripts/generate-mship-contracts.ts @@ -23,6 +23,7 @@ const GENERATORS = [ 'scripts/sync-trace-attribute-values-contract.ts', 'scripts/sync-trace-events-contract.ts', 'scripts/sync-metrics-contract.ts', + 'scripts/sync-vfs-snapshot-contract.ts', ] // Generated files under this path. We biome-format this whole dir on diff --git a/scripts/sync-vfs-snapshot-contract.ts b/scripts/sync-vfs-snapshot-contract.ts new file mode 100644 index 0000000000..e0e616ebdb --- /dev/null +++ b/scripts/sync-vfs-snapshot-contract.ts @@ -0,0 +1,46 @@ +import { mkdir, readFile, writeFile } from 'node:fs/promises' +import { dirname, resolve } from 'node:path' +import { fileURLToPath } from 'node:url' +import { compile } from 'json-schema-to-typescript' +import { formatGeneratedSource } from './format-generated-source' + +const SCRIPT_DIR = dirname(fileURLToPath(import.meta.url)) +const ROOT = resolve(SCRIPT_DIR, '..') +// Matches the sibling sync scripts' canonical layout. In a repo where the Go +// service lives at `mothership/copilot`, pass `--input=` (e.g. +// `--input=../mothership/copilot/contracts/vfs-snapshot-v1.schema.json`). +const DEFAULT_CONTRACT_PATH = resolve( + ROOT, + '../copilot/copilot/contracts/vfs-snapshot-v1.schema.json' +) +const OUTPUT_PATH = resolve(ROOT, 'apps/sim/lib/copilot/generated/vfs-snapshot-v1.ts') + +async function main() { + const checkOnly = process.argv.includes('--check') + const inputPathArg = process.argv.find((arg) => arg.startsWith('--input=')) + const inputPath = inputPathArg + ? resolve(ROOT, inputPathArg.slice('--input='.length)) + : DEFAULT_CONTRACT_PATH + + const raw = await readFile(inputPath, 'utf8') + const schema = JSON.parse(raw) + const types = await compile(schema, 'VfsSnapshotV1', { + bannerComment: '// AUTO-GENERATED FILE. DO NOT EDIT.\n//', + unreachableDefinitions: true, + additionalProperties: false, + }) + const rendered = formatGeneratedSource(types, OUTPUT_PATH, ROOT) + + if (checkOnly) { + const existing = await readFile(OUTPUT_PATH, 'utf8').catch(() => null) + if (existing !== rendered) { + throw new Error('Generated vfs snapshot contract is stale. Run: bun run mship:generate') + } + return + } + + await mkdir(dirname(OUTPUT_PATH), { recursive: true }) + await writeFile(OUTPUT_PATH, rendered, 'utf8') +} + +await main()