feat(core): Add ability to view agent traces locally under a flag (no-changelog) (#32397)

This commit is contained in:
Riqwan Thamir
2026-06-17 10:36:38 +02:00
committed by GitHub
parent ae7f699d78
commit b60bc923d4
56 changed files with 4924 additions and 114 deletions
@@ -348,6 +348,29 @@ describe('AgentRuntime — execution counters', () => {
expect(counter.incrementTokenCount).toHaveBeenCalledWith(15);
});
// Both execution paths (generate and stream) must hand the caller's step hooks to the SDK call unchanged.
it('forwards onStepStart and onStepFinish to generateText and streamText', async () => {
  generateText.mockResolvedValue(makeGenerateSuccess());
  streamText.mockReturnValue(makeStreamSuccess());
  const onStepStart = vi.fn();
  const onStepFinish = vi.fn();
  const { runtime } = createRuntime();

  // Asserts that the first argument of one recorded SDK call carries both hooks by identity.
  const expectHooksForwarded = (firstArgument: unknown): void => {
    const args = firstArgument as Record<string, unknown>;
    expect(args.experimental_onStepStart).toBe(onStepStart);
    expect(args.onStepFinish).toBe(onStepFinish);
  };

  await runtime.generate('hi', { onStepStart, onStepFinish });
  const streamResult = await runtime.stream('hi', { onStepStart, onStepFinish });
  // Drain the stream before inspecting the recorded calls.
  await collectChunks(streamResult.stream);

  generateText.mock.calls.forEach((call) => expectHooksForwarded(call[0]));
  streamText.mock.calls.forEach((call) => expectHooksForwarded(call[0]));
});
it('counts provider-executed tool calls when surfaced by the model', async () => {
generateText
.mockResolvedValueOnce({
@@ -777,9 +777,13 @@ export class AgentRuntime {
): {
experimental_telemetry?: TelemetrySettings;
experimental_repairToolCall?: ToolCallRepairFunction<NoInfer<ToolSet>>;
experimental_onStepStart?: ExecutionOptions['onStepStart'];
onStepFinish?: ExecutionOptions['onStepFinish'];
} {
return {
...this.buildTelemetryOptions(options),
...(options?.onStepStart ? { experimental_onStepStart: options.onStepStart } : {}),
...(options?.onStepFinish ? { onStepFinish: options.onStepFinish } : {}),
experimental_repairToolCall: async (options) => {
return await fixToolCall(
{
+3 -1
View File
@@ -1,5 +1,5 @@
import type { ProviderOptions } from '@ai-sdk/provider-utils';
import type { LanguageModel, smoothStream } from 'ai';
import type { LanguageModel, OnStepFinishEvent, OnStepStartEvent, smoothStream } from 'ai';
import type { JsonSchema7Type } from 'zod-to-json-schema';
import type { AgentMessage, ContentMetadata } from './message';
@@ -165,6 +165,8 @@ export interface ExecutionOptions {
telemetry?: BuiltTelemetry;
/** Inherited execution counter from the host runtime. Used for aggregate heartbeat telemetry. */
executionCounter?: AgentExecutionCounter;
onStepStart?: (event: OnStepStartEvent) => void | Promise<void>;
onStepFinish?: (event: OnStepFinishEvent) => void | Promise<void>;
}
export interface PersistedExecutionOptions {
@@ -299,6 +299,8 @@ export type FrontendModuleSettings = {
sandboxEnabled: boolean;
workflowBuilderAvailable: boolean;
sandboxUnavailableReason: string | null;
/** When true, orchestrator LLM steps and workflow code snapshots are captured for debugging (`N8N_INSTANCE_AI_RUN_DEBUG_ENABLED`). */
runDebugEnabled: boolean;
};
/**
+19
View File
@@ -376,6 +376,11 @@ export type {
InstanceAiEnsureThreadResponse,
InstanceAiStoredMessage,
InstanceAiThreadMessagesResponse,
InstanceAiRunDebugSummary,
InstanceAiRunDebugStep,
InstanceAiRunDebugWorkflowCodeSnapshot,
InstanceAiRunDebugResponse,
InstanceAiThreadDebugRunsResponse,
InstanceAiRichMessagesResponse,
InstanceAiThreadStatusResponse,
InstanceAiAdminSettingsResponse,
@@ -421,6 +426,20 @@ export {
export type { AgentRunState, AgentNode } from './schemas/agent-run-reducer';
export {
formatDebugJson,
summarizeJsonValue,
parseSystemPromptForDisplay,
parseMessageBlocks,
parseUsageSummary,
parseInputExtras,
parseOutputDisplayBlocks,
parseOutputExtras,
parseStepSummary,
} from './schemas/llm-step-display';
export type { ReadableContentBlock, ReadableSegment } from './schemas/llm-step-display';
export {
startTestRunPayloadSchema,
StartTestRunRequestDto,
@@ -0,0 +1,290 @@
import { describe, expect, it } from 'vitest';
import {
parseInputExtras,
parseMessageBlocks,
parseOutputDisplayBlocks,
parseStepSummary,
parseSystemBlocks,
parseSystemPromptForDisplay,
parseToolCallBlocks,
parseToolResultBlocks,
parseUsageSummary,
summarizeJsonValue,
extractObservationsBlock,
} from '../llm-step-display';
// Unit tests for the LLM step display helpers: each case feeds a raw step
// payload (system prompt, messages, tool calls/results, usage) through one
// parser and pins the readable block/segment shape it produces.
describe('llm-step-display', () => {
// A plain string system prompt becomes a single `system` block with one text segment.
it('parses string system prompts into readable blocks', () => {
expect(parseSystemBlocks('You are helpful')).toEqual([
{
role: 'system',
content: 'You are helpful',
segments: [{ type: 'text', text: 'You are helpful' }],
},
]);
});
// Keys other than `role`/`content` (here `providerOptions`) are surfaced as block metadata.
it('parses message content with role-first layout and collapsible metadata', () => {
const blocks = parseMessageBlocks([
{
role: 'user',
content: 'Build a weather workflow',
providerOptions: { anthropic: { cacheControl: { type: 'ephemeral' } } },
},
]);
expect(blocks).toHaveLength(1);
expect(blocks[0]?.role).toBe('user');
expect(blocks[0]?.content).toBe('Build a weather workflow');
expect(blocks[0]?.segments).toEqual([{ type: 'text', text: 'Build a weather workflow' }]);
expect(blocks[0]?.metadata).toEqual({
providerOptions: { anthropic: { cacheControl: { type: 'ephemeral' } } },
});
});
// Multipart content yields one segment per part; a tool-call part exposes its `input` as `payload`.
it('extracts structured tool-call segments from multipart assistant messages', () => {
const blocks = parseMessageBlocks([
{
role: 'assistant',
content: [
{ type: 'text', text: 'Here is the plan.' },
{ type: 'tool-call', toolName: 'search_nodes', input: { query: 'webhook' } },
],
},
]);
expect(blocks[0]?.segments).toEqual([
{ type: 'text', text: 'Here is the plan.' },
{
type: 'tool-call',
name: 'search_nodes',
payload: { query: 'webhook' },
metadata: undefined,
},
]);
});
// The tool-result `output` is kept as the segment payload, including its `{ type: 'json', value }` envelope.
it('extracts structured tool-result segments from tool messages', () => {
const blocks = parseMessageBlocks([
{
role: 'tool',
content: [
{
type: 'tool-result',
toolName: 'load_skill',
output: { type: 'json', value: { ok: true, skillId: 'workflow-builder' } },
},
],
},
]);
expect(blocks[0]?.segments?.[0]).toMatchObject({
type: 'tool-result',
name: 'load_skill',
payload: { type: 'json', value: { ok: true, skillId: 'workflow-builder' } },
});
});
// Multi-key objects list their keys; `{ type: 'json', value }` envelopes are unwrapped before summarizing.
it('summarizes json payloads for collapsed previews', () => {
expect(summarizeJsonValue({ name: 'workflow-builder', extra: true })).toBe('{ name, extra }');
expect(summarizeJsonValue({ type: 'json', value: { ok: true } })).toBe('{ ok: true }');
});
// `input` becomes the payload; remaining keys (here `toolCallId`) become metadata.
it('formats tool calls with structured payload and metadata', () => {
const blocks = parseToolCallBlocks([
{
toolCallId: 'tc-1',
toolName: 'build-workflow',
input: { code: 'workflow code' },
},
]);
expect(blocks[0]?.name).toBe('build-workflow');
expect(blocks[0]?.kind).toBe('input');
expect(blocks[0]?.payload).toEqual({ code: 'workflow code' });
expect(blocks[0]?.content).toBe('{ code: workflow code }');
expect(blocks[0]?.metadata).toEqual({ toolCallId: 'tc-1' });
});
// `output` becomes the payload and the block is tagged as kind `output`.
it('formats tool results with structured payload', () => {
const blocks = parseToolResultBlocks([
{
toolCallId: 'tc-1',
toolName: 'build-workflow',
output: { success: true, workflowId: 'wf-1' },
},
]);
expect(blocks[0]?.name).toBe('build-workflow');
expect(blocks[0]?.kind).toBe('output');
expect(blocks[0]?.payload).toEqual({ success: true, workflowId: 'wf-1' });
expect(blocks[0]?.content).toBe('{ success, workflowId }');
});
// An entry without `role`/`content` that is itself a tool-result part gets role `tool`;
// `toolCallId` stays on the segment metadata and is not repeated at block level.
it('parses standalone tool-result content objects as structured segments', () => {
const blocks = parseMessageBlocks([
{
type: 'tool-result',
toolName: 'nodes',
output: { results: [], totalResults: 0 },
toolCallId: 'tc-1',
},
]);
expect(blocks[0]?.role).toBe('tool');
expect(blocks[0]?.segments?.[0]).toMatchObject({
type: 'tool-result',
name: 'nodes',
payload: { results: [], totalResults: 0 },
metadata: { toolCallId: 'tc-1' },
});
expect(blocks[0]?.metadata).toBeUndefined();
});
// The same call (tc-1) appears in `toolCalls` and inside `response.messages`;
// exactly one assistant block is expected in the output.
it('deduplicates overlapping tool call sources in output display blocks', () => {
const blocks = parseOutputDisplayBlocks({
toolCalls: [
{
toolCallId: 'tc-1',
toolName: 'nodes',
input: { action: 'search', nodeTypes: ['trigger'] },
},
],
response: {
messages: [
{
role: 'assistant',
toolCallId: 'tc-1',
providerMetadata: { anthropic: { cacheCreationInputTokens: 0 } },
content: [
{
type: 'tool-call',
toolName: 'nodes',
toolCallId: 'tc-1',
input: { action: 'search', nodeTypes: ['trigger'] },
providerMetadata: { anthropic: { cacheCreationInputTokens: 0 } },
},
],
},
],
},
});
expect(blocks.filter((block) => block.role === 'assistant')).toHaveLength(1);
expect(blocks[0]?.metadata).toBeUndefined();
expect(blocks[0]?.segments?.[0]).toMatchObject({
type: 'tool-call',
name: 'nodes',
});
});
// Tool results are emitted before the response messages, and both use the same block shape.
it('builds unified output display blocks using the same message card shape', () => {
const blocks = parseOutputDisplayBlocks({
toolResults: [
{
toolCallId: 'tc-1',
toolName: 'nodes',
output: { results: [], totalResults: 0 },
},
],
response: {
messages: [
{
role: 'assistant',
content: [{ type: 'tool-call', toolName: 'nodes', input: { action: 'search' } }],
},
],
},
});
expect(blocks[0]?.role).toBe('tool');
expect(blocks[0]?.segments?.[0]).toMatchObject({
type: 'tool-result',
name: 'nodes',
});
expect(blocks[1]?.role).toBe('assistant');
expect(blocks[1]?.segments?.[0]).toMatchObject({
type: 'tool-call',
name: 'nodes',
payload: { action: 'search' },
});
});
// The summary combines finish reason, tool names, usage label, last-message preview and system prompt length.
it('summarizes steps for sidebar display', () => {
expect(
parseStepSummary(
{
system: 'x'.repeat(100),
messages: [{ role: 'user', content: 'Build a weather workflow please' }],
},
{
finishReason: 'tool-calls',
toolCalls: [{ toolName: 'search_nodes' }, { toolName: 'build-workflow' }],
usage: { inputTokens: 100, outputTokens: 20, totalTokens: 120 },
},
),
).toEqual({
finishReason: 'tool-calls',
toolNames: ['search_nodes', 'build-workflow'],
usageLabel: 'in: 100 · out: 20 · total: 120',
messagePreview: 'Build a weather workflow please',
systemCharCount: 100,
});
});
// `system`, `messages`, `stepNumber` and `sdkStepNumber` are excluded; every other key is passed through.
it('includes full tools and config in input extras', () => {
expect(
parseInputExtras({
system: 'prompt',
messages: [],
tools: { search: { description: 'search' } },
toolChoice: 'auto',
activeTools: ['search'],
stepNumber: 0,
sdkStepNumber: 0,
}),
).toEqual({
tools: { search: { description: 'search' } },
toolChoice: 'auto',
activeTools: ['search'],
});
});
it('summarizes usage tokens for inline display', () => {
expect(parseUsageSummary({ inputTokens: 100, outputTokens: 20, totalTokens: 120 })?.label).toBe(
'in: 100 · out: 20 · total: 120',
);
});
// The `<observations>` section is removed from the prompt text and returned separately, both trimmed.
it('extracts observations block from system prompt text', () => {
const systemPrompt = [
'You are helpful.',
'<observations>',
'* CRITICAL (14:28) User is rebuilding observational memory.',
'</observations>',
].join('\n');
expect(extractObservationsBlock(systemPrompt)).toEqual({
withoutObservations: 'You are helpful.',
observations: '* CRITICAL (14:28) User is rebuilding observational memory.',
});
});
// Same extraction at block level: the remaining prompt stays in `systemBlocks`, observations are split out.
it('splits system prompt display into collapsed system and observations blocks', () => {
const parsed = parseSystemPromptForDisplay(
[
'Skill loading protocol',
'<observations>',
'* INFO (09:15) User prefers Slack notifications',
'</observations>',
].join('\n'),
);
expect(parsed.observations).toBe('* INFO (09:15) User prefers Slack notifications');
expect(parsed.systemBlocks[0]?.content).toBe('Skill loading protocol');
expect(parsed.systemBlocks[0]?.segments).toEqual([
{ type: 'text', text: 'Skill loading protocol' },
]);
});
});
@@ -881,6 +881,50 @@ export interface InstanceAiThreadMessagesResponse {
threadId: string;
}
// ---------------------------------------------------------------------------
// Run debug buffer (dev panel — orchestrator LLM steps + workflow code)
// ---------------------------------------------------------------------------
/**
 * Summary of one run in the run debug buffer: identifiers plus counts of the
 * captured LLM steps and workflow code snapshots. Used as the element type of
 * `InstanceAiThreadDebugRunsResponse.runs`.
 */
export interface InstanceAiRunDebugSummary {
runId: string;
threadId: string;
// NOTE(review): presumably an epoch timestamp — confirm the unit against the producer.
startedAt: number;
stepCount: number;
workflowCodeCount: number;
label?: string;
}
/**
 * One captured LLM step of a run. `input` and `output` are free-form records;
 * either may be absent.
 */
export interface InstanceAiRunDebugStep {
stepNumber: number;
input?: Record<string, unknown>;
output?: Record<string, unknown>;
}
/**
 * Snapshot of workflow code captured during a run, together with whether the
 * associated operation succeeded and any reported errors.
 */
export interface InstanceAiRunDebugWorkflowCodeSnapshot {
code: string;
// NOTE(review): presumably distinguishes code submitted in full from code produced by applying patches — confirm with the producer.
source: 'full-code' | 'patch';
patches?: unknown;
workflowId?: string;
toolCallId?: string;
success: boolean;
errors?: string[];
// NOTE(review): presumably an epoch timestamp — confirm the unit against the producer.
capturedAt: number;
}
/**
 * Full debug payload for a single run: its identifiers plus every captured
 * LLM step and workflow code snapshot.
 */
export interface InstanceAiRunDebugResponse {
threadId: string;
runId: string;
startedAt: number;
label?: string;
steps: InstanceAiRunDebugStep[];
workflowCode: InstanceAiRunDebugWorkflowCodeSnapshot[];
}
/** List of run summaries for one thread. */
export interface InstanceAiThreadDebugRunsResponse {
runs: InstanceAiRunDebugSummary[];
threadId: string;
}
// ---------------------------------------------------------------------------
// Rich messages response (session-restored view with agent trees)
// ---------------------------------------------------------------------------
@@ -0,0 +1,773 @@
/**
* Shared LLM step display helpers for Instance AI run debug.
*
* Used by the frontend debug modal, eval HTML reports, and any tooling
* that renders InstanceAiRunDebug* API payloads.
*/
/**
 * One displayable block (a system prompt entry, a message, a tool call or
 * a tool result) produced by the parsers in this module.
 */
export interface ReadableContentBlock {
/** Speaker/source label, e.g. `system`, `user`, `assistant`, `tool`, or a positional fallback such as `message 1`. */
role: string;
/** Flat text preview of the block. */
content: string;
/** Structured parts of the block; omitted when nothing could be extracted. */
segments?: ReadableSegment[];
/** Leftover keys of the source value that were not consumed as content. */
metadata?: unknown;
}
/**
 * A structured part of a block, discriminated by `type`: plain text, a tool
 * call, a tool result, an arbitrary JSON payload, or reasoning text.
 */
export type ReadableSegment =
| { type: 'text'; text: string }
| { type: 'tool-call'; name: string; payload?: unknown; metadata?: unknown }
| { type: 'tool-result'; name?: string; payload?: unknown; metadata?: unknown }
| { type: 'json'; payload: unknown; label?: string }
| { type: 'reasoning'; text: string };
/**
 * Normalized tool call or tool result, as returned by `parseToolCallBlocks`
 * and `parseToolResultBlocks`.
 */
export interface ReadableToolCallBlock {
name: string;
/** `input` for tool calls, `output` for tool results; unset for entries that are not objects. */
kind?: 'input' | 'output';
payload?: unknown;
/** Short summary of `payload`. */
content: string;
metadata?: unknown;
}
/** Compact per-step summary returned by `parseStepSummary`. */
export interface StepDebugSummary {
finishReason?: string;
/** Distinct tool names called in the step, in first-seen order. */
toolNames: string[];
usageLabel?: string;
/** Shortened preview of the last input message. */
messagePreview?: string;
/** Character count of the system prompt, when it is a string or an array. */
systemCharCount?: number;
}
/** Token usage rendered as a one-line label, plus the raw usage value it was built from. */
export interface ReadableUsageSummary {
label: string;
metadata: unknown;
}
/** Result of `parseSystemPromptForDisplay`: system blocks with the observations text split out. */
export interface ParsedSystemPromptDisplay {
systemBlocks: ReadableContentBlock[];
/** Text found inside `<observations>` tags, or `null` when there was none. */
observations: string | null;
}
// Matches the first `<observations>…</observations>` section (case-insensitive, non-greedy,
// spanning newlines); capture group 1 is the text between the tags.
const OBSERVATIONS_BLOCK_PATTERN = /<observations>([\s\S]*?)<\/observations>/i;
/** Type guard: true for non-null objects that are not arrays. */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === 'object';
}
/**
 * Pretty-prints a value as two-space-indented JSON for debug display.
 *
 * Always returns a string: values `JSON.stringify` cannot serialize (it throws
 * on circular structures and bigint) and values it serializes to `undefined`
 * (`undefined`, functions, symbols) fall back to `String(value)`.
 *
 * @param value - Any value to render.
 * @returns The JSON text, or the `String()` form when JSON is unavailable.
 */
export function formatDebugJson(value: unknown): string {
  try {
    // The lib typing says `string`, but at runtime the result is `undefined`
    // for `undefined`, functions and symbols.
    const serialized: string | undefined = JSON.stringify(value, null, 2);
    return serialized ?? String(value);
  } catch {
    return String(value);
  }
}
/**
 * Cheap syntactic check for text that may be a JSON object or array: after
 * trimming it must be wrapped in `{…}` or `[…]`. It does not validate the JSON.
 */
function looksLikeJsonString(value: string): boolean {
  const candidate = value.trim();
  const isWrappedBy = (open: string, close: string): boolean =>
    candidate.startsWith(open) && candidate.endsWith(close);
  return isWrappedBy('{', '}') || isWrappedBy('[', ']');
}
/**
 * Unwraps a `{ type: 'json', value }` envelope to its inner `value`; any other
 * input is returned unchanged.
 */
function unwrapJsonPayload(value: unknown): unknown {
  if (!isRecord(value)) {
    return value;
  }
  if (value.type !== 'json' || !('value' in value)) {
    return value;
  }
  return value.value;
}
/**
 * Builds a short one-line summary of a value for collapsed previews.
 *
 * - `{ type: 'json', value }` envelopes are unwrapped first.
 * - Strings that look like JSON are parsed and summarized as the parsed value;
 *   other strings are trimmed and shortened.
 * - Arrays are shown as an item count; objects by their first keys (a
 *   single-key object also shows a summary of its value).
 * - Anything else (bigint, symbol, function) is shown via `String()`.
 *
 * Shortened text ends with `…` so a cut preview can be told apart from a
 * complete value.
 *
 * @param value - Any value to summarize.
 * @returns The summary text.
 */
export function summarizeJsonValue(value: unknown): string {
  const maxPreviewLength = 96;
  const truncate = (text: string): string =>
    text.length > maxPreviewLength ? `${text.slice(0, maxPreviewLength)}…` : text;

  const unwrapped = unwrapJsonPayload(value);
  if (unwrapped === null) return 'null';
  if (unwrapped === undefined) return 'undefined';
  if (typeof unwrapped === 'string') {
    const trimmed = unwrapped.trim();
    if (looksLikeJsonString(trimmed)) {
      try {
        return summarizeJsonValue(JSON.parse(trimmed));
      } catch {
        // Not valid JSON after all: treat it as plain text below.
      }
    }
    return truncate(trimmed);
  }
  if (typeof unwrapped === 'number' || typeof unwrapped === 'boolean') {
    return String(unwrapped);
  }
  if (Array.isArray(unwrapped)) {
    return `[${unwrapped.length} items]`;
  }
  if (isRecord(unwrapped)) {
    const keys = Object.keys(unwrapped);
    if (keys.length === 0) return '{}';
    if (keys.length === 1) {
      const key = keys[0] ?? 'key';
      const entry = summarizeJsonValue(unwrapped[key]);
      return `{ ${key}: ${entry} }`;
    }
    const preview = keys.slice(0, 2).join(', ');
    return keys.length > 2 ? `{ ${preview}, +${keys.length - 2} }` : `{ ${preview} }`;
  }
  // Only bigint, symbol and function values reach this point. JSON.stringify
  // throws for bigint and returns undefined for the other two, so use the
  // String() form directly rather than reading `.length` on a possibly
  // undefined result.
  return truncate(String(unwrapped));
}
/**
 * Copies `record` without the listed keys.
 *
 * @returns The remaining entries, or `undefined` when nothing is left.
 */
function omitKeys(record: Record<string, unknown>, keys: ReadonlySet<string>): unknown {
  const remaining: Record<string, unknown> = {};
  let keptCount = 0;
  for (const [name, entry] of Object.entries(record)) {
    if (keys.has(name)) continue;
    remaining[name] = entry;
    keptCount += 1;
  }
  if (keptCount === 0) {
    return undefined;
  }
  return Object.keys(remaining).length > 0 ? remaining : undefined;
}
/**
 * Converts a single content part into display segments.
 *
 * Recognized part types are `text`, `reasoning` (both require a string `text`),
 * `tool-call`, `tool-result` and `file`. Anything else, including values that
 * are not objects, becomes a `json` segment carrying the raw part.
 */
function parsePartSegment(part: unknown): ReadableSegment[] {
  if (!isRecord(part)) {
    return [{ type: 'json', payload: part }];
  }

  // First non-empty string among `toolName` and `name`, if any.
  const declaredName = [part.toolName, part.name].find(
    (candidate): candidate is string => typeof candidate === 'string' && candidate.length > 0,
  );

  switch (part.type) {
    case 'text':
      if (typeof part.text === 'string') {
        return [{ type: 'text', text: part.text }];
      }
      break;
    case 'tool-call':
      return [
        {
          type: 'tool-call',
          name: declaredName ?? 'tool',
          payload: part.input ?? part.args,
          metadata: omitKeys(part, new Set(['type', 'toolName', 'name', 'input', 'args'])),
        },
      ];
    case 'tool-result':
      return [
        {
          type: 'tool-result',
          name: declaredName,
          // With neither `output` nor `result`, the whole part is shown.
          payload: part.output ?? part.result ?? part,
          metadata: omitKeys(part, new Set(['type', 'toolName', 'name', 'output', 'result'])),
        },
      ];
    case 'reasoning':
      if (typeof part.text === 'string') {
        return [{ type: 'reasoning', text: part.text }];
      }
      break;
    case 'file': {
      const mediaType = typeof part.mediaType === 'string' ? part.mediaType : 'file';
      return [{ type: 'json', payload: part, label: `file: ${mediaType}` }];
    }
    default:
      break;
  }

  // Unknown part type, or a text/reasoning part without string text.
  const label = typeof part.type === 'string' ? part.type : undefined;
  return [{ type: 'json', payload: part, label }];
}
/**
 * Turns message content of any shape into display segments.
 *
 * - `null`/`undefined` and blank strings give no segments.
 * - Strings that parse as a JSON object/array become one `json` segment;
 *   other strings become one `text` segment holding the untrimmed text.
 * - Arrays are parsed part by part.
 * - Objects with a string `type` are parsed as a single part; every other
 *   value becomes a `json` segment.
 */
function parseContentSegments(content: unknown): ReadableSegment[] {
  if (content === undefined || content === null) {
    return [];
  }

  if (Array.isArray(content)) {
    const collected: ReadableSegment[] = [];
    content.forEach((part: unknown) => {
      collected.push(...parsePartSegment(part));
    });
    return collected;
  }

  if (typeof content === 'string') {
    const trimmed = content.trim();
    if (trimmed.length === 0) {
      return [];
    }
    if (looksLikeJsonString(trimmed)) {
      try {
        return [{ type: 'json', payload: JSON.parse(trimmed) }];
      } catch {
        // Looked like JSON but is not: keep it as text.
      }
    }
    return [{ type: 'text', text: content }];
  }

  if (isRecord(content) && typeof content.type === 'string') {
    return parsePartSegment(content);
  }
  return [{ type: 'json', payload: content }];
}
/**
 * Flattens segments into a text preview: one entry per segment, empty entries
 * dropped, entries separated by a blank line.
 */
function segmentsToPreview(segments: ReadableSegment[]): string {
  const describeSegment = (segment: ReadableSegment): string => {
    switch (segment.type) {
      case 'text':
      case 'reasoning':
        return segment.text;
      case 'tool-call':
        return `[tool-call: ${segment.name}]`;
      case 'tool-result':
        return segment.name ? `[tool-result: ${segment.name}]` : '[tool-result]';
      case 'json':
        return summarizeJsonValue(segment.payload);
    }
  };

  const entries: string[] = [];
  segments.forEach((segment) => {
    const entry = describeSegment(segment);
    if (entry.length > 0) {
      entries.push(entry);
    }
  });
  return entries.join('\n\n');
}
/**
 * Picks the role for a message-like object: its own string `role` wins;
 * otherwise a bare tool-result part is `tool`, a bare tool-call part is
 * `assistant`, and anything else uses `fallback`.
 */
function inferRoleFromValue(value: Record<string, unknown>, fallback: string): string {
  const { role, type } = value;
  if (typeof role === 'string') {
    return role;
  }
  switch (type) {
    case 'tool-result':
      return 'tool';
    case 'tool-call':
      return 'assistant';
    default:
      return fallback;
  }
}
/**
 * Builds a block holding a single text-like segment.
 *
 * @param role - Role label for the block.
 * @param text - Text used both as the preview and as the segment text.
 * @param segmentType - `text` (default) or `reasoning`.
 */
function createTextBlock(
  role: string,
  text: string,
  segmentType: 'text' | 'reasoning' = 'text',
): ReadableContentBlock {
  const segment: ReadableSegment =
    segmentType === 'reasoning' ? { type: 'reasoning', text } : { type: 'text', text };
  return { role, content: text, segments: [segment] };
}
/** Wraps a normalized tool call in an `assistant` block with one `tool-call` segment. */
function toolCallToBlock(toolCall: ReadableToolCallBlock): ReadableContentBlock {
  const { name, payload, metadata } = toolCall;
  const segment: ReadableSegment = { type: 'tool-call', name, payload, metadata };
  return {
    role: 'assistant',
    content: `[tool-call: ${name}]`,
    segments: [segment],
  };
}
/**
 * Wraps a normalized tool result in a `tool` block with one `tool-result`
 * segment. An empty name yields the generic `[tool-result]` preview.
 */
function toolResultToBlock(toolResult: ReadableToolCallBlock): ReadableContentBlock {
  const { name, payload, metadata } = toolResult;
  const preview = name ? `[tool-result: ${name}]` : '[tool-result]';
  const segment: ReadableSegment = { type: 'tool-result', name, payload, metadata };
  return {
    role: 'tool',
    content: preview,
    segments: [segment],
  };
}
/**
 * Parses one message-like value into a readable block.
 *
 * - Strings are parsed directly as content under the given `role`.
 * - Other values that are not objects become a single `json` segment.
 * - Objects take their content from `value[contentKey]`, then `value.text`,
 *   then the object itself; the role comes from `inferRoleFromValue`, and keys
 *   not consumed as content are returned as `metadata`.
 *
 * @param role - Role to use when the value does not imply one.
 * @param value - The raw message, part, or primitive.
 * @param contentKey - Property holding the content (default `content`).
 */
function parseContentBlock(
  role: string,
  value: unknown,
  contentKey = 'content',
): ReadableContentBlock {
  if (typeof value === 'string') {
    const parsed = parseContentSegments(value);
    const preview = segmentsToPreview(parsed);
    return {
      role,
      content: preview || value,
      segments: parsed.length > 0 ? parsed : undefined,
    };
  }

  if (!isRecord(value)) {
    return {
      role,
      content: summarizeJsonValue(value),
      segments: [{ type: 'json', payload: value }],
    };
  }

  const rawContent = value[contentKey] ?? value.text ?? value;
  const parsed = parseContentSegments(rawContent);

  // Keys already represented by the parsed content are left out of the metadata.
  const consumedKeys = new Set<string>([contentKey, 'text', 'role']);
  if (rawContent === value) {
    // The object itself was parsed as a part, so its part fields are consumed too.
    ['type', 'toolName', 'name', 'input', 'args', 'output', 'result'].forEach((key) =>
      consumedKeys.add(key),
    );
  }
  const carriesToolSegment = parsed.some(
    ({ type }) => type === 'tool-call' || type === 'tool-result',
  );
  if (carriesToolSegment) {
    ['toolCallId', 'providerMetadata', 'providerOptions'].forEach((key) => consumedKeys.add(key));
  }

  return {
    role: inferRoleFromValue(value, role),
    content: segmentsToPreview(parsed) || summarizeJsonValue(rawContent),
    segments: parsed.length > 0 ? parsed : undefined,
    metadata: omitKeys(value, consumedKeys),
  };
}
/**
 * Parses a system prompt into readable blocks.
 *
 * A string becomes one `system` block. Array entries are parsed individually,
 * and entries that resolve to role `system` are numbered (`system 1`,
 * `system 2`, …). Any other non-null value is parsed as a single block.
 *
 * @param system - The raw system prompt in any of the shapes above.
 * @returns The blocks, or an empty array for `null`/`undefined`.
 */
export function parseSystemBlocks(system: unknown): ReadableContentBlock[] {
  if (system === undefined || system === null) {
    return [];
  }
  if (typeof system === 'string') {
    const block: ReadableContentBlock = {
      role: 'system',
      content: system,
      segments: [{ type: 'text', text: system }],
    };
    return [block];
  }
  if (!Array.isArray(system)) {
    return [parseContentBlock('system', system)];
  }

  const numbered: ReadableContentBlock[] = [];
  system.forEach((entry: unknown, position: number) => {
    const parsed = parseContentBlock('system', entry);
    const role = parsed.role === 'system' ? `system ${position + 1}` : parsed.role;
    numbered.push({ ...parsed, role });
  });
  return numbered;
}
/**
 * Splits the first `<observations>…</observations>` section out of `text`.
 *
 * @param text - Prompt text that may contain an observations section.
 * @returns `withoutObservations`: the text with that section removed and
 * trimmed (the untouched input when there is no section); `observations`: the
 * trimmed inner text, or `null` when absent or blank.
 */
export function extractObservationsBlock(text: string): {
  withoutObservations: string;
  observations: string | null;
} {
  const found = OBSERVATIONS_BLOCK_PATTERN.exec(text);
  if (found === null) {
    return { withoutObservations: text, observations: null };
  }

  const inner = found[1]?.trim();
  const remainder = text.replace(OBSERVATIONS_BLOCK_PATTERN, '').trim();
  return {
    withoutObservations: remainder,
    observations: inner ? inner : null,
  };
}
/**
 * Removes observations sections from a block and returns them separately.
 *
 * With segments, every text segment is processed: the extracted observations
 * are joined with a blank line, and text segments left blank are dropped.
 * Without segments, the block's `content` is processed instead.
 */
function extractObservationsFromBlock(block: ReadableContentBlock): {
  block: ReadableContentBlock;
  observations: string | null;
} {
  const sourceSegments = block.segments;

  if (!sourceSegments?.length) {
    const { withoutObservations, observations } = extractObservationsBlock(block.content);
    return {
      block: {
        ...block,
        content: withoutObservations,
        segments: withoutObservations
          ? [{ type: 'text', text: withoutObservations }]
          : undefined,
      },
      observations,
    };
  }

  const found: string[] = [];
  const kept: ReadableSegment[] = [];
  sourceSegments.forEach((segment) => {
    if (segment.type !== 'text') {
      kept.push(segment);
      return;
    }
    const extracted = extractObservationsBlock(segment.text);
    if (extracted.observations) {
      found.push(extracted.observations);
    }
    if (extracted.withoutObservations.trim().length > 0) {
      kept.push({ ...segment, text: extracted.withoutObservations });
    }
  });

  return {
    block: {
      ...block,
      content: segmentsToPreview(kept),
      segments: kept.length > 0 ? kept : undefined,
    },
    observations: found.length > 0 ? found.join('\n\n') : null,
  };
}
/**
 * Parses a system prompt for display, separating observations from the rest.
 *
 * @param system - The raw system prompt (string, array, or object).
 * @returns `systemBlocks`: the blocks with observations removed (blocks left
 * with no content and no segments are dropped); `observations`: all extracted
 * observations joined with a blank line, or `null` when there are none.
 */
export function parseSystemPromptForDisplay(system: unknown): ParsedSystemPromptDisplay {
  const systemBlocks: ReadableContentBlock[] = [];
  const collectedObservations: string[] = [];

  for (const rawBlock of parseSystemBlocks(system)) {
    const { block, observations } = extractObservationsFromBlock(rawBlock);
    if (observations) {
      collectedObservations.push(observations);
    }
    const hasContent = block.content.trim().length > 0;
    if (hasContent || block.segments?.length) {
      systemBlocks.push(block);
    }
  }

  const observations =
    collectedObservations.length > 0 ? collectedObservations.join('\n\n') : null;
  return { systemBlocks, observations };
}
/**
 * Parses a list of messages into readable blocks.
 *
 * Each entry falls back to the positional role `message N` when it carries no
 * role of its own. A value that is not an array is parsed as a single block
 * with fallback role `message`.
 *
 * @param messages - Raw messages, or a single message-like value.
 * @returns One block per message; empty for `null`/`undefined`.
 */
export function parseMessageBlocks(messages: unknown): ReadableContentBlock[] {
  if (messages === undefined || messages === null) {
    return [];
  }
  if (!Array.isArray(messages)) {
    return [parseContentBlock('message', messages)];
  }
  return messages.map((message: unknown, position: number): ReadableContentBlock => {
    const positionalRole = `message ${position + 1}`;
    const parsed = parseContentBlock(positionalRole, message);
    // An empty role is replaced by the positional one; any other role is kept.
    return { ...parsed, role: parsed.role || positionalRole };
  });
}
/**
 * Normalizes raw tool calls for display.
 *
 * For object entries the name comes from `toolName`, then `name`, then the
 * positional fallback `tool N`; the payload is `input`, then `args`, then the
 * entry itself; the remaining keys become metadata. Entries that are not
 * objects are kept as raw payloads named `tool N`.
 *
 * @param toolCalls - An array of tool calls, or a single value of any shape.
 * @returns One block per call; empty for `null`/`undefined`.
 */
export function parseToolCallBlocks(toolCalls: unknown): ReadableToolCallBlock[] {
  if (toolCalls === undefined || toolCalls === null) {
    return [];
  }
  if (!Array.isArray(toolCalls)) {
    return [{ name: 'tool', payload: toolCalls, content: summarizeJsonValue(toolCalls) }];
  }

  const nonMetadataKeys = new Set(['toolName', 'name', 'input', 'args', 'output', 'result']);

  return (toolCalls as unknown[]).map((entry: unknown, position): ReadableToolCallBlock => {
    const positionalName = `tool ${position + 1}`;
    if (!isRecord(entry)) {
      return { name: positionalName, payload: entry, content: summarizeJsonValue(entry) };
    }

    // First non-empty string among `toolName` and `name`, if any.
    const declaredName = [entry.toolName, entry.name].find(
      (candidate): candidate is string => typeof candidate === 'string' && candidate.length > 0,
    );
    const payload = entry.input ?? entry.args ?? entry;
    return {
      name: declaredName ?? positionalName,
      kind: 'input',
      payload,
      content: summarizeJsonValue(payload),
      metadata: omitKeys(entry, nonMetadataKeys),
    };
  });
}
/**
 * Normalizes raw tool results for display.
 *
 * For object entries the name comes from `toolName`, then `name`, then the
 * positional fallback `tool N`; the payload is `output`, then `result`, then
 * the entry itself; the remaining keys become metadata. Entries that are not
 * objects are kept as raw payloads named `result N`.
 *
 * @param toolResults - An array of tool results, or a single value of any shape.
 * @returns One block per result; empty for `null`/`undefined`.
 */
export function parseToolResultBlocks(toolResults: unknown): ReadableToolCallBlock[] {
  if (toolResults === undefined || toolResults === null) {
    return [];
  }
  if (!Array.isArray(toolResults)) {
    return [
      { name: 'tool result', payload: toolResults, content: summarizeJsonValue(toolResults) },
    ];
  }

  const nonMetadataKeys = new Set(['toolName', 'name', 'output', 'result', 'input', 'args']);

  return (toolResults as unknown[]).map((entry: unknown, position): ReadableToolCallBlock => {
    if (!isRecord(entry)) {
      return {
        name: `result ${position + 1}`,
        payload: entry,
        content: summarizeJsonValue(entry),
      };
    }

    // First non-empty string among `toolName` and `name`, if any.
    const declaredName = [entry.toolName, entry.name].find(
      (candidate): candidate is string => typeof candidate === 'string' && candidate.length > 0,
    );
    const payload = entry.output ?? entry.result ?? entry;
    return {
      name: declaredName ?? `tool ${position + 1}`,
      kind: 'output',
      payload,
      content: summarizeJsonValue(payload),
      metadata: omitKeys(entry, nonMetadataKeys),
    };
  });
}
/**
 * Renders token usage as a short label such as `in: 100 · out: 20 · total: 120`.
 *
 * Input tokens are read from `inputTokens` (or `promptTokens`), output tokens
 * from `outputTokens` (or `completionTokens`); only numeric counters are
 * shown. When no counter is numeric, or `usage` is not an object, the label is
 * the JSON form of `usage`.
 *
 * @param usage - The raw usage value.
 * @returns The label plus the raw value, or `undefined` for `null`/`undefined`.
 */
export function parseUsageSummary(usage: unknown): ReadableUsageSummary | undefined {
  if (usage === undefined || usage === null) {
    return undefined;
  }
  if (!isRecord(usage)) {
    return { label: formatDebugJson(usage), metadata: usage };
  }

  const counters: Array<[string, unknown]> = [
    ['in', usage.inputTokens ?? usage.promptTokens],
    ['out', usage.outputTokens ?? usage.completionTokens],
    ['total', usage.totalTokens],
  ];
  const parts: string[] = [];
  for (const [prefix, count] of counters) {
    if (typeof count === 'number') {
      parts.push(`${prefix}: ${count}`);
    }
  }

  const label = parts.length > 0 ? parts.join(' · ') : formatDebugJson(usage);
  return { label, metadata: usage };
}
/**
 * Returns the step input fields that are not shown elsewhere, i.e. everything
 * except `system`, `messages`, `stepNumber` and `sdkStepNumber`.
 *
 * @param input - The raw step input.
 * @returns The remaining fields, or `undefined` when there are none.
 */
export function parseInputExtras(input: Record<string, unknown> | undefined): unknown {
  if (!input) {
    return undefined;
  }

  const shownElsewhere = new Set(['system', 'messages', 'stepNumber', 'sdkStepNumber']);
  const extras: Record<string, unknown> = {};
  Object.entries(input).forEach(([field, fieldValue]) => {
    if (shownElsewhere.has(field)) return;
    extras[field] = fieldValue;
  });

  const hasExtras = Object.keys(extras).length > 0;
  return hasExtras ? extras : undefined;
}
/**
 * Builds the identity key used to detect duplicate blocks.
 *
 * For blocks with segments the key is derived from the segments only (type,
 * text, tool name, `toolCallId` from the segment metadata, serialized
 * payload), so the block role does not take part. Blocks without segments are
 * keyed by role and content.
 */
function getBlockDedupeKey(block: ReadableContentBlock): string {
  const segments = block.segments;
  if (!segments?.length) {
    return `${block.role}:${block.content}`;
  }

  // `toolCallId` from a segment's metadata, or '' when it is missing or not a string.
  const readToolCallId = (metadata: unknown): string =>
    isRecord(metadata) && typeof metadata.toolCallId === 'string' ? metadata.toolCallId : '';

  const segmentKey = (segment: ReadableSegment): string => {
    switch (segment.type) {
      case 'text':
        return `text:${segment.text}`;
      case 'reasoning':
        return `reasoning:${segment.text}`;
      case 'tool-call': {
        const payloadJson = formatDebugJson(segment.payload ?? null);
        return `tool-call:${segment.name}:${readToolCallId(segment.metadata)}:${payloadJson}`;
      }
      case 'tool-result': {
        const payloadJson = formatDebugJson(segment.payload ?? null);
        return `tool-result:${segment.name ?? ''}:${readToolCallId(segment.metadata)}:${payloadJson}`;
      }
      case 'json':
        return `json:${formatDebugJson(segment.payload)}`;
    }
  };

  return segments.map(segmentKey).join('|');
}
/** Drops blocks whose dedupe key was already seen, keeping the first occurrence and the original order. */
function dedupeContentBlocks(blocks: ReadableContentBlock[]): ReadableContentBlock[] {
  const seenKeys = new Set<string>();
  const unique: ReadableContentBlock[] = [];
  blocks.forEach((block) => {
    const key = getBlockDedupeKey(block);
    if (!seenKeys.has(key)) {
      seenKeys.add(key);
      unique.push(block);
    }
  });
  return unique;
}
/**
 * Picks the main message blocks of a step output from the first source that
 * yields any: `response.messages`, then `content`, then `toolCalls`.
 */
function parsePrimaryOutputMessageBlocks(output: Record<string, unknown>): ReadableContentBlock[] {
  const { response, content } = output;

  if (isRecord(response) && Array.isArray(response.messages)) {
    const fromMessages = parseMessageBlocks(response.messages);
    if (fromMessages.length > 0) {
      return fromMessages;
    }
  }

  if (content !== undefined && content !== null) {
    const fromContent = parseMessageBlocks(content);
    if (fromContent.length > 0) {
      return fromContent;
    }
  }

  return parseToolCallBlocks(output.toolCalls).map((toolCall) => toolCallToBlock(toolCall));
}
/**
 * Builds the ordered, de-duplicated blocks shown for a step output:
 * assistant text, reasoning text, tool results, then the primary message
 * blocks.
 *
 * @param output - The raw step output.
 * @returns The display blocks; empty when `output` is missing.
 */
export function parseOutputDisplayBlocks(
  output: Record<string, unknown> | undefined,
): ReadableContentBlock[] {
  if (!output) {
    return [];
  }

  const { text, reasoningText } = output;
  const textBlocks: ReadableContentBlock[] = [];
  if (typeof text === 'string' && text.trim()) {
    textBlocks.push(createTextBlock('assistant', text));
  }
  if (typeof reasoningText === 'string' && reasoningText.trim()) {
    textBlocks.push(createTextBlock('reasoning', reasoningText, 'reasoning'));
  }

  const toolResultBlocks = parseToolResultBlocks(output.toolResults).map((toolResult) =>
    toolResultToBlock(toolResult),
  );
  const primaryBlocks = parsePrimaryOutputMessageBlocks(output);

  return dedupeContentBlocks([...textBlocks, ...toolResultBlocks, ...primaryBlocks]);
}
/**
 * Returns the step output fields that are not rendered elsewhere.
 *
 * Every key outside the primary set is copied through. When `response` is an
 * object, its fields other than `messages` are added under `responseMeta`.
 *
 * @param output - The raw step output.
 * @returns The extra fields, or `undefined` when there are none.
 */
export function parseOutputExtras(output: Record<string, unknown> | undefined): unknown {
  if (!output) {
    return undefined;
  }

  const renderedElsewhere = new Set([
    'text',
    'toolCalls',
    'toolResults',
    'usage',
    'response',
    'finishReason',
    'stepNumber',
    'sdkStepNumber',
    'content',
    'reasoning',
    'reasoningText',
  ]);

  const extras: Record<string, unknown> = {};
  Object.entries(output).forEach(([field, fieldValue]) => {
    if (renderedElsewhere.has(field)) return;
    extras[field] = fieldValue;
  });

  const { response } = output;
  if (isRecord(response)) {
    const responseMeta: Record<string, unknown> = {};
    Object.entries(response).forEach(([field, fieldValue]) => {
      if (field === 'messages') return;
      responseMeta[field] = fieldValue;
    });
    if (Object.keys(responseMeta).length > 0) {
      extras.responseMeta = responseMeta;
    }
  }

  const hasExtras = Object.keys(extras).length > 0;
  return hasExtras ? extras : undefined;
}
/**
 * Builds the compact summary of one LLM step.
 *
 * - `toolNames`: distinct tool names from `output.toolCalls` (`toolName`, then
 *   `name`), in first-seen order.
 * - `messagePreview`: trimmed content of the last input message, cut to 72
 *   characters; a cut preview ends with `…` so it can be told apart from a
 *   complete message.
 * - `systemCharCount`: length of a string system prompt, or the summed content
 *   length of the parsed blocks for an array prompt; otherwise unset.
 *
 * @param input - The raw step input (`system`, `messages`, …).
 * @param output - The raw step output (`finishReason`, `toolCalls`, `usage`, …).
 * @returns The summary; fields that cannot be derived are left `undefined`.
 */
export function parseStepSummary(
  input?: Record<string, unknown>,
  output?: Record<string, unknown>,
): StepDebugSummary {
  const maxPreviewLength = 72;

  // A Set keeps first-seen order and avoids rescanning the list for every call.
  const uniqueToolNames = new Set<string>();
  const toolCalls = output?.toolCalls;
  if (Array.isArray(toolCalls)) {
    for (const toolCall of toolCalls as unknown[]) {
      if (!isRecord(toolCall)) continue;
      const name =
        (typeof toolCall.toolName === 'string' && toolCall.toolName) ||
        (typeof toolCall.name === 'string' && toolCall.name) ||
        undefined;
      if (name) {
        uniqueToolNames.add(name);
      }
    }
  }

  let messagePreview: string | undefined;
  const messages = input?.messages;
  if (Array.isArray(messages) && messages.length > 0) {
    const blocks = parseMessageBlocks(messages);
    const lastBlock = blocks[blocks.length - 1];
    if (lastBlock?.content) {
      const trimmed = lastBlock.content.trim();
      messagePreview =
        trimmed.length > maxPreviewLength ? `${trimmed.slice(0, maxPreviewLength)}…` : trimmed;
    }
  }

  let systemCharCount: number | undefined;
  const system = input?.system;
  if (typeof system === 'string') {
    systemCharCount = system.length;
  } else if (Array.isArray(system)) {
    systemCharCount = parseSystemBlocks(system).reduce(
      (total, block) => total + block.content.length,
      0,
    );
  }

  return {
    finishReason: typeof output?.finishReason === 'string' ? output.finishReason : undefined,
    toolNames: [...uniqueToolNames],
    usageLabel: parseUsageSummary(output?.usage)?.label,
    messagePreview,
    systemCharCount,
  };
}
@@ -155,4 +155,8 @@ export class InstanceAiConfig {
/** Replacement text substituted for each redacted match in agent output. */
@Env('N8N_INSTANCE_AI_OUTPUT_REDACTION_PLACEHOLDER')
outputRedactionPlaceholder: string = '[REDACTED]';
/** Capture orchestrator LLM steps and workflow code snapshots for the dev debug panel. */
@Env('N8N_INSTANCE_AI_RUN_DEBUG_ENABLED')
runDebugEnabled: boolean = false;
}
+1
View File
@@ -322,6 +322,7 @@ describe('GlobalConfig', () => {
outputRedactionSecrets: true,
outputRedactionPii: 'credit-card',
outputRedactionPlaceholder: '[REDACTED]',
runDebugEnabled: false,
},
queue: {
health: {
@@ -24,6 +24,12 @@ All Instance AI configuration is done via environment variables.
| `LANGSMITH_ENDPOINT` / `LANGCHAIN_ENDPOINT` | string | unset | Optional direct LangSmith endpoint override. |
| `LANGSMITH_TRACING` / `LANGCHAIN_TRACING_V2` | boolean | unset | LangSmith SDK tracing flags. `false` disables tracing; `true` enables direct tracing when direct LangSmith credentials or endpoints are configured. |
### Debugging
| Variable | Type | Default | Description |
|----------|------|---------|-------------|
| `N8N_INSTANCE_AI_RUN_DEBUG_ENABLED` | boolean | `false` | Capture orchestrator LLM steps and workflow code snapshots for the dev debug panel and eval LLM debug reports. |
### Memory
| Variable | Type | Default | Description |
@@ -241,6 +241,7 @@ Operational details:
| `LANGSMITH_BRANCH` | No | Branch name to tag the experiment with (auto-set in CI) |
| `CONTEXT7_API_KEY` | No | Context7 key for API-doc lookups. Improves mock realism for less-common services; the LLM falls back to training data when unset |
| `N8N_AI_ASSISTANT_BASE_URL` | No | Set to `""` to bypass the hosted AI proxy and hit Anthropic directly — useful to avoid per-tenant quota during large batch runs |
| `N8N_INSTANCE_AI_RUN_DEBUG_ENABLED` | No | Set to `true` on the target n8n instance to capture orchestrator LLM steps and workflow code for the eval LLM debug report (`workflow-eval-llm-debug.html`). Off by default. |
**LangSmith caveat:** if `LANGSMITH_API_KEY` is set in `.env.local`, local runs also land in the shared `instance-ai-workflow-evals` dataset. Unset it (or run without `dotenvx`) to keep exploratory runs out of team results.
@@ -0,0 +1,50 @@
import { describe, expect, it, vi } from 'vitest';
import type { N8nClient } from '../clients/n8n-client';
import { captureThreadRunDebug } from '../harness/capture-run-debug';
describe('captureThreadRunDebug', () => {
	it('fetches full run records for each summary', async () => {
		// One summary in the listing, and one full record served for it.
		const runSummary = {
			runId: 'run-1',
			threadId: 'thread-1',
			startedAt: 1,
			stepCount: 1,
			workflowCodeCount: 0,
			label: 'Build workflow',
		};
		const fullRecord = {
			threadId: 'thread-1',
			runId: 'run-1',
			startedAt: 1,
			steps: [{ stepNumber: 0, input: { system: 'prompt' } }],
			workflowCode: [],
		};
		const listThreadDebugRuns = vi
			.fn()
			.mockResolvedValue({ threadId: 'thread-1', runs: [runSummary] });
		const getRunDebug = vi.fn().mockResolvedValue(fullRecord);
		const client = { listThreadDebugRuns, getRunDebug } as unknown as N8nClient;

		const captured = await captureThreadRunDebug(client, 'thread-1');

		expect(captured).toHaveLength(1);
		// The label is only present on the summary, so it must be copied onto the record.
		expect(captured[0]?.label).toBe('Build workflow');
		expect(captured[0]?.steps).toHaveLength(1);
		expect(client.getRunDebug).toHaveBeenCalledWith('run-1');
	});

	it('returns an empty array when the debug API is unavailable', async () => {
		const unavailable = new Error(
			'n8n API GET /rest/instance-ai/debug/threads/t/runs failed (404): not found',
		);
		const client = {
			listThreadDebugRuns: vi.fn().mockRejectedValue(unavailable),
		} as unknown as N8nClient;

		await expect(captureThreadRunDebug(client, 'thread-1')).resolves.toEqual([]);
	});
});
@@ -214,4 +214,47 @@ describe('reshapeLangSmithRuns', () => {
expect(tc.workflowJson).toEqual(workflowJson);
expect(tc.buildTrace).toEqual(buildTrace);
});
// Run debug captured during the build is passed as the optional seventh argument,
// keyed by thread id, and must surface on the reshaped result as `runDebug`.
it('merges stashed run debug by thread id', () => {
const cases = [withFile('airtable', [scenario('s1')])];
// The row's output carries threadId 'thread-1', which is the lookup key below.
const rows = [
row(
{ testCaseFile: 'airtable', scenarioName: 's1', _iteration: 0 },
{
buildSuccess: true,
passed: true,
score: 1,
reasoning: 'ok',
threadId: 'thread-1',
},
),
];
const runDebugByThreadId = new Map([
[
'thread-1',
[
{
threadId: 'thread-1',
runId: 'run-1',
startedAt: 1,
steps: [{ stepNumber: 0 }],
workflowCode: [],
},
],
],
]);
const result = reshapeLangSmithRuns(
rows,
cases,
1,
new Map(),
new Map(),
undefined,
runDebugByThreadId,
);
expect(result[0][0]?.runDebug).toHaveLength(1);
expect(result[0][0]?.runDebug?.[0]?.runId).toBe('run-1');
});
});
@@ -0,0 +1,83 @@
import { getTestCaseAnchorId } from '../report/report-anchors';
import { generateRunDebugReport } from '../report/run-debug-report';
import type { WorkflowTestCase, WorkflowTestCaseResult } from '../types';
// Minimal workflow test case fixture shared by the specs in this file.
const TEST_CASE: WorkflowTestCase = {
conversation: [{ role: 'user', text: 'Build a Slack notifier' }],
complexity: 'simple',
tags: [],
executionScenarios: [{ name: 's', description: 'd', dataSetup: '', successCriteria: 'ok' }],
datasets: ['full'],
};
/**
 * Creates a successful-build result for the shared test case carrying the given
 * run debug records.
 *
 * @param runDebug - Captured run debug records to attach.
 * @param overrides - Fields that replace the defaults, including `runDebug` itself.
 */
function resultWithRunDebug(
	runDebug: WorkflowTestCaseResult['runDebug'],
	overrides: Partial<WorkflowTestCaseResult> = {},
): WorkflowTestCaseResult {
	const defaults: WorkflowTestCaseResult = {
		testCase: TEST_CASE,
		workflowBuildSuccess: true,
		executionScenarioResults: [],
		fileSlug: 'slack-notifier',
		threadId: 'thread-1',
		runDebug,
	};
	// Overrides are spread last so they win over every default.
	return { ...defaults, ...overrides };
}
// Specs for the standalone LLM debug HTML report and its anchor ids.
describe('run debug report', () => {
it('renders runs, steps, and escaped user content', () => {
// The system prompt contains markup so the escaping assertion below is meaningful.
const html = generateRunDebugReport([
resultWithRunDebug([
{
threadId: 'thread-1',
runId: 'run-1',
startedAt: 1_700_000_000_000,
label: 'Build a Slack notifier',
steps: [
{
stepNumber: 0,
input: {
system: 'You are helpful <script>alert(1)</script>',
messages: [{ role: 'user', content: 'Build a Slack notifier' }],
},
output: {
finishReason: 'tool-calls',
toolCalls: [{ toolName: 'search_nodes', input: { query: 'slack' } }],
usage: { inputTokens: 10, outputTokens: 5, totalTokens: 15 },
},
},
],
workflowCode: [],
},
]),
]);
// The section anchor is derived from the file slug.
expect(html).toContain('id="tc-slack-notifier"');
expect(html).toContain('Build a Slack notifier');
expect(html).toContain('search_nodes');
expect(html).toContain('tool-calls');
// Captured content must be HTML-escaped, never emitted as live markup.
expect(html).toContain('&lt;script&gt;alert(1)&lt;/script&gt;');
// The inline click handlers that switch runs and steps must be wired up.
expect(html).toContain('selectRun(');
expect(html).toContain('selectStep(');
});
it('uses stable anchor ids from file slugs', () => {
const result = resultWithRunDebug([]);
expect(getTestCaseAnchorId(result, 0)).toBe('tc-slack-notifier');
});
it('renders an empty-state stub when no debug was captured', () => {
// A result without `runDebug` must still yield a complete page with guidance.
const html = generateRunDebugReport([
{
testCase: TEST_CASE,
workflowBuildSuccess: false,
executionScenarioResults: [],
},
]);
expect(html).toContain('No LLM run debug was captured');
expect(html).toContain('N8N_INSTANCE_AI_RUN_DEBUG_ENABLED=true');
expect(html).toContain('Workflow eval — LLM debug');
});
});
@@ -38,6 +38,27 @@ describe('build expectations in the workflow report', () => {
expect(html).toContain('&#10007;'); // fail icon
});
// A result with captured run debug must link to its section in the LLM debug
// report, using the anchor id derived from the file slug.
it('links to the LLM debug report when run debug was captured', () => {
const html = generateWorkflowReport([
{
...resultWith([]),
fileSlug: 'slack-notifier',
runDebug: [
{
threadId: 'thread-1',
runId: 'run-1',
startedAt: 1,
steps: [],
workflowCode: [],
},
],
},
]);
expect(html).toContain('workflow-eval-llm-debug.html#tc-slack-notifier');
expect(html).toContain('LLM steps →');
});
it('renders an incomplete verdict neutrally and keeps it out of the count', () => {
const html = generateWorkflowReport([
resultWith([
@@ -7,6 +7,7 @@
// falls back to a direct loop with the same eval-results.json output.
// ---------------------------------------------------------------------------
import type { InstanceAiRunDebugResponse } from '@n8n/api-types';
import { mkdirSync, writeFileSync } from 'fs';
import { Client } from 'langsmith';
import { evaluate } from 'langsmith/evaluation';
@@ -41,6 +42,7 @@ import { formatComparisonMarkdown, formatComparisonTerminal } from '../compariso
import { seedCredentials, cleanupCredentials } from '../credentials/seeder';
import { loadWorkflowTestCasesWithFiles } from '../data/workflows';
import type { WorkflowTestCaseWithFile } from '../data/workflows';
import { captureThreadRunDebug } from '../harness/capture-run-debug';
import { createLogger } from '../harness/logger';
import type { EvalLogger } from '../harness/logger';
import {
@@ -62,6 +64,7 @@ import {
import { syncDataset, type DatasetExampleInputs } from '../langsmith/dataset-sync';
import { seedMcpRegistry } from '../mcp-registry/seeder';
import { snapshotWorkflowIds } from '../outcome/workflow-discovery';
import { writeRunDebugReport } from '../report/run-debug-report';
import { writeWorkflowReport } from '../report/workflow-report';
import type {
BuildExpectationResult,
@@ -215,6 +218,8 @@ async function main(): Promise<void> {
const reportResults = flattenRunsForReport(evaluation);
const htmlPath = writeWorkflowReport(reportResults);
console.log(`Report: ${htmlPath}`);
const debugHtmlPath = writeRunDebugReport(reportResults);
console.log(`LLM debug: ${debugHtmlPath}`);
console.log(
'\n' + formatComparisonTerminal(evaluation, outcome, { commitSha, slugByTestCase }),
);
@@ -242,9 +247,19 @@ async function runWithLangSmith(config: RunConfig): Promise<{
}> {
const { args, lanes, logger, prebuiltManifest, prebuiltWorkflowIdsToDelete } = config;
const testCasesWithFiles = loadWorkflowTestCasesWithFiles(args.filter, args.exclude, args.tier);
if (testCasesWithFiles.length === 0) {
logger.info('No workflow test cases found in evaluations/data/workflows/');
return {
evaluation: { totalRuns: 0, testCases: [] },
experimentName: '',
outcome: { kind: 'no_baseline' },
slugByTestCase: new Map(),
};
}
const lsClient = new Client();
const datasetName = await syncDataset(lsClient, args.dataset, logger, args.filter, args.exclude);
const testCasesWithFiles = loadWorkflowTestCasesWithFiles(args.filter, args.exclude, args.tier);
// Stash transcripts by threadId so reshapeLangSmithRuns can merge them in —
// the LangSmith target() output schema doesn't carry the full transcript.
@@ -252,6 +267,7 @@ async function runWithLangSmith(config: RunConfig): Promise<{
// Build-expectation verdicts, judged once per build and merged the same way —
// fired during getOrBuild, awaited before reshapeLangSmithRuns.
const buildExpectationsByThreadId = new Map<string, Promise<BuildExpectationResult[]>>();
const runDebugByThreadId = new Map<string, Promise<InstanceAiRunDebugResponse[]>>();
// LangSmith dataset rows carry only per-scenario fields. The conversation and
// build expectations for the build are sourced locally, keyed by fileSlug.
@@ -383,6 +399,7 @@ async function runWithLangSmith(config: RunConfig): Promise<{
buildDurations.set(key, buildDurationMs);
stashTranscript(build);
stashBuildExpectations(fileSlug, build);
stashRunDebug(lane.runner.client, build);
if (build.success && !build.workflowChecks) {
// No transcript in prebuilt mode — checks run with empty prompt context.
build.workflowChecks = await runWorkflowChecks({
@@ -409,6 +426,7 @@ async function runWithLangSmith(config: RunConfig): Promise<{
buildDurations.set(key, buildDurationMs);
stashTranscript(build);
stashBuildExpectations(fileSlug, build);
stashRunDebug(lane.runner.client, build);
return { build, lane, buildDurationMs };
} finally {
allocator.release(lane, fileSlug);
@@ -424,6 +442,11 @@ async function runWithLangSmith(config: RunConfig): Promise<{
}
}
// Start capturing run debug for a finished build and stash the pending promise
// under its thread id. Builds without a thread id are ignored.
function stashRunDebug(client: N8nClient, build: BuildResult): void {
	const { threadId } = build;
	if (threadId) {
		runDebugByThreadId.set(threadId, captureThreadRunDebug(client, threadId, logger));
	}
}
// Judge build expectations once per build (off the scenario critical path);
// reshapeLangSmithRuns awaits and merges the verdicts by threadId.
function stashBuildExpectations(fileSlug: string, build: BuildResult): void {
@@ -656,6 +679,10 @@ async function runWithLangSmith(config: RunConfig): Promise<{
for (const [threadId, verdictsPromise] of buildExpectationsByThreadId) {
buildExpectationsResolved.set(threadId, await verdictsPromise);
}
const runDebugResolved = new Map<string, InstanceAiRunDebugResponse[]>();
for (const [threadId, runDebugPromise] of runDebugByThreadId) {
runDebugResolved.set(threadId, await runDebugPromise);
}
const allRunResults = reshapeLangSmithRuns(
experimentResults.results,
testCasesWithFiles,
@@ -663,6 +690,7 @@ async function runWithLangSmith(config: RunConfig): Promise<{
transcriptByThreadId,
buildExpectationsResolved,
lanes[0]?.baseUrl,
runDebugResolved,
);
const evaluation = aggregateResults(allRunResults, args.iterations);
@@ -7,7 +7,7 @@
// unit-testable on its own (index.ts runs main() at import time).
// ---------------------------------------------------------------------------
import type { InstanceAiEvalExecutionResult } from '@n8n/api-types';
import type { InstanceAiEvalExecutionResult, InstanceAiRunDebugResponse } from '@n8n/api-types';
import type { Run } from 'langsmith/schemas';
import { z } from 'zod';
@@ -139,6 +139,7 @@ export function reshapeLangSmithRuns(
transcriptByThreadId: Map<string, TranscriptTurn[]>,
buildExpectationsByThreadId: Map<string, BuildExpectationResult[]>,
n8nBaseUrl: string | undefined,
runDebugByThreadId: Map<string, InstanceAiRunDebugResponse[]> = new Map(),
): WorkflowTestCaseResult[][] {
// Index runs by (iteration, testCaseFile, scenarioName) using the `_iteration`
// we injected in expandExamplesForIterations. Falls back to 0 for single-run.
@@ -211,6 +212,7 @@ export function reshapeLangSmithRuns(
workflowJson,
buildTrace,
n8nBaseUrl,
runDebug: threadId ? runDebugByThreadId.get(threadId) : undefined,
});
}
allRunResults.push(runResults);
@@ -10,6 +10,8 @@ import type {
InstanceAiConfirmRequest,
InstanceAiRichMessagesResponse,
InstanceAiEvalExecutionResult,
InstanceAiRunDebugResponse,
InstanceAiThreadDebugRunsResponse,
} from '@n8n/api-types';
import { z } from 'zod';
@@ -208,6 +210,26 @@ export class N8nClient {
await this.fetch(`/rest/instance-ai/threads/${threadId}`, { method: 'DELETE' });
}
/**
 * Fetch the captured LLM debug run listing for a thread.
 * GET /rest/instance-ai/debug/threads/:threadId/runs
 */
async listThreadDebugRuns(threadId: string): Promise<InstanceAiThreadDebugRunsResponse> {
	const payload = await this.fetch(`/rest/instance-ai/debug/threads/${threadId}/runs`);
	return this.unwrapRestData<InstanceAiThreadDebugRunsResponse>(payload);
}
/**
 * Fetch the full LLM step debug record of one run.
 * GET /rest/instance-ai/debug/runs/:runId
 */
async getRunDebug(runId: string): Promise<InstanceAiRunDebugResponse> {
	const payload = await this.fetch(`/rest/instance-ai/debug/runs/${runId}`);
	return this.unwrapRestData<InstanceAiRunDebugResponse>(payload);
}
// -- Computer-use gateway (pairing + status) -----------------------------
/**
@@ -552,6 +574,13 @@ export class N8nClient {
// -- Internal fetch ------------------------------------------------------
/**
 * Returns the `data` property when the response is an object that has one;
 * otherwise returns the response itself. No runtime validation against `T` is done.
 */
private unwrapRestData<T>(result: unknown): T {
	const hasDataEnvelope = typeof result === 'object' && result !== null && 'data' in result;
	return hasDataEnvelope ? (result as { data: T }).data : (result as T);
}
private async fetch(
path: string,
options: { method?: string; body?: unknown; timeoutMs?: number } = {},
@@ -0,0 +1,49 @@
import type { InstanceAiRunDebugResponse } from '@n8n/api-types';
import pLimit from 'p-limit';
import type { EvalLogger } from './logger';
import type { N8nClient } from '../clients/n8n-client';
// Maximum number of per-run debug fetches kept in flight at once for a thread.
const RUN_FETCH_CONCURRENCY = 4;
/**
 * Collects the full LLM debug records of every run captured for a thread.
 *
 * Best effort: a failed listing yields an empty array, and a run whose record
 * cannot be fetched is dropped. Both failures are reported as warnings on the
 * optional logger and never thrown.
 *
 * @param client - n8n client used for the listing and the per-run fetches.
 * @param threadId - Thread whose runs are collected.
 * @param logger - Optional logger for verbose and warning output.
 * @returns The fetched records, ordered by ascending `startedAt`.
 */
export async function captureThreadRunDebug(
	client: N8nClient,
	threadId: string,
	logger?: EvalLogger,
): Promise<InstanceAiRunDebugResponse[]> {
	try {
		const listing = await client.listThreadDebugRuns(threadId);
		const summaries = listing.runs ?? [];
		if (summaries.length === 0) {
			logger?.verbose(` No run debug records for thread ${threadId}`);
			return [];
		}

		// Bound the number of concurrent per-run requests.
		const limit = pLimit(RUN_FETCH_CONCURRENCY);
		const pending = summaries.map(
			async (summary) =>
				await limit(async (): Promise<InstanceAiRunDebugResponse | null> => {
					try {
						const record = await client.getRunDebug(summary.runId);
						// Copy the summary's label onto the record when the summary has one.
						if (summary.label) {
							return { ...record, label: summary.label };
						}
						return record;
					} catch (error: unknown) {
						logger?.warn(
							` Run debug fetch failed for ${summary.runId}: ${error instanceof Error ? error.message : String(error)}`,
						);
						return null;
					}
				}),
		);

		const settled = await Promise.all(pending);
		const captured: InstanceAiRunDebugResponse[] = [];
		for (const record of settled) {
			if (record !== null) captured.push(record);
		}
		captured.sort((left, right) => left.startedAt - right.startedAt);
		return captured;
	} catch (error: unknown) {
		logger?.warn(
			` Run debug capture skipped for thread ${threadId}: ${error instanceof Error ? error.message : String(error)}`,
		);
		return [];
	}
}
@@ -12,6 +12,7 @@ import { mkdir, writeFile } from 'node:fs/promises';
import path from 'node:path';
import { setTimeout as delay } from 'node:timers/promises';
import { captureThreadRunDebug } from './capture-run-debug';
import {
SSE_SETTLE_DELAY_MS,
startSseConnection,
@@ -206,6 +207,9 @@ export async function runWorkflowTestCase(
}
if (build.threadId) {
result.threadId = build.threadId;
if (!config.prebuiltWorkflowId) {
result.runDebug = await captureThreadRunDebug(client, build.threadId, logger);
}
}
if (build.transcript) {
result.transcript = build.transcript;
@@ -0,0 +1,10 @@
import type { WorkflowTestCaseResult } from '../types';
/**
 * Reduces a string to characters safe for an anchor id: every run of characters
 * outside `[a-zA-Z0-9_-]` becomes one `-`, then leading and trailing `-` are dropped.
 */
function sanitizeAnchor(value: string): string {
	const dashed = value.replace(/[^a-zA-Z0-9_-]+/g, '-');
	const trimmed = dashed.replace(/^-+|-+$/g, '');
	return trimmed;
}
/**
 * Returns the HTML anchor id of a test case section, shared by the reports that
 * link to each other.
 *
 * The id is `tc-<sanitized fileSlug>`. When the result has no file slug, or the
 * slug sanitizes to an empty string, the index-based `tc-case-<index>` is used so
 * the id is never the bare `tc-`.
 *
 * @param result - Test case result whose `fileSlug` seeds the id.
 * @param index - Position of the result in the report, used for the fallback id.
 */
export function getTestCaseAnchorId(result: WorkflowTestCaseResult, index: number): string {
	const fallback = `case-${String(index)}`;
	const sanitized = sanitizeAnchor(result.fileSlug ?? fallback);
	return `tc-${sanitized || fallback}`;
}
@@ -0,0 +1,428 @@
/**
* HTML report generator for LLM run/step debug captured during workflow evals.
*
* Mirrors the frontend InstanceAiLlmStepsModal layout: runs sidebar, steps
* sidebar, and per-step input/output detail.
*/
import type {
InstanceAiRunDebugResponse,
InstanceAiRunDebugStep,
InstanceAiRunDebugWorkflowCodeSnapshot,
ReadableContentBlock,
ReadableSegment,
} from '@n8n/api-types';
import {
formatDebugJson,
parseInputExtras,
parseMessageBlocks,
parseOutputDisplayBlocks,
parseOutputExtras,
parseStepSummary,
parseSystemPromptForDisplay,
parseUsageSummary,
} from '@n8n/api-types';
import fs from 'fs';
import path from 'path';
import { getTestCaseAnchorId } from './report-anchors';
import type { WorkflowTestCaseResult } from '../types';
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/**
 * Escapes the five HTML-significant characters (`&`, `<`, `>`, `"`, `'`) so the
 * string can be embedded in element content or a quoted attribute value.
 */
function escapeHtml(str: string): string {
	return str.replace(/[&<>"']/g, (char) => {
		switch (char) {
			case '&':
				return '&amp;';
			case '<':
				return '&lt;';
			case '>':
				return '&gt;';
			case '"':
				return '&quot;';
			default:
				// The character class leaves only the single quote here.
				return '&#39;';
		}
	});
}
/**
 * Turns arbitrary text into a token safe for ids and class names: every run of
 * characters outside `[a-zA-Z0-9_-]` becomes one `-`, then edge dashes are removed.
 */
function sanitizeAnchor(value: string): string {
	const dashed = value.replace(/[^a-zA-Z0-9_-]+/g, '-');
	const trimmed = dashed.replace(/^-+|-+$/g, '');
	return trimmed;
}
/**
 * Returns the heading shown for a test case section.
 *
 * The label is the file slug, else the test case description, else the first
 * conversation message truncated to 100 characters. A leading `[iter n/m]` marker
 * on the first message is always kept as a prefix.
 *
 * @param result - Test case result to label.
 */
function getTestCaseLabel(result: WorkflowTestCaseResult): string {
	const prompt = result.testCase.conversation[0]?.text ?? '';
	const iterPrefix = /^\[iter \d+\/\d+\]\s*/.exec(prompt)?.[0] ?? '';
	// Strip the marker before truncating, otherwise the prompt fallback repeats it.
	const promptBody = prompt.slice(iterPrefix.length);
	const truncatedPrompt = promptBody.length > 100 ? `${promptBody.slice(0, 100)}...` : promptBody;
	return iterPrefix + (result.fileSlug ?? result.testCase.description ?? truncatedPrompt);
}
/**
 * Formats an epoch-milliseconds timestamp for display using the host locale.
 *
 * @param ms - Milliseconds since the Unix epoch.
 * @returns The localized date-time string, or the raw number as a string when
 *   `ms` is not a representable date.
 */
function formatTimestamp(ms: number): string {
	const date = new Date(ms);
	// `toLocaleString()` does not throw for out-of-range input; it returns
	// "Invalid Date", so validity has to be checked explicitly.
	return Number.isNaN(date.getTime()) ? String(ms) : date.toLocaleString();
}
/**
 * Renders a value as a collapsible `<details>` panel holding its formatted JSON.
 *
 * The summary line shows the label (or `JSON` when none is given) followed by the
 * first 80 characters of the JSON; an ellipsis is appended only when the JSON was
 * actually cut off.
 *
 * @param value - Value to format and display.
 * @param label - Optional heading for the summary line.
 */
function renderJsonBlock(value: unknown, label?: string): string {
	const previewMaxChars = 80;
	// Format once and reuse the result for both the preview and the full block.
	const json = formatDebugJson(value);
	const preview = json.length > previewMaxChars ? `${json.slice(0, previewMaxChars)}…` : json;
	const heading = label ? `<span class="json-label">${escapeHtml(label)}</span>` : 'JSON';
	return `<details class="json-panel"><summary>${heading} ${escapeHtml(preview)}</summary><pre class="json-block"><code>${escapeHtml(json)}</code></pre></details>`;
}
/**
 * Renders the readable segments of a content block, in order, as one HTML string.
 * Text is escaped; payloads and metadata are shown as collapsible JSON panels.
 */
function renderSegments(segments: ReadableSegment[]): string {
	let html = '';
	for (const segment of segments) {
		switch (segment.type) {
			case 'text':
				html += `<p class="segment-text">${escapeHtml(segment.text)}</p>`;
				break;
			case 'reasoning':
				html += `<div class="segment-block segment-reasoning"><div class="segment-kind">Reasoning</div><p class="segment-text">${escapeHtml(segment.text)}</p></div>`;
				break;
			case 'tool-call': {
				const inputPanel =
					segment.payload !== undefined ? renderJsonBlock(segment.payload, 'Input') : '';
				const metadataPanel = segment.metadata ? renderJsonBlock(segment.metadata, 'Metadata') : '';
				html += `<div class="segment-block segment-tool-call"><div class="segment-kind">Tool call · <code>${escapeHtml(segment.name)}</code></div>${inputPanel}${metadataPanel}</div>`;
				break;
			}
			case 'tool-result': {
				// The tool name is optional on results.
				const nameSuffix = segment.name ? ` · <code>${escapeHtml(segment.name)}</code>` : '';
				const outputPanel =
					segment.payload !== undefined ? renderJsonBlock(segment.payload, 'Output') : '';
				const metadataPanel = segment.metadata ? renderJsonBlock(segment.metadata, 'Metadata') : '';
				html += `<div class="segment-block segment-tool-result"><div class="segment-kind">Tool result${nameSuffix}</div>${outputPanel}${metadataPanel}</div>`;
				break;
			}
			case 'json':
				html += renderJsonBlock(segment.payload, segment.label);
				break;
		}
	}
	return html;
}
/**
 * Renders one content block as a card: role heading, body, optional metadata panel.
 * The body uses the block's segments when it has any, else its escaped plain content.
 */
function renderContentBlock(block: ReadableContentBlock): string {
	const { role, content, segments, metadata } = block;
	// The role is sanitized before it is used as a CSS class suffix.
	const roleClass = `role-${sanitizeAnchor(role.toLowerCase())}`;
	const bodyHtml = segments?.length
		? renderSegments(segments)
		: `<p class="segment-text">${escapeHtml(content)}</p>`;
	const metadataPanel = metadata ? renderJsonBlock(metadata, 'Metadata') : '';
	return [
		`<div class="content-block ${roleClass}">`,
		`<div class="content-role">${escapeHtml(role)}</div>`,
		bodyHtml,
		metadataPanel,
		'</div>',
	].join('');
}
/**
 * Renders one captured workflow code snapshot: a pass/fail badge, the snapshot
 * source, the workflow id when present, the escaped code, an optional patches
 * panel, and the list of errors when there are any.
 */
function renderWorkflowCodeSnapshot(snapshot: InstanceAiRunDebugWorkflowCodeSnapshot): string {
const status = snapshot.success
? '<span class="badge badge-pass">ok</span>'
: '<span class="badge badge-fail">failed</span>';
// The error list is omitted entirely when no errors were recorded.
const errors =
snapshot.errors && snapshot.errors.length > 0
? `<ul class="error-list">${snapshot.errors.map((e) => `<li>${escapeHtml(e)}</li>`).join('')}</ul>`
: '';
return `<div class="workflow-code-snapshot">
<div class="workflow-code-header">${status} <code>${escapeHtml(snapshot.source)}</code>${snapshot.workflowId ? ` · ${escapeHtml(snapshot.workflowId)}` : ''}</div>
<pre class="code-block"><code>${escapeHtml(snapshot.code)}</code></pre>
${snapshot.patches ? renderJsonBlock(snapshot.patches, 'Patches') : ''}
${errors}
</div>`;
}
/**
 * Renders the detail panel of one LLM step: finish-reason and usage chips, the
 * input (system prompt, messages, extras), the output (content blocks, usage,
 * extras), and the supplied workflow code snapshots.
 *
 * @param step - Captured step whose input and output are displayed.
 * @param workflowCode - Snapshots appended as a "Workflow code" section when non-empty.
 */
function renderStepDetail(
step: InstanceAiRunDebugStep,
workflowCode: InstanceAiRunDebugWorkflowCodeSnapshot[],
): string {
const parsedSystem = parseSystemPromptForDisplay(step.input?.system);
const messageBlocks = parseMessageBlocks(step.input?.messages);
const inputExtras = parseInputExtras(step.input);
const outputBlocks = parseOutputDisplayBlocks(step.output);
const outputExtras = parseOutputExtras(step.output);
const usage = parseUsageSummary(step.output?.usage);
const finishReason =
typeof step.output?.finishReason === 'string' ? step.output.finishReason : undefined;
// System blocks first, then the observations text (when present) as its own card.
const systemHtml = [
...parsedSystem.systemBlocks.map(renderContentBlock),
parsedSystem.observations
? `<div class="content-block role-observations"><div class="content-role">Observations</div><p class="segment-text">${escapeHtml(parsedSystem.observations)}</p></div>`
: '',
].join('');
const workflowCodeHtml =
workflowCode.length > 0
? `<div class="detail-section"><div class="detail-section-title">Workflow code</div>${workflowCode.map(renderWorkflowCodeSnapshot).join('')}</div>`
: '';
// Each optional part collapses to an empty string when there is nothing to show.
return `<div class="step-detail-inner">
<div class="detail-meta">${finishReason ? `<span class="meta-chip">finish: ${escapeHtml(finishReason)}</span>` : ''}${usage ? `<span class="meta-chip">${escapeHtml(usage.label)}</span>` : ''}</div>
<div class="detail-section"><div class="detail-section-title">Input</div>
${systemHtml ? `<div class="detail-subsection"><div class="detail-subsection-title">System</div>${systemHtml}</div>` : ''}
${messageBlocks.length > 0 ? `<div class="detail-subsection"><div class="detail-subsection-title">Messages</div>${messageBlocks.map(renderContentBlock).join('')}</div>` : ''}
${inputExtras ? renderJsonBlock(inputExtras, 'Input extras') : ''}
</div>
<div class="detail-section"><div class="detail-section-title">Output</div>
${outputBlocks.length > 0 ? outputBlocks.map(renderContentBlock).join('') : '<div class="muted">No structured output</div>'}
${usage ? renderJsonBlock(usage.metadata, 'Usage') : ''}
${outputExtras ? renderJsonBlock(outputExtras, 'Output extras') : ''}
</div>
${workflowCodeHtml}
</div>`;
}
/**
 * Renders the chips shown on a step button, in fixed order: finish reason, one
 * chip per tool name, usage label, message preview. Absent values produce no chip.
 */
function renderStepSummaryChips(summary: ReturnType<typeof parseStepSummary>): string {
	const { finishReason, toolNames, usageLabel, messagePreview } = summary;
	return [
		finishReason ? `<span class="chip">${escapeHtml(finishReason)}</span>` : '',
		...toolNames.map((tool) => `<span class="chip chip-tool">${escapeHtml(tool)}</span>`),
		usageLabel ? `<span class="chip chip-muted">${escapeHtml(usageLabel)}</span>` : '',
		messagePreview ? `<span class="chip chip-preview">${escapeHtml(messagePreview)}</span>` : '',
	].join('');
}
/**
 * Renders the panel of one run: a column of step buttons and a column holding one
 * detail panel per step. The first step starts selected, and every run panel except
 * the first starts hidden.
 *
 * @param run - Captured run to render.
 * @param caseIndex - Index of the owning test case, passed to the click handlers.
 * @param runIndex - Index of the run within its test case.
 */
function renderRunPanel(
run: InstanceAiRunDebugResponse,
caseIndex: number,
runIndex: number,
): string {
const label = run.label;
// Unlabelled runs are titled by their one-based position.
const displayLabel = label ?? `Run ${String(runIndex + 1)}`;
const stepsList = run.steps
.map((step, stepIndex) => {
const summary = parseStepSummary(step.input, step.output);
const active = stepIndex === 0 ? ' active' : '';
return `<button type="button" class="step-btn${active}" data-step-index="${String(stepIndex)}" onclick="selectStep(${String(caseIndex)}, ${String(runIndex)}, ${String(stepIndex)})">
<span class="step-num">#${String(step.stepNumber)}</span>
<span class="step-chips">${renderStepSummaryChips(summary)}</span>
</button>`;
})
.join('');
// Every step panel receives the run's full list of workflow code snapshots.
const stepPanels = run.steps
.map((step, stepIndex) => {
const hidden = stepIndex === 0 ? '' : ' hidden';
return `<div class="step-panel${hidden}" data-step-index="${String(stepIndex)}">${renderStepDetail(step, run.workflowCode)}</div>`;
})
.join('');
const hidden = runIndex === 0 ? '' : ' hidden';
return `<div class="run-panel${hidden}" data-run-index="${String(runIndex)}">
<div class="steps-col">
<div class="col-title">Steps (${String(run.steps.length)})</div>
<div class="steps-list">${stepsList || '<div class="muted">No steps captured</div>'}</div>
</div>
<div class="detail-col">
<div class="col-title">${escapeHtml(displayLabel)} · ${escapeHtml(formatTimestamp(run.startedAt))}</div>
<div class="step-panels">${stepPanels || '<div class="muted">No step detail</div>'}</div>
</div>
</div>`;
}
/**
 * Renders the debug section of one test case: a header with run, step and thread
 * information, a sidebar of run buttons, and one panel per run.
 *
 * @param result - Test case result; `runDebug` is treated as empty when absent.
 * @param caseIndex - Index used for the anchor id, `data-case-index` and click handlers.
 */
function renderTestCaseDebug(result: WorkflowTestCaseResult, caseIndex: number): string {
const runs = result.runDebug ?? [];
const anchorId = getTestCaseAnchorId(result, caseIndex);
const label = getTestCaseLabel(result);
const totalSteps = runs.reduce((sum, run) => sum + run.steps.length, 0);
// One button per run; the first run starts selected.
const runTabs = runs
.map((run, runIndex) => {
const tabLabel = run.label ?? `Run ${String(runIndex + 1)}`;
const active = runIndex === 0 ? ' active' : '';
return `<button type="button" class="run-btn${active}" data-run-index="${String(runIndex)}" onclick="selectRun(${String(caseIndex)}, ${String(runIndex)})">
<span class="run-label">${escapeHtml(tabLabel)}</span>
<span class="run-meta">${String(run.steps.length)} steps · ${escapeHtml(formatTimestamp(run.startedAt))}</span>
</button>`;
})
.join('');
const runPanels = runs.map((run, runIndex) => renderRunPanel(run, caseIndex, runIndex)).join('');
return `<section class="debug-case" id="${escapeHtml(anchorId)}" data-case-index="${String(caseIndex)}">
<header class="debug-case-header">
<h2>${escapeHtml(label)}</h2>
<div class="debug-case-meta">
<span class="meta-item">${String(runs.length)} run${runs.length === 1 ? '' : 's'}</span>
<span class="meta-item">${String(totalSteps)} step${totalSteps === 1 ? '' : 's'}</span>
${result.threadId ? `<span class="meta-item mono">thread ${escapeHtml(result.threadId)}</span>` : ''}
</div>
</header>
<div class="debug-grid">
<aside class="runs-col">
<div class="col-title">Runs</div>
<div class="runs-list">${runTabs}</div>
</aside>
<div class="debug-main" id="debug-main-${String(caseIndex)}">${runPanels}</div>
</div>
</section>`;
}
/**
 * Totals the dashboard figures: test cases with at least one captured run, and
 * the runs and steps across those cases.
 */
function countDebugStats(results: WorkflowTestCaseResult[]): {
	testCases: number;
	runs: number;
	steps: number;
} {
	const totals = { testCases: 0, runs: 0, steps: 0 };
	for (const { runDebug } of results) {
		// Results without captured runs contribute nothing to any figure.
		if (!runDebug || runDebug.length === 0) continue;
		totals.testCases += 1;
		totals.runs += runDebug.length;
		for (const run of runDebug) {
			totals.steps += run.steps.length;
		}
	}
	return totals;
}
// ---------------------------------------------------------------------------
// Full report
// ---------------------------------------------------------------------------
/**
 * Generates the standalone HTML page for LLM run/step debug captured during a
 * workflow eval.
 *
 * Only results with at least one captured run get a section. Each section keeps
 * the result's position in `results` as its case index, so anchors and click
 * handlers stay aligned with the input order. When nothing was captured the page
 * shows an empty-state notice instead.
 *
 * @param results - Test case results in report order.
 * @returns The complete HTML document, with styles and scripts inlined.
 */
export function generateRunDebugReport(results: WorkflowTestCaseResult[]): string {
	const stats = countDebugStats(results);
	// Carry each result's own position instead of looking it up again with indexOf,
	// which is quadratic and returns the first match for a repeated reference.
	const sections: string[] = [];
	results.forEach((result, caseIndex) => {
		if ((result.runDebug?.length ?? 0) > 0) {
			sections.push(renderTestCaseDebug(result, caseIndex));
		}
	});
	const body =
		sections.length > 0
			? sections.join('\n')
			: '<div class="empty-state">No LLM run debug was captured for this eval. Set <code>N8N_INSTANCE_AI_RUN_DEBUG_ENABLED=true</code> on the target n8n instance, ensure it exposes <code>/instance-ai/debug/*</code> endpoints, and that builds completed with a thread id.</div>';
	return `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Workflow eval — LLM debug</title>
<style>
:root {
--bg-primary: #0d1117;
--bg-secondary: #161b22;
--bg-tertiary: #1c2129;
--border: #30363d;
--border-light: #21262d;
--text-primary: #f0f6fc;
--text-secondary: #c9d1d9;
--text-muted: #8b949e;
--color-pass: #3fb950;
--color-fail: #f85149;
--color-info: #58a6ff;
--color-pass-bg: #23863622;
--color-fail-bg: #da363322;
}
* { margin: 0; padding: 0; box-sizing: border-box; }
body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; background: var(--bg-primary); color: var(--text-secondary); padding: 24px; max-width: 1600px; margin: 0 auto; font-size: 14px; line-height: 1.5; }
h1 { color: var(--text-primary); font-size: 20px; margin-bottom: 2px; }
.subtitle { color: var(--text-muted); font-size: 13px; margin-bottom: 20px; }
.dashboard { display: flex; gap: 12px; margin-bottom: 24px; flex-wrap: wrap; }
.stat-card { background: var(--bg-secondary); border: 1px solid var(--border); border-radius: 8px; padding: 14px 20px; min-width: 120px; }
.stat-card .label { color: var(--text-muted); font-size: 12px; }
.stat-card .value { color: var(--text-primary); font-size: 26px; font-weight: 700; margin-top: 2px; }
.badge { display: inline-block; padding: 2px 8px; border-radius: 12px; font-size: 11px; font-weight: 600; }
.badge-pass { background: var(--color-pass-bg); color: var(--color-pass); }
.badge-fail { background: var(--color-fail-bg); color: var(--color-fail); }
.mono { font-family: monospace; font-size: 12px; }
.muted { color: var(--text-muted); font-size: 12px; }
.empty-state { background: var(--bg-secondary); border: 1px solid var(--border); border-radius: 8px; padding: 24px; color: var(--text-muted); }
.debug-case { background: var(--bg-secondary); border: 1px solid var(--border); border-radius: 8px; margin-bottom: 16px; overflow: hidden; }
.debug-case-header { padding: 14px 16px; border-bottom: 1px solid var(--border-light); }
.debug-case-header h2 { color: var(--text-primary); font-size: 14px; font-weight: 600; margin-bottom: 4px; }
.debug-case-meta { display: flex; gap: 10px; flex-wrap: wrap; font-size: 12px; color: var(--text-muted); }
.debug-grid { display: grid; grid-template-columns: 220px 1fr; min-height: 420px; }
.runs-col { border-right: 1px solid var(--border-light); background: var(--bg-primary); padding: 12px; }
.debug-main { min-width: 0; }
.run-panel { display: grid; grid-template-columns: 260px 1fr; min-height: 420px; }
.run-panel.hidden { display: none; }
.steps-col { border-right: 1px solid var(--border-light); background: var(--bg-secondary); padding: 12px; overflow: auto; max-height: 70vh; }
.detail-col { padding: 12px; overflow: auto; max-height: 70vh; background: var(--bg-primary); }
.col-title { color: var(--text-muted); font-size: 11px; font-weight: 700; text-transform: uppercase; letter-spacing: 0.04em; margin-bottom: 8px; }
.runs-list, .steps-list { display: flex; flex-direction: column; gap: 6px; }
.run-btn, .step-btn { width: 100%; text-align: left; background: var(--bg-secondary); border: 1px solid var(--border); border-radius: 6px; color: var(--text-secondary); padding: 8px 10px; cursor: pointer; }
.run-btn:hover, .step-btn:hover { border-color: var(--color-info); color: var(--text-primary); }
.run-btn.active, .step-btn.active { border-color: var(--color-info); background: #0f1c2e; color: var(--text-primary); }
.run-label, .step-num { display: block; color: var(--text-primary); font-size: 12px; font-weight: 600; }
.run-meta { display: block; color: var(--text-muted); font-size: 11px; margin-top: 2px; }
.step-chips { display: flex; flex-wrap: wrap; gap: 4px; margin-top: 4px; }
.chip { display: inline-block; padding: 1px 6px; border-radius: 999px; background: var(--bg-tertiary); color: var(--text-secondary); font-size: 10px; }
.chip-tool { color: var(--color-info); }
.chip-muted { color: var(--text-muted); }
.chip-preview { max-width: 100%; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
.step-panel.hidden { display: none; }
.detail-meta { display: flex; flex-wrap: wrap; gap: 6px; margin-bottom: 10px; }
.meta-chip { font-size: 11px; padding: 2px 8px; border-radius: 999px; border: 1px solid var(--border); color: var(--text-muted); }
.detail-section { margin-bottom: 14px; }
.detail-section-title { color: var(--color-info); font-size: 12px; font-weight: 700; margin-bottom: 6px; }
.detail-subsection { margin: 8px 0; }
.detail-subsection-title { color: var(--text-muted); font-size: 11px; font-weight: 600; margin-bottom: 4px; }
.content-block { border: 1px solid var(--border-light); border-radius: 6px; padding: 10px; margin-bottom: 8px; background: var(--bg-secondary); }
.content-role { color: var(--text-muted); font-size: 10px; font-weight: 700; text-transform: uppercase; margin-bottom: 6px; }
.segment-text { white-space: pre-wrap; color: var(--text-secondary); font-size: 12px; }
.segment-block { margin-top: 6px; }
.segment-kind { color: var(--text-muted); font-size: 11px; margin-bottom: 4px; }
.segment-kind code { color: var(--color-info); }
.json-panel { margin-top: 6px; }
.json-panel summary { cursor: pointer; color: var(--text-muted); font-size: 11px; }
.json-label { color: var(--color-info); font-weight: 600; margin-right: 6px; }
.json-block, .code-block { background: var(--bg-tertiary); border: 1px solid var(--border-light); border-radius: 6px; padding: 10px; overflow: auto; max-height: 360px; font-size: 11px; line-height: 1.45; margin-top: 6px; }
.json-block code, .code-block code { color: var(--text-secondary); white-space: pre-wrap; word-break: break-word; }
.workflow-code-snapshot { border: 1px solid var(--border-light); border-radius: 6px; padding: 10px; margin-bottom: 8px; }
.workflow-code-header { margin-bottom: 6px; font-size: 12px; }
.error-list { margin: 6px 0 0 18px; color: var(--color-fail); font-size: 12px; }
</style>
</head>
<body>
<h1>Workflow eval — LLM debug</h1>
<p class="subtitle">Captured orchestrator LLM steps per test case (same data as the Instance AI debug modal)</p>
<div class="dashboard">
<div class="stat-card"><div class="label">Test cases</div><div class="value">${String(stats.testCases)}</div></div>
<div class="stat-card"><div class="label">Runs</div><div class="value">${String(stats.runs)}</div></div>
<div class="stat-card"><div class="label">Steps</div><div class="value">${String(stats.steps)}</div></div>
</div>
${body}
<script>
function getCaseRoot(caseIndex) {
return document.querySelector('[data-case-index="' + caseIndex + '"]');
}
function selectRun(caseIndex, runIndex) {
const root = getCaseRoot(caseIndex);
if (!root) return;
root.querySelectorAll('.run-btn').forEach((btn) => {
btn.classList.toggle('active', btn.getAttribute('data-run-index') === String(runIndex));
});
root.querySelectorAll('.run-panel').forEach((panel) => {
panel.classList.toggle('hidden', panel.getAttribute('data-run-index') !== String(runIndex));
});
selectStep(caseIndex, runIndex, 0);
}
function selectStep(caseIndex, runIndex, stepIndex) {
const root = getCaseRoot(caseIndex);
if (!root) return;
const runPanel = root.querySelector('.run-panel[data-run-index="' + runIndex + '"]');
if (!runPanel) return;
runPanel.querySelectorAll('.step-btn').forEach((btn) => {
btn.classList.toggle('active', btn.getAttribute('data-step-index') === String(stepIndex));
});
runPanel.querySelectorAll('.step-panel').forEach((panel) => {
panel.classList.toggle('hidden', panel.getAttribute('data-step-index') !== String(stepIndex));
});
}
</script>
</body>
</html>`;
}
/**
 * Renders the LLM debug report for `results` and writes it into the `.data`
 * directory two levels above this module.
 *
 * Two copies are written: a timestamped file, and `workflow-eval-llm-debug.html`,
 * which is overwritten on every call.
 *
 * @param results Test case results passed to `generateRunDebugReport`.
 * @returns Path of the timestamped report file.
 */
export function writeRunDebugReport(results: WorkflowTestCaseResult[]): string {
	const reportDir = path.join(__dirname, '..', '..', '.data');
	// `recursive: true` succeeds when the directory already exists, so a separate
	// existence check is unnecessary (and would race with concurrent writers).
	fs.mkdirSync(reportDir, { recursive: true });

	const html = generateRunDebugReport(results);
	// ISO timestamp with ':' and '.' replaced, cut to second precision: YYYY-MM-DDTHH-MM-SS.
	const timestamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19);
	const reportPath = path.join(reportDir, `workflow-eval-llm-debug-${timestamp}.html`);

	fs.writeFileSync(reportPath, html);
	fs.writeFileSync(path.join(reportDir, 'workflow-eval-llm-debug.html'), html);
	return reportPath;
}
@@ -10,6 +10,7 @@
import fs from 'fs';
import path from 'path';
import { getTestCaseAnchorId } from './report-anchors';
import { groupOutcomesByDimension } from '../binaryChecks/aggregate';
import { CHECK_DIMENSIONS, type CheckDimension, type CheckOutcome } from '../binaryChecks/types';
import type {
@@ -1207,6 +1208,11 @@ function renderTestCase(result: WorkflowTestCaseResult, tcIndex: number): string
? `<a class="workflow-link" href="${workflowUrl(result.n8nBaseUrl, result.workflowId)}" target="_blank" rel="noopener" onclick="event.stopPropagation()">open in n8n →</a>`
: '';
const llmDebugLink =
(result.runDebug?.length ?? 0) > 0
? `<a class="workflow-link" href="workflow-eval-llm-debug.html#${escapeHtml(getTestCaseAnchorId(result, tcIndex))}" onclick="event.stopPropagation()">LLM steps →</a>`
: '';
return `<div class="test-case ${statusClass}">
<div class="test-case-header" onclick="this.parentElement.classList.toggle('expanded')">
<div class="test-case-title">
@@ -1218,6 +1224,7 @@ function renderTestCase(result: WorkflowTestCaseResult, tcIndex: number): string
${result.threadId ? `<span class="workflow-id" title="thread id — open in the UI">🧵 ${escapeHtml(result.threadId)}</span>` : ''}
${result.workflowId ? `<span class="workflow-id">${escapeHtml(result.workflowId)}</span>` : ''}
${workflowLink}
${llmDebugLink}
</div>
<div class="scenario-indicators">${scenarioIndicators}</div>
</div>
@@ -2,7 +2,7 @@
// Shared types for the instance-ai workflow test case evaluator
// ---------------------------------------------------------------------------
import type { InstanceAiEvalExecutionResult } from '@n8n/api-types';
import type { InstanceAiEvalExecutionResult, InstanceAiRunDebugResponse } from '@n8n/api-types';
import type { CheckOutcome } from './binaryChecks/types';
import type { WorkflowResponse } from './clients/n8n-client';
@@ -246,6 +246,8 @@ export interface WorkflowTestCaseResult {
/** Base URL of the n8n instance behind this run. Per-result so multi-lane
* configs each get their own URL for canvas/execution links. */
n8nBaseUrl?: string;
/** Per-run LLM step debug captured from the instance-ai debug API after build. */
runDebug?: InstanceAiRunDebugResponse[];
}
// ---------------------------------------------------------------------------
@@ -11,7 +11,9 @@
# - docker
# - dotenvx (pnpm exec dotenvx works)
# - n8nio/n8n:local image (build with: INCLUDE_TEST_CONTROLLER=true pnpm build:docker)
# - .env.local at repo root with N8N_INSTANCE_AI_MODEL_API_KEY (+ optional N8N_EVAL_*)
# - .env.local at repo root with N8N_INSTANCE_AI_MODEL_API_KEY (+ optional N8N_EVAL_*).
# All KEY=VALUE entries from that file are passed into each lane container via
# docker --env-file; lane-specific -e flags below override on conflict.
#
set -euo pipefail
@@ -93,6 +95,39 @@ is_node_fetch_blocked_port() {
esac
}
# Force-remove any docker container holding the given host port (e.g. a lane
# left over from a previous --keep-containers run).
#
# Arguments:
#   $1 - host port to free
# Returns:
#   0 once the container(s) were removed and port_in_use reports the port free
#   1 when docker ps lists no container publishing the port (i.e. the port is
#     held by something that isn't a docker container), or when the port is
#     still in use after the last poll
# Calls die when a container publishing the port is not named n8n-eval-<port>.
remove_container_on_port() {
local port="$1"
local expected_name="n8n-eval-${port}"
local ids
ids="$(docker ps -q --filter "publish=${port}")"
if [[ -z "$ids" ]]; then
return 1
fi
local id
# Refuse to touch anything that is not this port's eval lane container.
for id in $ids; do
local name
name="$(docker inspect --format '{{.Name}}' "$id")"
# Strip the leading slash from the inspected name before comparing.
name="${name#/}"
if [[ "$name" != "$expected_name" ]]; then
local names
names="$(docker ps --filter "publish=${port}" --format '{{.Names}}' | tr '\n' ' ')"
die "port ${port} is held by non-eval container(s): ${names}"
fi
done
log "removing existing eval container on port ${port}: ${expected_name}"
# shellcheck disable=SC2086 # ids is a newline-separated list of container IDs
docker rm -f $ids >/dev/null 2>&1 || true
# Wait for the kernel to release the port after the container dies.
# Up to 20 checks with a 0.5s sleep after each failed check.
for _ in $(seq 1 20); do
port_in_use "$port" || return 0
sleep 0.5
done
return 1
}
allocate_lane_ports() {
local count="$1"
local start="$2"
@@ -107,7 +142,11 @@ allocate_lane_ports() {
if is_node_fetch_blocked_port "$port"; then
log "skipping port ${port} (blocked by Node fetch)"
elif port_in_use "$port"; then
die "port ${port} is already in use — stop the existing process or pick --start-port"
if remove_container_on_port "$port"; then
PORTS+=("$port")
else
die "port ${port} is in use by a non-docker process — stop it or pick --start-port"
fi
else
PORTS+=("$port")
fi
@@ -242,6 +281,18 @@ ENV_FILE_PATH="${REPO_ROOT}/${ENV_FILE}"
EVAL_PKG_DIR="${REPO_ROOT}/packages/@n8n/instance-ai"
RESET_PAYLOAD='{"owner":{"email":"nathan@n8n.io","password":"PlaywrightTest123","firstName":"Eval","lastName":"Owner"},"admin":{"email":"admin@n8n.io","password":"PlaywrightTest123","firstName":"Admin","lastName":"User"},"members":[],"chat":{"email":"chat@n8n.io","password":"PlaywrightTest123","firstName":"Chat","lastName":"User"}}'
# ---------------------------------------------------------------------------
# Preflight
# ---------------------------------------------------------------------------
require_cmd docker
require_cmd curl
require_cmd lsof
require_cmd pnpm
cd "$REPO_ROOT"
# Port allocation may docker-rm stale lane containers, so it must run after
# the docker preflight check.
PORTS=()
BASE_URLS=()
@@ -253,15 +304,7 @@ done
BASE_URL_CSV="$(IFS=,; printf '%s' "${BASE_URLS[*]}")"
# ---------------------------------------------------------------------------
# Preflight
# ---------------------------------------------------------------------------
require_cmd docker
require_cmd curl
require_cmd lsof
require_cmd pnpm
cd "$REPO_ROOT"
[[ -f "$ENV_FILE_PATH" ]] || die "Env file not found: $ENV_FILE_PATH"
API_KEY="$(load_env_var N8N_INSTANCE_AI_MODEL_API_KEY "$ENV_FILE_PATH")"
[[ -n "$API_KEY" ]] || die "N8N_INSTANCE_AI_MODEL_API_KEY is empty"
@@ -279,17 +322,22 @@ trap cleanup EXIT
# Start lanes
# ---------------------------------------------------------------------------
log "starting ${INSTANCE_COUNT} lane(s) on ports: ${PORTS[*]}"
log "lane env file: ${ENV_FILE_PATH}"
for port in "${PORTS[@]}"; do
name="n8n-eval-${port}"
CONTAINER_NAMES+=("$name")
# A stopped leftover container doesn't hold the port (so allocation passes)
# but would still collide on the name.
if docker container inspect "$name" >/dev/null 2>&1; then
log "removing stale container ${name}"
docker rm -f "$name" >/dev/null 2>&1 || true
fi
docker run -d --name "$name" \
--env-file "$ENV_FILE_PATH" \
-e E2E_TESTS=true \
-e N8N_ENABLED_MODULES=instance-ai \
-e N8N_AI_ENABLED=true \
-e N8N_INSTANCE_AI_MODEL_API_KEY="$API_KEY" \
-e N8N_AI_ASSISTANT_BASE_URL="" \
-p "${port}:5678" \
"$IMAGE" >/dev/null
@@ -0,0 +1,222 @@
import type { OnStepFinishEvent, OnStepStartEvent } from 'ai';
import { describe, expect, it } from 'vitest';
import { mock } from 'vitest-mock-extended';
import type { Logger } from '../../logger';
import {
RunDebugBuffer,
buildRunDebugLabel,
createRunDebugStepHooks,
sanitizeStepFinish,
sanitizeStepStart,
} from '../run-debug-buffer';
import { sanitizeDebugSnapshotValue } from '../sanitize-debug-snapshot';
/** Builds a minimal step-finish event whose text and assistant message both carry `text`. */
function makeFinishEvent(text: string): OnStepFinishEvent {
	const usage = { inputTokens: 1, outputTokens: 2, totalTokens: 3 };
	const response = {
		modelId: 'test-model',
		timestamp: new Date('2026-01-01T00:00:00.000Z'),
		messages: [{ role: 'assistant', content: text }],
	};
	const partialEvent = {
		stepNumber: 0,
		text,
		toolCalls: [],
		finishReason: 'tool-calls',
		usage,
		response,
	};
	// Only the fields the tests read are populated, hence the double cast.
	return partialEvent as unknown as OnStepFinishEvent;
}
/** Builds a minimal step-start event with one user message, one tool and a fresh abort signal. */
function makeStartEvent(): OnStepStartEvent {
	const { signal } = new AbortController();
	const partialEvent = {
		stepNumber: 0,
		system: 'You are helpful',
		messages: [{ role: 'user', content: 'hello' }],
		tools: { search: { description: 'search', inputSchema: { type: 'object' } } },
		toolChoice: 'auto',
		activeTools: ['search'],
		abortSignal: signal,
	};
	// Only the fields the tests read are populated, hence the double cast.
	return partialEvent as unknown as OnStepStartEvent;
}
// Covers step pairing, run-scoped step numbering, payload sanitization,
// run eviction and run labels of the in-memory run debug buffer.
describe('RunDebugBuffer', () => {
it('pairs step start and finish by run-scoped step index', () => {
const buffer = new RunDebugBuffer();
buffer.ensure('run-1', 'thread-1');
buffer.recordStepStart('run-1', 0, makeStartEvent());
buffer.recordStepFinish('run-1', 0, makeFinishEvent('done'));
const record = buffer.get('run-1');
expect(record?.steps).toHaveLength(1);
expect(record?.steps[0]?.stepNumber).toBe(0);
expect(record?.steps[0]?.input?.messages).toBeDefined();
expect(record?.steps[0]?.output?.text).toBe('done');
});
it('records multiple orchestrator iterations even when SDK stepNumber stays 0', () => {
const buffer = new RunDebugBuffer();
buffer.ensure('run-1', 'thread-1');
const hooks = createRunDebugStepHooks(buffer, { runId: 'run-1', threadId: 'thread-1' });
// Every helper event reports stepNumber 0; the hooks must still allocate 0, 1, 2.
for (const label of ['first', 'second', 'third']) {
hooks.onStepStart(makeStartEvent());
hooks.onStepFinish(makeFinishEvent(label));
}
const record = buffer.get('run-1');
expect(record?.steps).toHaveLength(3);
expect(record?.steps.map((step) => step.stepNumber)).toEqual([0, 1, 2]);
expect(record?.steps.map((step) => step.output?.text)).toEqual(['first', 'second', 'third']);
});
it('continues step numbering after resume hooks are recreated', () => {
const buffer = new RunDebugBuffer();
buffer.ensure('run-1', 'thread-1');
const firstPass = createRunDebugStepHooks(buffer, { runId: 'run-1', threadId: 'thread-1' });
firstPass.onStepStart(makeStartEvent());
firstPass.onStepFinish(makeFinishEvent('before suspend'));
// A second hook set for the same run must pick up at the buffer's next step index.
const resumePass = createRunDebugStepHooks(buffer, { runId: 'run-1', threadId: 'thread-1' });
resumePass.onStepStart(makeStartEvent());
resumePass.onStepFinish(makeFinishEvent('after resume'));
const record = buffer.get('run-1');
expect(record?.steps).toHaveLength(2);
expect(record?.steps[1]?.stepNumber).toBe(1);
expect(record?.steps[1]?.output?.text).toBe('after resume');
});
it('keeps full tool definitions and strips abortSignal from step start', () => {
const sanitized = sanitizeStepStart(
{
stepNumber: 1,
system: 'system prompt',
messages: [{ role: 'user', content: 'hello' }],
tools: {
search: { description: 'search', inputSchema: { type: 'object', properties: { q: {} } } },
},
toolChoice: 'auto',
activeTools: ['search'],
abortSignal: new AbortController().signal,
} as unknown as OnStepStartEvent,
4,
);
// stepNumber is the run-scoped index passed in; the event's own number moves to sdkStepNumber.
expect(sanitized.stepNumber).toBe(4);
expect(sanitized.sdkStepNumber).toBe(1);
expect(sanitized.activeTools).toEqual(['search']);
expect(sanitized.tools).toEqual({
search: { description: 'search', inputSchema: { type: 'object', properties: { q: {} } } },
});
expect(sanitized).not.toHaveProperty('abortSignal');
});
it('does not truncate long captured strings', () => {
const longSystem = 'x'.repeat(5_000);
const sanitized = sanitizeDebugSnapshotValue(longSystem);
expect(sanitized).toHaveLength(5_000);
});
it('captures full tool call inputs and outputs on step finish', () => {
const sanitized = sanitizeStepFinish(
{
stepNumber: 0,
text: '',
toolCalls: [
{
toolCallId: 'tc-1',
toolName: 'build-workflow',
input: { code: 'full workflow code payload' },
},
],
toolResults: [
{
toolCallId: 'tc-1',
toolName: 'build-workflow',
output: { success: true, workflowId: 'wf-1' },
},
],
finishReason: 'tool-calls',
usage: { inputTokens: 1, outputTokens: 2, totalTokens: 3 },
response: {
modelId: 'test-model',
timestamp: new Date('2026-01-01T00:00:00.000Z'),
messages: [{ role: 'assistant', content: '' }],
body: { secret: 'raw-provider-body' },
},
} as unknown as OnStepFinishEvent,
0,
);
expect(sanitized.toolCalls).toEqual([
{
toolCallId: 'tc-1',
toolName: 'build-workflow',
input: { code: 'full workflow code payload' },
},
]);
expect(sanitized.toolResults).toEqual([
{
toolCallId: 'tc-1',
toolName: 'build-workflow',
output: { success: true, workflowId: 'wf-1' },
},
]);
// The raw provider body under `response` must be dropped from the snapshot.
expect(sanitized.response).not.toHaveProperty('body');
});
it('evicts oldest run when cap is exceeded', () => {
const logger = mock<Logger>();
const buffer = new RunDebugBuffer(logger);
// 51 runs is one more than the buffer's run cap (MAX_RUNS = 50).
for (let index = 0; index < 51; index++) {
const runId = `run-${index}`;
buffer.ensure(runId, 'thread-1');
buffer.recordStepStart(runId, 0, {
stepNumber: 0,
messages: [],
} as unknown as OnStepStartEvent);
}
expect(buffer.get('run-0')).toBeUndefined();
expect(buffer.get('run-50')).toBeDefined();
expect(logger.warn).toHaveBeenCalled();
});
it('createRunDebugStepHooks writes into the buffer', () => {
const buffer = new RunDebugBuffer();
buffer.ensure('run-1', 'thread-1');
const hooks = createRunDebugStepHooks(buffer, { runId: 'run-1', threadId: 'thread-1' });
hooks.onStepStart({
stepNumber: 2,
messages: [{ role: 'user', content: 'ping' }],
} as unknown as OnStepStartEvent);
// The event's stepNumber (2) is kept only as sdkStepNumber; the stored index starts at 0.
expect(buffer.get('run-1')?.steps[0]?.stepNumber).toBe(0);
expect(buffer.get('run-1')?.steps[0]?.input?.sdkStepNumber).toBe(2);
});
it('stores a run label on first ensure', () => {
const buffer = new RunDebugBuffer();
buffer.ensure('run-1', 'thread-1', 'build a weather workflow');
expect(buffer.get('run-1')?.label).toBe('build a weather workflow');
// A second ensure for an existing run is a no-op, so the label is unchanged.
buffer.ensure('run-1', 'thread-1', 'ignored');
expect(buffer.get('run-1')?.label).toBe('build a weather workflow');
});
});
// Covers label selection: plain user message vs. known resume reasons.
describe('buildRunDebugLabel', () => {
it('uses the user message when no resume reason is provided', () => {
expect(buildRunDebugLabel({ message: 'build a simple workflow' })).toBe(
'build a simple workflow',
);
});
it('maps resume reasons to action labels', () => {
expect(buildRunDebugLabel({ resumeReason: 'approval' })).toBe('Resume · approval');
expect(buildRunDebugLabel({ resumeReason: 'replan' })).toBe('Follow-up · replan');
});
});
@@ -0,0 +1,251 @@
import { scrubSecretsInText } from '@n8n/utils';
import type { OnStepFinishEvent, OnStepStartEvent } from 'ai';
import type { Logger } from '../logger';
import { sanitizeDebugSnapshotRecord, sanitizeDebugSnapshotValue } from './sanitize-debug-snapshot';
// Maximum number of run records kept; the oldest run is evicted before a new one is added.
const MAX_RUNS = 50;
// Maximum number of steps kept per run; the first (oldest) step is evicted before a new one is added.
const MAX_STEPS_PER_RUN = 200;
// Maximum number of workflow code snapshots kept per run; the oldest snapshot is dropped first.
const MAX_WORKFLOW_SNAPSHOTS_PER_RUN = 100;
/** Workflow code snapshot as handed to `RunDebugBuffer.recordWorkflowCode`. */
export interface WorkflowCodeSnapshotInput {
/** Workflow code text; secrets are scrubbed from it when the snapshot is stored. */
code: string;
/** Whether the code was supplied in full or produced by applying patches. */
source: 'full-code' | 'patch';
/** Patch payload, if any; sanitized when the snapshot is stored. */
patches?: unknown;
workflowId?: string;
toolCallId?: string;
success: boolean;
/** Error messages, if any; secrets are scrubbed from each when the snapshot is stored. */
errors?: string[];
/** Capture time — presumably epoch milliseconds; confirm against callers. */
capturedAt: number;
}
/**
 * Sanitized step-start payload. `stepNumber` is the run-scoped step index and
 * `sdkStepNumber` the step number reported on the SDK event; all remaining
 * keys are the sanitized event fields.
 */
export type SanitizedStepStart = {
stepNumber: number;
sdkStepNumber?: number;
} & Record<string, unknown>;
/**
 * Sanitized step-finish payload. `stepNumber` is the run-scoped step index and
 * `sdkStepNumber` the step number reported on the SDK event; all remaining
 * keys are the sanitized event fields.
 */
export type SanitizedStepFinish = {
stepNumber: number;
sdkStepNumber?: number;
} & Record<string, unknown>;
/** Stored form of a workflow code snapshot; currently the same shape as the input. */
export type WorkflowCodeSnapshot = WorkflowCodeSnapshotInput;
/** One LLM step of a run: the sanitized start payload and/or the sanitized finish payload. */
export interface RunDebugStep {
/** Run-scoped step index. */
stepNumber: number;
/** Set when a step start was recorded for this index. */
input?: SanitizedStepStart;
/** Set when a step finish was recorded for this index. */
output?: SanitizedStepFinish;
}
/** Everything the debug buffer holds for a single run. */
export interface RunDebugRecord {
threadId: string;
runId: string;
/** `Date.now()` at the time the record was created. */
startedAt: number;
/** Optional run label, trimmed and secret-scrubbed when the record is created. */
label?: string;
/** Next run-scoped step index; survives step-cap eviction. */
nextStepIndex: number;
/** Recorded steps, kept sorted by ascending `stepNumber`. */
steps: RunDebugStep[];
/** Workflow code snapshots in the order they were recorded. */
workflowCode: WorkflowCodeSnapshot[];
}
/** Options for `createRunDebugStepHooks`. */
export interface RunDebugStepHookOptions {
/** Run whose record the hooks write into. */
runId: string;
threadId: string;
}
/** Sanitizes a step-start event after dropping its `abortSignal` field. */
function captureStepStartPayload(event: OnStepStartEvent): Record<string, unknown> {
	// Rest-destructuring separates the signal from the fields worth capturing.
	const { abortSignal: _droppedSignal, ...eventWithoutSignal } = event;
	const payload = sanitizeDebugSnapshotRecord(eventWithoutSignal);
	return payload;
}
/** Sanitizes a complete step-finish event into a plain record. */
function captureStepFinishPayload(event: OnStepFinishEvent): Record<string, unknown> {
	const payload = sanitizeDebugSnapshotRecord(event);
	return payload;
}
/**
 * Builds the stored form of a step-start event.
 *
 * @param event Step-start event to capture.
 * @param stepNumber Run-scoped step index to store as `stepNumber`.
 * @returns The sanitized payload, with the event's own step number kept as `sdkStepNumber`.
 */
export function sanitizeStepStart(event: OnStepStartEvent, stepNumber: number): SanitizedStepStart {
	const payload = captureStepStartPayload(event);
	// Listed after the spread so the run-scoped values replace any captured ones.
	const sanitized: SanitizedStepStart = {
		...payload,
		stepNumber,
		sdkStepNumber: event.stepNumber,
	};
	return sanitized;
}
/**
 * Builds the stored form of a step-finish event.
 *
 * @param event Step-finish event to capture.
 * @param stepNumber Run-scoped step index to store as `stepNumber`.
 * @returns The sanitized payload, with the event's own step number kept as `sdkStepNumber`.
 */
export function sanitizeStepFinish(
	event: OnStepFinishEvent,
	stepNumber: number,
): SanitizedStepFinish {
	const payload = captureStepFinishPayload(event);
	// Listed after the spread so the run-scoped values replace any captured ones.
	const sanitized: SanitizedStepFinish = {
		...payload,
		stepNumber,
		sdkStepNumber: event.stepNumber,
	};
	return sanitized;
}
/**
 * Creates step hooks that record step starts and finishes into `buffer`.
 *
 * Numbering starts at the buffer's next step index for the run at creation
 * time and advances by one after each recorded finish.
 *
 * @param buffer Debug buffer to write into.
 * @param options Identifies the run the hooks belong to.
 * @returns `onStepStart` / `onStepFinish` callbacks.
 */
export function createRunDebugStepHooks(
	buffer: RunDebugBuffer,
	options: RunDebugStepHookOptions,
): {
	onStepStart: (event: OnStepStartEvent) => void;
	onStepFinish: (event: OnStepFinishEvent) => void;
} {
	// The agent runtime calls streamText/generateText once per loop iteration. The AI SDK
	// resets stepNumber to 0 on each call, so we allocate a run-scoped sequence instead.
	let currentIndex = buffer.getNextStepIndex(options.runId);

	const onStepStart = (event: OnStepStartEvent): void => {
		buffer.recordStepStart(options.runId, currentIndex, event);
	};

	const onStepFinish = (event: OnStepFinishEvent): void => {
		buffer.recordStepFinish(options.runId, currentIndex, event);
		// Start and finish of one step share an index; move on only after the finish.
		currentIndex += 1;
	};

	return { onStepStart, onStepFinish };
}
/**
 * Display labels for known resume reasons.
 * A Map is used so that only these exact keys match; a plain-object lookup
 * would also match inherited keys such as `constructor` or `toString`.
 */
const RESUME_REASON_LABELS: ReadonlyMap<string, string> = new Map([
	['approval', 'Resume · approval'],
	['background_task_completed', 'Follow-up · background task completed'],
	['workflow_verification', 'Follow-up · workflow verification'],
	['workflow_setup', 'Follow-up · workflow setup'],
	['planned_checkpoint', 'Follow-up · planned checkpoint'],
	['replan', 'Follow-up · replan'],
	['synthesize', 'Follow-up · synthesize'],
]);

/**
 * Builds a short human-readable label for a run.
 *
 * Precedence: label of a known `resumeReason`, then the trimmed `message`
 * (cut to its first 80 characters), then the fallback `'Orchestrator run'`.
 *
 * @param options.message User message that started the run, if any.
 * @param options.resumeReason Reason the run was resumed, if any.
 * @returns The label; never empty.
 */
export function buildRunDebugLabel(options: {
	message?: string;
	resumeReason?: string;
}): string {
	const resumeLabel = options.resumeReason
		? RESUME_REASON_LABELS.get(options.resumeReason)
		: undefined;
	if (resumeLabel) {
		return resumeLabel;
	}

	const trimmed = options.message?.trim();
	if (trimmed) {
		return trimmed.length > 80 ? trimmed.slice(0, 80) : trimmed;
	}

	return 'Orchestrator run';
}
/**
 * In-memory buffer of per-run debug data: sanitized LLM step inputs/outputs
 * and workflow code snapshots.
 *
 * The buffer is bounded by MAX_RUNS, MAX_STEPS_PER_RUN and
 * MAX_WORKFLOW_SNAPSHOTS_PER_RUN. When a cap is reached the oldest entry is
 * evicted to make room, and a warning is logged if a logger was supplied.
 */
export class RunDebugBuffer {
	private readonly records = new Map<string, RunDebugRecord>();

	constructor(private readonly logger?: Logger) {}

	/**
	 * Creates an empty record for `runId` unless one already exists.
	 * A repeated call for an existing run changes nothing, including the label.
	 */
	ensure(runId: string, threadId: string, label?: string): void {
		if (this.records.has(runId)) return;
		this.evictOldestRunIfNeeded();
		this.records.set(runId, {
			threadId,
			runId,
			startedAt: Date.now(),
			label: label ? scrubSecretsInText(label.trim()) : undefined,
			nextStepIndex: 0,
			steps: [],
			workflowCode: [],
		});
	}

	/** Returns the next run-scoped step index for `runId`, or 0 for an unknown run. */
	getNextStepIndex(runId: string): number {
		return this.records.get(runId)?.nextStepIndex ?? 0;
	}

	/** Stores the sanitized start payload for step `stepIndex`; ignored for unknown runs. */
	recordStepStart(runId: string, stepIndex: number, event: OnStepStartEvent): void {
		const record = this.records.get(runId);
		if (!record) return;
		// Sanitize first so a throwing sanitizer cannot leave an empty step behind.
		const input = sanitizeStepStart(event, stepIndex);
		this.upsertStep(record, stepIndex).input = input;
	}

	/**
	 * Stores the sanitized finish payload for step `stepIndex` and advances the
	 * run's next step index; ignored for unknown runs.
	 */
	recordStepFinish(runId: string, stepIndex: number, event: OnStepFinishEvent): void {
		const record = this.records.get(runId);
		if (!record) return;
		const output = sanitizeStepFinish(event, stepIndex);
		this.upsertStep(record, stepIndex).output = output;
		// Never move backwards: a finish for an older index must not cause later
		// steps to be overwritten by hooks created afterwards.
		record.nextStepIndex = Math.max(record.nextStepIndex, stepIndex + 1);
	}

	/** Appends a scrubbed workflow code snapshot to the run; ignored for unknown runs. */
	recordWorkflowCode(runId: string, snapshot: WorkflowCodeSnapshotInput): void {
		const record = this.records.get(runId);
		if (!record) return;
		if (record.workflowCode.length >= MAX_WORKFLOW_SNAPSHOTS_PER_RUN) {
			record.workflowCode.shift();
			this.logger?.warn('Evicted oldest workflow code snapshot from run debug buffer', {
				runId,
				maxSnapshots: MAX_WORKFLOW_SNAPSHOTS_PER_RUN,
			});
		}
		record.workflowCode.push({
			...snapshot,
			code: scrubSecretsInText(snapshot.code),
			patches: sanitizeDebugSnapshotValue(snapshot.patches),
			errors: snapshot.errors?.map((error) => scrubSecretsInText(error)),
		});
	}

	/** Returns a deep copy of the run's record, or `undefined` for an unknown run. */
	get(runId: string): RunDebugRecord | undefined {
		const record = this.records.get(runId);
		if (!record) return undefined;
		return structuredClone(record);
	}

	/** Returns deep copies of all records of a thread, ordered by ascending start time. */
	listByThread(threadId: string): RunDebugRecord[] {
		return [...this.records.values()]
			.filter((record) => record.threadId === threadId)
			.sort((a, b) => a.startedAt - b.startedAt)
			.map((record) => structuredClone(record));
	}

	/**
	 * Returns the step with the given index, creating it (after evicting the
	 * oldest step if the run is at its cap) when it does not exist yet.
	 */
	private upsertStep(record: RunDebugRecord, stepIndex: number): RunDebugStep {
		const existing = record.steps.find((step) => step.stepNumber === stepIndex);
		if (existing) return existing;

		this.evictOldestStepIfNeeded(record);
		const created: RunDebugStep = { stepNumber: stepIndex };
		record.steps.push(created);
		record.steps.sort((a, b) => a.stepNumber - b.stepNumber);
		return created;
	}

	/** Removes the run with the smallest `startedAt` once the run cap is reached. */
	private evictOldestRunIfNeeded(): void {
		if (this.records.size < MAX_RUNS) return;

		// Linear scan; the strict comparison keeps the earliest-inserted run on ties.
		let oldest: RunDebugRecord | undefined;
		for (const record of this.records.values()) {
			if (!oldest || record.startedAt < oldest.startedAt) {
				oldest = record;
			}
		}
		if (!oldest) return;

		this.records.delete(oldest.runId);
		this.logger?.warn('Evicted oldest run from debug buffer', {
			runId: oldest.runId,
			threadId: oldest.threadId,
			maxRuns: MAX_RUNS,
		});
	}

	/** Removes the first (lowest-numbered) step once the per-run step cap is reached. */
	private evictOldestStepIfNeeded(record: RunDebugRecord): void {
		if (record.steps.length < MAX_STEPS_PER_RUN) return;
		const removed = record.steps.shift();
		this.logger?.warn('Evicted oldest step from run debug buffer', {
			runId: record.runId,
			stepNumber: removed?.stepNumber,
			maxSteps: MAX_STEPS_PER_RUN,
		});
	}
}
@@ -0,0 +1,116 @@
import { scrubSecretsInText } from '@n8n/utils';
import { isRecord } from '../utils/stream-helpers';
// Keys that are dropped from snapshots entirely, wherever they appear.
const OMIT_KEYS = new Set(['abortSignal']);
// Case-insensitive match for key names whose string values are replaced with '[redacted]'.
// A bare "token" only matches at the end of the key, as the whole key or after '.', '_' or '-'.
const SENSITIVE_KEY_PATTERN =
/(api[_-]?key|authorization|bearer|cookie|credentials?|password|secret|access[_-]?token|refresh[_-]?token|id[_-]?token|session[_-]?token|auth[_-]?token|(?:^|[._-])token$)/i;
/**
 * Decides whether an object entry is left out of the snapshot.
 *
 * @param key Key of the entry.
 * @param parentKey Key hint the containing object was reached under, if any.
 */
function shouldOmitKey(key: string, parentKey?: string): boolean {
	// Raw provider HTTP payloads are not useful in the debug buffer.
	const isRawResponseBody = key === 'body' && parentKey === 'response';
	return OMIT_KEYS.has(key) || isRawResponseBody;
}
/**
 * Sanitizes one object entry: a string stored under a sensitive-looking key
 * becomes '[redacted]'; any other value is sanitized with `key` as its hint.
 */
function redactSensitiveKey(key: string, value: unknown, seen: WeakSet<object>): unknown {
	const isSensitiveString = typeof value === 'string' && SENSITIVE_KEY_PATTERN.test(key);
	return isSensitiveString ? '[redacted]' : sanitizeDebugSnapshotValue(value, key, seen);
}
/**
 * Full-fidelity JSON-safe snapshot for the in-memory run debug buffer.
 * Unlike trace sanitization, this does not truncate strings, arrays, or object keys.
 *
 * Conversions applied:
 * - strings: '[redacted]' when `keyHint` looks sensitive, otherwise secret-scrubbed
 * - bigint / symbol: string form; functions: `[function <name>]`
 * - AbortSignal, Date, Error, Uint8Array: short placeholders or plain data
 * - arrays / records: sanitized recursively; omitted keys are dropped
 * - a container that contains itself: '[Circular]' at the point of re-entry
 *
 * @param value Value to snapshot.
 * @param keyHint Key the value was stored under; array entries inherit the array's hint.
 * @param seen Containers on the path from the root to `value`; supplied by recursive calls.
 * @returns A sanitized copy; inputs are not mutated.
 */
export function sanitizeDebugSnapshotValue(
	value: unknown,
	keyHint?: string,
	seen?: WeakSet<object>,
): unknown {
	// Only the containers currently being traversed are tracked. Each one is
	// removed again once its subtree is done, so an object that is merely
	// referenced twice is captured in full both times and only a true cycle
	// yields '[Circular]'.
	const ancestors = seen ?? new WeakSet<object>();

	if (value === undefined || value === null) {
		return value;
	}
	if (typeof value === 'string') {
		if (keyHint && SENSITIVE_KEY_PATTERN.test(keyHint)) {
			return '[redacted]';
		}
		return scrubSecretsInText(value);
	}
	if (typeof value === 'number' || typeof value === 'boolean') {
		return value;
	}
	if (typeof value === 'bigint') {
		return value.toString();
	}
	if (typeof value === 'function') {
		return `[function ${value.name || 'anonymous'}]`;
	}
	if (value instanceof AbortSignal) {
		return '[AbortSignal]';
	}
	if (value instanceof Date) {
		return value.toISOString();
	}
	if (value instanceof Error) {
		return {
			name: value.name,
			message: scrubSecretsInText(value.message),
		};
	}
	if (value instanceof Uint8Array) {
		return `[binary ${value.byteLength} bytes]`;
	}
	if (Array.isArray(value)) {
		if (ancestors.has(value)) {
			return '[Circular]';
		}
		ancestors.add(value);
		try {
			return value.map((entry) => sanitizeDebugSnapshotValue(entry, keyHint, ancestors));
		} finally {
			ancestors.delete(value);
		}
	}
	if (isRecord(value)) {
		if (ancestors.has(value)) {
			return '[Circular]';
		}
		ancestors.add(value);
		try {
			const sanitized: Record<string, unknown> = {};
			for (const [key, entryValue] of Object.entries(value)) {
				if (shouldOmitKey(key, keyHint)) {
					continue;
				}
				sanitized[key] = redactSensitiveKey(key, entryValue, ancestors);
			}
			return sanitized;
		} finally {
			ancestors.delete(value);
		}
	}
	if (typeof value === 'symbol') {
		return value.toString();
	}
	return '[unsupported value]';
}
/**
 * Sanitizes `value` and guarantees a record result: a sanitized record is
 * returned as is, anything else is wrapped as `{ value: <sanitized> }`.
 */
export function sanitizeDebugSnapshotRecord(value: unknown): Record<string, unknown> {
	const sanitized = sanitizeDebugSnapshotValue(value);
	if (isRecord(sanitized)) {
		return sanitized;
	}
	return { value: sanitized };
}
+6
View File
@@ -475,6 +475,12 @@ export type RunStateRegistry<TUser = unknown> = RunStateRegistryMod.RunStateRegi
export const RunStateRegistry: typeof RunStateRegistryMod.RunStateRegistry = lazyClass(
() => loadRunStateRegistry().RunStateRegistry,
);
export type { RunDebugRecord } from './debug/run-debug-buffer';
export {
RunDebugBuffer,
buildRunDebugLabel,
createRunDebugStepHooks,
} from './debug/run-debug-buffer';
export type {
ActiveRunState,
BackgroundTaskStatusSnapshot,
@@ -732,4 +732,78 @@ describe('createBuildWorkflowTool', () => {
'Failed to clear AI-builder temporary marker on main workflow wf-1: temporary marker cleanup failed',
);
});
// Success path: the snapshot callback on the context must receive the submitted code,
// its source kind, the tool call id and a numeric capture time.
it('records workflow code snapshot on success when callback is set', async () => {
const recordWorkflowCodeSnapshot = vi.fn();
const context = {
userId: 'user-1',
runId: 'run-1',
workflowService: {
createFromWorkflowJSON: vi.fn(async () => await Promise.resolve({ id: 'wf-1' })),
clearAiTemporary: vi.fn(async () => await Promise.resolve()),
},
credentialService: {},
nodeService: {},
dataTableService: {},
executionService: {},
permissions: { createWorkflow: 'always_allow' },
logger: { warn: vi.fn() },
recordWorkflowCodeSnapshot,
} as unknown as InstanceAiContext;
const tool = createBuildWorkflowTool(context);
const result = await executeTool(
tool,
{
code: 'workflow code',
name: 'Snapshot Test Workflow',
},
{ toolCallId: 'tc-build-1' },
);
expect(result).toMatchObject({ success: true, workflowId: 'wf-1' });
expect(recordWorkflowCodeSnapshot).toHaveBeenCalledWith(
expect.objectContaining({
code: 'workflow code',
source: 'full-code',
success: true,
toolCallId: 'tc-build-1',
capturedAt: expect.any(Number) as unknown,
}),
);
});
// Failure path: when parsing throws, a snapshot with success=false and the parse
// error message must still be recorded.
it('records workflow code snapshot on parse failure when callback is set', async () => {
const recordWorkflowCodeSnapshot = vi.fn();
const context = {
userId: 'user-1',
runId: 'run-1',
workflowService: {},
credentialService: {},
permissions: { createWorkflow: 'always_allow' },
logger: { warn: vi.fn() },
recordWorkflowCodeSnapshot,
} as unknown as InstanceAiContext;
// Force the next parse attempt to throw.
vi.mocked(parseAndValidate).mockImplementationOnce(() => {
throw new Error('Failed to parse workflow code');
});
const tool = createBuildWorkflowTool(context);
const result = await executeTool<{ success: boolean; errors?: string[] }>(tool, {
code: 'broken workflow code',
name: 'Broken Workflow',
});
expect(result.success).toBe(false);
expect(recordWorkflowCodeSnapshot).toHaveBeenCalledWith(
expect.objectContaining({
source: 'full-code',
success: false,
errors: expect.arrayContaining([
expect.stringContaining('Failed to parse workflow code'),
]) as unknown,
}),
);
});
});
@@ -41,6 +41,7 @@ const confirmationResumeSchema = z.object({
});
interface BuildCtx {
toolCallId?: string;
resumeData?: z.infer<typeof confirmationResumeSchema>;
suspend?: (payload: z.infer<typeof confirmationSuspendSchema>) => Promise<never>;
}
@@ -511,6 +512,23 @@ export function createBuildWorkflowTool(context: InstanceAiContext) {
// Remember for future patches
lastCode = finalCode;
const codeSource = patches ? ('patch' as const) : ('full-code' as const);
// Forwards a snapshot of the code that was just built (and the outcome of the
// build) to the optional debug callback on the context. No-op when the
// callback is not set.
const recordWorkflowCodeSnapshot = (result: {
	success: boolean;
	errors?: string[];
	workflowId?: string;
}): void => {
	context.recordWorkflowCodeSnapshot?.({
		code: finalCode,
		source: codeSource,
		patches: patches ?? undefined,
		// Prefer the id carried by the tool result so that a newly created
		// workflow is attributed to its saved id; fall back to the requested id.
		workflowId: result.workflowId ?? workflowId ?? undefined,
		toolCallId: ctx.toolCallId,
		success: result.success,
		errors: result.errors,
		capturedAt: Date.now(),
	});
};
// Parse TypeScript to WorkflowJSON with two-stage validation
let result;
try {
@@ -518,7 +536,7 @@ export function createBuildWorkflowTool(context: InstanceAiContext) {
nodeTypesProvider: context.nodeTypesProvider,
});
} catch (error) {
return {
const failure = {
success: false,
errors: withEscalation(
[error instanceof Error ? error.message : 'Failed to parse workflow code'],
@@ -527,13 +545,15 @@ export function createBuildWorkflowTool(context: InstanceAiContext) {
},
),
};
recordWorkflowCodeSnapshot(failure);
return failure;
}
// Partition validation results into blocking errors and informational warnings
const { errors, informational } = partitionWarnings(result.warnings);
if (errors.length > 0) {
return {
const failure = {
success: false,
errors: withEscalation(
errors.map((e) => `[${e.code}]${e.nodeName ? ` (${e.nodeName})` : ''}: ${e.message}`),
@@ -543,18 +563,22 @@ export function createBuildWorkflowTool(context: InstanceAiContext) {
? informational.map((w) => `[${w.code}]: ${w.message}`)
: undefined,
};
recordWorkflowCodeSnapshot(failure);
return failure;
}
const json = result.workflow;
if (name) {
json.name = name;
} else if (!json.name && !workflowId) {
return {
const failure = {
success: false,
errors: [
'Workflow name is required for new workflows. Provide a name parameter or set it in the SDK code.',
],
};
recordWorkflowCodeSnapshot(failure);
return failure;
}
// Resolve undefined/null credentials before saving.
@@ -651,7 +675,7 @@ export function createBuildWorkflowTool(context: InstanceAiContext) {
failureTracker.clear(workItemKey);
return {
const successResult = {
success: true,
workflowId: savedId,
workflowName: json.name || undefined,
@@ -681,6 +705,8 @@ export function createBuildWorkflowTool(context: InstanceAiContext) {
? informational.map((w) => `[${w.code}]: ${w.message}`)
: undefined,
};
recordWorkflowCodeSnapshot(successResult);
return successResult;
};
if (workflowId) {
+3
View File
@@ -30,6 +30,7 @@ import type {
// Service interfaces — dependency inversion so the package stays decoupled from n8n internals.
// The backend module provides concrete implementations via InstanceAiAdapterService.
import type { WorkflowCodeSnapshotInput } from './debug/run-debug-buffer';
import type { DomainAccessTracker } from './domain-access/domain-access-tracker';
import type { InstanceAiEventBus } from './event-bus/event-bus.interface';
import type { Logger } from './logger';
@@ -759,6 +760,8 @@ export interface InstanceAiContext {
domainAccessTracker?: DomainAccessTracker;
/** Current run ID — used for transient (allow_once) domain approvals. */
runId?: string;
/** Records workflow code snapshots for the run debug buffer (dev tooling). */
recordWorkflowCodeSnapshot?: (snapshot: WorkflowCodeSnapshotInput) => void;
/**
* IDs of workflows the agent created during the **currently active plan
* cycle**. Populated by build-workflow and submit-workflow on every
@@ -0,0 +1,92 @@
import type { User } from '@n8n/db';
import { RunDebugBuffer } from '@n8n/instance-ai';
import { InstanceAiService } from '../instance-ai.service';
// The subset of InstanceAiService members these tests assign or call.
// Used as a cast target so the test can reach them on a bare instance.
type RunDebugGatingInternals = {
instanceAiConfig: { runDebugEnabled: boolean };
runDebugBuffer: RunDebugBuffer;
buildOrchestratorAgentStreamOptions: (
user: User,
threadId: string,
runId: string,
signal: AbortSignal,
) => Record<string, unknown>;
buildOrchestratorResumeAgentOptions: (
user: User,
threadId: string,
runId: string,
agentRunId: string,
toolCallId: string,
) => Record<string, unknown>;
getRunDebug: (runId: string) => ReturnType<RunDebugBuffer['get']>;
};
/**
 * Builds an InstanceAiService without running its constructor and sets only
 * the two fields the gating tests assign: the config flag and a fresh buffer.
 */
function createRunDebugGatingService(runDebugEnabled: boolean): RunDebugGatingInternals {
	const bareService = Object.create(InstanceAiService.prototype) as RunDebugGatingInternals;
	return Object.assign(bareService, {
		instanceAiConfig: { runDebugEnabled },
		runDebugBuffer: new RunDebugBuffer(),
	});
}
// Verifies that step hooks and run records only exist when the run debug flag is on.
describe('InstanceAiService run debug gating', () => {
const user = { id: 'user-1' } as User;
const threadId = 'thread-1';
const runId = 'run-1';
const signal = new AbortController().signal;
it('does not attach step hooks or create run records when run debug is disabled', () => {
const service = createRunDebugGatingService(false);
const streamOptions = service.buildOrchestratorAgentStreamOptions(
user,
threadId,
runId,
signal,
);
const resumeOptions = service.buildOrchestratorResumeAgentOptions(
user,
threadId,
runId,
'agent-run-1',
'tool-call-1',
);
expect(streamOptions.onStepStart).toBeUndefined();
expect(streamOptions.onStepFinish).toBeUndefined();
expect(resumeOptions.onStepStart).toBeUndefined();
expect(resumeOptions.onStepFinish).toBeUndefined();
expect(service.getRunDebug(runId)).toBeUndefined();
});
it('attaches step hooks and creates run records when run debug is enabled', () => {
const service = createRunDebugGatingService(true);
const streamOptions = service.buildOrchestratorAgentStreamOptions(
user,
threadId,
runId,
signal,
);
const resumeOptions = service.buildOrchestratorResumeAgentOptions(
user,
threadId,
runId,
'agent-run-1',
'tool-call-1',
);
expect(typeof streamOptions.onStepStart).toBe('function');
expect(typeof streamOptions.onStepFinish).toBe('function');
expect(typeof resumeOptions.onStepStart).toBe('function');
expect(typeof resumeOptions.onStepFinish).toBe('function');
// Building the options is expected to have created an empty record for the run.
expect(service.getRunDebug(runId)).toEqual(
expect.objectContaining({
runId,
threadId,
steps: [],
workflowCode: [],
}),
);
});
});
@@ -114,6 +114,12 @@ export class InstanceAiController {
throw new ForbiddenError('Instance AI is disabled');
}
}
/** Throws a NotFoundError unless the service reports run debug as enabled. */
private requireRunDebugEnabled(): void {
	const enabled = this.instanceAiService.isRunDebugEnabled();
	if (enabled) {
		return;
	}
	throw new NotFoundError('Run debug is not enabled');
}
// Each BrotliCompress stream allocates ~8.6 MB of native memory for its
// dictionary, and the compression middleware retains streams via closures on
// the response object for the lifetime of the HTTP keep-alive connection.
@@ -624,6 +630,35 @@ export class InstanceAiController {
return this.instanceAiService.getThreadStatus(threadId);
}
/**
 * Returns the captured debug record for a single run.
 * Rejects when instance AI or run debug is disabled, when no record exists
 * for the run, or when the thread access assertion for the record's thread fails.
 */
@Get('/debug/runs/:runId')
@GlobalScope('instanceAi:message')
async getRunDebug(req: AuthenticatedRequest, _res: Response, @Param('runId') runId: string) {
	this.requireInstanceAiEnabled();
	this.requireRunDebugEnabled();

	const debugRecord = this.instanceAiService.getRunDebug(runId);
	if (!debugRecord) {
		throw new NotFoundError('Run debug record not found');
	}

	// The thread is only known once the record is loaded, so access is checked afterwards.
	const { threadId } = debugRecord;
	await this.assertThreadAccess(req.user.id, threadId);
	return debugRecord;
}
/**
 * Lists the debug run summaries captured for a thread, after checking the
 * feature gates and the caller's access to that thread.
 */
@Get('/debug/threads/:threadId/runs')
@GlobalScope('instanceAi:message')
async listThreadDebugRuns(
	req: AuthenticatedRequest,
	_res: Response,
	@Param('threadId') threadId: string,
) {
	this.requireInstanceAiEnabled();
	this.requireRunDebugEnabled();
	await this.assertThreadAccess(req.user.id, threadId);

	const runs = this.instanceAiService.listThreadDebugRuns(threadId);
	return { threadId, runs };
}
// ── Evaluation endpoints ──────────────────────────────────────────────────
@Post('/eval/execute-with-llm-mock/:workflowId')
@@ -44,6 +44,7 @@ export class InstanceAiModule implements ModuleInterface {
sandboxEnabled: sandboxStatus.enabled,
workflowBuilderAvailable: enabled && sandboxStatus.workflowBuilderAvailable,
sandboxUnavailableReason: sandboxStatus.unavailableReason,
runDebugEnabled: globalConfig.instanceAi.runDebugEnabled,
};
}
@@ -47,6 +47,9 @@ import {
submitLangsmithUserFeedback,
resumeAgentRun,
RunStateRegistry,
RunDebugBuffer,
buildRunDebugLabel,
createRunDebugStepHooks,
startDetachedDelegateTask,
streamAgentRun,
truncateToTitle,
@@ -77,6 +80,7 @@ import {
type WorkflowTaskService,
type WorkflowVerificationObligation,
type WorkSummary,
type RunDebugRecord,
WorkflowTaskCoordinator,
WorkflowLoopStorage,
ThreadTaskStorage,
@@ -490,6 +494,8 @@ export class InstanceAiService {
/** Test-only trace replay state (slugs, events, shared TraceIndex/IdRemapper). */
private readonly traceReplay = new TraceReplayState();
private readonly runDebugBuffer = new RunDebugBuffer();
/** Default IANA timezone for the instance (from GENERIC_TIMEZONE env var). */
private readonly defaultTimeZone: string;
@@ -872,6 +878,70 @@ export class InstanceAiService {
return this.traceContextsByRunId.get(runId)?.tracing;
}
/** Reports the `runDebugEnabled` flag from the instance AI config. */
isRunDebugEnabled(): boolean {
	const { runDebugEnabled } = this.instanceAiConfig;
	return runDebugEnabled;
}
/**
 * Assembles the option bag passed to the orchestrator stream run.
 * When run debug is enabled, a buffer record for the run is ensured and the
 * run-debug step hooks are merged in after the base options.
 */
private buildOrchestratorAgentStreamOptions(
	user: User,
	threadId: string,
	runId: string,
	signal: AbortSignal,
): Record<string, unknown> {
	const captureDebug = this.isRunDebugEnabled();
	if (captureDebug) {
		this.runDebugBuffer.ensure(runId, threadId);
	}

	const baseOptions = {
		maxIterations: MAX_STEPS.ORCHESTRATOR,
		abortSignal: signal,
		persistence: {
			resourceId: user.id,
			threadId,
		},
		providerOptions: {
			anthropic: { cacheControl: { type: 'ephemeral' } },
		},
	};
	if (!captureDebug) {
		return { ...baseOptions };
	}

	const stepHooks = createRunDebugStepHooks(this.runDebugBuffer, { runId, threadId });
	return { ...baseOptions, ...stepHooks };
}
/**
 * Assembles the option bag passed when resuming an orchestrator run.
 * When run debug is enabled, a buffer record for the run is ensured and the
 * run-debug step hooks are merged in after the base options.
 */
private buildOrchestratorResumeAgentOptions(
	user: User,
	threadId: string,
	runId: string,
	agentRunId: string,
	toolCallId: string,
): Record<string, unknown> {
	const captureDebug = this.isRunDebugEnabled();
	if (captureDebug) {
		this.runDebugBuffer.ensure(runId, threadId);
	}

	// Note: the agent-level `runId` option is the agent run id, not the debug run id.
	const baseOptions = {
		runId: agentRunId,
		toolCallId,
		persistence: { resourceId: user.id, threadId },
	};
	if (!captureDebug) {
		return { ...baseOptions };
	}

	const stepHooks = createRunDebugStepHooks(this.runDebugBuffer, { runId, threadId });
	return { ...baseOptions, ...stepHooks };
}
/** Looks up the buffered debug record for `runId`, as returned by the debug buffer. */
getRunDebug(runId: string) {
	const debugRecord = this.runDebugBuffer.get(runId);
	return debugRecord;
}
/**
 * Returns a lightweight summary for each debug record buffered for a thread:
 * identifiers, start time, label, and the counts of steps and workflow code
 * snapshots (the step and snapshot payloads themselves are not included).
 */
listThreadDebugRuns(threadId: string) {
	const threadRecords = this.runDebugBuffer.listByThread(threadId);
	return threadRecords.map(
		({
			runId,
			threadId: recordThreadId,
			startedAt,
			steps,
			workflowCode,
			label,
		}: RunDebugRecord) => ({
			runId,
			threadId: recordThreadId,
			startedAt,
			stepCount: steps.length,
			workflowCodeCount: workflowCode.length,
			label,
		}),
	);
}
private getTraceContextForContinuation(
threadId: string,
messageGroupId?: string,
@@ -2546,6 +2616,12 @@ export class InstanceAiService {
}
context.domainAccessTracker = domainTracker;
context.runId = runId;
if (this.isRunDebugEnabled()) {
context.recordWorkflowCodeSnapshot = (snapshot) => {
this.runDebugBuffer.ensure(runId, threadId);
this.runDebugBuffer.recordWorkflowCode(runId, snapshot);
};
}
// Compute gateway status for the system prompt
if (localGatewayDisabledGlobally) {
@@ -3432,6 +3508,9 @@ export class InstanceAiService {
try {
messageId = nanoid();
if (this.isRunDebugEnabled()) {
this.runDebugBuffer.ensure(runId, threadId, buildRunDebugLabel({ message, resumeReason }));
}
// Publish run-start (includes userId for audit trail attribution)
this.eventBus.publish(threadId, {
@@ -3736,49 +3815,11 @@ export class InstanceAiService {
timeZone: timeZone ?? this.defaultTimeZone,
});
const streamOptions = this.buildOrchestratorAgentStreamOptions(user, threadId, runId, signal);
const result = tracing
? await tracing.withActiveSpan(tracing.actorRun, async () => {
return await streamAgentRun(
agent as StreamableAgent,
streamInput,
{
maxIterations: MAX_STEPS.ORCHESTRATOR,
abortSignal: signal,
persistence: {
resourceId: user.id,
threadId,
},
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
},
{
threadId,
runId,
agentId: ORCHESTRATOR_AGENT_ID,
signal,
eventBus: this.eventBus,
logger: this.logger,
onActivity: () => this.runState.touchActiveRun(threadId),
outputRedaction: resolveOutputRedaction(this.instanceAiConfig),
},
);
})
: await streamAgentRun(
agent as StreamableAgent,
streamInput,
{
maxIterations: MAX_STEPS.ORCHESTRATOR,
abortSignal: signal,
persistence: {
resourceId: user.id,
threadId,
},
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
},
{
return await streamAgentRun(agent as StreamableAgent, streamInput, streamOptions, {
threadId,
runId,
agentId: ORCHESTRATOR_AGENT_ID,
@@ -3787,8 +3828,18 @@ export class InstanceAiService {
logger: this.logger,
onActivity: () => this.runState.touchActiveRun(threadId),
outputRedaction: resolveOutputRedaction(this.instanceAiConfig),
},
);
});
})
: await streamAgentRun(agent as StreamableAgent, streamInput, streamOptions, {
threadId,
runId,
agentId: ORCHESTRATOR_AGENT_ID,
signal,
eventBus: this.eventBus,
logger: this.logger,
onActivity: () => this.runState.touchActiveRun(threadId),
outputRedaction: resolveOutputRedaction(this.instanceAiConfig),
});
if (result.status === 'suspended') {
if (result.suspension) {
this.runState.suspendRun(threadId, {
@@ -4632,38 +4683,17 @@ export class InstanceAiService {
}
}
const resumeOptions = this.buildOrchestratorResumeAgentOptions(
opts.user,
opts.threadId,
opts.runId,
opts.agentRunId,
opts.toolCallId,
);
const result = opts.tracing
? await opts.tracing.withActiveSpan(opts.tracing.actorRun, async () => {
return await resumeAgentRun(
agent,
resumeData,
{
runId: opts.agentRunId,
toolCallId: opts.toolCallId,
persistence: { resourceId: opts.user.id, threadId: opts.threadId },
},
{
threadId: opts.threadId,
runId: opts.runId,
agentId: ORCHESTRATOR_AGENT_ID,
signal: opts.signal,
eventBus: this.eventBus,
logger: this.logger,
agentRunId: opts.agentRunId,
onActivity: () => this.runState.touchActiveRun(opts.threadId),
outputRedaction: resolveOutputRedaction(this.instanceAiConfig),
},
);
})
: await resumeAgentRun(
agent,
resumeData,
{
runId: opts.agentRunId,
toolCallId: opts.toolCallId,
persistence: { resourceId: opts.user.id, threadId: opts.threadId },
},
{
return await resumeAgentRun(agent, resumeData, resumeOptions, {
threadId: opts.threadId,
runId: opts.runId,
agentId: ORCHESTRATOR_AGENT_ID,
@@ -4673,8 +4703,19 @@ export class InstanceAiService {
agentRunId: opts.agentRunId,
onActivity: () => this.runState.touchActiveRun(opts.threadId),
outputRedaction: resolveOutputRedaction(this.instanceAiConfig),
},
);
});
})
: await resumeAgentRun(agent, resumeData, resumeOptions, {
threadId: opts.threadId,
runId: opts.runId,
agentId: ORCHESTRATOR_AGENT_ID,
signal: opts.signal,
eventBus: this.eventBus,
logger: this.logger,
agentRunId: opts.agentRunId,
onActivity: () => this.runState.touchActiveRun(opts.threadId),
outputRedaction: resolveOutputRedaction(this.instanceAiConfig),
});
if (result.status === 'suspended') {
if (result.suspension) {
@@ -5955,6 +5955,47 @@
"instanceAi.debug.connectionStatus": "Connection",
"instanceAi.debug.tab.events": "Events",
"instanceAi.debug.tab.threads": "Threads",
"instanceAi.debug.tab.llmSteps": "LLM Steps",
"instanceAi.debug.tab.workflowCode": "Workflow Code",
"instanceAi.debug.runDebug.refresh": "Refresh",
"instanceAi.debug.runDebug.noRuns": "No captured runs for this thread",
"instanceAi.debug.runDebug.noSteps": "No LLM steps captured for this run",
"instanceAi.debug.runDebug.noWorkflowCode": "No workflow code snapshots for this run",
"instanceAi.debug.runDebug.selectRun": "Run",
"instanceAi.debug.runDebug.runs": "Runs",
"instanceAi.debug.runDebug.step": "Step {number}",
"instanceAi.debug.runDebug.fetchError": "Failed to load run debug data",
"instanceAi.debug.runDebug.success": "success",
"instanceAi.debug.runDebug.failed": "failed",
"instanceAi.debug.runDebug.openStepsModal": "Expand all steps",
"instanceAi.debug.runDebug.stepsModalTitle": "LLM Steps",
"instanceAi.debug.runDebug.input": "Input",
"instanceAi.debug.runDebug.output": "Output",
"instanceAi.debug.runDebug.system": "System",
"instanceAi.debug.runDebug.messages": "Messages",
"instanceAi.debug.runDebug.activeTools": "Active tools",
"instanceAi.debug.runDebug.toolChoice": "Tool choice",
"instanceAi.debug.runDebug.text": "Text",
"instanceAi.debug.runDebug.toolCalls": "Tool calls",
"instanceAi.debug.runDebug.usage": "Usage",
"instanceAi.debug.runDebug.responseMessages": "Response",
"instanceAi.debug.runDebug.noStepDetail": "No step details captured",
"instanceAi.debug.runDebug.metadata": "Metadata",
"instanceAi.debug.runDebug.inputSettings": "Step settings",
"instanceAi.debug.runDebug.inputExtras": "Tools and step config",
"instanceAi.debug.runDebug.outputMetadata": "Output metadata",
"instanceAi.debug.runDebug.toolResults": "Tool results",
"instanceAi.debug.runDebug.content": "Content",
"instanceAi.debug.runDebug.systemPrompt": "System prompt",
"instanceAi.debug.runDebug.charCount": "{count} chars",
"instanceAi.debug.runDebug.tools": "Tools",
"instanceAi.debug.runDebug.toolInput": "input",
"instanceAi.debug.runDebug.toolOutput": "output",
"instanceAi.debug.runDebug.stepCount": "{count} steps",
"instanceAi.debug.runDebug.json": "JSON",
"instanceAi.debug.runDebug.toolCall": "tool-call",
"instanceAi.debug.runDebug.toolResult": "tool-result",
"instanceAi.debug.runDebug.reasoning": "reasoning",
"instanceAi.debug.threads.title": "Thread Inspector",
"instanceAi.debug.threads.current": "Current",
"instanceAi.debug.threads.messages": "Messages",
@@ -627,6 +627,7 @@ describe('useGlobalEntityCreation', () => {
sandboxEnabled: true,
workflowBuilderAvailable: true,
sandboxUnavailableReason: null,
runDebugEnabled: false,
};
const enableInstanceAi = () => {
@@ -259,6 +259,7 @@ describe('router', () => {
sandboxEnabled: true,
workflowBuilderAvailable: true,
sandboxUnavailableReason: null,
runDebugEnabled: false,
};
const runRootRedirect = () => {
@@ -176,6 +176,7 @@ const defaultModuleSettings: InstanceAiModuleSettings = {
sandboxEnabled: true,
workflowBuilderAvailable: true,
sandboxUnavailableReason: null,
runDebugEnabled: false,
};
describe('InstanceAiEmptyView', () => {
@@ -180,6 +180,7 @@ const defaultModuleSettings: NonNullable<FrontendModuleSettings['instance-ai']>
sandboxEnabled: true,
workflowBuilderAvailable: true,
sandboxUnavailableReason: null,
runDebugEnabled: false,
};
function makePlanReviewMessage(): InstanceAiMessage {
@@ -78,6 +78,7 @@ const defaultModuleSettings: NonNullable<FrontendModuleSettings['instance-ai']>
sandboxEnabled: true,
workflowBuilderAvailable: true,
sandboxUnavailableReason: null,
runDebugEnabled: false,
};
describe('SettingsInstanceAiView', () => {
@@ -71,6 +71,7 @@ function makeModuleSettings(
sandboxEnabled: true,
workflowBuilderAvailable: true,
sandboxUnavailableReason: null,
runDebugEnabled: false,
...overrides,
};
}
@@ -0,0 +1,100 @@
<script lang="ts" setup>
import { useI18n } from '@n8n/i18n';
import { computed } from 'vue';
import { formatDebugJson, summarizeJsonValue } from '@n8n/api-types';
// Props: the value to render, an optional summary label, and whether the
// <details> element starts expanded (collapsed by default).
const props = withDefaults(
defineProps<{
value: unknown;
label?: string;
defaultOpen?: boolean;
}>(),
{
defaultOpen: false,
},
);
const i18n = useI18n();
// Short preview of the value, rendered next to the label in the summary row.
const preview = computed(() => summarizeJsonValue(props.value));
// Formatted text of the value, rendered in the <pre> body.
const formatted = computed(() => formatDebugJson(props.value));
// Falls back to the generic "JSON" label when no label prop is given.
const summaryLabel = computed(() => props.label ?? i18n.baseText('instanceAi.debug.runDebug.json'));
</script>
<template>
<details :class="$style.root" :open="defaultOpen">
<summary :class="$style.summary">
<span :class="$style.summaryLabel">{{ summaryLabel }}</span>
<code :class="$style.preview">{{ preview }}</code>
</summary>
<pre :class="$style.json">{{ formatted }}</pre>
</details>
</template>
<style lang="scss" module>
.root {
border: 1px solid var(--color--foreground--tint-2);
border-radius: var(--radius);
background: var(--color--background--shade-1);
overflow: hidden;
}
.summary {
display: flex;
align-items: baseline;
gap: var(--spacing--2xs);
padding: var(--spacing--3xs) var(--spacing--2xs);
cursor: pointer;
list-style: none;
user-select: none;
&::-webkit-details-marker {
display: none;
}
&::before {
content: '▸';
flex-shrink: 0;
color: var(--color--text--tint-1);
transition: transform var(--duration--fast) ease;
}
}
.root[open] .summary::before {
transform: rotate(90deg);
}
.summaryLabel {
flex-shrink: 0;
font-size: var(--font-size--3xs);
font-weight: var(--font-weight--medium);
color: var(--color--text--tint-1);
}
.preview {
flex: 1;
min-width: 0;
font-family: monospace;
font-size: var(--font-size--3xs);
line-height: var(--line-height--lg);
color: var(--color--text);
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
.json {
margin: 0;
padding: var(--spacing--3xs) var(--spacing--2xs);
border-top: 1px solid var(--color--foreground--tint-2);
background: var(--background--surface);
font-family: monospace;
font-size: var(--font-size--3xs);
line-height: var(--line-height--xl);
white-space: pre-wrap;
word-break: break-word;
color: var(--color--text);
max-height: 420px;
overflow-y: auto;
}
</style>
@@ -0,0 +1,135 @@
<script lang="ts" setup>
import type { InstanceAiRunDebugWorkflowCodeSnapshot, ReadableSegment } from '@n8n/api-types';
import { useI18n } from '@n8n/i18n';
import { getToolCallIdFromMetadata, isWorkflowCodeToolName } from '../utils/workflow-code-match';
import InstanceAiDebugJsonPanel from './InstanceAiDebugJsonPanel.vue';
import InstanceAiDebugWorkflowCodeSnapshot from './InstanceAiDebugWorkflowCodeSnapshot.vue';
const props = defineProps<{
segments: ReadableSegment[];
workflowSnapshotsByToolCallId?: ReadonlyMap<string, InstanceAiRunDebugWorkflowCodeSnapshot>;
}>();
const i18n = useI18n();
/**
 * Finds the workflow code snapshot that belongs to a tool-call / tool-result
 * segment, keyed by the tool call id found in the segment metadata.
 * Returns undefined when the tool is not a workflow-code tool, when no
 * snapshot map was provided, or when the segment carries no tool call id.
 */
function getWorkflowSnapshot(
	segment: Extract<ReadableSegment, { type: 'tool-call' | 'tool-result' }>,
): InstanceAiRunDebugWorkflowCodeSnapshot | undefined {
	if (!isWorkflowCodeToolName(segment.name)) {
		return undefined;
	}

	const snapshotsById = props.workflowSnapshotsByToolCallId;
	if (!snapshotsById) {
		return undefined;
	}

	const toolCallId = getToolCallIdFromMetadata(segment.metadata);
	return toolCallId ? snapshotsById.get(toolCallId) : undefined;
}
</script>
<template>
<div :class="$style.root">
<template v-for="(segment, index) in segments" :key="`${segment.type}-${index}`">
<p v-if="segment.type === 'text'" :class="$style.text">{{ segment.text }}</p>
<div v-else-if="segment.type === 'reasoning'" :class="$style.inlineBlock">
<div :class="$style.inlineHeader">
<span :class="$style.kindLabel">
{{ i18n.baseText('instanceAi.debug.runDebug.reasoning') }}
</span>
</div>
<p :class="$style.text">{{ segment.text }}</p>
</div>
<div v-else-if="segment.type === 'tool-call'" :class="$style.inlineBlock">
<div :class="$style.inlineHeader">
<span :class="$style.kindLabel">
{{ i18n.baseText('instanceAi.debug.runDebug.toolCall') }}
</span>
<span :class="$style.nameLabel">{{ segment.name }}</span>
</div>
<InstanceAiDebugJsonPanel
v-if="segment.payload !== undefined"
:value="segment.payload"
:label="i18n.baseText('instanceAi.debug.runDebug.toolInput')"
/>
<InstanceAiDebugJsonPanel
v-if="segment.metadata"
:value="segment.metadata"
:label="i18n.baseText('instanceAi.debug.runDebug.metadata')"
/>
</div>
<div v-else-if="segment.type === 'tool-result'" :class="$style.inlineBlock">
<div :class="$style.inlineHeader">
<span :class="$style.kindLabel">
{{ i18n.baseText('instanceAi.debug.runDebug.toolResult') }}
</span>
<span v-if="segment.name" :class="$style.nameLabel">{{ segment.name }}</span>
</div>
<InstanceAiDebugJsonPanel
v-if="segment.payload !== undefined"
:value="segment.payload"
:label="i18n.baseText('instanceAi.debug.runDebug.toolOutput')"
/>
<InstanceAiDebugWorkflowCodeSnapshot
v-if="getWorkflowSnapshot(segment)"
variant="inline"
:snapshot="getWorkflowSnapshot(segment)!"
/>
<InstanceAiDebugJsonPanel
v-if="segment.metadata"
:value="segment.metadata"
:label="i18n.baseText('instanceAi.debug.runDebug.metadata')"
/>
</div>
<InstanceAiDebugJsonPanel
v-else
:value="segment.payload"
:label="segment.label ?? i18n.baseText('instanceAi.debug.runDebug.json')"
/>
</template>
</div>
</template>
<style lang="scss" module>
.root {
display: flex;
flex-direction: column;
gap: var(--spacing--3xs);
}
.text {
margin: 0;
font-size: var(--font-size--2xs);
line-height: var(--line-height--xl);
white-space: pre-wrap;
word-break: break-word;
color: var(--color--text);
}
.inlineBlock {
display: flex;
flex-direction: column;
gap: var(--spacing--4xs);
}
.inlineHeader {
display: flex;
align-items: baseline;
gap: var(--spacing--2xs);
}
.kindLabel {
font-size: var(--font-size--3xs);
color: var(--color--text--tint-1);
}
.nameLabel {
font-size: var(--font-size--3xs);
font-family: monospace;
color: var(--color--text);
}
</style>
@@ -1,21 +1,31 @@
<script lang="ts" setup>
import { N8nIcon, N8nIconButton } from '@n8n/design-system';
import { formatDebugJson } from '@n8n/api-types';
import { useI18n } from '@n8n/i18n';
import { computed, nextTick, onMounted, ref, useTemplateRef, watch } from 'vue';
import { useSettingsStore } from '@/app/stores/settings.store';
import { useThread } from '../instanceAi.store';
import { useInstanceAiDebugStore } from '../instanceAiDebug.store';
import InstanceAiDebugWorkflowCodeSnapshot from './InstanceAiDebugWorkflowCodeSnapshot.vue';
import InstanceAiLlmStepsModal from './InstanceAiLlmStepsModal.vue';
const emit = defineEmits<{ close: [] }>();
const i18n = useI18n();
const settingsStore = useSettingsStore();
const currentThread = useThread();
const debugStore = useInstanceAiDebugStore();
const isRunDebugEnabled = computed(
() => settingsStore.moduleSettings['instance-ai']?.runDebugEnabled === true,
);
// --- Tab state ---
type Tab = 'events' | 'threads';
type Tab = 'events' | 'threads' | 'llmSteps' | 'workflowCode';
const activeTab = ref<Tab>('events');
// --- Events tab state ---
const expandedIndex = ref<number | null>(null);
const showLlmStepsModal = ref(false);
const eventListRef = useTemplateRef<HTMLElement>('eventList');
const events = computed(() => currentThread.debugEvents);
@@ -30,12 +40,16 @@ function toggleMessage(index: number) {
expandedMessageIndex.value = expandedMessageIndex.value === index ? null : index;
}
function formatJson(value: unknown): string {
try {
return JSON.stringify(value, null, 2);
} catch {
return String(value);
/**
 * Handles a click on a run row in the run debug list.
 *
 * On the Workflow Code tab the run is loaded in the background. On the LLM
 * Steps tab the load is awaited and the steps modal is opened once the loaded
 * record matches the clicked run.
 *
 * @param runId - Id of the run whose debug record should be loaded.
 */
async function handleSelectDebugRun(runId: string) {
	if (activeTab.value !== 'llmSteps') {
		void debugStore.loadRunDebug(runId);
		return;
	}

	await debugStore.loadRunDebug(runId);

	// The load is asynchronous: by the time it settles the user may have switched
	// tabs or run debug may have been turned off. Only open the modal when the
	// view is still the one the click was made in, so it never pops up over
	// another tab.
	const stillOnLlmSteps = isRunDebugEnabled.value && activeTab.value === 'llmSteps';
	if (stillOnLlmSteps && debugStore.runDebug?.runId === runId) {
		showLlmStepsModal.value = true;
	}
}
function getTypeBadgeClass(type: string): string {
@@ -63,6 +77,14 @@ function formatTime(iso: string): string {
}
}
/**
 * Formats an epoch-millisecond timestamp as a 24-hour local time string with
 * millisecond precision. Falls back to the raw number as a string if
 * formatting throws.
 */
function formatTimestamp(ms: number): string {
	const timeOptions = { hour12: false, fractionalSecondDigits: 3 } as const;
	try {
		const moment = new Date(ms);
		return moment.toLocaleTimeString('en-US', timeOptions);
	} catch {
		return String(ms);
	}
}
function formatDateTime(iso: string): string {
try {
const d = new Date(iso);
@@ -72,6 +94,10 @@ function formatDateTime(iso: string): string {
}
}
/** Asks the debug store to refresh run debug data for the current thread and its active run. */
async function refreshRunDebugData() {
	const { id: threadId, activeRunId } = currentThread;
	await debugStore.refreshRunDebug(threadId, activeRunId);
}
function contentPreview(content: unknown): string {
if (typeof content === 'string') {
return content.length > 100 ? content.slice(0, 100) + '...' : content;
@@ -102,8 +128,32 @@ watch(activeTab, (tab) => {
if (tab === 'threads' && debugStore.threads.length === 0) {
void debugStore.loadThreads();
}
if (isRunDebugEnabled.value && (tab === 'llmSteps' || tab === 'workflowCode')) {
void refreshRunDebugData();
}
});
// When run debug gets disabled while one of its tabs is showing, fall back to
// the Events tab and close the LLM steps modal.
watch(isRunDebugEnabled, (enabled) => {
	if (enabled) {
		return;
	}

	const onRunDebugTab = activeTab.value === 'llmSteps' || activeTab.value === 'workflowCode';
	if (!onRunDebugTab) {
		return;
	}

	activeTab.value = 'events';
	showLlmStepsModal.value = false;
});
// Refresh the run debug data once a stream finishes (streaming flips from
// true to false) while one of the run debug tabs is open.
watch(
	() => currentThread.isStreaming,
	(isStreaming, wasStreaming) => {
		if (!isRunDebugEnabled.value) {
			return;
		}
		if (!wasStreaming || isStreaming) {
			return;
		}

		const onRunDebugTab = activeTab.value === 'llmSteps' || activeTab.value === 'workflowCode';
		if (onRunDebugTab) {
			void refreshRunDebugData();
		}
	},
);
function handleSelectThread(threadId: string) {
expandedMessageIndex.value = null;
void debugStore.selectThread(threadId);
@@ -171,6 +221,20 @@ onMounted(() => {
>
{{ i18n.baseText('instanceAi.debug.tab.threads') }}
</button>
<button
v-if="isRunDebugEnabled"
:class="[$style.tab, activeTab === 'llmSteps' && $style.tabActive]"
@click="activeTab = 'llmSteps'"
>
{{ i18n.baseText('instanceAi.debug.tab.llmSteps') }}
</button>
<button
v-if="isRunDebugEnabled"
:class="[$style.tab, activeTab === 'workflowCode' && $style.tabActive]"
@click="activeTab = 'workflowCode'"
>
{{ i18n.baseText('instanceAi.debug.tab.workflowCode') }}
</button>
</div>
<!-- Events tab -->
@@ -206,7 +270,7 @@ onMounted(() => {
</span>
</div>
<pre v-if="expandedIndex === index" :class="$style.eventPayload">{{
formatJson(entry.event)
formatDebugJson(entry.event)
}}</pre>
</div>
</div>
@@ -282,13 +346,90 @@ onMounted(() => {
</div>
<div :class="$style.messagePreview">{{ contentPreview(msg.content) }}</div>
<pre v-if="expandedMessageIndex === mIdx" :class="$style.eventPayload">{{
formatJson(msg.content)
formatDebugJson(msg.content)
}}</pre>
</div>
</template>
</div>
</template>
</template>
<!-- LLM Steps / Workflow Code tabs -->
<template
v-if="isRunDebugEnabled && (activeTab === 'llmSteps' || activeTab === 'workflowCode')"
>
<div :class="$style.threadListHeader">
<span :class="$style.sectionLabel">{{
i18n.baseText('instanceAi.debug.runDebug.selectRun')
}}</span>
<button :class="$style.copyButton" @click="refreshRunDebugData">
{{ i18n.baseText('instanceAi.debug.runDebug.refresh') }}
</button>
</div>
<div v-if="debugStore.isLoadingThreadDebugRuns" :class="$style.loadingState">
<N8nIcon icon="spinner" color="primary" spin size="small" />
</div>
<div v-else-if="debugStore.threadDebugRuns.length === 0" :class="$style.emptyState">
{{ i18n.baseText('instanceAi.debug.runDebug.noRuns') }}
</div>
<div v-else :class="$style.threadList">
<div
v-for="run in debugStore.threadDebugRuns"
:key="run.runId"
:class="[
$style.threadRow,
debugStore.selectedRunId === run.runId && $style.threadRowSelected,
]"
data-test-id="instance-ai-debug-run-row"
@click="handleSelectDebugRun(run.runId)"
>
<div :class="$style.threadRowMain">
<div :class="$style.runMeta">
<span :class="$style.threadTitle">{{ run.runId.slice(0, 12) }}</span>
<span v-if="run.label" :class="$style.runLabel">{{ run.label }}</span>
</div>
<span v-if="run.runId === currentThread.activeRunId" :class="$style.currentBadge">
{{ i18n.baseText('instanceAi.debug.threads.current') }}
</span>
</div>
<span :class="$style.threadTime">{{ formatTimestamp(run.startedAt) }}</span>
</div>
</div>
<div
v-if="debugStore.isLoadingRunDebug && activeTab === 'llmSteps'"
:class="$style.loadingState"
>
<N8nIcon icon="spinner" color="primary" spin size="small" />
</div>
<div
v-if="debugStore.isLoadingRunDebug && activeTab === 'workflowCode'"
:class="$style.loadingState"
>
<N8nIcon icon="spinner" color="primary" spin size="small" />
</div>
<template v-else-if="debugStore.runDebug && activeTab === 'workflowCode'">
<!-- Workflow Code -->
<div v-if="debugStore.runDebug.workflowCode.length === 0" :class="$style.emptyState">
{{ i18n.baseText('instanceAi.debug.runDebug.noWorkflowCode') }}
</div>
<div v-else :class="[$style.threadDetailContent, $style.workflowSnapshotList]">
<InstanceAiDebugWorkflowCodeSnapshot
v-for="(snapshot, wIdx) in debugStore.runDebug.workflowCode"
:key="`${snapshot.capturedAt}-${wIdx}`"
:snapshot="snapshot"
variant="inline"
/>
</div>
</template>
</template>
<InstanceAiLlmStepsModal v-if="isRunDebugEnabled" v-model:open="showLlmStepsModal" />
</div>
</template>
@@ -577,10 +718,27 @@ onMounted(() => {
.threadRowMain {
display: flex;
align-items: center;
align-items: flex-start;
justify-content: space-between;
gap: var(--spacing--3xs);
}
.runMeta {
display: flex;
flex-direction: column;
gap: var(--spacing--5xs);
min-width: 0;
flex: 1;
}
.runLabel {
font-size: var(--font-size--3xs);
color: var(--color--text--tint-1);
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
}
.threadTitle {
font-size: var(--font-size--2xs);
color: var(--color--text);
@@ -614,6 +772,13 @@ onMounted(() => {
font-size: var(--font-size--3xs);
}
.workflowSnapshotList {
display: flex;
flex-direction: column;
gap: var(--spacing--2xs);
padding: var(--spacing--2xs);
}
.messageRow {
padding: var(--spacing--4xs) var(--spacing--sm);
cursor: pointer;
@@ -0,0 +1,237 @@
<script lang="ts" setup>
import type { InstanceAiRunDebugWorkflowCodeSnapshot } from '@n8n/api-types';
import { useI18n } from '@n8n/i18n';
import { computed } from 'vue';
import InstanceAiDebugJsonPanel from './InstanceAiDebugJsonPanel.vue';
const props = withDefaults(
defineProps<{
snapshot: InstanceAiRunDebugWorkflowCodeSnapshot;
variant?: 'card' | 'inline';
defaultOpen?: boolean;
}>(),
{
variant: 'card',
defaultOpen: false,
},
);
const i18n = useI18n();
/**
 * One-line preview shown in the collapsed summary row: status, source,
 * code size and workflow id, joined with " · ".
 *
 * Empty or missing parts are dropped so the preview never contains a dangling
 * separator. The template already treats `snapshot.source` and
 * `snapshot.workflowId` as possibly empty (both are rendered behind `v-if`),
 * so the preview applies the same rule.
 */
const summaryPreview = computed(() => {
	const { success, source, code, workflowId } = props.snapshot;
	const candidates: unknown[] = [
		formatStatus(success),
		source,
		formatCharCount(code.length),
		workflowId,
	];
	return candidates
		.filter((part): part is string => typeof part === 'string' && part.length > 0)
		.join(' · ');
});
/**
 * Renders an epoch-millisecond capture time as a 24-hour local time with
 * milliseconds; returns the raw number as text if formatting throws.
 */
function formatTimestamp(ms: number): string {
	const localeOptions = { hour12: false, fractionalSecondDigits: 3 } as const;
	let rendered: string;
	try {
		rendered = new Date(ms).toLocaleTimeString('en-US', localeOptions);
	} catch {
		rendered = String(ms);
	}
	return rendered;
}
/** Builds the localized "{count} chars" label, with the count formatted via `toLocaleString()`. */
function formatCharCount(count: number): string {
	const localizedCount = count.toLocaleString();
	return i18n.baseText('instanceAi.debug.runDebug.charCount', {
		interpolate: { count: localizedCount },
	});
}
/** Maps the snapshot's success flag to the localized "success" / "failed" label. */
function formatStatus(success: boolean): string {
	if (success) {
		return i18n.baseText('instanceAi.debug.runDebug.success');
	}
	return i18n.baseText('instanceAi.debug.runDebug.failed');
}
</script>
<template>
<details v-if="variant === 'inline'" :class="$style.root" :open="defaultOpen">
<summary :class="$style.summary">
<span :class="$style.summaryLabel">
{{ i18n.baseText('instanceAi.debug.tab.workflowCode') }}
</span>
<code :class="$style.preview">{{ summaryPreview }}</code>
</summary>
<div :class="$style.body">
<div :class="$style.metaBlock">
<span
:class="[
$style.statusChip,
snapshot.success ? $style.statusSuccess : $style.statusFailed,
]"
>
{{ formatStatus(snapshot.success) }}
</span>
<span v-if="snapshot.source" :class="$style.metaLabel">{{ snapshot.source }}</span>
<span v-if="snapshot.workflowId" :class="$style.metaLabel">
{{ snapshot.workflowId }}
</span>
<span :class="$style.metaLabel">{{ formatCharCount(snapshot.code.length) }}</span>
<span :class="$style.metaLabel">{{ formatTimestamp(snapshot.capturedAt) }}</span>
</div>
<InstanceAiDebugJsonPanel
v-if="snapshot.errors?.length"
:value="snapshot.errors"
:label="i18n.baseText('instanceAi.debug.runDebug.failed')"
/>
<InstanceAiDebugJsonPanel
v-if="snapshot.patches"
:value="snapshot.patches"
:label="i18n.baseText('instanceAi.debug.runDebug.metadata')"
/>
<pre :class="$style.code">{{ snapshot.code }}</pre>
</div>
</details>
<div v-else :class="$style.cardRoot">
<div :class="$style.metaBlock">
<span
:class="[$style.statusChip, snapshot.success ? $style.statusSuccess : $style.statusFailed]"
>
{{ formatStatus(snapshot.success) }}
</span>
<span v-if="snapshot.source" :class="$style.metaLabel">{{ snapshot.source }}</span>
<span v-if="snapshot.workflowId" :class="$style.metaLabel">
{{ snapshot.workflowId }}
</span>
<span :class="$style.metaLabel">{{ formatCharCount(snapshot.code.length) }}</span>
<span :class="$style.metaLabel">{{ formatTimestamp(snapshot.capturedAt) }}</span>
</div>
<InstanceAiDebugJsonPanel
v-if="snapshot.errors?.length"
:value="snapshot.errors"
:label="i18n.baseText('instanceAi.debug.runDebug.failed')"
/>
<InstanceAiDebugJsonPanel
v-if="snapshot.patches"
:value="snapshot.patches"
:label="i18n.baseText('instanceAi.debug.runDebug.metadata')"
/>
<pre :class="$style.code">{{ snapshot.code }}</pre>
</div>
</template>
<style lang="scss" module>
.root {
border: 1px solid var(--color--foreground--tint-2);
border-radius: var(--radius);
background: var(--color--background--shade-1);
overflow: hidden;
}
.summary {
display: flex;
align-items: baseline;
gap: var(--spacing--2xs);
padding: var(--spacing--3xs) var(--spacing--2xs);
cursor: pointer;
list-style: none;
user-select: none;
&::-webkit-details-marker {
display: none;
}
&::before {
content: '▸';
flex-shrink: 0;
color: var(--color--text--tint-1);
transition: transform var(--duration--fast) ease;
}
}
.root[open] .summary::before {
transform: rotate(90deg);
}
.summaryLabel {
flex-shrink: 0;
font-size: var(--font-size--3xs);
font-weight: var(--font-weight--medium);
color: var(--color--text--tint-1);
}
.preview {
flex: 1;
min-width: 0;
font-family: monospace;
font-size: var(--font-size--3xs);
line-height: var(--line-height--lg);
color: var(--color--text);
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
.body {
display: flex;
flex-direction: column;
gap: var(--spacing--3xs);
padding: var(--spacing--3xs) var(--spacing--2xs);
border-top: 1px solid var(--color--foreground--tint-2);
background: var(--background--surface);
}
.cardRoot {
display: flex;
flex-direction: column;
gap: var(--spacing--3xs);
}
.metaBlock {
display: flex;
align-items: center;
flex-wrap: wrap;
gap: var(--spacing--2xs);
}
.metaLabel {
font-size: var(--font-size--3xs);
color: var(--color--text--tint-1);
font-family: monospace;
}
.statusChip {
padding: var(--spacing--5xs) var(--spacing--3xs);
border-radius: var(--radius--xl);
font-size: var(--font-size--3xs);
text-transform: lowercase;
}
.statusSuccess {
background: color-mix(in srgb, var(--color--success) 15%, transparent);
color: var(--color--success);
}
.statusFailed {
background: color-mix(in srgb, var(--color--danger) 15%, transparent);
color: var(--color--danger);
}
.code {
margin: 0;
padding: var(--spacing--3xs) var(--spacing--2xs);
border: 1px solid var(--color--foreground--tint-2);
border-radius: var(--radius);
background: var(--color--background--shade-1);
font-family: monospace;
font-size: var(--font-size--3xs);
line-height: var(--line-height--xl);
white-space: pre-wrap;
word-break: break-word;
color: var(--color--text);
max-height: 420px;
overflow-y: auto;
}
</style>
@@ -0,0 +1,393 @@
<script lang="ts" setup>
import {
type InstanceAiRunDebugStep,
type InstanceAiRunDebugWorkflowCodeSnapshot,
parseInputExtras,
parseMessageBlocks,
parseOutputDisplayBlocks,
parseOutputExtras,
parseSystemPromptForDisplay,
parseUsageSummary,
} from '@n8n/api-types';
import { N8nText } from '@n8n/design-system';
import { useI18n } from '@n8n/i18n';
import { computed, ref } from 'vue';
import { mapWorkflowSnapshotsByToolCallId } from '../utils/workflow-code-match';
import InstanceAiDebugJsonPanel from './InstanceAiDebugJsonPanel.vue';
import InstanceAiDebugMessageBody from './InstanceAiDebugMessageBody.vue';
/**
 * Data for a single LLM step. Every prop is optional: the template renders the
 * input section only when `input` is set and the output section only when
 * `output` is set; `runSteps` and `workflowCode` default to empty arrays below.
 */
const props = defineProps<{
input?: Record<string, unknown>;
output?: Record<string, unknown>;
runSteps?: InstanceAiRunDebugStep[];
workflowCode?: InstanceAiRunDebugWorkflowCodeSnapshot[];
}>();
const i18n = useI18n();
// Bound to the output <section>; scrollToOutput() measures its position.
const outputSectionRef = ref<HTMLElement | null>(null);
// `input.system` is parsed once; the next two computeds read its two fields.
const parsedSystemPrompt = computed(() => parseSystemPromptForDisplay(props.input?.system));
const systemBlocks = computed(() => parsedSystemPrompt.value.systemBlocks);
const systemObservations = computed(() => parsedSystemPrompt.value.observations);
const messageBlocks = computed(() => parseMessageBlocks(props.input?.messages));
const inputExtras = computed(() => parseInputExtras(props.input));
const outputDisplayBlocks = computed(() => parseOutputDisplayBlocks(props.output));
const usageSummary = computed(() => parseUsageSummary(props.output?.usage));
const outputExtras = computed(() => parseOutputExtras(props.output));
// Lookup handed to every InstanceAiDebugMessageBody in the template.
const workflowSnapshotsByToolCallId = computed(() =>
mapWorkflowSnapshotsByToolCallId(props.runSteps ?? [], props.workflowCode ?? []),
);
// Only surfaced when the output carries `finishReason` as a string.
const finishReason = computed(() =>
typeof props.output?.finishReason === 'string' ? props.output.finishReason : undefined,
);
// Sum of `content.length` over all system blocks; shown in the system card header.
const systemCharCount = computed(() =>
systemBlocks.value.reduce((total, block) => total + block.content.length, 0),
);
// True when at least one input part (system, observations, messages, extras) has something to render.
const hasInputContent = computed(
() =>
systemBlocks.value.length > 0 ||
Boolean(systemObservations.value) ||
messageBlocks.value.length > 0 ||
Boolean(inputExtras.value),
);
// True when at least one output part (display blocks, extras, usage) has something to render.
const hasOutputContent = computed(
() =>
outputDisplayBlocks.value.length > 0 ||
Boolean(outputExtras.value) ||
Boolean(usageSummary.value),
);
// Gates the collapsible <details> card that lists the system blocks.
const showSystemDetails = computed(() => systemBlocks.value.length > 0);
/**
 * Resolves the CSS-module class name that tints a message card for a role.
 *
 * Matching is case-insensitive. Any role containing "system" maps to
 * `cardSystem`; "user", "assistant" and "tool" must match exactly; everything
 * else (including "reasoning") falls back to `cardMuted`.
 *
 * @param role Role label of the message block.
 * @returns Key into `$style` for the card accent.
 */
function getCardClass(role: string): string {
	const key = role.toLowerCase();

	// The substring check runs first so compound roles mentioning "system" win.
	if (key.includes('system')) {
		return 'cardSystem';
	}

	switch (key) {
		case 'user':
			return 'cardUser';
		case 'assistant':
			return 'cardAssistant';
		case 'tool':
			return 'cardTool';
		default:
			// Covers "reasoning" as well as any unrecognised role.
			return 'cardMuted';
	}
}
/**
 * Builds the translated "character count" label.
 *
 * @param count Number of characters to report.
 * @returns The `instanceAi.debug.runDebug.charCount` text with the
 *   locale-formatted count interpolated.
 */
function formatCharCount(count: number): string {
	const formattedCount = count.toLocaleString();
	return i18n.baseText('instanceAi.debug.runDebug.charCount', {
		interpolate: { count: formattedCount },
	});
}
/**
 * Smooth-scrolls `container` so the output section sits at its top edge.
 * When the output section is not rendered, the container scrolls back to the top.
 *
 * @param container Scrollable ancestor that hosts this component.
 */
function scrollToOutput(container: HTMLElement) {
	const section = outputSectionRef.value;

	if (section) {
		const containerTop = container.getBoundingClientRect().top;
		const sectionTop = section.getBoundingClientRect().top;
		// Viewport-relative distance between the two, added to the current scroll offset.
		const distance = sectionTop - containerTop;
		container.scrollTo({ top: container.scrollTop + distance, behavior: 'smooth' });
		return;
	}

	container.scrollTo({ top: 0, behavior: 'smooth' });
}
defineExpose({ scrollToOutput });
</script>
<template>
<div :class="$style.root">
<section v-if="input" :class="$style.section">
<N8nText tag="h3" size="small" bold color="text-dark" :class="$style.sectionTitle">
{{ i18n.baseText('instanceAi.debug.runDebug.input') }}
</N8nText>
<div v-if="hasInputContent" :class="$style.stack">
<details
v-if="showSystemDetails"
:class="[$style.card, $style.cardSystem, $style.expandableCard]"
>
<summary :class="$style.cardHeader">
<span :class="$style.roleLabel">system</span>
<span :class="$style.headerMeta">
<span :class="$style.metaLabel">{{ formatCharCount(systemCharCount) }}</span>
</span>
</summary>
<div :class="$style.cardBody">
<template v-for="(block, index) in systemBlocks" :key="`system-${index}`">
<InstanceAiDebugMessageBody
v-if="block.segments?.length"
:segments="block.segments"
:workflow-snapshots-by-tool-call-id="workflowSnapshotsByToolCallId"
/>
<p v-else :class="$style.text">{{ block.content }}</p>
<InstanceAiDebugJsonPanel
v-if="block.metadata"
:value="block.metadata"
:label="i18n.baseText('instanceAi.debug.runDebug.metadata')"
/>
</template>
</div>
</details>
<article v-if="systemObservations" :class="[$style.card, $style.cardObservations]">
<div :class="$style.cardHeader">
<span :class="$style.roleLabel">observations</span>
</div>
<div :class="$style.cardBody">
<p :class="$style.text">{{ systemObservations }}</p>
</div>
</article>
<template v-if="messageBlocks.length > 0">
<N8nText size="small" color="text-light" :class="$style.stackLabel">
{{ i18n.baseText('instanceAi.debug.runDebug.messages') }}
</N8nText>
<article
v-for="(block, index) in messageBlocks"
:key="`message-${index}`"
:class="[$style.card, $style[getCardClass(block.role)]]"
>
<div :class="$style.cardHeader">
<span :class="$style.roleLabel">{{ block.role }}</span>
</div>
<div :class="$style.cardBody">
<InstanceAiDebugMessageBody
v-if="block.segments?.length"
:segments="block.segments"
:workflow-snapshots-by-tool-call-id="workflowSnapshotsByToolCallId"
/>
<p v-else :class="$style.text">{{ block.content }}</p>
<InstanceAiDebugJsonPanel
v-if="block.metadata"
:value="block.metadata"
:label="i18n.baseText('instanceAi.debug.runDebug.metadata')"
/>
</div>
</article>
</template>
<InstanceAiDebugJsonPanel
v-if="inputExtras"
:value="inputExtras"
:label="i18n.baseText('instanceAi.debug.runDebug.inputExtras')"
/>
</div>
</section>
<section
v-if="output"
ref="outputSectionRef"
data-test-id="instance-ai-llm-step-output"
:class="$style.section"
>
<div :class="$style.sectionHeader">
<N8nText tag="h3" size="small" bold color="text-dark" :class="$style.sectionTitle">
{{ i18n.baseText('instanceAi.debug.runDebug.output') }}
</N8nText>
<div v-if="finishReason || usageSummary" :class="$style.statsBar">
<span v-if="finishReason" :class="$style.statChip">{{ finishReason }}</span>
<N8nText v-if="usageSummary" size="small" color="text-light">
{{ usageSummary.label }}
</N8nText>
</div>
</div>
<div v-if="hasOutputContent" :class="$style.stack">
<article
v-for="(block, index) in outputDisplayBlocks"
:key="`output-${index}`"
:class="[$style.card, $style[getCardClass(block.role)]]"
>
<div :class="$style.cardHeader">
<span :class="$style.roleLabel">{{ block.role }}</span>
</div>
<div :class="$style.cardBody">
<InstanceAiDebugMessageBody
v-if="block.segments?.length"
:segments="block.segments"
:workflow-snapshots-by-tool-call-id="workflowSnapshotsByToolCallId"
/>
<p v-else :class="$style.text">{{ block.content }}</p>
<InstanceAiDebugJsonPanel
v-if="block.metadata"
:value="block.metadata"
:label="i18n.baseText('instanceAi.debug.runDebug.metadata')"
/>
</div>
</article>
<InstanceAiDebugJsonPanel
v-if="usageSummary"
:value="usageSummary.metadata"
:label="i18n.baseText('instanceAi.debug.runDebug.usage')"
/>
<InstanceAiDebugJsonPanel
v-if="outputExtras"
:value="outputExtras"
:label="i18n.baseText('instanceAi.debug.runDebug.outputMetadata')"
/>
</div>
</section>
<div v-if="!input && !output" :class="$style.emptyState">
{{ i18n.baseText('instanceAi.debug.runDebug.noStepDetail') }}
</div>
</div>
</template>
<style lang="scss" module>
.root {
display: flex;
flex-direction: column;
gap: var(--spacing--2xs);
}
.section {
display: flex;
flex-direction: column;
gap: var(--spacing--2xs);
}
.section + .section {
padding-top: var(--spacing--2xs);
border-top: var(--border);
}
.sectionHeader {
display: flex;
align-items: center;
justify-content: space-between;
flex-wrap: wrap;
gap: var(--spacing--2xs);
}
.sectionTitle {
margin: 0;
}
.statsBar {
display: flex;
align-items: center;
flex-wrap: wrap;
gap: var(--spacing--2xs);
}
.statChip {
padding: var(--spacing--5xs) var(--spacing--3xs);
border-radius: var(--radius--xl);
background: var(--color--background--shade-1);
font-size: var(--font-size--3xs);
color: var(--color--text--tint-1);
}
.stack {
display: flex;
flex-direction: column;
gap: var(--spacing--3xs);
}
.stackLabel {
margin-top: var(--spacing--5xs);
}
.card {
border-radius: var(--radius);
background: var(--background--surface);
border: 1px solid var(--color--foreground--tint-2);
overflow: hidden;
}
.expandableCard {
.cardHeader {
cursor: pointer;
list-style: none;
&::-webkit-details-marker {
display: none;
}
}
.headerMeta::after {
content: '▸';
margin-left: var(--spacing--3xs);
color: var(--color--text--tint-1);
transition: transform var(--duration--fast) ease;
}
&[open] .headerMeta::after {
transform: rotate(90deg);
}
}
.cardSystem {
border-left: 2px solid color-mix(in srgb, var(--color--warning) 45%, transparent);
}
.cardObservations {
border-left: 2px solid color-mix(in srgb, var(--color--primary) 35%, transparent);
}
.cardUser {
border-left: 2px solid color-mix(in srgb, var(--color--success) 45%, transparent);
}
.cardAssistant {
border-left: 2px solid color-mix(in srgb, var(--color--primary) 45%, transparent);
}
.cardTool {
border-left: 2px solid var(--color--foreground--tint-1);
}
.cardMuted {
border-left: 2px solid var(--color--foreground--tint-2);
}
.cardHeader {
display: flex;
align-items: center;
justify-content: space-between;
gap: var(--spacing--2xs);
padding: var(--spacing--3xs) var(--spacing--2xs);
background: var(--color--background--shade-1);
}
.headerMeta {
display: inline-flex;
align-items: center;
}
.cardBody {
display: flex;
flex-direction: column;
gap: var(--spacing--3xs);
padding: var(--spacing--2xs);
}
.roleLabel {
font-size: var(--font-size--3xs);
font-weight: var(--font-weight--medium);
color: var(--color--text);
text-transform: lowercase;
}
.metaLabel {
font-size: var(--font-size--3xs);
color: var(--color--text--tint-1);
}
.text {
margin: 0;
font-size: var(--font-size--2xs);
line-height: var(--line-height--xl);
white-space: pre-wrap;
word-break: break-word;
color: var(--color--text);
}
.emptyState {
padding: var(--spacing--md);
text-align: center;
font-size: var(--font-size--2xs);
color: var(--color--text--tint-1);
}
</style>
@@ -0,0 +1,490 @@
<script lang="ts" setup>
import {
N8nButton,
N8nDialog,
N8nDialogHeader,
N8nDialogTitle,
N8nIcon,
N8nText,
} from '@n8n/design-system';
import { useI18n } from '@n8n/i18n';
import { computed, nextTick, ref, watch } from 'vue';
import { useThread } from '../instanceAi.store';
import { useInstanceAiDebugStore } from '../instanceAiDebug.store';
import { parseStepSummary } from '@n8n/api-types';
import InstanceAiLlmStepDetail from './InstanceAiLlmStepDetail.vue';
import InstanceAiRunWorkflowCodeSection from './InstanceAiRunWorkflowCodeSection.vue';
// `open` is bound straight to the N8nDialog; changes are reported back through `update:open`.
const props = defineProps<{
open: boolean;
}>();
const emit = defineEmits<{ 'update:open': [value: boolean] }>();
const i18n = useI18n();
const debugStore = useInstanceAiDebugStore();
const currentThread = useThread();
// `stepNumber` of the step shown in the detail pane, or null when nothing is selected.
const selectedStepNumber = ref<number | null>(null);
// Scrollable detail pane; passed to the step detail's scrollToOutput().
const detailPaneRef = ref<HTMLElement | null>(null);
// Instance of the step detail component, used to call its exposed scrollToOutput().
const stepDetailRef = ref<InstanceType<typeof InstanceAiLlmStepDetail> | null>(null);
// Steps and workflow code of the loaded run; empty arrays while no run debug data is present.
const steps = computed(() => debugStore.runDebug?.steps ?? []);
const runWorkflowCode = computed(() => debugStore.runDebug?.workflowCode ?? []);
const selectedRunId = computed(() => debugStore.selectedRunId);
// Full step object for the current selection; undefined when nothing matches.
const selectedStep = computed(() => {
if (selectedStepNumber.value === null) return undefined;
return steps.value.find((step) => step.stepNumber === selectedStepNumber.value);
});
// One summary per step, rendered as the buttons of the steps sidebar.
const stepSummaries = computed(() =>
steps.value.map((step) => ({
stepNumber: step.stepNumber,
summary: parseStepSummary(step.input, step.output),
})),
);
// Opening the dialog selects the first step (if any); closing it clears the selection.
watch(
	() => props.open,
	(isOpen) => {
		selectedStepNumber.value = isOpen ? (steps.value[0]?.stepNumber ?? null) : null;
	},
);
// Keeps the selection valid whenever the step list is replaced.
watch(
	() => steps.value,
	(nextSteps) => {
		const nothingToSelect = !props.open || nextSteps.length === 0;
		if (nothingToSelect) {
			selectedStepNumber.value = null;
			return;
		}

		const current = selectedStepNumber.value;
		const selectionStillExists =
			current !== null && nextSteps.some((step) => step.stepNumber === current);

		// Fall back to the first step when the previous selection is gone.
		if (!selectionStillExists) {
			selectedStepNumber.value = nextSteps[0]?.stepNumber ?? null;
		}
	},
);
/** Forwards the dialog's open/close request to the parent via `update:open`. */
function handleOpenChange(open: boolean) {
emit('update:open', open);
}
/** Marks the step with the given `stepNumber` as the one shown in the detail pane. */
function selectStep(stepNumber: number) {
selectedStepNumber.value = stepNumber;
}
/**
 * Waits for the next DOM update, then asks the step detail component to scroll
 * the detail pane to its output section. Does nothing if either ref is unset.
 */
async function scrollDetailToOutput() {
	await nextTick();

	const pane = detailPaneRef.value;
	const detail = stepDetailRef.value;
	if (pane && detail) {
		detail.scrollToOutput(pane);
	}
}
// Jump to the output whenever a step is selected while the dialog is open and not loading.
watch([selectedStep, () => debugStore.isLoadingRunDebug], ([step, isLoading]) => {
	const canScroll = props.open && Boolean(step) && !isLoading;
	if (canScroll) {
		void scrollDetailToOutput();
	}
});
/**
 * Loads debug data for another run and selects its first step.
 * Clicking the run that is already selected is a no-op.
 *
 * @param runId Identifier of the run to display.
 */
async function selectRun(runId: string) {
	const alreadySelected = runId === selectedRunId.value;
	if (alreadySelected) return;

	await debugStore.loadRunDebug(runId);

	const firstStep = debugStore.runDebug?.steps[0];
	selectedStepNumber.value = firstStep?.stepNumber ?? null;
}
/**
 * Formats an epoch-millisecond timestamp as a 24-hour local time with
 * millisecond precision, e.g. `13:45:30.120`.
 *
 * @param ms Milliseconds since the Unix epoch.
 * @returns The formatted time, or `String(ms)` when `ms` does not represent a
 *   valid date or the runtime rejects the format options.
 */
function formatTimestamp(ms: number): string {
	const date = new Date(ms);

	// toLocaleTimeString() returns the literal "Invalid Date" instead of throwing,
	// so invalid input has to be detected explicitly to reach the raw-value fallback.
	if (Number.isNaN(date.getTime())) {
		return String(ms);
	}

	try {
		return date.toLocaleTimeString('en-US', {
			// `hourCycle: 'h23'` pins midnight to "00"; `hour12: false` resolves to
			// the h24 cycle ("24:05:09") for en-US on some engines.
			hourCycle: 'h23',
			fractionalSecondDigits: 3,
		});
	} catch {
		return String(ms);
	}
}
/**
 * Builds the translated "step count" label.
 *
 * @param count Number of steps to report.
 * @returns The `instanceAi.debug.runDebug.stepCount` text with the count interpolated.
 */
function formatStepCount(count: number): string {
	const interpolate = { count: String(count) };
	return i18n.baseText('instanceAi.debug.runDebug.stepCount', { interpolate });
}
</script>
<template>
<N8nDialog
:open="open"
size="cover"
data-test-id="instance-ai-llm-steps-modal"
@update:open="handleOpenChange"
>
<div :class="$style.shell">
<N8nDialogHeader :class="$style.header">
<div :class="$style.headerMain">
<div :class="$style.headerTitleRow">
<N8nDialogTitle>
{{ i18n.baseText('instanceAi.debug.runDebug.stepsModalTitle') }}
</N8nDialogTitle>
<span v-if="steps.length > 0" :class="$style.stepCount">
{{ formatStepCount(steps.length) }}
</span>
</div>
<N8nText v-if="selectedRunId" size="small" color="text-light" :class="$style.runId">
{{ selectedRunId }}
</N8nText>
</div>
</N8nDialogHeader>
<div v-if="debugStore.threadDebugRuns.length === 0" :class="$style.emptyState">
{{ i18n.baseText('instanceAi.debug.runDebug.noRuns') }}
</div>
<div v-else :class="$style.layout">
<aside :class="[$style.sidebar, $style.runsSidebar]">
<div :class="$style.sidebarHeader">
{{ i18n.baseText('instanceAi.debug.runDebug.runs') }}
<span :class="$style.sidebarCount">{{ debugStore.threadDebugRuns.length }}</span>
</div>
<div :class="$style.runList">
<button
v-for="(run, index) in debugStore.threadDebugRuns"
:key="run.runId"
type="button"
:class="[$style.runButton, selectedRunId === run.runId && $style.runButtonSelected]"
data-test-id="instance-ai-llm-steps-modal-run"
@click="selectRun(run.runId)"
>
<div :class="$style.runTopRow">
<span :class="$style.runNumber">{{ index + 1 }}</span>
<div :class="$style.runTopRowRight">
<span v-if="run.runId === currentThread.activeRunId" :class="$style.currentBadge">
{{ i18n.baseText('instanceAi.debug.threads.current') }}
</span>
<span :class="$style.runIdShort">{{ run.runId.slice(0, 12) }}</span>
</div>
</div>
<span v-if="run.label" :class="$style.runLabel">{{ run.label }}</span>
<span :class="$style.runMeta">
{{ formatStepCount(run.stepCount) }} · {{ formatTimestamp(run.startedAt) }}
</span>
</button>
</div>
</aside>
<aside :class="[$style.sidebar, $style.stepsSidebar]">
<div :class="$style.sidebarHeader">
{{ i18n.baseText('instanceAi.debug.tab.llmSteps') }}
<span :class="$style.sidebarCount">{{ steps.length }}</span>
</div>
<div v-if="steps.length === 0" :class="$style.sidebarEmpty">
{{ i18n.baseText('instanceAi.debug.runDebug.noSteps') }}
</div>
<div v-else :class="$style.stepList">
<button
v-for="{ stepNumber, summary } in stepSummaries"
:key="stepNumber"
type="button"
:class="[
$style.stepButton,
selectedStepNumber === stepNumber && $style.stepButtonSelected,
]"
@click="selectStep(stepNumber)"
>
<div :class="$style.stepTopRow">
<span :class="$style.stepNumber">{{ stepNumber + 1 }}</span>
<span v-if="summary.finishReason" :class="$style.finishReason">
{{ summary.finishReason }}
</span>
</div>
<span v-if="summary.toolNames.length > 0" :class="$style.stepTools">
{{ summary.toolNames.join(', ') }}
</span>
<span v-else-if="summary.messagePreview" :class="$style.stepPreview">
{{ summary.messagePreview }}
</span>
<span v-if="summary.usageLabel" :class="$style.stepUsage">
{{ summary.usageLabel }}
</span>
</button>
</div>
</aside>
<div ref="detailPaneRef" :class="$style.detail">
<div v-if="debugStore.isLoadingRunDebug" :class="$style.loadingState">
<N8nIcon icon="spinner" color="primary" spin size="small" />
</div>
<div v-else-if="selectedStep || runWorkflowCode.length > 0" :class="$style.detailContent">
<InstanceAiLlmStepDetail
v-if="selectedStep"
ref="stepDetailRef"
:input="selectedStep.input"
:output="selectedStep.output"
:run-steps="steps"
:workflow-code="runWorkflowCode"
/>
<InstanceAiRunWorkflowCodeSection
v-if="runWorkflowCode.length > 0"
:snapshots="runWorkflowCode"
:show-divider="Boolean(selectedStep)"
/>
</div>
<div v-else :class="$style.emptyState">
{{ i18n.baseText('instanceAi.debug.runDebug.noStepDetail') }}
</div>
</div>
</div>
<div :class="$style.footer">
<N8nButton variant="outline" size="medium" @click="handleOpenChange(false)">
{{ i18n.baseText('generic.close') }}
</N8nButton>
</div>
</div>
</N8nDialog>
</template>
<style lang="scss" module>
.shell {
display: flex;
flex-direction: column;
height: 100%;
min-height: 0;
max-height: 100%;
overflow: hidden;
gap: var(--spacing--xs);
}
.header {
flex-shrink: 0;
margin: 0;
padding-right: var(--spacing--xl);
}
.headerMain {
min-width: 0;
}
.headerTitleRow {
display: flex;
align-items: baseline;
flex-wrap: wrap;
gap: var(--spacing--2xs);
}
.runId {
display: block;
margin-top: var(--spacing--5xs);
font-family: monospace;
}
.stepCount {
flex-shrink: 0;
font-size: var(--font-size--3xs);
color: var(--color--text--tint-1);
}
.layout {
flex: 1;
min-height: 0;
display: grid;
grid-template-columns: 220px 220px minmax(0, 1fr);
grid-template-rows: minmax(0, 1fr);
gap: var(--spacing--xs);
overflow: hidden;
}
.sidebar {
display: flex;
flex-direction: column;
min-height: 0;
padding: var(--spacing--4xs);
border: var(--border);
border-radius: var(--radius);
background: var(--color--background--shade-1);
}
.sidebarHeader {
display: flex;
align-items: center;
justify-content: space-between;
padding: var(--spacing--3xs) var(--spacing--2xs);
font-size: var(--font-size--3xs);
font-weight: var(--font-weight--medium);
color: var(--color--text--tint-1);
text-transform: uppercase;
letter-spacing: 0.04em;
}
.sidebarCount {
font-family: monospace;
font-weight: var(--font-weight--regular);
}
.runList,
.stepList {
flex: 1;
display: flex;
flex-direction: column;
gap: var(--spacing--4xs);
overflow-y: auto;
min-height: 0;
}
.sidebarEmpty {
padding: var(--spacing--2xs);
font-size: var(--font-size--3xs);
color: var(--color--text--tint-1);
}
.runButton,
.stepButton {
display: flex;
flex-direction: column;
align-items: flex-start;
gap: var(--spacing--5xs);
width: 100%;
padding: var(--spacing--3xs) var(--spacing--2xs);
border: 1px solid transparent;
border-radius: var(--radius);
background: transparent;
cursor: pointer;
text-align: left;
transition:
background-color var(--duration--fast) ease,
border-color var(--duration--fast) ease;
&:hover {
background: var(--background--surface);
border-color: var(--color--foreground--tint-2);
}
}
.runButtonSelected,
.stepButtonSelected {
background: var(--background--surface);
border-color: var(--color--foreground--tint-2);
border-left: 2px solid var(--color--primary);
&:hover {
background: var(--background--surface);
}
}
.runTopRow,
.stepTopRow {
display: flex;
align-items: center;
justify-content: space-between;
gap: var(--spacing--2xs);
width: 100%;
}
.runTopRowRight {
display: inline-flex;
align-items: center;
justify-content: flex-end;
gap: var(--spacing--2xs);
min-width: 0;
}
.runNumber,
.stepNumber {
display: inline-flex;
align-items: center;
justify-content: center;
min-width: var(--spacing--sm);
height: var(--spacing--sm);
border-radius: var(--radius--xl);
background: var(--color--foreground--tint-2);
font-size: var(--font-size--3xs);
font-weight: var(--font-weight--bold);
color: var(--color--text);
}
.runButtonSelected .runNumber,
.stepButtonSelected .stepNumber {
background: color-mix(in srgb, var(--color--primary) 12%, var(--color--foreground--tint-2));
}
.currentBadge {
padding: 0 var(--spacing--4xs);
border-radius: var(--radius--sm);
font-size: var(--font-size--3xs);
background: color-mix(in srgb, var(--color--success) 15%, transparent);
color: var(--color--success);
}
.runIdShort {
font-family: monospace;
font-size: var(--font-size--3xs);
color: var(--color--text);
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
}
.runLabel {
width: 100%;
font-size: var(--font-size--3xs);
line-height: var(--line-height--lg);
color: var(--color--text);
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
}
.runMeta,
.finishReason,
.stepTools,
.stepPreview,
.stepUsage {
width: 100%;
font-size: var(--font-size--3xs);
line-height: var(--line-height--lg);
color: var(--color--text--tint-1);
}
.stepTools {
font-family: monospace;
color: var(--color--text);
}
.detail {
min-height: 0;
height: 100%;
overflow-x: hidden;
overflow-y: auto;
padding: var(--spacing--2xs);
border: var(--border);
border-radius: var(--radius);
background: var(--background--surface);
}
.detailContent {
display: flex;
flex-direction: column;
gap: var(--spacing--2xs);
}
.loadingState {
display: flex;
align-items: center;
justify-content: center;
min-height: var(--spacing--2xl);
color: var(--color--text--tint-1);
}
.emptyState {
padding: var(--spacing--md);
text-align: center;
font-size: var(--font-size--2xs);
color: var(--color--text--tint-1);
}
.footer {
display: flex;
flex-shrink: 0;
justify-content: flex-end;
padding-top: var(--spacing--4xs);
}
</style>
@@ -0,0 +1,136 @@
<script lang="ts" setup>
import type { InstanceAiRunDebugWorkflowCodeSnapshot } from '@n8n/api-types';
import { N8nText } from '@n8n/design-system';
import { useI18n } from '@n8n/i18n';
import InstanceAiDebugWorkflowCodeSnapshot from './InstanceAiDebugWorkflowCodeSnapshot.vue';
// `snapshots` are rendered as one collapsible card each; `showDivider` adds the
// top border/padding class to the section.
defineProps<{
snapshots: InstanceAiRunDebugWorkflowCodeSnapshot[];
showDivider?: boolean;
}>();
const i18n = useI18n();
/**
 * Builds the translated "character count" label shown in each snapshot header.
 *
 * @param count Number of characters to report.
 * @returns The `instanceAi.debug.runDebug.charCount` text with the
 *   locale-formatted count interpolated.
 */
function formatCharCount(count: number): string {
	const localizedCount = count.toLocaleString();
	return i18n.baseText('instanceAi.debug.runDebug.charCount', {
		interpolate: { count: localizedCount },
	});
}
</script>
<template>
<section :class="[$style.section, showDivider && $style.sectionDivider]">
<N8nText tag="h3" size="small" bold color="text-dark" :class="$style.sectionTitle">
{{ i18n.baseText('instanceAi.debug.tab.workflowCode') }}
</N8nText>
<div :class="$style.stack">
<details
v-for="(snapshot, index) in snapshots"
:key="`${snapshot.capturedAt}-${index}`"
:class="[$style.card, $style.cardWorkflow, $style.expandableCard]"
data-test-id="instance-ai-run-workflow-code-snapshot"
>
<summary :class="$style.cardHeader">
<span :class="$style.roleLabel">{{ snapshot.source }}</span>
<span :class="$style.headerMeta">
<span :class="$style.metaLabel">{{ formatCharCount(snapshot.code.length) }}</span>
</span>
</summary>
<div :class="$style.cardBody">
<InstanceAiDebugWorkflowCodeSnapshot :snapshot="snapshot" />
</div>
</details>
</div>
</section>
</template>
<style lang="scss" module>
.section {
display: flex;
flex-direction: column;
gap: var(--spacing--2xs);
}
.sectionDivider {
padding-top: var(--spacing--2xs);
border-top: var(--border);
}
.sectionTitle {
margin: 0;
}
.stack {
display: flex;
flex-direction: column;
gap: var(--spacing--3xs);
}
.card {
border-radius: var(--radius);
background: var(--background--surface);
border: 1px solid var(--color--foreground--tint-2);
overflow: hidden;
}
.expandableCard {
.cardHeader {
cursor: pointer;
list-style: none;
&::-webkit-details-marker {
display: none;
}
}
.headerMeta::after {
content: '▸';
margin-left: var(--spacing--3xs);
color: var(--color--text--tint-1);
transition: transform var(--duration--fast) ease;
}
&[open] .headerMeta::after {
transform: rotate(90deg);
}
}
.cardWorkflow {
border-left: 2px solid color-mix(in srgb, var(--color--secondary) 45%, transparent);
}
.cardHeader {
display: flex;
align-items: center;
justify-content: space-between;
gap: var(--spacing--2xs);
padding: var(--spacing--3xs) var(--spacing--2xs);
background: var(--color--background--shade-1);
}
.headerMeta {
display: inline-flex;
align-items: center;
flex-wrap: wrap;
justify-content: flex-end;
gap: var(--spacing--2xs);
}
.cardBody {
padding: var(--spacing--2xs);
}
.roleLabel {
font-size: var(--font-size--3xs);
font-weight: var(--font-weight--medium);
color: var(--color--text);
text-transform: lowercase;
}
.metaLabel {
font-size: var(--font-size--3xs);
color: var(--color--text--tint-1);
font-family: monospace;
}
</style>
@@ -5,6 +5,8 @@ import type {
InstanceAiThreadListResponse,
InstanceAiRichMessagesResponse,
InstanceAiThreadStatusResponse,
InstanceAiRunDebugResponse,
InstanceAiThreadDebugRunsResponse,
} from '@n8n/api-types';
export async function fetchThreads(
@@ -60,3 +62,17 @@ export async function fetchThreadStatus(
): Promise<InstanceAiThreadStatusResponse> {
return await makeRestApiRequest(context, 'GET', `/instance-ai/threads/${threadId}/status`);
}
/**
 * Fetches the debug payload for a single run.
 *
 * @param context REST API context used for the request.
 * @param runId Identifier of the run; interpolated into the URL path as-is.
 * @returns The response of `GET /instance-ai/debug/runs/:runId`.
 */
export async function fetchRunDebug(
	context: IRestApiContext,
	runId: string,
): Promise<InstanceAiRunDebugResponse> {
	const endpoint = `/instance-ai/debug/runs/${runId}`;
	return await makeRestApiRequest(context, 'GET', endpoint);
}
/**
 * Fetches the list of debug runs recorded for a thread.
 *
 * @param context REST API context used for the request.
 * @param threadId Identifier of the thread; interpolated into the URL path as-is.
 * @returns The response of `GET /instance-ai/debug/threads/:threadId/runs`.
 */
export async function fetchThreadDebugRuns(
	context: IRestApiContext,
	threadId: string,
): Promise<InstanceAiThreadDebugRunsResponse> {
	const endpoint = `/instance-ai/debug/threads/${threadId}/runs`;
	return await makeRestApiRequest(context, 'GET', endpoint);
}
@@ -1,10 +1,20 @@
import { defineStore } from 'pinia';
import { ref, computed } from 'vue';
import { ref } from 'vue';
import { useRootStore } from '@n8n/stores/useRootStore';
import { useToast } from '@/app/composables/useToast';
import { useI18n } from '@n8n/i18n';
import { fetchThreads, fetchThreadMessages } from './instanceAi.memory.api';
import type { InstanceAiThreadInfo, InstanceAiStoredMessage } from '@n8n/api-types';
import {
fetchThreads,
fetchThreadMessages,
fetchRunDebug,
fetchThreadDebugRuns,
} from './instanceAi.memory.api';
import type {
InstanceAiThreadInfo,
InstanceAiStoredMessage,
InstanceAiRunDebugResponse,
InstanceAiRunDebugSummary,
} from '@n8n/api-types';
export const useInstanceAiDebugStore = defineStore('instanceAiDebug', () => {
const rootStore = useRootStore();
@@ -18,8 +28,11 @@ export const useInstanceAiDebugStore = defineStore('instanceAiDebug', () => {
const isLoadingThreads = ref(false);
const isLoadingMessages = ref(false);
// --- Computed ---
const selectedThread = computed(() => threads.value.find((t) => t.id === selectedThreadId.value));
const selectedRunId = ref<string | null>(null);
const runDebug = ref<InstanceAiRunDebugResponse | null>(null);
const threadDebugRuns = ref<InstanceAiRunDebugSummary[]>([]);
const isLoadingRunDebug = ref(false);
const isLoadingThreadDebugRuns = ref(false);
// --- Actions ---
async function loadThreads(): Promise<void> {
@@ -63,12 +76,65 @@ export const useInstanceAiDebugStore = defineStore('instanceAiDebug', () => {
}
}
/**
 * Loads the debug-run summaries of a thread into `threadDebugRuns`.
 * On failure an error toast is shown and the previous list is left untouched.
 * `isLoadingThreadDebugRuns` is true for the duration of the request.
 *
 * @param threadId Thread whose runs should be listed.
 */
async function loadThreadDebugRuns(threadId: string): Promise<void> {
	isLoadingThreadDebugRuns.value = true;
	try {
		const response = await fetchThreadDebugRuns(rootStore.restApiContext, threadId);
		threadDebugRuns.value = response.runs;
	} catch {
		const fetchError = new Error(i18n.baseText('instanceAi.debug.runDebug.fetchError'));
		toast.showError(fetchError, 'Run Debug');
	} finally {
		isLoadingThreadDebugRuns.value = false;
	}
}
// Monotonic counter identifying the most recent loadRunDebug() call.
let runDebugRequestSeq = 0;

/**
 * Selects a run and loads its debug payload into `runDebug`.
 *
 * Calls may overlap (e.g. the user clicks two runs in quick succession). Only
 * the most recent call may publish its result or clear the loading flag;
 * otherwise a slow, older response would overwrite the data of the run that is
 * currently selected. A result is also dropped when the selection was changed
 * in the meantime (for example by `reset()`).
 *
 * On failure an error toast is shown and, if the request is still current,
 * `runDebug` is cleared.
 *
 * @param runId Identifier of the run to load.
 */
async function loadRunDebug(runId: string): Promise<void> {
	const requestSeq = ++runDebugRequestSeq;
	const isLatestRequest = () => requestSeq === runDebugRequestSeq;
	const isStillSelected = () => isLatestRequest() && selectedRunId.value === runId;

	selectedRunId.value = runId;
	isLoadingRunDebug.value = true;
	try {
		const result = await fetchRunDebug(rootStore.restApiContext, runId);
		if (isStillSelected()) {
			runDebug.value = result;
		}
	} catch {
		// Do not wipe data that belongs to a newer selection.
		if (isStillSelected()) {
			runDebug.value = null;
		}
		toast.showError(
			new Error(i18n.baseText('instanceAi.debug.runDebug.fetchError')),
			'Run Debug',
		);
	} finally {
		// A superseded request must leave the flag alone: the newer one is still loading.
		if (isLatestRequest()) {
			isLoadingRunDebug.value = false;
		}
	}
}
/**
 * Reloads the run list of a thread and then the debug payload of one run.
 *
 * The run is chosen in this order: `preferredRunId`, the currently selected run
 * if it belongs to the refreshed list, otherwise the last run in the list.
 * When no run can be chosen, the selection and `runDebug` are cleared.
 *
 * @param threadId Thread to refresh.
 * @param preferredRunId Run to load regardless of the current selection.
 */
async function refreshRunDebug(threadId: string, preferredRunId?: string | null): Promise<void> {
	await loadThreadDebugRuns(threadId);

	const runs = threadDebugRuns.value;
	const currentSelection = selectedRunId.value;
	const selectedRunIdForThread = runs.some((run) => run.runId === currentSelection)
		? currentSelection
		: null;
	const runId = preferredRunId ?? selectedRunIdForThread ?? runs.at(-1)?.runId ?? null;

	if (!runId) {
		selectedRunId.value = null;
		runDebug.value = null;
		return;
	}

	await loadRunDebug(runId);
}
/** Restores every piece of store state (thread data and run-debug data) to its initial value. */
function reset(): void {
threads.value = [];
selectedThreadId.value = null;
threadMessages.value = [];
isLoadingThreads.value = false;
isLoadingMessages.value = false;
// Run-debug state
selectedRunId.value = null;
runDebug.value = null;
threadDebugRuns.value = [];
isLoadingRunDebug.value = false;
isLoadingThreadDebugRuns.value = false;
}
return {
@@ -78,12 +144,17 @@ export const useInstanceAiDebugStore = defineStore('instanceAiDebug', () => {
threadMessages,
isLoadingThreads,
isLoadingMessages,
// Computed
selectedThread,
selectedRunId,
runDebug,
threadDebugRuns,
isLoadingRunDebug,
isLoadingThreadDebugRuns,
// Actions
loadThreads,
selectThread,
loadMessages,
loadRunDebug,
refreshRunDebug,
reset,
};
});
@@ -125,6 +125,7 @@ export const useInstanceAiSettingsStore = defineStore('instanceAiSettings', () =
sandboxUnavailableReason: adminRes.sandboxEnabled
? (prev?.sandboxUnavailableReason ?? null)
: null,
runDebugEnabled: prev?.runDebugEnabled ?? false,
};
settingsStore.moduleSettings = {
...ms,
@@ -0,0 +1,95 @@
import type {
InstanceAiRunDebugStep,
InstanceAiRunDebugWorkflowCodeSnapshot,
} from '@n8n/api-types';
import { describe, expect, it } from 'vitest';
import {
getToolCallIdFromMetadata,
isWorkflowCodeToolName,
mapWorkflowSnapshotsByToolCallId,
} from '../workflow-code-match';
/**
 * Test fixture factory: returns a successful `full-code` snapshot and applies
 * any field overrides on top of the defaults.
 */
function makeSnapshot(
	overrides: Partial<InstanceAiRunDebugWorkflowCodeSnapshot> = {},
): InstanceAiRunDebugWorkflowCodeSnapshot {
	return Object.assign(
		{
			code: 'workflow code',
			source: 'full-code',
			success: true,
			capturedAt: 1,
		},
		overrides,
	);
}
// Unit tests for the helpers that pair workflow-code snapshots with build-workflow tool calls.
describe('workflow-code-match', () => {
it('identifies build-workflow as a workflow code tool', () => {
expect(isWorkflowCodeToolName('build-workflow')).toBe(true);
expect(isWorkflowCodeToolName('search_nodes')).toBe(false);
});
it('reads toolCallId from segment metadata', () => {
expect(getToolCallIdFromMetadata({ toolCallId: 'tc-1' })).toBe('tc-1');
expect(getToolCallIdFromMetadata(undefined)).toBeUndefined();
});
// Exact match: the snapshot carries the same toolCallId as the tool result.
it('maps snapshots by toolCallId when present', () => {
const steps: InstanceAiRunDebugStep[] = [
{
stepNumber: 0,
output: {
toolResults: [
{
toolCallId: 'tc-1',
toolName: 'build-workflow',
output: { success: true, workflowId: 'wf-1' },
},
],
},
},
];
const workflowCode = [makeSnapshot({ toolCallId: 'tc-1', workflowId: 'wf-1' })];
const map = mapWorkflowSnapshotsByToolCallId(steps, workflowCode);
expect(map.get('tc-1')).toEqual(workflowCode[0]);
});
// Positional fallback: snapshots without a toolCallId are paired with tool calls in step order.
it('falls back to sequential matching when snapshots lack toolCallId', () => {
const steps: InstanceAiRunDebugStep[] = [
{
stepNumber: 0,
output: {
toolResults: [
{
toolCallId: 'tc-1',
toolName: 'build-workflow',
output: { success: true, workflowId: 'wf-1' },
},
],
},
},
{
stepNumber: 1,
output: {
toolResults: [
{
toolCallId: 'tc-2',
toolName: 'build-workflow',
output: { success: false },
},
],
},
},
];
const workflowCode = [
makeSnapshot({ workflowId: 'wf-1', code: 'first' }),
makeSnapshot({ success: false, code: 'second' }),
];
const map = mapWorkflowSnapshotsByToolCallId(steps, workflowCode);
expect(map.get('tc-1')?.code).toBe('first');
expect(map.get('tc-2')?.code).toBe('second');
});
});
@@ -0,0 +1,84 @@
import type {
InstanceAiRunDebugStep,
InstanceAiRunDebugWorkflowCodeSnapshot,
} from '@n8n/api-types';
// Tool names whose results are considered when pairing workflow-code snapshots with tool calls.
const WORKFLOW_CODE_TOOL_NAMES = new Set(['build-workflow']);
/** Type guard: true for any non-null object that is not an array. */
function isRecord(value: unknown): value is Record<string, unknown> {
	if (value === null || typeof value !== 'object') {
		return false;
	}
	return !Array.isArray(value);
}
/**
 * Tells whether a tool name is listed in `WORKFLOW_CODE_TOOL_NAMES`.
 *
 * @param name Tool name, or undefined when the caller has none.
 * @returns `false` for undefined or unlisted names.
 */
export function isWorkflowCodeToolName(name: string | undefined): boolean {
	if (name === undefined) {
		return false;
	}
	return WORKFLOW_CODE_TOOL_NAMES.has(name);
}
/**
 * Collects, in order, the `toolCallId` of every workflow-code tool result in a
 * step output.
 *
 * The tool name is read from `toolName`, falling back to `name`; empty strings
 * and non-string values are ignored. Entries without a string `toolCallId`
 * are skipped.
 *
 * @param output Step output; anything without a `toolResults` array yields `[]`.
 */
function extractBuildWorkflowToolCallIds(output: Record<string, unknown> | undefined): string[] {
	const toolResults = output?.toolResults;
	if (!Array.isArray(toolResults)) {
		return [];
	}

	return toolResults.flatMap((entry: unknown): string[] => {
		if (!isRecord(entry)) return [];

		const { toolName, name, toolCallId } = entry;
		let resolvedName: string | undefined;
		if (typeof toolName === 'string' && toolName !== '') {
			resolvedName = toolName;
		} else if (typeof name === 'string' && name !== '') {
			resolvedName = name;
		}

		const isMatch = isWorkflowCodeToolName(resolvedName) && typeof toolCallId === 'string';
		return isMatch ? [toolCallId as string] : [];
	});
}
/**
 * Pairs workflow-code snapshots with the tool calls that produced them.
 *
 * Pass 1 indexes every snapshot that carries its own `toolCallId`.
 * Pass 2 walks the workflow-code tool calls in ascending `stepNumber` order
 * and, for each call that is still unmatched, assigns the next snapshot that
 * was not indexed in pass 1. Pass 2 stops as soon as the snapshots run out.
 *
 * @param steps Steps of the run; the input array is not mutated.
 * @param workflowCode Snapshots captured during the run, in capture order.
 * @returns Map from tool call id to its snapshot.
 */
export function mapWorkflowSnapshotsByToolCallId(
	steps: InstanceAiRunDebugStep[],
	workflowCode: InstanceAiRunDebugWorkflowCodeSnapshot[],
): ReadonlyMap<string, InstanceAiRunDebugWorkflowCodeSnapshot> {
	const byToolCallId = new Map<string, InstanceAiRunDebugWorkflowCodeSnapshot>();

	// Pass 1: exact matches.
	workflowCode.forEach((snapshot) => {
		if (snapshot.toolCallId) {
			byToolCallId.set(snapshot.toolCallId, snapshot);
		}
	});

	const toolCallIdsInStepOrder = [...steps]
		.sort((a, b) => a.stepNumber - b.stepNumber)
		.flatMap((step) => extractBuildWorkflowToolCallIds(step.output));

	// A snapshot is "claimed" when it has an id that is already a key of the map.
	const isClaimed = (candidate: InstanceAiRunDebugWorkflowCodeSnapshot | undefined): boolean => {
		const id = candidate?.toolCallId;
		return Boolean(id) && byToolCallId.has(id ?? '');
	};

	// Pass 2: positional fallback for tool calls without an exact match.
	let cursor = 0;
	for (const toolCallId of toolCallIdsInStepOrder) {
		if (byToolCallId.has(toolCallId)) continue;

		while (cursor < workflowCode.length && isClaimed(workflowCode[cursor])) {
			cursor += 1;
		}

		const nextSnapshot = workflowCode[cursor];
		if (!nextSnapshot) break;

		byToolCallId.set(toolCallId, nextSnapshot);
		cursor += 1;
	}

	return byToolCallId;
}
/**
 * Reads `toolCallId` from a metadata value of unknown shape.
 *
 * @param metadata Arbitrary metadata attached to a segment.
 * @returns The id when `metadata` is a non-array object with a string
 *   `toolCallId`, otherwise undefined.
 */
export function getToolCallIdFromMetadata(metadata: unknown): string | undefined {
	if (isRecord(metadata)) {
		const { toolCallId } = metadata;
		if (typeof toolCallId === 'string') {
			return toolCallId;
		}
	}
	return undefined;
}