From ade94af96a10e84cf7ff09f8eb153dce53c02ff6 Mon Sep 17 00:00:00 2001 From: Jaakko Husso Date: Fri, 19 Jun 2026 09:03:20 +0200 Subject: [PATCH] feat(core): OpenAI Responses API support for agent pdf passthrough (#32604) Co-authored-by: Claude Opus 4.8 (1M context) --- .../ToolsAgent/V3/helpers/executeBatch.ts | 2 +- .../V3/helpers/prepareItemContext.ts | 3 + .../agents/Agent/agents/ToolsAgent/common.ts | 71 ++++++++++- .../Agent/test/ToolsAgent/commons.test.ts | 112 +++++++++++++++++- 4 files changed, 181 insertions(+), 7 deletions(-) diff --git a/packages/@n8n/nodes-langchain/nodes/agents/Agent/agents/ToolsAgent/V3/helpers/executeBatch.ts b/packages/@n8n/nodes-langchain/nodes/agents/Agent/agents/ToolsAgent/V3/helpers/executeBatch.ts index 9480c156d7b..7657c2d8265 100644 --- a/packages/@n8n/nodes-langchain/nodes/agents/Agent/agents/ToolsAgent/V3/helpers/executeBatch.ts +++ b/packages/@n8n/nodes-langchain/nodes/agents/Agent/agents/ToolsAgent/V3/helpers/executeBatch.ts @@ -80,7 +80,7 @@ export async function executeBatch( checkMaxIterations(response, maxIterations, ctx.getNode()); - const itemContext = await prepareItemContext(ctx, itemIndex, processedResponse); + const itemContext = await prepareItemContext(ctx, itemIndex, processedResponse, model); const { tools, prompt, options, outputParser } = itemContext; diff --git a/packages/@n8n/nodes-langchain/nodes/agents/Agent/agents/ToolsAgent/V3/helpers/prepareItemContext.ts b/packages/@n8n/nodes-langchain/nodes/agents/Agent/agents/ToolsAgent/V3/helpers/prepareItemContext.ts index 445778ff2d2..989a71120d6 100644 --- a/packages/@n8n/nodes-langchain/nodes/agents/Agent/agents/ToolsAgent/V3/helpers/prepareItemContext.ts +++ b/packages/@n8n/nodes-langchain/nodes/agents/Agent/agents/ToolsAgent/V3/helpers/prepareItemContext.ts @@ -1,3 +1,4 @@ +import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; import type { ChatPromptTemplate } from '@langchain/core/prompts'; import type { DynamicStructuredTool, Tool } from '@langchain/classic/tools'; import { NodeOperationError } from 'n8n-workflow'; @@ -41,6 +42,7 @@ export async function prepareItemContext( ctx: IExecuteFunctions | ISupplyDataFunctions, itemIndex: number, response?: EngineResponse, + model?: BaseChatModel, ): Promise { const steps = buildSteps(response, itemIndex); @@ -68,6 +70,7 @@ export async function prepareItemContext( passthroughBinaryImages: options.passthroughBinaryImages ?? true, passthroughBinaryPdfs: options.passthroughBinaryPdfs ?? false, outputParser, + model, }); const prompt: ChatPromptTemplate = preparePrompt(messages); diff --git a/packages/@n8n/nodes-langchain/nodes/agents/Agent/agents/ToolsAgent/common.ts b/packages/@n8n/nodes-langchain/nodes/agents/Agent/agents/ToolsAgent/common.ts index cf11ee53a49..a28719b361d 100644 --- a/packages/@n8n/nodes-langchain/nodes/agents/Agent/agents/ToolsAgent/common.ts +++ b/packages/@n8n/nodes-langchain/nodes/agents/Agent/agents/ToolsAgent/common.ts @@ -83,6 +83,46 @@ function shouldPassthroughBinary(data: IBinaryData, options: BinaryPassthroughOp return false; } +// How a file (PDF) attachment must be encoded for the connected model. +// - 'standard': LangChain standard data content block (Gemini, Anthropic, OpenAI Completions) +// - 'openai-responses': OpenAI Responses API native part, which rejects the standard block +type BinaryContentFormat = 'standard' | 'openai-responses'; + +// Structural view of the ChatOpenAI internals we probe. `_useResponsesApi` is +// protected and `useResponsesApi` is public; neither is part of BaseChatModel, so +// we read them defensively and treat their absence as "not OpenAI Responses". +type ResponsesApiModel = { + _useResponsesApi?: (options?: unknown) => boolean; + useResponsesApi?: boolean; +}; + +/** + * OpenAI's Responses API rejects the standard `file` content block (it expects an + * `input_file` part), so when the connected model talks to that API we must emit a + * provider-native block instead. Gemini, Anthropic, and OpenAI's Completions API all + * consume the standard block. + * + * Detection relies on ChatOpenAI's `_useResponsesApi()` because LangChain exposes no + * public API for it; `_useResponsesApi()` (unlike the `useResponsesApi` flag alone) + * also covers models that auto-select the Responses API (e.g. gpt-5/o-series). Note it + * is evaluated without invoke-time call options, so Responses usage triggered solely by + * call-time tools/kwargs is not detected here. Guarded so an unexpected shape degrades + * to the standard block rather than throwing. + */ +function resolveBinaryContentFormat(model?: BaseChatModel): BinaryContentFormat { + if (!model) return 'standard'; + const candidate = model as unknown as ResponsesApiModel; + try { + const usesResponsesApi = + typeof candidate._useResponsesApi === 'function' + ? candidate._useResponsesApi(undefined) + : candidate.useResponsesApi === true; + return usesResponsesApi ? 'openai-responses' : 'standard'; + } catch { + return 'standard'; + } +} + /** * Processes a binary data to be used in agent passthrough. * @param ctx - The execution context @@ -94,6 +134,7 @@ async function processBinaryForAgentPassthrough( ctx: IExecuteFunctions | ISupplyDataFunctions, data: IBinaryData, type: 'image_url' | 'file', + contentFormat: BinaryContentFormat = 'standard', ) { // Resolve the binary contents to a raw base64 string. In filesystem mode the // binary is stored by id and must be streamed before it can be encoded. @@ -130,14 +171,25 @@ async function processBinaryForAgentPassthrough( ); } - // PDFs (and other documents) are passed as a provider-agnostic file content - // block so any chat model with native PDF support can consume them. + // PDFs (and other documents) are passed as a file content block. OpenAI's + // Responses API needs its native `input_file` part; every other supported + // provider consumes the LangChain standard data content block. if (type === 'file') { + if (contentFormat === 'openai-responses') { + return { + type: 'input_file', + file_data: `data:${data.mimeType};base64,${base64Data}`, + filename: data.fileName ?? 'attachment.pdf', + }; + } return { type: 'file', source_type: 'base64', mime_type: data.mimeType, data: base64Data, + // OpenAI's Completions API requires a filename for file blocks (it warns and + // uses a placeholder otherwise); other providers ignore this metadata. + metadata: { filename: data.fileName ?? 'attachment.pdf' }, }; } @@ -160,12 +212,14 @@ async function processBinaryForAgentPassthrough( * @param ctx - The execution context * @param itemIndex - The current item index * @param options - The enabled binary passthrough options + * @param contentFormat - How file attachments must be encoded for the connected model * @returns A HumanMessage containing the binary messages (images and text files). */ export async function extractBinaryMessages( ctx: IExecuteFunctions | ISupplyDataFunctions, itemIndex: number, options: BinaryPassthroughOptions, + contentFormat: BinaryContentFormat = 'standard', ): Promise { const binaryData = ctx.getInputData()?.[itemIndex]?.binary ?? {}; const binaryMessages = await Promise.all( @@ -175,9 +229,9 @@ export async function extractBinaryMessages( .map(async (data) => { // Handle images and PDFs if (isImageFile(data.mimeType)) { - return await processBinaryForAgentPassthrough(ctx, data, 'image_url'); + return await processBinaryForAgentPassthrough(ctx, data, 'image_url', contentFormat); } else if (isPdfFile(data.mimeType)) { - return await processBinaryForAgentPassthrough(ctx, data, 'file'); + return await processBinaryForAgentPassthrough(ctx, data, 'file', contentFormat); } else { // Handle text files let textContent: string; @@ -509,6 +563,8 @@ export async function prepareMessages( passthroughBinaryImages?: boolean; passthroughBinaryPdfs?: boolean; outputParser?: N8nOutputParser; + // The connected chat model, used to pick the right file content-block format. + model?: BaseChatModel; }, ): Promise { const useSystemMessage = options.systemMessage ?? ctx.getNode().typeVersion < 1.9; @@ -530,7 +586,12 @@ export async function prepareMessages( // extractBinaryMessages only processes the binary types that are enabled. const hasBinaryData = ctx.getInputData()?.[itemIndex]?.binary !== undefined; if (hasBinaryData && (options.passthroughBinaryImages || options.passthroughBinaryPdfs)) { - const binaryMessage = await extractBinaryMessages(ctx, itemIndex, options); + // Known limitation: the format is resolved from the primary model only, and the + // prompt (incl. this block) is shared with the fallback model. A fallback from a + // different provider family (e.g. OpenAI Responses -> Gemini) will receive a + // mismatched file block and fail; cross-provider PDF fallback is unsupported. + const contentFormat = resolveBinaryContentFormat(options.model); + const binaryMessage = await extractBinaryMessages(ctx, itemIndex, options, contentFormat); if (binaryMessage.content.length !== 0) { messages.push(binaryMessage); diff --git a/packages/@n8n/nodes-langchain/nodes/agents/Agent/test/ToolsAgent/commons.test.ts b/packages/@n8n/nodes-langchain/nodes/agents/Agent/test/ToolsAgent/commons.test.ts index ddeae59a3d4..fa3f8a90cd8 100644 --- a/packages/@n8n/nodes-langchain/nodes/agents/Agent/test/ToolsAgent/commons.test.ts +++ b/packages/@n8n/nodes-langchain/nodes/agents/Agent/test/ToolsAgent/commons.test.ts @@ -3,7 +3,7 @@ import type { ToolsAgentAction } from '@langchain/classic/dist/agents/tool_calli import type { Tool } from '@langchain/classic/tools'; import type { BaseChatMemory } from '@langchain/community/memory/chat_memory'; import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; -import { HumanMessage } from '@langchain/core/messages'; +import { HumanMessage, isDataContentBlock } from '@langchain/core/messages'; import type { BaseMessagePromptTemplateLike } from '@langchain/core/prompts'; import { FakeLLM, FakeStreamingChatModel } from '@langchain/core/utils/testing'; import { Buffer } from 'buffer'; @@ -218,6 +218,80 @@ describe('extractBinaryMessages', () => { source_type: 'base64', mime_type: 'application/pdf', data: 'samplePdfData', + metadata: { filename: 'attachment.pdf' }, + }); + }); + + it('should produce a valid LangChain standard data content block for PDFs', async () => { + // Contract check: the standard `file` block must satisfy isDataContentBlock so + // provider converters (Gemini, Anthropic, OpenAI Completions) translate it + // instead of rejecting it. The original `file_url` shape failed this check. + const fakeItem = { + json: {}, + binary: { + doc1: { + mimeType: 'application/pdf', + fileName: 'report.pdf', + data: 'data:application/pdf;base64,samplePdfData', + }, + }, + }; + mockContext.getInputData.mockReturnValue([fakeItem]); + + const humanMsg: HumanMessage = await extractBinaryMessages(mockContext, 0, { + passthroughBinaryImages: true, + passthroughBinaryPdfs: true, + }); + expect(isDataContentBlock(humanMsg.content[0] as object)).toBe(true); + }); + + it('should emit an OpenAI input_file block for PDFs when content format is openai-responses', async () => { + const fakeItem = { + json: {}, + binary: { + doc1: { + mimeType: 'application/pdf', + fileName: 'report.pdf', + data: 'data:application/pdf;base64,samplePdfData', + }, + }, + }; + mockContext.getInputData.mockReturnValue([fakeItem]); + + const humanMsg: HumanMessage = await extractBinaryMessages( + mockContext, + 0, + { passthroughBinaryImages: true, passthroughBinaryPdfs: true }, + 'openai-responses', + ); + expect(humanMsg.content[0]).toEqual({ + type: 'input_file', + file_data: 'data:application/pdf;base64,samplePdfData', + filename: 'report.pdf', + }); + }); + + it('should keep images as image_url even for openai-responses format', async () => { + const fakeItem = { + json: {}, + binary: { + img1: { + mimeType: 'image/png', + data: 'data:image/png;base64,imageData', + }, + }, + }; + mockContext.getInputData.mockReturnValue([fakeItem]); + + const humanMsg: HumanMessage = await extractBinaryMessages( + mockContext, + 0, + { passthroughBinaryImages: true, passthroughBinaryPdfs: true }, + 'openai-responses', + ); + expect(humanMsg.content[0]).toEqual({ + type: 'image_url', + image_url: { url: 'data:image/png;base64,imageData' }, }); }); @@ -256,6 +330,7 @@ describe('extractBinaryMessages', () => { source_type: 'base64', mime_type: 'application/pdf', data: 'pdfData456', + metadata: { filename: 'test.pdf' }, }, ]), ); @@ -288,6 +363,7 @@ describe('extractBinaryMessages', () => { source_type: 'base64', mime_type: 'application/pdf', data: Buffer.from('fakepdfdata').toString(BINARY_ENCODING), + metadata: { filename: 'attachment.pdf' }, }); }); @@ -321,6 +397,7 @@ describe('extractBinaryMessages', () => { source_type: 'base64', mime_type: 'application/pdf', data: 'pdfData456', + metadata: { filename: 'test.pdf' }, }); }); @@ -639,6 +716,39 @@ describe('prepareMessages', () => { expect(hasBinaryMessage).toBe(true); }); + it('should emit input_file for PDFs when the connected model uses the OpenAI Responses API', async () => { + const fakeItem = { + json: {}, + binary: { + doc1: { + mimeType: 'application/pdf', + fileName: 'report.pdf', + data: 'data:application/pdf;base64,samplePdfData', + }, + }, + }; + mockContext.getInputData.mockReturnValue([fakeItem]); + + // Stand-in for a ChatOpenAI configured against the Responses API. + const responsesApiModel = mock(); + (responsesApiModel as unknown as { _useResponsesApi: () => boolean })._useResponsesApi = () => + true; + + const messages = await prepareMessages(mockContext, 0, { + systemMessage: 'Test system', + passthroughBinaryImages: false, + passthroughBinaryPdfs: true, + model: responsesApiModel, + }); + const binaryMessage = messages.find((m) => m instanceof HumanMessage) as HumanMessage; + expect(binaryMessage).toBeDefined(); + expect(binaryMessage.content[0]).toEqual({ + type: 'input_file', + file_data: 'data:application/pdf;base64,samplePdfData', + filename: 'report.pdf', + }); + }); + it('should not include system_message in prompt templates if not provided after version 1.9', async () => { const fakeItem = { json: {} }; const mockNode = mock();