feat(core): OpenAI Responses API support for agent pdf passthrough (#32604)

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-19 07:36:52 +00:00 · 2026-06-19 09:03:20 +02:00
parent 532669c0c4
commit ade94af96a
4 changed files with 181 additions and 7 deletions
@@ -80,7 +80,7 @@ export async function executeBatch(

 		checkMaxIterations(response, maxIterations, ctx.getNode());

-		const itemContext = await prepareItemContext(ctx, itemIndex, processedResponse);
+		const itemContext = await prepareItemContext(ctx, itemIndex, processedResponse, model);

 		const { tools, prompt, options, outputParser } = itemContext;

@@ -1,3 +1,4 @@
+import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
 import type { ChatPromptTemplate } from '@langchain/core/prompts';
 import type { DynamicStructuredTool, Tool } from '@langchain/classic/tools';
 import { NodeOperationError } from 'n8n-workflow';
@@ -41,6 +42,7 @@ export async function prepareItemContext(
 	ctx: IExecuteFunctions | ISupplyDataFunctions,
 	itemIndex: number,
 	response?: EngineResponse<RequestResponseMetadata>,
+	model?: BaseChatModel,
 ): Promise<ItemContext> {
 	const steps = buildSteps(response, itemIndex);

@@ -68,6 +70,7 @@ export async function prepareItemContext(
 		passthroughBinaryImages: options.passthroughBinaryImages ?? true,
 		passthroughBinaryPdfs: options.passthroughBinaryPdfs ?? false,
 		outputParser,
+		model,
 	});
 	const prompt: ChatPromptTemplate = preparePrompt(messages);

@@ -83,6 +83,46 @@ function shouldPassthroughBinary(data: IBinaryData, options: BinaryPassthroughOp
 	return false;
 }

+// How a file (PDF) attachment must be encoded for the connected model.
+// - 'standard': LangChain standard data content block (Gemini, Anthropic, OpenAI Completions)
+// - 'openai-responses': OpenAI Responses API native part, which rejects the standard block
+type BinaryContentFormat = 'standard' | 'openai-responses';
+
+// Structural view of the ChatOpenAI internals we probe. `_useResponsesApi` is
+// protected and `useResponsesApi` is public; neither is part of BaseChatModel, so
+// we read them defensively and treat their absence as "not OpenAI Responses".
+type ResponsesApiModel = {
+	_useResponsesApi?: (options?: unknown) => boolean;
+	useResponsesApi?: boolean;
+};
+
+/**
+ * Decides how file (PDF) attachments must be encoded for the connected chat model.
+ *
+ * OpenAI's Responses API rejects the standard `file` content block (it expects an
+ * `input_file` part), so when the connected model talks to that API we must emit a
+ * provider-native block instead. Gemini, Anthropic, and OpenAI's Completions API all
+ * consume the standard block.
+ *
+ * Detection relies on ChatOpenAI's `_useResponsesApi()` because LangChain exposes no
+ * public API for it; `_useResponsesApi()` (unlike the `useResponsesApi` flag alone)
+ * also covers models that auto-select the Responses API (e.g. gpt-5/o-series). Note it
+ * is evaluated without invoke-time call options, so Responses usage triggered solely by
+ * call-time tools/kwargs is not detected here. Guarded so an unexpected shape degrades
+ * to the standard block rather than throwing.
+ *
+ * @param model - The connected chat model; when omitted, `'standard'` is returned.
+ * @returns `'openai-responses'` when the probe reports Responses API usage, otherwise
+ * `'standard'` (including when the probe is missing or throws).
+ */
+function resolveBinaryContentFormat(model?: BaseChatModel): BinaryContentFormat {
+	if (!model) return 'standard';
+	const candidate = model as unknown as ResponsesApiModel; // structural view: the probed members are not declared on BaseChatModel
+	try {
+		const usesResponsesApi =
+			typeof candidate._useResponsesApi === 'function'
+				? candidate._useResponsesApi(undefined) // invoked on the model itself, with no call options
+				: candidate.useResponsesApi === true; // fallback to the plain flag when the method is absent
+		return usesResponsesApi ? 'openai-responses' : 'standard';
+	} catch {
+		return 'standard'; // best-effort: a throwing probe falls back to the standard block
+	}
+}
+
 /**
 * Processes a binary data to be used in agent passthrough.
 * @param ctx - The execution context
@@ -94,6 +134,7 @@ async function processBinaryForAgentPassthrough(
 	ctx: IExecuteFunctions | ISupplyDataFunctions,
 	data: IBinaryData,
 	type: 'image_url' | 'file',
+	contentFormat: BinaryContentFormat = 'standard',
 ) {
 	// Resolve the binary contents to a raw base64 string. In filesystem mode the
 	// binary is stored by id and must be streamed before it can be encoded.
@@ -130,14 +171,25 @@ async function processBinaryForAgentPassthrough(
 		);
 	}

-	// PDFs (and other documents) are passed as a provider-agnostic file content
-	// block so any chat model with native PDF support can consume them.
+	// PDFs (and other documents) are passed as a file content block. OpenAI's
+	// Responses API needs its native `input_file` part; every other supported
+	// provider consumes the LangChain standard data content block.
 	if (type === 'file') {
+		if (contentFormat === 'openai-responses') {
+			return {
+				type: 'input_file',
+				file_data: `data:${data.mimeType};base64,${base64Data}`,
+				filename: data.fileName ?? 'attachment.pdf',
+			};
+		}
 		return {
 			type: 'file',
 			source_type: 'base64',
 			mime_type: data.mimeType,
 			data: base64Data,
+			// OpenAI's Completions API requires a filename for file blocks (it warns and
+			// uses a placeholder otherwise); other providers ignore this metadata.
+			metadata: { filename: data.fileName ?? 'attachment.pdf' },
 		};
 	}

@@ -160,12 +212,14 @@ async function processBinaryForAgentPassthrough(
 * @param ctx - The execution context
 * @param itemIndex - The current item index
 * @param options - The enabled binary passthrough options
+ * @param contentFormat - How file attachments must be encoded for the connected model
 * @returns A HumanMessage containing the binary messages (images, PDFs, and text files).
 */
 export async function extractBinaryMessages(
 	ctx: IExecuteFunctions | ISupplyDataFunctions,
 	itemIndex: number,
 	options: BinaryPassthroughOptions,
+	contentFormat: BinaryContentFormat = 'standard',
 ): Promise<HumanMessage> {
 	const binaryData = ctx.getInputData()?.[itemIndex]?.binary ?? {};
 	const binaryMessages = await Promise.all(
@@ -175,9 +229,9 @@ export async function extractBinaryMessages(
 			.map(async (data) => {
 				// Handle images and PDFs
 				if (isImageFile(data.mimeType)) {
-					return await processBinaryForAgentPassthrough(ctx, data, 'image_url');
+					return await processBinaryForAgentPassthrough(ctx, data, 'image_url', contentFormat);
 				} else if (isPdfFile(data.mimeType)) {
-					return await processBinaryForAgentPassthrough(ctx, data, 'file');
+					return await processBinaryForAgentPassthrough(ctx, data, 'file', contentFormat);
 				} else {
 					// Handle text files
 					let textContent: string;
@@ -509,6 +563,8 @@ export async function prepareMessages(
 		passthroughBinaryImages?: boolean;
 		passthroughBinaryPdfs?: boolean;
 		outputParser?: N8nOutputParser;
+		// The connected chat model, used to pick the right file content-block format.
+		model?: BaseChatModel;
 	},
 ): Promise<BaseMessagePromptTemplateLike[]> {
 	const useSystemMessage = options.systemMessage ?? ctx.getNode().typeVersion < 1.9;
@@ -530,7 +586,12 @@ export async function prepareMessages(
 	// extractBinaryMessages only processes the binary types that are enabled.
 	const hasBinaryData = ctx.getInputData()?.[itemIndex]?.binary !== undefined;
 	if (hasBinaryData && (options.passthroughBinaryImages || options.passthroughBinaryPdfs)) {
-		const binaryMessage = await extractBinaryMessages(ctx, itemIndex, options);
+		// Known limitation: the format is resolved from the primary model only, and the
+		// prompt (incl. this block) is shared with the fallback model. A fallback from a
+		// different provider family (e.g. OpenAI Responses -> Gemini) will receive a
+		// mismatched file block and fail; cross-provider PDF fallback is unsupported.
+		const contentFormat = resolveBinaryContentFormat(options.model);
+		const binaryMessage = await extractBinaryMessages(ctx, itemIndex, options, contentFormat);

 		if (binaryMessage.content.length !== 0) {
 			messages.push(binaryMessage);
@@ -3,7 +3,7 @@ import type { ToolsAgentAction } from '@langchain/classic/dist/agents/tool_calli
 import type { Tool } from '@langchain/classic/tools';
 import type { BaseChatMemory } from '@langchain/community/memory/chat_memory';
 import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
-import { HumanMessage } from '@langchain/core/messages';
+import { HumanMessage, isDataContentBlock } from '@langchain/core/messages';
 import type { BaseMessagePromptTemplateLike } from '@langchain/core/prompts';
 import { FakeLLM, FakeStreamingChatModel } from '@langchain/core/utils/testing';
 import { Buffer } from 'buffer';
@@ -218,6 +218,80 @@ describe('extractBinaryMessages', () => {
 			source_type: 'base64',
 			mime_type: 'application/pdf',
 			data: 'samplePdfData',
+			metadata: { filename: 'attachment.pdf' },
+		});
+	});
+
+	it('should produce a valid LangChain standard data content block for PDFs', async () => {
+		// Contract check: the standard `file` block must satisfy isDataContentBlock so
+		// provider converters (Gemini, Anthropic, OpenAI Completions) translate it
+		// instead of rejecting it. The original `file_url` shape failed this check.
+		const fakeItem = {
+			json: {},
+			binary: {
+				doc1: {
+					mimeType: 'application/pdf',
+					fileName: 'report.pdf',
+					data: 'data:application/pdf;base64,samplePdfData',
+				},
+			},
+		};
+		mockContext.getInputData.mockReturnValue([fakeItem]);
+
+		const humanMsg: HumanMessage = await extractBinaryMessages(mockContext, 0, { // no contentFormat argument: the 'standard' default applies
+			passthroughBinaryImages: true,
+			passthroughBinaryPdfs: true,
+		});
+		expect(isDataContentBlock(humanMsg.content[0] as object)).toBe(true); // validates the block shape via LangChain's own guard
+	});
+
+	it('should emit an OpenAI input_file block for PDFs when content format is openai-responses', async () => {
+		const fakeItem = {
+			json: {},
+			binary: {
+				doc1: {
+					mimeType: 'application/pdf',
+					fileName: 'report.pdf',
+					data: 'data:application/pdf;base64,samplePdfData',
+				},
+			},
+		};
+		mockContext.getInputData.mockReturnValue([fakeItem]);
+
+		const humanMsg: HumanMessage = await extractBinaryMessages(
+			mockContext,
+			0,
+			{ passthroughBinaryImages: true, passthroughBinaryPdfs: true },
+			'openai-responses', // format passed explicitly; no model-based detection is involved here
+		);
+		expect(humanMsg.content[0]).toEqual({
+			type: 'input_file',
+			file_data: 'data:application/pdf;base64,samplePdfData', // data URL built from the mime type and the base64 payload
+			filename: 'report.pdf', // taken from the binary's fileName
+		});
+	});
+
+	it('should keep images as image_url even for openai-responses format', async () => {
+		const fakeItem = {
+			json: {},
+			binary: {
+				img1: {
+					mimeType: 'image/png',
+					data: 'data:image/png;base64,imageData',
+				},
+			},
+		};
+		mockContext.getInputData.mockReturnValue([fakeItem]);
+
+		const humanMsg: HumanMessage = await extractBinaryMessages(
+			mockContext,
+			0,
+			{ passthroughBinaryImages: true, passthroughBinaryPdfs: true },
+			'openai-responses', // the format is expected to change only file (PDF) blocks, not images
+		);
+		expect(humanMsg.content[0]).toEqual({
+			type: 'image_url',
+			image_url: { url: 'data:image/png;base64,imageData' },
 		});
 	});

@@ -256,6 +330,7 @@ describe('extractBinaryMessages', () => {
 					source_type: 'base64',
 					mime_type: 'application/pdf',
 					data: 'pdfData456',
+					metadata: { filename: 'test.pdf' },
 				},
 			]),
 		);
@@ -288,6 +363,7 @@ describe('extractBinaryMessages', () => {
 			source_type: 'base64',
 			mime_type: 'application/pdf',
 			data: Buffer.from('fakepdfdata').toString(BINARY_ENCODING),
+			metadata: { filename: 'attachment.pdf' },
 		});
 	});

@@ -321,6 +397,7 @@ describe('extractBinaryMessages', () => {
 			source_type: 'base64',
 			mime_type: 'application/pdf',
 			data: 'pdfData456',
+			metadata: { filename: 'test.pdf' },
 		});
 	});

@@ -639,6 +716,39 @@ describe('prepareMessages', () => {
 		expect(hasBinaryMessage).toBe(true);
 	});

+	it('should emit input_file for PDFs when the connected model uses the OpenAI Responses API', async () => {
+		const fakeItem = {
+			json: {},
+			binary: {
+				doc1: {
+					mimeType: 'application/pdf',
+					fileName: 'report.pdf',
+					data: 'data:application/pdf;base64,samplePdfData',
+				},
+			},
+		};
+		mockContext.getInputData.mockReturnValue([fakeItem]);
+
+		// Stand-in for a ChatOpenAI configured against the Responses API.
+		const responsesApiModel = mock<BaseChatModel>();
+		(responsesApiModel as unknown as { _useResponsesApi: () => boolean })._useResponsesApi = () =>
+			true;
+
+		const messages = await prepareMessages(mockContext, 0, {
+			systemMessage: 'Test system',
+			passthroughBinaryImages: false, // only PDF passthrough is enabled for this case
+			passthroughBinaryPdfs: true,
+			model: responsesApiModel, // prepareMessages resolves the file content format from this model
+		});
+		const binaryMessage = messages.find((m) => m instanceof HumanMessage) as HumanMessage;
+		expect(binaryMessage).toBeDefined();
+		expect(binaryMessage.content[0]).toEqual({
+			type: 'input_file',
+			file_data: 'data:application/pdf;base64,samplePdfData',
+			filename: 'report.pdf',
+		});
+	});
+
 	it('should not include system_message in prompt templates if not provided after version 1.9', async () => {
 		const fakeItem = { json: {} };
 		const mockNode = mock<INode>();