feat(core): OpenAI Responses API support for agent pdf passthrough (#32604)

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Jaakko Husso
2026-06-19 09:03:20 +02:00
committed by GitHub
parent 532669c0c4
commit ade94af96a
4 changed files with 181 additions and 7 deletions
@@ -80,7 +80,7 @@ export async function executeBatch(
checkMaxIterations(response, maxIterations, ctx.getNode());
const itemContext = await prepareItemContext(ctx, itemIndex, processedResponse);
const itemContext = await prepareItemContext(ctx, itemIndex, processedResponse, model);
const { tools, prompt, options, outputParser } = itemContext;
@@ -1,3 +1,4 @@
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
import type { ChatPromptTemplate } from '@langchain/core/prompts';
import type { DynamicStructuredTool, Tool } from '@langchain/classic/tools';
import { NodeOperationError } from 'n8n-workflow';
@@ -41,6 +42,7 @@ export async function prepareItemContext(
ctx: IExecuteFunctions | ISupplyDataFunctions,
itemIndex: number,
response?: EngineResponse<RequestResponseMetadata>,
model?: BaseChatModel,
): Promise<ItemContext> {
const steps = buildSteps(response, itemIndex);
@@ -68,6 +70,7 @@ export async function prepareItemContext(
passthroughBinaryImages: options.passthroughBinaryImages ?? true,
passthroughBinaryPdfs: options.passthroughBinaryPdfs ?? false,
outputParser,
model,
});
const prompt: ChatPromptTemplate = preparePrompt(messages);
@@ -83,6 +83,46 @@ function shouldPassthroughBinary(data: IBinaryData, options: BinaryPassthroughOp
return false;
}
/**
 * Encoding used for a file (PDF) attachment, selected per connected model.
 *
 * - `'standard'`: the LangChain standard data content block, consumed by Gemini,
 *   Anthropic, and OpenAI's Completions API.
 * - `'openai-responses'`: the native part of the OpenAI Responses API, which
 *   rejects the standard block.
 */
type BinaryContentFormat = 'openai-responses' | 'standard';
/**
 * Minimal structural shape of the ChatOpenAI internals that get probed.
 *
 * Neither member belongs to BaseChatModel, so both are optional: they are read
 * defensively and a model lacking them is treated as "not OpenAI Responses".
 */
type ResponsesApiModel = {
	/** Public flag on ChatOpenAI. */
	useResponsesApi?: boolean;
	/** Protected method on ChatOpenAI. */
	_useResponsesApi?: (options?: unknown) => boolean;
};
/**
 * Chooses how file (PDF) attachments must be encoded for the connected chat model.
 *
 * The OpenAI Responses API expects an `input_file` part and rejects the standard
 * `file` content block, so a model talking to that API needs the provider-native
 * format. Gemini, Anthropic, and OpenAI's Completions API all accept the standard
 * block.
 *
 * LangChain offers no public API to ask a model whether it uses the Responses API,
 * so ChatOpenAI's `_useResponsesApi()` is probed when it exists. Unlike the bare
 * `useResponsesApi` flag, that method also reports models that auto-select the
 * Responses API (e.g. gpt-5/o-series). The probe runs without invoke-time call
 * options, so Responses usage triggered only by call-time tools/kwargs is not
 * detected here.
 *
 * @param model - The connected chat model, if any
 * @returns `'openai-responses'` when the model reports Responses API usage;
 * `'standard'` otherwise, including when no model is given or the probe throws
 */
function resolveBinaryContentFormat(model?: BaseChatModel): BinaryContentFormat {
	if (model) {
		const probe = model as unknown as ResponsesApiModel;

		try {
			let viaResponsesApi: boolean;
			if (typeof probe._useResponsesApi === 'function') {
				// Prefer the method: it is the more complete signal when available.
				viaResponsesApi = probe._useResponsesApi(undefined);
			} else {
				// Only an explicit boolean `true` on the flag counts.
				viaResponsesApi = probe.useResponsesApi === true;
			}

			if (viaResponsesApi) return 'openai-responses';
		} catch {
			// An unexpected model shape must degrade to the standard block, not throw.
		}
	}

	return 'standard';
}
/**
* Processes a binary data to be used in agent passthrough.
* @param ctx - The execution context
@@ -94,6 +134,7 @@ async function processBinaryForAgentPassthrough(
ctx: IExecuteFunctions | ISupplyDataFunctions,
data: IBinaryData,
type: 'image_url' | 'file',
contentFormat: BinaryContentFormat = 'standard',
) {
// Resolve the binary contents to a raw base64 string. In filesystem mode the
// binary is stored by id and must be streamed before it can be encoded.
@@ -130,14 +171,25 @@ async function processBinaryForAgentPassthrough(
);
}
// PDFs (and other documents) are passed as a provider-agnostic file content
// block so any chat model with native PDF support can consume them.
// PDFs (and other documents) are passed as a file content block. OpenAI's
// Responses API needs its native `input_file` part; every other supported
// provider consumes the LangChain standard data content block.
if (type === 'file') {
if (contentFormat === 'openai-responses') {
return {
type: 'input_file',
file_data: `data:${data.mimeType};base64,${base64Data}`,
filename: data.fileName ?? 'attachment.pdf',
};
}
return {
type: 'file',
source_type: 'base64',
mime_type: data.mimeType,
data: base64Data,
// OpenAI's Completions API requires a filename for file blocks (it warns and
// uses a placeholder otherwise); other providers ignore this metadata.
metadata: { filename: data.fileName ?? 'attachment.pdf' },
};
}
@@ -160,12 +212,14 @@ async function processBinaryForAgentPassthrough(
* @param ctx - The execution context
* @param itemIndex - The current item index
* @param options - The enabled binary passthrough options
* @param contentFormat - How file attachments must be encoded for the connected model
* @returns A HumanMessage containing the binary messages (images and text files).
*/
export async function extractBinaryMessages(
ctx: IExecuteFunctions | ISupplyDataFunctions,
itemIndex: number,
options: BinaryPassthroughOptions,
contentFormat: BinaryContentFormat = 'standard',
): Promise<HumanMessage> {
const binaryData = ctx.getInputData()?.[itemIndex]?.binary ?? {};
const binaryMessages = await Promise.all(
@@ -175,9 +229,9 @@ export async function extractBinaryMessages(
.map(async (data) => {
// Handle images and PDFs
if (isImageFile(data.mimeType)) {
return await processBinaryForAgentPassthrough(ctx, data, 'image_url');
return await processBinaryForAgentPassthrough(ctx, data, 'image_url', contentFormat);
} else if (isPdfFile(data.mimeType)) {
return await processBinaryForAgentPassthrough(ctx, data, 'file');
return await processBinaryForAgentPassthrough(ctx, data, 'file', contentFormat);
} else {
// Handle text files
let textContent: string;
@@ -509,6 +563,8 @@ export async function prepareMessages(
passthroughBinaryImages?: boolean;
passthroughBinaryPdfs?: boolean;
outputParser?: N8nOutputParser;
// The connected chat model, used to pick the right file content-block format.
model?: BaseChatModel;
},
): Promise<BaseMessagePromptTemplateLike[]> {
const useSystemMessage = options.systemMessage ?? ctx.getNode().typeVersion < 1.9;
@@ -530,7 +586,12 @@ export async function prepareMessages(
// extractBinaryMessages only processes the binary types that are enabled.
const hasBinaryData = ctx.getInputData()?.[itemIndex]?.binary !== undefined;
if (hasBinaryData && (options.passthroughBinaryImages || options.passthroughBinaryPdfs)) {
const binaryMessage = await extractBinaryMessages(ctx, itemIndex, options);
// Known limitation: the format is resolved from the primary model only, and the
// prompt (incl. this block) is shared with the fallback model. A fallback from a
// different provider family (e.g. OpenAI Responses -> Gemini) will receive a
// mismatched file block and fail; cross-provider PDF fallback is unsupported.
const contentFormat = resolveBinaryContentFormat(options.model);
const binaryMessage = await extractBinaryMessages(ctx, itemIndex, options, contentFormat);
if (binaryMessage.content.length !== 0) {
messages.push(binaryMessage);
@@ -3,7 +3,7 @@ import type { ToolsAgentAction } from '@langchain/classic/dist/agents/tool_calli
import type { Tool } from '@langchain/classic/tools';
import type { BaseChatMemory } from '@langchain/community/memory/chat_memory';
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { HumanMessage } from '@langchain/core/messages';
import { HumanMessage, isDataContentBlock } from '@langchain/core/messages';
import type { BaseMessagePromptTemplateLike } from '@langchain/core/prompts';
import { FakeLLM, FakeStreamingChatModel } from '@langchain/core/utils/testing';
import { Buffer } from 'buffer';
@@ -218,6 +218,80 @@ describe('extractBinaryMessages', () => {
source_type: 'base64',
mime_type: 'application/pdf',
data: 'samplePdfData',
metadata: { filename: 'attachment.pdf' },
});
});
it('should produce a valid LangChain standard data content block for PDFs', async () => {
	// Contract: the standard `file` block has to pass isDataContentBlock, otherwise
	// the provider converters (Gemini, Anthropic, OpenAI Completions) reject it
	// rather than translating it. The original `file_url` shape failed this check.
	const pdfBinary = {
		mimeType: 'application/pdf',
		fileName: 'report.pdf',
		data: 'data:application/pdf;base64,samplePdfData',
	};
	mockContext.getInputData.mockReturnValue([{ json: {}, binary: { doc1: pdfBinary } }]);

	const passthroughOptions = { passthroughBinaryImages: true, passthroughBinaryPdfs: true };
	const result: HumanMessage = await extractBinaryMessages(mockContext, 0, passthroughOptions);

	const firstBlock = result.content[0] as object;
	expect(isDataContentBlock(firstBlock)).toBe(true);
});
it('should emit an OpenAI input_file block for PDFs when content format is openai-responses', async () => {
	// A single PDF attachment whose payload is already a base64 data URL.
	const pdfBinary = {
		mimeType: 'application/pdf',
		fileName: 'report.pdf',
		data: 'data:application/pdf;base64,samplePdfData',
	};
	mockContext.getInputData.mockReturnValue([{ json: {}, binary: { doc1: pdfBinary } }]);

	const passthroughOptions = { passthroughBinaryImages: true, passthroughBinaryPdfs: true };
	const result: HumanMessage = await extractBinaryMessages(
		mockContext,
		0,
		passthroughOptions,
		'openai-responses',
	);

	// The Responses API format yields the native `input_file` part, not a `file` block.
	const expectedBlock = {
		type: 'input_file',
		file_data: 'data:application/pdf;base64,samplePdfData',
		filename: 'report.pdf',
	};
	expect(result.content[0]).toEqual(expectedBlock);
});
it('should keep images as image_url even for openai-responses format', async () => {
	// A single PNG attachment whose payload is already a base64 data URL.
	const imageBinary = {
		mimeType: 'image/png',
		data: 'data:image/png;base64,imageData',
	};
	mockContext.getInputData.mockReturnValue([{ json: {}, binary: { img1: imageBinary } }]);

	const passthroughOptions = { passthroughBinaryImages: true, passthroughBinaryPdfs: true };
	const result: HumanMessage = await extractBinaryMessages(
		mockContext,
		0,
		passthroughOptions,
		'openai-responses',
	);

	// The content format must not change how images are encoded.
	const expectedBlock = {
		type: 'image_url',
		image_url: { url: 'data:image/png;base64,imageData' },
	};
	expect(result.content[0]).toEqual(expectedBlock);
});
@@ -256,6 +330,7 @@ describe('extractBinaryMessages', () => {
source_type: 'base64',
mime_type: 'application/pdf',
data: 'pdfData456',
metadata: { filename: 'test.pdf' },
},
]),
);
@@ -288,6 +363,7 @@ describe('extractBinaryMessages', () => {
source_type: 'base64',
mime_type: 'application/pdf',
data: Buffer.from('fakepdfdata').toString(BINARY_ENCODING),
metadata: { filename: 'attachment.pdf' },
});
});
@@ -321,6 +397,7 @@ describe('extractBinaryMessages', () => {
source_type: 'base64',
mime_type: 'application/pdf',
data: 'pdfData456',
metadata: { filename: 'test.pdf' },
});
});
@@ -639,6 +716,39 @@ describe('prepareMessages', () => {
expect(hasBinaryMessage).toBe(true);
});
it('should emit input_file for PDFs when the connected model uses the OpenAI Responses API', async () => {
	const pdfBinary = {
		mimeType: 'application/pdf',
		fileName: 'report.pdf',
		data: 'data:application/pdf;base64,samplePdfData',
	};
	mockContext.getInputData.mockReturnValue([{ json: {}, binary: { doc1: pdfBinary } }]);

	// Stand-in for a ChatOpenAI configured against the Responses API: a mocked chat
	// model whose `_useResponsesApi` probe always answers true.
	type ResponsesProbe = { _useResponsesApi: () => boolean };
	const responsesApiModel = mock<BaseChatModel>();
	(responsesApiModel as unknown as ResponsesProbe)._useResponsesApi = () => true;

	const preparedMessages = await prepareMessages(mockContext, 0, {
		systemMessage: 'Test system',
		passthroughBinaryImages: false,
		passthroughBinaryPdfs: true,
		model: responsesApiModel,
	});

	const humanMessage = preparedMessages.find(
		(candidate) => candidate instanceof HumanMessage,
	) as HumanMessage;
	expect(humanMessage).toBeDefined();

	const expectedBlock = {
		type: 'input_file',
		file_data: 'data:application/pdf;base64,samplePdfData',
		filename: 'report.pdf',
	};
	expect(humanMessage.content[0]).toEqual(expectedBlock);
});
it('should not include system_message in prompt templates if not provided after version 1.9', async () => {
const fakeItem = { json: {} };
const mockNode = mock<INode>();