mirror of
https://github.com/n8n-io/n8n.git
synced 2026-06-19 07:36:52 +00:00
feat(core): OpenAI Responses API support for agent pdf passthrough (#32604)
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
+1
-1
@@ -80,7 +80,7 @@ export async function executeBatch(
|
||||
|
||||
checkMaxIterations(response, maxIterations, ctx.getNode());
|
||||
|
||||
const itemContext = await prepareItemContext(ctx, itemIndex, processedResponse);
|
||||
const itemContext = await prepareItemContext(ctx, itemIndex, processedResponse, model);
|
||||
|
||||
const { tools, prompt, options, outputParser } = itemContext;
|
||||
|
||||
|
||||
+3
@@ -1,3 +1,4 @@
|
||||
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
||||
import type { ChatPromptTemplate } from '@langchain/core/prompts';
|
||||
import type { DynamicStructuredTool, Tool } from '@langchain/classic/tools';
|
||||
import { NodeOperationError } from 'n8n-workflow';
|
||||
@@ -41,6 +42,7 @@ export async function prepareItemContext(
|
||||
ctx: IExecuteFunctions | ISupplyDataFunctions,
|
||||
itemIndex: number,
|
||||
response?: EngineResponse<RequestResponseMetadata>,
|
||||
model?: BaseChatModel,
|
||||
): Promise<ItemContext> {
|
||||
const steps = buildSteps(response, itemIndex);
|
||||
|
||||
@@ -68,6 +70,7 @@ export async function prepareItemContext(
|
||||
passthroughBinaryImages: options.passthroughBinaryImages ?? true,
|
||||
passthroughBinaryPdfs: options.passthroughBinaryPdfs ?? false,
|
||||
outputParser,
|
||||
model,
|
||||
});
|
||||
const prompt: ChatPromptTemplate = preparePrompt(messages);
|
||||
|
||||
|
||||
@@ -83,6 +83,46 @@ function shouldPassthroughBinary(data: IBinaryData, options: BinaryPassthroughOp
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Encoding used for a file (PDF) attachment, chosen per connected model.
 *
 * - `'standard'`: the LangChain standard data content block, consumed by Gemini,
 *   Anthropic, and OpenAI's Completions API.
 * - `'openai-responses'`: the OpenAI Responses API native part; that API rejects
 *   the standard block.
 */
type BinaryContentFormat = 'standard' | 'openai-responses';
|
||||
|
||||
/**
 * Minimal structural view of the ChatOpenAI members probed when deciding the
 * file content-block format. Neither member is part of BaseChatModel, so both
 * are optional: they are read defensively and a model lacking them is treated
 * as "not OpenAI Responses".
 */
interface ResponsesApiModel {
	/** Protected on ChatOpenAI; reports whether the Responses API will be used. */
	_useResponsesApi?: (options?: unknown) => boolean;
	/** Public ChatOpenAI flag. */
	useResponsesApi?: boolean;
}
|
||||
|
||||
/**
 * Picks the content-block encoding for file (PDF) attachments sent to `model`.
 *
 * OpenAI's Responses API rejects the LangChain standard `file` content block (it
 * expects an `input_file` part), so when the connected model talks to that API a
 * provider-native block must be emitted instead. Gemini, Anthropic, and OpenAI's
 * Completions API all consume the standard block.
 *
 * Detection prefers ChatOpenAI's `_useResponsesApi()` because LangChain exposes no
 * public API for it; `_useResponsesApi()` (unlike the `useResponsesApi` flag alone)
 * also covers models that auto-select the Responses API (e.g. gpt-5/o-series). Note
 * it is evaluated without invoke-time call options, so Responses usage triggered
 * solely by call-time tools/kwargs is not detected here.
 *
 * Resolution order:
 * 1. If `_useResponsesApi` is a function and returns normally, its result decides.
 * 2. If it is absent, or it throws, the explicit `useResponsesApi === true` flag
 *    decides, so a model explicitly configured for the Responses API is still
 *    detected when the internal probe misbehaves.
 * 3. Any other unexpected shape degrades to the standard block rather than throwing.
 *
 * @param model - The connected chat model; `'standard'` is returned when omitted
 * @returns `'openai-responses'` when the model is detected as using the OpenAI
 *   Responses API, otherwise `'standard'`. Never throws.
 */
function resolveBinaryContentFormat(model?: BaseChatModel): BinaryContentFormat {
	if (!model) return 'standard';

	// Neither probed member exists on BaseChatModel, hence the structural cast.
	const candidate = model as unknown as ResponsesApiModel;

	try {
		if (typeof candidate._useResponsesApi === 'function') {
			try {
				// Invoked as a method so `this` stays bound to the model instance.
				return candidate._useResponsesApi(undefined) ? 'openai-responses' : 'standard';
			} catch {
				// Probe failed (e.g. internal signature changed); fall through to the
				// explicit public flag instead of assuming the standard format.
			}
		}

		return candidate.useResponsesApi === true ? 'openai-responses' : 'standard';
	} catch {
		// Even reading the members failed (exotic proxy/getter): best-effort default.
		return 'standard';
	}
}
|
||||
|
||||
/**
|
||||
* Processes a binary data to be used in agent passthrough.
|
||||
* @param ctx - The execution context
|
||||
@@ -94,6 +134,7 @@ async function processBinaryForAgentPassthrough(
|
||||
ctx: IExecuteFunctions | ISupplyDataFunctions,
|
||||
data: IBinaryData,
|
||||
type: 'image_url' | 'file',
|
||||
contentFormat: BinaryContentFormat = 'standard',
|
||||
) {
|
||||
// Resolve the binary contents to a raw base64 string. In filesystem mode the
|
||||
// binary is stored by id and must be streamed before it can be encoded.
|
||||
@@ -130,14 +171,25 @@ async function processBinaryForAgentPassthrough(
|
||||
);
|
||||
}
|
||||
|
||||
// PDFs (and other documents) are passed as a provider-agnostic file content
|
||||
// block so any chat model with native PDF support can consume them.
|
||||
// PDFs (and other documents) are passed as a file content block. OpenAI's
|
||||
// Responses API needs its native `input_file` part; every other supported
|
||||
// provider consumes the LangChain standard data content block.
|
||||
if (type === 'file') {
|
||||
if (contentFormat === 'openai-responses') {
|
||||
return {
|
||||
type: 'input_file',
|
||||
file_data: `data:${data.mimeType};base64,${base64Data}`,
|
||||
filename: data.fileName ?? 'attachment.pdf',
|
||||
};
|
||||
}
|
||||
return {
|
||||
type: 'file',
|
||||
source_type: 'base64',
|
||||
mime_type: data.mimeType,
|
||||
data: base64Data,
|
||||
// OpenAI's Completions API requires a filename for file blocks (it warns and
|
||||
// uses a placeholder otherwise); other providers ignore this metadata.
|
||||
metadata: { filename: data.fileName ?? 'attachment.pdf' },
|
||||
};
|
||||
}
|
||||
|
||||
@@ -160,12 +212,14 @@ async function processBinaryForAgentPassthrough(
|
||||
* @param ctx - The execution context
|
||||
* @param itemIndex - The current item index
|
||||
* @param options - The enabled binary passthrough options
|
||||
* @param contentFormat - How file attachments must be encoded for the connected model
|
||||
* @returns A HumanMessage containing the binary messages (images and text files).
|
||||
*/
|
||||
export async function extractBinaryMessages(
|
||||
ctx: IExecuteFunctions | ISupplyDataFunctions,
|
||||
itemIndex: number,
|
||||
options: BinaryPassthroughOptions,
|
||||
contentFormat: BinaryContentFormat = 'standard',
|
||||
): Promise<HumanMessage> {
|
||||
const binaryData = ctx.getInputData()?.[itemIndex]?.binary ?? {};
|
||||
const binaryMessages = await Promise.all(
|
||||
@@ -175,9 +229,9 @@ export async function extractBinaryMessages(
|
||||
.map(async (data) => {
|
||||
// Handle images and PDFs
|
||||
if (isImageFile(data.mimeType)) {
|
||||
return await processBinaryForAgentPassthrough(ctx, data, 'image_url');
|
||||
return await processBinaryForAgentPassthrough(ctx, data, 'image_url', contentFormat);
|
||||
} else if (isPdfFile(data.mimeType)) {
|
||||
return await processBinaryForAgentPassthrough(ctx, data, 'file');
|
||||
return await processBinaryForAgentPassthrough(ctx, data, 'file', contentFormat);
|
||||
} else {
|
||||
// Handle text files
|
||||
let textContent: string;
|
||||
@@ -509,6 +563,8 @@ export async function prepareMessages(
|
||||
passthroughBinaryImages?: boolean;
|
||||
passthroughBinaryPdfs?: boolean;
|
||||
outputParser?: N8nOutputParser;
|
||||
// The connected chat model, used to pick the right file content-block format.
|
||||
model?: BaseChatModel;
|
||||
},
|
||||
): Promise<BaseMessagePromptTemplateLike[]> {
|
||||
const useSystemMessage = options.systemMessage ?? ctx.getNode().typeVersion < 1.9;
|
||||
@@ -530,7 +586,12 @@ export async function prepareMessages(
|
||||
// extractBinaryMessages only processes the binary types that are enabled.
|
||||
const hasBinaryData = ctx.getInputData()?.[itemIndex]?.binary !== undefined;
|
||||
if (hasBinaryData && (options.passthroughBinaryImages || options.passthroughBinaryPdfs)) {
|
||||
const binaryMessage = await extractBinaryMessages(ctx, itemIndex, options);
|
||||
// Known limitation: the format is resolved from the primary model only, and the
|
||||
// prompt (incl. this block) is shared with the fallback model. A fallback from a
|
||||
// different provider family (e.g. OpenAI Responses -> Gemini) will receive a
|
||||
// mismatched file block and fail; cross-provider PDF fallback is unsupported.
|
||||
const contentFormat = resolveBinaryContentFormat(options.model);
|
||||
const binaryMessage = await extractBinaryMessages(ctx, itemIndex, options, contentFormat);
|
||||
|
||||
if (binaryMessage.content.length !== 0) {
|
||||
messages.push(binaryMessage);
|
||||
|
||||
@@ -3,7 +3,7 @@ import type { ToolsAgentAction } from '@langchain/classic/dist/agents/tool_calli
|
||||
import type { Tool } from '@langchain/classic/tools';
|
||||
import type { BaseChatMemory } from '@langchain/community/memory/chat_memory';
|
||||
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
||||
import { HumanMessage } from '@langchain/core/messages';
|
||||
import { HumanMessage, isDataContentBlock } from '@langchain/core/messages';
|
||||
import type { BaseMessagePromptTemplateLike } from '@langchain/core/prompts';
|
||||
import { FakeLLM, FakeStreamingChatModel } from '@langchain/core/utils/testing';
|
||||
import { Buffer } from 'buffer';
|
||||
@@ -218,6 +218,80 @@ describe('extractBinaryMessages', () => {
|
||||
source_type: 'base64',
|
||||
mime_type: 'application/pdf',
|
||||
data: 'samplePdfData',
|
||||
metadata: { filename: 'attachment.pdf' },
|
||||
});
|
||||
});
|
||||
|
||||
it('should produce a valid LangChain standard data content block for PDFs', async () => {
	// Contract under test: the standard `file` block has to pass isDataContentBlock,
	// otherwise the provider converters (Gemini, Anthropic, OpenAI Completions)
	// reject it instead of translating it. The original `file_url` shape failed here.
	const pdfBinary = {
		mimeType: 'application/pdf',
		fileName: 'report.pdf',
		data: 'data:application/pdf;base64,samplePdfData',
	};
	const inputItem = { json: {}, binary: { doc1: pdfBinary } };
	mockContext.getInputData.mockReturnValue([inputItem]);

	const passthrough = { passthroughBinaryImages: true, passthroughBinaryPdfs: true };
	const result: HumanMessage = await extractBinaryMessages(mockContext, 0, passthrough);

	const firstBlock = result.content[0] as object;
	expect(isDataContentBlock(firstBlock)).toBe(true);
});
|
||||
|
||||
it('should emit an OpenAI input_file block for PDFs when content format is openai-responses', async () => {
	// A single PDF attachment, supplied as a base64 data URL.
	const pdfBinary = {
		mimeType: 'application/pdf',
		fileName: 'report.pdf',
		data: 'data:application/pdf;base64,samplePdfData',
	};
	const inputItem = { json: {}, binary: { doc1: pdfBinary } };
	mockContext.getInputData.mockReturnValue([inputItem]);

	const passthrough = { passthroughBinaryImages: true, passthroughBinaryPdfs: true };
	const result: HumanMessage = await extractBinaryMessages(
		mockContext,
		0,
		passthrough,
		'openai-responses',
	);

	// The Responses-native part carries the full data URL plus the file name.
	const expectedBlock = {
		type: 'input_file',
		file_data: 'data:application/pdf;base64,samplePdfData',
		filename: 'report.pdf',
	};
	expect(result.content[0]).toEqual(expectedBlock);
});
|
||||
|
||||
it('should keep images as image_url even for openai-responses format', async () => {
	// Only file (PDF) attachments switch encoding; images must be unaffected.
	const pngBinary = {
		mimeType: 'image/png',
		data: 'data:image/png;base64,imageData',
	};
	const inputItem = { json: {}, binary: { img1: pngBinary } };
	mockContext.getInputData.mockReturnValue([inputItem]);

	const passthrough = { passthroughBinaryImages: true, passthroughBinaryPdfs: true };
	const result: HumanMessage = await extractBinaryMessages(
		mockContext,
		0,
		passthrough,
		'openai-responses',
	);

	const expectedBlock = {
		type: 'image_url',
		image_url: { url: 'data:image/png;base64,imageData' },
	};
	expect(result.content[0]).toEqual(expectedBlock);
});
|
||||
|
||||
@@ -256,6 +330,7 @@ describe('extractBinaryMessages', () => {
|
||||
source_type: 'base64',
|
||||
mime_type: 'application/pdf',
|
||||
data: 'pdfData456',
|
||||
metadata: { filename: 'test.pdf' },
|
||||
},
|
||||
]),
|
||||
);
|
||||
@@ -288,6 +363,7 @@ describe('extractBinaryMessages', () => {
|
||||
source_type: 'base64',
|
||||
mime_type: 'application/pdf',
|
||||
data: Buffer.from('fakepdfdata').toString(BINARY_ENCODING),
|
||||
metadata: { filename: 'attachment.pdf' },
|
||||
});
|
||||
});
|
||||
|
||||
@@ -321,6 +397,7 @@ describe('extractBinaryMessages', () => {
|
||||
source_type: 'base64',
|
||||
mime_type: 'application/pdf',
|
||||
data: 'pdfData456',
|
||||
metadata: { filename: 'test.pdf' },
|
||||
});
|
||||
});
|
||||
|
||||
@@ -639,6 +716,39 @@ describe('prepareMessages', () => {
|
||||
expect(hasBinaryMessage).toBe(true);
|
||||
});
|
||||
|
||||
it('should emit input_file for PDFs when the connected model uses the OpenAI Responses API', async () => {
	const pdfBinary = {
		mimeType: 'application/pdf',
		fileName: 'report.pdf',
		data: 'data:application/pdf;base64,samplePdfData',
	};
	const inputItem = { json: {}, binary: { doc1: pdfBinary } };
	mockContext.getInputData.mockReturnValue([inputItem]);

	// Stand-in for a ChatOpenAI configured against the Responses API: the mock
	// only needs the probe that format detection looks for.
	type WithResponsesProbe = { _useResponsesApi: () => boolean };
	const responsesApiModel = mock<BaseChatModel>();
	(responsesApiModel as unknown as WithResponsesProbe)._useResponsesApi = () => true;

	const messages = await prepareMessages(mockContext, 0, {
		systemMessage: 'Test system',
		passthroughBinaryImages: false,
		passthroughBinaryPdfs: true,
		model: responsesApiModel,
	});

	const binaryMessage = messages.find(
		(candidate) => candidate instanceof HumanMessage,
	) as HumanMessage;
	expect(binaryMessage).toBeDefined();

	const expectedBlock = {
		type: 'input_file',
		file_data: 'data:application/pdf;base64,samplePdfData',
		filename: 'report.pdf',
	};
	expect(binaryMessage.content[0]).toEqual(expectedBlock);
});
|
||||
|
||||
it('should not include system_message in prompt templates if not provided after version 1.9', async () => {
|
||||
const fakeItem = { json: {} };
|
||||
const mockNode = mock<INode>();
|
||||
|
||||
Reference in New Issue
Block a user