fix: improve flow chat and benchmark coverage (#8825)
* fix: support special flow modules in evals Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * refactor: extract shared flow helper logic Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * fix: make special flow tools openai-compatible Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * fix: improve flow eval prompts and validation Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * test: relax flow benchmark overfits Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * test: record updated flow benchmark history Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * fix: address flow review findings Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * refactor: source flow chat special module prompt Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * fix: narrow rawscript helper return type Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * refactor: dedupe flow chat prompt guidance Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * fix: relax flow test10 validation Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -4,10 +4,15 @@ import type { FlowModule, InputTransform } from '../../../../../frontend/src/lib
|
||||
import type { ExtendedOpenFlow } from '../../../../../frontend/src/lib/components/flows/types'
|
||||
import type { FlowAIChatHelpers } from '../../../../../frontend/src/lib/components/copilot/chat/flow/core'
|
||||
import type { ScriptLintResult } from '../../../../../frontend/src/lib/components/copilot/chat/shared'
|
||||
import { findModuleById } from '../../../../../frontend/src/lib/components/copilot/chat/shared'
|
||||
import { getSubModules } from '../../../../../frontend/src/lib/components/flows/flowExplorer'
|
||||
import {
|
||||
createInlineScriptSession
|
||||
} from '../../../../../frontend/src/lib/components/copilot/chat/flow/inlineScriptsUtils'
|
||||
import {
|
||||
applyFlowJsonUpdate,
|
||||
getFlowModuleById,
|
||||
updateRawScriptModuleContent
|
||||
} from '../../../../../frontend/src/lib/components/copilot/chat/flow/helperUtils'
|
||||
import {
|
||||
registerBenchmarkWorkspace,
|
||||
registerBenchmarkWorkspaceRunnables,
|
||||
@@ -32,6 +37,8 @@ export interface FlowWorkspaceFixtures {
|
||||
export async function createFlowFileHelpers(
|
||||
initialModules: FlowModule[] = [],
|
||||
initialSchema?: Record<string, any>,
|
||||
initialPreprocessorModule?: FlowModule,
|
||||
initialFailureModule?: FlowModule,
|
||||
workspaceRoot?: string,
|
||||
workspaceFixtures?: FlowWorkspaceFixtures
|
||||
): Promise<{
|
||||
@@ -42,7 +49,11 @@ export async function createFlowFileHelpers(
|
||||
workspaceDir: string | null
|
||||
}> {
|
||||
let flow: ExtendedOpenFlow = {
|
||||
value: { modules: structuredClone(initialModules) },
|
||||
value: {
|
||||
modules: structuredClone(initialModules),
|
||||
preprocessor_module: structuredClone(initialPreprocessorModule),
|
||||
failure_module: structuredClone(initialFailureModule)
|
||||
},
|
||||
summary: '',
|
||||
schema: initialSchema ?? {
|
||||
$schema: 'https://json-schema.org/draft/2020-12/schema',
|
||||
@@ -76,36 +87,29 @@ export async function createFlowFileHelpers(
|
||||
getFlowAndSelectedId: () => ({ flow, selectedId: '' }),
|
||||
getModules: (id?: string) => {
|
||||
if (!id) return flow.value.modules
|
||||
const module = findModuleById(flow.value.modules, id)
|
||||
return module ? [module] : []
|
||||
const module = getFlowModuleById(flow, id)
|
||||
return module ? getSubModules(module).flat() : []
|
||||
},
|
||||
inlineScriptSession,
|
||||
setSnapshot: () => {},
|
||||
revertToSnapshot: () => {},
|
||||
setCode: async (id: string, code: string) => {
|
||||
const module = findModuleById(flow.value.modules, id)
|
||||
if (module && module.value.type === 'rawscript') {
|
||||
module.value.content = code
|
||||
}
|
||||
updateRawScriptModuleContent(flow, id, code)
|
||||
inlineScriptSession.set(id, code)
|
||||
await persistFlow()
|
||||
},
|
||||
setFlowJson: async (
|
||||
modules: FlowModule[] | undefined,
|
||||
schema: Record<string, any> | undefined
|
||||
schema: Record<string, any> | undefined,
|
||||
preprocessorModule: FlowModule | null | undefined,
|
||||
failureModule: FlowModule | null | undefined
|
||||
) => {
|
||||
if (modules) {
|
||||
flow.value.modules = inlineScriptSession.restoreInlineScriptReferences(modules)
|
||||
const unresolvedRefs = inlineScriptSession.findUnresolvedInlineScriptRefs(flow.value.modules)
|
||||
if (unresolvedRefs.length > 0) {
|
||||
throw new Error(
|
||||
`Unresolved inline script references: ${unresolvedRefs.join(', ')}`
|
||||
)
|
||||
}
|
||||
}
|
||||
if (schema !== undefined) {
|
||||
flow.schema = schema
|
||||
}
|
||||
applyFlowJsonUpdate(flow, inlineScriptSession, {
|
||||
modules,
|
||||
schema,
|
||||
preprocessorModule,
|
||||
failureModule
|
||||
})
|
||||
await persistFlow()
|
||||
},
|
||||
getFlowInputsSchema: async () => flow.schema ?? {},
|
||||
@@ -122,7 +126,9 @@ export async function createFlowFileHelpers(
|
||||
JSON.stringify(
|
||||
{
|
||||
requestedArgs: args ?? {},
|
||||
modules: flow.value.modules.map((module) => module.id)
|
||||
modules: flow.value.modules.map((module) => module.id),
|
||||
preprocessor_module: flow.value.preprocessor_module?.id ?? null,
|
||||
failure_module: flow.value.failure_module?.id ?? null
|
||||
},
|
||||
null,
|
||||
2
|
||||
@@ -136,6 +142,8 @@ export async function createFlowFileHelpers(
|
||||
result: {
|
||||
requestedArgs: args ?? {},
|
||||
modules: flow.value.modules.map((module) => module.id),
|
||||
preprocessor_module: flow.value.preprocessor_module?.id ?? null,
|
||||
failure_module: flow.value.failure_module?.id ?? null,
|
||||
mocked: true
|
||||
},
|
||||
logs: 'Mock benchmark flow test run completed successfully.'
|
||||
|
||||
@@ -19,6 +19,8 @@ import type { TokenUsage } from '../shared/types'
|
||||
/**
 * Optional initial flow state supplied to a flow eval case.
 *
 * Every member is optional; a fixture may provide any subset of the fields.
 */
export interface FlowFixture {
  /** Flow body: the regular module list plus the two optional special modules. */
  value?: {
    /** Top-level flow modules. */
    modules?: FlowModule[]
    /** Special module stored under the flow's `preprocessor_module` key. */
    preprocessor_module?: FlowModule
    /** Special module stored under the flow's `failure_module` key. */
    failure_module?: FlowModule
  }
  /** Flow input schema; NOTE(review): presumably a JSON Schema object — confirm against callers. */
  schema?: Record<string, unknown>
}
|
||||
@@ -54,6 +56,8 @@ export async function runFlowEval(
|
||||
const { helpers, getFlow, cleanup } = await createFlowFileHelpers(
|
||||
options?.initialFlow?.value?.modules ?? [],
|
||||
options?.initialFlow?.schema,
|
||||
options?.initialFlow?.value?.preprocessor_module,
|
||||
options?.initialFlow?.value?.failure_module,
|
||||
workspaceRoot,
|
||||
options?.workspaceFixtures
|
||||
)
|
||||
|
||||
@@ -136,7 +136,7 @@
|
||||
- search FAQs
|
||||
- open a support ticket when needed
|
||||
|
||||
After that, log the interaction and return the assistant's response along with any actions it took.
|
||||
After that, log the interaction and return the assistant's response.
|
||||
judgeChecklist:
|
||||
- "the input schema includes `customer_id` and `query_text`"
|
||||
- the flow loads the customer's profile and order history
|
||||
@@ -146,24 +146,40 @@
|
||||
- the assistant can search FAQs
|
||||
- the assistant can open a support ticket
|
||||
- the flow logs the interaction
|
||||
- the final output returns the assistant response along with any actions taken or resulting support action details
|
||||
- the final output returns the assistant response
|
||||
|
||||
- id: flow-test7-simple-modification
|
||||
prompt: |-
|
||||
Update this flow so it validates processed data before saving it.
|
||||
|
||||
After `process_data`, add a `validate_data` step that checks the data array is not empty.
|
||||
If the array is empty, it should return an error object with the message `No data to save`.
|
||||
If the array is empty, the flow should surface the message `No data to save` and prevent saving.
|
||||
If validation passes, let the save continue normally.
|
||||
Update `save_results` so it handles the validation result correctly.
|
||||
Update `save_results` so it uses the validation outcome instead of bypassing it.
|
||||
initial: ai_evals/fixtures/frontend/flow/initial/test5_initial.json
|
||||
expected: ai_evals/fixtures/frontend/flow/expected/test5_modify_simple.json
|
||||
validate:
|
||||
topLevelStepIds:
|
||||
- fetch_data
|
||||
- process_data
|
||||
- validate_data
|
||||
topLevelStepOrder:
|
||||
- fetch_data
|
||||
- process_data
|
||||
- validate_data
|
||||
topLevelStepTypes:
|
||||
- id: fetch_data
|
||||
type: rawscript
|
||||
- id: process_data
|
||||
type: rawscript
|
||||
- id: validate_data
|
||||
type: rawscript
|
||||
judgeChecklist:
|
||||
- the updated flow keeps the original fetch and process steps intact
|
||||
- "a `validate_data` step is added after `process_data`"
|
||||
- "`validate_data` checks that the processed data array is not empty"
|
||||
- "empty data returns an error object with the message `No data to save`"
|
||||
- "`save_results` handles the validation result correctly"
|
||||
- "when processed data is empty, the flow surfaces the message `No data to save` and does not save results"
|
||||
- "`save_results` uses the validation outcome instead of reading `results.process_data` directly"
|
||||
- "exact field names or wrapper object shape for the validation result are not important"
|
||||
|
||||
- id: flow-test8-branching-in-loop
|
||||
prompt: |-
|
||||
@@ -193,7 +209,29 @@
|
||||
Update `combine_data` so it merges the enrichment results and sets a `hasFallbacks` flag when any fallback was used.
|
||||
Keep `get_item` as the first step and `return_result` as the last step.
|
||||
initial: ai_evals/fixtures/frontend/flow/initial/test7_initial.json
|
||||
expected: ai_evals/fixtures/frontend/flow/expected/test7_modify_complex.json
|
||||
validate:
|
||||
topLevelStepIds:
|
||||
- get_item
|
||||
- combine_data
|
||||
- return_result
|
||||
topLevelStepOrder:
|
||||
- get_item
|
||||
- combine_data
|
||||
- return_result
|
||||
topLevelStepTypeCountsAtLeast:
|
||||
- type: branchall
|
||||
count: 1
|
||||
topLevelStepTypes:
|
||||
- id: get_item
|
||||
type: rawscript
|
||||
- id: combine_data
|
||||
type: rawscript
|
||||
- id: return_result
|
||||
type: rawscript
|
||||
moduleRules:
|
||||
- id: enrich_price
|
||||
- id: enrich_inventory
|
||||
- id: enrich_reviews
|
||||
judgeChecklist:
|
||||
- "the updated flow keeps `get_item` as the first step"
|
||||
- "the updated flow keeps `return_result` as the last step"
|
||||
@@ -206,14 +244,42 @@
|
||||
prompt: |-
|
||||
Create a flow that keeps incrementing a counter until it reaches a target value.
|
||||
The input should include a number field named `target`.
|
||||
Name the looping step `count_until_target`.
|
||||
Once the target is reached, return the final counter value.
|
||||
expected: ai_evals/fixtures/frontend/flow/expected/test10_while_loop_counter.json
|
||||
Use a top-level loop step named `count_until_target`.
|
||||
Inside it, use a single step named `increment_counter` that increments the current counter.
|
||||
The loop should stop once the counter reaches `target`.
|
||||
After the loop, add a top-level step named `return_final_counter` that returns the last counter value.
|
||||
validate:
|
||||
exactTopLevelStepIds:
|
||||
- count_until_target
|
||||
- return_final_counter
|
||||
topLevelStepOrder:
|
||||
- count_until_target
|
||||
- return_final_counter
|
||||
topLevelStepTypes:
|
||||
- id: count_until_target
|
||||
type: whileloopflow
|
||||
- id: return_final_counter
|
||||
type: rawscript
|
||||
moduleRules:
|
||||
- id: count_until_target
|
||||
hasStopAfterIf: true
|
||||
hasStopAfterAllItersIf: false
|
||||
exactImmediateChildStepIds:
|
||||
- increment_counter
|
||||
immediateChildStepTypes:
|
||||
- id: increment_counter
|
||||
type: rawscript
|
||||
moduleFieldRules:
|
||||
- id: count_until_target
|
||||
path: stop_after_if.expr
|
||||
equals: result >= flow_input.target
|
||||
judgeChecklist:
|
||||
- "the input schema includes a number field named `target`"
|
||||
- "the looping step is named `count_until_target`"
|
||||
- the flow keeps incrementing a counter until the target is reached
|
||||
- the final output returns the final counter value
|
||||
- "the top-level while loop step is named `count_until_target`"
|
||||
- "`count_until_target` contains a single increment step named `increment_counter`"
|
||||
- "`count_until_target` uses module-level `stop_after_if` to stop when the counter reaches `target`"
|
||||
- "`increment_counter` uses `flow_input.iter.value` or an equivalent loop-state expression and falls back to `0` on the first iteration"
|
||||
- "`return_final_counter` returns the final counter value"
|
||||
|
||||
- id: flow-test11-preprocessor-and-failure-handler
|
||||
prompt: |-
|
||||
@@ -242,8 +308,16 @@
|
||||
Add an approval step named `request_approval` that pauses the flow and asks the approver for a comment.
|
||||
One approval should be enough to continue.
|
||||
After approval, add a final step named `finalize_purchase` that returns an approved status object.
|
||||
expected: ai_evals/fixtures/frontend/flow/expected/test12_approval_step.json
|
||||
validate:
|
||||
topLevelStepIds:
|
||||
- request_approval
|
||||
- finalize_purchase
|
||||
topLevelStepOrder:
|
||||
- request_approval
|
||||
- finalize_purchase
|
||||
topLevelStepTypes:
|
||||
- id: finalize_purchase
|
||||
type: rawscript
|
||||
schemaRequiredPaths:
|
||||
- requester_email
|
||||
- amount
|
||||
|
||||
@@ -34,6 +34,8 @@ export async function judgeOutput(input: {
|
||||
"If a checklist is provided, treat it as the explicit acceptance criteria for this case.",
|
||||
"Be strict about missing requested functionality.",
|
||||
"When the prompt wording is ambiguous, prefer the checklist over inferred structural requirements.",
|
||||
"Do not invent additional Windmill-specific constraints that are not explicit in the prompt, checklist, or expected state.",
|
||||
"Do not lower the score just because the output uses a different but valid Windmill idiom, naming choice, or equivalent field shape.",
|
||||
"Do not require exact ids, exact topology, or exact field names unless the prompt, checklist, or expected state clearly requires them.",
|
||||
`Always respond by calling the ${JUDGE_TOOL_NAME} tool exactly once.`,
|
||||
].join("\n\n");
|
||||
|
||||
@@ -16,6 +16,39 @@ export interface FlowValidationSpec {
|
||||
schemaAnyOf?: Array<{
|
||||
requiredPaths: string[];
|
||||
}>;
|
||||
exactTopLevelStepIds?: string[];
|
||||
topLevelStepIds?: string[];
|
||||
topLevelStepOrder?: string[];
|
||||
topLevelStepTypeCountsAtLeast?: Array<{
|
||||
type: string;
|
||||
count: number;
|
||||
}>;
|
||||
topLevelStepTypes?: Array<{
|
||||
id: string;
|
||||
type: string;
|
||||
}>;
|
||||
moduleRules?: Array<{
|
||||
id: string;
|
||||
hasStopAfterIf?: boolean;
|
||||
hasStopAfterAllItersIf?: boolean;
|
||||
immediateChildStepIds?: string[];
|
||||
exactImmediateChildStepIds?: string[];
|
||||
immediateChildStepTypes?: Array<{
|
||||
id: string;
|
||||
type: string;
|
||||
}>;
|
||||
requiredInputTransforms?: Array<{
|
||||
type?: string;
|
||||
expr?: string;
|
||||
exprAnyOf?: string[];
|
||||
value?: string | number | boolean | null;
|
||||
}>;
|
||||
}>;
|
||||
moduleFieldRules?: Array<{
|
||||
id: string;
|
||||
path: string;
|
||||
equals: string | number | boolean | null;
|
||||
}>;
|
||||
resolveResultsRefs?: boolean;
|
||||
requireSpecialModules?: Array<"preprocessor_module" | "failure_module">;
|
||||
requireSuspendSteps?: Array<{
|
||||
|
||||
@@ -490,6 +490,181 @@ function validateFlowRequirements(
|
||||
validate: FlowValidationSpec
|
||||
): BenchmarkCheck[] {
|
||||
const checks: BenchmarkCheck[] = [];
|
||||
const actualTopLevelModules = getFlowModules(flow);
|
||||
const actualIds = actualTopLevelModules
|
||||
.map((module) => (typeof module.id === "string" ? module.id : null))
|
||||
.filter((id): id is string => Boolean(id));
|
||||
|
||||
if (validate.exactTopLevelStepIds && validate.exactTopLevelStepIds.length > 0) {
|
||||
checks.push(
|
||||
check(
|
||||
"flow top-level step ids match exactly",
|
||||
stringArraysEqual(actualIds, validate.exactTopLevelStepIds),
|
||||
`expected ids: ${validate.exactTopLevelStepIds.join(", ")}; actual ids: ${actualIds.join(", ")}`
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
if (validate.topLevelStepIds && validate.topLevelStepIds.length > 0) {
|
||||
checks.push(
|
||||
check(
|
||||
"flow includes required top-level step ids",
|
||||
validate.topLevelStepIds.every((id) => actualIds.includes(id)),
|
||||
`required ids: ${validate.topLevelStepIds.join(", ")}; actual ids: ${actualIds.join(", ")}`
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
if (validate.topLevelStepOrder && validate.topLevelStepOrder.length > 0) {
|
||||
checks.push(
|
||||
check(
|
||||
"flow preserves required top-level step order",
|
||||
preservesRelativeOrder(actualIds, validate.topLevelStepOrder),
|
||||
`required order: ${validate.topLevelStepOrder.join(" -> ")}; actual ids: ${actualIds.join(" -> ")}`
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
for (const typeRequirement of validate.topLevelStepTypeCountsAtLeast ?? []) {
|
||||
const actualCount = actualTopLevelModules.filter(
|
||||
(module) => getModuleType(module) === typeRequirement.type
|
||||
).length;
|
||||
checks.push(
|
||||
check(
|
||||
`flow includes at least ${typeRequirement.count} top-level ${typeRequirement.type} step${typeRequirement.count === 1 ? "" : "s"}`,
|
||||
actualCount >= typeRequirement.count,
|
||||
`expected at least ${typeRequirement.count}, got ${actualCount}`
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
for (const requiredStep of validate.topLevelStepTypes ?? []) {
|
||||
const module = actualTopLevelModules.find((candidate) => candidate.id === requiredStep.id);
|
||||
checks.push(check(`${requiredStep.id} step exists`, Boolean(module)));
|
||||
if (!module) {
|
||||
continue;
|
||||
}
|
||||
|
||||
checks.push(
|
||||
check(
|
||||
`${requiredStep.id} type matches required`,
|
||||
getModuleType(module) === requiredStep.type,
|
||||
`expected ${requiredStep.type}, got ${getModuleType(module) ?? "(missing)"}`
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
for (const moduleRule of validate.moduleRules ?? []) {
|
||||
const module = findFlowModuleById(flow, moduleRule.id);
|
||||
checks.push(check(`${moduleRule.id} module exists for rule validation`, Boolean(module)));
|
||||
if (!module) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (moduleRule.hasStopAfterIf !== undefined) {
|
||||
checks.push(
|
||||
check(
|
||||
`${moduleRule.id} stop_after_if presence matches required shape`,
|
||||
hasStopAfterIf(module) === moduleRule.hasStopAfterIf,
|
||||
`expected stop_after_if=${moduleRule.hasStopAfterIf}, got ${hasStopAfterIf(module)}`
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
if (moduleRule.hasStopAfterAllItersIf !== undefined) {
|
||||
checks.push(
|
||||
check(
|
||||
`${moduleRule.id} stop_after_all_iters_if presence matches required shape`,
|
||||
hasStopAfterAllItersIf(module) === moduleRule.hasStopAfterAllItersIf,
|
||||
`expected stop_after_all_iters_if=${moduleRule.hasStopAfterAllItersIf}, got ${hasStopAfterAllItersIf(module)}`
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
const immediateChildren = getImmediateNestedModules(module);
|
||||
const childIds = immediateChildren
|
||||
.map((child) => (typeof child.id === "string" ? child.id : null))
|
||||
.filter((id): id is string => Boolean(id));
|
||||
|
||||
if (moduleRule.immediateChildStepIds && moduleRule.immediateChildStepIds.length > 0) {
|
||||
checks.push(
|
||||
check(
|
||||
`${moduleRule.id} includes required immediate child steps`,
|
||||
moduleRule.immediateChildStepIds.every((id) => childIds.includes(id)),
|
||||
`required child ids: ${moduleRule.immediateChildStepIds.join(", ")}; actual child ids: ${childIds.join(", ")}`
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
if (moduleRule.exactImmediateChildStepIds && moduleRule.exactImmediateChildStepIds.length > 0) {
|
||||
checks.push(
|
||||
check(
|
||||
`${moduleRule.id} immediate child steps match exactly`,
|
||||
stringArraysEqual(childIds, moduleRule.exactImmediateChildStepIds),
|
||||
`expected child ids: ${moduleRule.exactImmediateChildStepIds.join(", ")}; actual child ids: ${childIds.join(", ")}`
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
for (const requiredChild of moduleRule.immediateChildStepTypes ?? []) {
|
||||
const child = immediateChildren.find((candidate) => candidate.id === requiredChild.id);
|
||||
checks.push(check(`${moduleRule.id}.${requiredChild.id} child step exists`, Boolean(child)));
|
||||
if (!child) {
|
||||
continue;
|
||||
}
|
||||
|
||||
checks.push(
|
||||
check(
|
||||
`${moduleRule.id}.${requiredChild.id} child type matches required`,
|
||||
getModuleType(child) === requiredChild.type,
|
||||
`expected ${requiredChild.type}, got ${getModuleType(child) ?? "(missing)"}`
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
const inputTransforms = getInputTransformRecords(module);
|
||||
for (const requiredTransform of moduleRule.requiredInputTransforms ?? []) {
|
||||
const matchedTransform = inputTransforms.find((transform) =>
|
||||
matchesRequiredInputTransform(transform, requiredTransform)
|
||||
);
|
||||
|
||||
const expectedParts = [
|
||||
requiredTransform.type ? `type=${JSON.stringify(requiredTransform.type)}` : null,
|
||||
requiredTransform.expr ? `expr=${JSON.stringify(requiredTransform.expr)}` : null,
|
||||
requiredTransform.exprAnyOf && requiredTransform.exprAnyOf.length > 0
|
||||
? `exprAnyOf=${JSON.stringify(requiredTransform.exprAnyOf)}`
|
||||
: null,
|
||||
requiredTransform.value !== undefined
|
||||
? `value=${JSON.stringify(requiredTransform.value)}`
|
||||
: null,
|
||||
].filter(Boolean);
|
||||
|
||||
checks.push(
|
||||
check(
|
||||
`${moduleRule.id} includes required input transform (${expectedParts.join(", ")})`,
|
||||
Boolean(matchedTransform),
|
||||
`available transforms: ${summarizeInputTransforms(inputTransforms)}`
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
for (const fieldRule of validate.moduleFieldRules ?? []) {
|
||||
const module = findFlowModuleById(flow, fieldRule.id);
|
||||
checks.push(check(`${fieldRule.id} module exists for field validation`, Boolean(module)));
|
||||
if (!module) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const actualValue = getValueAtPath(module, fieldRule.path);
|
||||
checks.push(
|
||||
check(
|
||||
`${fieldRule.id}.${fieldRule.path} matches required value`,
|
||||
valuesEqualForValidation(actualValue, fieldRule.equals),
|
||||
`expected ${JSON.stringify(fieldRule.equals)}, got ${JSON.stringify(actualValue)}`
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
for (const requiredPath of validate.schemaRequiredPaths ?? []) {
|
||||
checks.push(
|
||||
@@ -639,6 +814,30 @@ function preservesRelativeOrder(actualIds: string[], expectedIds: string[]): boo
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Reports whether two string arrays are identical element by element.
 *
 * The comparison is order-sensitive: both arrays must have the same length and
 * hold strictly equal strings at every index. Use a different helper for
 * order-insensitive checks.
 *
 * @param left - First array to compare; it is not modified.
 * @param right - Second array to compare; it is not modified.
 * @returns `true` when both arrays contain the same strings in the same order.
 */
function stringArraysEqual(left: readonly string[], right: readonly string[]): boolean {
  // Different lengths can never be equal; this also keeps `right[index]` in range below.
  if (left.length !== right.length) {
    return false;
  }

  return left.every((value, index) => value === right[index]);
}
|
||||
|
||||
/**
 * Compares an actual value against an expected scalar from a validation spec.
 *
 * When both sides are strings they are compared after passing each through
 * `normalizeInlineExpression`, so whitespace-only differences do not cause a
 * mismatch. Every other combination (including a string compared with a
 * non-string) uses strict `===` equality.
 *
 * @param actual - Value found in the flow under validation.
 * @param expected - Scalar the spec requires.
 * @returns `true` when the two values are considered equal for validation.
 */
function valuesEqualForValidation(
  actual: unknown,
  expected: string | number | boolean | null
): boolean {
  if (typeof expected === "string" && typeof actual === "string") {
    return normalizeInlineExpression(actual) === normalizeInlineExpression(expected);
  }

  return actual === expected;
}
|
||||
|
||||
/**
 * Canonicalizes whitespace in an expression string for comparison.
 *
 * Every run of whitespace (spaces, tabs, newlines) is collapsed to a single
 * space and leading/trailing whitespace is removed. The replacement applies to
 * the whole string, so whitespace inside quoted literals is collapsed as well.
 *
 * @param value - Expression text to normalize.
 * @returns The expression with whitespace runs collapsed and the ends trimmed.
 */
function normalizeInlineExpression(value: string): string {
  return value.replace(/\s+/g, " ").trim();
}
|
||||
|
||||
function collectUnresolvedResultsRefs(flow: FlowState): string[] {
|
||||
const unresolved = new Set<string>();
|
||||
validateModuleSequence(getFlowModules(flow), new Map<string, Record<string, unknown>>(), unresolved);
|
||||
@@ -873,18 +1072,24 @@ function getInlineScriptPlaceholderModuleIds(flow: FlowState): string[] {
|
||||
}
|
||||
|
||||
/**
 * Lists the ids of the modules returned by `getImmediateNestedModules` for the
 * given module.
 *
 * Children whose `id` is not a string are omitted from the result.
 *
 * @param module - Flow module record to inspect.
 * @returns The string ids of the nested modules, in the order they are returned.
 */
function getImmediateNestedModuleIds(module: Record<string, unknown>): string[] {
  return getImmediateNestedModules(module).flatMap((child) =>
    typeof child.id === "string" ? [child.id] : []
  );
}
|
||||
|
||||
function getImmediateNestedModules(module: Record<string, unknown>): Array<Record<string, unknown>> {
|
||||
const nested: Array<Record<string, unknown>> = [];
|
||||
const value = isObjectRecord(module.value) ? module.value : null;
|
||||
if (!value) {
|
||||
return ids;
|
||||
return nested;
|
||||
}
|
||||
|
||||
if (Array.isArray(value.modules)) {
|
||||
ids.push(...asModuleArray(value.modules).flatMap((child) => (typeof child.id === "string" ? [child.id] : [])));
|
||||
nested.push(...asModuleArray(value.modules));
|
||||
}
|
||||
|
||||
if (Array.isArray(value.default)) {
|
||||
ids.push(...asModuleArray(value.default).flatMap((child) => (typeof child.id === "string" ? [child.id] : [])));
|
||||
nested.push(...asModuleArray(value.default));
|
||||
}
|
||||
|
||||
if (Array.isArray(value.branches)) {
|
||||
@@ -892,13 +1097,11 @@ function getImmediateNestedModuleIds(module: Record<string, unknown>): string[]
|
||||
if (!isObjectRecord(branch) || !Array.isArray(branch.modules)) {
|
||||
continue;
|
||||
}
|
||||
ids.push(
|
||||
...asModuleArray(branch.modules).flatMap((child) => (typeof child.id === "string" ? [child.id] : []))
|
||||
);
|
||||
nested.push(...asModuleArray(branch.modules));
|
||||
}
|
||||
}
|
||||
|
||||
return ids;
|
||||
return nested;
|
||||
}
|
||||
|
||||
function getModuleCode(module: Record<string, unknown>): string | null {
|
||||
@@ -906,6 +1109,79 @@ function getModuleCode(module: Record<string, unknown>): string | null {
|
||||
return typeof value?.content === "string" ? value.content : null;
|
||||
}
|
||||
|
||||
/**
 * Reads a nested value from a record using a dot-separated path.
 *
 * Empty path segments are ignored, so an empty path returns `record` itself.
 * The walk stops and yields `undefined` as soon as the current value is not
 * accepted by `isObjectRecord` while segments remain.
 *
 * @param record - Root object to read from.
 * @param dottedPath - Path such as `stop_after_if.expr`.
 * @returns The value found at the path, or `undefined` when it cannot be reached.
 */
function getValueAtPath(record: Record<string, unknown>, dottedPath: string): unknown {
  // filter(Boolean) drops empty segments produced by leading, trailing or doubled dots.
  const segments = dottedPath.split(".").filter(Boolean);
  let current: unknown = record;

  for (const segment of segments) {
    if (!isObjectRecord(current)) {
      return undefined;
    }
    current = current[segment];
  }

  return current;
}
|
||||
|
||||
/**
 * Collects the input transform entries declared at `module.value.input_transforms`.
 *
 * Only entries accepted by `isObjectRecord` are returned; the keys of
 * `input_transforms` are discarded.
 *
 * @param module - Flow module record to inspect.
 * @returns The transform records, or an empty array when `module.value` or its
 *   `input_transforms` is not an object record.
 */
function getInputTransformRecords(module: Record<string, unknown>): Array<Record<string, unknown>> {
  const value = isObjectRecord(module.value) ? module.value : null;
  const inputTransforms = isObjectRecord(value?.input_transforms) ? value.input_transforms : null;
  if (!inputTransforms) {
    return [];
  }

  return Object.values(inputTransforms).filter(isObjectRecord);
}
|
||||
|
||||
/**
 * Checks whether an input transform satisfies every constraint of a requirement.
 *
 * Each constraint that is present on `required` must hold; constraints that
 * are absent are ignored. Comparisons go through `valuesEqualForValidation`.
 *
 * @param actual - Input transform record taken from a flow module.
 * @param required - Constraints from the validation spec:
 *   - `type`: required transform type.
 *   - `expr`: required expression.
 *   - `exprAnyOf`: acceptable expressions; `actual.expr` must be a string equal
 *     to at least one of them. An empty list is treated as "not specified".
 *   - `value`: required static value.
 * @returns `true` when `actual` meets all specified constraints.
 */
function matchesRequiredInputTransform(
  actual: Record<string, unknown>,
  required: {
    type?: string;
    expr?: string;
    exprAnyOf?: string[];
    value?: string | number | boolean | null;
  }
): boolean {
  if (required.type !== undefined && !valuesEqualForValidation(actual.type, required.type)) {
    return false;
  }

  if (required.expr !== undefined && !valuesEqualForValidation(actual.expr, required.expr)) {
    return false;
  }

  // An empty candidate list would make `some()` return false for every
  // transform and turn the requirement into one that can never be met, so it
  // is handled the same way as an omitted list.
  const exprCandidates = required.exprAnyOf ?? [];
  if (exprCandidates.length > 0) {
    const actualExpr = actual.expr;
    if (
      typeof actualExpr !== "string" ||
      !exprCandidates.some((candidate) => valuesEqualForValidation(actualExpr, candidate))
    ) {
      return false;
    }
  }

  if (required.value !== undefined && !valuesEqualForValidation(actual.value, required.value)) {
    return false;
  }

  return true;
}
|
||||
|
||||
/**
 * Renders input transforms as a compact one-line summary.
 *
 * Each transform is reduced to its `type`, `expr` and `value` fields and
 * serialized with `JSON.stringify`, which omits fields whose value is
 * `undefined`. Entries are joined with `"; "`.
 *
 * @param transforms - Transform records to summarize; the array is not modified.
 * @returns `"(none)"` for an empty list, otherwise the joined JSON snippets.
 */
function summarizeInputTransforms(transforms: ReadonlyArray<Record<string, unknown>>): string {
  if (transforms.length === 0) {
    return "(none)";
  }

  return transforms
    .map((transform) =>
      JSON.stringify({
        type: transform.type,
        expr: transform.expr,
        value: transform.value,
      })
    )
    .join("; ");
}
|
||||
|
||||
/**
 * Keeps only the entries of `value` that `isObjectRecord` accepts.
 *
 * @param value - Array of arbitrary values, e.g. a raw `modules` list.
 * @returns A new array containing the accepted entries in their original order.
 */
function asModuleArray(value: unknown[]): Array<Record<string, unknown>> {
  return value.filter(isObjectRecord);
}
|
||||
@@ -950,6 +1226,14 @@ function hasSuspendConfig(module: Record<string, unknown>): boolean {
|
||||
return typeof module.suspend === "object" && module.suspend !== null;
|
||||
}
|
||||
|
||||
/**
 * Reports whether the module carries a `stop_after_if` configuration.
 *
 * @param module - Flow module record to inspect.
 * @returns `true` when `module.stop_after_if` is accepted by `isObjectRecord`.
 */
function hasStopAfterIf(module: Record<string, unknown>): boolean {
  return isObjectRecord(module.stop_after_if);
}
|
||||
|
||||
/**
 * Reports whether the module carries a `stop_after_all_iters_if` configuration.
 *
 * @param module - Flow module record to inspect.
 * @returns `true` when `module.stop_after_all_iters_if` is accepted by `isObjectRecord`.
 */
function hasStopAfterAllItersIf(module: Record<string, unknown>): boolean {
  return isObjectRecord(module.stop_after_all_iters_if);
}
|
||||
|
||||
function getSuspendRequiredEvents(module: Record<string, unknown>): number | null {
|
||||
const suspend = isObjectRecord(module.suspend) ? module.suspend : null;
|
||||
return typeof suspend?.required_events === "number" ? suspend.required_events : null;
|
||||
|
||||
@@ -4,11 +4,25 @@
|
||||
{
|
||||
"id": "count_until_target",
|
||||
"value": {
|
||||
"type": "whileloopflow"
|
||||
"type": "whileloopflow",
|
||||
"skip_failures": false,
|
||||
"modules": [
|
||||
{
|
||||
"id": "increment_counter",
|
||||
"value": {
|
||||
"type": "rawscript",
|
||||
"language": "bun"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"stop_after_if": {
|
||||
"expr": "result >= flow_input.target",
|
||||
"skip_if_stopped": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "return_final_count",
|
||||
"id": "return_final_counter",
|
||||
"value": {
|
||||
"type": "rawscript"
|
||||
}
|
||||
@@ -25,6 +39,9 @@
|
||||
},
|
||||
"required": [
|
||||
"target"
|
||||
],
|
||||
"order": [
|
||||
"target"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
3
ai_evals/history/README.md
Normal file
3
ai_evals/history/README.md
Normal file
@@ -0,0 +1,3 @@
|
||||
Recorded history rows are anchored to the benchmark-definition commit used for the run.
|
||||
|
||||
That means `gitSha` points to the commit whose prompts, evaluators, and fixtures produced the recorded result. A later commit may only add the new JSONL row to git history without changing the benchmark itself.
|
||||
@@ -1,3 +1,15 @@
|
||||
{"createdAt":"2026-04-10T14:25:16.664Z","gitSha":"8f8b487be517a0bdd318c36857c1d46d5ab0723a","mode":"flow","runs":1,"runModel":"anthropic:claude-haiku-4-5-20251001","judgeModel":"claude-sonnet-4-6","caseCount":13,"attemptCount":13,"passedAttempts":8,"passRate":0.6153846153846154,"averageDurationMs":33424.692307692305,"averageJudgeScore":82.61538461538461,"averageTokenUsagePerAttempt":{"prompt":131901,"completion":3121.230769230769,"total":135022.23076923078},"failedCaseIds":["flow-test6-ai-agent-tools","flow-test7-simple-modification","flow-test9-parallel-refactor","flow-test10-while-loop-counter","flow-test11-preprocessor-and-failure-handler"],"cases":[{"id":"flow-test0-sum-two-numbers","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":16943,"averageJudgeScore":98,"averageTokenUsagePerAttempt":{"prompt":126615,"completion":839,"total":127454}},{"id":"flow-test1-reuse-existing-script","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":15220,"averageJudgeScore":100,"averageTokenUsagePerAttempt":{"prompt":75614,"completion":805,"total":76419}},{"id":"flow-test2-call-existing-subflow","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":15699,"averageJudgeScore":97,"averageTokenUsagePerAttempt":{"prompt":76182,"completion":887,"total":77069}},{"id":"flow-test3-branchone-routing","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":21605,"averageJudgeScore":100,"averageTokenUsagePerAttempt":{"prompt":62230,"completion":1509,"total":63739}},{"id":"flow-test4-order-processing-loop","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":47228,"averageJudgeScore":97,"averageTokenUsagePerAttempt":{"prompt":143511,"completion":5443,"total":148954}},{"id":"flow-test5-parallel-data-pipeline","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":81870,"averageJudgeScore":92,"averageTokenUsagePerAttempt":{"prompt":194542,"completion":12409,"total":206951}},{"id":"flow-test6-ai-agent-t
ools","attemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":51878,"averageJudgeScore":72,"averageTokenUsagePerAttempt":{"prompt":142071,"completion":5720,"total":147791}},{"id":"flow-test7-simple-modification","attemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":49113,"averageJudgeScore":42,"averageTokenUsagePerAttempt":{"prompt":318525,"completion":2702,"total":321227}},{"id":"flow-test8-branching-in-loop","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":18244,"averageJudgeScore":82,"averageTokenUsagePerAttempt":{"prompt":78441,"completion":979,"total":79420}},{"id":"flow-test9-parallel-refactor","attemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":49485,"averageJudgeScore":72,"averageTokenUsagePerAttempt":{"prompt":135237,"completion":5467,"total":140704}},{"id":"flow-test10-while-loop-counter","attemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":21210,"averageJudgeScore":90,"averageTokenUsagePerAttempt":{"prompt":127844,"completion":1179,"total":129023}},{"id":"flow-test11-preprocessor-and-failure-handler","attemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":25142,"averageJudgeScore":42,"averageTokenUsagePerAttempt":{"prompt":128648,"completion":1337,"total":129985}},{"id":"flow-test12-approval-step","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":20884,"averageJudgeScore":90,"averageTokenUsagePerAttempt":{"prompt":105253,"completion":1300,"total":106553}}]}
|
||||
{"createdAt":"2026-04-10T14:57:17.513Z","gitSha":"2a58402cfc5c320748839e92b51a1291b937bf26","mode":"flow","runs":1,"runModel":"anthropic:claude-opus-4-6","judgeModel":"claude-sonnet-4-6","caseCount":13,"attemptCount":13,"passedAttempts":8,"passRate":0.6153846153846154,"averageDurationMs":58074.53846153846,"averageJudgeScore":87.53846153846153,"averageTokenUsagePerAttempt":{"prompt":125452.76923076923,"completion":2957.769230769231,"total":128410.53846153847},"failedCaseIds":["flow-test4-order-processing-loop","flow-test6-ai-agent-tools","flow-test7-simple-modification","flow-test10-while-loop-counter","flow-test11-preprocessor-and-failure-handler"],"cases":[{"id":"flow-test0-sum-two-numbers","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":26967,"averageJudgeScore":97,"averageTokenUsagePerAttempt":{"prompt":103796,"completion":634,"total":104430}},{"id":"flow-test1-reuse-existing-script","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":29009,"averageJudgeScore":100,"averageTokenUsagePerAttempt":{"prompt":75507,"completion":743,"total":76250}},{"id":"flow-test2-call-existing-subflow","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":26828,"averageJudgeScore":100,"averageTokenUsagePerAttempt":{"prompt":76172,"completion":807,"total":76979}},{"id":"flow-test3-branchone-routing","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":44418,"averageJudgeScore":100,"averageTokenUsagePerAttempt":{"prompt":130440,"completion":1787,"total":132227}},{"id":"flow-test4-order-processing-loop","attemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":82185,"averageJudgeScore":97,"averageTokenUsagePerAttempt":{"prompt":116133,"completion":4905,"total":121038}},{"id":"flow-test5-parallel-data-pipeline","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":110344,"averageJudgeScore":97,"averageTokenUsagePerAttempt":{"prompt":122092,"completion":6980,"total":129072}},{"id":"flow-test6-ai
-agent-tools","attemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":119901,"averageJudgeScore":72,"averageTokenUsagePerAttempt":{"prompt":154916,"completion":8908,"total":163824}},{"id":"flow-test7-simple-modification","attemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":44333,"averageJudgeScore":72,"averageTokenUsagePerAttempt":{"prompt":109935,"completion":1536,"total":111471}},{"id":"flow-test8-branching-in-loop","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":54247,"averageJudgeScore":97,"averageTokenUsagePerAttempt":{"prompt":136872,"completion":2638,"total":139510}},{"id":"flow-test9-parallel-refactor","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":63274,"averageJudgeScore":82,"averageTokenUsagePerAttempt":{"prompt":137794,"completion":3686,"total":141480}},{"id":"flow-test10-while-loop-counter","attemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":38813,"averageJudgeScore":82,"averageTokenUsagePerAttempt":{"prompt":105075,"completion":1157,"total":106232}},{"id":"flow-test11-preprocessor-and-failure-handler","attemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":77267,"averageJudgeScore":52,"averageTokenUsagePerAttempt":{"prompt":256547,"completion":3398,"total":259945}},{"id":"flow-test12-approval-step","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":37383,"averageJudgeScore":90,"averageTokenUsagePerAttempt":{"prompt":105607,"completion":1272,"total":106879}}]}
|
||||
{"createdAt":"2026-04-10T14:29:52.249Z","gitSha":"8f8b487be517a0bdd318c36857c1d46d5ab0723a","mode":"flow","runs":1,"runModel":"openai:gpt-4o","judgeModel":"claude-sonnet-4-6","caseCount":13,"attemptCount":13,"passedAttempts":6,"passRate":0.46153846153846156,"averageDurationMs":29841.53846153846,"averageJudgeScore":68.46153846153847,"averageTokenUsagePerAttempt":{"prompt":72815.92307692308,"completion":770.7692307692307,"total":73586.69230769231},"failedCaseIds":["flow-test5-parallel-data-pipeline","flow-test6-ai-agent-tools","flow-test7-simple-modification","flow-test9-parallel-refactor","flow-test10-while-loop-counter","flow-test11-preprocessor-and-failure-handler","flow-test12-approval-step"],"cases":[{"id":"flow-test0-sum-two-numbers","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":20059,"averageJudgeScore":97,"averageTokenUsagePerAttempt":{"prompt":64091,"completion":265,"total":64356}},{"id":"flow-test1-reuse-existing-script","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":20728,"averageJudgeScore":100,"averageTokenUsagePerAttempt":{"prompt":46594,"completion":270,"total":46864}},{"id":"flow-test2-call-existing-subflow","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":21533,"averageJudgeScore":98,"averageTokenUsagePerAttempt":{"prompt":46859,"completion":232,"total":47091}},{"id":"flow-test3-branchone-routing","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":29004,"averageJudgeScore":100,"averageTokenUsagePerAttempt":{"prompt":64593,"completion":568,"total":65161}},{"id":"flow-test4-order-processing-loop","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":36250,"averageJudgeScore":95,"averageTokenUsagePerAttempt":{"prompt":66346,"completion":1259,"total":67605}},{"id":"flow-test5-parallel-data-pipeline","attemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":46151,"averageJudgeScore":72,"averageTokenUsagePerAttempt":{"prompt":104676,"completion":1698
,"total":106374}},{"id":"flow-test6-ai-agent-tools","attemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":72403,"averageJudgeScore":62,"averageTokenUsagePerAttempt":{"prompt":105280,"completion":2216,"total":107496}},{"id":"flow-test7-simple-modification","attemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":41599,"averageJudgeScore":20,"averageTokenUsagePerAttempt":{"prompt":103053,"completion":707,"total":103760}},{"id":"flow-test8-branching-in-loop","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":23352,"averageJudgeScore":82,"averageTokenUsagePerAttempt":{"prompt":97955,"completion":468,"total":98423}},{"id":"flow-test9-parallel-refactor","attemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":19341,"averageJudgeScore":0,"averageTokenUsagePerAttempt":{"prompt":12254,"completion":1057,"total":13311}},{"id":"flow-test10-while-loop-counter","attemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":16143,"averageJudgeScore":82,"averageTokenUsagePerAttempt":{"prompt":64480,"completion":445,"total":64925}},{"id":"flow-test11-preprocessor-and-failure-handler","attemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":24231,"averageJudgeScore":52,"averageTokenUsagePerAttempt":{"prompt":106068,"completion":472,"total":106540}},{"id":"flow-test12-approval-step","attemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":17146,"averageJudgeScore":30,"averageTokenUsagePerAttempt":{"prompt":64358,"completion":363,"total":64721}}]}
|
||||
{"createdAt":"2026-04-13T16:38:05.547Z","gitSha":"3f5841f84d878cd3f43c435fa237d3f0c2265fb9","mode":"flow","runs":1,"runModel":"anthropic:claude-haiku-4-5-20251001","judgeModel":"claude-sonnet-4-6","caseCount":13,"attemptCount":13,"passedAttempts":8,"passRate":0.6153846153846154,"averageDurationMs":28942.846153846152,"averageJudgeScore":83.46153846153847,"averageTokenUsagePerAttempt":{"prompt":110218.15384615384,"completion":2819,"total":113037.15384615384},"failedCaseIds":["flow-test4-order-processing-loop","flow-test6-ai-agent-tools","flow-test7-simple-modification","flow-test10-while-loop-counter","flow-test11-preprocessor-and-failure-handler"],"cases":[{"id":"flow-test0-sum-two-numbers","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":15019,"averageJudgeScore":98,"averageTokenUsagePerAttempt":{"prompt":103955,"completion":771,"total":104726}},{"id":"flow-test1-reuse-existing-script","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":15667,"averageJudgeScore":100,"averageTokenUsagePerAttempt":{"prompt":75649,"completion":803,"total":76452}},{"id":"flow-test2-call-existing-subflow","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":13990,"averageJudgeScore":97,"averageTokenUsagePerAttempt":{"prompt":76215,"completion":877,"total":77092}},{"id":"flow-test3-branchone-routing","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":17999,"averageJudgeScore":100,"averageTokenUsagePerAttempt":{"prompt":46494,"completion":1476,"total":47970}},{"id":"flow-test4-order-processing-loop","attemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":44637,"averageJudgeScore":82,"averageTokenUsagePerAttempt":{"prompt":142164,"completion":4784,"total":146948}},{"id":"flow-test5-parallel-data-pipeline","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":66613,"averageJudgeScore":95,"averageTokenUsagePerAttempt":{"prompt":158640,"completion":10231,"total":168871}},{"id":"flow-test6-ai-agen
t-tools","attemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":59129,"averageJudgeScore":72,"averageTokenUsagePerAttempt":{"prompt":149720,"completion":7633,"total":157353}},{"id":"flow-test7-simple-modification","attemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":23655,"averageJudgeScore":62,"averageTokenUsagePerAttempt":{"prompt":124117,"completion":1380,"total":125497}},{"id":"flow-test8-branching-in-loop","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":17782,"averageJudgeScore":82,"averageTokenUsagePerAttempt":{"prompt":78450,"completion":958,"total":79408}},{"id":"flow-test9-parallel-refactor","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":30100,"averageJudgeScore":82,"averageTokenUsagePerAttempt":{"prompt":90009,"completion":3124,"total":93133}},{"id":"flow-test10-while-loop-counter","attemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":24845,"averageJudgeScore":85,"averageTokenUsagePerAttempt":{"prompt":153396,"completion":1967,"total":155363}},{"id":"flow-test11-preprocessor-and-failure-handler","attemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":24102,"averageJudgeScore":35,"averageTokenUsagePerAttempt":{"prompt":128760,"completion":1351,"total":130111}},{"id":"flow-test12-approval-step","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":22719,"averageJudgeScore":95,"averageTokenUsagePerAttempt":{"prompt":105267,"completion":1292,"total":106559}}]}
|
||||
{"createdAt":"2026-04-13T16:41:07.631Z","gitSha":"3f5841f84d878cd3f43c435fa237d3f0c2265fb9","mode":"flow","runs":1,"runModel":"anthropic:claude-opus-4-6","judgeModel":"claude-sonnet-4-6","caseCount":13,"attemptCount":13,"passedAttempts":9,"passRate":0.6923076923076923,"averageDurationMs":51699.38461538462,"averageJudgeScore":84.3076923076923,"averageTokenUsagePerAttempt":{"prompt":126038.92307692308,"completion":2519.6923076923076,"total":128558.61538461539},"failedCaseIds":["flow-test6-ai-agent-tools","flow-test7-simple-modification","flow-test10-while-loop-counter","flow-test11-preprocessor-and-failure-handler"],"cases":[{"id":"flow-test0-sum-two-numbers","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":25781,"averageJudgeScore":97,"averageTokenUsagePerAttempt":{"prompt":103871,"completion":637,"total":104508}},{"id":"flow-test1-reuse-existing-script","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":21895,"averageJudgeScore":100,"averageTokenUsagePerAttempt":{"prompt":75587,"completion":716,"total":76303}},{"id":"flow-test2-call-existing-subflow","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":24773,"averageJudgeScore":100,"averageTokenUsagePerAttempt":{"prompt":76207,"completion":790,"total":76997}},{"id":"flow-test3-branchone-routing","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":41700,"averageJudgeScore":100,"averageTokenUsagePerAttempt":{"prompt":130588,"completion":1785,"total":132373}},{"id":"flow-test4-order-processing-loop","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":79107,"averageJudgeScore":97,"averageTokenUsagePerAttempt":{"prompt":143173,"completion":4977,"total":148150}},{"id":"flow-test5-parallel-data-pipeline","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":89071,"averageJudgeScore":97,"averageTokenUsagePerAttempt":{"prompt":118418,"completion":5658,"total":124076}},{"id":"flow-test6-ai-agent-tools","attemptCount":1,"pass
edAttempts":0,"passRate":0,"averageDurationMs":83867,"averageJudgeScore":72,"averageTokenUsagePerAttempt":{"prompt":138732,"completion":4745,"total":143477}},{"id":"flow-test7-simple-modification","attemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":44256,"averageJudgeScore":30,"averageTokenUsagePerAttempt":{"prompt":111016,"completion":1873,"total":112889}},{"id":"flow-test8-branching-in-loop","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":50962,"averageJudgeScore":97,"averageTokenUsagePerAttempt":{"prompt":137240,"completion":2722,"total":139962}},{"id":"flow-test9-parallel-refactor","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":58847,"averageJudgeScore":82,"averageTokenUsagePerAttempt":{"prompt":137437,"completion":3521,"total":140958}},{"id":"flow-test10-while-loop-counter","attemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":38971,"averageJudgeScore":90,"averageTokenUsagePerAttempt":{"prompt":105189,"completion":1161,"total":106350}},{"id":"flow-test11-preprocessor-and-failure-handler","attemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":79582,"averageJudgeScore":52,"averageTokenUsagePerAttempt":{"prompt":256128,"completion":3124,"total":259252}},{"id":"flow-test12-approval-step","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":33280,"averageJudgeScore":82,"averageTokenUsagePerAttempt":{"prompt":104920,"completion":1047,"total":105967}}]}
|
||||
{"createdAt":"2026-04-13T16:42:33.076Z","gitSha":"3f5841f84d878cd3f43c435fa237d3f0c2265fb9","mode":"flow","runs":1,"runModel":"openai:gpt-4o","judgeModel":"claude-sonnet-4-6","caseCount":13,"attemptCount":13,"passedAttempts":7,"passRate":0.5384615384615384,"averageDurationMs":25127.30769230769,"averageJudgeScore":71.07692307692308,"averageTokenUsagePerAttempt":{"prompt":75554.46153846153,"completion":772.8461538461538,"total":76327.30769230769},"failedCaseIds":["flow-test6-ai-agent-tools","flow-test7-simple-modification","flow-test9-parallel-refactor","flow-test10-while-loop-counter","flow-test11-preprocessor-and-failure-handler","flow-test12-approval-step"],"cases":[{"id":"flow-test0-sum-two-numbers","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":16276,"averageJudgeScore":97,"averageTokenUsagePerAttempt":{"prompt":64149,"completion":312,"total":64461}},{"id":"flow-test1-reuse-existing-script","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":13918,"averageJudgeScore":100,"averageTokenUsagePerAttempt":{"prompt":46634,"completion":270,"total":46904}},{"id":"flow-test2-call-existing-subflow","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":15559,"averageJudgeScore":97,"averageTokenUsagePerAttempt":{"prompt":46899,"completion":229,"total":47128}},{"id":"flow-test3-branchone-routing","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":18332,"averageJudgeScore":100,"averageTokenUsagePerAttempt":{"prompt":64651,"completion":528,"total":65179}},{"id":"flow-test4-order-processing-loop","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":35969,"averageJudgeScore":92,"averageTokenUsagePerAttempt":{"prompt":85106,"completion":1226,"total":86332}},{"id":"flow-test5-parallel-data-pipeline","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":44250,"averageJudgeScore":82,"averageTokenUsagePerAttempt":{"prompt":120119,"completion":1514,"total":121633}},{"id":"flow-test6-a
i-agent-tools","attemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":39138,"averageJudgeScore":62,"averageTokenUsagePerAttempt":{"prompt":104858,"completion":2010,"total":106868}},{"id":"flow-test7-simple-modification","attemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":30801,"averageJudgeScore":20,"averageTokenUsagePerAttempt":{"prompt":140601,"completion":837,"total":141438}},{"id":"flow-test8-branching-in-loop","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":29650,"averageJudgeScore":90,"averageTokenUsagePerAttempt":{"prompt":84676,"completion":434,"total":85110}},{"id":"flow-test9-parallel-refactor","attemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":15278,"averageJudgeScore":0,"averageTokenUsagePerAttempt":{"prompt":12264,"completion":1037,"total":13301}},{"id":"flow-test10-while-loop-counter","attemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":18609,"averageJudgeScore":82,"averageTokenUsagePerAttempt":{"prompt":64538,"completion":447,"total":64985}},{"id":"flow-test11-preprocessor-and-failure-handler","attemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":24459,"averageJudgeScore":30,"averageTokenUsagePerAttempt":{"prompt":64752,"completion":522,"total":65274}},{"id":"flow-test12-approval-step","attemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":24416,"averageJudgeScore":72,"averageTokenUsagePerAttempt":{"prompt":82961,"completion":681,"total":83642}}]}
|
||||
{"createdAt":"2026-04-13T16:44:35.781Z","gitSha":"3f5841f84d878cd3f43c435fa237d3f0c2265fb9","mode":"flow","runs":1,"runModel":"googleai:gemini-3-flash-preview","judgeModel":"claude-sonnet-4-6","caseCount":13,"attemptCount":13,"passedAttempts":8,"passRate":0.6153846153846154,"averageDurationMs":37479.307692307695,"averageJudgeScore":85,"averageTokenUsagePerAttempt":{"prompt":186704.3076923077,"completion":1286.076923076923,"total":189682.92307692306},"failedCaseIds":["flow-test6-ai-agent-tools","flow-test7-simple-modification","flow-test9-parallel-refactor","flow-test10-while-loop-counter","flow-test11-preprocessor-and-failure-handler"],"cases":[{"id":"flow-test0-sum-two-numbers","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":17390,"averageJudgeScore":97,"averageTokenUsagePerAttempt":{"prompt":91200,"completion":368,"total":92084}},{"id":"flow-test1-reuse-existing-script","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":16881,"averageJudgeScore":100,"averageTokenUsagePerAttempt":{"prompt":65540,"completion":414,"total":66412}},{"id":"flow-test2-call-existing-subflow","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":17296,"averageJudgeScore":100,"averageTokenUsagePerAttempt":{"prompt":66397,"completion":482,"total":67455}},{"id":"flow-test3-branchone-routing","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":29437,"averageJudgeScore":100,"averageTokenUsagePerAttempt":{"prompt":174842,"completion":1107,"total":176621}},{"id":"flow-test4-order-processing-loop","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":46387,"averageJudgeScore":97,"averageTokenUsagePerAttempt":{"prompt":233010,"completion":1931,"total":236992}},{"id":"flow-test5-parallel-data-pipeline","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":47883,"averageJudgeScore":88,"averageTokenUsagePerAttempt":{"prompt":300741,"completion":2353,"total":304779}},{"id":"flow-test6-ai-agent-tools","a
ttemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":51830,"averageJudgeScore":72,"averageTokenUsagePerAttempt":{"prompt":255392,"completion":2178,"total":259675}},{"id":"flow-test7-simple-modification","attemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":43691,"averageJudgeScore":62,"averageTokenUsagePerAttempt":{"prompt":167159,"completion":1056,"total":171042}},{"id":"flow-test8-branching-in-loop","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":38113,"averageJudgeScore":95,"averageTokenUsagePerAttempt":{"prompt":222138,"completion":1578,"total":225135}},{"id":"flow-test9-parallel-refactor","attemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":59161,"averageJudgeScore":78,"averageTokenUsagePerAttempt":{"prompt":342540,"completion":2071,"total":347200}},{"id":"flow-test10-while-loop-counter","attemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":41602,"averageJudgeScore":82,"averageTokenUsagePerAttempt":{"prompt":146820,"completion":755,"total":151064}},{"id":"flow-test11-preprocessor-and-failure-handler","attemptCount":1,"passedAttempts":0,"passRate":0,"averageDurationMs":48067,"averageJudgeScore":52,"averageTokenUsagePerAttempt":{"prompt":245838,"completion":1399,"total":249623}},{"id":"flow-test12-approval-step","attemptCount":1,"passedAttempts":1,"passRate":1,"averageDurationMs":29493,"averageJudgeScore":82,"averageTokenUsagePerAttempt":{"prompt":115539,"completion":1027,"total":117796}}]}
|
||||
{"createdAt":"2026-04-15T12:47:42.333Z","gitSha":"fada91cb74cbb0d8c4191e88c9c782661fa79e0c","mode":"flow","runs":2,"runModel":"anthropic:claude-haiku-4-5-20251001","judgeModel":"claude-sonnet-4-6","caseCount":13,"attemptCount":26,"passedAttempts":22,"passRate":0.8461538461538461,"averageDurationMs":30184.96153846154,"averageJudgeScore":90.23076923076923,"averageTokenUsagePerAttempt":{"prompt":131953,"completion":3005.4615384615386,"total":134958.46153846153},"failedCaseIds":["flow-test6-ai-agent-tools","flow-test9-parallel-refactor"],"cases":[{"id":"flow-test0-sum-two-numbers","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":17632.5,"averageJudgeScore":99,"averageTokenUsagePerAttempt":{"prompt":119410.5,"completion":785,"total":120195.5}},{"id":"flow-test1-reuse-existing-script","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":15469,"averageJudgeScore":100,"averageTokenUsagePerAttempt":{"prompt":91090,"completion":796,"total":91886}},{"id":"flow-test2-call-existing-subflow","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":14306.5,"averageJudgeScore":97,"averageTokenUsagePerAttempt":{"prompt":101415.5,"completion":1010,"total":102425.5}},{"id":"flow-test3-branchone-routing","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":23193,"averageJudgeScore":100,"averageTokenUsagePerAttempt":{"prompt":76384,"completion":2375.5,"total":78759.5}},{"id":"flow-test4-order-processing-loop","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":44973,"averageJudgeScore":92.5,"averageTokenUsagePerAttempt":{"prompt":189119,"completion":4639.5,"total":193758.5}},{"id":"flow-test5-parallel-data-pipeline","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":63343.5,"averageJudgeScore":94.5,"averageTokenUsagePerAttempt":{"prompt":171440.5,"completion":8551,"total":179991.5}},{"id":"flow-test6-ai-agent-tools","attemptCount":2,"passedAttempts":0,"passRate":0,"averageDurationMs":64051
,"averageJudgeScore":72,"averageTokenUsagePerAttempt":{"prompt":200807,"completion":8626,"total":209433}},{"id":"flow-test7-simple-modification","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":20897,"averageJudgeScore":82,"averageTokenUsagePerAttempt":{"prompt":124223,"completion":1363,"total":125586}},{"id":"flow-test8-branching-in-loop","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":26266.5,"averageJudgeScore":95,"averageTokenUsagePerAttempt":{"prompt":99486,"completion":3338.5,"total":102824.5}},{"id":"flow-test9-parallel-refactor","attemptCount":2,"passedAttempts":0,"passRate":0,"averageDurationMs":34616.5,"averageJudgeScore":95,"averageTokenUsagePerAttempt":{"prompt":139827,"completion":3639.5,"total":143466.5}},{"id":"flow-test10-while-loop-counter","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":25068,"averageJudgeScore":82,"averageTokenUsagePerAttempt":{"prompt":134504.5,"completion":1472,"total":135976.5}},{"id":"flow-test11-preprocessor-and-failure-handler","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":22762,"averageJudgeScore":82,"averageTokenUsagePerAttempt":{"prompt":147320,"completion":1372,"total":148692}},{"id":"flow-test12-approval-step","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":19826,"averageJudgeScore":82,"averageTokenUsagePerAttempt":{"prompt":120362,"completion":1103,"total":121465}}]}
|
||||
{"createdAt":"2026-04-15T12:59:23.430Z","gitSha":"fada91cb74cbb0d8c4191e88c9c782661fa79e0c","mode":"flow","runs":2,"runModel":"openai:gpt-4o","judgeModel":"claude-sonnet-4-6","caseCount":13,"attemptCount":26,"passedAttempts":17,"passRate":0.6538461538461539,"averageDurationMs":22773.73076923077,"averageJudgeScore":74.96153846153847,"averageTokenUsagePerAttempt":{"prompt":80958.57692307692,"completion":794,"total":81752.57692307692},"failedCaseIds":["flow-test4-order-processing-loop","flow-test6-ai-agent-tools","flow-test7-simple-modification","flow-test9-parallel-refactor","flow-test10-while-loop-counter"],"cases":[{"id":"flow-test0-sum-two-numbers","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":21414.5,"averageJudgeScore":97,"averageTokenUsagePerAttempt":{"prompt":74020,"completion":278,"total":74298}},{"id":"flow-test1-reuse-existing-script","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":11469,"averageJudgeScore":100,"averageTokenUsagePerAttempt":{"prompt":56486,"completion":264,"total":56750}},{"id":"flow-test2-call-existing-subflow","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":11158,"averageJudgeScore":97,"averageTokenUsagePerAttempt":{"prompt":56791,"completion":271.5,"total":57062.5}},{"id":"flow-test3-branchone-routing","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":15699.5,"averageJudgeScore":100,"averageTokenUsagePerAttempt":{"prompt":74511,"completion":517,"total":75028}},{"id":"flow-test4-order-processing-loop","attemptCount":2,"passedAttempts":1,"passRate":0.5,"averageDurationMs":22957.5,"averageJudgeScore":67,"averageTokenUsagePerAttempt":{"prompt":65343,"completion":1127.5,"total":66470.5}},{"id":"flow-test5-parallel-data-pipeline","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":33018.5,"averageJudgeScore":87,"averageTokenUsagePerAttempt":{"prompt":76464,"completion":1572,"total":78036}},{"id":"flow-test6-ai-agent-tools","attemptCount":2,"pass
edAttempts":0,"passRate":0,"averageDurationMs":37364,"averageJudgeScore":67,"averageTokenUsagePerAttempt":{"prompt":130732,"completion":2106,"total":132838}},{"id":"flow-test7-simple-modification","attemptCount":2,"passedAttempts":0,"passRate":0,"averageDurationMs":24472.5,"averageJudgeScore":36,"averageTokenUsagePerAttempt":{"prompt":123649,"completion":896,"total":124545}},{"id":"flow-test8-branching-in-loop","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":23635.5,"averageJudgeScore":82,"averageTokenUsagePerAttempt":{"prompt":104919,"completion":460.5,"total":105379.5}},{"id":"flow-test9-parallel-refactor","attemptCount":2,"passedAttempts":0,"passRate":0,"averageDurationMs":28727,"averageJudgeScore":15,"averageTokenUsagePerAttempt":{"prompt":48189.5,"completion":1501.5,"total":49691}},{"id":"flow-test10-while-loop-counter","attemptCount":2,"passedAttempts":0,"passRate":0,"averageDurationMs":22109,"averageJudgeScore":56,"averageTokenUsagePerAttempt":{"prompt":84576.5,"completion":403,"total":84979.5}},{"id":"flow-test11-preprocessor-and-failure-handler","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":25620,"averageJudgeScore":88.5,"averageTokenUsagePerAttempt":{"prompt":105479.5,"completion":500.5,"total":105980}},{"id":"flow-test12-approval-step","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":18413.5,"averageJudgeScore":82,"averageTokenUsagePerAttempt":{"prompt":51301,"completion":424.5,"total":51725.5}}]}
|
||||
{"createdAt":"2026-04-15T13:04:53.138Z","gitSha":"fada91cb74cbb0d8c4191e88c9c782661fa79e0c","mode":"flow","runs":2,"runModel":"anthropic:claude-opus-4-6","judgeModel":"claude-sonnet-4-6","caseCount":13,"attemptCount":26,"passedAttempts":18,"passRate":0.6923076923076923,"averageDurationMs":53728.153846153844,"averageJudgeScore":90.46153846153847,"averageTokenUsagePerAttempt":{"prompt":136217.65384615384,"completion":2690.576923076923,"total":138908.23076923078},"failedCaseIds":["flow-test6-ai-agent-tools","flow-test7-simple-modification","flow-test9-parallel-refactor","flow-test10-while-loop-counter","flow-test12-approval-step"],"cases":[{"id":"flow-test0-sum-two-numbers","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":26766.5,"averageJudgeScore":97,"averageTokenUsagePerAttempt":{"prompt":119291.5,"completion":619.5,"total":119911}},{"id":"flow-test1-reuse-existing-script","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":25131.5,"averageJudgeScore":100,"averageTokenUsagePerAttempt":{"prompt":90983.5,"completion":746.5,"total":91730}},{"id":"flow-test2-call-existing-subflow","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":25598.5,"averageJudgeScore":97,"averageTokenUsagePerAttempt":{"prompt":91533,"completion":718.5,"total":92251.5}},{"id":"flow-test3-branchone-routing","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":42976.5,"averageJudgeScore":100,"averageTokenUsagePerAttempt":{"prompt":149081,"completion":1746,"total":150827}},{"id":"flow-test4-order-processing-loop","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":82068,"averageJudgeScore":98,"averageTokenUsagePerAttempt":{"prompt":160765,"completion":4723,"total":165488}},{"id":"flow-test5-parallel-data-pipeline","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":107520.5,"averageJudgeScore":96,"averageTokenUsagePerAttempt":{"prompt":137528,"completion":6918,"total":144446}},{"id":"flow-test6-ai
-agent-tools","attemptCount":2,"passedAttempts":1,"passRate":0.5,"averageDurationMs":117563,"averageJudgeScore":77,"averageTokenUsagePerAttempt":{"prompt":172375,"completion":8691.5,"total":181066.5}},{"id":"flow-test7-simple-modification","attemptCount":2,"passedAttempts":1,"passRate":0.5,"averageDurationMs":40348,"averageJudgeScore":77,"averageTokenUsagePerAttempt":{"prompt":125491.5,"completion":1557,"total":127048.5}},{"id":"flow-test8-branching-in-loop","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":52332.5,"averageJudgeScore":97,"averageTokenUsagePerAttempt":{"prompt":155749,"completion":2693,"total":158442}},{"id":"flow-test9-parallel-refactor","attemptCount":2,"passedAttempts":0,"passRate":0,"averageDurationMs":58810,"averageJudgeScore":95,"averageTokenUsagePerAttempt":{"prompt":154580,"completion":3080,"total":157660}},{"id":"flow-test10-while-loop-counter","attemptCount":2,"passedAttempts":0,"passRate":0,"averageDurationMs":39319.5,"averageJudgeScore":72,"averageTokenUsagePerAttempt":{"prompt":120779,"completion":1131.5,"total":121910.5}},{"id":"flow-test11-preprocessor-and-failure-handler","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":43657.5,"averageJudgeScore":95,"averageTokenUsagePerAttempt":{"prompt":172242,"completion":1277,"total":173519}},{"id":"flow-test12-approval-step","attemptCount":2,"passedAttempts":0,"passRate":0,"averageDurationMs":36374,"averageJudgeScore":75,"averageTokenUsagePerAttempt":{"prompt":120431,"completion":1076,"total":121507}}]}
|
||||
{"createdAt":"2026-04-15T13:09:23.557Z","gitSha":"fada91cb74cbb0d8c4191e88c9c782661fa79e0c","mode":"flow","runs":2,"runModel":"googleai:gemini-3-flash-preview","judgeModel":"claude-sonnet-4-6","caseCount":13,"attemptCount":26,"passedAttempts":23,"passRate":0.8846153846153846,"averageDurationMs":38015.153846153844,"averageJudgeScore":92.61538461538461,"averageTokenUsagePerAttempt":{"prompt":213122.73076923078,"completion":1306.6923076923076,"total":216288.61538461538},"failedCaseIds":["flow-test7-simple-modification","flow-test9-parallel-refactor"],"cases":[{"id":"flow-test0-sum-two-numbers","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":17852,"averageJudgeScore":97.5,"averageTokenUsagePerAttempt":{"prompt":106013.5,"completion":461,"total":106898.5}},{"id":"flow-test1-reuse-existing-script","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":17556,"averageJudgeScore":100,"averageTokenUsagePerAttempt":{"prompt":80428.5,"completion":521,"total":81375.5}},{"id":"flow-test2-call-existing-subflow","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":16211,"averageJudgeScore":100,"averageTokenUsagePerAttempt":{"prompt":80653,"completion":538,"total":81544.5}},{"id":"flow-test3-branchone-routing","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":28206.5,"averageJudgeScore":100,"averageTokenUsagePerAttempt":{"prompt":195088,"completion":1003.5,"total":196934.5}},{"id":"flow-test4-order-processing-loop","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":49612,"averageJudgeScore":89.5,"averageTokenUsagePerAttempt":{"prompt":285979.5,"completion":2140.5,"total":289883}},{"id":"flow-test5-parallel-data-pipeline","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":52635,"averageJudgeScore":94.5,"averageTokenUsagePerAttempt":{"prompt":315058,"completion":2118,"total":319111}},{"id":"flow-test6-ai-agent-tools","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDuratio
nMs":55039,"averageJudgeScore":89,"averageTokenUsagePerAttempt":{"prompt":298999.5,"completion":2563,"total":304299}},{"id":"flow-test7-simple-modification","attemptCount":2,"passedAttempts":1,"passRate":0.5,"averageDurationMs":45571,"averageJudgeScore":77,"averageTokenUsagePerAttempt":{"prompt":177988,"completion":963,"total":182547}},{"id":"flow-test8-branching-in-loop","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":53957,"averageJudgeScore":96,"averageTokenUsagePerAttempt":{"prompt":326580,"completion":1650,"total":331999}},{"id":"flow-test9-parallel-refactor","attemptCount":2,"passedAttempts":0,"passRate":0,"averageDurationMs":78361.5,"averageJudgeScore":93.5,"averageTokenUsagePerAttempt":{"prompt":495491,"completion":2535,"total":503137}},{"id":"flow-test10-while-loop-counter","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":27481,"averageJudgeScore":82,"averageTokenUsagePerAttempt":{"prompt":132736,"completion":820,"total":134766.5}},{"id":"flow-test11-preprocessor-and-failure-handler","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":29757.5,"averageJudgeScore":92.5,"averageTokenUsagePerAttempt":{"prompt":168158.5,"completion":1022.5,"total":170345.5}},{"id":"flow-test12-approval-step","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":21957.5,"averageJudgeScore":92.5,"averageTokenUsagePerAttempt":{"prompt":107422,"completion":651.5,"total":108911}}]}
|
||||
{"createdAt":"2026-04-15T13:56:16.609Z","gitSha":"cc3e17dbc1c204b5d4e30ad449d59e9e7cd0bb89","mode":"flow","runs":2,"runModel":"anthropic:claude-haiku-4-5-20251001","judgeModel":"claude-sonnet-4-6","caseCount":13,"attemptCount":26,"passedAttempts":26,"passRate":1,"averageDurationMs":35150.57692307692,"averageJudgeScore":92.07692307692308,"averageTokenUsagePerAttempt":{"prompt":139081.07692307694,"completion":3570.3076923076924,"total":142651.38461538462},"failedCaseIds":[],"cases":[{"id":"flow-test0-sum-two-numbers","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":16746.5,"averageJudgeScore":98,"averageTokenUsagePerAttempt":{"prompt":119410.5,"completion":786.5,"total":120197}},{"id":"flow-test1-reuse-existing-script","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":16781.5,"averageJudgeScore":100,"averageTokenUsagePerAttempt":{"prompt":91090,"completion":796,"total":91886}},{"id":"flow-test2-call-existing-subflow","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":20842,"averageJudgeScore":97,"averageTokenUsagePerAttempt":{"prompt":101415.5,"completion":1065.5,"total":102481}},{"id":"flow-test3-branchone-routing","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":28184,"averageJudgeScore":98.5,"averageTokenUsagePerAttempt":{"prompt":76383,"completion":2365.5,"total":78748.5}},{"id":"flow-test4-order-processing-loop","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":48227,"averageJudgeScore":91,"averageTokenUsagePerAttempt":{"prompt":187421,"completion":4314.5,"total":191735.5}},{"id":"flow-test5-parallel-data-pipeline","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":117878.5,"averageJudgeScore":94.5,"averageTokenUsagePerAttempt":{"prompt":308754.5,"completion":19364.5,"total":328119}},{"id":"flow-test6-ai-agent-tools","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":44483.5,"averageJudgeScore":89,"averageTokenUsagePerAttempt":{"prom
pt":158473.5,"completion":5044,"total":163517.5}},{"id":"flow-test7-simple-modification","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":21374,"averageJudgeScore":92,"averageTokenUsagePerAttempt":{"prompt":124028,"completion":1309,"total":125337}},{"id":"flow-test8-branching-in-loop","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":30584.5,"averageJudgeScore":95,"averageTokenUsagePerAttempt":{"prompt":99486,"completion":3344,"total":102830}},{"id":"flow-test9-parallel-refactor","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":43953,"averageJudgeScore":82,"averageTokenUsagePerAttempt":{"prompt":153129,"completion":4306,"total":157435}},{"id":"flow-test10-while-loop-counter","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":20196.5,"averageJudgeScore":96,"averageTokenUsagePerAttempt":{"prompt":120701,"completion":1159,"total":121860}},{"id":"flow-test11-preprocessor-and-failure-handler","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":25325.5,"averageJudgeScore":82,"averageTokenUsagePerAttempt":{"prompt":147320,"completion":1369,"total":148689}},{"id":"flow-test12-approval-step","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":22381,"averageJudgeScore":82,"averageTokenUsagePerAttempt":{"prompt":120442,"completion":1190.5,"total":121632.5}}]}
|
||||
{"createdAt":"2026-04-15T13:59:07.056Z","gitSha":"cc3e17dbc1c204b5d4e30ad449d59e9e7cd0bb89","mode":"flow","runs":2,"runModel":"openai:gpt-4o","judgeModel":"claude-sonnet-4-6","caseCount":13,"attemptCount":26,"passedAttempts":21,"passRate":0.8076923076923077,"averageDurationMs":28529.346153846152,"averageJudgeScore":82.65384615384616,"averageTokenUsagePerAttempt":{"prompt":87358.15384615384,"completion":964.4615384615385,"total":88322.61538461539},"failedCaseIds":["flow-test4-order-processing-loop","flow-test6-ai-agent-tools","flow-test7-simple-modification","flow-test9-parallel-refactor"],"cases":[{"id":"flow-test0-sum-two-numbers","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":16221,"averageJudgeScore":97,"averageTokenUsagePerAttempt":{"prompt":74020,"completion":280,"total":74300}},{"id":"flow-test1-reuse-existing-script","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":17431.5,"averageJudgeScore":100,"averageTokenUsagePerAttempt":{"prompt":56484.5,"completion":257,"total":56741.5}},{"id":"flow-test2-call-existing-subflow","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":14980.5,"averageJudgeScore":97.5,"averageTokenUsagePerAttempt":{"prompt":56751,"completion":230,"total":56981}},{"id":"flow-test3-branchone-routing","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":20897,"averageJudgeScore":97,"averageTokenUsagePerAttempt":{"prompt":64328,"completion":521,"total":64849}},{"id":"flow-test4-order-processing-loop","attemptCount":2,"passedAttempts":1,"passRate":0.5,"averageDurationMs":61242,"averageJudgeScore":70,"averageTokenUsagePerAttempt":{"prompt":158766.5,"completion":3520,"total":162286.5}},{"id":"flow-test5-parallel-data-pipeline","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":47899.5,"averageJudgeScore":86.5,"averageTokenUsagePerAttempt":{"prompt":87984.5,"completion":1582.5,"total":89567}},{"id":"flow-test6-ai-agent-tools","attemptCount":2,"passedAttempts"
:1,"passRate":0.5,"averageDurationMs":42154.5,"averageJudgeScore":77,"averageTokenUsagePerAttempt":{"prompt":130936,"completion":2206.5,"total":133142.5}},{"id":"flow-test7-simple-modification","attemptCount":2,"passedAttempts":1,"passRate":0.5,"averageDurationMs":38449.5,"averageJudgeScore":82,"averageTokenUsagePerAttempt":{"prompt":150313.5,"completion":948,"total":151261.5}},{"id":"flow-test8-branching-in-loop","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":35552.5,"averageJudgeScore":90,"averageTokenUsagePerAttempt":{"prompt":112832,"completion":470.5,"total":113302.5}},{"id":"flow-test9-parallel-refactor","attemptCount":2,"passedAttempts":0,"passRate":0,"averageDurationMs":22728.5,"averageJudgeScore":3.5,"averageTokenUsagePerAttempt":{"prompt":14727,"completion":1063,"total":15790}},{"id":"flow-test10-while-loop-counter","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":19612.5,"averageJudgeScore":93.5,"averageTokenUsagePerAttempt":{"prompt":84800.5,"completion":526.5,"total":85327}},{"id":"flow-test11-preprocessor-and-failure-handler","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":18568.5,"averageJudgeScore":92,"averageTokenUsagePerAttempt":{"prompt":92412,"completion":507.5,"total":92919.5}},{"id":"flow-test12-approval-step","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":15144,"averageJudgeScore":88.5,"averageTokenUsagePerAttempt":{"prompt":51300.5,"completion":425.5,"total":51726}}]}
|
||||
{"createdAt":"2026-04-15T14:04:19.086Z","gitSha":"cc3e17dbc1c204b5d4e30ad449d59e9e7cd0bb89","mode":"flow","runs":2,"runModel":"anthropic:claude-opus-4-6","judgeModel":"claude-sonnet-4-6","caseCount":13,"attemptCount":26,"passedAttempts":26,"passRate":1,"averageDurationMs":53226.5,"averageJudgeScore":95.8076923076923,"averageTokenUsagePerAttempt":{"prompt":136106.3076923077,"completion":2673.5,"total":138779.8076923077},"failedCaseIds":[],"cases":[{"id":"flow-test0-sum-two-numbers","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":27188.5,"averageJudgeScore":98,"averageTokenUsagePerAttempt":{"prompt":119289,"completion":630.5,"total":119919.5}},{"id":"flow-test1-reuse-existing-script","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":26495.5,"averageJudgeScore":100,"averageTokenUsagePerAttempt":{"prompt":90983.5,"completion":746.5,"total":91730}},{"id":"flow-test2-call-existing-subflow","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":26312.5,"averageJudgeScore":97.5,"averageTokenUsagePerAttempt":{"prompt":91534,"completion":769.5,"total":92303.5}},{"id":"flow-test3-branchone-routing","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":42606,"averageJudgeScore":100,"averageTokenUsagePerAttempt":{"prompt":149110.5,"completion":1761.5,"total":150872}},{"id":"flow-test4-order-processing-loop","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":77153.5,"averageJudgeScore":97,"averageTokenUsagePerAttempt":{"prompt":159363,"completion":4355,"total":163718}},{"id":"flow-test5-parallel-data-pipeline","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":107545,"averageJudgeScore":97,"averageTokenUsagePerAttempt":{"prompt":138505.5,"completion":7243.5,"total":145749}},{"id":"flow-test6-ai-agent-tools","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":112611,"averageJudgeScore":97,"averageTokenUsagePerAttempt":{"prompt":171742,"completion":8499.5,"total
":180241.5}},{"id":"flow-test7-simple-modification","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":44779,"averageJudgeScore":82,"averageTokenUsagePerAttempt":{"prompt":125571.5,"completion":1625.5,"total":127197}},{"id":"flow-test8-branching-in-loop","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":50868,"averageJudgeScore":97,"averageTokenUsagePerAttempt":{"prompt":155604.5,"completion":2681,"total":158285.5}},{"id":"flow-test9-parallel-refactor","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":59752,"averageJudgeScore":92.5,"averageTokenUsagePerAttempt":{"prompt":154274.5,"completion":2961,"total":157235.5}},{"id":"flow-test10-while-loop-counter","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":36922.5,"averageJudgeScore":97,"averageTokenUsagePerAttempt":{"prompt":120778,"completion":1121,"total":121899}},{"id":"flow-test11-preprocessor-and-failure-handler","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":44307.5,"averageJudgeScore":93.5,"averageTokenUsagePerAttempt":{"prompt":172195,"completion":1285,"total":173480}},{"id":"flow-test12-approval-step","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":35403.5,"averageJudgeScore":97,"averageTokenUsagePerAttempt":{"prompt":120431,"completion":1076,"total":121507}}]}
|
||||
{"createdAt":"2026-04-15T14:09:26.896Z","gitSha":"cc3e17dbc1c204b5d4e30ad449d59e9e7cd0bb89","mode":"flow","runs":2,"runModel":"googleai:gemini-3-flash-preview","judgeModel":"claude-sonnet-4-6","caseCount":13,"attemptCount":26,"passedAttempts":26,"passRate":1,"averageDurationMs":43444.88461538462,"averageJudgeScore":93.73076923076923,"averageTokenUsagePerAttempt":{"prompt":209953.38461538462,"completion":1267.2307692307693,"total":213042.65384615384},"failedCaseIds":[],"cases":[{"id":"flow-test0-sum-two-numbers","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":18405.5,"averageJudgeScore":97,"averageTokenUsagePerAttempt":{"prompt":106013.5,"completion":466,"total":106954.5}},{"id":"flow-test1-reuse-existing-script","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":18034.5,"averageJudgeScore":100,"averageTokenUsagePerAttempt":{"prompt":80428.5,"completion":524.5,"total":81372.5}},{"id":"flow-test2-call-existing-subflow","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":17393,"averageJudgeScore":100,"averageTokenUsagePerAttempt":{"prompt":80653,"completion":538,"total":81544.5}},{"id":"flow-test3-branchone-routing","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":28979,"averageJudgeScore":100,"averageTokenUsagePerAttempt":{"prompt":195088,"completion":1003.5,"total":196934.5}},{"id":"flow-test4-order-processing-loop","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":47315,"averageJudgeScore":87,"averageTokenUsagePerAttempt":{"prompt":264983,"completion":1909.5,"total":268753}},{"id":"flow-test5-parallel-data-pipeline","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":55034.5,"averageJudgeScore":96,"averageTokenUsagePerAttempt":{"prompt":315058,"completion":2118,"total":319111}},{"id":"flow-test6-ai-agent-tools","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":53794.5,"averageJudgeScore":88.5,"averageTokenUsagePerAttempt":{"prompt":27879
4,"completion":2275,"total":283175}},{"id":"flow-test7-simple-modification","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":43680,"averageJudgeScore":91,"averageTokenUsagePerAttempt":{"prompt":177988,"completion":963,"total":182547}},{"id":"flow-test8-branching-in-loop","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":65355.5,"averageJudgeScore":97,"averageTokenUsagePerAttempt":{"prompt":326580,"completion":1650,"total":331999}},{"id":"flow-test9-parallel-refactor","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":99143,"averageJudgeScore":82,"averageTokenUsagePerAttempt":{"prompt":495491,"completion":2535,"total":503137}},{"id":"flow-test10-while-loop-counter","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":33126.5,"averageJudgeScore":94.5,"averageTokenUsagePerAttempt":{"prompt":132736,"completion":820,"total":134766.5}},{"id":"flow-test11-preprocessor-and-failure-handler","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":52696.5,"averageJudgeScore":87,"averageTokenUsagePerAttempt":{"prompt":168158.5,"completion":1022.5,"total":170345.5}},{"id":"flow-test12-approval-step","attemptCount":2,"passedAttempts":2,"passRate":1,"averageDurationMs":31826,"averageJudgeScore":98.5,"averageTokenUsagePerAttempt":{"prompt":107422.5,"completion":649,"total":108914.5}}]}
|
||||
|
||||
@@ -4374,6 +4374,53 @@ The OpenFlow schema (openflow.openapi.yaml) is the source of truth for flow stru
|
||||
- \`preprocessor\` - Reserved for preprocessor module
|
||||
- \`Input\` - Reserved for flow input reference
|
||||
|
||||
## Hard Structural Rules
|
||||
|
||||
These are strict Windmill schema rules. Follow them exactly.
|
||||
|
||||
- \`value.modules\` is only for normal sequential steps
|
||||
- \`value.preprocessor_module\` and \`value.failure_module\` are special top-level fields inside \`value\`, not entries in \`value.modules\`
|
||||
- If a flow needs a preprocessor, create \`value.preprocessor_module\` with \`id: preprocessor\`
|
||||
- If a flow needs a failure handler, create \`value.failure_module\` with \`id: failure\`
|
||||
- Do NOT create regular modules inside \`value.modules\` named \`preprocessor\` or \`failure\`
|
||||
- \`preprocessor_module\` and \`failure_module\` only support \`script\` or \`rawscript\`
|
||||
- \`preprocessor_module\` runs before normal modules and cannot reference \`results.*\`
|
||||
- \`failure_module\` can use the \`error\` object with \`error.message\`, \`error.step_id\`, \`error.name\`, and \`error.stack\`
|
||||
|
||||
Correct shape:
|
||||
|
||||
\`\`\`yaml
|
||||
value:
|
||||
preprocessor_module:
|
||||
id: preprocessor
|
||||
value:
|
||||
type: rawscript
|
||||
...
|
||||
failure_module:
|
||||
id: failure
|
||||
value:
|
||||
type: rawscript
|
||||
...
|
||||
modules:
|
||||
- id: process_event
|
||||
value:
|
||||
type: rawscript
|
||||
...
|
||||
\`\`\`
|
||||
|
||||
Incorrect shape:
|
||||
|
||||
\`\`\`yaml
|
||||
value:
|
||||
modules:
|
||||
- id: preprocessor
|
||||
...
|
||||
- id: process_event
|
||||
...
|
||||
- id: failure
|
||||
...
|
||||
\`\`\`
|
||||
|
||||
## Module ID Rules
|
||||
|
||||
- Must be unique across the entire flow
|
||||
@@ -4389,10 +4436,148 @@ The OpenFlow schema (openflow.openapi.yaml) is the source of truth for flow stru
|
||||
## Data Flow Between Steps
|
||||
|
||||
- \`flow_input.property\` - Access flow input parameters
|
||||
- \`results.step_id\` - Access output from a previous step
|
||||
- \`results.step_id.property\` - Access specific property from previous step output
|
||||
- \`flow_input.iter.value\` - Current item when inside a for-loop
|
||||
- \`flow_input.iter.index\` - Current index when inside a for-loop
|
||||
- \`results.step_id\` - Access output from a previous step only when that step result is in scope
|
||||
- \`results.step_id.property\` - Access specific property from a previous step output only when that step result is in scope
|
||||
- \`flow_input.iter.value\` - Current iteration value when inside a loop (\`forloopflow\` or \`whileloopflow\`)
|
||||
- \`flow_input.iter.index\` - Current loop index when inside a loop (\`forloopflow\` or \`whileloopflow\`)
|
||||
|
||||
## Loop Structure Rules
|
||||
|
||||
- For \`whileloopflow\`, put \`stop_after_if\` at module level on the loop module itself (as a sibling of \`id\` and \`value\`) when the loop should stop based on the result of an iteration
|
||||
- Do NOT put \`stop_after_if\` inside \`value\` of a \`whileloopflow\`
|
||||
- \`stop_after_all_iters_if\` is for checks after the whole loop finishes, not the normal per-iteration break condition
|
||||
- When a \`whileloopflow\` carries state forward between iterations, use \`flow_input.iter.value\` as the current loop value and provide an explicit first-iteration fallback when needed
|
||||
- Use \`flow_input.iter.index\` only when the loop logic is truly based on the iteration index, not as a replacement for the current loop value
|
||||
- If the user asks for a final scalar/object after a loop, add a normal step after the loop that extracts the final value from the loop result instead of returning the whole loop result array
|
||||
|
||||
Correct \`whileloopflow\` shape:
|
||||
|
||||
\`\`\`yaml
|
||||
- id: loop_until_done
|
||||
  stop_after_if:
|
||||
    expr: result.done === true
|
||||
    skip_if_stopped: false
|
||||
  value:
|
||||
    type: whileloopflow
|
||||
    skip_failures: false
|
||||
    modules:
|
||||
      - id: advance_state
|
||||
        value:
|
||||
          type: rawscript
|
||||
          input_transforms:
|
||||
            state:
|
||||
              type: javascript
|
||||
              expr: flow_input.iter && flow_input.iter.value !== undefined ? flow_input.iter.value : flow_input.initial_state
|
||||
- id: return_final_state
|
||||
  value:
|
||||
    type: rawscript
|
||||
    input_transforms:
|
||||
      final_state:
|
||||
        type: javascript
|
||||
        expr: results.loop_until_done[results.loop_until_done.length - 1]
|
||||
\`\`\`
|
||||
|
||||
Incorrect \`whileloopflow\` patterns:
|
||||
|
||||
\`\`\`yaml
|
||||
- id: loop_until_done
|
||||
  value:
|
||||
    type: whileloopflow
|
||||
    stop_after_if:
|
||||
      expr: result.done === true
|
||||
\`\`\`
|
||||
|
||||
\`\`\`yaml
|
||||
input_transforms:
|
||||
state:
|
||||
type: javascript
|
||||
expr: flow_input.iter.index
|
||||
\`\`\`
|
||||
|
||||
\`\`\`yaml
|
||||
input_transforms:
|
||||
final_state:
|
||||
type: javascript
|
||||
expr: results.loop_until_done
|
||||
\`\`\`
|
||||
|
||||
## Approval / Suspend Structure
|
||||
|
||||
- \`suspend\` belongs on the flow module object itself, as a sibling of \`id\` and \`value\`
|
||||
- Never put \`suspend\` inside \`value\`
|
||||
|
||||
Correct shape:
|
||||
|
||||
\`\`\`yaml
|
||||
- id: request_approval
|
||||
  suspend:
|
||||
    required_events: 1
|
||||
    resume_form:
|
||||
      schema:
|
||||
        type: object
|
||||
        properties:
|
||||
          comment:
|
||||
            type: string
|
||||
        required: [comment]
|
||||
  value:
|
||||
    type: identity
|
||||
\`\`\`
|
||||
|
||||
Incorrect shape:
|
||||
|
||||
\`\`\`yaml
|
||||
- id: request_approval
|
||||
  value:
|
||||
    type: rawscript
|
||||
    suspend:
|
||||
      required_events: 1
|
||||
\`\`\`
|
||||
|
||||
## Branch Result Scope Rules
|
||||
|
||||
- Inside a branch, you may reference earlier outer steps and earlier steps in the same branch
|
||||
- Outside a \`branchone\`, do NOT reference ids of steps that only exist inside its branches or default branch. Use \`results.<branchone_module_id>\` instead
|
||||
- Outside a \`branchall\`, do NOT reference ids of steps inside its branches. Use \`results.<branchall_module_id>\` instead
|
||||
- If downstream steps need a stable shape after a branch, make each branch return the same fields
|
||||
- When needed, add a normalization step immediately after the branch and consume \`results.<branch_module_id>\` there
|
||||
|
||||
Correct after \`branchone\`:
|
||||
|
||||
\`\`\`yaml
|
||||
- id: route_order
|
||||
value:
|
||||
type: branchone
|
||||
...
|
||||
- id: send_confirmation
|
||||
value:
|
||||
input_transforms:
|
||||
routed:
|
||||
type: javascript
|
||||
expr: results.route_order
|
||||
\`\`\`
|
||||
|
||||
Incorrect after \`branchone\`:
|
||||
|
||||
\`\`\`yaml
|
||||
expr: results.create_shipment
|
||||
expr: results.create_backorder
|
||||
\`\`\`
|
||||
|
||||
Correct after \`branchall\`:
|
||||
|
||||
\`\`\`yaml
|
||||
- id: enrich_parallel
|
||||
value:
|
||||
type: branchall
|
||||
parallel: true
|
||||
...
|
||||
- id: combine_data
|
||||
value:
|
||||
input_transforms:
|
||||
enrichments:
|
||||
type: javascript
|
||||
expr: results.enrich_parallel
|
||||
\`\`\`
|
||||
|
||||
## Input Transforms
|
||||
|
||||
@@ -4409,14 +4594,14 @@ JavaScript transform (dynamic expression):
|
||||
- For flow inputs: Use type \`"object"\` with format \`"resource-{type}"\` (e.g., \`"resource-postgresql"\`)
|
||||
- For step inputs: Use static value \`"$res:path/to/resource"\`
|
||||
|
||||
## Failure Handler
|
||||
## Final Structural Self-Check
|
||||
|
||||
Executes when any step fails. Has access to error details:
|
||||
Before finalizing a flow, verify:
|
||||
|
||||
- \`error.message\` - Error message
|
||||
- \`error.step_id\` - ID of failed step
|
||||
- \`error.name\` - Error name
|
||||
- \`error.stack\` - Stack trace
|
||||
- any preprocessor is in \`value.preprocessor_module\`
|
||||
- any failure handler is in \`value.failure_module\`
|
||||
- any approval step has module-level \`suspend\`
|
||||
- no downstream step references inner branch step ids from outside the branch
|
||||
|
||||
## S3 Object Operations
|
||||
|
||||
|
||||
@@ -2,17 +2,20 @@
|
||||
import FlowModuleSchemaMap from '$lib/components/flows/map/FlowModuleSchemaMap.svelte'
|
||||
import { getContext, untrack } from 'svelte'
|
||||
import type { ExtendedOpenFlow, FlowEditorContext } from '$lib/components/flows/types'
|
||||
import { dfs } from '$lib/components/flows/previousResults'
|
||||
import type { FlowModule, InputTransform, OpenFlow } from '$lib/gen'
|
||||
import type { FlowModule, InputTransform } from '$lib/gen'
|
||||
import type { FlowAIChatHelpers } from './core'
|
||||
import { createInlineScriptSession } from './inlineScriptsUtils'
|
||||
import { loadSchemaFromModule } from '$lib/components/flows/flowInfers'
|
||||
import { aiChatManager } from '../AIChatManager.svelte'
|
||||
import { refreshStateStore } from '$lib/svelte5Utils.svelte'
|
||||
import { getSubModules } from '$lib/components/flows/flowExplorer'
|
||||
import { SPECIAL_MODULE_IDS } from '../shared'
|
||||
import type { FlowCopilotContext } from '../../flow'
|
||||
import type { ScriptLintResult } from '../shared'
|
||||
import {
|
||||
applyFlowJsonUpdate,
|
||||
getFlowModuleById,
|
||||
getRawScriptModuleById
|
||||
} from './helperUtils'
|
||||
|
||||
let {
|
||||
flowModuleSchemaMap,
|
||||
@@ -32,16 +35,6 @@
|
||||
// Get diffManager from the graph
|
||||
const diffManager = $derived(flowModuleSchemaMap?.getDiffManager())
|
||||
|
||||
/**
 * Looks up a module of a flow by its id.
 *
 * The two special ids are served from the dedicated top-level fields of
 * `flow.value` (`preprocessor_module` / `failure_module`). Any other id is
 * passed to `dfs`, and the first entry of its result is returned.
 *
 * @param id - Id of the module to look up.
 * @param flow - Flow to search; defaults to the current `flowStore.val`.
 * @returns The matching module, or `undefined` when nothing matches.
 */
function getModule(id: string, flow: OpenFlow = flowStore.val) {
	switch (id) {
		case SPECIAL_MODULE_IDS.PREPROCESSOR:
			return flow.value.preprocessor_module
		case SPECIAL_MODULE_IDS.FAILURE:
			return flow.value.failure_module
		default:
			// Regular step: take the first entry of the dfs result.
			return dfs(id, flow, false)[0]
	}
}
|
||||
|
||||
const flowHelpers: FlowAIChatHelpers = {
|
||||
// flow context
|
||||
getFlowAndSelectedId: () => {
|
||||
@@ -53,7 +46,7 @@
|
||||
},
|
||||
getModules: (id?: string) => {
|
||||
if (id) {
|
||||
const module = getModule(id)
|
||||
const module = getFlowModuleById(flowStore.val, id)
|
||||
|
||||
if (!module) {
|
||||
throw new Error('Module not found')
|
||||
@@ -76,7 +69,7 @@
|
||||
// Update current editor if needed
|
||||
const targetSnapshot = snapshot ?? diffManager.beforeFlow
|
||||
if ($currentEditor && targetSnapshot) {
|
||||
const module = getModule($currentEditor.stepId, targetSnapshot)
|
||||
const module = getFlowModuleById(targetSnapshot, $currentEditor.stepId)
|
||||
if (module) {
|
||||
if ($currentEditor.type === 'script' && module.value.type === 'rawscript') {
|
||||
$currentEditor.editor.setCode(module.value.content)
|
||||
@@ -91,51 +84,48 @@
|
||||
|
||||
// ai chat tools
|
||||
setCode: async (id: string, code: string) => {
|
||||
const module = getModule(id)
|
||||
const module = getRawScriptModuleById(flowStore.val, id)
|
||||
if (!module) {
|
||||
throw new Error('Module not found')
|
||||
throw new Error('Module not found or is not a rawscript')
|
||||
}
|
||||
if (module.value.type === 'rawscript') {
|
||||
// 1. Take snapshot only if none exists (preserves baseline for cumulative changes)
|
||||
if (!diffManager?.beforeFlow) {
|
||||
const snapshot = $state.snapshot(flowStore).val
|
||||
diffManager?.setBeforeFlow(snapshot)
|
||||
diffManager?.setEditMode(true)
|
||||
}
|
||||
|
||||
// 2. Apply the code change
|
||||
module.value.content = code
|
||||
inlineScriptSession.set(id, code)
|
||||
const { input_transforms, schema } = await loadSchemaFromModule(module)
|
||||
module.value.input_transforms = input_transforms
|
||||
refreshStateStore(flowStore)
|
||||
// 1. Take snapshot only if none exists (preserves baseline for cumulative changes)
|
||||
if (!diffManager?.beforeFlow) {
|
||||
const snapshot = $state.snapshot(flowStore).val
|
||||
diffManager?.setBeforeFlow(snapshot)
|
||||
diffManager?.setEditMode(true)
|
||||
}
|
||||
|
||||
// Update exprsToSet if this module is currently selected
|
||||
if (id === selectedId && exprsToSet) {
|
||||
exprsToSet.set(input_transforms)
|
||||
}
|
||||
// 2. Apply the code change
|
||||
module.value.content = code
|
||||
inlineScriptSession.set(id, code)
|
||||
const { input_transforms, schema } = await loadSchemaFromModule(module)
|
||||
module.value.input_transforms = input_transforms
|
||||
refreshStateStore(flowStore)
|
||||
|
||||
if (flowStateStore.val[id]) {
|
||||
flowStateStore.val[id].schema = schema
|
||||
} else {
|
||||
flowStateStore.val[id] = {
|
||||
schema
|
||||
}
|
||||
}
|
||||
// Update exprsToSet if this module is currently selected
|
||||
if (id === selectedId && exprsToSet) {
|
||||
exprsToSet.set(input_transforms)
|
||||
}
|
||||
|
||||
// 3. Manually add to moduleActions, preserving existing action types
|
||||
// Note: currentFlow is auto-synced by FlowGraphV2's effect after refreshStateStore
|
||||
const currentAction = diffManager?.moduleActions[id]
|
||||
if (!currentAction) {
|
||||
diffManager?.setModuleActions({
|
||||
...diffManager?.moduleActions,
|
||||
[id]: { action: 'modified', pending: true }
|
||||
})
|
||||
}
|
||||
// If already tracked (e.g., 'added' from setFlowJson), keep that status
|
||||
if (flowStateStore.val[id]) {
|
||||
flowStateStore.val[id].schema = schema
|
||||
} else {
|
||||
throw new Error('Module is not a rawscript or script')
|
||||
flowStateStore.val[id] = {
|
||||
schema
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Manually add to moduleActions, preserving existing action types
|
||||
// Note: currentFlow is auto-synced by FlowGraphV2's effect after refreshStateStore
|
||||
const currentAction = diffManager?.moduleActions[id]
|
||||
if (!currentAction) {
|
||||
diffManager?.setModuleActions({
|
||||
...diffManager?.moduleActions,
|
||||
[id]: { action: 'modified', pending: true }
|
||||
})
|
||||
}
|
||||
// If already tracked (e.g., 'added' from setFlowJson), keep that status
|
||||
if ($currentEditor && $currentEditor.type === 'script' && $currentEditor.stepId === id) {
|
||||
$currentEditor.editor.setCode(code)
|
||||
}
|
||||
@@ -176,7 +166,7 @@
|
||||
|
||||
getLintErrors: async (moduleId: string): Promise<ScriptLintResult> => {
|
||||
|
||||
const module = getModule(moduleId)
|
||||
const module = getFlowModuleById(flowStore.val, moduleId)
|
||||
if (!module || module.value.type !== 'rawscript') {
|
||||
return { errorCount: 0, warningCount: 0, errors: [], warnings: [] }
|
||||
}
|
||||
@@ -205,10 +195,17 @@
|
||||
|
||||
setFlowJson: async (
|
||||
modules: FlowModule[] | undefined,
|
||||
schema: Record<string, any> | undefined
|
||||
schema: Record<string, any> | undefined,
|
||||
preprocessorModule: FlowModule | null | undefined,
|
||||
failureModule: FlowModule | null | undefined
|
||||
) => {
|
||||
try {
|
||||
if (modules || schema) {
|
||||
if (
|
||||
modules !== undefined ||
|
||||
schema !== undefined ||
|
||||
preprocessorModule !== undefined ||
|
||||
failureModule !== undefined
|
||||
) {
|
||||
// Take snapshot of current flowStore and set as beforeFlow
|
||||
if (!diffManager?.hasPendingChanges) {
|
||||
const snapshot = $state.snapshot(flowStore).val
|
||||
@@ -217,23 +214,12 @@
|
||||
}
|
||||
}
|
||||
|
||||
if (modules) {
|
||||
// Restore inline script references back to full content
|
||||
const restoredModules = inlineScriptSession.restoreInlineScriptReferences(modules)
|
||||
const unresolvedRefs = inlineScriptSession.findUnresolvedInlineScriptRefs(restoredModules)
|
||||
if (unresolvedRefs.length > 0) {
|
||||
throw new Error(
|
||||
`Unresolved inline script references: ${unresolvedRefs.join(', ')}`
|
||||
)
|
||||
}
|
||||
// Directly modify flowStore (immediate effect)
|
||||
flowStore.val.value.modules = restoredModules
|
||||
}
|
||||
|
||||
// Update schema if provided
|
||||
if (schema !== undefined) {
|
||||
flowStore.val.schema = schema
|
||||
}
|
||||
applyFlowJsonUpdate(flowStore.val, inlineScriptSession, {
|
||||
modules,
|
||||
schema,
|
||||
preprocessorModule,
|
||||
failureModule
|
||||
})
|
||||
|
||||
// Refresh the state store to update UI
|
||||
refreshStateStore(flowStore)
|
||||
@@ -253,7 +239,7 @@
|
||||
diffManager?.moduleActions[selectedId]?.pending &&
|
||||
$currentEditor.editor.getAiChatEditorHandler()
|
||||
) {
|
||||
const moduleLastSnapshot = getModule(selectedId, diffManager.beforeFlow)
|
||||
const moduleLastSnapshot = getFlowModuleById(diffManager.beforeFlow, selectedId)
|
||||
const content =
|
||||
moduleLastSnapshot?.value.type === 'rawscript' ? moduleLastSnapshot.value.content : ''
|
||||
if (content.length > 0) {
|
||||
|
||||
@@ -35,9 +35,9 @@ import {
|
||||
import type { ContextElement } from '../context'
|
||||
import type { ExtendedOpenFlow } from '$lib/components/flows/types'
|
||||
import type { InlineScriptSession } from './inlineScriptsUtils'
|
||||
import { flowModulesSchema } from './openFlowZod'
|
||||
import { flowModuleSchema, flowModulesSchema } from './openFlowZod'
|
||||
import { collectAllModuleIdsFromArray } from './utils'
|
||||
import { getFlowPrompt } from '$system_prompts'
|
||||
import { FLOW_CHAT_SPECIAL_MODULES, getFlowPrompt } from '$system_prompts'
|
||||
|
||||
/**
|
||||
* Navigate to a schema at a given path, handling arrays, objects, unions, and wrappers.
|
||||
@@ -259,7 +259,9 @@ export interface FlowAIChatHelpers {
|
||||
setCode: (id: string, code: string) => Promise<void>
|
||||
setFlowJson: (
|
||||
modules: FlowModule[] | undefined,
|
||||
schema: Record<string, any> | undefined
|
||||
schema: Record<string, any> | undefined,
|
||||
preprocessorModule: FlowModule | null | undefined,
|
||||
failureModule: FlowModule | null | undefined
|
||||
) => Promise<void>
|
||||
getFlowInputsSchema: () => Promise<Record<string, any>>
|
||||
/** Update exprsToSet store for InputTransformForm components (only if module is selected) */
|
||||
@@ -309,19 +311,106 @@ const getInstructionsForCodeGenerationToolDef = createToolDef(
|
||||
'Get instructions for code generation for a raw script step'
|
||||
)
|
||||
|
||||
/**
 * Argument schema shared by the special-module tools: a nullable string.
 * As the description given to the model states, the string carries the
 * JSON-encoded module object, and `null` asks for the special module to be removed.
 */
const specialModuleToolArgSchema = z
	.string()
	.nullable()
	.describe(
		'JSON string containing the special module object. Use null to remove the special module.'
	)
|
||||
|
||||
// Using string for modules and schema because Gemini-2.5-flash performs better with strings (MALFORMED_FUNCTION_CALL errors happen more often with objects)
|
||||
const setFlowJsonToolSchema = z.object({
|
||||
modules: z.string().optional().nullable().describe('JSON string containing the flow modules'),
|
||||
schema: z.string().optional().nullable().describe('JSON string containing the flow input schema')
|
||||
schema: z.string().optional().nullable().describe('JSON string containing the flow input schema'),
|
||||
preprocessor_module: z
|
||||
.string()
|
||||
.optional()
|
||||
.nullable()
|
||||
.describe('JSON string containing the optional preprocessor module'),
|
||||
failure_module: z
|
||||
.string()
|
||||
.optional()
|
||||
.nullable()
|
||||
.describe('JSON string containing the optional failure module')
|
||||
})
|
||||
|
||||
const setFlowJsonToolDef = createToolDef(
|
||||
setFlowJsonToolSchema,
|
||||
'set_flow_json',
|
||||
'Set the entire flow by providing the complete flow object. This replaces all existing modules and schema.',
|
||||
'Set the entire flow by providing the complete flow object. This replaces any provided modules, schema, preprocessor_module, and failure_module.',
|
||||
{ strict: false }
|
||||
)
|
||||
|
||||
/** Arguments accepted by the `set_preprocessor_module` tool: a single `module` field. */
const setPreprocessorModuleToolSchema = z.object({ module: specialModuleToolArgSchema })
|
||||
|
||||
/** Tool definition for `set_preprocessor_module`, built from its zod argument schema. */
const setPreprocessorModuleToolDef = createToolDef(
	setPreprocessorModuleToolSchema,
	'set_preprocessor_module',
	'Set or replace the flow preprocessor module. Use this when the flow needs logic that runs before the main modules.'
)
|
||||
|
||||
/** Arguments accepted by the `set_failure_module` tool: a single `module` field. */
const setFailureModuleToolSchema = z.object({ module: specialModuleToolArgSchema })
|
||||
|
||||
/** Tool definition for `set_failure_module`, built from its zod argument schema. */
const setFailureModuleToolDef = createToolDef(
	setFailureModuleToolSchema,
	'set_failure_module',
	'Set or replace the flow failure module. Use this when the flow needs a dedicated error handler.'
)
|
||||
|
||||
/**
 * Maps each special-module field name to the reserved module id
 * (taken from SPECIAL_MODULE_IDS) associated with that field.
 */
const specialFlowModuleFields = {
	preprocessor_module: SPECIAL_MODULE_IDS.PREPROCESSOR,
	failure_module: SPECIAL_MODULE_IDS.FAILURE
} as const

/** Name of a special-module field: `'preprocessor_module'` or `'failure_module'`. */
type SpecialFlowModuleField = keyof typeof specialFlowModuleFields
|
||||
|
||||
/**
 * Decode a tool argument that may arrive as a JSON-encoded string.
 *
 * @param value - Raw argument. `null` and `undefined` are returned unchanged,
 *   strings are run through `JSON.parse`, and any other value is returned as-is.
 * @param field - Argument name, used only to label the error message.
 * @returns The decoded value, or the input itself when no decoding applies.
 * @throws Error with message `Invalid JSON for <field>: <reason>` when a string
 *   argument is not valid JSON.
 */
function parseOptionalJsonArg(value: unknown, field: string): unknown {
	// Covers both `null` and `undefined`: absent values pass straight through.
	if (value == null) {
		return value
	}

	// Already-structured values (objects, arrays, numbers, ...) need no decoding.
	if (typeof value !== 'string') {
		return value
	}

	try {
		return JSON.parse(value)
	} catch (e) {
		const reason = e instanceof Error ? e.message : String(e)
		throw new Error(`Invalid JSON for ${field}: ${reason}`)
	}
}
|
||||
|
||||
/**
 * Validate a candidate preprocessor or failure module.
 *
 * @param module - Already-decoded candidate. `null` and `undefined` are returned unchanged.
 * @param field - Which special field the module is destined for; selects the required id
 *   and labels error messages.
 * @returns The module as parsed by `flowModuleSchema`, or the `null`/`undefined` input.
 * @throws Error when the module fails `flowModuleSchema` (at most the first five issues are
 *   listed), when its id is not the one reserved for `field`, or when its type is neither
 *   `rawscript` nor `script`.
 */
function validateSpecialFlowModule(
	module: unknown,
	field: SpecialFlowModuleField
): FlowModule | null | undefined {
	if (module === null || module === undefined) {
		return module
	}

	const parsed = flowModuleSchema.safeParse(module)
	if (!parsed.success) {
		// Report only the first few issues to keep the message short.
		const details: string[] = []
		for (const issue of parsed.error.issues.slice(0, 5)) {
			// An issue at the root has an empty path; label it with the field name instead.
			const location = issue.path.length > 0 ? issue.path.join('.') : field
			details.push(`${location}: ${issue.message}`)
		}
		throw new Error(`Invalid ${field}:\n${details.join('\n')}`)
	}

	const candidate = parsed.data

	const requiredId = specialFlowModuleFields[field]
	if (candidate.id !== requiredId) {
		throw new Error(`Invalid ${field}: id must be "${requiredId}"`)
	}

	const isScriptLike = candidate.value.type === 'rawscript' || candidate.value.type === 'script'
	if (!isScriptLike) {
		throw new Error(`Invalid ${field}: only "rawscript" and "script" modules are supported`)
	}

	return candidate
}
|
||||
|
||||
// Will be overridden by setSchema
|
||||
const testRunFlowSchema = z.object({
|
||||
args: z
|
||||
@@ -634,36 +723,111 @@ export const flowTools: Tool<FlowAIChatHelpers>[] = [
|
||||
return `Code for module '${moduleId}' has been updated successfully.`
|
||||
}
|
||||
},
|
||||
{
	def: setPreprocessorModuleToolDef,
	streamArguments: true,
	showDetails: true,
	showFade: true,
	// Sets or replaces the flow preprocessor, or removes it when `module` is null.
	fn: async ({ args, helpers, toolId, toolCallbacks }) => {
		const { module: rawModule } = setPreprocessorModuleToolSchema.parse(args)
		const preprocessor = validateSpecialFlowModule(
			parseOptionalJsonArg(rawModule, 'module'),
			'preprocessor_module'
		)
		const isRemoval = preprocessor === null

		toolCallbacks.setToolStatus(toolId, {
			content: isRemoval ? 'Removing preprocessor module...' : 'Setting preprocessor module...'
		})

		// Only the preprocessor slot is passed; the other three arguments stay undefined.
		await helpers.setFlowJson(undefined, undefined, preprocessor, undefined)

		// Push the new input transforms to the form when the preprocessor is the selected module.
		if (preprocessor) {
			const { selectedId } = helpers.getFlowAndSelectedId()
			if (
				selectedId === SPECIAL_MODULE_IDS.PREPROCESSOR &&
				'input_transforms' in preprocessor.value &&
				preprocessor.value.input_transforms
			) {
				helpers.updateExprsToSet(preprocessor.id, preprocessor.value.input_transforms)
			}
		}

		toolCallbacks.setToolStatus(toolId, {
			content: isRemoval ? 'Preprocessor module removed' : 'Preprocessor module updated',
			result: 'Success'
		})

		if (isRemoval) {
			return 'Preprocessor module removed'
		}
		return 'Preprocessor module updated successfully.'
	}
},
|
||||
{
	def: setFailureModuleToolDef,
	streamArguments: true,
	showDetails: true,
	showFade: true,
	// Sets or replaces the flow failure module, or removes it when `module` is null.
	fn: async ({ args, helpers, toolId, toolCallbacks }) => {
		const { module: rawModule } = setFailureModuleToolSchema.parse(args)
		const failureHandler = validateSpecialFlowModule(
			parseOptionalJsonArg(rawModule, 'module'),
			'failure_module'
		)
		const isRemoval = failureHandler === null

		toolCallbacks.setToolStatus(toolId, {
			content: isRemoval ? 'Removing failure module...' : 'Setting failure module...'
		})

		// Only the failure slot is passed; the other three arguments stay undefined.
		await helpers.setFlowJson(undefined, undefined, undefined, failureHandler)

		// Push the new input transforms to the form when the failure module is the selected module.
		if (failureHandler) {
			const { selectedId } = helpers.getFlowAndSelectedId()
			if (
				selectedId === SPECIAL_MODULE_IDS.FAILURE &&
				'input_transforms' in failureHandler.value &&
				failureHandler.value.input_transforms
			) {
				helpers.updateExprsToSet(failureHandler.id, failureHandler.value.input_transforms)
			}
		}

		toolCallbacks.setToolStatus(toolId, {
			content: isRemoval ? 'Failure module removed' : 'Failure module updated',
			result: 'Success'
		})

		if (isRemoval) {
			return 'Failure module removed'
		}
		return 'Failure module updated successfully.'
	}
},
|
||||
{
|
||||
def: setFlowJsonToolDef,
|
||||
streamArguments: true,
|
||||
showDetails: true,
|
||||
showFade: true,
|
||||
fn: async ({ args, helpers, toolId, toolCallbacks }) => {
|
||||
const { modules, schema } = args
|
||||
const { modules, schema, preprocessor_module, failure_module } = args
|
||||
|
||||
let parsedModules: FlowModule[] | undefined
|
||||
let parsedSchema: Record<string, any> | undefined
|
||||
let parsedPreprocessorModule: FlowModule | null | undefined
|
||||
let parsedFailureModule: FlowModule | null | undefined
|
||||
|
||||
// Parse JSON strings
|
||||
try {
|
||||
parsedModules = modules
|
||||
? typeof modules === 'string'
|
||||
? JSON.parse(modules)
|
||||
: modules
|
||||
: undefined
|
||||
parsedSchema = schema
|
||||
? typeof schema === 'string'
|
||||
? JSON.parse(schema)
|
||||
: schema
|
||||
: undefined
|
||||
} catch (e) {
|
||||
const errorMessage = e instanceof Error ? e.message : String(e)
|
||||
throw new Error(`Invalid JSON: ${errorMessage}`)
|
||||
parsedModules = parseOptionalJsonArg(modules, 'modules') as FlowModule[] | undefined
|
||||
parsedSchema = parseOptionalJsonArg(schema, 'schema') as Record<string, any> | undefined
|
||||
parsedPreprocessorModule = parseOptionalJsonArg(
|
||||
preprocessor_module,
|
||||
'preprocessor_module'
|
||||
) as FlowModule | null | undefined
|
||||
parsedFailureModule = parseOptionalJsonArg(failure_module, 'failure_module') as
|
||||
| FlowModule
|
||||
| null
|
||||
| undefined
|
||||
if (parsedModules === null) {
|
||||
parsedModules = undefined
|
||||
}
|
||||
if (parsedSchema === null) {
|
||||
parsedSchema = undefined
|
||||
}
|
||||
|
||||
// Validate modules against OpenFlow schema
|
||||
if (parsedModules) {
|
||||
if (parsedModules !== undefined) {
|
||||
const result = flowModulesSchema.safeParse(parsedModules)
|
||||
if (!result.success) {
|
||||
const errors = result.error.issues.slice(0, 5).map((e) => {
|
||||
@@ -699,23 +863,61 @@ export const flowTools: Tool<FlowAIChatHelpers>[] = [
|
||||
|
||||
throw new Error(`Invalid flow modules:\n${errors.join('\n')}`)
|
||||
} else {
|
||||
// check for duplicate ids
|
||||
const ids = collectAllModuleIdsFromArray(parsedModules)
|
||||
if (ids.length !== new Set(ids).size) {
|
||||
throw new Error('Duplicate module IDs found in flow')
|
||||
const reservedIds = ids.filter(
|
||||
(id) =>
|
||||
id === SPECIAL_MODULE_IDS.PREPROCESSOR || id === SPECIAL_MODULE_IDS.FAILURE
|
||||
)
|
||||
if (reservedIds.length > 0) {
|
||||
throw new Error(
|
||||
'Special modules must be provided via preprocessor_module and failure_module, not inside modules'
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
parsedPreprocessorModule = validateSpecialFlowModule(
|
||||
parsedPreprocessorModule,
|
||||
'preprocessor_module'
|
||||
)
|
||||
parsedFailureModule = validateSpecialFlowModule(parsedFailureModule, 'failure_module')
|
||||
|
||||
const ids = [
|
||||
...(parsedModules ? collectAllModuleIdsFromArray(parsedModules) : []),
|
||||
...([parsedPreprocessorModule, parsedFailureModule].filter(
|
||||
(module): module is FlowModule => module !== undefined && module !== null
|
||||
)
|
||||
.map((module) => module.id))
|
||||
]
|
||||
if (ids.length !== new Set(ids).size) {
|
||||
throw new Error('Duplicate module IDs found in flow')
|
||||
}
|
||||
|
||||
toolCallbacks.setToolStatus(toolId, {
|
||||
content: `Setting flow...`
|
||||
})
|
||||
await helpers.setFlowJson(parsedModules, parsedSchema)
|
||||
await helpers.setFlowJson(
|
||||
parsedModules,
|
||||
parsedSchema,
|
||||
parsedPreprocessorModule,
|
||||
parsedFailureModule
|
||||
)
|
||||
|
||||
// Update exprsToSet if the selected module has input_transforms
|
||||
if (parsedModules) {
|
||||
if (
|
||||
parsedModules !== undefined ||
|
||||
parsedPreprocessorModule !== undefined ||
|
||||
parsedFailureModule !== undefined
|
||||
) {
|
||||
const { selectedId } = helpers.getFlowAndSelectedId()
|
||||
const selectedModule = findModuleById(parsedModules, selectedId)
|
||||
const selectedModule =
|
||||
selectedId === SPECIAL_MODULE_IDS.PREPROCESSOR
|
||||
? parsedPreprocessorModule ?? undefined
|
||||
: selectedId === SPECIAL_MODULE_IDS.FAILURE
|
||||
? parsedFailureModule ?? undefined
|
||||
: parsedModules
|
||||
? findModuleById(parsedModules, selectedId)
|
||||
: undefined
|
||||
if (
|
||||
selectedModule &&
|
||||
'input_transforms' in selectedModule.value &&
|
||||
@@ -767,7 +969,9 @@ export function prepareFlowSystemMessage(customPrompt?: string): ChatCompletionS
|
||||
## Tool Selection Guide
|
||||
|
||||
**Flow Modification:**
|
||||
- **Create or modify the entire flow** → \`set_flow_json\` (provide complete modules array and optional schema)
|
||||
- **Update only the preprocessor** → \`set_preprocessor_module\`
|
||||
- **Update only the failure handler** → \`set_failure_module\`
|
||||
- **Create or replace the full flow** → \`set_flow_json\`
|
||||
|
||||
**Code & Scripts:**
|
||||
- **View existing inline script code** → \`inspect_inline_script\`
|
||||
@@ -789,13 +993,17 @@ export function prepareFlowSystemMessage(customPrompt?: string): ChatCompletionS
|
||||
- **Search resource types** → \`resource_type\`
|
||||
- **Get database schema** → \`get_db_schema\`
|
||||
|
||||
${FLOW_CHAT_SPECIAL_MODULES}
|
||||
|
||||
## Flow Modification with set_flow_json
|
||||
|
||||
Use the \`set_flow_json\` tool to set the entire flow structure at once. Provide the complete modules array and optionally the flow input schema.
|
||||
Use the \`set_flow_json\` tool to set the entire flow structure at once. Provide the complete modules array and optionally the flow input schema, \`preprocessor_module\`, and \`failure_module\`.
|
||||
|
||||
**Parameters:**
|
||||
- \`modules\`: Array of flow modules (required)
|
||||
- \`schema\`: Flow input schema in JSON Schema format (optional)
|
||||
- \`preprocessor_module\`: Special module that runs before \`modules\` (optional, separate from \`modules\`)
|
||||
- \`failure_module\`: Special module that runs on failure (optional, separate from \`modules\`)
|
||||
|
||||
**Example - Simple flow:**
|
||||
\`\`\`javascript
|
||||
|
||||
111
frontend/src/lib/components/copilot/chat/flow/helperUtils.ts
Normal file
111
frontend/src/lib/components/copilot/chat/flow/helperUtils.ts
Normal file
@@ -0,0 +1,111 @@
|
||||
import type { FlowModule, OpenFlow, RawScript } from '$lib/gen'
|
||||
import { dfs } from '$lib/components/flows/previousResults'
|
||||
import { SPECIAL_MODULE_IDS } from '../shared'
|
||||
import type { InlineScriptSession } from './inlineScriptsUtils'
|
||||
|
||||
/**
 * Minimal flow shape the helpers in this file operate on: the `value` of an
 * `OpenFlow` plus an optional input `schema`.
 */
type FlowLike = Pick<OpenFlow, 'value'> & { schema?: Record<string, any> }
|
||||
|
||||
/**
 * Partial update consumed by `applyFlowJsonUpdate`.
 * A field left `undefined` is not touched; for the two special modules,
 * `null` clears the corresponding module on the flow.
 */
export interface FlowJsonUpdate {
	/** Replacement for `flow.value.modules`. */
	modules?: FlowModule[]
	/** Replacement for the flow input schema. */
	schema?: Record<string, any>
	/** Replacement for `flow.value.preprocessor_module`; `null` removes it. */
	preprocessorModule?: FlowModule | null
	/** Replacement for `flow.value.failure_module`; `null` removes it. */
	failureModule?: FlowModule | null
}
|
||||
|
||||
/**
 * Look up a module in `flow` by id.
 *
 * The two reserved ids resolve to `flow.value.preprocessor_module` and
 * `flow.value.failure_module`; any other id is resolved through `dfs`, taking
 * the first element of its result.
 *
 * @param flow - Flow to search; `undefined` yields `undefined`.
 * @param id - Module id to look for.
 * @returns The matching module, or `undefined` when there is none.
 */
export function getFlowModuleById(flow: FlowLike | undefined, id: string): FlowModule | undefined {
	if (!flow) {
		return undefined
	}

	switch (id) {
		case SPECIAL_MODULE_IDS.PREPROCESSOR:
			return flow.value.preprocessor_module
		case SPECIAL_MODULE_IDS.FAILURE:
			return flow.value.failure_module
		default:
			// NOTE(review): the meaning of dfs's third argument is not visible here — confirm.
			return dfs(id, flow as OpenFlow, false)[0]
	}
}
|
||||
|
||||
/**
 * Look up a module by id and return it only when it is a `rawscript` module.
 *
 * @param flow - Flow to search; may be `undefined`.
 * @param id - Module id to look for.
 * @returns The module narrowed to a raw-script module, or `undefined` when the
 *   module is missing or has a different type.
 */
export function getRawScriptModuleById(
	flow: FlowLike | undefined,
	id: string
): (FlowModule & { value: RawScript }) | undefined {
	const candidate = getFlowModuleById(flow, id)
	const isRawScript = candidate?.value.type === 'rawscript'
	return isRawScript ? (candidate as FlowModule & { value: RawScript }) : undefined
}
|
||||
|
||||
/**
 * Overwrite the `content` of the raw-script module with the given id, in place.
 *
 * @param flow - Flow containing the module.
 * @param id - Id of the module to update.
 * @param code - New script content.
 * @returns The updated module, or `undefined` when no raw-script module has that id
 *   (in which case nothing is modified).
 */
export function updateRawScriptModuleContent(
	flow: FlowLike,
	id: string,
	code: string
): (FlowModule & { value: RawScript }) | undefined {
	const target = getRawScriptModuleById(flow, id)
	if (target) {
		target.value.content = code
	}
	return target
}
|
||||
|
||||
/**
 * Apply a partial update to `flow` in place.
 *
 * Only fields of the update that are not `undefined` are written. For the two
 * special modules, `null` clears the module on the flow. Modules are passed
 * through the inline-script restore helpers before being stored.
 *
 * The update is all-or-nothing: every module is restored (and checked for
 * unresolved inline script references) before anything is written, so a throw
 * leaves `flow` exactly as it was instead of half-updated.
 *
 * @param flow - Flow to mutate.
 * @param inlineScriptSession - Session used to restore inline script references.
 * @param update - Fields to apply.
 * @throws Error propagated from the restore helpers when inline script references
 *   cannot be resolved.
 */
export function applyFlowJsonUpdate(
	flow: FlowLike,
	inlineScriptSession: InlineScriptSession,
	{ modules, schema, preprocessorModule, failureModule }: FlowJsonUpdate
): void {
	// Phase 1: everything that can throw. Nothing on `flow` is touched yet.
	const restoredModules =
		modules === undefined ? undefined : restoreFlowModules(modules, inlineScriptSession)
	const restoredPreprocessor =
		preprocessorModule === undefined || preprocessorModule === null
			? undefined
			: restoreFlowModule(preprocessorModule, inlineScriptSession)
	const restoredFailure =
		failureModule === undefined || failureModule === null
			? undefined
			: restoreFlowModule(failureModule, inlineScriptSession)

	// Phase 2: commit. Plain assignments only.
	if (restoredModules !== undefined) {
		flow.value.modules = restoredModules
	}

	if (schema !== undefined) {
		flow.schema = schema
	}

	// `null` reaches here with an undefined restored value, which clears the slot.
	if (preprocessorModule !== undefined) {
		flow.value.preprocessor_module = restoredPreprocessor
	}

	if (failureModule !== undefined) {
		flow.value.failure_module = restoredFailure
	}
}
|
||||
|
||||
/**
 * Restore inline script references in a list of modules and verify that none
 * remain unresolved.
 *
 * @returns The restored modules as produced by the session.
 * @throws Error when unresolved inline script references remain.
 */
function restoreFlowModules(
	modules: FlowModule[],
	inlineScriptSession: InlineScriptSession
): FlowModule[] {
	const restored = inlineScriptSession.restoreInlineScriptReferences(modules)
	assertResolvedInlineScripts(restored, inlineScriptSession)
	return restored
}
|
||||
|
||||
/**
 * Single-module variant of the restore step: wraps the module in a one-element
 * array for the session, takes the first restored element and verifies that it
 * has no unresolved inline script references.
 *
 * @throws Error when unresolved inline script references remain.
 */
function restoreFlowModule(
	module: FlowModule,
	inlineScriptSession: InlineScriptSession
): FlowModule {
	const restoredList = inlineScriptSession.restoreInlineScriptReferences([module])
	const restored = restoredList[0]
	assertResolvedInlineScripts([restored], inlineScriptSession)
	return restored
}
|
||||
|
||||
/**
 * Throw when the session reports unresolved inline script references in `modules`.
 *
 * @throws Error listing the unresolved references, comma-separated.
 */
function assertResolvedInlineScripts(
	modules: FlowModule[],
	inlineScriptSession: InlineScriptSession
): void {
	const pendingRefs = inlineScriptSession.findUnresolvedInlineScriptRefs(modules)
	if (pendingRefs.length === 0) {
		return
	}
	throw new Error(`Unresolved inline script references: ${pendingRefs.join(', ')}`)
}
|
||||
@@ -546,6 +546,7 @@ export function createToolDef(
|
||||
let parameters = z.toJSONSchema(zodSchema)
|
||||
delete parameters.$schema
|
||||
if (!parameters.required) parameters.required = []
|
||||
normalizeToolParameterSchema(parameters)
|
||||
|
||||
return {
|
||||
type: 'function',
|
||||
@@ -605,9 +606,9 @@ export const createSearchHubScriptsTool = (withContent: boolean = false) => ({
|
||||
})
|
||||
|
||||
/**
|
||||
* Recursively removes format: null or format: '' from a JSON schema object
|
||||
* Recursively normalizes JSON Schema quirks that specific providers reject.
|
||||
*/
|
||||
function removeNullFormats(schema: Record<string, any> | undefined): void {
|
||||
function normalizeToolParameterSchema(schema: Record<string, any> | undefined): void {
|
||||
if (!schema || typeof schema !== 'object') {
|
||||
return
|
||||
}
|
||||
@@ -620,25 +621,31 @@ function removeNullFormats(schema: Record<string, any> | undefined): void {
|
||||
// Recurse into properties
|
||||
if (schema.properties && typeof schema.properties === 'object') {
|
||||
for (const key of Object.keys(schema.properties)) {
|
||||
removeNullFormats(schema.properties[key])
|
||||
normalizeToolParameterSchema(schema.properties[key])
|
||||
}
|
||||
}
|
||||
|
||||
// Recurse into items (for arrays)
|
||||
if (schema.items) {
|
||||
removeNullFormats(schema.items)
|
||||
if (Array.isArray(schema.items)) {
|
||||
for (const item of schema.items) {
|
||||
normalizeToolParameterSchema(item)
|
||||
}
|
||||
} else {
|
||||
normalizeToolParameterSchema(schema.items)
|
||||
}
|
||||
}
|
||||
|
||||
// Recurse into additionalProperties if it's an object schema
|
||||
if (schema.additionalProperties && typeof schema.additionalProperties === 'object') {
|
||||
removeNullFormats(schema.additionalProperties)
|
||||
normalizeToolParameterSchema(schema.additionalProperties)
|
||||
}
|
||||
|
||||
// Recurse into allOf, anyOf, oneOf
|
||||
for (const key of ['allOf', 'anyOf', 'oneOf']) {
|
||||
if (Array.isArray(schema[key])) {
|
||||
for (const subSchema of schema[key]) {
|
||||
removeNullFormats(subSchema)
|
||||
normalizeToolParameterSchema(subSchema)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -662,8 +669,8 @@ export async function buildSchemaForTool(
|
||||
|
||||
toolDef.function.parameters = { ...schema, additionalProperties: false }
|
||||
|
||||
// recursively remove any format: null or format: '' (empty string) from schema
|
||||
removeNullFormats(toolDef.function.parameters)
|
||||
// recursively normalize provider-incompatible schema fragments
|
||||
normalizeToolParameterSchema(toolDef.function.parameters)
|
||||
|
||||
// OpenAI models don't handle strict mode well for schemas with complex properties, so we disable it
|
||||
const model = getCurrentModel()
|
||||
|
||||
@@ -20,6 +20,53 @@ The OpenFlow schema (openflow.openapi.yaml) is the source of truth for flow stru
|
||||
- `preprocessor` - Reserved for preprocessor module
|
||||
- `Input` - Reserved for flow input reference
|
||||
|
||||
## Hard Structural Rules
|
||||
|
||||
These are strict Windmill schema rules. Follow them exactly.
|
||||
|
||||
- `value.modules` is only for normal sequential steps
|
||||
- `value.preprocessor_module` and `value.failure_module` are special top-level fields inside `value`, not entries in `value.modules`
|
||||
- If a flow needs a preprocessor, create `value.preprocessor_module` with `id: preprocessor`
|
||||
- If a flow needs a failure handler, create `value.failure_module` with `id: failure`
|
||||
- Do NOT create regular modules inside `value.modules` named `preprocessor` or `failure`
|
||||
- `preprocessor_module` and `failure_module` only support `script` or `rawscript`
|
||||
- `preprocessor_module` runs before normal modules and cannot reference `results.*`
|
||||
- `failure_module` can use the `error` object with `error.message`, `error.step_id`, `error.name`, and `error.stack`
|
||||
|
||||
Correct shape:
|
||||
|
||||
```yaml
|
||||
value:
|
||||
preprocessor_module:
|
||||
id: preprocessor
|
||||
value:
|
||||
type: rawscript
|
||||
...
|
||||
failure_module:
|
||||
id: failure
|
||||
value:
|
||||
type: rawscript
|
||||
...
|
||||
modules:
|
||||
- id: process_event
|
||||
value:
|
||||
type: rawscript
|
||||
...
|
||||
```
|
||||
|
||||
Incorrect shape:
|
||||
|
||||
```yaml
|
||||
value:
|
||||
modules:
|
||||
- id: preprocessor
|
||||
...
|
||||
- id: process_event
|
||||
...
|
||||
- id: failure
|
||||
...
|
||||
```
|
||||
|
||||
## Module ID Rules
|
||||
|
||||
- Must be unique across the entire flow
|
||||
@@ -35,10 +82,148 @@ The OpenFlow schema (openflow.openapi.yaml) is the source of truth for flow stru
|
||||
## Data Flow Between Steps
|
||||
|
||||
- `flow_input.property` - Access flow input parameters
|
||||
- `results.step_id` - Access output from a previous step
|
||||
- `results.step_id.property` - Access specific property from previous step output
|
||||
- `flow_input.iter.value` - Current item when inside a for-loop
|
||||
- `flow_input.iter.index` - Current index when inside a for-loop
|
||||
- `results.step_id` - Access output from a previous step only when that step result is in scope
|
||||
- `results.step_id.property` - Access specific property from a previous step output only when that step result is in scope
|
||||
- `flow_input.iter.value` - Current iteration value when inside a loop (`forloopflow` or `whileloopflow`)
|
||||
- `flow_input.iter.index` - Current loop index when inside a loop (`forloopflow` or `whileloopflow`)
|
||||
|
||||
## Loop Structure Rules
|
||||
|
||||
- For `whileloopflow`, use module-level `stop_after_if` on the loop module itself when the loop should stop after an iteration result
|
||||
- Do NOT put `stop_after_if` inside `value` of a `whileloopflow`
|
||||
- `stop_after_all_iters_if` is for checks after the whole loop finishes, not the normal per-iteration break condition
|
||||
- When a `whileloopflow` carries state forward between iterations, use `flow_input.iter.value` as the current loop value and provide an explicit first-iteration fallback when needed
|
||||
- Use `flow_input.iter.index` only when the loop logic is truly based on the iteration index, not as a replacement for the current loop value
|
||||
- If the user asks for a final scalar/object after a loop, add a normal step after the loop that extracts the final value from the loop result instead of returning the whole loop result array
|
||||
|
||||
Correct `whileloopflow` shape:
|
||||
|
||||
```yaml
|
||||
- id: loop_until_done
|
||||
stop_after_if:
|
||||
expr: result.done === true
|
||||
skip_if_stopped: false
|
||||
value:
|
||||
type: whileloopflow
|
||||
skip_failures: false
|
||||
modules:
|
||||
- id: advance_state
|
||||
value:
|
||||
type: rawscript
|
||||
input_transforms:
|
||||
state:
|
||||
type: javascript
|
||||
expr: flow_input.iter && flow_input.iter.value !== undefined ? flow_input.iter.value : flow_input.initial_state
|
||||
- id: return_final_state
|
||||
value:
|
||||
type: rawscript
|
||||
input_transforms:
|
||||
final_state:
|
||||
type: javascript
|
||||
expr: results.loop_until_done[results.loop_until_done.length - 1]
|
||||
```
|
||||
|
||||
Incorrect `whileloopflow` patterns:
|
||||
|
||||
```yaml
|
||||
- id: loop_until_done
|
||||
value:
|
||||
type: whileloopflow
|
||||
stop_after_if:
|
||||
expr: result.done === true
|
||||
```
|
||||
|
||||
```yaml
|
||||
input_transforms:
|
||||
state:
|
||||
type: javascript
|
||||
expr: flow_input.iter.index
|
||||
```
|
||||
|
||||
```yaml
|
||||
input_transforms:
|
||||
final_state:
|
||||
type: javascript
|
||||
expr: results.loop_until_done
|
||||
```
|
||||
|
||||
## Approval / Suspend Structure
|
||||
|
||||
- `suspend` belongs on the flow module object itself, as a sibling of `id` and `value`
|
||||
- Never put `suspend` inside `value`
|
||||
|
||||
Correct shape:
|
||||
|
||||
```yaml
|
||||
- id: request_approval
|
||||
suspend:
|
||||
required_events: 1
|
||||
resume_form:
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
comment:
|
||||
type: string
|
||||
required: [comment]
|
||||
value:
|
||||
type: identity
|
||||
```
|
||||
|
||||
Incorrect shape:
|
||||
|
||||
```yaml
|
||||
- id: request_approval
|
||||
value:
|
||||
type: rawscript
|
||||
suspend:
|
||||
required_events: 1
|
||||
```
|
||||
|
||||
## Branch Result Scope Rules
|
||||
|
||||
- Inside a branch, you may reference earlier outer steps and earlier steps in the same branch
|
||||
- Outside a `branchone`, do NOT reference ids of steps that only exist inside its branches or default branch. Use `results.<branchone_module_id>` instead
|
||||
- Outside a `branchall`, do NOT reference ids of steps inside its branches. Use `results.<branchall_module_id>` instead
|
||||
- If downstream steps need a stable shape after a branch, make each branch return the same fields
|
||||
- When needed, add a normalization step immediately after the branch and consume `results.<branch_module_id>` there
|
||||
|
||||
Correct after `branchone`:
|
||||
|
||||
```yaml
|
||||
- id: route_order
|
||||
value:
|
||||
type: branchone
|
||||
...
|
||||
- id: send_confirmation
|
||||
value:
|
||||
input_transforms:
|
||||
routed:
|
||||
type: javascript
|
||||
expr: results.route_order
|
||||
```
|
||||
|
||||
Incorrect after `branchone`:
|
||||
|
||||
```yaml
|
||||
expr: results.create_shipment
|
||||
expr: results.create_backorder
|
||||
```
|
||||
|
||||
Correct after `branchall`:
|
||||
|
||||
```yaml
|
||||
- id: enrich_parallel
|
||||
value:
|
||||
type: branchall
|
||||
parallel: true
|
||||
...
|
||||
- id: combine_data
|
||||
value:
|
||||
input_transforms:
|
||||
enrichments:
|
||||
type: javascript
|
||||
expr: results.enrich_parallel
|
||||
```
|
||||
|
||||
## Input Transforms
|
||||
|
||||
@@ -55,14 +240,14 @@ JavaScript transform (dynamic expression):
|
||||
- For flow inputs: Use type `"object"` with format `"resource-{type}"` (e.g., `"resource-postgresql"`)
|
||||
- For step inputs: Use static value `"$res:path/to/resource"`
|
||||
|
||||
## Failure Handler
|
||||
## Final Structural Self-Check
|
||||
|
||||
Executes when any step fails. Has access to error details:
|
||||
Before finalizing a flow, verify:
|
||||
|
||||
- `error.message` - Error message
|
||||
- `error.step_id` - ID of failed step
|
||||
- `error.name` - Error name
|
||||
- `error.stack` - Stack trace
|
||||
- any preprocessor is in `value.preprocessor_module`
|
||||
- any failure handler is in `value.failure_module`
|
||||
- any approval step has module-level `suspend`
|
||||
- no downstream step references inner branch step ids from outside the branch
|
||||
|
||||
## S3 Object Operations
|
||||
|
||||
|
||||
@@ -51,6 +51,53 @@ The OpenFlow schema (openflow.openapi.yaml) is the source of truth for flow stru
|
||||
- \`preprocessor\` - Reserved for preprocessor module
|
||||
- \`Input\` - Reserved for flow input reference
|
||||
|
||||
## Hard Structural Rules
|
||||
|
||||
These are strict Windmill schema rules. Follow them exactly.
|
||||
|
||||
- \`value.modules\` is only for normal sequential steps
|
||||
- \`value.preprocessor_module\` and \`value.failure_module\` are special top-level fields inside \`value\`, not entries in \`value.modules\`
|
||||
- If a flow needs a preprocessor, create \`value.preprocessor_module\` with \`id: preprocessor\`
|
||||
- If a flow needs a failure handler, create \`value.failure_module\` with \`id: failure\`
|
||||
- Do NOT create regular modules inside \`value.modules\` named \`preprocessor\` or \`failure\`
|
||||
- \`preprocessor_module\` and \`failure_module\` only support \`script\` or \`rawscript\`
|
||||
- \`preprocessor_module\` runs before normal modules and cannot reference \`results.*\`
|
||||
- \`failure_module\` can use the \`error\` object with \`error.message\`, \`error.step_id\`, \`error.name\`, and \`error.stack\`
|
||||
|
||||
Correct shape:
|
||||
|
||||
\`\`\`yaml
value:
  preprocessor_module:
    id: preprocessor
    value:
      type: rawscript
      ...
  failure_module:
    id: failure
    value:
      type: rawscript
      ...
  modules:
    - id: process_event
      value:
        type: rawscript
        ...
\`\`\`
|
||||
|
||||
Incorrect shape:
|
||||
|
||||
\`\`\`yaml
value:
  modules:
    - id: preprocessor
      ...
    - id: process_event
      ...
    - id: failure
      ...
\`\`\`
|
||||
|
||||
## Module ID Rules
|
||||
|
||||
- Must be unique across the entire flow
|
||||
@@ -66,10 +113,148 @@ The OpenFlow schema (openflow.openapi.yaml) is the source of truth for flow stru
|
||||
## Data Flow Between Steps
|
||||
|
||||
- \`flow_input.property\` - Access flow input parameters
|
||||
- \`results.step_id\` - Access output from a previous step
|
||||
- \`results.step_id.property\` - Access specific property from previous step output
|
||||
- \`flow_input.iter.value\` - Current item when inside a for-loop
|
||||
- \`flow_input.iter.index\` - Current index when inside a for-loop
|
||||
- \`results.step_id\` - Access output from a previous step only when that step result is in scope
|
||||
- \`results.step_id.property\` - Access specific property from a previous step output only when that step result is in scope
|
||||
- \`flow_input.iter.value\` - Current iteration value when inside a loop (\`forloopflow\` or \`whileloopflow\`)
|
||||
- \`flow_input.iter.index\` - Current loop index when inside a loop (\`forloopflow\` or \`whileloopflow\`)
|
||||
|
||||
## Loop Structure Rules
|
||||
|
||||
- For \`whileloopflow\`, use module-level \`stop_after_if\` on the loop module itself when the loop should stop after an iteration result
|
||||
- Do NOT put \`stop_after_if\` inside \`value\` of a \`whileloopflow\`
|
||||
- \`stop_after_all_iters_if\` is for checks after the whole loop finishes, not the normal per-iteration break condition
|
||||
- When a \`whileloopflow\` carries state forward between iterations, use \`flow_input.iter.value\` as the current loop value and provide an explicit first-iteration fallback when needed
|
||||
- Use \`flow_input.iter.index\` only when the loop logic is truly based on the iteration index, not as a replacement for the current loop value
|
||||
- If the user asks for a final scalar/object after a loop, add a normal step after the loop that extracts the final value from the loop result instead of returning the whole loop result array
|
||||
|
||||
Correct \`whileloopflow\` shape:
|
||||
|
||||
\`\`\`yaml
- id: loop_until_done
  stop_after_if:
    expr: result.done === true
    skip_if_stopped: false
  value:
    type: whileloopflow
    skip_failures: false
    modules:
      - id: advance_state
        value:
          type: rawscript
          input_transforms:
            state:
              type: javascript
              expr: flow_input.iter && flow_input.iter.value !== undefined ? flow_input.iter.value : flow_input.initial_state
- id: return_final_state
  value:
    type: rawscript
    input_transforms:
      final_state:
        type: javascript
        expr: results.loop_until_done[results.loop_until_done.length - 1]
\`\`\`
|
||||
|
||||
Incorrect \`whileloopflow\` patterns:
|
||||
|
||||
\`\`\`yaml
- id: loop_until_done
  value:
    type: whileloopflow
    stop_after_if:
      expr: result.done === true
\`\`\`
|
||||
|
||||
\`\`\`yaml
|
||||
input_transforms:
|
||||
state:
|
||||
type: javascript
|
||||
expr: flow_input.iter.index
|
||||
\`\`\`
|
||||
|
||||
\`\`\`yaml
|
||||
input_transforms:
|
||||
final_state:
|
||||
type: javascript
|
||||
expr: results.loop_until_done
|
||||
\`\`\`
|
||||
|
||||
## Approval / Suspend Structure
|
||||
|
||||
- \`suspend\` belongs on the flow module object itself, as a sibling of \`id\` and \`value\`
|
||||
- Never put \`suspend\` inside \`value\`
|
||||
|
||||
Correct shape:
|
||||
|
||||
\`\`\`yaml
- id: request_approval
  suspend:
    required_events: 1
    resume_form:
      schema:
        type: object
        properties:
          comment:
            type: string
        required: [comment]
  value:
    type: identity
\`\`\`
|
||||
|
||||
Incorrect shape:
|
||||
|
||||
\`\`\`yaml
- id: request_approval
  value:
    type: rawscript
    suspend:
      required_events: 1
\`\`\`
|
||||
|
||||
## Branch Result Scope Rules
|
||||
|
||||
- Inside a branch, you may reference earlier outer steps and earlier steps in the same branch
|
||||
- Outside a \`branchone\`, do NOT reference ids of steps that only exist inside its branches or default branch. Use \`results.<branchone_module_id>\` instead
|
||||
- Outside a \`branchall\`, do NOT reference ids of steps inside its branches. Use \`results.<branchall_module_id>\` instead
|
||||
- If downstream steps need a stable shape after a branch, make each branch return the same fields
|
||||
- When needed, add a normalization step immediately after the branch and consume \`results.<branch_module_id>\` there
|
||||
|
||||
Correct after \`branchone\`:
|
||||
|
||||
\`\`\`yaml
|
||||
- id: route_order
|
||||
value:
|
||||
type: branchone
|
||||
...
|
||||
- id: send_confirmation
|
||||
value:
|
||||
input_transforms:
|
||||
routed:
|
||||
type: javascript
|
||||
expr: results.route_order
|
||||
\`\`\`
|
||||
|
||||
Incorrect after \`branchone\`:
|
||||
|
||||
\`\`\`yaml
|
||||
expr: results.create_shipment
|
||||
expr: results.create_backorder
|
||||
\`\`\`
|
||||
|
||||
Correct after \`branchall\`:
|
||||
|
||||
\`\`\`yaml
|
||||
- id: enrich_parallel
|
||||
value:
|
||||
type: branchall
|
||||
parallel: true
|
||||
...
|
||||
- id: combine_data
|
||||
value:
|
||||
input_transforms:
|
||||
enrichments:
|
||||
type: javascript
|
||||
expr: results.enrich_parallel
|
||||
\`\`\`
|
||||
|
||||
## Input Transforms
|
||||
|
||||
@@ -86,14 +271,14 @@ JavaScript transform (dynamic expression):
|
||||
- For flow inputs: Use type \`"object"\` with format \`"resource-{type}"\` (e.g., \`"resource-postgresql"\`)
|
||||
- For step inputs: Use static value \`"$res:path/to/resource"\`
|
||||
|
||||
## Failure Handler
|
||||
## Final Structural Self-Check
|
||||
|
||||
Executes when any step fails. Has access to error details:
|
||||
Before finalizing a flow, verify:
|
||||
|
||||
- \`error.message\` - Error message
|
||||
- \`error.step_id\` - ID of failed step
|
||||
- \`error.name\` - Error name
|
||||
- \`error.stack\` - Stack trace
|
||||
- any preprocessor is in \`value.preprocessor_module\`
|
||||
- any failure handler is in \`value.failure_module\`
|
||||
- any approval step has module-level \`suspend\`
|
||||
- no downstream step references inner branch step ids from outside the branch
|
||||
|
||||
## S3 Object Operations
|
||||
|
||||
@@ -149,6 +334,46 @@ Reference a specific resource using \`$res:\` prefix:
|
||||
\`\`\`
|
||||
`;
|
||||
|
||||
export const FLOW_CHAT_SPECIAL_MODULES = `## Special Modules
|
||||
|
||||
- Use \`set_preprocessor_module\` to add, replace, or remove the top-level \`value.preprocessor_module\`
|
||||
- Use \`set_failure_module\` to add, replace, or remove the top-level \`value.failure_module\`
|
||||
- Use \`set_flow_json\` only when you are replacing the whole flow, including normal modules and optional special modules
|
||||
|
||||
**Example - Update only the special modules:**
|
||||
\`\`\`javascript
|
||||
set_preprocessor_module({
|
||||
module: JSON.stringify({
|
||||
id: "preprocessor",
|
||||
value: {
|
||||
type: "rawscript",
|
||||
language: "bun",
|
||||
content: "export async function preprocessor(payload: string) { const trimmed = payload.trim(); if (!trimmed) { throw new Error('payload must not be empty'); } return { payload: trimmed }; }",
|
||||
input_transforms: {
|
||||
payload: { type: "javascript", expr: "flow_input.payload" }
|
||||
}
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
set_failure_module({
|
||||
module: JSON.stringify({
|
||||
id: "failure",
|
||||
value: {
|
||||
type: "rawscript",
|
||||
language: "bun",
|
||||
content: "export async function main(message: string, name: string, step_id: string) { return { message, name, step_id }; }",
|
||||
input_transforms: {
|
||||
message: { type: "javascript", expr: "error.message" },
|
||||
name: { type: "javascript", expr: "error.name" },
|
||||
step_id: { type: "javascript", expr: "error.step_id" }
|
||||
}
|
||||
}
|
||||
})
|
||||
})
|
||||
\`\`\`
|
||||
`;
|
||||
|
||||
export const SDK_TYPESCRIPT = `# TypeScript SDK (windmill-client)
|
||||
|
||||
Import: import * as wmill from 'windmill-client'
|
||||
|
||||
@@ -25,6 +25,53 @@ The OpenFlow schema (openflow.openapi.yaml) is the source of truth for flow stru
|
||||
- `preprocessor` - Reserved for preprocessor module
|
||||
- `Input` - Reserved for flow input reference
|
||||
|
||||
## Hard Structural Rules
|
||||
|
||||
These are strict Windmill schema rules. Follow them exactly.
|
||||
|
||||
- `value.modules` is only for normal sequential steps
|
||||
- `value.preprocessor_module` and `value.failure_module` are special top-level fields inside `value`, not entries in `value.modules`
|
||||
- If a flow needs a preprocessor, create `value.preprocessor_module` with `id: preprocessor`
|
||||
- If a flow needs a failure handler, create `value.failure_module` with `id: failure`
|
||||
- Do NOT create regular modules inside `value.modules` named `preprocessor` or `failure`
|
||||
- `preprocessor_module` and `failure_module` only support `script` or `rawscript`
|
||||
- `preprocessor_module` runs before normal modules and cannot reference `results.*`
|
||||
- `failure_module` can use the `error` object with `error.message`, `error.step_id`, `error.name`, and `error.stack`
|
||||
|
||||
Correct shape:
|
||||
|
||||
```yaml
value:
  preprocessor_module:
    id: preprocessor
    value:
      type: rawscript
      ...
  failure_module:
    id: failure
    value:
      type: rawscript
      ...
  modules:
    - id: process_event
      value:
        type: rawscript
        ...
```
|
||||
|
||||
Incorrect shape:
|
||||
|
||||
```yaml
value:
  modules:
    - id: preprocessor
      ...
    - id: process_event
      ...
    - id: failure
      ...
```
|
||||
|
||||
## Module ID Rules
|
||||
|
||||
- Must be unique across the entire flow
|
||||
@@ -40,10 +87,148 @@ The OpenFlow schema (openflow.openapi.yaml) is the source of truth for flow stru
|
||||
## Data Flow Between Steps
|
||||
|
||||
- `flow_input.property` - Access flow input parameters
|
||||
- `results.step_id` - Access output from a previous step
|
||||
- `results.step_id.property` - Access specific property from previous step output
|
||||
- `flow_input.iter.value` - Current item when inside a for-loop
|
||||
- `flow_input.iter.index` - Current index when inside a for-loop
|
||||
- `results.step_id` - Access output from a previous step only when that step result is in scope
|
||||
- `results.step_id.property` - Access specific property from a previous step output only when that step result is in scope
|
||||
- `flow_input.iter.value` - Current iteration value when inside a loop (`forloopflow` or `whileloopflow`)
|
||||
- `flow_input.iter.index` - Current loop index when inside a loop (`forloopflow` or `whileloopflow`)
|
||||
|
||||
## Loop Structure Rules
|
||||
|
||||
- For `whileloopflow`, use module-level `stop_after_if` on the loop module itself when the loop should stop after an iteration result
|
||||
- Do NOT put `stop_after_if` inside `value` of a `whileloopflow`
|
||||
- `stop_after_all_iters_if` is for checks after the whole loop finishes, not the normal per-iteration break condition
|
||||
- When a `whileloopflow` carries state forward between iterations, use `flow_input.iter.value` as the current loop value and provide an explicit first-iteration fallback when needed
|
||||
- Use `flow_input.iter.index` only when the loop logic is truly based on the iteration index, not as a replacement for the current loop value
|
||||
- If the user asks for a final scalar/object after a loop, add a normal step after the loop that extracts the final value from the loop result instead of returning the whole loop result array
|
||||
|
||||
Correct `whileloopflow` shape:
|
||||
|
||||
```yaml
- id: loop_until_done
  stop_after_if:
    expr: result.done === true
    skip_if_stopped: false
  value:
    type: whileloopflow
    skip_failures: false
    modules:
      - id: advance_state
        value:
          type: rawscript
          input_transforms:
            state:
              type: javascript
              expr: flow_input.iter && flow_input.iter.value !== undefined ? flow_input.iter.value : flow_input.initial_state
- id: return_final_state
  value:
    type: rawscript
    input_transforms:
      final_state:
        type: javascript
        expr: results.loop_until_done[results.loop_until_done.length - 1]
```
|
||||
|
||||
Incorrect `whileloopflow` patterns:
|
||||
|
||||
```yaml
- id: loop_until_done
  value:
    type: whileloopflow
    stop_after_if:
      expr: result.done === true
```
|
||||
|
||||
```yaml
|
||||
input_transforms:
|
||||
state:
|
||||
type: javascript
|
||||
expr: flow_input.iter.index
|
||||
```
|
||||
|
||||
```yaml
|
||||
input_transforms:
|
||||
final_state:
|
||||
type: javascript
|
||||
expr: results.loop_until_done
|
||||
```
|
||||
|
||||
## Approval / Suspend Structure
|
||||
|
||||
- `suspend` belongs on the flow module object itself, as a sibling of `id` and `value`
|
||||
- Never put `suspend` inside `value`
|
||||
|
||||
Correct shape:
|
||||
|
||||
```yaml
- id: request_approval
  suspend:
    required_events: 1
    resume_form:
      schema:
        type: object
        properties:
          comment:
            type: string
        required: [comment]
  value:
    type: identity
```
|
||||
|
||||
Incorrect shape:
|
||||
|
||||
```yaml
- id: request_approval
  value:
    type: rawscript
    suspend:
      required_events: 1
```
|
||||
|
||||
## Branch Result Scope Rules
|
||||
|
||||
- Inside a branch, you may reference earlier outer steps and earlier steps in the same branch
|
||||
- Outside a `branchone`, do NOT reference ids of steps that only exist inside its branches or default branch. Use `results.<branchone_module_id>` instead
|
||||
- Outside a `branchall`, do NOT reference ids of steps inside its branches. Use `results.<branchall_module_id>` instead
|
||||
- If downstream steps need a stable shape after a branch, make each branch return the same fields
|
||||
- When needed, add a normalization step immediately after the branch and consume `results.<branch_module_id>` there
|
||||
|
||||
Correct after `branchone`:
|
||||
|
||||
```yaml
|
||||
- id: route_order
|
||||
value:
|
||||
type: branchone
|
||||
...
|
||||
- id: send_confirmation
|
||||
value:
|
||||
input_transforms:
|
||||
routed:
|
||||
type: javascript
|
||||
expr: results.route_order
|
||||
```
|
||||
|
||||
Incorrect after `branchone`:
|
||||
|
||||
```yaml
|
||||
expr: results.create_shipment
|
||||
expr: results.create_backorder
|
||||
```
|
||||
|
||||
Correct after `branchall`:
|
||||
|
||||
```yaml
|
||||
- id: enrich_parallel
|
||||
value:
|
||||
type: branchall
|
||||
parallel: true
|
||||
...
|
||||
- id: combine_data
|
||||
value:
|
||||
input_transforms:
|
||||
enrichments:
|
||||
type: javascript
|
||||
expr: results.enrich_parallel
|
||||
```
|
||||
|
||||
## Input Transforms
|
||||
|
||||
@@ -60,14 +245,14 @@ JavaScript transform (dynamic expression):
|
||||
- For flow inputs: Use type `"object"` with format `"resource-{type}"` (e.g., `"resource-postgresql"`)
|
||||
- For step inputs: Use static value `"$res:path/to/resource"`
|
||||
|
||||
## Failure Handler
|
||||
## Final Structural Self-Check
|
||||
|
||||
Executes when any step fails. Has access to error details:
|
||||
Before finalizing a flow, verify:
|
||||
|
||||
- `error.message` - Error message
|
||||
- `error.step_id` - ID of failed step
|
||||
- `error.name` - Error name
|
||||
- `error.stack` - Stack trace
|
||||
- any preprocessor is in `value.preprocessor_module`
|
||||
- any failure handler is in `value.failure_module`
|
||||
- any approval step has module-level `suspend`
|
||||
- no downstream step references inner branch step ids from outside the branch
|
||||
|
||||
## S3 Object Operations
|
||||
|
||||
|
||||
@@ -20,6 +20,53 @@ The OpenFlow schema (openflow.openapi.yaml) is the source of truth for flow stru
|
||||
- `preprocessor` - Reserved for preprocessor module
|
||||
- `Input` - Reserved for flow input reference
|
||||
|
||||
## Hard Structural Rules
|
||||
|
||||
These are strict Windmill schema rules. Follow them exactly.
|
||||
|
||||
- `value.modules` is only for normal sequential steps
|
||||
- `value.preprocessor_module` and `value.failure_module` are special top-level fields inside `value`, not entries in `value.modules`
|
||||
- If a flow needs a preprocessor, create `value.preprocessor_module` with `id: preprocessor`
|
||||
- If a flow needs a failure handler, create `value.failure_module` with `id: failure`
|
||||
- Do NOT create regular modules inside `value.modules` named `preprocessor` or `failure`
|
||||
- `preprocessor_module` and `failure_module` only support `script` or `rawscript`
|
||||
- `preprocessor_module` runs before normal modules and cannot reference `results.*`
|
||||
- `failure_module` can use the `error` object with `error.message`, `error.step_id`, `error.name`, and `error.stack`
|
||||
|
||||
Correct shape:
|
||||
|
||||
```yaml
value:
  preprocessor_module:
    id: preprocessor
    value:
      type: rawscript
      ...
  failure_module:
    id: failure
    value:
      type: rawscript
      ...
  modules:
    - id: process_event
      value:
        type: rawscript
        ...
```
|
||||
|
||||
Incorrect shape:
|
||||
|
||||
```yaml
value:
  modules:
    - id: preprocessor
      ...
    - id: process_event
      ...
    - id: failure
      ...
```
|
||||
|
||||
## Module ID Rules
|
||||
|
||||
- Must be unique across the entire flow
|
||||
@@ -35,10 +82,148 @@ The OpenFlow schema (openflow.openapi.yaml) is the source of truth for flow stru
|
||||
## Data Flow Between Steps
|
||||
|
||||
- `flow_input.property` - Access flow input parameters
|
||||
- `results.step_id` - Access output from a previous step
|
||||
- `results.step_id.property` - Access specific property from previous step output
|
||||
- `flow_input.iter.value` - Current item when inside a for-loop
|
||||
- `flow_input.iter.index` - Current index when inside a for-loop
|
||||
- `results.step_id` - Access output from a previous step only when that step result is in scope
|
||||
- `results.step_id.property` - Access specific property from a previous step output only when that step result is in scope
|
||||
- `flow_input.iter.value` - Current iteration value when inside a loop (`forloopflow` or `whileloopflow`)
|
||||
- `flow_input.iter.index` - Current loop index when inside a loop (`forloopflow` or `whileloopflow`)
|
||||
|
||||
## Loop Structure Rules
|
||||
|
||||
- For `whileloopflow`, use module-level `stop_after_if` on the loop module itself when the loop should stop after an iteration result
|
||||
- Do NOT put `stop_after_if` inside `value` of a `whileloopflow`
|
||||
- `stop_after_all_iters_if` is for checks after the whole loop finishes, not the normal per-iteration break condition
|
||||
- When a `whileloopflow` carries state forward between iterations, use `flow_input.iter.value` as the current loop value and provide an explicit first-iteration fallback when needed
|
||||
- Use `flow_input.iter.index` only when the loop logic is truly based on the iteration index, not as a replacement for the current loop value
|
||||
- If the user asks for a final scalar/object after a loop, add a normal step after the loop that extracts the final value from the loop result instead of returning the whole loop result array
|
||||
|
||||
Correct `whileloopflow` shape:
|
||||
|
||||
```yaml
- id: loop_until_done
  stop_after_if:
    expr: result.done === true
    skip_if_stopped: false
  value:
    type: whileloopflow
    skip_failures: false
    modules:
      - id: advance_state
        value:
          type: rawscript
          input_transforms:
            state:
              type: javascript
              expr: flow_input.iter && flow_input.iter.value !== undefined ? flow_input.iter.value : flow_input.initial_state
- id: return_final_state
  value:
    type: rawscript
    input_transforms:
      final_state:
        type: javascript
        expr: results.loop_until_done[results.loop_until_done.length - 1]
```
|
||||
|
||||
Incorrect `whileloopflow` patterns:
|
||||
|
||||
```yaml
- id: loop_until_done
  value:
    type: whileloopflow
    stop_after_if:
      expr: result.done === true
```
|
||||
|
||||
```yaml
|
||||
input_transforms:
|
||||
state:
|
||||
type: javascript
|
||||
expr: flow_input.iter.index
|
||||
```
|
||||
|
||||
```yaml
|
||||
input_transforms:
|
||||
final_state:
|
||||
type: javascript
|
||||
expr: results.loop_until_done
|
||||
```
|
||||
|
||||
## Approval / Suspend Structure
|
||||
|
||||
- `suspend` belongs on the flow module object itself, as a sibling of `id` and `value`
|
||||
- Never put `suspend` inside `value`
|
||||
|
||||
Correct shape:
|
||||
|
||||
```yaml
- id: request_approval
  suspend:
    required_events: 1
    resume_form:
      schema:
        type: object
        properties:
          comment:
            type: string
        required: [comment]
  value:
    type: identity
```
|
||||
|
||||
Incorrect shape:
|
||||
|
||||
```yaml
- id: request_approval
  value:
    type: rawscript
    suspend:
      required_events: 1
```
|
||||
|
||||
## Branch Result Scope Rules
|
||||
|
||||
- Inside a branch, you may reference earlier outer steps and earlier steps in the same branch
|
||||
- Outside a `branchone`, do NOT reference ids of steps that only exist inside its branches or default branch. Use `results.<branchone_module_id>` instead
|
||||
- Outside a `branchall`, do NOT reference ids of steps inside its branches. Use `results.<branchall_module_id>` instead
|
||||
- If downstream steps need a stable shape after a branch, make each branch return the same fields
|
||||
- When needed, add a normalization step immediately after the branch and consume `results.<branch_module_id>` there
|
||||
|
||||
Correct after `branchone`:
|
||||
|
||||
```yaml
|
||||
- id: route_order
|
||||
value:
|
||||
type: branchone
|
||||
...
|
||||
- id: send_confirmation
|
||||
value:
|
||||
input_transforms:
|
||||
routed:
|
||||
type: javascript
|
||||
expr: results.route_order
|
||||
```
|
||||
|
||||
Incorrect after `branchone`:
|
||||
|
||||
```yaml
|
||||
expr: results.create_shipment
|
||||
expr: results.create_backorder
|
||||
```
|
||||
|
||||
Correct after `branchall`:
|
||||
|
||||
```yaml
|
||||
- id: enrich_parallel
|
||||
value:
|
||||
type: branchall
|
||||
parallel: true
|
||||
...
|
||||
- id: combine_data
|
||||
value:
|
||||
input_transforms:
|
||||
enrichments:
|
||||
type: javascript
|
||||
expr: results.enrich_parallel
|
||||
```
|
||||
|
||||
## Input Transforms
|
||||
|
||||
@@ -55,14 +240,14 @@ JavaScript transform (dynamic expression):
|
||||
- For flow inputs: Use type `"object"` with format `"resource-{type}"` (e.g., `"resource-postgresql"`)
|
||||
- For step inputs: Use static value `"$res:path/to/resource"`
|
||||
|
||||
## Failure Handler
|
||||
## Final Structural Self-Check
|
||||
|
||||
Executes when any step fails. Has access to error details:
|
||||
Before finalizing a flow, verify:
|
||||
|
||||
- `error.message` - Error message
|
||||
- `error.step_id` - ID of failed step
|
||||
- `error.name` - Error name
|
||||
- `error.stack` - Stack trace
|
||||
- any preprocessor is in `value.preprocessor_module`
|
||||
- any failure handler is in `value.failure_module`
|
||||
- any approval step has module-level `suspend`
|
||||
- no downstream step references inner branch step ids from outside the branch
|
||||
|
||||
## S3 Object Operations
|
||||
|
||||
|
||||
38
system_prompts/base/flow-chat-special-modules.md
Normal file
38
system_prompts/base/flow-chat-special-modules.md
Normal file
@@ -0,0 +1,38 @@
|
||||
## Special Modules
|
||||
|
||||
- Use `set_preprocessor_module` to add, replace, or remove the top-level `value.preprocessor_module`
|
||||
- Use `set_failure_module` to add, replace, or remove the top-level `value.failure_module`
|
||||
- Use `set_flow_json` only when you are replacing the whole flow, including normal modules and optional special modules
|
||||
|
||||
**Example - Update only the special modules:**
|
||||
```javascript
|
||||
set_preprocessor_module({
|
||||
module: JSON.stringify({
|
||||
id: "preprocessor",
|
||||
value: {
|
||||
type: "rawscript",
|
||||
language: "bun",
|
||||
content: "export async function preprocessor(payload: string) { const trimmed = payload.trim(); if (!trimmed) { throw new Error('payload must not be empty'); } return { payload: trimmed }; }",
|
||||
input_transforms: {
|
||||
payload: { type: "javascript", expr: "flow_input.payload" }
|
||||
}
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
set_failure_module({
|
||||
module: JSON.stringify({
|
||||
id: "failure",
|
||||
value: {
|
||||
type: "rawscript",
|
||||
language: "bun",
|
||||
content: "export async function main(message: string, name: string, step_id: string) { return { message, name, step_id }; }",
|
||||
input_transforms: {
|
||||
message: { type: "javascript", expr: "error.message" },
|
||||
name: { type: "javascript", expr: "error.name" },
|
||||
step_id: { type: "javascript", expr: "error.step_id" }
|
||||
}
|
||||
}
|
||||
})
|
||||
})
|
||||
```
|
||||
@@ -1162,6 +1162,7 @@ def main():
|
||||
|
||||
script_base = read_markdown_file(base_dir / "script-base.md")
|
||||
flow_base = read_markdown_file(base_dir / "flow-base.md")
|
||||
flow_chat_special_modules = read_markdown_file(base_dir / "flow-chat-special-modules.md")
|
||||
|
||||
# Read language files
|
||||
languages = {}
|
||||
@@ -1212,6 +1213,7 @@ def main():
|
||||
# Base prompts
|
||||
'SCRIPT_BASE': script_base,
|
||||
'FLOW_BASE': flow_base,
|
||||
'FLOW_CHAT_SPECIAL_MODULES': flow_chat_special_modules,
|
||||
|
||||
# SDKs
|
||||
'SDK_TYPESCRIPT': ts_sdk_md,
|
||||
|
||||
Reference in New Issue
Block a user