feat(utils): add flow.yaml validation function (#6316)

* add validateflow function

* cleaner code

* preprocess json

* cleaning

* create specific package

* cleaning

* add tests
This commit is contained in:
centdix
2025-08-01 13:14:39 +02:00
committed by GitHub
parent 6ea2029265
commit 493707668b
19 changed files with 4754 additions and 0 deletions

85
windmill-yaml-validator/.gitignore vendored Normal file
View File

@@ -0,0 +1,85 @@
# Dependencies
node_modules/
npm-debug.log*
yarn-debug.log*
yarn-error.log*
# Build outputs
dist/
build/
*.tsbuildinfo
# Generated client
src/gen/
# Environment variables
.env
.env.local
.env.*.local
# IDE/Editor files
.idea/
*.swp
*.swo
*~
# OS generated files
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db
# Logs
*.log
logs/
# Runtime data
pids/
*.pid
*.seed
*.pid.lock
# Coverage directory used by tools like istanbul
coverage/
*.lcov
# nyc test coverage
.nyc_output
# Dependency directories
jspm_packages/
# Optional npm cache directory
.npm
# Optional eslint cache
.eslintcache
# Microbundle cache
.rpt2_cache/
.rts2_cache_cjs/
.rts2_cache_es/
.rts2_cache_umd/
# Optional REPL history
.node_repl_history
# Output of 'npm pack'
*.tgz
# Yarn Integrity file
.yarn-integrity
# parcel-bundler cache (https://parceljs.org/)
.cache
.parcel-cache
# TypeScript cache
*.tsbuildinfo
# Temporary folders
tmp/
temp/

View File

@@ -0,0 +1,30 @@
#!/usr/bin/env bash
# Generates src/gen/openflow.json from ../openflow.openapi.yaml:
#   1. bundles the OpenAPI document into a single JSON file with redocly, then
#   2. strips every `discriminator.mapping` entry, which ajv does not support.
#
# Any failing step aborts the script with a non-zero status so that callers
# chaining on it (e.g. `./gen_openflow_schema.sh && tsc`) never build against
# a missing, truncated, or unprocessed schema.
set -euo pipefail

script_dirpath="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
output_dirpath="${script_dirpath}/src/gen"
output_filepath="${output_dirpath}/openflow.json"

mkdir -p "${output_dirpath}"

npx @redocly/openapi-cli@latest bundle "${script_dirpath}/../openflow.openapi.yaml" --ext json > "${output_filepath}"

# Remove discriminator mapping from openflow.json as it's not supported by ajv.
# The path is handed over through the environment instead of being spliced into
# the JavaScript source, so quotes or backslashes in the path cannot break it.
OPENFLOW_JSON_PATH="${output_filepath}" node -e "
const fs = require('fs');
const filePath = process.env.OPENFLOW_JSON_PATH;
try {
  const schema = JSON.parse(fs.readFileSync(filePath, 'utf8'));
  // Recursively walks the schema and deletes every discriminator.mapping it finds.
  function removeMapping(obj) {
    if (obj && typeof obj === 'object') {
      if (obj.discriminator?.mapping) delete obj.discriminator.mapping;
      for (const v of Object.values(obj)) removeMapping(v);
    }
  }
  removeMapping(schema);
  fs.writeFileSync(filePath, JSON.stringify(schema, null, 2));
  console.log('Removed discriminator mappings from openflow.json');
} catch (e) {
  console.error('Error removing discriminator mappings:', e);
  // Propagate the failure: without this the script would exit 0 and the build would continue.
  process.exit(1);
}
"

View File

@@ -0,0 +1,14 @@
module.exports = {
preset: 'ts-jest',
testEnvironment: 'node',
roots: ['<rootDir>/src'],
testMatch: ['**/__tests__/**/*.ts', '**/?(*.)+(spec|test).ts'],
transform: {
'^.+\\.ts$': 'ts-jest',
},
collectCoverageFrom: [
'src/**/*.ts',
'!src/**/*.d.ts',
'!src/gen/**',
],
};

3972
windmill-yaml-validator/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,32 @@
{
"name": "windmill-yaml-validator",
"version": "1.0.0",
"description": "YAML validator for Windmill",
"main": "dist/index.js",
"types": "dist/index.d.ts",
"scripts": {
"build": "./gen_openflow_schema.sh && tsc",
"test": "jest",
"test:watch": "jest --watch",
"prepublishOnly": "npm run build"
},
"keywords": [
"windmill"
],
"author": "Ruben Fiszel",
"license": "Apache-2.0",
"devDependencies": {
"@types/jest": "^29.5.0",
"@types/node": "^24.1.0",
"jest": "^29.5.0",
"ts-jest": "^29.1.0",
"typescript": "^5.0.0"
},
"files": [
"dist/**/*"
],
"dependencies": {
"@stoplight/yaml": "^4.3.0",
"ajv": "^8.17.1"
}
}

View File

@@ -0,0 +1,11 @@
#!/bin/bash
# Cleans, installs, builds and publishes the npm package that lives next to
# this script. An optional first argument is forwarded to `npm publish`.
set -euo pipefail

script_dirpath="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
args=${1:-}

# npm acts on the package in the current directory, so move next to this script
# first; otherwise running it from elsewhere would build/publish another package.
cd "${script_dirpath}"

rm -rf "${script_dirpath}/dist"
npm install
npm run build
# Left unquoted on purpose: an empty value expands to no argument at all.
npm publish ${args}

View File

@@ -0,0 +1 @@
// Re-exports every export of the ./validation module.
export * from "./validation";

View File

@@ -0,0 +1,285 @@
import { FlowValidator } from '../flow-validator';
import * as fs from 'fs';
import * as path from 'path';
describe('FlowValidator', () => {
  /** Element type of the `errors` array returned by `validateFlow`. */
  type FlowError = ReturnType<FlowValidator['validateFlow']>['errors'][number];

  const samplesDir = path.join(__dirname, 'test-samples');
  let validator: FlowValidator;

  /** Loads a YAML fixture from the test-samples directory as UTF-8 text. */
  function readSample(filename: string): string {
    const samplePath = path.join(samplesDir, filename);
    return fs.readFileSync(samplePath, 'utf-8');
  }

  /** Reads a fixture and runs it through the validator created for the current test. */
  function validateSample(filename: string) {
    return validator.validateFlow(readSample(filename));
  }

  /** Number of errors for which the predicate returns a truthy value. */
  function countErrors(errors: FlowError[], matches: (error: FlowError) => unknown): number {
    return errors.filter(matches).length;
  }

  /** Number of errors carrying the given AJV keyword. */
  function countByKeyword(errors: FlowError[], keyword: string): number {
    return countErrors(errors, (error) => error.keyword === keyword);
  }

  /** True for a `required` error whose message mentions at least one of the given words. */
  function isRequiredErrorMentioning(error: FlowError, words: string[]): boolean {
    if (error.keyword !== 'required') {
      return false;
    }
    const message = error.message ?? '';
    return words.some((word) => message.includes(word));
  }

  /** True when the error's instance path contains at least one of the given fragments. */
  function pathContainsAny(error: FlowError, fragments: string[]): boolean {
    return fragments.some((fragment) => error.instancePath.includes(fragment));
  }

  beforeEach(() => {
    validator = new FlowValidator();
  });

  describe('constructor', () => {
    it('should create a validator instance', () => {
      expect(validator).toBeInstanceOf(FlowValidator);
    });

    it('should initialize without throwing', () => {
      const construct = () => new FlowValidator();
      expect(construct).not.toThrow();
    });
  });

  describe('validateFlow', () => {
    it('should throw error for non-string input', () => {
      const nonStringInputs: unknown[] = [null, 123, {}, []];
      for (const input of nonStringInputs) {
        expect(() => validator.validateFlow(input as any)).toThrow('Document must be a string');
      }
    });

    describe('valid flows', () => {
      it('should validate a valid minimal flow from sample file', () => {
        const { errors, parsed } = validateSample('valid-minimal.yaml');

        expect(errors).toHaveLength(0);
        expect(parsed).toBeDefined();
        expect(parsed.data).toMatchObject({
          summary: 'Test Flow',
          value: {
            modules: [],
          },
        });
      });

      it('should validate a script flow from sample file', () => {
        const { errors, parsed } = validateSample('valid-script-flow.yaml');

        const expectedSchema = expect.objectContaining({
          type: 'object',
          properties: expect.objectContaining({
            message: expect.objectContaining({
              type: 'string',
              default: 'Hello World',
            }),
          }),
        });
        const expectedModule = expect.objectContaining({
          id: 'script_step',
          value: expect.objectContaining({
            type: 'rawscript',
            language: 'deno',
            input_transforms: {},
          }),
        });

        expect(errors).toHaveLength(0);
        expect(parsed.data).toMatchObject({
          summary: 'Simple Script Flow',
          description: 'A basic flow that runs a TypeScript script',
          schema: expectedSchema,
          value: {
            modules: expect.arrayContaining([expectedModule]),
          },
        });
      });
    });

    describe('invalid flows', () => {
      it('should return errors for missing summary from sample file', () => {
        const { errors } = validateSample('invalid-missing-summary.yaml');

        expect(errors.length).toBeGreaterThan(0);
        const reportsMissingSummary = errors.some(
          (error) =>
            error.instancePath === '' &&
            error.keyword === 'required' &&
            error.params?.missingProperty === 'summary',
        );
        expect(reportsMissingSummary).toBe(true);
      });

      it('should return errors for invalid types from sample file', () => {
        const { errors } = validateSample('invalid-wrong-types.yaml');

        expect(errors.length).toBeGreaterThan(0);
        const reportsSummaryType = errors.some(
          (error) => error.instancePath === '/summary' && error.keyword === 'type',
        );
        expect(reportsSummaryType).toBe(true);
      });

      it('should return errors for invalid language from sample file', () => {
        const { errors } = validateSample('invalid-language.yaml');

        expect(errors.length).toBeGreaterThan(0);
        const reportsLanguageEnum = errors.some(
          (error) =>
            error.instancePath === '/value/modules/0/value/language' && error.keyword === 'enum',
        );
        expect(reportsLanguageEnum).toBe(true);
      });

      it('should handle empty file from sample', () => {
        const { parsed, errors } = validateSample('empty.yaml');

        expect(parsed).toBeDefined();
        expect(errors.length).toBeGreaterThan(0);
      });

      it('should handle complex invalid flow with comprehensive error detection', () => {
        const { errors } = validateSample('invalid-complex-flow.yaml');

        // The fixture is expected to produce many errors.
        expect(errors.length).toBeGreaterThan(20);

        // Missing required fields.
        expect(countByKeyword(errors, 'required')).toBeGreaterThan(10);

        // Invalid enum values (invalid language).
        expect(countByKeyword(errors, 'enum')).toBeGreaterThan(0);

        // Type errors (string vs number, boolean vs string, etc.).
        expect(countByKeyword(errors, 'type')).toBeGreaterThan(5);

        // Invalid forloop structure, detected through forloop-related required fields.
        const forloopRequired = countErrors(errors, (error) =>
          isRequiredErrorMentioning(error, ['modules', 'iterator', 'skip_failures']),
        );
        expect(forloopRequired).toBeGreaterThan(0);

        // Invalid branch structure.
        const branchRequired = countErrors(errors, (error) =>
          isRequiredErrorMentioning(error, ['branches', 'default', 'expr']),
        );
        expect(branchRequired).toBeGreaterThan(0);

        // Discriminator errors for invalid transform types.
        expect(countByKeyword(errors, 'discriminator')).toBeGreaterThan(0);
      });

      it('should handle deeply nested invalid structures with detailed error reporting', () => {
        const { errors } = validateSample('invalid-nested-structures.yaml');

        // The fixture is expected to produce many nested errors.
        expect(errors.length).toBeGreaterThan(10);

        // Errors whose instance path has many levels (deep nesting).
        const deeplyNested = countErrors(
          errors,
          (error) => error.instancePath.split('/').length > 6,
        );
        expect(deeplyNested).toBeGreaterThan(0);

        // Transform-related errors.
        const transformRelated = countErrors(errors, (error) =>
          pathContainsAny(error, ['input_transforms', 'iterator']),
        );
        expect(transformRelated).toBeGreaterThan(0);

        // Type errors in general.
        expect(countByKeyword(errors, 'type')).toBeGreaterThan(0);

        // Missing required fields in nested structures.
        const nestedRequired = countErrors(
          errors,
          (error) => error.keyword === 'required' && error.instancePath.includes('/modules/'),
        );
        expect(nestedRequired).toBeGreaterThan(0);

        // Discriminator errors in nested transforms.
        expect(countByKeyword(errors, 'discriminator')).toBeGreaterThan(0);
      });

      it('should provide specific error locations for complex validation failures', () => {
        const { errors } = validateSample('invalid-complex-flow.yaml');

        // Errors should carry meaningful instance paths.
        const withPath = countErrors(
          errors,
          (error) => error.instancePath && error.instancePath.length > 0,
        );
        expect(withPath).toBeGreaterThan(5);

        // Specific problematic modules should be identifiable.
        const moduleSpecific = countErrors(errors, (error) =>
          error.instancePath.includes('/value/modules/'),
        );
        expect(moduleSpecific).toBeGreaterThan(0);

        // Every error should come with a non-empty message.
        const withMessage = countErrors(
          errors,
          (error) => error.message && error.message.length > 0,
        );
        expect(withMessage).toBe(errors.length);
      });

      it('should handle all major flow control structures with validation errors', () => {
        const { errors } = validateSample('invalid-complex-flow.yaml');

        expect(errors.length).toBeGreaterThan(20);

        // Errors located inside the flow control modules, identified by instance path.
        const flowControlModulePaths = [
          '/modules/2/', // forloop module
          '/modules/3/', // forloop module
          '/modules/4/', // branch module
          '/modules/5/', // branch module
          '/modules/6/', // branch all module
          '/modules/7/', // while loop module
        ];
        const flowControl = countErrors(errors, (error) =>
          pathContainsAny(error, flowControlModulePaths),
        );
        expect(flowControl).toBeGreaterThan(10);

        // Errors in script and flow references.
        const pathReferenceModulePaths = [
          '/modules/8/', // script path module
          '/modules/9/', // flow path module
        ];
        const pathReference = countErrors(errors, (error) =>
          pathContainsAny(error, pathReferenceModulePaths),
        );
        expect(pathReference).toBeGreaterThan(0);

        // Modules with missing IDs.
        const missingId = countErrors(errors, (error) => isRequiredErrorMentioning(error, ['id']));
        expect(missingId).toBeGreaterThan(0);

        // Type mismatches at the flow level (under /value/ but outside any module).
        const flowLevelType = countErrors(
          errors,
          (error) =>
            error.instancePath.startsWith('/value/') &&
            !error.instancePath.includes('/modules/') &&
            error.keyword === 'type',
        );
        expect(flowLevelType).toBeGreaterThan(0);
      });
    });
  });
});

View File

@@ -0,0 +1,102 @@
summary: Complex Invalid Flow
description: A flow with multiple validation errors across different components
value:
modules:
# Module with missing required fields
- id: invalid_module_1
value:
type: rawscript
# Missing required input_transforms
# Missing required content
# Missing required language
# Module with invalid language enum
- id: invalid_module_2
value:
type: rawscript
input_transforms: {}
content: "console.log('hello');"
language: invalid_language
# Invalid forloop structure
- id: invalid_forloop_1
value:
type: forloopflow
# Missing required modules
# Missing required iterator
# Missing required skip_failures
parallelism: "not_a_number"
# Forloop with invalid iterator
- id: invalid_forloop_2
value:
type: forloopflow
modules: []
iterator:
type: invalid_transform_type
skip_failures: "not_a_boolean"
# Invalid branch one structure
- id: invalid_branch_1
value:
type: branchone
# Missing required branches
# Missing required default
invalid_field: "should not be here"
# Branch one with invalid branches structure
- id: invalid_branch_2
value:
type: branchone
branches:
- summary: "Branch 1"
# Missing required expr
# Missing required modules
modules: "not_an_array"
default: "not_an_array"
# Invalid branch all structure
- id: invalid_branch_all
value:
type: branchall
branches:
- summary: 123 # should be string
skip_failure: "not_a_boolean"
modules: []
- # Missing required modules
summary: "Branch 2"
parallel: "not_a_boolean"
# Invalid while loop
- id: invalid_while_loop
value:
type: whileloopflow
modules: "not_an_array"
skip_failures: 42 # should be boolean
# Missing required condition
# Invalid script path reference
- id: invalid_script_path
value:
type: script
# Missing required path
# Missing required input_transforms
invalid_hash: "not_valid"
# Invalid flow path reference
- id: invalid_flow_path
value:
type: flow
path: 123 # should be string
# Missing required input_transforms
# Module with invalid ID type
- 123: # ID should be string, not number
value:
type: identity
# Invalid top-level flow properties
invalid_property: "should not be here"
concurrent_limit: "not_a_number"
cache_ttl: -1 # should be positive
priority: "not_a_number"

View File

@@ -0,0 +1,24 @@
summary: New flow
description: ""
value:
modules:
- id: k
value:
type: rawscript
content: "!inline k.inline_script.ts"
language: invalid
input_transforms:
x:
type: javascript
expr: flow_input.k
schema:
$schema: https://json-schema.org/draft/2020-12/schema
type: object
order:
- k
properties:
k:
type: string
description: ""
default: ""
required: []

View File

@@ -0,0 +1,2 @@
value:
modules: []

View File

@@ -0,0 +1,89 @@
summary: Invalid Nested Structures
description: Testing deeply nested invalid structures
value:
modules:
- id: deeply_nested_invalid
value:
type: forloopflow
modules:
- id: nested_branch
value:
type: branchone
branches:
- expr: "invalid expression syntax"
modules:
- id: triple_nested
value:
type: whileloopflow
modules:
- id: quad_nested
value:
type: rawscript
input_transforms:
invalid_transform:
type: invalid_type
# Missing required fields for transform
content: 123 # should be string
language: "cobol" # invalid enum
skip_failures: [] # should be boolean
# Missing condition
default: null # should be array
# Missing required branches structure
iterator:
type: static
# Missing required value
skip_failures: {} # should be boolean
parallelism: -5 # should be positive integer
- id: invalid_input_transforms
value:
type: rawscript
input_transforms:
transform1:
type: javascript
# Missing required expr for javascript transform
transform2:
type: static
# Missing required value for static transform
transform3:
type: unknown_type
value: "test"
content: "" # empty content
language: deno
- id: invalid_suspend_config
suspend:
required_events: "not_a_number"
timeout: -1 # should be positive
resume_form:
schema: "not_an_object"
user_auth_required: "not_a_boolean"
user_groups_required:
type: invalid_transform
value:
type: identity
- id: invalid_mock_config
mock:
enabled: "not_a_boolean"
return_value:
# Circular reference attempt
self_ref: *self_ref
value:
type: rawscript
input_transforms: {}
content: "test"
language: python3
- id: invalid_retry_config
retry:
constant:
attempts: "not_a_number"
seconds: -1
exponential:
attempts: 0 # should be positive
multiplier: "not_a_number"
seconds: -5
random_factor: 150 # should be 0-100
value:
type: identity

View File

@@ -0,0 +1,3 @@
summary: 123
description: []
value: "not an object"

View File

@@ -0,0 +1,3 @@
summary: Test Flow
value:
modules: []

View File

@@ -0,0 +1,20 @@
summary: Simple Script Flow
description: A basic flow that runs a TypeScript script
schema:
type: object
properties:
message:
type: string
default: "Hello World"
value:
modules:
- id: script_step
value:
type: rawscript
input_transforms: {}
content: |
export async function main(message: string) {
console.log(message);
return { result: message.toUpperCase() };
}
language: deno

View File

@@ -0,0 +1,50 @@
import Ajv, { AnySchema, ErrorObject, ValidateFunction } from 'ajv';
import { parseWithPointers, YamlParserResult } from '@stoplight/yaml';
import openFlowSchema from '../gen/openflow.json';
/**
 * Validates YAML flow documents against the OpenFlow JSON schema.
 *
 * The AJV instance and the compiled OpenFlow validator are created once in the
 * constructor and reused by every call to {@link FlowValidator.validateFlow}.
 */
export class FlowValidator {
  private readonly validate: ValidateFunction;

  /**
   * Registers every component schema of the generated OpenFlow document with
   * AJV and resolves the validator for the `OpenFlow` schema.
   *
   * @throws Error if the generated schema has no `OpenFlow` component, so a
   * broken or outdated generated file is reported at construction time instead
   * of surfacing later as an opaque failure inside `validateFlow`.
   */
  constructor() {
    const ajv = new Ajv({ strict: false, allErrors: true, discriminator: true });

    // Each component is registered under the same key that the schemas use in
    // their internal `$ref`s.
    for (const [name, schema] of Object.entries(openFlowSchema.components.schemas)) {
      ajv.addSchema(schema as AnySchema, `#/components/schemas/${name}`);
    }

    const validate = ajv.getSchema('#/components/schemas/OpenFlow');
    if (!validate) {
      throw new Error('OpenFlow schema not found in the generated openflow.json');
    }
    this.validate = validate;
  }

  /**
   * Validates a flow document against the OpenFlow schema.
   *
   * @param doc - The YAML flow document as a string
   * @returns Object containing the parsed document and any validation errors
   * (an empty array when the document is valid)
   * @throws Error if `doc` is not a string
   */
  validateFlow(doc: string): {
    parsed: YamlParserResult<unknown>;
    errors: ErrorObject[];
  } {
    if (typeof doc !== 'string') {
      throw new Error('Document must be a string');
    }

    const parsed = parseWithPointers(doc);
    const isValid = this.validate(parsed.data);

    return {
      parsed,
      // Fall back to an empty list rather than asserting non-null, so the
      // declared return type holds even if AJV reports no error array.
      errors: isValid ? [] : this.validate.errors ?? [],
    };
  }
}

View File

@@ -0,0 +1 @@
// Re-exports every export of the ./flow-validator module.
export * from './flow-validator';

View File

@@ -0,0 +1,30 @@
{
"compilerOptions": {
"target": "ES2022",
"module": "commonjs",
"lib": ["ES2022"],
"declaration": true,
"outDir": "./dist",
"rootDir": "./src",
"strict": true,
"noUnusedLocals": false,
"noUnusedParameters": false,
"noImplicitReturns": true,
"noFallthroughCasesInSwitch": true,
"moduleResolution": "node",
"baseUrl": "./",
"esModuleInterop": true,
"experimentalDecorators": true,
"emitDecoratorMetadata": true,
"skipLibCheck": true,
"forceConsistentCasingInFileNames": true,
"resolveJsonModule": true
},
"include": [
"src/**/*"
],
"exclude": [
"node_modules",
"dist"
]
}