test: record improved flow benchmark

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
centdix
2026-04-09 15:59:02 +02:00
parent 47044b1635
commit f45af0624b

View File

@@ -1 +1,2 @@
{"createdAt":"2026-04-09T13:56:20.263Z","gitSha":"431c1d7f75c6f5c90f063dd18420b83a7b121353","mode":"flow","runs":1,"runModel":"anthropic:claude-haiku-4-5-20251001","judgeModel":"claude-sonnet-4-6","caseCount":13,"attemptCount":13,"passedAttempts":9,"passRate":0.6923076923076923,"averageDurationMs":34728.46153846154,"failedCaseIds":["flow-test4-order-processing-loop","flow-test6-ai-agent-tools","flow-test7-simple-modification","flow-test11-preprocessor-and-failure-handler"]}
{"createdAt":"2026-04-09T13:58:53.544Z","gitSha":"47044b163510bf0b51d2ad2d8cf984c51f478415","mode":"flow","runs":1,"runModel":"anthropic:claude-haiku-4-5-20251001","judgeModel":"claude-sonnet-4-6","caseCount":13,"attemptCount":13,"passedAttempts":10,"passRate":0.7692307692307693,"averageDurationMs":30970.46153846154,"failedCaseIds":["flow-test6-ai-agent-tools","flow-test7-simple-modification","flow-test11-preprocessor-and-failure-handler"]}