# cuga-agent/src/system_tests/profiling/config/full_matrix_comparison.yaml
# Sami Marreed
# feat: docker-v1 with optimized frontend
# 0646b18
# Full Matrix Comparison: Multiple Providers x Multiple Modes
# Compare all combinations of providers and modes
# Profiling matrix definition. Per the file header, every combination of the
# listed provider configs and modes is compared.
# NOTE(review): the nesting below was reconstructed from key semantics because
# the original indentation was lost — confirm against the config loader,
# in particular that `langfuse` belongs under `profiling`.
profiling:
  # Provider settings files to profile, one per provider.
  configs:
    - "settings.openai.toml"
    - "settings.azure.toml"
    - "settings.groq.toml"

  # Modes to profile; only "balanced" is enabled in this file.
  modes:
    - "balanced"

  # Test task(s) exercised for each config/mode combination.
  tasks:
    - "test_get_top_account_by_revenue_stream"

  # Presumably the number of repetitions per combination — TODO confirm.
  runs: 3

  # Langfuse endpoint and retry settings.
  langfuse:
    host: "https://cloud.langfuse.com"
    retry:
      max_attempts: 10
      # Presumably seconds before the first retry — TODO confirm units.
      initial_delay: 7.0
# Experiment definition: one named run per provider, all in "balanced" mode,
# followed by settings for the HTML comparison report.
# NOTE(review): the nesting below was reconstructed from key semantics because
# the original indentation was lost — confirm against the config loader,
# in particular that `comparison` belongs under `experiment`.
experiment:
  name: "full_matrix_comparison"
  # Only the "balanced" mode is configured, so this is a 3x1 matrix.
  description: "Compare OpenAI vs Azure vs Groq in Balanced mode (3x1 matrix)"

  # Each test_id has the form "<settings file>:<mode>:<task>".
  runs:
    - name: "openai_balanced"
      test_id: "settings.openai.toml:balanced:test_get_top_account_by_revenue_stream"
      iterations: 3
      output: "experiments/openai_balanced_{{timestamp}}.json"
      env:
        # NOTE(review): the OpenAI run is paired with an "Azure/..." model
        # name — confirm this is intentional.
        MODEL_NAME: "Azure/gpt-4o"

    - name: "azure_balanced"
      test_id: "settings.azure.toml:balanced:test_get_top_account_by_revenue_stream"
      iterations: 3
      output: "experiments/azure_balanced_{{timestamp}}.json"
      env:
        # Explicit null; presumably leaves the model choice to the settings
        # file — confirm how the runner treats a null env value.
        MODEL_NAME: null

    - name: "groq_balanced"
      test_id: "settings.groq.toml:balanced:test_get_top_account_by_revenue_stream"
      iterations: 3
      output: "experiments/groq_balanced_{{timestamp}}.json"
      env:
        # Quoted for consistency with the other string values.
        MODEL_NAME: "openai/gpt-oss-20b"

  # HTML comparison report settings.
  comparison:
    generate_html: true
    html_output: "experiments/comparison.html"
    auto_open: true