name: Example - Eval on PR with Matrix Testing

# Example workflow showing how to run waza evaluations on pull requests.
# It demonstrates:
# - Calling the waza-eval reusable workflow for a single evaluation
# - Matrix testing across multiple models by building and running waza directly
# - Collecting the per-model results into the job summary

on:
  pull_request:
    branches:
      - main
    paths:
      - 'examples/code-explainer/**'
      - 'skills/code-explainer/**'

permissions:
  contents: read

jobs:
  # Single evaluation job example: delegates to the reusable workflow.
  basic-eval:
    name: Basic Evaluation
    uses: ./.github/workflows/waza-eval.yml
    with:
      eval-yaml: 'examples/code-explainer/eval.yaml'
      verbose: true
      output-file: 'results-basic.json'

  # Matrix testing example - one job per model, each producing its own
  # results-<model> artifact.
  matrix-eval:
    name: Matrix Test (${{ matrix.model }})
    strategy:
      matrix:
        model:
          - claude-sonnet-4-20250514
          - gpt-4o
          - claude-opus-4-20250514
      # Let the remaining models finish even if one of them fails.
      fail-fast: false
    runs-on: ubuntu-latest
    env:
      # Expose the matrix value to shell steps as an environment variable
      # instead of expanding the expression inside the script text.
      MODEL: ${{ matrix.model }}
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4

      - name: Setup Go Environment
        uses: actions/setup-go@v5
        with:
          go-version: '1.26'
          cache-dependency-path: go.sum

      - name: Download Dependencies
        run: go mod download

      - name: Build Waza Binary
        run: |
          go build -v -o waza ./cmd/waza
          chmod +x ./waza

      - name: Prepare Eval Config for Model
        run: |
          # Create a temporary eval file with the specific model
          # Note: This uses GNU sed syntax (works on ubuntu-latest)
          EVAL_FILE="examples/code-explainer/eval.yaml"
          TEMP_EVAL="eval-${MODEL}.yaml"

          # Copy eval file and replace model.
          # The pattern rewrites every line containing "model: ", including
          # keys that merely end in "model". MODEL is inserted into the sed
          # replacement, so it must not contain "/" or "&".
          cp "$EVAL_FILE" "$TEMP_EVAL"
          sed -i "s/model: .*/model: ${MODEL}/" "$TEMP_EVAL"

          echo "Created eval file: $TEMP_EVAL"
          cat "$TEMP_EVAL"

      - name: Run Evaluation with ${{ matrix.model }}
        run: |
          ./waza run "eval-${MODEL}.yaml" \
            --context-dir examples/code-explainer/fixtures \
            --verbose \
            --output "results-${MODEL}.json"

      - name: Upload Results for ${{ matrix.model }}
        # Upload even when the evaluation step failed, so partial output
        # is still available for inspection.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: results-${{ matrix.model }}
          path: |
            results-${{ matrix.model }}.json
            transcripts/
          retention-days: 30

  # Compare results from matrix runs
  compare-results:
    name: Compare Model Results
    needs: matrix-eval
    runs-on: ubuntu-latest
    # Run even if some matrix jobs failed, to summarize whatever was uploaded.
    if: always()
    steps:
      - name: Download All Results
        uses: actions/download-artifact@v4
        with:
          # Only the per-model artifacts are read by the summary step below.
          pattern: results-*
          path: results

      - name: Display Results Summary
        run: |
          {
            echo "## Evaluation Results by Model"
            echo ""

            # Each artifact is expected in its own directory: results/results-<model>/
            for model_dir in results/results-*; do
              if [ -d "$model_dir" ]; then
                model_name="$(basename "$model_dir" | sed 's/^results-//')"
                echo "### $model_name"

                json_file="$model_dir/results-$model_name.json"
                if [ -f "$json_file" ]; then
                  # Show the first 20 lines of the results JSON as a preview
                  echo '```json'
                  head -n 20 "$json_file"
                  echo '```'
                else
                  echo "No results file found"
                fi
                echo ""
              fi
            done
          } >> "$GITHUB_STEP_SUMMARY"