copilot-instructions-cli/scripts/run_protocol_tests.sh at develop · synthable/copilot-instructions-cli · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#!/bin/bash

# A simple test runner for the Agent Feedback Protocol v1.2

# --- ANSI color escapes ---
# Stored as literal backslash sequences; they are interpreted by `echo -e`
# at print time.
NC='\033[0m' # No Color (reset)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'

# --- Request fixtures ---
# Paths are relative, so the script is assumed to be run from the project root.
FIXTURE_PATH="tests/fixtures"
REQUEST_T1="${FIXTURE_PATH}/request_t1.json"
REQUEST_T2="${FIXTURE_PATH}/request_t2.json"
REQUEST_MALFORMED="${FIXTURE_PATH}/request_malformed.txt"
REQUEST_INCOMPLETE="${FIXTURE_PATH}/request_incomplete.json"

# --- Temp file for output ---
# Receives the stdout of each command under test. Abort immediately if it
# cannot be created: with an empty OUTPUT_FILE every later redirect would fail
# and each test would report a misleading error.
if ! OUTPUT_FILE=$(mktemp); then
  echo "ERROR: mktemp failed; cannot create a temporary output file." >&2
  exit 1
fi

# --- Cleanup function ---
# Removes the temp output file. Registered on EXIT below so it runs on every
# exit path of the script.
cleanup() {
  # `--` keeps a path that starts with '-' from being parsed as an option.
  rm -f -- "$OUTPUT_FILE"
}
trap cleanup EXIT

# --- Helper for running a test ---
# Usage: run_test "Test Name" "command_to_run" "jq_filter" "expected_jq_result"
#
# Runs command_to_run via `eval`, writing its stdout to $OUTPUT_FILE, then
# applies jq_filter to that file with `jq -r` and compares the result with
# expected_jq_result.
#
# Globals:
#   OUTPUT_FILE (read; the file it names is overwritten)
#   RED, GREEN, YELLOW, NC (read)
# Arguments:
#   $1 - test name; when it contains "Invalid Request" a non-zero exit status
#        of the command is tolerated
#   $2 - shell command line to execute (passed to eval)
#   $3 - jq filter applied to the captured stdout
#   $4 - exact string the jq output must equal
# Outputs:
#   Progress and PASS/FAIL diagnostics on stdout.
# Returns:
#   0 if the assertion holds, 1 otherwise (jq missing, unexpected command
#   failure, jq error, or value mismatch).
run_test() {
  local test_name="$1"
  local command="$2"
  local jq_filter="$3"
  local expected_result="$4"
  local exit_code
  local actual_result

  echo -e "\n${YELLOW}Running Test: $test_name${NC}"

  # Check for jq before doing any work: without it the result cannot be
  # verified, so there is no point in running the command under test.
  if ! command -v jq > /dev/null 2>&1; then
    echo -e "${RED}FAIL: 'jq' is not installed. Cannot verify JSON output.${NC}"
    return 1
  fi

  # Execute the command and capture stdout to a file, stderr is ignored for now.
  # NOTE: eval executes arbitrary shell code; only pass command strings that
  # are defined in this script, never untrusted input.
  eval "$command" > "$OUTPUT_FILE" 2> /dev/null
  exit_code=$?

  # Tests whose name marks them as "Invalid Request" may exit non-zero;
  # every other test requires the command itself to succeed.
  if [[ "$test_name" != *"Invalid Request"* ]] && (( exit_code != 0 )); then
    echo -e "${RED}FAIL: Command failed unexpectedly with exit code $exit_code${NC}"
    cat "$OUTPUT_FILE"
    return 1
  fi

  # Verify the output with jq. The assignment is kept separate from `local`
  # so that a jq failure (e.g. the output is not valid JSON) is detected
  # instead of being treated as an empty result.
  if ! actual_result=$(jq -r "$jq_filter" "$OUTPUT_FILE"); then
    echo -e "${RED}FAIL: jq could not process the command output.${NC}"
    echo "  Filter: $jq_filter"
    echo "  Full output:"
    cat "$OUTPUT_FILE"
    return 1
  fi

  if [[ "$actual_result" == "$expected_result" ]]; then
    echo -e "${GREEN}PASS${NC}"
    return 0
  fi

  echo -e "${RED}FAIL: Assertion failed.${NC}"
  echo "  Expected: $expected_result"
  echo "  Actual:   $actual_result"
  echo "  Full output:"
  cat "$OUTPUT_FILE"
  return 1
}

# --- Test Cases ---

# Test Case 1: sends the fixture at $REQUEST_T1 to the qualitative-reviewer
# agent and expects the response's .protocol_version to be "1.2".
test_case_1() {
  # The command string is later passed to eval by run_test, so the fixture
  # path is shell-quoted with %q to survive spaces and metacharacters.
  local request_arg
  printf -v request_arg '%q' "$REQUEST_T1"

  # Redirecting stdin (instead of `cat file |`) makes a missing fixture fail
  # the command rather than silently feeding the agent empty input.
  run_test \
    "Test Case 1: New Session Initiation" \
    "opencode run --agent qualitative-reviewer < $request_arg" \
    ".protocol_version" \
    "1.2"
}

# Test Case 2: sends the fixture at $REQUEST_T2 as a follow-up in an existing
# session and expects the first applied-feedback item to be "acknowledged".
test_case_2() {
  # This test uses a hardcoded session_id for simplicity.
  # A more advanced script would capture the session_id from test_case_1.

  # The command string is later passed to eval by run_test, so the fixture
  # path is shell-quoted with %q to survive spaces and metacharacters.
  local request_arg
  printf -v request_arg '%q' "$REQUEST_T2"

  # Redirecting stdin (instead of `cat file |`) makes a missing fixture fail
  # the command rather than silently feeding the agent empty input.
  run_test \
    "Test Case 2: Follow-up Session with Feedback Selection" \
    "opencode run --agent qualitative-reviewer --session session-123 < $request_arg" \
    ".applied_feedback_ack.items[0].processing_status" \
    "acknowledged"
}

# Test Case 3: sends the malformed fixture at $REQUEST_MALFORMED and expects
# the response's .error.code to be "INVALID_REQUEST". The "Invalid Request"
# text in the test name tells run_test to tolerate a non-zero exit status.
test_case_3() {
  # The command string is later passed to eval by run_test, so the fixture
  # path is shell-quoted with %q to survive spaces and metacharacters.
  local request_arg
  printf -v request_arg '%q' "$REQUEST_MALFORMED"

  # Redirecting stdin (instead of `cat file |`) matters here: with a missing
  # fixture the agent would otherwise receive empty input, which could itself
  # be rejected as invalid and make this test pass for the wrong reason.
  run_test \
    "Test Case 3: Invalid Request (Malformed JSON)" \
    "opencode run --agent qualitative-reviewer < $request_arg" \
    ".error.code" \
    "INVALID_REQUEST"
}

# Test Case 4: sends the incomplete fixture at $REQUEST_INCOMPLETE and expects
# the response's .error.code to be "INVALID_REQUEST". The "Invalid Request"
# text in the test name tells run_test to tolerate a non-zero exit status.
test_case_4() {
  # The command string is later passed to eval by run_test, so the fixture
  # path is shell-quoted with %q to survive spaces and metacharacters.
  local request_arg
  printf -v request_arg '%q' "$REQUEST_INCOMPLETE"

  # Redirecting stdin (instead of `cat file |`) matters here: with a missing
  # fixture the agent would otherwise receive empty input, which could itself
  # be rejected as invalid and make this test pass for the wrong reason.
  run_test \
    "Test Case 4: Invalid Request (Missing Required Field)" \
    "opencode run --agent qualitative-reviewer < $request_arg" \
    ".error.code" \
    "INVALID_REQUEST"
}


# --- Main execution ---
# Runs every enabled test case, prints a one-line summary, and terminates the
# script with status 0 when all cases passed or 1 when at least one failed.
main() {
  echo "Starting Agent Feedback Protocol v1.2 Test Suite..."

  # SKIPPING: Test Case 2 (test_case_2) is disabled due to a known bug in the
  # opencode CLI's --session flag handling, so it is left out of this list.
  local -a enabled_cases=(test_case_1 test_case_3 test_case_4)
  local suite_status=0
  local case_fn

  # Every case runs even after an earlier failure; failures only set the flag.
  for case_fn in "${enabled_cases[@]}"; do
    if ! "$case_fn"; then
      suite_status=1
    fi
  done

  echo ""
  if (( suite_status != 0 )); then
    echo -e "${RED}Some tests failed.${NC}"
  else
    echo -e "${GREEN}All tests seem to have passed!${NC}"
  fi

  exit "$suite_status"
}

# Entry point. Command-line arguments are forwarded so main can use them if
# it ever needs to; it currently ignores them.
main "$@"