From bc898c8653abd898d35ddf266f06bd50183160eb Mon Sep 17 00:00:00 2001
From: pureliture <tkdgur1756@naver.com>
Date: Fri, 19 Jun 2026 13:25:02 +0900
Subject: [PATCH 1/2] feat(storage): shard list/index GSI hot partitions (issue
 #23 follow-on)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Generalize the #23 RepoAxisKey pattern to the remaining static single-value
list/index GSI partitions, and remove never-read dead-write GSI keys.

- axis_core.py: shared shard primitives (axis_shard/bucket_width/axis_material);
  repo_axis.py re-exports as back-compat aliases (#23 behavior unchanged).
- list_axis.py: ListAxisSpec (carries index_name + real gsi attr names) + 4 axis
  specs — TARGET_LIST/REPO_LIST/SCAN_DATE (8), SCAN_JOB pending/leased (4);
  list_axis_inputs single source of truth for the per-axis key formula.
- list_axis_reader.py: read_list_axis (flat, parallel fan-out for the SCAN_JOB
  lease loop) + read_list_axis_ordered (k-way merge preserving newest-first +
  limit). fail-closed; dedupe by (PK,SK) preferring higher version.
- store.py: list_scan_targets / read_recent_repo_metadata / read_scan_runs_for_date
  / _read_scan_jobs_by_status route through the fan-out readers (include_legacy
  migration flag, default off). Direct primary-key reads unchanged.
- list_axis_migration.py: per-axis in-place conditional backfill + remaining==0
  removal gate (SCAN_JOB status filter excludes completed/dead_letter).
- dead-write removal (D3/D4): drop never-read gsi2 #ALL keys from ghas_alert /
  secret_evidence mappers (GHAS GSI1 repo-axis + secret-evidence gsi1 link
  fallback preserved); drop gsi1 #ALL keys from ref_state / repo_lease mappers.
- CLI: `security-scanner backfill-list-axis [--dry-run]` to run the migration.

Implements docs/workbench/specs/scale-redesign-list-axis-sharding/design.md
(M1-M7) with the locked self-Q&A decisions (D1-D5) and the spec multi-agent
review fixes. Full suite green.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 .../design.md                                 | 520 ++++++++++++++++++
 .../milestones.md                             |  53 ++
 .../requirements.md                           |  64 +++
 src/security_scanner/cli/commands/migrate.py  |  52 ++
 .../storage/adapters/nosql_db/axis_core.py    |  40 ++
 .../storage/adapters/nosql_db/items.py        |  66 ++-
 .../storage/adapters/nosql_db/list_axis.py    | 198 +++++++
 .../adapters/nosql_db/list_axis_migration.py  | 168 ++++++
 .../adapters/nosql_db/list_axis_reader.py     | 213 +++++++
 .../storage/adapters/nosql_db/repo_axis.py    |  37 +-
 .../storage/adapters/nosql_db/store.py        |  90 +--
 tests/test_cli.py                             |   1 +
 tests/test_cli_backfill_repo_axis.py          |  58 ++
 tests/test_dead_write_gsi_keys.py             |  97 ++++
 tests/test_dynamodb_compatible_store.py       |  73 ++-
 tests/test_incremental_scan_storage.py        |   7 +-
 tests/test_list_axis_migration.py             | 197 +++++++
 tests/test_list_axis_reader.py                | 230 ++++++++
 tests/test_list_axis_sharding.py              | 157 ++++++
 tests/test_repo_axis_sharding.py              |   6 +-
 20 files changed, 2203 insertions(+), 124 deletions(-)
 create mode 100644 docs/workbench/specs/scale-redesign-list-axis-sharding/design.md
 create mode 100644 docs/workbench/specs/scale-redesign-list-axis-sharding/milestones.md
 create mode 100644 docs/workbench/specs/scale-redesign-list-axis-sharding/requirements.md
 create mode 100644 src/security_scanner/storage/adapters/nosql_db/axis_core.py
 create mode 100644 src/security_scanner/storage/adapters/nosql_db/list_axis.py
 create mode 100644 src/security_scanner/storage/adapters/nosql_db/list_axis_migration.py
 create mode 100644 src/security_scanner/storage/adapters/nosql_db/list_axis_reader.py
 create mode 100644 tests/test_dead_write_gsi_keys.py
 create mode 100644 tests/test_list_axis_migration.py
 create mode 100644 tests/test_list_axis_reader.py
 create mode 100644 tests/test_list_axis_sharding.py

diff --git a/docs/workbench/specs/scale-redesign-list-axis-sharding/design.md b/docs/workbench/specs/scale-redesign-list-axis-sharding/design.md
new file mode 100644
index 0000000..4a80662
--- /dev/null
+++ b/docs/workbench/specs/scale-redesign-list-axis-sharding/design.md
@@ -0,0 +1,520 @@
+# 리스트/인덱스 GSI 파티션 클라우드-스케일 핫파티션 재설계 (issue #23 후속)
+
+## 개요
+
+issue #23은 per-repo 엔티티 행이 단일 `REPO#<repo>` GSI1 파티션에 몰리던 핫파티션을 `RepoAxisKey`로 고정 샤드 분산(`REPO#<repo>#SHARD#00..15`, `repoAxisVersion=2`)했고, 읽기는 scatter-gather + migration-only legacy fallback + in-place conditional backfill로 해결했다. 이 설계는 그 작업을 **남아 있는 단일값 list/index GSI 파티션**으로 이어간다.
+
+대상은 단일 정적값을 `gsi1pk` 또는 `gsi2pk`로 쓰는 카탈로그/인덱스 파티션이다. 이들은 클라우드 규모에서 쓰기·읽기가 한 물리 파티션(DynamoDB 파티션당 1,000 WCU/s, 3,000 RCU/s 한도)에 직렬화되어 throttle 벽에 부딪힌다. 기존 schema review(`docs/workbench/dynamodb-schema-review-complete.md`)도 P2 follow-up에서 `allPk = ALL` hot partition 가능성을 명시했고, `store.py`의 `list_scan_targets` 위 TODO 주석(`TARGET_LIST_PK is a static GSI1PK creating a single hot partition ... requires write sharding`)이 이를 재확인한다.
+
+핵심 설계 원칙은 #23의 `RepoAxisKey` 패턴을 **per-surface 복제하지 않고 재사용 가능한 list-axis 샤드 헬퍼로 일반화**하는 것이다. #23이 "repo identity를 prefix에 두고 그 뒤에 `#SHARD#<bucket>`을 붙이는" 키 형태였다면, list-axis는 "정적 list prefix(또는 동적 partition root) 뒤에 `#SHARD#<bucket>`을 붙이는" 더 일반적인 변형이다. 두 패턴은 동일한 샤드 해시 함수, 동일한 scatter-gather/dedupe 구조, 동일한 conditional-backfill 메커닉을 공유하므로, 공통 부분을 추출하고 axis별로 prefix/version/shard-count/material/**대상 GSI(GSI1 vs GSI2)**만 주입한다.
+
+이 문서는 설계 전용이다. rollout/canary는 #23과 동일하게 범위 밖이며, "마이그레이션 완료 기준"은 in-place backfill의 gate 조건(per-axis remaining=0 등)까지만 정의한다. 이 문서 자체는 설계만 기술하며, 소스 코드 변경 내용은 담지 않는다.
+
+## 요구사항 참조
+
+- **R1 핫파티션 제거**: write-hot/read-hot인 단일값 list/index GSI 파티션을 고정 샤드 수로 분산해 per-partition WCU/RCU 집중을 제거한다.
+- **R2 읽기 논리 결과 보존**: 모든 현재 read가 반환하던 논리 결과(집합, 정렬, newest-first + limit, lease fairness)를 fan-out 후에도 그대로 보존한다. 호출자 인터페이스(반환 타입, 정렬 계약)는 바뀌지 않는다.
+- **R3 #23 패턴 일반화/공유**: `RepoAxisKey`/`read_repo_axis`/`backfill_repo_axis`의 공통 메커닉을 재사용 가능한 list-axis 헬퍼로 추출하되, repo-axis와 충돌 없이 공존한다(back-compat alias 보존).
+- **R4 fail-closed**: 임의의 샤드 쿼리 실패는 전파되어야 하며 부분 결과가 완전 결과처럼 보여서는 안 된다(`read_repo_axis`와 동일 불변식).
+- **R5 in-place 마이그레이션 + gate**: 기존 행은 primary key 위에서 conditional update로 in-place 백필하며, per-axis inventory/remaining=0 gate로만 legacy 경로 제거를 허용한다.
+- **R6 durable shard-count contract**: 샤드 수는 런타임 knob이 아니라 스키마 계약이다. version stamp가 shard count를 고정하고, 변경은 새 version + 재해시 마이그레이션을 요구한다(#23 `REPO_AXIS_SHARD_COUNT=16`과 동일 제약).
+- **R7 dead-write 키 제거**: 쓰여지지만 어떤 GSI 경로로도 읽히지 않는 단일값 list-ALL projection은 sharding 이득이 0이므로 샤딩이 아니라 write mapper에서 **제거**한다(D3·D4). sharding과 무관한 독립 full-scan 문제(`get_queue_status`, D5)만 별도 issue로 미룬다.
+- **R8 멀티-인덱스 정확성 (spec 파라미터화)**: 현재 활성 샤드 list-axis 4개는 모두 GSI1(`gsi1pk`/`gsi1sk`)이다. 그래도 reader는 generic placeholder 대신 `ListAxisSpec`의 `index_name`/`gsi_pk_field`/`gsi_sk_field`만 사용해, 미래에 GSI2 axis가 추가돼도 코드 변경 없이 올바른 인덱스·속성으로 라우팅되게 한다.
+
+## 접근 후보
+
+### 후보 A — surface별 독립 샤드 헬퍼 (기각)
+
+각 list 파티션마다 `target_list_axis.py`, `repo_list_axis.py`, `scan_date_axis.py`, `scan_job_axis.py`를 따로 만든다. 장점은 surface별 미세 조정(shard count, material)이 자유롭다는 것. 단점은 #23 `RepoAxisKey`와 합쳐 6개 이상의 거의 동일한 샤드/리더/백필 모듈이 생겨 유지보수·테스트 표면이 폭증하고, 샤드 해시·dedupe·fail-closed 불변식이 모듈마다 drift할 위험이 크다. R3 위반.
+
+### 후보 B — 단일 범용 ListAxisKey 헬퍼로 일반화 (채택)
+
+`repo_axis.py`의 검증된 메커닉(`repo_axis_shard`, `_bucket_width`, dedupe-by-(PK,SK)-prefer-higher-version, conditional `_backfill_one`)을 **list-axis 공통 코어**로 추출한다. 새 모듈 군:
+
+- `list_axis.py` — `ListAxisSpec`(axis별 prefix/version/shard-count/대상 GSI/attribute 이름)과 `ListAxisKey`(샤드된 pk + 메타데이터 projection), 그리고 axis별 키 입력 함수.
+- `list_axis_reader.py` — flat scatter-gather(`read_list_axis`)와 newest-first + limit를 보존하는 ordered k-way merge(`read_list_axis_ordered`). 두 reader 모두 spec에서 `IndexName`과 실제 attribute 이름을 끌어 쓴다.
+- `list_axis_migration.py` — per-axis inventory/backfill/gate-report.
+
+샤드 해시 함수는 **단일 정의로 공유**한다: `repo_axis.py`의 `repo_axis_shard`/`_bucket_width`/`repo_axis_material`을 axis-중립 이름(`axis_shard`, `bucket_width`, `axis_material`)으로 추출해 `list_axis.py`와 `repo_axis.py`가 같은 구현을 부른다. 후보 B는 한 곳에서 해시·dedupe·fail-closed를 보장하므로 R3·R4를 만족하고, surface별로는 `ListAxisSpec` 한 줄과 키 입력 함수만 추가하면 된다.
+
+### 후보 C — 시간 버킷(write-sharding by time window) (부분 기각)
+
+`SCAN_DATE#<date>`처럼 시간축 키는 날짜 자체를 더 잘게(시간/분) 쪼개는 방식도 있다. 그러나 (1) 날짜별 조회 계약을 깨고, (2) busy day의 부하 불균형은 날짜 입도가 아니라 동시 쓰기 충돌이 원인이므로 해시 샤드가 더 균일하다. 시간 버킷은 채택하지 않고, 모든 axis에 해시 샤드(후보 B)를 적용한다.
+
+**채택: 후보 B.** 이하 설계는 모두 후보 B 기준이다.
+
+## 아키텍처
+
+```mermaid
+flowchart TB
+    subgraph shared["공유 샤드 코어 (repo_axis.py에서 추출)"]
+        H["axis_shard(material, count)\nSHA-256 % count, zero-pad"]
+        M["axis_material(*parts)\nNUL-join"]
+        W["bucket_width(count)"]
+    end
+
+    subgraph listaxis["list_axis.py"]
+        SPEC["ListAxisSpec\n(prefix, version, shard_count,\ngsi_pk_field, gsi_sk_field, index_name,\nversion_attr, shard_attr, count_attr)"]
+        LAK["ListAxisKey.build(...).projection()"]
+        IN["list_axis_inputs(spec, item)\n→ (partition_root, gsi_sk, material)"]
+    end
+
+    subgraph writers["items.py write mappers (GSI1 list-axis 샤딩)"]
+        ST["scan_target_to_item (gsi1)"]
+        RM["repo_metadata_to_item (gsi1)"]
+        SR["scan_run_summary_to_item (gsi1)"]
+        SJ["scan_job_to_item (gsi1, pending/leased만)"]
+    end
+
+    subgraph deadkeys["dead-write GSI 키 제거 (샤딩 아님, list_axis 미경유)"]
+        GA["ghas_alert_to_item\ngsi2pk/gsi2sk 삭제 (GSI1 repo-axis projection 보존)"]
+        SE["secret_evidence_to_item\ngsi2pk/gsi2sk 삭제"]
+        RFS["ref_state_to_item\ngsi1pk 삭제"]
+        RLS["repo_lease_to_item\ngsi1pk 삭제"]
+    end
+
+    subgraph readers["list_axis_reader.py (IndexName + 실제 attr명을 spec에서)"]
+        FLAT["read_list_axis\nflat scatter+dedupe+sort"]
+        ORD["read_list_axis_ordered\nk-way descending merge + dedupe + limit"]
+    end
+
+    subgraph store["store.py read methods"]
+        LST["list_scan_targets → FLAT (GSI1)"]
+        RR["read_recent_repo_metadata → ORD (GSI1)"]
+        RD["read_scan_runs_for_date → FLAT (GSI1)"]
+        RJ["_read_scan_jobs_by_status → FLAT (GSI1, pending/leased)"]
+    end
+
+    subgraph mig["list_axis_migration.py"]
+        BF["backfill_list_axis(spec)\nconditional in-place update"]
+        GATE["per-axis remaining=0 gate"]
+    end
+
+    H --> LAK
+    M --> IN
+    W --> FLAT
+    W --> ORD
+    SPEC --> LAK
+    IN --> LAK
+    LAK --> writers
+    SPEC --> readers
+    readers --> store
+    IN --> BF
+    SPEC --> BF
+    BF --> GATE
+
+    repo["repo_axis.py / repo_axis_reader.py\n(#23, 동일 코어 공유)"] -.공유.-> shared
+```
+
+repo-axis와 list-axis는 동일한 `shared` 코어를 부르지만 별도 spec/reader/migration을 가진다. 샤딩 대상은 GSI1 list-axis 4개뿐이므로 `list_axis.py`는 `transport`의 `GSI1_NAME`만 필요하다(GSI2 list-axis는 없다). import 방향은 단방향이다: `list_axis.py`는 `nosql_db` 패키지에서 공유 샤드 헬퍼(코어) + `transport.GSI1_NAME` 외에는 아무것도 import하지 않아 `items.py`가 cycle 없이 의존할 수 있고, `list_axis_reader.py`는 `list_axis.py` + `access.py`만 의존하며, `list_axis_migration.py`는 `list_axis.py` + `access.py` + `store.py`의 `_is_conditional_check_failure`만 의존한다(#23의 import 규율과 동일). dead-write GSI 키 제거(GHAS_ALERT/SECRET_EVIDENCE의 gsi2, REF_STATE/REPO_LEASE의 gsi1)는 `list_axis.py`를 경유하지 않는 단순 mapper 편집이다.
+
+## 구성요소
+
+### 공유 샤드 코어 (repo_axis.py에서 추출)
+
+`repo_axis.py`의 다음 셋을 axis-중립 헬퍼로 추출하고, `repo_axis.py`는 backward-compatible alias로 재노출한다(기존 import 깨지 않음):
+
+- `axis_shard(shard_material: str, *, shard_count: int) -> str` — 기존 `repo_axis_shard` 본문 그대로. SHA-256 digest를 `shard_count`로 나눈 나머지를 `bucket_width(shard_count)` 자리로 zero-pad. `shard_count <= 0`이면 `ValueError`.
+- `bucket_width(shard_count: int) -> int` — 기존 `_bucket_width` (`len(str(shard_count - 1))`).
+- `axis_material(*parts: str) -> str` — 기존 `repo_axis_material`(NUL `\0` join).
+
+`repo_axis.py`는 `repo_axis_shard = axis_shard`, `repo_axis_material = axis_material`, `_bucket_width = bucket_width` 형태로 얇은 alias를 남겨 `RepoAxisKey.build`와 `repo_axis_reader`의 `width = len(str(shard_count - 1))` 계산이 그대로 동작한다. 이 추출은 동작 변경이 아니므로 #23 기존 테스트가 그대로 통과하는지로 검증한다.
+
+### list_axis.py — ListAxisSpec / ListAxisKey
+
+`ListAxisSpec`은 axis 한 개의 키 계약을 완전히 기술한다. **review HIGH 두 건(인덱스명 누락, generic attr명)을 spec 수준에서 해소한다**: spec은 (a) 대상 GSI의 `IndexName`과 (b) 그 GSI의 실제 partition/sort key attribute 이름을 모두 보유한다.
+
+```text
+ListAxisSpec(frozen):
+    prefix: str            # base partition root. 예: "TARGET_LIST#ALL", "REPO_LIST#ALL",
+                           #   "SCAN_DATE", "SCAN_JOB_STATUS"
+    version: int           # 해당 axis의 version stamp (모두 1)
+    shard_count: int       # durable contract
+    gsi_pk_field: str      # 실제 attribute 이름: 현재 샤드 axis는 모두 "gsi1pk"
+    gsi_sk_field: str      # 실제 attribute 이름: 현재 샤드 axis는 모두 "gsi1sk"
+    index_name: str        # transport.GSI1_NAME (현재 샤드 axis 전부)
+    version_attr: str      # 예: "listAxisVersion", "scanDateAxisVersion", "scanJobAxisVersion"
+    shard_attr: str        # 예: "listAxisShard", "scanDateAxisShard", "scanJobAxisShard"
+    count_attr: str        # 예: "listAxisShardCount", ...
+    shard_infix: str = "#SHARD#"
+```
+
+> `index_name`과 `gsi_sk_field`는 `gsi_pk_field`에서 파생할 수도 있으나(`gsi1pk → GSI1_NAME, gsi1sk`), **명시 필드로 둔다.** 그래야 reader가 `IndexName=spec.index_name`, `KeyConditionExpression=f"{spec.gsi_pk_field} = :pk ..."`, merge key `it.get(spec.gsi_sk_field, "")`를 모두 spec 하나에서 일관되게 끌어 쓰고, 새 axis가 잘못된 GSI로 라우팅되는 실수를 줄인다. 현재 샤딩 대상은 GSI1 list-axis 4개뿐이라 네 spec 모두 `index_name=GSI1_NAME`이지만, **미래에 GSI2 기반 list read가 추가되면**(현재 GHAS_ALERT/SECRET_EVIDENCE는 GSI2 read 경로가 없어 샤딩하지 않고 dead-write 키를 제거한다 — D3) 그때 `gsi_pk_field="gsi2pk"`, `index_name=GSI2_NAME`인 spec을 새로 추가하면 reader 코드 변경 없이 그대로 GSI2로 라우팅된다. 즉 멀티-인덱스 일반성은 spec 필드로 보존하되 현재 활성 axis는 모두 GSI1이다.
+
+axis별 정적 spec 상수 (모두 GSI1):
+
+```text
+TARGET_LIST_AXIS  = ListAxisSpec(prefix="TARGET_LIST#ALL", version=1, shard_count=8,
+                                 gsi_pk_field="gsi1pk", gsi_sk_field="gsi1sk", index_name=GSI1_NAME,
+                                 version_attr="listAxisVersion",
+                                 shard_attr="listAxisShard", count_attr="listAxisShardCount")
+REPO_LIST_AXIS    = ListAxisSpec(prefix="REPO_LIST#ALL", version=1, shard_count=8,
+                                 gsi_pk_field="gsi1pk", gsi_sk_field="gsi1sk", index_name=GSI1_NAME, ...)
+SCAN_DATE_AXIS    = ListAxisSpec(prefix="SCAN_DATE", version=1, shard_count=8,
+                                 gsi_pk_field="gsi1pk", gsi_sk_field="gsi1sk", index_name=GSI1_NAME,
+                                 version_attr="scanDateAxisVersion",
+                                 shard_attr="scanDateAxisShard", count_attr="scanDateAxisShardCount")
+SCAN_JOB_AXIS     = ListAxisSpec(prefix="SCAN_JOB_STATUS", version=1, shard_count=4,
+                                 gsi_pk_field="gsi1pk", gsi_sk_field="gsi1sk", index_name=GSI1_NAME,
+                                 version_attr="scanJobAxisVersion",
+                                 shard_attr="scanJobAxisShard", count_attr="scanJobAxisShardCount")
+```
+
+> GHAS_ALERT / SECRET_EVIDENCE에는 `ListAxisSpec` 상수를 두지 않는다. 둘은 GSI2 never-read이므로 샤딩하지 않고 GSI2 키 자체를 제거한다(D3). 미래에 GSI2 list read가 생기면 그때 `gsi_pk_field="gsi2pk"`/`index_name=GSI2_NAME` spec을 추가한다.
+
+**인덱스 라우팅 계약 (R8)**: 현재 샤드 axis 4개(TARGET_LIST / REPO_LIST / SCAN_DATE / SCAN_JOB_STATUS)는 모두 GSI1(`index_name=GSI1_NAME`, `gsi1pk`/`gsi1sk`)에 있다. reader는 `spec.index_name`으로 쿼리하고 `spec.gsi_pk_field`/`spec.gsi_sk_field`로 KeyCondition·merge key를 구성하므로, 미래에 GSI2 axis가 추가되어도 spec 필드만으로 정확히 라우팅된다.
+
+`SCAN_DATE`와 `SCAN_JOB_STATUS`는 prefix 뒤에 동적 세그먼트(`#<date>`, `#<status>`)가 붙는 변형이므로, spec의 `prefix`는 base만 두고 키 빌드 시 `partition_root`를 추가로 받는다(아래).
+
+```text
+sharded_list_axis_pk(spec, partition_root, bucket) -> str
+    # partition_root 예: "TARGET_LIST#ALL", "REPO_LIST#ALL", "SCAN_DATE#2026-06-19",
+    #                    "SCAN_JOB_STATUS#pending"
+    return f"{partition_root}{spec.shard_infix}{bucket}"
+
+legacy_list_axis_pk(partition_root) -> str
+    return partition_root      # 샤딩 이전 정적값 (예: "TARGET_LIST#ALL")
+```
+
+> 주의: legacy 정적값과 `partition_root`는 정확히 일치한다. `TARGET_LIST` axis의 legacy 값은 `transport.TARGET_LIST_PK = "TARGET_LIST#ALL"`, 샤드 값은 `TARGET_LIST#ALL#SHARD#<bucket>`이다. 따라서 `partition_root`를 `"TARGET_LIST#ALL"`로 두면 (1) legacy 식별이 `item[gsi_pk_field] == partition_root` and `version_attr 없음`으로 명확하고, (2) `#SHARD#` infix 유무로 샤드/legacy를 문자열만으로 구분할 수 있다(#23 `REPO_AXIS_SHARD_INFIX` 규율과 동일).
+
+```text
+ListAxisKey.build(*, spec, partition_root, gsi_sk, shard_material) -> ListAxisKey:
+    bucket = axis_shard(shard_material, shard_count=spec.shard_count)
+    pk     = sharded_list_axis_pk(spec, partition_root, bucket)
+    return ListAxisKey(spec, pk, gsi_sk, bucket)
+
+ListAxisKey.projection() -> dict:
+    # 실제 attribute 이름으로 emit (review HIGH #2): 현재 샤드 axis는 모두 gsi1pk/gsi1sk
+    #   (spec.gsi_pk_field로 파라미터화되어 있어 미래 gsi2pk axis도 그대로 동작)
+    return {
+        spec.gsi_pk_field:  self.pk,        # 현재 "gsi1pk"
+        spec.gsi_sk_field:  self.gsi_sk,    # 현재 "gsi1sk"
+        spec.version_attr:  spec.version,
+        spec.count_attr:    spec.shard_count,
+        spec.shard_attr:    self.bucket,
+    }
+```
+
+`list_axis_inputs(spec, item) -> (partition_root, gsi_sk, shard_material)`은 #23 `repo_axis_inputs`와 동형이다. spec과 item dict를 받아 axis별로 `partition_root`/정렬키/샤드 material을 그 행 자신의 필드에서만 도출한다. 이 함수가 axis별 키 공식의 단일 진실원이며, write mapper와 backfill이 같은 코드를 통과하므로 신규 write와 backfill된 legacy 행의 키가 절대 drift하지 않는다. axis별 분기(요약):
+
+- TARGET_LIST: `("TARGET_LIST#ALL", f"TARGET#{item['name']}", item['url'])`
+- REPO_LIST: `("REPO_LIST#ALL", f"UPDATED#{item['updatedAt']}#{item['repoKey']}", item['repoKey'])`
+- SCAN_DATE: `entityType == "SCAN_RUN"` 단언 후 `(f"SCAN_DATE#{scan_date(item['scanAtIso'])}", f"{item['scanAtIso']}#{item['repoKey']}#{item['scanRunId']}", item['scanRunId'])`
+- SCAN_JOB: `status in {pending, leased}`일 때만 `(f"SCAN_JOB_STATUS#{item['status']}", f"{item['nextAttemptAt']}#{int(item['priority']):08d}#{item['createdAt']}#{item['jobId']}", item['jobId'])`; 그 외 status는 `ValueError`(샤드 비대상)
+
+(GHAS_ALERT / SECRET_EVIDENCE는 `list_axis_inputs` 분기가 없다 — 샤딩 대상이 아니라 dead-write GSI2 키 제거 대상이다. D3.)
+
+### list_axis_reader.py — 두 가지 fan-out
+
+두 reader 모두 `spec.index_name`, `spec.gsi_pk_field`/`spec.gsi_sk_field`(실제 attribute 이름)를 사용한다 — **generic `gsi_pk`/`gsi_sk` placeholder는 어디에도 없다(review HIGH #2/#3 해소).** 현재 활성 axis는 모두 GSI1이지만, `repo_axis_reader._query_partition`이 GSI1 전용으로 `gsi1pk`/`gsi1sk`를 하드코딩한 것과 달리 list reader는 attribute 이름을 spec에서 파라미터화한다. 그래서 미래에 GSI2 axis가 추가되어도 reader 코드 변경 없이 그대로 동작한다(현재 GHAS/SECRET은 GSI2 read 경로가 없어 샤딩 대신 키 제거 — D3).
+
+#### read_list_axis (flat, 순서 무관 집합)
+
+`read_repo_axis`와 동형. `partition_root`에 대해 `0..shard_count-1` 샤드 파티션을 순회 쿼리하고, `include_legacy=True`일 때만 legacy 정적 파티션을 추가하며, `_dedupe_and_sort`(by (PK,SK), prefer higher `version_attr`, sort by `(gsi_sk_value, PK, SK)`)로 병합한다. fail-closed: 임의 샤드 쿼리 예외는 전파한다. `list_scan_targets`, `read_scan_runs_for_date`, `_read_scan_jobs_by_status`가 사용한다.
+
+```text
+def _query_list_partition(query_pages, table, spec, partition, gsi_sk_prefix):
+    if gsi_sk_prefix is None:
+        return query_pages(
+            table, IndexName=spec.index_name,
+            KeyConditionExpression=f"{spec.gsi_pk_field} = :pk",
+            ExpressionAttributeValues={":pk": partition})
+    return query_pages(
+        table, IndexName=spec.index_name,
+        KeyConditionExpression=f"{spec.gsi_pk_field} = :pk AND begins_with({spec.gsi_sk_field}, :p)",
+        ExpressionAttributeValues={":pk": partition, ":p": gsi_sk_prefix})
+```
+
+> `_dedupe_and_sort`의 정렬 key는 `item.get(spec.gsi_sk_field, "")`로, version 비교는 `int(item.get(spec.version_attr, 0))`로 spec을 따른다. `read_repo_axis._dedupe_and_sort`가 `gsi1sk`/`repoAxisVersion`을 하드코딩한 것과 달리 list reader는 spec 주입으로 axis별 attribute를 정확히 정렬하므로, 미래 GSI2 axis가 추가되어도 코드 변경 없이 올바른 sort key를 쓴다(현재 axis는 모두 `gsi1sk`).
+
+#### read_list_axis_ordered (newest-first + limit 보존, k-way merge)
+
+`read_recent_repo_metadata`의 `ScanIndexForward=False` + `limit` 계약을 보존하기 위한 ordered 변형. 핵심은 **각 샤드 파티션이 자기 내부에서는 이미 정렬되어 있다**는 점이다(REPO_LIST의 `gsi1sk = UPDATED#<iso>#<repo_key>`는 ISO 타임스탬프 prefix이므로 lexicographic 정렬 = 시간순).
+
+```text
+read_list_axis_ordered(table, *, spec, partition_root, gsi_sk_prefix, limit,
+                        descending=True, include_legacy=False, query_pages=query_all_pages) -> list[item]:
+    width = bucket_width(spec.shard_count)
+    per_shard = []
+    fetch_n = _ordered_fetch_limit(limit, include_legacy)     # 아래 정확성 절 참조
+    for bucket in range(spec.shard_count):
+        pk = sharded_list_axis_pk(spec, partition_root, f"{bucket:0{width}d}")
+        per_shard.append(
+            query_pages(table, IndexName=spec.index_name,
+                KeyConditionExpression=f"{spec.gsi_pk_field} = :pk AND begins_with({spec.gsi_sk_field}, :p)",
+                ExpressionAttributeValues={":pk": pk, ":p": gsi_sk_prefix},
+                ScanIndexForward=not descending,
+                limit=fetch_n))
+    if include_legacy:
+        per_shard.append(query_pages(table, IndexName=spec.index_name,
+            KeyConditionExpression=f"{spec.gsi_pk_field} = :pk AND begins_with({spec.gsi_sk_field}, :p)",
+            ExpressionAttributeValues={":pk": legacy_list_axis_pk(partition_root), ":p": gsi_sk_prefix},
+            ScanIndexForward=not descending, limit=fetch_n))
+    merged  = kway_merge_desc(per_shard, key=lambda it: it.get(spec.gsi_sk_field, ""))
+    deduped = dedupe_preserving_order(merged, pk_key=("PK","SK"), prefer_version=spec.version_attr)
+    return deduped[:limit] if limit is not None else deduped
+```
+
+`kway_merge_desc`는 `heapq` 기반 k-way merge다. 각 샤드 리스트가 이미 내림차순(`ScanIndexForward=False`)이므로, 각 리스트 head를 reverse-비교 heap에 넣고 가장 큰 `gsi_sk_field` 값을 차례로 pop한다. merge key는 **반드시 `spec.gsi_sk_field`의 실제 값**을 읽는다(존재하지 않는 generic `gsi_sk`를 읽으면 전 항목이 `""`가 되어 순서가 파괴된다 — review HIGH #3). 위 의사코드는 기본값 `descending=True` 경로만 보여 준다. `descending=False`로 호출하면 각 샤드가 오름차순으로 반환되므로 merge 비교 방향도 함께 뒤집어야 한다.
+
+##### ordered+limit 정확성 증명 (dedupe 보정 포함, review HIGH #1)
+
+**기본 경우(`include_legacy=False`, 중복 없음):** 각 샤드는 전역 정렬축(`gsi_sk`) 기준으로 내부 내림차순이다. 전역 top-`limit` 원소 `x`를 생각하면, `x`는 어떤 샤드 `s`에 속하고 그 샤드 안에서 `x`보다 큰 원소는 많아야 전역에서 `x`보다 큰 원소 수(< `limit`)이다. 즉 `x`는 `s`의 상위 `limit`개 안에 든다. 따라서 **샤드별 `Limit=limit`이면 전역 top-`limit`는 빠짐없이 merge 입력에 포함**되고, 내림차순 k-way merge 후 `[:limit]` 절단은 정확한 전역 top-`limit`를 준다. 전체 카탈로그가 `limit`보다 작으면 `limit`개 미만 반환은 정상(현재도 동일).
+
+**dedupe가 개입하는 경우(`include_legacy=True`, 마이그레이션 윈도):** 같은 논리 REPO_META 행이 legacy 파티션과 샤드 파티션에 동일 `(PK,SK)`로 동시에 존재할 수 있다. 이때 샤드별 `Limit=limit`만 뽑으면, merge window가 곧 dedupe로 붕괴할 중복 행들로 일부 채워져 **distinct 행이 `limit` 미만으로 남고, per-shard `Limit` 경계 바로 밖에 있던 진짜 더 새로운 행은 애초에 fetch되지 않아** newest-first 페이지가 조용히 짧아지거나 꼬리가 틀어질 수 있다. 이는 결과 완전성이 보장되어야 하는 검증 경로인 `include_legacy=True`에서 특히 위험하다(GSI 쿼리는 strongly-consistent read를 지원하지 않으므로, 여기서 요구되는 것은 일관성 수준이 아니라 결과 완전성이다).
+
+**해소 — over-fetch를 dedupe 만큼 보정한다(`_ordered_fetch_limit`):**
+
+- `include_legacy=False` (steady state): 중복이 구조적으로 없으므로 `fetch_n = limit`. 기본 증명 그대로 성립.
+- `include_legacy=True` (마이그레이션 윈도): 한 논리 행은 legacy 1 + 샤드 1 = 최대 2벌로 존재할 수 있다. 이를 안전하게 처리하는 방법은 **legacy 파티션을 early-`Limit` 없이 prefix 전체 스캔**하거나, 보수적으로 **모든 fan-out 소스(샤드 + legacy)에서 early-`Limit`를 끄고(`fetch_n = None`) 매칭 prefix 전체를 읽은 뒤 dedupe → `[:limit]`**하는 것이다. 마이그레이션 윈도는 짧고 검증용이므로 이 추가 RCU는 허용 가능하다. (대안: `fetch_n = limit + expected_duplicate_slack`로 bound하되, slack을 증명 가능한 상한으로 두기 어렵고 윈도가 짧으므로 본 설계는 **full-prefix 스캔**을 기본으로 택한다.)
+
+즉 **early-`Limit` 최적화는 중복이 없는 `include_legacy=False`에서만 적용**하고, `include_legacy=True`에서는 dedupe-before-truncate를 full window 위에서 수행해 전역 top-`limit` 정확성을 보존한다. 경계 테스트: "legacy+shard 동일 `(PK,SK)` 중복 행이 ordered+limit read에 섞인 경우 정확한 distinct top-`limit` 반환".
+
+malformed/null `gsi_sk` 방어: 빈 문자열은 내림차순에서 가장 작게 취급되어 자연히 뒤로 가라앉는다(newest-first 결과의 꼬리). 키가 완전히 없는 행은 merge에서 빈 문자열로 정규화한다(`it.get(spec.gsi_sk_field, "")`).
+
+### list_axis_migration.py — in-place backfill + gate
+
+`repo_axis_migration.py`와 동형. axis(`ListAxisSpec`) + entityType 집합을 받아: `scan_all_pages`(strongly consistent base-table scan)로 entityType별 행을 모으고, `is_legacy_list_axis_item(spec, item)`으로 legacy 행을 식별하고, 각 행에 `list_axis_inputs(spec, item)` → `ListAxisKey.build(...).projection()`으로 샤드 projection을 재계산해 `update_item`을 `ConditionExpression="... attribute_not_exists(<version_attr>)"`로 in-place 적용한다. backfill 후 re-inventory로 `remaining`을 보고한다. 한 malformed 행은 try 안에서 `failed`로 분류되어 전체 패스를 중단시키지 않는다.
+
+migration 대상은 GSI1 list-axis 4개(TARGET_LIST, REPO_LIST, SCAN_DATE, SCAN_JOB)뿐이다. GHAS_ALERT/SECRET_EVIDENCE는 샤딩하지 않고 GSI2 키를 제거하므로 backfill/migration 대상이 아니다(D3). `is_legacy_list_axis_item`는 **반드시 `item[spec.gsi_pk_field]`를 읽는다 — 절대 GSI를 하드코딩하지 않는다**(현재 활성 axis는 모두 `gsi1pk`, spec 파라미터화로 미래 axis도 안전):
+
+```text
+def is_legacy_list_axis_item(spec, item, *, legacy_pk, status_filter=None) -> bool:
+    gsi_pk = str(item.get(spec.gsi_pk_field, ""))     # 현재 활성 axis는 모두 gsi1pk
+    if gsi_pk != legacy_pk:
+        return False
+    if spec.version_attr in item:
+        return False
+    if status_filter is not None and item.get("status") not in status_filter:
+        return False     # SCAN_JOB: completed/dead_letter 제외 (review LOW #1)
+    return True
+```
+
+- **SCAN_JOB legacy predicate (review LOW #1)**: `gsi1pk in {SCAN_JOB_STATUS#pending, SCAN_JOB_STATUS#leased}` AND `scanJobAxisVersion 없음` AND `status in {pending, leased}`. completed/dead_letter 행은 정당하게 `gsi1pk=SCAN_JOB_STATUS#completed`이고 version_attr가 없으므로, status 필터가 없으면 legacy-unsharded pending/leased로 오집계되어 (a) inventory/remaining을 부풀려 gate를 영원히 막거나 (b) 잘못 backfill되어 completed job이 샤드된 pending/leased 파티션으로 이동한다. **status 필터로 inventory·backfill 양쪽에서 completed/dead_letter를 제외한다.**
+- **SECRET_EVIDENCE는 migration 비대상 (D3)**: SECRET_EVIDENCE GSI2 키는 샤드되지 않고 제거되므로 `is_legacy_list_axis_item`/backfill을 통과하지 않는다. 다만 dead-key 제거 시 `_secret_evidence_link_pk`의 gsi1 fallback(`gsi1pk == "SECRET_EVIDENCE#ALL"`)은 별개의 catch-all 버킷이라 절대 건드리지 않는다(범위 분리 절 참조). 즉 GSI2 키 제거는 `secret_evidence_to_item`의 `gsi2pk`/`gsi2sk`에만 적용되고 gsi1 fallback은 불변이다.
+
+## 데이터 모델
+
+### 샤드 키 형식 (axis별)
+
+| Axis | 인덱스 | legacy 정적 pk = `partition_root` (보존, migration-read 전용) | 샤드 pk 형식 | gsi sk attr / 값 (불변) | 샤드 material | shard count |
+|---|---|---|---|---|---|---|
+| TARGET_LIST | GSI1 | `TARGET_LIST#ALL` | `TARGET_LIST#ALL#SHARD#<0..7>` | `gsi1sk` = `TARGET#<name>` | `target.url` | 8 |
+| REPO_LIST | GSI1 | `REPO_LIST#ALL` | `REPO_LIST#ALL#SHARD#<0..7>` | `gsi1sk` = `UPDATED#<iso>#<repo_key>` | `repo.repo_key` | 8 |
+| SCAN_DATE | GSI1 | `SCAN_DATE#<date>` | `SCAN_DATE#<date>#SHARD#<0..7>` | `gsi1sk` = `<scan_at>#<repo_key>#<scan_run_id>` | `scan_run_id` | 8 |
+| SCAN_JOB_STATUS (pending/leased만) | GSI1 | `SCAN_JOB_STATUS#<status>` | `SCAN_JOB_STATUS#<status>#SHARD#<0..3>` | `gsi1sk` = `<next_attempt_at>#<priority:08d>#<created_at>#<job_id>` | `job_id` | 4 |
+
+샤드 키는 GSI1 list-axis 4개에만 적용한다. GHAS_ALERT#ALL / SECRET_EVIDENCE#ALL은 샤드하지 않고 GSI2 키(`gsi2pk`/`gsi2sk`)를 제거한다(D3 — "dead-write GSI 키 제거" 절). `legacy 정적 pk`를 `partition_root`로 그대로 쓰고 그 뒤에 `#SHARD#`를 붙이는 것이 핵심이며, 현재 샤드 axis는 모두 `gsi1pk`/`gsi1sk`를 쓴다(generic 이름 없음).
+
+### 메타데이터 속성
+
+각 샤드 행은 #23의 `repoAxisVersion`/`repoAxisShardCount`/`repoAxisShard`에 대응하는 3속성을 갖는다:
+
+- TARGET_LIST / REPO_LIST: `listAxisVersion=1`, `listAxisShardCount=8`, `listAxisShard`.
+- SCAN_DATE: `scanDateAxisVersion=1`, `scanDateAxisShardCount=8`, `scanDateAxisShard`.
+- SCAN_JOB(pending/leased): `scanJobAxisVersion=1`, `scanJobAxisShardCount=4`, `scanJobAxisShard`.
+
+axis별로 version_attr 이름을 분리해 한 행이 repo-axis와 list-axis 두 샤딩 계약에 동시에 참여해도 충돌하지 않는다(서로 다른 version_attr이라 dedupe/version 비교가 섞이지 않음). GHAS_ALERT 행은 GSI1 repo-axis projection(`repoAxisVersion`, #23)을 그대로 유지하며 — 본 설계에서 제거하는 것은 GHAS_ALERT의 GSI2 list 키(`gsi2pk`/`gsi2sk`)뿐이라 `listAxis*` 메타 속성은 GHAS_ALERT/SECRET_EVIDENCE에 더 이상 쓰지 않는다.
+
+### Durable shard-count contract (axis별)
+
+샤드 수는 런타임 knob이 아니라 스키마 계약이다(R6). `<axis>Version=1`이 각 `shardCount`를 고정한다. 변경하려면 새 version(`=2`) + 재해시 마이그레이션 또는 active-version fan-out이 필요하다. 각 spec 상수에 #23 `repo_axis.py` docstring과 동일한 경고를 단다.
+
+- **TARGET_LIST=8 / REPO_LIST=8**: 카탈로그 크기가 클라우드 규모에서도 bounded(대상 repo 수)이고 동시 쓰기 분산엔 8이 충분, scatter 비용도 낮음. `listAxisVersion=1`이 8을 고정.
+- **SCAN_DATE=8**: 날짜 파티션 하나당 최대 8,000 WCU/s(8 샤드 × 파티션당 1,000 WCU/s) peak를 흡수(대부분 배포에 넉넉). busy day의 쓰기는 quiet day의 ~8배를 거의 넘지 않음. `scanDateAxisVersion=1`이 8을 고정.
+- **SCAN_JOB=4 (확정, D1)**: 큐 backlog는 write-hot이지만 `lease_next_scan_job`이 pending+leased를 fan-out하므로 `shard_count=N`이면 lease 시도당 `2N` GSI 쿼리가 든다. fan-out 비용과 분산 사이 균형으로 4로 확정한다(직렬 worst-case 8 쿼리, pending/leased 병렬화로 latency를 partition-RTT 1회로 bound). 16으로 올리는 것은 go-live 측정(p95 lease latency) 후 `scanJobAxisVersion=2` rehash로 처리하는 튜닝 follow-up이다(D1). `scanJobAxisVersion=1`이 4를 고정.
+
+## 데이터 흐름
+
+### TARGET_LIST (write-both, read scatter) — GSI1
+
+- **Write** — `scan_target_to_item`: 현재 `gsi1pk = TARGET_LIST_PK` 상수 대신
+  `ListAxisKey.build(spec=TARGET_LIST_AXIS, partition_root="TARGET_LIST#ALL", gsi_sk=f"TARGET#{target.name}", shard_material=target.url).projection()`를 item에 merge. `target.url`은 이미 item의 PK material(`SCAN_TARGET#<url>`)이라 결정적·안정적이다.
+- **Read** — `list_scan_targets`: 단일 `query_all_pages(IndexName=GSI1, gsi1pk=TARGET_LIST#ALL, begins_with(gsi1sk, "TARGET#"))`를 `read_list_axis(spec=TARGET_LIST_AXIS, partition_root="TARGET_LIST#ALL", gsi_sk_prefix="TARGET#")`로 교체. 반환 후 `items_to_scan_targets`로 변환. 현재 `list_scan_targets`는 순서를 계약하지 않으므로(호출자가 삽입 순서에 의존하지 않음) flat dedupe+sort-by-`gsi1sk`로 충분하다(R2). `store.py:179-181`의 TODO 주석은 제거한다.
+
+### REPO_LIST (write-both, read ordered+limit) — GSI1
+
+- **Write** — `repo_metadata_to_item`: `gsi1pk = REPO_LIST_PK` 대신 `partition_root="REPO_LIST#ALL"`, `gsi_sk` 불변(`UPDATED#<updated_at>#<repo_key>`), `shard_material=repo.repo_key`로 projection merge. `write_scan_result`가 scan 결과마다 `put_repo_metadata`를 부르는 쓰기 핫니스가 이로써 8 파티션으로 분산된다.
+  - **샤드 안정성 + in-place 덮어쓰기 (review MEDIUM #5)**: REPO_META 행은 `PK=REPO#<repo_key>`/`SK=META`이고, `put_repo_metadata`는 **`update_item`이 아니라 전체 `put_item`**이다. 샤드 bucket은 `repo_key`(rescan 전반에 안정)에서 파생되므로 같은 행은 항상 같은 샤드에 머문다(old 샤드에 orphan 없음). 동시에 매 write마다 `gsi1sk`(`UPDATED#<updated_at>#...`)가 갱신되는데, full `put_item`이라 같은 `(PK,SK)`를 in-place로 통째 덮어써 **old `gsi1sk` 값이 원자적으로 교체**되어 stale한 `UPDATED#` 인덱스 엔트리가 남지 않는다 → 중복 샤드 행 없음, newest-first 정확. **만약 미래에 부분 `update_item` 경로가 추가된다면 반드시 `gsi1sk`(와 샤드 projection)도 함께 덮어써야** stale newest-first 엔트리/샤드 drift를 막는다. 이 불변식을 `repo_metadata_to_item`/`put_repo_metadata` docstring과 테스트로 고정한다.
+- **Read** — `read_recent_repo_metadata(limit)`: 현재 `query_all_pages(IndexName=GSI1, gsi1pk=REPO_LIST#ALL, begins_with(gsi1sk,"UPDATED#"), ScanIndexForward=False, limit=limit)` 단일 쿼리를 `read_list_axis_ordered(spec=REPO_LIST_AXIS, partition_root="REPO_LIST#ALL", gsi_sk_prefix="UPDATED#", limit=limit, descending=True)`로 교체. 각 샤드는 `ScanIndexForward=False`로 newest-first, k-way 내림차순 merge로 전역 newest-first 재구성, dedupe 후 `[:limit]` 절단. `ScanIndexForward=False` 계약과 limit semantics(전역 newest-first 상위 limit개)가 정확히 보존된다(R2). over-fetch 정확성과 dedupe 보정은 위 "ordered+limit 정확성 증명" 절을 따른다.
+
+### SCAN_DATE (write-both, read scatter) — GSI1
+
+- **Write** — `scan_run_summary_to_item`: `gsi1pk = f"SCAN_DATE#{scan_date(scan_at)}"` 대신 `partition_root=f"SCAN_DATE#{scan_date(scan_at)}"`, `gsi_sk` 불변(`<scan_at>#<repo_key>#<scan_run_id>`), `shard_material=run.scan_run_id`로 projection merge. `scan_run_id`는 write 전에 정해지고 변하지 않으므로 결정적이다.
+- **Read** — `read_scan_runs_for_date(scan_date)`: 단일 `query_all_pages(IndexName=GSI1, gsi1pk=SCAN_DATE#<date>)`(현재 sk 조건 없음)를 `read_list_axis(spec=SCAN_DATE_AXIS, partition_root=f"SCAN_DATE#{scan_date}", gsi_sk_prefix=None)`로 교체. 8 샤드 fan-out → dedupe → sort-by-`gsi1sk`. 날짜별 목록은 순서 무관(현재도 `query_all_pages` 결과를 `items_to_scan_run_summaries`로 변환만)이므로 flat 변형으로 충분하다.
+
+> **SCAN_RUN 엔티티 가드 (review MEDIUM #4)**: SCAN_DATE list-axis는 SCAN_RUN 행에 `gsi1pk`/`gsi1sk`를 쓰는 **유일한 소유자**다. 현재 `scan_run_summary_to_item`은 `repo_axis_projection_for_item`을 **호출하지 않는다**(repo-axis projection 없음). SCAN_RUN은 `REPO_AXIS_ENTITY_TYPES`에 없어 `repo_axis_inputs(SCAN_RUN)`은 `ValueError`를 던지므로, 누군가 실수로 SCAN_RUN을 repo-axis surface에 추가하면 즉시 실패한다 — 이건 incidental guard다. 본 설계는 여기에 **능동 단언을 추가**한다: (1) SCAN_DATE `list_axis_inputs` 분기 진입 시 `item["entityType"] == "SCAN_RUN"`을 assert해 mis-wiring을 큰 소리로 실패시키고, (2) "SCAN_DATE list-axis가 SCAN_RUN의 `gsi1pk`/`gsi1sk` 단독 소유자"임을 docstring 불변식으로 명시하며, (3) 회귀 테스트로 SCAN_RUN에 repo-axis projection이 절대 적용되지 않음을 고정한다. list-axis와 (가상의) repo-axis가 둘 다 GSI1의 `gsi1pk`/`gsi1sk`를 노리므로, 단독 소유자 규칙이 clobber를 구조적으로 차단한다.
+
+### SCAN_JOB_STATUS (pending/leased만 write-both/read-hot, lease fairness 보존) — GSI1
+
+- **Write** — `scan_job_to_item`: `job.status in {"pending","leased"}`이면 `gsi1pk = f"SCAN_JOB_STATUS#{job.status}"` 대신 `ListAxisKey.build(spec=SCAN_JOB_AXIS, partition_root=f"SCAN_JOB_STATUS#{job.status}", gsi_sk=<불변>, shard_material=job.job_id)` projection을 merge. `job.status in {"completed","dead_letter"}`이면 **샤드하지 않고** 기존 `gsi1pk = f"SCAN_JOB_STATUS#{status}"` + version_attr 없음으로 둔다(의도적 비대칭: 이 두 상태는 GSI로 읽히지 않으므로 fan-out 비용만 늘어난다). `gsi1sk`(`<next_attempt_at>#<priority:08d>#<created_at>#<job_id>`)는 모든 상태에서 불변.
+  - `job_id`는 status 전이 전반에 안정적이어야 한다(같은 job은 항상 같은 bucket). `record_retryable_failure`/`complete_processed_job`/`return_job_to_pending`/`move_job_to_dead_letter`가 모두 `replace(job, status=...)`로 `job_id`를 보존함을 확인했다. 이 불변식을 `ListAxisKey.build` 호출부 docstring과 테스트로 고정한다.
+- **Read** — `_read_scan_jobs_by_status(status)`: `status in {"pending","leased"}`이면 `read_list_axis(spec=SCAN_JOB_AXIS, partition_root=f"SCAN_JOB_STATUS#{status}", gsi_sk_prefix=None)`로 4-way fan-out. `status in {"completed","dead_letter"}`이면 기존 단일 쿼리 유지. dedupe→sort-by-`gsi1sk`로 병합된 후, `lease_next_scan_job`은 기존대로 `candidates.sort(key=_scan_job_lease_sort_key)`로 재정렬하므로 lease fairness가 보존된다(R2). `lease_next_scan_job` 본문은 변경 불필요(이미 merged list 위에서 동작).
+
+> **lease fan-out 비용 — 확정 결정 (D1)**: `lease_next_scan_job`은 `_read_scan_jobs_by_status`를 pending·leased 2회 부르므로 `shard_count=N`이면 lease 시도당 **2N GSI 쿼리**다. 이 경로는 hot worker polling loop에 있어 직렬 latency가 워커 lease throughput을 깎으므로, 본 설계에서 다음을 **확정한다**:
+>
+>   1. **큐 axis shard count를 4로 확정(`SCAN_JOB_AXIS.shard_count=4`)**. lease 시도당 2N=8 쿼리로 묶인다. 큐는 #23 finding cardinality만큼 크지 않고 backlog drain은 quiet 시점에 흡수되므로 4로도 단일 파티션 1,000 WCU/s 벽을 넘기 충분하다(다른 axis와 달리 SCAN_JOB만 4). **trade-off**: 4 샤드는 16 대비 peak write 분산이 낮지만, lease read 비용을 4배 줄인다 — 큐는 read-hot이 write-hot보다 핵심이라 read 비용을 우선한다.
+>   2. **pending/leased fan-out 병렬화를 본 작업에 포함(D1)**: SCAN_JOB axis에 한해 thread-pool로 pending·leased 양쪽 샤드 쿼리를 병렬 발행해 wall-clock latency를 partition-RTT 1회 수준으로 bound한다. fail-closed는 유지(임의 샤드 future 실패는 전파).
+>   3. **수용 기준(gate)**: go-live 전 per-lease latency와 lease throughput을 측정해 샤딩 전 대비 회귀가 허용 범위(예: p95 lease latency 증가 ≤ X ms) 안인지 확인한다. "측정 없이 prod에서 발견"을 금지한다.
+>
+>   확정: shard_count=4 + pending/leased 병렬 fan-out을 본 작업에 포함하고, go-live 측정 후 부족하면 `scanJobAxisVersion=2` rehash로 16+ 튜닝하는 follow-up으로 처리한다. 본 spec에 남은 운영자 선택지 없음(D1).
+
+> lease fairness 불변식: scatter-gather가 `gsi1sk`로 병합한 뒤 in-memory `_scan_job_lease_sort_key`가 최종 순서를 결정한다. 임의 샤드 쿼리 실패는 fail-closed로 전파되어야 한다 — 부분 candidate set은 lease 정렬을 오염시킨다(R4). read 윈도 중 다른 워커가 status를 바꿔도 `_try_lease_scan_job`의 conditional write가 backstop이며, 이는 샤딩 전과 동일하다.
+
+### GHAS_ALERT#ALL / SECRET_EVIDENCE#ALL (GSI2, dead-write 제거) — D3
+
+이 두 axis는 store.py 검증 결과 **GSI2로 전혀 읽히지 않는다**(write-only·never-read). 따라서 샤딩하지 않고 **미사용 GSI2 projection을 제거한다**(D3). never-read 인덱스를 샤딩하면 read 이득 0에 scatter 코드만 늘고, 제거하면 hot partition과 write amplification(put당 GSI item 1개)을 동시에 없앤다.
+
+- **Write** — `ghas_alert_to_item`: `gsi2pk = GHAS_ALERT_LIST_PK`(와 짝 `gsi2sk`)를 **emit하지 않는다**(키 제거). `secret_evidence_to_item`: `gsi2pk = SECRET_EVIDENCE_LIST_PK`(와 짝 `gsi2sk`)를 **emit하지 않는다**.
+  - GHAS_ALERT 행은 `ghas_alert_to_item` 끝에서 `item.update(repo_axis_projection_for_item(item))`로 GSI1 repo-axis projection(#23)을 merge한다. **이 GSI1 repo-axis projection은 그대로 보존한다** — 제거 대상은 GSI2 list 키(`gsi2pk`/`gsi2sk`)뿐이다. 두 키는 서로 다른 GSI라 GSI2 키 제거가 GSI1 repo-axis projection에 영향을 주지 않는다.
+- **Read** — 변경 없음. `read_ghas_alerts`는 GSI2를 쓰지 않고 base table `scan_all_pages`(entityType=GHAS_ALERT filter)를 쓴다. SECRET_EVIDENCE에는 list read 자체가 없다(`SecretEvidenceStore` 프로토콜에 `read_*` 없음). GSI2 키 제거는 read 경로에 무영향이다. 미래에 GSI2 기반 list read가 생기면 그때 `ListAxisSpec`(index_name=GSI2_NAME)으로 새로 샤딩해 도입한다.
+
+> `_secret_evidence_link_pk` 비간섭 (review LOW #7/#3): `_secret_evidence_link_pk`는 linked finding/alert가 없을 때 **`gsi1pk` fallback**으로 `SECRET_EVIDENCE_LIST_PK`(`SECRET_EVIDENCE#ALL`)를 쓴다. 이건 GSI1의 catch-all 버킷으로, 본 설계가 제거하는 SECRET_EVIDENCE GSI2 키(`gsi2pk`)와 무관하다. 문자열이 겹치는 것은 우연이며 (1) **`_secret_evidence_link_pk`(gsi1 fallback)는 절대 건드리지 않고**, (2) GSI2 키 제거는 `secret_evidence_to_item`의 `gsi2pk`/`gsi2sk`에만 적용된다. unlinked SECRET_EVIDENCE 행(gsi1pk==SECRET_EVIDENCE#ALL)을 가진 테스트로 gsi1 fallback이 그대로 유지됨을 고정한다.
+
+## 범위 분리
+
+### 샤딩 적용 (in-scope)
+
+| Surface | 인덱스 | hotness | read 변경 | write 변경 |
+|---|---|---|---|---|
+| TARGET_LIST#ALL | GSI1 | write+read | `list_scan_targets` → `read_list_axis` (flat) | `scan_target_to_item` → 샤드 gsi1pk + listAxis* |
+| REPO_LIST#ALL | GSI1 | write+read | `read_recent_repo_metadata` → `read_list_axis_ordered` (k-way merge, limit 보존) | `repo_metadata_to_item` → 샤드 gsi1pk + listAxis* |
+| SCAN_DATE#&lt;date&gt; | GSI1 | write+read | `read_scan_runs_for_date` → `read_list_axis` (flat) | `scan_run_summary_to_item` → 샤드 gsi1pk + scanDateAxis* |
+| SCAN_JOB_STATUS#pending, #leased | GSI1 | write+read | `_read_scan_jobs_by_status` → `read_list_axis` (pending/leased만) | `scan_job_to_item` → 샤드 gsi1pk + scanJobAxis* (pending/leased만) |
+
+샤딩 적용은 정확히 GSI1 list-axis 4개(TARGET_LIST, REPO_LIST, SCAN_DATE, SCAN_JOB_STATUS)다. GHAS_ALERT/SECRET_EVIDENCE(GSI2)는 never-read이므로 샤딩하지 않고 아래 dead-write 키 제거 대상으로 옮겼다(D3).
+
+### dead-write GSI 키 제거 (in-scope, 샤딩 아님)
+
+never-read GSI projection은 샤딩 이득이 0이고 write amplification만 만든다. 아래 4개 키를 write mapper에서 제거한다(D3·D4). 모두 어떤 GSI 경로로도 읽히지 않음을 store.py 검증으로 확정했다.
+
+| Surface | 제거 키 | write mapper | rationale |
+|---|---|---|---|
+| GHAS_ALERT#ALL | `gsi2pk`(+`gsi2sk`) | `ghas_alert_to_item` | 어떤 GSI 경로로도 읽히지 않음 → projection 제거로 hot partition + write amplification 동시 제거 |
+| SECRET_EVIDENCE#ALL | `gsi2pk`(+`gsi2sk`) | `secret_evidence_to_item` | 어떤 GSI 경로로도 읽히지 않음 → projection 제거로 hot partition + write amplification 동시 제거 |
+| REF_STATE#ALL | `gsi1pk` | `ref_state_to_item` | 어떤 GSI 경로로도 읽히지 않음 → projection 제거로 hot partition + write amplification 동시 제거 |
+| REPO_LEASE#ALL | `gsi1pk` | `repo_lease_to_item` | 어떤 GSI 경로로도 읽히지 않음 → projection 제거로 hot partition + write amplification 동시 제거 |
+
+> **GHAS_ALERT GSI1 repo-axis projection은 보존**: `ghas_alert_to_item`는 끝에서 `repo_axis_projection_for_item`로 GSI1 repo-axis projection(#23)을 merge한다. 이번 제거 대상은 **GSI2 list 키(`gsi2pk`/`gsi2sk`)뿐**이며, GHAS_ALERT의 GSI1 repo-axis projection은 건드리지 않는다.
+
+### 정적 유지 (retained, 의도적 비샤딩)
+
+| Surface | 이유 |
+|---|---|
+| SCAN_JOB_STATUS#completed, #dead_letter | GSI로 읽히지 않음(`get_queue_status`는 base-table `scan_all_pages`). cold 파티션을 샤딩하면 fan-out 비용만 증가. version_attr 없이 plain 유지 → 샤드/legacy와 명확히 구분, migration predicate가 status로 제외(review LOW #1). |
+| `RULE#<rule_id>` (gsi2pk; `finding_to_items`의 FINDING/OBSERVATION/STATE + `finding_state_event_to_item`) · `REPO#<repo_id>` (gsi2pk; `scan_job_to_item`) | 현재 GSI2 reader가 0건이라 이들도 엄밀히는 never-read다(review LOW #3·#6·#7). 그러나 dead-write **제거** 대상(GHAS/SECRET/REF_STATE/REPO_LEASE)과 달리 (1) 단일값 list-ALL 핫버킷이 아니라 `rule_id`/`repo_id`별로 **분산**돼 핫파티션이 아니고, (2) "rule별 finding 조회"·"repo별 job 조회"라는 **예약된 access pattern**(향후 product 기능)으로 의도적으로 둔 키다. 따라서 본 작업에서는 **보존**한다. 향후 read가 끝내 안 생기면 write amplification 절감을 위해 별도 follow-up으로 제거를 재검토한다. 제거 기준 = never-read **이고** 단일값 list-ALL 버킷 **이고** 예약 access pattern 없음 — 이 셋을 모두 만족하는 것만 제거한다. |
+| `_secret_evidence_link_pk` fallback (`SECRET_EVIDENCE#ALL` as **gsi1pk**) | linked finding/alert가 없을 때 쓰는 catch-all gsi1 버킷. 제거하는 SECRET_EVIDENCE GSI2 키(`gsi2pk`)와 무관한 별도 access pattern이므로 **건드리지 않는다**(review LOW #3). |
+| `transport.REPO_LIST_PK` / `TARGET_LIST_PK` 상수 | GSI1 list-axis legacy 정적값 = `partition_root`로 보존(migration-read 전용). gate clear 후 삭제. (`items.GHAS_ALERT_LIST_PK`는 GSI2 키 제거 시 함께 삭제 — migration-read 대상이 아님, M7. `items.SECRET_EVIDENCE_LIST_PK`는 `_secret_evidence_link_pk`의 gsi1 fallback이 계속 참조하므로 삭제하지 않는다.) |
+
+### 범위 밖 (out-of-scope)
+
+| Surface | hotness | 이유 |
+|---|---|---|
+| get_queue_status full-table scan | n/a | `get_queue_status`는 `scan_all_pages`(entityType filter)로 전체 base table을 읽는다. 이는 GSI 핫파티션과 독립된 full-scan 스케일 문제이며 본 list-axis 재설계 범위 밖이다 — 별도 issue로 다룬다(D5). |
+
+> REPO_LEASE#ALL(gsi1)·REF_STATE#ALL(gsi1)·GHAS_ALERT#ALL(gsi2)·SECRET_EVIDENCE#ALL(gsi2)은 더 이상 out-of-scope가 아니다. 모두 never-read dead-write 키로 확정되어 **in-scope 제거 대상**으로 옮겼다(위 "dead-write GSI 키 제거" 절, D3·D4).
+
+> **REF_STATE / REPO# base-table PK 공유 주의 (review LOW #2)**: REF_STATE 행은 `PK=REPO#<repo_id>`로 repo-axis FINDING 행과 같은 base-table 파티션 family를 공유한다. 그러나 (1) REF_STATE는 `REPO_AXIS_ENTITY_TYPES`에 없어 repo-axis backfill이 무시하고, (2) base-table read(`get_ref_state` point get, `list_ref_states` `PK=REPO#<id>` query)는 `gsi1pk` 제거와 무관하다. 즉 REPO# PK를 공유한다고 해서 REF_STATE가 repo-axis 샤딩에 참여하지 않으며, dead `gsi1pk=REF_STATE#ALL` 제거 후에도 base-table 접근은 영향 없다 — 독자가 헷갈리지 않도록 명시.
+
+**미래 안전망**: 미래에 fleet-wide lease/ref 조회가 필요해지면: (a) 운영용·저빈도면 `get_queue_status`처럼 `scan_all_pages`+entityType filter, (b) 진짜 list-all GSI 쿼리이고 fleet 규모가 ALL 파티션을 위협하면 본 설계의 `ListAxisSpec`(`shard_material=repo_id`, shard_count=16, version=2)을 추가해 동일 패턴으로 샤딩. 구체적 read 경로가 생기기 전까지는 키 자체를 두지 않는다(dead write 제거 후 재도입은 read 경로와 함께).
+
+## 에러 처리 / fail-closed
+
+- **읽기 fail-closed (R4)**: `read_list_axis`/`read_list_axis_ordered`는 임의 샤드 쿼리가 raise하면 그대로 전파한다. 부분 결과가 완전 결과로 보이면 안 된다 — 특히 `_read_scan_jobs_by_status`의 부분 candidate set은 lease fairness를 깨고, `read_recent_repo_metadata`의 부분 merge는 newest-first top-limit를 누락시킨다(`read_repo_axis`와 동일 불변식). 병렬 fan-out을 채택해도 한 future라도 실패하면 전파한다.
+- **쓰기 conditional 일관성**: backfill `update_item`은 `attribute_not_exists(<version_attr>)` condition으로 이중 backfill/이미-샤드된 행 clobber를 막는다. condition 실패는 `skipped`로 분류(이미 샤드됨). 그 외 예외는 `failed`. 한 malformed legacy 행은 try 안에서 `failed`로 분류되어 전체 패스를 중단시키지 않는다.
+- **인덱스/속성명 정확성 (R8)**: reader는 `spec.index_name`(GSI1/GSI2)으로 쿼리하고 `spec.gsi_pk_field`/`spec.gsi_sk_field`로 KeyCondition·merge key를 만든다. generic `gsi_pk`/`gsi_sk`는 어떤 item에도 존재하지 않으므로 사용 금지 — 잘못 쓰면 빈 결과/정렬 파괴로 이어진다(review HIGH #2/#3).
+- **SCAN_RUN/SCAN_JOB 엔티티 가드**: list-axis projection은 해당 axis에만 적용한다. SCAN_JOB의 completed/dead_letter는 plain 유지(샤드 projection·version_attr 미적용)이고 migration predicate가 status로 제외, SCAN_RUN에 repo-axis projection이 잘못 적용되지 않음을 능동 단언+테스트로 고정.
+- **GSI eventual consistency (마이그레이션 윈도)**: GSI는 비동기로 갱신되므로 backfill 직후 행이 old 파티션에 잠시 남을 수 있다. 이 윈도에서 `include_legacy=False` read가 갓 backfill된 행을 놓칠 수 있으므로, 마이그레이션 윈도는 짧게 유지하고 누락 없는 결과가 필요한 검증은 `include_legacy=True`로 수행한다(GSI 쿼리는 strongly consistent read를 지원하지 않으므로 legacy 파티션까지 함께 읽어 보정한다; ordered read는 이때 full-prefix over-fetch — 정확성 절 참조).
+- **malformed gsi_sk 정렬 방어**: ordered merge는 absent/null `spec.gsi_sk_field`를 빈 문자열로 정규화해 내림차순 꼬리로 가라앉힌다.
+
+## 마이그레이션 완료 기준 (in-place conditional backfill + per-axis gate)
+
+`repo_axis_migration.py`의 메커닉/gate를 axis별로 그대로 적용한다(이 모듈은 프로덕션 rollout이 아니라 legacy read 경로 제거 gate를 정의한다).
+
+1. **in-place backfill**: legacy 행(=`item[spec.gsi_pk_field] == legacy 정적값` and `version_attr 없음`, SCAN_JOB은 추가로 `status in {pending,leased}`)을 `scan_all_pages`(entityType filter, strongly consistent)로 inventory하고, 각 행의 자체 속성에서 `list_axis_inputs(spec, item)`로 샤드 projection을 재계산해 같은 primary key 위에서 `update_item`(condition `attribute_not_exists(<version_attr>)`)으로 in-place 갱신. 행을 새 키로 복사하지 않는다.
+2. **per-axis inventory/remaining 보고**: GSI1 list-axis 4개에 대해 inventory/backfilled/skipped/failed/remaining을 보고한다. 대상 entityType:
+   - TARGET_LIST: `SCAN_TARGET`
+   - REPO_LIST: `REPO_META`
+   - SCAN_DATE: `SCAN_RUN`
+   - SCAN_JOB: `SCAN_JOB` (status ∈ {pending, leased}인 행만 — completed/dead_letter는 version_attr 없는 정상 상태이며 predicate가 status로 제외; inventory·backfill 둘 다에서 제외 — review LOW #1)
+   - (GHAS_ALERT / SECRET_EVIDENCE는 샤딩하지 않고 GSI2 키를 제거하므로 backfill/inventory 대상이 아니다 — D3.)
+3. **removal gate (#23 동일, axis별로 ALL 충족 시)**:
+   - (a) 모든 대상 entity에서 backfill report `remaining == 0`.
+   - (b) `include_legacy=False` 샤드-only read가 이전과 동일 논리 결과(집합/정렬/newest-first+limit/lease 후보 집합) 반환 — 샘플 검증.
+   - (c) `include_legacy=False` parity 테스트 통과.
+   - (d) 정상 샤드 read가 legacy 쿼리를 0회 발행함을 테스트로 확인.
+   - (e) legacy-only / sharded-only / mixed 상태 모두 테스트 커버(ordered read는 mixed에서 dedupe over-fetch 정확성 포함).
+   - (f) legacy read가 named `include_legacy` 플래그 뒤에 있어 제거가 기본 경로를 바꾸지 않음.
+4. **gate 충족 후**: 해당 axis의 `read_list_axis*`에서 `include_legacy` 분기 제거, 관련 호출자의 `include_legacy` 파라미터 정리, `transport.REPO_LIST_PK`/`TARGET_LIST_PK` 상수 삭제. (GHAS_ALERT/SECRET_EVIDENCE의 `gsi2pk`/`gsi2sk` 키와 `items.GHAS_ALERT_LIST_PK` 상수는 이 gate와 무관하게 M7 dead-key 제거에서 바로 삭제한다 — backfill 대상이 아님. `items.SECRET_EVIDENCE_LIST_PK` 상수는 `_secret_evidence_link_pk`의 gsi1 fallback이 계속 쓰므로 유지한다.)
+5. **신규 write는 day-1부터 샤드 파티션으로**. 코드 변경 후 old 파티션은 신규 write를 받지 않는다.
+
+이 gate는 GSI1 list-axis 4개의 legacy read 경로 제거에만 적용된다. GHAS_ALERT/SECRET_EVIDENCE는 GSI2로 읽히지 않아 backfill/gate 대상이 아니며, GSI2 키 제거는 read 회귀를 일으키지 않는다(D3). 미래에 GSI2 list read가 추가되면 그때 새 `ListAxisSpec`(GSI2)으로 샤딩하고 동일한 backfill/gate를 적용한다.
+
+## 테스트 전략
+
+- **공유 코어 추출 회귀**: `axis_shard`/`bucket_width`/`axis_material` 추출이 동작 무변경임을 #23 기존 repo-axis 테스트 그대로 통과로 확인. alias가 같은 bucket을 내는지 property test.
+- **샤드 결정성/균일성**: axis별 `shard_material`이 같은 입력에 같은 bucket, 입력 분포에 대해 4(SCAN_JOB)/8(나머지) bucket 근사 균일.
+- **인덱스 라우팅 (R8)**: 현재 샤드 axis 4개가 모두 `IndexName=GSI1`, `gsi1pk`/`gsi1sk`로 쿼리·정렬함을 단언. reader가 `spec.index_name`/`spec.gsi_pk_field`/`spec.gsi_sk_field`만 사용하므로 미래 GSI2 axis(가상)도 spec 필드로 정확히 라우팅됨을 단위 테스트로 고정(spec에서 잘못된 GSI/attr로 가지 않음).
+- **write mapper projection**: 각 write mapper가 샤드 `<gsi_pk_field>`(올바른 `partition_root#SHARD#<bucket>`) + 3개 메타 속성 + 올바른 `<gsi_sk_field>`를 emit, sort key 값 불변. completed/dead_letter SCAN_JOB은 plain gsi1pk + version_attr 부재.
+- **flat scatter parity**: `list_scan_targets`/`read_scan_runs_for_date`/`_read_scan_jobs_by_status`가 단일-파티션 시절과 동일 집합 반환(fake boto3 + dynalite). dedupe-by-(PK,SK)-prefer-higher-version 정확성.
+- **ordered+limit merge parity (REPO_LIST 핵심)**: 여러 샤드에 분산된 데이터에서 `read_recent_repo_metadata(limit)`가 전역 newest-first 상위 limit개를 정확히 반환. 경계: (i) 전 데이터가 한 샤드에 몰린 경우, (ii) 동률 `gsi1sk`, (iii) limit > total, (iv) limit=None, (v) malformed/absent `gsi1sk`가 꼬리로 가라앉음, **(vi) `include_legacy=True`에서 legacy+shard 동일 `(PK,SK)` 중복 행이 섞여도 정확한 distinct top-limit 반환(over-fetch/dedupe 보정 — review HIGH #1)**.
+- **lease fairness + 비용**: 4-way 샤드에 분산된 pending/leased에서 `lease_next_scan_job`이 샤딩 전과 동일한 lease 순서/eligible 선택. `job_id`가 status 전이에 보존됨(같은 job이 항상 같은 bucket). **pending/leased 병렬 fan-out 비용/latency 측정 테스트 또는 벤치(D1 수용 기준)**.
+- **fail-closed**: 임의 샤드 쿼리 예외가 reader에서 전파되고 부분 결과를 반환하지 않음(직렬·병렬 양쪽, R4).
+- **마이그레이션 상태 머신**: legacy-only / sharded-only / mixed 각각에서 read 정확성. `include_legacy=True`가 legacy 파티션을 fan-out에 추가하고 dedupe가 higher-version 선호. `include_legacy=False`가 legacy 쿼리 0회.
+- **ordered reader zero-legacy-query (gate (d), review LOW #8)**: flat reader뿐 아니라 **ordered reader도 명시 커버** — `read_list_axis_ordered(include_legacy=False)`는 정확히 `shard_count`회 쿼리(legacy 파티션 0회), 각 쿼리 early-`Limit`(=limit) ON; `read_list_axis_ordered(include_legacy=True)`는 `shard_count+1`회(legacy 분기 포함)에 early-`Limit` OFF(full-prefix over-fetch)임을 쿼리 호출 수/인자로 단언. flat과 ordered 두 reader 모두에서 gate (d)를 고정.
+- **backfill 메커닉**: conditional update가 in-place 갱신, 이미-샤드 행은 skipped, malformed 행은 failed로 분류되고 패스 중단 없음, re-inventory remaining 정확.
+- **엔티티 가드**: SCAN_RUN에 repo-axis projection 미적용 + SCAN_DATE `list_axis_inputs`가 SCAN_RUN을 능동 단언(review MEDIUM #4); SCAN_JOB completed/dead_letter에 샤드 projection 미적용 + migration predicate가 count/mutate 둘 다에서 제외(review LOW #1).
+- **dead-write GSI 키 제거 (D3·D4)**: `ghas_alert_to_item`·`secret_evidence_to_item`가 `gsi2pk`/`gsi2sk`를 더 이상 emit하지 않음; `ref_state_to_item`·`repo_lease_to_item`가 `gsi1pk`를 더 이상 emit하지 않음을 단언. **기존 positive 단언 회귀 방지**: `tests/test_incremental_scan_storage.py:284-287`의 `gsi1pk == "REPO_LEASE#ALL"`/`gsi1sk` positive 단언을 키 부재 단언으로 교체(미교체 시 `KeyError`로 회귀); 4개 제거 키에 대한 기존 positive 단언을 grep으로 정리(review LOW #2/#5). `_secret_evidence_link_pk`의 gsi1 fallback은 불변(SECRET_EVIDENCE gsi2 키 제거와 무관). REF_STATE가 REPO# PK 공유에도 repo-axis/list-axis 어디에도 안 들어감. base-table read(`get_ref_state` point get, `list_ref_states` `PK=REPO#` query)가 gsi1pk 제거 후에도 동일 결과.
+- **REPO_LIST in-place 덮어쓰기**: `put_repo_metadata` full put이 `gsi1sk`를 원자적으로 교체해 stale newest-first 엔트리 없음(review MEDIUM #5).
+- **GHAS GSI1 repo-axis projection 보존**: GHAS_ALERT 행이 GSI2 키 제거 후에도 GSI1 repo-axis projection(`repoAxisVersion`/`gsi1pk`/`gsi1sk`, #23)을 그대로 가짐.
+
+## 마일스톤
+
+1. **M1 — 공유 코어 추출**: `repo_axis.py`에서 `axis_shard`/`bucket_width`/`axis_material` 추출 + alias. #23 테스트 그대로 통과.
+2. **M2 — list_axis.py**: `ListAxisSpec`(index_name/gsi_sk_field 포함), `ListAxisKey`, axis별 spec 상수, `list_axis_inputs`(엔티티 가드 단언 포함), 키 헬퍼.
+3. **M3 — list_axis_reader.py**: `read_list_axis`(flat) + `read_list_axis_ordered`(k-way merge + dedupe-aware over-fetch). reader 단위 테스트(parity/fail-closed/limit/인덱스 라우팅/중복 보정).
+4. **M4 — write 경로 전환**: 4개 GSI1 write mapper(`scan_target_to_item`/`repo_metadata_to_item`/`scan_run_summary_to_item`/`scan_job_to_item`)를 샤드 projection으로 전환. write mapper 테스트.
+5. **M5 — read 경로 전환**: `list_scan_targets`/`read_recent_repo_metadata`/`read_scan_runs_for_date`/`_read_scan_jobs_by_status` 교체. SCAN_JOB read 경로는 pending/leased fan-out을 병렬(thread pool) 발행(D1, fail-closed 유지). TODO 주석 제거. store-level parity 테스트(lease fairness + 병렬 fan-out 비용/latency 측정 포함).
+6. **M6 — list_axis_migration.py**: per-axis inventory/backfill/gate-report(SCAN_JOB status 필터 포함). backfill/gate 테스트. 대상은 GSI1 list-axis 4개만(GHAS/SECRET_EVIDENCE는 샤딩하지 않으므로 backfill 없음).
+7. **M7 — dead-write GSI 키 제거 (D3·D4)**: `ghas_alert_to_item`·`secret_evidence_to_item`에서 `gsi2pk`/`gsi2sk`를 삭제하고(GHAS_ALERT의 GSI1 repo-axis projection은 보존), `ref_state_to_item`·`repo_lease_to_item`에서 `gsi1pk`를 삭제한다. 키별로 **(신규) 키 부재 단언 추가 + (기존) positive 단언 정리**를 한 쌍으로 처리한다 — 특히 `tests/test_incremental_scan_storage.py:284-287`의 `lease_item["gsi1pk"] == "REPO_LEASE#ALL"`/`gsi1sk` positive 단언은 제거 시 `KeyError`로 깨지므로 키 부재 단언으로 교체해야 한다. 4개 키 각각에 대해 `gsi2pk`/`gsi2sk`(GHAS/SECRET), `gsi1pk`(REF_STATE/REPO_LEASE) positive 단언을 grep으로 찾아 정리한다. REF_STATE/REPO# base-table PK 공유 주석 유지.
+
+(rollout/canary는 #23과 동일하게 범위 밖. legacy 상수/`include_legacy` 분기 삭제는 gate 충족 후 별도 정리 작업.)
+
+## 확정 결정 (자문자답)
+
+이 절은 직전 초안의 열린 질문을 자문자답으로 확정한 결과다. 모든 항목은 실제 코드 검증(store.py에 GSI2 쿼리 0건, `GHAS_ALERT#ALL`·`SECRET_EVIDENCE#ALL`·`REF_STATE#ALL`·`REPO_LEASE#ALL`는 어떤 GSI 경로로도 읽히지 않음) 위에서 권장안을 채택했다.
+
+**D1 — SCAN_JOB 큐 샤드 수와 lease fan-out 비용**
+- 문: `lease_next_scan_job`이 pending+leased를 fan-out하므로 `shard_count=N`이면 lease 시도당 `2N` GSI 쿼리. 4로 줄일까, 16 유지하고 병렬화할까?
+- 답: `SCAN_JOB_AXIS.shard_count=4` + pending/leased fan-out 병렬 실행(thread pool)을 본 작업에 포함한다. 4는 직렬 worst-case도 8 쿼리로 묶고, 병렬화가 latency를 partition-RTT 1회 수준으로 bound한다. 16으로 올리는 것은 go-live 측정(p95 lease latency) 후 `scanJobAxisVersion=2` rehash로 처리하는 튜닝 follow-up이며 본 spec의 blocker가 아니다.
+
+**D2 — 워커 폴링 backoff**
+- 문: 샤딩으로 큐 read CU가 (4×) 증가한다. job 미발견 시 exponential backoff를 본 작업에 넣을까?
+- 답: 별도 follow-up issue로 분리한다(worker 런타임 정책이지 schema 샤딩이 아님). 단 read CU 증가가 실재하므로 "강력 권장" 후속으로 명시한다.
+
+**D3 — GHAS_ALERT#ALL / SECRET_EVIDENCE#ALL (GSI2)**
+- 문: 이 둘을 GSI2에서 샤드할까?
+- 답: 샤드하지 않고 미사용 GSI2 projection을 제거한다. 검증 결과 store.py에 GSI2 쿼리가 전혀 없어 두 파티션은 write-only·never-read다. never-read 인덱스를 샤딩하면 read 이득 0에 scatter 코드만 늘어난다. `gsi2pk`/`gsi2sk`를 `ghas_alert_to_item`·`secret_evidence_to_item`에서 제거하면 hot partition과 write amplification(put당 GSI item 1개)을 동시에 없앤다. 미래에 GSI2 list read가 생기면 그때 `ListAxisSpec`(index_name=GSI2)으로 샤딩한다.
+
+**D4 — REF_STATE#ALL / REPO_LEASE#ALL (dead gsi1 write)**
+- 문: 주석만 달까, 제거할까?
+- 답: dead gsi1 write를 실제 제거한다(M7). 둘 다 어떤 GSI 경로로도 읽히지 않음을 검증했으므로 `gsi1pk`를 `ref_state_to_item`·`repo_lease_to_item`에서 빼 write amplification을 줄인다. base-table read(point get / `PK=REPO#<id>` query)는 영향 없음.
+
+**D5 — get_queue_status 전체 스캔**
+- 문: `get_queue_status`의 base-table `scan_all_pages`를 GSI query로 바꿀까?
+- 답: 별도 issue로 분리한다. 이는 GSI 핫파티션과 독립된 full-scan 스케일 문제이며 본 list-axis 재설계 범위 밖이다.
+
+확정 결과: GSI1 list-axis 4개만 샤딩(TARGET_LIST, REPO_LIST, SCAN_DATE, SCAN_JOB_STATUS) + dead-write GSI 키 4개 제거(`GHAS_ALERT#ALL`·`SECRET_EVIDENCE#ALL` gsi2, `REF_STATE#ALL`·`REPO_LEASE#ALL` gsi1). 본 spec에 남은 열린 질문 없음(D1 튜닝·D2·D5는 측정/별도 issue로 위임).
+
+---
+
+참조한 실제 코드 경로(repo 루트 기준 상대 경로):
+- `src/security_scanner/storage/adapters/nosql_db/repo_axis.py` — `RepoAxisKey`, `repo_axis_shard`/`_bucket_width`/`repo_axis_material`, `REPO_AXIS_SHARD_INFIX`, `repo_axis_inputs`(SCAN_RUN 미지원→ValueError)
+- `.../nosql_db/repo_axis_reader.py` — `read_repo_axis`, `_query_partition`(GSI1 하드코딩), `_dedupe_and_sort`, fail-closed 불변식
+- `.../nosql_db/repo_axis_migration.py` — `backfill_repo_axis`, `_backfill_one`, `is_legacy_repo_axis_item`, removal gate
+- `.../nosql_db/transport.py` — `REPO_LIST_PK`/`TARGET_LIST_PK`, `GSI1_NAME="GSI1"`/`GSI2_NAME="GSI2"`
+- `.../nosql_db/items.py` — GSI1 샤딩 대상 4개 mapper(`scan_target_to_item`/`repo_metadata_to_item`/`scan_run_summary_to_item`/`scan_job_to_item`) + dead-key 제거 대상 mapper(`ghas_alert_to_item`/`secret_evidence_to_item`의 gsi2, `ref_state_to_item`/`repo_lease_to_item`의 gsi1), `GHAS_ALERT_LIST_PK`/`SECRET_EVIDENCE_LIST_PK`, `_secret_evidence_link_pk`(gsi1pk fallback, 불변), `scan_run_summary_to_item`(repo-axis projection 미호출)
+- `.../nosql_db/store.py` — `list_scan_targets`(TODO 179-181), `read_recent_repo_metadata`(ScanIndexForward=False+limit), `read_scan_runs_for_date`(sk 조건 없음), `_read_scan_jobs_by_status`(sk 조건 없음), `lease_next_scan_job`(pending+leased→sort), `_scan_job_lease_sort_key`, `put_repo_metadata`(full put_item), `_is_conditional_check_failure`
+- `.../nosql_db/access.py` — `query_all_pages`(limit=총량 상한, `items[:limit]`), `scan_all_pages`
+- `.../docs/workbench/dynamodb-schema-review-complete.md` — `allPk=ALL` hot partition P2 note
diff --git a/docs/workbench/specs/scale-redesign-list-axis-sharding/milestones.md b/docs/workbench/specs/scale-redesign-list-axis-sharding/milestones.md
new file mode 100644
index 0000000..e82f6fe
--- /dev/null
+++ b/docs/workbench/specs/scale-redesign-list-axis-sharding/milestones.md
@@ -0,0 +1,53 @@
+# Milestones — scale-redesign-list-axis-sharding
+
+Source: design.md (approved + multi-agent reviewed). Full autopilot. All done.
+Evidence: full suite 672 passed; new/changed files ruff-clean (advisory).
+
+## M1 공유 샤드 코어 추출 — done
+- `axis_core.py` (axis_shard/bucket_width/axis_material); repo_axis.py re-exports
+  as back-compat aliases. Evidence: #23 repo-axis tests stay green (74 passed).
+
+## M2 list_axis.py — done
+- ListAxisSpec (index_name/gsi_pk_field/gsi_sk_field + version/shard/count attrs),
+  4 axis constants (TARGET_LIST/REPO_LIST/SCAN_DATE=8, SCAN_JOB=4), ListAxisKey,
+  list_axis_inputs (entity guards). Evidence: test_list_axis_sharding (10).
+
+## M3 list_axis_reader.py — done
+- read_list_axis (flat, +parallel for SCAN_JOB) + read_list_axis_ordered (k-way
+  merge, dedupe-aware over-fetch). fail-closed, spec-parameterized attrs.
+  Evidence: test_list_axis_reader (fan-out/zero-legacy/dedupe/fail-closed/
+  ordered top-limit/limit=None/limit>total/ordered-include-legacy).
+
+## M4 write 경로 전환 — done
+- scan_target/repo_metadata/scan_run_summary/scan_job mappers → sharded
+  projection (scan_job pending/leased only; cold stay plain). regression scan
+  allows list_axis.py as the list-axis #SHARD# source. Evidence: mapper-shape
+  tests updated to sharded format.
+
+## M5 read 경로 전환 — done
+- list_scan_targets / read_recent_repo_metadata (ordered) /
+  read_scan_runs_for_date / _read_scan_jobs_by_status (parallel pending/leased)
+  → fan-out readers; include_legacy params. TODO removed. Evidence: store-level
+  parity tests updated to fan-out (660 passed).
+
+## M6 list_axis_migration.py — done
+- per-axis inventory/backfill/gate (4 GSI1 axes; SCAN_JOB status filter excludes
+  completed/dead_letter). Evidence: test_list_axis_migration (in-place backfill,
+  gate_clear, status filter, idempotent re-run, inventory no-mutate).
+
+## M7 dead-write GSI 키 제거 — done
+- dropped gsi2pk/gsi2sk from ghas_alert_to_item & secret_evidence_to_item (GHAS
+  GSI1 repo-axis + secret-evidence gsi1 link fallback preserved); dropped gsi1pk
+  from ref_state_to_item & repo_lease_to_item; removed unused GHAS_ALERT_LIST_PK.
+  Updated existing positive assertion (test_incremental_scan_storage.py:284).
+  Evidence: test_dead_write_gsi_keys (4) + updated round-trip test.
+
+## Operability addition (beyond documented M1–M7, #37 precedent)
+- `security-scanner backfill-list-axis [--dry-run]` CLI (cli/commands/migrate.py)
+  so the migration is runnable on the host. Evidence: test_cli_backfill_repo_axis
+  list-axis cases (dry-run/apply/backend-guard) + registration-order lock updated.
+
+## Deferred (environment-impossible only)
+- Live `backfill-list-axis` run against the Tailscale Ubuntu host DynamoDB-local:
+  no reachable endpoint from this session. Mechanics + CLI + tests verified here;
+  operator runs `--dry-run` then apply on the host.
diff --git a/docs/workbench/specs/scale-redesign-list-axis-sharding/requirements.md b/docs/workbench/specs/scale-redesign-list-axis-sharding/requirements.md
new file mode 100644
index 0000000..9e05215
--- /dev/null
+++ b/docs/workbench/specs/scale-redesign-list-axis-sharding/requirements.md
@@ -0,0 +1,64 @@
+# 요구사항 — 리스트/인덱스 GSI 파티션 클라우드-스케일 핫파티션 재설계 (issue #23 후속)
+
+이 문서는 `design.md`의 승인된 요구사항(approved requirements) 목록이다. 설계 세부는 `design.md`를 따르며, 여기서는 in-scope 표면, 계약, 명시적 제외 항목만 고정한다.
+
+## In-scope 표면
+
+본 작업의 in-scope는 두 갈래다: (A) GSI1 list-axis 4개 샤딩, (B) never-read dead-write GSI 키 4개 제거. 확정 결정은 `design.md`의 "확정 결정 (자문자답)" 절(D1–D5)을 따른다.
+
+### A. 샤딩 적용 대상 (GSI1 list-axis 4개)
+
+단일 정적값을 GSI1 partition key로 쓰는 핫파티션을 #23 `RepoAxisKey` 패턴을 일반화한 list-axis 샤드 헬퍼로 분산한다.
+
+| Axis | 인덱스 | hotness | shard count | read 변경 |
+|---|---|---|---|---|
+| TARGET_LIST#ALL | GSI1 (`gsi1pk`/`gsi1sk`) | write+read | 8 | `list_scan_targets` → flat scatter |
+| REPO_LIST#ALL | GSI1 (`gsi1pk`/`gsi1sk`) | write+read | 8 | `read_recent_repo_metadata` → ordered k-way merge + limit |
+| SCAN_DATE#&lt;date&gt; | GSI1 (`gsi1pk`/`gsi1sk`) | write+read | 8 | `read_scan_runs_for_date` → flat scatter |
+| SCAN_JOB_STATUS#pending, #leased | GSI1 (`gsi1pk`/`gsi1sk`) | write+read | 4 (확정, D1) | `_read_scan_jobs_by_status` → flat scatter (pending/leased만), pending/leased fan-out 병렬 실행 |
+
+### B. dead-write GSI 키 제거 (샤딩 아님)
+
+never-read GSI projection은 샤딩 이득이 0이고 write amplification만 만든다. store.py 검증 결과 아래 4개 키는 어떤 GSI 경로로도 읽히지 않으므로 write mapper에서 제거한다(D3·D4).
+
+| Surface | 제거 키 | write mapper | rationale |
+|---|---|---|---|
+| GHAS_ALERT#ALL | `gsi2pk`/`gsi2sk` | `ghas_alert_to_item` | GSI2 never-read → projection 제거로 hot partition + write amplification 동시 제거. GSI1 repo-axis projection(#23)은 보존. |
+| SECRET_EVIDENCE#ALL | `gsi2pk`/`gsi2sk` | `secret_evidence_to_item` | GSI2 never-read → projection 제거. `_secret_evidence_link_pk`의 gsi1 fallback은 불변. |
+| REF_STATE#ALL | `gsi1pk` | `ref_state_to_item` | GSI1 never-read → projection 제거. base-table read(point get / `PK=REPO#` query)는 영향 없음. |
+| REPO_LEASE#ALL | `gsi1pk` | `repo_lease_to_item` | GSI1 never-read → projection 제거. base-table read는 영향 없음. |
+
+## 승인된 계약 (approved contracts)
+
+1. **공유 코어 재사용 + back-compat**: 샤드 해시(`axis_shard`)·material(`axis_material`)·bucket width(`bucket_width`)는 `repo_axis.py`에서 추출한 단일 구현을 repo-axis와 list-axis가 공유한다. `repo_axis.py`는 기존 이름을 alias로 보존해 #23 import/테스트가 깨지지 않는다.
+
+2. **durable shard-count = 스키마 계약 (런타임 knob 아님)**: 각 axis의 `<axis>Version=1`이 그 `shardCount`를 고정한다. 샤드 수 변경은 새 version(`=2`) + 재해시 마이그레이션(또는 active-version fan-out)을 요구한다. shard count는 환경변수/런타임 설정으로 바꾸지 않는다.
+
+3. **멀티-인덱스 정확성 (spec 파라미터화)**: `ListAxisSpec`은 `index_name`, `gsi_pk_field`, `gsi_sk_field`를 명시 보유하고 reader는 이 spec 필드만 사용한다(generic `gsi_pk`/`gsi_sk` placeholder 금지). 현재 활성 샤드 axis 4개는 모두 GSI1(`index_name=GSI1_NAME`, `gsi1pk`/`gsi1sk`)이지만, spec 파라미터화로 미래에 GSI2 axis가 추가되어도 reader 코드 변경 없이 라우팅된다. (GHAS_ALERT/SECRET_EVIDENCE는 GSI2 never-read이므로 샤딩하지 않고 키를 제거한다 — D3.)
+
+4. **ordered/limited read 보존**: `read_recent_repo_metadata(limit)`의 `ScanIndexForward=False` + limit 계약을 보존한다. per-shard 내림차순 + k-way descending merge로 전역 newest-first 상위 limit개를 정확히 재구성한다. dedupe가 개입하는 `include_legacy=True` 윈도에서는 early-`Limit` 최적화를 끄고 full-prefix over-fetch 후 dedupe→truncate해 전역 top-limit 정확성을 보존한다(중복으로 인한 short/wrong-tail 금지).
+
+5. **fail-closed scatter-gather**: 임의 샤드 쿼리 실패는 전파한다. 부분 결과가 완전 결과처럼 보여서는 안 된다(직렬·병렬 fan-out 모두).
+
+6. **migration-only legacy fallback + in-place backfill gate**: legacy 정적 파티션은 `include_legacy=True` 뒤에서만 읽는다(기본 `False`). backfill은 같은 primary key 위에서 `update_item`(condition `attribute_not_exists(<version_attr>)`)으로 in-place 수행하며 행을 새 키로 복사하지 않는다. legacy read 경로 제거는 **per-axis remaining==0** + parity/zero-legacy-query 테스트 gate 충족 시에만 허용한다. legacy 식별 predicate는 `item[spec.gsi_pk_field]`(axis별 올바른 GSI)만 읽는다. 이 backfill/gate는 GSI1 list-axis 4개에만 적용된다(dead-write 키 제거는 backfill 대상이 아님).
+
+7. **lease fan-out 확정 (D1)**: `lease_next_scan_job`은 lease 시도당 2N(=N pending + N leased) GSI 쿼리를 발행한다. 확정 결정은 SCAN_JOB shard_count=4 + pending/leased fan-out **병렬 실행(thread pool)을 본 작업에 포함**. fail-closed는 병렬 fan-out에서도 유지(한 future 실패도 전파). go-live 측정 후 부족하면 `scanJobAxisVersion=2` rehash로 16+ 튜닝하는 follow-up이며 본 spec의 blocker가 아니다. "측정 없이 prod 발견" 금지.
+
+## 명시적 제외 (out-of-scope / 비샤딩 retained)
+
+- **SCAN_JOB_STATUS#completed, #dead_letter**: GSI로 읽히지 않는 cold 파티션 — plain pk 유지(version_attr 없음). migration predicate가 status로 inventory·backfill 양쪽에서 제외.
+- **`_secret_evidence_link_pk` (gsi1pk catch-all = `SECRET_EVIDENCE#ALL`)**: linked finding/alert가 없을 때 쓰는 gsi1 catch-all 버킷. 제거하는 SECRET_EVIDENCE GSI2 키(`gsi2pk`)와 무관한 별도 access pattern이라 **건드리지 않음**(D3).
+- **`RULE#<rule_id>` / `REPO#<repo_id>` gsi2 projection** (`finding`·`observation`·`state`·`state_event`·`scan_job` mappers): 현재 GSI2 reader 0건이라 엄밀히 never-read이나, 단일값 list-ALL 핫버킷이 아니라 key별 **분산**이고 "rule별 finding/repo별 job 조회"라는 **예약 access pattern**이라 **보존**한다. dead-write 제거 기준 = never-read ∧ 단일값 list-ALL ∧ 예약 access pattern 없음(세 조건 동시) — 이들은 미충족. 향후 read 미생성 시 write-amp 절감을 위해 별도 follow-up으로 제거 재검토(review LOW #3·#6·#7).
+- **rollout / canary**: 본 작업 범위 밖(#23과 동일). legacy 상수/`include_legacy` 분기 삭제는 gate 충족 후 별도 정리 작업.
+- **소스 코드 변경**: 본 spec은 설계 전용. 구현은 별도 단계.
+
+> REPO_LEASE#ALL(gsi1)·REF_STATE#ALL(gsi1)은 더 이상 out-of-scope가 아니다. never-read dead-write 키로 확정되어 **in-scope 제거 대상**(위 "B. dead-write GSI 키 제거")으로 옮겼다(D4). REF_STATE는 `REPO#` base-table PK를 공유하지만 repo-axis/list-axis 어디에도 참여하지 않으며, `gsi1pk=REF_STATE#ALL` 제거 후에도 base-table 접근(point get / `PK=REPO#` query)은 영향 없다.
+
+## Follow-up (별도 issue)
+
+- **D2 — 워커 폴링 backoff**: 샤딩으로 scan-job read CU가 (4×) 증가한다(throttle 회피 trade-off는 옳음). job 미발견 시 워커 exponential backoff는 worker 런타임 정책이므로 schema 샤딩과 분리해 별도 follow-up issue로 다룬다(read CU 증가가 실재하므로 강력 권장).
+- **D5 — get_queue_status full-table scan**: `get_queue_status`의 base-table `scan_all_pages`는 GSI 핫파티션과 독립된 full-scan 스케일 문제 — 별도 issue.
+
+## 열린 질문
+
+없음. 직전 초안의 모든 열린 질문은 `design.md`의 "확정 결정 (자문자답)"(D1–D5)에서 확정했다. D1 튜닝(16+ rehash)·D2·D5는 측정/별도 issue로 위임한다.
diff --git a/src/security_scanner/cli/commands/migrate.py b/src/security_scanner/cli/commands/migrate.py
index c223ad1..2d19a8d 100644
--- a/src/security_scanner/cli/commands/migrate.py
+++ b/src/security_scanner/cli/commands/migrate.py
@@ -16,6 +16,10 @@
 
 from security_scanner.cli._args import add_storage_args
 from security_scanner.cli._store import dynamodb_config_from_args
+from security_scanner.storage.adapters.nosql_db.list_axis_migration import (
+    backfill_list_axis,
+    inventory_legacy_list_axis,
+)
 from security_scanner.storage.adapters.nosql_db.repo_axis_migration import (
     backfill_repo_axis,
     inventory_legacy_repo_axis,
@@ -38,6 +42,18 @@ def register(subparsers) -> None:
     )
     parser.set_defaults(func=cmd_backfill_repo_axis)
 
+    list_parser = subparsers.add_parser(
+        "backfill-list-axis",
+        help="Migrate legacy list/index GSI rows to the sharded layout (#23).",
+    )
+    add_storage_args(list_parser, include_jsonl_path="", default_backend="dynamodb")
+    list_parser.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="Report legacy inventory per axis without mutating anything.",
+    )
+    list_parser.set_defaults(func=cmd_backfill_list_axis)
+
 
 def _table_from_args(args: argparse.Namespace):
     config = dynamodb_config_from_args(args)
@@ -79,3 +95,39 @@ def cmd_backfill_repo_axis(args: argparse.Namespace) -> int:
         return 0
     print("gate: NOT CLEAR (legacy rows remain or failures occurred)")
     return 1
+
+
+def cmd_backfill_list_axis(args: argparse.Namespace) -> int:
+    """Run the list-axis backfill (or a read-only inventory with --dry-run)."""
+    if args.storage_backend != "dynamodb":
+        message = (
+            "backfill-list-axis requires --storage-backend dynamodb "
+            f"(got '{args.storage_backend}')"
+        )
+        print(message, file=sys.stderr)
+        return 2
+
+    table = _table_from_args(args)
+    if args.dry_run:
+        inventory = inventory_legacy_list_axis(table)
+        print("list-axis legacy inventory (dry-run, no mutation):")
+        for axis, count in inventory.items():
+            print(f"  {axis}: {count}")
+        print(f"  total legacy rows: {sum(inventory.values())}")
+        return 0
+
+    report = backfill_list_axis(table)
+    print("list-axis backfill report:")
+    for axis, counts in report.by_axis.items():
+        print(
+            f"  {axis}: inventory={counts.inventory} "
+            f"backfilled={counts.backfilled} skipped={counts.skipped} "
+            f"failed={counts.failed} remaining={counts.remaining}"
+        )
+    # gate_clear only inspects ``remaining``, so failures are checked separately.
+    no_failures = not any(counts.failed for counts in report.by_axis.values())
+    if not (report.gate_clear and no_failures):
+        print("gate: NOT CLEAR (legacy rows remain or failures occurred)")
+        return 1
+    print("gate: CLEAR (no legacy list-axis rows remain)")
+    return 0
diff --git a/src/security_scanner/storage/adapters/nosql_db/axis_core.py b/src/security_scanner/storage/adapters/nosql_db/axis_core.py
new file mode 100644
index 0000000..9a53e11
--- /dev/null
+++ b/src/security_scanner/storage/adapters/nosql_db/axis_core.py
@@ -0,0 +1,40 @@
+"""Axis-neutral shard primitives shared by repo-axis and list-axis (issue #23).
+
+These three helpers are the common core extracted from ``repo_axis.py`` so the
+repo-axis (#23) and list-axis (scale redesign) sharding families derive identical
+buckets from one implementation. This module imports nothing from the package
+(only ``hashlib``), so every other sharding module can depend on it without an
+import cycle.
+"""
+
+from __future__ import annotations
+
+import hashlib
+
+
+def bucket_width(shard_count: int) -> int:
+    """Return how many characters the highest bucket index (``shard_count - 1``) has."""
+    return len(f"{shard_count - 1}")
+
+
+def axis_shard(shard_material: str, *, shard_count: int) -> str:
+    """Map stable material onto a zero-padded bucket in ``[0, shard_count)``.
+
+    Hashes the UTF-8 bytes of ``shard_material`` with SHA-256 and reduces the
+    digest modulo ``shard_count``; the result is padded to ``bucket_width`` digits.
+    Raises ``ValueError`` when ``shard_count`` is not positive.
+    """
+    if shard_count <= 0:
+        raise ValueError("shard_count must be positive")
+    raw = hashlib.sha256(shard_material.encode("utf-8")).digest()
+    index = int.from_bytes(raw, "big") % shard_count
+    return str(index).zfill(bucket_width(shard_count))
+
+
+def axis_material(*parts: str) -> str:
+    """Concatenate shard inputs with a NUL character between consecutive parts.
+
+    The separator keeps ``("a", "bc")`` distinct from ``("ab", "c")``.
+    """
+    separator = "\0"
+    return separator.join(parts)
diff --git a/src/security_scanner/storage/adapters/nosql_db/items.py b/src/security_scanner/storage/adapters/nosql_db/items.py
index 20fc113..82b139c 100644
--- a/src/security_scanner/storage/adapters/nosql_db/items.py
+++ b/src/security_scanner/storage/adapters/nosql_db/items.py
@@ -14,13 +14,16 @@
 
 from security_scanner.catalog.scan_target import ScanTarget
 from security_scanner.core.finding.model import Finding
+from security_scanner.storage.adapters.nosql_db.list_axis import (
+    REPO_LIST_AXIS,
+    SCAN_DATE_AXIS,
+    SCAN_JOB_AXIS,
+    TARGET_LIST_AXIS,
+    list_axis_projection_for_item,
+)
 from security_scanner.storage.adapters.nosql_db.repo_axis import (
     repo_axis_projection_for_item,
 )
-from security_scanner.storage.adapters.nosql_db.transport import (
-    REPO_LIST_PK,
-    TARGET_LIST_PK,
-)
 from security_scanner.storage.base import (
     FindingStateEvent,
     GhasAlertRecord,
@@ -71,7 +74,9 @@ class ScanRunSummary:
     artifact_uri: str | None = None
 
 
-GHAS_ALERT_LIST_PK = "GHAS_ALERT#ALL"
+# GHAS_ALERT_LIST_PK removed with its never-read GSI2 #ALL key (#23 follow-on D3).
+# SECRET_EVIDENCE_LIST_PK is retained: it is the gsi1 catch-all fallback in
+# _secret_evidence_link_pk (not the removed GSI2 key).
 SECRET_EVIDENCE_LIST_PK = "SECRET_EVIDENCE#ALL"
 SCAN_HEALTH_PK = "SCAN_HEALTH"
 
@@ -164,13 +169,11 @@ def split_target_name(target_name: str) -> tuple[str, str]:
 def repo_metadata_to_item(repo: RepoMetadata) -> dict[str, Any]:
     """Map repository metadata into the NoSQL item shape."""
     updated_at = repo.updated_at_iso or now_iso()
-    return without_none(
+    item = without_none(
         {
             "PK": f"REPO#{repo.repo_key}",
             "SK": "META",
             "entityType": "REPO_META",
-            "gsi1pk": REPO_LIST_PK,
-            "gsi1sk": f"UPDATED#{updated_at}#{repo.repo_key}",
             "createdAt": updated_at,
             "updatedAt": updated_at,
             "repoKey": repo.repo_key,
@@ -185,6 +188,8 @@ def repo_metadata_to_item(repo: RepoMetadata) -> dict[str, Any]:
             "latestCountsByLabel": dict(repo.latest_counts_by_label),
         }
     )
+    item.update(list_axis_projection_for_item(REPO_LIST_AXIS, item))
+    return item
 
 
 def repo_metadata_from_item(item: dict[str, Any]) -> RepoMetadata:
@@ -207,13 +212,11 @@ def repo_metadata_from_item(item: dict[str, Any]) -> RepoMetadata:
 def scan_run_summary_to_item(run: ScanRunSummary) -> dict[str, Any]:
     """Map a per-repository scan-run summary into a table item."""
     scan_at = run.scan_at_iso or now_iso()
-    return without_none(
+    item = without_none(
         {
             "PK": f"REPO#{run.repo_key}",
             "SK": f"SCAN_RUN#{scan_at}#{run.scan_run_id}",
             "entityType": "SCAN_RUN",
-            "gsi1pk": f"SCAN_DATE#{scan_date(scan_at)}",
-            "gsi1sk": f"{scan_at}#{run.repo_key}#{run.scan_run_id}",
             "createdAt": scan_at,
             "updatedAt": scan_at,
             "repoKey": run.repo_key,
@@ -226,6 +229,8 @@ def scan_run_summary_to_item(run: ScanRunSummary) -> dict[str, Any]:
             "artifactUri": run.artifact_uri,
         }
     )
+    item.update(list_axis_projection_for_item(SCAN_DATE_AXIS, item))
+    return item
 
 
 def scan_run_summary_from_item(item: dict[str, Any]) -> ScanRunSummary:
@@ -285,8 +290,8 @@ def ghas_alert_to_item(alert: GhasAlertRecord) -> dict[str, Any]:
             "PK": f"GHAS_ALERT#{alert.ghas_alert_id}",
             "SK": "META",
             "entityType": "GHAS_ALERT",
-            "gsi2pk": GHAS_ALERT_LIST_PK,
-            "gsi2sk": f"{fetched_at}#{alert.repository}#{alert.ghas_alert_id}",
+            # GSI2 #ALL list key removed (#23 follow-on D3): never read via GSI2.
+            # The GSI1 repo-axis projection (#23) is merged below and preserved.
             "ghasAlertId": alert.ghas_alert_id,
             "repository": alert.repository,
             "alertNumber": alert.alert_number,
@@ -355,10 +360,11 @@ def secret_evidence_to_item(evidence: SecretEvidenceRecord) -> dict[str, Any]:
             "PK": f"SECRET_EVIDENCE#{evidence.evidence_id}",
             "SK": "META",
             "entityType": "SECRET_EVIDENCE",
+            # gsi1 link fallback (_secret_evidence_link_pk) is the catch-all
+            # access pattern and is preserved. Only the never-read GSI2 #ALL list
+            # key is removed (#23 follow-on D3).
             "gsi1pk": link_pk,
             "gsi1sk": f"SECRET_EVIDENCE#{created_at}#{evidence.evidence_id}",
-            "gsi2pk": SECRET_EVIDENCE_LIST_PK,
-            "gsi2sk": f"{created_at}#{evidence.evidence_id}",
             "evidenceId": evidence.evidence_id,
             "sourceTool": evidence.source_tool,
             "linkedFindingId": evidence.linked_finding_id,
@@ -408,16 +414,16 @@ def _secret_evidence_link_pk(evidence: SecretEvidenceRecord) -> str:
 
 def scan_target_to_item(target: ScanTarget) -> dict[str, Any]:
     """Map a scan target catalog entry into the NoSQL item shape."""
-    return {
+    item = {
         "PK": f"SCAN_TARGET#{target.url}",
         "SK": "META",
         "entityType": "SCAN_TARGET",
-        "gsi1pk": TARGET_LIST_PK,
-        "gsi1sk": f"TARGET#{target.name}",
         "url": target.url,
         "name": target.name,
         "enabled": target.enabled,
     }
+    item.update(list_axis_projection_for_item(TARGET_LIST_AXIS, item))
+    return item
 
 
 def scan_target_from_item(item: dict[str, Any]) -> ScanTarget:
@@ -436,8 +442,8 @@ def ref_state_to_item(state: RefState) -> dict[str, Any]:
         "PK": f"REPO#{state.repo_id}",
         "SK": f"REF#{state.ref_name}",
         "entityType": "REF_STATE",
-        "gsi1pk": "REF_STATE#ALL",
-        "gsi1sk": f"{updated_at}#{state.repo_id}#{state.ref_name}",
+        # dead-write GSI1 #ALL key removed (#23 follow-on D4): never read via GSI1
+        # (reads use base table PK=REPO#<id>). REF_STATE is not a repo-axis entity.
         "repoId": state.repo_id,
         "repoUrl": state.repo_url,
         "refName": state.ref_name,
@@ -465,15 +471,11 @@ def scan_job_to_item(job: ScanJob) -> dict[str, Any]:
     lease_until = (
         datetime_to_iso(job.lease_until) if job.lease_until is not None else None
     )
-    return without_none(
+    item = without_none(
         {
             "PK": f"SCAN_JOB#{job.job_id}",
             "SK": "META",
             "entityType": "SCAN_JOB",
-            "gsi1pk": f"SCAN_JOB_STATUS#{job.status}",
-            "gsi1sk": (
-                f"{next_attempt_at}#{job.priority:08d}#{created_at}#{job.job_id}"
-            ),
             "gsi2pk": f"REPO#{job.repo_id}",
             "gsi2sk": f"JOB#{job.status}#{created_at}#{job.job_id}",
             "jobId": job.job_id,
@@ -500,6 +502,16 @@ def scan_job_to_item(job: ScanJob) -> dict[str, Any]:
             "lastError": job.last_error,
         }
     )
+    # Hot queue partitions (pending/leased) are sharded (D1); cold terminal
+    # statuses keep the plain unsharded gsi1pk (not GSI-read, see 범위 분리).
+    if job.status in ("pending", "leased"):
+        item.update(list_axis_projection_for_item(SCAN_JOB_AXIS, item))
+    else:
+        item["gsi1pk"] = f"SCAN_JOB_STATUS#{job.status}"
+        item["gsi1sk"] = (
+            f"{next_attempt_at}#{job.priority:08d}#{created_at}#{job.job_id}"
+        )
+    return item
 
 
 def scan_job_from_item(item: dict[str, Any]) -> ScanJob:
@@ -587,8 +599,8 @@ def repo_lease_to_item(lease: RepoLease) -> dict[str, Any]:
         "PK": f"REPO_LEASE#{lease.repo_id}",
         "SK": "META",
         "entityType": "REPO_LEASE",
-        "gsi1pk": "REPO_LEASE#ALL",
-        "gsi1sk": f"{lease_until}#{lease.repo_id}",
+        # dead-write GSI1 #ALL key removed (#23 follow-on D4): never read via GSI1
+        # (get_queue_status scans base table; release is a PK/SK delete).
         "repoId": lease.repo_id,
         "workerId": lease.worker_id,
         "leaseUntil": lease_until,
diff --git a/src/security_scanner/storage/adapters/nosql_db/list_axis.py b/src/security_scanner/storage/adapters/nosql_db/list_axis.py
new file mode 100644
index 0000000..192edfd
--- /dev/null
+++ b/src/security_scanner/storage/adapters/nosql_db/list_axis.py
@@ -0,0 +1,198 @@
+"""List/index-axis GSI sharding: spec, key, per-entity inputs (issue #23 follow-on).
+
+Generalizes the #23 ``RepoAxisKey`` pattern to the static single-value list/index
+GSI partitions that are hot at cloud scale. A :class:`ListAxisSpec` fully describes
+one axis's key contract — including the target GSI ``index_name`` and the real
+attribute names (``gsi_pk_field``/``gsi_sk_field``) — so the reader and migration
+route to the correct index without generic placeholders.
+
+The four active sharded axes are all on GSI1. The spec parameterizes the index so
+a future GSI2 list axis can be added without reader code changes. GHAS_ALERT and
+SECRET_EVIDENCE are intentionally NOT here: their GSI2 ``#ALL`` keys are never read
+and are removed rather than sharded (design D3).
+
+This module imports only the shared shard core and ``transport.GSI1_NAME`` so the
+write mappers in ``items`` can depend on it without an import cycle.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+from security_scanner.storage.adapters.nosql_db.axis_core import (
+    axis_shard,
+)
+from security_scanner.storage.adapters.nosql_db.transport import GSI1_NAME
+
+LIST_AXIS_SHARD_INFIX = "#SHARD#"  # placed between partition root and bucket
+_SCAN_JOB_SHARDED_STATUSES = ("pending", "leased")  # statuses list_axis_inputs accepts
+
+
+@dataclass(frozen=True)
+class ListAxisSpec:
+    """Complete key contract for one list/index axis.
+
+    ``index_name``/``gsi_pk_field``/``gsi_sk_field`` are explicit (not derived) so
+    the reader pulls the IndexName, KeyCondition attribute, and merge-sort key all
+    from one spec — keeping a future GSI2 axis from being routed to the wrong index.
+    Shard count is a durable schema contract pinned by ``version``: changing it
+    requires a new version plus a rehash migration (see #23 ``repo_axis``).
+    """
+
+    prefix: str  # axis identifier; ``list_axis_inputs`` dispatches on this value
+    version: int  # value written under ``version_attr``
+    shard_count: int  # number of buckets; also written under ``count_attr``
+    gsi_pk_field: str  # item attribute that holds the GSI partition key
+    gsi_sk_field: str  # item attribute that holds the GSI sort key
+    index_name: str  # passed as IndexName when a partition is queried
+    version_attr: str  # attribute name for the axis version marker
+    shard_attr: str  # attribute name for the row's bucket
+    count_attr: str  # attribute name for the shard count marker
+    shard_infix: str = LIST_AXIS_SHARD_INFIX  # joins partition root and bucket
+
+
+TARGET_LIST_AXIS = ListAxisSpec(
+    prefix="TARGET_LIST#ALL",  # used as the partition root as-is
+    version=1,
+    shard_count=8,
+    gsi_pk_field="gsi1pk",
+    gsi_sk_field="gsi1sk",
+    index_name=GSI1_NAME,
+    version_attr="listAxisVersion",
+    shard_attr="listAxisShard",
+    count_attr="listAxisShardCount",
+)
+REPO_LIST_AXIS = ListAxisSpec(
+    prefix="REPO_LIST#ALL",  # used as the partition root as-is
+    version=1,
+    shard_count=8,
+    gsi_pk_field="gsi1pk",
+    gsi_sk_field="gsi1sk",
+    index_name=GSI1_NAME,
+    version_attr="listAxisVersion",  # same marker attr names as TARGET_LIST_AXIS
+    shard_attr="listAxisShard",
+    count_attr="listAxisShardCount",
+)
+SCAN_DATE_AXIS = ListAxisSpec(
+    prefix="SCAN_DATE",  # list_axis_inputs appends "#<date>" to form the root
+    version=1,
+    shard_count=8,
+    gsi_pk_field="gsi1pk",
+    gsi_sk_field="gsi1sk",
+    index_name=GSI1_NAME,
+    version_attr="scanDateAxisVersion",
+    shard_attr="scanDateAxisShard",
+    count_attr="scanDateAxisShardCount",
+)
+SCAN_JOB_AXIS = ListAxisSpec(
+    prefix="SCAN_JOB_STATUS",  # list_axis_inputs appends "#<status>" for the root
+    version=1,
+    shard_count=4,  # the other three axes use 8
+    gsi_pk_field="gsi1pk",
+    gsi_sk_field="gsi1sk",
+    index_name=GSI1_NAME,
+    version_attr="scanJobAxisVersion",
+    shard_attr="scanJobAxisShard",
+    count_attr="scanJobAxisShardCount",
+)
+
+
+def sharded_list_axis_pk(spec: ListAxisSpec, partition_root: str, bucket: str) -> str:
+    """Return ``<partition_root><shard_infix><bucket>`` using the spec's shard infix."""
+    return f"{partition_root}{spec.shard_infix}{bucket}"
+
+
+def legacy_list_axis_pk(partition_root: str) -> str:
+    """Return the legacy (unsharded) partition key: ``partition_root`` unchanged."""
+    return partition_root
+
+
+@dataclass(frozen=True)
+class ListAxisKey:
+    """Computed sharded GSI key (and the bucket behind it) for one logical row."""
+
+    spec: ListAxisSpec
+    pk: str
+    gsi_sk: str
+    bucket: str
+
+    @classmethod
+    def build(
+        cls,
+        *,
+        spec: ListAxisSpec,
+        partition_root: str,
+        gsi_sk: str,
+        shard_material: str,
+    ) -> ListAxisKey:
+        """Hash ``shard_material`` into a bucket and derive the sharded key."""
+        # The bucket depends only on shard_material and the spec's shard count.
+        shard = axis_shard(shard_material, shard_count=spec.shard_count)
+        sharded_pk = sharded_list_axis_pk(spec, partition_root, shard)
+        return cls(spec=spec, pk=sharded_pk, gsi_sk=gsi_sk, bucket=shard)
+
+    def projection(self) -> dict[str, object]:
+        """Return the attributes to write, keyed by the spec's real attr names."""
+        spec = self.spec
+        # Insertion order: GSI keys first, then version, count and shard markers.
+        fields: dict[str, object] = {}
+        fields[spec.gsi_pk_field] = self.pk
+        fields[spec.gsi_sk_field] = self.gsi_sk
+        fields[spec.version_attr] = spec.version
+        fields[spec.count_attr] = spec.shard_count
+        fields[spec.shard_attr] = self.bucket
+        return fields
+
+
+def list_axis_inputs(spec: ListAxisSpec, item: dict) -> tuple[str, str, str]:
+    """Derive ``(partition_root, gsi_sk, shard_material)`` from an item's fields.
+
+    The one place the per-axis key formula lives; dispatches on ``spec.prefix``.
+    Write mappers and the backfill both feed an item dict through here, so a
+    fresh write and a backfilled legacy row compute the same sharded key.
+    Missing fields raise ``KeyError``; a wrong entity type, an unsharded status
+    or an unknown prefix raises ``ValueError``.
+    """
+    axis = spec.prefix
+    if axis == "TARGET_LIST#ALL":
+        sort_key = f"TARGET#{item['name']}"
+        return ("TARGET_LIST#ALL", sort_key, item["url"])
+    if axis == "REPO_LIST#ALL":
+        sort_key = f"UPDATED#{item['updatedAt']}#{item['repoKey']}"
+        return ("REPO_LIST#ALL", sort_key, item["repoKey"])
+    if axis == "SCAN_DATE":
+        if item.get("entityType") != "SCAN_RUN":
+            raise ValueError(
+                f"SCAN_DATE list axis requires entityType=SCAN_RUN, got "
+                f"{item.get('entityType')!r}"
+            )
+        scan_at = item["scanAtIso"]
+        # Assumes scanAtIso begins with YYYY-MM-DD so the slice is the scan date
+        # (TODO confirm it matches the scan_date() helper used before sharding).
+        root = f"SCAN_DATE#{scan_at[:10]}"
+        sort_key = f"{scan_at}#{item['repoKey']}#{item['scanRunId']}"
+        return (root, sort_key, item["scanRunId"])
+    if axis == "SCAN_JOB_STATUS":
+        status = item.get("status")
+        if status not in _SCAN_JOB_SHARDED_STATUSES:
+            raise ValueError(
+                f"SCAN_JOB list axis shards only {_SCAN_JOB_SHARDED_STATUSES}, "
+                f"got status={status!r}"
+            )
+        sort_key = (
+            f"{item['nextAttemptAt']}#{int(item['priority']):08d}#"
+            f"{item['createdAt']}#{item['jobId']}"
+        )
+        return (f"SCAN_JOB_STATUS#{status}", sort_key, item["jobId"])
+    raise ValueError(f"unsupported list axis spec: {spec.prefix!r}")
+
+
+def list_axis_projection_for_item(spec: ListAxisSpec, item: dict) -> dict[str, object]:
+    """Compute the sharded GSI projection attributes for ``item`` under ``spec``."""
+    # list_axis_inputs owns the per-axis formula; a KeyError/ValueError raised
+    # there propagates to the caller unchanged.
+    root, sort_key, material = list_axis_inputs(spec, item)
+    key = ListAxisKey.build(
+        spec=spec, partition_root=root, gsi_sk=sort_key, shard_material=material
+    )
+    return key.projection()
diff --git a/src/security_scanner/storage/adapters/nosql_db/list_axis_migration.py b/src/security_scanner/storage/adapters/nosql_db/list_axis_migration.py
new file mode 100644
index 0000000..329607e
--- /dev/null
+++ b/src/security_scanner/storage/adapters/nosql_db/list_axis_migration.py
@@ -0,0 +1,168 @@
+"""List-axis backfill + compatibility-removal gate (issue #23 follow-on).
+
+Mirrors :mod:`repo_axis_migration`, generalized to the four GSI1 list axes. A
+legacy row lives on the unsharded ``partition_root`` (no ``<axis>Version``).
+Backfill recomputes the sharded projection from the row's own fields (via
+``list_axis_inputs`` — the same path the write mappers use, so keys cannot drift)
+and conditionally updates the existing primary item in place; it never copies rows.
+
+Only the four GSI1 list axes are migrated. GHAS_ALERT/SECRET_EVIDENCE are not
+here: their GSI2 ``#ALL`` keys are removed rather than sharded (design D3).
+
+Removal gate: per axis the report shows inventory / backfilled / skipped / failed
+and a ``remaining`` legacy count; ``remaining == 0`` everywhere clears the gate to
+delete the ``include_legacy`` read branches.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any
+
+from security_scanner.storage.adapters.nosql_db.access import scan_all_pages
+from security_scanner.storage.adapters.nosql_db.list_axis import (
+    REPO_LIST_AXIS,
+    SCAN_DATE_AXIS,
+    SCAN_JOB_AXIS,
+    TARGET_LIST_AXIS,
+    ListAxisSpec,
+    list_axis_projection_for_item,
+)
+from security_scanner.storage.adapters.nosql_db.store import (
+    _is_conditional_check_failure,
+)
+
+
+@dataclass(frozen=True)
+class _MigrationTarget:
+    """One axis's migration scope: spec + the entity rows + legacy detection."""
+
+    spec: ListAxisSpec  # key contract used to recompute the sharded projection
+    entity_type: str  # ``entityType`` value the legacy scan filters on
+    legacy_prefix: str  # prefix a legacy (unsharded) GSI partition value starts with
+    status_filter: tuple[str, ...] | None = None  # if set, only these statuses count
+
+
+#: The four GSI1 list axes migrated by backfill (design doc: scope separation).
+LIST_AXIS_MIGRATION_TARGETS: tuple[_MigrationTarget, ...] = (
+    _MigrationTarget(TARGET_LIST_AXIS, "SCAN_TARGET", "TARGET_LIST#ALL"),
+    _MigrationTarget(REPO_LIST_AXIS, "REPO_META", "REPO_LIST#ALL"),
+    _MigrationTarget(SCAN_DATE_AXIS, "SCAN_RUN", "SCAN_DATE#"),
+    _MigrationTarget(
+        SCAN_JOB_AXIS, "SCAN_JOB", "SCAN_JOB_STATUS#", ("pending", "leased")
+    ),
+)
+
+
+@dataclass(frozen=True)
+class EntityBackfillCounts:
+    """Per-axis backfill outcome for the removal gate report."""
+
+    inventory: int  # legacy rows found by the scan taken before backfilling
+    backfilled: int  # rows whose conditional update succeeded
+    skipped: int  # rows whose update hit a conditional-check failure
+    failed: int  # rows that raised any other error
+    remaining: int  # legacy rows found by the re-scan taken after backfilling
+
+
+@dataclass(frozen=True)
+class ListAxisBackfillReport:
+    """Per-axis tallies keyed by prefix; ``gate_clear`` checks only ``remaining``."""
+
+    by_axis: dict[str, EntityBackfillCounts]
+
+    @property
+    def gate_clear(self) -> bool:
+        return not any(counts.remaining != 0 for counts in self.by_axis.values())
+
+
+def is_legacy_list_axis_item(target: _MigrationTarget, item: dict[str, Any]) -> bool:
+    """Tell whether ``item`` is still an unsharded, unversioned row of this axis.
+
+    The partition value is read from ``target.spec.gsi_pk_field``. A row is legacy
+    when that value starts with ``target.legacy_prefix``, contains no shard infix,
+    the item has no version attribute, and (when ``status_filter`` is set, as for
+    SCAN_JOB) its ``status`` is inside the filter.
+    """
+    spec = target.spec
+    partition_value = str(item.get(spec.gsi_pk_field, ""))
+    on_legacy_partition = (
+        partition_value.startswith(target.legacy_prefix)
+        and spec.shard_infix not in partition_value
+    )
+    # Rows that are already sharded or already versioned need no backfill.
+    if not on_legacy_partition or spec.version_attr in item:
+        return False
+    # Statuses outside the filter (e.g. completed/dead_letter scan jobs) stay on
+    # their plain unsharded partition on purpose and are not migrated.
+    allowed = target.status_filter
+    return allowed is None or item.get("status") in allowed
+
+
+def backfill_list_axis(table: Any) -> ListAxisBackfillReport:
+    """Backfill each migration target's legacy rows in place and report per axis.
+
+    ``remaining`` comes from a second legacy scan taken after the axis's updates.
+    """
+    by_axis: dict[str, EntityBackfillCounts] = {}
+    for target in LIST_AXIS_MIGRATION_TARGETS:
+        legacy = _scan_legacy(table, target)
+        tally = {"backfilled": 0, "skipped": 0, "failed": 0}
+        for item in legacy:
+            outcome = _backfill_one(table, target, item)
+            # Any outcome other than backfilled/skipped is counted as failed.
+            slot = outcome if outcome in ("backfilled", "skipped") else "failed"
+            tally[slot] += 1
+        remaining = len(_scan_legacy(table, target))
+        by_axis[target.spec.prefix] = EntityBackfillCounts(
+            inventory=len(legacy),
+            backfilled=tally["backfilled"],
+            skipped=tally["skipped"],
+            failed=tally["failed"],
+            remaining=remaining,
+        )
+    return ListAxisBackfillReport(by_axis=by_axis)
+
+
+def inventory_legacy_list_axis(table: Any) -> dict[str, int]:
+    """Count legacy rows per axis prefix; only scans are issued, no updates."""
+    counts: dict[str, int] = {}
+    for target in LIST_AXIS_MIGRATION_TARGETS:
+        counts[target.spec.prefix] = len(_scan_legacy(table, target))
+    return counts
+
+
+def _scan_legacy(table: Any, target: _MigrationTarget) -> list[dict[str, Any]]:
+    rows = scan_all_pages(  # scan filtered to this target's entityType
+        table,
+        FilterExpression="entityType = :entity_type",
+        ExpressionAttributeValues={":entity_type": target.entity_type},
+    )
+    return list(filter(lambda row: is_legacy_list_axis_item(target, row), rows))
+
+
+def _backfill_one(table: Any, target: _MigrationTarget, item: dict[str, Any]) -> str:
+    """Shard one legacy row in place; return "backfilled", "skipped" or "failed".
+
+    "skipped": ``_is_conditional_check_failure`` matched (row gone or already
+    versioned); "failed": any other exception, including a malformed row.
+    """
+    try:
+        projection = list_axis_projection_for_item(target.spec, item)
+        slots = {attr: f":v{index}" for index, attr in enumerate(projection)}
+        assignments = (
+            f"{attr} = {placeholder}" for attr, placeholder in slots.items()
+        )
+        # Only touch rows that still exist and have not been versioned yet.
+        table.update_item(
+            Key={"PK": item["PK"], "SK": item["SK"]},
+            UpdateExpression="SET " + ", ".join(assignments),
+            ConditionExpression=(
+                "attribute_exists(PK) AND attribute_exists(SK) AND "
+                f"attribute_not_exists({target.spec.version_attr})"
+            ),
+            ExpressionAttributeValues={slots[a]: v for a, v in projection.items()},
+        )
+    except Exception as exc:  # noqa: BLE001 - classified below
+        return "skipped" if _is_conditional_check_failure(exc) else "failed"
+    return "backfilled"
diff --git a/src/security_scanner/storage/adapters/nosql_db/list_axis_reader.py b/src/security_scanner/storage/adapters/nosql_db/list_axis_reader.py
new file mode 100644
index 0000000..cac68df
--- /dev/null
+++ b/src/security_scanner/storage/adapters/nosql_db/list_axis_reader.py
@@ -0,0 +1,213 @@
+"""Scatter-gather readers for list/index-axis sharded partitions (#23 follow-on).
+
+Two fan-out readers, both spec-parameterized on the real GSI attribute names
+(``spec.gsi_pk_field``/``spec.gsi_sk_field``) and index (``spec.index_name``) —
+no generic placeholders, so a future GSI2 axis routes correctly without code
+changes. Both fail closed: any shard query exception propagates; a partial result
+must never look like a complete one.
+
+- :func:`read_list_axis` — flat unordered set (dedupe + sort), for
+  ``list_scan_targets`` / ``read_scan_runs_for_date`` / ``_read_scan_jobs_by_status``.
+- :func:`read_list_axis_ordered` — newest-first + limit preserving k-way merge,
+  for ``read_recent_repo_metadata``.
+"""
+
+from __future__ import annotations
+
+import heapq
+from collections.abc import Callable
+from concurrent.futures import ThreadPoolExecutor
+from typing import Any
+
+from security_scanner.storage.adapters.nosql_db.access import query_all_pages
+from security_scanner.storage.adapters.nosql_db.axis_core import bucket_width
+from security_scanner.storage.adapters.nosql_db.list_axis import (
+    ListAxisSpec,
+    legacy_list_axis_pk,
+    sharded_list_axis_pk,
+)
+
+QueryPages = Callable[..., list[dict[str, Any]]]
+
+
+def read_list_axis(
+    table: Any,
+    *,
+    spec: ListAxisSpec,
+    partition_root: str,
+    gsi_sk_prefix: str | None = None,
+    include_legacy: bool = False,
+    parallel: bool = False,
+    query_pages: QueryPages = query_all_pages,
+) -> list[dict[str, Any]]:
+    """Query every shard of ``partition_root`` and return one merged list.
+
+    Duplicates by ``(PK, SK)`` are collapsed (higher axis version wins) and the
+    result is sorted by ``(gsi_sk, PK, SK)``. ``include_legacy`` additionally
+    queries the unsharded ``partition_root`` partition.
+
+    ``parallel`` runs the per-partition queries on a thread pool. Either way an
+    exception from any partition query propagates, so no partial result returns.
+    """
+    width = bucket_width(spec.shard_count)
+    partitions: list[str] = []
+    for bucket in range(spec.shard_count):
+        shard_pk = sharded_list_axis_pk(spec, partition_root, f"{bucket:0{width}d}")
+        partitions.append(shard_pk)
+    if include_legacy:
+        partitions.append(legacy_list_axis_pk(partition_root))
+
+    def fetch(partition: str) -> list[dict[str, Any]]:
+        return _query_list_partition(query_pages, table, spec, partition, gsi_sk_prefix)
+
+    if not parallel or len(partitions) <= 1:
+        results = [fetch(partition) for partition in partitions]
+    else:
+        with ThreadPoolExecutor(max_workers=len(partitions)) as pool:
+            # Draining pool.map re-raises a worker's exception, so a failed
+            # partition query aborts the read instead of yielding partial data.
+            results = list(pool.map(fetch, partitions))
+
+    rows = [row for chunk in results for row in chunk]
+    return _dedupe_and_sort(rows, spec)
+
+
+def read_list_axis_ordered(
+    table: Any,
+    *,
+    spec: ListAxisSpec,
+    partition_root: str,
+    gsi_sk_prefix: str | None,
+    limit: int | None,
+    descending: bool = True,
+    include_legacy: bool = False,
+    query_pages: QueryPages = query_all_pages,
+) -> list[dict[str, Any]]:
+    """Fan out over all shards and return rows globally ordered by the GSI sort key.
+
+    Every partition is queried with ``ScanIndexForward=not descending`` and a
+    per-partition limit from ``_ordered_fetch_limit``; the per-partition results
+    are k-way merged, deduplicated by ``(PK, SK)`` and cut to ``limit`` (``None``
+    means no cut).
+
+    With ``include_legacy`` the unsharded partition is queried too and the
+    per-partition limit is dropped. Any query exception propagates.
+    """
+    width = bucket_width(spec.shard_count)
+    fetch_n = _ordered_fetch_limit(limit, include_legacy)
+    # Shard partitions first, then (during migration) the unsharded legacy one.
+    partitions = [
+        sharded_list_axis_pk(spec, partition_root, f"{bucket:0{width}d}")
+        for bucket in range(spec.shard_count)
+    ]
+    if include_legacy:
+        partitions.append(legacy_list_axis_pk(partition_root))
+    # heapq.merge needs every input already ordered by the merge key; each
+    # partition query is expected to return rows in GSI sort-key order.
+    runs = [
+        _query_list_partition(
+            query_pages,
+            table,
+            spec,
+            partition,
+            gsi_sk_prefix,
+            scan_index_forward=not descending,
+            limit=fetch_n,
+        )
+        for partition in partitions
+    ]
+
+    def sort_key(row: dict[str, Any]) -> Any:
+        return row.get(spec.gsi_sk_field, "")
+
+    merged = heapq.merge(*runs, key=sort_key, reverse=descending)
+    # Dedupe before truncating so duplicates do not eat into the limit.
+    unique = _dedupe_preserving_order(merged, spec)
+    if limit is None:
+        return unique
+    return unique[:limit]
+
+
+def _query_list_partition(
+    query_pages: QueryPages,
+    table: Any,
+    spec: ListAxisSpec,
+    partition: str,
+    gsi_sk_prefix: str | None,
+    *,
+    scan_index_forward: bool | None = None,
+    limit: int | None = None,
+) -> list[dict[str, Any]]:
+    """Query one GSI partition of ``spec``; optional knobs are sent only if set."""
+    condition = f"{spec.gsi_pk_field} = :pk"
+    values: dict[str, Any] = {":pk": partition}
+    if gsi_sk_prefix is not None:
+        condition += f" AND begins_with({spec.gsi_sk_field}, :p)"
+        values[":p"] = gsi_sk_prefix
+    kwargs: dict[str, Any] = {
+        "IndexName": spec.index_name,
+        "ExpressionAttributeValues": values,
+        "KeyConditionExpression": condition,
+    }
+    if scan_index_forward is not None:
+        kwargs["ScanIndexForward"] = scan_index_forward
+    if limit is not None:
+        kwargs["limit"] = limit
+    return query_pages(table, **kwargs)
+
+
+def _ordered_fetch_limit(limit: int | None, include_legacy: bool) -> int | None:
+    """Pick the per-partition query limit for the ordered reader.
+
+    Steady state passes ``limit`` through. With ``include_legacy`` a row may be
+    visible both on the legacy and on a shard partition under one ``(PK, SK)``,
+    so a per-partition cut could push a genuinely newer row out of the window
+    before dedupe; ``None`` (no cut) is returned instead and the caller
+    truncates after deduplication. The migration window is short and
+    verification-only.
+    """
+    return None if include_legacy else limit
+
+
+def _version_of(item: dict[str, Any], version_attr: str) -> int:
+    try:  # a missing, non-numeric or non-finite version all count as version 0
+        return int(item.get(version_attr, 0))
+    except (TypeError, ValueError, OverflowError):  # OverflowError: int(inf)
+        return 0
+
+
+def _dedupe_and_sort(
+    items: list[dict[str, Any]], spec: ListAxisSpec
+) -> list[dict[str, Any]]:
+    """One row per ``(PK, SK)`` (higher version wins), sorted by (gsi_sk, PK, SK)."""
+    winners: dict[tuple[Any, Any], dict[str, Any]] = {}
+    for row in items:
+        identity = (row.get("PK"), row.get("SK"))
+        holder = winners.get(identity)
+        if holder is None:
+            winners[identity] = row
+        elif _version_of(row, spec.version_attr) > _version_of(
+            holder, spec.version_attr
+        ):
+            winners[identity] = row
+
+    def canonical(row: dict[str, Any]) -> tuple[Any, Any, Any]:
+        return (row.get(spec.gsi_sk_field, ""), row.get("PK", ""), row.get("SK", ""))
+
+    return sorted(winners.values(), key=canonical)
+
+
+def _dedupe_preserving_order(items: Any, spec: ListAxisSpec) -> list[dict[str, Any]]:
+    """Dedupe by ``(PK, SK)`` (strictly higher version wins), first-seen order kept.
+
+    Relies on dict insertion order: replacing a key's value keeps its position.
+    """
+    winners: dict[tuple[Any, Any], dict[str, Any]] = {}
+    for row in items:
+        identity = (row.get("PK"), row.get("SK"))
+        kept = winners.get(identity)
+        if kept is None or _version_of(row, spec.version_attr) > _version_of(
+            kept, spec.version_attr
+        ):
+            winners[identity] = row
+    return list(winners.values())
diff --git a/src/security_scanner/storage/adapters/nosql_db/repo_axis.py b/src/security_scanner/storage/adapters/nosql_db/repo_axis.py
index 7669933..8297bcc 100644
--- a/src/security_scanner/storage/adapters/nosql_db/repo_axis.py
+++ b/src/security_scanner/storage/adapters/nosql_db/repo_axis.py
@@ -23,10 +23,15 @@
 
 from __future__ import annotations
 
-import hashlib
 from dataclasses import dataclass
 from typing import Any
 
+from security_scanner.storage.adapters.nosql_db.axis_core import (
+    axis_material,
+    axis_shard,
+    bucket_width,
+)
+
 REPO_AXIS_VERSION = 2
 REPO_AXIS_SHARD_COUNT = 16
 #: Literal repo-axis partition prefix/infix. Defined once here; every other
@@ -34,35 +39,21 @@
 REPO_AXIS_PARTITION_PREFIX = "REPO#"
 REPO_AXIS_SHARD_INFIX = "#SHARD#"
 
-
-def repo_axis_material(*parts: str) -> str:
-    """Join stable shard inputs into one deterministic material string.
-
-    Uses a NUL separator so distinct field boundaries cannot collide (e.g.
-    ``("a", "bc")`` differs from ``("ab", "c")``). Shared by the write mappers
-    and the backfill so both derive identical shard buckets.
-    """
-    return "\0".join(parts)
+#: Back-compat aliases over the shared core (axis_core). Behavior unchanged; the
+#: shard primitives now live in axis_core and are shared with the list-axis.
+repo_axis_material = axis_material
+_bucket_width = bucket_width
 
 
 def repo_axis_shard(
     shard_material: str, *, shard_count: int = REPO_AXIS_SHARD_COUNT
 ) -> str:
-    """Return the fixed-width deterministic shard bucket for stable material.
+    """Return the deterministic shard bucket (repo-axis default shard count).
 
-    The bucket is derived from a SHA-256 digest of ``shard_material`` so the same
-    logical row always lands in the same partition regardless of process or host.
+    Thin wrapper over :func:`axis_core.axis_shard` supplying the repo-axis default
+    ``shard_count`` so existing #23 callers keep their one-arg form.
     """
-    if shard_count <= 0:
-        raise ValueError("shard_count must be positive")
-    digest = hashlib.sha256(shard_material.encode("utf-8")).hexdigest()
-    bucket = int(digest, 16) % shard_count
-    return f"{bucket:0{_bucket_width(shard_count)}d}"
-
-
-def _bucket_width(shard_count: int) -> int:
-    """Return the zero-pad width for the largest bucket index."""
-    return len(str(shard_count - 1))
+    return axis_shard(shard_material, shard_count=shard_count)
 
 
 def sharded_repo_axis_pk(repo_axis_id: str, bucket: str) -> str:
diff --git a/src/security_scanner/storage/adapters/nosql_db/store.py b/src/security_scanner/storage/adapters/nosql_db/store.py
index fe895cd..e054327 100644
--- a/src/security_scanner/storage/adapters/nosql_db/store.py
+++ b/src/security_scanner/storage/adapters/nosql_db/store.py
@@ -56,11 +56,19 @@
     secret_evidence_to_item,
     split_target_name,
 )
+from security_scanner.storage.adapters.nosql_db.list_axis import (
+    REPO_LIST_AXIS,
+    SCAN_DATE_AXIS,
+    SCAN_JOB_AXIS,
+    TARGET_LIST_AXIS,
+)
+from security_scanner.storage.adapters.nosql_db.list_axis_reader import (
+    read_list_axis,
+    read_list_axis_ordered,
+)
 from security_scanner.storage.adapters.nosql_db.repo_axis_reader import read_repo_axis
 from security_scanner.storage.adapters.nosql_db.transport import (
     GSI1_NAME,
-    REPO_LIST_PK,
-    TARGET_LIST_PK,
     DynamoDbCompatibleConfig,
     build_table_schema,
     make_boto3_resource_and_client,
@@ -176,18 +184,14 @@ def put_secret_evidence(self, evidence: SecretEvidenceRecord) -> None:
         """Persist encrypted secret evidence metadata."""
         self._table.put_item(Item=secret_evidence_to_item(evidence))
 
-    # TODO(performance): TARGET_LIST_PK is a static GSI1PK creating a single hot
-    # partition. Fine for local NoSQL, but requires write sharding
-    # (e.g., TARGET_LIST#0..N) for cloud scale WCU/RCU limits.
-    def list_scan_targets(self) -> list[ScanTarget]:
-        items = query_all_pages(
+    def list_scan_targets(self, *, include_legacy: bool = False) -> list[ScanTarget]:
+        # TARGET_LIST#ALL is sharded (#23 follow-on); fan out across shards.
+        items = read_list_axis(
             self._table,
-            IndexName=GSI1_NAME,
-            KeyConditionExpression="gsi1pk = :pk AND begins_with(gsi1sk, :sk_prefix)",
-            ExpressionAttributeValues={
-                ":pk": TARGET_LIST_PK,
-                ":sk_prefix": "TARGET#",
-            },
+            spec=TARGET_LIST_AXIS,
+            partition_root="TARGET_LIST#ALL",
+            gsi_sk_prefix="TARGET#",
+            include_legacy=include_legacy,
         )
         return items_to_scan_targets(items)
 
@@ -450,17 +454,19 @@ def write_scan_result(self, result: TargetScanResult) -> None:
             )
         )
 
-    def read_recent_repo_metadata(self, limit: int | None = None) -> list[RepoMetadata]:
-        items = query_all_pages(
+    def read_recent_repo_metadata(
+        self, limit: int | None = None, *, include_legacy: bool = False
+    ) -> list[RepoMetadata]:
+        # REPO_LIST#ALL is sharded (#23 follow-on); ordered k-way merge preserves
+        # the newest-first + limit contract across shards.
+        items = read_list_axis_ordered(
             self._table,
+            spec=REPO_LIST_AXIS,
+            partition_root="REPO_LIST#ALL",
+            gsi_sk_prefix="UPDATED#",
             limit=limit,
-            IndexName=GSI1_NAME,
-            KeyConditionExpression=("gsi1pk = :pk AND begins_with(gsi1sk, :sk_prefix)"),
-            ExpressionAttributeValues={
-                ":pk": REPO_LIST_PK,
-                ":sk_prefix": "UPDATED#",
-            },
-            ScanIndexForward=False,
+            descending=True,
+            include_legacy=include_legacy,
         )
         return items_to_repo_metadata(items)
 
@@ -475,14 +481,15 @@ def read_scan_runs_for_repo(self, repo_key: str) -> list[ScanRunSummary]:
         )
         return items_to_scan_run_summaries(items)
 
-    def read_scan_runs_for_date(self, scan_date: str) -> list[ScanRunSummary]:
-        items = query_all_pages(
+    def read_scan_runs_for_date(
+        self, scan_date: str, *, include_legacy: bool = False
+    ) -> list[ScanRunSummary]:
+        # SCAN_DATE#<date> is sharded (#23 follow-on); fan out across shards.
+        items = read_list_axis(
             self._table,
-            IndexName=GSI1_NAME,
-            KeyConditionExpression="gsi1pk = :pk",
-            ExpressionAttributeValues={
-                ":pk": f"SCAN_DATE#{scan_date}",
-            },
+            spec=SCAN_DATE_AXIS,
+            partition_root=f"SCAN_DATE#{scan_date}",
+            include_legacy=include_legacy,
         )
         return items_to_scan_run_summaries(items)
 
@@ -730,14 +737,23 @@ def _put_scan_ledger_if_absent(self, ledger: ScanLedgerEntry) -> None:
             raise
 
     def _read_scan_jobs_by_status(self, status: str) -> list[ScanJob]:
-        items = query_all_pages(
-            self._table,
-            IndexName=GSI1_NAME,
-            KeyConditionExpression="gsi1pk = :pk",
-            ExpressionAttributeValues={
-                ":pk": f"SCAN_JOB_STATUS#{status}",
-            },
-        )
+        if status in ("pending", "leased"):
+            # hot queue partitions are sharded (#23 follow-on, D1); parallel
+            # fan-out bounds lease-loop latency, fail-closed preserved.
+            items = read_list_axis(
+                self._table,
+                spec=SCAN_JOB_AXIS,
+                partition_root=f"SCAN_JOB_STATUS#{status}",
+                parallel=True,
+            )
+        else:
+            # cold terminal statuses keep the plain unsharded partition.
+            items = query_all_pages(
+                self._table,
+                IndexName=GSI1_NAME,
+                KeyConditionExpression="gsi1pk = :pk",
+                ExpressionAttributeValues={":pk": f"SCAN_JOB_STATUS#{status}"},
+            )
         return items_to_scan_jobs(items)
 
     def _try_lease_scan_job(
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 3f9e0d7..c5ac8bc 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -724,4 +724,5 @@ def test_subcommand_registration_order_is_stable():
         "disable-target",
         "sync",
         "backfill-repo-axis",
+        "backfill-list-axis",
     ]
diff --git a/tests/test_cli_backfill_repo_axis.py b/tests/test_cli_backfill_repo_axis.py
index 7a97eb7..5881456 100644
--- a/tests/test_cli_backfill_repo_axis.py
+++ b/tests/test_cli_backfill_repo_axis.py
@@ -170,3 +170,61 @@ def update_item(self, **kwargs):
     out = capsys.readouterr().out
     assert exit_code == 1
     assert "gate: NOT CLEAR" in out
+
+
+# --- backfill-list-axis (#23 follow-on) ---
+
+
+def _legacy_target(name: str = "fake-org/fake-repo") -> dict:
+    return {
+        "PK": f"SCAN_TARGET#https://x/{name}",
+        "SK": "META",
+        "entityType": "SCAN_TARGET",
+        "gsi1pk": "TARGET_LIST#ALL",
+        "gsi1sk": f"TARGET#{name}",
+        "name": name,
+        "url": f"https://x/{name}",
+    }
+
+
+def test_list_axis_dry_run_reports_inventory_without_mutating(monkeypatch, capsys):
+    table = _FakeTable([_legacy_target()])
+    _patch_table(monkeypatch, table)
+
+    exit_code = main(
+        ["backfill-list-axis", "--storage-backend", "dynamodb", "--dry-run"]
+    )
+
+    out = capsys.readouterr().out
+    assert exit_code == 0
+    assert "dry-run" in out
+    assert "TARGET_LIST#ALL: 1" in out
+    assert table.update_calls == []
+
+
+def test_list_axis_apply_backfills_and_reports_gate_clear(monkeypatch, capsys):
+    table = _FakeTable([_legacy_target()])
+    _patch_table(monkeypatch, table)
+
+    exit_code = main(["backfill-list-axis", "--storage-backend", "dynamodb"])
+
+    out = capsys.readouterr().out
+    assert exit_code == 0
+    assert "gate: CLEAR" in out
+    assert table.items[0]["gsi1pk"].startswith("TARGET_LIST#ALL#SHARD#")
+    assert table.items[0]["listAxisVersion"] == 1
+
+
+def test_list_axis_rejects_non_dynamodb_backend(monkeypatch, capsys):
+    def _boom(config):
+        raise AssertionError("should not build a table for jsonl")
+
+    monkeypatch.setattr(
+        "security_scanner.cli.commands.migrate.make_boto3_resource_and_client", _boom
+    )
+
+    exit_code = main(["backfill-list-axis", "--storage-backend", "jsonl"])
+
+    err = capsys.readouterr().err
+    assert exit_code == 2
+    assert "requires --storage-backend dynamodb" in err
diff --git a/tests/test_dead_write_gsi_keys.py b/tests/test_dead_write_gsi_keys.py
new file mode 100644
index 0000000..109a6bf
--- /dev/null
+++ b/tests/test_dead_write_gsi_keys.py
@@ -0,0 +1,97 @@
+"""Tests for dead-write GSI key removal (issue #23 follow-on, D3/D4).
+
+Never-read GSI projections are removed from the write mappers: GHAS_ALERT and
+SECRET_EVIDENCE drop their GSI2 ``#ALL`` keys; REF_STATE and REPO_LEASE drop their
+GSI1 ``#ALL`` keys. GHAS_ALERT keeps its GSI1 repo-axis projection (#23); the
+SECRET_EVIDENCE gsi1 link fallback is preserved.
+"""
+
+from __future__ import annotations
+
+import datetime as dt
+
+from security_scanner.storage.adapters.nosql_db.items import (
+    ghas_alert_to_item,
+    ref_state_to_item,
+    repo_lease_to_item,
+    secret_evidence_to_item,
+)
+from security_scanner.storage.base import (
+    GhasAlertRecord,
+    RefState,
+    RepoLease,
+    SecretEvidenceRecord,
+)
+
+NOW = dt.datetime(2026, 6, 19, tzinfo=dt.timezone.utc)
+
+
+def test_ghas_alert_drops_gsi2_keeps_gsi1_repo_axis():
+    item = ghas_alert_to_item(
+        GhasAlertRecord(
+            ghas_alert_id="ghas_1",
+            repository="fake-org/fake-repo",
+            alert_number=1,
+            secret_type="aws_access_key_id",
+            state="open",
+            fetched_at=NOW,
+        )
+    )
+    assert "gsi2pk" not in item
+    assert "gsi2sk" not in item
+    # GSI1 repo-axis projection (#23) is preserved
+    assert item["gsi1pk"].startswith("REPO#fake-org/fake-repo#SHARD#")
+    assert item["repoAxisVersion"] == 2
+
+
+def test_secret_evidence_drops_gsi2_keeps_gsi1_link_fallback():
+    item = secret_evidence_to_item(
+        SecretEvidenceRecord(
+            evidence_id="ev_1",
+            source_tool="gitleaks",
+            secret_hash="salted-sha256:abc",
+            encrypted_secret="enc",
+            encrypted_match="enc",
+            key_id="k1",
+            nonce="n1",
+            auth_tag="t1",
+            algorithm="aes-256-gcm",
+            created_at=NOW,
+        )
+    )
+    assert "gsi2pk" not in item
+    assert "gsi2sk" not in item
+    # gsi1 link fallback (catch-all) preserved
+    assert item["gsi1pk"] == "SECRET_EVIDENCE#ALL"
+    assert item["gsi1sk"].startswith("SECRET_EVIDENCE#")
+
+
+def test_ref_state_drops_gsi1_dead_write():
+    item = ref_state_to_item(
+        RefState(
+            repo_id="repo_1",
+            repo_url="https://x/repo_1",
+            ref_name="refs/heads/main",
+            last_seen_sha="s1",
+            updated_at=NOW,
+        )
+    )
+    assert "gsi1pk" not in item
+    assert "gsi1sk" not in item
+    # base-table identity intact
+    assert item["PK"] == "REPO#repo_1"
+    assert item["SK"] == "REF#refs/heads/main"
+
+
+def test_repo_lease_drops_gsi1_dead_write():
+    item = repo_lease_to_item(
+        RepoLease(
+            repo_id="repo_1",
+            worker_id="worker-a",
+            lease_until=NOW,
+            updated_at=NOW,
+        )
+    )
+    assert "gsi1pk" not in item
+    assert "gsi1sk" not in item
+    assert item["PK"] == "REPO_LEASE#repo_1"
diff --git a/tests/test_dynamodb_compatible_store.py b/tests/test_dynamodb_compatible_store.py
index 116e7d1..d817f64 100644
--- a/tests/test_dynamodb_compatible_store.py
+++ b/tests/test_dynamodb_compatible_store.py
@@ -18,6 +18,10 @@
     Verdict,
 )
 from security_scanner.runtime.branch_residual import residual_for_repo
+from security_scanner.storage.adapters.nosql_db.list_axis import (
+    REPO_LIST_AXIS,
+    SCAN_DATE_AXIS,
+)
 from security_scanner.storage.base import (
     FindingDispositionWriter,
     FindingReader,
@@ -434,10 +438,13 @@ def test_repo_metadata_to_item_keeps_runtime_metadata_and_repo_list_index():
     assert item["providerHost"] == "example.invalid"
     assert item["adminEmail"] == "owner@example.invalid"
     assert item["enabled"] is True
-    assert item["gsi1pk"] == "REPO_LIST#ALL"
+    # REPO_LIST#ALL is sharded (#23 follow-on); gsi1sk prefix preserved.
+    assert item["gsi1pk"].startswith("REPO_LIST#ALL#SHARD#")
     assert (
         item["gsi1sk"] == "UPDATED#2026-05-22T00:00:00Z#github#example-org/example-repo"
     )
+    assert item["listAxisVersion"] == 1
+    assert item["listAxisShardCount"] == 8
 
 
 def test_scan_run_summary_to_item_stores_summary_and_artifact_pointer_only():
@@ -463,11 +470,14 @@ def test_scan_run_summary_to_item_stores_summary_and_artifact_pointer_only():
     assert item["countsTotal"] == 2
     assert item["countsByLabel"] == {"SECRET": 2}
     assert item["artifactUri"] == "artifact://scan_abc12345/gitleaks.json"
-    assert item["gsi1pk"] == "SCAN_DATE#2026-05-22"
+    # SCAN_DATE#<date> is sharded (#23 follow-on); gsi1sk preserved.
+    assert item["gsi1pk"].startswith("SCAN_DATE#2026-05-22#SHARD#")
     assert (
         item["gsi1sk"]
         == "2026-05-22T00:05:00Z#github#example-org/example-repo#scan_abc12345"
     )
+    assert item["scanDateAxisVersion"] == 1
+    assert item["scanDateAxisShardCount"] == 8
     assert "findings" not in item
 
 
@@ -778,14 +788,8 @@ def test_read_recent_repo_metadata_paginates_until_limit_is_reached():
         provider_type="github",
         updated_at_iso="2026-05-22T00:00:00Z",
     )
-    table = PaginatedQueryTable(
-        [
-            {
-                "Items": [repo_metadata_to_item(newest)],
-                "LastEvaluatedKey": {"PK": "REPO#page-break", "SK": "META"},
-            },
-            {"Items": [repo_metadata_to_item(older)]},
-        ]
+    table = FakeDynamoTable(
+        [repo_metadata_to_item(newest), repo_metadata_to_item(older)]
     )
     store = DynamoDbCompatibleFindingStore(
         DynamoDbCompatibleConfig(table_name="SecurityScannerLocal"),
@@ -795,10 +799,12 @@ def test_read_recent_repo_metadata_paginates_until_limit_is_reached():
 
     repos = store.read_recent_repo_metadata(limit=2)
 
+    # REPO_LIST#ALL fan-out (#23 follow-on): each shard queried newest-first with
+    # the limit, then k-way merged + truncated to the global top-limit.
     assert repos == [newest, older]
-    assert len(table.query_calls) == 2
-    assert table.query_calls[0]["Limit"] == 2
-    assert table.query_calls[1]["Limit"] == 1
+    assert len(table.query_calls) == REPO_LIST_AXIS.shard_count
+    assert all(call["ScanIndexForward"] is False for call in table.query_calls)
+    assert all(call["Limit"] == 2 for call in table.query_calls)
 
 
 def test_read_scan_runs_for_repo_queries_scan_run_items_only():
@@ -908,10 +914,11 @@ def test_read_scan_runs_for_date_queries_date_index():
     runs = store.read_scan_runs_for_date("2026-05-22")
 
     assert runs == [first, second]
-    assert table.query_calls[0]["IndexName"] == "GSI1"
-    assert table.query_calls[0]["ExpressionAttributeValues"] == {
-        ":pk": "SCAN_DATE#2026-05-22",
-    }
+    # SCAN_DATE#<date> fan-out (#23 follow-on): shard partitions of the date only.
+    pks = [call["ExpressionAttributeValues"][":pk"] for call in table.query_calls]
+    assert len(pks) == SCAN_DATE_AXIS.shard_count
+    assert all(pk.startswith("SCAN_DATE#2026-05-22#SHARD#") for pk in pks)
+    assert all(call["IndexName"] == "GSI1" for call in table.query_calls)
 
 
 def test_read_scan_runs_for_date_paginates_query_results():
@@ -927,26 +934,32 @@ def test_read_scan_runs_for_date_paginates_query_results():
         scan_at_iso="2026-05-22T01:05:00Z",
         counts_total=1,
     )
-    table = PaginatedQueryTable(
-        [
-            {
-                "Items": [scan_run_summary_to_item(first)],
-                "LastEvaluatedKey": {
-                    "gsi1pk": "SCAN_DATE#2026-05-22",
-                    "gsi1sk": "2026-05-22T00:05:00Z#page-break",
-                },
-            },
-            {"Items": [scan_run_summary_to_item(second)]},
-        ]
-    )
+    item_first = scan_run_summary_to_item(first)
+    item_second = scan_run_summary_to_item(second)
+    paginated_pk = item_first["gsi1pk"]  # the shard partition holding `first`
+
+    class _PaginatingShardTable(FakeDynamoTable):
+        def query(self, **kwargs):
+            self.query_calls.append(kwargs)
+            pk = kwargs["ExpressionAttributeValues"][":pk"]
+            matched = [i for i in self.items if i.get("gsi1pk") == pk]
+            if pk == paginated_pk and "ExclusiveStartKey" not in kwargs:
+                return {"Items": matched, "LastEvaluatedKey": {"PK": "cursor"}}
+            if pk == paginated_pk:
+                return {"Items": []}  # second page: already drained
+            return {"Items": matched}
+
+    table = _PaginatingShardTable([item_first, item_second])
     store = DynamoDbCompatibleFindingStore(
         DynamoDbCompatibleConfig(table_name="SecurityScannerLocal"),
         resource=FakeDynamoResource(table),
         client=FakeDynamoClient(),
     )
 
+    # fan-out across shards, and the shard holding `first` paginates within itself.
     assert store.read_scan_runs_for_date("2026-05-22") == [first, second]
-    assert len(table.query_calls) == 2
+    pk_calls = [c["ExpressionAttributeValues"][":pk"] for c in table.query_calls]
+    assert pk_calls.count(paginated_pk) == 2  # two pages for that shard
 
 
 def test_read_for_scan_run_roundtrips_only_findings_for_that_run():
diff --git a/tests/test_incremental_scan_storage.py b/tests/test_incremental_scan_storage.py
index f74dd36..907ff19 100644
--- a/tests/test_incremental_scan_storage.py
+++ b/tests/test_incremental_scan_storage.py
@@ -281,10 +281,9 @@ def test_incremental_item_mappers_round_trip_all_entity_types():
     assert repo_lease_from_item(repo_lease_to_item(repo_lease)) == repo_lease
 
     lease_item = repo_lease_to_item(repo_lease)
-    assert lease_item["gsi1pk"] == "REPO_LEASE#ALL"
-    assert lease_item["gsi1sk"] == (
-        f"{datetime_to_iso(repo_lease.lease_until)}#{REPO_ID}"
-    )
+    # dead-write GSI1 #ALL key removed (#23 follow-on D4): never read via GSI1.
+    assert "gsi1pk" not in lease_item
+    assert "gsi1sk" not in lease_item
 
 
 def test_repo_id_and_job_id_are_deterministic_from_contract_fields():
diff --git a/tests/test_list_axis_migration.py b/tests/test_list_axis_migration.py
new file mode 100644
index 0000000..7787a65
--- /dev/null
+++ b/tests/test_list_axis_migration.py
@@ -0,0 +1,197 @@
+"""Tests for list-axis backfill + removal gate (issue #23 follow-on)."""
+
+from __future__ import annotations
+
+from security_scanner.storage.adapters.nosql_db.list_axis_migration import (
+    backfill_list_axis,
+    inventory_legacy_list_axis,
+)
+
+
+class _MigrationTable:  # in-memory fake exposing scan() and update_item()
+    class ConditionalCheckFailedException(Exception):  # raised on failed condition
+        pass
+
+    def __init__(self, items: list[dict]) -> None:
+        self.items = [dict(i) for i in items]  # shallow copies of the fixtures
+        self.update_calls: list[dict] = []
+
+    def scan(self, **kwargs) -> dict:  # honours :entity_type only; no paging
+        entity_type = kwargs["ExpressionAttributeValues"][":entity_type"]
+        return {
+            "Items": [dict(i) for i in self.items if i.get("entityType") == entity_type]
+        }
+
+    def _find(self, key: dict) -> dict | None:
+        for item in self.items:
+            if item.get("PK") == key["PK"] and item.get("SK") == key["SK"]:
+                return item
+        return None
+
+    def update_item(
+        self,
+        *,
+        Key,
+        UpdateExpression,
+        ExpressionAttributeValues,
+        ConditionExpression=None,
+    ) -> dict:
+        self.update_calls.append(Key)  # logged before the condition is evaluated
+        item = self._find(Key)
+        if not self._condition_holds(ConditionExpression, item):
+            raise self.ConditionalCheckFailedException()
+        assert item is not None
+        for assignment in UpdateExpression.removeprefix("SET ").split(", "):
+            attr, placeholder = assignment.split(" = ", 1)  # "name = :value" pairs
+            item[attr] = ExpressionAttributeValues[placeholder]
+        return {}
+
+    def _condition_holds(self, expression, item) -> bool:
+        if expression is None:
+            return item is not None  # no condition: the row only has to exist
+        for clause in expression.split(" AND "):  # AND-joined clauses only
+            clause = clause.strip()
+            if clause.startswith("attribute_exists("):
+                attr = clause[len("attribute_exists(") : -1]
+                if item is None or attr not in item:
+                    return False
+            elif clause.startswith("attribute_not_exists("):
+                attr = clause[len("attribute_not_exists(") : -1]
+                if item is not None and attr in item:
+                    return False
+            else:  # pragma: no cover
+                raise AssertionError(f"unhandled clause: {clause!r}")
+        return True
+
+
+def _legacy_target() -> dict:
+    return {
+        "PK": "SCAN_TARGET#https://x/o/r",
+        "SK": "META",
+        "entityType": "SCAN_TARGET",
+        "name": "o/r",
+        "url": "https://x/o/r",
+        "gsi1pk": "TARGET_LIST#ALL",
+        "gsi1sk": "TARGET#o/r",
+    }
+
+
+def _legacy_repo() -> dict:
+    return {
+        "PK": "REPO#o/r",
+        "SK": "META",
+        "entityType": "REPO_META",
+        "repoKey": "o/r",
+        "updatedAt": "2026-06-19T00:00:00+00:00",
+        "gsi1pk": "REPO_LIST#ALL",
+        "gsi1sk": "UPDATED#2026-06-19T00:00:00+00:00#o/r",
+    }
+
+
+def _legacy_scanrun() -> dict:
+    return {
+        "PK": "REPO#o/r",
+        "SK": "SCAN_RUN#2026-06-19T01:02:03+00:00#scan_1",
+        "entityType": "SCAN_RUN",
+        "repoKey": "o/r",
+        "scanRunId": "scan_1",
+        "scanAtIso": "2026-06-19T01:02:03+00:00",
+        "gsi1pk": "SCAN_DATE#2026-06-19",
+        "gsi1sk": "2026-06-19T01:02:03+00:00#o/r#scan_1",
+    }
+
+
+def _legacy_job(status: str = "pending") -> dict:
+    return {
+        "PK": f"SCAN_JOB#job_{status}",
+        "SK": "META",
+        "entityType": "SCAN_JOB",
+        "status": status,
+        "nextAttemptAt": "2026-06-19T00:00:00+00:00",
+        "priority": 5,
+        "createdAt": "2026-06-19T00:00:00+00:00",
+        "jobId": f"job_{status}",
+        "gsi1pk": f"SCAN_JOB_STATUS#{status}",
+        "gsi1sk": "2026-06-19T00:00:00+00:00#00000005#2026-06-19T00:00:00+00:00#job",
+    }
+
+
+def _all_legacy() -> list[dict]:
+    return [_legacy_target(), _legacy_repo(), _legacy_scanrun(), _legacy_job("pending")]
+
+
+def test_backfill_updates_in_place_not_row_copy():
+    table = _MigrationTable([_legacy_target()])
+    before = len(table.items)
+
+    report = backfill_list_axis(table)
+
+    assert len(table.items) == before
+    updated = table.items[0]
+    assert (updated["PK"], updated["SK"]) == ("SCAN_TARGET#https://x/o/r", "META")
+    assert updated["gsi1pk"].startswith("TARGET_LIST#ALL#SHARD#")
+    assert updated["gsi1sk"] == "TARGET#o/r"
+    assert updated["listAxisVersion"] == 1
+    counts = report.by_axis["TARGET_LIST#ALL"]
+    assert (counts.inventory, counts.backfilled, counts.remaining) == (1, 1, 0)
+
+
+def test_backfill_all_axes_clears_gate():
+    table = _MigrationTable(_all_legacy())
+
+    report = backfill_list_axis(table)
+
+    for prefix in ("TARGET_LIST#ALL", "REPO_LIST#ALL", "SCAN_DATE", "SCAN_JOB_STATUS"):
+        counts = report.by_axis[prefix]
+        assert (counts.inventory, counts.backfilled, counts.skipped, counts.failed) == (
+            1,
+            1,
+            0,
+            0,
+        ), prefix
+        assert counts.remaining == 0, prefix
+    assert report.gate_clear is True
+    # every row now sharded
+    for item in table.items:
+        assert "#SHARD#" in item["gsi1pk"]
+
+
+def test_scan_job_completed_and_dead_letter_not_treated_as_legacy():
+    table = _MigrationTable(
+        [_legacy_job("pending"), _legacy_job("completed"), _legacy_job("dead_letter")]
+    )
+
+    report = backfill_list_axis(table)
+
+    counts = report.by_axis["SCAN_JOB_STATUS"]
+    assert (counts.inventory, counts.backfilled, counts.remaining) == (1, 1, 0)
+    # completed/dead_letter rows untouched (still plain, no version attr)
+    cold = [i for i in table.items if i["status"] in ("completed", "dead_letter")]
+    assert all("#SHARD#" not in i["gsi1pk"] for i in cold)
+    assert all("scanJobAxisVersion" not in i for i in cold)
+
+
+def test_already_sharded_rows_not_counted_as_legacy():
+    table = _MigrationTable(_all_legacy())
+    backfill_list_axis(table)  # first pass shards everything
+    updates_after_first = len(table.update_calls)
+
+    second = backfill_list_axis(table)  # idempotent re-run
+
+    assert second.gate_clear is True
+    for prefix in ("TARGET_LIST#ALL", "REPO_LIST#ALL", "SCAN_DATE", "SCAN_JOB_STATUS"):
+        counts = second.by_axis[prefix]
+        assert (counts.inventory, counts.backfilled, counts.remaining) == (0, 0, 0)
+    assert len(table.update_calls) == updates_after_first  # nothing re-updated
+
+
+def test_inventory_counts_without_mutating():
+    table = _MigrationTable(_all_legacy())
+    inventory = inventory_legacy_list_axis(table)
+    assert inventory == {
+        "TARGET_LIST#ALL": 1,
+        "REPO_LIST#ALL": 1,
+        "SCAN_DATE": 1,
+        "SCAN_JOB_STATUS": 1,
+    }
+    assert table.update_calls == []
diff --git a/tests/test_list_axis_reader.py b/tests/test_list_axis_reader.py
new file mode 100644
index 0000000..326b03a
--- /dev/null
+++ b/tests/test_list_axis_reader.py
@@ -0,0 +1,230 @@
+"""Tests for list-axis scatter-gather readers (issue #23 follow-on)."""
+
+from __future__ import annotations
+
+import pytest
+
+from security_scanner.storage.adapters.nosql_db.list_axis import (
+    REPO_LIST_AXIS,
+    TARGET_LIST_AXIS,
+    legacy_list_axis_pk,
+    list_axis_projection_for_item,
+)
+from security_scanner.storage.adapters.nosql_db.list_axis_reader import (
+    read_list_axis,
+    read_list_axis_ordered,
+)
+
+
+class _FakeGsiTable:  # in-memory fake of query() over the gsi1pk/gsi1sk attrs
+    def __init__(self) -> None:
+        self.items: list[dict] = []
+        self.queried_pks: list[str] = []  # every :pk queried, in call order
+
+    def add(self, item: dict) -> None:
+        self.items.append(item)
+
+    def query(self, **kwargs) -> dict:
+        values = kwargs["ExpressionAttributeValues"]
+        pk = values[":pk"]
+        self.queried_pks.append(pk)
+        prefix = values.get(":p")  # NOTE(review): assumes reader binds prefix as ":p"
+        matched = [  # IndexName and KeyConditionExpression are not inspected
+            i
+            for i in self.items
+            if i.get("gsi1pk") == pk
+            and (prefix is None or str(i.get("gsi1sk", "")).startswith(prefix))
+        ]
+        matched.sort(
+            key=lambda i: i.get("gsi1sk", ""),
+            reverse=not kwargs.get("ScanIndexForward", True),  # descending when False
+        )
+        if "Limit" in kwargs:
+            matched = matched[: kwargs["Limit"]]
+        return {"Items": matched}  # single page: no LastEvaluatedKey
+
+
+def _target_item(name: str) -> dict:
+    item = {
+        "PK": f"SCAN_TARGET#https://x/{name}",
+        "SK": "META",
+        "entityType": "SCAN_TARGET",
+        "name": name,
+        "url": f"https://x/{name}",
+    }
+    item.update(list_axis_projection_for_item(TARGET_LIST_AXIS, item))
+    return item
+
+
+def _repo_item(repo_key: str, updated_at: str, *, sharded: bool = True) -> dict:
+    item = {
+        "PK": f"REPO#{repo_key}",
+        "SK": "META",
+        "entityType": "REPO_META",
+        "repoKey": repo_key,
+        "updatedAt": updated_at,
+    }
+    if sharded:
+        item.update(list_axis_projection_for_item(REPO_LIST_AXIS, item))
+    else:  # legacy unsharded row, no version attr
+        item["gsi1pk"] = legacy_list_axis_pk("REPO_LIST#ALL")
+        item["gsi1sk"] = f"UPDATED#{updated_at}#{repo_key}"
+    return item
+
+
+def test_flat_read_fans_out_all_shards_and_merges():
+    table = _FakeGsiTable()
+    for n in range(6):
+        table.add(_target_item(f"org/repo{n}"))
+
+    result = read_list_axis(
+        table,
+        spec=TARGET_LIST_AXIS,
+        partition_root="TARGET_LIST#ALL",
+        gsi_sk_prefix="TARGET#",
+    )
+
+    assert len(table.queried_pks) == TARGET_LIST_AXIS.shard_count  # 8, no legacy
+    assert [i["gsi1sk"] for i in result] == [f"TARGET#org/repo{n}" for n in range(6)]
+
+
+def test_flat_read_default_issues_no_legacy_query():
+    table = _FakeGsiTable()
+    read_list_axis(
+        table,
+        spec=TARGET_LIST_AXIS,
+        partition_root="TARGET_LIST#ALL",
+        gsi_sk_prefix="TARGET#",
+    )
+    assert legacy_list_axis_pk("TARGET_LIST#ALL") not in table.queried_pks
+
+
+def test_flat_read_include_legacy_merges_and_dedupes_prefer_higher_version():
+    table = _FakeGsiTable()
+    table.add(_repo_item("org/sharded", "2026-06-19T00:00:00+00:00", sharded=True))
+    # legacy-only row
+    table.add(_repo_item("org/legacy", "2026-06-18T00:00:00+00:00", sharded=False))
+    # duplicate (PK,SK): both legacy and sharded for org/dup
+    dup_sharded = _repo_item("org/dup", "2026-06-17T00:00:00+00:00", sharded=True)
+    dup_legacy = _repo_item("org/dup", "2026-06-17T00:00:00+00:00", sharded=False)
+    table.add(dup_sharded)
+    table.add(dup_legacy)
+
+    result = read_list_axis(
+        table,
+        spec=REPO_LIST_AXIS,
+        partition_root="REPO_LIST#ALL",
+        gsi_sk_prefix="UPDATED#",
+        include_legacy=True,
+    )
+
+    keys = [(i["PK"], i["SK"]) for i in result]
+    assert ("REPO#org/legacy", "META") in keys
+    assert keys.count(("REPO#org/dup", "META")) == 1  # deduped
+    dup = next(i for i in result if i["PK"] == "REPO#org/dup")
+    assert dup.get("listAxisVersion") == 1  # higher-version (sharded) wins
+
+
+def test_flat_read_fails_closed_on_shard_error():
+    class _Boom(_FakeGsiTable):
+        def query(self, **kwargs):
+            # shard_count=8 → single-digit buckets "0".."7"; fail bucket "2" only
+            if kwargs["ExpressionAttributeValues"][":pk"].endswith("#SHARD#2"):
+                raise RuntimeError("shard 2 down")
+            return super().query(**kwargs)
+
+    table = _Boom()
+    table.add(_target_item("org/repo"))
+    with pytest.raises(RuntimeError, match="shard 2 down"):
+        read_list_axis(
+            table,
+            spec=TARGET_LIST_AXIS,
+            partition_root="TARGET_LIST#ALL",
+            gsi_sk_prefix="TARGET#",
+        )
+
+
+def test_ordered_read_returns_global_newest_first_within_limit():
+    table = _FakeGsiTable()
+    # 10 repos with increasing timestamps spread across shards
+    for n in range(10):
+        ts = f"2026-06-{10 + n:02d}T00:00:00+00:00"
+        table.add(_repo_item(f"org/repo{n:02d}", ts))
+
+    result = read_list_axis_ordered(
+        table,
+        spec=REPO_LIST_AXIS,
+        partition_root="REPO_LIST#ALL",
+        gsi_sk_prefix="UPDATED#",
+        limit=3,
+    )
+
+    # newest 3 are repo09, repo08, repo07
+    assert [i["repoKey"] for i in result] == ["org/repo09", "org/repo08", "org/repo07"]
+
+
+def test_ordered_read_limit_none_returns_all_sorted():
+    table = _FakeGsiTable()
+    for n in range(5):
+        table.add(_repo_item(f"org/repo{n}", f"2026-06-1{n}T00:00:00+00:00"))
+    result = read_list_axis_ordered(
+        table,
+        spec=REPO_LIST_AXIS,
+        partition_root="REPO_LIST#ALL",
+        gsi_sk_prefix="UPDATED#",
+        limit=None,
+    )
+    assert [i["repoKey"] for i in result] == [f"org/repo{n}" for n in (4, 3, 2, 1, 0)]
+
+
+def test_ordered_read_limit_exceeds_total():
+    table = _FakeGsiTable()
+    table.add(_repo_item("org/a", "2026-06-10T00:00:00+00:00"))
+    result = read_list_axis_ordered(
+        table,
+        spec=REPO_LIST_AXIS,
+        partition_root="REPO_LIST#ALL",
+        gsi_sk_prefix="UPDATED#",
+        limit=50,
+    )
+    assert [i["repoKey"] for i in result] == ["org/a"]
+
+
+def test_ordered_read_default_issues_no_legacy_query():
+    table = _FakeGsiTable()
+    for n in range(3):
+        table.add(_repo_item(f"org/r{n}", f"2026-06-1{n}T00:00:00+00:00"))
+
+    read_list_axis_ordered(
+        table,
+        spec=REPO_LIST_AXIS,
+        partition_root="REPO_LIST#ALL",
+        gsi_sk_prefix="UPDATED#",
+        limit=2,
+    )
+
+    assert legacy_list_axis_pk("REPO_LIST#ALL") not in table.queried_pks
+    assert len(table.queried_pks) == REPO_LIST_AXIS.shard_count
+
+
+def test_ordered_read_include_legacy_dedupes_without_truncating_window():
+    table = _FakeGsiTable()
+    # legacy + sharded duplicate of the SAME newest row, plus older distinct rows
+    newest = "2026-06-20T00:00:00+00:00"
+    table.add(_repo_item("org/newest", newest, sharded=True))
+    table.add(_repo_item("org/newest", newest, sharded=False))  # legacy dup
+    table.add(_repo_item("org/old1", "2026-06-11T00:00:00+00:00", sharded=False))
+    table.add(_repo_item("org/old2", "2026-06-10T00:00:00+00:00", sharded=False))
+
+    result = read_list_axis_ordered(
+        table,
+        spec=REPO_LIST_AXIS,
+        partition_root="REPO_LIST#ALL",
+        gsi_sk_prefix="UPDATED#",
+        limit=2,
+        include_legacy=True,
+    )
+
+    # distinct top-2: newest (deduped to one), then old1 — not silently short
+    assert [i["repoKey"] for i in result] == ["org/newest", "org/old1"]
+    assert legacy_list_axis_pk("REPO_LIST#ALL") in table.queried_pks
diff --git a/tests/test_list_axis_sharding.py b/tests/test_list_axis_sharding.py
new file mode 100644
index 0000000..e82dbf4
--- /dev/null
+++ b/tests/test_list_axis_sharding.py
@@ -0,0 +1,157 @@
+"""Tests for list/index-axis GSI sharding (issue #23 follow-on)."""
+
+from __future__ import annotations
+
+import pytest
+
+from security_scanner.storage.adapters.nosql_db.list_axis import (
+    REPO_LIST_AXIS,
+    SCAN_DATE_AXIS,
+    SCAN_JOB_AXIS,
+    TARGET_LIST_AXIS,
+    ListAxisKey,
+    legacy_list_axis_pk,
+    list_axis_inputs,
+    list_axis_projection_for_item,
+    sharded_list_axis_pk,
+)
+from security_scanner.storage.adapters.nosql_db.transport import GSI1_NAME
+
+
+def test_axis_specs_pin_durable_shard_counts_on_gsi1():
+    assert TARGET_LIST_AXIS.shard_count == 8
+    assert REPO_LIST_AXIS.shard_count == 8
+    assert SCAN_DATE_AXIS.shard_count == 8
+    assert SCAN_JOB_AXIS.shard_count == 4
+    for spec in (TARGET_LIST_AXIS, REPO_LIST_AXIS, SCAN_DATE_AXIS, SCAN_JOB_AXIS):
+        assert spec.index_name == GSI1_NAME
+        assert spec.gsi_pk_field == "gsi1pk"
+        assert spec.gsi_sk_field == "gsi1sk"
+        assert spec.version == 1
+
+
+def test_distinct_version_attrs_so_axes_do_not_collide():
+    attrs = {
+        TARGET_LIST_AXIS.version_attr,
+        REPO_LIST_AXIS.version_attr,
+        SCAN_DATE_AXIS.version_attr,
+        SCAN_JOB_AXIS.version_attr,
+    }
+    # TARGET_LIST and REPO_LIST intentionally share listAxis*; the other axes differ
+    assert "listAxisVersion" in attrs
+    assert "scanDateAxisVersion" in attrs
+    assert "scanJobAxisVersion" in attrs
+
+
+def test_sharded_and_legacy_pk_helpers():
+    assert sharded_list_axis_pk(TARGET_LIST_AXIS, "TARGET_LIST#ALL", "03") == (
+        "TARGET_LIST#ALL#SHARD#03"
+    )
+    assert legacy_list_axis_pk("TARGET_LIST#ALL") == "TARGET_LIST#ALL"
+
+
+def test_list_axis_key_build_and_projection_uses_real_attr_names():
+    key = ListAxisKey.build(
+        spec=REPO_LIST_AXIS,
+        partition_root="REPO_LIST#ALL",
+        gsi_sk="UPDATED#2026-06-19T00:00:00+00:00#org/repo",
+        shard_material="org/repo",
+    )
+    assert key.pk.startswith("REPO_LIST#ALL#SHARD#")
+    assert key.bucket.isdigit() and 0 <= int(key.bucket) < 8
+    proj = key.projection()
+    assert proj == {
+        "gsi1pk": key.pk,
+        "gsi1sk": "UPDATED#2026-06-19T00:00:00+00:00#org/repo",
+        "listAxisVersion": 1,
+        "listAxisShardCount": 8,
+        "listAxisShard": key.bucket,
+    }
+
+
+def test_bucket_is_deterministic_and_within_range():
+    a = ListAxisKey.build(
+        spec=SCAN_JOB_AXIS,
+        partition_root="SCAN_JOB_STATUS#pending",
+        gsi_sk="x",
+        shard_material="job_1",
+    )
+    b = ListAxisKey.build(
+        spec=SCAN_JOB_AXIS,
+        partition_root="SCAN_JOB_STATUS#pending",
+        gsi_sk="x",
+        shard_material="job_1",
+    )
+    assert a.bucket == b.bucket
+    assert len(a.bucket) == 1 and 0 <= int(a.bucket) < 4
+
+
+def test_list_axis_inputs_target_list():
+    item = {
+        "entityType": "SCAN_TARGET",
+        "name": "org/repo",
+        "url": "https://x/org/repo",
+    }
+    root, sk, material = list_axis_inputs(TARGET_LIST_AXIS, item)
+    assert root == "TARGET_LIST#ALL"
+    assert sk == "TARGET#org/repo"
+    assert material == "https://x/org/repo"
+
+
+def test_list_axis_inputs_repo_list():
+    item = {
+        "entityType": "REPO_META",
+        "repoKey": "org/repo",
+        "updatedAt": "2026-06-19T00:00:00+00:00",
+    }
+    root, sk, material = list_axis_inputs(REPO_LIST_AXIS, item)
+    assert root == "REPO_LIST#ALL"
+    assert sk == "UPDATED#2026-06-19T00:00:00+00:00#org/repo"
+    assert material == "org/repo"
+
+
+def test_list_axis_inputs_scan_date_requires_scan_run():
+    ok = {
+        "entityType": "SCAN_RUN",
+        "scanAtIso": "2026-06-19T01:02:03+00:00",
+        "repoKey": "org/repo",
+        "scanRunId": "scan_1",
+    }
+    root, sk, material = list_axis_inputs(SCAN_DATE_AXIS, ok)
+    assert root == "SCAN_DATE#2026-06-19"
+    assert sk == "2026-06-19T01:02:03+00:00#org/repo#scan_1"
+    assert material == "scan_1"
+    with pytest.raises(ValueError, match="SCAN_RUN"):
+        list_axis_inputs(SCAN_DATE_AXIS, {"entityType": "REPO_META"})
+
+
+def test_list_axis_inputs_scan_job_only_pending_leased():
+    base = {
+        "entityType": "SCAN_JOB",
+        "status": "pending",
+        "nextAttemptAt": "2026-06-19T00:00:00+00:00",
+        "priority": 5,
+        "createdAt": "2026-06-19T00:00:00+00:00",
+        "jobId": "scan_job_1",
+    }
+    root, sk, material = list_axis_inputs(SCAN_JOB_AXIS, base)
+    assert root == "SCAN_JOB_STATUS#pending"
+    assert (
+        sk == "2026-06-19T00:00:00+00:00#00000005#2026-06-19T00:00:00+00:00#scan_job_1"
+    )
+    assert material == "scan_job_1"
+    for cold in ("completed", "dead_letter"):
+        with pytest.raises(ValueError, match="pending"):
+            list_axis_inputs(SCAN_JOB_AXIS, {**base, "status": cold})
+
+
+def test_projection_for_item_round_trip():
+    item = {
+        "entityType": "SCAN_TARGET",
+        "name": "org/repo",
+        "url": "https://x/org/repo",
+    }
+    proj = list_axis_projection_for_item(TARGET_LIST_AXIS, item)
+    assert proj["gsi1pk"].startswith("TARGET_LIST#ALL#SHARD#")
+    assert proj["gsi1sk"] == "TARGET#org/repo"
+    assert proj["listAxisVersion"] == 1
diff --git a/tests/test_repo_axis_sharding.py b/tests/test_repo_axis_sharding.py
index e28a1b7..11e9e16 100644
--- a/tests/test_repo_axis_sharding.py
+++ b/tests/test_repo_axis_sharding.py
@@ -257,9 +257,9 @@ def test_no_raw_repo_axis_gsi1_construction_outside_helper():
     # reader may construct a repo-axis gsi1 partition key. Mappers must route
     # through the helper, never emit raw `gsi1pk = REPO#...` or `#SHARD#`.
     pkg_dir = pathlib.Path(repo_axis_module.__file__).parent
-    # repo_axis.py is the single source of truth for the REPO#/#SHARD# literals;
-    # every other module composes keys through its helpers.
-    allowed = {"repo_axis.py"}
+    # repo_axis.py owns the REPO#/#SHARD# literals; list_axis.py owns the list-axis
+    # #SHARD# infix (scale redesign). Every other module composes via their helpers.
+    allowed = {"repo_axis.py", "list_axis.py"}
     offenders: list[str] = []
     for path in sorted(pkg_dir.glob("*.py")):
         if path.name in allowed:

From 2ce71e5e2f6518b4f71d3ecc0432b5ae9f5343d8 Mon Sep 17 00:00:00 2001
From: pureliture <tkdgur1756@naver.com>
Date: Fri, 19 Jun 2026 15:50:11 +0900
Subject: [PATCH 2/2] fix(storage): address PR #39 review (public-safety,
 thread-safety, robustness)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- public-safety: relativize machine-local absolute paths in design.md
- list_axis_reader: drop unsafe parallel fan-out (boto3 Table is not
  thread-safe); serial fan-out, shard counts are small. None-coerce gsi_sk/PK/SK
  sort & merge keys to avoid TypeError on null attributes.
- list_axis_migration: remaining = failed (backfilled/skipped are no longer
  legacy) — avoids a second full-table scan per axis.
- store: _read_scan_jobs_by_status / lease_next_scan_job take include_legacy so
  legacy pending/leased jobs are not starved during the migration window.
- repo_axis: remove now-unused _bucket_width back-compat alias (CodeQL).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 .../design.md                                 |  4 +-
 .../adapters/nosql_db/list_axis_migration.py  |  5 ++-
 .../adapters/nosql_db/list_axis_reader.py     | 37 ++++++++-----------
 .../storage/adapters/nosql_db/repo_axis.py    |  4 +-
 .../storage/adapters/nosql_db/store.py        | 21 ++++++++---
 5 files changed, 38 insertions(+), 33 deletions(-)

diff --git a/docs/workbench/specs/scale-redesign-list-axis-sharding/design.md b/docs/workbench/specs/scale-redesign-list-axis-sharding/design.md
index 4a80662..170d49a 100644
--- a/docs/workbench/specs/scale-redesign-list-axis-sharding/design.md
+++ b/docs/workbench/specs/scale-redesign-list-axis-sharding/design.md
@@ -509,8 +509,8 @@ never-read GSI projection은 샤딩 이득이 0이고 write amplification만 만
 
 ---
 
-참조한 실제 코드 경로(절대 경로):
-- `/Users/ddalkak/Projects/security-scanner/.worktrees/scale-redesign-list-axis/src/security_scanner/storage/adapters/nosql_db/repo_axis.py` — `RepoAxisKey`, `repo_axis_shard`/`_bucket_width`/`repo_axis_material`, `REPO_AXIS_SHARD_INFIX`, `repo_axis_inputs`(SCAN_RUN 미지원→ValueError)
+참조한 실제 코드 경로(repo 상대):
+- `src/security_scanner/storage/adapters/nosql_db/repo_axis.py` — `RepoAxisKey`, `repo_axis_shard`/`repo_axis_material`, `REPO_AXIS_SHARD_INFIX`, `repo_axis_inputs`(SCAN_RUN 미지원→ValueError)
 - `.../nosql_db/repo_axis_reader.py` — `read_repo_axis`, `_query_partition`(GSI1 하드코딩), `_dedupe_and_sort`, fail-closed 불변식
 - `.../nosql_db/repo_axis_migration.py` — `backfill_repo_axis`, `_backfill_one`, `is_legacy_repo_axis_item`, removal gate
 - `.../nosql_db/transport.py` — `REPO_LIST_PK`/`TARGET_LIST_PK`, `GSI1_NAME="GSI1"`/`GSI2_NAME="GSI2"`
diff --git a/src/security_scanner/storage/adapters/nosql_db/list_axis_migration.py b/src/security_scanner/storage/adapters/nosql_db/list_axis_migration.py
index 329607e..b903f30 100644
--- a/src/security_scanner/storage/adapters/nosql_db/list_axis_migration.py
+++ b/src/security_scanner/storage/adapters/nosql_db/list_axis_migration.py
@@ -113,7 +113,10 @@ def backfill_list_axis(table: Any) -> ListAxisBackfillReport:
                 skipped += 1
             else:
                 failed += 1
-        remaining = len(_scan_legacy(table, target))
+        # Backfilled and skipped rows are no longer legacy (skipped means the
+        # row was already sharded or gone); only failed rows remain legacy.
+        # Avoids a second full-table scan per axis (PR #39 review).
+        remaining = failed
         by_axis[target.spec.prefix] = EntityBackfillCounts(
             inventory=len(legacy),
             backfilled=backfilled,
diff --git a/src/security_scanner/storage/adapters/nosql_db/list_axis_reader.py b/src/security_scanner/storage/adapters/nosql_db/list_axis_reader.py
index cac68df..c391464 100644
--- a/src/security_scanner/storage/adapters/nosql_db/list_axis_reader.py
+++ b/src/security_scanner/storage/adapters/nosql_db/list_axis_reader.py
@@ -16,7 +16,6 @@
 
 import heapq
 from collections.abc import Callable
-from concurrent.futures import ThreadPoolExecutor
 from typing import Any
 
 from security_scanner.storage.adapters.nosql_db.access import query_all_pages
@@ -37,7 +36,6 @@ def read_list_axis(
     partition_root: str,
     gsi_sk_prefix: str | None = None,
     include_legacy: bool = False,
-    parallel: bool = False,
     query_pages: QueryPages = query_all_pages,
 ) -> list[dict[str, Any]]:
     """Flat scatter-gather across every shard of ``partition_root``.
@@ -45,9 +43,11 @@ def read_list_axis(
     Dedupes by ``(PK, SK)`` preferring the higher axis version, returns items in
     canonical ``(gsi_sk, PK, SK)`` order. Fails closed.
 
-    ``parallel`` issues the per-partition queries concurrently (thread pool) to
-    bound fan-out latency — used by the hot SCAN_JOB lease loop (D1). Fail-closed
-    is preserved: the first failing partition query propagates.
+    Fan-out is serial: the boto3 ``Table`` resource is not thread-safe, so a
+    thread pool over the shared table risks corrupted connection state (PR #39
+    review). ``shard_count`` is small (≤8; the SCAN_JOB queue uses 4), so serial
+    cost is bounded; a parallel variant would need a per-thread resource or a
+    low-level thread-safe client (D1 follow-up).
     """
     width = bucket_width(spec.shard_count)
     partitions = [
@@ -57,18 +57,13 @@ def read_list_axis(
     if include_legacy:
         partitions.append(legacy_list_axis_pk(partition_root))
 
-    def _q(partition: str) -> list[dict[str, Any]]:
-        return _query_list_partition(query_pages, table, spec, partition, gsi_sk_prefix)
-
-    if parallel and len(partitions) > 1:
-        with ThreadPoolExecutor(max_workers=len(partitions)) as executor:
-            # list() drains executor.map, which re-raises the first exception →
-            # fail-closed: no partial result is returned.
-            per_partition = list(executor.map(_q, partitions))
-    else:
-        per_partition = [_q(partition) for partition in partitions]
-
-    collected = [item for result in per_partition for item in result]
+    collected = [
+        item
+        for partition in partitions
+        for item in _query_list_partition(
+            query_pages, table, spec, partition, gsi_sk_prefix
+        )
+    ]
     return _dedupe_and_sort(collected, spec)
 
 
@@ -121,7 +116,7 @@ def read_list_axis_ordered(
         )
     merged = heapq.merge(
         *per_shard,
-        key=lambda item: item.get(spec.gsi_sk_field, ""),
+        key=lambda item: item.get(spec.gsi_sk_field) or "",
         reverse=descending,
     )
     deduped = _dedupe_preserving_order(merged, spec)
@@ -190,9 +185,9 @@ def _dedupe_and_sort(
     return sorted(
         best.values(),
         key=lambda item: (
-            item.get(spec.gsi_sk_field, ""),
-            item.get("PK", ""),
-            item.get("SK", ""),
+            item.get(spec.gsi_sk_field) or "",
+            item.get("PK") or "",
+            item.get("SK") or "",
         ),
     )
 
diff --git a/src/security_scanner/storage/adapters/nosql_db/repo_axis.py b/src/security_scanner/storage/adapters/nosql_db/repo_axis.py
index 8297bcc..d0c11d4 100644
--- a/src/security_scanner/storage/adapters/nosql_db/repo_axis.py
+++ b/src/security_scanner/storage/adapters/nosql_db/repo_axis.py
@@ -29,7 +29,6 @@
 from security_scanner.storage.adapters.nosql_db.axis_core import (
     axis_material,
     axis_shard,
-    bucket_width,
 )
 
 REPO_AXIS_VERSION = 2
@@ -39,10 +38,9 @@
 REPO_AXIS_PARTITION_PREFIX = "REPO#"
 REPO_AXIS_SHARD_INFIX = "#SHARD#"
 
-#: Back-compat aliases over the shared core (axis_core). Behavior unchanged; the
+#: Back-compat alias over the shared core (axis_core). Behavior unchanged; the
 #: shard primitives now live in axis_core and are shared with the list-axis.
 repo_axis_material = axis_material
-_bucket_width = bucket_width
 
 
 def repo_axis_shard(
diff --git a/src/security_scanner/storage/adapters/nosql_db/store.py b/src/security_scanner/storage/adapters/nosql_db/store.py
index e054327..d4b331d 100644
--- a/src/security_scanner/storage/adapters/nosql_db/store.py
+++ b/src/security_scanner/storage/adapters/nosql_db/store.py
@@ -236,11 +236,17 @@ def lease_next_scan_job(
         worker_id: str,
         lease_seconds: int,
         now: dt.datetime,
+        *,
+        include_legacy: bool = False,
     ) -> ScanJob | None:
         now = _ensure_utc(now)
         candidates = [
-            *self._read_scan_jobs_by_status(SCAN_JOB_STATUS_PENDING),
-            *self._read_scan_jobs_by_status(SCAN_JOB_STATUS_LEASED),
+            *self._read_scan_jobs_by_status(
+                SCAN_JOB_STATUS_PENDING, include_legacy=include_legacy
+            ),
+            *self._read_scan_jobs_by_status(
+                SCAN_JOB_STATUS_LEASED, include_legacy=include_legacy
+            ),
         ]
         candidates.sort(key=_scan_job_lease_sort_key)
         for job in candidates:
@@ -736,15 +742,18 @@ def _put_scan_ledger_if_absent(self, ledger: ScanLedgerEntry) -> None:
                 return
             raise
 
-    def _read_scan_jobs_by_status(self, status: str) -> list[ScanJob]:
+    def _read_scan_jobs_by_status(
+        self, status: str, *, include_legacy: bool = False
+    ) -> list[ScanJob]:
         if status in ("pending", "leased"):
-            # hot queue partitions are sharded (#23 follow-on, D1); parallel
-            # fan-out bounds lease-loop latency, fail-closed preserved.
+            # hot queue partitions are sharded (#23 follow-on); serial fan-out
+            # (boto3 Table not thread-safe). include_legacy folds the pre-sharding
+            # partition so legacy queued jobs aren't starved during migration.
             items = read_list_axis(
                 self._table,
                 spec=SCAN_JOB_AXIS,
                 partition_root=f"SCAN_JOB_STATUS#{status}",
-                parallel=True,
+                include_legacy=include_legacy,
             )
         else:
             # cold terminal statuses keep the plain unsharded partition.