Monorepo consolidation: workspace, shared types, transport plans, docker/swarm assets
Some checks failed
ci / rust (push) Failing after 2m34s
ci / ui (push) Failing after 30s

2026-03-30 11:40:42 +03:00
parent 7e7041cf8b
commit 1298d9a3df
246 changed files with 55434 additions and 0 deletions

2
.cargo/config.toml Normal file
View File

@@ -0,0 +1,2 @@
[registries.madapes]
index = "sparse+https://git.madapes.com/api/packages/madapes/cargo/"

15
.dockerignore Normal file
View File

@@ -0,0 +1,15 @@
target/
**/target/
node_modules/
**/node_modules/
dist/
**/dist/
.git/
**/.git/
.DS_Store
control/ui/.vite/

47
.github/workflows/ci.yml vendored Normal file
View File

@@ -0,0 +1,47 @@
name: ci
on:
  push:
  pull_request:
jobs:
  ui:
    runs-on: ubuntu-latest
    timeout-minutes: 10
    defaults:
      run:
        working-directory: control/ui
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          node-version: 20
          cache: npm
          cache-dependency-path: control/ui/package-lock.json
      - run: npm config set registry https://registry.npmjs.org
      - run: npm ci
      - run: npm run lint
      - run: npm run typecheck
      - run: npm run test
      - run: npm run build
  rust:
    runs-on: ubuntu-latest
    timeout-minutes: 15
    defaults:
      run:
        working-directory: .
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
        with:
          components: rustfmt, clippy
      - uses: Swatinem/rust-cache@v2
      - run: cargo fmt --check
      - run: cargo clippy --workspace --all-targets -- -D warnings
      - run: cargo test --workspace

12
.gitignore vendored Normal file
View File

@@ -0,0 +1,12 @@
target/
/target-*/
**/target/
**/target-*/
node_modules/
**/node_modules/
dist/
**/dist/
.DS_Store

6050
Cargo.lock generated Normal file

File diff suppressed because it is too large

15
Cargo.toml Normal file
View File

@@ -0,0 +1,15 @@
[workspace]
resolver = "2"
members = [
    "aggregate",
    "gateway",
    "projection",
    "runner",
    "shared",
    "control/api",
]
[profile.release]
lto = true
codegen-units = 1
strip = "symbols"

56
DOCKER.md Normal file
View File

@@ -0,0 +1,56 @@
# Docker
## Local Dev (Compose)
```bash
docker compose up -d --build
docker compose ps
docker compose down -v
```
To include the observability stack (Grafana/Loki/Tempo/VictoriaMetrics) with the local compose:
```bash
docker compose -f docker-compose.yml -f observability/docker-compose.yml up -d --build
docker compose -f docker-compose.yml -f observability/docker-compose.yml down -v
```
Service ports in the default compose:
- Gateway HTTP: `http://localhost:8080`
- Gateway gRPC: `localhost:8081`
- Aggregate gRPC: `localhost:50051`
- Aggregate HTTP: `http://localhost:18080`
- Runner HTTP: `http://localhost:28080`
- Control API: `http://localhost:38080`
- Control UI: `http://localhost:8082`
- NATS: `nats://localhost:4222`, monitoring `http://localhost:8222`
## Swarm (Dev)
Build images:
```bash
sh docker/scripts/build_images.sh all
```
Create dev secrets required by the observability stack:
```bash
sh docker/scripts/swarm_dev_secrets.sh
```
Deploy:
```bash
docker stack deploy -c swarm/stacks/platform.yml cloudlysis
docker stack deploy -c swarm/stacks/control-plane.yml cloudlysis_control
docker stack deploy -c swarm/stacks/observability.yml cloudlysis_obs
```
Remove:
```bash
docker stack rm cloudlysis_obs
docker stack rm cloudlysis_control
docker stack rm cloudlysis
```

216
GATEWAY_TRANSPORT_PLAN.md Normal file
View File

@@ -0,0 +1,216 @@
# Gateway Transport Plan
## Purpose
Standardize and optimize how the Gateway communicates with Aggregate, Projection, and Runner, and how nodes communicate via NATS JetStream, under these principles:
- Simplicity (few patterns, minimal bespoke conventions)
- Ease of operation (consistent health/ready/metrics, consistent failure modes)
- Frugality (bounded connections, bounded fanout, low overhead)
- High performance (low tail latency, backpressure-aware, predictable routing)
- Safety (tenant isolation, deny-by-default authz, consistent context propagation)
## Non-Negotiable Rules (Global)
- Every cross-service request MUST carry tenant + trace context.
- Every transport path MUST have explicit timeouts/deadlines and bounded retries.
- Every milestone below is “stop-the-line” gated:
- All tasks completed
- All tests passing
- Workspace lint/format/type checks passing
- Required integration tests for the milestone passing (when gated by env, they must be runnable and documented)
## Current State (Baseline)
- Gateway → Aggregate: gRPC command submission
- Gateway → Projection: HTTP query proxy (`/v1/query/*`)
- Gateway → Runner: HTTP proxy for admin endpoints (`/admin/runner/*`)
- Nodes ↔ NATS JetStream: events/workflow streams with headers for tenant/correlation/trace (now more consistent)
## Target Architecture (End State)
- Edge contract (clients ↔ Gateway): HTTP/JSON (stable, debuggable, browser + ops friendly)
- Internal RPC (Gateway ↔ services): gRPC for Aggregate + Projection + Runner (single internal RPC stack)
- Async/event backbone: NATS JetStream remains for event/work distribution
- `shared` is the single source of truth for:
- Header names and propagation rules
- Trace parsing/validation rules (`traceparent`, `trace-id`)
- Request context representation (tenant/correlation/trace)
## Definitions
### Request Context
Fields that must be consistently propagated:
- `tenant_id` (HTTP: `x-tenant-id`, NATS: `tenant-id`)
- `correlation_id` (HTTP: `x-correlation-id`, NATS: `x-correlation-id` and `correlation-id`)
- `traceparent` (HTTP: `traceparent`, NATS: `traceparent`)
- `trace_id` (derived from `traceparent` or provided explicitly; NATS: `trace-id`)
- `request_id` (HTTP: `x-request-id`, optional for NATS)
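A minimal sketch of how these fields and header names could be modeled in `shared` (the struct and constant names here are assumptions, not the crate's current API):
```rust
/// Canonical header names; HTTP form on the left, NATS form in comments.
pub const HDR_TENANT_ID: &str = "x-tenant-id"; // NATS: "tenant-id"
pub const HDR_CORRELATION_ID: &str = "x-correlation-id"; // NATS: plus "correlation-id"
pub const HDR_TRACEPARENT: &str = "traceparent"; // NATS: "traceparent" + derived "trace-id"
pub const HDR_REQUEST_ID: &str = "x-request-id"; // optional on NATS

/// The context every cross-service request must carry.
#[derive(Clone, Debug)]
pub struct RequestContext {
    pub tenant_id: String,
    pub correlation_id: String,
    pub traceparent: Option<String>,
    pub trace_id: Option<String>, // derived from `traceparent` when not provided explicitly
    pub request_id: Option<String>,
}
```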
### Standard Health Endpoints (per service)
- `GET /health` liveness
- `GET /ready` readiness (includes tenant gating if applicable)
- `GET /metrics` Prometheus
## Milestone 0: Transport Contract Lock-in (Context + Headers Everywhere)
### Goal
Make context propagation and header naming consistent and enforceable across HTTP, gRPC, and NATS, including “background” Gateway calls (health checks, rebalance probes).
### Exit Criteria
- A single shared contract exists for header names and trace parsing.
- Gateway injects context into all upstream calls (including rebalance/health probes).
- Aggregate/Projection/Runner consistently emit/consume the standard context on all transport paths they own.
- Unit tests prove propagation behavior for each transport.
- `cargo fmt --check`, `cargo clippy --workspace --all-targets -- -D warnings`, `cargo test --workspace` all pass.
### Tasks
- [ ] Standardize header constants in `shared` and remove string literals from Gateway and nodes where feasible.
- [ ] Add `shared` helpers for:
- HTTP extract/inject
- gRPC metadata extract/inject
- NATS header extract/inject
- [ ] Gateway: ensure context is injected into:
- gRPC upstream requests to Aggregate
- HTTP upstream requests to Projection
- Runner admin proxy requests
- Any “probe” calls (rebalance gates, fleet snapshots, health checks)
- [ ] Projection/Runner/Aggregate: ensure NATS published messages include:
- `tenant-id`
- `x-correlation-id` + `correlation-id`
- `traceparent`
- `trace-id` (derived when possible)
- [ ] Add transport-level tests:
- [ ] Gateway gRPC path: incoming context → upstream metadata → response metadata preserved
- [ ] Gateway HTTP proxy path: incoming context → upstream headers preserved
- [ ] NATS publish path: produced headers contain expected keys/values
### Required Tests
- Unit tests for shared parsing/derivation utilities
- Existing per-crate test suites
- At least one per-service “transport contract” test verifying headers are present and correct
## Milestone 1: Internal RPC Standardization (Projection via gRPC)
### Goal
Eliminate Gateway → Projection HTTP proxy as the default path by introducing an internal gRPC Query service, keeping HTTP optional for human/debug use.
### Exit Criteria
- A Projection gRPC service exists for query execution.
- Gateway routes queries to Projection via gRPC by default.
- Authorization semantics remain enforced in Gateway (deny-by-default).
- Response shapes are stable and match the existing UI expectations.
- All tests pass, including new gRPC query integration tests.
### Tasks
- [ ] Define protobuf API: `projection.gateway.v1.QueryService`
- [ ] Request includes tenant + view + query payload and metadata
- [ ] Response includes result payload and standard context propagation
- [ ] Implement Projection gRPC server:
- [ ] Parse tenant/view/query
- [ ] Execute query against current projection storage/query engine
- [ ] Enforce tenant scope
- [ ] Implement Gateway gRPC client path for queries (see the sketch after this list):
- [ ] Routing by tenant to Projection endpoint
- [ ] Deadlines, bounded retries (idempotent only)
- [ ] Context propagation (tenant/correlation/trace)
- [ ] Keep HTTP `/v1/query/*`:
- [ ] Either route to internal gRPC implementation or keep as legacy/debug endpoint
- [ ] Add tests:
- [ ] Gateway query authz + forwarding via gRPC
- [ ] Projection gRPC query contract tests for tenant isolation
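The client path above amounts to stamping every outgoing request with a deadline and the standard metadata. A minimal sketch with tonic (the helper name is hypothetical):
```rust
use std::time::Duration;
use tonic::Request;

/// Hypothetical helper: attach a deadline and the standard context metadata
/// to a query request before it is sent to Projection's QueryService.
pub fn with_context<T>(
    mut req: Request<T>,
    tenant: &str,
    correlation: &str,
    traceparent: Option<&str>,
    deadline: Duration,
) -> Request<T> {
    req.set_timeout(deadline); // tonic sends this as the `grpc-timeout` header
    let md = req.metadata_mut();
    if let Ok(v) = tenant.parse() {
        md.insert("x-tenant-id", v);
    }
    if let Ok(v) = correlation.parse() {
        md.insert("x-correlation-id", v);
    }
    if let Some(tp) = traceparent {
        if let Ok(v) = tp.parse() {
            md.insert("traceparent", v);
        }
    }
    req
}
```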
### Required Tests
- New gRPC QueryService tests (unit + integration)
- Existing query/authz tests in Gateway
- Workspace fmt/clippy/test
## Milestone 2: Internal RPC Standardization (Runner Admin via gRPC)
### Goal
Replace `/admin/runner/*` HTTP proxying with a first-class gRPC admin service for Runner operations.
### Exit Criteria
- Runner exposes a gRPC admin service for the admin surface required by Control/Gateway.
- Gateway uses gRPC to call Runner admin APIs.
- Authentication/authorization remains in Gateway; Runner trusts Gateway boundary.
- Admin operations are idempotent where appropriate and include audit hooks where required.
- All tests pass and include negative/tenant-spoof cases.
### Tasks
- [ ] Define protobuf API: `runner.admin.v1.RunnerAdmin`
- [ ] Drain/resume/status/reload/tenant-scoped controls
- [ ] Standard error mapping
- [ ] Implement Runner gRPC admin server:
- [ ] Tenant gating enforced for tenant-scoped operations
- [ ] Readiness/drain semantics aligned with platform contracts
- [ ] Implement Gateway gRPC client integration:
- [ ] Route to Runner endpoint via routing table
- [ ] Enforce authz rights (e.g. `runner.admin`)
- [ ] Context propagation
- [ ] Keep HTTP `/admin/*` in Runner optional:
- [ ] Either remove Gateway proxy usage or keep for direct debugging behind secure network
- [ ] Tests:
- [ ] Gateway: admin calls rejected without rights
- [ ] Gateway: tenant spoof attempts rejected
- [ ] Runner: idempotency and drain semantics validated
### Required Tests
- gRPC RunnerAdmin unit/integration tests
- Gateway proxy-to-gRPC tests
- Workspace fmt/clippy/test
## Milestone 3: Connection + Retry Policy Unification (Performance + Frugality)
### Goal
Make upstream connection management and retry behavior consistent and bounded across Gateway and nodes.
### Exit Criteria
- Gateway maintains bounded upstream connection pools for gRPC endpoints.
- All gRPC calls have deadlines; retries are only for idempotent operations.
- All probe/fanout calls are bounded and do not cause thundering herds.
- Load/soak tests show stable behavior under partial failure.
### Tasks
- [ ] Implement a Gateway upstream channel pool (sketched after this list):
- [ ] LRU bounded by max endpoints
- [ ] TTL/eviction strategy
- [ ] Fast path reuse under load
- [ ] Standardize retry profiles:
- [ ] Read-only: short retry with jitter
- [ ] Mutations: no automatic retry unless idempotency key present
- [ ] Standardize timeouts:
- [ ] Edge timeout limits
- [ ] Internal per-service deadlines
- [ ] Fanout controls:
- [ ] Concurrency limiters for fleet snapshot/probes
- [ ] Cache results where safe (short TTL)
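A sketch of the bounded pool described above, using the `lru` crate already in the workspace (the structure and eviction details are assumptions, not a settled design):
```rust
use std::num::NonZeroUsize;
use std::time::{Duration, Instant};
use lru::LruCache;
use tonic::transport::Channel;

/// Bounded upstream pool: at most `cap` endpoints, entries re-dialed after `ttl`.
pub struct ChannelPool {
    ttl: Duration,
    entries: LruCache<String, (Channel, Instant)>,
}

impl ChannelPool {
    pub fn new(cap: NonZeroUsize, ttl: Duration) -> Self {
        Self { ttl, entries: LruCache::new(cap) }
    }

    /// Fast path: clone a cached channel (tonic channels are cheap to clone
    /// and multiplex requests). Slow path: dial and insert, evicting the LRU entry.
    pub async fn get(
        &mut self,
        endpoint: &str,
    ) -> Result<Channel, Box<dyn std::error::Error + Send + Sync>> {
        if let Some((ch, created)) = self.entries.get(endpoint) {
            if created.elapsed() < self.ttl {
                return Ok(ch.clone());
            }
        }
        self.entries.pop(endpoint); // expired or missing: ensure the slot is clear
        let ch = Channel::from_shared(endpoint.to_string())?
            .connect_timeout(Duration::from_secs(2))
            .connect()
            .await?;
        self.entries.put(endpoint.to_string(), (ch.clone(), Instant::now()));
        Ok(ch)
    }
}
```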
### Required Tests
- Unit tests for pool eviction/TTL
- Gateway integration tests for deadline propagation
- Gated load tests (document env + how to run)
## Milestone 4: Transport Simplification Cleanup (Remove Legacy Paths)
### Goal
Remove or de-prioritize legacy HTTP internal paths so the “happy path” uses: HTTP edge → Gateway → gRPC internal → NATS async.
### Exit Criteria
- Gateway no longer depends on HTTP for Projection queries or Runner admin.
- Legacy endpoints are either removed or explicitly marked “debug-only” and not used by Gateway/Control.
- All operational playbooks rely on standardized endpoints.
### Tasks
- [ ] Remove the Gateway's HTTP query proxy usage (or keep only as a compatibility shim).
- [ ] Remove the Gateway's runner admin HTTP proxy usage (or keep only as a compatibility shim).
- [ ] Ensure Control UI + Control API use the standardized Gateway surfaces.
- [ ] Harden metrics and health probes to always carry context.
### Required Tests
- End-to-end smoke tests (gated)
- Workspace fmt/clippy/test
## Verification Commands (Required at Each Milestone)
- `cargo fmt --check`
- `cargo clippy --workspace --all-targets -- -D warnings`
- `cargo test --workspace`
- `npm ci && npm run lint && npm run typecheck && npm run test && npm run build` (in `control/ui`)
## Notes / Constraints
- Do not break wire compatibility for NATS subjects or event payloads; evolve via optional fields and tolerant decoding.
- Keep tenant isolation rules enforced at the Gateway boundary and re-validated at nodes where it is safety-critical.

58
Makefile Normal file
View File

@@ -0,0 +1,58 @@
.PHONY: docker-build-platform docker-build-control docker-build-observability docker-build-all
.PHONY: compose-up compose-down compose-ps compose-up-observability compose-down-observability
.PHONY: swarm-dev-secrets swarm-deploy-platform swarm-deploy-control swarm-deploy-observability swarm-deploy-all
.PHONY: swarm-rm-platform swarm-rm-control swarm-rm-observability swarm-rm-all

docker-build-platform:
	sh docker/scripts/build_images.sh platform

docker-build-control:
	sh docker/scripts/build_images.sh control

docker-build-observability:
	true

docker-build-all:
	sh docker/scripts/build_images.sh all

compose-up:
	docker compose up -d --build

compose-up-observability:
	docker compose -f docker-compose.yml -f observability/docker-compose.yml up -d --build

compose-down:
	docker compose down -v

compose-down-observability:
	docker compose -f docker-compose.yml -f observability/docker-compose.yml down -v

compose-ps:
	docker compose ps

swarm-dev-secrets:
	sh docker/scripts/swarm_dev_secrets.sh

swarm-deploy-platform:
	docker stack deploy -c swarm/stacks/platform.yml cloudlysis

swarm-deploy-control:
	docker stack deploy -c swarm/stacks/control-plane.yml cloudlysis_control

swarm-deploy-observability:
	docker stack deploy -c swarm/stacks/observability.yml cloudlysis_obs

swarm-deploy-all: swarm-dev-secrets swarm-deploy-platform swarm-deploy-control swarm-deploy-observability
	true

swarm-rm-platform:
	docker stack rm cloudlysis

swarm-rm-control:
	docker stack rm cloudlysis_control

swarm-rm-observability:
	docker stack rm cloudlysis_obs

swarm-rm-all: swarm-rm-observability swarm-rm-control swarm-rm-platform
	true

246
NATS_TRANSPORT_PLAN.md Normal file
View File

@@ -0,0 +1,246 @@
# NATS Transport Plan
## Purpose
Standardize and optimize how nodes (Aggregate, Projection, Runner, Gateway where applicable) use NATS JetStream and NATS KV, under these principles:
- Simplicity (few primitives, consistent naming, minimal per-service divergence)
- Ease of operation (predictable streams/consumers, clear runbooks, easy debugging)
- Frugality (bounded consumers, bounded in-flight work, minimal churn, minimal storage)
- Low resource usage (stable durable consumers, controlled ack waits, limited fanout)
- High performance (high throughput, low tail latency, reliable backpressure)
- Safety (tenant isolation, idempotency, deterministic replay, poison handling)
## Non-Negotiable Rules (Global)
- Every JetStream stream/consumer MUST have an explicit contract:
- name, subjects, retention, storage, replication, max sizes
- ack policy, ack wait, max deliver, max in flight
- Every node MUST run with bounded work:
- bounded pull batch sizes
- bounded concurrency
- bounded retry/backoff
- Every message MUST be tenant-scoped in subject and/or headers.
- Every milestone below is “stop-the-line” gated:
- all tasks completed
- all tests passing
- workspace lint/format checks passing
- required NATS-gated integration tests for the milestone passing (when gated by env)
## Current State (Baseline)
- Streams:
- `AGGREGATE_EVENTS` (Aggregate publishes, Projection/Runner consume)
- `WORKFLOW_COMMANDS`, `WORKFLOW_EVENTS` (Runner)
- Subject conventions:
- Aggregate events: `tenant.<tenant_id>.aggregate.<aggregate_type>.<aggregate_id>`
- Defaults often use filters like `tenant.*.aggregate.*.*`
- Durable consumers:
- Projection uses a durable name (configurable)
- Runner uses configurable durable prefix per role
- Aggregate had ad-hoc fetch consumer risks; now mitigated with unique consumer names per fetch
- Headers:
- Tenant + correlation + trace headers exist but were historically inconsistent; shared utilities now exist
## Target Architecture (End State)
- A single “NATS wire protocol” contract shared across services:
- subject naming
- required headers (tenant/correlation/trace)
- message envelope compatibility rules (tolerant decoding, optional fields)
- Stable, minimal set of JetStream streams:
- one stream per message class (aggregate events, workflow commands, workflow events)
- no per-tenant streams unless there is a strong operational reason
- Stable, limited consumers:
- durable consumers for long-lived processors (Projection, Runner)
- ephemeral consumers only for bounded ad-hoc operations (Aggregate fetch), always unique + best-effort deletion
- Uniform backpressure + reliability defaults:
- explicit ack
- bounded `max_ack_pending` and application-level concurrency
- bounded redelivery via `max_deliver` + poison policy
## Definitions
### Message Context (Headers)
Standard headers for NATS published messages:
- `tenant-id` (required)
- `x-correlation-id` and `correlation-id` (required for any request-derived message; generated if missing)
- `traceparent` (optional but recommended; generated/propagated if present upstream)
- `trace-id` (optional; derived from traceparent when possible)
- `Nats-Msg-Id` (required for idempotent publish/dedupe when applicable)
### Subject Naming Rules
- Tenant-first prefix: `tenant.<tenant_id>.…`
- Stable message class token:
- `aggregate` for domain events
- `effect`, `effect_result`, `workflow`, `workflow_event` for Runner
- No ambiguous wildcard publishing:
- producers publish concrete subjects only
- consumers may filter with wildcards
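A sketch of what such builders in `shared` could look like (function and constant names are assumptions; the subject shapes are the ones defined above):
```rust
/// Producers build concrete subjects through helpers and never format
/// subject strings by hand, so wildcards cannot leak into publish paths.
pub fn aggregate_event_subject(tenant: &str, aggregate_type: &str, aggregate_id: &str) -> String {
    format!("tenant.{tenant}.aggregate.{aggregate_type}.{aggregate_id}")
}

/// Consumers, by contrast, may subscribe with wildcard filters.
pub const AGGREGATE_EVENTS_ALL: &str = "tenant.*.aggregate.*.*";
```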
### Consumer Naming Rules
- Durable consumer names must be stable and collision-free:
- include role + mode + optional view/saga name + shard/group
- Ephemeral consumer names must be unique per operation:
- include tenant + purpose + uuid
- must be deleted best-effort when operation completes
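And a sketch of the corresponding name generators (names and separators are assumptions; real implementations must also sanitize inputs, since NATS consumer names may not contain `.`, `*`, or `>`):
```rust
use uuid::Uuid;

/// Durable names: stable across restarts and replicas, collision-free by construction.
pub fn durable_name(role: &str, mode: &str, group: &str) -> String {
    format!("{role}-{mode}-{group}")
}

/// Ephemeral names: unique per operation, so best-effort deletion can never
/// target another operation's consumer.
pub fn ephemeral_name(tenant: &str, purpose: &str) -> String {
    format!("{tenant}-{purpose}-{}", Uuid::now_v7())
}
```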
## Milestone 0: NATS Wire Contract Lock-in (Names, Headers, Envelopes)
### Goal
Make the NATS/JetStream wire contract explicit and enforced in code so all producers/consumers interoperate safely across scale-out and rolling restarts.
### Exit Criteria
- `shared` exposes NATS header constants and helpers for inject/extract/derive.
- All producers set required headers consistently.
- All consumers tolerate unknown fields and missing optional fields.
- A single, documented subject naming convention is enforced in code (builder functions).
- Workspace fmt/clippy/tests pass.
### Tasks
- [ ] Centralize NATS header constants and helpers in `shared` (see the sketch after this list):
- [ ] inject headers for publish (tenant, correlation, trace)
- [ ] extract headers on receive (best-effort)
- [ ] derive `trace-id` from `traceparent`
- [ ] Aggregate:
- [ ] Ensure event publishing always sets `tenant-id`, correlation headers, trace headers
- [ ] Ensure `Nats-Msg-Id` strategy is correct for idempotency/dedupe (document and test)
- [ ] Projection:
- [ ] Ensure EventEnvelope decoding remains tolerant (unknown fields ignored, optional IDs supported)
- [ ] Ensure correlation/trace context is carried into spans/metrics consistently
- [ ] Runner:
- [ ] Ensure publish paths include correlation/trace headers consistently for commands and results
- [ ] Ensure outbox metadata → NATS headers mapping is consistent and tested
- [ ] Tests:
- [ ] Unit tests for header injection/extraction in `shared`
- [ ] Per-service unit tests asserting produced headers include required keys
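A sketch of the publish-side helper the first task describes (the function name is hypothetical; header names follow the contract above, and the inline trace-id derivation is simplified relative to a validating shared helper):
```rust
use async_nats::HeaderMap;

/// Stamp the standard context headers onto an outgoing NATS message.
pub fn inject_nats_headers(
    tenant_id: &str,
    correlation_id: &str,
    traceparent: Option<&str>,
) -> HeaderMap {
    let mut headers = HeaderMap::new();
    headers.insert("tenant-id", tenant_id);
    headers.insert("x-correlation-id", correlation_id);
    headers.insert("correlation-id", correlation_id);
    if let Some(tp) = traceparent {
        headers.insert("traceparent", tp);
        // Simplified: a real helper validates the traceparent before deriving.
        if let Some(trace_id) = tp.split('-').nth(1) {
            headers.insert("trace-id", trace_id);
        }
    }
    headers
}
```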
### Required Tests
- `cargo fmt --check`
- `cargo clippy --workspace --all-targets -- -D warnings`
- `cargo test --workspace`
## Milestone 1: Stream Configuration Standardization (Retention, Limits, Storage)
### Goal
Make stream configs consistent, explicit, and operationally sane across environments (dev → prod), minimizing surprise and preventing runaway resource usage.
### Exit Criteria
- Stream config for each stream is explicitly defined and validated at startup.
- Limits (max messages/bytes/age) are explicit and have defaults.
- Duplicate windows and dedupe behavior are explicit and tested.
- A “no destructive changes on startup” policy is enforced (create if missing; do not silently replace).
### Tasks
- [ ] Define a single “stream config policy” module per service (or shared helper); see the sketch after this list:
- [ ] `AGGREGATE_EVENTS` subjects + retention policy
- [ ] `WORKFLOW_COMMANDS` subjects + retention policy
- [ ] `WORKFLOW_EVENTS` subjects + retention policy
- [ ] Standardize defaults:
- [ ] retention: limits appropriate for replay + rebuild
- [ ] `duplicate_window` aligned with producer idempotency strategy
- [ ] storage type and replication policy documented and configurable
- [ ] Add startup validations:
- [ ] verify stream exists and matches required subject set (compatible superset allowed)
- [ ] verify required ack/dedupe assumptions hold
- [ ] Add tests that parse and validate configs without NATS.
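A sketch of one such policy builder on top of the `async-nats` stream `Config` (the concrete limits are placeholders, not recommended production values):
```rust
use std::time::Duration;
use async_nats::jetstream::stream::{Config, RetentionPolicy, StorageType};

/// Authoritative policy for AGGREGATE_EVENTS; limits here are placeholder defaults.
pub fn aggregate_events_stream() -> Config {
    Config {
        name: "AGGREGATE_EVENTS".to_string(),
        subjects: vec!["tenant.*.aggregate.*.*".to_string()],
        retention: RetentionPolicy::Limits, // keep events around for replay/rebuild
        max_messages: 10_000_000,
        max_bytes: 8 * 1024 * 1024 * 1024, // 8 GiB
        max_age: Duration::from_secs(14 * 24 * 3600),
        duplicate_window: Duration::from_secs(120), // must match producer Nats-Msg-Id strategy
        storage: StorageType::File,
        num_replicas: 1, // raised per environment
        ..Config::default()
    }
}
```
Startup would then compare an existing stream against this policy and fail fast on incompatibility rather than mutate it.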
### Required Tests
- Unit tests for stream config builders
- Existing crate tests
## Milestone 2: Consumer Policy Standardization (Ack, Backpressure, Poison)
### Goal
Make consumption reliable and cheap under load by standardizing ack policy, concurrency, and poison/deadletter handling.
### Exit Criteria
- All long-lived consumers use explicit ack with consistent `ack_wait`, `max_deliver`, `max_ack_pending`.
- Application concurrency is bounded and tied to `max_in_flight`.
- Poison policy is consistent:
- after `max_deliver`, term + deadletter/quarantine record is written
- Replay behavior is deterministic on restart (checkpoint-based where applicable).
### Tasks
- [ ] Define standard consumer config defaults (see the sketch after this list):
- [ ] `AckPolicy::Explicit`
- [ ] `ack_wait` default + env override
- [ ] `max_deliver` default + env override
- [ ] `max_ack_pending` tied to application concurrency
- [ ] Projection:
- [ ] Ensure durable consumer naming is collision-free in all modes (Single vs PerView)
- [ ] Ensure checkpoint gates ack correctly (skip still acks)
- [ ] Ensure poison policy writes durable records and terminates reliably
- [ ] Runner:
- [ ] Ensure saga/effect consumers use consistent durable naming + deliver groups when scaling out
- [ ] Ensure outbox relay preserves exactly-once semantics via dedupe keys + idempotent publish
- [ ] Aggregate:
- [ ] Ensure ad-hoc fetch consumer is bounded (timeouts) and unique per operation (already required)
- [ ] Ensure best-effort cleanup is performed and cannot delete unrelated consumers
- [ ] Tests:
- [ ] Unit tests for consumer name generation (sanitization + uniqueness)
- [ ] NATS-gated tests for ack/redelivery/poison behavior (must be runnable with env flag)
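A sketch of these defaults as an `async-nats` pull consumer config (numbers are illustrative; real values come from env overrides):
```rust
use std::time::Duration;
use async_nats::jetstream::consumer::{pull, AckPolicy};

/// Standard defaults for a long-lived pull consumer.
pub fn standard_pull_consumer(durable: &str, filter: &str, concurrency: i64) -> pull::Config {
    pull::Config {
        durable_name: Some(durable.to_string()),
        filter_subject: filter.to_string(),
        ack_policy: AckPolicy::Explicit,
        ack_wait: Duration::from_secs(30),
        max_deliver: 5,                   // after this: term + quarantine record (poison policy)
        max_ack_pending: concurrency * 2, // bound in-flight work to worker capacity
        ..Default::default()
    }
}
```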
### Required Tests
- Workspace fmt/clippy/tests
- NATS-gated integration tests for:
- redelivery idempotency
- poison termination behavior
- scale-out with deliver group (where supported)
## Milestone 3: Connection Management + Failure Semantics (Operational Frugality)
### Goal
Make NATS connection handling stable under partial failure while minimizing resource churn and cascading outages.
### Exit Criteria
- One NATS connection per process (or bounded pool only if justified).
- Reconnect/backoff policy is explicit and consistent.
- Circuit breaker behavior is consistent (when used), and health/ready reflect NATS state correctly.
- No busy-looping on NATS outages.
### Tasks
- [ ] Standardize connection options (sketched after this list):
- [ ] reconnect delays/backoff
- [ ] max reconnect attempts or “infinite with backoff” strategy (explicit)
- [ ] request timeouts around JetStream operations
- [ ] Standardize readiness semantics:
- [ ] `ready=false` when NATS is unavailable and the node depends on it
- [ ] `health` stays “process alive” but reports NATS connectivity in payload
- [ ] Add “fast fail” mode for tests and dev (avoid 30x retries when env not set).
- [ ] Tests:
- [ ] unit tests for backoff behavior (where possible)
- [ ] gated integration test: temporary NATS outage does not crash-loop and recovers
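A sketch of explicit `async-nats` connect options with capped exponential backoff (delays and caps are placeholders):
```rust
use std::time::Duration;
use async_nats::ConnectOptions;

/// Explicit reconnect policy instead of client defaults, plus a bounded
/// request timeout for JetStream operations.
pub fn nats_connect_options() -> ConnectOptions {
    ConnectOptions::new()
        .retry_on_initial_connect() // keep trying instead of failing fast at boot
        .reconnect_delay_callback(|attempt| {
            // Capped exponential backoff: 250ms, 500ms, ... up to ~16s.
            Duration::from_millis(250).saturating_mul(1u32 << attempt.min(6) as u32)
        })
        .request_timeout(Some(Duration::from_secs(5)))
}
```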
## Milestone 4: Multi-Tenant Scale-Out Guarantees (Collision-Free + Predictable)
### Goal
Guarantee safe multi-replica behavior: no consumer collisions, no duplicate side effects, predictable throughput with bounded resource usage.
### Exit Criteria
- Durable names are deterministic and collision-free across replicas.
- Deliver groups are used where appropriate to share work across replicas.
- Exactly-once side effects are enforced via idempotency + dedupe keys (not wishful thinking).
- A scale-out test suite exists and is gated but runnable.
### Tasks
- [ ] Establish consumer naming scheme per service role:
- [ ] Projection: per-view durable option uses sanitized names and stable mapping
- [ ] Runner: durable prefix includes role + shard + optional group
- [ ] Establish deliver group usage rules:
- [ ] when to enable (scale-out consumers)
- [ ] how to roll without duplication
- [ ] Strengthen dedupe keys (see the publish sketch after this list):
- [ ] event-driven sagas: checkpoint + dedupe marker strategy tested under redelivery
- [ ] outbox relay: verify publish idempotency with `Nats-Msg-Id`
- [ ] Add gated tests:
- [ ] two replicas, same tenant, no duplicate publishes
- [ ] rolling restart preserves checkpoint correctness
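A sketch of the idempotent publish path via `Nats-Msg-Id` (the dedupe-key scheme is an assumption; dedupe only holds within the stream's `duplicate_window`):
```rust
use async_nats::{jetstream, HeaderMap};

/// Publish with a dedupe key: the same key within the stream's
/// duplicate_window is stored once; redeliveries become server-side no-ops.
pub async fn publish_once(
    js: &jetstream::Context,
    subject: String,
    dedupe_key: String, // e.g. "<tenant>-<outbox-seq>" (scheme is an assumption)
    payload: bytes::Bytes,
) -> Result<(), async_nats::Error> {
    let mut headers = HeaderMap::new();
    headers.insert("Nats-Msg-Id", dedupe_key.as_str());
    js.publish_with_headers(subject, headers, payload)
        .await? // accepted by the client
        .await?; // server ack confirms stored (or deduplicated)
    Ok(())
}
```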
## Verification Commands (Required at Each Milestone)
- `cargo fmt --check`
- `cargo clippy --workspace --all-targets -- -D warnings`
- `cargo test --workspace`
- Gated NATS integration tests:
- Runner: `RUNNER_TEST_NATS_URL=... cargo test -p runner -- --ignored`
- Projection: `PROJECTION_TEST_NATS_URL=... cargo test -p projection -- --ignored`
- Control API (if it runs NATS-gated tests): set documented env flags and run ignored tests
## Notes / Constraints
- Do not create per-tenant streams unless scaling evidence requires it; prefer subject partitioning and consumer groups.
- Prefer backward-compatible envelope changes (optional fields, tolerant decoding).
- Prefer stable durable consumers; ephemeral consumers must be unique and bounded and must cleanup best-effort.

38
README.md Normal file
View File

@@ -0,0 +1,38 @@
# cloudlysis (monorepo)
## Layout
- Rust services (Cargo workspace): `aggregate/`, `gateway/`, `projection/`, `runner/`, `control/api/`, `shared/`
- Control UI: `control/ui/`
- Docker + Swarm + Compose: `docker/`, `docker-compose.yml`, `swarm/`, `observability/`
- Transport plans:
- `TRANSPORT_DEVELOPMENT_PLAN.md`
- `GATEWAY_TRANSPORT_PLAN.md`
- `NATS_TRANSPORT_PLAN.md`
## Quick Start (Docker Compose)
```bash
docker compose up -d --build
```
Full local stack with observability:
```bash
docker compose -f docker-compose.yml -f observability/docker-compose.yml up -d --build
```
## Commands
- `make compose-up`, `make compose-down`
- `make compose-up-observability`, `make compose-down-observability`
- `make docker-build-all`
- `make swarm-deploy-all`, `make swarm-rm-all`
More details: `DOCKER.md`
## Workspace Verification
```bash
cargo fmt --check
cargo clippy --workspace --all-targets -- -D warnings
cargo test --workspace
```

333
TRANSPORT_DEVELOPMENT_PLAN.md Normal file
View File

@@ -0,0 +1,333 @@
# Transport Development Plan
## Purpose
Unify and optimize the platform transport layer end-to-end:
- Gateway ↔ nodes (Aggregate, Projection, Runner): routing + RPC/proxying + probes
- Node ↔ NATS JetStream/KV: event/work distribution + configuration substrate
This plan merges and supersedes:
- `GATEWAY_TRANSPORT_PLAN.md`
- `NATS_TRANSPORT_PLAN.md`
## Current Status (Codebase Reality)
- Monorepo workspace exists; `shared` crate exists and is used by Aggregate/Projection/Runner/Gateway.
- Request context pieces are partially standardized:
- `shared` provides `TenantId`, `CorrelationId`, `TraceId`
- `shared` provides `trace_id_from_traceparent(...)` and `traceparent_from_trace_id(...)`
- Some header names are centralized in `shared` but not all call sites use constants yet.
- Gateway → Aggregate is already HTTP(edge) → gRPC(internal) and propagates `x-tenant-id`, `x-correlation-id`, and `traceparent`.
- Gateway → Projection remains HTTP proxy (`/v1/query/...`) and Gateway → Runner remains HTTP admin proxy (`/admin/runner/...`).
- Node → NATS header propagation is improved and closer to consistent:
- Runner publishes `x-correlation-id` and `correlation-id`, and ensures `traceparent`/`trace-id` are present/derived when possible.
- Aggregate publishes `trace-id` when `traceparent` is present.
- Many “hard” NATS tests already exist but are gated/ignored by default; they should be treated as milestone gates when enabling changes.
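For reference, a minimal version of the derivation that `trace_id_from_traceparent(...)` performs; the actual helper in `shared` may be stricter about version and flags:
```rust
/// Extract the 32-hex-char trace-id from a W3C `traceparent` value
/// (`00-<trace-id>-<parent-id>-<flags>`), rejecting malformed input.
pub fn trace_id_from_traceparent(traceparent: &str) -> Option<String> {
    let mut parts = traceparent.split('-');
    let _version = parts.next()?;
    let trace_id = parts.next()?;
    let valid = trace_id.len() == 32
        && trace_id.chars().all(|c| c.is_ascii_hexdigit())
        && trace_id.chars().any(|c| c != '0'); // an all-zero trace-id is invalid
    valid.then(|| trace_id.to_string())
}
```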
## Principles
- Simplicity: minimize distinct patterns; prefer one internal RPC stack + one async backbone.
- Ease of operation: consistent health/ready/metrics; consistent naming; predictable failure modes.
- Frugality: bounded connections, bounded consumers, bounded in-flight work; no churny resources.
- Low resource usage: stable durables; avoid per-request reconnects; avoid unbounded loops.
- High performance: multiplexing, backpressure, low tail latency, predictable routing.
- Safety: tenant isolation, deny-by-default authz at the edge, idempotency, deterministic replay.
## Non-Negotiable Rules (Global)
- Every cross-component hop MUST carry tenant + correlation + trace context.
- Every transport path MUST have explicit timeouts/deadlines and bounded retries.
- Every JetStream stream/consumer MUST have an explicit contract (name/subjects/retention/ack policy).
- Every milestone is stop-the-line gated:
- All tasks completed
- All tests required by the milestone pass
- Workspace verification commands pass
- Gated integration tests for the milestone are runnable and documented
## Baseline (Today)
- Gateway → Aggregate: gRPC (command submission)
- Gateway → Projection: HTTP (query proxy)
- Gateway → Runner: HTTP (admin proxy)
- Node ↔ NATS JetStream: `AGGREGATE_EVENTS`, `WORKFLOW_COMMANDS`, `WORKFLOW_EVENTS`
## End State (Target Architecture)
- Edge contract (clients ↔ Gateway): HTTP/JSON
- Internal RPC (Gateway ↔ nodes): gRPC for Aggregate + Projection + Runner admin
- Async backbone: NATS JetStream for events/work distribution; NATS KV for routing/placement
- `shared` is the single source of truth for:
- header names and injection/extraction rules
- trace parsing/validation (`traceparent`, `trace-id`)
- context object model (tenant/correlation/trace/request ids)
- NATS subject + consumer naming helpers
## Standard Contracts
### Context Fields
- Tenant: HTTP `x-tenant-id`, NATS `tenant-id`
- Correlation: HTTP `x-correlation-id`, NATS `x-correlation-id` and `correlation-id`
- Trace: HTTP `traceparent`, NATS `traceparent` and `trace-id` (derived when possible)
- Request id: HTTP `x-request-id` (optional for NATS)
### Standard Service Endpoints (every service)
- `GET /health` liveness
- `GET /ready` readiness (includes tenant gating if relevant)
- `GET /metrics` Prometheus
## Milestone 0: Shared Transport Contract (Headers + Context + Trace)
### Goal
Make propagation rules consistent and enforceable across HTTP, gRPC, and NATS so every later milestone builds on one contract.
### Exit Criteria
- `shared` contains canonical constants for header names and NATS header names.
- `shared` contains canonical trace parsing/validation and trace derivation helpers.
- Library-level unit tests cover parsing/derivation behavior.
- All crates build and tests pass for the workspace.
### Tasks
- [x] Add shared ID types in `shared`:
- [x] `TenantId`
- [x] `CorrelationId`
- [x] `TraceId`
- [~] Consolidate header constants in `shared`:
- [x] HTTP: `x-correlation-id`, `traceparent`, `trace-id` (for NATS/interop)
- [ ] HTTP: `x-tenant-id`, `x-request-id` (missing constants)
- [x] NATS: `correlation-id` (used in Runner), `trace-id` (now emitted where possible)
- [ ] NATS: `tenant-id` constant, `Nats-Msg-Id` constant (missing constants)
- [x] Add shared helpers:
- [x] derive `trace-id` from `traceparent`
- [x] derive `traceparent` from `trace-id` when valid
- [ ] normalize/generate correlation id when missing across all transports (helper exists for `CorrelationId::generate()`; adoption incomplete)
- [x] Add unit tests in `shared` for:
- [x] traceparent parsing validity
- [x] serialization shape for correlation/trace id newtypes
- [ ] additional validation cases (invalid traceparents, invalid trace-id lengths) if needed for stricter enforcement
### Required Tests
- `cargo fmt --check`
- `cargo clippy --workspace --all-targets -- -D warnings`
- `cargo test --workspace`
## Milestone 1: NATS Wire Protocol Lock-In (Subjects + Headers + Envelopes)
### Dependencies
- Milestone 0
### Goal
Make the JetStream/NATS “wire protocol” explicit and uniform so interop is safe across scale-out and rolling restarts.
### Exit Criteria
- Subject naming is standardized and enforced via builder functions (producers publish concrete subjects only).
- All NATS producers set required headers consistently.
- All NATS consumers tolerate unknown fields and missing optional fields.
- “Contract tests” exist per service to verify produced headers and subject formats.
### Tasks
- [ ] Create/standardize subject builder helpers (prefer `shared`):
- [ ] Aggregate event subject builder (`tenant.<tenant>.aggregate.<type>.<id>`)
- [ ] Runner effect/effect_result/workflow subject builders
- [~] Aggregate publishes:
- [ ] `tenant-id` header always present (still needs enforcement everywhere)
- [ ] correlation + trace headers always present when available, generated when required
- [x] `trace-id` is derived when `traceparent` is present (now emitted in publish path)
- [ ] `Nats-Msg-Id` strategy explicitly defined and tested
- [~] Runner publishes (commands/results):
- [x] correlation headers emitted consistently (`x-correlation-id` + `correlation-id`)
- [x] trace headers derived consistently when possible (`traceparent` from `trace-id`, `trace-id` from `traceparent`)
- [ ] outbox metadata → NATS headers mapping standardized via shared helpers (adoption incomplete)
- [~] Projection consumption:
- [x] envelope decoding remains tolerant (unknown fields ignored)
- [~] correlation/trace context flows into spans/metrics consistently (types are shared; header extraction remains best-effort and should be unified)
- [ ] Add unit tests:
- [ ] subject formatting tests per service (once builders exist)
- [ ] required header presence tests per publisher (enforce required keys)
### Required Tests
- Workspace verification commands
## Milestone 2: JetStream Stream Policy (Create/Validate, No Destructive Startup)
### Dependencies
- Milestone 1
### Goal
Make stream definitions explicit, validated, and safe in all environments, preventing resource runaway and accidental destructive changes.
### Exit Criteria
- Each stream has a single authoritative config policy (name/subjects/retention/limits/duplicate window).
- Services create streams if missing, and validate compatibility on startup.
- Startup does not silently replace or destructively mutate existing streams.
- Config-only tests validate stream config builders without requiring NATS.
### Tasks
- [ ] Define stream policies:
- [ ] `AGGREGATE_EVENTS` (subjects, retention, duplicate window)
- [ ] `WORKFLOW_COMMANDS`
- [ ] `WORKFLOW_EVENTS`
- [ ] Implement compatibility validation rules:
- [ ] required subjects are present (superset allowed)
- [ ] retention/limits are within allowed ranges
- [ ] dedupe assumptions align with producer `Nats-Msg-Id` usage
- [ ] Add unit tests for stream config builders + validators.
### Required Tests
- Workspace verification commands
## Milestone 3: Consumer Policy + Backpressure + Poison (Reliable and Cheap Under Load)
### Dependencies
- Milestone 2
### Goal
Standardize consumer configs and runtime behavior to guarantee bounded in-flight work, predictable redelivery behavior, and consistent poison handling.
### Exit Criteria
- All long-lived consumers use explicit ack with standardized defaults (`ack_wait`, `max_deliver`, `max_ack_pending`).
- Application-level concurrency is bounded and aligned with `max_in_flight`.
- Poison policy is consistent across consumers (term + durable quarantine/deadletter record).
- Gated NATS integration tests prove:
- redelivery idempotency
- poison termination
- scale-out behavior (deliver group) where applicable
### Tasks
- [ ] Standardize consumer defaults:
- [ ] `AckPolicy::Explicit`
- [ ] `ack_wait` default + env override
- [ ] `max_deliver` default + env override
- [ ] `max_ack_pending` tied to worker concurrency
- [ ] Projection:
- [ ] durable naming collision-free for Single/PerView modes
- [ ] checkpoint gate semantics: “skip still acks”
- [ ] poison handling persists durable records and terminates reliably
- [ ] Runner:
- [ ] durable naming collision-free and stable across replicas
- [ ] deliver group rules defined and tested
- [ ] outbox relay exactly-once behavior verified under redelivery
- [ ] Aggregate:
- [ ] ad-hoc fetch consumer always unique and bounded
- [ ] best-effort deletion never targets unrelated consumers
- [ ] Add gated NATS integration tests and document env flags:
- [ ] Runner ignored tests
- [ ] Projection ignored tests
### Required Tests
- Workspace verification commands
- Runner: `RUNNER_TEST_NATS_URL=... cargo test -p runner -- --ignored`
- Projection: `PROJECTION_TEST_NATS_URL=... cargo test -p projection -- --ignored`
## Milestone 4: Gateway → Projection Internal RPC (gRPC QueryService)
### Dependencies
- Milestone 0 (context contract)
### Goal
Replace Gateway → Projection HTTP proxy as the default path with a gRPC Query service, keeping HTTP optional for human/debug use.
### Exit Criteria
- Projection exposes `projection.gateway.v1.QueryService`.
- Gateway routes queries via gRPC by default.
- Authz remains enforced in Gateway (deny-by-default).
- Query responses remain stable for Control UI expectations.
- New gRPC query tests pass (unit + integration).
### Tasks
- [ ] Define protobuf API: `projection.gateway.v1.QueryService`
- [ ] Implement Projection gRPC server for query execution
- [ ] Implement Gateway gRPC client routing to Projection
- [ ] deadlines
- [ ] bounded retries (idempotent only)
- [ ] context propagation
- [ ] Preserve HTTP `/v1/query/*` as compatibility/debug:
- [ ] route internally to gRPC or keep as legacy endpoint
- [ ] Add tests:
- [ ] authz + forwarding via gRPC
- [ ] tenant isolation enforcement in Projection QueryService
### Required Tests
- Workspace verification commands
## Milestone 5: Gateway → Runner Admin Internal RPC (gRPC RunnerAdmin)
### Dependencies
- Milestone 0 (context contract)
### Goal
Replace the Gateway's `/admin/runner/*` HTTP proxy usage with a first-class gRPC admin service.
### Exit Criteria
- Runner exposes `runner.admin.v1.RunnerAdmin`.
- Gateway calls Runner admin via gRPC (authz enforced in Gateway).
- Tenant-spoof and unauthorized calls are rejected deterministically.
- Runner drain/readiness semantics validated and tested.
### Tasks
- [ ] Define protobuf API: `runner.admin.v1.RunnerAdmin`
- [ ] Implement Runner gRPC admin server
- [ ] Implement Gateway gRPC client integration for admin operations
- [ ] Keep Runner HTTP admin endpoints optional for direct debugging, not required by Gateway
- [ ] Add tests:
- [ ] Gateway: rejects without rights
- [ ] Gateway: rejects tenant spoof attempts
- [ ] Runner: idempotency and drain semantics
### Required Tests
- Workspace verification commands
## Milestone 6: Gateway Upstream Performance + Operational Guardrails
### Dependencies
- Milestones 4–5 (gRPC internal RPC surfaces available)
### Goal
Make Gateway upstream connection handling, retry behavior, and probe/fanout operations consistent, bounded, and cheap under load.
### Exit Criteria
- Bounded upstream gRPC channel pool exists (LRU + TTL/eviction).
- Deadlines everywhere; retries only for idempotent operations.
- Probe/fanout calls are bounded (timeouts + concurrency limits) and carry context.
- Gated load/soak tests exist and are runnable.
### Tasks
- [ ] Implement upstream channel pool
- [ ] bounded LRU
- [ ] TTL/eviction
- [ ] fast-path reuse under load
- [ ] Standardize retry profiles
- [ ] read-only: limited retry with jitter
- [ ] mutations: no retry unless idempotency key is present and semantics are safe
- [ ] Standardize timeouts/deadlines:
- [ ] edge timeout limits
- [ ] internal per-service deadlines
- [ ] Fanout controls:
- [ ] concurrency limiters for probes/snapshots
- [ ] short TTL caching where safe
- [ ] Ensure probes carry context (correlation/trace) for observability.
### Required Tests
- Workspace verification commands
- Gated load/soak tests (document env + how to run)
## Milestone 7: Transport Cleanup (Remove Legacy Internal Paths)
### Dependencies
- Milestone 6
### Goal
Ensure the “happy path” is: HTTP edge → Gateway → gRPC internal → NATS async, with legacy internal HTTP proxy paths removed or clearly debug-only.
### Exit Criteria
- Gateway no longer depends on HTTP for Projection queries or Runner admin.
- Legacy paths are removed or explicitly debug-only and not referenced by Gateway/Control.
- End-to-end smoke tests pass (gated).
### Tasks
- [ ] Remove Gateway HTTP query proxy usage (or keep only as explicit compatibility shim)
- [ ] Remove Gateway runner admin HTTP proxy usage (or keep only as explicit compatibility shim)
- [ ] Ensure Control UI + Control API rely only on standardized surfaces
- [ ] Harden metrics and readiness probes to match the standard contract everywhere
### Required Tests
- Workspace verification commands
- End-to-end smoke tests (gated)
## Workspace Verification Commands (Run for Every Milestone)
- `cargo fmt --check`
- `cargo clippy --workspace --all-targets -- -D warnings`
- `cargo test --workspace`
- `npm ci && npm run lint && npm run typecheck && npm run test && npm run build` (in `control/ui`)

1
aggregate/.clippy.toml Normal file
View File

@@ -0,0 +1 @@

37
aggregate/.gitignore vendored Normal file
View File

@@ -0,0 +1,37 @@
/target/
/target-*/
**/target/
*.rs.bk
*.pdb
*.dSYM/
*.orig
*.rej
*.log
*.swp
*.swo
*~
.DS_Store
.idea/
.vscode/
.env
.env.*
.envrc
.direnv/
docker-compose.override.yml
*.mdbx
*.mdbx-*
*.mdbx-lock
*.mdbx.dat
*.mdbx.lck
*.mdb
*.db
/data/
/tmp/
/coverage/
lcov.info
*.profraw
*.profdata

42
aggregate/Cargo.toml Normal file
View File

@@ -0,0 +1,42 @@
[package]
name = "aggregate"
version = "0.1.0"
edition = "2021"
[features]
default = []
runtime-v8 = ["v8"]
runtime-wasm = []
[dependencies]
shared = { path = "../shared" }
edge_storage = { version = "0.1", registry = "madapes" }
runtime-function = { version = "0.2", registry = "madapes" }
edge-logger-client = { version = "0.1", registry = "madapes" }
query_engine = { version = "0.1", registry = "madapes" }
async-nats = "0.39"
tokio = { version = "1", features = ["full"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
serde_yaml = "0.9"
toml = "0.8"
thiserror = "2"
anyhow = "1"
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["json", "env-filter"] }
uuid = { version = "1", features = ["v7", "serde"] }
chrono = { version = "0.4", features = ["serde"] }
futures = "0.3"
lru = "0.12"
v8 = { version = "0.106", optional = true }
tonic = { version = "0.12", default-features = false, features = ["codegen", "prost", "transport"] }
prost = "0.13"
axum = "0.7"
[dev-dependencies]
tempfile = "3"
tokio-stream = { version = "0.1", features = ["net"] }
[build-dependencies]
tonic-build = { version = "0.12", default-features = false, features = ["prost"] }
protoc-bin-vendored = "3"

File diff suppressed because it is too large

81
aggregate/README.md Normal file
View File

@@ -0,0 +1,81 @@
# aggregate
## Running
### Configuration
Configuration is loaded in this order:
1. If `AGGREGATE_CONFIG_PATH` is set and points to a readable config file, load that file and apply env overrides.
2. Otherwise load defaults and apply env overrides.
Supported config formats:
- YAML (`.yaml`, `.yml`)
- TOML (`.toml`)
- JSON (`.json`)
### Environment Variables
#### Core
- `AGGREGATE_NATS_URL` (default: `nats://localhost:4222`): NATS server URL.
- `AGGREGATE_STORAGE_PATH` (default: `./data`): Path used by the snapshot storage.
- `AGGREGATE_SNAPSHOT_THRESHOLD` (default: `10`): Save snapshot when events since last snapshot reach this threshold.
- `AGGREGATE_MAX_RETRIES` (default: `3`): Max retries for version conflicts in command handling.
- `AGGREGATE_HTTP_ADDR` (default: `0.0.0.0:8080`): HTTP bind address.
- `AGGREGATE_GRPC_ADDR` (default: `0.0.0.0:50051`): gRPC bind address for command submission.
#### Multi-tenant
- `AGGREGATE_MULTI_TENANT` (default: `true`): Enables multi-tenant behavior when parsing/validating tenant ids.
- `AGGREGATE_DEFAULT_TENANT_ID` (default: unset): Default tenant id when the incoming request doesn't specify one.
- `AGGREGATE_SHARD_ID` (default: `local`): Shard id used when applying placement maps.
#### Logging
- `AGGREGATE_LOGGER_SOCKET` (default: unset): Socket path for `edge-logger-client` integration (if enabled).
#### Server
- `AGGREGATE_CONFIG_PATH` (default: unset): Path to a YAML/TOML/JSON config file.
#### Placement
- `AGGREGATE_PLACEMENT_BUCKET` (default: `AGGREGATE_PLACEMENT`): NATS KV bucket to watch.
- `AGGREGATE_PLACEMENT_KEY` (default: `aggregate_placement`): NATS KV key to watch. Value is a JSON object mapping `tenant_id -> shard_id`.
#### Runtime Programs
- `AGGREGATE_DECIDE_PROGRAM` / `AGGREGATE_APPLY_PROGRAM`: Inline program source strings.
- `AGGREGATE_DECIDE_PROGRAM_PATH` / `AGGREGATE_APPLY_PROGRAM_PATH`: Paths to files containing the program source.
## HTTP Endpoints
- `GET /health` → JSON health report
- `GET /ready` → JSON boolean readiness
- `GET /metrics` → Prometheus text format
- `GET /admin/tenants` → JSON list of hosted tenants
- `POST /admin/drain` → marks tenant draining and waits for in-flight commands to finish (`{"tenant_id":"..."}`)
- `POST /admin/reload` → updates hosted tenant allowlist (`{"hosted_tenants":[...]}`) or applies a placement map (`{"placement":{...}}`)
- `GET /admin/tenant/{tenant_id}/status` → JSON tenant status (`hosted`, `accepting`, `draining`, `in_flight`)
- `GET /admin/tenant/{tenant_id}/ready` → JSON boolean (node ready AND accepting tenant)
- `POST /admin/tenant/{tenant_id}/drain` → drains tenant with optional timeout (`{"timeout_ms":10000}`)
## gRPC
Aggregate exposes a command submission API for the Gateway:
- Service: `aggregate.gateway.v1.CommandService`
- Method: `SubmitCommand`
- Metadata: `x-tenant-id` (tenant routing hint)
Proto definition: [aggregate.proto](file:///Users/vlad/Developer/cloudlysis/aggregate/proto/aggregate.proto)
## Container
Build and run locally:
```bash
docker build -t cloudlysis/aggregate:local -f docker/Dockerfile.rust --build-arg PACKAGE=aggregate --build-arg BIN=aggregate .
docker compose up -d --build
```
Container smoke test (requires Docker installed):
```bash
sh docker/scripts/verify_aggregate_container.sh
```

8
aggregate/build.rs Normal file
View File

@@ -0,0 +1,8 @@
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Point prost/tonic at the vendored protoc so builds do not require a system install.
    let protoc = protoc_bin_vendored::protoc_bin_path()?;
    std::env::set_var("PROTOC", protoc);
    // Generate the tonic/prost bindings for the Gateway-facing command API.
    tonic_build::configure().compile_protos(&["proto/aggregate.proto"], &["proto"])?;
    Ok(())
}

4
aggregate/cargo-build.sh Normal file
View File

@@ -0,0 +1,4 @@
#!/bin/bash
export CARGO_REGISTRIES_MADAPES_TOKEN=0f5ef6366637224dceae4c35e0e3b5639be77b69
source ~/.cargo/env
cargo "$@"

192
aggregate/external_prd.md Normal file
View File

@@ -0,0 +1,192 @@
### External PRD: Changes Required in Aggregate, Projection, Runner
This document captures the work needed outside the Gateway to support:
- Tenant-aware routing via `x-tenant-id`
- Independent horizontal scalability of Aggregate, Projection, Runner
- A safe mechanism for tenant rebalancing per service kind
---
## **Target State**
### Independent Placements
Each service kind has its own placement map:
- `aggregate_placement[tenant_id] -> aggregate_shard_id`
- `projection_placement[tenant_id] -> projection_shard_id`
- `runner_placement[tenant_id] -> runner_shard_id`
Each shard is a replica set that can scale independently.
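A sketch of how a node could keep its placement map fresh by watching NATS KV (bucket/key names mirror the Aggregate defaults; delete/purge entries and error recovery are elided):
```rust
use std::collections::HashMap;
use async_nats::jetstream;
use futures::StreamExt;

/// Watch the placement key and recompute this shard's hosted-tenant allowlist.
pub async fn watch_placement(
    js: jetstream::Context,
    shard_id: String,
) -> Result<(), async_nats::Error> {
    let kv = js.get_key_value("AGGREGATE_PLACEMENT").await?;
    let mut updates = kv.watch("aggregate_placement").await?;
    while let Some(entry) = updates.next().await {
        let entry = entry?;
        // Value is a JSON object mapping tenant_id -> shard_id.
        let placement: HashMap<String, String> = serde_json::from_slice(&entry.value)?;
        let hosted: Vec<&str> = placement
            .iter()
            .filter(|(_, shard)| shard.as_str() == shard_id)
            .map(|(tenant, _)| tenant.as_str())
            .collect();
        tracing::info!(?hosted, "placement updated; applying new allowlist");
        // ...swap the allowlist atomically and trigger per-tenant drain where needed.
    }
    Ok(())
}
```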
### Rebalancing Contract (Per Service Kind)
All nodes MUST support:
- Dynamic placement updates (watch NATS KV or reload config)
- A drain mechanism that can target a specific tenant (stop acquiring new work for that tenant, finish in-flight, report status)
- Clear readiness semantics that reflect whether the node will accept work for a tenant
Additionally, all nodes SHOULD converge on the same operational contract:
- A per-tenant “accepting” gate (can this shard accept new work/queries/commands for tenant X?)
- A per-tenant “drained” signal (no in-flight work remains for tenant X)
- A per-tenant warmup/catchup signal where relevant (projection lag, aggregate snapshot availability)
---
## **Aggregate: Required Changes**
### 1) Expose a Real Command API (Gateway Upstream)
Today, Aggregate has internal command handling types (e.g., `CommandServer`) but its running HTTP server only exposes health/metrics/admin endpoints ([aggregate/http_server.rs](file:///Users/vlad/Developer/cloudlysis/aggregate/src/http_server.rs#L15-L82), [aggregate/server/mod.rs](file:///Users/vlad/Developer/cloudlysis/aggregate/src/server/mod.rs#L81-L213)).
Aggregate MUST expose one of the following upstream APIs for the Gateway to call:
- **Option A (Recommended)**: gRPC server implementing `aggregate.gateway.v1.CommandService/SubmitCommand` compatible with [aggregate.proto](file:///Users/vlad/Developer/cloudlysis/aggregate/proto/aggregate.proto#L1-L31).
- **Option B**: HTTP endpoint for command submission (REST), with a stable request/response shape that the Gateway can proxy.
### 2) Tenant Placement Enforcement
Aggregate MUST enforce “hosted tenants” so independent scaling is safe:
- If an Aggregate shard/node is not assigned a tenant, it MUST reject commands for that tenant (e.g., `403` or `503` with retriable hint depending on whether the issue is authorization vs placement).
- Aggregate SHOULD maintain an in-memory allowlist of hosted tenants that is driven by:
- NATS KV placement watcher (preferred), or
- Hot-reloaded config pushed via `/admin/reload`
Aggregate already has admin hooks for drain/reload, but they are currently generic and/or illustrative ([aggregate/http_server.rs](file:///Users/vlad/Developer/cloudlysis/aggregate/src/http_server.rs#L15-L72), [aggregate/server/mod.rs](file:///Users/vlad/Developer/cloudlysis/aggregate/src/server/mod.rs#L402-L442)). These need to become placement-aware.
### 3) Tenant Drain (Per Tenant)
Aggregate MUST provide a per-tenant drain mechanism to support rebalancing:
- Stop accepting new commands for the tenant.
- Allow in-flight commands to finish (bounded wait), then report drained.
- Expose drain status per tenant (admin endpoint).
### 4) Rebalancing State Strategy
Aggregate persists snapshots locally (MDBX) and uses JetStream for events. To move a tenant:
- **Approach 1 (Snapshot migration)**: copy tenant snapshot DB/state to the target shard, then switch placement.
- **Approach 2 (Cold rehydrate)**: switch placement and let the target shard rebuild state by replaying events from JetStream; expect higher latency during warmup.
The system should support both, with the rebalancer selecting the strategy based on tenant size/SLO.
### 5) Metrics for Placement Decisions
Aggregate SHOULD expose:
- Per-tenant command rate, error rate
- In-flight commands by tenant
- Rehydrate time / snapshot hit ratio
- Storage size per tenant (if feasible)
---
## **Projection: Required Changes**
### 1) Expose Query API Upstream for Gateway
Projection has a working `QueryService` with tenant-scoped prefix scans ([uqf.rs](file:///Users/vlad/Developer/cloudlysis/projection/src/query/uqf.rs#L121-L162)) but it is not exposed via HTTP/gRPC (current HTTP routes are health/ready/metrics/info only: [projection/http/mod.rs](file:///Users/vlad/Developer/cloudlysis/projection/src/http/mod.rs#L102-L109)).
Projection MUST add one upstream API the Gateway can route to:
- `POST /query/{view_type}` (HTTP) accepting `x-tenant-id` and a UQF payload, returning `QueryResponse`.
- Or a gRPC query service (new proto) if gRPC is preferred end-to-end.
### 2) Tenant Placement Filtering (Independent Scaling)
Projection MUST support running in one of these modes:
- **Multi-tenant shard**: consumes all tenants (simple, less isolated).
- **Tenant-filtered shard (required for rebalancing)**:
- only consumes/serves queries for the tenants assigned to that shard
- rejects queries for unassigned tenants (consistent error semantics)
Implementation direction:
- Add a placement watcher similar to Runner's tenant filter ([runner/tenant_placement.rs](file:///Users/vlad/Developer/cloudlysis/runner/src/tenant_placement.rs#L8-L100)).
- Apply tenant filter to:
- event consumption subject filters (preferred), and
- query serving validation (always).
### 3) Drain + Warmup Endpoints
Projection SHOULD add:
- `/admin/drain?tenant_id=...` (stop consuming new events for that tenant, finish in-flight, flush checkpoints)
- `/admin/reload` (apply latest placement/config)
- Optional warmup status: whether the shard has caught up to JetStream tail for that tenant/view_types
### 4) Rebalancing Strategy for Projection
Projection can rebalance safely with “warm then cut over”:
- Assign tenant to the new projection shard while old shard still serves.
- New shard catches up (replay from JetStream, build view KV).
- Switch Gateway placement for query routing to new shard.
- Drain old shard for that tenant and optionally delete old tenant KV keys.
### 5) Metrics for Placement Decisions
Projection SHOULD expose:
- JetStream lag per tenant/view_type (tail minus checkpoint)
- Query latency and scan counts
- Storage size per tenant (if feasible)
---
## **Runner: Required Changes**
Runner already has:
- A tenant placement watcher capable of producing an allowlist ([tenant_placement.rs](file:///Users/vlad/Developer/cloudlysis/runner/src/tenant_placement.rs#L8-L100))
- Admin endpoints including drain/reload/config ([runner/http/mod.rs](file:///Users/vlad/Developer/cloudlysis/runner/src/http/mod.rs#L69-L86))
- Gateway client integration for aggregate command submission ([runner/gateway/mod.rs](file:///Users/vlad/Developer/cloudlysis/runner/src/gateway/mod.rs#L1-L47))
To support independent scalability + rebalancing, Runner needs the following.
### 1) Per-Tenant Drain (Not Only Global)
Runner's current drain is global (`/admin/drain` toggles a single draining flag). Runner MUST support draining a specific tenant:
- Stop acquiring new saga/effect work for the tenant.
- Allow in-flight work for the tenant to finish (bounded).
- Flush outbox for the tenant (or guarantee idempotency on handoff).
- Persist final checkpoints so another shard can continue without duplication beyond at-least-once bounds.
### 2) Placement-Enforced Work Acquisition
Runner MUST validate tenant assignment at the boundary where it:
- consumes JetStream messages (saga triggers, effect commands), and
- dispatches outbox work.
If a tenant is not assigned to the shard, Runner must not process its work.
### 3) Handoff Safety Rules for Rebalancing
Runner rebalancing should follow:
- New shard begins processing only after it is assigned the tenant.
- Old shard stops acquiring new work for that tenant, then drains.
- Idempotency remains correct across handoff using checkpoints and dedupe markers.
### 4) Metrics for Placement Decisions
Runner SHOULD expose:
- Outbox depth by tenant
- Work processing latency and retries by tenant/effect
- Schedule due items by tenant
- Consumer lag by tenant (if the consumption model supports per-tenant lag)
### 5) Auth Delivery Side Effects (Email/SMS/Push)
If the platform's AuthN flows require out-of-band delivery (password reset links, email verification, MFA codes), the Runner SHOULD be the standard place to execute those side effects:
- Define a stable effect interface for sending transactional emails (reset links, verification links, security alerts).
- Optionally add SMS/push providers later under the same effect contract.
This keeps the Gateway free of long-lived provider credentials and aligns with the existing “effects are executed by workers” pattern.
---
## **Gateway Integration Notes**
Once the above changes exist:
- Gateway routes per `(tenant_id, service_kind)` using independent placement maps.
- Gateway can implement “warm then cut over” rebalancing for Projection and Runner by switching only query/workflow routing after readiness conditions are met.
- Gateway can enforce consistent tenant validation, authn/authz, and error semantics at the edge even as placements move.
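As a sketch, the lookup the Gateway would perform; the types are illustrative, and the real placement map schema is an open item below:

```rust
use std::collections::HashMap;

#[derive(Clone, Copy, PartialEq, Eq, Hash)]
enum ServiceKind {
    Aggregate,
    Projection,
    Runner,
}

/// One independent placement map per service kind, each mapping
/// tenant_id -> shard endpoint URL.
struct GatewayRouting {
    placements: HashMap<ServiceKind, HashMap<String, String>>,
}

impl GatewayRouting {
    fn endpoint(&self, kind: ServiceKind, tenant_id: &str) -> Option<&str> {
        self.placements
            .get(&kind)?
            .get(tenant_id)
            .map(String::as_str)
    }
}
```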
---
## **Gaps / Opportunities**
- **KV schema + ownership**: define the exact NATS KV bucket layout, key naming, revisioning rules, and who is allowed to write placement updates.
- **Rebalancer API**: define operator workflows (plan/apply/rollback), status reporting, and audit log requirements for placement changes.
- **Shard discovery**: define how shard endpoints are registered (static config vs KV directory entries) and how health is represented.
- **Consistency boundaries**: define rebalancing guarantees per service kind (projection can be warm-cutover; runner requires checkpoint handoff; aggregate requires single-writer and state availability).

View File

@@ -0,0 +1,4 @@
tenants:
tenant-a: "http://aggregate-node-a:8080"
tenant-b: "http://aggregate-node-b:8080"

160
aggregate/prd.md Normal file
View File

@@ -0,0 +1,160 @@
### 🧱 Component: Aggregate
**Definition:**
The Aggregate is a standalone Rust-based container that serves as the primary consistency boundary and decision-making unit of the system. It is a stateful entity that encapsulates business logic, enforces invariants, and ensures that all changes to the system are valid according to defined rules. Commands are received from users through a Gateway, and events are stored on **NATS JetStream**; `edge-storage` `AggregateStore` holds versioned **snapshots** for efficient rehydration.
**Multi-Tenancy:**
The Aggregate supports optional multi-tenancy via `tenant_id`. When enabled:
- **Routing:** The Gateway routes commands to Aggregate nodes based on the `x-tenant-id` header
- **Sharding:** Aggregate instances are sharded across nodes by `tenant_id`, ensuring tenant data isolation
- **Storage:** Snapshots and events are namespaced by `tenant_id` to prevent cross-tenant access
- **Subject Naming:** NATS subjects include `tenant_id` (e.g., `tenant.<tenant_id>.aggregate.<aggregate_type>.<aggregate_id>`); a builder sketch follows this list
- **Backward Compatibility:** Aggregates without multi-tenancy use a default/empty `tenant_id`
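For illustration, the subject layout above as a small helper (a sketch only):

```rust
/// Builds the tenant-namespaced event subject described above,
/// e.g. `tenant.acme.aggregate.Account.<aggregate_id>`.
fn event_subject(tenant_id: &str, aggregate_type: &str, aggregate_id: &str) -> String {
    format!("tenant.{tenant_id}.aggregate.{aggregate_type}.{aggregate_id}")
}

#[cfg(test)]
mod tests {
    #[test]
    fn subject_layout() {
        let s = super::event_subject("acme", "Account", "a1");
        assert_eq!(s, "tenant.acme.aggregate.Account.a1");
    }
}
```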
**Dependencies:**
* Core crates pulled from the custom Cargo registry:
```toml
[registries.madapes]
index = "sparse+https://git.madapes.com/api/packages/madapes/cargo/"
```
| Crate | Purpose |
|-------|---------|
| `edge-storage` | libmdbx-backed AggregateStore for versioned snapshots |
| `runtime-function` | Deterministic DAG execution for `decide`/`apply` programs |
| `edge-logger` | High-performance logging (UDS + Protobuf, Loki sink) |
| `query-engine` | UQF query support for filtering/querying aggregate state |
| `async-nats` | NATS JetStream client for event streaming |
* Source code available at `../../madapes/`
* **Note:** This is a standalone container — it does not use `framework-bus` or `framework-aggregate` (those serve a different system)
**Observability:**
* Production stack: **Grafana** + **Victoria Metrics** + **Loki**
* `edge-logger` provides structured logging via Unix Domain Sockets with lock-free batching
* Metrics exposed via `metrics-exporter-prometheus` for Victoria Metrics scraping
* Traces/logs flow to Loki with cardinality protection and multi-tenant isolation
#### 1. Core Responsibilities
* **Command Validation:** Receives intent (Commands) from the Gateway and uses `runtime-function` DAG programs to determine if the intent is valid based on the current state.
* **State Rehydration:** Reconstructs its internal state by loading the latest **snapshot** from `edge-storage` `AggregateStore` (`get_latest_snapshot`) and replaying any subsequent events from NATS JetStream.
* **Event Production:** Transforms valid commands into one or more Events that represent a "fact" that has occurred.
* **Atomic Persistence:** Publishes new events to NATS JetStream and stores an updated snapshot in `edge-storage` `AggregateStore` (`put_snapshot_sync`).
* **Concurrency Control:** Protects against "lost updates" using version-based optimistic locking. `edge-storage` `AggregateStore` returns `VersionConflict` for duplicate versions.
#### 2. The Lifecycle of a Command
1. **Reception:** The Gateway routes a Command from a user to the Aggregate container based on the `aggregate_id` and `x-tenant-id` header. The `tenant_id` is extracted and included in the Command envelope for tenant-aware processing.
2. **Loading (Rehydration):**
* The Aggregate fetches the latest **Snapshot** from `edge-storage` `AggregateStore` using the composite key `(tenant_id, aggregate_id)`.
* It reads any **Events** from NATS JetStream (tenant-namespaced subject) that occurred after the snapshot version.
* It applies these events sequentially to the snapshot state using the deterministic `apply` runtime-function program to reach the "Current State."
3. **Execution:**
* The Aggregate passes the Current State and the Command to the `decide` runtime-function program.
* If invalid: Returns an Error (Command Rejected).
* If valid: Returns a list of New Events.
4. **Persistence (The Commit):**
* The Aggregate publishes New Events to NATS JetStream on tenant-namespaced subjects, with `command_id` mapped to `idempotency_key`.
* It stores an updated snapshot in `edge-storage` `AggregateStore` using `(tenant_id, aggregate_id, new_version)` as the composite key.
* **Constraint:** `AggregateStore` enforces strict monotonicity — if `new_version` already exists, it returns `VersionConflict`, and the Aggregate must reload and retry.
5. **Publication:**
* Events published to NATS JetStream are immediately available for downstream consumption by Sagas and Projections (filtered by tenant if needed).
#### 3. Technical Constraints & Guarantees
* **Determinism:** The logic within an Aggregate must be 100% deterministic. `runtime-function` DAG programs are sandboxed and gas-metered, with no access to the system clock, random number generators, or external APIs. All data required for a decision must be present in the Command or the Aggregate State.
* **Side-Effect Free:** An Aggregate does not send emails, update databases, or call other services. It only produces events. Side effects are the responsibility of Sagas.
* **Single Writer:** While multiple nodes may attempt to process commands for the same `aggregate_id`, only one "Commit" can succeed for a specific version, enforced by `edge-storage` `AggregateStore` (`VersionConflict`).
* **Tenant Isolation:** An Aggregate can only access data within its `tenant_id` scope. Cross-tenant access is blocked at the storage and stream layers. The `tenant_id` is validated on every command to prevent tenant spoofing.
* **Isolation:** An Aggregate cannot see the state of other Aggregates. If a business rule spans multiple Aggregates, it must be handled by a **Saga**.
#### 4. Data Structure (The Envelope)
Each Aggregate maintains a metadata header (sketched as a struct after this list):
* `tenant_id`: Optional identifier for multi-tenant isolation (routed via `x-tenant-id` header)
* `aggregate_id`: Unique UUID or URN for the instance.
* `aggregate_type`: The name of the business entity (e.g., `Account`, `Order`).
* `version`: A monotonically increasing integer representing the number of events processed.
* `snapshot_threshold`: A configuration defining how many events should trigger a new snapshot in `edge-storage`.
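For concreteness, the header as a Rust struct; the field types are inferred from this document, not copied from the crate:

```rust
use uuid::Uuid;

/// Aggregate metadata header as described above (illustrative types).
pub struct AggregateEnvelope {
    /// Optional; empty/default in single-tenant mode, otherwise routed via
    /// the `x-tenant-id` header.
    pub tenant_id: Option<String>,
    pub aggregate_id: Uuid,
    pub aggregate_type: String,
    /// Monotonically increasing count of processed events.
    pub version: u64,
    /// How many events may accumulate before a new snapshot is written.
    pub snapshot_threshold: u64,
}
```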
#### 5. Error Handling
* **Validation Errors:** Business rule violations (e.g., "Insufficient Funds") result in an immediate synchronous rejection of the command.
* **Tenant Access Errors:** Cross-tenant access attempts (e.g., wrong `tenant_id` in command) are rejected with `TenantAccessDenied`.
* **Concurrency Conflicts:** If `edge-storage` returns `VersionConflict`, the framework implements an automatic "Retry-on-Conflict" policy (Reload → Re-validate → Re-commit) up to a defined limit; see the sketch after this list.
* **System Failures:** If `edge-storage` or NATS JetStream is unavailable, the Aggregate remains in a read-only or "unavailable" state to prevent inconsistent branching of the event stream.
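The retry policy, sketched; the error and closure types are stand-ins (the real loop lives in the aggregate handler later in this commit):

```rust
enum CommitError {
    VersionConflict,
    Other(String),
}

/// Reload -> re-validate -> re-commit, up to `max_retries` attempts.
fn retry_on_conflict<F>(max_retries: u32, mut attempt_commit: F) -> Result<(), CommitError>
where
    F: FnMut() -> Result<(), CommitError>,
{
    for attempt in 1..=max_retries {
        match attempt_commit() {
            Ok(()) => return Ok(()),
            // Reload and retry on conflict until the attempt budget is spent.
            Err(CommitError::VersionConflict) if attempt < max_retries => continue,
            Err(e) => return Err(e),
        }
    }
    Err(CommitError::VersionConflict)
}
```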
#### 6. Horizontal Scaling Strategy
The Aggregate container is designed for horizontal scaling on **Docker Swarm**, leveraging tenant-based sharding for predictable data locality and simple operations.
**Sharding Model:**
- **Tenant-Aware Placement:** Aggregate instances are placed on Swarm nodes based on `tenant_id` using Docker Swarm placement constraints
- **Consistent Hashing:** A hash ring maps `tenant_id` values to specific nodes, ensuring all commands for a tenant route to the same node (or replica set)
- **Subject-Based Routing:** NATS JetStream consumer groups are tenant-namespaced, enabling parallel processing across tenants without coordination
**Scaling Architecture:**
```
┌─────────────────────────────────────────────────────────────────┐
│ Admin UI (Control Node) │
│ ┌─────────────────────────────────────────────────────────┐ │
│ │ Scale Manager: CRUD for tenant → node assignments │ │
│ │ - List tenants, node assignments, load metrics │ │
│ │ - Add/remove nodes, migrate tenants │ │
│ │ - Emit scaling commands to Docker Swarm API │ │
│ └─────────────────────────────────────────────────────────┘ │
└──────────────────────────┬──────────────────────────────────────┘
│ Docker Swarm API / SSH
┌─────────────────────────────────────────────────────────────────┐
│ Docker Swarm Cluster │
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
│ │ Node A │ │ Node B │ │ Node C │ │
│ │ tenant: a-c │ │ tenant: d-m │ │ tenant: n-z │ │
│ │ ┌────────┐ │ │ ┌────────┐ │ │ ┌────────┐ │ │
│ │ │Agg Ctr │ │ │ │Agg Ctr │ │ │ │Agg Ctr │ │ │
│ │ └───┬────┘ │ │ └───┬────┘ │ │ └───┬────┘ │ │
│ │ │ │ │ │ │ │ │ │ │
│ │ ┌───▼────┐ │ │ ┌───▼────┐ │ │ ┌───▼────┐ │ │
│ │ │libmdbx │ │ │ │libmdbx │ │ │ │libmdbx │ │ │
│ │ │(local) │ │ │ │(local) │ │ │ │(local) │ │ │
│ │ └────────┘ │ │ └────────┘ │ │ └────────┘ │ │
│ └──────────────┘ └──────────────┘ └──────────────┘ │
│ │ │ │ │
│ └──────────────────┴──────────────────┘ │
│ │ │
│ ┌────────────────────────▼────────────────────────────────────┐ │
│ │ Shared NATS JetStream Cluster │ │
│ │ (tenant-namespaced subjects for isolation) │ │
│ └─────────────────────────────────────────────────────────────┘ │
└─────────────────────────────────────────────────────────────────┘
```
**Note:** Each node has its own embedded `edge-storage` (libmdbx) containing snapshots for its assigned tenants. NATS JetStream provides shared event storage. Tenant migration requires snapshot data transfer between nodes.
**Operational Model:**
- **Scale Up:** Admin UI calls Swarm API to add new node, updates tenant → node mapping, Gateway updates routing table
- **Scale Down:** Migrate tenants to other nodes (drain), remove node from Swarm
- **Tenant Migration:** Pause consumer, copy tenant data, update routing, resume on new node
- **Zero-Downtime:** New tenant assignments are picked up by Gateway via config reload without restart
**Placement Constraints:**
- Each Aggregate service runs with `--constraint node.labels.tenant_range==<range>`
- Gateway uses tenant → node mapping to route commands to correct Swarm service endpoint
- Multiple replicas per tenant range supported for HA (active-passive via NATS consumer groups)
**Admin Endpoints (per Aggregate container):**
- `/health` - Container health (NATS, storage, active aggregates)
- `/ready` - Readiness for receiving commands
- `/metrics` - Prometheus metrics with tenant_id labels
- `/admin/tenants` - List tenants hosted on this node (read-only)
- `/admin/drain` - Graceful drain for tenant migration
- `/admin/reload` - Hot-reload tenant placement config
**External Control Node:**
- Separate service that calls Aggregate admin endpoints
- Manages Docker Swarm API for scaling operations
- Publishes tenant → node mapping to NATS KV
- See Admin UI repository for full implementation
---
### 💡 Implementation Note:
The **Aggregate Logic** is a pair of `runtime-function` DAG programs:
1. **`decide` program**: `(state, command) → events[]` — The business logic (validates command, produces events).
2. **`apply` program**: `(state, event) → new_state` — The state transition logic (used during rehydration from snapshots + events).
These are referenced in the manifest as `decide:` and `apply:` fields under each aggregate definition.
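For illustration, a decide/apply pair in the JS-like form the tests in this commit use; the real DAG program format is defined by `runtime-function`, so treat this as a sketch:

```rust
// Illustrative programs only; the execution semantics are provided by
// `runtime-function`, not shown here.
const DECIDE: &str = r#"
function decide(state, command) {
    if (command.type === "deposit" && command.amount > 0) {
        return [{ type: "deposited", amount: command.amount }];
    }
    return [];
}
"#;

const APPLY: &str = r#"
function apply(state, event) {
    if (event.type === "deposited") {
        return { ...state, balance: (state.balance || 0) + event.amount };
    }
    return state;
}
"#;
```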

View File

@@ -0,0 +1,32 @@
syntax = "proto3";
package aggregate.gateway.v1;
service CommandService {
rpc SubmitCommand(SubmitCommandRequest) returns (SubmitCommandResponse);
}
message SubmitCommandRequest {
string tenant_id = 1;
string command_id = 2;
string aggregate_id = 3;
string aggregate_type = 4;
string payload_json = 5;
map<string, string> metadata = 6;
}
message Event {
string event_id = 1;
string command_id = 2;
string aggregate_id = 3;
string aggregate_type = 4;
uint64 version = 5;
string event_type = 6;
string payload_json = 7;
string timestamp_rfc3339 = 8;
}
message SubmitCommandResponse {
repeated Event events = 1;
}

2
aggregate/rustfmt.toml Normal file
View File

@@ -0,0 +1,2 @@
edition = "2021"
newline_style = "Unix"

View File

@@ -0,0 +1,487 @@
use super::AggregateInstance;
use crate::query::{QueryClient, StateProjection};
use crate::runtime::RuntimeExecutor;
use crate::storage::StorageClient;
use crate::stream::StreamClient;
use crate::types::{
AggregateError, AggregateId, AggregateType, Command, Event, Snapshot, TenantId, Version,
};
#[derive(Debug, Clone)]
pub struct AggregateHandler {
storage: StorageClient,
stream: StreamClient,
executor: RuntimeExecutor,
query: QueryClient,
decide_program: String,
apply_program: String,
snapshot_threshold: u64,
max_retries: u32,
}
impl AggregateHandler {
pub fn new(
storage: StorageClient,
stream: StreamClient,
executor: RuntimeExecutor,
decide_program: String,
apply_program: String,
) -> Self {
Self {
storage,
stream,
executor,
query: QueryClient::embedded(),
decide_program,
apply_program,
snapshot_threshold: 10,
max_retries: 3,
}
}
pub fn with_query_client(mut self, query: QueryClient) -> Self {
self.query = query;
self
}
pub fn with_snapshot_threshold(mut self, threshold: u64) -> Self {
self.snapshot_threshold = threshold;
self
}
pub fn with_max_retries(mut self, max_retries: u32) -> Self {
self.max_retries = max_retries.max(1);
self
}
pub async fn handle_command(&self, command: Command) -> Result<Vec<Event>, AggregateError> {
let mut attempt = 0;
loop {
attempt += 1;
let tenant_id = &command.tenant_id;
let aggregate_id = &command.aggregate_id;
let aggregate_type = &command.aggregate_type;
let instance = self
.load_or_create_instance(tenant_id, aggregate_id, aggregate_type)
.await?;
let (instance, events) = self.execute_command(instance, command.clone()).await?;
if events.is_empty() {
return Ok(events);
}
match self.persist_events(&events).await {
Ok(()) => {
self.maybe_save_snapshot(&instance).await?;
self.project_state(&instance).await?;
return Ok(events);
}
Err(AggregateError::VersionConflict { .. }) if attempt < self.max_retries => {
continue;
}
Err(e) => return Err(e),
}
}
}
async fn load_or_create_instance(
&self,
tenant_id: &TenantId,
aggregate_id: &AggregateId,
aggregate_type: &AggregateType,
) -> Result<AggregateInstance, AggregateError> {
let snapshot = self.storage.get_snapshot(tenant_id, aggregate_id).await?;
match snapshot {
Some(snapshot) => {
let events = self
.stream
.fetch_events(tenant_id, aggregate_id, snapshot.version)
.await?;
AggregateInstance::rehydrate_with_executor(
tenant_id.clone(),
snapshot,
events,
self.decide_program.clone(),
self.apply_program.clone(),
&self.executor,
)
.await
}
None => {
let events = self
.stream
.fetch_events(tenant_id, aggregate_id, Version::initial())
.await?;
if events.is_empty() {
Ok(AggregateInstance::new(
aggregate_id.clone(),
aggregate_type.clone(),
tenant_id.clone(),
self.decide_program.clone(),
self.apply_program.clone(),
))
} else {
let initial_snapshot = Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
aggregate_type.clone(),
Version::initial(),
serde_json::Value::Null,
);
AggregateInstance::rehydrate_with_executor(
tenant_id.clone(),
initial_snapshot,
events,
self.decide_program.clone(),
self.apply_program.clone(),
&self.executor,
)
.await
}
}
}
}
async fn execute_command(
&self,
mut instance: AggregateInstance,
command: Command,
) -> Result<(AggregateInstance, Vec<Event>), AggregateError> {
let events = instance.handle_command(command, &self.executor).await?;
Ok((instance, events))
}
async fn project_state(&self, instance: &AggregateInstance) -> Result<(), AggregateError> {
let projection = StateProjection::default_projection_from_state(
instance.tenant_id(),
instance.aggregate_id(),
instance.aggregate_type(),
&instance.version(),
instance.state(),
);
self.query
.index(projection)
.await
.map_err(|e| AggregateError::StorageError(e.to_string()))
}
async fn maybe_save_snapshot(
&self,
instance: &AggregateInstance,
) -> Result<(), AggregateError> {
let current_version = instance.version();
let events_since_snapshot = current_version
.as_u64()
.saturating_sub(instance.snapshot_version().as_u64());
if events_since_snapshot >= self.snapshot_threshold {
let snapshot = instance.to_snapshot();
match self.storage.put_snapshot(&snapshot).await {
Ok(()) => {}
Err(AggregateError::VersionConflict { .. }) => {}
Err(e) => return Err(e),
}
}
Ok(())
}
async fn persist_events(&self, events: &[Event]) -> Result<(), AggregateError> {
self.stream.publish_events(events.to_vec()).await
}
pub async fn load_aggregate(
&self,
tenant_id: &TenantId,
aggregate_id: &AggregateId,
aggregate_type: &AggregateType,
) -> Result<AggregateInstance, AggregateError> {
self.load_or_create_instance(tenant_id, aggregate_id, aggregate_type)
.await
}
pub fn storage(&self) -> &StorageClient {
&self.storage
}
pub fn stream(&self) -> &StreamClient {
&self.stream
}
pub fn executor(&self) -> &RuntimeExecutor {
&self.executor
}
pub fn query_client(&self) -> &QueryClient {
&self.query
}
}
#[cfg(test)]
mod tests {
use super::*;
use tempfile::tempdir;
async fn create_test_handler() -> (tempfile::TempDir, AggregateHandler) {
let dir = tempdir().unwrap();
let path = dir.path().join("test.mdbx");
let storage = StorageClient::open(path.to_string_lossy().to_string()).unwrap();
let stream = StreamClient::in_memory();
let executor = RuntimeExecutor::with_config(
crate::runtime::ExecutorConfig::default().with_mock_runtime(),
);
let handler = AggregateHandler::new(
storage,
stream,
executor,
"function decide(s,c) { return []; }".to_string(),
"function apply(s,e) { return s; }".to_string(),
);
(dir, handler)
}
#[test]
fn handler_is_send_sync() {
fn assert_send_sync<T: Send + Sync>() {}
assert_send_sync::<AggregateHandler>();
}
#[test]
fn snapshot_threshold_defaults_to_10() {
let dir = tempdir().unwrap();
let path = dir.path().join("test.mdbx");
let storage = StorageClient::open(path.to_string_lossy().to_string()).unwrap();
let stream = StreamClient::in_memory();
let executor = RuntimeExecutor::new();
let handler = AggregateHandler::new(
storage,
stream,
executor,
"decide".to_string(),
"apply".to_string(),
);
let handler_with_threshold = AggregateHandler::new(
handler.storage.clone(),
handler.stream.clone(),
handler.executor.clone(),
"decide".to_string(),
"apply".to_string(),
)
.with_snapshot_threshold(25);
assert_eq!(handler.snapshot_threshold, 10);
assert_eq!(handler_with_threshold.snapshot_threshold, 25);
}
#[tokio::test]
async fn handler_full_lifecycle_persists_events_and_snapshot() {
let (_dir, handler) = create_test_handler().await;
let handler = handler.with_snapshot_threshold(1);
let tenant_id = TenantId::new("tenant-a");
let aggregate_id = AggregateId::new_v7();
let aggregate_type = AggregateType::from("Account");
let command = Command::new(
tenant_id.clone(),
aggregate_id.clone(),
aggregate_type.clone(),
serde_json::json!({"type": "deposit", "amount": 50}),
);
let events = handler.handle_command(command).await.unwrap();
assert_eq!(events.len(), 1);
let snapshot = handler
.storage
.get_snapshot(&tenant_id, &aggregate_id)
.await
.unwrap()
.unwrap();
assert_eq!(snapshot.version, Version::from(1));
}
#[tokio::test]
async fn retry_on_version_conflict() {
let (_dir, handler) = create_test_handler().await;
let handler = handler.with_max_retries(5);
let tenant_id = TenantId::new("tenant-a");
let aggregate_id = AggregateId::new_v7();
let aggregate_type = AggregateType::from("Account");
let cmd1 = Command::new(
tenant_id.clone(),
aggregate_id.clone(),
aggregate_type.clone(),
serde_json::json!({"type": "deposit", "amount": 10}),
);
let cmd2 = Command::new(
tenant_id.clone(),
aggregate_id.clone(),
aggregate_type.clone(),
serde_json::json!({"type": "deposit", "amount": 20}),
);
let (r1, r2) = tokio::join!(handler.handle_command(cmd1), handler.handle_command(cmd2));
assert!(r1.is_ok());
assert!(r2.is_ok());
let events = handler
.stream
.fetch_events(&tenant_id, &aggregate_id, Version::initial())
.await
.unwrap();
assert_eq!(events.len(), 2);
assert_eq!(events[0].version, Version::from(1));
assert_eq!(events[1].version, Version::from(2));
}
#[tokio::test]
async fn snapshot_threshold_respected() {
let (_dir, handler) = create_test_handler().await;
let handler = handler.with_snapshot_threshold(3);
let tenant_id = TenantId::new("tenant-a");
let aggregate_id = AggregateId::new_v7();
let aggregate_type = AggregateType::from("Account");
for _ in 0..5 {
let cmd = Command::new(
tenant_id.clone(),
aggregate_id.clone(),
aggregate_type.clone(),
serde_json::json!({"type": "deposit", "amount": 1}),
);
handler.handle_command(cmd).await.unwrap();
}
let snapshot = handler
.storage
.get_snapshot(&tenant_id, &aggregate_id)
.await
.unwrap()
.unwrap();
assert_eq!(snapshot.version, Version::from(3));
}
#[tokio::test]
async fn empty_tenant_id_allowed_in_single_tenant_mode() {
let (_dir, handler) = create_test_handler().await;
let handler = handler.with_snapshot_threshold(1);
let tenant_id = TenantId::default();
let aggregate_id = AggregateId::new_v7();
let aggregate_type = AggregateType::from("Account");
let command = Command::new(
tenant_id.clone(),
aggregate_id.clone(),
aggregate_type.clone(),
serde_json::json!({"type": "deposit", "amount": 5}),
);
let events = handler.handle_command(command).await.unwrap();
assert_eq!(events.len(), 1);
let proj = handler
.query
.get(&tenant_id, &aggregate_id.to_string())
.await
.unwrap()
.unwrap();
assert_eq!(proj.state["balance"], 5);
}
#[tokio::test]
async fn query_returns_correct_aggregate_state() {
let (_dir, handler) = create_test_handler().await;
let handler = handler.with_snapshot_threshold(1);
let tenant_id = TenantId::new("tenant-a");
let aggregate_id = AggregateId::new_v7();
let aggregate_type = AggregateType::from("Account");
let command = Command::new(
tenant_id.clone(),
aggregate_id.clone(),
aggregate_type,
serde_json::json!({"type": "deposit", "amount": 100}),
);
handler.handle_command(command).await.unwrap();
let proj = handler
.query
.get(&tenant_id, &aggregate_id.to_string())
.await
.unwrap()
.unwrap();
assert_eq!(proj.state["balance"], 100);
}
#[tokio::test]
async fn system_failure_recovery_rehydrates_state() {
let dir = tempdir().unwrap();
let path = dir.path().join("test.mdbx");
let storage = StorageClient::open(path.to_string_lossy().to_string()).unwrap();
let stream = StreamClient::in_memory();
let executor = RuntimeExecutor::with_config(
crate::runtime::ExecutorConfig::default().with_mock_runtime(),
);
let handler1 = AggregateHandler::new(
storage,
stream.clone(),
executor.clone(),
"decide".to_string(),
"apply".to_string(),
)
.with_snapshot_threshold(2);
let tenant_id = TenantId::new("tenant-a");
let aggregate_id = AggregateId::new_v7();
let aggregate_type = AggregateType::from("Account");
for _ in 0..2 {
let cmd = Command::new(
tenant_id.clone(),
aggregate_id.clone(),
aggregate_type.clone(),
serde_json::json!({"type": "deposit", "amount": 10}),
);
handler1.handle_command(cmd).await.unwrap();
}
drop(handler1);
let storage2 = StorageClient::open(path.to_string_lossy().to_string()).unwrap();
let handler2 = AggregateHandler::new(
storage2,
stream,
executor,
"decide".to_string(),
"apply".to_string(),
);
let loaded = handler2
.load_aggregate(&tenant_id, &aggregate_id, &aggregate_type)
.await
.unwrap();
assert_eq!(loaded.state()["balance"], 20);
}
}

View File

@@ -0,0 +1,5 @@
mod handler;
mod state;
pub use handler::*;
pub use state::*;

View File

@@ -0,0 +1,448 @@
use crate::runtime::RuntimeExecutor;
use crate::types::{
AggregateError, AggregateId, AggregateType, Command, Event, Snapshot, TenantId, Version,
};
use serde_json::Value;
use std::collections::HashSet;
use uuid::Uuid;
#[derive(Debug, Clone)]
pub struct AggregateInstance {
aggregate_id: AggregateId,
aggregate_type: AggregateType,
tenant_id: TenantId,
snapshot_version: Version,
version: Version,
state: Value,
decide_program: String,
apply_program: String,
processed_command_ids: HashSet<Uuid>,
}
impl AggregateInstance {
pub fn new(
aggregate_id: AggregateId,
aggregate_type: AggregateType,
tenant_id: TenantId,
decide_program: String,
apply_program: String,
) -> Self {
Self {
aggregate_id,
aggregate_type,
tenant_id,
snapshot_version: Version::initial(),
version: Version::initial(),
state: Value::Null,
decide_program,
apply_program,
processed_command_ids: HashSet::new(),
}
}
pub fn aggregate_id(&self) -> &AggregateId {
&self.aggregate_id
}
pub fn aggregate_type(&self) -> &AggregateType {
&self.aggregate_type
}
pub fn tenant_id(&self) -> &TenantId {
&self.tenant_id
}
pub fn version(&self) -> Version {
self.version
}
pub fn snapshot_version(&self) -> Version {
self.snapshot_version
}
pub fn state(&self) -> &Value {
&self.state
}
pub fn rehydrate(
tenant_id: TenantId,
snapshot: Snapshot,
events: Vec<Event>,
decide_program: String,
apply_program: String,
) -> Result<Self, AggregateError> {
if snapshot.tenant_id != tenant_id {
return Err(AggregateError::TenantAccessDenied {
tenant_id: snapshot.tenant_id,
});
}
let mut instance = Self {
aggregate_id: snapshot.aggregate_id,
aggregate_type: snapshot.aggregate_type,
tenant_id,
snapshot_version: snapshot.version,
version: snapshot.version,
state: snapshot.state,
decide_program,
apply_program,
processed_command_ids: HashSet::new(),
};
for event in events {
instance.apply_event_internal(&event)?;
}
Ok(instance)
}
pub async fn rehydrate_with_executor(
tenant_id: TenantId,
snapshot: Snapshot,
events: Vec<Event>,
decide_program: String,
apply_program: String,
executor: &RuntimeExecutor,
) -> Result<Self, AggregateError> {
if snapshot.tenant_id != tenant_id {
return Err(AggregateError::TenantAccessDenied {
tenant_id: snapshot.tenant_id,
});
}
let mut instance = Self {
aggregate_id: snapshot.aggregate_id,
aggregate_type: snapshot.aggregate_type,
tenant_id,
snapshot_version: snapshot.version,
version: snapshot.version,
state: snapshot.state,
decide_program,
apply_program,
processed_command_ids: HashSet::new(),
};
for event in events {
let apply_result = executor
.execute_apply(&instance.state, &event, &instance.apply_program)
.await?;
instance.state = apply_result.new_state;
instance.apply_event_internal(&event)?;
}
Ok(instance)
}
fn apply_event_internal(&mut self, event: &Event) -> Result<(), AggregateError> {
if event.tenant_id != self.tenant_id {
return Err(AggregateError::TenantAccessDenied {
tenant_id: event.tenant_id.clone(),
});
}
self.processed_command_ids.insert(event.command_id);
self.version = event.version;
Ok(())
}
pub fn apply_event(&mut self, event: &Event) -> Result<(), AggregateError> {
self.apply_event_internal(event)
}
pub async fn handle_command(
&mut self,
command: Command,
executor: &RuntimeExecutor,
) -> Result<Vec<Event>, AggregateError> {
if command.tenant_id != self.tenant_id {
return Err(AggregateError::TenantAccessDenied {
tenant_id: command.tenant_id,
});
}
if command.aggregate_id != self.aggregate_id {
return Err(AggregateError::NotFound(command.aggregate_id));
}
if self.processed_command_ids.contains(&command.command_id) {
return Ok(Vec::new());
}
let decide_result = executor
.execute_decide(&self.state, &command, &self.decide_program)
.await?;
let command_id = command.command_id;
let correlation_id = command
.metadata
.get("correlation_id")
.and_then(|v| v.as_str())
.map(|s| s.to_string());
let traceparent = command
.metadata
.get("traceparent")
.and_then(|v| v.as_str())
.map(|s| s.to_string());
let mut events = Vec::with_capacity(decide_result.events.len());
for event_payload in decide_result.events {
let event_type = event_payload
.get("type")
.and_then(|t| t.as_str())
.unwrap_or("Unknown")
.to_string();
let new_version = self.version.increment();
let mut event = Event::new(
self.tenant_id.clone(),
self.aggregate_id.clone(),
self.aggregate_type.clone(),
new_version,
event_type,
event_payload,
command_id,
);
event.correlation_id = correlation_id.clone();
event.traceparent = traceparent.clone();
let apply_result = executor
.execute_apply(&self.state, &event, &self.apply_program)
.await?;
self.state = apply_result.new_state;
self.version = new_version;
events.push(event);
}
self.processed_command_ids.insert(command_id);
Ok(events)
}
pub fn to_snapshot(&self) -> Snapshot {
Snapshot::new(
self.tenant_id.clone(),
self.aggregate_id.clone(),
self.aggregate_type.clone(),
self.version,
self.state.clone(),
)
}
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
fn test_instance() -> AggregateInstance {
AggregateInstance::new(
AggregateId::new_v7(),
AggregateType::new("Account"),
TenantId::new("tenant-a"),
"function decide(s,c) { return []; }".to_string(),
"function apply(s,e) { return s; }".to_string(),
)
}
#[test]
fn aggregate_instance_has_id_and_tenant() {
let agg = test_instance();
assert_eq!(agg.tenant_id().as_str(), "tenant-a");
assert_eq!(agg.aggregate_type().as_str(), "Account");
assert!(!agg.aggregate_id().to_string().is_empty());
}
#[test]
fn new_instance_starts_at_version_zero() {
let agg = test_instance();
assert_eq!(agg.version(), Version::initial());
}
#[test]
fn rehydrate_validates_tenant() {
let snapshot = Snapshot::new(
TenantId::new("tenant-a"),
AggregateId::new_v7(),
AggregateType::new("Account"),
Version::from(5),
json!({ "balance": 100 }),
);
let result = AggregateInstance::rehydrate(
TenantId::new("tenant-b"),
snapshot,
vec![],
"decide".to_string(),
"apply".to_string(),
);
assert!(result.is_err());
match result.unwrap_err() {
AggregateError::TenantAccessDenied { tenant_id } => {
assert_eq!(tenant_id, TenantId::new("tenant-a"));
}
_ => panic!("Expected TenantAccessDenied"),
}
}
#[tokio::test]
async fn rehydrate_applies_events() {
let tenant_id = TenantId::new("tenant-a");
let aggregate_id = AggregateId::new_v7();
let aggregate_type = AggregateType::new("Account");
let snapshot = Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
aggregate_type.clone(),
Version::from(2),
json!({ "balance": 100 }),
);
let event1 = Event::new(
tenant_id.clone(),
aggregate_id.clone(),
aggregate_type.clone(),
Version::from(3),
"deposited",
json!({ "amount": 50 }),
Uuid::now_v7(),
);
let event2 = Event::new(
tenant_id.clone(),
aggregate_id.clone(),
aggregate_type.clone(),
Version::from(4),
"withdrawn",
json!({ "amount": 25 }),
Uuid::now_v7(),
);
let executor = RuntimeExecutor::with_config(
crate::runtime::ExecutorConfig::default().with_mock_runtime(),
);
let instance = AggregateInstance::rehydrate_with_executor(
tenant_id,
snapshot,
vec![event1, event2],
"decide".to_string(),
"apply".to_string(),
&executor,
)
.await
.unwrap();
assert_eq!(instance.version(), Version::from(4));
assert_eq!(instance.state()["balance"], 125);
}
#[test]
fn to_snapshot_captures_state() {
let mut agg = test_instance();
agg.state = json!({ "balance": 150 });
agg.version = Version::from(3);
let snapshot = agg.to_snapshot();
assert_eq!(snapshot.state, json!({ "balance": 150 }));
assert_eq!(snapshot.version, Version::from(3));
}
#[tokio::test]
async fn idempotency_via_command_id_returns_empty() {
let tenant_id = TenantId::new("tenant-a");
let aggregate_id = AggregateId::new_v7();
let aggregate_type = AggregateType::new("Account");
let command_id = Uuid::now_v7();
let mut agg = AggregateInstance::new(
aggregate_id.clone(),
aggregate_type.clone(),
tenant_id.clone(),
"decide".to_string(),
"apply".to_string(),
);
let e = Event::new(
tenant_id.clone(),
aggregate_id.clone(),
aggregate_type.clone(),
Version::from(1),
"deposited",
json!({ "amount": 10 }),
command_id,
);
agg.apply_event(&e).unwrap();
let before_version = agg.version();
let mut cmd = Command::new(
tenant_id,
aggregate_id,
aggregate_type,
json!({ "type": "deposit", "amount": 10 }),
);
cmd.command_id = command_id;
let executor = RuntimeExecutor::new();
let events = agg.handle_command(cmd, &executor).await.unwrap();
assert!(events.is_empty());
assert_eq!(agg.version(), before_version);
}
#[tokio::test]
async fn handle_command_validates_tenant() {
let agg = AggregateInstance::new(
AggregateId::new_v7(),
AggregateType::new("Account"),
TenantId::new("tenant-a"),
"decide".to_string(),
"apply".to_string(),
);
let command = Command::new(
TenantId::new("tenant-b"),
agg.aggregate_id.clone(),
AggregateType::new("Account"),
json!({ "type": "deposit", "amount": 50 }),
);
let executor = RuntimeExecutor::new();
let mut agg = agg;
let result = agg.handle_command(command, &executor).await;
assert!(result.is_err());
match result.unwrap_err() {
AggregateError::TenantAccessDenied { .. } => {}
_ => panic!("Expected TenantAccessDenied"),
}
}
#[tokio::test]
async fn handle_command_validates_aggregate_id() {
let agg = test_instance();
let wrong_agg_id = AggregateId::new_v7();
let command = Command::new(
agg.tenant_id.clone(),
wrong_agg_id,
AggregateType::new("Account"),
json!({ "type": "deposit", "amount": 50 }),
);
let executor = RuntimeExecutor::new();
let mut agg = agg;
let result = agg.handle_command(command, &executor).await;
assert!(result.is_err());
match result.unwrap_err() {
AggregateError::NotFound(_) => {}
_ => panic!("Expected NotFound"),
}
}
#[test]
fn instance_is_send_sync() {
fn assert_send_sync<T: Send + Sync>() {}
assert_send_sync::<AggregateInstance>();
}
}

View File

@@ -0,0 +1,3 @@
mod settings;
pub use settings::*;

View File

@@ -0,0 +1,274 @@
use serde::{Deserialize, Serialize};
use std::path::Path;
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct Settings {
pub nats_url: String,
pub storage_path: String,
pub logger_socket: Option<String>,
pub snapshot_threshold: u64,
pub max_retries: u32,
pub multi_tenant_enabled: bool,
pub default_tenant_id: Option<String>,
pub shard_id: String,
pub placement_bucket: String,
pub placement_key: String,
pub grpc_addr: String,
pub decide_program: String,
pub apply_program: String,
}
impl Default for Settings {
fn default() -> Self {
Self {
nats_url: "nats://localhost:4222".to_string(),
storage_path: "./data".to_string(),
logger_socket: None,
snapshot_threshold: 10,
max_retries: 3,
multi_tenant_enabled: true,
default_tenant_id: None,
shard_id: "local".to_string(),
placement_bucket: "AGGREGATE_PLACEMENT".to_string(),
placement_key: "aggregate_placement".to_string(),
grpc_addr: "0.0.0.0:50051".to_string(),
decide_program: "function decide(state, command) { return []; }".to_string(),
apply_program: "function apply(state, event) { return state; }".to_string(),
}
}
}
impl Settings {
pub fn from_env() -> Result<Self, std::env::VarError> {
let mut settings = Self::default();
settings.apply_env_overrides();
Ok(settings)
}
pub fn from_yaml(yaml: &str) -> Result<Self, serde_yaml::Error> {
serde_yaml::from_str(yaml)
}
pub fn from_toml(toml_str: &str) -> Result<Self, toml::de::Error> {
toml::from_str(toml_str)
}
pub fn from_json(json: &str) -> Result<Self, serde_json::Error> {
serde_json::from_str(json)
}
pub fn from_file(path: impl AsRef<Path>) -> Result<Self, SettingsLoadError> {
let path = path.as_ref();
let raw = std::fs::read_to_string(path)?;
let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
match ext {
"yaml" | "yml" => Ok(Self::from_yaml(&raw)?),
"toml" => Ok(Self::from_toml(&raw)?),
"json" => Ok(Self::from_json(&raw)?),
_ => Err(SettingsLoadError::UnsupportedFormat {
path: path.display().to_string(),
}),
}
}
pub fn load_from_file_with_env_overrides(
path: impl AsRef<Path>,
) -> Result<Self, SettingsLoadError> {
let mut settings = Self::from_file(path)?;
settings.apply_env_overrides();
Ok(settings)
}
fn apply_env_overrides(&mut self) {
if let Ok(url) = std::env::var("AGGREGATE_NATS_URL") {
self.nats_url = url;
}
if let Ok(path) = std::env::var("AGGREGATE_STORAGE_PATH") {
self.storage_path = path;
}
if let Ok(socket) = std::env::var("AGGREGATE_LOGGER_SOCKET") {
self.logger_socket = Some(socket);
}
if let Ok(threshold) = std::env::var("AGGREGATE_SNAPSHOT_THRESHOLD") {
if let Ok(value) = threshold.parse() {
self.snapshot_threshold = value;
}
}
if let Ok(retries) = std::env::var("AGGREGATE_MAX_RETRIES") {
if let Ok(value) = retries.parse() {
self.max_retries = value;
}
}
if let Ok(enabled) = std::env::var("AGGREGATE_MULTI_TENANT") {
if let Ok(value) = enabled.parse() {
self.multi_tenant_enabled = value;
}
}
if let Ok(default_tenant_id) = std::env::var("AGGREGATE_DEFAULT_TENANT_ID") {
if default_tenant_id.is_empty() {
self.default_tenant_id = None;
} else {
self.default_tenant_id = Some(default_tenant_id);
}
}
if let Ok(shard_id) = std::env::var("AGGREGATE_SHARD_ID") {
if !shard_id.is_empty() {
self.shard_id = shard_id;
}
}
if let Ok(bucket) = std::env::var("AGGREGATE_PLACEMENT_BUCKET") {
if !bucket.is_empty() {
self.placement_bucket = bucket;
}
}
if let Ok(key) = std::env::var("AGGREGATE_PLACEMENT_KEY") {
if !key.is_empty() {
self.placement_key = key;
}
}
if let Ok(addr) = std::env::var("AGGREGATE_GRPC_ADDR") {
if !addr.is_empty() {
self.grpc_addr = addr;
}
}
if let Ok(program) = std::env::var("AGGREGATE_DECIDE_PROGRAM") {
if !program.is_empty() {
self.decide_program = program;
}
}
if let Ok(program) = std::env::var("AGGREGATE_APPLY_PROGRAM") {
if !program.is_empty() {
self.apply_program = program;
}
}
if let Ok(path) = std::env::var("AGGREGATE_DECIDE_PROGRAM_PATH") {
if let Ok(raw) = std::fs::read_to_string(path) {
if !raw.is_empty() {
self.decide_program = raw;
}
}
}
if let Ok(path) = std::env::var("AGGREGATE_APPLY_PROGRAM_PATH") {
if let Ok(raw) = std::fs::read_to_string(path) {
if !raw.is_empty() {
self.apply_program = raw;
}
}
}
}
pub fn validate(&self) -> Result<(), String> {
if self.nats_url.is_empty() {
return Err("NATS URL is required".to_string());
}
if self.storage_path.is_empty() {
return Err("Storage path is required".to_string());
}
Ok(())
}
}
#[derive(Debug, thiserror::Error)]
pub enum SettingsLoadError {
#[error("Failed to read config file: {0}")]
Io(#[from] std::io::Error),
#[error("Failed to parse YAML config: {0}")]
Yaml(#[from] serde_yaml::Error),
#[error("Failed to parse TOML config: {0}")]
Toml(#[from] toml::de::Error),
#[error("Failed to parse JSON config: {0}")]
Json(#[from] serde_json::Error),
#[error("Unsupported config format: {path}")]
UnsupportedFormat { path: String },
}
#[cfg(test)]
mod tests {
use super::*;
use tempfile::tempdir;
#[test]
fn settings_from_env() {
std::env::set_var("AGGREGATE_NATS_URL", "nats://localhost:4222");
let settings = Settings::from_env().unwrap();
assert_eq!(settings.nats_url, "nats://localhost:4222");
std::env::remove_var("AGGREGATE_NATS_URL");
}
#[test]
fn settings_validation() {
let settings = Settings {
nats_url: "".to_string(),
..Default::default()
};
assert!(settings.validate().is_err());
}
#[test]
fn settings_from_yaml_file_and_env_override() {
let dir = tempdir().unwrap();
let file_path = dir.path().join("aggregate.yaml");
std::fs::write(
&file_path,
r#"
nats_url: "nats://from-file:4222"
storage_path: "/tmp/agg"
snapshot_threshold: 25
multi_tenant_enabled: false
"#,
)
.unwrap();
std::env::set_var("AGGREGATE_NATS_URL", "nats://from-env:4222");
let settings = Settings::load_from_file_with_env_overrides(&file_path).unwrap();
assert_eq!(settings.nats_url, "nats://from-env:4222");
assert_eq!(settings.storage_path, "/tmp/agg");
assert_eq!(settings.snapshot_threshold, 25);
assert!(!settings.multi_tenant_enabled);
std::env::remove_var("AGGREGATE_NATS_URL");
}
#[test]
fn settings_from_toml_file() {
let dir = tempdir().unwrap();
let file_path = dir.path().join("aggregate.toml");
std::fs::write(
&file_path,
r#"
nats_url = "nats://from-file:4222"
storage_path = "/tmp/agg"
max_retries = 7
"#,
)
.unwrap();
let settings = Settings::from_file(&file_path).unwrap();
assert_eq!(settings.nats_url, "nats://from-file:4222");
assert_eq!(settings.storage_path, "/tmp/agg");
assert_eq!(settings.max_retries, 7);
}
#[test]
fn settings_is_clone() {
let s = Settings::default();
let _s2 = s.clone();
}
}

View File

@@ -0,0 +1,24 @@
#[cfg(test)]
mod tests {
#[test]
fn dockerfile_is_multi_stage_and_builds_selected_binary() {
let raw = std::fs::read_to_string("../docker/Dockerfile.rust").unwrap();
assert!(raw.contains("AS builder"));
assert!(raw.contains("FROM debian:"));
assert!(raw.contains("ARG PACKAGE"));
assert!(raw.contains("ARG BIN"));
assert!(raw.contains("cargo build -p ${PACKAGE} --bin ${BIN} --release"));
assert!(raw.contains("COPY --from=builder"));
assert!(raw.contains("ENTRYPOINT"));
assert!(raw.contains("FROM"));
}
#[test]
fn docker_compose_is_valid_yaml_and_has_services() {
let raw = std::fs::read_to_string("../docker-compose.yml").unwrap();
let doc: serde_yaml::Value = serde_yaml::from_str(&raw).unwrap();
let services = doc.get("services").and_then(|v| v.as_mapping()).unwrap();
assert!(services.contains_key(serde_yaml::Value::from("nats")));
assert!(services.contains_key(serde_yaml::Value::from("aggregate")));
}
}

View File

@@ -0,0 +1,7 @@
pub const TENANT_ID_METADATA_KEY: &str = "x-tenant-id";
pub mod proto {
tonic::include_proto!("aggregate.gateway.v1");
}
pub mod server;

View File

@@ -0,0 +1,306 @@
use super::proto::command_service_server::{CommandService, CommandServiceServer};
use super::proto::{Event as ProtoEvent, SubmitCommandRequest, SubmitCommandResponse};
use crate::aggregate::AggregateHandler;
use crate::observability::Observability;
use crate::placement::TenantPlacementManager;
use crate::types::{AggregateError, AggregateId, AggregateType, Command, TenantId};
use std::collections::HashMap;
use std::str::FromStr;
use std::sync::Arc;
use tonic::{Request, Response, Status};
#[derive(Clone)]
pub struct GrpcCommandServer {
handler: AggregateHandler,
placement: Arc<TenantPlacementManager>,
observability: Arc<Observability>,
multi_tenant_enabled: bool,
default_tenant_id: Option<TenantId>,
}
impl GrpcCommandServer {
pub fn new(
handler: AggregateHandler,
placement: Arc<TenantPlacementManager>,
observability: Arc<Observability>,
multi_tenant_enabled: bool,
default_tenant_id: Option<TenantId>,
) -> Self {
Self {
handler,
placement,
observability,
multi_tenant_enabled,
default_tenant_id,
}
}
pub fn service(self) -> CommandServiceServer<Self> {
CommandServiceServer::new(self)
}
}
#[tonic::async_trait]
impl CommandService for GrpcCommandServer {
async fn submit_command(
&self,
request: Request<SubmitCommandRequest>,
) -> Result<Response<SubmitCommandResponse>, Status> {
let correlation_id = request
.metadata()
.get("x-correlation-id")
.and_then(|v| v.to_str().ok())
.map(|s| s.trim())
.filter(|s| !s.is_empty())
.map(|s| s.to_string());
let traceparent = request
.metadata()
.get("traceparent")
.and_then(|v| v.to_str().ok())
.map(|s| s.trim())
.filter(|s| !s.is_empty())
.map(|s| s.to_string());
let trace_id = traceparent.as_deref().and_then(trace_id_from_traceparent);
let metadata_tenant = request
.metadata()
.get(super::TENANT_ID_METADATA_KEY)
.and_then(|v| v.to_str().ok())
.unwrap_or("")
.to_string();
let req = request.into_inner();
let tenant_id = resolve_tenant_id(
&req.tenant_id,
&metadata_tenant,
self.multi_tenant_enabled,
self.default_tenant_id.as_ref(),
)
.map_err(Status::invalid_argument)?;
if !tenant_id.as_str().is_empty() && !is_valid_tenant_id(tenant_id.as_str()) {
return Err(Status::invalid_argument("invalid tenant_id"));
}
let aggregate_id = AggregateId::from_str(&req.aggregate_id)
.map_err(|e| Status::invalid_argument(e.to_string()))?;
let aggregate_type = AggregateType::from(req.aggregate_type);
let payload: serde_json::Value = serde_json::from_str(&req.payload_json)
.map_err(|e| Status::invalid_argument(e.to_string()))?;
let command_id = if req.command_id.is_empty() {
uuid::Uuid::now_v7()
} else {
uuid::Uuid::parse_str(&req.command_id)
.map_err(|e| Status::invalid_argument(e.to_string()))?
};
        let mut metadata: HashMap<String, serde_json::Value> = req
            .metadata
            .into_iter()
            .map(|(k, v)| (k, serde_json::Value::String(v)))
            .collect();
if let Some(correlation_id) = correlation_id.as_deref() {
metadata.insert(
"correlation_id".to_string(),
serde_json::Value::String(correlation_id.to_string()),
);
}
if let Some(traceparent) = traceparent.as_deref() {
metadata.insert(
"traceparent".to_string(),
serde_json::Value::String(traceparent.to_string()),
);
}
let command = Command {
tenant_id: tenant_id.clone(),
command_id,
aggregate_id: aggregate_id.clone(),
aggregate_type: aggregate_type.clone(),
payload,
metadata,
};
let span = self.observability.start_command_span(
&aggregate_id.to_string(),
aggregate_type.as_str(),
tenant_id.as_str(),
&command_id.to_string(),
correlation_id.as_deref(),
trace_id.as_deref(),
);
let _guard = self
.placement
.begin_command(&tenant_id)
.await
.map_err(|e| {
self.observability.record_command_error(&span, &e);
map_aggregate_error(e)
})?;
let events = self.handler.handle_command(command).await.map_err(|e| {
self.observability.record_command_error(&span, &e);
map_aggregate_error(e)
})?;
self.observability
.record_command_success(&span, events.len());
let proto_events = events
.into_iter()
.map(|e| ProtoEvent {
event_id: e.event_id.to_string(),
command_id: e.command_id.to_string(),
aggregate_id: e.aggregate_id.to_string(),
aggregate_type: e.aggregate_type.to_string(),
version: e.version.as_u64(),
event_type: e.event_type,
payload_json: serde_json::to_string(&e.payload)
.unwrap_or_else(|_| "{}".to_string()),
timestamp_rfc3339: e.timestamp.to_rfc3339(),
})
.collect();
let mut response = Response::new(SubmitCommandResponse {
events: proto_events,
});
if let Some(correlation_id) = correlation_id.as_deref() {
if let Ok(v) = tonic::metadata::MetadataValue::try_from(correlation_id) {
response.metadata_mut().insert("x-correlation-id", v);
}
}
if let Some(traceparent) = traceparent.as_deref() {
if let Ok(v) = tonic::metadata::MetadataValue::try_from(traceparent) {
response.metadata_mut().insert("traceparent", v);
}
}
Ok(response)
}
}
fn trace_id_from_traceparent(traceparent: &str) -> Option<String> {
shared::trace_id_from_traceparent(traceparent).map(|s| s.to_string())
}
fn map_aggregate_error(error: AggregateError) -> Status {
match error {
AggregateError::TenantNotHosted { .. } => Status::unavailable(error.to_string()),
AggregateError::TenantDraining { .. } => Status::unavailable(error.to_string()),
AggregateError::TenantAccessDenied { .. } => Status::permission_denied(error.to_string()),
AggregateError::ValidationError(_) => Status::invalid_argument(error.to_string()),
AggregateError::VersionConflict { .. } => Status::aborted(error.to_string()),
AggregateError::NotFound(_) => Status::not_found(error.to_string()),
AggregateError::StorageError(_) => Status::internal(error.to_string()),
AggregateError::StreamError(_) => Status::unavailable(error.to_string()),
AggregateError::RehydrationError(_) => Status::internal(error.to_string()),
AggregateError::DecideError(_) => Status::failed_precondition(error.to_string()),
AggregateError::ApplyError(_) => Status::failed_precondition(error.to_string()),
}
}
fn resolve_tenant_id(
explicit: &str,
metadata: &str,
multi_tenant_enabled: bool,
default_tenant_id: Option<&TenantId>,
) -> Result<TenantId, &'static str> {
if !explicit.is_empty() {
return Ok(TenantId::new(explicit));
}
if !metadata.is_empty() {
return Ok(TenantId::new(metadata));
}
if multi_tenant_enabled {
if let Some(default_tenant_id) = default_tenant_id {
return Ok(default_tenant_id.clone());
}
return Err("tenant_id is required");
}
Ok(TenantId::default())
}
fn is_valid_tenant_id(id: &str) -> bool {
id.chars()
.all(|c| c.is_alphanumeric() || c == '-' || c == '_')
}
#[cfg(test)]
mod tests {
use super::*;
use crate::runtime::{ExecutorConfig, RuntimeExecutor};
use crate::storage::StorageClient;
use crate::stream::StreamClient;
use tempfile::tempdir;
use tonic::transport::{Channel, Server};
#[tokio::test]
async fn grpc_submit_command_rejects_unhosted_tenant() {
let obs = Arc::new(Observability::default());
let placement = Arc::new(TenantPlacementManager::new(obs.clone()));
placement
.set_hosted_tenants(vec!["tenant-a".to_string()])
.await;
let dir = tempdir().unwrap();
let path = dir.path().join("test.mdbx");
let storage = StorageClient::open(path.to_string_lossy().to_string()).unwrap();
let stream = StreamClient::in_memory();
let executor = RuntimeExecutor::with_config(ExecutorConfig::default().with_mock_runtime());
let handler = AggregateHandler::new(
storage,
stream,
executor,
"decide".to_string(),
"apply".to_string(),
);
let service = GrpcCommandServer::new(handler, placement, obs, true, None).service();
let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
let addr = listener.local_addr().unwrap();
tokio::spawn(async move {
Server::builder()
.add_service(service)
.serve_with_incoming_shutdown(
tokio_stream::wrappers::TcpListenerStream::new(listener),
async move {
tokio::time::sleep(std::time::Duration::from_millis(200)).await;
},
)
.await
.unwrap();
});
let channel = Channel::from_shared(format!("http://{}", addr))
.unwrap()
.connect()
.await
.unwrap();
let mut client =
super::super::proto::command_service_client::CommandServiceClient::new(channel);
let resp = client
.submit_command(SubmitCommandRequest {
tenant_id: "tenant-b".to_string(),
command_id: uuid::Uuid::now_v7().to_string(),
aggregate_id: AggregateId::new_v7().to_string(),
aggregate_type: "Account".to_string(),
payload_json: "{}".to_string(),
metadata: HashMap::new(),
})
.await;
assert!(resp.is_err());
let status = resp.unwrap_err();
assert_eq!(status.code(), tonic::Code::Unavailable);
}
}

View File

@@ -0,0 +1,230 @@
use crate::server::{AdminResponse, AdminServer};
use axum::extract::{Path, State};
use axum::http::{HeaderValue, StatusCode};
use axum::response::{IntoResponse, Response};
use axum::routing::{get, post};
use axum::{Json, Router};
use serde::Deserialize;
use std::future::Future;
use std::sync::Arc;
#[derive(Clone)]
pub struct HttpState {
pub admin: Arc<AdminServer>,
}
pub fn router(admin: Arc<AdminServer>) -> Router {
let state = HttpState { admin };
Router::new()
.route("/health", get(health_route))
.route("/ready", get(ready))
.route("/metrics", get(metrics))
.route("/admin/tenants", get(admin_tenants))
.route("/admin/tenant/:tenant_id/status", get(admin_tenant_status))
.route("/admin/tenant/:tenant_id/ready", get(admin_tenant_ready))
.route("/admin/tenant/:tenant_id/drain", post(admin_tenant_drain))
.route("/admin/drain", post(admin_drain))
.route("/admin/reload", post(admin_reload))
.with_state(state)
}
pub async fn serve(
listener: tokio::net::TcpListener,
admin: Arc<AdminServer>,
shutdown: impl Future<Output = ()> + Send + 'static,
) {
axum::serve(listener, router(admin))
.with_graceful_shutdown(shutdown)
.await
.expect("http server failed");
}
async fn health_route(State(state): State<HttpState>) -> Response {
proxy_json(state.admin.get("/health").await).await
}
async fn ready(State(state): State<HttpState>) -> Response {
proxy_json(state.admin.get("/ready").await).await
}
async fn admin_tenants(State(state): State<HttpState>) -> Response {
proxy_json(state.admin.get("/admin/tenants").await).await
}
async fn metrics(State(state): State<HttpState>) -> Response {
let resp = state.admin.get("/metrics").await;
let mut response = (StatusCode::OK, resp.text().await).into_response();
response.headers_mut().insert(
axum::http::header::CONTENT_TYPE,
HeaderValue::from_static("text/plain; version=0.0.4"),
);
response
}
async fn admin_drain(
State(state): State<HttpState>,
Json(body): Json<serde_json::Value>,
) -> Response {
proxy_json(state.admin.post("/admin/drain", body).await).await
}
async fn admin_reload(
State(state): State<HttpState>,
Json(body): Json<serde_json::Value>,
) -> Response {
proxy_json(state.admin.post("/admin/reload", body).await).await
}
#[derive(Debug, Deserialize)]
struct DrainBody {
timeout_ms: Option<u64>,
}
async fn admin_tenant_status(
State(state): State<HttpState>,
Path(tenant_id): Path<String>,
) -> Response {
let status = state
.admin
.placement_manager()
.tenant_status(&crate::types::TenantId::new(tenant_id))
.await;
let mut response = (StatusCode::OK, serde_json::to_string(&status).unwrap()).into_response();
response.headers_mut().insert(
axum::http::header::CONTENT_TYPE,
HeaderValue::from_static("application/json"),
);
response
}
async fn admin_tenant_ready(
State(state): State<HttpState>,
Path(tenant_id): Path<String>,
) -> Response {
let tenant_id = crate::types::TenantId::new(tenant_id);
let status = state
.admin
.placement_manager()
.tenant_status(&tenant_id)
.await;
let ready = state.admin.health_checker().is_ready() && status.accepting;
let mut response = (StatusCode::OK, serde_json::to_string(&ready).unwrap()).into_response();
response.headers_mut().insert(
axum::http::header::CONTENT_TYPE,
HeaderValue::from_static("application/json"),
);
response
}
async fn admin_tenant_drain(
State(state): State<HttpState>,
Path(tenant_id): Path<String>,
body: Option<Json<DrainBody>>,
) -> Response {
let tenant_id = crate::types::TenantId::new(tenant_id);
state
.admin
.placement_manager()
.drain_tenant(&tenant_id)
.await;
let timeout = body
.and_then(|b| b.timeout_ms)
.map(std::time::Duration::from_millis)
.unwrap_or(std::time::Duration::from_secs(10));
let drained = state
.admin
.placement_manager()
.wait_drained_with_timeout(&tenant_id, timeout)
.await;
let status = state
.admin
.placement_manager()
.tenant_status(&tenant_id)
.await;
let resp = serde_json::json!({ "drained": drained, "status": status });
let mut response = (StatusCode::OK, serde_json::to_string(&resp).unwrap()).into_response();
response.headers_mut().insert(
axum::http::header::CONTENT_TYPE,
HeaderValue::from_static("application/json"),
);
response
}
async fn proxy_json(resp: AdminResponse) -> Response {
let mut response = (StatusCode::OK, resp.text().await).into_response();
response.headers_mut().insert(
axum::http::header::CONTENT_TYPE,
HeaderValue::from_static("application/json"),
);
response
}
#[cfg(test)]
mod tests {
use super::*;
use crate::observability::Observability;
use crate::server::HealthChecker;
use tokio::io::{AsyncReadExt, AsyncWriteExt};
async fn http_get(addr: std::net::SocketAddr, path: &str) -> String {
let mut stream = tokio::net::TcpStream::connect(addr).await.unwrap();
let req = format!(
"GET {} HTTP/1.1\r\nHost: localhost\r\nConnection: close\r\n\r\n",
path
);
stream.write_all(req.as_bytes()).await.unwrap();
let mut buf = Vec::new();
stream.read_to_end(&mut buf).await.unwrap();
String::from_utf8_lossy(&buf).to_string()
}
#[tokio::test]
async fn http_server_exposes_health_ready_metrics() {
let health = HealthChecker::new();
health.set_storage_healthy(true);
health.set_stream_healthy(true);
let admin = Arc::new(AdminServer::new(
Observability::default(),
health,
"test-shard".to_string(),
));
admin
.placement_manager()
.set_hosted_tenants(vec!["test-tenant".to_string()])
.await;
let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
let addr = listener.local_addr().unwrap();
let (tx, rx) = tokio::sync::oneshot::channel::<()>();
let handle = tokio::spawn(async move {
serve(listener, admin, async move {
let _ = rx.await;
})
.await;
});
let health_resp = http_get(addr, "/health").await;
assert!(health_resp.starts_with("HTTP/1.1 200"));
let ready_resp = http_get(addr, "/ready").await;
assert!(ready_resp.starts_with("HTTP/1.1 200"));
let metrics_resp = http_get(addr, "/metrics").await;
assert!(metrics_resp.starts_with("HTTP/1.1 200"));
assert!(metrics_resp.contains("aggregate_commands_total"));
let status_resp = http_get(addr, "/admin/tenant/test-tenant/status").await;
assert!(status_resp.starts_with("HTTP/1.1 200"));
assert!(status_resp.contains("test-tenant"));
let ready_resp = http_get(addr, "/admin/tenant/test-tenant/ready").await;
assert!(ready_resp.starts_with("HTTP/1.1 200"));
let _ = tx.send(());
handle.await.unwrap();
}
}

26
aggregate/src/lib.rs Normal file
View File

@@ -0,0 +1,26 @@
pub mod aggregate;
pub mod config;
pub mod container;
pub mod gateway;
pub mod http_server;
pub mod observability;
pub mod placement;
pub mod query;
pub mod runtime;
pub mod server;
pub mod storage;
pub mod stream;
pub mod swarm;
pub mod types;
pub use aggregate::{AggregateHandler, AggregateInstance};
pub use config::Settings;
pub use query::{
AggregateProjection, QueryClient, QueryConfig, QueryError, QueryRequest, QueryResponse,
QueryServer, StateProjection,
};
pub use runtime::{ExecutorConfig, RuntimeExecutor};
pub use server::{CommandRequest, CommandResponse, CommandServer, HealthChecker, HealthStatus};
pub use storage::StorageClient;
pub use stream::StreamClient;
pub use types::*;

aggregate/src/main.rs Normal file

@@ -0,0 +1,213 @@
use aggregate::config::Settings;
use aggregate::gateway::server::GrpcCommandServer;
use aggregate::http_server;
use aggregate::observability::Observability;
use aggregate::runtime::RuntimeExecutor;
use aggregate::server::AdminServer;
use aggregate::storage::StorageClient;
use aggregate::stream::StreamClient;
use aggregate::swarm::TenantPlacementKvClient;
use aggregate::{aggregate::AggregateHandler, placement::TenantPlacementManager};
use futures::StreamExt;
use std::sync::Arc;
use std::time::Duration;
#[tokio::main]
async fn main() {
match std::env::args().nth(1).as_deref() {
Some("-h") | Some("--help") => {
print_help();
return;
}
Some("serve") | None => serve().await,
Some(other) => {
eprintln!("Unknown command: {}", other);
print_help();
}
}
}
async fn serve() {
let settings = load_settings();
let observability = Observability::default();
let health_checker = aggregate::server::HealthChecker::new();
let admin = Arc::new(AdminServer::new(
observability,
health_checker,
settings.shard_id.clone(),
));
spawn_health_probe(admin.clone(), settings.clone());
spawn_placement_watcher(admin.placement_manager(), settings.clone());
let storage = StorageClient::open(settings.storage_path.clone()).unwrap();
let stream = StreamClient::new(settings.nats_url.clone()).await.unwrap();
let _ = stream.setup_stream().await;
let executor = RuntimeExecutor::new();
let handler = AggregateHandler::new(
storage,
stream,
executor,
settings.decide_program.clone(),
settings.apply_program.clone(),
)
.with_snapshot_threshold(settings.snapshot_threshold)
.with_max_retries(settings.max_retries);
let grpc_addr: std::net::SocketAddr = settings.grpc_addr.parse().unwrap();
let grpc_service = GrpcCommandServer::new(
handler,
admin.placement_manager(),
admin.observability(),
settings.multi_tenant_enabled,
settings
.default_tenant_id
.as_ref()
.map(aggregate::types::TenantId::new),
)
.service();
let addr = std::env::var("AGGREGATE_HTTP_ADDR").unwrap_or_else(|_| "0.0.0.0:8080".to_string());
let listener = tokio::net::TcpListener::bind(&addr).await.unwrap();
let (shutdown_tx, _) = tokio::sync::broadcast::channel::<()>(1);
let mut http_shutdown = shutdown_tx.subscribe();
let mut grpc_shutdown = shutdown_tx.subscribe();
let http_task = tokio::spawn(async move {
http_server::serve(listener, admin, async move {
let _ = http_shutdown.recv().await;
})
.await;
});
let grpc_task = tokio::spawn(async move {
tonic::transport::Server::builder()
.add_service(grpc_service)
.serve_with_shutdown(grpc_addr, async move {
let _ = grpc_shutdown.recv().await;
})
.await
.unwrap();
});
let _ = tokio::signal::ctrl_c().await;
let _ = shutdown_tx.send(());
let _ = tokio::join!(http_task, grpc_task);
}
fn print_help() {
println!(
"aggregate\n\nUSAGE:\n aggregate [COMMAND]\n\nCOMMANDS:\n serve Start the HTTP server (default)\n\nOPTIONS:\n -h, --help Print help\n"
);
}
fn load_settings() -> Settings {
if let Ok(path) = std::env::var("AGGREGATE_CONFIG_PATH") {
if let Ok(settings) = Settings::load_from_file_with_env_overrides(path) {
return settings;
}
}
Settings::from_env().unwrap_or_default()
}
fn spawn_health_probe(admin: Arc<AdminServer>, settings: Settings) {
tokio::spawn(async move {
loop {
let storage_ok = StorageClient::open(settings.storage_path.clone()).is_ok();
admin.health_checker().set_storage_healthy(storage_ok);
let stream_ok = tokio::time::timeout(Duration::from_secs(1), async {
let stream = StreamClient::new(settings.nats_url.clone()).await?;
let _ = stream.setup_stream().await;
Ok::<_, aggregate::types::AggregateError>(())
})
.await
.is_ok_and(|r| r.is_ok());
admin.health_checker().set_stream_healthy(stream_ok);
tokio::time::sleep(Duration::from_secs(5)).await;
}
});
}
fn spawn_placement_watcher(placement: Arc<TenantPlacementManager>, settings: Settings) {
tokio::spawn(async move {
loop {
let client = TenantPlacementKvClient::connect(
settings.nats_url.clone(),
settings.placement_bucket.clone(),
)
.await;
let client = match client {
Ok(c) => c,
Err(_) => {
tokio::time::sleep(Duration::from_secs(1)).await;
continue;
}
};
if let Ok(Some(value)) = client.get_json(&settings.placement_key).await {
apply_placement_value(&placement, &settings.shard_id, value).await;
}
let watch = client.watch_json(&settings.placement_key).await;
let mut stream = match watch {
Ok(s) => s,
Err(_) => {
tokio::time::sleep(Duration::from_secs(1)).await;
continue;
}
};
while let Some(update) = stream.next().await {
if let Ok(value) = update {
apply_placement_value(&placement, &settings.shard_id, value).await;
}
}
tokio::time::sleep(Duration::from_secs(1)).await;
}
});
}
async fn apply_placement_value(
placement: &TenantPlacementManager,
shard_id: &str,
value: serde_json::Value,
) {
if let Some(map) = value.as_object() {
let placement_map = map
.iter()
.filter_map(|(k, v)| v.as_str().map(|s| (k.clone(), s.to_string())))
.collect::<std::collections::HashMap<_, _>>();
placement
.apply_placement_map(shard_id, &placement_map)
.await;
return;
}
if let Some(map) = value.get("placement").and_then(|v| v.as_object()) {
let placement_map = map
.iter()
.filter_map(|(k, v)| v.as_str().map(|s| (k.clone(), s.to_string())))
.collect::<std::collections::HashMap<_, _>>();
placement
.apply_placement_map(shard_id, &placement_map)
.await;
}
}
#[cfg(test)]
mod tests {
#[test]
fn binary_exists() {
assert!(std::env::current_exe().is_ok());
}
}

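For reference, `apply_placement_value` above accepts two JSON shapes from the placement KV bucket. A small sketch of both (tenant and shard names are made up):

```rust
use serde_json::json;

fn main() {
    // Flat shape: a direct tenant -> shard map.
    let flat = json!({ "tenant-a": "shard-1", "tenant-b": "shard-2" });
    // Nested shape: the same map under a "placement" key.
    let nested = json!({ "placement": { "tenant-a": "shard-1", "tenant-b": "shard-2" } });
    // Either value may be written to the placement key; only tenants whose
    // assigned shard matches this node's shard_id become hosted.
    println!("{flat}\n{nested}");
}
```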
aggregate/src/observability/metrics.rs Normal file

@@ -0,0 +1,365 @@
use std::collections::HashMap;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::RwLock;
use std::time::Duration;
pub trait MetricsRegistry: Send + Sync {
fn increment_counter(&self, name: &str, labels: &[(&str, &str)]);
fn record_histogram(&self, name: &str, value: f64, labels: &[(&str, &str)]);
fn export_prometheus(&self) -> String;
}
#[derive(Debug)]
struct AtomicHistogram {
count: AtomicU64,
sum: AtomicU64,
buckets: Vec<(f64, AtomicU64)>,
}
impl AtomicHistogram {
fn new() -> Self {
let buckets: Vec<(f64, AtomicU64)> = vec![
0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0,
]
.into_iter()
.map(|v| (v, AtomicU64::new(0)))
.collect();
Self {
count: AtomicU64::new(0),
sum: AtomicU64::new(0),
buckets,
}
}
fn observe(&self, duration: Duration) {
// The buckets and the exported metric are in seconds, so observe in
// seconds; the running sum is stored as integer microseconds so it can
// be accumulated in an AtomicU64.
let value_s = duration.as_secs_f64();
self.count.fetch_add(1, Ordering::Relaxed);
self.sum
.fetch_add((value_s * 1_000_000.0) as u64, Ordering::Relaxed);
for (threshold, count) in &self.buckets {
if value_s <= *threshold {
count.fetch_add(1, Ordering::Relaxed);
}
}
}
fn export(&self, name: &str, labels: &str) -> String {
let mut output = String::new();
let count = self.count.load(Ordering::Relaxed);
let sum = self.sum.load(Ordering::Relaxed) as f64 / 1_000_000.0;
let label_str = if labels.is_empty() {
String::new()
} else {
format!("{{{}}}", labels.trim_start_matches(','))
};
output.push_str(&format!("{}_sum{} {}\n", name, label_str, sum));
output.push_str(&format!("{}_count{} {}\n", name, label_str, count));
for (threshold, bucket_count) in &self.buckets {
let c = bucket_count.load(Ordering::Relaxed);
let bucket_labels = if labels.is_empty() {
format!("le=\"{}\"", threshold)
} else {
format!("le=\"{}\"{}", threshold, labels)
};
output.push_str(&format!("{}_bucket{{{}}} {}\n", name, bucket_labels, c));
}
let inf_labels = if labels.is_empty() {
"le=\"+Inf\"".to_string()
} else {
format!("le=\"+Inf\"{}", labels)
};
output.push_str(&format!("{}_bucket{{{}}} {}\n", name, inf_labels, count));
output
}
}
impl Default for AtomicHistogram {
fn default() -> Self {
Self::new()
}
}
#[derive(Debug)]
pub struct Metrics {
commands_total: RwLock<HashMap<String, AtomicU64>>,
command_errors_total: RwLock<HashMap<String, AtomicU64>>,
command_duration: RwLock<HashMap<String, AtomicHistogram>>,
version_conflicts: AtomicU64,
tenant_errors: AtomicU64,
rehydration_duration: RwLock<HashMap<String, AtomicHistogram>>,
in_flight: RwLock<HashMap<String, AtomicU64>>,
}
impl Metrics {
pub fn new() -> Self {
Self {
commands_total: RwLock::new(HashMap::new()),
command_errors_total: RwLock::new(HashMap::new()),
command_duration: RwLock::new(HashMap::new()),
version_conflicts: AtomicU64::new(0),
tenant_errors: AtomicU64::new(0),
rehydration_duration: RwLock::new(HashMap::new()),
in_flight: RwLock::new(HashMap::new()),
}
}
pub fn increment_commands_total(&self, aggregate_type: &str, tenant_id: &str) {
let key = format!("{}:{}", aggregate_type, tenant_id);
let map = self.commands_total.read().unwrap();
if let Some(counter) = map.get(&key) {
counter.fetch_add(1, Ordering::Relaxed);
return;
}
drop(map);
let mut map = self.commands_total.write().unwrap();
let counter = map.entry(key).or_insert_with(|| AtomicU64::new(0));
counter.fetch_add(1, Ordering::Relaxed);
}
pub fn increment_command_errors_total(
&self,
aggregate_type: &str,
tenant_id: &str,
error_kind: &str,
) {
let key = format!("{}:{}:{}", aggregate_type, tenant_id, error_kind);
let map = self.command_errors_total.read().unwrap();
if let Some(counter) = map.get(&key) {
counter.fetch_add(1, Ordering::Relaxed);
return;
}
drop(map);
let mut map = self.command_errors_total.write().unwrap();
let counter = map.entry(key).or_insert_with(|| AtomicU64::new(0));
counter.fetch_add(1, Ordering::Relaxed);
}
pub fn record_command_duration(&self, duration: Duration, aggregate_type: &str) {
let mut map = self.command_duration.write().unwrap();
let histogram = map.entry(aggregate_type.to_string()).or_default();
histogram.observe(duration);
}
pub fn increment_version_conflicts(&self) {
self.version_conflicts.fetch_add(1, Ordering::Relaxed);
}
pub fn increment_tenant_errors(&self) {
self.tenant_errors.fetch_add(1, Ordering::Relaxed);
}
pub fn record_rehydration_duration(&self, duration: Duration, aggregate_type: &str) {
let mut map = self.rehydration_duration.write().unwrap();
let histogram = map.entry(aggregate_type.to_string()).or_default();
histogram.observe(duration);
}
pub fn set_in_flight(&self, tenant_id: &str, value: u64) {
let map = self.in_flight.read().unwrap();
if let Some(gauge) = map.get(tenant_id) {
gauge.store(value, Ordering::Relaxed);
return;
}
drop(map);
let mut map = self.in_flight.write().unwrap();
let gauge = map
.entry(tenant_id.to_string())
.or_insert_with(|| AtomicU64::new(0));
gauge.store(value, Ordering::Relaxed);
}
pub fn export_prometheus(&self) -> String {
let mut output = String::new();
output.push_str("# HELP aggregate_commands_total Total number of commands processed\n");
output.push_str("# TYPE aggregate_commands_total counter\n");
{
let map = self.commands_total.read().unwrap();
for (key, counter) in map.iter() {
let parts: Vec<&str> = key.split(':').collect();
if parts.len() == 2 {
let value = counter.load(Ordering::Relaxed);
output.push_str(&format!(
"aggregate_commands_total{{aggregate_type=\"{}\",tenant_id=\"{}\"}} {}\n",
parts[0], parts[1], value
));
}
}
}
output.push_str("\n# HELP aggregate_command_errors_total Total number of command errors\n");
output.push_str("# TYPE aggregate_command_errors_total counter\n");
{
let map = self.command_errors_total.read().unwrap();
for (key, counter) in map.iter() {
let parts: Vec<&str> = key.split(':').collect();
if parts.len() == 3 {
let value = counter.load(Ordering::Relaxed);
output.push_str(&format!(
"aggregate_command_errors_total{{aggregate_type=\"{}\",tenant_id=\"{}\",error_kind=\"{}\"}} {}\n",
parts[0], parts[1], parts[2], value
));
}
}
}
output
.push_str("\n# HELP aggregate_command_duration_seconds Command processing duration\n");
output.push_str("# TYPE aggregate_command_duration_seconds histogram\n");
{
let map = self.command_duration.read().unwrap();
for (aggregate_type, histogram) in map.iter() {
let labels = format!(",aggregate_type=\"{}\"", aggregate_type);
output.push_str(&histogram.export("aggregate_command_duration_seconds", &labels));
}
}
output
.push_str("\n# HELP aggregate_version_conflicts_total Total version conflict errors\n");
output.push_str("# TYPE aggregate_version_conflicts_total counter\n");
output.push_str(&format!(
"aggregate_version_conflicts_total {}\n",
self.version_conflicts.load(Ordering::Relaxed)
));
output
.push_str("\n# HELP aggregate_tenant_errors_total Total tenant access denied errors\n");
output.push_str("# TYPE aggregate_tenant_errors_total counter\n");
output.push_str(&format!(
"aggregate_tenant_errors_total {}\n",
self.tenant_errors.load(Ordering::Relaxed)
));
output.push_str(
"\n# HELP aggregate_rehydration_duration_seconds Aggregate rehydration duration\n",
);
output.push_str("# TYPE aggregate_rehydration_duration_seconds histogram\n");
{
let map = self.rehydration_duration.read().unwrap();
for (aggregate_type, histogram) in map.iter() {
let labels = format!(",aggregate_type=\"{}\"", aggregate_type);
output
.push_str(&histogram.export("aggregate_rehydration_duration_seconds", &labels));
}
}
output.push_str(
"\n# HELP aggregate_in_flight_commands Number of in-flight commands by tenant\n",
);
output.push_str("# TYPE aggregate_in_flight_commands gauge\n");
{
let map = self.in_flight.read().unwrap();
for (tenant_id, gauge) in map.iter() {
let value = gauge.load(Ordering::Relaxed);
output.push_str(&format!(
"aggregate_in_flight_commands{{tenant_id=\"{}\"}} {}\n",
tenant_id, value
));
}
}
output
}
}
impl Default for Metrics {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::time::Duration;
#[test]
fn metrics_increment_commands() {
let metrics = Metrics::new();
metrics.increment_commands_total("Account", "tenant-a");
metrics.increment_commands_total("Account", "tenant-a");
metrics.increment_commands_total("Account", "tenant-b");
let output = metrics.export_prometheus();
assert!(output.contains(
"aggregate_commands_total{aggregate_type=\"Account\",tenant_id=\"tenant-a\"} 2"
));
assert!(output.contains(
"aggregate_commands_total{aggregate_type=\"Account\",tenant_id=\"tenant-b\"} 1"
));
}
#[test]
fn metrics_records_version_conflicts() {
let metrics = Metrics::new();
metrics.increment_version_conflicts();
metrics.increment_version_conflicts();
let output = metrics.export_prometheus();
assert!(output.contains("aggregate_version_conflicts_total 2"));
}
#[test]
fn metrics_records_tenant_errors() {
let metrics = Metrics::new();
metrics.increment_tenant_errors();
let output = metrics.export_prometheus();
assert!(output.contains("aggregate_tenant_errors_total 1"));
}
#[test]
fn metrics_records_command_errors_with_labels() {
let metrics = Metrics::new();
metrics.increment_command_errors_total("Account", "tenant-a", "tenant_not_hosted");
metrics.increment_command_errors_total("Account", "tenant-a", "tenant_not_hosted");
let output = metrics.export_prometheus();
assert!(output.contains("aggregate_command_errors_total{aggregate_type=\"Account\",tenant_id=\"tenant-a\",error_kind=\"tenant_not_hosted\"} 2"));
}
#[test]
fn metrics_records_command_duration() {
let metrics = Metrics::new();
metrics.record_command_duration(Duration::from_millis(50), "Account");
let output = metrics.export_prometheus();
assert!(output.contains("aggregate_command_duration_seconds"));
assert!(output
.contains("aggregate_command_duration_seconds_count{aggregate_type=\"Account\"} 1"));
}
#[test]
fn metrics_records_rehydration_duration() {
let metrics = Metrics::new();
metrics.record_rehydration_duration(Duration::from_millis(100), "Account");
let output = metrics.export_prometheus();
assert!(output.contains("aggregate_rehydration_duration_seconds"));
assert!(output.contains(
"aggregate_rehydration_duration_seconds_count{aggregate_type=\"Account\"} 1"
));
}
#[test]
fn metrics_export_prometheus_format() {
let metrics = Metrics::new();
metrics.increment_commands_total("Account", "tenant-a");
let output = metrics.export_prometheus();
assert!(output.contains("# HELP aggregate_commands_total"));
assert!(output.contains("# TYPE aggregate_commands_total counter"));
}
#[test]
fn metrics_is_send_sync() {
fn assert_send_sync<T: Send + Sync>() {}
assert_send_sync::<Metrics>();
}
}

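A short sketch of the metrics lifecycle using only the public API above (the values are illustrative): record a command, then export the Prometheus text that the /metrics endpoint serves.

```rust
use std::time::Duration;

use aggregate::observability::Metrics;

fn main() {
    let metrics = Metrics::new();
    metrics.increment_commands_total("Account", "tenant-a");
    metrics.record_command_duration(Duration::from_millis(42), "Account");
    // Prints the counter, histogram, and gauge sections in Prometheus text format.
    print!("{}", metrics.export_prometheus());
}
```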
aggregate/src/observability/mod.rs Normal file

@@ -0,0 +1,323 @@
mod metrics;
pub use metrics::{Metrics, MetricsRegistry};
use std::sync::Arc;
use std::time::Instant;
#[derive(Debug, Clone)]
pub struct ObservabilityConfig {
pub service_name: String,
pub environment: String,
pub enable_metrics: bool,
}
impl Default for ObservabilityConfig {
fn default() -> Self {
Self {
service_name: "aggregate".to_string(),
environment: "development".to_string(),
enable_metrics: true,
}
}
}
impl ObservabilityConfig {
pub fn with_service_name(mut self, name: impl Into<String>) -> Self {
self.service_name = name.into();
self
}
pub fn with_environment(mut self, env: impl Into<String>) -> Self {
self.environment = env.into();
self
}
pub fn without_metrics(mut self) -> Self {
self.enable_metrics = false;
self
}
}
#[derive(Debug, Clone)]
pub struct CommandSpan {
aggregate_id: String,
aggregate_type: String,
tenant_id: String,
command_id: String,
correlation_id: Option<String>,
trace_id: Option<String>,
start_time: Instant,
}
impl CommandSpan {
pub fn new(
aggregate_id: impl Into<String>,
aggregate_type: impl Into<String>,
tenant_id: impl Into<String>,
command_id: impl Into<String>,
correlation_id: Option<String>,
trace_id: Option<String>,
) -> Self {
Self {
aggregate_id: aggregate_id.into(),
aggregate_type: aggregate_type.into(),
tenant_id: tenant_id.into(),
command_id: command_id.into(),
correlation_id,
trace_id,
start_time: Instant::now(),
}
}
pub fn elapsed(&self) -> std::time::Duration {
self.start_time.elapsed()
}
pub fn aggregate_id(&self) -> &str {
&self.aggregate_id
}
pub fn aggregate_type(&self) -> &str {
&self.aggregate_type
}
pub fn tenant_id(&self) -> &str {
&self.tenant_id
}
pub fn command_id(&self) -> &str {
&self.command_id
}
pub fn correlation_id(&self) -> Option<&str> {
self.correlation_id.as_deref()
}
pub fn trace_id(&self) -> Option<&str> {
self.trace_id.as_deref()
}
}
pub struct Observability {
config: ObservabilityConfig,
metrics: Arc<Metrics>,
}
impl Observability {
pub fn new(config: ObservabilityConfig) -> Self {
let metrics = Arc::new(Metrics::new());
Self { config, metrics }
}
pub fn config(&self) -> &ObservabilityConfig {
&self.config
}
pub fn metrics(&self) -> &Arc<Metrics> {
&self.metrics
}
pub fn start_command_span(
&self,
aggregate_id: &str,
aggregate_type: &str,
tenant_id: &str,
command_id: &str,
correlation_id: Option<&str>,
trace_id: Option<&str>,
) -> CommandSpan {
tracing::info_span!(
"command",
aggregate_id = %aggregate_id,
aggregate_type = %aggregate_type,
tenant_id = %tenant_id,
command_id = %command_id,
correlation_id = correlation_id.unwrap_or(""),
trace_id = trace_id.unwrap_or(""),
);
CommandSpan::new(
aggregate_id,
aggregate_type,
tenant_id,
command_id,
correlation_id.map(|s| s.to_string()),
trace_id.map(|s| s.to_string()),
)
}
pub fn record_command_success(&self, span: &CommandSpan, events_count: usize) {
self.metrics
.increment_commands_total(&span.aggregate_type, &span.tenant_id);
self.metrics
.record_command_duration(span.elapsed(), &span.aggregate_type);
tracing::info!(
aggregate_id = %span.aggregate_id(),
aggregate_type = %span.aggregate_type(),
tenant_id = %span.tenant_id(),
command_id = %span.command_id(),
correlation_id = span.correlation_id().unwrap_or(""),
trace_id = span.trace_id().unwrap_or(""),
events_count = events_count,
duration_ms = span.elapsed().as_millis() as u64,
"Command handled successfully"
);
}
pub fn record_command_error(&self, span: &CommandSpan, error: &crate::types::AggregateError) {
self.metrics
.increment_commands_total(&span.aggregate_type, &span.tenant_id);
self.metrics
.record_command_duration(span.elapsed(), &span.aggregate_type);
self.metrics.increment_command_errors_total(
&span.aggregate_type,
&span.tenant_id,
error_kind(error),
);
if matches!(
error,
crate::types::AggregateError::TenantAccessDenied { .. }
| crate::types::AggregateError::TenantNotHosted { .. }
| crate::types::AggregateError::TenantDraining { .. }
) {
self.metrics.increment_tenant_errors();
}
if matches!(error, crate::types::AggregateError::VersionConflict { .. }) {
self.metrics.increment_version_conflicts();
}
tracing::error!(
aggregate_id = %span.aggregate_id(),
aggregate_type = %span.aggregate_type(),
tenant_id = %span.tenant_id(),
command_id = %span.command_id(),
correlation_id = span.correlation_id().unwrap_or(""),
trace_id = span.trace_id().unwrap_or(""),
error = %error,
duration_ms = span.elapsed().as_millis() as u64,
"Command handling failed"
);
}
pub fn record_rehydration(&self, duration: std::time::Duration, aggregate_type: &str) {
self.metrics
.record_rehydration_duration(duration, aggregate_type);
tracing::debug!(
aggregate_type = %aggregate_type,
duration_ms = duration.as_millis() as u64,
"Aggregate rehydrated"
);
}
pub fn export_metrics(&self) -> String {
self.metrics.export_prometheus()
}
}
impl Default for Observability {
fn default() -> Self {
Self::new(ObservabilityConfig::default())
}
}
fn error_kind(error: &crate::types::AggregateError) -> &'static str {
match error {
crate::types::AggregateError::TenantAccessDenied { .. } => "tenant_access_denied",
crate::types::AggregateError::TenantNotHosted { .. } => "tenant_not_hosted",
crate::types::AggregateError::TenantDraining { .. } => "tenant_draining",
crate::types::AggregateError::ValidationError(_) => "validation",
crate::types::AggregateError::VersionConflict { .. } => "version_conflict",
crate::types::AggregateError::StorageError(_) => "storage",
crate::types::AggregateError::StreamError(_) => "stream",
crate::types::AggregateError::RehydrationError(_) => "rehydration",
crate::types::AggregateError::DecideError(_) => "decide",
crate::types::AggregateError::ApplyError(_) => "apply",
crate::types::AggregateError::NotFound(_) => "not_found",
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::types::{AggregateError, TenantId};
#[test]
fn observability_config_defaults() {
let config = ObservabilityConfig::default();
assert_eq!(config.service_name, "aggregate");
assert_eq!(config.environment, "development");
assert!(config.enable_metrics);
}
#[test]
fn observability_config_builder() {
let config = ObservabilityConfig::default()
.with_service_name("my-service")
.with_environment("production")
.without_metrics();
assert_eq!(config.service_name, "my-service");
assert_eq!(config.environment, "production");
assert!(!config.enable_metrics);
}
#[test]
fn command_span_tracks_elapsed_time() {
let span = CommandSpan::new("agg-123", "Account", "tenant-a", "cmd-456", None, None);
std::thread::sleep(std::time::Duration::from_millis(10));
assert!(span.elapsed() >= std::time::Duration::from_millis(10));
}
#[test]
fn observability_records_success() {
let obs = Observability::default();
let span = obs.start_command_span("agg-123", "Account", "tenant-a", "cmd-456", None, None);
obs.record_command_success(&span, 3);
let metrics = obs.export_metrics();
assert!(metrics.contains("aggregate_commands_total"));
}
#[test]
fn observability_records_tenant_error() {
let obs = Observability::default();
let span = obs.start_command_span("agg-123", "Account", "tenant-a", "cmd-456", None, None);
let error = AggregateError::TenantAccessDenied {
tenant_id: TenantId::new("other-tenant"),
};
obs.record_command_error(&span, &error);
let metrics = obs.export_metrics();
assert!(metrics.contains("aggregate_tenant_errors_total"));
}
#[test]
fn observability_records_version_conflict() {
let obs = Observability::default();
let span = obs.start_command_span("agg-123", "Account", "tenant-a", "cmd-456", None, None);
let error = AggregateError::VersionConflict {
expected: crate::types::Version::from(5),
actual: crate::types::Version::from(4),
};
obs.record_command_error(&span, &error);
let metrics = obs.export_metrics();
assert!(metrics.contains("aggregate_version_conflicts_total"));
}
#[test]
fn observability_is_send_sync() {
fn assert_send_sync<T: Send + Sync>() {}
assert_send_sync::<Observability>();
assert_send_sync::<CommandSpan>();
}
}

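The intended span lifecycle, sketched with hypothetical IDs: start a span before handling, then record exactly one of success or error on it so the duration histogram and error counters stay consistent.

```rust
use aggregate::observability::Observability;

fn handle_one_command(obs: &Observability) {
    let span = obs.start_command_span("agg-1", "Account", "tenant-a", "cmd-1", None, None);
    // ... decide/apply would run here ...
    obs.record_command_success(&span, 1);
}
```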
aggregate/src/placement.rs Normal file

@@ -0,0 +1,267 @@
use crate::observability::Observability;
use crate::types::{AggregateError, TenantId};
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use tokio::sync::RwLock;
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct TenantStatus {
pub tenant_id: TenantId,
pub hosted: bool,
pub accepting: bool,
pub draining: bool,
pub in_flight: u64,
}
pub struct TenantPlacementManager {
hosted: RwLock<HashSet<String>>,
draining: RwLock<HashSet<String>>,
in_flight: RwLock<HashMap<String, u64>>,
observability: Arc<Observability>,
}
impl TenantPlacementManager {
pub fn new(observability: Arc<Observability>) -> Self {
Self {
hosted: RwLock::new(HashSet::new()),
draining: RwLock::new(HashSet::new()),
in_flight: RwLock::new(HashMap::new()),
observability,
}
}
pub async fn set_hosted_tenants(&self, tenant_ids: impl IntoIterator<Item = String>) {
let mut hosted = self.hosted.write().await;
hosted.clear();
hosted.extend(tenant_ids);
}
pub async fn apply_placement_map(&self, shard_id: &str, placement: &HashMap<String, String>) {
let tenants = placement
.iter()
.filter_map(|(tenant_id, assigned)| {
if assigned == shard_id {
Some(tenant_id.clone())
} else {
None
}
})
.collect::<Vec<_>>();
self.set_hosted_tenants(tenants).await;
}
pub async fn is_hosted(&self, tenant_id: &TenantId) -> bool {
if tenant_id.as_str().is_empty() {
return true;
}
self.hosted.read().await.contains(tenant_id.as_str())
}
pub async fn is_draining(&self, tenant_id: &TenantId) -> bool {
self.draining.read().await.contains(tenant_id.as_str())
}
pub async fn begin_command(
self: &Arc<Self>,
tenant_id: &TenantId,
) -> Result<TenantCommandGuard, AggregateError> {
if !self.is_hosted(tenant_id).await {
return Err(AggregateError::TenantNotHosted {
tenant_id: tenant_id.clone(),
});
}
if self.is_draining(tenant_id).await {
return Err(AggregateError::TenantDraining {
tenant_id: tenant_id.clone(),
});
}
let mut map = self.in_flight.write().await;
let counter = map.entry(tenant_id.as_str().to_string()).or_insert(0);
*counter += 1;
let value = *counter;
drop(map);
self.observability
.metrics()
.set_in_flight(tenant_id.as_str(), value);
Ok(TenantCommandGuard {
tenant_id: tenant_id.clone(),
manager: self.clone(),
})
}
pub async fn drain_tenant(&self, tenant_id: &TenantId) {
if tenant_id.as_str().is_empty() {
return;
}
let mut draining = self.draining.write().await;
draining.insert(tenant_id.as_str().to_string());
}
pub async fn undrain_tenant(&self, tenant_id: &TenantId) {
let mut draining = self.draining.write().await;
draining.remove(tenant_id.as_str());
}
pub async fn wait_drained(&self, tenant_id: &TenantId) {
loop {
let in_flight = self
.in_flight
.read()
.await
.get(tenant_id.as_str())
.copied()
.unwrap_or(0);
if in_flight == 0 {
break;
}
tokio::time::sleep(std::time::Duration::from_millis(10)).await;
}
}
pub async fn wait_drained_with_timeout(
&self,
tenant_id: &TenantId,
timeout: std::time::Duration,
) -> bool {
let deadline = tokio::time::Instant::now() + timeout;
loop {
let in_flight = self
.in_flight
.read()
.await
.get(tenant_id.as_str())
.copied()
.unwrap_or(0);
if in_flight == 0 {
return true;
}
if tokio::time::Instant::now() >= deadline {
return false;
}
tokio::time::sleep(std::time::Duration::from_millis(10)).await;
}
}
pub async fn tenant_status(&self, tenant_id: &TenantId) -> TenantStatus {
let hosted = self.is_hosted(tenant_id).await;
let draining = self.is_draining(tenant_id).await;
let in_flight = self
.in_flight
.read()
.await
.get(tenant_id.as_str())
.copied()
.unwrap_or(0);
TenantStatus {
tenant_id: tenant_id.clone(),
hosted,
accepting: hosted && !draining,
draining,
in_flight,
}
}
pub async fn hosted_tenants(&self) -> Vec<TenantId> {
let hosted = self.hosted.read().await;
hosted.iter().map(TenantId::new).collect()
}
pub async fn all_statuses(&self) -> Vec<TenantStatus> {
let hosted = self.hosted.read().await.clone();
let draining = self.draining.read().await.clone();
let in_flight = self.in_flight.read().await.clone();
hosted
.into_iter()
.map(|id| {
let tenant_id = TenantId::new(id.clone());
let d = draining.contains(&id);
let f = in_flight.get(&id).copied().unwrap_or(0);
TenantStatus {
tenant_id,
hosted: true,
accepting: !d,
draining: d,
in_flight: f,
}
})
.collect()
}
async fn finish_command(&self, tenant_id: &TenantId) {
let mut map = self.in_flight.write().await;
let counter = map.entry(tenant_id.as_str().to_string()).or_insert(0);
if *counter > 0 {
*counter -= 1;
}
let value = *counter;
drop(map);
self.observability
.metrics()
.set_in_flight(tenant_id.as_str(), value);
}
}
pub struct TenantCommandGuard {
tenant_id: TenantId,
manager: Arc<TenantPlacementManager>,
}
impl Drop for TenantCommandGuard {
fn drop(&mut self) {
let tenant_id = self.tenant_id.clone();
let manager = self.manager.clone();
tokio::spawn(async move {
manager.finish_command(&tenant_id).await;
});
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::observability::Observability;
#[tokio::test]
async fn placement_rejects_unhosted_tenant() {
let obs = Arc::new(Observability::default());
let mgr = Arc::new(TenantPlacementManager::new(obs));
mgr.set_hosted_tenants(vec!["tenant-a".to_string()]).await;
let err = match mgr.begin_command(&TenantId::new("tenant-b")).await {
Ok(_) => panic!("expected error"),
Err(e) => e,
};
assert!(matches!(err, AggregateError::TenantNotHosted { .. }));
}
#[tokio::test]
async fn drain_blocks_new_commands_until_in_flight_zero() {
let obs = Arc::new(Observability::default());
let mgr = Arc::new(TenantPlacementManager::new(obs));
mgr.set_hosted_tenants(vec!["tenant-a".to_string()]).await;
let guard = mgr.begin_command(&TenantId::new("tenant-a")).await.unwrap();
mgr.drain_tenant(&TenantId::new("tenant-a")).await;
let err = match mgr.begin_command(&TenantId::new("tenant-a")).await {
Ok(_) => panic!("expected error"),
Err(e) => e,
};
assert!(matches!(err, AggregateError::TenantDraining { .. }));
drop(guard);
mgr.wait_drained(&TenantId::new("tenant-a")).await;
let err = match mgr.begin_command(&TenantId::new("tenant-a")).await {
Ok(_) => panic!("expected error"),
Err(e) => e,
};
assert!(matches!(err, AggregateError::TenantDraining { .. }));
}
}

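A sketch of the guard pattern above in a command path (the tenant id is illustrative): `begin_command` refuses unhosted or draining tenants, and dropping the guard decrements the in-flight gauge even on early returns.

```rust
use std::sync::Arc;

use aggregate::placement::TenantPlacementManager;
use aggregate::types::{AggregateError, TenantId};

async fn run_one_command(mgr: Arc<TenantPlacementManager>) -> Result<(), AggregateError> {
    let tenant = TenantId::new("tenant-a");
    let _guard = mgr.begin_command(&tenant).await?;
    // ... handle the command while the guard is alive ...
    Ok(())
} // guard dropped here; the in-flight count decrements asynchronously
```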
aggregate/src/query/client.rs Normal file

@@ -0,0 +1,594 @@
use super::{AggregateProjection, QueryRequest, QueryResponse};
use crate::types::TenantId;
use futures::stream::Stream;
use serde_json::Value as JsonValue;
use std::collections::HashMap;
use std::pin::Pin;
use std::sync::Arc;
use thiserror::Error;
use tokio::sync::broadcast;
use tokio::sync::RwLock;
#[derive(Debug, Error)]
pub enum QueryError {
#[error("Query syntax error: {0}")]
SyntaxError(String),
#[error("Connection error: {0}")]
ConnectionError(String),
#[error("Tenant not found: {0}")]
TenantNotFound(String),
#[error("Internal error: {0}")]
InternalError(String),
}
pub type QueryResult<T> = Result<T, QueryError>;
#[derive(Debug, Clone)]
pub struct QueryConfig {
pub endpoint: Option<String>,
pub embedded: bool,
pub cache_size: usize,
pub cache_ttl_seconds: u64,
}
impl Default for QueryConfig {
fn default() -> Self {
Self {
endpoint: None,
embedded: true,
cache_size: 1000,
cache_ttl_seconds: 60,
}
}
}
impl QueryConfig {
pub fn embedded() -> Self {
Self {
embedded: true,
..Default::default()
}
}
pub fn remote(endpoint: impl Into<String>) -> Self {
Self {
endpoint: Some(endpoint.into()),
embedded: false,
..Default::default()
}
}
}
#[derive(Debug, Clone)]
struct CacheEntry {
projection: AggregateProjection,
inserted_at: std::time::Instant,
}
#[derive(Debug, Clone)]
pub struct QueryClient {
config: QueryConfig,
storage: Arc<RwLock<HashMap<String, Vec<AggregateProjection>>>>,
cache: Arc<RwLock<lru::LruCache<String, CacheEntry>>>,
updates: broadcast::Sender<AggregateProjection>,
}
impl QueryClient {
pub fn new(config: QueryConfig) -> Self {
let cache = lru::LruCache::new(
std::num::NonZeroUsize::new(config.cache_size)
.unwrap_or_else(|| std::num::NonZeroUsize::new(1000).unwrap()),
);
let (updates, _) = broadcast::channel(1024);
Self {
config,
storage: Arc::new(RwLock::new(HashMap::new())),
cache: Arc::new(RwLock::new(cache)),
updates,
}
}
pub fn embedded() -> Self {
Self::new(QueryConfig::embedded())
}
fn make_key(tenant_id: &str, aggregate_id: &str) -> String {
format!("{}:{}", tenant_id, aggregate_id)
}
pub async fn index(&self, projection: AggregateProjection) -> QueryResult<()> {
let key = Self::make_key(&projection.tenant_id, &projection.aggregate_id);
let _ = self.updates.send(projection.clone());
{
let mut cache = self.cache.write().await;
cache.put(
key.clone(),
CacheEntry {
projection: projection.clone(),
inserted_at: std::time::Instant::now(),
},
);
}
let mut storage = self.storage.write().await;
let tenant_projections = storage.entry(projection.tenant_id.clone()).or_default();
if let Some(existing) = tenant_projections
.iter_mut()
.find(|p| p.aggregate_id == projection.aggregate_id)
{
*existing = projection;
} else {
tenant_projections.push(projection);
}
Ok(())
}
pub fn subscribe(
&self,
tenant_id: TenantId,
) -> Pin<Box<dyn Stream<Item = AggregateProjection> + Send>> {
let tenant_id = tenant_id.as_str().to_string();
let receiver = self.updates.subscribe();
Box::pin(futures::stream::unfold(
(receiver, tenant_id),
|(mut receiver, tenant_id)| async move {
loop {
match receiver.recv().await {
Ok(proj) => {
if proj.tenant_id == tenant_id {
return Some((proj, (receiver, tenant_id)));
}
}
Err(broadcast::error::RecvError::Lagged(_)) => continue,
Err(broadcast::error::RecvError::Closed) => return None,
}
}
},
))
}
pub async fn query(&self, request: QueryRequest) -> QueryResult<QueryResponse> {
let storage = self.storage.read().await;
let tenant_projections = storage.get(&request.tenant_id);
let projections: Vec<AggregateProjection> = match tenant_projections {
Some(projs) => {
let mut filtered: Vec<_> = projs
.iter()
.filter(|p| {
if let Some(ref at) = request.aggregate_type {
&p.aggregate_type == at
} else {
true
}
})
.filter(|p| {
if let Some(ref filter) = request.filter {
self.evaluate_filter(&p.state, filter).unwrap_or(false)
} else {
true
}
})
.cloned()
.collect();
filtered.sort_by(|a, b| b.updated_at.cmp(&a.updated_at));
filtered
}
None => Vec::new(),
};
let total = projections.len();
let offset = request.offset.unwrap_or(0);
let limit = request.limit.unwrap_or(100);
let results: Vec<AggregateProjection> =
projections.into_iter().skip(offset).take(limit).collect();
// Compute has_more from the absolute position so that a full page that
// ends exactly at `total` does not report further results.
let has_more = offset + results.len() < total;
Ok(QueryResponse {
results,
total,
has_more,
})
}
pub async fn get(
&self,
tenant_id: &TenantId,
aggregate_id: &str,
) -> QueryResult<Option<AggregateProjection>> {
let key = Self::make_key(tenant_id.as_str(), aggregate_id);
{
let mut cache = self.cache.write().await;
if let Some(entry) = cache.get(&key) {
let elapsed = entry.inserted_at.elapsed().as_secs();
if elapsed < self.config.cache_ttl_seconds {
return Ok(Some(entry.projection.clone()));
}
cache.pop(&key);
}
}
let storage = self.storage.read().await;
let tenant_projections = storage.get(tenant_id.as_str());
Ok(tenant_projections.and_then(|projs| {
projs
.iter()
.find(|p| p.aggregate_id == aggregate_id)
.cloned()
}))
}
pub async fn delete(&self, tenant_id: &TenantId, aggregate_id: &str) -> QueryResult<bool> {
let key = Self::make_key(tenant_id.as_str(), aggregate_id);
{
let mut cache = self.cache.write().await;
cache.pop(&key);
}
let mut storage = self.storage.write().await;
if let Some(tenant_projections) = storage.get_mut(tenant_id.as_str()) {
let len_before = tenant_projections.len();
tenant_projections.retain(|p| p.aggregate_id != aggregate_id);
return Ok(tenant_projections.len() < len_before);
}
Ok(false)
}
pub async fn clear_tenant(&self, tenant_id: &TenantId) -> QueryResult<usize> {
let mut storage = self.storage.write().await;
let count = storage
.remove(tenant_id.as_str())
.map(|v| v.len())
.unwrap_or(0);
let mut cache = self.cache.write().await;
let prefix = format!("{}:", tenant_id.as_str());
let keys_to_remove: Vec<_> = cache
.iter()
.filter(|(k, _)| k.starts_with(&prefix))
.map(|(k, _)| k.clone())
.collect();
for key in keys_to_remove {
cache.pop(&key);
}
Ok(count)
}
fn evaluate_filter(&self, state: &JsonValue, filter: &str) -> QueryResult<bool> {
let filter = filter.trim();
if filter.is_empty() || filter == "*" {
return Ok(true);
}
// Match two-character operators before '>' and '<'; otherwise a filter
// such as "balance >= 10" splits on '>' and fails to parse its value.
if let Some((field, op_value)) = filter.split_once(">=") {
return self.compare_field(state, field.trim(), op_value.trim(), |a, b| a >= b);
}
if let Some((field, op_value)) = filter.split_once("<=") {
return self.compare_field(state, field.trim(), op_value.trim(), |a, b| a <= b);
}
if let Some((field, op_value)) = filter.split_once("==") {
return self.compare_field(state, field.trim(), op_value.trim(), |a, b| a == b);
}
if let Some((field, op_value)) = filter.split_once("!=") {
return self.compare_field(state, field.trim(), op_value.trim(), |a, b| a != b);
}
if let Some((field, op_value)) = filter.split_once('>') {
return self.compare_field(state, field.trim(), op_value.trim(), |a, b| a > b);
}
if let Some((field, op_value)) = filter.split_once('<') {
return self.compare_field(state, field.trim(), op_value.trim(), |a, b| a < b);
}
Ok(false)
}
fn compare_field<F>(
&self,
state: &JsonValue,
field: &str,
value_str: &str,
compare: F,
) -> QueryResult<bool>
where
F: Fn(f64, f64) -> bool,
{
let field_value = state.get(field);
let field_num = match field_value {
Some(JsonValue::Number(n)) => n.as_f64().unwrap_or(f64::NAN),
Some(JsonValue::String(s)) => s.parse::<f64>().unwrap_or(f64::NAN),
_ => return Ok(false),
};
let compare_num = value_str.parse::<f64>().unwrap_or(f64::NAN);
if field_num.is_nan() || compare_num.is_nan() {
return Ok(false);
}
Ok(compare(field_num, compare_num))
}
}
#[cfg(test)]
mod tests {
use super::*;
use futures::StreamExt;
use serde_json::json;
fn create_test_client() -> QueryClient {
QueryClient::embedded()
}
fn create_test_projection(tenant: &str, id: &str, balance: i64) -> AggregateProjection {
AggregateProjection::new(tenant, id, "Account", 1, json!({"balance": balance}))
}
#[tokio::test]
async fn query_client_index_and_query() {
let client = create_test_client();
let proj = create_test_projection("tenant-a", "acc-1", 100);
client.index(proj).await.unwrap();
let request = QueryRequest::new("tenant-a").with_filter("balance > 50");
let response = client.query(request).await.unwrap();
assert_eq!(response.results.len(), 1);
assert_eq!(response.results[0].aggregate_id, "acc-1");
}
#[tokio::test]
async fn query_client_tenant_isolation() {
let client = create_test_client();
client
.index(create_test_projection("tenant-a", "acc-1", 100))
.await
.unwrap();
client
.index(create_test_projection("tenant-b", "acc-2", 200))
.await
.unwrap();
let response_a = client.query(QueryRequest::new("tenant-a")).await.unwrap();
let response_b = client.query(QueryRequest::new("tenant-b")).await.unwrap();
assert_eq!(response_a.results.len(), 1);
assert_eq!(response_b.results.len(), 1);
assert_eq!(response_a.results[0].state["balance"], 100);
assert_eq!(response_b.results[0].state["balance"], 200);
}
#[tokio::test]
async fn query_client_filter_operations() {
let client = create_test_client();
client
.index(create_test_projection("tenant-a", "acc-1", 100))
.await
.unwrap();
client
.index(create_test_projection("tenant-a", "acc-2", 50))
.await
.unwrap();
client
.index(create_test_projection("tenant-a", "acc-3", 150))
.await
.unwrap();
let gt_response = client
.query(QueryRequest::new("tenant-a").with_filter("balance > 75"))
.await
.unwrap();
assert_eq!(gt_response.results.len(), 2);
let lt_response = client
.query(QueryRequest::new("tenant-a").with_filter("balance < 75"))
.await
.unwrap();
assert_eq!(lt_response.results.len(), 1);
let eq_response = client
.query(QueryRequest::new("tenant-a").with_filter("balance == 100"))
.await
.unwrap();
assert_eq!(eq_response.results.len(), 1);
}
#[tokio::test]
async fn query_client_pagination() {
let client = create_test_client();
for i in 0..25 {
client
.index(create_test_projection(
"tenant-a",
&format!("acc-{}", i),
i * 10,
))
.await
.unwrap();
}
let page1 = client
.query(QueryRequest::new("tenant-a").with_limit(10))
.await
.unwrap();
assert_eq!(page1.results.len(), 10);
assert!(page1.has_more);
let page2 = client
.query(QueryRequest::new("tenant-a").with_limit(10).with_offset(10))
.await
.unwrap();
assert_eq!(page2.results.len(), 10);
let page3 = client
.query(QueryRequest::new("tenant-a").with_limit(10).with_offset(20))
.await
.unwrap();
assert_eq!(page3.results.len(), 5);
assert!(!page3.has_more);
}
#[tokio::test]
async fn query_client_get_by_id() {
let client = create_test_client();
client
.index(create_test_projection("tenant-a", "acc-1", 100))
.await
.unwrap();
let tenant = TenantId::new("tenant-a");
let result = client.get(&tenant, "acc-1").await.unwrap();
assert!(result.is_some());
let proj = result.unwrap();
assert_eq!(proj.aggregate_id, "acc-1");
assert_eq!(proj.state["balance"], 100);
}
#[tokio::test]
async fn query_client_delete() {
let client = create_test_client();
client
.index(create_test_projection("tenant-a", "acc-1", 100))
.await
.unwrap();
let tenant = TenantId::new("tenant-a");
let deleted = client.delete(&tenant, "acc-1").await.unwrap();
assert!(deleted);
let result = client.get(&tenant, "acc-1").await.unwrap();
assert!(result.is_none());
}
#[tokio::test]
async fn query_client_clear_tenant() {
let client = create_test_client();
client
.index(create_test_projection("tenant-a", "acc-1", 100))
.await
.unwrap();
client
.index(create_test_projection("tenant-a", "acc-2", 200))
.await
.unwrap();
client
.index(create_test_projection("tenant-b", "acc-3", 300))
.await
.unwrap();
let tenant = TenantId::new("tenant-a");
let count = client.clear_tenant(&tenant).await.unwrap();
assert_eq!(count, 2);
let response_a = client.query(QueryRequest::new("tenant-a")).await.unwrap();
assert_eq!(response_a.results.len(), 0);
let response_b = client.query(QueryRequest::new("tenant-b")).await.unwrap();
assert_eq!(response_b.results.len(), 1);
}
#[tokio::test]
async fn query_client_update_existing() {
let client = create_test_client();
client
.index(create_test_projection("tenant-a", "acc-1", 100))
.await
.unwrap();
client
.index(AggregateProjection::new(
"tenant-a",
"acc-1",
"Account",
2,
json!({"balance": 250}),
))
.await
.unwrap();
let response = client.query(QueryRequest::new("tenant-a")).await.unwrap();
assert_eq!(response.results.len(), 1);
assert_eq!(response.results[0].version, 2);
assert_eq!(response.results[0].state["balance"], 250);
}
#[tokio::test]
async fn query_client_subscribe_receives_updates() {
let client = create_test_client();
let mut updates = client.subscribe(TenantId::new("tenant-a"));
client
.index(create_test_projection("tenant-a", "acc-1", 100))
.await
.unwrap();
let next = updates.next().await.unwrap();
assert_eq!(next.tenant_id, "tenant-a");
assert_eq!(next.aggregate_id, "acc-1");
assert_eq!(next.state["balance"], 100);
}
#[test]
fn query_config_defaults() {
let config = QueryConfig::default();
assert!(config.embedded);
assert!(config.endpoint.is_none());
assert_eq!(config.cache_size, 1000);
assert_eq!(config.cache_ttl_seconds, 60);
}
#[test]
fn query_request_builder() {
let request = QueryRequest::new("tenant-a")
.with_aggregate_type("Account")
.with_filter("balance > 100")
.with_limit(50)
.with_offset(10);
assert_eq!(request.tenant_id, "tenant-a");
assert_eq!(request.aggregate_type, Some("Account".to_string()));
assert_eq!(request.filter, Some("balance > 100".to_string()));
assert_eq!(request.limit, Some(50));
assert_eq!(request.offset, Some(10));
}
}

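The filter grammar above is deliberately tiny: one `field OP number` comparison per query, with `>=`, `<=`, `==`, `!=`, `>`, `<`. A usage sketch against the embedded client (tenant and field names are illustrative):

```rust
use aggregate::{QueryClient, QueryRequest};

async fn overdrawn_accounts(client: &QueryClient) -> usize {
    let request = QueryRequest::new("tenant-a")
        .with_aggregate_type("Account")
        .with_filter("balance <= 0");
    client.query(request).await.expect("embedded query").total
}
```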
aggregate/src/query/mod.rs Normal file

@@ -0,0 +1,193 @@
mod client;
mod projection;
pub use client::{QueryClient, QueryConfig, QueryError, QueryResult};
pub use projection::{ProjectionConfig, StateProjection};
use serde::{Deserialize, Serialize};
use serde_json::Value as JsonValue;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AggregateProjection {
pub tenant_id: String,
pub aggregate_id: String,
pub aggregate_type: String,
pub version: u64,
pub state: JsonValue,
pub updated_at: chrono::DateTime<chrono::Utc>,
}
impl AggregateProjection {
pub fn new(
tenant_id: impl Into<String>,
aggregate_id: impl Into<String>,
aggregate_type: impl Into<String>,
version: u64,
state: JsonValue,
) -> Self {
Self {
tenant_id: tenant_id.into(),
aggregate_id: aggregate_id.into(),
aggregate_type: aggregate_type.into(),
version,
state,
updated_at: chrono::Utc::now(),
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QueryRequest {
pub tenant_id: String,
pub aggregate_type: Option<String>,
pub filter: Option<String>,
pub limit: Option<usize>,
pub offset: Option<usize>,
}
impl QueryRequest {
pub fn new(tenant_id: impl Into<String>) -> Self {
Self {
tenant_id: tenant_id.into(),
aggregate_type: None,
filter: None,
limit: None,
offset: None,
}
}
pub fn with_aggregate_type(mut self, aggregate_type: impl Into<String>) -> Self {
self.aggregate_type = Some(aggregate_type.into());
self
}
pub fn with_filter(mut self, filter: impl Into<String>) -> Self {
self.filter = Some(filter.into());
self
}
pub fn with_limit(mut self, limit: usize) -> Self {
self.limit = Some(limit);
self
}
pub fn with_offset(mut self, offset: usize) -> Self {
self.offset = Some(offset);
self
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QueryResponse {
pub results: Vec<AggregateProjection>,
pub total: usize,
pub has_more: bool,
}
impl QueryResponse {
pub fn empty() -> Self {
Self {
results: Vec::new(),
total: 0,
has_more: false,
}
}
pub fn from_results(
results: Vec<AggregateProjection>,
total: usize,
limit: Option<usize>,
) -> Self {
let has_more = limit.is_some_and(|l| results.len() == l && total > results.len());
Self {
results,
total,
has_more,
}
}
}
#[derive(Debug, Clone)]
pub struct QueryServer {
query: QueryClient,
}
impl QueryServer {
pub fn new(query: QueryClient) -> Self {
Self { query }
}
pub fn query_client(&self) -> &QueryClient {
&self.query
}
pub async fn handle(&self, request: QueryRequest) -> QueryResult<QueryResponse> {
self.query.query(request).await
}
pub async fn handle_raw(
&self,
tenant_id: impl Into<String>,
aggregate_type: Option<String>,
filter: Option<String>,
limit: Option<usize>,
offset: Option<usize>,
) -> QueryResult<QueryResponse> {
let mut request = QueryRequest::new(tenant_id);
request.aggregate_type = aggregate_type;
request.filter = filter;
request.limit = limit;
request.offset = offset;
self.handle(request).await
}
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
#[tokio::test]
async fn query_server_filters_by_tenant() {
let query = QueryClient::embedded();
let server = QueryServer::new(query.clone());
query
.index(AggregateProjection::new(
"tenant-a",
"agg-1",
"Account",
1,
json!({ "balance": 100 }),
))
.await
.unwrap();
query
.index(AggregateProjection::new(
"tenant-b",
"agg-2",
"Account",
1,
json!({ "balance": 200 }),
))
.await
.unwrap();
let resp = server
.handle_raw(
"tenant-a",
Some("Account".to_string()),
Some("balance > 50".to_string()),
Some(100),
Some(0),
)
.await
.unwrap();
assert_eq!(resp.total, 1);
assert_eq!(resp.results[0].tenant_id, "tenant-a");
assert_eq!(resp.results[0].state["balance"], 100);
}
}

aggregate/src/query/projection.rs Normal file

@@ -0,0 +1,217 @@
use super::AggregateProjection;
use crate::types::{AggregateId, AggregateType, Event, TenantId, Version};
use serde_json::Value as JsonValue;
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::RwLock;
#[derive(Debug, Clone)]
pub struct ProjectionConfig {
pub batch_size: usize,
pub projection_timeout_ms: u64,
}
impl Default for ProjectionConfig {
fn default() -> Self {
Self {
batch_size: 100,
projection_timeout_ms: 5000,
}
}
}
pub struct StateProjection {
config: ProjectionConfig,
handlers: Arc<RwLock<HashMap<String, ProjectionHandler>>>,
}
type ProjectionHandler = Box<dyn Fn(&Event) -> Option<AggregateProjection> + Send + Sync>;
impl StateProjection {
pub fn new(config: ProjectionConfig) -> Self {
Self {
config,
handlers: Arc::new(RwLock::new(HashMap::new())),
}
}
pub fn new_default() -> Self {
Self::new(ProjectionConfig::default())
}
pub async fn register_handler<F>(&self, aggregate_type: &str, handler: F)
where
F: Fn(&Event) -> Option<AggregateProjection> + Send + Sync + 'static,
{
let mut handlers = self.handlers.write().await;
handlers.insert(aggregate_type.to_string(), Box::new(handler));
}
pub async fn project_event(&self, event: &Event) -> Option<AggregateProjection> {
let handlers = self.handlers.read().await;
let aggregate_type = event.aggregate_type.as_str();
handlers.get(aggregate_type).and_then(|h| h(event))
}
pub async fn project_events(&self, events: &[Event]) -> Vec<AggregateProjection> {
let mut projections = Vec::with_capacity(events.len().min(self.config.batch_size));
for event in events.iter().take(self.config.batch_size) {
if let Some(proj) = self.project_event(event).await {
projections.push(proj);
}
}
projections
}
pub fn default_projection_from_event(event: &Event) -> AggregateProjection {
AggregateProjection::new(
event.tenant_id.as_str(),
event.aggregate_id.to_string(),
event.aggregate_type.as_str(),
event.version.as_u64(),
event.payload.clone(),
)
}
pub fn default_projection_from_state(
tenant_id: &TenantId,
aggregate_id: &AggregateId,
aggregate_type: &AggregateType,
version: &Version,
state: &JsonValue,
) -> AggregateProjection {
AggregateProjection::new(
tenant_id.as_str(),
aggregate_id.to_string(),
aggregate_type.as_str(),
version.as_u64(),
state.clone(),
)
}
}
#[cfg(test)]
mod tests {
use super::*;
use chrono::Utc;
use serde_json::json;
fn create_test_event(tenant: &str, version: u64, event_type: &str) -> Event {
Event {
event_id: uuid::Uuid::now_v7(),
tenant_id: TenantId::new(tenant),
aggregate_id: AggregateId::new_v7(),
aggregate_type: AggregateType::from("Account"),
version: Version::from(version),
event_type: event_type.to_string(),
payload: json!({"amount": 100}),
timestamp: Utc::now(),
command_id: uuid::Uuid::nil(),
correlation_id: None,
traceparent: None,
}
}
#[tokio::test]
async fn state_projection_registers_handler() {
let projection = StateProjection::new_default();
projection
.register_handler("Account", |event| {
Some(AggregateProjection::new(
event.tenant_id.as_str(),
event.aggregate_id.to_string(),
"Account",
event.version.as_u64(),
event.payload.clone(),
))
})
.await;
let event = create_test_event("tenant-a", 1, "deposited");
let result = projection.project_event(&event).await;
assert!(result.is_some());
let proj = result.unwrap();
assert_eq!(proj.aggregate_type, "Account");
}
#[tokio::test]
async fn state_projection_project_events_batch() {
let projection = StateProjection::new_default();
projection
.register_handler("Account", |event| {
Some(AggregateProjection::new(
event.tenant_id.as_str(),
event.aggregate_id.to_string(),
"Account",
event.version.as_u64(),
event.payload.clone(),
))
})
.await;
let events = vec![
create_test_event("tenant-a", 1, "deposited"),
create_test_event("tenant-a", 1, "deposited"),
create_test_event("tenant-a", 1, "deposited"),
];
let projections = projection.project_events(&events).await;
assert_eq!(projections.len(), 3);
}
#[tokio::test]
async fn state_projection_no_handler_returns_none() {
let projection = StateProjection::new_default();
let event = create_test_event("tenant-a", 1, "deposited");
let result = projection.project_event(&event).await;
assert!(result.is_none());
}
#[test]
fn default_projection_from_event() {
let event = create_test_event("tenant-a", 5, "deposited");
let proj = StateProjection::default_projection_from_event(&event);
assert_eq!(proj.tenant_id, "tenant-a");
assert_eq!(proj.version, 5);
assert_eq!(proj.state["amount"], 100);
}
#[test]
fn default_projection_from_state() {
let tenant_id = TenantId::new("tenant-a");
let aggregate_id = AggregateId::new_v7();
let aggregate_type = AggregateType::from("Account");
let version = Version::from(10);
let state = json!({"balance": 1000});
let proj = StateProjection::default_projection_from_state(
&tenant_id,
&aggregate_id,
&aggregate_type,
&version,
&state,
);
assert_eq!(proj.tenant_id, "tenant-a");
assert_eq!(proj.aggregate_type, "Account");
assert_eq!(proj.version, 10);
assert_eq!(proj.state["balance"], 1000);
}
#[test]
fn projection_config_defaults() {
let config = ProjectionConfig::default();
assert_eq!(config.batch_size, 100);
assert_eq!(config.projection_timeout_ms, 5000);
}
}

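A sketch of wiring the default pass-through projection for one aggregate type, combining `register_handler` with `default_projection_from_event` from the same file:

```rust
use aggregate::StateProjection;

async fn wire_default(projection: &StateProjection) {
    projection
        .register_handler("Account", |event| {
            Some(StateProjection::default_projection_from_event(event))
        })
        .await;
}
```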
aggregate/src/runtime/executor.rs Normal file

@@ -0,0 +1,270 @@
use serde_json::Value as JsonValue;
use std::time::Duration;
pub async fn execute_decide_program(
state: &JsonValue,
command: &JsonValue,
program: &str,
gas_limit: u64,
timeout: Duration,
) -> Result<Vec<JsonValue>, crate::types::AggregateError> {
let _ = (state, command, program, gas_limit, timeout);
#[cfg(feature = "runtime-v8")]
{
return execute_decide_v8(state, command, program, gas_limit, timeout).await;
}
#[cfg(feature = "runtime-wasm")]
{
return execute_decide_wasm(state, command, program, gas_limit, timeout).await;
}
#[cfg(not(any(feature = "runtime-v8", feature = "runtime-wasm")))]
{
Err(crate::types::AggregateError::DecideError(
"No runtime enabled. Enable 'runtime-v8' or 'runtime-wasm' feature.".to_string(),
))
}
}
pub async fn execute_apply_program(
state: &JsonValue,
event: &JsonValue,
program: &str,
gas_limit: u64,
timeout: Duration,
) -> Result<JsonValue, crate::types::AggregateError> {
let _ = (state, event, program, gas_limit, timeout);
#[cfg(feature = "runtime-v8")]
{
return execute_apply_v8(state, event, program, gas_limit, timeout).await;
}
#[cfg(feature = "runtime-wasm")]
{
return execute_apply_wasm(state, event, program, gas_limit, timeout).await;
}
#[cfg(not(any(feature = "runtime-v8", feature = "runtime-wasm")))]
{
Err(crate::types::AggregateError::ApplyError(
"No runtime enabled. Enable 'runtime-v8' or 'runtime-wasm' feature.".to_string(),
))
}
}
#[cfg(feature = "runtime-v8")]
async fn execute_decide_v8(
state: &JsonValue,
command: &JsonValue,
program: &str,
gas_limit: u64,
timeout: Duration,
) -> Result<Vec<JsonValue>, crate::types::AggregateError> {
use v8::{Context, ContextScope, Function, HandleScope, Isolate, Script};
let state_str = serde_json::to_string(state).map_err(|e| {
crate::types::AggregateError::DecideError(format!("State serialization: {}", e))
})?;
let command_str = serde_json::to_string(command).map_err(|e| {
crate::types::AggregateError::DecideError(format!("Command serialization: {}", e))
})?;
let _ = gas_limit; // gas metering is not implemented for the V8 path
let program = program.to_string(); // spawn_blocking requires 'static captures
let result = tokio::task::spawn_blocking(move || {
let isolate = &mut Isolate::new(v8::CreateParams::default());
let scope = &mut HandleScope::new(isolate);
let context = Context::new(scope);
let scope = &mut ContextScope::new(scope, context);
let source =
v8::String::new(scope, &program).ok_or_else(|| "Failed to create program string")?;
let script =
Script::compile(scope, source, None).ok_or_else(|| "Failed to compile program")?;
script.run(scope).ok_or_else(|| "Failed to run program")?;
let global = context.global(scope);
let decide_name =
v8::String::new(scope, "decide").ok_or_else(|| "Failed to create decide string")?;
let decide_fn = global
.get(scope, decide_name.into())
.and_then(|v| v8::Local::<Function>::try_from(v).ok())
.ok_or_else(|| "decide function not found")?;
let state_json = v8::String::new(scope, &state_str)
.ok_or_else(|| "Failed to create state JSON string")?;
let state_obj =
v8::json::parse(scope, state_json).ok_or_else(|| "Failed to parse state JSON")?;
let command_json = v8::String::new(scope, &command_str)
.ok_or_else(|| "Failed to create command JSON string")?;
let command_obj =
v8::json::parse(scope, command_json).ok_or_else(|| "Failed to parse command JSON")?;
let args: [v8::Local<v8::Value>; 2] = [state_obj.into(), command_obj.into()];
let result = decide_fn
.call(scope, global.into(), &args)
.ok_or_else(|| "decide function call failed")?;
let result_json =
v8::json::stringify(scope, result).ok_or_else(|| "Failed to stringify result")?;
let result_str = result_json.to_rust_string_lossy(scope);
let events: Vec<JsonValue> = serde_json::from_str(&result_str)
.map_err(|e| format!("Failed to parse result: {}", e))?;
Ok::<_, String>(events)
});
let timeout_result = tokio::time::timeout(timeout, result).await;
match timeout_result {
Ok(Ok(Ok(events))) => Ok(events),
Ok(Ok(Err(e))) => Err(crate::types::AggregateError::DecideError(e)),
Ok(Err(_)) => Err(crate::types::AggregateError::DecideError(
"Task join error".to_string(),
)),
Err(_) => Err(crate::types::AggregateError::DecideError(
"Execution timeout".to_string(),
)),
}
}
#[cfg(feature = "runtime-v8")]
async fn execute_apply_v8(
state: &JsonValue,
event: &JsonValue,
program: &str,
gas_limit: u64,
timeout: Duration,
) -> Result<JsonValue, crate::types::AggregateError> {
use v8::{Context, ContextScope, Function, HandleScope, Isolate, Script};
let state_str = serde_json::to_string(state).map_err(|e| {
crate::types::AggregateError::ApplyError(format!("State serialization: {}", e))
})?;
let event_str = serde_json::to_string(event).map_err(|e| {
crate::types::AggregateError::ApplyError(format!("Event serialization: {}", e))
})?;
    let _ = gas_limit; // gas metering is not yet enforced by the V8 runtime
    let program = program.to_string(); // own the source so the closure is 'static
    let result = tokio::task::spawn_blocking(move || {
let isolate = &mut Isolate::new(v8::CreateParams::default());
let scope = &mut HandleScope::new(isolate);
let context = Context::new(scope);
let scope = &mut ContextScope::new(scope, context);
        let source = v8::String::new(scope, &program)
            .ok_or_else(|| "Failed to create program string")?;
let script =
Script::compile(scope, source, None).ok_or_else(|| "Failed to compile program")?;
script.run(scope).ok_or_else(|| "Failed to run program")?;
let global = context.global(scope);
let apply_name =
v8::String::new(scope, "apply").ok_or_else(|| "Failed to create apply string")?;
let apply_fn = global
.get(scope, apply_name.into())
.and_then(|v| v8::Local::<Function>::try_from(v).ok())
.ok_or_else(|| "apply function not found")?;
let state_json = v8::String::new(scope, &state_str)
.ok_or_else(|| "Failed to create state JSON string")?;
let state_obj =
v8::json::parse(scope, state_json).ok_or_else(|| "Failed to parse state JSON")?;
let event_json = v8::String::new(scope, &event_str)
.ok_or_else(|| "Failed to create event JSON string")?;
let event_obj =
v8::json::parse(scope, event_json).ok_or_else(|| "Failed to parse event JSON")?;
let args: [v8::Local<v8::Value>; 2] = [state_obj.into(), event_obj.into()];
let result = apply_fn
.call(scope, global.into(), &args)
.ok_or_else(|| "apply function call failed")?;
let result_json =
v8::json::stringify(scope, result).ok_or_else(|| "Failed to stringify result")?;
let result_str = result_json.to_rust_string_lossy(scope);
let new_state: JsonValue = serde_json::from_str(&result_str)
.map_err(|e| format!("Failed to parse result: {}", e))?;
Ok::<_, String>(new_state)
});
let timeout_result = tokio::time::timeout(timeout, result).await;
match timeout_result {
Ok(Ok(Ok(new_state))) => Ok(new_state),
Ok(Ok(Err(e))) => Err(crate::types::AggregateError::ApplyError(e)),
Ok(Err(_)) => Err(crate::types::AggregateError::ApplyError(
"Task join error".to_string(),
)),
Err(_) => Err(crate::types::AggregateError::ApplyError(
"Execution timeout".to_string(),
)),
}
}
#[cfg(feature = "runtime-wasm")]
async fn execute_decide_wasm(
state: &JsonValue,
command: &JsonValue,
_program: &str,
_gas_limit: u64,
_timeout: Duration,
) -> Result<Vec<JsonValue>, crate::types::AggregateError> {
let _ = (state, command);
Err(crate::types::AggregateError::DecideError(
"WASM runtime not yet implemented".to_string(),
))
}
#[cfg(feature = "runtime-wasm")]
async fn execute_apply_wasm(
state: &JsonValue,
event: &JsonValue,
_program: &str,
_gas_limit: u64,
_timeout: Duration,
) -> Result<JsonValue, crate::types::AggregateError> {
let _ = (state, event);
Err(crate::types::AggregateError::ApplyError(
"WASM runtime not yet implemented".to_string(),
))
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
#[tokio::test]
async fn no_runtime_returns_error() {
#[cfg(not(any(feature = "runtime-v8", feature = "runtime-wasm")))]
{
let state = json!({});
let command = json!({});
let result =
execute_decide_program(&state, &command, "program", 1000, Duration::from_secs(1))
.await;
assert!(result.is_err());
assert!(matches!(
result.unwrap_err(),
crate::types::AggregateError::DecideError(_)
));
}
}
}


@@ -0,0 +1,484 @@
mod executor;
use lru::LruCache;
use std::num::NonZeroUsize;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::RwLock;
use crate::types::{AggregateError, Command, Event};
use serde_json::Value as JsonValue;
pub use executor::{execute_apply_program, execute_decide_program};
const DEFAULT_GAS_LIMIT: u64 = 1_000_000;
const DEFAULT_TIMEOUT_MS: u64 = 5_000;
const CACHE_SIZE: usize = 100;
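/// Tunables for program execution: gas budget, wall-clock timeout, program
/// caching, and an in-process mock runtime used by tests.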
#[derive(Debug, Clone)]
pub struct ExecutorConfig {
pub gas_limit: u64,
pub timeout: Duration,
pub cache_programs: bool,
pub mock_runtime: bool,
}
impl Default for ExecutorConfig {
fn default() -> Self {
Self {
gas_limit: DEFAULT_GAS_LIMIT,
timeout: Duration::from_millis(DEFAULT_TIMEOUT_MS),
cache_programs: true,
mock_runtime: false,
}
}
}
impl ExecutorConfig {
pub fn with_gas_limit(mut self, limit: u64) -> Self {
self.gas_limit = limit;
self
}
pub fn with_timeout(mut self, timeout: Duration) -> Self {
self.timeout = timeout;
self
}
pub fn without_cache(mut self) -> Self {
self.cache_programs = false;
self
}
pub fn with_mock_runtime(mut self) -> Self {
self.mock_runtime = true;
self
}
}
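/// Content hash of a program source, used as the cache key. Built on
/// `DefaultHasher`, so it is cheap but not cryptographic.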
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
pub struct ProgramHash(String);
impl ProgramHash {
pub fn new(program: &str) -> Self {
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
let mut hasher = DefaultHasher::new();
program.hash(&mut hasher);
Self(format!("{:x}", hasher.finish()))
}
}
#[derive(Debug, Clone)]
pub struct DecideResult {
pub events: Vec<JsonValue>,
}
#[derive(Debug, Clone)]
pub struct ApplyResult {
pub new_state: JsonValue,
}
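/// Runs tenant-supplied `decide`/`apply` programs under the configured gas
/// and timeout limits, keeping an LRU cache of recently seen program sources.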
#[derive(Debug, Clone)]
pub struct RuntimeExecutor {
config: ExecutorConfig,
program_cache: Arc<RwLock<LruCache<ProgramHash, String>>>,
}
impl RuntimeExecutor {
pub fn new() -> Self {
Self::with_config(ExecutorConfig::default())
}
pub fn with_config(config: ExecutorConfig) -> Self {
let cache_size = NonZeroUsize::new(CACHE_SIZE).unwrap();
Self {
config,
program_cache: Arc::new(RwLock::new(LruCache::new(cache_size))),
}
}
pub async fn execute_decide(
&self,
state: &JsonValue,
command: &Command,
decide_program: &str,
) -> Result<DecideResult, AggregateError> {
if self.config.mock_runtime {
let events = mock_decide(state, command)?;
return Ok(DecideResult { events });
}
if self.config.cache_programs {
let hash = ProgramHash::new(decide_program);
let mut cache = self.program_cache.write().await;
cache.put(hash.clone(), decide_program.to_string());
}
let command_json = serde_json::to_value(command).map_err(|e| {
AggregateError::DecideError(format!("Command serialization failed: {}", e))
})?;
let result = executor::execute_decide_program(
state,
&command_json,
decide_program,
self.config.gas_limit,
self.config.timeout,
)
.await?;
Ok(DecideResult { events: result })
}
pub async fn execute_apply(
&self,
state: &JsonValue,
event: &Event,
apply_program: &str,
) -> Result<ApplyResult, AggregateError> {
if self.config.mock_runtime {
let new_state = mock_apply(state, event)?;
return Ok(ApplyResult { new_state });
}
if self.config.cache_programs {
let hash = ProgramHash::new(apply_program);
let mut cache = self.program_cache.write().await;
cache.put(hash.clone(), apply_program.to_string());
}
let event_json = serde_json::to_value(event).map_err(|e| {
AggregateError::ApplyError(format!("Event serialization failed: {}", e))
})?;
let result = executor::execute_apply_program(
state,
&event_json,
apply_program,
self.config.gas_limit,
self.config.timeout,
)
.await?;
Ok(ApplyResult { new_state: result })
}
pub async fn execute_apply_raw(
&self,
state: &JsonValue,
event: &JsonValue,
apply_program: &str,
) -> Result<ApplyResult, AggregateError> {
if self.config.mock_runtime {
let _ = apply_program;
return Err(AggregateError::ApplyError(
"mock_runtime does not support execute_apply_raw".to_string(),
));
}
if self.config.cache_programs {
let hash = ProgramHash::new(apply_program);
let mut cache = self.program_cache.write().await;
cache.put(hash.clone(), apply_program.to_string());
}
let result = executor::execute_apply_program(
state,
event,
apply_program,
self.config.gas_limit,
self.config.timeout,
)
.await?;
Ok(ApplyResult { new_state: result })
}
pub fn config(&self) -> &ExecutorConfig {
&self.config
}
pub async fn cache_size(&self) -> usize {
self.program_cache.read().await.len()
}
pub async fn clear_cache(&self) {
self.program_cache.write().await.clear();
}
}
impl Default for RuntimeExecutor {
fn default() -> Self {
Self::new()
}
}
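/// Deterministic stand-in for a decide program when `mock_runtime` is set:
/// a toy account model supporting `deposit` and `withdraw`.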
fn mock_decide(state: &JsonValue, command: &Command) -> Result<Vec<JsonValue>, AggregateError> {
let cmd_type = command
.payload
.get("type")
.and_then(|v| v.as_str())
.unwrap_or("");
let amount = command
.payload
.get("amount")
.and_then(|v| v.as_i64())
.unwrap_or(0);
match cmd_type {
"deposit" => Ok(vec![
serde_json::json!({ "type": "deposited", "amount": amount }),
]),
"withdraw" => {
let balance = state.get("balance").and_then(|v| v.as_i64()).unwrap_or(0);
if balance < amount {
Err(AggregateError::DecideError(
"Insufficient funds".to_string(),
))
} else {
Ok(vec![
serde_json::json!({ "type": "withdrawn", "amount": amount }),
])
}
}
_ => Ok(Vec::new()),
}
}
fn mock_apply(state: &JsonValue, event: &Event) -> Result<JsonValue, AggregateError> {
let mut new_state = match state {
JsonValue::Object(map) => JsonValue::Object(map.clone()),
_ => serde_json::json!({}),
};
let balance = new_state
.get("balance")
.and_then(|v| v.as_i64())
.unwrap_or(0);
let amount = event
.payload
.get("amount")
.and_then(|v| v.as_i64())
.unwrap_or(0);
let next_balance = match event.event_type.as_str() {
"deposited" => balance + amount,
"withdrawn" => balance - amount,
_ => balance,
};
if let JsonValue::Object(map) = &mut new_state {
map.insert("balance".to_string(), JsonValue::from(next_balance));
}
Ok(new_state)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::types::{AggregateId, AggregateType, TenantId, Version};
use serde_json::json;
use std::collections::HashMap;
const DECIDE_PROGRAM: &str = r#"
function decide(state, command) {
if (command.type === "deposit") {
return [{ type: "deposited", amount: command.amount }];
}
if (command.type === "withdraw") {
if (state.balance < command.amount) {
throw new Error("Insufficient funds");
}
return [{ type: "withdrawn", amount: command.amount }];
}
return [];
}
"#;
const APPLY_PROGRAM: &str = r#"
function apply(state, event) {
if (event.type === "deposited") {
state.balance = (state.balance || 0) + event.amount;
}
if (event.type === "withdrawn") {
state.balance = (state.balance || 0) - event.amount;
}
return state;
}
"#;
fn make_command(cmd_type: &str, amount: i64, tenant_id: &TenantId) -> Command {
Command {
command_id: uuid::Uuid::now_v7(),
tenant_id: tenant_id.clone(),
aggregate_id: AggregateId::new_v7(),
aggregate_type: AggregateType::from("Account"),
payload: json!({ "type": cmd_type, "amount": amount }),
metadata: HashMap::new(),
}
}
#[tokio::test]
async fn executor_has_defaults() {
let executor = RuntimeExecutor::new();
assert_eq!(executor.config().gas_limit, DEFAULT_GAS_LIMIT);
assert!(executor.config().cache_programs);
}
#[tokio::test]
async fn config_builder_works() {
let config = ExecutorConfig::default()
.with_gas_limit(500_000)
.with_timeout(Duration::from_millis(1000))
.without_cache();
assert_eq!(config.gas_limit, 500_000);
assert_eq!(config.timeout, Duration::from_millis(1000));
assert!(!config.cache_programs);
}
#[tokio::test]
async fn program_hash_is_consistent() {
let h1 = ProgramHash::new("test program");
let h2 = ProgramHash::new("test program");
assert_eq!(h1, h2);
let h3 = ProgramHash::new("different program");
assert_ne!(h1, h3);
}
#[tokio::test]
async fn decide_returns_events_for_deposit() {
let executor = RuntimeExecutor::new();
let tenant_id = TenantId::new("test-tenant");
let state = json!({ "balance": 100 });
let command = make_command("deposit", 50, &tenant_id);
let result = executor
.execute_decide(&state, &command, DECIDE_PROGRAM)
.await;
match result {
Ok(decide_result) => {
assert!(!decide_result.events.is_empty());
}
Err(AggregateError::DecideError(msg)) => {
assert!(
msg.contains("runtime")
|| msg.contains("not available")
|| msg.contains("not implemented")
);
}
Err(e) => panic!("Unexpected error: {:?}", e),
}
}
#[tokio::test]
async fn decide_rejects_invalid_withdraw() {
let executor = RuntimeExecutor::new();
let tenant_id = TenantId::new("test-tenant");
let state = json!({ "balance": 10 });
let command = make_command("withdraw", 100, &tenant_id);
let result = executor
.execute_decide(&state, &command, DECIDE_PROGRAM)
.await;
assert!(matches!(result, Err(AggregateError::DecideError(_))));
}
#[tokio::test]
async fn decide_is_deterministic() {
let executor = RuntimeExecutor::new();
let tenant_id = TenantId::new("test-tenant");
let state = json!({ "balance": 100 });
let command = make_command("deposit", 50, &tenant_id);
let r1 = executor
.execute_decide(&state, &command, DECIDE_PROGRAM)
.await;
let r2 = executor
.execute_decide(&state, &command, DECIDE_PROGRAM)
.await;
assert_eq!(r1.is_ok(), r2.is_ok());
}
#[tokio::test]
async fn apply_transitions_state() {
let executor = RuntimeExecutor::new();
let tenant_id = TenantId::new("test-tenant");
let state = json!({ "balance": 100 });
let event = Event {
event_id: uuid::Uuid::now_v7(),
tenant_id,
aggregate_id: AggregateId::new_v7(),
aggregate_type: AggregateType::from("Account"),
event_type: "deposited".to_string(),
version: Version::from(1),
payload: json!({ "amount": 50 }),
command_id: uuid::Uuid::now_v7(),
timestamp: chrono::Utc::now(),
correlation_id: None,
traceparent: None,
};
let result = executor.execute_apply(&state, &event, APPLY_PROGRAM).await;
match result {
Ok(apply_result) => {
assert!(apply_result.new_state.is_object());
}
Err(AggregateError::ApplyError(msg)) => {
assert!(
msg.contains("runtime")
|| msg.contains("not available")
|| msg.contains("not implemented")
);
}
Err(e) => panic!("Unexpected error: {:?}", e),
}
}
#[tokio::test]
async fn cache_stores_programs() {
let executor = RuntimeExecutor::new();
let tenant_id = TenantId::new("test-tenant");
let state = json!({ "balance": 100 });
let command = make_command("deposit", 50, &tenant_id);
assert_eq!(executor.cache_size().await, 0);
let _ = executor
.execute_decide(&state, &command, DECIDE_PROGRAM)
.await;
assert_eq!(executor.cache_size().await, 1);
}
#[tokio::test]
async fn clear_cache_works() {
let executor = RuntimeExecutor::new();
let tenant_id = TenantId::new("test-tenant");
let state = json!({ "balance": 100 });
let command = make_command("deposit", 50, &tenant_id);
let _ = executor
.execute_decide(&state, &command, DECIDE_PROGRAM)
.await;
assert!(executor.cache_size().await > 0);
executor.clear_cache().await;
assert_eq!(executor.cache_size().await, 0);
}
#[test]
fn executor_is_send_sync() {
fn assert_send_sync<T: Send + Sync>() {}
assert_send_sync::<RuntimeExecutor>();
}
}


@@ -0,0 +1,259 @@
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::RwLock;
use std::time::Instant;
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum HealthStatus {
Healthy,
Degraded { issues: Vec<String> },
Unhealthy { reasons: Vec<String> },
}
impl HealthStatus {
pub fn is_healthy(&self) -> bool {
matches!(self, Self::Healthy)
}
pub fn is_degraded(&self) -> bool {
matches!(self, Self::Degraded { .. })
}
pub fn is_unhealthy(&self) -> bool {
matches!(self, Self::Unhealthy { .. })
}
}
#[derive(Debug, Clone)]
pub struct ComponentHealth {
pub name: String,
pub status: HealthStatus,
pub last_check: Instant,
pub details: HashMap<String, String>,
}
impl ComponentHealth {
pub fn healthy(name: impl Into<String>) -> Self {
Self {
name: name.into(),
status: HealthStatus::Healthy,
last_check: Instant::now(),
details: HashMap::new(),
}
}
pub fn degraded(name: impl Into<String>, issues: Vec<String>) -> Self {
Self {
name: name.into(),
status: HealthStatus::Degraded { issues },
last_check: Instant::now(),
details: HashMap::new(),
}
}
pub fn unhealthy(name: impl Into<String>, reasons: Vec<String>) -> Self {
Self {
name: name.into(),
status: HealthStatus::Unhealthy { reasons },
last_check: Instant::now(),
details: HashMap::new(),
}
}
pub fn with_detail(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
self.details.insert(key.into(), value.into());
self
}
}
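/// Tracks storage and stream connectivity via lock-free flags and keeps a
/// per-component detail map for reporting.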
pub struct HealthChecker {
storage_healthy: AtomicBool,
stream_healthy: AtomicBool,
components: RwLock<HashMap<String, ComponentHealth>>,
}
impl HealthChecker {
pub fn new() -> Self {
Self {
storage_healthy: AtomicBool::new(true),
stream_healthy: AtomicBool::new(true),
components: RwLock::new(HashMap::new()),
}
}
pub fn storage_healthy(&self) -> bool {
self.storage_healthy.load(Ordering::Relaxed)
}
pub fn stream_healthy(&self) -> bool {
self.stream_healthy.load(Ordering::Relaxed)
}
pub fn set_storage_healthy(&self, healthy: bool) {
self.storage_healthy.store(healthy, Ordering::Relaxed);
self.update_component(
"storage",
healthy,
if healthy { "connected" } else { "disconnected" },
);
}
pub fn set_stream_healthy(&self, healthy: bool) {
self.stream_healthy.store(healthy, Ordering::Relaxed);
self.update_component(
"stream",
healthy,
if healthy { "connected" } else { "disconnected" },
);
}
fn update_component(&self, name: &str, healthy: bool, status: &str) {
let mut components = self.components.write().unwrap();
let health = if healthy {
ComponentHealth::healthy(name).with_detail("status", status)
} else {
ComponentHealth::unhealthy(name, vec![format!("status: {}", status)])
};
components.insert(name.to_string(), health);
}
pub fn check(&self) -> HealthStatus {
let storage = self.storage_healthy.load(Ordering::Relaxed);
let stream = self.stream_healthy.load(Ordering::Relaxed);
match (storage, stream) {
(true, true) => HealthStatus::Healthy,
(true, false) | (false, true) => {
let mut issues = Vec::new();
if !storage {
issues.push("storage disconnected".to_string());
}
if !stream {
issues.push("stream disconnected".to_string());
}
HealthStatus::Degraded { issues }
}
(false, false) => HealthStatus::Unhealthy {
reasons: vec![
"storage disconnected".to_string(),
"stream disconnected".to_string(),
],
},
}
}
pub fn is_ready(&self) -> bool {
let status = self.check();
status.is_healthy() || status.is_degraded()
}
pub fn is_live(&self) -> bool {
true
}
pub fn components(&self) -> HashMap<String, ComponentHealth> {
self.components.read().unwrap().clone()
}
}
impl Default for HealthChecker {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn health_status_checks() {
let healthy = HealthStatus::Healthy;
assert!(healthy.is_healthy());
assert!(!healthy.is_degraded());
assert!(!healthy.is_unhealthy());
let degraded = HealthStatus::Degraded {
issues: vec!["test".to_string()],
};
assert!(!degraded.is_healthy());
assert!(degraded.is_degraded());
assert!(!degraded.is_unhealthy());
let unhealthy = HealthStatus::Unhealthy {
reasons: vec!["test".to_string()],
};
assert!(!unhealthy.is_healthy());
assert!(!unhealthy.is_degraded());
assert!(unhealthy.is_unhealthy());
}
#[test]
fn component_health_builders() {
let healthy = ComponentHealth::healthy("storage");
assert_eq!(healthy.name, "storage");
assert!(healthy.status.is_healthy());
let degraded = ComponentHealth::degraded("stream", vec!["slow".to_string()]);
assert!(degraded.status.is_degraded());
let unhealthy = ComponentHealth::unhealthy("db", vec!["down".to_string()]);
assert!(unhealthy.status.is_unhealthy());
}
#[test]
fn health_checker_starts_healthy() {
let checker = HealthChecker::new();
assert!(checker.check().is_healthy());
}
#[test]
fn health_checker_storage_failure() {
let checker = HealthChecker::new();
checker.set_storage_healthy(false);
let status = checker.check();
assert!(status.is_degraded());
}
#[test]
fn health_checker_all_failures() {
let checker = HealthChecker::new();
checker.set_storage_healthy(false);
checker.set_stream_healthy(false);
let status = checker.check();
assert!(status.is_unhealthy());
}
#[test]
fn health_checker_is_ready() {
let checker = HealthChecker::new();
assert!(checker.is_ready());
checker.set_storage_healthy(false);
assert!(checker.is_ready());
}
#[test]
fn health_checker_is_live() {
let checker = HealthChecker::new();
assert!(checker.is_live());
checker.set_storage_healthy(false);
checker.set_stream_healthy(false);
assert!(checker.is_live());
}
#[test]
fn health_checker_tracks_components() {
let checker = HealthChecker::new();
checker.set_storage_healthy(true);
checker.set_stream_healthy(true);
let components = checker.components();
assert!(components.contains_key("storage"));
assert!(components.contains_key("stream"));
}
}

aggregate/src/server/mod.rs Normal file

@@ -0,0 +1,787 @@
mod health;
pub use health::{HealthChecker, HealthStatus};
use crate::aggregate::AggregateHandler;
use crate::observability::Observability;
use crate::placement::{TenantPlacementManager, TenantStatus};
use crate::types::{AggregateError, AggregateId, AggregateType, Command, Event, TenantId};
use serde::de::DeserializeOwned;
use serde::{Deserialize, Serialize};
use std::collections::hash_map::DefaultHasher;
use std::collections::BTreeMap;
use std::collections::HashMap;
use std::hash::{Hash, Hasher};
use std::str::FromStr;
use std::sync::Arc;
#[derive(Debug, Clone)]
pub struct CommandRequest {
pub tenant_id: TenantId,
pub aggregate_id: AggregateId,
pub aggregate_type: AggregateType,
pub payload: serde_json::Value,
pub headers: HashMap<String, String>,
}
impl CommandRequest {
pub fn new(
tenant_id: TenantId,
aggregate_id: AggregateId,
aggregate_type: AggregateType,
payload: serde_json::Value,
) -> Self {
Self {
tenant_id,
aggregate_id,
aggregate_type,
payload,
headers: HashMap::new(),
}
}
pub fn with_header(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
self.headers.insert(key.into(), value.into());
self
}
pub fn into_command(self) -> Command {
let mut cmd = Command::new(
self.tenant_id,
self.aggregate_id,
self.aggregate_type,
self.payload,
);
if let Some(correlation_id) = self
.headers
.get("x-correlation-id")
.map(|s| s.trim())
.filter(|s| !s.is_empty())
{
cmd.metadata.insert(
"correlation_id".to_string(),
serde_json::Value::String(correlation_id.to_string()),
);
}
if let Some(traceparent) = self
.headers
.get("traceparent")
.map(|s| s.trim())
.filter(|s| !s.is_empty())
{
cmd.metadata.insert(
"traceparent".to_string(),
serde_json::Value::String(traceparent.to_string()),
);
}
cmd
}
}
#[derive(Debug, Clone)]
pub struct CommandResponse {
pub tenant_id: TenantId,
pub aggregate_id: AggregateId,
pub events: Vec<Event>,
}
#[derive(Debug, Clone)]
pub struct ServerConfig {
pub service_name: String,
pub validate_tenant_id: bool,
}
impl Default for ServerConfig {
fn default() -> Self {
Self {
service_name: "aggregate".to_string(),
validate_tenant_id: true,
}
}
}
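/// End-to-end command handling: tenant validation, tracing span creation,
/// dispatch to the aggregate handler, and success/error metric recording.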
pub struct CommandServer {
handler: AggregateHandler,
observability: Arc<Observability>,
health_checker: HealthChecker,
config: ServerConfig,
}
impl CommandServer {
pub fn new(handler: AggregateHandler, observability: Observability) -> Self {
Self {
handler,
observability: Arc::new(observability),
health_checker: HealthChecker::new(),
config: ServerConfig::default(),
}
}
pub fn with_config(mut self, config: ServerConfig) -> Self {
self.config = config;
self
}
pub fn extract_tenant_id(&self, headers: &HashMap<String, String>) -> TenantId {
headers
.get("x-tenant-id")
.map(TenantId::new)
.unwrap_or_default()
}
pub fn validate_tenant_id(&self, tenant_id: &TenantId) -> Result<(), ServerError> {
if !self.config.validate_tenant_id {
return Ok(());
}
let id = tenant_id.as_str();
if id.is_empty() {
return Ok(());
}
if !id
.chars()
.all(|c| c.is_alphanumeric() || c == '-' || c == '_')
{
return Err(ServerError::InvalidTenantId {
tenant_id: tenant_id.clone(),
reason:
"tenant_id must contain only alphanumeric characters, hyphens, and underscores"
.to_string(),
});
}
Ok(())
}
pub async fn handle(&self, request: CommandRequest) -> Result<CommandResponse, ServerError> {
let tenant_id = request.tenant_id.clone();
let aggregate_id = request.aggregate_id.clone();
let aggregate_type = request.aggregate_type.clone();
self.validate_tenant_id(&tenant_id)?;
let correlation_id = request
.headers
.get("x-correlation-id")
.map(|s| s.trim())
.filter(|s| !s.is_empty())
.map(|s| s.to_string());
let trace_id = request
.headers
.get("traceparent")
.map(|s| s.trim())
.filter(|s| !s.is_empty())
.and_then(trace_id_from_traceparent);
let span = self.observability.start_command_span(
&aggregate_id.to_string(),
aggregate_type.as_str(),
tenant_id.as_str(),
"cmd",
correlation_id.as_deref(),
trace_id.as_deref(),
);
let command = request.into_command();
match self.handler.handle_command(command).await {
Ok(events) => {
self.observability
.record_command_success(&span, events.len());
Ok(CommandResponse {
tenant_id,
aggregate_id,
events,
})
}
Err(e) => {
self.observability.record_command_error(&span, &e);
Err(e.into())
}
}
}
pub async fn handle_raw(
&self,
tenant_id: Option<&str>,
aggregate_id: &str,
aggregate_type: &str,
payload: serde_json::Value,
headers: HashMap<String, String>,
) -> Result<CommandResponse, ServerError> {
let resolved_tenant_id = tenant_id
.map(TenantId::new)
.unwrap_or_else(|| self.extract_tenant_id(&headers));
let request = CommandRequest::new(
resolved_tenant_id,
AggregateId::from_str(aggregate_id).map_err(|e| ServerError::InvalidAggregateId {
id: aggregate_id.to_string(),
reason: e.to_string(),
})?,
AggregateType::from(aggregate_type),
payload,
)
.with_headers(headers);
self.handle(request).await
}
pub async fn health_check(&self) -> HealthStatus {
self.health_checker.check()
}
pub async fn ready_check(&self) -> bool {
self.health_checker.is_ready()
}
pub fn metrics(&self) -> String {
self.observability.export_metrics()
}
pub fn health_checker(&self) -> &HealthChecker {
&self.health_checker
}
pub fn observability(&self) -> &Arc<Observability> {
&self.observability
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthReport {
pub status: HealthStatus,
pub nats_connected: bool,
pub storage_connected: bool,
pub active_aggregates: usize,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TenantInfo {
pub tenant_id: TenantId,
pub aggregate_count: usize,
pub last_activity: chrono::DateTime<chrono::Utc>,
}
#[derive(Debug, Clone)]
pub struct AdminResponse {
status: u16,
body: String,
}
impl AdminResponse {
pub fn status(&self) -> AdminStatus {
AdminStatus { code: self.status }
}
pub async fn text(&self) -> String {
self.body.clone()
}
pub async fn json<T: DeserializeOwned>(&self) -> T {
serde_json::from_str(&self.body).unwrap()
}
}
#[derive(Debug, Clone)]
pub struct AdminStatus {
code: u16,
}
impl AdminStatus {
pub fn is_success(&self) -> bool {
(200..300).contains(&self.code)
}
}
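/// Consistent-hash ring with virtual nodes: each node is inserted `replicas`
/// times so tenants spread evenly and only a fraction of them move when a
/// node joins or leaves. Illustrative usage (not run as a doctest):
///
/// ```ignore
/// let mut ring = HashRing::new(100);
/// ring.add_node("node-a");
/// ring.add_node("node-b");
/// let owner = ring.route("tenant-42"); // stable across repeated calls
/// ```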
#[derive(Debug, Clone)]
pub struct HashRing {
replicas: usize,
ring: BTreeMap<u64, String>,
}
impl HashRing {
pub fn new(replicas: usize) -> Self {
Self {
replicas: replicas.max(1),
ring: BTreeMap::new(),
}
}
pub fn add_node(&mut self, node: impl Into<String>) {
let node = node.into();
for i in 0..self.replicas {
let key = Self::hash(&(node.as_str(), i));
self.ring.insert(key, node.clone());
}
}
pub fn remove_node(&mut self, node: &str) {
let keys: Vec<u64> = self
.ring
.iter()
.filter_map(|(k, v)| if v == node { Some(*k) } else { None })
.collect();
for k in keys {
self.ring.remove(&k);
}
}
pub fn route(&self, tenant_id: &str) -> Option<&str> {
if self.ring.is_empty() {
return None;
}
let h = Self::hash(&tenant_id);
let (_, node) = self
.ring
.range(h..)
.next()
.or_else(|| self.ring.iter().next())?;
Some(node.as_str())
}
fn hash<T: Hash>(value: &T) -> u64 {
let mut hasher = DefaultHasher::new();
value.hash(&mut hasher);
hasher.finish()
}
}
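/// In-process admin surface: health, readiness, metrics, tenant listing,
/// drain, and placement reload, addressed by path via `get`/`post`.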
pub struct AdminServer {
observability: Arc<Observability>,
health_checker: Arc<HealthChecker>,
shard_id: String,
placement: Arc<TenantPlacementManager>,
}
impl AdminServer {
pub fn new(
observability: Observability,
health_checker: HealthChecker,
shard_id: String,
) -> Self {
let observability = Arc::new(observability);
let placement = Arc::new(TenantPlacementManager::new(observability.clone()));
Self {
observability,
health_checker: Arc::new(health_checker),
shard_id,
placement,
}
}
#[cfg(test)]
pub async fn new_test() -> Self {
let health = HealthChecker::new();
health.set_storage_healthy(true);
health.set_stream_healthy(true);
let server = Self::new(Observability::default(), health, "test-shard".to_string());
let span = server.observability.start_command_span(
"agg-1",
"Account",
"test-tenant",
"cmd-1",
None,
None,
);
server.observability.record_command_success(&span, 1);
server
.placement
.set_hosted_tenants(vec!["test-tenant".to_string()])
.await;
server
}
pub fn placement_manager(&self) -> Arc<TenantPlacementManager> {
self.placement.clone()
}
pub fn observability(&self) -> Arc<Observability> {
self.observability.clone()
}
pub fn health_checker(&self) -> &HealthChecker {
&self.health_checker
}
pub async fn get(&self, path: &str) -> AdminResponse {
match path {
"/health" => {
let report = self.health_report().await;
AdminResponse {
status: 200,
body: serde_json::to_string(&report).unwrap(),
}
}
"/ready" => AdminResponse {
status: 200,
body: serde_json::to_string(&self.health_checker.is_ready()).unwrap(),
},
"/metrics" => AdminResponse {
status: 200,
body: self.observability.export_metrics(),
},
"/admin/tenants" => {
let list: Vec<TenantStatus> = self.placement.all_statuses().await;
AdminResponse {
status: 200,
body: serde_json::to_string(&list).unwrap(),
}
}
_ => AdminResponse {
status: 404,
body: "not found".to_string(),
},
}
}
pub async fn post(&self, path: &str, body: serde_json::Value) -> AdminResponse {
match path {
"/admin/drain" => {
if let Some(tenant_id) = body.get("tenant_id").and_then(|v| v.as_str()) {
let tenant_id = TenantId::new(tenant_id);
self.placement.drain_tenant(&tenant_id).await;
self.placement.wait_drained(&tenant_id).await;
}
AdminResponse {
status: 200,
body: "{}".to_string(),
}
}
"/admin/reload" => {
if let Some(arr) = body.get("hosted_tenants").and_then(|v| v.as_array()) {
let tenants = arr
.iter()
.filter_map(|v| v.as_str().map(|s| s.to_string()))
.collect::<Vec<_>>();
self.placement.set_hosted_tenants(tenants).await;
}
if let Some(map) = body.get("placement").and_then(|v| v.as_object()) {
let placement = map
.iter()
.filter_map(|(k, v)| v.as_str().map(|s| (k.clone(), s.to_string())))
.collect::<HashMap<_, _>>();
self.placement
.apply_placement_map(&self.shard_id, &placement)
.await;
}
AdminResponse {
status: 200,
body: "{}".to_string(),
}
}
_ => AdminResponse {
status: 404,
body: "not found".to_string(),
},
}
}
pub async fn get_hosted_tenants(&self) -> Vec<TenantId> {
self.placement.hosted_tenants().await
}
async fn health_report(&self) -> HealthReport {
let active_aggregates = self.placement.hosted_tenants().await.len();
HealthReport {
status: self.health_checker.check(),
nats_connected: self.health_checker.stream_healthy(),
storage_connected: self.health_checker.storage_healthy(),
active_aggregates,
}
}
}
#[derive(Debug, Clone)]
pub enum ServerError {
InvalidTenantId { tenant_id: TenantId, reason: String },
InvalidAggregateId { id: String, reason: String },
AggregateError(AggregateError),
}
impl std::fmt::Display for ServerError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::InvalidTenantId { tenant_id, reason } => {
write!(f, "invalid tenant_id '{}': {}", tenant_id.as_str(), reason)
}
Self::InvalidAggregateId { id, reason } => {
write!(f, "invalid aggregate_id '{}': {}", id, reason)
}
Self::AggregateError(e) => write!(f, "{}", e),
}
}
}
impl std::error::Error for ServerError {}
impl From<AggregateError> for ServerError {
fn from(e: AggregateError) -> Self {
Self::AggregateError(e)
}
}
impl CommandRequest {
pub fn with_headers(mut self, headers: HashMap<String, String>) -> Self {
self.headers = headers;
self
}
}
fn trace_id_from_traceparent(traceparent: &str) -> Option<String> {
shared::trace_id_from_traceparent(traceparent).map(|s| s.to_string())
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn command_request_builder() {
let req = CommandRequest::new(
TenantId::new("tenant-a"),
AggregateId::new_v7(),
AggregateType::from("Account"),
serde_json::json!({"type": "deposit", "amount": 100}),
)
.with_header("x-request-id", "req-123");
assert_eq!(req.tenant_id.as_str(), "tenant-a");
assert_eq!(
req.headers.get("x-request-id"),
Some(&"req-123".to_string())
);
}
#[test]
fn extract_tenant_id_from_header() {
let _config = ServerConfig::default();
let mut headers = HashMap::new();
headers.insert("x-tenant-id".to_string(), "acme-corp".to_string());
let tenant_id = extract_tenant_id_static(&headers);
assert_eq!(tenant_id.as_str(), "acme-corp");
}
#[test]
fn extract_tenant_id_defaults_empty() {
let headers = HashMap::new();
let tenant_id = extract_tenant_id_static(&headers);
assert!(tenant_id.as_str().is_empty());
}
#[test]
fn validate_tenant_id_accepts_valid() {
assert!(validate_tenant_id_static(&TenantId::new("acme-corp")).is_ok());
assert!(validate_tenant_id_static(&TenantId::new("tenant_123")).is_ok());
assert!(validate_tenant_id_static(&TenantId::new("my-tenant")).is_ok());
}
#[test]
fn validate_tenant_id_rejects_invalid() {
assert!(validate_tenant_id_static(&TenantId::new("tenant@corp")).is_err());
assert!(validate_tenant_id_static(&TenantId::new("tenant name")).is_err());
}
#[test]
fn server_config_defaults() {
let config = ServerConfig::default();
assert_eq!(config.service_name, "aggregate");
assert!(config.validate_tenant_id);
}
#[test]
fn server_error_display() {
let err = ServerError::InvalidTenantId {
tenant_id: TenantId::new("bad@id"),
reason: "invalid characters".to_string(),
};
assert!(err.to_string().contains("bad@id"));
}
fn extract_tenant_id_static(headers: &HashMap<String, String>) -> TenantId {
headers
.get("x-tenant-id")
.map(TenantId::new)
.unwrap_or_default()
}
fn validate_tenant_id_static(tenant_id: &TenantId) -> Result<(), ServerError> {
let id = tenant_id.as_str();
if id.is_empty() {
return Ok(());
}
if !id
.chars()
.all(|c| c.is_alphanumeric() || c == '-' || c == '_')
{
return Err(ServerError::InvalidTenantId {
tenant_id: tenant_id.clone(),
reason:
"tenant_id must contain only alphanumeric characters, hyphens, and underscores"
.to_string(),
});
}
Ok(())
}
#[tokio::test]
async fn admin_health_endpoint_returns_status() {
let server = AdminServer::new_test().await;
let resp = server.get("/health").await;
assert!(resp.status().is_success());
let health: HealthReport = resp.json().await;
assert!(health.nats_connected);
assert!(health.storage_connected);
}
#[tokio::test]
async fn admin_ready_endpoint_returns_success() {
let server = AdminServer::new_test().await;
let resp = server.get("/ready").await;
assert!(resp.status().is_success());
}
#[tokio::test]
async fn admin_metrics_endpoint_prometheus_format() {
let server = AdminServer::new_test().await;
let resp = server.get("/metrics").await;
let body = resp.text().await;
assert!(body.contains("aggregate_commands_total"));
assert!(body.contains("tenant_id"));
}
#[tokio::test]
async fn admin_tenants_list_returns_hosted_tenants() {
let server = AdminServer::new_test().await;
let resp = server.get("/admin/tenants").await;
let tenants: Vec<TenantStatus> = resp.json().await;
assert!(tenants
.iter()
.any(|t| t.tenant_id == TenantId::new("test-tenant")));
}
#[tokio::test]
async fn admin_drain_waits_for_in_flight_commands() {
use std::time::{Duration, Instant};
let server = AdminServer::new_test().await;
let tenant_id = TenantId::new("test-tenant");
let guard = server
.placement_manager()
.begin_command(&tenant_id)
.await
.unwrap();
tokio::spawn(async move {
tokio::time::sleep(Duration::from_millis(50)).await;
drop(guard);
});
let start = Instant::now();
let resp = server
.post(
"/admin/drain",
serde_json::json!({"tenant_id": "test-tenant"}),
)
.await;
assert!(start.elapsed() < Duration::from_secs(5));
assert!(resp.status().is_success());
server.placement_manager().wait_drained(&tenant_id).await;
let status = server.placement_manager().tenant_status(&tenant_id).await;
assert!(status.draining);
assert!(!status.accepting);
assert_eq!(status.in_flight, 0);
}
#[tokio::test]
async fn admin_config_reload_updates_routing() {
let server = AdminServer::new_test().await;
let resp = server
.post(
"/admin/reload",
serde_json::json!({"hosted_tenants": ["new-tenant"]}),
)
.await;
assert!(resp.status().is_success());
let tenants = server.get_hosted_tenants().await;
assert!(tenants.contains(&TenantId::new("new-tenant")));
}
#[test]
fn admin_server_is_send() {
fn assert_send<T: Send>() {}
assert_send::<AdminServer>();
}
#[test]
fn hash_ring_routes_deterministically() {
let mut ring = HashRing::new(100);
ring.add_node("node-a");
ring.add_node("node-b");
ring.add_node("node-c");
let r1 = ring.route("tenant-a").unwrap().to_string();
let r2 = ring.route("tenant-a").unwrap().to_string();
assert_eq!(r1, r2);
}
#[test]
fn hash_ring_distributes_tenants_evenly() {
let mut ring = HashRing::new(200);
ring.add_node("node-a");
ring.add_node("node-b");
ring.add_node("node-c");
let mut counts: HashMap<String, usize> = HashMap::new();
for i in 0..3000 {
let tenant = format!("tenant-{}", i);
let node = ring.route(&tenant).unwrap().to_string();
*counts.entry(node).or_insert(0) += 1;
}
let avg = 3000.0 / 3.0;
for c in counts.values() {
let diff = (*c as f64 - avg).abs() / avg;
assert!(diff < 0.25);
}
}
#[test]
fn hash_ring_rebalances_on_node_add() {
let mut ring = HashRing::new(200);
ring.add_node("node-a");
ring.add_node("node-b");
let mut before: HashMap<String, String> = HashMap::new();
for i in 0..2000 {
let tenant = format!("tenant-{}", i);
before.insert(tenant.clone(), ring.route(&tenant).unwrap().to_string());
}
ring.add_node("node-c");
let mut moved = 0usize;
for (tenant, old) in before {
let new = ring.route(&tenant).unwrap();
if new != old {
moved += 1;
}
}
assert!(moved > 0);
assert!(moved < 2000);
}
}


@@ -0,0 +1,216 @@
use std::time::{Duration, Instant};
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CircuitState {
Closed,
Open,
HalfOpen,
}
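/// Circuit breaker: opens after `failure_threshold` failures without an
/// intervening success, transitions to half-open once `reset_timeout` has
/// elapsed, and closes again after `half_open_threshold` successful probes.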
#[derive(Debug, Clone)]
pub struct CircuitBreaker {
state: CircuitState,
failure_count: u32,
failure_threshold: u32,
reset_timeout: Duration,
last_failure_time: Option<Instant>,
half_open_successes: u32,
half_open_threshold: u32,
}
impl CircuitBreaker {
pub fn new() -> Self {
Self {
state: CircuitState::Closed,
failure_count: 0,
failure_threshold: 5,
reset_timeout: Duration::from_secs(30),
last_failure_time: None,
half_open_successes: 0,
half_open_threshold: 3,
}
}
pub fn with_failure_threshold(mut self, threshold: u32) -> Self {
self.failure_threshold = threshold;
self
}
pub fn with_reset_timeout(mut self, timeout: Duration) -> Self {
self.reset_timeout = timeout;
self
}
pub fn with_half_open_threshold(mut self, threshold: u32) -> Self {
self.half_open_threshold = threshold;
self
}
pub fn state(&self) -> CircuitState {
if self.state == CircuitState::Open {
if let Some(last_failure) = self.last_failure_time {
if last_failure.elapsed() >= self.reset_timeout {
return CircuitState::HalfOpen;
}
}
}
self.state
}
pub fn is_open(&self) -> bool {
matches!(self.state(), CircuitState::Open)
}
pub fn is_closed(&self) -> bool {
matches!(self.state(), CircuitState::Closed)
}
pub fn record_success(&mut self) {
match self.state() {
CircuitState::Closed => {
self.failure_count = 0;
}
CircuitState::HalfOpen => {
self.half_open_successes += 1;
if self.half_open_successes >= self.half_open_threshold {
self.state = CircuitState::Closed;
self.failure_count = 0;
self.half_open_successes = 0;
self.last_failure_time = None;
}
}
CircuitState::Open => {}
}
}
    pub fn record_failure(&mut self) {
        // Capture the effective state before refreshing the failure timestamp;
        // otherwise the HalfOpen branch below is unreachable and a stale
        // half-open success count survives the failure.
        let state = self.state();
        self.last_failure_time = Some(Instant::now());
        match state {
CircuitState::Closed => {
self.failure_count += 1;
if self.failure_count >= self.failure_threshold {
self.state = CircuitState::Open;
}
}
CircuitState::HalfOpen => {
self.state = CircuitState::Open;
self.half_open_successes = 0;
}
CircuitState::Open => {}
}
}
pub fn reset(&mut self) {
self.state = CircuitState::Closed;
self.failure_count = 0;
self.last_failure_time = None;
self.half_open_successes = 0;
}
}
impl Default for CircuitBreaker {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::thread::sleep;
#[test]
fn circuit_breaker_starts_closed() {
let cb = CircuitBreaker::new();
assert!(cb.is_closed());
assert!(!cb.is_open());
}
#[test]
fn circuit_breaker_opens_after_threshold() {
let mut cb = CircuitBreaker::new().with_failure_threshold(3);
cb.record_failure();
assert!(cb.is_closed());
cb.record_failure();
assert!(cb.is_closed());
cb.record_failure();
assert!(cb.is_open());
}
#[test]
fn circuit_breaker_resets_after_timeout() {
let mut cb = CircuitBreaker::new()
.with_failure_threshold(1)
.with_reset_timeout(Duration::from_millis(10));
cb.record_failure();
assert!(cb.is_open());
sleep(Duration::from_millis(15));
assert_eq!(cb.state(), CircuitState::HalfOpen);
}
#[test]
fn circuit_breaker_closes_after_half_open_successes() {
let mut cb = CircuitBreaker::new()
.with_failure_threshold(1)
.with_reset_timeout(Duration::from_millis(10))
.with_half_open_threshold(2);
cb.record_failure();
assert!(cb.is_open());
sleep(Duration::from_millis(15));
assert_eq!(cb.state(), CircuitState::HalfOpen);
cb.record_success();
assert_eq!(cb.state(), CircuitState::HalfOpen);
cb.record_success();
assert!(cb.is_closed());
}
#[test]
fn circuit_breaker_reopens_on_half_open_failure() {
let mut cb = CircuitBreaker::new()
.with_failure_threshold(1)
.with_reset_timeout(Duration::from_millis(10));
cb.record_failure();
assert!(cb.is_open());
sleep(Duration::from_millis(15));
assert_eq!(cb.state(), CircuitState::HalfOpen);
cb.record_failure();
assert!(cb.is_open());
}
#[test]
fn circuit_breaker_success_resets_failure_count() {
let mut cb = CircuitBreaker::new().with_failure_threshold(3);
cb.record_failure();
cb.record_failure();
cb.record_success();
assert!(cb.is_closed());
cb.record_failure();
assert!(cb.is_closed());
}
#[test]
fn circuit_breaker_manual_reset() {
let mut cb = CircuitBreaker::new().with_failure_threshold(1);
cb.record_failure();
assert!(cb.is_open());
cb.reset();
assert!(cb.is_closed());
}
}


@@ -0,0 +1,422 @@
mod circuit_breaker;
pub use circuit_breaker::CircuitBreaker;
use crate::types::{AggregateError, AggregateId, AggregateType, Snapshot, TenantId, Version};
use edge_storage::{AggregateStore, Config as EdgeConfig, EdgeStorage, WriteResult, Writer};
use std::sync::Arc;
use tokio::sync::RwLock;
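/// Snapshot storage backed by `EdgeStorage`, guarded by a circuit breaker so
/// calls fail fast while the breaker is open.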
pub struct StorageClient {
storage: Arc<EdgeStorage>,
aggregate_store: AggregateStore,
writer: Arc<Writer>,
circuit_breaker: RwLock<CircuitBreaker>,
}
impl std::fmt::Debug for StorageClient {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("StorageClient")
.field("circuit_breaker", &self.circuit_breaker)
.finish_non_exhaustive()
}
}
impl StorageClient {
pub fn open(storage_path: impl Into<String>) -> Result<Self, StorageInitError> {
let config = EdgeConfig::new(storage_path.into());
let storage = EdgeStorage::open(config)?;
let writer = Arc::new(Writer::new(storage.db().clone(), &EdgeConfig::default()));
let aggregate_store = AggregateStore::new(storage.db().clone(), writer.clone());
Ok(Self {
storage: Arc::new(storage),
aggregate_store,
writer,
circuit_breaker: RwLock::new(CircuitBreaker::new()),
})
}
#[cfg(test)]
pub fn in_memory() -> Self {
use tempfile::tempdir;
let dir = tempdir().expect("failed to create temp dir");
let path = dir.path().join("test.mdbx");
std::mem::forget(dir);
Self::open(path.to_string_lossy().to_string()).expect("failed to open in-memory storage")
}
pub fn with_circuit_breaker(mut self, cb: CircuitBreaker) -> Self {
self.circuit_breaker = RwLock::new(cb);
self
}
pub async fn get_snapshot(
&self,
tenant_id: &TenantId,
aggregate_id: &AggregateId,
) -> Result<Option<Snapshot>, AggregateError> {
self.check_circuit().await?;
let key = Self::build_key(tenant_id, aggregate_id);
match self.aggregate_store.get_latest_snapshot(&key) {
Ok(Some((version, data))) => {
let snapshot = self
.decode_snapshot(tenant_id, aggregate_id, version, &data)
.map_err(|e| AggregateError::StorageError(e.to_string()))?;
self.record_success().await;
Ok(Some(snapshot))
}
Ok(None) => {
self.record_success().await;
Ok(None)
}
Err(e) => {
self.record_failure().await;
Err(AggregateError::StorageError(e.to_string()))
}
}
}
pub async fn get_snapshot_at_version(
&self,
tenant_id: &TenantId,
aggregate_id: &AggregateId,
version: Version,
) -> Result<Option<Snapshot>, AggregateError> {
self.check_circuit().await?;
let key = Self::build_key(tenant_id, aggregate_id);
match self.aggregate_store.get_snapshot(&key, version.as_u64()) {
Ok(Some(data)) => {
let snapshot = self
.decode_snapshot(tenant_id, aggregate_id, version.as_u64(), &data)
.map_err(|e| AggregateError::StorageError(e.to_string()))?;
self.record_success().await;
Ok(Some(snapshot))
}
Ok(None) => {
self.record_success().await;
Ok(None)
}
Err(e) => {
self.record_failure().await;
Err(AggregateError::StorageError(e.to_string()))
}
}
}
pub async fn put_snapshot(&self, snapshot: &Snapshot) -> Result<(), AggregateError> {
self.check_circuit().await?;
let key = Self::build_key(&snapshot.tenant_id, &snapshot.aggregate_id);
let data = serde_json::to_vec(&snapshot.state)
.map_err(|e| AggregateError::StorageError(e.to_string()))?;
let result = self
.aggregate_store
.put_snapshot_sync(&key, snapshot.version.as_u64(), &data)
.map_err(|e| {
self.record_failure_sync();
AggregateError::StorageError(e.to_string())
})?;
match result {
WriteResult::Success => {
self.record_success().await;
Ok(())
}
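            // A version conflict is a well-formed storage response, not an
            // outage, so it counts as a success for the circuit breaker.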
WriteResult::VersionConflict {
aggregate_id: _,
version,
} => {
self.record_success().await;
Err(AggregateError::VersionConflict {
expected: Version::from(version).increment(),
actual: Version::from(version),
})
}
WriteResult::Error(e) => {
self.record_failure().await;
Err(AggregateError::StorageError(e))
}
}
}
pub async fn get_latest_version(
&self,
tenant_id: &TenantId,
aggregate_id: &AggregateId,
) -> Result<Option<Version>, AggregateError> {
self.check_circuit().await?;
let key = Self::build_key(tenant_id, aggregate_id);
match self.aggregate_store.get_latest_version(&key) {
Ok(Some(v)) => {
self.record_success().await;
Ok(Some(Version::from(v)))
}
Ok(None) => {
self.record_success().await;
Ok(None)
}
Err(e) => {
self.record_failure().await;
Err(AggregateError::StorageError(e.to_string()))
}
}
}
pub async fn delete_snapshot(
&self,
_tenant_id: &TenantId,
_aggregate_id: &AggregateId,
) -> Result<(), AggregateError> {
self.check_circuit().await?;
self.record_success().await;
Err(AggregateError::StorageError(
"Snapshot deletion not supported in event-sourced system".to_string(),
))
}
fn build_key(tenant_id: &TenantId, aggregate_id: &AggregateId) -> Vec<u8> {
format!("{}:{}", tenant_id.as_str(), aggregate_id).into_bytes()
}
fn decode_snapshot(
&self,
tenant_id: &TenantId,
aggregate_id: &AggregateId,
version: u64,
data: &[u8],
) -> Result<Snapshot, serde_json::Error> {
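        // Only the state JSON is persisted, so the aggregate type cannot be
        // recovered here and is marked as Unknown.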
let state = serde_json::from_slice(data)?;
Ok(Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::new("Unknown"),
Version::from(version),
state,
))
}
async fn check_circuit(&self) -> Result<(), AggregateError> {
let cb = self.circuit_breaker.read().await;
if cb.is_open() {
return Err(AggregateError::StorageError(
"Circuit breaker is open".to_string(),
));
}
Ok(())
}
async fn record_success(&self) {
let mut cb = self.circuit_breaker.write().await;
cb.record_success();
}
fn record_failure_sync(&self) {
if let Ok(mut cb) = self.circuit_breaker.try_write() {
cb.record_failure();
}
}
async fn record_failure(&self) {
let mut cb = self.circuit_breaker.write().await;
cb.record_failure();
}
pub fn storage(&self) -> &Arc<EdgeStorage> {
&self.storage
}
}
impl Clone for StorageClient {
fn clone(&self) -> Self {
Self {
storage: self.storage.clone(),
aggregate_store: self.aggregate_store.clone(),
writer: self.writer.clone(),
circuit_breaker: RwLock::new(CircuitBreaker::new()),
}
}
}
#[derive(Debug, thiserror::Error)]
pub enum StorageInitError {
#[error("Failed to open storage: {0}")]
OpenError(#[from] edge_storage::Error),
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
use tempfile::tempdir;
fn create_test_client() -> (tempfile::TempDir, StorageClient) {
let dir = tempdir().unwrap();
let path = dir.path().join("test.mdbx");
let client = StorageClient::open(path.to_string_lossy().to_string()).unwrap();
(dir, client)
}
#[test]
fn storage_client_open() {
let (_dir, _client) = create_test_client();
}
#[tokio::test]
async fn storage_client_put_get_snapshot() {
let (_dir, client) = create_test_client();
let tenant_id = TenantId::new("acme-corp");
let aggregate_id = AggregateId::new_v7();
let snapshot = Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::new("Account"),
Version::from(1),
json!({"balance": 100}),
);
client.put_snapshot(&snapshot).await.unwrap();
let retrieved = client
.get_snapshot(&tenant_id, &aggregate_id)
.await
.unwrap();
assert!(retrieved.is_some());
let retrieved = retrieved.unwrap();
assert_eq!(retrieved.version, Version::from(1));
assert_eq!(retrieved.state, json!({"balance": 100}));
}
#[tokio::test]
async fn storage_client_version_conflict() {
let (_dir, client) = create_test_client();
let tenant_id = TenantId::new("acme-corp");
let aggregate_id = AggregateId::new_v7();
let snapshot_v1 = Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::new("Account"),
Version::from(1),
json!({"balance": 100}),
);
client.put_snapshot(&snapshot_v1).await.unwrap();
let snapshot_v1_again = Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::new("Account"),
Version::from(1),
json!({"balance": 200}),
);
let result = client.put_snapshot(&snapshot_v1_again).await;
assert!(matches!(
result,
Err(AggregateError::VersionConflict { .. })
));
}
#[tokio::test]
async fn storage_client_latest_version() {
let (_dir, client) = create_test_client();
let tenant_id = TenantId::new("acme-corp");
let aggregate_id = AggregateId::new_v7();
let version = client
.get_latest_version(&tenant_id, &aggregate_id)
.await
.unwrap();
assert!(version.is_none());
let snapshot_v1 = Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::new("Account"),
Version::from(1),
json!({"balance": 100}),
);
client.put_snapshot(&snapshot_v1).await.unwrap();
let version = client
.get_latest_version(&tenant_id, &aggregate_id)
.await
.unwrap();
assert_eq!(version, Some(Version::from(1)));
let snapshot_v3 = Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::new("Account"),
Version::from(3),
json!({"balance": 300}),
);
client.put_snapshot(&snapshot_v3).await.unwrap();
let version = client
.get_latest_version(&tenant_id, &aggregate_id)
.await
.unwrap();
assert_eq!(version, Some(Version::from(3)));
}
#[tokio::test]
async fn storage_client_isolation() {
let (_dir, client) = create_test_client();
let tenant_a = TenantId::new("tenant-a");
let tenant_b = TenantId::new("tenant-b");
let aggregate_id = AggregateId::new_v7();
let snapshot_a = Snapshot::new(
tenant_a.clone(),
aggregate_id.clone(),
AggregateType::new("Account"),
Version::from(1),
json!({"owner": "A"}),
);
client.put_snapshot(&snapshot_a).await.unwrap();
let snapshot_b = Snapshot::new(
tenant_b.clone(),
aggregate_id.clone(),
AggregateType::new("Account"),
Version::from(1),
json!({"owner": "B"}),
);
client.put_snapshot(&snapshot_b).await.unwrap();
let retrieved_a = client
.get_snapshot(&tenant_a, &aggregate_id)
.await
.unwrap()
.unwrap();
let retrieved_b = client
.get_snapshot(&tenant_b, &aggregate_id)
.await
.unwrap()
.unwrap();
assert_eq!(retrieved_a.state["owner"], "A");
assert_eq!(retrieved_b.state["owner"], "B");
}
#[test]
fn storage_client_is_send_sync() {
fn assert_send_sync<T: Send + Sync>() {}
assert_send_sync::<StorageClient>();
}
}


@@ -0,0 +1,284 @@
use std::time::{Duration, Instant};
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CircuitState {
Closed,
Open,
HalfOpen,
}
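/// Stream-side circuit breaker variant: same open/half-open/closed lifecycle
/// as the storage breaker, plus a consecutive-success counter and
/// `time_until_reset` for reporting.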
#[derive(Debug, Clone)]
pub struct CircuitBreaker {
state: CircuitState,
failure_count: u32,
failure_threshold: u32,
reset_timeout: Duration,
last_failure_time: Option<Instant>,
half_open_successes: u32,
half_open_threshold: u32,
consecutive_successes: u32,
}
impl CircuitBreaker {
pub fn new() -> Self {
Self {
state: CircuitState::Closed,
failure_count: 0,
failure_threshold: 5,
reset_timeout: Duration::from_secs(30),
last_failure_time: None,
half_open_successes: 0,
half_open_threshold: 3,
consecutive_successes: 0,
}
}
pub fn with_failure_threshold(mut self, threshold: u32) -> Self {
self.failure_threshold = threshold;
self
}
pub fn with_reset_timeout(mut self, timeout: Duration) -> Self {
self.reset_timeout = timeout;
self
}
pub fn with_half_open_threshold(mut self, threshold: u32) -> Self {
self.half_open_threshold = threshold;
self
}
pub fn state(&self) -> CircuitState {
if self.state == CircuitState::Open {
if let Some(last_failure) = self.last_failure_time {
if last_failure.elapsed() >= self.reset_timeout {
return CircuitState::HalfOpen;
}
}
}
self.state
}
pub fn is_open(&self) -> bool {
matches!(self.state(), CircuitState::Open)
}
pub fn is_closed(&self) -> bool {
matches!(self.state(), CircuitState::Closed)
}
pub fn is_half_open(&self) -> bool {
matches!(self.state(), CircuitState::HalfOpen)
}
pub fn failure_count(&self) -> u32 {
self.failure_count
}
pub fn consecutive_successes(&self) -> u32 {
self.consecutive_successes
}
pub fn record_success(&mut self) {
self.consecutive_successes += 1;
match self.state() {
CircuitState::Closed => {
self.failure_count = 0;
}
CircuitState::HalfOpen => {
self.half_open_successes += 1;
if self.half_open_successes >= self.half_open_threshold {
self.state = CircuitState::Closed;
self.failure_count = 0;
self.half_open_successes = 0;
self.last_failure_time = None;
}
}
CircuitState::Open => {}
}
}
    pub fn record_failure(&mut self) {
        self.consecutive_successes = 0;
        // Capture the effective state before refreshing the failure timestamp;
        // otherwise the HalfOpen branch below is unreachable and a stale
        // half-open success count survives the failure.
        let state = self.state();
        self.last_failure_time = Some(Instant::now());
        match state {
CircuitState::Closed => {
self.failure_count += 1;
if self.failure_count >= self.failure_threshold {
self.state = CircuitState::Open;
}
}
CircuitState::HalfOpen => {
self.state = CircuitState::Open;
self.half_open_successes = 0;
}
CircuitState::Open => {}
}
}
pub fn reset(&mut self) {
self.state = CircuitState::Closed;
self.failure_count = 0;
self.last_failure_time = None;
self.half_open_successes = 0;
self.consecutive_successes = 0;
}
pub fn time_until_reset(&self) -> Option<Duration> {
if self.state == CircuitState::Open {
self.last_failure_time.map(|t| {
let elapsed = t.elapsed();
if elapsed < self.reset_timeout {
self.reset_timeout - elapsed
} else {
Duration::ZERO
}
})
} else {
None
}
}
}
impl Default for CircuitBreaker {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::thread::sleep;
#[test]
fn circuit_breaker_starts_closed() {
let cb = CircuitBreaker::new();
assert!(cb.is_closed());
assert!(!cb.is_open());
}
#[test]
fn circuit_breaker_opens_after_threshold() {
let mut cb = CircuitBreaker::new().with_failure_threshold(3);
cb.record_failure();
assert!(cb.is_closed());
cb.record_failure();
assert!(cb.is_closed());
cb.record_failure();
assert!(cb.is_open());
}
#[test]
fn circuit_breaker_resets_after_timeout() {
let mut cb = CircuitBreaker::new()
.with_failure_threshold(1)
.with_reset_timeout(Duration::from_millis(10));
cb.record_failure();
assert!(cb.is_open());
sleep(Duration::from_millis(15));
assert!(cb.is_half_open());
}
#[test]
fn circuit_breaker_closes_after_half_open_successes() {
let mut cb = CircuitBreaker::new()
.with_failure_threshold(1)
.with_reset_timeout(Duration::from_millis(10))
.with_half_open_threshold(2);
cb.record_failure();
assert!(cb.is_open());
sleep(Duration::from_millis(15));
assert!(cb.is_half_open());
cb.record_success();
assert!(cb.is_half_open());
cb.record_success();
assert!(cb.is_closed());
}
#[test]
fn circuit_breaker_reopens_on_half_open_failure() {
let mut cb = CircuitBreaker::new()
.with_failure_threshold(1)
.with_reset_timeout(Duration::from_millis(10));
cb.record_failure();
assert!(cb.is_open());
sleep(Duration::from_millis(15));
assert!(cb.is_half_open());
cb.record_failure();
assert!(cb.is_open());
}
#[test]
fn circuit_breaker_success_resets_failure_count() {
let mut cb = CircuitBreaker::new().with_failure_threshold(3);
cb.record_failure();
cb.record_failure();
cb.record_success();
assert!(cb.is_closed());
assert_eq!(cb.failure_count(), 0);
cb.record_failure();
assert!(cb.is_closed());
}
#[test]
fn circuit_breaker_manual_reset() {
let mut cb = CircuitBreaker::new().with_failure_threshold(1);
cb.record_failure();
assert!(cb.is_open());
cb.reset();
assert!(cb.is_closed());
assert_eq!(cb.failure_count(), 0);
}
#[test]
fn circuit_breaker_tracks_consecutive_successes() {
let mut cb = CircuitBreaker::new();
assert_eq!(cb.consecutive_successes(), 0);
cb.record_success();
assert_eq!(cb.consecutive_successes(), 1);
cb.record_success();
assert_eq!(cb.consecutive_successes(), 2);
cb.record_failure();
assert_eq!(cb.consecutive_successes(), 0);
}
#[test]
fn circuit_breaker_time_until_reset() {
let mut cb = CircuitBreaker::new()
.with_failure_threshold(1)
.with_reset_timeout(Duration::from_millis(100));
assert!(cb.time_until_reset().is_none());
cb.record_failure();
let remaining = cb.time_until_reset();
assert!(remaining.is_some());
assert!(remaining.unwrap() <= Duration::from_millis(100));
cb.reset();
assert!(cb.time_until_reset().is_none());
}
}

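For context on how callers are expected to drive these transitions, a minimal sketch (not part of this diff), assuming the `CircuitBreaker` above is in scope: `guarded_call` is a hypothetical helper that fails fast while the breaker is open and feeds each outcome back into it.

```rust
use std::time::Duration;

// Hypothetical wrapper: short-circuit while the breaker is open,
// otherwise run the operation and record its outcome.
fn guarded_call<T, E>(
    cb: &mut CircuitBreaker,
    op: impl FnOnce() -> Result<T, E>,
) -> Result<T, Option<E>> {
    if cb.is_open() {
        // Fail fast; time_until_reset() tells callers how long to back off.
        return Err(None);
    }
    match op() {
        Ok(v) => {
            cb.record_success();
            Ok(v)
        }
        Err(e) => {
            cb.record_failure();
            Err(Some(e))
        }
    }
}

fn main() {
    let mut cb = CircuitBreaker::new()
        .with_failure_threshold(3)
        .with_reset_timeout(Duration::from_secs(30));
    // Three consecutive failures trip the breaker ...
    for _ in 0..3 {
        let _ = guarded_call(&mut cb, || Err::<(), &str>("io error"));
    }
    assert!(cb.is_open());
    // ... after which calls are rejected without running the operation.
    assert!(guarded_call(&mut cb, || Ok::<(), &str>(())).is_err());
}
```
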
627
aggregate/src/stream/mod.rs Normal file
View File

@@ -0,0 +1,627 @@
mod circuit_breaker;
pub use circuit_breaker::CircuitBreaker;
use crate::types::{AggregateError, AggregateId, AggregateType, Event, TenantId, Version};
use async_nats::jetstream::{
self, consumer::pull::Config as PullConfig, consumer::AckPolicy, consumer::DeliverPolicy,
consumer::ReplayPolicy, stream::Config as StreamConfig,
};
use futures::stream::{Stream, StreamExt};
use serde_json;
use std::collections::HashMap;
use std::pin::Pin;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::broadcast;
use tokio::sync::RwLock;
use tokio::time::Instant;
const AGGREGATE_STREAM_NAME: &str = "AGGREGATE_EVENTS";
#[derive(Debug)]
pub struct StreamConfigSettings {
pub max_messages: i64,
pub max_bytes: i64,
pub max_age: Duration,
pub duplicate_window: Duration,
}
impl Default for StreamConfigSettings {
fn default() -> Self {
Self {
max_messages: 10_000_000,
max_bytes: -1,
max_age: Duration::from_secs(365 * 24 * 60 * 60),
duplicate_window: Duration::from_secs(120),
}
}
}
#[derive(Debug, Clone)]
pub struct StreamClient {
backend: StreamBackend,
circuit_breaker: Arc<RwLock<CircuitBreaker>>,
}
#[derive(Debug, Clone)]
#[allow(dead_code)]
enum StreamBackend {
JetStream(jetstream::Context),
InMemory(Arc<InMemoryStream>),
}
#[derive(Debug)]
struct InMemoryStream {
events_by_tenant_aggregate: RwLock<HashMap<(String, String), Vec<Event>>>,
updates: broadcast::Sender<Event>,
}
impl StreamClient {
pub async fn new(nats_url: impl Into<String>) -> Result<Self, AggregateError> {
let url = nats_url.into();
let client = async_nats::connect(&url).await.map_err(|e| {
AggregateError::StreamError(format!("Failed to connect to NATS: {}", e))
})?;
let jetstream = jetstream::new(client.clone());
Ok(Self {
backend: StreamBackend::JetStream(jetstream),
circuit_breaker: Arc::new(RwLock::new(CircuitBreaker::new())),
})
}
#[cfg(test)]
pub fn in_memory() -> Self {
let (updates, _) = broadcast::channel(1024);
Self {
backend: StreamBackend::InMemory(Arc::new(InMemoryStream {
events_by_tenant_aggregate: RwLock::new(HashMap::new()),
updates,
})),
circuit_breaker: Arc::new(RwLock::new(CircuitBreaker::new())),
}
}
pub async fn with_circuit_breaker(mut self, cb: CircuitBreaker) -> Self {
self.circuit_breaker = Arc::new(RwLock::new(cb));
self
}
pub async fn setup_stream(&self) -> Result<jetstream::stream::Stream, AggregateError> {
self.setup_stream_with_settings(StreamConfigSettings::default())
.await
}
pub async fn setup_stream_with_settings(
&self,
settings: StreamConfigSettings,
) -> Result<jetstream::stream::Stream, AggregateError> {
let jetstream = match &self.backend {
StreamBackend::JetStream(ctx) => ctx.clone(),
StreamBackend::InMemory(_) => {
return Err(AggregateError::StreamError(
"setup_stream not supported for in-memory stream".to_string(),
));
}
};
let config = StreamConfig {
name: AGGREGATE_STREAM_NAME.to_string(),
subjects: vec!["tenant.*.aggregate.*.*".to_string()],
max_messages: settings.max_messages,
max_bytes: settings.max_bytes,
max_age: settings.max_age,
duplicate_window: settings.duplicate_window,
..Default::default()
};
let stream = jetstream
.get_or_create_stream(config)
.await
.map_err(|e| AggregateError::StreamError(format!("Failed to create stream: {}", e)))?;
Ok(stream)
}
pub async fn publish_events(&self, events: Vec<Event>) -> Result<(), AggregateError> {
if events.is_empty() {
return Ok(());
}
if self.circuit_breaker.read().await.is_open() {
return Err(AggregateError::StreamError(
"Circuit breaker is open".to_string(),
));
}
match &self.backend {
StreamBackend::JetStream(jetstream) => {
for event in &events {
let subject =
build_subject(&event.tenant_id, &event.aggregate_type, &event.aggregate_id);
let payload = serde_json::to_vec(event).map_err(|e| {
AggregateError::StreamError(format!("Serialization error: {}", e))
})?;
let mut headers = async_nats::HeaderMap::new();
headers.insert("Nats-Msg-Id", event.event_id.to_string().as_str());
headers.insert("aggregate-version", event.version.to_string().as_str());
headers.insert("tenant-id", event.tenant_id.as_str());
headers.insert("aggregate-type", event.aggregate_type.as_str());
headers.insert("event-type", event.event_type.as_str());
if let Some(correlation_id) = event.correlation_id.as_deref() {
headers.insert("x-correlation-id", correlation_id);
headers.insert("correlation-id", correlation_id);
}
if let Some(traceparent) = event.traceparent.as_deref() {
headers.insert("traceparent", traceparent);
if let Some(trace_id) = shared::trace_id_from_traceparent(traceparent) {
headers.insert("trace-id", trace_id);
}
}
let result = jetstream
.publish_with_headers(subject.clone(), headers.clone(), payload.into())
.await;
match result {
Ok(_) => {
self.circuit_breaker.write().await.record_success();
}
Err(e) => {
self.circuit_breaker.write().await.record_failure();
return Err(AggregateError::StreamError(format!(
"Failed to publish event: {}",
e
)));
}
}
}
}
StreamBackend::InMemory(mem) => {
for event in events {
let key = (
event.tenant_id.as_str().to_string(),
event.aggregate_id.to_string(),
);
let mut map = mem.events_by_tenant_aggregate.write().await;
let bucket = map.entry(key).or_default();
if bucket.iter().any(|e| e.command_id == event.command_id) {
continue;
}
let current_max = bucket.last().map(|e| e.version.as_u64()).unwrap_or(0);
let expected = current_max + 1;
if event.version.as_u64() != expected {
return Err(AggregateError::VersionConflict {
expected: Version::from(current_max).increment(),
actual: event.version,
});
}
bucket.push(event.clone());
bucket.sort_by_key(|e| e.version);
let _ = mem.updates.send(event);
}
self.circuit_breaker.write().await.record_success();
}
}
Ok(())
}
pub async fn fetch_events(
&self,
tenant_id: &TenantId,
aggregate_id: &AggregateId,
after_version: Version,
) -> Result<Vec<Event>, AggregateError> {
if self.circuit_breaker.read().await.is_open() {
return Err(AggregateError::StreamError(
"Circuit breaker is open".to_string(),
));
}
match &self.backend {
StreamBackend::JetStream(jetstream) => {
let stream = jetstream
.get_stream(AGGREGATE_STREAM_NAME)
.await
.map_err(|e| AggregateError::StreamError(format!("Stream not found: {}", e)))?;
let subject = format!("tenant.{}.aggregate.*.{}", tenant_id.as_str(), aggregate_id);
let consumer_name = format!(
"fetch_{}_{}_{}",
tenant_id.as_str(),
aggregate_id,
uuid::Uuid::now_v7()
);
let consumer_config = PullConfig {
durable_name: Some(consumer_name.clone()),
filter_subject: subject.clone(),
deliver_policy: DeliverPolicy::All,
ack_policy: AckPolicy::Explicit,
replay_policy: ReplayPolicy::Instant,
..Default::default()
};
let consumer = stream
.get_or_create_consumer(&consumer_name, consumer_config)
.await
.map_err(|e| {
AggregateError::StreamError(format!("Consumer creation failed: {}", e))
})?;
let mut events = Vec::new();
let mut messages = consumer.messages().await.map_err(|e| {
AggregateError::StreamError(format!("Message stream error: {}", e))
})?;
let idle_timeout = Duration::from_millis(250);
let max_total_wait = Duration::from_secs(2);
let started = Instant::now();
loop {
if started.elapsed() >= max_total_wait {
break;
}
match tokio::time::timeout(idle_timeout, messages.next()).await {
Ok(Some(Ok(msg))) => {
let event: Event =
serde_json::from_slice(&msg.payload).map_err(|e| {
AggregateError::StreamError(format!(
"Deserialization error: {}",
e
))
})?;
if event.version > after_version {
events.push(event);
}
msg.ack().await.ok();
}
Ok(Some(Err(e))) => {
return Err(AggregateError::StreamError(format!(
"Message error: {}",
e
)));
}
Ok(None) => break,
Err(_) => break,
}
}
let _ = stream.delete_consumer(&consumer_name).await;
events.sort_by_key(|e| e.version);
self.circuit_breaker.write().await.record_success();
Ok(events)
}
StreamBackend::InMemory(mem) => {
let key = (tenant_id.as_str().to_string(), aggregate_id.to_string());
let map = mem.events_by_tenant_aggregate.read().await;
let mut out = map
.get(&key)
.map(|bucket| {
bucket
.iter()
.filter(|e| e.version > after_version)
.cloned()
.collect::<Vec<_>>()
})
.unwrap_or_default();
out.sort_by_key(|e| e.version);
self.circuit_breaker.write().await.record_success();
Ok(out)
}
}
}
pub async fn subscribe_to_events(
&self,
tenant_id: TenantId,
aggregate_type: AggregateType,
aggregate_id: AggregateId,
) -> Result<Pin<Box<dyn Stream<Item = Event> + Send>>, AggregateError> {
match &self.backend {
StreamBackend::JetStream(jetstream) => {
let subject = format!(
"tenant.{}.aggregate.{}.{}",
tenant_id.as_str(),
aggregate_type.as_str(),
aggregate_id
);
let stream = jetstream
.get_stream(AGGREGATE_STREAM_NAME)
.await
.map_err(|e| AggregateError::StreamError(format!("Stream not found: {}", e)))?;
let consumer_name = format!("sub_{}_{}", tenant_id.as_str(), aggregate_id);
let consumer_config = PullConfig {
filter_subject: subject,
deliver_policy: DeliverPolicy::New,
..Default::default()
};
let consumer = stream
.get_or_create_consumer(&consumer_name, consumer_config)
.await
.map_err(|e| {
AggregateError::StreamError(format!("Consumer creation failed: {}", e))
})?;
let messages = consumer.messages().await.map_err(|e| {
AggregateError::StreamError(format!("Message stream error: {}", e))
})?;
let event_stream = messages.filter_map(move |msg| async move {
match msg {
Ok(m) => {
let event: Result<Event, _> = serde_json::from_slice(&m.payload);
match event {
Ok(e) => {
m.ack().await.ok();
Some(e)
}
Err(_) => None,
}
}
Err(_) => None,
}
});
let boxed: Pin<Box<dyn Stream<Item = Event> + Send>> = Box::pin(event_stream);
Ok(boxed)
}
StreamBackend::InMemory(mem) => {
let tenant_id = tenant_id.as_str().to_string();
let aggregate_type = aggregate_type.as_str().to_string();
let aggregate_id = aggregate_id.to_string();
let receiver = mem.updates.subscribe();
let boxed: Pin<Box<dyn Stream<Item = Event> + Send>> =
Box::pin(futures::stream::unfold(
(receiver, tenant_id, aggregate_type, aggregate_id),
|(mut receiver, tenant_id, aggregate_type, aggregate_id)| async move {
loop {
match receiver.recv().await {
Ok(event) => {
if event.tenant_id.as_str() == tenant_id
&& event.aggregate_type.as_str() == aggregate_type
&& event.aggregate_id.to_string() == aggregate_id
{
return Some((
event,
(receiver, tenant_id, aggregate_type, aggregate_id),
));
}
}
Err(broadcast::error::RecvError::Lagged(_)) => continue,
Err(broadcast::error::RecvError::Closed) => return None,
}
}
},
));
Ok(boxed)
}
}
}
pub async fn get_stream_info(&self) -> Result<Option<jetstream::stream::Info>, AggregateError> {
match &self.backend {
StreamBackend::JetStream(jetstream) => {
match jetstream.get_stream(AGGREGATE_STREAM_NAME).await {
Ok(mut stream) => {
let info = stream.info().await.map_err(|e| {
AggregateError::StreamError(format!("Stream info error: {}", e))
})?;
Ok(Some(info.clone()))
}
Err(_) => Ok(None),
}
}
StreamBackend::InMemory(_) => Ok(None),
}
}
pub async fn health_check(&self) -> Result<bool, AggregateError> {
match &self.backend {
StreamBackend::JetStream(jetstream) => {
match jetstream.get_stream(AGGREGATE_STREAM_NAME).await {
Ok(_) => {
self.circuit_breaker.write().await.record_success();
Ok(true)
}
Err(e) => {
self.circuit_breaker.write().await.record_failure();
Err(AggregateError::StreamError(format!(
"Health check failed: {}",
e
)))
}
}
}
StreamBackend::InMemory(_) => {
self.circuit_breaker.write().await.record_success();
Ok(true)
}
}
}
pub fn circuit_breaker_state(&self) -> circuit_breaker::CircuitState {
futures::executor::block_on(async { self.circuit_breaker.read().await.state() })
}
pub async fn delete_consumer(
&self,
tenant_id: &TenantId,
aggregate_id: &AggregateId,
) -> Result<(), AggregateError> {
let consumer_name = format!("sub_{}_{}", tenant_id.as_str(), aggregate_id);
match &self.backend {
StreamBackend::JetStream(jetstream) => {
let stream = jetstream
.get_stream(AGGREGATE_STREAM_NAME)
.await
.map_err(|e| AggregateError::StreamError(format!("Stream not found: {}", e)))?;
stream.delete_consumer(&consumer_name).await.map_err(|e| {
AggregateError::StreamError(format!("Consumer deletion failed: {}", e))
})?;
Ok(())
}
StreamBackend::InMemory(_) => Ok(()),
}
}
}
pub fn build_subject(
tenant_id: &TenantId,
aggregate_type: &AggregateType,
aggregate_id: &AggregateId,
) -> String {
format!(
"tenant.{}.aggregate.{}.{}",
tenant_id.as_str(),
aggregate_type.as_str(),
aggregate_id
)
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
#[test]
fn stream_client_is_send_sync() {
fn assert_send_sync<T: Send + Sync>() {}
assert_send_sync::<StreamClient>();
}
#[test]
fn subject_naming_includes_tenant() {
let tenant_id = TenantId::new("acme-corp");
let aggregate_type = AggregateType::from("Account");
let aggregate_id = AggregateId::new_v7();
let subject = build_subject(&tenant_id, &aggregate_type, &aggregate_id);
assert!(subject.starts_with("tenant.acme-corp.aggregate."));
}
#[test]
fn stream_config_settings_defaults() {
let settings = StreamConfigSettings::default();
assert_eq!(settings.max_messages, 10_000_000);
}
#[test]
fn circuit_breaker_accessible() {
let rt = tokio::runtime::Runtime::new().unwrap();
rt.block_on(async {
let cb = CircuitBreaker::new();
assert!(cb.is_closed());
});
}
#[tokio::test]
async fn publish_and_fetch_events_with_tenant() {
let stream = StreamClient::in_memory();
let tenant_id = TenantId::new("tenant-a");
let aggregate_id = AggregateId::new_v7();
let aggregate_type = AggregateType::from("Account");
let e1 = Event::new(
tenant_id.clone(),
aggregate_id.clone(),
aggregate_type.clone(),
Version::from(1),
"deposited",
json!({"amount": 10}),
uuid::Uuid::now_v7(),
);
let e2 = Event::new(
tenant_id.clone(),
aggregate_id.clone(),
aggregate_type.clone(),
Version::from(2),
"deposited",
json!({"amount": 20}),
uuid::Uuid::now_v7(),
);
stream.publish_events(vec![e1, e2]).await.unwrap();
let fetched = stream
.fetch_events(&tenant_id, &aggregate_id, Version::initial())
.await
.unwrap();
assert_eq!(fetched.len(), 2);
}
#[tokio::test]
async fn fetch_with_version_filter() {
let stream = StreamClient::in_memory();
let tenant_id = TenantId::new("tenant-a");
let aggregate_id = AggregateId::new_v7();
let aggregate_type = AggregateType::from("Account");
let mut events = Vec::new();
for v in 1..=4 {
events.push(Event::new(
tenant_id.clone(),
aggregate_id.clone(),
aggregate_type.clone(),
Version::from(v),
"deposited",
json!({"amount": v}),
uuid::Uuid::now_v7(),
));
}
stream.publish_events(events).await.unwrap();
let fetched = stream
.fetch_events(&tenant_id, &aggregate_id, Version::from(2))
.await
.unwrap();
assert_eq!(fetched.len(), 2);
assert!(fetched.iter().all(|e| e.version > Version::from(2)));
}
#[tokio::test]
async fn tenant_isolation_fetch_returns_empty() {
let stream = StreamClient::in_memory();
let tenant_a = TenantId::new("tenant-a");
let tenant_b = TenantId::new("tenant-b");
let aggregate_id = AggregateId::new_v7();
let aggregate_type = AggregateType::from("Account");
let e1 = Event::new(
tenant_a.clone(),
aggregate_id.clone(),
aggregate_type.clone(),
Version::from(1),
"deposited",
json!({"amount": 10}),
uuid::Uuid::now_v7(),
);
stream.publish_events(vec![e1]).await.unwrap();
let fetched = stream
.fetch_events(&tenant_b, &aggregate_id, Version::initial())
.await
.unwrap();
assert!(fetched.is_empty());
}
}

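The tests above exercise the in-memory fetch path; the live-subscription path can be exercised the same way. A minimal sketch of such a test (not in this diff), which would have to live in the `tests` module above since `StreamClient::in_memory` is `#[cfg(test)]`; the test name is illustrative:

```rust
#[tokio::test]
async fn live_subscription_sees_new_events() {
    use futures::StreamExt;
    let stream = StreamClient::in_memory();
    let tenant_id = TenantId::new("tenant-a");
    let aggregate_type = AggregateType::from("Account");
    let aggregate_id = AggregateId::new_v7();
    // Subscribe before publishing: both backends deliver only new events
    // (the JetStream consumer uses DeliverPolicy::New).
    let mut live = stream
        .subscribe_to_events(
            tenant_id.clone(),
            aggregate_type.clone(),
            aggregate_id.clone(),
        )
        .await
        .unwrap();
    let event = Event::new(
        tenant_id,
        aggregate_id,
        aggregate_type,
        Version::from(1),
        "deposited",
        serde_json::json!({"amount": 10}),
        uuid::Uuid::now_v7(),
    );
    stream.publish_events(vec![event]).await.unwrap();
    let seen = live.next().await.unwrap();
    assert_eq!(seen.event_type, "deposited");
    assert_eq!(seen.version, Version::from(1));
}
```
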
332
aggregate/src/swarm.rs Normal file
View File

@@ -0,0 +1,332 @@
use futures::StreamExt;
use serde::{Deserialize, Serialize};
use thiserror::Error;
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct TenantPlacementConfig {
pub virtual_nodes_per_node: usize,
pub nodes: Vec<NodePlacement>,
pub tenants: std::collections::HashMap<String, String>,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct NodePlacement {
pub node_id: String,
pub tenant_range: String,
}
pub fn placement_constraint_for_tenant_range(tenant_range: &str) -> String {
format!("node.labels.tenant_range == {}", tenant_range)
}
pub fn placement_constraints_for_node(node: &NodePlacement) -> Vec<String> {
vec![placement_constraint_for_tenant_range(&node.tenant_range)]
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MigrationPlan {
pub tenant_id: String,
pub from_node: String,
pub to_node: String,
pub actions: Vec<MigrationAction>,
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MigrationAction {
DrainTenant { tenant_id: String },
UpdatePlacement { tenant_id: String, node_id: String },
ReloadConfig,
}
pub fn plan_graceful_tenant_migration(
tenant_id: impl Into<String>,
from_node: impl Into<String>,
to_node: impl Into<String>,
) -> MigrationPlan {
let tenant_id = tenant_id.into();
let from_node = from_node.into();
let to_node = to_node.into();
MigrationPlan {
tenant_id: tenant_id.clone(),
from_node,
to_node: to_node.clone(),
actions: vec![
MigrationAction::DrainTenant {
tenant_id: tenant_id.clone(),
},
MigrationAction::UpdatePlacement {
tenant_id,
node_id: to_node,
},
MigrationAction::ReloadConfig,
],
}
}
#[derive(Debug, Error)]
pub enum TenantPlacementKvError {
#[error("NATS connection error: {0}")]
Connection(String),
#[error("KV error: {0}")]
Kv(String),
#[error("Config parse error: {0}")]
Parse(String),
#[error("Unsupported key operation")]
UnsupportedOperation,
}
#[derive(Debug, Clone)]
pub struct TenantPlacementKvClient {
kv: async_nats::jetstream::kv::Store,
}
impl TenantPlacementKvClient {
pub async fn connect(
nats_url: impl Into<String>,
bucket: impl Into<String>,
) -> Result<Self, TenantPlacementKvError> {
Self::connect_with_timeout(nats_url, bucket, std::time::Duration::from_secs(2)).await
}
pub async fn connect_with_timeout(
nats_url: impl Into<String>,
bucket: impl Into<String>,
timeout: std::time::Duration,
) -> Result<Self, TenantPlacementKvError> {
let nats_url = nats_url.into();
let bucket = bucket.into();
let client = tokio::time::timeout(timeout, async_nats::connect(nats_url))
.await
.map_err(|_| TenantPlacementKvError::Connection("connect timeout".to_string()))?
.map_err(|e| TenantPlacementKvError::Connection(e.to_string()))?;
let jetstream = async_nats::jetstream::new(client);
let kv = match jetstream.get_key_value(&bucket).await {
Ok(kv) => kv,
Err(_) => jetstream
.create_key_value(async_nats::jetstream::kv::Config {
bucket: bucket.clone(),
..Default::default()
})
.await
.map_err(|e| TenantPlacementKvError::Kv(e.to_string()))?,
};
Ok(Self { kv })
}
pub async fn get_json(
&self,
key: &str,
) -> Result<Option<serde_json::Value>, TenantPlacementKvError> {
let entry = self
.kv
.entry(key)
.await
.map_err(|e| TenantPlacementKvError::Kv(e.to_string()))?;
match entry {
Some(entry) => serde_json::from_slice::<serde_json::Value>(&entry.value)
.map(Some)
.map_err(|e| TenantPlacementKvError::Parse(e.to_string())),
None => Ok(None),
}
}
pub async fn put_json(
&self,
key: &str,
value: &serde_json::Value,
) -> Result<(), TenantPlacementKvError> {
let bytes =
serde_json::to_vec(value).map_err(|e| TenantPlacementKvError::Parse(e.to_string()))?;
self.kv
.put(key, bytes.into())
.await
.map_err(|e| TenantPlacementKvError::Kv(e.to_string()))?;
Ok(())
}
pub async fn watch_json(
&self,
pattern: &str,
) -> Result<
std::pin::Pin<
Box<
dyn futures::Stream<Item = Result<serde_json::Value, TenantPlacementKvError>>
+ Send,
>,
>,
TenantPlacementKvError,
> {
let watch = self
.kv
.watch(pattern)
.await
.map_err(|e| TenantPlacementKvError::Kv(e.to_string()))?;
Ok(Box::pin(watch.filter_map(|entry| async move {
match entry {
Ok(entry) => match entry.operation {
async_nats::jetstream::kv::Operation::Put => {
match serde_json::from_slice::<serde_json::Value>(&entry.value) {
Ok(v) => Some(Ok(v)),
Err(e) => Some(Err(TenantPlacementKvError::Parse(e.to_string()))),
}
}
async_nats::jetstream::kv::Operation::Delete
| async_nats::jetstream::kv::Operation::Purge => None,
},
Err(e) => Some(Err(TenantPlacementKvError::Kv(e.to_string()))),
}
})))
}
pub async fn load_config_with_fallback(
nats_url: impl Into<String>,
bucket: impl Into<String>,
key: &str,
fallback_path: &str,
) -> Result<serde_json::Value, TenantPlacementKvError> {
let try_kv = match Self::connect_with_timeout(
nats_url,
bucket,
std::time::Duration::from_millis(300),
)
.await
{
Ok(client) => match client.get_json(key).await {
Ok(Some(v)) => Ok(v),
Ok(None) => Err(TenantPlacementKvError::Kv("missing key".to_string())),
Err(e) => Err(e),
},
Err(e) => Err(e),
};
match try_kv {
Ok(v) => Ok(v),
Err(_) => {
let raw = std::fs::read_to_string(fallback_path)
.map_err(|e| TenantPlacementKvError::Kv(e.to_string()))?;
if fallback_path.ends_with(".json") {
serde_json::from_str(&raw)
.map_err(|e| TenantPlacementKvError::Parse(e.to_string()))
} else {
let yaml: serde_yaml::Value = serde_yaml::from_str(&raw)
.map_err(|e| TenantPlacementKvError::Parse(e.to_string()))?;
let json = serde_json::to_value(yaml)
.map_err(|e| TenantPlacementKvError::Parse(e.to_string()))?;
Ok(json)
}
}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use futures::StreamExt;
#[test]
fn stack_file_is_valid_yaml() {
let raw = std::fs::read_to_string("../swarm/stacks/platform.yml").unwrap();
let _: serde_yaml::Value = serde_yaml::from_str(&raw).unwrap();
}
#[test]
fn stack_services_count() {
let raw = std::fs::read_to_string("../swarm/stacks/platform.yml").unwrap();
let doc: serde_yaml::Value = serde_yaml::from_str(&raw).unwrap();
let services = doc.get("services").and_then(|v| v.as_mapping()).unwrap();
assert!(services.contains_key(serde_yaml::Value::String("nats".to_string())));
assert!(services.contains_key(serde_yaml::Value::String("gateway".to_string())));
assert!(services.contains_key(serde_yaml::Value::String("aggregate".to_string())));
}
#[test]
fn tenant_placement_config_loads() {
let raw = std::fs::read_to_string("../swarm/tenant-placement.yaml").unwrap();
let cfg: TenantPlacementConfig = serde_yaml::from_str(&raw).unwrap();
assert_eq!(cfg.virtual_nodes_per_node, 200);
assert!(cfg.nodes.iter().any(|n| n.node_id == "node-a"));
assert_eq!(cfg.tenants.get("tenant-a").unwrap(), "node-a");
}
#[test]
fn placement_constraint_generated_correctly() {
let node = NodePlacement {
node_id: "node-a".to_string(),
tenant_range: "00-3f".to_string(),
};
let constraints = placement_constraints_for_node(&node);
assert_eq!(constraints, vec!["node.labels.tenant_range == 00-3f"]);
}
#[test]
fn graceful_tenant_migration_plan_is_ordered() {
let plan = plan_graceful_tenant_migration("tenant-a", "node-a", "node-b");
assert_eq!(plan.tenant_id, "tenant-a");
assert_eq!(
plan.actions,
vec![
MigrationAction::DrainTenant {
tenant_id: "tenant-a".to_string(),
},
MigrationAction::UpdatePlacement {
tenant_id: "tenant-a".to_string(),
node_id: "node-b".to_string(),
},
MigrationAction::ReloadConfig,
]
);
}
#[tokio::test]
async fn tenant_placement_kv_falls_back_to_local_file() {
let tmp = tempfile::tempdir().unwrap();
let path = tmp.path().join("placement.yaml");
std::fs::write(
&path,
r#"
virtual_nodes_per_node: 100
nodes:
- node_id: "node-a"
tenant_range: "00-ff"
tenants:
tenant-a: "node-a"
"#,
)
.unwrap();
let cfg = TenantPlacementKvClient::load_config_with_fallback(
"nats://127.0.0.1:1",
"TENANT_PLACEMENT",
"placement",
path.to_string_lossy().as_ref(),
)
.await
.unwrap();
assert_eq!(cfg["virtual_nodes_per_node"], 100);
assert_eq!(cfg["tenants"]["tenant-a"], "node-a");
}
#[tokio::test]
async fn tenant_placement_kv_watch_returns_stream() {
let result = TenantPlacementKvClient::connect_with_timeout(
"nats://127.0.0.1:1",
"TENANT_PLACEMENT",
std::time::Duration::from_millis(50),
)
.await;
assert!(result.is_err());
let mut stream =
futures::stream::empty::<Result<serde_json::Value, TenantPlacementKvError>>();
assert!(stream.next().await.is_none());
}
}

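The `watch_json` API above is what makes placement hot-reloadable. A minimal sketch of a reload loop, assuming a reachable NATS server and the `TENANT_PLACEMENT` bucket and `placement` key used in the tests above; `watch_placement` is illustrative, not part of this crate:

```rust
use futures::StreamExt;

// Hypothetical hot-reload loop over the placement key: each Put is
// re-parsed into the typed config, and malformed updates are skipped.
async fn watch_placement(nats_url: &str) -> Result<(), TenantPlacementKvError> {
    let client = TenantPlacementKvClient::connect(nats_url, "TENANT_PLACEMENT").await?;
    let mut updates = client.watch_json("placement").await?;
    while let Some(update) = updates.next().await {
        match serde_json::from_value::<TenantPlacementConfig>(update?) {
            Ok(cfg) => println!("placement updated: {} nodes", cfg.nodes.len()),
            Err(e) => eprintln!("ignoring malformed placement update: {e}"),
        }
    }
    Ok(())
}
```
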
65
aggregate/src/types/command.rs Normal file
View File

@@ -0,0 +1,65 @@
use crate::types::{AggregateId, AggregateType, TenantId};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::collections::HashMap;
use uuid::Uuid;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Command {
pub tenant_id: TenantId,
pub command_id: Uuid,
pub aggregate_id: AggregateId,
pub aggregate_type: AggregateType,
pub payload: Value,
pub metadata: HashMap<String, Value>,
}
impl Command {
pub fn new(
tenant_id: TenantId,
aggregate_id: AggregateId,
aggregate_type: AggregateType,
payload: Value,
) -> Self {
Self {
tenant_id,
command_id: Uuid::now_v7(),
aggregate_id,
aggregate_type,
payload,
metadata: HashMap::new(),
}
}
pub fn with_metadata(mut self, key: impl Into<String>, value: Value) -> Self {
self.metadata.insert(key.into(), value);
self
}
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
#[test]
fn command_serialization() {
let cmd = Command::new(
TenantId::new("acme-corp"),
AggregateId::new_v7(),
AggregateType::new("Account"),
json!({"type": "deposit", "amount": 100}),
);
let json = serde_json::to_string(&cmd).unwrap();
let decoded: Command = serde_json::from_str(&json).unwrap();
assert_eq!(cmd.command_id, decoded.command_id);
assert_eq!(cmd.aggregate_id, decoded.aggregate_id);
assert_eq!(cmd.tenant_id, decoded.tenant_id);
}
#[test]
fn command_is_send_sync() {
fn assert_send_sync<T: Send + Sync>() {}
assert_send_sync::<Command>();
}
}

58
aggregate/src/types/error.rs Normal file
View File

@@ -0,0 +1,58 @@
use crate::types::{AggregateId, TenantId, Version};
use thiserror::Error;
#[derive(Debug, Clone, Error)]
pub enum AggregateError {
#[error("Tenant access denied for tenant: {tenant_id}")]
TenantAccessDenied { tenant_id: TenantId },
#[error("Tenant not hosted on this shard: {tenant_id}")]
TenantNotHosted { tenant_id: TenantId },
#[error("Tenant is draining: {tenant_id}")]
TenantDraining { tenant_id: TenantId },
#[error("Validation error: {0}")]
ValidationError(String),
#[error("Version conflict: expected {expected}, actual {actual}")]
VersionConflict { expected: Version, actual: Version },
#[error("Storage error: {0}")]
StorageError(String),
#[error("Stream error: {0}")]
StreamError(String),
#[error("Rehydration error: {0}")]
RehydrationError(String),
#[error("Decide error: {0}")]
DecideError(String),
#[error("Apply error: {0}")]
ApplyError(String),
#[error("Not found: {0}")]
NotFound(AggregateId),
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn error_implements_traits() {
let err = AggregateError::TenantAccessDenied {
tenant_id: TenantId::new("other"),
};
let _ = format!("{}", err);
let _: &dyn std::error::Error = &err;
}
#[test]
fn error_is_send_sync() {
fn assert_send_sync<T: Send + Sync>() {}
assert_send_sync::<AggregateError>();
}
}

78
aggregate/src/types/event.rs Normal file
View File

@@ -0,0 +1,78 @@
use crate::types::{AggregateId, AggregateType, TenantId, Version};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use uuid::Uuid;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Event {
pub tenant_id: TenantId,
pub event_id: Uuid,
pub aggregate_id: AggregateId,
pub aggregate_type: AggregateType,
pub version: Version,
pub event_type: String,
pub payload: Value,
pub command_id: Uuid,
pub timestamp: DateTime<Utc>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub correlation_id: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub traceparent: Option<String>,
}
impl Event {
pub fn new(
tenant_id: TenantId,
aggregate_id: AggregateId,
aggregate_type: AggregateType,
version: Version,
event_type: impl Into<String>,
payload: Value,
command_id: Uuid,
) -> Self {
Self {
tenant_id,
event_id: Uuid::now_v7(),
aggregate_id,
aggregate_type,
version,
event_type: event_type.into(),
payload,
command_id,
timestamp: Utc::now(),
correlation_id: None,
traceparent: None,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
#[test]
fn event_serialization() {
let event = Event::new(
TenantId::new("acme-corp"),
AggregateId::new_v7(),
AggregateType::new("Account"),
Version::from(1),
"Deposited",
json!({"amount": 100}),
Uuid::now_v7(),
);
let json = serde_json::to_string(&event).unwrap();
let decoded: Event = serde_json::from_str(&json).unwrap();
assert_eq!(event.event_id, decoded.event_id);
assert_eq!(event.version, decoded.version);
assert_eq!(event.tenant_id, decoded.tenant_id);
}
#[test]
fn event_is_send_sync() {
fn assert_send_sync<T: Send + Sync>() {}
assert_send_sync::<Event>();
}
}

157
aggregate/src/types/id.rs Normal file
View File

@@ -0,0 +1,157 @@
use serde::{Deserialize, Serialize};
use std::fmt;
use std::str::FromStr;
pub type TenantId = shared::TenantId;
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct AggregateId(uuid::Uuid);
impl AggregateId {
pub fn new_v7() -> Self {
Self(uuid::Uuid::now_v7())
}
pub fn from_uuid(uuid: uuid::Uuid) -> Self {
Self(uuid)
}
pub fn as_uuid(&self) -> &uuid::Uuid {
&self.0
}
}
impl fmt::Display for AggregateId {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.0)
}
}
impl FromStr for AggregateId {
type Err = uuid::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(Self(uuid::Uuid::parse_str(s)?))
}
}
impl Default for AggregateId {
fn default() -> Self {
Self::new_v7()
}
}
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct AggregateType(String);
impl AggregateType {
pub fn new(ty: impl Into<String>) -> Self {
Self(ty.into())
}
pub fn as_str(&self) -> &str {
&self.0
}
}
impl fmt::Display for AggregateType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.0)
}
}
impl From<&str> for AggregateType {
fn from(s: &str) -> Self {
Self(s.to_string())
}
}
impl From<String> for AggregateType {
fn from(s: String) -> Self {
Self(s)
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub struct Version(u64);
impl Version {
pub const fn initial() -> Self {
Self(0)
}
pub const fn from_u64(v: u64) -> Self {
Self(v)
}
pub const fn as_u64(self) -> u64 {
self.0
}
pub fn increment(self) -> Self {
Self(self.0 + 1)
}
}
impl Default for Version {
fn default() -> Self {
Self::initial()
}
}
impl From<u64> for Version {
fn from(v: u64) -> Self {
Self(v)
}
}
impl fmt::Display for Version {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.0)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn tenant_id_serialization_roundtrip() {
let id = TenantId::new("acme-corp");
let json = serde_json::to_string(&id).unwrap();
let decoded: TenantId = serde_json::from_str(&json).unwrap();
assert_eq!(id, decoded);
}
#[test]
fn tenant_id_default() {
let id = TenantId::default();
assert!(id.is_empty());
}
#[test]
fn aggregate_id_serialization_roundtrip() {
let id = AggregateId::new_v7();
let json = serde_json::to_string(&id).unwrap();
let decoded: AggregateId = serde_json::from_str(&json).unwrap();
assert_eq!(id, decoded);
}
#[test]
fn version_increment() {
let v = Version::initial();
assert_eq!(v.as_u64(), 0);
let v2 = v.increment();
assert_eq!(v2.as_u64(), 1);
assert_eq!(v.as_u64(), 0);
}
#[test]
fn types_are_send_sync() {
fn assert_send_sync<T: Send + Sync>() {}
assert_send_sync::<TenantId>();
assert_send_sync::<AggregateId>();
assert_send_sync::<AggregateType>();
assert_send_sync::<Version>();
}
}

61
aggregate/src/types/manifest.rs Normal file
View File

@@ -0,0 +1,61 @@
use crate::types::AggregateType;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProgramRef {
pub decide_program: String,
pub apply_program: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AggregateManifest {
pub aggregate_type: AggregateType,
pub programs: ProgramRef,
pub snapshot_threshold: Option<u64>,
}
impl AggregateManifest {
pub fn new(aggregate_type: AggregateType, programs: ProgramRef) -> Self {
Self {
aggregate_type,
programs,
snapshot_threshold: None,
}
}
pub fn with_snapshot_threshold(mut self, threshold: u64) -> Self {
self.snapshot_threshold = Some(threshold);
self
}
}
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ManifestRegistry {
aggregates: HashMap<String, AggregateManifest>,
}
impl ManifestRegistry {
pub fn new() -> Self {
Self {
aggregates: HashMap::new(),
}
}
pub fn register(&mut self, manifest: AggregateManifest) {
self.aggregates
.insert(manifest.aggregate_type.as_str().to_string(), manifest);
}
pub fn get(&self, aggregate_type: &AggregateType) -> Option<&AggregateManifest> {
self.aggregates.get(aggregate_type.as_str())
}
pub fn load_from_yaml(yaml: &str) -> Result<Self, serde_yaml::Error> {
serde_yaml::from_str(yaml)
}
pub fn load_from_json(json: &str) -> Result<Self, serde_json::Error> {
serde_json::from_str(json)
}
}

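Unlike the sibling type modules, manifest.rs lands without tests in this commit. A sketch of the YAML shape `load_from_yaml` expects, inferred from the serde derives above; the program paths are hypothetical:

```rust
#[test]
fn manifest_registry_loads_from_yaml() {
    // Map key is the aggregate type name; AggregateType and ProgramRef
    // deserialize from plain strings per the derives above.
    let yaml = r#"
aggregates:
  Account:
    aggregate_type: Account
    programs:
      decide_program: "programs/account/decide.js"
      apply_program: "programs/account/apply.js"
    snapshot_threshold: 100
"#;
    let registry = ManifestRegistry::load_from_yaml(yaml).unwrap();
    let manifest = registry.get(&AggregateType::new("Account")).unwrap();
    assert_eq!(manifest.programs.decide_program, "programs/account/decide.js");
    assert_eq!(manifest.snapshot_threshold, Some(100));
}
```
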
13
aggregate/src/types/mod.rs Normal file
View File

@@ -0,0 +1,13 @@
mod command;
mod error;
mod event;
mod id;
mod manifest;
mod snapshot;
pub use command::*;
pub use error::*;
pub use event::*;
pub use id::*;
pub use manifest::*;
pub use snapshot::*;

61
aggregate/src/types/snapshot.rs Normal file
View File

@@ -0,0 +1,61 @@
use crate::types::{AggregateId, AggregateType, TenantId, Version};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use serde_json::Value;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Snapshot {
pub tenant_id: TenantId,
pub aggregate_id: AggregateId,
pub aggregate_type: AggregateType,
pub version: Version,
pub state: Value,
pub created_at: DateTime<Utc>,
}
impl Snapshot {
pub fn new(
tenant_id: TenantId,
aggregate_id: AggregateId,
aggregate_type: AggregateType,
version: Version,
state: Value,
) -> Self {
Self {
tenant_id,
aggregate_id,
aggregate_type,
version,
state,
created_at: Utc::now(),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
#[test]
fn snapshot_serialization() {
let snap = Snapshot::new(
TenantId::new("acme-corp"),
AggregateId::new_v7(),
AggregateType::new("Account"),
Version::from(5),
json!({"balance": 100}),
);
let json = serde_json::to_string(&snap).unwrap();
let decoded: Snapshot = serde_json::from_str(&json).unwrap();
assert_eq!(snap.aggregate_id, decoded.aggregate_id);
assert_eq!(snap.version, decoded.version);
assert_eq!(snap.tenant_id, decoded.tenant_id);
}
#[test]
fn snapshot_is_send_sync() {
fn assert_send_sync<T: Send + Sync>() {}
assert_send_sync::<Snapshot>();
}
}

View File

@@ -0,0 +1,682 @@
use aggregate::observability::Observability;
use aggregate::runtime::RuntimeExecutor;
#[cfg(feature = "runtime-v8")]
use aggregate::runtime::{execute_apply_program, execute_decide_program};
use aggregate::server::{CommandRequest, HealthChecker};
use aggregate::storage::StorageClient;
use aggregate::types::{
AggregateError, AggregateId, AggregateType, Command, Event, TenantId, Version,
};
use serde_json::json;
use std::time::Duration;
use tempfile::TempDir;
fn create_test_storage() -> (TempDir, StorageClient) {
let dir = TempDir::new().expect("failed to create temp dir");
let path = dir.path().join("test.mdbx");
let storage =
StorageClient::open(path.to_string_lossy().to_string()).expect("failed to open storage");
(dir, storage)
}
#[cfg(feature = "runtime-v8")]
fn create_test_decide_program() -> &'static str {
r#"
function decide(state, command) {
if (command.type === "deposit") {
return [{ type: "deposited", amount: command.amount }];
}
if (command.type === "withdraw") {
if (state.balance < command.amount) {
return [{ type: "error", message: "insufficient funds" }];
}
return [{ type: "withdrawn", amount: command.amount }];
}
if (command.type === "open_account") {
return [{ type: "account_opened", initial_balance: command.initial_balance || 0 }];
}
return [];
}
"#
}
#[cfg(feature = "runtime-v8")]
fn create_test_apply_program() -> &'static str {
r#"
function apply(state, event) {
if (event.type === "account_opened") {
return { balance: event.initial_balance };
}
if (event.type === "deposited") {
return { balance: (state.balance || 0) + event.amount };
}
if (event.type === "withdrawn") {
return { balance: state.balance - event.amount };
}
return state;
}
"#
}
#[test]
fn storage_tenant_isolation() {
let (_dir, storage) = create_test_storage();
let rt = tokio::runtime::Runtime::new().unwrap();
rt.block_on(async {
let tenant_a = TenantId::new("tenant-a");
let tenant_b = TenantId::new("tenant-b");
let aggregate_id = AggregateId::new_v7();
use aggregate::types::Snapshot;
let snapshot_a = Snapshot::new(
tenant_a.clone(),
aggregate_id.clone(),
AggregateType::from("Account"),
Version::from(1),
json!({"balance": 100}),
);
storage.put_snapshot(&snapshot_a).await.unwrap();
let result_a = storage
.get_snapshot(&tenant_a, &aggregate_id)
.await
.unwrap();
let result_b = storage
.get_snapshot(&tenant_b, &aggregate_id)
.await
.unwrap();
assert!(result_a.is_some());
assert!(result_b.is_none());
});
}
#[test]
fn storage_version_conflict() {
let (_dir, storage) = create_test_storage();
let rt = tokio::runtime::Runtime::new().unwrap();
rt.block_on(async {
let tenant_id = TenantId::new("tenant-a");
let aggregate_id = AggregateId::new_v7();
use aggregate::types::Snapshot;
let snapshot_v1 = Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::from("Account"),
Version::from(1),
json!({"balance": 100}),
);
storage.put_snapshot(&snapshot_v1).await.unwrap();
let result = storage.put_snapshot(&snapshot_v1).await;
assert!(matches!(
result,
Err(AggregateError::VersionConflict { .. })
));
});
}
#[test]
fn storage_latest_version() {
let (_dir, storage) = create_test_storage();
let rt = tokio::runtime::Runtime::new().unwrap();
rt.block_on(async {
let tenant_id = TenantId::new("tenant-a");
let aggregate_id = AggregateId::new_v7();
let version = storage
.get_latest_version(&tenant_id, &aggregate_id)
.await
.unwrap();
assert!(version.is_none());
use aggregate::types::Snapshot;
let snapshot_v1 = Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::from("Account"),
Version::from(1),
json!({"balance": 100}),
);
storage.put_snapshot(&snapshot_v1).await.unwrap();
let version = storage
.get_latest_version(&tenant_id, &aggregate_id)
.await
.unwrap();
assert_eq!(version, Some(Version::from(1)));
let snapshot_v3 = Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::from("Account"),
Version::from(3),
json!({"balance": 300}),
);
storage.put_snapshot(&snapshot_v3).await.unwrap();
let version = storage
.get_latest_version(&tenant_id, &aggregate_id)
.await
.unwrap();
assert_eq!(version, Some(Version::from(3)));
});
}
#[test]
fn storage_none_for_nonexistent_aggregate() {
let (_dir, storage) = create_test_storage();
let rt = tokio::runtime::Runtime::new().unwrap();
rt.block_on(async {
let tenant_id = TenantId::new("tenant-a");
let aggregate_id = AggregateId::new_v7();
let snapshot = storage
.get_snapshot(&tenant_id, &aggregate_id)
.await
.unwrap();
assert!(snapshot.is_none());
});
}
#[cfg(feature = "runtime-v8")]
#[test]
fn runtime_decide_deposit() {
let rt = tokio::runtime::Runtime::new().unwrap();
rt.block_on(async {
let state = json!({"balance": 100});
let command = json!({"type": "deposit", "amount": 50});
let events = execute_decide_program(
&state,
&command,
create_test_decide_program(),
1_000_000,
Duration::from_secs(5),
)
.await
.unwrap();
assert_eq!(events.len(), 1);
assert_eq!(events[0]["type"], "deposited");
assert_eq!(events[0]["amount"], 50);
});
}
#[cfg(feature = "runtime-v8")]
#[test]
fn runtime_decide_withdraw_insufficient() {
let rt = tokio::runtime::Runtime::new().unwrap();
rt.block_on(async {
let state = json!({"balance": 10});
let command = json!({"type": "withdraw", "amount": 100});
let events = execute_decide_program(
&state,
&command,
create_test_decide_program(),
1_000_000,
Duration::from_secs(5),
)
.await
.unwrap();
assert_eq!(events.len(), 1);
assert_eq!(events[0]["type"], "error");
});
}
#[cfg(feature = "runtime-v8")]
#[test]
fn runtime_apply_transitions_state() {
let rt = tokio::runtime::Runtime::new().unwrap();
rt.block_on(async {
let state = json!({"balance": 100});
let event = json!({"type": "deposited", "amount": 50});
let new_state = execute_apply_program(
&state,
&event,
create_test_apply_program(),
1_000_000,
Duration::from_secs(5),
)
.await
.unwrap();
assert_eq!(new_state["balance"], 150);
});
}
#[cfg(feature = "runtime-v8")]
#[test]
fn runtime_determinism() {
let rt = tokio::runtime::Runtime::new().unwrap();
rt.block_on(async {
let state = json!({"balance": 100});
let command = json!({"type": "deposit", "amount": 50});
let r1 = execute_decide_program(
&state,
&command,
create_test_decide_program(),
1_000_000,
Duration::from_secs(5),
)
.await
.unwrap();
let r2 = execute_decide_program(
&state,
&command,
create_test_decide_program(),
1_000_000,
Duration::from_secs(5),
)
.await
.unwrap();
assert_eq!(r1, r2);
});
}
#[test]
fn command_request_tenant_extraction() {
let tenant_id = TenantId::new("acme-corp");
let aggregate_id = AggregateId::new_v7();
let request = CommandRequest::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::from("Account"),
json!({"type": "deposit", "amount": 100}),
)
.with_header("x-request-id", "req-123")
.with_header("x-tenant-id", "override-tenant");
assert_eq!(request.tenant_id, tenant_id);
assert_eq!(
request.headers.get("x-request-id"),
Some(&"req-123".to_string())
);
}
#[test]
fn health_checker_tracks_state() {
let checker = HealthChecker::new();
let status = checker.check();
assert!(status.is_healthy());
assert!(checker.is_ready());
assert!(checker.is_live());
checker.set_storage_healthy(false);
checker.set_stream_healthy(false);
assert!(!checker.is_ready());
checker.set_storage_healthy(true);
checker.set_stream_healthy(true);
assert!(checker.is_ready());
}
#[test]
fn observability_metrics_export() {
let obs = Observability::default();
let span = obs.start_command_span("agg-123", "Account", "tenant-a", "deposit", None, None);
obs.record_command_success(&span, 2);
let metrics = obs.export_metrics();
assert!(metrics.contains("commands_total"));
assert!(metrics.contains("command_duration"));
}
#[test]
fn version_increment_and_ordering() {
let v0 = Version::initial();
assert_eq!(v0.as_u64(), 0);
let v1 = v0.increment();
assert_eq!(v1.as_u64(), 1);
assert_eq!(v0.as_u64(), 0);
let v2 = v1.increment();
assert_eq!(v2.as_u64(), 2);
assert!(v0 < v1);
assert!(v1 < v2);
}
#[test]
fn tenant_id_validation() {
let valid_ids = vec!["acme-corp", "tenant_123", "my-tenant", "Tenant1"];
let invalid_ids = vec!["tenant@corp", "tenant name", "tenant/id"];
for id in valid_ids {
let tenant_id = TenantId::new(id);
let chars_valid = tenant_id
.as_str()
.chars()
.all(|c| c.is_alphanumeric() || c == '-' || c == '_');
assert!(chars_valid, "Expected {} to be valid", id);
}
for id in invalid_ids {
let tenant_id = TenantId::new(id);
let chars_valid = tenant_id
.as_str()
.chars()
.all(|c| c.is_alphanumeric() || c == '-' || c == '_');
assert!(!chars_valid, "Expected {} to be invalid", id);
}
}
#[test]
fn aggregate_id_generation() {
let id1 = AggregateId::new_v7();
let id2 = AggregateId::new_v7();
assert_ne!(id1, id2);
let display = format!("{}", id1);
assert!(!display.is_empty());
}
#[test]
fn event_creation() {
let tenant_id = TenantId::new("tenant-a");
let aggregate_id = AggregateId::new_v7();
let command_id = uuid::Uuid::now_v7();
let event = Event::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::from("Account"),
Version::from(1),
"deposited".to_string(),
json!({"amount": 100}),
command_id,
);
assert_eq!(event.tenant_id, tenant_id);
assert_eq!(event.aggregate_id, aggregate_id);
assert_eq!(event.version, Version::from(1));
assert_eq!(event.event_type, "deposited");
}
#[test]
fn command_creation() {
let tenant_id = TenantId::new("tenant-a");
let aggregate_id = AggregateId::new_v7();
let command = Command::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::from("Account"),
json!({"type": "deposit", "amount": 100}),
);
assert_eq!(command.tenant_id, tenant_id);
assert_eq!(command.aggregate_id, aggregate_id);
assert_eq!(command.payload["type"], "deposit");
}
#[test]
fn snapshot_creation() {
let tenant_id = TenantId::new("tenant-a");
let aggregate_id = AggregateId::new_v7();
let snapshot = aggregate::types::Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::from("Account"),
Version::from(5),
json!({"balance": 500}),
);
assert_eq!(snapshot.tenant_id, tenant_id);
assert_eq!(snapshot.aggregate_id, aggregate_id);
assert_eq!(snapshot.version, Version::from(5));
assert_eq!(snapshot.state["balance"], 500);
}
#[test]
fn circuit_breaker_pattern() {
use aggregate::storage::CircuitBreaker;
let mut cb = CircuitBreaker::new()
.with_failure_threshold(3)
.with_reset_timeout(Duration::from_millis(50));
assert!(cb.is_closed());
cb.record_failure();
cb.record_failure();
cb.record_failure();
assert!(cb.is_open());
std::thread::sleep(Duration::from_millis(60));
assert!(!cb.is_closed());
assert!(!cb.is_open());
}
#[test]
fn error_types_are_send_sync() {
fn assert_send_sync<T: Send + Sync>() {}
assert_send_sync::<AggregateError>();
assert_send_sync::<aggregate::server::ServerError>();
}
#[test]
fn all_types_are_send_sync() {
fn assert_send_sync<T: Send + Sync>() {}
assert_send_sync::<TenantId>();
assert_send_sync::<AggregateId>();
assert_send_sync::<AggregateType>();
assert_send_sync::<Version>();
assert_send_sync::<Command>();
assert_send_sync::<Event>();
assert_send_sync::<StorageClient>();
assert_send_sync::<RuntimeExecutor>();
assert_send_sync::<Observability>();
assert_send_sync::<HealthChecker>();
}
#[test]
fn concurrent_storage_operations() {
let (_dir, storage) = create_test_storage();
let rt = tokio::runtime::Runtime::new().unwrap();
rt.block_on(async {
use aggregate::types::Snapshot;
use std::sync::Arc;
use tokio::task::JoinSet;
let storage = Arc::new(storage);
let mut tasks = JoinSet::new();
for i in 0..10 {
let storage = storage.clone();
tasks.spawn(async move {
let tenant_id = TenantId::new(format!("tenant-{}", i % 3));
let aggregate_id = AggregateId::new_v7();
let snapshot = Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::from("Account"),
Version::from(1),
json!({"balance": i * 100}),
);
storage.put_snapshot(&snapshot).await.unwrap();
let loaded = storage
.get_snapshot(&tenant_id, &aggregate_id)
.await
.unwrap();
assert!(loaded.is_some());
loaded.unwrap()
});
}
let mut results = Vec::new();
while let Some(result) = tasks.join_next().await {
results.push(result.unwrap());
}
assert_eq!(results.len(), 10);
});
}
#[test]
fn tenant_isolation_e2e() {
let (_dir, storage) = create_test_storage();
let rt = tokio::runtime::Runtime::new().unwrap();
rt.block_on(async {
use aggregate::types::Snapshot;
let tenant_a = TenantId::new("tenant-a");
let tenant_b = TenantId::new("tenant-b");
let aggregate_id = AggregateId::new_v7();
let snapshot_a = Snapshot::new(
tenant_a.clone(),
aggregate_id.clone(),
AggregateType::from("Account"),
Version::from(1),
json!({"balance": 1000, "owner": "Alice"}),
);
let snapshot_b = Snapshot::new(
tenant_b.clone(),
aggregate_id.clone(),
AggregateType::from("Account"),
Version::from(1),
json!({"balance": 500, "owner": "Bob"}),
);
storage.put_snapshot(&snapshot_a).await.unwrap();
storage.put_snapshot(&snapshot_b).await.unwrap();
let loaded_a = storage
.get_snapshot(&tenant_a, &aggregate_id)
.await
.unwrap()
.unwrap();
let loaded_b = storage
.get_snapshot(&tenant_b, &aggregate_id)
.await
.unwrap()
.unwrap();
assert_eq!(loaded_a.state["owner"], "Alice");
assert_eq!(loaded_a.state["balance"], 1000);
assert_eq!(loaded_b.state["owner"], "Bob");
assert_eq!(loaded_b.state["balance"], 500);
});
}
#[test]
fn bank_account_full_scenario() {
let (_dir, storage) = create_test_storage();
let rt = tokio::runtime::Runtime::new().unwrap();
rt.block_on(async {
use aggregate::types::Snapshot;
let tenant_id = TenantId::new("bank-test");
let aggregate_id = AggregateId::new_v7();
let snapshot_v1 = Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::from("BankAccount"),
Version::from(1),
json!({"balance": 0}),
);
storage.put_snapshot(&snapshot_v1).await.unwrap();
let snapshot_v2 = Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::from("BankAccount"),
Version::from(2),
json!({"balance": 100}),
);
storage.put_snapshot(&snapshot_v2).await.unwrap();
let snapshot_v3 = Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::from("BankAccount"),
Version::from(3),
json!({"balance": 50}),
);
storage.put_snapshot(&snapshot_v3).await.unwrap();
let loaded = storage
.get_snapshot(&tenant_id, &aggregate_id)
.await
.unwrap()
.unwrap();
assert_eq!(loaded.version, Version::from(3));
assert_eq!(loaded.state["balance"], 50);
let version = storage
.get_latest_version(&tenant_id, &aggregate_id)
.await
.unwrap();
assert_eq!(version, Some(Version::from(3)));
});
}
#[test]
fn version_sequence_integrity() {
let (_dir, storage) = create_test_storage();
let rt = tokio::runtime::Runtime::new().unwrap();
rt.block_on(async {
use aggregate::types::Snapshot;
let tenant_id = TenantId::new("version-test");
let aggregate_id = AggregateId::new_v7();
for v in 1..=5 {
let snapshot = Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::from("Counter"),
Version::from(v),
json!({"count": v}),
);
storage.put_snapshot(&snapshot).await.unwrap();
}
let loaded = storage
.get_snapshot(&tenant_id, &aggregate_id)
.await
.unwrap()
.unwrap();
assert_eq!(loaded.version, Version::from(5));
assert_eq!(loaded.state["count"], 5);
let duplicate = Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::from("Counter"),
Version::from(5),
json!({"count": 999}),
);
let result = storage.put_snapshot(&duplicate).await;
assert!(matches!(
result,
Err(AggregateError::VersionConflict { .. })
));
});
}

43
control/.gitignore vendored Normal file
View File

@@ -0,0 +1,43 @@
/target/
/target-*/
**/target/
*.rs.bk
*.pdb
*.dSYM/
*.orig
*.rej
*.log
*.swp
*.swo
*~
.DS_Store
.idea/
.vscode/
.env
.env.*
.envrc
.direnv/
docker-compose.override.yml
*.mdbx
*.mdbx-*
*.mdbx-lock
*.mdbx.dat
*.mdbx.lck
*.mdb
*.db
/data/
/tmp/
/ui/node_modules/
/ui/dist/
/ui/dist-ssr/
/ui/.eslintcache
/ui/.vite/
/coverage/
lcov.info
*.profraw
*.profdata

341
control/DEVELOPMENT_PLAN.md Normal file
View File

@@ -0,0 +1,341 @@
# Development Plan: Control Plane (Admin UI + Observability + Production Ops)
## Overview
This plan breaks down the Control Plane implementation into milestones ordered by dependency. Each milestone includes:
- **Tasks** with clear deliverables
- **Test Requirements** (unit tests + tautological tests + integration tests where applicable)
- **Dependencies** on previous milestones
**Development Approach:**
1. Complete one milestone at a time
2. Write tests before implementation (TDD where applicable)
3. All tests must pass before moving to the next milestone
4. Mark tasks complete with `[x]` as you progress
This plan is intentionally aligned with the style and gating discipline used in sibling repos (see: [gateway/DEVELOPMENT_PLAN.md](file:///Users/vlad/Developer/cloudlysis/gateway/DEVELOPMENT_PLAN.md), [runner/DEVELOPMENT_PLAN.md](file:///Users/vlad/Developer/cloudlysis/runner/DEVELOPMENT_PLAN.md)).
---
## Milestone 0: Repo Bootstrap (Dev Ergonomics + Guardrails)
**Goal:** Establish canonical commands, CI entrypoints, and integration-test gating so later milestones can be executed and verified consistently.
### Tasks
- [x] **0.1** Define canonical local commands for the repo
- UI:
- `npm run lint`
- `npm run typecheck`
- `npm run test`
- `npm run build`
- Control Plane API:
- `cargo test`
- `cargo fmt --check`
- `cargo clippy -- -D warnings`
- `cargo run -- --help`
- Docker/Swarm:
- `docker compose config` validation for local stacks (if used)
- `docker stack deploy ...` smoke validation for Swarm (gated, see Tests)
- [x] **0.2** Add a minimal CI workflow that runs the same commands as **0.1**
- [x] **0.3** Define integration-test gating conventions (a gating sketch follows this task list)
- Docker/Swarm integration tests:
- Mark as ignored by default and run only when `CONTROL_TEST_DOCKER=1` is set
- Example: `CONTROL_TEST_DOCKER=1 cargo test -- --ignored`
- NATS-dependent integration tests:
- Mark as ignored by default and run only when `CONTROL_TEST_NATS_URL` is set
- Example: `CONTROL_TEST_NATS_URL=nats://127.0.0.1:4222 cargo test -- --ignored`
- [x] **0.4** Define baseline operational invariants (checklist for later milestones)
- No privileged action without RBAC + audit event
- No multi-step operation without idempotency key + job record
- Always propagate `tenant_id` (when applicable) end-to-end
- Always propagate request/flow identifiers end-to-end (logs + downstream calls):
- `x-request-id` (per HTTP request)
- `x-correlation-id` (per user-visible flow/job; generated by the Gateway when missing)
- `traceparent` (W3C trace context; started by the Gateway when missing)
- Secrets never appear in logs (Authorization headers, tokens, credentials, Grafana admin creds)
- No tenant-level metrics without bounded cardinality rules
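
A sketch of the 0.3 gating convention in practice; the test name is illustrative, and the double guard keeps the test a no-op even under `--ignored` when the variable is unset:

```rust
#[tokio::test]
#[ignore = "requires CONTROL_TEST_NATS_URL"]
async fn nats_is_reachable_when_gated() {
    // Skip quietly unless the operator opted in via the environment.
    let Ok(url) = std::env::var("CONTROL_TEST_NATS_URL") else { return };
    let _client = async_nats::connect(url).await.expect("nats reachable");
}
```
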
### Tests
- [x] **T0.1** Tautological test: test harness runs for both subprojects (UI + API)
- [x] **T0.2** Lint + typecheck + unit tests pass
- [x] **T0.3** Docker config validation passes (compose/stack linting tests)
---
## Milestone 1: Admin UI Foundation (UltraBase UX Reuse)
**Goal:** Bring up the Admin UI with the UltraBase component system and navigation skeleton, adapted to Cloudlysis page structure.
### Dependencies
- Milestone 0 (repo bootstrap)
### Exit Criteria
- Admin UI builds successfully and passes unit/type checks
- UI navigation skeleton matches the PRD information architecture
### Tasks
- [x] **1.1** Initialize Admin UI project (Vite + React + TypeScript)
- Choose and wire lint/typecheck/test/build tooling to match the canonical commands in **0.1**
- Adopt the baseline dependencies used by UltraBase control-plane admin UI where available
- Establish UI module layout for: components, pages, routes, API client, auth/session utilities
- [x] **1.2** Reuse UltraBase UI primitives and styling tokens (adapted, not forked blindly)
- Buttons, inputs, tables, dropdowns, modal, toast, breadcrumbs
- [x] **1.3** Implement navigation skeleton and empty pages (route wiring only)
- Overview
- Tenants
- Users
- Sessions
- Roles & Permissions
- Config
- Definitions
- Scale & Placement
- Deployments
- Observability
- Audit Log
- Settings
- [x] **1.3a** Add correlation-first investigation affordances in the UI skeleton
- Global search box that accepts `x-request-id`, `x-correlation-id`, or `trace_id`
- “Investigate” links that open Grafana Explore prefilled for:
- Loki query scoped to `x-correlation-id` (and `x-request-id` when available)
- Tempo trace view when a `trace_id` is present
- Ensure jobs and audit log rows display and copy the relevant ids
- [x] **1.4** Implement API client stub with consistent error handling and request-id propagation
- Send `x-request-id` on every request (generate one when missing)
- Send `x-correlation-id` when continuing an existing UI flow; otherwise omit and use the Gateway-generated value returned in responses
- Send `traceparent` when continuing an existing trace; otherwise omit and use the Gateway-started trace (a `traceparent` parsing sketch follows this task list)
- Echo `x-request-id` and `x-correlation-id` on responses and surface them in error UX
- Persist the most recent ids in the UI so operators can copy/paste them into support tickets
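
For reference, extracting a `trace_id` from a W3C `traceparent` (`version-traceid-parentid-flags`) is a split-and-validate; this sketch illustrates the rule and is not the body of `shared::trace_id_from_traceparent`:

```rust
// Illustrative parse of a W3C traceparent, e.g.
// "00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01".
fn trace_id_from_traceparent(traceparent: &str) -> Option<&str> {
    let mut parts = traceparent.split('-');
    let _version = parts.next()?;
    let trace_id = parts.next()?;
    let _parent_id = parts.next()?;
    let _flags = parts.next()?;
    // The trace-id field is 32 hex chars and must not be all zeros.
    if trace_id.len() == 32
        && trace_id.bytes().all(|b| b.is_ascii_hexdigit())
        && trace_id.bytes().any(|b| b != b'0')
    {
        Some(trace_id)
    } else {
        None
    }
}
```
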
### Tests
- [x] **T1.1** UI typecheck passes
- [x] **T1.2** UI build passes
- [x] **T1.3** Routing smoke test: each route renders without runtime errors (headless DOM test)
---
## Milestone 2: Control Plane API Foundation (BFF / Admin API)
**Goal:** Provide the minimal API surface required for the Admin UI to authenticate, read core state, and display health/metrics.
### Dependencies
- Milestone 0 (repo bootstrap)
### Exit Criteria
- Control plane API runs as a container and exposes `/health`, `/ready`, `/metrics`
- Auth integration contract is defined (Gateway as source of truth) and enforced on admin endpoints
### Tasks
- [x] **2.1** Initialize Control Plane API service
- Rust (Axum + Tokio + tracing) to align with the rest of the node ecosystem
- Baseline endpoints: `GET /health`, `GET /ready`, `GET /metrics`
- [x] **2.2** Add request logging and correlation identifiers
- `x-request-id` propagation and structured logs (match Gateway conventions)
- Propagate `x-correlation-id` and `traceparent` on outbound calls
- Log fields: `request_id`, `correlation_id`, `trace_id`, `principal_id`, `tenant_id` (when applicable)
- Never log Authorization headers or tokens
- [x] **2.3** Implement authentication and authorization boundary
- Validate Gateway-issued access tokens (same signing config as Gateway; Control does not mint tokens)
- Extract principal identity from token claims (at minimum: `sub`, `session_id`)
- Enforce permissions at the API boundary (deny-by-default, rights strings stored in Gateway IAM state)
- Align `x-tenant-id` semantics with Gateway:
- Tenant-scoped endpoints require `x-tenant-id` and must reject missing/invalid values with 400
- Platform-scoped endpoints must not depend on `x-tenant-id`
- Prefer proxying to Gateway for IAM CRUD instead of duplicating identity/RBAC state:
- Control API may expose a thin BFF surface, but must preserve Gateway status codes and error text for pass-through routes
- [x] **2.4** Define “job” model for multi-step operations (API contract; see the idempotency sketch after this task list)
- `POST /admin/v1/jobs/*` returns `job_id`
- `GET /admin/v1/jobs/{job_id}` returns status + structured steps + errors
- Require an idempotency key for job creation (`Idempotency-Key` header), and make repeated creates safe
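
A sketch of the 2.4 idempotency contract reduced to its core: creation is keyed by the `Idempotency-Key`, so retries observe the same `job_id`. All names are illustrative; a real store would persist records and track step progress:

```rust
use std::collections::HashMap;

#[derive(Clone)]
struct JobRecord {
    job_id: uuid::Uuid,
    status: &'static str, // "pending" | "running" | "done" | "failed"
}

#[derive(Default)]
struct JobStore {
    by_idempotency_key: HashMap<String, JobRecord>,
}

impl JobStore {
    /// Repeated creates with the same key are safe: the first record wins.
    fn create(&mut self, idempotency_key: &str) -> JobRecord {
        self.by_idempotency_key
            .entry(idempotency_key.to_string())
            .or_insert_with(|| JobRecord {
                job_id: uuid::Uuid::now_v7(),
                status: "pending",
            })
            .clone()
    }
}

fn main() {
    let mut store = JobStore::default();
    let first = store.create("deploy-tenant-a-rev-7");
    let retry = store.create("deploy-tenant-a-rev-7");
    assert_eq!(first.job_id, retry.job_id); // idempotent by construction
}
```
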
### Tests
- [x] **T2.1** `GET /health` and `GET /ready` return 200
- [x] **T2.2** Unauthorized admin calls return 401/403 consistently
- [x] **T2.3** `x-tenant-id` behavior matches Gateway rules (400 on missing/invalid for tenant-scoped routes)
- [x] **T2.4** Type-level assertions: core state types are Send + Sync
---
## Milestone 3: Observability Stack Baseline (VM + Loki + Grafana)
**Goal:** Include a production-grade observability stack with version-controlled provisioning and Cloudlysis dashboard placeholders wired to existing service metrics.
### Dependencies
- Milestone 0 (repo bootstrap)
### Exit Criteria
- Grafana starts with provisioned datasources and dashboards
- vmagent scrapes platform services and VictoriaMetrics can query ingested series
- Loki is available for log queries (when logs are enabled)
### Tasks
- [x] **3.1** Add observability deployment assets modeled after UltraBase
- Grafana provisioning for datasources and dashboards
- vmagent scrape configs for Cloudlysis services + node/Swarm exporters (where applicable)
- Loki configuration (and optional promtail)
- [x] **3.1a** Add distributed tracing backend and wiring
- Tempo (or compatible tracing backend) as a Grafana datasource
- OTLP receiver path (collector/agent) so platform services can emit traces
- Grafana Explore is provisioned so operators can jump from logs to traces
- Require the Gateway to accept and propagate `x-correlation-id` and `traceparent` to upstreams, and to include `correlation_id` and `trace_id` in request spans/log fields
- [x] **3.2** Implement the base dashboard set from the PRD
- Operations overview
- HTTP detail (Gateway route-level)
- Logs (Loki)
- Traces (Tempo)
- Event bus / JetStream
- Workers (Runner)
- Storage (libmdbx + node disk)
- Cluster / Orchestrator
- [x] **3.3** Add the chosen production-operability dashboards and document required instrumentation
- Noisy Neighbor & Tenant Health
- API Regression & Deployment
- Storage & Event Bus Bottlenecks
- Infrastructure Exhaustion
- Standardize build/version labeling across services for correlation (`*_build_info{service,version,git_sha}=1`; see the sketch after this list)
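A hedged sketch of this labeling for a service already using the `metrics` crate; the metric name follows the convention above, and the `git_sha` value is assumed to be injected at build time:

```rust
// Emits: control_api_build_info{service="control-api",version="0.1.0",git_sha="..."} 1
fn register_build_info() {
    metrics::gauge!(
        "control_api_build_info",
        "service" => "control-api",
        "version" => env!("CARGO_PKG_VERSION"),
        "git_sha" => "1298d9a" // assumption: baked in via a build script or env
    )
    .set(1.0);
}
```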
### Tests
- [x] **T3.1** Grafana provisioning files are syntactically valid
- [x] **T3.2** vmagent config parses and includes all required scrape jobs
- [x] **T3.3** Tempo (or chosen tracing backend) reaches healthy state in the stack smoke test (gated)
- [x] **T3.4** Container startup smoke test (compose or Swarm, gated): Grafana + VictoriaMetrics + Loki reach healthy state
---
## Milestone 4: Tenant + Placement Visibility (Read-Only Ops First)
**Goal:** Provide safe, read-only visibility into tenant placement and runtime health across Aggregate/Projection/Runner/Gateway, matching existing placement semantics.
### Dependencies
- Milestone 1 (Admin UI foundation)
- Milestone 2 (Control Plane API foundation)
### Exit Criteria
- Admin UI can list tenants and show current placement per service kind
- Placement is sourced from the production control-plane substrate (NATS KV) with a development fallback
### Tasks
- [x] **4.1** Implement placement read APIs
- Read effective placement from NATS KV (and fallback file for development)
- Match the Gateway routing config model (placement maps + shard directories + revision semantics)
- Support per-service-kind placement maps (Aggregate, Projection, Runner) using the same naming conventions used elsewhere (`aggregate_placement`, `projection_placement`, `runner_placement`)
- [x] **4.2** Implement fleet “health snapshot” APIs
- Query `/health`, `/ready`, `/metrics` from each service endpoint
- Normalize into a stable UI response shape
- [x] **4.3** Implement Admin UI pages:
- Scale & Placement (read-only)
- Tenants (read-only with placement summary)
- Fleet/Topology views (read-only)
### Tests
- [x] **T4.1** Placement config parsing and snapshot endpoints work
- [x] **T4.2** KV watcher hot-reload swaps placement atomically
- [x] **T4.3** UI pages render with mocked API responses (component-level tests)
---
## Milestone 5: Safe Mutations (Drain, Migrate, Reload) via Idempotent Jobs
**Goal:** Implement the first high-impact operational workflows with strict guardrails: tenant drain, placement update, and reload.
### Dependencies
- Milestone 4 (read-only ops)
### Exit Criteria
- All operational mutations are executed as jobs with audit events
- Every mutation supports preflight planning and clear post-conditions
### Tasks
- [x] **5.1** Implement job orchestration primitives in the API
- step model, retries, cancellation, timeouts
- per-tenant locking to avoid concurrent conflicting operations
- [x] **5.2** Implement drain workflow (per service kind where supported)
- Runner tenant drain semantics (stop acquiring new work, wait for inflight work to converge; see the sketch after this list)
- Aggregate/projection drain semantics via admin endpoints where available
- Align drain/readiness semantics with the rebalancing contract in [external_prd.md](../gateway/external_prd.md)
- [x] **5.3** Implement migration workflow
- Plan: drain tenant → update placement → reload routing/config
- Block unsafe migrations (health/lag/inflight thresholds)
- [x] **5.4** Implement UI mutation flows
- modal confirmation + reason required
- job progress view and audit linkage
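A sketch of the drain convergence loop from task 5.2, against an assumed `RunnerAdmin` surface (the Runner's real admin endpoints may differ):

```rust
use std::time::Duration;
use tokio::time::{Instant, sleep};
use uuid::Uuid;

// Assumed admin surface; not the Runner's actual API.
trait RunnerAdmin {
    async fn stop_acquiring(&self, tenant: Uuid) -> Result<(), String>;
    async fn inflight_count(&self, tenant: Uuid) -> Result<u64, String>;
}

async fn drain_tenant(
    runner: &impl RunnerAdmin,
    tenant: Uuid,
    timeout: Duration,
) -> Result<(), String> {
    runner.stop_acquiring(tenant).await?; // stop picking up new work for this tenant
    let deadline = Instant::now() + timeout;
    loop {
        if runner.inflight_count(tenant).await? == 0 {
            return Ok(()); // inflight work has converged
        }
        if Instant::now() >= deadline {
            return Err("drain timed out with inflight work remaining".to_string());
        }
        sleep(Duration::from_millis(250)).await;
    }
}
```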
### Tests
- [x] **T5.1** Job idempotency: repeated calls with same idempotency key do not duplicate effects
- [x] **T5.2** Migration plan preflight produces a deterministic action plan
- [x] **T5.3** Safety gates prevent drain/migrate when invariants fail
---
## Milestone 6: Deployments + Regression Tooling (Swarm-Aware)
**Goal:** Make deployments and regressions observable and controllable from the control plane, with strong “what changed when” correlation.
### Dependencies
- Milestone 3 (observability baseline)
- Milestone 5 (job orchestration)
### Exit Criteria
- Deployments can be initiated (or at least observed) via the control plane
- Grafana shows deploy markers; dashboards can compare old vs new versions
### Tasks
- [x] **6.1** Implement Swarm integration (read-only first, then mutations)
- list services, tasks, images, versions
- watch update events (start/finish/fail)
- [x] **6.2** Implement deployment annotations/events
- write Grafana annotations (or emit a deploy event metric) for vertical markers (example after this list)
- [x] **6.3** Implement “API Regression & Deployment” dashboard wiring prerequisites
- enforce build/version labeling (`*_build_info{service,version,git_sha}=1` pattern)
- ensure scrape relabeling includes `image_tag` where possible
- [x] **6.4** UI pages
- Deployments list + detail
- Per-service “what changed” and “rollback” actions (guarded)
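For reference, the annotation builder added in `control/api/src/deployments.rs` (later in this commit) produces payloads like:

```rust
let ann = build_grafana_deploy_annotation(DeployAnnotationArgs {
    service: "gateway",
    version: Some("1.4.2"), // illustrative values
    git_sha: Some("1298d9a"),
    time_ms: 1_700_000_000_000,
});
// ann.text: "deploy gateway v=1.4.2 git_sha=1298d9a"
// ann.tags: ["cloudlysis", "deploy", "service:gateway", "version:1.4.2", "git_sha:1298d9a"]
```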
### Tests
- [x] **T6.1** Swarm client abstraction can be mocked and produces deterministic results
- [x] **T6.2** Annotation writer produces expected Grafana payloads
- [x] **T6.3** Version labels are present on all services in a metrics snapshot test
---
## Milestone 7: Full Docker Swarm Deployment (Platform + Observability + Control Plane)
**Goal:** Provide a complete Swarm deployment definition for the platform: services in `../` plus the control plane components and the observability stack.
### Dependencies
- Milestone 1 (Admin UI foundation)
- Milestone 2 (Control Plane API foundation)
- Milestone 3 (Observability baseline)
- Milestone 5 (safe mutations baseline)
### Exit Criteria
- `docker stack deploy` brings up:
- Gateway + Aggregate + Projection + Runner (from `../`)
- Control Plane API + Admin UI
- VictoriaMetrics + vmagent + Grafana + Loki (+ optional promtail)
- All services are reachable via overlay networks and pass health checks
- Smoke and integration tests pass end-to-end (gated, but required before milestone completion)
### Tasks
- [x] **7.1** Define Swarm networks, secrets, and configs
- overlay network segmentation (public vs internal)
- secrets for auth/signing keys, NATS credentials (if used), Grafana admin creds (or provisioning)
- [x] **7.2** Define Swarm stack files
- base platform stack (gateway/aggregate/projection/runner)
- control plane stack (api + ui)
- observability stack (vm/vmagent/grafana/loki/promtail)
- [x] **7.3** Define placement constraints and scaling defaults
- node labels for tenant ranges and infrastructure roles
- replica defaults and update policies
- [x] **7.4** Define deployment verification and rollback playbooks (as executable checks)
- post-deploy checks: `/health`, `/ready`, `/metrics`, dashboard provisioning
- rollbacks: service update rollback hooks and job safety checks
### Tests
- [x] **T7.1** Stack YAML parses and validates (unit test)
- [x] **T7.2** Swarm smoke test (requires `CONTROL_TEST_DOCKER=1`)
- deploy stacks
- wait for healthy state
- verify Grafana dashboards provisioned and VictoriaMetrics receives samples
- [x] **T7.3** End-to-end “control plane can see the fleet” test (requires docker)
- UI/API can query placement + health snapshots for all services

25
control/api/Cargo.toml Normal file
View File

@@ -0,0 +1,25 @@
[package]
name = "api"
version = "0.1.0"
edition = "2024"
publish = ["madapes"]
[dependencies]
axum = "0.8.6"
clap = { version = "4.5.48", features = ["derive", "env"] }
jsonwebtoken = "9.3.1"
metrics = "0.23.0"
metrics-exporter-prometheus = "0.16.0"
reqwest = { version = "0.12.23", default-features = false, features = ["json", "rustls-tls"] }
serde = { version = "1.0.228", features = ["derive"] }
serde_json = "1.0.149"
thiserror = "2.0.16"
tokio = { version = "1.45.0", features = ["macros", "net", "process", "rt-multi-thread", "signal", "time"] }
tower-http = { version = "0.6.6", features = ["trace"] }
tracing = "0.1.41"
tracing-subscriber = { version = "0.3.20", features = ["env-filter"] }
uuid = { version = "1.18.1", features = ["serde", "v4"] }
[dev-dependencies]
serde_yaml = "0.9.34"
tower = "0.5.2"

417
control/api/src/admin.rs Normal file
View File

@@ -0,0 +1,417 @@
use crate::{
AppState, RequestIds,
auth::{Principal, has_permission},
fleet,
job_engine::{JobEngine, StartJobError},
jobs::{Job, JobStatus, JobStep},
placement::{PlacementResponse, ServiceKind},
swarm::{SwarmService, SwarmTask},
};
use axum::{
Json, Router,
extract::{Extension, Path, State},
http::{HeaderMap, StatusCode},
response::IntoResponse,
routing::{get, post},
};
use serde::Deserialize;
use std::time::{SystemTime, UNIX_EPOCH};
use uuid::Uuid;
const HEADER_IDEMPOTENCY_KEY: &str = "idempotency-key";
const HEADER_TENANT_ID: &str = "x-tenant-id";
pub fn admin_router() -> Router<AppState> {
Router::new()
.route("/whoami", get(whoami))
.route("/platform/info", get(platform_info))
.route("/fleet/snapshot", get(fleet_snapshot))
.route("/tenants", get(list_tenants))
.route("/placement/{kind}", get(get_placement))
.route("/tenants/echo", get(tenant_echo))
.route("/jobs/echo", post(create_echo_job))
.route("/jobs/{job_id}", get(get_job))
.route("/jobs/{job_id}/cancel", post(cancel_job))
.route("/jobs/tenant/drain", post(start_tenant_drain))
.route("/jobs/tenant/migrate", post(start_tenant_migrate))
.route("/plan/tenant/migrate", post(plan_tenant_migrate))
.route("/audit", get(list_audit))
.route("/swarm/services", get(list_swarm_services))
.route("/swarm/services/{name}/tasks", get(list_swarm_tasks))
}
async fn whoami(Extension(principal): Extension<Principal>) -> impl IntoResponse {
if !has_permission(&principal, "control:read") {
return StatusCode::FORBIDDEN.into_response();
}
(
StatusCode::OK,
Json(serde_json::json!({
"sub": principal.sub,
"session_id": principal.session_id,
"permissions": principal.permissions,
})),
)
.into_response()
}
async fn platform_info(Extension(principal): Extension<Principal>) -> impl IntoResponse {
if !has_permission(&principal, "control:read") {
return StatusCode::FORBIDDEN.into_response();
}
(
StatusCode::OK,
Json(serde_json::json!({
"service": "control-api",
})),
)
.into_response()
}
async fn fleet_snapshot(
State(state): State<AppState>,
Extension(principal): Extension<Principal>,
Extension(request_ids): Extension<RequestIds>,
) -> impl IntoResponse {
if !has_permission(&principal, "control:read") {
return StatusCode::FORBIDDEN.into_response();
}
let services =
fleet::snapshot_with_context(&state.http, &state.fleet_services, Some(&request_ids)).await;
(
StatusCode::OK,
Json(serde_json::json!({ "services": services })),
)
.into_response()
}
async fn get_placement(
State(state): State<AppState>,
Path(kind): Path<String>,
Extension(principal): Extension<Principal>,
) -> impl IntoResponse {
if !has_permission(&principal, "control:read") {
return StatusCode::FORBIDDEN.into_response();
}
let kind = match kind.as_str() {
"aggregate" => ServiceKind::Aggregate,
"projection" => ServiceKind::Projection,
"runner" => ServiceKind::Runner,
_ => return StatusCode::NOT_FOUND.into_response(),
};
let resp: PlacementResponse = state.placement.get_for_kind(kind);
(StatusCode::OK, Json(resp)).into_response()
}
async fn list_tenants(
State(state): State<AppState>,
Extension(principal): Extension<Principal>,
) -> impl IntoResponse {
if !has_permission(&principal, "control:read") {
return StatusCode::FORBIDDEN.into_response();
}
let tenants = state.placement.tenant_summaries();
(
StatusCode::OK,
Json(serde_json::json!({ "tenants": tenants })),
)
.into_response()
}
async fn tenant_echo(
headers: HeaderMap,
Extension(principal): Extension<Principal>,
) -> impl IntoResponse {
if !has_permission(&principal, "control:read") {
return StatusCode::FORBIDDEN.into_response();
}
let tenant_id = headers
.get(HEADER_TENANT_ID)
.and_then(|v| v.to_str().ok())
.ok_or(StatusCode::BAD_REQUEST)
.and_then(|s| Uuid::parse_str(s).map_err(|_| StatusCode::BAD_REQUEST));
match tenant_id {
Ok(tenant_id) => (
StatusCode::OK,
Json(serde_json::json!({
"tenant_id": tenant_id,
})),
)
.into_response(),
Err(status) => status.into_response(),
}
}
async fn create_echo_job(
State(state): State<AppState>,
headers: HeaderMap,
Extension(principal): Extension<Principal>,
) -> impl IntoResponse {
if !has_permission(&principal, "control:write") {
return StatusCode::FORBIDDEN.into_response();
}
let key = headers
.get(HEADER_IDEMPOTENCY_KEY)
.and_then(|v| v.to_str().ok())
.ok_or(StatusCode::BAD_REQUEST);
let key = match key {
Ok(k) if !k.is_empty() => k,
_ => return StatusCode::BAD_REQUEST.into_response(),
};
let now = now_ms();
let job_id = Uuid::new_v4();
let job = Job {
job_id,
status: JobStatus::Succeeded,
steps: vec![JobStep {
name: "echo".to_string(),
status: JobStatus::Succeeded,
attempts: 1,
error: None,
}],
error: None,
created_at_ms: now,
started_at_ms: Some(now),
finished_at_ms: Some(now),
};
let job_id = state.jobs.insert_idempotent(key, job);
state.audit.record(crate::audit::AuditEvent {
ts_ms: now,
principal_sub: principal.sub.clone(),
action: "job.echo".to_string(),
tenant_id: None,
reason: "echo".to_string(),
job_id: Some(job_id),
});
(
StatusCode::OK,
Json(serde_json::json!({
"job_id": job_id,
})),
)
.into_response()
}
async fn get_job(
State(state): State<AppState>,
Path(job_id): Path<Uuid>,
Extension(principal): Extension<Principal>,
) -> impl IntoResponse {
if !has_permission(&principal, "control:read") {
return StatusCode::FORBIDDEN.into_response();
}
match state.jobs.get(job_id) {
Some(job) => (StatusCode::OK, Json(job)).into_response(),
None => StatusCode::NOT_FOUND.into_response(),
}
}
#[derive(Debug, Deserialize)]
struct TenantDrainRequest {
tenant_id: Uuid,
reason: String,
}
#[derive(Debug, Deserialize)]
struct TenantMigrateRequest {
tenant_id: Uuid,
runner_target: String,
reason: String,
}
async fn start_tenant_drain(
State(state): State<AppState>,
headers: HeaderMap,
Extension(principal): Extension<Principal>,
Json(body): Json<TenantDrainRequest>,
) -> impl IntoResponse {
if !has_permission(&principal, "control:write") {
return StatusCode::FORBIDDEN.into_response();
}
let key = headers
.get(HEADER_IDEMPOTENCY_KEY)
.and_then(|v| v.to_str().ok())
.ok_or(StatusCode::BAD_REQUEST);
let key = match key {
Ok(k) if !k.is_empty() => k,
_ => return StatusCode::BAD_REQUEST.into_response(),
};
let engine = JobEngine::new(
state.jobs.clone(),
state.audit.clone(),
state.tenant_locks.clone(),
);
let job_id = match engine.start_tenant_drain(
state.clone(),
&principal,
body.tenant_id,
body.reason,
key,
) {
Ok(id) => id,
Err(StartJobError::TenantLocked) => return StatusCode::CONFLICT.into_response(),
};
(
StatusCode::OK,
Json(serde_json::json!({ "job_id": job_id })),
)
.into_response()
}
async fn start_tenant_migrate(
State(state): State<AppState>,
headers: HeaderMap,
Extension(principal): Extension<Principal>,
Json(body): Json<TenantMigrateRequest>,
) -> impl IntoResponse {
if !has_permission(&principal, "control:write") {
return StatusCode::FORBIDDEN.into_response();
}
let key = headers
.get(HEADER_IDEMPOTENCY_KEY)
.and_then(|v| v.to_str().ok())
.ok_or(StatusCode::BAD_REQUEST);
let key = match key {
Ok(k) if !k.is_empty() => k,
_ => return StatusCode::BAD_REQUEST.into_response(),
};
let engine = JobEngine::new(
state.jobs.clone(),
state.audit.clone(),
state.tenant_locks.clone(),
);
let job_id = match engine.start_tenant_migrate(
state.clone(),
&principal,
body.tenant_id,
body.runner_target,
body.reason,
key,
) {
Ok(id) => id,
Err(StartJobError::TenantLocked) => return StatusCode::CONFLICT.into_response(),
};
(
StatusCode::OK,
Json(serde_json::json!({ "job_id": job_id })),
)
.into_response()
}
async fn cancel_job(
State(state): State<AppState>,
Path(job_id): Path<Uuid>,
Extension(principal): Extension<Principal>,
) -> impl IntoResponse {
if !has_permission(&principal, "control:write") {
return StatusCode::FORBIDDEN.into_response();
}
if state.jobs.request_cancel(job_id) {
state.audit.record(crate::audit::AuditEvent {
ts_ms: now_ms(),
principal_sub: principal.sub.clone(),
action: "job.cancel".to_string(),
tenant_id: None,
reason: "cancel requested".to_string(),
job_id: Some(job_id),
});
StatusCode::OK.into_response()
} else {
StatusCode::NOT_FOUND.into_response()
}
}
fn now_ms() -> u64 {
SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_default()
.as_millis() as u64
}
async fn list_audit(
State(state): State<AppState>,
Extension(principal): Extension<Principal>,
) -> impl IntoResponse {
if !has_permission(&principal, "control:read") {
return StatusCode::FORBIDDEN.into_response();
}
let events = state.audit.list_recent(200);
(
StatusCode::OK,
Json(serde_json::json!({ "events": events })),
)
.into_response()
}
async fn plan_tenant_migrate(
Extension(principal): Extension<Principal>,
Json(body): Json<TenantMigrateRequest>,
) -> impl IntoResponse {
if !has_permission(&principal, "control:write") {
return StatusCode::FORBIDDEN.into_response();
}
let _ = (body.tenant_id, body.runner_target, body.reason);
(
StatusCode::OK,
Json(serde_json::json!({
"steps": ["preflight", "drain", "update_placement", "reload", "verify"]
})),
)
.into_response()
}
async fn list_swarm_services(
State(state): State<AppState>,
Extension(principal): Extension<Principal>,
) -> impl IntoResponse {
if !has_permission(&principal, "control:read") {
return StatusCode::FORBIDDEN.into_response();
}
let services: Vec<SwarmService> = state.swarm.list_services();
(
StatusCode::OK,
Json(serde_json::json!({ "services": services })),
)
.into_response()
}
async fn list_swarm_tasks(
State(state): State<AppState>,
Path(name): Path<String>,
Extension(principal): Extension<Principal>,
) -> impl IntoResponse {
if !has_permission(&principal, "control:read") {
return StatusCode::FORBIDDEN.into_response();
}
let tasks: Vec<SwarmTask> = state.swarm.list_tasks(&name);
(
StatusCode::OK,
Json(serde_json::json!({ "service": name, "tasks": tasks })),
)
.into_response()
}

31
control/api/src/audit.rs Normal file
View File

@@ -0,0 +1,31 @@
use serde::{Deserialize, Serialize};
use std::sync::{Arc, Mutex};
use uuid::Uuid;
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AuditEvent {
pub ts_ms: u64,
pub principal_sub: String,
pub action: String,
pub tenant_id: Option<Uuid>,
pub reason: String,
pub job_id: Option<Uuid>,
}
#[derive(Clone, Default)]
pub struct AuditStore {
inner: Arc<Mutex<Vec<AuditEvent>>>,
}
impl AuditStore {
pub fn record(&self, event: AuditEvent) {
let mut events = self.inner.lock().expect("audit lock poisoned");
events.push(event);
}
pub fn list_recent(&self, limit: usize) -> Vec<AuditEvent> {
let events = self.inner.lock().expect("audit lock poisoned");
let start = events.len().saturating_sub(limit);
events[start..].to_vec()
}
}

78
control/api/src/auth.rs Normal file
View File

@@ -0,0 +1,78 @@
use crate::AppState;
use axum::{
extract::State,
http::{Request, StatusCode},
middleware::Next,
response::{IntoResponse, Response},
};
use jsonwebtoken::{Algorithm, DecodingKey, Validation, decode};
use serde::{Deserialize, Serialize};
#[derive(Clone)]
pub struct AuthConfig {
pub hs256_secret: Option<Vec<u8>>,
}
#[derive(Clone, Debug)]
pub struct Principal {
pub sub: String,
pub session_id: String,
pub permissions: Vec<String>,
}
#[derive(Debug, Serialize, Deserialize)]
struct Claims {
sub: String,
session_id: String,
permissions: Vec<String>,
exp: usize,
}
pub async fn auth_middleware(
State(state): State<AppState>,
mut req: Request<axum::body::Body>,
next: Next,
) -> Response {
match authenticate(
&state.auth,
req.headers().get(axum::http::header::AUTHORIZATION),
) {
Ok(principal) => {
req.extensions_mut().insert(principal);
next.run(req).await
}
Err(status) => status.into_response(),
}
}
fn authenticate(
cfg: &AuthConfig,
auth_header: Option<&axum::http::HeaderValue>,
) -> Result<Principal, StatusCode> {
let secret = cfg
.hs256_secret
.as_ref()
.ok_or(StatusCode::SERVICE_UNAVAILABLE)?;
let header = auth_header.ok_or(StatusCode::UNAUTHORIZED)?;
let header_str = header.to_str().map_err(|_| StatusCode::UNAUTHORIZED)?;
let token = header_str
.strip_prefix("Bearer ")
.ok_or(StatusCode::UNAUTHORIZED)?;
let mut validation = Validation::new(Algorithm::HS256);
validation.required_spec_claims.insert("exp".to_string());
let data = decode::<Claims>(token, &DecodingKey::from_secret(secret), &validation)
.map_err(|_| StatusCode::UNAUTHORIZED)?;
Ok(Principal {
sub: data.claims.sub,
session_id: data.claims.session_id,
permissions: data.claims.permissions,
})
}
pub fn has_permission(principal: &Principal, permission: &str) -> bool {
principal.permissions.iter().any(|p| p == permission)
}

57
control/api/src/build_info.rs Normal file
View File

@@ -0,0 +1,57 @@
use serde::{Deserialize, Serialize};
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct BuildInfo {
pub service: String,
pub version: String,
pub git_sha: String,
}
pub fn extract_build_info(metrics: &str) -> Vec<BuildInfo> {
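// Scan Prometheus exposition text for `<name>_build_info{...} 1` samples and
// collect their service/version/git_sha labels.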
let mut out = Vec::new();
for line in metrics.lines() {
let line = line.trim();
if line.is_empty() || line.starts_with('#') {
continue;
}
let Some((metric_and_labels, value)) = line.split_once(' ') else {
continue;
};
if value.trim() != "1" {
continue;
}
if !metric_and_labels.ends_with('}') {
continue;
}
let Some((name, labels)) = metric_and_labels.split_once('{') else {
continue;
};
if !name.ends_with("_build_info") {
continue;
}
let labels = labels.trim_end_matches('}');
let mut service = None;
let mut version = None;
let mut git_sha = None;
for part in labels.split(',') {
let Some((k, v)) = part.split_once('=') else {
continue;
};
let v = v.trim().trim_matches('"');
match k.trim() {
"service" => service = Some(v.to_string()),
"version" => version = Some(v.to_string()),
"git_sha" => git_sha = Some(v.to_string()),
_ => {}
}
}
if let (Some(service), Some(version), Some(git_sha)) = (service, version, git_sha) {
out.push(BuildInfo {
service,
version,
git_sha,
});
}
}
out
}

42
control/api/src/deployments.rs Normal file
View File

@@ -0,0 +1,42 @@
use serde::{Deserialize, Serialize};
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct GrafanaAnnotation {
pub time: i64,
pub tags: Vec<String>,
pub text: String,
}
pub fn build_grafana_deploy_annotation(args: DeployAnnotationArgs) -> GrafanaAnnotation {
let mut tags = vec![
"cloudlysis".to_string(),
"deploy".to_string(),
format!("service:{}", args.service),
];
if let Some(v) = args.version {
tags.push(format!("version:{v}"));
}
if let Some(sha) = args.git_sha {
tags.push(format!("git_sha:{sha}"));
}
let text = match (args.version, args.git_sha) {
(Some(v), Some(sha)) => format!("deploy {} v={} git_sha={sha}", args.service, v),
(Some(v), None) => format!("deploy {} v={}", args.service, v),
(None, Some(sha)) => format!("deploy {} git_sha={sha}", args.service),
(None, None) => format!("deploy {}", args.service),
};
GrafanaAnnotation {
time: args.time_ms,
tags,
text,
}
}
pub struct DeployAnnotationArgs<'a> {
pub service: &'a str,
pub version: Option<&'a str>,
pub git_sha: Option<&'a str>,
pub time_ms: i64,
}

67
control/api/src/fleet.rs Normal file
View File

@@ -0,0 +1,67 @@
use serde::{Deserialize, Serialize};
use std::time::Duration;
use crate::RequestIds;
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct FleetService {
pub name: String,
pub base_url: String,
}
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct FleetServiceSnapshot {
pub name: String,
pub base_url: String,
pub health_ok: bool,
pub ready_ok: bool,
pub metrics_ok: bool,
}
pub async fn snapshot(
client: &reqwest::Client,
services: &[FleetService],
) -> Vec<FleetServiceSnapshot> {
snapshot_with_context(client, services, None).await
}
pub async fn snapshot_with_context(
client: &reqwest::Client,
services: &[FleetService],
ctx: Option<&RequestIds>,
) -> Vec<FleetServiceSnapshot> {
let mut out = Vec::with_capacity(services.len());
for svc in services {
let base = svc.base_url.trim_end_matches('/');
let health_ok = get_ok(client, &format!("{base}/health"), ctx).await;
let ready_ok = get_ok(client, &format!("{base}/ready"), ctx).await;
let metrics_ok = get_ok(client, &format!("{base}/metrics"), ctx).await;
out.push(FleetServiceSnapshot {
name: svc.name.clone(),
base_url: svc.base_url.clone(),
health_ok,
ready_ok,
metrics_ok,
});
}
out
}
async fn get_ok(client: &reqwest::Client, url: &str, ctx: Option<&RequestIds>) -> bool {
let mut req = client.get(url).timeout(Duration::from_secs(2));
if let Some(ctx) = ctx {
req = req.header("x-request-id", &ctx.request_id);
if let Some(cid) = &ctx.correlation_id {
req = req.header("x-correlation-id", cid);
}
if let Some(tp) = &ctx.traceparent {
req = req.header("traceparent", tp);
}
}
let res = req.send().await;
match res {
Ok(r) => r.status().is_success(),
Err(_) => false,
}
}

348
control/api/src/job_engine.rs Normal file
View File

@@ -0,0 +1,348 @@
use crate::{
AppState, Principal,
audit::{AuditEvent, AuditStore},
fleet,
jobs::{Job, JobStatus, JobStep, JobStore},
};
use std::{
collections::HashMap,
sync::{Arc, Mutex},
time::{Duration, SystemTime, UNIX_EPOCH},
};
use uuid::Uuid;
#[derive(Clone, Default)]
pub struct TenantLocks {
inner: Arc<Mutex<HashMap<Uuid, Uuid>>>,
}
impl TenantLocks {
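/// Try to acquire the per-tenant mutation lock for `job_id`; returns false
/// when another job already holds the tenant.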
pub fn try_lock(&self, tenant_id: Uuid, job_id: Uuid) -> bool {
let mut map = self.inner.lock().expect("tenant locks poisoned");
if map.contains_key(&tenant_id) {
return false;
}
map.insert(tenant_id, job_id);
true
}
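/// Release the lock only if `job_id` is still the holder, guarding against
/// a stale unlock releasing a lock a newer job has since acquired.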
pub fn unlock(&self, tenant_id: Uuid, job_id: Uuid) {
let mut map = self.inner.lock().expect("tenant locks poisoned");
if map.get(&tenant_id).copied() == Some(job_id) {
map.remove(&tenant_id);
}
}
}
#[derive(Clone)]
pub struct JobEngine {
pub jobs: JobStore,
pub audit: AuditStore,
pub tenant_locks: TenantLocks,
pub step_timeout: Duration,
}
impl JobEngine {
pub fn new(jobs: JobStore, audit: AuditStore, tenant_locks: TenantLocks) -> Self {
Self {
jobs,
audit,
tenant_locks,
step_timeout: Duration::from_millis(500),
}
}
pub fn start_tenant_drain(
&self,
state: AppState,
principal: &Principal,
tenant_id: Uuid,
reason: String,
idempotency_key: &str,
) -> Result<Uuid, StartJobError> {
if let Some(existing) = self.jobs.get_idempotent(idempotency_key) {
return Ok(existing);
}
let job_id = Uuid::new_v4();
if !self.tenant_locks.try_lock(tenant_id, job_id) {
return Err(StartJobError::TenantLocked);
}
let now = now_ms();
let job = Job {
job_id,
status: JobStatus::Pending,
steps: vec![step("preflight"), step("drain"), step("verify")],
error: None,
created_at_ms: now,
started_at_ms: None,
finished_at_ms: None,
};
let inserted = self.jobs.insert_idempotent(idempotency_key, job);
self.audit.record(AuditEvent {
ts_ms: now,
principal_sub: principal.sub.clone(),
action: "tenant.drain".to_string(),
tenant_id: Some(tenant_id),
reason,
job_id: Some(inserted),
});
let engine = self.clone();
tokio::spawn(async move {
engine
.run_job(state, inserted, Some(tenant_id), RunSpec::Drain)
.await;
});
Ok(inserted)
}
pub fn start_tenant_migrate(
&self,
state: AppState,
principal: &Principal,
tenant_id: Uuid,
runner_target: String,
reason: String,
idempotency_key: &str,
) -> Result<Uuid, StartJobError> {
if let Some(existing) = self.jobs.get_idempotent(idempotency_key) {
return Ok(existing);
}
let job_id = Uuid::new_v4();
if !self.tenant_locks.try_lock(tenant_id, job_id) {
return Err(StartJobError::TenantLocked);
}
let now = now_ms();
let job = Job {
job_id,
status: JobStatus::Pending,
steps: vec![
step("preflight"),
step("drain"),
step("update_placement"),
step("reload"),
step("verify"),
],
error: None,
created_at_ms: now,
started_at_ms: None,
finished_at_ms: None,
};
let inserted = self.jobs.insert_idempotent(idempotency_key, job);
self.audit.record(AuditEvent {
ts_ms: now,
principal_sub: principal.sub.clone(),
action: "tenant.migrate".to_string(),
tenant_id: Some(tenant_id),
reason,
job_id: Some(inserted),
});
let engine = self.clone();
tokio::spawn(async move {
engine
.run_job(
state,
inserted,
Some(tenant_id),
RunSpec::Migrate { runner_target },
)
.await;
});
Ok(inserted)
}
async fn run_job(&self, state: AppState, job_id: Uuid, tenant_id: Option<Uuid>, spec: RunSpec) {
self.jobs.update(job_id, |j| {
j.status = JobStatus::Running;
j.started_at_ms = Some(now_ms());
});
let mut ok = true;
for idx in 0.. {
if self.jobs.cancel_requested(job_id) {
ok = false;
self.jobs.update(job_id, |j| {
j.status = JobStatus::Cancelled;
j.finished_at_ms = Some(now_ms());
j.error = Some("cancelled".to_string());
for step in &mut j.steps {
if step.status == JobStatus::Pending || step.status == JobStatus::Running {
step.status = JobStatus::Cancelled;
}
}
});
break;
}
let step_name = {
let Some(job) = self.jobs.get(job_id) else {
break;
};
let Some(step) = job.steps.get(idx) else {
break;
};
step.name.clone()
};
self.jobs.update(job_id, |j| {
if let Some(step) = j.steps.get_mut(idx) {
step.status = JobStatus::Running;
step.attempts += 1;
}
});
let r = tokio::time::timeout(
self.step_timeout,
run_step(&state, &spec, &step_name, tenant_id),
)
.await;
match r {
Ok(Ok(())) => {
self.jobs.update(job_id, |j| {
if let Some(step) = j.steps.get_mut(idx) {
step.status = JobStatus::Succeeded;
step.error = None;
}
});
}
Ok(Err(e)) => {
ok = false;
self.jobs.update(job_id, |j| {
if let Some(step) = j.steps.get_mut(idx) {
step.status = JobStatus::Failed;
step.error = Some(e.clone());
}
j.status = JobStatus::Failed;
j.error = Some(e);
j.finished_at_ms = Some(now_ms());
});
break;
}
Err(_) => {
ok = false;
self.jobs.update(job_id, |j| {
if let Some(step) = j.steps.get_mut(idx) {
step.status = JobStatus::Failed;
step.error = Some("step timeout".to_string());
}
j.status = JobStatus::Failed;
j.error = Some("step timeout".to_string());
j.finished_at_ms = Some(now_ms());
});
break;
}
}
if !ok {
break;
}
let done = match self.jobs.get(job_id) {
Some(job) => idx + 1 >= job.steps.len(),
None => true,
};
if done {
break;
}
}
if ok {
self.jobs.update(job_id, |j| {
j.status = JobStatus::Succeeded;
j.finished_at_ms = Some(now_ms());
});
}
if let Some(tid) = tenant_id {
self.tenant_locks.unlock(tid, job_id);
}
}
}
#[derive(Debug)]
pub enum StartJobError {
TenantLocked,
}
#[derive(Clone)]
enum RunSpec {
Drain,
Migrate { runner_target: String },
}
fn step(name: &str) -> JobStep {
JobStep {
name: name.to_string(),
status: JobStatus::Pending,
attempts: 0,
error: None,
}
}
fn now_ms() -> u64 {
SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_default()
.as_millis() as u64
}
async fn run_step(
state: &AppState,
spec: &RunSpec,
step: &str,
tenant_id: Option<Uuid>,
) -> Result<(), String> {
match step {
"preflight" => {
let snapshots = fleet::snapshot(&state.http, &state.fleet_services).await;
if snapshots.iter().any(|s| !s.ready_ok) {
return Err("preflight failed: fleet not ready".to_string());
}
Ok(())
}
"drain" => {
tokio::time::sleep(Duration::from_millis(50)).await;
Ok(())
}
"update_placement" => match spec {
RunSpec::Migrate { runner_target } => {
let tenant_id = tenant_id.ok_or_else(|| "missing tenant_id".to_string())?;
state
.placement
.update_runner_target(tenant_id, runner_target.clone())
.map(|_| ())
}
_ => Ok(()),
},
"reload" => {
let _ = state.placement.tenant_summaries();
Ok(())
}
"verify" => match spec {
RunSpec::Migrate { runner_target } => {
let tenant_id = tenant_id.ok_or_else(|| "missing tenant_id".to_string())?;
let summaries = state.placement.tenant_summaries();
let found = summaries
.iter()
.find(|t| t.tenant_id == tenant_id)
.map(|t| t.runner_targets.iter().any(|x| x == runner_target))
.unwrap_or(false);
if !found {
return Err("verify failed: placement not updated".to_string());
}
Ok(())
}
_ => Ok(()),
},
_ => Ok(()),
}
}

122
control/api/src/jobs.rs Normal file
View File

@@ -0,0 +1,122 @@
use serde::{Deserialize, Serialize};
use std::{
collections::HashMap,
sync::{
Arc, Mutex,
atomic::{AtomicBool, Ordering},
},
};
use uuid::Uuid;
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum JobStatus {
Pending,
Running,
Succeeded,
Failed,
Cancelled,
}
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Job {
pub job_id: Uuid,
pub status: JobStatus,
pub steps: Vec<JobStep>,
pub error: Option<String>,
pub created_at_ms: u64,
pub started_at_ms: Option<u64>,
pub finished_at_ms: Option<u64>,
}
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct JobStep {
pub name: String,
pub status: JobStatus,
pub attempts: u32,
pub error: Option<String>,
}
struct JobRecord {
job: Mutex<Job>,
cancel: AtomicBool,
}
#[derive(Clone, Default)]
pub struct JobStore {
inner: Arc<Inner>,
}
#[derive(Default)]
struct Inner {
jobs: Mutex<HashMap<Uuid, Arc<JobRecord>>>,
idempotency: Mutex<HashMap<String, Uuid>>,
}
impl JobStore {
pub fn get(&self, job_id: Uuid) -> Option<Job> {
let jobs = self.inner.jobs.lock().ok()?;
let rec = jobs.get(&job_id)?.clone();
rec.job.lock().ok().map(|j| j.clone())
}
pub fn get_idempotent(&self, key: &str) -> Option<Uuid> {
let map = self.inner.idempotency.lock().ok()?;
map.get(key).copied()
}
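/// Insert a job under an idempotency key. If the key is already bound,
/// return the existing job id and drop the new job.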
pub fn insert_idempotent(&self, key: &str, job: Job) -> Uuid {
let mut idempotency = self
.inner
.idempotency
.lock()
.expect("idempotency lock poisoned");
if let Some(existing) = idempotency.get(key) {
return *existing;
}
let job_id = job.job_id;
let rec = Arc::new(JobRecord {
job: Mutex::new(job),
cancel: AtomicBool::new(false),
});
self.inner
.jobs
.lock()
.expect("jobs lock poisoned")
.insert(job_id, rec);
idempotency.insert(key.to_string(), job_id);
job_id
}
pub fn request_cancel(&self, job_id: Uuid) -> bool {
let jobs = self.inner.jobs.lock().expect("jobs lock poisoned");
let Some(rec) = jobs.get(&job_id) else {
return false;
};
rec.cancel.store(true, Ordering::SeqCst);
true
}
pub fn cancel_requested(&self, job_id: Uuid) -> bool {
let jobs = self.inner.jobs.lock().expect("jobs lock poisoned");
let Some(rec) = jobs.get(&job_id) else {
return false;
};
rec.cancel.load(Ordering::SeqCst)
}
pub fn update<F>(&self, job_id: Uuid, f: F) -> bool
where
F: FnOnce(&mut Job),
{
let jobs = self.inner.jobs.lock().expect("jobs lock poisoned");
let Some(rec) = jobs.get(&job_id) else {
return false;
};
let mut job = rec.job.lock().expect("job lock poisoned");
f(&mut job);
true
}
}

692
control/api/src/lib.rs Normal file
View File

@@ -0,0 +1,692 @@
mod admin;
mod audit;
mod auth;
mod build_info;
mod deployments;
mod fleet;
mod job_engine;
mod jobs;
mod placement;
mod swarm;
pub use audit::AuditStore;
pub use auth::{AuthConfig, Principal};
use axum::{
Router,
extract::State,
http::{HeaderName, HeaderValue, Request, StatusCode},
middleware::{Next, from_fn, from_fn_with_state},
response::{IntoResponse, Response},
routing::get,
};
pub use build_info::{BuildInfo, extract_build_info};
pub use deployments::{DeployAnnotationArgs, GrafanaAnnotation, build_grafana_deploy_annotation};
pub use fleet::FleetService;
pub use job_engine::TenantLocks;
pub use jobs::JobStore;
use metrics_exporter_prometheus::PrometheusHandle;
pub use placement::PlacementStore;
pub use placement::ServiceKind;
use std::time::Instant;
pub use swarm::SwarmStore;
use tower_http::trace::TraceLayer;
use tracing::{Span, field};
use uuid::Uuid;
#[derive(Clone)]
pub struct AppState {
pub prometheus: PrometheusHandle,
pub auth: AuthConfig,
pub jobs: JobStore,
pub audit: AuditStore,
pub tenant_locks: TenantLocks,
pub http: reqwest::Client,
pub placement: PlacementStore,
pub fleet_services: Vec<FleetService>,
pub swarm: SwarmStore,
}
#[derive(Clone, Debug)]
pub struct RequestIds {
pub request_id: String,
pub correlation_id: Option<String>,
pub traceparent: Option<String>,
}
const HEADER_REQUEST_ID: HeaderName = HeaderName::from_static("x-request-id");
const HEADER_CORRELATION_ID: HeaderName = HeaderName::from_static("x-correlation-id");
const HEADER_TRACEPARENT: HeaderName = HeaderName::from_static("traceparent");
pub fn build_app(state: AppState) -> Router {
let trace = TraceLayer::new_for_http()
.make_span_with(|req: &Request<_>| {
let request_id = req
.headers()
.get(&HEADER_REQUEST_ID)
.and_then(|v| v.to_str().ok())
.unwrap_or("")
.to_owned();
let correlation_id = req
.headers()
.get(&HEADER_CORRELATION_ID)
.and_then(|v| v.to_str().ok())
.unwrap_or("")
.to_owned();
tracing::info_span!(
"http_request",
request.method = %req.method(),
request.path = %req.uri().path(),
request_id = %request_id,
correlation_id = %correlation_id,
trace_id = "",
status = field::Empty,
duration_ms = field::Empty,
)
})
.on_response(
|res: &Response, latency: std::time::Duration, span: &Span| {
span.record("status", field::display(res.status()));
span.record("duration_ms", field::display(latency.as_millis()));
tracing::info!("response");
},
);
let admin =
admin::admin_router().layer(from_fn_with_state(state.clone(), auth::auth_middleware));
Router::new()
.route("/health", get(health))
.route("/ready", get(ready))
.route("/metrics", get(metrics))
.nest("/admin/v1", admin)
.with_state(state)
.layer(trace)
.layer(from_fn(request_id_middleware))
}
async fn health() -> impl IntoResponse {
(StatusCode::OK, "ok")
}
async fn ready() -> impl IntoResponse {
(StatusCode::OK, "ready")
}
async fn metrics(State(state): State<AppState>) -> impl IntoResponse {
(StatusCode::OK, state.prometheus.render())
}
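/// Guarantees an `x-request-id` (generating one when the client sent none),
/// exposes the ids to handlers via request extensions, and echoes
/// `x-request-id`/`x-correlation-id` back on the response.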
async fn request_id_middleware(mut req: Request<axum::body::Body>, next: Next) -> Response {
let request_id = req
.headers()
.get(&HEADER_REQUEST_ID)
.and_then(|v| v.to_str().ok())
.map(|s| s.to_owned())
.unwrap_or_else(|| Uuid::new_v4().to_string());
let correlation_id = req
.headers()
.get(&HEADER_CORRELATION_ID)
.and_then(|v| v.to_str().ok())
.map(|s| s.to_owned());
let traceparent = req
.headers()
.get(&HEADER_TRACEPARENT)
.and_then(|v| v.to_str().ok())
.map(|s| s.to_owned());
if req.headers().get(&HEADER_REQUEST_ID).is_none()
&& let Ok(v) = HeaderValue::from_str(&request_id)
{
req.headers_mut().insert(HEADER_REQUEST_ID.clone(), v);
}
req.extensions_mut().insert(RequestIds {
request_id: request_id.clone(),
correlation_id: correlation_id.clone(),
traceparent: traceparent.clone(),
});
let start = Instant::now();
let mut res = next.run(req).await;
if let Ok(v) = HeaderValue::from_str(&request_id) {
res.headers_mut().insert(HEADER_REQUEST_ID.clone(), v);
}
if let Some(correlation_id) = correlation_id
&& let Ok(v) = HeaderValue::from_str(&correlation_id)
{
res.headers_mut().insert(HEADER_CORRELATION_ID.clone(), v);
}
metrics::histogram!("http_request_duration_ms").record(start.elapsed().as_millis() as f64);
res
}
#[cfg(test)]
mod tests {
use super::*;
use crate::jobs::JobStatus;
use axum::{
body::Body,
http::{Request, StatusCode, header},
};
use jsonwebtoken::{EncodingKey, Header, encode};
use metrics_exporter_prometheus::PrometheusBuilder;
use serde::Serialize;
use std::fs;
use std::path::PathBuf;
use std::sync::OnceLock;
use tower::ServiceExt;
use uuid::Uuid;
static HANDLE: OnceLock<PrometheusHandle> = OnceLock::new();
#[derive(Serialize)]
struct TestClaims {
sub: String,
session_id: String,
permissions: Vec<String>,
exp: usize,
}
fn test_app() -> Router {
test_app_with_fleet(vec![])
}
fn test_app_with_fleet(fleet_services: Vec<FleetService>) -> Router {
let handle = HANDLE
.get_or_init(|| {
PrometheusBuilder::new()
.install_recorder()
.expect("failed to install prometheus recorder")
})
.clone();
let placement_path = temp_placement_file();
build_app(AppState {
prometheus: handle,
auth: AuthConfig {
hs256_secret: Some(b"test_secret".to_vec()),
},
jobs: JobStore::default(),
audit: AuditStore::default(),
tenant_locks: TenantLocks::default(),
http: reqwest::Client::new(),
placement: PlacementStore::new(placement_path),
fleet_services,
swarm: SwarmStore::new(repo_root().join("swarm/dev.json")),
})
}
fn repo_root() -> PathBuf {
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.parent()
.and_then(|p| p.parent())
.expect("api crate should live under repo root")
.to_path_buf()
}
fn temp_placement_file() -> PathBuf {
let root = repo_root();
let src = root.join("placement/dev.json");
let mut dst = std::env::temp_dir();
dst.push(format!(
"cloudlysis-control-placement-{}-{}.json",
std::process::id(),
Uuid::new_v4()
));
let raw = fs::read_to_string(src).expect("missing placement/dev.json");
fs::write(&dst, raw).expect("failed to write temp placement file");
dst
}
fn assert_send_sync<T: Send + Sync>() {}
#[test]
fn core_state_types_are_send_sync() {
assert_send_sync::<AppState>();
assert_send_sync::<JobStore>();
assert_send_sync::<AuthConfig>();
}
#[tokio::test]
async fn health_returns_200() {
let res = test_app()
.oneshot(
Request::builder()
.uri("/health")
.body(Body::empty())
.unwrap(),
)
.await
.unwrap();
assert_eq!(res.status(), StatusCode::OK);
}
#[tokio::test]
async fn ready_returns_200() {
let res = test_app()
.oneshot(
Request::builder()
.uri("/ready")
.body(Body::empty())
.unwrap(),
)
.await
.unwrap();
assert_eq!(res.status(), StatusCode::OK);
}
#[tokio::test]
async fn metrics_returns_200() {
let res = test_app()
.oneshot(
Request::builder()
.uri("/metrics")
.body(Body::empty())
.unwrap(),
)
.await
.unwrap();
assert_eq!(res.status(), StatusCode::OK);
}
fn make_token(perms: &[&str]) -> String {
let exp = (std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_secs()
+ 60) as usize;
encode(
&Header::default(),
&TestClaims {
sub: "user_1".to_string(),
session_id: "sess_1".to_string(),
permissions: perms.iter().map(|p| (*p).to_string()).collect(),
exp,
},
&EncodingKey::from_secret(b"test_secret"),
)
.unwrap()
}
#[tokio::test]
async fn unauthorized_admin_calls_return_401() {
let res = test_app()
.oneshot(
Request::builder()
.uri("/admin/v1/platform/info")
.body(Body::empty())
.unwrap(),
)
.await
.unwrap();
assert_eq!(res.status(), StatusCode::UNAUTHORIZED);
}
#[tokio::test]
async fn forbidden_admin_calls_return_403() {
let token = make_token(&["control:read"]);
let res = test_app()
.oneshot(
Request::builder()
.uri("/admin/v1/jobs/echo")
.method("POST")
.header(header::AUTHORIZATION, format!("Bearer {token}"))
.header("idempotency-key", "k1")
.body(Body::empty())
.unwrap(),
)
.await
.unwrap();
assert_eq!(res.status(), StatusCode::FORBIDDEN);
}
#[tokio::test]
async fn tenant_scoped_endpoints_require_x_tenant_id() {
let token = make_token(&["control:read"]);
let res = test_app()
.oneshot(
Request::builder()
.uri("/admin/v1/tenants/echo")
.header(header::AUTHORIZATION, format!("Bearer {token}"))
.body(Body::empty())
.unwrap(),
)
.await
.unwrap();
assert_eq!(res.status(), StatusCode::BAD_REQUEST);
}
#[tokio::test]
async fn job_create_is_idempotent() {
let token = make_token(&["control:write"]);
let app = test_app();
let res1 = app
.clone()
.oneshot(
Request::builder()
.uri("/admin/v1/jobs/echo")
.method("POST")
.header(header::AUTHORIZATION, format!("Bearer {token}"))
.header("idempotency-key", "same-key")
.body(Body::empty())
.unwrap(),
)
.await
.unwrap();
assert_eq!(res1.status(), StatusCode::OK);
let body1 = axum::body::to_bytes(res1.into_body(), 1024 * 1024)
.await
.unwrap();
let v1: serde_json::Value = serde_json::from_slice(&body1).unwrap();
let id1 = Uuid::parse_str(v1.get("job_id").unwrap().as_str().unwrap()).unwrap();
let res2 = app
.clone()
.oneshot(
Request::builder()
.uri("/admin/v1/jobs/echo")
.method("POST")
.header(header::AUTHORIZATION, format!("Bearer {token}"))
.header("idempotency-key", "same-key")
.body(Body::empty())
.unwrap(),
)
.await
.unwrap();
assert_eq!(res2.status(), StatusCode::OK);
let body2 = axum::body::to_bytes(res2.into_body(), 1024 * 1024)
.await
.unwrap();
let v2: serde_json::Value = serde_json::from_slice(&body2).unwrap();
let id2 = Uuid::parse_str(v2.get("job_id").unwrap().as_str().unwrap()).unwrap();
assert_eq!(id1, id2);
}
async fn wait_for_terminal_status(app: Router, job_id: Uuid) -> JobStatus {
let start = tokio::time::Instant::now();
loop {
let res = app
.clone()
.oneshot(
Request::builder()
.uri(format!("/admin/v1/jobs/{job_id}"))
.header(
header::AUTHORIZATION,
format!("Bearer {}", make_token(&["control:read"])),
)
.body(Body::empty())
.unwrap(),
)
.await
.unwrap();
if res.status() == StatusCode::OK {
let body = axum::body::to_bytes(res.into_body(), 1024 * 1024)
.await
.unwrap();
let job: crate::jobs::Job = serde_json::from_slice(&body).unwrap();
if job.status != JobStatus::Pending && job.status != JobStatus::Running {
return job.status;
}
}
if start.elapsed() > std::time::Duration::from_millis(500) {
return JobStatus::Failed;
}
tokio::time::sleep(std::time::Duration::from_millis(10)).await;
}
}
#[tokio::test]
async fn tenant_job_idempotency_does_not_duplicate_effects() {
let token = make_token(&["control:write", "control:read"]);
let app = test_app();
let tenant_id = Uuid::new_v4();
let body = serde_json::json!({
"tenant_id": tenant_id,
"reason": "test",
});
let res1 = app
.clone()
.oneshot(
Request::builder()
.uri("/admin/v1/jobs/tenant/drain")
.method("POST")
.header(header::AUTHORIZATION, format!("Bearer {token}"))
.header("idempotency-key", "same-key")
.header(header::CONTENT_TYPE, "application/json")
.body(Body::from(body.to_string()))
.unwrap(),
)
.await
.unwrap();
assert_eq!(res1.status(), StatusCode::OK);
let res2 = app
.clone()
.oneshot(
Request::builder()
.uri("/admin/v1/jobs/tenant/drain")
.method("POST")
.header(header::AUTHORIZATION, format!("Bearer {token}"))
.header("idempotency-key", "same-key")
.header(header::CONTENT_TYPE, "application/json")
.body(Body::from(body.to_string()))
.unwrap(),
)
.await
.unwrap();
assert_eq!(res2.status(), StatusCode::OK);
let b1 = axum::body::to_bytes(res1.into_body(), 1024 * 1024)
.await
.unwrap();
let b2 = axum::body::to_bytes(res2.into_body(), 1024 * 1024)
.await
.unwrap();
let v1: serde_json::Value = serde_json::from_slice(&b1).unwrap();
let v2: serde_json::Value = serde_json::from_slice(&b2).unwrap();
assert_eq!(v1.get("job_id"), v2.get("job_id"));
}
#[tokio::test]
async fn tenant_lock_prevents_concurrent_mutations() {
let token = make_token(&["control:write", "control:read"]);
let app = test_app();
let tenant_id = Uuid::new_v4();
let res1 = app
.clone()
.oneshot(
Request::builder()
.uri("/admin/v1/jobs/tenant/drain")
.method("POST")
.header(header::AUTHORIZATION, format!("Bearer {token}"))
.header("idempotency-key", "k1")
.header(header::CONTENT_TYPE, "application/json")
.body(Body::from(
serde_json::json!({ "tenant_id": tenant_id, "reason": "r" }).to_string(),
))
.unwrap(),
)
.await
.unwrap();
assert_eq!(res1.status(), StatusCode::OK);
let res2 = app
.clone()
.oneshot(
Request::builder()
.uri("/admin/v1/jobs/tenant/migrate")
.method("POST")
.header(header::AUTHORIZATION, format!("Bearer {token}"))
.header("idempotency-key", "k2")
.header(header::CONTENT_TYPE, "application/json")
.body(Body::from(
serde_json::json!({
"tenant_id": tenant_id,
"runner_target": "node-2",
"reason": "r2"
})
.to_string(),
))
.unwrap(),
)
.await
.unwrap();
assert_eq!(res2.status(), StatusCode::CONFLICT);
}
#[tokio::test]
async fn migrate_preflight_fails_when_fleet_not_ready() {
let token = make_token(&["control:write", "control:read"]);
let app = test_app_with_fleet(vec![FleetService {
name: "unreachable".to_string(),
base_url: "http://127.0.0.1:1".to_string(),
}]);
let tenant_id = Uuid::new_v4();
let res = app
.clone()
.oneshot(
Request::builder()
.uri("/admin/v1/jobs/tenant/migrate")
.method("POST")
.header(header::AUTHORIZATION, format!("Bearer {token}"))
.header("idempotency-key", "k3")
.header(header::CONTENT_TYPE, "application/json")
.body(Body::from(
serde_json::json!({
"tenant_id": tenant_id,
"runner_target": "node-2",
"reason": "r"
})
.to_string(),
))
.unwrap(),
)
.await
.unwrap();
assert_eq!(res.status(), StatusCode::OK);
let body = axum::body::to_bytes(res.into_body(), 1024 * 1024)
.await
.unwrap();
let v: serde_json::Value = serde_json::from_slice(&body).unwrap();
let job_id = Uuid::parse_str(v.get("job_id").unwrap().as_str().unwrap()).unwrap();
let status = wait_for_terminal_status(app, job_id).await;
assert_eq!(status, JobStatus::Failed);
}
#[tokio::test]
async fn cancel_marks_job_cancelled() {
let token = make_token(&["control:write", "control:read"]);
let app = test_app();
let tenant_id = Uuid::new_v4();
let res = app
.clone()
.oneshot(
Request::builder()
.uri("/admin/v1/jobs/tenant/migrate")
.method("POST")
.header(header::AUTHORIZATION, format!("Bearer {token}"))
.header("idempotency-key", "k4")
.header(header::CONTENT_TYPE, "application/json")
.body(Body::from(
serde_json::json!({
"tenant_id": tenant_id,
"runner_target": "node-2",
"reason": "r"
})
.to_string(),
))
.unwrap(),
)
.await
.unwrap();
assert_eq!(res.status(), StatusCode::OK);
let body = axum::body::to_bytes(res.into_body(), 1024 * 1024)
.await
.unwrap();
let v: serde_json::Value = serde_json::from_slice(&body).unwrap();
let job_id = Uuid::parse_str(v.get("job_id").unwrap().as_str().unwrap()).unwrap();
let res = app
.clone()
.oneshot(
Request::builder()
.uri(format!("/admin/v1/jobs/{job_id}/cancel"))
.method("POST")
.header(header::AUTHORIZATION, format!("Bearer {token}"))
.body(Body::empty())
.unwrap(),
)
.await
.unwrap();
assert_eq!(res.status(), StatusCode::OK);
let status = wait_for_terminal_status(app, job_id).await;
assert_eq!(status, JobStatus::Cancelled);
}
#[tokio::test]
async fn migration_plan_is_deterministic() {
let token = make_token(&["control:write"]);
let app = test_app();
let tenant_id = Uuid::new_v4();
let res = app
.oneshot(
Request::builder()
.uri("/admin/v1/plan/tenant/migrate")
.method("POST")
.header(header::AUTHORIZATION, format!("Bearer {token}"))
.header(header::CONTENT_TYPE, "application/json")
.body(Body::from(
serde_json::json!({
"tenant_id": tenant_id,
"runner_target": "node-2",
"reason": "r"
})
.to_string(),
))
.unwrap(),
)
.await
.unwrap();
assert_eq!(res.status(), StatusCode::OK);
let body = axum::body::to_bytes(res.into_body(), 1024 * 1024)
.await
.unwrap();
let v: serde_json::Value = serde_json::from_slice(&body).unwrap();
assert_eq!(
v.get("steps").unwrap(),
&serde_json::json!(["preflight", "drain", "update_placement", "reload", "verify"])
);
}
}

109
control/api/src/main.rs Normal file
View File

@@ -0,0 +1,109 @@
use clap::Parser;
use metrics_exporter_prometheus::PrometheusBuilder;
use std::net::SocketAddr;
use tracing_subscriber::EnvFilter;
#[derive(Parser, Debug)]
#[command(name = "control-api")]
struct Args {
#[arg(long, env = "CONTROL_API_ADDR", default_value = "127.0.0.1:8080")]
addr: SocketAddr,
}
#[tokio::main]
async fn main() {
let args = Args::parse();
tracing_subscriber::fmt()
.with_env_filter(
EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")),
)
.init();
let recorder = PrometheusBuilder::new()
.set_buckets(&[
1.0, 2.5, 5.0, 10.0, 25.0, 50.0, 100.0, 250.0, 500.0, 1000.0, 2500.0, 5000.0,
])
.expect("invalid prometheus buckets")
.install_recorder()
.expect("failed to install prometheus recorder");
let http = reqwest::Client::builder()
.user_agent("cloudlysis-control-api")
.build()
.expect("failed to build http client");
let placement_path = std::env::var("CONTROL_PLACEMENT_PATH")
.ok()
.unwrap_or_else(|| "placement/dev.json".to_string())
.into();
let swarm_path = std::env::var("CONTROL_SWARM_STATE_PATH")
.ok()
.unwrap_or_else(|| "swarm/dev.json".to_string())
.into();
let self_url = std::env::var("CONTROL_SELF_URL")
.ok()
.unwrap_or_else(|| "http://127.0.0.1:8080".to_string());
let mut fleet_services = vec![api::FleetService {
name: "control-api".to_string(),
base_url: self_url,
}];
if let Ok(spec) = std::env::var("CONTROL_FLEET_SERVICES") {
fleet_services.extend(parse_fleet_services(&spec));
}
let app = api::build_app(api::AppState {
prometheus: recorder,
auth: api::AuthConfig {
hs256_secret: std::env::var("CONTROL_GATEWAY_JWT_HS256_SECRET")
.ok()
.map(|s| s.into_bytes()),
},
jobs: api::JobStore::default(),
audit: api::AuditStore::default(),
tenant_locks: api::TenantLocks::default(),
http,
placement: api::PlacementStore::new(placement_path),
fleet_services,
swarm: api::SwarmStore::new(swarm_path),
});
let listener = tokio::net::TcpListener::bind(args.addr)
.await
.expect("failed to bind");
tracing::info!(addr = %args.addr, "control api listening");
axum::serve(listener, app)
.with_graceful_shutdown(shutdown_signal())
.await
.expect("server failed");
}
async fn shutdown_signal() {
let _ = tokio::signal::ctrl_c().await;
}
fn parse_fleet_services(spec: &str) -> Vec<api::FleetService> {
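// Parses `name=url[,name=url...]`; empty or malformed pairs are skipped.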
spec.split(',')
.filter_map(|pair| {
let pair = pair.trim();
if pair.is_empty() {
return None;
}
let (name, url) = pair.split_once('=')?;
let name = name.trim();
let url = url.trim();
if name.is_empty() || url.is_empty() {
return None;
}
Some(api::FleetService {
name: name.to_string(),
base_url: url.to_string(),
})
})
.collect()
}

227
control/api/src/placement.rs Normal file
View File

@@ -0,0 +1,227 @@
use serde::{Deserialize, Serialize};
use std::{
collections::BTreeMap,
fs,
path::{Path, PathBuf},
sync::{Arc, RwLock},
time::SystemTime,
};
use uuid::Uuid;
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ServiceKind {
Aggregate,
Projection,
Runner,
}
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct PlacementFile {
pub revision: Option<String>,
pub aggregate_placement: Option<PlacementKind>,
pub projection_placement: Option<PlacementKind>,
pub runner_placement: Option<PlacementKind>,
}
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct PlacementKind {
pub placements: Vec<TenantPlacement>,
}
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct TenantPlacement {
pub tenant_id: Uuid,
pub targets: Vec<String>,
}
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct PlacementResponse {
pub kind: ServiceKind,
pub revision: String,
pub placements: Vec<TenantPlacement>,
}
impl PlacementFile {
pub fn load(path: &Path) -> Option<Self> {
let raw = fs::read_to_string(path).ok()?;
serde_json::from_str(&raw).ok()
}
pub fn for_kind(&self, kind: ServiceKind) -> PlacementResponse {
let revision = self.revision.clone().unwrap_or_else(|| "dev".to_string());
let placements = match kind {
ServiceKind::Aggregate => self
.aggregate_placement
.as_ref()
.map(|p| p.placements.clone())
.unwrap_or_default(),
ServiceKind::Projection => self
.projection_placement
.as_ref()
.map(|p| p.placements.clone())
.unwrap_or_default(),
ServiceKind::Runner => self
.runner_placement
.as_ref()
.map(|p| p.placements.clone())
.unwrap_or_default(),
};
PlacementResponse {
kind,
revision,
placements,
}
}
}
#[derive(Clone)]
pub struct PlacementStore {
inner: Arc<RwLock<Inner>>,
}
struct Inner {
path: PathBuf,
last_modified: Option<SystemTime>,
cached: Option<PlacementFile>,
}
impl PlacementStore {
pub fn new(path: PathBuf) -> Self {
Self {
inner: Arc::new(RwLock::new(Inner {
path,
last_modified: None,
cached: None,
})),
}
}
pub fn get_for_kind(&self, kind: ServiceKind) -> PlacementResponse {
let mut inner = self.inner.write().expect("placement lock poisoned");
inner.reload_if_changed();
match inner.cached.as_ref() {
Some(p) => p.for_kind(kind),
None => PlacementResponse {
kind,
revision: "dev".to_string(),
placements: vec![],
},
}
}
pub fn tenant_summaries(&self) -> Vec<TenantSummary> {
let mut inner = self.inner.write().expect("placement lock poisoned");
inner.reload_if_changed();
let Some(p) = inner.cached.as_ref() else {
return vec![];
};
let mut map: BTreeMap<Uuid, TenantSummary> = BTreeMap::new();
for (kind, placements) in [
(
ServiceKind::Aggregate,
p.for_kind(ServiceKind::Aggregate).placements,
),
(
ServiceKind::Projection,
p.for_kind(ServiceKind::Projection).placements,
),
(
ServiceKind::Runner,
p.for_kind(ServiceKind::Runner).placements,
),
] {
for tp in placements {
let entry = map.entry(tp.tenant_id).or_insert_with(|| TenantSummary {
tenant_id: tp.tenant_id,
aggregate_targets: vec![],
projection_targets: vec![],
runner_targets: vec![],
});
match kind {
ServiceKind::Aggregate => entry.aggregate_targets = tp.targets,
ServiceKind::Projection => entry.projection_targets = tp.targets,
ServiceKind::Runner => entry.runner_targets = tp.targets,
}
}
}
map.into_values().collect()
}
pub fn update_runner_target(
&self,
tenant_id: Uuid,
runner_target: String,
) -> Result<String, String> {
let mut inner = self.inner.write().expect("placement lock poisoned");
inner.reload_if_changed();
let mut file = inner.cached.clone().unwrap_or(PlacementFile {
revision: Some("dev".to_string()),
aggregate_placement: Some(PlacementKind { placements: vec![] }),
projection_placement: Some(PlacementKind { placements: vec![] }),
runner_placement: Some(PlacementKind { placements: vec![] }),
});
let mut runner = file
.runner_placement
.take()
.unwrap_or(PlacementKind { placements: vec![] });
if let Some(existing) = runner
.placements
.iter_mut()
.find(|p| p.tenant_id == tenant_id)
{
existing.targets = vec![runner_target];
} else {
runner.placements.push(TenantPlacement {
tenant_id,
targets: vec![runner_target],
});
}
runner.placements.sort_by_key(|p| p.tenant_id);
file.runner_placement = Some(runner);
let revision = format!("rev-{}", Uuid::new_v4());
file.revision = Some(revision.clone());
let raw = serde_json::to_string_pretty(&file).map_err(|e| e.to_string())?;
// Write to a sibling temp file, then rename over the target so readers never
// observe a partially written placement file.
let tmp = inner.path.with_extension("json.tmp");
fs::write(&tmp, raw).map_err(|e| e.to_string())?;
fs::rename(&tmp, &inner.path).map_err(|e| e.to_string())?;
// Clear the cached mtime so the next read re-stats the file.
inner.last_modified = None;
inner.cached = Some(file);
Ok(revision)
}
}
impl Inner {
fn reload_if_changed(&mut self) {
// Cheap mtime-based invalidation: re-read whenever the file is missing, new,
// or carries a different modification time than the cached copy.
let meta = fs::metadata(&self.path).ok();
let modified = meta.and_then(|m| m.modified().ok());
if self.cached.is_some() && modified.is_some() && modified == self.last_modified {
return;
}
self.last_modified = modified;
self.cached = PlacementFile::load(&self.path);
}
}
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct TenantSummary {
pub tenant_id: Uuid,
pub aggregate_targets: Vec<String>,
pub projection_targets: Vec<String>,
pub runner_targets: Vec<String>,
}
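
A minimal usage sketch for the store above, assuming the module is re-exported from the `api` crate (the integration tests later in this commit use `api::PlacementStore`); the file path and runner target are illustrative:

```rust
use std::path::PathBuf;
use uuid::Uuid;

fn placement_example() -> Result<(), String> {
    // Illustrative path; production would point at the mounted placement file.
    let store = api::PlacementStore::new(PathBuf::from("/etc/cloudlysis/placement.json"));

    // Reads re-stat the file and reload only when its mtime changed.
    let runner = store.get_for_kind(api::ServiceKind::Runner);
    println!("revision={} placements={}", runner.revision, runner.placements.len());

    // Writes bump the revision and atomically swap the file on disk.
    let new_revision = store.update_runner_target(Uuid::new_v4(), "runner-2:28080".to_string())?;
    println!("placement updated to {new_revision}");
    Ok(())
}
```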

62
control/api/src/swarm.rs Normal file
View File

@@ -0,0 +1,62 @@
use serde::{Deserialize, Serialize};
use std::{fs, path::Path};
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SwarmService {
pub name: String,
pub image: Option<String>,
pub mode: Option<String>,
pub replicas: Option<String>,
pub updated_at: Option<String>,
}
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SwarmTask {
pub id: String,
pub service: String,
pub node: Option<String>,
pub desired_state: Option<String>,
pub current_state: Option<String>,
pub error: Option<String>,
}
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SwarmStateFile {
pub services: Vec<SwarmService>,
pub tasks: Vec<SwarmTask>,
}
#[derive(Clone)]
pub struct SwarmStore {
path: std::path::PathBuf,
}
impl SwarmStore {
pub fn new(path: std::path::PathBuf) -> Self {
Self { path }
}
pub fn list_services(&self) -> Vec<SwarmService> {
self.load().map(|s| s.services).unwrap_or_default()
}
pub fn list_tasks(&self, service_name: &str) -> Vec<SwarmTask> {
self.load()
.map(|s| {
s.tasks
.into_iter()
.filter(|t| t.service == service_name)
.collect()
})
.unwrap_or_default()
}
fn load(&self) -> Option<SwarmStateFile> {
load_state(&self.path)
}
}
fn load_state(path: &Path) -> Option<SwarmStateFile> {
let raw = fs::read_to_string(path).ok()?;
serde_json::from_str(&raw).ok()
}

View File

@@ -0,0 +1,16 @@
#[test]
fn annotation_writer_produces_expected_grafana_payload() {
let a = api::build_grafana_deploy_annotation(api::DeployAnnotationArgs {
service: "gateway",
version: Some("1.2.3"),
git_sha: Some("abc123"),
time_ms: 1234567890,
});
assert_eq!(a.time, 1234567890);
assert!(a.tags.iter().any(|t| t == "deploy"));
assert!(a.tags.iter().any(|t| t == "service:gateway"));
assert!(a.tags.iter().any(|t| t == "version:1.2.3"));
assert!(a.tags.iter().any(|t| t == "git_sha:abc123"));
assert!(a.text.contains("deploy gateway"));
}

View File

@@ -0,0 +1,39 @@
#[test]
fn build_info_parser_extracts_expected_labels() {
let metrics = r#"
# HELP gateway_build_info build info
# TYPE gateway_build_info gauge
gateway_build_info{service="gateway",version="1.2.3",git_sha="abc"} 1
runner_build_info{service="runner",version="2.0.0",git_sha="def"} 1
unrelated_metric 5
"#;
let info = api::extract_build_info(metrics);
assert_eq!(info.len(), 2);
assert!(
info.iter()
.any(|i| i.service == "gateway" && i.version == "1.2.3" && i.git_sha == "abc")
);
assert!(
info.iter()
.any(|i| i.service == "runner" && i.version == "2.0.0" && i.git_sha == "def")
);
}
#[test]
fn build_info_snapshot_has_required_services() {
let metrics = r#"
gateway_build_info{service="gateway",version="1.2.3",git_sha="abc"} 1
aggregate_build_info{service="aggregate",version="1.0.0",git_sha="aaa"} 1
projection_build_info{service="projection",version="1.0.0",git_sha="bbb"} 1
runner_build_info{service="runner",version="2.0.0",git_sha="ccc"} 1
"#;
let info = api::extract_build_info(metrics);
for required in ["gateway", "aggregate", "projection", "runner"] {
assert!(
info.iter().any(|i| i.service == required),
"missing build_info for service={required}"
);
}
}

View File

@@ -0,0 +1,55 @@
use std::{fs, path::PathBuf, time::Duration};
fn repo_root() -> PathBuf {
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.parent()
.and_then(|p| p.parent())
.expect("api crate should live under repo root")
.to_path_buf()
}
#[test]
fn docker_compose_files_parse_and_include_required_services() {
let root = repo_root();
let compose = fs::read_to_string(root.join("observability/docker-compose.yml")).unwrap();
let v: serde_yaml::Value = serde_yaml::from_str(&compose).unwrap();
let services = v
.get("services")
.and_then(|x| x.as_mapping())
.expect("missing services");
for required in ["grafana", "victoria-metrics", "vmagent", "loki", "tempo"] {
assert!(
services.contains_key(serde_yaml::Value::String(required.to_string())),
"missing service {required}"
);
}
}
#[tokio::test]
#[ignore]
async fn docker_compose_config_validation_is_gated_and_fast() {
let enabled = std::env::var("CONTROL_TEST_DOCKER").ok();
assert_eq!(enabled.as_deref(), Some("1"));
let root = repo_root();
let compose = root.join("observability/docker-compose.yml");
let cmd = tokio::process::Command::new("docker")
.args(["compose", "-f"])
.arg(compose)
.args(["config"])
.output();
let out = tokio::time::timeout(Duration::from_secs(10), cmd)
.await
.expect("docker compose config timed out")
.expect("failed to run docker compose config");
assert!(
out.status.success(),
"docker compose config failed: {}",
String::from_utf8_lossy(&out.stderr)
);
}

View File

@@ -0,0 +1,6 @@
#[test]
#[ignore]
fn docker_integration_tests_are_gated() {
let enabled = std::env::var("CONTROL_TEST_DOCKER").ok();
assert_eq!(enabled.as_deref(), Some("1"));
}

View File

@@ -0,0 +1,183 @@
use jsonwebtoken::{EncodingKey, Header, encode};
use serde::Serialize;
use std::{fs, net::TcpListener, time::Duration};
#[derive(Serialize)]
struct Claims {
sub: String,
session_id: String,
permissions: Vec<String>,
exp: usize,
}
fn free_port() -> u16 {
TcpListener::bind("127.0.0.1:0")
.unwrap()
.local_addr()
.unwrap()
.port()
}
fn token(secret: &[u8], perms: &[&str]) -> String {
let exp = (std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_secs()
+ 60) as usize;
encode(
&Header::default(),
&Claims {
sub: "op_1".to_string(),
session_id: "sess_1".to_string(),
permissions: perms.iter().map(|p| (*p).to_string()).collect(),
exp,
},
&EncodingKey::from_secret(secret),
)
.unwrap()
}
async fn wait_ready(url: &str) {
let client = reqwest::Client::new();
let start = tokio::time::Instant::now();
loop {
let ok = client
.get(format!("{url}/ready"))
.send()
.await
.map(|r| r.status().is_success())
.unwrap_or(false);
if ok {
return;
}
if start.elapsed() > Duration::from_secs(10) {
panic!("control-api did not become ready");
}
tokio::time::sleep(Duration::from_millis(100)).await;
}
}
#[tokio::test]
#[ignore]
async fn control_plane_can_see_the_fleet_via_docker_stubs() {
let enabled = std::env::var("CONTROL_TEST_DOCKER").ok();
assert_eq!(enabled.as_deref(), Some("1"));
let nginx_conf = r#"
server {
listen 80;
server_name _;
location = /health { return 200 "ok\n"; }
location = /ready { return 200 "ready\n"; }
location = /metrics { return 200 "stub_build_info{service=\"stub\",version=\"dev\",git_sha=\"000\"} 1\n"; }
}
"#;
let mut conf_path = std::env::temp_dir();
conf_path.push(format!(
"cloudlysis-control-nginx-{}.conf",
uuid::Uuid::new_v4()
));
fs::write(&conf_path, nginx_conf).unwrap();
let gateway_port = free_port();
let runner_port = free_port();
let aggregate_port = free_port();
let projection_port = free_port();
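// Each stub is a disposable nginx container that fakes the /health, /ready
// and /metrics endpoints of a real fleet service.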
async fn run_stub(name: &str, port: u16, conf: &std::path::Path) -> String {
let out = tokio::process::Command::new("docker")
.args(["run", "-d", "--rm"])
.args(["-p", &format!("{port}:80")])
.args([
"-v",
&format!("{}:/etc/nginx/conf.d/default.conf:ro", conf.display()),
])
.arg("nginx:1.29-alpine")
.output()
.await
.expect("failed to run docker");
assert!(
out.status.success(),
"{name} stub failed: {}",
String::from_utf8_lossy(&out.stderr)
);
String::from_utf8_lossy(&out.stdout).trim().to_string()
}
let gateway_id = run_stub("gateway", gateway_port, &conf_path).await;
let runner_id = run_stub("runner", runner_port, &conf_path).await;
let aggregate_id = run_stub("aggregate", aggregate_port, &conf_path).await;
let projection_id = run_stub("projection", projection_port, &conf_path).await;
let secret = b"e2e_secret";
let api_port = free_port();
let api_url = format!("http://127.0.0.1:{api_port}");
let mut placement_path = std::env::temp_dir();
placement_path.push(format!(
"cloudlysis-control-placement-{}.json",
uuid::Uuid::new_v4()
));
fs::write(
&placement_path,
r#"{"revision":"e2e","aggregate_placement":{"placements":[]},"projection_placement":{"placements":[]},"runner_placement":{"placements":[]}}"#,
)
.unwrap();
let mut child = tokio::process::Command::new(env!("CARGO_BIN_EXE_api"))
.env("CONTROL_API_ADDR", format!("127.0.0.1:{api_port}"))
.env("CONTROL_GATEWAY_JWT_HS256_SECRET", "e2e_secret")
.env("CONTROL_PLACEMENT_PATH", placement_path.to_string_lossy().to_string())
.env(
"CONTROL_FLEET_SERVICES",
format!(
"gateway=http://127.0.0.1:{gateway_port},aggregate=http://127.0.0.1:{aggregate_port},projection=http://127.0.0.1:{projection_port},runner=http://127.0.0.1:{runner_port}"
),
)
.spawn()
.expect("failed to spawn control-api");
wait_ready(&api_url).await;
let client = reqwest::Client::new();
let t = token(secret, &["control:read"]);
let res = client
.get(format!("{api_url}/admin/v1/fleet/snapshot"))
.header(reqwest::header::AUTHORIZATION, format!("Bearer {t}"))
.send()
.await
.unwrap();
assert!(res.status().is_success());
let v: serde_json::Value = res.json().await.unwrap();
let services = v.get("services").and_then(|x| x.as_array()).unwrap();
assert!(
services.len() >= 5,
"expected at least 5 services (including control-api), got {}",
services.len()
);
let res = client
.get(format!("{api_url}/admin/v1/tenants"))
.header(reqwest::header::AUTHORIZATION, format!("Bearer {t}"))
.send()
.await
.unwrap();
assert!(res.status().is_success());
let _ = child.kill().await;
for id in [gateway_id, runner_id, aggregate_id, projection_id] {
let _ = tokio::process::Command::new("docker")
.args(["stop", &id])
.output()
.await;
}
let _ = fs::remove_file(&conf_path);
let _ = fs::remove_file(&placement_path);
}

View File

@@ -0,0 +1,30 @@
#[test]
fn fleet_services_env_parser_is_lenient() {
let services = {
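// Duplicates the `parse_fleet_services` logic from the binary entrypoint so
// the test exercises the same parsing rules.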
fn parse(spec: &str) -> Vec<api::FleetService> {
spec.split(',')
.filter_map(|pair| {
let pair = pair.trim();
if pair.is_empty() {
return None;
}
let (name, url) = pair.split_once('=')?;
let name = name.trim();
let url = url.trim();
if name.is_empty() || url.is_empty() {
return None;
}
Some(api::FleetService {
name: name.to_string(),
base_url: url.to_string(),
})
})
.collect()
}
parse(" gateway=http://x , ,runner=http://y,broken, =http://z ")
};
assert_eq!(services.len(), 2);
assert_eq!(services[0].name, "gateway");
assert_eq!(services[1].name, "runner");
}

View File

@@ -0,0 +1,23 @@
use std::time::Duration;
#[tokio::test]
#[ignore]
async fn nats_integration_tests_are_gated_and_fast_fail() {
let url = std::env::var("CONTROL_TEST_NATS_URL").expect("CONTROL_TEST_NATS_URL is required");
let without_scheme = url.strip_prefix("nats://").unwrap_or(url.as_str());
let hostport = without_scheme.split('/').next().unwrap_or(without_scheme);
let mut parts = hostport.split(':');
let host = parts.next().unwrap_or("127.0.0.1");
let port: u16 = parts
.next()
.unwrap_or("4222")
.parse()
.expect("invalid port in CONTROL_TEST_NATS_URL");
let connect = tokio::net::TcpStream::connect((host, port));
tokio::time::timeout(Duration::from_secs(2), connect)
.await
.expect("tcp connect to NATS timed out")
.expect("failed to connect to NATS");
}

View File

@@ -0,0 +1,75 @@
use std::{collections::BTreeSet, fs, path::PathBuf};
fn repo_root() -> PathBuf {
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.parent()
.and_then(|p| p.parent())
.expect("api crate should live under repo root")
.to_path_buf()
}
#[test]
fn grafana_provisioning_files_are_syntactically_valid() {
let root = repo_root();
let datasources = fs::read_to_string(
root.join("observability/grafana/provisioning/datasources/datasources.yml"),
)
.expect("missing grafana datasources provisioning file");
let dashboards = fs::read_to_string(
root.join("observability/grafana/provisioning/dashboards/dashboards.yml"),
)
.expect("missing grafana dashboards provisioning file");
let _datasources_yaml: serde_yaml::Value =
serde_yaml::from_str(&datasources).expect("invalid grafana datasources yaml");
let _dashboards_yaml: serde_yaml::Value =
serde_yaml::from_str(&dashboards).expect("invalid grafana dashboards yaml");
}
#[test]
fn grafana_dashboards_are_syntactically_valid_json() {
let root = repo_root();
let dashboards_dir = root.join("observability/grafana/dashboards");
let mut found = 0usize;
for entry in fs::read_dir(&dashboards_dir).expect("missing dashboards dir") {
let entry = entry.expect("failed to read dashboards dir entry");
let path = entry.path();
if path.extension().and_then(|e| e.to_str()) != Some("json") {
continue;
}
found += 1;
let raw = fs::read_to_string(&path).expect("failed to read dashboard json");
let _: serde_json::Value =
serde_json::from_str(&raw).unwrap_or_else(|e| panic!("{path:?}: {e}"));
}
assert!(found > 0, "expected at least one dashboard json file");
}
#[test]
fn vmagent_config_parses_and_includes_required_jobs() {
let root = repo_root();
let scrape = fs::read_to_string(root.join("observability/vmagent/scrape.yml"))
.expect("missing vmagent scrape config");
let value: serde_yaml::Value =
serde_yaml::from_str(&scrape).expect("invalid vmagent scrape yaml");
let mut job_names = BTreeSet::<String>::new();
if let Some(scrape_configs) = value.get("scrape_configs").and_then(|v| v.as_sequence()) {
for cfg in scrape_configs {
if let Some(job) = cfg.get("job_name").and_then(|v| v.as_str()) {
job_names.insert(job.to_string());
}
}
}
for required in ["victoria-metrics", "vmagent", "control-api"] {
assert!(
job_names.contains(required),
"vmagent scrape config missing required job_name={required}"
);
}
}

View File

@@ -0,0 +1,61 @@
use std::{
net::TcpStream,
path::PathBuf,
process::Command,
time::{Duration, Instant},
};
fn repo_root() -> PathBuf {
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.parent()
.and_then(|p| p.parent())
.expect("api crate should live under repo root")
.to_path_buf()
}
fn wait_for_tcp(addr: &str, timeout: Duration) -> bool {
let start = Instant::now();
while start.elapsed() < timeout {
if TcpStream::connect_timeout(
&addr.parse().expect("invalid socket addr"),
Duration::from_secs(1),
)
.is_ok()
{
return true;
}
std::thread::sleep(Duration::from_millis(250));
}
false
}
#[test]
#[ignore]
fn observability_stack_reaches_healthy_state_fast() {
let enabled = std::env::var("CONTROL_TEST_DOCKER").ok();
assert_eq!(enabled.as_deref(), Some("1"));
let root = repo_root();
let compose = root.join("observability/docker-compose.yml");
let up = Command::new("docker")
.args(["compose", "-f"])
.arg(&compose)
.args(["up", "-d"])
.status()
.expect("failed to run docker compose up");
assert!(up.success(), "docker compose up failed");
let ok = wait_for_tcp("127.0.0.1:3000", Duration::from_secs(30))
&& wait_for_tcp("127.0.0.1:8428", Duration::from_secs(30))
&& wait_for_tcp("127.0.0.1:3100", Duration::from_secs(30))
&& wait_for_tcp("127.0.0.1:3200", Duration::from_secs(30));
let _ = Command::new("docker")
.args(["compose", "-f"])
.arg(&compose)
.args(["down", "-v"])
.status();
assert!(ok, "observability stack did not become reachable in time");
}

View File

@@ -0,0 +1,43 @@
use std::{fs, path::PathBuf, thread, time::Duration};
use api::PlacementStore;
fn tmp_file(name: &str) -> PathBuf {
let mut p = std::env::temp_dir();
p.push(format!(
"cloudlysis-control-{name}-{}-{}.json",
std::process::id(),
std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap()
.as_nanos()
));
p
}
#[test]
fn placement_store_hot_reload_swaps_atomically() {
let path = tmp_file("placement");
fs::write(
&path,
r#"{"revision":"r1","aggregate_placement":{"placements":[]},"projection_placement":{"placements":[]},"runner_placement":{"placements":[]}}"#,
)
.unwrap();
let store = PlacementStore::new(path.clone());
let a1 = store.get_for_kind(api::ServiceKind::Aggregate);
assert_eq!(a1.revision, "r1");
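// Give the rewrite a strictly newer mtime; coarse filesystem timestamp
// granularity could otherwise mask the change.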
thread::sleep(Duration::from_millis(5));
fs::write(
&path,
r#"{"revision":"r2","aggregate_placement":{"placements":[]},"projection_placement":{"placements":[]},"runner_placement":{"placements":[]}}"#,
)
.unwrap();
let a2 = store.get_for_kind(api::ServiceKind::Aggregate);
assert_eq!(a2.revision, "r2");
let _ = fs::remove_file(&path);
}

View File

@@ -0,0 +1,31 @@
use std::{fs, path::PathBuf};
#[test]
fn swarm_store_is_deterministic_from_file() {
let mut path = std::env::temp_dir();
path.push(format!(
"cloudlysis-control-swarm-{}-{}.json",
std::process::id(),
uuid::Uuid::new_v4()
));
fs::write(
&path,
r#"{"services":[{"name":"gateway","image":"x","mode":"replicated","replicas":"1/1","updated_at":null}],"tasks":[{"id":"t1","service":"gateway","node":"n1","desired_state":"running","current_state":"running","error":null}]}"#,
)
.unwrap();
let store = api::SwarmStore::new(PathBuf::from(&path));
let services = store.list_services();
assert_eq!(services.len(), 1);
assert_eq!(services[0].name, "gateway");
let tasks = store.list_tasks("gateway");
assert_eq!(tasks.len(), 1);
assert_eq!(tasks[0].id, "t1");
let none = store.list_tasks("missing");
assert_eq!(none.len(), 0);
let _ = fs::remove_file(&path);
}

View File

@@ -0,0 +1,42 @@
use std::time::Duration;
#[tokio::test]
#[ignore]
async fn docker_swarm_smoke_test_is_gated_and_times_out() {
let enabled = std::env::var("CONTROL_TEST_DOCKER").ok();
assert_eq!(enabled.as_deref(), Some("1"));
let stack = "cloudlysis_control_test";
let compose = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.parent()
.and_then(|p| p.parent())
.unwrap()
.join("swarm/stacks/control-plane.yml");
let deploy = tokio::process::Command::new("docker")
.args(["stack", "deploy", "-c"])
.arg(&compose)
.arg(stack)
.output();
let out = tokio::time::timeout(Duration::from_secs(30), deploy)
.await
.expect("docker stack deploy timed out")
.expect("failed to run docker stack deploy");
assert!(
out.status.success(),
"docker stack deploy failed: {}",
String::from_utf8_lossy(&out.stderr)
);
let ls = tokio::process::Command::new("docker")
.args(["service", "ls"])
.output();
let _ = tokio::time::timeout(Duration::from_secs(10), ls).await;
let rm = tokio::process::Command::new("docker")
.args(["stack", "rm"])
.arg(stack)
.output();
let _ = tokio::time::timeout(Duration::from_secs(10), rm).await;
}

View File

@@ -0,0 +1,40 @@
use std::{fs, path::PathBuf};
fn repo_root() -> PathBuf {
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.parent()
.and_then(|p| p.parent())
.expect("api crate should live under repo root")
.to_path_buf()
}
#[test]
fn stack_files_parse_as_yaml() {
let root = repo_root();
for file in [
root.join("swarm/stacks/control-plane.yml"),
root.join("swarm/stacks/observability.yml"),
] {
let raw = fs::read_to_string(&file).unwrap();
let _: serde_yaml::Value = serde_yaml::from_str(&raw).unwrap();
}
}
#[test]
fn control_plane_stack_has_required_services() {
let root = repo_root();
let raw = fs::read_to_string(root.join("swarm/stacks/control-plane.yml")).unwrap();
let v: serde_yaml::Value = serde_yaml::from_str(&raw).unwrap();
let services = v
.get("services")
.and_then(|x| x.as_mapping())
.expect("missing services");
for required in ["control-api", "control-ui"] {
assert!(
services.contains_key(serde_yaml::Value::String(required.to_string())),
"missing service {required}"
);
}
}

601
control/prd.md Normal file
View File

@@ -0,0 +1,601 @@
### 🧱 Component: Control Plane (Admin UI + Monitoring + Production Ops)
**Definition:**
This repository hosts the **platform control plane**:
1) the **Admin UI** used by platform operators and admins to manage users/roles/sessions, tenants, configuration, definitions, and production scaling; and
2) the **observability stack** and **production dashboards** (VictoriaMetrics + Loki + Grafana, plus alerting/scrape config) required to operate the platform in production.
The control plane is the “single pane of glass” and the “safe hands” layer: it does not replace node runtime logic; it coordinates existing node capabilities and exposes them with strict RBAC, auditability, and operational guardrails.
---
## **Context: Existing Node Repositories (../)**
This PRD is derived from the currently implemented node repos in `../`:
- **Aggregate**: expects a control node to manage tenant placement and scaling operations, including tenant migrations ([aggregate/prd.md](file:///Users/vlad/Developer/cloudlysis/aggregate/prd.md#L82-L151)). Tenant placement primitives and KV helper exist ([swarm.rs](file:///Users/vlad/Developer/cloudlysis/aggregate/src/swarm.rs#L5-L227)).
- **Gateway**: provides the platform ingress, authn/authz, and tenant-aware routing; it explicitly expects NATS KV-based tenant placement and hot reload in production ([gateway/prd.md](file:///Users/vlad/Developer/cloudlysis/gateway/prd.md#L13-L175)).
- **Projection**: consumes events, stores read models, and expects tenant-scoped query isolation and operational monitoring (consumer lag, checkpoints) ([projection/prd.md](file:///Users/vlad/Developer/cloudlysis/projection/prd.md#L7-L96)).
- **Runner**: executes sagas + effects, includes tenant assignment watching via NATS KV and tenant draining semantics ([tenant_placement.rs](file:///Users/vlad/Developer/cloudlysis/runner/src/tenant_placement.rs#L11-L104)) and exposes admin endpoints for drain/reload in its PRD ([runner/prd.md](file:///Users/vlad/Developer/cloudlysis/runner/prd.md#L199-L210)).
The control plane also adopts the proven **Admin UI UX + component library** from UltraBase's control-plane admin UI, adapting screens and information architecture to Cloudlysis needs:
- Reusable UI components live under [ui/control-plane-admin/src/components/ui](file:///Users/vlad/Developer/madapes/ultrabase/ui/control-plane-admin/src/components/ui).
- Example pages include [TenantsPage](file:///Users/vlad/Developer/madapes/ultrabase/ui/control-plane-admin/src/pages/TenantsPage.tsx), [AdminUsersPage](file:///Users/vlad/Developer/madapes/ultrabase/ui/control-plane-admin/src/pages/AdminUsersPage.tsx), [AdminSessionsPage](file:///Users/vlad/Developer/madapes/ultrabase/ui/control-plane-admin/src/pages/AdminSessionsPage.tsx), [FleetPage](file:///Users/vlad/Developer/madapes/ultrabase/ui/control-plane-admin/src/pages/FleetPage.tsx), [TopologyPage](file:///Users/vlad/Developer/madapes/ultrabase/ui/control-plane-admin/src/pages/TopologyPage.tsx), and [ObservabilityPage](file:///Users/vlad/Developer/madapes/ultrabase/ui/control-plane-admin/src/pages/ObservabilityPage.tsx).
---
## **Problem Statement**
Operating the platform without a unified control plane forces operators to:
- Use ad-hoc scripts, direct cluster access, or service-local admin endpoints
- Manage tenants, placements, and deployments without a consistent audit trail
- Correlate production incidents across services with incomplete dashboards and unsafe levels of access
The platform needs a control plane that:
- Centralizes **admin workflows** and **production operability**
- Enforces **least-privilege RBAC**, **step-up**, and **auditing**
- Provides a consistent, safe abstraction over **tenant placement**, **scale**, and **production operations**
---
## **Goals**
- Deliver an Admin UI with full admin management over:
- users, sessions, roles/permissions
- configuration (global + per-tenant)
- definitions (aggregates, projections, sagas, effects, manifests)
- scaling and production management (tenant placement, drains, migrations, deployments)
- Package production-grade monitoring:
- metrics via VictoriaMetrics
- logs via Loki
- dashboards and alerting via Grafana (+ vmalert where used)
- Make production operations observable, auditable, and safe by default:
- strong change logging + approvals where needed
- idempotent operations + dry runs + rollback paths
---
## **Non-Goals**
- Re-implement node business logic (Aggregate / Projection / Runner) or platform ingress (Gateway).
- Replace NATS JetStream, libmdbx storage responsibilities, or per-service runtime concerns.
- Provide an arbitrary “general API gateway” for third-party upstreams.
---
## **Primary Users**
- **Platform Owner / SRE**: fleet operations, incident response, production change management.
- **Platform Admin**: tenant provisioning, RBAC, config/definition promotion.
- **Security Admin**: access reviews, session revocation, audit trails.
- **Support / On-call**: triage dashboards, logs/metrics correlation, safe mitigations (drain, disable, rollback).
---
## **Key Concepts**
### Control Plane Scope
- The control plane is the authoritative interface for production operations and admin management.
- The control plane uses node APIs, the Gateway, and NATS KV as its operational substrate rather than bypassing them.
### Tenant-Aware Operations
- All tenant-scoped operations are keyed by `tenant_id` (consistent with `x-tenant-id` usage across nodes and Gateway).
- Tenant placement is treated as a first-class “control plane state” (NATS KV-backed in production; file/static in development), consistent with existing code patterns ([swarm.rs](file:///Users/vlad/Developer/cloudlysis/aggregate/src/swarm.rs#L188-L226), [tenant_placement.rs](file:///Users/vlad/Developer/cloudlysis/runner/src/tenant_placement.rs#L41-L104)).
### Safe Change Management
- Mutating actions require explicit intent, are recorded in audit logs, and should be reversible where possible.
- All high-impact operations support:
- validation and preflight checks
- dry-run planning
- idempotency keys
- explicit rollback guidance
### Control Plane Components (In This Repo)
- **Admin UI (React)**:
- Reuse UltraBase's control-plane admin UI component system and interaction patterns, adapting routes and pages to Cloudlysis requirements ([components/ui](file:///Users/vlad/Developer/madapes/ultrabase/ui/control-plane-admin/src/components/ui)).
- The UI should prefer “table + detail pages + action dropdown + modals” patterns to keep ops workflows fast and consistent.
- **Control Plane API (BFF / Admin API)**:
- A thin API layer that enforces RBAC, writes audit logs, and orchestrates multi-step operations (drain/migrate/rollout) as idempotent jobs.
- Integrates with the Gateway for platform authn/authz and with node admin endpoints for operational actions.
- **Observability Stack**:
- Version-controlled provisioning for Grafana dashboards/datasources, scrape configs for vmagent, and alert rules (vmalert or Grafana Alerting), modeled after UltraBase's baseline ([observability/README.md](file:///Users/vlad/Developer/madapes/ultrabase/observability/README.md#L1-L47)).
---
## **Functional Requirements**
### 1) Admin IAM (Users, Sessions, Roles)
#### 1.1 Users
- CRUD users with lifecycle states:
- invited (pending acceptance), active, suspended, disabled, deleted (tombstoned)
- Identity attributes:
- email (primary), optional secondary identities
- display name, avatar, metadata tags
- auth methods enabled (password, OIDC providers), MFA state
- Administrative actions:
- invite/resend invite
- reset password flow initiation
- force MFA reset / revoke recovery codes
- disable login / suspend user
- impersonation (break-glass, audited, time-boxed)
- Security constraints:
- privileged actions require step-up / recent auth
- sensitive events must be audit logged (who, what, when, why, from where)
#### 1.2 Sessions
- View active sessions and refresh token families:
- by user, by tenant, by IP / geo, by device, by time range
- Revoke capabilities:
- revoke a single session
- revoke all sessions for a user
- revoke all sessions for a tenant (incident response)
- Detection surfaces:
- unusual session fanout (many sessions per user)
- repeated failed logins / MFA failures
- suspicious IP changes
#### 1.3 Roles & Permissions (RBAC)
- Roles are sets of permissions; assignments bind principals to roles in a scope.
- Scopes:
- global (platform-level)
- tenant-scoped
- environment-scoped (dev/staging/prod) when applicable
- Required permission domains (minimum):
- iam.users.* (create/update/suspend/delete)
- iam.sessions.* (list/revoke)
- iam.roles.* (create/update/assign)
- tenants.* (create/update/archive)
- configs.* (read/write/approve/apply)
- definitions.* (read/write/validate/promote/rollback)
- scale.* (view/apply/migrate/drain)
- ops.* (deploy/rollback/restart/drain)
- observability.* (view dashboards, manage alert rules)
- audit.* (view/export)
- Role templates:
- owner, admin, operator, support, read-only, security-admin, break-glass
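
A minimal deny-by-default check over these permission domains could look like the sketch below; the helper name and wildcard semantics are illustrative, not an implemented API:

```rust
/// Illustrative check: a grant authorizes an action only when it matches
/// exactly or via a domain wildcard such as `iam.users.*`.
fn require_permission(granted: &[String], needed: &str) -> Result<(), &'static str> {
    let allowed = granted.iter().any(|p| {
        p.as_str() == needed
            || p.strip_suffix(".*").is_some_and(|domain| {
                needed
                    .strip_prefix(domain)
                    .is_some_and(|rest| rest.starts_with('.'))
            })
    });
    if allowed { Ok(()) } else { Err("forbidden") }
}
```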
---
### 2) Tenant Management
- Create, list, and archive tenants.
- Tenant status model:
- provisioning, active, draining, migrating, degraded, suspended, archived
- Tenant metadata:
- plan/tier, quotas, feature flags, contact + billing metadata, environment(s)
- Tenant operational actions:
- trigger provisioning workflows (create streams/buckets, seed configs, create placement)
- rotate tenant secrets (as definitions/config allow)
- pause/resume workload (soft kill switch via config flags)
Tenant pages should mirror UltraBase's “Tenant Overview + subpages” navigation patterns (example: [TenantsPage](file:///Users/vlad/Developer/madapes/ultrabase/ui/control-plane-admin/src/pages/TenantsPage.tsx) and [TenantOverviewPage](file:///Users/vlad/Developer/madapes/ultrabase/ui/control-plane-admin/src/pages/TenantOverviewPage.tsx)).
---
### 3) Configuration Management (Global + Per-Tenant)
#### 3.1 Config Model
- Config items are versioned, typed documents with:
- scope (global / tenant / environment)
- schema version
- provenance (who/what wrote it)
- effective date and rollout strategy
- Config must support:
- validation against a schema
- diff view (previous vs next)
- staged rollout (preview → apply)
- rollback to a prior version
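
As a sketch only (field names are assumptions, not an implemented schema), a versioned config item carrying the attributes above might be modeled like this:

```rust
use serde::{Deserialize, Serialize};
use uuid::Uuid;

/// Illustrative shape for a versioned, typed config document.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ConfigScope {
    Global,
    Tenant { tenant_id: Uuid },
    Environment { env: String },
}

#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ConfigItem {
    pub key: String,
    pub scope: ConfigScope,
    pub schema_version: u32,
    /// Monotonic per-key version; rollback re-applies an earlier version.
    pub version: u64,
    pub value: serde_json::Value,
    /// Provenance: who or what wrote this version, and when it takes effect.
    pub written_by: String,
    pub effective_at: Option<String>,
}
```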
#### 3.2 Node-Related Configuration
Required config surfaces (minimum):
- **Gateway**: routing/placement sources, auth policies, rate limits (see routing expectations in [gateway/prd.md](file:///Users/vlad/Developer/cloudlysis/gateway/prd.md#L154-L175)).
- **Aggregate / Projection / Runner**:
- shard identifiers and tenant allowlists/placement settings
- drain/reload toggles and safety thresholds
- resource limits / concurrency caps
---
### 4) Definition Management (System “Blueprints”)
Definitions are the declarative “what the platform is” and “what runs” layer: aggregates, projections, sagas, effect providers, and any manifests that tie runtime-function programs to entity types.
Required capabilities:
- Upload/edit versioned definitions with:
- validation (schema + semantic checks)
- “impact analysis” (which tenants/services are affected)
- promotion workflow (dev → staging → prod)
- Change controls:
- approvals (role-based) for production promotion
- emergency rollback path (one-click revert to last-known-good definition bundle)
- Tenant overrides:
- allow per-tenant definition overrides only when explicitly permitted by policy
The control plane must present definitions in a way that maps to the node runtime responsibilities:
- Aggregates and deterministic decide/apply programs ([aggregate/prd.md](file:///Users/vlad/Developer/cloudlysis/aggregate/prd.md#L155-L160))
- Projections and deterministic project programs ([projection/prd.md](file:///Users/vlad/Developer/cloudlysis/projection/prd.md#L36-L55))
- Runner sagas and effect provider manifests ([runner/prd.md](file:///Users/vlad/Developer/cloudlysis/runner/prd.md#L41-L57))
---
### 5) Scale Management (Tenant Placement, Shards, Fleet)
#### 5.1 Placement Model
- Placement is modeled as:
- a set of nodes/shards and their attributes (labels, capacity, region)
- tenant → shard assignments per service kind (Aggregate, Projection, Runner, optionally Gateway when relevant)
- Control plane supports both:
- static placement (development)
- dynamic placement (production) backed by NATS KV (consistent with existing client patterns in [swarm.rs](file:///Users/vlad/Developer/cloudlysis/aggregate/src/swarm.rs#L79-L227))
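
For the dynamic case, a hedged sketch of watching placement updates from a NATS KV bucket with the `async-nats` client (the bucket and key names are assumptions):

```rust
use futures::StreamExt;

async fn watch_placement() -> Result<(), async_nats::Error> {
    let client = async_nats::connect("nats://127.0.0.1:4222").await?;
    let js = async_nats::jetstream::new(client);
    // Assumed bucket name; each key would hold one tenant/service assignment.
    let kv = js.get_key_value("tenant-placement").await?;

    let mut updates = kv.watch("runner.>").await?;
    while let Some(entry) = updates.next().await {
        let entry = entry?;
        println!("placement key={} revision={}", entry.key, entry.revision);
    }
    Ok(())
}
```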
#### 5.2 Tenant Migration
- Provide guided migration planning and execution:
- show current assignment, target assignment, and a sequenced action plan
- execute “graceful drain → update placement → reload” style plans (see [plan_graceful_tenant_migration](file:///Users/vlad/Developer/cloudlysis/aggregate/src/swarm.rs#L41-L65))
- Migration safety:
- require explicit confirmation and reason
- block if draining is unsafe (inflight work too high, storage unhealthy, consumer lag too high)
- time-box and alert if drains do not converge
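
One way to picture the sequenced plan (names are illustrative, not an implemented API):

```rust
use uuid::Uuid;

/// Illustrative steps for a "graceful drain -> update placement -> reload" plan.
#[derive(Clone, Debug)]
pub enum MigrationStep {
    DrainTenant { tenant_id: Uuid, from_shard: String },
    UpdatePlacement { tenant_id: Uuid, to_shard: String },
    ReloadPlacement { targets: Vec<String> },
    VerifyConvergence { tenant_id: Uuid, deadline_secs: u64 },
}

#[derive(Clone, Debug)]
pub struct MigrationPlan {
    /// Operator-supplied reason, required before execution.
    pub reason: String,
    pub steps: Vec<MigrationStep>,
}
```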
#### 5.3 Fleet View
- Fleet inventory:
- nodes (labels, region, capacity, version)
- services (replicas, image version, health)
- per-node and per-service load indicators (CPU/mem, request rate, consumer lag)
- Operator actions:
- scale replicas, restart services, cordon/drain nodes (when supported by orchestrator)
UX should align with the UltraBase “Fleet” and “Topology” navigation patterns ([FleetPage](file:///Users/vlad/Developer/madapes/ultrabase/ui/control-plane-admin/src/pages/FleetPage.tsx), [TopologyPage](file:///Users/vlad/Developer/madapes/ultrabase/ui/control-plane-admin/src/pages/TopologyPage.tsx)).
---
### 6) Production Operations (Deployments, Maintenance, Safety)
#### 6.1 Deployments
- Manage deployable artifacts per service (Aggregate/Gateway/Projection/Runner) with:
- environment-specific rollout policies
- canary/rolling deploy support (when orchestrator supports it)
- automatic health-check gates and rollback triggers
- Track releases:
- “what is running where” (service version matrix)
- change log links and approvals
#### 6.2 Maintenance Operations
- Drain operations:
- tenant drain (stop acquiring new work, finish inflight; required by Runner semantics in [TenantGate](file:///Users/vlad/Developer/cloudlysis/runner/src/tenant_placement.rs#L106-L200))
- node drain (aggregate tenant ranges, projection consumers, runner workers)
- Replay / rebuild operations:
- projection rebuild triggers (dangerous, must be guarded and audited)
- workflow replay controls (reset checkpoints only with explicit intent)
#### 6.3 Incident Response Toolkit
- “Safe switches”:
- per-tenant kill switch (disable commands/effects via config)
- global degrade modes (rate limit reductions, disable expensive features)
- Run actions:
- revoke sessions at scale
- freeze deployments
- trigger drain/migrate with guided plan
---
### 7) Observability (VictoriaMetrics + Loki + Grafana) and Dashboards
#### 7.1 Stack Requirements
Adopt a production-ready stack consistent with UltraBase's operational baseline:
- **VictoriaMetrics** for metrics storage and Prometheus-compatible query
- **vmagent** for scraping and remote_write
- **Grafana** for dashboards and alert routing
- **Loki** (+ optional **Promtail**) for logs
- Optional **vmalert** for rule evaluation against VictoriaMetrics
UltraBase's observability design is a direct reference implementation to mirror and adapt:
- Stack overview and conventions: [observability/README.md](file:///Users/vlad/Developer/madapes/ultrabase/observability/README.md#L1-L47)
- Provisioned dashboards and datasources: [grafana provisioning](file:///Users/vlad/Developer/madapes/ultrabase/observability/grafana/provisioning)
#### 7.2 Metrics Conventions
- Every service exports `/metrics` in Prometheus format.
- Required labels:
- `service` (stable, low cardinality)
- `env` (dev/staging/prod)
- `tenant_id` only where safe and bounded; avoid `tenant_id` on high-frequency per-request series unless cardinality is controlled.
- HTTP metrics must avoid unbounded `path` cardinality; prefer route templates (pattern-based paths).
Tenant-aware metrics guidelines:
- Prefer **tenant-only aggregates** for “who is hurting us?” views:
- `..._requests_total{tenant_id,service,status_class}` (no `path`)
- `..._request_duration_seconds{tenant_id,service}` (no `path`, limited bucket count)
- Prefer **route-only aggregates** for “what endpoint is hurting us?” views:
- `..._requests_total{service,path,status}` (no `tenant_id`)
- Where per-tenant and per-route both matter, implement a **top-k sampling** policy:
- emit `(tenant_id,path)` series only for top N tenants, or only for a fixed allowlist of routes.
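
A hedged sketch of the tenant-only series described above, using the `prometheus` crate (the metric names and buckets are illustrative):

```rust
use prometheus::{register_counter_vec, register_histogram_vec};

fn register_tenant_metrics() -> prometheus::Result<()> {
    // "Who is hurting us?": tenant + status class, deliberately without `path`.
    let requests = register_counter_vec!(
        "gateway_tenant_requests_total",
        "HTTP requests by tenant and status class",
        &["tenant_id", "service", "status_class"]
    )?;
    // Few buckets keep the per-tenant cardinality bounded.
    let duration = register_histogram_vec!(
        "gateway_tenant_request_duration_seconds",
        "HTTP request duration by tenant",
        &["tenant_id", "service"],
        vec![0.01, 0.05, 0.25, 1.0, 5.0]
    )?;
    requests
        .with_label_values(&["tenant-a", "gateway", "2xx"])
        .inc();
    duration
        .with_label_values(&["tenant-a", "gateway"])
        .observe(0.042);
    Ok(())
}
```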
#### 7.3 Required Dashboards (Production)
Minimum set of dashboards (provisioned on startup):
- **Platform — Operations overview**
- `up` for core services and observability stack
- RPS, 4xx/5xx ratio, p95/p99 latency per service
- saturation indicators (CPU/mem, inflight, queue depth)
- **Platform — HTTP detail**
- per-service request breakdown by route template, method, status
- top failing paths and latency outliers
- **Platform — Logs**
- Loki stream filtering by `service`, `tenant_id` (where present), and correlation identifiers
- **Platform — Event bus / JetStream**
- consumer lag, redeliveries, ack latency, stream storage pressure
- **Platform — Workers (Runner)**
- outbox depth, effect latency, poison message counts, schedules backlog
- **Platform — Storage (libmdbx)**
- DB size growth, write stalls, fsync latency (where exported), disk usage
- **Platform — Cluster / Orchestrator**
- node health, container restarts, placement distribution by tenant range
Dashboards should be modeled after UltraBase's default set (for structure, not content), e.g. [ultrabase-operations.json](file:///Users/vlad/Developer/madapes/ultrabase/observability/grafana/provisioning/dashboards/default/ultrabase-operations.json) and [ultrabase-http-detail.json](file:///Users/vlad/Developer/madapes/ultrabase/observability/grafana/provisioning/dashboards/default/ultrabase-http-detail.json).
Additional production-operability dashboards (chosen and adapted):
- **Platform — Noisy Neighbor & Tenant Health**
- Purpose: identify a tenant causing cluster instability (attack, runaway job, bad config) and quickly pivot all panels to that tenant.
- Panels (minimum):
- Top tenants by Gateway RPS (topk of tenant-only request counters).
- Tenant latency distribution (p95/p99 per tenant) from tenant-only latency histograms.
- Tenant error ratio (5xx and 429) per tenant.
- Aggregate in-flight commands by tenant (already exported: `aggregate_in_flight_commands{tenant_id}`).
- Projection processing error rate by tenant (from `projection_processing_errors_total{tenant_id,view_type}` aggregated per tenant).
- Loki logs panel with a `tenant_id` variable selector; selecting a tenant syncs RPS/latency/errors + logs.
- Required instrumentation:
- Gateway must expose **tenant-level** HTTP counters/histograms (tenant + status class + service, without `path`) in addition to existing route-level metrics.
- **Platform — API Regression & Deployment**
- Purpose: determine whether a newly rolled out image caused regressions, and correlate changes with deployment events.
- Panels (minimum):
- Error rate comparison “old vs new” by `service` and `version` (or `image_tag`) labels.
- Latency comparison “old vs new” (p95/p99) per service.
- Restart / flapping rate per service (container restarts, crash loops).
- Dependency latency correlation:
- Gateway request duration vs Aggregate command duration vs Projection processing duration vs Runner effect latency.
- Loki “new errors” panel:
- errors seen in the last 10m that were not present in the prior 60m window, grouped by `service`.
- Deployment annotations:
- vertical markers when Swarm service updates started/finished (via annotations or a deploy event metric).
- Required instrumentation:
- Every service exports a `*_build_info{service,version,git_sha}` gauge (value=1) or equivalent (see the sketch after this dashboard list), and scrape relabeling adds `image_tag` where possible.
- Control plane emits deployment annotations/events (or pulls them from the orchestrator and writes to Grafana annotations).
- **Platform — Storage & Event Bus Bottlenecks**
- Purpose: debug timeouts when the API is “up” but underlying storage/eventing is saturated (the Cloudlysis equivalent of DB firefighting).
- Panels (minimum):
- NATS/JetStream health:
- stream storage pressure, publish/ack latency, consumer lag, redeliveries.
- Projection lag and throughput:
- events processed rate, processing duration, error rate.
- Aggregate write-path pressure:
- command duration, version conflicts, in-flight commands, tenant errors.
- Runner pressure:
- outbox dispatch failure rate, effect timeout rate, deadletter writes.
- Disk saturation on nodes hosting libmdbx:
- disk usage, read/write latency, IOPS; correlate with spikes in command/query latency.
- Optional Postgres/Autobase panels only when a managed DB backs any control-plane metadata:
- pool saturation, replica lag, slow queries, long transactions.
- Required instrumentation:
- Ensure JetStream metrics are scraped (NATS server `/varz` exporter or native Prometheus endpoint depending on deployment).
- Ensure node-level disk/IO metrics are scraped (node exporter / cadvisor / equivalent).
- **Platform — Infrastructure Exhaustion**
- Purpose: detect node/resource pressure earlier than raw CPU% and catch observability blind spots.
- Panels (minimum):
- CPU/memory pressure (PSI) per node (when available), plus load average and CPU saturation.
- OOM kill tracker across the cluster.
- Disk usage + IO wait/latency on data volumes (libmdbx, Loki, VictoriaMetrics).
- vmagent health:
- scrape error rate, remote_write errors, queue backlog.
- Loki ingestion health:
- dropped log lines (promtail) and ingestion errors (loki).
- Swarm task hygiene:
- desired_state vs current_state mismatches, orphaned tasks, restart loops.
- Required instrumentation:
- node exporter / cadvisor (or equivalent) must be part of the production scrape plan.
- promtail (or alternative) must expose drop/error metrics when logs are enabled.
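
A hedged sketch of the `*_build_info` convention referenced above (the `GIT_SHA` build-time env var is an assumption):

```rust
use prometheus::register_int_gauge_vec;

fn export_build_info() -> prometheus::Result<()> {
    let build_info = register_int_gauge_vec!(
        "gateway_build_info",
        "Build metadata; the value is always 1",
        &["service", "version", "git_sha"]
    )?;
    build_info
        .with_label_values(&[
            "gateway",
            env!("CARGO_PKG_VERSION"),
            // Assumes the build injects GIT_SHA; falls back to "unknown".
            option_env!("GIT_SHA").unwrap_or("unknown"),
        ])
        .set(1);
    Ok(())
}
```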
#### 7.4 Alerting Requirements
Minimum alert classes:
- Availability:
- service down (`up == 0`)
- scrape failures, vmagent remote_write errors
- Reliability:
- sustained elevated 5xx ratio
- sustained elevated p95 latency per service
- Backlogs:
- JetStream consumer lag above threshold
- Runner outbox depth above threshold
- Data safety:
- disk usage near full (nodes hosting libmdbx)
- abnormal restart loops
- Security:
- login anomaly detection signals (where instrumented)
- suspicious spike in session revocations / failed MFA
Alert rules can follow UltraBase's approach of version-controlled rules in YAML (reference: [alerts/](file:///Users/vlad/Developer/madapes/ultrabase/observability/alerts)).
#### 7.5 Control Plane → Observability Linking
The Admin UI must embed or deep-link into observability tools:
- per-tenant and per-service quick links to Grafana dashboards and Loki queries
- incident triage shortcuts (operations overview → HTTP detail → logs)
This mirrors UltraBase's “observability links JSON” concept ([observability/README.md](file:///Users/vlad/Developer/madapes/ultrabase/observability/README.md#L65-L75)), but adapted to Cloudlysis services and dashboards.
---
### 8) Audit, Compliance, and Change History
- Audit log is an append-only stream of security and operations events:
- authentication and session events
- RBAC changes and permission grants
- config/definition changes and promotions
- scaling, drain, and migration operations
- deployments and rollbacks
- Audit log must support:
- search and export (bounded and access controlled)
- correlation to production incidents (request ids, trace ids)
- retention policy controls
---
### 9) Control Plane API Surface (Admin API)
The control plane requires a stable API surface for the Admin UI and automation.
Minimum API capabilities:
- **Idempotent jobs for multi-step operations**:
- every mutating operation returns a `job_id`, supports polling and cancellation, and records a full execution trace in the audit log.
- **Preflight endpoints**:
- validate an intended change and return a plan (and “would-change” diff) without applying it.
- **RBAC-first access model**:
- all endpoints enforce permission checks at the API boundary (UI is not trusted).
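
As a sketch of the job envelope (field and state names are assumptions, not an implemented API):

```rust
use serde::{Deserialize, Serialize};
use uuid::Uuid;

#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum JobState {
    Pending,
    Running,
    Succeeded,
    Failed,
    Cancelled,
}

/// Illustrative response for a mutating admin operation.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct JobAccepted {
    pub job_id: Uuid,
    pub state: JobState,
    /// Echoed back so retries with the same key deduplicate.
    pub idempotency_key: String,
    /// Polled by the UI/automation; the execution trace lands in the audit log.
    pub poll_url: String,
}
```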
Minimum endpoint groups:
- `/admin/v1/iam/*` (users, roles, assignments, sessions)
- `/admin/v1/tenants/*` (tenants lifecycle, status, metadata)
- `/admin/v1/config/*` (versioned config, diff, apply, rollback)
- `/admin/v1/definitions/*` (bundles, validate, promote, rollback)
- `/admin/v1/scale/*` (placement, migrations, drain status)
- `/admin/v1/ops/*` (deployments, rollbacks, service actions)
- `/admin/v1/observability/*` (links, saved queries, dashboard registry)
- `/admin/v1/audit/*` (search, export)
Authentication/authorization integration:
- Prefer using the **Gateway** as the system of record for admin identities and sessions, with the control plane API validating requests using Gateway-issued tokens and enforcing platform-specific permissions.
---
### 10) Secrets and Credentials Management
The control plane must treat secrets as first-class operational data with strict handling.
Requirements:
- Secret values must never be logged and must be redacted in UI/API responses.
- Secrets must support:
- creation and rotation workflows
- scoped access (global/tenant/environment)
- staged rollout (write new → verify → promote → retire old)
- Rendering rules:
- after creation, secret plaintext must not be retrievable unless explicitly enabled by policy (default: write-only).
- Integrations:
- support referencing secrets from config/definitions without embedding values (secret refs).
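
A sketch of a secret reference embedded in config (the shape is illustrative):

```rust
use serde::{Deserialize, Serialize};

/// Illustrative "secret ref": configs point at a secret by name and optional
/// version instead of embedding the plaintext value.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ConfigValue {
    Plain { value: serde_json::Value },
    SecretRef { name: String, version: Option<u64> },
}
```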
---
### 11) Backups, Restore, and Disaster Recovery (Production Operability)
The control plane must provide explicit visibility and guardrails for data safety operations.
Minimum requirements:
- **Backup status**:
- show last successful backup timestamps per critical store (metadata DB, NATS state if applicable, Grafana provisioning state as code, tenant placement/config stores).
- **Restore readiness**:
- preflight checks that validate a restore plan (target environment, versions, dependencies).
- **Operational playbooks**:
- link to the exact restore procedure and post-restore verification checklist.
- **Key rotation**:
- explicit workflows and audit logs for rotating signing keys, service credentials, and secret backends.
This should align with the platform's existing operational patterns (e.g., the explicit “restore / post-restore checks” concept used in UltraBase observability docs).
---
## **Admin UI Requirements (Information Architecture + UX)**
### Navigation (Minimum)
Left navigation sections:
- Overview
- Tenants
- Users
- Sessions
- Roles & Permissions
- Config
- Definitions
- Scale & Placement
- Deployments
- Observability
- Audit Log
- Settings
### Page Patterns (Reuse UltraBase UI)
Adopt the UltraBase component system and page layout patterns:
- Layout, styling tokens, UI primitives: [components/ui](file:///Users/vlad/Developer/madapes/ultrabase/ui/control-plane-admin/src/components/ui)
- Table + search + action dropdown pattern: [TenantsPage](file:///Users/vlad/Developer/madapes/ultrabase/ui/control-plane-admin/src/pages/TenantsPage.tsx#L94-L203)
Required page types:
- List pages:
- searchable table, bulk actions, row actions menu, status pills, empty states
- Detail pages:
- header with primary actions (drain, migrate, rollback)
- sub-nav tabs for domain-specific views
- Mutation flows:
- modal confirmation + explicit reason entry for high-impact changes
- toast notifications and “busy” state handling consistent with UltraBase patterns
### Tenant Detail Subpages (Minimum)
- Overview (status, assignments, SLO highlights)
- Placement (per service: Aggregate/Projection/Runner)
- Health (node readiness and dependency checks)
- Config (effective config + diffs)
- Definitions (applied definition bundle + version)
- Activity (audit trail filtered to tenant)
- Observability (embedded links / panels)
---
## **Non-Functional Requirements**
- **Security**:
- strict RBAC everywhere; deny-by-default
- audit every privileged operation
- step-up for sensitive actions
- CSRF protection for browser sessions
- safe secret handling (no secret values rendered after creation unless explicitly permitted)
- allowlist outbound integrations (Grafana/Loki/VM URLs, orchestration API endpoints) to prevent SSRF-style abuse
- **Reliability**:
- control plane operations are idempotent and resilient to partial failures
- operations have clear “current state” and do not rely on UI assumptions
- **Performance**:
- list pages paginate and filter server-side for large fleets
- dashboards load with bounded query costs and controlled label cardinality
- **Operability**:
- control plane itself must be observable (metrics/logs, dashboards, alerts)
- every operation must surface preflight checks and post-conditions
---
## **Open Questions / Design Constraints (To Resolve During Implementation)**
- Where does the source of truth live for:
- users/sessions/roles (Gateway vs control-plane backing store)?
- configs/definitions (NATS KV vs database vs GitOps)?
- How should production promotions be modeled:
- environment branches, approval workflow, and rollback semantics?
- What orchestrator is the production baseline (Docker Swarm per existing PRDs, or will Kubernetes be introduced)?
- Where should the job/execution state for long-running operations live:
- embedded in the control plane API process, durable store, or NATS workflows?

24
control/ui/.gitignore vendored Normal file
View File

@@ -0,0 +1,24 @@
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
lerna-debug.log*
node_modules
dist
dist-ssr
*.local
# Editor directories and files
.vscode/*
!.vscode/extensions.json
.idea
.DS_Store
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?

73
control/ui/README.md Normal file
View File

@@ -0,0 +1,73 @@
# React + TypeScript + Vite
This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules.
Currently, two official plugins are available:
- [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react) uses [Oxc](https://oxc.rs)
- [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react-swc) uses [SWC](https://swc.rs/)
## React Compiler
The React Compiler is not enabled on this template because of its impact on dev & build performance. To add it, see [this documentation](https://react.dev/learn/react-compiler/installation).
## Expanding the ESLint configuration
If you are developing a production application, we recommend updating the configuration to enable type-aware lint rules:
```js
export default defineConfig([
globalIgnores(['dist']),
{
files: ['**/*.{ts,tsx}'],
extends: [
// Other configs...
// Remove tseslint.configs.recommended and replace with this
tseslint.configs.recommendedTypeChecked,
// Alternatively, use this for stricter rules
tseslint.configs.strictTypeChecked,
// Optionally, add this for stylistic rules
tseslint.configs.stylisticTypeChecked,
// Other configs...
],
languageOptions: {
parserOptions: {
project: ['./tsconfig.node.json', './tsconfig.app.json'],
tsconfigRootDir: import.meta.dirname,
},
// other options...
},
},
])
```
You can also install [eslint-plugin-react-x](https://github.com/Rel1cx/eslint-react/tree/main/packages/plugins/eslint-plugin-react-x) and [eslint-plugin-react-dom](https://github.com/Rel1cx/eslint-react/tree/main/packages/plugins/eslint-plugin-react-dom) for React-specific lint rules:
```js
// eslint.config.js
import reactX from 'eslint-plugin-react-x'
import reactDom from 'eslint-plugin-react-dom'
export default defineConfig([
globalIgnores(['dist']),
{
files: ['**/*.{ts,tsx}'],
extends: [
// Other configs...
// Enable lint rules for React
reactX.configs['recommended-typescript'],
// Enable lint rules for React DOM
reactDom.configs.recommended,
],
languageOptions: {
parserOptions: {
project: ['./tsconfig.node.json', './tsconfig.app.json'],
tsconfigRootDir: import.meta.dirname,
},
// other options...
},
},
])
```

View File

@@ -0,0 +1,23 @@
import js from '@eslint/js'
import globals from 'globals'
import reactHooks from 'eslint-plugin-react-hooks'
import reactRefresh from 'eslint-plugin-react-refresh'
import tseslint from 'typescript-eslint'
import { defineConfig, globalIgnores } from 'eslint/config'
export default defineConfig([
globalIgnores(['dist']),
{
files: ['**/*.{ts,tsx}'],
extends: [
js.configs.recommended,
tseslint.configs.recommended,
reactHooks.configs.flat.recommended,
reactRefresh.configs.vite,
],
languageOptions: {
ecmaVersion: 2020,
globals: globals.browser,
},
},
])

13
control/ui/index.html Normal file
View File

@@ -0,0 +1,13 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<link rel="icon" type="image/svg+xml" href="/favicon.svg" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>ui</title>
</head>
<body>
<div id="root"></div>
<script type="module" src="/src/main.tsx"></script>
</body>
</html>

11
control/ui/nginx.conf Normal file
View File

@@ -0,0 +1,11 @@
server {
listen 80;
server_name _;
root /usr/share/nginx/html;
index index.html;
location / {
try_files $uri $uri/ /index.html;
}
}

5333
control/ui/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

37
control/ui/package.json Normal file
View File

@@ -0,0 +1,37 @@
{
"name": "ui",
"private": true,
"version": "0.0.0",
"type": "module",
"scripts": {
"dev": "vite",
"build": "tsc -b && vite build",
"lint": "eslint .",
"typecheck": "tsc -b --pretty false",
"test": "vitest run",
"preview": "vite preview"
},
"dependencies": {
"react": "^19.2.4",
"react-dom": "^19.2.4",
"react-router-dom": "^7.9.3"
},
"devDependencies": {
"@eslint/js": "^9.39.4",
"@testing-library/jest-dom": "^6.9.0",
"@testing-library/react": "^16.3.0",
"@types/node": "^24.12.0",
"@types/react": "^19.2.14",
"@types/react-dom": "^19.2.3",
"@vitejs/plugin-react": "^6.0.1",
"eslint": "^9.39.4",
"eslint-plugin-react-hooks": "^7.0.1",
"eslint-plugin-react-refresh": "^0.5.2",
"globals": "^17.4.0",
"jsdom": "^27.0.0",
"typescript": "~5.9.3",
"typescript-eslint": "^8.57.0",
"vite": "^8.0.1",
"vitest": "^3.2.4"
}
}

File diff suppressed because one or more lines are too long

(image preview omitted: new SVG asset, 9.3 KiB)

View File

@@ -0,0 +1,24 @@
<svg xmlns="http://www.w3.org/2000/svg">
<symbol id="bluesky-icon" viewBox="0 0 16 17">
<g clip-path="url(#bluesky-clip)"><path fill="#08060d" d="M7.75 7.735c-.693-1.348-2.58-3.86-4.334-5.097-1.68-1.187-2.32-.981-2.74-.79C.188 2.065.1 2.812.1 3.251s.241 3.602.398 4.13c.52 1.744 2.367 2.333 4.07 2.145-2.495.37-4.71 1.278-1.805 4.512 3.196 3.309 4.38-.71 4.987-2.746.608 2.036 1.307 5.91 4.93 2.746 2.72-2.746.747-4.143-1.747-4.512 1.702.189 3.55-.4 4.07-2.145.156-.528.397-3.691.397-4.13s-.088-1.186-.575-1.406c-.42-.19-1.06-.395-2.741.79-1.755 1.24-3.64 3.752-4.334 5.099"/></g>
<defs><clipPath id="bluesky-clip"><path fill="#fff" d="M.1.85h15.3v15.3H.1z"/></clipPath></defs>
</symbol>
<symbol id="discord-icon" viewBox="0 0 20 19">
<path fill="#08060d" d="M16.224 3.768a14.5 14.5 0 0 0-3.67-1.153c-.158.286-.343.67-.47.976a13.5 13.5 0 0 0-4.067 0c-.128-.306-.317-.69-.476-.976A14.4 14.4 0 0 0 3.868 3.77C1.546 7.28.916 10.703 1.231 14.077a14.7 14.7 0 0 0 4.5 2.306q.545-.748.965-1.587a9.5 9.5 0 0 1-1.518-.74q.191-.14.372-.293c2.927 1.369 6.107 1.369 8.999 0q.183.152.372.294-.723.437-1.52.74.418.838.963 1.588a14.6 14.6 0 0 0 4.504-2.308c.37-3.911-.63-7.302-2.644-10.309m-9.13 8.234c-.878 0-1.599-.82-1.599-1.82 0-.998.705-1.82 1.6-1.82.894 0 1.614.82 1.599 1.82.001 1-.705 1.82-1.6 1.82m5.91 0c-.878 0-1.599-.82-1.599-1.82 0-.998.705-1.82 1.6-1.82.893 0 1.614.82 1.599 1.82 0 1-.706 1.82-1.6 1.82"/>
</symbol>
<symbol id="documentation-icon" viewBox="0 0 21 20">
<path fill="none" stroke="#aa3bff" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.35" d="m15.5 13.333 1.533 1.322c.645.555.967.833.967 1.178s-.322.623-.967 1.179L15.5 18.333m-3.333-5-1.534 1.322c-.644.555-.966.833-.966 1.178s.322.623.966 1.179l1.534 1.321"/>
<path fill="none" stroke="#aa3bff" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.35" d="M17.167 10.836v-4.32c0-1.41 0-2.117-.224-2.68-.359-.906-1.118-1.621-2.08-1.96-.599-.21-1.349-.21-2.848-.21-2.623 0-3.935 0-4.983.369-1.684.591-3.013 1.842-3.641 3.428C3 6.449 3 7.684 3 10.154v2.122c0 2.558 0 3.838.706 4.726q.306.383.713.671c.76.536 1.79.64 3.581.66"/>
<path fill="none" stroke="#aa3bff" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.35" d="M3 10a2.78 2.78 0 0 1 2.778-2.778c.555 0 1.209.097 1.748-.047.48-.129.854-.503.982-.982.145-.54.048-1.194.048-1.749a2.78 2.78 0 0 1 2.777-2.777"/>
</symbol>
<symbol id="github-icon" viewBox="0 0 19 19">
<path fill="#08060d" fill-rule="evenodd" d="M9.356 1.85C5.05 1.85 1.57 5.356 1.57 9.694a7.84 7.84 0 0 0 5.324 7.44c.387.079.528-.168.528-.376 0-.182-.013-.805-.013-1.454-2.165.467-2.616-.935-2.616-.935-.349-.91-.864-1.143-.864-1.143-.71-.48.051-.48.051-.48.787.051 1.2.805 1.2.805.695 1.194 1.817.857 2.268.649.064-.507.27-.857.49-1.052-1.728-.182-3.545-.857-3.545-3.87 0-.857.31-1.558.8-2.104-.078-.195-.349-1 .077-2.078 0 0 .657-.208 2.14.805a7.5 7.5 0 0 1 1.946-.26c.657 0 1.328.092 1.946.26 1.483-1.013 2.14-.805 2.14-.805.426 1.078.155 1.883.078 2.078.502.546.799 1.247.799 2.104 0 3.013-1.818 3.675-3.558 3.87.284.247.528.714.528 1.454 0 1.052-.012 1.896-.012 2.156 0 .208.142.455.528.377a7.84 7.84 0 0 0 5.324-7.441c.013-4.338-3.48-7.844-7.773-7.844" clip-rule="evenodd"/>
</symbol>
<symbol id="social-icon" viewBox="0 0 20 20">
<path fill="none" stroke="#aa3bff" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.35" d="M12.5 6.667a4.167 4.167 0 1 0-8.334 0 4.167 4.167 0 0 0 8.334 0"/>
<path fill="none" stroke="#aa3bff" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.35" d="M2.5 16.667a5.833 5.833 0 0 1 8.75-5.053m3.837.474.513 1.035c.07.144.257.282.414.309l.93.155c.596.1.736.536.307.965l-.723.73a.64.64 0 0 0-.152.531l.207.903c.164.715-.213.991-.84.618l-.872-.52a.63.63 0 0 0-.577 0l-.872.52c-.624.373-1.003.094-.84-.618l.207-.903a.64.64 0 0 0-.152-.532l-.723-.729c-.426-.43-.289-.864.306-.964l.93-.156a.64.64 0 0 0 .412-.31l.513-1.034c.28-.562.735-.562 1.012 0"/>
</symbol>
<symbol id="x-icon" viewBox="0 0 19 19">
<path fill="#08060d" fill-rule="evenodd" d="M1.893 1.98c.052.072 1.245 1.769 2.653 3.77l2.892 4.114c.183.261.333.48.333.486s-.068.089-.152.183l-.522.593-.765.867-3.597 4.087c-.375.426-.734.834-.798.905a1 1 0 0 0-.118.148c0 .01.236.017.664.017h.663l.729-.83c.4-.457.796-.906.879-.999a692 692 0 0 0 1.794-2.038c.034-.037.301-.34.594-.675l.551-.624.345-.392a7 7 0 0 1 .34-.374c.006 0 .93 1.306 2.052 2.903l2.084 2.965.045.063h2.275c1.87 0 2.273-.003 2.266-.021-.008-.02-1.098-1.572-3.894-5.547-2.013-2.862-2.28-3.246-2.273-3.266.008-.019.282-.332 2.085-2.38l2-2.274 1.567-1.782c.022-.028-.016-.03-.65-.03h-.674l-.3.342a871 871 0 0 1-1.782 2.025c-.067.075-.405.458-.75.852a100 100 0 0 1-.803.91c-.148.172-.299.344-.99 1.127-.304.343-.32.358-.345.327-.015-.019-.904-1.282-1.976-2.808L6.365 1.85H1.8zm1.782.91 8.078 11.294c.772 1.08 1.413 1.973 1.425 1.984.016.017.241.02 1.05.017l1.03-.004-2.694-3.766L7.796 5.75 5.722 2.852l-1.039-.004-1.039-.004z" clip-rule="evenodd"/>
</symbol>
</svg>

[After: SVG image preview, 4.9 KiB]
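The sprite above exposes each glyph as a `<symbol>` with an id, so consumers reference a glyph with `<use>` instead of inlining path data. A minimal sketch, assuming the sprite is served from the UI's public directory at `/icons.svg` (the actual path is not shown in this diff):

```tsx
// Hypothetical consumer of the sprite above; the /icons.svg path is an assumption.
export function GithubIcon() {
  return (
    <svg width={19} height={19} aria-hidden="true">
      {/* href targets the <symbol id="github-icon"> defined in the sprite */}
      <use href="/icons.svg#github-icon" />
    </svg>
  )
}
```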

184
control/ui/src/App.css Normal file
View File

@@ -0,0 +1,184 @@
.counter {
font-size: 16px;
padding: 5px 10px;
border-radius: 5px;
color: var(--accent);
background: var(--accent-bg);
border: 2px solid transparent;
transition: border-color 0.3s;
margin-bottom: 24px;
&:hover {
border-color: var(--accent-border);
}
&:focus-visible {
outline: 2px solid var(--accent);
outline-offset: 2px;
}
}
.hero {
position: relative;
.base,
.framework,
.vite {
inset-inline: 0;
margin: 0 auto;
}
.base {
width: 170px;
position: relative;
z-index: 0;
}
.framework,
.vite {
position: absolute;
}
.framework {
z-index: 1;
top: 34px;
height: 28px;
transform: perspective(2000px) rotateZ(300deg) rotateX(44deg) rotateY(39deg)
scale(1.4);
}
.vite {
z-index: 0;
top: 107px;
height: 26px;
width: auto;
transform: perspective(2000px) rotateZ(300deg) rotateX(40deg) rotateY(39deg)
scale(0.8);
}
}
#center {
display: flex;
flex-direction: column;
gap: 25px;
place-content: center;
place-items: center;
flex-grow: 1;
@media (max-width: 1024px) {
padding: 32px 20px 24px;
gap: 18px;
}
}
#next-steps {
display: flex;
border-top: 1px solid var(--border);
text-align: left;
& > div {
flex: 1 1 0;
padding: 32px;
@media (max-width: 1024px) {
padding: 24px 20px;
}
}
.icon {
margin-bottom: 16px;
width: 22px;
height: 22px;
}
@media (max-width: 1024px) {
flex-direction: column;
text-align: center;
}
}
#docs {
border-right: 1px solid var(--border);
@media (max-width: 1024px) {
border-right: none;
border-bottom: 1px solid var(--border);
}
}
#next-steps ul {
list-style: none;
padding: 0;
display: flex;
gap: 8px;
margin: 32px 0 0;
.logo {
height: 18px;
}
a {
color: var(--text-h);
font-size: 16px;
border-radius: 6px;
background: var(--social-bg);
display: flex;
padding: 6px 12px;
align-items: center;
gap: 8px;
text-decoration: none;
transition: box-shadow 0.3s;
&:hover {
box-shadow: var(--shadow);
}
.button-icon {
height: 18px;
width: 18px;
}
}
@media (max-width: 1024px) {
margin-top: 20px;
flex-wrap: wrap;
justify-content: center;
li {
flex: 1 1 calc(50% - 8px);
}
a {
width: 100%;
justify-content: center;
box-sizing: border-box;
}
}
}
#spacer {
height: 88px;
border-top: 1px solid var(--border);
@media (max-width: 1024px) {
height: 48px;
}
}
.ticks {
position: relative;
width: 100%;
&::before,
&::after {
content: '';
position: absolute;
top: -4.5px;
border: 5px solid transparent;
}
&::before {
left: 0;
border-left-color: var(--border);
}
&::after {
right: 0;
border-right-color: var(--border);
}
}

8
control/ui/src/App.tsx Normal file
View File

@@ -0,0 +1,8 @@
import { RouterProvider } from 'react-router-dom'
import { createBrowserAppRouter } from './app/router'
const router = createBrowserAppRouter()
export default function App() {
return <RouterProvider router={router} />
}
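`createBrowserAppRouter` lives in `./app/router`, which is not shown in this part of the diff. A minimal sketch of what it might wrap, assuming react-router-dom's `createBrowserRouter` with a placeholder route table:

```tsx
// Hypothetical sketch of ./app/router; the route table here is a placeholder.
import { createBrowserRouter } from 'react-router-dom'

export function createBrowserAppRouter() {
  return createBrowserRouter([{ path: '/', element: <p>Control UI</p> }])
}
```

Since the factory returns a fresh router on each call, App.tsx calls it once at module scope so the router is created a single time per page load.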

View File

@@ -0,0 +1,122 @@
type RequestIds = {
requestId: string
correlationId?: string
traceparent?: string
}
const LAST_IDS_STORAGE_KEY = 'control:last_request_ids'
export class ApiError extends Error {
status: number
requestId: string
correlationId?: string
traceparent?: string
constructor(args: {
status: number
message: string
requestId: string
correlationId?: string
traceparent?: string
}) {
super(args.message)
this.name = 'ApiError'
this.status = args.status
this.requestId = args.requestId
this.correlationId = args.correlationId
this.traceparent = args.traceparent
}
}
const state: {
last?: RequestIds
} = {}
function isRecord(value: unknown): value is Record<string, unknown> {
return typeof value === 'object' && value !== null
}
function loadLastIds(): RequestIds | undefined {
try {
const raw = localStorage.getItem(LAST_IDS_STORAGE_KEY)
if (!raw) return undefined
const parsed = JSON.parse(raw) as unknown
if (isRecord(parsed) && typeof parsed.requestId === 'string') {
const correlationId =
typeof parsed.correlationId === 'string' ? parsed.correlationId : undefined
const traceparent =
typeof parsed.traceparent === 'string' ? parsed.traceparent : undefined
return { requestId: parsed.requestId, correlationId, traceparent }
}
} catch {
return undefined
}
return undefined
}
function persistLastIds(ids: RequestIds) {
try {
localStorage.setItem(LAST_IDS_STORAGE_KEY, JSON.stringify(ids))
} catch {
return
}
}
function newRequestId(): string {
if (typeof crypto !== 'undefined' && 'randomUUID' in crypto) {
return crypto.randomUUID()
}
return `${Date.now()}-${Math.random().toString(16).slice(2)}`
}
export function getLastRequestIds(): RequestIds | undefined {
return state.last ?? loadLastIds()
}
type ApiRequestInit = RequestInit & {
correlationId?: string
traceparent?: string
useLastCorrelationId?: boolean
useLastTraceparent?: boolean
}
export async function apiFetch(
input: RequestInfo | URL,
init?: ApiRequestInit,
) {
const requestId = newRequestId()
const headers = new Headers(init?.headers)
headers.set('x-request-id', requestId)
const last = getLastRequestIds()
const correlationId =
init?.correlationId ?? (init?.useLastCorrelationId ? last?.correlationId : undefined)
const traceparent =
init?.traceparent ?? (init?.useLastTraceparent ? last?.traceparent : undefined)
if (correlationId) headers.set('x-correlation-id', correlationId)
if (traceparent) headers.set('traceparent', traceparent)
const res = await fetch(input, { ...init, headers })
const resCorrelationId = res.headers.get('x-correlation-id') ?? correlationId ?? undefined
const resTraceparent = res.headers.get('traceparent') ?? traceparent ?? undefined
const ids = { requestId, correlationId: resCorrelationId, traceparent: resTraceparent }
state.last = ids
persistLastIds(ids)
if (!res.ok) {
const text = await res.text().catch(() => '')
const err = new ApiError({
status: res.status,
requestId,
correlationId: resCorrelationId,
traceparent: resTraceparent,
message: `API error ${res.status}${text ? `: ${text}` : ''} (request_id=${requestId}${
resCorrelationId ? ` correlation_id=${resCorrelationId}` : ''
})`,
})
throw err
}
return res
}
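`apiFetch` stamps every request with a fresh `x-request-id`, optionally replays the last seen correlation id and traceparent, and surfaces all three ids on failures via `ApiError`. An illustrative caller (the endpoint URL here is hardcoded for brevity):

```ts
// Illustrative only: error handling around apiFetch; URL handling is simplified.
import { apiFetch, ApiError } from './client'

async function fetchSnapshot(): Promise<unknown> {
  try {
    const res = await apiFetch('http://127.0.0.1:8080/admin/v1/fleet/snapshot', {
      useLastCorrelationId: true,
      useLastTraceparent: true,
    })
    return await res.json()
  } catch (err) {
    if (err instanceof ApiError) {
      // The ids make the failing request findable in logs and traces.
      console.error('request failed', err.status, err.requestId, err.correlationId)
    }
    throw err
  }
}
```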

View File

@@ -0,0 +1,179 @@
import { apiFetch } from './client'
import { getAccessToken } from '../auth/token'
function baseUrl() {
const v = import.meta.env.VITE_CONTROL_API_URL as string | undefined
return (v ?? 'http://127.0.0.1:8080').replace(/\/$/, '')
}
async function apiJson<T>(path: string): Promise<T> {
const controller = new AbortController()
const t = window.setTimeout(() => controller.abort(), 2000)
const token = getAccessToken()
const headers: HeadersInit = token ? { Authorization: `Bearer ${token}` } : {}
try {
const res = await apiFetch(`${baseUrl()}${path}`, {
headers,
signal: controller.signal,
useLastCorrelationId: true,
useLastTraceparent: true,
})
return (await res.json()) as T
} finally {
window.clearTimeout(t)
}
}
async function apiPostJson<T>(path: string, body: unknown, idempotencyKey?: string): Promise<T> {
const controller = new AbortController()
const t = window.setTimeout(() => controller.abort(), 2000)
const token = getAccessToken()
const headers: HeadersInit = {
'content-type': 'application/json',
...(token ? { Authorization: `Bearer ${token}` } : {}),
...(idempotencyKey ? { 'Idempotency-Key': idempotencyKey } : {}),
}
try {
const res = await apiFetch(`${baseUrl()}${path}`, {
method: 'POST',
headers,
body: JSON.stringify(body),
signal: controller.signal,
useLastCorrelationId: true,
useLastTraceparent: true,
})
return (await res.json()) as T
} finally {
window.clearTimeout(t)
}
}
export type FleetSnapshot = {
services: Array<{
name: string
base_url: string
health_ok: boolean
ready_ok: boolean
metrics_ok: boolean
}>
}
export type PlacementResponse = {
kind: 'aggregate' | 'projection' | 'runner'
revision: string
placements: Array<{ tenant_id: string; targets: string[] }>
}
export type TenantsResponse = {
tenants: Array<{
tenant_id: string
aggregate_targets: string[]
projection_targets: string[]
runner_targets: string[]
}>
}
export type Job = {
job_id: string
status: 'pending' | 'running' | 'succeeded' | 'failed' | 'cancelled'
steps: Array<{ name: string; status: Job['status']; attempts: number; error?: string | null }>
error?: string | null
created_at_ms: number
started_at_ms?: number | null
finished_at_ms?: number | null
}
export type AuditEvent = {
ts_ms: number
principal_sub: string
action: string
tenant_id?: string | null
reason: string
job_id?: string | null
}
export function getFleetSnapshot(): Promise<FleetSnapshot> {
return apiJson('/admin/v1/fleet/snapshot')
}
export function getPlacement(kind: 'aggregate' | 'projection' | 'runner'): Promise<PlacementResponse> {
return apiJson(`/admin/v1/placement/${kind}`)
}
export function getTenants(): Promise<TenantsResponse> {
return apiJson('/admin/v1/tenants')
}
export function getJob(jobId: string): Promise<Job> {
return apiJson(`/admin/v1/jobs/${jobId}`)
}
export function cancelJob(jobId: string): Promise<void> {
return apiPostJson(`/admin/v1/jobs/${jobId}/cancel`, {}, undefined).then(() => undefined)
}
export function startTenantDrainJob(args: {
tenantId: string
reason: string
idempotencyKey: string
}): Promise<{ job_id: string }> {
return apiPostJson(
'/admin/v1/jobs/tenant/drain',
{ tenant_id: args.tenantId, reason: args.reason },
args.idempotencyKey,
)
}
export function startTenantMigrateJob(args: {
tenantId: string
runnerTarget: string
reason: string
idempotencyKey: string
}): Promise<{ job_id: string }> {
return apiPostJson(
'/admin/v1/jobs/tenant/migrate',
{ tenant_id: args.tenantId, runner_target: args.runnerTarget, reason: args.reason },
args.idempotencyKey,
)
}
export function planTenantMigrate(args: {
  tenantId: string
  runnerTarget: string
  reason: string
}): Promise<{ steps: string[] }> {
return apiPostJson('/admin/v1/plan/tenant/migrate', {
tenant_id: args.tenantId,
runner_target: args.runnerTarget,
reason: args.reason,
})
}
export function listAudit(): Promise<{ events: AuditEvent[] }> {
return apiJson('/admin/v1/audit')
}
export type SwarmService = {
name: string
image?: string | null
mode?: string | null
replicas?: string | null
updated_at?: string | null
}
export type SwarmTask = {
id: string
service: string
node?: string | null
desired_state?: string | null
current_state?: string | null
error?: string | null
}
export function getSwarmServices(): Promise<{ services: SwarmService[] }> {
return apiJson('/admin/v1/swarm/services')
}
export function getSwarmTasks(serviceName: string): Promise<{ service: string; tasks: SwarmTask[] }> {
return apiJson(`/admin/v1/swarm/services/${encodeURIComponent(serviceName)}/tasks`)
}
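The helpers above compose into plan, start, and poll flows. A sketch, assuming the module is importable as `./admin`; the tenant id, runner target, and reason are placeholders:

```ts
// Illustrative plan/start/poll flow; tenant and target values are placeholders.
import { planTenantMigrate, startTenantMigrateJob, getJob, type Job } from './admin'

const TERMINAL: Array<Job['status']> = ['succeeded', 'failed', 'cancelled']

async function migrate(): Promise<Job> {
  const args = { tenantId: 'tenant-a', runnerTarget: 'runner-2', reason: 'rebalance' }
  console.log('planned steps:', (await planTenantMigrate(args)).steps)

  // The Idempotency-Key header lets a retried POST resolve to the same job.
  const { job_id } = await startTenantMigrateJob({
    ...args,
    idempotencyKey: crypto.randomUUID(),
  })

  // Poll until the job reaches a terminal status.
  for (;;) {
    const job = await getJob(job_id)
    if (TERMINAL.includes(job.status)) return job
    await new Promise((r) => setTimeout(r, 1000))
  }
}
```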

Some files were not shown because too many files have changed in this diff