Monorepo consolidation: workspace, shared types, transport plans, docker/swarm assets
Some checks failed
ci / rust (push) Failing after 2m34s
ci / ui (push) Failing after 30s

This commit is contained in:
2026-03-30 11:40:42 +03:00
parent 7e7041cf8b
commit 1298d9a3df
246 changed files with 55434 additions and 0 deletions

1
aggregate/.clippy.toml Normal file

@@ -0,0 +1 @@

37
aggregate/.gitignore vendored Normal file

@@ -0,0 +1,37 @@
/target/
/target-*/
**/target/
*.rs.bk
*.pdb
*.dSYM/
*.orig
*.rej
*.log
*.swp
*.swo
*~
.DS_Store
.idea/
.vscode/
.env
.env.*
.envrc
.direnv/
docker-compose.override.yml
*.mdbx
*.mdbx-*
*.mdbx-lock
*.mdbx.dat
*.mdbx.lck
*.mdb
*.db
/data/
/tmp/
/coverage/
lcov.info
*.profraw
*.profdata

42
aggregate/Cargo.toml Normal file

@@ -0,0 +1,42 @@
[package]
name = "aggregate"
version = "0.1.0"
edition = "2021"
[features]
default = []
runtime-v8 = ["v8"]
runtime-wasm = []
[dependencies]
shared = { path = "../shared" }
edge_storage = { version = "0.1", registry = "madapes" }
runtime-function = { version = "0.2", registry = "madapes" }
edge-logger-client = { version = "0.1", registry = "madapes" }
query_engine = { version = "0.1", registry = "madapes" }
async-nats = "0.39"
tokio = { version = "1", features = ["full"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
serde_yaml = "0.9"
toml = "0.8"
thiserror = "2"
anyhow = "1"
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["json", "env-filter"] }
uuid = { version = "1", features = ["v7", "serde"] }
chrono = { version = "0.4", features = ["serde"] }
futures = "0.3"
lru = "0.12"
v8 = { version = "0.106", optional = true }
tonic = { version = "0.12", default-features = false, features = ["codegen", "prost", "transport"] }
prost = "0.13"
axum = "0.7"
[dev-dependencies]
tempfile = "3"
tokio-stream = { version = "0.1", features = ["net"] }
[build-dependencies]
tonic-build = { version = "0.12", default-features = false, features = ["prost"] }
protoc-bin-vendored = "3"

File diff suppressed because it is too large

81
aggregate/README.md Normal file

@@ -0,0 +1,81 @@
# aggregate
## Running
### Configuration
Configuration is loaded in this order:
1. If `AGGREGATE_CONFIG_PATH` is set and points to a readable config file, load that file and apply env overrides.
2. Otherwise load defaults and apply env overrides.
Supported config formats:
- YAML (`.yaml`, `.yml`)
- TOML (`.toml`)
- JSON (`.json`)
### Environment Variables
#### Core
- `AGGREGATE_NATS_URL` (default: `nats://localhost:4222`): NATS server URL.
- `AGGREGATE_STORAGE_PATH` (default: `./data`): Path used by the snapshot storage.
- `AGGREGATE_SNAPSHOT_THRESHOLD` (default: `10`): Save snapshot when events since last snapshot reach this threshold.
- `AGGREGATE_MAX_RETRIES` (default: `3`): Max retries for version conflicts in command handling.
- `AGGREGATE_HTTP_ADDR` (default: `0.0.0.0:8080`): HTTP bind address.
- `AGGREGATE_GRPC_ADDR` (default: `0.0.0.0:50051`): gRPC bind address for command submission.
#### Multi-tenant
- `AGGREGATE_MULTI_TENANT` (default: `true`): Enables multi-tenant behavior when parsing/validating tenant ids.
- `AGGREGATE_DEFAULT_TENANT_ID` (default: unset): Default tenant id when the incoming request doesn't specify one.
- `AGGREGATE_SHARD_ID` (default: `local`): Shard id used when applying placement maps.
#### Logging
- `AGGREGATE_LOGGER_SOCKET` (default: unset): Socket path for `edge-logger-client` integration (if enabled).
#### Server
- `AGGREGATE_CONFIG_PATH` (default: unset): Path to a YAML/TOML/JSON config file.
#### Placement
- `AGGREGATE_PLACEMENT_BUCKET` (default: `AGGREGATE_PLACEMENT`): NATS KV bucket to watch.
- `AGGREGATE_PLACEMENT_KEY` (default: `aggregate_placement`): NATS KV key to watch. Value is a JSON object mapping `tenant_id -> shard_id`.
#### Runtime Programs
- `AGGREGATE_DECIDE_PROGRAM` / `AGGREGATE_APPLY_PROGRAM`: Inline program source strings.
- `AGGREGATE_DECIDE_PROGRAM_PATH` / `AGGREGATE_APPLY_PROGRAM_PATH`: Paths to files containing the program source.
## HTTP Endpoints
- `GET /health` → JSON health report
- `GET /ready` → JSON boolean readiness
- `GET /metrics` → Prometheus text format
- `GET /admin/tenants` → JSON list of hosted tenants
- `POST /admin/drain` → marks tenant draining and waits for in-flight commands to finish (`{"tenant_id":"..."}`)
- `POST /admin/reload` → updates hosted tenant allowlist (`{"hosted_tenants":[...]}`) or applies a placement map (`{"placement":{...}}`)
- `GET /admin/tenant/{tenant_id}/status` → JSON tenant status (`hosted`, `accepting`, `draining`, `in_flight`)
- `GET /admin/tenant/{tenant_id}/ready` → JSON boolean (node ready AND accepting tenant)
- `POST /admin/tenant/{tenant_id}/drain` → drains tenant with optional timeout (`{"timeout_ms":10000}`)
## gRPC
Aggregate exposes a command submission API for the Gateway:
- Service: `aggregate.gateway.v1.CommandService`
- Method: `SubmitCommand`
- Metadata: `x-tenant-id` (tenant routing hint)
Proto definition: [aggregate.proto](file:///Users/vlad/Developer/cloudlysis/aggregate/proto/aggregate.proto)
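A minimal client sketch using `tonic` (illustrative only: the generated module path, address, and IDs below are assumptions; the crate compiles the proto via `tonic_build` in `build.rs`):
```rust
use tonic::metadata::MetadataValue;
use tonic::transport::Channel;
use tonic::Request;

// Generated from proto/aggregate.proto by tonic-build.
pub mod proto {
    tonic::include_proto!("aggregate.gateway.v1");
}
use proto::command_service_client::CommandServiceClient;
use proto::SubmitCommandRequest;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let channel = Channel::from_static("http://127.0.0.1:50051").connect().await?;
    let mut client = CommandServiceClient::new(channel);

    let mut request = Request::new(SubmitCommandRequest {
        tenant_id: "tenant-a".into(),
        command_id: String::new(), // empty: the server generates one
        aggregate_id: "00000000-0000-7000-8000-000000000000".into(), // placeholder UUID
        aggregate_type: "Account".into(),
        payload_json: r#"{"type":"deposit","amount":50}"#.into(),
        metadata: Default::default(),
    });
    // Tenant routing hint carried as gRPC metadata.
    request
        .metadata_mut()
        .insert("x-tenant-id", MetadataValue::from_static("tenant-a"));

    let response = client.submit_command(request).await?;
    println!("events: {}", response.into_inner().events.len());
    Ok(())
}
```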
## Container
Build and run locally:
```bash
docker build -t cloudlysis/aggregate:local -f docker/Dockerfile.rust --build-arg PACKAGE=aggregate --build-arg BIN=aggregate .
docker compose up -d --build
```
Container smoke test (requires Docker installed):
```bash
sh docker/scripts/verify_aggregate_container.sh
```

8
aggregate/build.rs Normal file

@@ -0,0 +1,8 @@
fn main() -> Result<(), Box<dyn std::error::Error>> {
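// Point PROTOC at the vendored binary so the build does not require a system protobuf install.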
let protoc = protoc_bin_vendored::protoc_bin_path()?;
std::env::set_var("PROTOC", protoc);
tonic_build::configure().compile_protos(&["proto/aggregate.proto"], &["proto"])?;
Ok(())
}

4
aggregate/cargo-build.sh Normal file

@@ -0,0 +1,4 @@
#!/bin/bash
export CARGO_REGISTRIES_MADAPES_TOKEN=0f5ef6366637224dceae4c35e0e3b5639be77b69
source ~/.cargo/env
cargo "$@"

192
aggregate/external_prd.md Normal file

@@ -0,0 +1,192 @@
### External PRD: Changes Required in Aggregate, Projection, Runner
This document captures the work needed outside the Gateway to support:
- Tenant-aware routing via `x-tenant-id`
- Independent horizontal scalability of Aggregate, Projection, Runner
- A safe mechanism for tenant rebalancing per service kind
---
## **Target State**
### Independent Placements
Each service kind has its own placement map:
- `aggregate_placement[tenant_id] -> aggregate_shard_id`
- `projection_placement[tenant_id] -> projection_shard_id`
- `runner_placement[tenant_id] -> runner_shard_id`
Each shard is a replica set that can scale independently.
### Rebalancing Contract (Per Service Kind)
All nodes MUST support:
- Dynamic placement updates (watch NATS KV or reload config)
- A drain mechanism that can target a specific tenant (stop acquiring new work for that tenant, finish in-flight, report status)
- Clear readiness semantics that reflect whether the node will accept work for a tenant
Additionally, all nodes SHOULD converge on the same operational contract:
- A per-tenant “accepting” gate (can this shard accept new work/queries/commands for tenant X?)
- A per-tenant “drained” signal (no in-flight work remains for tenant X)
- A per-tenant warmup/catchup signal where relevant (projection lag, aggregate snapshot availability)
---
## **Aggregate: Required Changes**
### 1) Expose a Real Command API (Gateway Upstream)
Today, Aggregate has internal command handling types (e.g., `CommandServer`) but its running HTTP server only exposes health/metrics/admin endpoints ([aggregate/http_server.rs](file:///Users/vlad/Developer/cloudlysis/aggregate/src/http_server.rs#L15-L82), [aggregate/server/mod.rs](file:///Users/vlad/Developer/cloudlysis/aggregate/src/server/mod.rs#L81-L213)).
Aggregate MUST expose one of the following upstream APIs for the Gateway to call:
- **Option A (Recommended)**: gRPC server implementing `aggregate.gateway.v1.CommandService/SubmitCommand` compatible with [aggregate.proto](file:///Users/vlad/Developer/cloudlysis/aggregate/proto/aggregate.proto#L1-L31).
- **Option B**: HTTP endpoint for command submission (REST), with a stable request/response shape that the Gateway can proxy.
### 2) Tenant Placement Enforcement
Aggregate MUST enforce “hosted tenants” so independent scaling is safe:
- If an Aggregate shard/node is not assigned a tenant, it MUST reject commands for that tenant (e.g., `403` or `503` with a retriable hint, depending on whether the issue is authorization vs placement).
- Aggregate SHOULD maintain an in-memory allowlist of hosted tenants that is driven by:
- NATS KV placement watcher (preferred), or
- Hot-reloaded config pushed via `/admin/reload`
Aggregate already has admin hooks for drain/reload, but they are currently generic and/or illustrative ([aggregate/http_server.rs](file:///Users/vlad/Developer/cloudlysis/aggregate/src/http_server.rs#L15-L72), [aggregate/server/mod.rs](file:///Users/vlad/Developer/cloudlysis/aggregate/src/server/mod.rs#L402-L442)). These need to become placement-aware.
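As one illustration of the placement-aware gate (hypothetical types, not existing code), the check at the command boundary could look roughly like this:
```rust
use std::collections::HashSet;
use std::sync::RwLock;

/// Hypothetical in-memory allowlist of tenants hosted by this shard,
/// refreshed by a NATS KV placement watcher or via `/admin/reload`.
pub struct HostedTenants {
    placement_loaded: RwLock<bool>,
    tenants: RwLock<HashSet<String>>,
}

pub enum PlacementDecision {
    Accept,
    /// Tenant is known to belong to another shard: permanent rejection (403-style).
    WrongShard,
    /// Placement is not known yet: rejection with a retriable hint (503-style).
    Retriable,
}

impl HostedTenants {
    pub fn check(&self, tenant_id: &str) -> PlacementDecision {
        if !*self.placement_loaded.read().unwrap() {
            return PlacementDecision::Retriable;
        }
        if self.tenants.read().unwrap().contains(tenant_id) {
            PlacementDecision::Accept
        } else {
            PlacementDecision::WrongShard
        }
    }

    /// Applies a placement map (`tenant_id -> shard_id`), keeping only the
    /// tenants assigned to this shard.
    pub fn apply_placement(&self, shard_id: &str, placement: &[(String, String)]) {
        let hosted: HashSet<String> = placement
            .iter()
            .filter(|(_, s)| s.as_str() == shard_id)
            .map(|(t, _)| t.clone())
            .collect();
        *self.tenants.write().unwrap() = hosted;
        *self.placement_loaded.write().unwrap() = true;
    }
}
```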
### 3) Tenant Drain (Per Tenant)
Aggregate MUST provide a per-tenant drain mechanism to support rebalancing (a sketch follows this list):
- Stop accepting new commands for the tenant.
- Allow in-flight commands to finish (bounded wait), then report drained.
- Expose drain status per tenant (admin endpoint).
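A minimal sketch of the bounded-wait drain (hypothetical types; the real mechanism would sit behind the admin endpoints):
```rust
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::time::Duration;

/// Hypothetical per-tenant gate: whether new commands are accepted and how
/// many commands are currently in flight for this tenant.
pub struct TenantGate {
    accepting: AtomicBool,
    in_flight: AtomicUsize,
}

impl TenantGate {
    pub fn new() -> Self {
        Self {
            accepting: AtomicBool::new(true),
            in_flight: AtomicUsize::new(0),
        }
    }

    /// Stops accepting new commands, then waits (bounded) for in-flight
    /// commands to finish. Returns true if the tenant fully drained in time.
    pub async fn drain(&self, timeout: Duration) -> bool {
        self.accepting.store(false, Ordering::SeqCst);
        let deadline = tokio::time::Instant::now() + timeout;
        while self.in_flight.load(Ordering::SeqCst) > 0 {
            if tokio::time::Instant::now() >= deadline {
                return false;
            }
            tokio::time::sleep(Duration::from_millis(50)).await;
        }
        true
    }
}
```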
### 4) Rebalancing State Strategy
Aggregate persists snapshots locally (MDBX) and uses JetStream for events. To move a tenant:
- **Approach 1 (Snapshot migration)**: copy tenant snapshot DB/state to the target shard, then switch placement.
- **Approach 2 (Cold rehydrate)**: switch placement and let the target shard rebuild state by replaying events from JetStream; expect higher latency during warmup.
The system should support both, with the rebalancer selecting the strategy based on tenant size/SLO.
### 5) Metrics for Placement Decisions
Aggregate SHOULD expose:
- Per-tenant command rate, error rate
- In-flight commands by tenant
- Rehydrate time / snapshot hit ratio
- Storage size per tenant (if feasible)
---
## **Projection: Required Changes**
### 1) Expose Query API Upstream for Gateway
Projection has a working `QueryService` with tenant-scoped prefix scans ([uqf.rs](file:///Users/vlad/Developer/cloudlysis/projection/src/query/uqf.rs#L121-L162)) but it is not exposed via HTTP/gRPC (current HTTP routes are health/ready/metrics/info only: [projection/http/mod.rs](file:///Users/vlad/Developer/cloudlysis/projection/src/http/mod.rs#L102-L109)).
Projection MUST add one upstream API the Gateway can route to:
- `POST /query/{view_type}` (HTTP) accepting `x-tenant-id` and a UQF payload, returning `QueryResponse`.
- Or a gRPC query service (new proto) if gRPC is preferred end-to-end.
### 2) Tenant Placement Filtering (Independent Scaling)
Projection MUST support running in one of these modes:
- **Multi-tenant shard**: consumes all tenants (simple, less isolated).
- **Tenant-filtered shard (required for rebalancing)**:
- only consumes/serves queries for the tenants assigned to that shard
- rejects queries for unassigned tenants (consistent error semantics)
Implementation direction (a subject-filter sketch follows this list):
- Add a placement watcher similar to Runner's tenant filter ([runner/tenant_placement.rs](file:///Users/vlad/Developer/cloudlysis/runner/src/tenant_placement.rs#L8-L100)).
- Apply tenant filter to:
- event consumption subject filters (preferred), and
- query serving validation (always).
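A small sketch of the subject side (the layout follows the tenant-namespaced convention used by the Aggregate; wiring these strings into the JetStream consumer's filter subjects is left out):
```rust
/// Builds per-tenant JetStream filter subjects for a tenant-filtered shard,
/// assuming the `tenant.<tenant_id>.aggregate.>` subject convention.
fn tenant_filter_subjects(hosted_tenants: &[String]) -> Vec<String> {
    hosted_tenants
        .iter()
        .map(|tenant_id| format!("tenant.{tenant_id}.aggregate.>"))
        .collect()
}

fn main() {
    let hosted = vec!["tenant-a".to_string(), "tenant-d".to_string()];
    // These would become the consumer's filter subjects, so the shard only
    // receives events for its assigned tenants; queries for other tenants
    // are still rejected separately at the serving layer.
    for subject in tenant_filter_subjects(&hosted) {
        println!("{subject}");
    }
}
```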
### 3) Drain + Warmup Endpoints
Projection SHOULD add:
- `/admin/drain?tenant_id=...` (stop consuming new events for that tenant, finish in-flight, flush checkpoints)
- `/admin/reload` (apply latest placement/config)
- Optional warmup status: whether the shard has caught up to JetStream tail for that tenant/view_types
### 4) Rebalancing Strategy for Projection
Projection can rebalance safely with “warm then cut over”:
- Assign tenant to the new projection shard while old shard still serves.
- New shard catches up (replay from JetStream, build view KV).
- Switch Gateway placement for query routing to new shard.
- Drain old shard for that tenant and optionally delete old tenant KV keys.
### 5) Metrics for Placement Decisions
Projection SHOULD expose:
- JetStream lag per tenant/view_type (tail minus checkpoint)
- Query latency and scan counts
- Storage size per tenant (if feasible)
---
## **Runner: Required Changes**
Runner already has:
- A tenant placement watcher capable of producing an allowlist ([tenant_placement.rs](file:///Users/vlad/Developer/cloudlysis/runner/src/tenant_placement.rs#L8-L100))
- Admin endpoints including drain/reload/config ([runner/http/mod.rs](file:///Users/vlad/Developer/cloudlysis/runner/src/http/mod.rs#L69-L86))
- Gateway client integration for aggregate command submission ([runner/gateway/mod.rs](file:///Users/vlad/Developer/cloudlysis/runner/src/gateway/mod.rs#L1-L47))
To support independent scalability + rebalancing, Runner needs the following.
### 1) Per-Tenant Drain (Not Only Global)
Runner's current drain is global (`/admin/drain` toggles a single draining flag). Runner MUST support draining a specific tenant:
- Stop acquiring new saga/effect work for the tenant.
- Allow in-flight work for the tenant to finish (bounded).
- Flush outbox for the tenant (or guarantee idempotency on handoff).
- Persist final checkpoints so another shard can continue without duplication beyond at-least-once bounds.
### 2) Placement-Enforced Work Acquisition
Runner MUST validate tenant assignment at the boundary where it:
- consumes JetStream messages (saga triggers, effect commands), and
- dispatches outbox work.
If a tenant is not assigned to the shard, Runner must not process its work.
### 3) Handoff Safety Rules for Rebalancing
Runner rebalancing should follow:
- New shard begins processing only after it is assigned the tenant.
- Old shard stops acquiring new work for that tenant, then drains.
- Idempotency remains correct across handoff using checkpoints and dedupe markers.
### 4) Metrics for Placement Decisions
Runner SHOULD expose:
- Outbox depth by tenant
- Work processing latency and retries by tenant/effect
- Schedule due items by tenant
- Consumer lag by tenant (if the consumption model supports per-tenant lag)
### 5) Auth Delivery Side Effects (Email/SMS/Push)
If the platform's AuthN flows require out-of-band delivery (password reset links, email verification, MFA codes), the Runner SHOULD be the standard place to execute those side effects:
- Define a stable effect interface for sending transactional emails (reset links, verification links, security alerts).
- Optionally add SMS/push providers later under the same effect contract.
This keeps the Gateway free of long-lived provider credentials and aligns with the existing “effects are executed by workers” pattern.
---
## **Gateway Integration Notes**
Once the above changes exist:
- Gateway routes per `(tenant_id, service_kind)` using independent placement maps.
- Gateway can implement “warm then cut over” rebalancing for Projection and Runner by switching only query/workflow routing after readiness conditions are met.
- Gateway can enforce consistent tenant validation, authn/authz, and error semantics at the edge even as placements move.
---
## **Gaps / Opportunities**
- **KV schema + ownership**: define the exact NATS KV bucket layout, key naming, revisioning rules, and who is allowed to write placement updates.
- **Rebalancer API**: define operator workflows (plan/apply/rollback), status reporting, and audit log requirements for placement changes.
- **Shard discovery**: define how shard endpoints are registered (static config vs KV directory entries) and how health is represented.
- **Consistency boundaries**: define rebalancing guarantees per service kind (projection can be warm-cutover; runner requires checkpoint handoff; aggregate requires single-writer and state availability).


@@ -0,0 +1,4 @@
tenants:
tenant-a: "http://aggregate-node-a:8080"
tenant-b: "http://aggregate-node-b:8080"

160
aggregate/prd.md Normal file

@@ -0,0 +1,160 @@
### 🧱 Component: Aggregate
**Definition:**
The Aggregate is a standalone Rust-based container that serves as the primary consistency boundary and decision-making unit of the system. It is a stateful entity that encapsulates business logic, enforces invariants, and ensures that all changes to the system are valid according to defined rules. Commands are received from users through a Gateway, and events are stored on **NATS JetStream**; `edge-storage` `AggregateStore` holds versioned **snapshots** for efficient rehydration.
**Multi-Tenancy:**
The Aggregate supports optional multi-tenancy via `tenant_id`. When enabled:
- **Routing:** The Gateway routes commands to Aggregate nodes based on the `x-tenant-id` header
- **Sharding:** Aggregate instances are sharded across nodes by `tenant_id`, ensuring tenant data isolation
- **Storage:** Snapshots and events are namespaced by `tenant_id` to prevent cross-tenant access
- **Subject Naming:** NATS subjects include `tenant_id` (e.g., `tenant.<tenant_id>.aggregate.<aggregate_type>.<aggregate_id>`)
- **Backward Compatibility:** Aggregates without multi-tenancy use a default/empty `tenant_id`
**Dependencies:**
* Core crates pulled from the custom Cargo registry:
```toml
[registries.madapes]
index = "sparse+https://git.madapes.com/api/packages/madapes/cargo/"
```
| Crate | Purpose |
|-------|---------|
| `edge-storage` | libmdbx-backed AggregateStore for versioned snapshots |
| `runtime-function` | Deterministic DAG execution for `decide`/`apply` programs |
| `edge-logger` | High-performance logging (UDS + Protobuf, Loki sink) |
| `query-engine` | UQF query support for filtering/querying aggregate state |
| `async-nats` | NATS JetStream client for event streaming |
* Source code available at `../../madapes/`
* **Note:** This is a standalone container — it does not use `framework-bus` or `framework-aggregate` (those serve a different system)
**Observability:**
* Production stack: **Grafana** + **Victoria Metrics** + **Loki**
* `edge-logger` provides structured logging via Unix Domain Sockets with lock-free batching
* Metrics exposed via `metrics-exporter-prometheus` for Victoria Metrics scraping
* Traces/logs flow to Loki with cardinality protection and multi-tenant isolation
#### 1. Core Responsibilities
* **Command Validation:** Receives intent (Commands) from the Gateway and uses `runtime-function` DAG programs to determine if the intent is valid based on the current state.
* **State Rehydration:** Reconstructs its internal state by loading the latest **snapshot** from `edge-storage` `AggregateStore` (`get_latest_snapshot`) and replaying any subsequent events from NATS JetStream.
* **Event Production:** Transforms valid commands into one or more Events that represent a "fact" that has occurred.
* **Atomic Persistence:** Publishes new events to NATS JetStream and stores an updated snapshot in `edge-storage` `AggregateStore` (`put_snapshot_sync`).
* **Concurrency Control:** Protects against "lost updates" using version-based optimistic locking. `edge-storage` `AggregateStore` returns `VersionConflict` for duplicate versions.
#### 2. The Lifecycle of a Command
1. **Reception:** The Gateway routes a Command from a user to the Aggregate container based on the `aggregate_id` and `x-tenant-id` header. The `tenant_id` is extracted and included in the Command envelope for tenant-aware processing.
2. **Loading (Rehydration):**
* The Aggregate fetches the latest **Snapshot** from `edge-storage` `AggregateStore` using the composite key `(tenant_id, aggregate_id)`.
* It reads any **Events** from NATS JetStream (tenant-namespaced subject) that occurred after the snapshot version.
* It applies these events sequentially to the snapshot state using the deterministic `apply` runtime-function program to reach the "Current State."
3. **Execution:**
* The Aggregate passes the Current State and the Command to the `decide` runtime-function program.
* If invalid: Returns an Error (Command Rejected).
* If valid: Returns a list of New Events.
4. **Persistence (The Commit):**
* The Aggregate publishes New Events to NATS JetStream on tenant-namespaced subjects, with `command_id` mapped to `idempotency_key`.
* It stores an updated snapshot in `edge-storage` `AggregateStore` using `(tenant_id, aggregate_id, new_version)` as the composite key.
* **Constraint:** `AggregateStore` enforces strict monotonicity — if `new_version` already exists, it returns `VersionConflict`, and the Aggregate must reload and retry.
5. **Publication:**
* Events published to NATS JetStream are immediately available for downstream consumption by Sagas and Projections (filtered by tenant if needed).
#### 3. Technical Constraints & Guarantees
* **Determinism:** The logic within an Aggregate must be 100% deterministic. `runtime-function` DAG programs are sandboxed and gas-metered, with no access to the system clock, random number generators, or external APIs. All data required for a decision must be present in the Command or the Aggregate State.
* **Side-Effect Free:** An Aggregate does not send emails, update databases, or call other services. It only produces events. Side effects are the responsibility of Sagas.
* **Single Writer:** While multiple nodes may attempt to process commands for the same `aggregate_id`, only one "Commit" can succeed for a specific version, enforced by `edge-storage` `AggregateStore` (`VersionConflict`).
* **Tenant Isolation:** An Aggregate can only access data within its `tenant_id` scope. Cross-tenant access is blocked at the storage and stream layers. The `tenant_id` is validated on every command to prevent tenant spoofing.
* **Isolation:** An Aggregate cannot see the state of other Aggregates. If a business rule spans multiple Aggregates, it must be handled by a **Saga**.
#### 4. Data Structure (The Envelope)
Each Aggregate maintains a metadata header (an illustrative struct follows the list):
* `tenant_id`: Optional identifier for multi-tenant isolation (routed via `x-tenant-id` header)
* `aggregate_id`: Unique UUID or URN for the instance.
* `aggregate_type`: The name of the business entity (e.g., `Account`, `Order`).
* `version`: A monotonically increasing integer representing the number of events processed.
* `snapshot_threshold`: A configuration defining how many events should trigger a new snapshot in `edge-storage`.
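For orientation only, a rough Rust shape of this header (field names follow the list above; the concrete types are assumptions — the real definitions live in the crate's `types` module):
```rust
use serde::{Deserialize, Serialize};
use uuid::Uuid;

/// Illustrative shape of the aggregate metadata header described above.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AggregateEnvelope {
    /// Optional tenant scope; empty/default in single-tenant mode.
    pub tenant_id: Option<String>,
    /// Unique UUID (or URN) identifying the aggregate instance.
    pub aggregate_id: Uuid,
    /// Business entity name, e.g. "Account" or "Order".
    pub aggregate_type: String,
    /// Monotonically increasing count of events processed.
    pub version: u64,
    /// Number of events since the last snapshot that triggers a new one.
    pub snapshot_threshold: u64,
}
```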
#### 5. Error Handling
* **Validation Errors:** Business rule violations (e.g., "Insufficient Funds") result in an immediate synchronous rejection of the command.
* **Tenant Access Errors:** Cross-tenant access attempts (e.g., wrong `tenant_id` in command) are rejected with `TenantAccessDenied`.
* **Concurrency Conflicts:** If `edge-storage` returns `VersionConflict`, the framework implements an automatic "Retry-on-Conflict" policy (Reload → Re-validate → Re-commit) up to a defined limit.
* **System Failures:** If `edge-storage` or NATS JetStream is unavailable, the Aggregate remains in a read-only or "unavailable" state to prevent inconsistent branching of the event stream.
#### 6. Horizontal Scaling Strategy
The Aggregate container is designed for horizontal scaling on **Docker Swarm**, leveraging tenant-based sharding for predictable data locality and simple operations.
**Sharding Model:**
- **Tenant-Aware Placement:** Aggregate instances are placed on Swarm nodes based on `tenant_id` using Docker Swarm placement constraints
- **Consistent Hashing:** A hash ring maps `tenant_id` values to specific nodes, ensuring all commands for a tenant route to the same node (or replica set)
- **Subject-Based Routing:** NATS JetStream consumer groups are tenant-namespaced, enabling parallel processing across tenants without coordination
**Scaling Architecture:**
```
┌─────────────────────────────────────────────────────────────────┐
│ Admin UI (Control Node) │
│ ┌─────────────────────────────────────────────────────────┐ │
│ │ Scale Manager: CRUD for tenant → node assignments │ │
│ │ - List tenants, node assignments, load metrics │ │
│ │ - Add/remove nodes, migrate tenants │ │
│ │ - Emit scaling commands to Docker Swarm API │ │
│ └─────────────────────────────────────────────────────────┘ │
└──────────────────────────┬──────────────────────────────────────┘
│ Docker Swarm API / SSH
┌─────────────────────────────────────────────────────────────────┐
│ Docker Swarm Cluster │
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
│ │ Node A │ │ Node B │ │ Node C │ │
│ │ tenant: a-c │ │ tenant: d-m │ │ tenant: n-z │ │
│ │ ┌────────┐ │ │ ┌────────┐ │ │ ┌────────┐ │ │
│ │ │Agg Ctr │ │ │ │Agg Ctr │ │ │ │Agg Ctr │ │ │
│ │ └───┬────┘ │ │ └───┬────┘ │ │ └───┬────┘ │ │
│ │ │ │ │ │ │ │ │ │ │
│ │ ┌───▼────┐ │ │ ┌───▼────┐ │ │ ┌───▼────┐ │ │
│ │ │libmdbx │ │ │ │libmdbx │ │ │ │libmdbx │ │ │
│ │ │(local) │ │ │ │(local) │ │ │ │(local) │ │ │
│ │ └────────┘ │ │ └────────┘ │ │ └────────┘ │ │
│ └──────────────┘ └──────────────┘ └──────────────┘ │
│ │ │ │ │
│ └──────────────────┴──────────────────┘ │
│ │ │
│ ┌────────────────────────▼────────────────────────────────────┐ │
│ │ Shared NATS JetStream Cluster │ │
│ │ (tenant-namespaced subjects for isolation) │ │
│ └─────────────────────────────────────────────────────────────┘ │
└─────────────────────────────────────────────────────────────────┘
```
**Note:** Each node has its own embedded `edge-storage` (libmdbx) containing snapshots for its assigned tenants. NATS JetStream provides shared event storage. Tenant migration requires snapshot data transfer between nodes.
**Operational Model:**
- **Scale Up:** Admin UI calls Swarm API to add new node, updates tenant → node mapping, Gateway updates routing table
- **Scale Down:** Migrate tenants to other nodes (drain), remove node from Swarm
- **Tenant Migration:** Pause consumer, copy tenant data, update routing, resume on new node
- **Zero-Downtime:** New tenant assignments are picked up by Gateway via config reload without restart
**Placement Constraints:**
- Each Aggregate service runs with `--constraint node.labels.tenant_range==<range>`
- Gateway uses tenant → node mapping to route commands to correct Swarm service endpoint
- Multiple replicas per tenant range supported for HA (active-passive via NATS consumer groups)
**Admin Endpoints (per Aggregate container):**
- `/health` - Container health (NATS, storage, active aggregates)
- `/ready` - Readiness for receiving commands
- `/metrics` - Prometheus metrics with tenant_id labels
- `/admin/tenants` - List tenants hosted on this node (read-only)
- `/admin/drain` - Graceful drain for tenant migration
- `/admin/reload` - Hot-reload tenant placement config
**External Control Node:**
- Separate service that calls Aggregate admin endpoints
- Manages Docker Swarm API for scaling operations
- Publishes tenant → node mapping to NATS KV
- See Admin UI repository for full implementation
---
### 💡 Implementation Note:
The **Aggregate Logic** is a pair of `runtime-function` DAG programs:
1. **`decide` program**: `(state, command) → events[]` — The business logic (validates command, produces events).
2. **`apply` program**: `(state, event) → new_state` — The state transition logic (used during rehydration from snapshots + events).
These are referenced in the manifest as `decide:` and `apply:` fields under each aggregate definition.


@@ -0,0 +1,32 @@
syntax = "proto3";
package aggregate.gateway.v1;
service CommandService {
rpc SubmitCommand(SubmitCommandRequest) returns (SubmitCommandResponse);
}
message SubmitCommandRequest {
string tenant_id = 1;
string command_id = 2;
string aggregate_id = 3;
string aggregate_type = 4;
string payload_json = 5;
map<string, string> metadata = 6;
}
message Event {
string event_id = 1;
string command_id = 2;
string aggregate_id = 3;
string aggregate_type = 4;
uint64 version = 5;
string event_type = 6;
string payload_json = 7;
string timestamp_rfc3339 = 8;
}
message SubmitCommandResponse {
repeated Event events = 1;
}

2
aggregate/rustfmt.toml Normal file

@@ -0,0 +1,2 @@
edition = "2021"
newline_style = "Unix"


@@ -0,0 +1,487 @@
use super::AggregateInstance;
use crate::query::{QueryClient, StateProjection};
use crate::runtime::RuntimeExecutor;
use crate::storage::StorageClient;
use crate::stream::StreamClient;
use crate::types::{
AggregateError, AggregateId, AggregateType, Command, Event, Snapshot, TenantId, Version,
};
#[derive(Debug, Clone)]
pub struct AggregateHandler {
storage: StorageClient,
stream: StreamClient,
executor: RuntimeExecutor,
query: QueryClient,
decide_program: String,
apply_program: String,
snapshot_threshold: u64,
max_retries: u32,
}
impl AggregateHandler {
pub fn new(
storage: StorageClient,
stream: StreamClient,
executor: RuntimeExecutor,
decide_program: String,
apply_program: String,
) -> Self {
Self {
storage,
stream,
executor,
query: QueryClient::embedded(),
decide_program,
apply_program,
snapshot_threshold: 10,
max_retries: 3,
}
}
pub fn with_query_client(mut self, query: QueryClient) -> Self {
self.query = query;
self
}
pub fn with_snapshot_threshold(mut self, threshold: u64) -> Self {
self.snapshot_threshold = threshold;
self
}
pub fn with_max_retries(mut self, max_retries: u32) -> Self {
self.max_retries = max_retries.max(1);
self
}
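/// Handles a command with optimistic concurrency: rehydrate, decide, persist;
/// on `VersionConflict` the whole load/decide/persist cycle is retried up to
/// `max_retries` times.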
pub async fn handle_command(&self, command: Command) -> Result<Vec<Event>, AggregateError> {
let mut attempt = 0;
loop {
attempt += 1;
let tenant_id = &command.tenant_id;
let aggregate_id = &command.aggregate_id;
let aggregate_type = &command.aggregate_type;
let instance = self
.load_or_create_instance(tenant_id, aggregate_id, aggregate_type)
.await?;
let (instance, events) = self.execute_command(instance, command.clone()).await?;
if events.is_empty() {
return Ok(events);
}
match self.persist_events(&events).await {
Ok(()) => {
self.maybe_save_snapshot(&instance).await?;
self.project_state(&instance).await?;
return Ok(events);
}
Err(AggregateError::VersionConflict { .. }) if attempt < self.max_retries => {
continue;
}
Err(e) => return Err(e),
}
}
}
async fn load_or_create_instance(
&self,
tenant_id: &TenantId,
aggregate_id: &AggregateId,
aggregate_type: &AggregateType,
) -> Result<AggregateInstance, AggregateError> {
let snapshot = self.storage.get_snapshot(tenant_id, aggregate_id).await?;
match snapshot {
Some(snapshot) => {
let events = self
.stream
.fetch_events(tenant_id, aggregate_id, snapshot.version)
.await?;
AggregateInstance::rehydrate_with_executor(
tenant_id.clone(),
snapshot,
events,
self.decide_program.clone(),
self.apply_program.clone(),
&self.executor,
)
.await
}
None => {
let events = self
.stream
.fetch_events(tenant_id, aggregate_id, Version::initial())
.await?;
if events.is_empty() {
Ok(AggregateInstance::new(
aggregate_id.clone(),
aggregate_type.clone(),
tenant_id.clone(),
self.decide_program.clone(),
self.apply_program.clone(),
))
} else {
let initial_snapshot = Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
aggregate_type.clone(),
Version::initial(),
serde_json::Value::Null,
);
AggregateInstance::rehydrate_with_executor(
tenant_id.clone(),
initial_snapshot,
events,
self.decide_program.clone(),
self.apply_program.clone(),
&self.executor,
)
.await
}
}
}
}
async fn execute_command(
&self,
mut instance: AggregateInstance,
command: Command,
) -> Result<(AggregateInstance, Vec<Event>), AggregateError> {
let events = instance.handle_command(command, &self.executor).await?;
Ok((instance, events))
}
async fn project_state(&self, instance: &AggregateInstance) -> Result<(), AggregateError> {
let projection = StateProjection::default_projection_from_state(
instance.tenant_id(),
instance.aggregate_id(),
instance.aggregate_type(),
&instance.version(),
instance.state(),
);
self.query
.index(projection)
.await
.map_err(|e| AggregateError::StorageError(e.to_string()))
}
async fn maybe_save_snapshot(
&self,
instance: &AggregateInstance,
) -> Result<(), AggregateError> {
let current_version = instance.version();
let events_since_snapshot = current_version
.as_u64()
.saturating_sub(instance.snapshot_version().as_u64());
if events_since_snapshot >= self.snapshot_threshold {
let snapshot = instance.to_snapshot();
match self.storage.put_snapshot(&snapshot).await {
Ok(()) => {}
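// A snapshot at this version may already exist (e.g. written concurrently); the conflict is ignored.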
Err(AggregateError::VersionConflict { .. }) => {}
Err(e) => return Err(e),
}
}
Ok(())
}
async fn persist_events(&self, events: &[Event]) -> Result<(), AggregateError> {
self.stream.publish_events(events.to_vec()).await
}
pub async fn load_aggregate(
&self,
tenant_id: &TenantId,
aggregate_id: &AggregateId,
aggregate_type: &AggregateType,
) -> Result<AggregateInstance, AggregateError> {
self.load_or_create_instance(tenant_id, aggregate_id, aggregate_type)
.await
}
pub fn storage(&self) -> &StorageClient {
&self.storage
}
pub fn stream(&self) -> &StreamClient {
&self.stream
}
pub fn executor(&self) -> &RuntimeExecutor {
&self.executor
}
pub fn query_client(&self) -> &QueryClient {
&self.query
}
}
#[cfg(test)]
mod tests {
use super::*;
use tempfile::tempdir;
async fn create_test_handler() -> (tempfile::TempDir, AggregateHandler) {
let dir = tempdir().unwrap();
let path = dir.path().join("test.mdbx");
let storage = StorageClient::open(path.to_string_lossy().to_string()).unwrap();
let stream = StreamClient::in_memory();
let executor = RuntimeExecutor::with_config(
crate::runtime::ExecutorConfig::default().with_mock_runtime(),
);
let handler = AggregateHandler::new(
storage,
stream,
executor,
"function decide(s,c) { return []; }".to_string(),
"function apply(s,e) { return s; }".to_string(),
);
(dir, handler)
}
#[test]
fn handler_is_send_sync() {
fn assert_send_sync<T: Send + Sync>() {}
assert_send_sync::<AggregateHandler>();
}
#[test]
fn snapshot_threshold_defaults_to_10() {
let dir = tempdir().unwrap();
let path = dir.path().join("test.mdbx");
let storage = StorageClient::open(path.to_string_lossy().to_string()).unwrap();
let stream = StreamClient::in_memory();
let executor = RuntimeExecutor::new();
let handler = AggregateHandler::new(
storage,
stream,
executor,
"decide".to_string(),
"apply".to_string(),
);
let handler_with_threshold = AggregateHandler::new(
handler.storage.clone(),
handler.stream.clone(),
handler.executor.clone(),
"decide".to_string(),
"apply".to_string(),
)
.with_snapshot_threshold(25);
assert_eq!(handler.snapshot_threshold, 10);
assert_eq!(handler_with_threshold.snapshot_threshold, 25);
}
#[tokio::test]
async fn handler_full_lifecycle_persists_events_and_snapshot() {
let (_dir, handler) = create_test_handler().await;
let handler = handler.with_snapshot_threshold(1);
let tenant_id = TenantId::new("tenant-a");
let aggregate_id = AggregateId::new_v7();
let aggregate_type = AggregateType::from("Account");
let command = Command::new(
tenant_id.clone(),
aggregate_id.clone(),
aggregate_type.clone(),
serde_json::json!({"type": "deposit", "amount": 50}),
);
let events = handler.handle_command(command).await.unwrap();
assert_eq!(events.len(), 1);
let snapshot = handler
.storage
.get_snapshot(&tenant_id, &aggregate_id)
.await
.unwrap()
.unwrap();
assert_eq!(snapshot.version, Version::from(1));
}
#[tokio::test]
async fn retry_on_version_conflict() {
let (_dir, handler) = create_test_handler().await;
let handler = handler.with_max_retries(5);
let tenant_id = TenantId::new("tenant-a");
let aggregate_id = AggregateId::new_v7();
let aggregate_type = AggregateType::from("Account");
let cmd1 = Command::new(
tenant_id.clone(),
aggregate_id.clone(),
aggregate_type.clone(),
serde_json::json!({"type": "deposit", "amount": 10}),
);
let cmd2 = Command::new(
tenant_id.clone(),
aggregate_id.clone(),
aggregate_type.clone(),
serde_json::json!({"type": "deposit", "amount": 20}),
);
let (r1, r2) = tokio::join!(handler.handle_command(cmd1), handler.handle_command(cmd2));
assert!(r1.is_ok());
assert!(r2.is_ok());
let events = handler
.stream
.fetch_events(&tenant_id, &aggregate_id, Version::initial())
.await
.unwrap();
assert_eq!(events.len(), 2);
assert_eq!(events[0].version, Version::from(1));
assert_eq!(events[1].version, Version::from(2));
}
#[tokio::test]
async fn snapshot_threshold_respected() {
let (_dir, handler) = create_test_handler().await;
let handler = handler.with_snapshot_threshold(3);
let tenant_id = TenantId::new("tenant-a");
let aggregate_id = AggregateId::new_v7();
let aggregate_type = AggregateType::from("Account");
for _ in 0..5 {
let cmd = Command::new(
tenant_id.clone(),
aggregate_id.clone(),
aggregate_type.clone(),
serde_json::json!({"type": "deposit", "amount": 1}),
);
handler.handle_command(cmd).await.unwrap();
}
let snapshot = handler
.storage
.get_snapshot(&tenant_id, &aggregate_id)
.await
.unwrap()
.unwrap();
assert_eq!(snapshot.version, Version::from(3));
}
#[tokio::test]
async fn empty_tenant_id_allowed_in_single_tenant_mode() {
let (_dir, handler) = create_test_handler().await;
let handler = handler.with_snapshot_threshold(1);
let tenant_id = TenantId::default();
let aggregate_id = AggregateId::new_v7();
let aggregate_type = AggregateType::from("Account");
let command = Command::new(
tenant_id.clone(),
aggregate_id.clone(),
aggregate_type.clone(),
serde_json::json!({"type": "deposit", "amount": 5}),
);
let events = handler.handle_command(command).await.unwrap();
assert_eq!(events.len(), 1);
let proj = handler
.query
.get(&tenant_id, &aggregate_id.to_string())
.await
.unwrap()
.unwrap();
assert_eq!(proj.state["balance"], 5);
}
#[tokio::test]
async fn query_returns_correct_aggregate_state() {
let (_dir, handler) = create_test_handler().await;
let handler = handler.with_snapshot_threshold(1);
let tenant_id = TenantId::new("tenant-a");
let aggregate_id = AggregateId::new_v7();
let aggregate_type = AggregateType::from("Account");
let command = Command::new(
tenant_id.clone(),
aggregate_id.clone(),
aggregate_type,
serde_json::json!({"type": "deposit", "amount": 100}),
);
handler.handle_command(command).await.unwrap();
let proj = handler
.query
.get(&tenant_id, &aggregate_id.to_string())
.await
.unwrap()
.unwrap();
assert_eq!(proj.state["balance"], 100);
}
#[tokio::test]
async fn system_failure_recovery_rehydrates_state() {
let dir = tempdir().unwrap();
let path = dir.path().join("test.mdbx");
let storage = StorageClient::open(path.to_string_lossy().to_string()).unwrap();
let stream = StreamClient::in_memory();
let executor = RuntimeExecutor::with_config(
crate::runtime::ExecutorConfig::default().with_mock_runtime(),
);
let handler1 = AggregateHandler::new(
storage,
stream.clone(),
executor.clone(),
"decide".to_string(),
"apply".to_string(),
)
.with_snapshot_threshold(2);
let tenant_id = TenantId::new("tenant-a");
let aggregate_id = AggregateId::new_v7();
let aggregate_type = AggregateType::from("Account");
for _ in 0..2 {
let cmd = Command::new(
tenant_id.clone(),
aggregate_id.clone(),
aggregate_type.clone(),
serde_json::json!({"type": "deposit", "amount": 10}),
);
handler1.handle_command(cmd).await.unwrap();
}
drop(handler1);
let storage2 = StorageClient::open(path.to_string_lossy().to_string()).unwrap();
let handler2 = AggregateHandler::new(
storage2,
stream,
executor,
"decide".to_string(),
"apply".to_string(),
);
let loaded = handler2
.load_aggregate(&tenant_id, &aggregate_id, &aggregate_type)
.await
.unwrap();
assert_eq!(loaded.state()["balance"], 20);
}
}


@@ -0,0 +1,5 @@
mod handler;
mod state;
pub use handler::*;
pub use state::*;


@@ -0,0 +1,448 @@
use crate::runtime::RuntimeExecutor;
use crate::types::{
AggregateError, AggregateId, AggregateType, Command, Event, Snapshot, TenantId, Version,
};
use serde_json::Value;
use std::collections::HashSet;
use uuid::Uuid;
#[derive(Debug, Clone)]
pub struct AggregateInstance {
aggregate_id: AggregateId,
aggregate_type: AggregateType,
tenant_id: TenantId,
snapshot_version: Version,
version: Version,
state: Value,
decide_program: String,
apply_program: String,
processed_command_ids: HashSet<Uuid>,
}
impl AggregateInstance {
pub fn new(
aggregate_id: AggregateId,
aggregate_type: AggregateType,
tenant_id: TenantId,
decide_program: String,
apply_program: String,
) -> Self {
Self {
aggregate_id,
aggregate_type,
tenant_id,
snapshot_version: Version::initial(),
version: Version::initial(),
state: Value::Null,
decide_program,
apply_program,
processed_command_ids: HashSet::new(),
}
}
pub fn aggregate_id(&self) -> &AggregateId {
&self.aggregate_id
}
pub fn aggregate_type(&self) -> &AggregateType {
&self.aggregate_type
}
pub fn tenant_id(&self) -> &TenantId {
&self.tenant_id
}
pub fn version(&self) -> Version {
self.version
}
pub fn snapshot_version(&self) -> Version {
self.snapshot_version
}
pub fn state(&self) -> &Value {
&self.state
}
pub fn rehydrate(
tenant_id: TenantId,
snapshot: Snapshot,
events: Vec<Event>,
decide_program: String,
apply_program: String,
) -> Result<Self, AggregateError> {
if snapshot.tenant_id != tenant_id {
return Err(AggregateError::TenantAccessDenied {
tenant_id: snapshot.tenant_id,
});
}
let mut instance = Self {
aggregate_id: snapshot.aggregate_id,
aggregate_type: snapshot.aggregate_type,
tenant_id,
snapshot_version: snapshot.version,
version: snapshot.version,
state: snapshot.state,
decide_program,
apply_program,
processed_command_ids: HashSet::new(),
};
for event in events {
instance.apply_event_internal(&event)?;
}
Ok(instance)
}
pub async fn rehydrate_with_executor(
tenant_id: TenantId,
snapshot: Snapshot,
events: Vec<Event>,
decide_program: String,
apply_program: String,
executor: &RuntimeExecutor,
) -> Result<Self, AggregateError> {
if snapshot.tenant_id != tenant_id {
return Err(AggregateError::TenantAccessDenied {
tenant_id: snapshot.tenant_id,
});
}
let mut instance = Self {
aggregate_id: snapshot.aggregate_id,
aggregate_type: snapshot.aggregate_type,
tenant_id,
snapshot_version: snapshot.version,
version: snapshot.version,
state: snapshot.state,
decide_program,
apply_program,
processed_command_ids: HashSet::new(),
};
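// Replay events: the apply program recomputes the state, while
// apply_event_internal tracks the version and processed command ids.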
for event in events {
let apply_result = executor
.execute_apply(&instance.state, &event, &instance.apply_program)
.await?;
instance.state = apply_result.new_state;
instance.apply_event_internal(&event)?;
}
Ok(instance)
}
fn apply_event_internal(&mut self, event: &Event) -> Result<(), AggregateError> {
if event.tenant_id != self.tenant_id {
return Err(AggregateError::TenantAccessDenied {
tenant_id: event.tenant_id.clone(),
});
}
self.processed_command_ids.insert(event.command_id);
self.version = event.version;
Ok(())
}
pub fn apply_event(&mut self, event: &Event) -> Result<(), AggregateError> {
self.apply_event_internal(event)
}
pub async fn handle_command(
&mut self,
command: Command,
executor: &RuntimeExecutor,
) -> Result<Vec<Event>, AggregateError> {
if command.tenant_id != self.tenant_id {
return Err(AggregateError::TenantAccessDenied {
tenant_id: command.tenant_id,
});
}
if command.aggregate_id != self.aggregate_id {
return Err(AggregateError::NotFound(command.aggregate_id));
}
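// Idempotency: a command that was already applied produces no new events.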
if self.processed_command_ids.contains(&command.command_id) {
return Ok(Vec::new());
}
let decide_result = executor
.execute_decide(&self.state, &command, &self.decide_program)
.await?;
let command_id = command.command_id;
let correlation_id = command
.metadata
.get("correlation_id")
.and_then(|v| v.as_str())
.map(|s| s.to_string());
let traceparent = command
.metadata
.get("traceparent")
.and_then(|v| v.as_str())
.map(|s| s.to_string());
let mut events = Vec::with_capacity(decide_result.events.len());
for event_payload in decide_result.events {
let event_type = event_payload
.get("type")
.and_then(|t| t.as_str())
.unwrap_or("Unknown")
.to_string();
let new_version = self.version.increment();
let mut event = Event::new(
self.tenant_id.clone(),
self.aggregate_id.clone(),
self.aggregate_type.clone(),
new_version,
event_type,
event_payload,
command_id,
);
event.correlation_id = correlation_id.clone();
event.traceparent = traceparent.clone();
let apply_result = executor
.execute_apply(&self.state, &event, &self.apply_program)
.await?;
self.state = apply_result.new_state;
self.version = new_version;
events.push(event);
}
self.processed_command_ids.insert(command_id);
Ok(events)
}
pub fn to_snapshot(&self) -> Snapshot {
Snapshot::new(
self.tenant_id.clone(),
self.aggregate_id.clone(),
self.aggregate_type.clone(),
self.version,
self.state.clone(),
)
}
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
fn test_instance() -> AggregateInstance {
AggregateInstance::new(
AggregateId::new_v7(),
AggregateType::new("Account"),
TenantId::new("tenant-a"),
"function decide(s,c) { return []; }".to_string(),
"function apply(s,e) { return s; }".to_string(),
)
}
#[test]
fn aggregate_instance_has_id_and_tenant() {
let agg = test_instance();
assert_eq!(agg.tenant_id().as_str(), "tenant-a");
assert_eq!(agg.aggregate_type().as_str(), "Account");
assert!(!agg.aggregate_id().to_string().is_empty());
}
#[test]
fn new_instance_starts_at_version_zero() {
let agg = test_instance();
assert_eq!(agg.version(), Version::initial());
}
#[test]
fn rehydrate_validates_tenant() {
let snapshot = Snapshot::new(
TenantId::new("tenant-a"),
AggregateId::new_v7(),
AggregateType::new("Account"),
Version::from(5),
json!({ "balance": 100 }),
);
let result = AggregateInstance::rehydrate(
TenantId::new("tenant-b"),
snapshot,
vec![],
"decide".to_string(),
"apply".to_string(),
);
assert!(result.is_err());
match result.unwrap_err() {
AggregateError::TenantAccessDenied { tenant_id } => {
assert_eq!(tenant_id, TenantId::new("tenant-a"));
}
_ => panic!("Expected TenantAccessDenied"),
}
}
#[tokio::test]
async fn rehydrate_applies_events() {
let tenant_id = TenantId::new("tenant-a");
let aggregate_id = AggregateId::new_v7();
let aggregate_type = AggregateType::new("Account");
let snapshot = Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
aggregate_type.clone(),
Version::from(2),
json!({ "balance": 100 }),
);
let event1 = Event::new(
tenant_id.clone(),
aggregate_id.clone(),
aggregate_type.clone(),
Version::from(3),
"deposited",
json!({ "amount": 50 }),
Uuid::now_v7(),
);
let event2 = Event::new(
tenant_id.clone(),
aggregate_id.clone(),
aggregate_type.clone(),
Version::from(4),
"withdrawn",
json!({ "amount": 25 }),
Uuid::now_v7(),
);
let executor = RuntimeExecutor::with_config(
crate::runtime::ExecutorConfig::default().with_mock_runtime(),
);
let instance = AggregateInstance::rehydrate_with_executor(
tenant_id,
snapshot,
vec![event1, event2],
"decide".to_string(),
"apply".to_string(),
&executor,
)
.await
.unwrap();
assert_eq!(instance.version(), Version::from(4));
assert_eq!(instance.state()["balance"], 125);
}
#[test]
fn to_snapshot_captures_state() {
let mut agg = test_instance();
agg.state = json!({ "balance": 150 });
agg.version = Version::from(3);
let snapshot = agg.to_snapshot();
assert_eq!(snapshot.state, json!({ "balance": 150 }));
assert_eq!(snapshot.version, Version::from(3));
}
#[tokio::test]
async fn idempotency_via_command_id_returns_empty() {
let tenant_id = TenantId::new("tenant-a");
let aggregate_id = AggregateId::new_v7();
let aggregate_type = AggregateType::new("Account");
let command_id = Uuid::now_v7();
let mut agg = AggregateInstance::new(
aggregate_id.clone(),
aggregate_type.clone(),
tenant_id.clone(),
"decide".to_string(),
"apply".to_string(),
);
let e = Event::new(
tenant_id.clone(),
aggregate_id.clone(),
aggregate_type.clone(),
Version::from(1),
"deposited",
json!({ "amount": 10 }),
command_id,
);
agg.apply_event(&e).unwrap();
let before_version = agg.version();
let mut cmd = Command::new(
tenant_id,
aggregate_id,
aggregate_type,
json!({ "type": "deposit", "amount": 10 }),
);
cmd.command_id = command_id;
let executor = RuntimeExecutor::new();
let events = agg.handle_command(cmd, &executor).await.unwrap();
assert!(events.is_empty());
assert_eq!(agg.version(), before_version);
}
#[tokio::test]
async fn handle_command_validates_tenant() {
let agg = AggregateInstance::new(
AggregateId::new_v7(),
AggregateType::new("Account"),
TenantId::new("tenant-a"),
"decide".to_string(),
"apply".to_string(),
);
let command = Command::new(
TenantId::new("tenant-b"),
agg.aggregate_id.clone(),
AggregateType::new("Account"),
json!({ "type": "deposit", "amount": 50 }),
);
let executor = RuntimeExecutor::new();
let mut agg = agg;
let result = agg.handle_command(command, &executor).await;
assert!(result.is_err());
match result.unwrap_err() {
AggregateError::TenantAccessDenied { .. } => {}
_ => panic!("Expected TenantAccessDenied"),
}
}
#[tokio::test]
async fn handle_command_validates_aggregate_id() {
let agg = test_instance();
let wrong_agg_id = AggregateId::new_v7();
let command = Command::new(
agg.tenant_id.clone(),
wrong_agg_id,
AggregateType::new("Account"),
json!({ "type": "deposit", "amount": 50 }),
);
let executor = RuntimeExecutor::new();
let mut agg = agg;
let result = agg.handle_command(command, &executor).await;
assert!(result.is_err());
match result.unwrap_err() {
AggregateError::NotFound(_) => {}
_ => panic!("Expected NotFound"),
}
}
#[test]
fn instance_is_send_sync() {
fn assert_send_sync<T: Send + Sync>() {}
assert_send_sync::<AggregateInstance>();
}
}


@@ -0,0 +1,3 @@
mod settings;
pub use settings::*;


@@ -0,0 +1,274 @@
use serde::{Deserialize, Serialize};
use std::path::Path;
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct Settings {
pub nats_url: String,
pub storage_path: String,
pub logger_socket: Option<String>,
pub snapshot_threshold: u64,
pub max_retries: u32,
pub multi_tenant_enabled: bool,
pub default_tenant_id: Option<String>,
pub shard_id: String,
pub placement_bucket: String,
pub placement_key: String,
pub grpc_addr: String,
pub decide_program: String,
pub apply_program: String,
}
impl Default for Settings {
fn default() -> Self {
Self {
nats_url: "nats://localhost:4222".to_string(),
storage_path: "./data".to_string(),
logger_socket: None,
snapshot_threshold: 10,
max_retries: 3,
multi_tenant_enabled: true,
default_tenant_id: None,
shard_id: "local".to_string(),
placement_bucket: "AGGREGATE_PLACEMENT".to_string(),
placement_key: "aggregate_placement".to_string(),
grpc_addr: "0.0.0.0:50051".to_string(),
decide_program: "function decide(state, command) { return []; }".to_string(),
apply_program: "function apply(state, event) { return state; }".to_string(),
}
}
}
impl Settings {
pub fn from_env() -> Result<Self, std::env::VarError> {
let mut settings = Self::default();
settings.apply_env_overrides();
Ok(settings)
}
pub fn from_yaml(yaml: &str) -> Result<Self, serde_yaml::Error> {
serde_yaml::from_str(yaml)
}
pub fn from_toml(toml_str: &str) -> Result<Self, toml::de::Error> {
toml::from_str(toml_str)
}
pub fn from_json(json: &str) -> Result<Self, serde_json::Error> {
serde_json::from_str(json)
}
pub fn from_file(path: impl AsRef<Path>) -> Result<Self, SettingsLoadError> {
let path = path.as_ref();
let raw = std::fs::read_to_string(path)?;
let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
match ext {
"yaml" | "yml" => Ok(Self::from_yaml(&raw)?),
"toml" => Ok(Self::from_toml(&raw)?),
"json" => Ok(Self::from_json(&raw)?),
_ => Err(SettingsLoadError::UnsupportedFormat {
path: path.display().to_string(),
}),
}
}
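/// Loads settings from a config file, then applies `AGGREGATE_*` environment
/// overrides (environment values win over file values).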
pub fn load_from_file_with_env_overrides(
path: impl AsRef<Path>,
) -> Result<Self, SettingsLoadError> {
let mut settings = Self::from_file(path)?;
settings.apply_env_overrides();
Ok(settings)
}
fn apply_env_overrides(&mut self) {
if let Ok(url) = std::env::var("AGGREGATE_NATS_URL") {
self.nats_url = url;
}
if let Ok(path) = std::env::var("AGGREGATE_STORAGE_PATH") {
self.storage_path = path;
}
if let Ok(socket) = std::env::var("AGGREGATE_LOGGER_SOCKET") {
self.logger_socket = Some(socket);
}
if let Ok(threshold) = std::env::var("AGGREGATE_SNAPSHOT_THRESHOLD") {
if let Ok(value) = threshold.parse() {
self.snapshot_threshold = value;
}
}
if let Ok(retries) = std::env::var("AGGREGATE_MAX_RETRIES") {
if let Ok(value) = retries.parse() {
self.max_retries = value;
}
}
if let Ok(enabled) = std::env::var("AGGREGATE_MULTI_TENANT") {
if let Ok(value) = enabled.parse() {
self.multi_tenant_enabled = value;
}
}
if let Ok(default_tenant_id) = std::env::var("AGGREGATE_DEFAULT_TENANT_ID") {
if default_tenant_id.is_empty() {
self.default_tenant_id = None;
} else {
self.default_tenant_id = Some(default_tenant_id);
}
}
if let Ok(shard_id) = std::env::var("AGGREGATE_SHARD_ID") {
if !shard_id.is_empty() {
self.shard_id = shard_id;
}
}
if let Ok(bucket) = std::env::var("AGGREGATE_PLACEMENT_BUCKET") {
if !bucket.is_empty() {
self.placement_bucket = bucket;
}
}
if let Ok(key) = std::env::var("AGGREGATE_PLACEMENT_KEY") {
if !key.is_empty() {
self.placement_key = key;
}
}
if let Ok(addr) = std::env::var("AGGREGATE_GRPC_ADDR") {
if !addr.is_empty() {
self.grpc_addr = addr;
}
}
if let Ok(program) = std::env::var("AGGREGATE_DECIDE_PROGRAM") {
if !program.is_empty() {
self.decide_program = program;
}
}
if let Ok(program) = std::env::var("AGGREGATE_APPLY_PROGRAM") {
if !program.is_empty() {
self.apply_program = program;
}
}
if let Ok(path) = std::env::var("AGGREGATE_DECIDE_PROGRAM_PATH") {
if let Ok(raw) = std::fs::read_to_string(path) {
if !raw.is_empty() {
self.decide_program = raw;
}
}
}
if let Ok(path) = std::env::var("AGGREGATE_APPLY_PROGRAM_PATH") {
if let Ok(raw) = std::fs::read_to_string(path) {
if !raw.is_empty() {
self.apply_program = raw;
}
}
}
}
pub fn validate(&self) -> Result<(), String> {
if self.nats_url.is_empty() {
return Err("NATS URL is required".to_string());
}
if self.storage_path.is_empty() {
return Err("Storage path is required".to_string());
}
Ok(())
}
}
#[derive(Debug, thiserror::Error)]
pub enum SettingsLoadError {
#[error("Failed to read config file: {0}")]
Io(#[from] std::io::Error),
#[error("Failed to parse YAML config: {0}")]
Yaml(#[from] serde_yaml::Error),
#[error("Failed to parse TOML config: {0}")]
Toml(#[from] toml::de::Error),
#[error("Failed to parse JSON config: {0}")]
Json(#[from] serde_json::Error),
#[error("Unsupported config format: {path}")]
UnsupportedFormat { path: String },
}
#[cfg(test)]
mod tests {
use super::*;
use tempfile::tempdir;
#[test]
fn settings_from_env() {
std::env::set_var("AGGREGATE_NATS_URL", "nats://localhost:4222");
let settings = Settings::from_env().unwrap();
assert_eq!(settings.nats_url, "nats://localhost:4222");
std::env::remove_var("AGGREGATE_NATS_URL");
}
#[test]
fn settings_validation() {
let settings = Settings {
nats_url: "".to_string(),
..Default::default()
};
assert!(settings.validate().is_err());
}
#[test]
fn settings_from_yaml_file_and_env_override() {
let dir = tempdir().unwrap();
let file_path = dir.path().join("aggregate.yaml");
std::fs::write(
&file_path,
r#"
nats_url: "nats://from-file:4222"
storage_path: "/tmp/agg"
snapshot_threshold: 25
multi_tenant_enabled: false
"#,
)
.unwrap();
std::env::set_var("AGGREGATE_NATS_URL", "nats://from-env:4222");
let settings = Settings::load_from_file_with_env_overrides(&file_path).unwrap();
assert_eq!(settings.nats_url, "nats://from-env:4222");
assert_eq!(settings.storage_path, "/tmp/agg");
assert_eq!(settings.snapshot_threshold, 25);
assert!(!settings.multi_tenant_enabled);
std::env::remove_var("AGGREGATE_NATS_URL");
}
#[test]
fn settings_from_toml_file() {
let dir = tempdir().unwrap();
let file_path = dir.path().join("aggregate.toml");
std::fs::write(
&file_path,
r#"
nats_url = "nats://from-file:4222"
storage_path = "/tmp/agg"
max_retries = 7
"#,
)
.unwrap();
let settings = Settings::from_file(&file_path).unwrap();
assert_eq!(settings.nats_url, "nats://from-file:4222");
assert_eq!(settings.storage_path, "/tmp/agg");
assert_eq!(settings.max_retries, 7);
}
#[test]
fn settings_is_clone() {
let s = Settings::default();
let _s2 = s.clone();
}
}


@@ -0,0 +1,24 @@
#[cfg(test)]
mod tests {
#[test]
fn dockerfile_is_multi_stage_and_builds_selected_binary() {
let raw = std::fs::read_to_string("../docker/Dockerfile.rust").unwrap();
assert!(raw.contains("AS builder"));
assert!(raw.contains("FROM debian:"));
assert!(raw.contains("ARG PACKAGE"));
assert!(raw.contains("ARG BIN"));
assert!(raw.contains("cargo build -p ${PACKAGE} --bin ${BIN} --release"));
assert!(raw.contains("COPY --from=builder"));
assert!(raw.contains("ENTRYPOINT"));
assert!(raw.contains("FROM"));
}
#[test]
fn docker_compose_is_valid_yaml_and_has_services() {
let raw = std::fs::read_to_string("../docker-compose.yml").unwrap();
let doc: serde_yaml::Value = serde_yaml::from_str(&raw).unwrap();
let services = doc.get("services").and_then(|v| v.as_mapping()).unwrap();
assert!(services.contains_key(serde_yaml::Value::from("nats")));
assert!(services.contains_key(serde_yaml::Value::from("aggregate")));
}
}


@@ -0,0 +1,7 @@
pub const TENANT_ID_METADATA_KEY: &str = "x-tenant-id";
pub mod proto {
tonic::include_proto!("aggregate.gateway.v1");
}
pub mod server;


@@ -0,0 +1,306 @@
use super::proto::command_service_server::{CommandService, CommandServiceServer};
use super::proto::{Event as ProtoEvent, SubmitCommandRequest, SubmitCommandResponse};
use crate::aggregate::AggregateHandler;
use crate::observability::Observability;
use crate::placement::TenantPlacementManager;
use crate::types::{AggregateError, AggregateId, AggregateType, Command, TenantId};
use std::collections::HashMap;
use std::str::FromStr;
use std::sync::Arc;
use tonic::{Request, Response, Status};
#[derive(Clone)]
pub struct GrpcCommandServer {
handler: AggregateHandler,
placement: Arc<TenantPlacementManager>,
observability: Arc<Observability>,
multi_tenant_enabled: bool,
default_tenant_id: Option<TenantId>,
}
impl GrpcCommandServer {
pub fn new(
handler: AggregateHandler,
placement: Arc<TenantPlacementManager>,
observability: Arc<Observability>,
multi_tenant_enabled: bool,
default_tenant_id: Option<TenantId>,
) -> Self {
Self {
handler,
placement,
observability,
multi_tenant_enabled,
default_tenant_id,
}
}
pub fn service(self) -> CommandServiceServer<Self> {
CommandServiceServer::new(self)
}
}
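// submit_command flow: extract correlation/trace/tenant metadata, resolve
// and validate the tenant id, acquire a placement guard (rejecting unhosted
// or draining tenants), run the aggregate handler, then echo the tracing
// metadata back on the response.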
#[tonic::async_trait]
impl CommandService for GrpcCommandServer {
async fn submit_command(
&self,
request: Request<SubmitCommandRequest>,
) -> Result<Response<SubmitCommandResponse>, Status> {
let correlation_id = request
.metadata()
.get("x-correlation-id")
.and_then(|v| v.to_str().ok())
.map(|s| s.trim())
.filter(|s| !s.is_empty())
.map(|s| s.to_string());
let traceparent = request
.metadata()
.get("traceparent")
.and_then(|v| v.to_str().ok())
.map(|s| s.trim())
.filter(|s| !s.is_empty())
.map(|s| s.to_string());
let trace_id = traceparent.as_deref().and_then(trace_id_from_traceparent);
let metadata_tenant = request
.metadata()
.get(super::TENANT_ID_METADATA_KEY)
.and_then(|v| v.to_str().ok())
.unwrap_or("")
.to_string();
let req = request.into_inner();
let tenant_id = resolve_tenant_id(
&req.tenant_id,
&metadata_tenant,
self.multi_tenant_enabled,
self.default_tenant_id.as_ref(),
)
.map_err(Status::invalid_argument)?;
if !tenant_id.as_str().is_empty() && !is_valid_tenant_id(tenant_id.as_str()) {
return Err(Status::invalid_argument("invalid tenant_id"));
}
let aggregate_id = AggregateId::from_str(&req.aggregate_id)
.map_err(|e| Status::invalid_argument(e.to_string()))?;
let aggregate_type = AggregateType::from(req.aggregate_type);
let payload: serde_json::Value = serde_json::from_str(&req.payload_json)
.map_err(|e| Status::invalid_argument(e.to_string()))?;
let command_id = if req.command_id.is_empty() {
uuid::Uuid::now_v7()
} else {
uuid::Uuid::parse_str(&req.command_id)
.map_err(|e| Status::invalid_argument(e.to_string()))?
};
let mut metadata: HashMap<String, serde_json::Value> = req
.metadata
.into_iter()
.map(|(k, v)| (k, serde_json::Value::String(v)))
.collect();
if let Some(correlation_id) = correlation_id.as_deref() {
metadata.insert(
"correlation_id".to_string(),
serde_json::Value::String(correlation_id.to_string()),
);
}
if let Some(traceparent) = traceparent.as_deref() {
metadata.insert(
"traceparent".to_string(),
serde_json::Value::String(traceparent.to_string()),
);
}
let command = Command {
tenant_id: tenant_id.clone(),
command_id,
aggregate_id: aggregate_id.clone(),
aggregate_type: aggregate_type.clone(),
payload,
metadata,
};
let span = self.observability.start_command_span(
&aggregate_id.to_string(),
aggregate_type.as_str(),
tenant_id.as_str(),
&command_id.to_string(),
correlation_id.as_deref(),
trace_id.as_deref(),
);
let _guard = self
.placement
.begin_command(&tenant_id)
.await
.map_err(|e| {
self.observability.record_command_error(&span, &e);
map_aggregate_error(e)
})?;
let events = self.handler.handle_command(command).await.map_err(|e| {
self.observability.record_command_error(&span, &e);
map_aggregate_error(e)
})?;
self.observability
.record_command_success(&span, events.len());
let proto_events = events
.into_iter()
.map(|e| ProtoEvent {
event_id: e.event_id.to_string(),
command_id: e.command_id.to_string(),
aggregate_id: e.aggregate_id.to_string(),
aggregate_type: e.aggregate_type.to_string(),
version: e.version.as_u64(),
event_type: e.event_type,
payload_json: serde_json::to_string(&e.payload)
.unwrap_or_else(|_| "{}".to_string()),
timestamp_rfc3339: e.timestamp.to_rfc3339(),
})
.collect();
let mut response = Response::new(SubmitCommandResponse {
events: proto_events,
});
if let Some(correlation_id) = correlation_id.as_deref() {
if let Ok(v) = tonic::metadata::MetadataValue::try_from(correlation_id) {
response.metadata_mut().insert("x-correlation-id", v);
}
}
if let Some(traceparent) = traceparent.as_deref() {
if let Ok(v) = tonic::metadata::MetadataValue::try_from(traceparent) {
response.metadata_mut().insert("traceparent", v);
}
}
Ok(response)
}
}
fn trace_id_from_traceparent(traceparent: &str) -> Option<String> {
shared::trace_id_from_traceparent(traceparent).map(|s| s.to_string())
}
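// Map domain errors onto gRPC status codes: placement problems surface as
// UNAVAILABLE (retry on another shard), optimistic-concurrency conflicts as
// ABORTED (retry here), and user-program failures as FAILED_PRECONDITION.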
fn map_aggregate_error(error: AggregateError) -> Status {
match error {
AggregateError::TenantNotHosted { .. } => Status::unavailable(error.to_string()),
AggregateError::TenantDraining { .. } => Status::unavailable(error.to_string()),
AggregateError::TenantAccessDenied { .. } => Status::permission_denied(error.to_string()),
AggregateError::ValidationError(_) => Status::invalid_argument(error.to_string()),
AggregateError::VersionConflict { .. } => Status::aborted(error.to_string()),
AggregateError::NotFound(_) => Status::not_found(error.to_string()),
AggregateError::StorageError(_) => Status::internal(error.to_string()),
AggregateError::StreamError(_) => Status::unavailable(error.to_string()),
AggregateError::RehydrationError(_) => Status::internal(error.to_string()),
AggregateError::DecideError(_) => Status::failed_precondition(error.to_string()),
AggregateError::ApplyError(_) => Status::failed_precondition(error.to_string()),
}
}
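// Tenant resolution precedence: explicit request field, then the
// x-tenant-id metadata header, then the configured default. In
// multi-tenant mode a tenant id is required; in single-tenant mode an
// empty/default TenantId is accepted.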
fn resolve_tenant_id(
explicit: &str,
metadata: &str,
multi_tenant_enabled: bool,
default_tenant_id: Option<&TenantId>,
) -> Result<TenantId, &'static str> {
if !explicit.is_empty() {
return Ok(TenantId::new(explicit));
}
if !metadata.is_empty() {
return Ok(TenantId::new(metadata));
}
if multi_tenant_enabled {
if let Some(default_tenant_id) = default_tenant_id {
return Ok(default_tenant_id.clone());
}
return Err("tenant_id is required");
}
Ok(TenantId::default())
}
fn is_valid_tenant_id(id: &str) -> bool {
id.chars()
.all(|c| c.is_alphanumeric() || c == '-' || c == '_')
}
#[cfg(test)]
mod tests {
use super::*;
use crate::runtime::{ExecutorConfig, RuntimeExecutor};
use crate::storage::StorageClient;
use crate::stream::StreamClient;
use tempfile::tempdir;
use tonic::transport::{Channel, Server};
#[tokio::test]
async fn grpc_submit_command_rejects_unhosted_tenant() {
let obs = Arc::new(Observability::default());
let placement = Arc::new(TenantPlacementManager::new(obs.clone()));
placement
.set_hosted_tenants(vec!["tenant-a".to_string()])
.await;
let dir = tempdir().unwrap();
let path = dir.path().join("test.mdbx");
let storage = StorageClient::open(path.to_string_lossy().to_string()).unwrap();
let stream = StreamClient::in_memory();
let executor = RuntimeExecutor::with_config(ExecutorConfig::default().with_mock_runtime());
let handler = AggregateHandler::new(
storage,
stream,
executor,
"decide".to_string(),
"apply".to_string(),
);
let service = GrpcCommandServer::new(handler, placement, obs, true, None).service();
let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
let addr = listener.local_addr().unwrap();
tokio::spawn(async move {
Server::builder()
.add_service(service)
.serve_with_incoming_shutdown(
tokio_stream::wrappers::TcpListenerStream::new(listener),
async move {
tokio::time::sleep(std::time::Duration::from_millis(200)).await;
},
)
.await
.unwrap();
});
let channel = Channel::from_shared(format!("http://{}", addr))
.unwrap()
.connect()
.await
.unwrap();
let mut client =
super::super::proto::command_service_client::CommandServiceClient::new(channel);
let resp = client
.submit_command(SubmitCommandRequest {
tenant_id: "tenant-b".to_string(),
command_id: uuid::Uuid::now_v7().to_string(),
aggregate_id: AggregateId::new_v7().to_string(),
aggregate_type: "Account".to_string(),
payload_json: "{}".to_string(),
metadata: HashMap::new(),
})
.await;
assert!(resp.is_err());
let status = resp.unwrap_err();
assert_eq!(status.code(), tonic::Code::Unavailable);
}
}


@@ -0,0 +1,230 @@
use crate::server::{AdminResponse, AdminServer};
use axum::extract::{Path, State};
use axum::http::{HeaderValue, StatusCode};
use axum::response::{IntoResponse, Response};
use axum::routing::{get, post};
use axum::{Json, Router};
use serde::Deserialize;
use std::future::Future;
use std::sync::Arc;
#[derive(Clone)]
pub struct HttpState {
pub admin: Arc<AdminServer>,
}
pub fn router(admin: Arc<AdminServer>) -> Router {
let state = HttpState { admin };
Router::new()
.route("/health", get(health_route))
.route("/ready", get(ready))
.route("/metrics", get(metrics))
.route("/admin/tenants", get(admin_tenants))
.route("/admin/tenant/:tenant_id/status", get(admin_tenant_status))
.route("/admin/tenant/:tenant_id/ready", get(admin_tenant_ready))
.route("/admin/tenant/:tenant_id/drain", post(admin_tenant_drain))
.route("/admin/drain", post(admin_drain))
.route("/admin/reload", post(admin_reload))
.with_state(state)
}
pub async fn serve(
listener: tokio::net::TcpListener,
admin: Arc<AdminServer>,
shutdown: impl Future<Output = ()> + Send + 'static,
) {
axum::serve(listener, router(admin))
.with_graceful_shutdown(shutdown)
.await
.expect("http server failed");
}
async fn health_route(State(state): State<HttpState>) -> Response {
proxy_json(state.admin.get("/health").await).await
}
async fn ready(State(state): State<HttpState>) -> Response {
proxy_json(state.admin.get("/ready").await).await
}
async fn admin_tenants(State(state): State<HttpState>) -> Response {
proxy_json(state.admin.get("/admin/tenants").await).await
}
async fn metrics(State(state): State<HttpState>) -> Response {
let resp = state.admin.get("/metrics").await;
let mut response = (StatusCode::OK, resp.text().await).into_response();
response.headers_mut().insert(
axum::http::header::CONTENT_TYPE,
HeaderValue::from_static("text/plain; version=0.0.4"),
);
response
}
async fn admin_drain(
State(state): State<HttpState>,
Json(body): Json<serde_json::Value>,
) -> Response {
proxy_json(state.admin.post("/admin/drain", body).await).await
}
async fn admin_reload(
State(state): State<HttpState>,
Json(body): Json<serde_json::Value>,
) -> Response {
proxy_json(state.admin.post("/admin/reload", body).await).await
}
#[derive(Debug, Deserialize)]
struct DrainBody {
timeout_ms: Option<u64>,
}
async fn admin_tenant_status(
State(state): State<HttpState>,
Path(tenant_id): Path<String>,
) -> Response {
let status = state
.admin
.placement_manager()
.tenant_status(&crate::types::TenantId::new(tenant_id))
.await;
let mut response = (StatusCode::OK, serde_json::to_string(&status).unwrap()).into_response();
response.headers_mut().insert(
axum::http::header::CONTENT_TYPE,
HeaderValue::from_static("application/json"),
);
response
}
async fn admin_tenant_ready(
State(state): State<HttpState>,
Path(tenant_id): Path<String>,
) -> Response {
let tenant_id = crate::types::TenantId::new(tenant_id);
let status = state
.admin
.placement_manager()
.tenant_status(&tenant_id)
.await;
let ready = state.admin.health_checker().is_ready() && status.accepting;
let mut response = (StatusCode::OK, serde_json::to_string(&ready).unwrap()).into_response();
response.headers_mut().insert(
axum::http::header::CONTENT_TYPE,
HeaderValue::from_static("application/json"),
);
response
}
async fn admin_tenant_drain(
State(state): State<HttpState>,
Path(tenant_id): Path<String>,
body: Option<Json<DrainBody>>,
) -> Response {
let tenant_id = crate::types::TenantId::new(tenant_id);
state
.admin
.placement_manager()
.drain_tenant(&tenant_id)
.await;
let timeout = body
.and_then(|b| b.timeout_ms)
.map(std::time::Duration::from_millis)
.unwrap_or(std::time::Duration::from_secs(10));
let drained = state
.admin
.placement_manager()
.wait_drained_with_timeout(&tenant_id, timeout)
.await;
let status = state
.admin
.placement_manager()
.tenant_status(&tenant_id)
.await;
let resp = serde_json::json!({ "drained": drained, "status": status });
let mut response = (StatusCode::OK, serde_json::to_string(&resp).unwrap()).into_response();
response.headers_mut().insert(
axum::http::header::CONTENT_TYPE,
HeaderValue::from_static("application/json"),
);
response
}
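// NOTE: admin responses are proxied with a fixed 200 status; upstream
// status codes are not forwarded to the caller.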
async fn proxy_json(resp: AdminResponse) -> Response {
let mut response = (StatusCode::OK, resp.text().await).into_response();
response.headers_mut().insert(
axum::http::header::CONTENT_TYPE,
HeaderValue::from_static("application/json"),
);
response
}
#[cfg(test)]
mod tests {
use super::*;
use crate::observability::Observability;
use crate::server::HealthChecker;
use tokio::io::{AsyncReadExt, AsyncWriteExt};
async fn http_get(addr: std::net::SocketAddr, path: &str) -> String {
let mut stream = tokio::net::TcpStream::connect(addr).await.unwrap();
let req = format!(
"GET {} HTTP/1.1\r\nHost: localhost\r\nConnection: close\r\n\r\n",
path
);
stream.write_all(req.as_bytes()).await.unwrap();
let mut buf = Vec::new();
stream.read_to_end(&mut buf).await.unwrap();
String::from_utf8_lossy(&buf).to_string()
}
#[tokio::test]
async fn http_server_exposes_health_ready_metrics() {
let health = HealthChecker::new();
health.set_storage_healthy(true);
health.set_stream_healthy(true);
let admin = Arc::new(AdminServer::new(
Observability::default(),
health,
"test-shard".to_string(),
));
admin
.placement_manager()
.set_hosted_tenants(vec!["test-tenant".to_string()])
.await;
let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
let addr = listener.local_addr().unwrap();
let (tx, rx) = tokio::sync::oneshot::channel::<()>();
let handle = tokio::spawn(async move {
serve(listener, admin, async move {
let _ = rx.await;
})
.await;
});
let health_resp = http_get(addr, "/health").await;
assert!(health_resp.starts_with("HTTP/1.1 200"));
let ready_resp = http_get(addr, "/ready").await;
assert!(ready_resp.starts_with("HTTP/1.1 200"));
let metrics_resp = http_get(addr, "/metrics").await;
assert!(metrics_resp.starts_with("HTTP/1.1 200"));
assert!(metrics_resp.contains("aggregate_commands_total"));
let status_resp = http_get(addr, "/admin/tenant/test-tenant/status").await;
assert!(status_resp.starts_with("HTTP/1.1 200"));
assert!(status_resp.contains("test-tenant"));
let ready_resp = http_get(addr, "/admin/tenant/test-tenant/ready").await;
assert!(ready_resp.starts_with("HTTP/1.1 200"));
let _ = tx.send(());
handle.await.unwrap();
}
}

aggregate/src/lib.rs

@@ -0,0 +1,26 @@
pub mod aggregate;
pub mod config;
pub mod container;
pub mod gateway;
pub mod http_server;
pub mod observability;
pub mod placement;
pub mod query;
pub mod runtime;
pub mod server;
pub mod storage;
pub mod stream;
pub mod swarm;
pub mod types;
pub use aggregate::{AggregateHandler, AggregateInstance};
pub use config::Settings;
pub use query::{
AggregateProjection, QueryClient, QueryConfig, QueryError, QueryRequest, QueryResponse,
QueryServer, StateProjection,
};
pub use runtime::{ExecutorConfig, RuntimeExecutor};
pub use server::{CommandRequest, CommandResponse, CommandServer, HealthChecker, HealthStatus};
pub use storage::StorageClient;
pub use stream::StreamClient;
pub use types::*;

aggregate/src/main.rs

@@ -0,0 +1,213 @@
use aggregate::config::Settings;
use aggregate::gateway::server::GrpcCommandServer;
use aggregate::http_server;
use aggregate::observability::Observability;
use aggregate::runtime::RuntimeExecutor;
use aggregate::server::AdminServer;
use aggregate::storage::StorageClient;
use aggregate::stream::StreamClient;
use aggregate::swarm::TenantPlacementKvClient;
use aggregate::{aggregate::AggregateHandler, placement::TenantPlacementManager};
use futures::StreamExt;
use std::sync::Arc;
use std::time::Duration;
#[tokio::main]
async fn main() {
match std::env::args().nth(1).as_deref() {
Some("-h") | Some("--help") => {
print_help();
return;
}
Some("serve") | None => serve().await,
Some(other) => {
eprintln!("Unknown command: {}", other);
print_help();
}
}
}
async fn serve() {
let settings = load_settings();
let observability = Observability::default();
let health_checker = aggregate::server::HealthChecker::new();
let admin = Arc::new(AdminServer::new(
observability,
health_checker,
settings.shard_id.clone(),
));
spawn_health_probe(admin.clone(), settings.clone());
spawn_placement_watcher(admin.placement_manager(), settings.clone());
let storage = StorageClient::open(settings.storage_path.clone()).unwrap();
let stream = StreamClient::new(settings.nats_url.clone()).await.unwrap();
let _ = stream.setup_stream().await;
let executor = RuntimeExecutor::new();
let handler = AggregateHandler::new(
storage,
stream,
executor,
settings.decide_program.clone(),
settings.apply_program.clone(),
)
.with_snapshot_threshold(settings.snapshot_threshold)
.with_max_retries(settings.max_retries);
let grpc_addr: std::net::SocketAddr = settings.grpc_addr.parse().unwrap();
let grpc_service = GrpcCommandServer::new(
handler,
admin.placement_manager(),
admin.observability(),
settings.multi_tenant_enabled,
settings
.default_tenant_id
.as_ref()
.map(aggregate::types::TenantId::new),
)
.service();
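// NOTE: the HTTP bind address is read straight from the environment and
// bypasses any AGGREGATE_CONFIG_PATH file, unlike grpc_addr above, which
// comes from Settings.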
let addr = std::env::var("AGGREGATE_HTTP_ADDR").unwrap_or_else(|_| "0.0.0.0:8080".to_string());
let listener = tokio::net::TcpListener::bind(&addr).await.unwrap();
let (shutdown_tx, _) = tokio::sync::broadcast::channel::<()>(1);
let mut http_shutdown = shutdown_tx.subscribe();
let mut grpc_shutdown = shutdown_tx.subscribe();
let http_task = tokio::spawn(async move {
http_server::serve(listener, admin, async move {
let _ = http_shutdown.recv().await;
})
.await;
});
let grpc_task = tokio::spawn(async move {
tonic::transport::Server::builder()
.add_service(grpc_service)
.serve_with_shutdown(grpc_addr, async move {
let _ = grpc_shutdown.recv().await;
})
.await
.unwrap();
});
let _ = tokio::signal::ctrl_c().await;
let _ = shutdown_tx.send(());
let _ = tokio::join!(http_task, grpc_task);
}
fn print_help() {
println!(
"aggregate\n\nUSAGE:\n aggregate [COMMAND]\n\nCOMMANDS:\n serve Start the HTTP server (default)\n\nOPTIONS:\n -h, --help Print help\n"
);
}
fn load_settings() -> Settings {
if let Ok(path) = std::env::var("AGGREGATE_CONFIG_PATH") {
if let Ok(settings) = Settings::load_from_file_with_env_overrides(path) {
return settings;
}
}
Settings::from_env().unwrap_or_default()
}
fn spawn_health_probe(admin: Arc<AdminServer>, settings: Settings) {
tokio::spawn(async move {
loop {
let storage_ok = StorageClient::open(settings.storage_path.clone()).is_ok();
admin.health_checker().set_storage_healthy(storage_ok);
let stream_ok = tokio::time::timeout(Duration::from_secs(1), async {
let stream = StreamClient::new(settings.nats_url.clone()).await?;
let _ = stream.setup_stream().await;
Ok::<_, aggregate::types::AggregateError>(())
})
.await
.is_ok_and(|r| r.is_ok());
admin.health_checker().set_stream_healthy(stream_ok);
tokio::time::sleep(Duration::from_secs(5)).await;
}
});
}
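// Watch the tenant placement map in NATS KV: read the current value once,
// then follow the watch stream; on any connect/watch error, back off for
// one second and reconnect.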
fn spawn_placement_watcher(placement: Arc<TenantPlacementManager>, settings: Settings) {
tokio::spawn(async move {
loop {
let client = TenantPlacementKvClient::connect(
settings.nats_url.clone(),
settings.placement_bucket.clone(),
)
.await;
let client = match client {
Ok(c) => c,
Err(_) => {
tokio::time::sleep(Duration::from_secs(1)).await;
continue;
}
};
if let Ok(Some(value)) = client.get_json(&settings.placement_key).await {
apply_placement_value(&placement, &settings.shard_id, value).await;
}
let watch = client.watch_json(&settings.placement_key).await;
let mut stream = match watch {
Ok(s) => s,
Err(_) => {
tokio::time::sleep(Duration::from_secs(1)).await;
continue;
}
};
while let Some(update) = stream.next().await {
if let Ok(value) = update {
apply_placement_value(&placement, &settings.shard_id, value).await;
}
}
tokio::time::sleep(Duration::from_secs(1)).await;
}
});
}
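// The placement value is accepted in two shapes: a flat JSON object of
// tenant_id -> shard_id, or the same map nested under a "placement" key.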
async fn apply_placement_value(
placement: &TenantPlacementManager,
shard_id: &str,
value: serde_json::Value,
) {
if let Some(map) = value.as_object() {
let placement_map = map
.iter()
.filter_map(|(k, v)| v.as_str().map(|s| (k.clone(), s.to_string())))
.collect::<std::collections::HashMap<_, _>>();
placement
.apply_placement_map(shard_id, &placement_map)
.await;
return;
}
if let Some(map) = value.get("placement").and_then(|v| v.as_object()) {
let placement_map = map
.iter()
.filter_map(|(k, v)| v.as_str().map(|s| (k.clone(), s.to_string())))
.collect::<std::collections::HashMap<_, _>>();
placement
.apply_placement_map(shard_id, &placement_map)
.await;
}
}
#[cfg(test)]
mod tests {
#[test]
fn binary_exists() {
assert!(std::env::current_exe().is_ok());
}
}


@@ -0,0 +1,365 @@
use std::collections::HashMap;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::RwLock;
use std::time::Duration;
pub trait MetricsRegistry: Send + Sync {
fn increment_counter(&self, name: &str, labels: &[(&str, &str)]);
fn record_histogram(&self, name: &str, value: f64, labels: &[(&str, &str)]);
fn export_prometheus(&self) -> String;
}
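// Hand-rolled cumulative histogram with fixed bucket boundaries (seconds).
// Counts are plain atomics; the sum is kept as integer microseconds so it
// fits in an AtomicU64 without a float CAS loop.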
#[derive(Debug)]
struct AtomicHistogram {
count: AtomicU64,
sum: AtomicU64,
buckets: Vec<(f64, AtomicU64)>,
}
impl AtomicHistogram {
fn new() -> Self {
let buckets: Vec<(f64, AtomicU64)> = vec![
0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0,
]
.into_iter()
.map(|v| (v, AtomicU64::new(0)))
.collect();
Self {
count: AtomicU64::new(0),
sum: AtomicU64::new(0),
buckets,
}
}
fn observe(&self, duration: Duration) {
// Buckets are in seconds, matching the *_seconds metric names below.
let value = duration.as_secs_f64();
self.count.fetch_add(1, Ordering::Relaxed);
// Keep the running sum as integer microseconds so it fits an AtomicU64.
self.sum
.fetch_add((value * 1_000_000.0) as u64, Ordering::Relaxed);
for (threshold, count) in &self.buckets {
if value <= *threshold {
count.fetch_add(1, Ordering::Relaxed);
}
}
}
fn export(&self, name: &str, labels: &str) -> String {
let mut output = String::new();
let count = self.count.load(Ordering::Relaxed);
let sum = self.sum.load(Ordering::Relaxed) as f64 / 1_000_000.0;
let label_str = if labels.is_empty() {
String::new()
} else {
format!("{{{}}}", labels.trim_start_matches(','))
};
output.push_str(&format!("{}_sum{} {}\n", name, label_str, sum));
output.push_str(&format!("{}_count{} {}\n", name, label_str, count));
for (threshold, bucket_count) in &self.buckets {
let c = bucket_count.load(Ordering::Relaxed);
let bucket_labels = if labels.is_empty() {
format!("le=\"{}\"", threshold)
} else {
format!("le=\"{}\"{}", threshold, labels)
};
output.push_str(&format!("{}_bucket{{{}}} {}\n", name, bucket_labels, c));
}
let inf_labels = if labels.is_empty() {
"le=\"+Inf\"".to_string()
} else {
format!("le=\"+Inf\"{}", labels)
};
output.push_str(&format!("{}_bucket{{{}}} {}\n", name, inf_labels, count));
output
}
}
impl Default for AtomicHistogram {
fn default() -> Self {
Self::new()
}
}
#[derive(Debug)]
pub struct Metrics {
commands_total: RwLock<HashMap<String, AtomicU64>>,
command_errors_total: RwLock<HashMap<String, AtomicU64>>,
command_duration: RwLock<HashMap<String, AtomicHistogram>>,
version_conflicts: AtomicU64,
tenant_errors: AtomicU64,
rehydration_duration: RwLock<HashMap<String, AtomicHistogram>>,
in_flight: RwLock<HashMap<String, AtomicU64>>,
}
impl Metrics {
pub fn new() -> Self {
Self {
commands_total: RwLock::new(HashMap::new()),
command_errors_total: RwLock::new(HashMap::new()),
command_duration: RwLock::new(HashMap::new()),
version_conflicts: AtomicU64::new(0),
tenant_errors: AtomicU64::new(0),
rehydration_duration: RwLock::new(HashMap::new()),
in_flight: RwLock::new(HashMap::new()),
}
}
pub fn increment_commands_total(&self, aggregate_type: &str, tenant_id: &str) {
let key = format!("{}:{}", aggregate_type, tenant_id);
let map = self.commands_total.read().unwrap();
if let Some(counter) = map.get(&key) {
counter.fetch_add(1, Ordering::Relaxed);
return;
}
drop(map);
let mut map = self.commands_total.write().unwrap();
let counter = map.entry(key).or_insert_with(|| AtomicU64::new(0));
counter.fetch_add(1, Ordering::Relaxed);
}
pub fn increment_command_errors_total(
&self,
aggregate_type: &str,
tenant_id: &str,
error_kind: &str,
) {
let key = format!("{}:{}:{}", aggregate_type, tenant_id, error_kind);
let map = self.command_errors_total.read().unwrap();
if let Some(counter) = map.get(&key) {
counter.fetch_add(1, Ordering::Relaxed);
return;
}
drop(map);
let mut map = self.command_errors_total.write().unwrap();
let counter = map.entry(key).or_insert_with(|| AtomicU64::new(0));
counter.fetch_add(1, Ordering::Relaxed);
}
pub fn record_command_duration(&self, duration: Duration, aggregate_type: &str) {
let mut map = self.command_duration.write().unwrap();
let histogram = map.entry(aggregate_type.to_string()).or_default();
histogram.observe(duration);
}
pub fn increment_version_conflicts(&self) {
self.version_conflicts.fetch_add(1, Ordering::Relaxed);
}
pub fn increment_tenant_errors(&self) {
self.tenant_errors.fetch_add(1, Ordering::Relaxed);
}
pub fn record_rehydration_duration(&self, duration: Duration, aggregate_type: &str) {
let mut map = self.rehydration_duration.write().unwrap();
let histogram = map.entry(aggregate_type.to_string()).or_default();
histogram.observe(duration);
}
pub fn set_in_flight(&self, tenant_id: &str, value: u64) {
let map = self.in_flight.read().unwrap();
if let Some(gauge) = map.get(tenant_id) {
gauge.store(value, Ordering::Relaxed);
return;
}
drop(map);
let mut map = self.in_flight.write().unwrap();
let gauge = map
.entry(tenant_id.to_string())
.or_insert_with(|| AtomicU64::new(0));
gauge.store(value, Ordering::Relaxed);
}
pub fn export_prometheus(&self) -> String {
let mut output = String::new();
output.push_str("# HELP aggregate_commands_total Total number of commands processed\n");
output.push_str("# TYPE aggregate_commands_total counter\n");
{
let map = self.commands_total.read().unwrap();
for (key, counter) in map.iter() {
let parts: Vec<&str> = key.split(':').collect();
if parts.len() == 2 {
let value = counter.load(Ordering::Relaxed);
output.push_str(&format!(
"aggregate_commands_total{{aggregate_type=\"{}\",tenant_id=\"{}\"}} {}\n",
parts[0], parts[1], value
));
}
}
}
output.push_str("\n# HELP aggregate_command_errors_total Total number of command errors\n");
output.push_str("# TYPE aggregate_command_errors_total counter\n");
{
let map = self.command_errors_total.read().unwrap();
for (key, counter) in map.iter() {
let parts: Vec<&str> = key.split(':').collect();
if parts.len() == 3 {
let value = counter.load(Ordering::Relaxed);
output.push_str(&format!(
"aggregate_command_errors_total{{aggregate_type=\"{}\",tenant_id=\"{}\",error_kind=\"{}\"}} {}\n",
parts[0], parts[1], parts[2], value
));
}
}
}
output
.push_str("\n# HELP aggregate_command_duration_seconds Command processing duration\n");
output.push_str("# TYPE aggregate_command_duration_seconds histogram\n");
{
let map = self.command_duration.read().unwrap();
for (aggregate_type, histogram) in map.iter() {
let labels = format!(",aggregate_type=\"{}\"", aggregate_type);
output.push_str(&histogram.export("aggregate_command_duration_seconds", &labels));
}
}
output
.push_str("\n# HELP aggregate_version_conflicts_total Total version conflict errors\n");
output.push_str("# TYPE aggregate_version_conflicts_total counter\n");
output.push_str(&format!(
"aggregate_version_conflicts_total {}\n",
self.version_conflicts.load(Ordering::Relaxed)
));
output
.push_str("\n# HELP aggregate_tenant_errors_total Total tenant access denied errors\n");
output.push_str("# TYPE aggregate_tenant_errors_total counter\n");
output.push_str(&format!(
"aggregate_tenant_errors_total {}\n",
self.tenant_errors.load(Ordering::Relaxed)
));
output.push_str(
"\n# HELP aggregate_rehydration_duration_seconds Aggregate rehydration duration\n",
);
output.push_str("# TYPE aggregate_rehydration_duration_seconds histogram\n");
{
let map = self.rehydration_duration.read().unwrap();
for (aggregate_type, histogram) in map.iter() {
let labels = format!(",aggregate_type=\"{}\"", aggregate_type);
output
.push_str(&histogram.export("aggregate_rehydration_duration_seconds", &labels));
}
}
output.push_str(
"\n# HELP aggregate_in_flight_commands Number of in-flight commands by tenant\n",
);
output.push_str("# TYPE aggregate_in_flight_commands gauge\n");
{
let map = self.in_flight.read().unwrap();
for (tenant_id, gauge) in map.iter() {
let value = gauge.load(Ordering::Relaxed);
output.push_str(&format!(
"aggregate_in_flight_commands{{tenant_id=\"{}\"}} {}\n",
tenant_id, value
));
}
}
output
}
}
impl Default for Metrics {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::time::Duration;
#[test]
fn metrics_increment_commands() {
let metrics = Metrics::new();
metrics.increment_commands_total("Account", "tenant-a");
metrics.increment_commands_total("Account", "tenant-a");
metrics.increment_commands_total("Account", "tenant-b");
let output = metrics.export_prometheus();
assert!(output.contains(
"aggregate_commands_total{aggregate_type=\"Account\",tenant_id=\"tenant-a\"} 2"
));
assert!(output.contains(
"aggregate_commands_total{aggregate_type=\"Account\",tenant_id=\"tenant-b\"} 1"
));
}
#[test]
fn metrics_records_version_conflicts() {
let metrics = Metrics::new();
metrics.increment_version_conflicts();
metrics.increment_version_conflicts();
let output = metrics.export_prometheus();
assert!(output.contains("aggregate_version_conflicts_total 2"));
}
#[test]
fn metrics_records_tenant_errors() {
let metrics = Metrics::new();
metrics.increment_tenant_errors();
let output = metrics.export_prometheus();
assert!(output.contains("aggregate_tenant_errors_total 1"));
}
#[test]
fn metrics_records_command_errors_with_labels() {
let metrics = Metrics::new();
metrics.increment_command_errors_total("Account", "tenant-a", "tenant_not_hosted");
metrics.increment_command_errors_total("Account", "tenant-a", "tenant_not_hosted");
let output = metrics.export_prometheus();
assert!(output.contains("aggregate_command_errors_total{aggregate_type=\"Account\",tenant_id=\"tenant-a\",error_kind=\"tenant_not_hosted\"} 2"));
}
#[test]
fn metrics_records_command_duration() {
let metrics = Metrics::new();
metrics.record_command_duration(Duration::from_millis(50), "Account");
let output = metrics.export_prometheus();
assert!(output.contains("aggregate_command_duration_seconds"));
assert!(output
.contains("aggregate_command_duration_seconds_count{aggregate_type=\"Account\"} 1"));
}
#[test]
fn metrics_records_rehydration_duration() {
let metrics = Metrics::new();
metrics.record_rehydration_duration(Duration::from_millis(100), "Account");
let output = metrics.export_prometheus();
assert!(output.contains("aggregate_rehydration_duration_seconds"));
assert!(output.contains(
"aggregate_rehydration_duration_seconds_count{aggregate_type=\"Account\"} 1"
));
}
#[test]
fn metrics_export_prometheus_format() {
let metrics = Metrics::new();
metrics.increment_commands_total("Account", "tenant-a");
let output = metrics.export_prometheus();
assert!(output.contains("# HELP aggregate_commands_total"));
assert!(output.contains("# TYPE aggregate_commands_total counter"));
}
#[test]
fn metrics_is_send_sync() {
fn assert_send_sync<T: Send + Sync>() {}
assert_send_sync::<Metrics>();
}
}


@@ -0,0 +1,323 @@
mod metrics;
pub use metrics::{Metrics, MetricsRegistry};
use std::sync::Arc;
use std::time::Instant;
#[derive(Debug, Clone)]
pub struct ObservabilityConfig {
pub service_name: String,
pub environment: String,
pub enable_metrics: bool,
}
impl Default for ObservabilityConfig {
fn default() -> Self {
Self {
service_name: "aggregate".to_string(),
environment: "development".to_string(),
enable_metrics: true,
}
}
}
impl ObservabilityConfig {
pub fn with_service_name(mut self, name: impl Into<String>) -> Self {
self.service_name = name.into();
self
}
pub fn with_environment(mut self, env: impl Into<String>) -> Self {
self.environment = env.into();
self
}
pub fn without_metrics(mut self) -> Self {
self.enable_metrics = false;
self
}
}
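// CommandSpan is a lightweight, hand-rolled span: it captures the command's
// identifiers plus a start Instant and is consumed by the record_* methods
// below for metrics and structured logs.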
#[derive(Debug, Clone)]
pub struct CommandSpan {
aggregate_id: String,
aggregate_type: String,
tenant_id: String,
command_id: String,
correlation_id: Option<String>,
trace_id: Option<String>,
start_time: Instant,
}
impl CommandSpan {
pub fn new(
aggregate_id: impl Into<String>,
aggregate_type: impl Into<String>,
tenant_id: impl Into<String>,
command_id: impl Into<String>,
correlation_id: Option<String>,
trace_id: Option<String>,
) -> Self {
Self {
aggregate_id: aggregate_id.into(),
aggregate_type: aggregate_type.into(),
tenant_id: tenant_id.into(),
command_id: command_id.into(),
correlation_id,
trace_id,
start_time: Instant::now(),
}
}
pub fn elapsed(&self) -> std::time::Duration {
self.start_time.elapsed()
}
pub fn aggregate_id(&self) -> &str {
&self.aggregate_id
}
pub fn aggregate_type(&self) -> &str {
&self.aggregate_type
}
pub fn tenant_id(&self) -> &str {
&self.tenant_id
}
pub fn command_id(&self) -> &str {
&self.command_id
}
pub fn correlation_id(&self) -> Option<&str> {
self.correlation_id.as_deref()
}
pub fn trace_id(&self) -> Option<&str> {
self.trace_id.as_deref()
}
}
pub struct Observability {
config: ObservabilityConfig,
metrics: Arc<Metrics>,
}
impl Observability {
pub fn new(config: ObservabilityConfig) -> Self {
let metrics = Arc::new(Metrics::new());
Self { config, metrics }
}
pub fn config(&self) -> &ObservabilityConfig {
&self.config
}
pub fn metrics(&self) -> &Arc<Metrics> {
&self.metrics
}
pub fn start_command_span(
&self,
aggregate_id: &str,
aggregate_type: &str,
tenant_id: &str,
command_id: &str,
correlation_id: Option<&str>,
trace_id: Option<&str>,
) -> CommandSpan {
tracing::info_span!(
"command",
aggregate_id = %aggregate_id,
aggregate_type = %aggregate_type,
tenant_id = %tenant_id,
command_id = %command_id,
correlation_id = correlation_id.unwrap_or(""),
trace_id = trace_id.unwrap_or(""),
);
CommandSpan::new(
aggregate_id,
aggregate_type,
tenant_id,
command_id,
correlation_id.map(|s| s.to_string()),
trace_id.map(|s| s.to_string()),
)
}
pub fn record_command_success(&self, span: &CommandSpan, events_count: usize) {
self.metrics
.increment_commands_total(&span.aggregate_type, &span.tenant_id);
self.metrics
.record_command_duration(span.elapsed(), &span.aggregate_type);
tracing::info!(
aggregate_id = %span.aggregate_id(),
aggregate_type = %span.aggregate_type(),
tenant_id = %span.tenant_id(),
command_id = %span.command_id(),
correlation_id = span.correlation_id().unwrap_or(""),
trace_id = span.trace_id().unwrap_or(""),
events_count = events_count,
duration_ms = span.elapsed().as_millis() as u64,
"Command handled successfully"
);
}
pub fn record_command_error(&self, span: &CommandSpan, error: &crate::types::AggregateError) {
self.metrics
.increment_commands_total(&span.aggregate_type, &span.tenant_id);
self.metrics
.record_command_duration(span.elapsed(), &span.aggregate_type);
self.metrics.increment_command_errors_total(
&span.aggregate_type,
&span.tenant_id,
error_kind(error),
);
if matches!(
error,
crate::types::AggregateError::TenantAccessDenied { .. }
| crate::types::AggregateError::TenantNotHosted { .. }
| crate::types::AggregateError::TenantDraining { .. }
) {
self.metrics.increment_tenant_errors();
}
if matches!(error, crate::types::AggregateError::VersionConflict { .. }) {
self.metrics.increment_version_conflicts();
}
tracing::error!(
aggregate_id = %span.aggregate_id(),
aggregate_type = %span.aggregate_type(),
tenant_id = %span.tenant_id(),
command_id = %span.command_id(),
correlation_id = span.correlation_id().unwrap_or(""),
trace_id = span.trace_id().unwrap_or(""),
error = %error,
duration_ms = span.elapsed().as_millis() as u64,
"Command handling failed"
);
}
pub fn record_rehydration(&self, duration: std::time::Duration, aggregate_type: &str) {
self.metrics
.record_rehydration_duration(duration, aggregate_type);
tracing::debug!(
aggregate_type = %aggregate_type,
duration_ms = duration.as_millis() as u64,
"Aggregate rehydrated"
);
}
pub fn export_metrics(&self) -> String {
self.metrics.export_prometheus()
}
}
impl Default for Observability {
fn default() -> Self {
Self::new(ObservabilityConfig::default())
}
}
fn error_kind(error: &crate::types::AggregateError) -> &'static str {
match error {
crate::types::AggregateError::TenantAccessDenied { .. } => "tenant_access_denied",
crate::types::AggregateError::TenantNotHosted { .. } => "tenant_not_hosted",
crate::types::AggregateError::TenantDraining { .. } => "tenant_draining",
crate::types::AggregateError::ValidationError(_) => "validation",
crate::types::AggregateError::VersionConflict { .. } => "version_conflict",
crate::types::AggregateError::StorageError(_) => "storage",
crate::types::AggregateError::StreamError(_) => "stream",
crate::types::AggregateError::RehydrationError(_) => "rehydration",
crate::types::AggregateError::DecideError(_) => "decide",
crate::types::AggregateError::ApplyError(_) => "apply",
crate::types::AggregateError::NotFound(_) => "not_found",
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::types::{AggregateError, TenantId};
#[test]
fn observability_config_defaults() {
let config = ObservabilityConfig::default();
assert_eq!(config.service_name, "aggregate");
assert_eq!(config.environment, "development");
assert!(config.enable_metrics);
}
#[test]
fn observability_config_builder() {
let config = ObservabilityConfig::default()
.with_service_name("my-service")
.with_environment("production")
.without_metrics();
assert_eq!(config.service_name, "my-service");
assert_eq!(config.environment, "production");
assert!(!config.enable_metrics);
}
#[test]
fn command_span_tracks_elapsed_time() {
let span = CommandSpan::new("agg-123", "Account", "tenant-a", "cmd-456", None, None);
std::thread::sleep(std::time::Duration::from_millis(10));
assert!(span.elapsed() >= std::time::Duration::from_millis(10));
}
#[test]
fn observability_records_success() {
let obs = Observability::default();
let span = obs.start_command_span("agg-123", "Account", "tenant-a", "cmd-456", None, None);
obs.record_command_success(&span, 3);
let metrics = obs.export_metrics();
assert!(metrics.contains("aggregate_commands_total"));
}
#[test]
fn observability_records_tenant_error() {
let obs = Observability::default();
let span = obs.start_command_span("agg-123", "Account", "tenant-a", "cmd-456", None, None);
let error = AggregateError::TenantAccessDenied {
tenant_id: TenantId::new("other-tenant"),
};
obs.record_command_error(&span, &error);
let metrics = obs.export_metrics();
assert!(metrics.contains("aggregate_tenant_errors_total"));
}
#[test]
fn observability_records_version_conflict() {
let obs = Observability::default();
let span = obs.start_command_span("agg-123", "Account", "tenant-a", "cmd-456", None, None);
let error = AggregateError::VersionConflict {
expected: crate::types::Version::from(5),
actual: crate::types::Version::from(4),
};
obs.record_command_error(&span, &error);
let metrics = obs.export_metrics();
assert!(metrics.contains("aggregate_version_conflicts_total"));
}
#[test]
fn observability_is_send_sync() {
fn assert_send_sync<T: Send + Sync>() {}
assert_send_sync::<Observability>();
assert_send_sync::<CommandSpan>();
}
}

aggregate/src/placement.rs

@@ -0,0 +1,267 @@
use crate::observability::Observability;
use crate::types::{AggregateError, TenantId};
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use tokio::sync::RwLock;
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct TenantStatus {
pub tenant_id: TenantId,
pub hosted: bool,
pub accepting: bool,
pub draining: bool,
pub in_flight: u64,
}
pub struct TenantPlacementManager {
hosted: RwLock<HashSet<String>>,
draining: RwLock<HashSet<String>>,
in_flight: RwLock<HashMap<String, u64>>,
observability: Arc<Observability>,
}
impl TenantPlacementManager {
pub fn new(observability: Arc<Observability>) -> Self {
Self {
hosted: RwLock::new(HashSet::new()),
draining: RwLock::new(HashSet::new()),
in_flight: RwLock::new(HashMap::new()),
observability,
}
}
pub async fn set_hosted_tenants(&self, tenant_ids: impl IntoIterator<Item = String>) {
let mut hosted = self.hosted.write().await;
hosted.clear();
hosted.extend(tenant_ids);
}
pub async fn apply_placement_map(&self, shard_id: &str, placement: &HashMap<String, String>) {
let tenants = placement
.iter()
.filter_map(|(tenant_id, assigned)| {
if assigned == shard_id {
Some(tenant_id.clone())
} else {
None
}
})
.collect::<Vec<_>>();
self.set_hosted_tenants(tenants).await;
}
pub async fn is_hosted(&self, tenant_id: &TenantId) -> bool {
if tenant_id.as_str().is_empty() {
return true;
}
self.hosted.read().await.contains(tenant_id.as_str())
}
pub async fn is_draining(&self, tenant_id: &TenantId) -> bool {
self.draining.read().await.contains(tenant_id.as_str())
}
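// Admission control for a command: reject tenants not hosted on this shard,
// reject draining tenants, then bump the in-flight counter. The returned
// guard decrements the counter when dropped.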
pub async fn begin_command(
self: &Arc<Self>,
tenant_id: &TenantId,
) -> Result<TenantCommandGuard, AggregateError> {
if !self.is_hosted(tenant_id).await {
return Err(AggregateError::TenantNotHosted {
tenant_id: tenant_id.clone(),
});
}
if self.is_draining(tenant_id).await {
return Err(AggregateError::TenantDraining {
tenant_id: tenant_id.clone(),
});
}
let mut map = self.in_flight.write().await;
let counter = map.entry(tenant_id.as_str().to_string()).or_insert(0);
*counter += 1;
let value = *counter;
drop(map);
self.observability
.metrics()
.set_in_flight(tenant_id.as_str(), value);
Ok(TenantCommandGuard {
tenant_id: tenant_id.clone(),
manager: self.clone(),
})
}
pub async fn drain_tenant(&self, tenant_id: &TenantId) {
if tenant_id.as_str().is_empty() {
return;
}
let mut draining = self.draining.write().await;
draining.insert(tenant_id.as_str().to_string());
}
pub async fn undrain_tenant(&self, tenant_id: &TenantId) {
let mut draining = self.draining.write().await;
draining.remove(tenant_id.as_str());
}
pub async fn wait_drained(&self, tenant_id: &TenantId) {
loop {
let in_flight = self
.in_flight
.read()
.await
.get(tenant_id.as_str())
.copied()
.unwrap_or(0);
if in_flight == 0 {
break;
}
tokio::time::sleep(std::time::Duration::from_millis(10)).await;
}
}
pub async fn wait_drained_with_timeout(
&self,
tenant_id: &TenantId,
timeout: std::time::Duration,
) -> bool {
let deadline = tokio::time::Instant::now() + timeout;
loop {
let in_flight = self
.in_flight
.read()
.await
.get(tenant_id.as_str())
.copied()
.unwrap_or(0);
if in_flight == 0 {
return true;
}
if tokio::time::Instant::now() >= deadline {
return false;
}
tokio::time::sleep(std::time::Duration::from_millis(10)).await;
}
}
pub async fn tenant_status(&self, tenant_id: &TenantId) -> TenantStatus {
let hosted = self.is_hosted(tenant_id).await;
let draining = self.is_draining(tenant_id).await;
let in_flight = self
.in_flight
.read()
.await
.get(tenant_id.as_str())
.copied()
.unwrap_or(0);
TenantStatus {
tenant_id: tenant_id.clone(),
hosted,
accepting: hosted && !draining,
draining,
in_flight,
}
}
pub async fn hosted_tenants(&self) -> Vec<TenantId> {
let hosted = self.hosted.read().await;
hosted.iter().map(TenantId::new).collect()
}
pub async fn all_statuses(&self) -> Vec<TenantStatus> {
let hosted = self.hosted.read().await.clone();
let draining = self.draining.read().await.clone();
let in_flight = self.in_flight.read().await.clone();
hosted
.into_iter()
.map(|id| {
let tenant_id = TenantId::new(id.clone());
let d = draining.contains(&id);
let f = in_flight.get(&id).copied().unwrap_or(0);
TenantStatus {
tenant_id,
hosted: true,
accepting: !d,
draining: d,
in_flight: f,
}
})
.collect()
}
async fn finish_command(&self, tenant_id: &TenantId) {
let mut map = self.in_flight.write().await;
let counter = map.entry(tenant_id.as_str().to_string()).or_insert(0);
if *counter > 0 {
*counter -= 1;
}
let value = *counter;
drop(map);
self.observability
.metrics()
.set_in_flight(tenant_id.as_str(), value);
}
}
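// RAII guard for an in-flight command. Drop cannot be async, so the
// decrement runs on a spawned task; this requires a Tokio runtime to be
// alive when the guard is dropped.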
pub struct TenantCommandGuard {
tenant_id: TenantId,
manager: Arc<TenantPlacementManager>,
}
impl Drop for TenantCommandGuard {
fn drop(&mut self) {
let tenant_id = self.tenant_id.clone();
let manager = self.manager.clone();
tokio::spawn(async move {
manager.finish_command(&tenant_id).await;
});
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::observability::Observability;
#[tokio::test]
async fn placement_rejects_unhosted_tenant() {
let obs = Arc::new(Observability::default());
let mgr = Arc::new(TenantPlacementManager::new(obs));
mgr.set_hosted_tenants(vec!["tenant-a".to_string()]).await;
let err = match mgr.begin_command(&TenantId::new("tenant-b")).await {
Ok(_) => panic!("expected error"),
Err(e) => e,
};
assert!(matches!(err, AggregateError::TenantNotHosted { .. }));
}
#[tokio::test]
async fn drain_blocks_new_commands_until_in_flight_zero() {
let obs = Arc::new(Observability::default());
let mgr = Arc::new(TenantPlacementManager::new(obs));
mgr.set_hosted_tenants(vec!["tenant-a".to_string()]).await;
let guard = mgr.begin_command(&TenantId::new("tenant-a")).await.unwrap();
mgr.drain_tenant(&TenantId::new("tenant-a")).await;
let err = match mgr.begin_command(&TenantId::new("tenant-a")).await {
Ok(_) => panic!("expected error"),
Err(e) => e,
};
assert!(matches!(err, AggregateError::TenantDraining { .. }));
drop(guard);
mgr.wait_drained(&TenantId::new("tenant-a")).await;
let err = match mgr.begin_command(&TenantId::new("tenant-a")).await {
Ok(_) => panic!("expected error"),
Err(e) => e,
};
assert!(matches!(err, AggregateError::TenantDraining { .. }));
}
}


@@ -0,0 +1,594 @@
use super::{AggregateProjection, QueryRequest, QueryResponse};
use crate::types::TenantId;
use futures::stream::Stream;
use serde_json::Value as JsonValue;
use std::collections::HashMap;
use std::pin::Pin;
use std::sync::Arc;
use thiserror::Error;
use tokio::sync::broadcast;
use tokio::sync::RwLock;
#[derive(Debug, Error)]
pub enum QueryError {
#[error("Query syntax error: {0}")]
SyntaxError(String),
#[error("Connection error: {0}")]
ConnectionError(String),
#[error("Tenant not found: {0}")]
TenantNotFound(String),
#[error("Internal error: {0}")]
InternalError(String),
}
pub type QueryResult<T> = Result<T, QueryError>;
#[derive(Debug, Clone)]
pub struct QueryConfig {
pub endpoint: Option<String>,
pub embedded: bool,
pub cache_size: usize,
pub cache_ttl_seconds: u64,
}
impl Default for QueryConfig {
fn default() -> Self {
Self {
endpoint: None,
embedded: true,
cache_size: 1000,
cache_ttl_seconds: 60,
}
}
}
impl QueryConfig {
pub fn embedded() -> Self {
Self {
embedded: true,
..Default::default()
}
}
pub fn remote(endpoint: impl Into<String>) -> Self {
Self {
endpoint: Some(endpoint.into()),
embedded: false,
..Default::default()
}
}
}
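// Embedded projection store: projections live in an in-memory map keyed by
// tenant, fronted by a TTL-bounded LRU read cache, with index() fanning out
// updates over a broadcast channel for subscribers.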
#[derive(Debug, Clone)]
struct CacheEntry {
projection: AggregateProjection,
inserted_at: std::time::Instant,
}
#[derive(Debug, Clone)]
pub struct QueryClient {
config: QueryConfig,
storage: Arc<RwLock<HashMap<String, Vec<AggregateProjection>>>>,
cache: Arc<RwLock<lru::LruCache<String, CacheEntry>>>,
updates: broadcast::Sender<AggregateProjection>,
}
impl QueryClient {
pub fn new(config: QueryConfig) -> Self {
let cache = lru::LruCache::new(
std::num::NonZeroUsize::new(config.cache_size)
.unwrap_or_else(|| std::num::NonZeroUsize::new(1000).unwrap()),
);
let (updates, _) = broadcast::channel(1024);
Self {
config,
storage: Arc::new(RwLock::new(HashMap::new())),
cache: Arc::new(RwLock::new(cache)),
updates,
}
}
pub fn embedded() -> Self {
Self::new(QueryConfig::embedded())
}
fn make_key(tenant_id: &str, aggregate_id: &str) -> String {
format!("{}:{}", tenant_id, aggregate_id)
}
pub async fn index(&self, projection: AggregateProjection) -> QueryResult<()> {
let key = Self::make_key(&projection.tenant_id, &projection.aggregate_id);
let _ = self.updates.send(projection.clone());
{
let mut cache = self.cache.write().await;
cache.put(
key.clone(),
CacheEntry {
projection: projection.clone(),
inserted_at: std::time::Instant::now(),
},
);
}
let mut storage = self.storage.write().await;
let tenant_projections = storage.entry(projection.tenant_id.clone()).or_default();
if let Some(existing) = tenant_projections
.iter_mut()
.find(|p| p.aggregate_id == projection.aggregate_id)
{
*existing = projection;
} else {
tenant_projections.push(projection);
}
Ok(())
}
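// Subscribe to projection updates for one tenant. Backed by the broadcast
// channel: lagged receivers skip missed updates rather than erroring, and
// the stream ends when the channel closes.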
pub fn subscribe(
&self,
tenant_id: TenantId,
) -> Pin<Box<dyn Stream<Item = AggregateProjection> + Send>> {
let tenant_id = tenant_id.as_str().to_string();
let receiver = self.updates.subscribe();
Box::pin(futures::stream::unfold(
(receiver, tenant_id),
|(mut receiver, tenant_id)| async move {
loop {
match receiver.recv().await {
Ok(proj) => {
if proj.tenant_id == tenant_id {
return Some((proj, (receiver, tenant_id)));
}
}
Err(broadcast::error::RecvError::Lagged(_)) => continue,
Err(broadcast::error::RecvError::Closed) => return None,
}
}
},
))
}
pub async fn query(&self, request: QueryRequest) -> QueryResult<QueryResponse> {
let storage = self.storage.read().await;
let tenant_projections = storage.get(&request.tenant_id);
let projections: Vec<AggregateProjection> = match tenant_projections {
Some(projs) => {
let mut filtered: Vec<_> = projs
.iter()
.filter(|p| {
if let Some(ref at) = request.aggregate_type {
&p.aggregate_type == at
} else {
true
}
})
.filter(|p| {
if let Some(ref filter) = request.filter {
self.evaluate_filter(&p.state, filter).unwrap_or(false)
} else {
true
}
})
.cloned()
.collect();
filtered.sort_by(|a, b| b.updated_at.cmp(&a.updated_at));
filtered
}
None => Vec::new(),
};
let total = projections.len();
let offset = request.offset.unwrap_or(0);
let limit = request.limit.unwrap_or(100);
let results: Vec<AggregateProjection> =
projections.into_iter().skip(offset).take(limit).collect();
Ok(QueryResponse::from_results(results, total, Some(limit)))
}
pub async fn get(
&self,
tenant_id: &TenantId,
aggregate_id: &str,
) -> QueryResult<Option<AggregateProjection>> {
let key = Self::make_key(tenant_id.as_str(), aggregate_id);
{
let mut cache = self.cache.write().await;
if let Some(entry) = cache.get(&key) {
let elapsed = entry.inserted_at.elapsed().as_secs();
if elapsed < self.config.cache_ttl_seconds {
return Ok(Some(entry.projection.clone()));
}
cache.pop(&key);
}
}
let storage = self.storage.read().await;
let tenant_projections = storage.get(tenant_id.as_str());
Ok(tenant_projections.and_then(|projs| {
projs
.iter()
.find(|p| p.aggregate_id == aggregate_id)
.cloned()
}))
}
pub async fn delete(&self, tenant_id: &TenantId, aggregate_id: &str) -> QueryResult<bool> {
let key = Self::make_key(tenant_id.as_str(), aggregate_id);
{
let mut cache = self.cache.write().await;
cache.pop(&key);
}
let mut storage = self.storage.write().await;
if let Some(tenant_projections) = storage.get_mut(tenant_id.as_str()) {
let len_before = tenant_projections.len();
tenant_projections.retain(|p| p.aggregate_id != aggregate_id);
return Ok(tenant_projections.len() < len_before);
}
Ok(false)
}
pub async fn clear_tenant(&self, tenant_id: &TenantId) -> QueryResult<usize> {
let mut storage = self.storage.write().await;
let count = storage
.remove(tenant_id.as_str())
.map(|v| v.len())
.unwrap_or(0);
let mut cache = self.cache.write().await;
let prefix = format!("{}:", tenant_id.as_str());
let keys_to_remove: Vec<_> = cache
.iter()
.filter(|(k, _)| k.starts_with(&prefix))
.map(|(k, _)| k.clone())
.collect();
for key in keys_to_remove {
cache.pop(&key);
}
Ok(count)
}
fn evaluate_filter(&self, state: &JsonValue, filter: &str) -> QueryResult<bool> {
let filter = filter.trim();
if filter.is_empty() || filter == "*" {
return Ok(true);
}
// Check two-character operators before '>' and '<'; otherwise a filter
// like "balance >= 10" splits at '>' and "= 10" fails to parse.
if let Some((field, op_value)) = filter.split_once(">=") {
return self.compare_field(state, field.trim(), op_value.trim(), |a, b| a >= b);
}
if let Some((field, op_value)) = filter.split_once("<=") {
return self.compare_field(state, field.trim(), op_value.trim(), |a, b| a <= b);
}
if let Some((field, op_value)) = filter.split_once("==") {
return self.compare_field(state, field.trim(), op_value.trim(), |a, b| a == b);
}
if let Some((field, op_value)) = filter.split_once("!=") {
return self.compare_field(state, field.trim(), op_value.trim(), |a, b| a != b);
}
if let Some((field, op_value)) = filter.split_once('>') {
return self.compare_field(state, field.trim(), op_value.trim(), |a, b| a > b);
}
if let Some((field, op_value)) = filter.split_once('<') {
return self.compare_field(state, field.trim(), op_value.trim(), |a, b| a < b);
}
Ok(false)
}
fn compare_field<F>(
&self,
state: &JsonValue,
field: &str,
value_str: &str,
compare: F,
) -> QueryResult<bool>
where
F: Fn(f64, f64) -> bool,
{
let field_value = state.get(field);
let field_num = match field_value {
Some(JsonValue::Number(n)) => n.as_f64().unwrap_or(f64::NAN),
Some(JsonValue::String(s)) => s.parse::<f64>().unwrap_or(f64::NAN),
_ => return Ok(false),
};
let compare_num = value_str.parse::<f64>().unwrap_or(f64::NAN);
if field_num.is_nan() || compare_num.is_nan() {
return Ok(false);
}
Ok(compare(field_num, compare_num))
}
}
#[cfg(test)]
mod tests {
use super::*;
use futures::StreamExt;
use serde_json::json;
fn create_test_client() -> QueryClient {
QueryClient::embedded()
}
fn create_test_projection(tenant: &str, id: &str, balance: i64) -> AggregateProjection {
AggregateProjection::new(tenant, id, "Account", 1, json!({"balance": balance}))
}
#[tokio::test]
async fn query_client_index_and_query() {
let client = create_test_client();
let proj = create_test_projection("tenant-a", "acc-1", 100);
client.index(proj).await.unwrap();
let request = QueryRequest::new("tenant-a").with_filter("balance > 50");
let response = client.query(request).await.unwrap();
assert_eq!(response.results.len(), 1);
assert_eq!(response.results[0].aggregate_id, "acc-1");
}
#[tokio::test]
async fn query_client_tenant_isolation() {
let client = create_test_client();
client
.index(create_test_projection("tenant-a", "acc-1", 100))
.await
.unwrap();
client
.index(create_test_projection("tenant-b", "acc-2", 200))
.await
.unwrap();
let response_a = client.query(QueryRequest::new("tenant-a")).await.unwrap();
let response_b = client.query(QueryRequest::new("tenant-b")).await.unwrap();
assert_eq!(response_a.results.len(), 1);
assert_eq!(response_b.results.len(), 1);
assert_eq!(response_a.results[0].state["balance"], 100);
assert_eq!(response_b.results[0].state["balance"], 200);
}
#[tokio::test]
async fn query_client_filter_operations() {
let client = create_test_client();
client
.index(create_test_projection("tenant-a", "acc-1", 100))
.await
.unwrap();
client
.index(create_test_projection("tenant-a", "acc-2", 50))
.await
.unwrap();
client
.index(create_test_projection("tenant-a", "acc-3", 150))
.await
.unwrap();
let gt_response = client
.query(QueryRequest::new("tenant-a").with_filter("balance > 75"))
.await
.unwrap();
assert_eq!(gt_response.results.len(), 2);
let lt_response = client
.query(QueryRequest::new("tenant-a").with_filter("balance < 75"))
.await
.unwrap();
assert_eq!(lt_response.results.len(), 1);
let eq_response = client
.query(QueryRequest::new("tenant-a").with_filter("balance == 100"))
.await
.unwrap();
assert_eq!(eq_response.results.len(), 1);
}
#[tokio::test]
async fn query_client_pagination() {
let client = create_test_client();
for i in 0..25 {
client
.index(create_test_projection(
"tenant-a",
&format!("acc-{}", i),
i * 10,
))
.await
.unwrap();
}
let page1 = client
.query(QueryRequest::new("tenant-a").with_limit(10))
.await
.unwrap();
assert_eq!(page1.results.len(), 10);
assert!(page1.has_more);
let page2 = client
.query(QueryRequest::new("tenant-a").with_limit(10).with_offset(10))
.await
.unwrap();
assert_eq!(page2.results.len(), 10);
let page3 = client
.query(QueryRequest::new("tenant-a").with_limit(10).with_offset(20))
.await
.unwrap();
assert_eq!(page3.results.len(), 5);
assert!(!page3.has_more);
}
#[tokio::test]
async fn query_client_get_by_id() {
let client = create_test_client();
client
.index(create_test_projection("tenant-a", "acc-1", 100))
.await
.unwrap();
let tenant = TenantId::new("tenant-a");
let result = client.get(&tenant, "acc-1").await.unwrap();
assert!(result.is_some());
let proj = result.unwrap();
assert_eq!(proj.aggregate_id, "acc-1");
assert_eq!(proj.state["balance"], 100);
}
#[tokio::test]
async fn query_client_delete() {
let client = create_test_client();
client
.index(create_test_projection("tenant-a", "acc-1", 100))
.await
.unwrap();
let tenant = TenantId::new("tenant-a");
let deleted = client.delete(&tenant, "acc-1").await.unwrap();
assert!(deleted);
let result = client.get(&tenant, "acc-1").await.unwrap();
assert!(result.is_none());
}
#[tokio::test]
async fn query_client_clear_tenant() {
let client = create_test_client();
client
.index(create_test_projection("tenant-a", "acc-1", 100))
.await
.unwrap();
client
.index(create_test_projection("tenant-a", "acc-2", 200))
.await
.unwrap();
client
.index(create_test_projection("tenant-b", "acc-3", 300))
.await
.unwrap();
let tenant = TenantId::new("tenant-a");
let count = client.clear_tenant(&tenant).await.unwrap();
assert_eq!(count, 2);
let response_a = client.query(QueryRequest::new("tenant-a")).await.unwrap();
assert_eq!(response_a.results.len(), 0);
let response_b = client.query(QueryRequest::new("tenant-b")).await.unwrap();
assert_eq!(response_b.results.len(), 1);
}
#[tokio::test]
async fn query_client_update_existing() {
let client = create_test_client();
client
.index(create_test_projection("tenant-a", "acc-1", 100))
.await
.unwrap();
client
.index(AggregateProjection::new(
"tenant-a",
"acc-1",
"Account",
2,
json!({"balance": 250}),
))
.await
.unwrap();
let response = client.query(QueryRequest::new("tenant-a")).await.unwrap();
assert_eq!(response.results.len(), 1);
assert_eq!(response.results[0].version, 2);
assert_eq!(response.results[0].state["balance"], 250);
}
#[tokio::test]
async fn query_client_subscribe_receives_updates() {
let client = create_test_client();
let mut updates = client.subscribe(TenantId::new("tenant-a"));
client
.index(create_test_projection("tenant-a", "acc-1", 100))
.await
.unwrap();
let next = updates.next().await.unwrap();
assert_eq!(next.tenant_id, "tenant-a");
assert_eq!(next.aggregate_id, "acc-1");
assert_eq!(next.state["balance"], 100);
}
#[test]
fn query_config_defaults() {
let config = QueryConfig::default();
assert!(config.embedded);
assert!(config.endpoint.is_none());
assert_eq!(config.cache_size, 1000);
assert_eq!(config.cache_ttl_seconds, 60);
}
#[test]
fn query_request_builder() {
let request = QueryRequest::new("tenant-a")
.with_aggregate_type("Account")
.with_filter("balance > 100")
.with_limit(50)
.with_offset(10);
assert_eq!(request.tenant_id, "tenant-a");
assert_eq!(request.aggregate_type, Some("Account".to_string()));
assert_eq!(request.filter, Some("balance > 100".to_string()));
assert_eq!(request.limit, Some(50));
assert_eq!(request.offset, Some(10));
}
}

193
aggregate/src/query/mod.rs Normal file
View File

@@ -0,0 +1,193 @@
mod client;
mod projection;
pub use client::{QueryClient, QueryConfig, QueryError, QueryResult};
pub use projection::{ProjectionConfig, StateProjection};
use serde::{Deserialize, Serialize};
use serde_json::Value as JsonValue;
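/// Denormalized read model for a single aggregate: the tenant it belongs to,
/// its current JSON state, and the version the state was projected at.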
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AggregateProjection {
pub tenant_id: String,
pub aggregate_id: String,
pub aggregate_type: String,
pub version: u64,
pub state: JsonValue,
pub updated_at: chrono::DateTime<chrono::Utc>,
}
impl AggregateProjection {
pub fn new(
tenant_id: impl Into<String>,
aggregate_id: impl Into<String>,
aggregate_type: impl Into<String>,
version: u64,
state: JsonValue,
) -> Self {
Self {
tenant_id: tenant_id.into(),
aggregate_id: aggregate_id.into(),
aggregate_type: aggregate_type.into(),
version,
state,
updated_at: chrono::Utc::now(),
}
}
}
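/// Builder-style query over indexed projections. Only `tenant_id` is required;
/// aggregate type, filter expression, and paging are optional.
///
/// Illustrative usage (mirrors the builder methods below):
/// ```ignore
/// let request = QueryRequest::new("tenant-a")
///     .with_aggregate_type("Account")
///     .with_filter("balance > 100")
///     .with_limit(50);
/// ```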
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QueryRequest {
pub tenant_id: String,
pub aggregate_type: Option<String>,
pub filter: Option<String>,
pub limit: Option<usize>,
pub offset: Option<usize>,
}
impl QueryRequest {
pub fn new(tenant_id: impl Into<String>) -> Self {
Self {
tenant_id: tenant_id.into(),
aggregate_type: None,
filter: None,
limit: None,
offset: None,
}
}
pub fn with_aggregate_type(mut self, aggregate_type: impl Into<String>) -> Self {
self.aggregate_type = Some(aggregate_type.into());
self
}
pub fn with_filter(mut self, filter: impl Into<String>) -> Self {
self.filter = Some(filter.into());
self
}
pub fn with_limit(mut self, limit: usize) -> Self {
self.limit = Some(limit);
self
}
pub fn with_offset(mut self, offset: usize) -> Self {
self.offset = Some(offset);
self
}
}
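/// One page of matching projections, the total match count, and a `has_more`
/// flag derived from the requested limit.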
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QueryResponse {
pub results: Vec<AggregateProjection>,
pub total: usize,
pub has_more: bool,
}
impl QueryResponse {
pub fn empty() -> Self {
Self {
results: Vec::new(),
total: 0,
has_more: false,
}
}
pub fn from_results(
results: Vec<AggregateProjection>,
total: usize,
limit: Option<usize>,
) -> Self {
let has_more = limit.is_some_and(|l| results.len() == l && total > results.len());
Self {
results,
total,
has_more,
}
}
}
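/// Thin server-side wrapper that forwards `QueryRequest`s to a `QueryClient`.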
#[derive(Debug, Clone)]
pub struct QueryServer {
query: QueryClient,
}
impl QueryServer {
pub fn new(query: QueryClient) -> Self {
Self { query }
}
pub fn query_client(&self) -> &QueryClient {
&self.query
}
pub async fn handle(&self, request: QueryRequest) -> QueryResult<QueryResponse> {
self.query.query(request).await
}
pub async fn handle_raw(
&self,
tenant_id: impl Into<String>,
aggregate_type: Option<String>,
filter: Option<String>,
limit: Option<usize>,
offset: Option<usize>,
) -> QueryResult<QueryResponse> {
let mut request = QueryRequest::new(tenant_id);
request.aggregate_type = aggregate_type;
request.filter = filter;
request.limit = limit;
request.offset = offset;
self.handle(request).await
}
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
#[tokio::test]
async fn query_server_filters_by_tenant() {
let query = QueryClient::embedded();
let server = QueryServer::new(query.clone());
query
.index(AggregateProjection::new(
"tenant-a",
"agg-1",
"Account",
1,
json!({ "balance": 100 }),
))
.await
.unwrap();
query
.index(AggregateProjection::new(
"tenant-b",
"agg-2",
"Account",
1,
json!({ "balance": 200 }),
))
.await
.unwrap();
let resp = server
.handle_raw(
"tenant-a",
Some("Account".to_string()),
Some("balance > 50".to_string()),
Some(100),
Some(0),
)
.await
.unwrap();
assert_eq!(resp.total, 1);
assert_eq!(resp.results[0].tenant_id, "tenant-a");
assert_eq!(resp.results[0].state["balance"], 100);
}
}

View File

@@ -0,0 +1,217 @@
use super::AggregateProjection;
use crate::types::{AggregateId, AggregateType, Event, TenantId, Version};
use serde_json::Value as JsonValue;
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::RwLock;
#[derive(Debug, Clone)]
pub struct ProjectionConfig {
pub batch_size: usize,
pub projection_timeout_ms: u64,
}
impl Default for ProjectionConfig {
fn default() -> Self {
Self {
batch_size: 100,
projection_timeout_ms: 5000,
}
}
}
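/// Dispatches events to projection handlers registered per aggregate type and
/// turns them into `AggregateProjection`s; `project_events` handles at most
/// `batch_size` events per call, and events without a handler are skipped.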
pub struct StateProjection {
config: ProjectionConfig,
handlers: Arc<RwLock<HashMap<String, ProjectionHandler>>>,
}
type ProjectionHandler = Box<dyn Fn(&Event) -> Option<AggregateProjection> + Send + Sync>;
impl StateProjection {
pub fn new(config: ProjectionConfig) -> Self {
Self {
config,
handlers: Arc::new(RwLock::new(HashMap::new())),
}
}
pub fn new_default() -> Self {
Self::new(ProjectionConfig::default())
}
pub async fn register_handler<F>(&self, aggregate_type: &str, handler: F)
where
F: Fn(&Event) -> Option<AggregateProjection> + Send + Sync + 'static,
{
let mut handlers = self.handlers.write().await;
handlers.insert(aggregate_type.to_string(), Box::new(handler));
}
pub async fn project_event(&self, event: &Event) -> Option<AggregateProjection> {
let handlers = self.handlers.read().await;
let aggregate_type = event.aggregate_type.as_str();
handlers.get(aggregate_type).and_then(|h| h(event))
}
pub async fn project_events(&self, events: &[Event]) -> Vec<AggregateProjection> {
let mut projections = Vec::with_capacity(events.len().min(self.config.batch_size));
for event in events.iter().take(self.config.batch_size) {
if let Some(proj) = self.project_event(event).await {
projections.push(proj);
}
}
projections
}
pub fn default_projection_from_event(event: &Event) -> AggregateProjection {
AggregateProjection::new(
event.tenant_id.as_str(),
event.aggregate_id.to_string(),
event.aggregate_type.as_str(),
event.version.as_u64(),
event.payload.clone(),
)
}
pub fn default_projection_from_state(
tenant_id: &TenantId,
aggregate_id: &AggregateId,
aggregate_type: &AggregateType,
version: &Version,
state: &JsonValue,
) -> AggregateProjection {
AggregateProjection::new(
tenant_id.as_str(),
aggregate_id.to_string(),
aggregate_type.as_str(),
version.as_u64(),
state.clone(),
)
}
}
#[cfg(test)]
mod tests {
use super::*;
use chrono::Utc;
use serde_json::json;
fn create_test_event(tenant: &str, version: u64, event_type: &str) -> Event {
Event {
event_id: uuid::Uuid::now_v7(),
tenant_id: TenantId::new(tenant),
aggregate_id: AggregateId::new_v7(),
aggregate_type: AggregateType::from("Account"),
version: Version::from(version),
event_type: event_type.to_string(),
payload: json!({"amount": 100}),
timestamp: Utc::now(),
command_id: uuid::Uuid::nil(),
correlation_id: None,
traceparent: None,
}
}
#[tokio::test]
async fn state_projection_registers_handler() {
let projection = StateProjection::new_default();
projection
.register_handler("Account", |event| {
Some(AggregateProjection::new(
event.tenant_id.as_str(),
event.aggregate_id.to_string(),
"Account",
event.version.as_u64(),
event.payload.clone(),
))
})
.await;
let event = create_test_event("tenant-a", 1, "deposited");
let result = projection.project_event(&event).await;
assert!(result.is_some());
let proj = result.unwrap();
assert_eq!(proj.aggregate_type, "Account");
}
#[tokio::test]
async fn state_projection_project_events_batch() {
let projection = StateProjection::new_default();
projection
.register_handler("Account", |event| {
Some(AggregateProjection::new(
event.tenant_id.as_str(),
event.aggregate_id.to_string(),
"Account",
event.version.as_u64(),
event.payload.clone(),
))
})
.await;
let events = vec![
create_test_event("tenant-a", 1, "deposited"),
create_test_event("tenant-a", 1, "deposited"),
create_test_event("tenant-a", 1, "deposited"),
];
let projections = projection.project_events(&events).await;
assert_eq!(projections.len(), 3);
}
#[tokio::test]
async fn state_projection_no_handler_returns_none() {
let projection = StateProjection::new_default();
let event = create_test_event("tenant-a", 1, "deposited");
let result = projection.project_event(&event).await;
assert!(result.is_none());
}
#[test]
fn default_projection_from_event() {
let event = create_test_event("tenant-a", 5, "deposited");
let proj = StateProjection::default_projection_from_event(&event);
assert_eq!(proj.tenant_id, "tenant-a");
assert_eq!(proj.version, 5);
assert_eq!(proj.state["amount"], 100);
}
#[test]
fn default_projection_from_state() {
let tenant_id = TenantId::new("tenant-a");
let aggregate_id = AggregateId::new_v7();
let aggregate_type = AggregateType::from("Account");
let version = Version::from(10);
let state = json!({"balance": 1000});
let proj = StateProjection::default_projection_from_state(
&tenant_id,
&aggregate_id,
&aggregate_type,
&version,
&state,
);
assert_eq!(proj.tenant_id, "tenant-a");
assert_eq!(proj.aggregate_type, "Account");
assert_eq!(proj.version, 10);
assert_eq!(proj.state["balance"], 1000);
}
#[test]
fn projection_config_defaults() {
let config = ProjectionConfig::default();
assert_eq!(config.batch_size, 100);
assert_eq!(config.projection_timeout_ms, 5000);
}
}

View File

@@ -0,0 +1,270 @@
use serde_json::Value as JsonValue;
use std::time::Duration;
pub async fn execute_decide_program(
state: &JsonValue,
command: &JsonValue,
program: &str,
gas_limit: u64,
timeout: Duration,
) -> Result<Vec<JsonValue>, crate::types::AggregateError> {
let _ = (state, command, program, gas_limit, timeout);
#[cfg(feature = "runtime-v8")]
{
return execute_decide_v8(state, command, program, gas_limit, timeout).await;
}
#[cfg(feature = "runtime-wasm")]
{
return execute_decide_wasm(state, command, program, gas_limit, timeout).await;
}
#[cfg(not(any(feature = "runtime-v8", feature = "runtime-wasm")))]
{
Err(crate::types::AggregateError::DecideError(
"No runtime enabled. Enable 'runtime-v8' or 'runtime-wasm' feature.".to_string(),
))
}
}
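/// Runs the tenant-supplied `apply(state, event)` program in the compiled-in
/// runtime and returns the new aggregate state; fails with an `ApplyError`
/// when no runtime feature is enabled.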
pub async fn execute_apply_program(
state: &JsonValue,
event: &JsonValue,
program: &str,
gas_limit: u64,
timeout: Duration,
) -> Result<JsonValue, crate::types::AggregateError> {
let _ = (state, event, program, gas_limit, timeout);
#[cfg(feature = "runtime-v8")]
{
return execute_apply_v8(state, event, program, gas_limit, timeout).await;
}
#[cfg(feature = "runtime-wasm")]
{
return execute_apply_wasm(state, event, program, gas_limit, timeout).await;
}
#[cfg(not(any(feature = "runtime-v8", feature = "runtime-wasm")))]
{
Err(crate::types::AggregateError::ApplyError(
"No runtime enabled. Enable 'runtime-v8' or 'runtime-wasm' feature.".to_string(),
))
}
}
#[cfg(feature = "runtime-v8")]
async fn execute_decide_v8(
state: &JsonValue,
command: &JsonValue,
program: &str,
gas_limit: u64,
timeout: Duration,
) -> Result<Vec<JsonValue>, crate::types::AggregateError> {
use v8::{Context, ContextScope, Function, HandleScope, Isolate, Script};
let state_str = serde_json::to_string(state).map_err(|e| {
crate::types::AggregateError::DecideError(format!("State serialization: {}", e))
})?;
let command_str = serde_json::to_string(command).map_err(|e| {
crate::types::AggregateError::DecideError(format!("Command serialization: {}", e))
})?;
let _ = gas_limit;
// Own the program source so the 'static blocking task can capture it.
let program = program.to_string();
let result = tokio::task::spawn_blocking(move || {
let isolate = &mut Isolate::new(v8::CreateParams::default());
let scope = &mut HandleScope::new(isolate);
let context = Context::new(scope);
let scope = &mut ContextScope::new(scope, context);
let source =
v8::String::new(scope, &program).ok_or_else(|| "Failed to create program string")?;
let script =
Script::compile(scope, source, None).ok_or_else(|| "Failed to compile program")?;
script.run(scope).ok_or_else(|| "Failed to run program")?;
let global = context.global(scope);
let decide_name =
v8::String::new(scope, "decide").ok_or_else(|| "Failed to create decide string")?;
let decide_fn = global
.get(scope, decide_name.into())
.and_then(|v| v8::Local::<Function>::try_from(v).ok())
.ok_or_else(|| "decide function not found")?;
let state_json = v8::String::new(scope, &state_str)
.ok_or_else(|| "Failed to create state JSON string")?;
let state_obj =
v8::json::parse(scope, state_json).ok_or_else(|| "Failed to parse state JSON")?;
let command_json = v8::String::new(scope, &command_str)
.ok_or_else(|| "Failed to create command JSON string")?;
let command_obj =
v8::json::parse(scope, command_json).ok_or_else(|| "Failed to parse command JSON")?;
let args: [v8::Local<v8::Value>; 2] = [state_obj.into(), command_obj.into()];
let result = decide_fn
.call(scope, global.into(), &args)
.ok_or_else(|| "decide function call failed")?;
let result_json =
v8::json::stringify(scope, result).ok_or_else(|| "Failed to stringify result")?;
let result_str = result_json.to_rust_string_lossy(scope);
let events: Vec<JsonValue> = serde_json::from_str(&result_str)
.map_err(|e| format!("Failed to parse result: {}", e))?;
Ok::<_, String>(events)
});
let timeout_result = tokio::time::timeout(timeout, result).await;
match timeout_result {
Ok(Ok(Ok(events))) => Ok(events),
Ok(Ok(Err(e))) => Err(crate::types::AggregateError::DecideError(e)),
Ok(Err(_)) => Err(crate::types::AggregateError::DecideError(
"Task join error".to_string(),
)),
Err(_) => Err(crate::types::AggregateError::DecideError(
"Execution timeout".to_string(),
)),
}
}
#[cfg(feature = "runtime-v8")]
async fn execute_apply_v8(
state: &JsonValue,
event: &JsonValue,
program: &str,
gas_limit: u64,
timeout: Duration,
) -> Result<JsonValue, crate::types::AggregateError> {
use v8::{Context, ContextScope, Function, HandleScope, Isolate, Script};
let state_str = serde_json::to_string(state).map_err(|e| {
crate::types::AggregateError::ApplyError(format!("State serialization: {}", e))
})?;
let event_str = serde_json::to_string(event).map_err(|e| {
crate::types::AggregateError::ApplyError(format!("Event serialization: {}", e))
})?;
let _ = gas_limit;
// Own the program source so the 'static blocking task can capture it.
let program = program.to_string();
let result = tokio::task::spawn_blocking(move || {
let isolate = &mut Isolate::new(v8::CreateParams::default());
let scope = &mut HandleScope::new(isolate);
let context = Context::new(scope);
let scope = &mut ContextScope::new(scope, context);
let source =
v8::String::new(scope, &program).ok_or_else(|| "Failed to create program string")?;
let script =
Script::compile(scope, source, None).ok_or_else(|| "Failed to compile program")?;
script.run(scope).ok_or_else(|| "Failed to run program")?;
let global = context.global(scope);
let apply_name =
v8::String::new(scope, "apply").ok_or_else(|| "Failed to create apply string")?;
let apply_fn = global
.get(scope, apply_name.into())
.and_then(|v| v8::Local::<Function>::try_from(v).ok())
.ok_or_else(|| "apply function not found")?;
let state_json = v8::String::new(scope, &state_str)
.ok_or_else(|| "Failed to create state JSON string")?;
let state_obj =
v8::json::parse(scope, state_json).ok_or_else(|| "Failed to parse state JSON")?;
let event_json = v8::String::new(scope, &event_str)
.ok_or_else(|| "Failed to create event JSON string")?;
let event_obj =
v8::json::parse(scope, event_json).ok_or_else(|| "Failed to parse event JSON")?;
let args: [v8::Local<v8::Value>; 2] = [state_obj.into(), event_obj.into()];
let result = apply_fn
.call(scope, global.into(), &args)
.ok_or_else(|| "apply function call failed")?;
let result_json =
v8::json::stringify(scope, result).ok_or_else(|| "Failed to stringify result")?;
let result_str = result_json.to_rust_string_lossy(scope);
let new_state: JsonValue = serde_json::from_str(&result_str)
.map_err(|e| format!("Failed to parse result: {}", e))?;
Ok::<_, String>(new_state)
});
let timeout_result = tokio::time::timeout(timeout, result).await;
match timeout_result {
Ok(Ok(Ok(new_state))) => Ok(new_state),
Ok(Ok(Err(e))) => Err(crate::types::AggregateError::ApplyError(e)),
Ok(Err(_)) => Err(crate::types::AggregateError::ApplyError(
"Task join error".to_string(),
)),
Err(_) => Err(crate::types::AggregateError::ApplyError(
"Execution timeout".to_string(),
)),
}
}
#[cfg(feature = "runtime-wasm")]
async fn execute_decide_wasm(
state: &JsonValue,
command: &JsonValue,
_program: &str,
_gas_limit: u64,
_timeout: Duration,
) -> Result<Vec<JsonValue>, crate::types::AggregateError> {
let _ = (state, command);
Err(crate::types::AggregateError::DecideError(
"WASM runtime not yet implemented".to_string(),
))
}
#[cfg(feature = "runtime-wasm")]
async fn execute_apply_wasm(
state: &JsonValue,
event: &JsonValue,
_program: &str,
_gas_limit: u64,
_timeout: Duration,
) -> Result<JsonValue, crate::types::AggregateError> {
let _ = (state, event);
Err(crate::types::AggregateError::ApplyError(
"WASM runtime not yet implemented".to_string(),
))
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
#[tokio::test]
async fn no_runtime_returns_error() {
#[cfg(not(any(feature = "runtime-v8", feature = "runtime-wasm")))]
{
let state = json!({});
let command = json!({});
let result =
execute_decide_program(&state, &command, "program", 1000, Duration::from_secs(1))
.await;
assert!(result.is_err());
assert!(matches!(
result.unwrap_err(),
crate::types::AggregateError::DecideError(_)
));
}
}
}

View File

@@ -0,0 +1,484 @@
mod executor;
use lru::LruCache;
use std::num::NonZeroUsize;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::RwLock;
use crate::types::{AggregateError, Command, Event};
use serde_json::Value as JsonValue;
pub use executor::{execute_apply_program, execute_decide_program};
const DEFAULT_GAS_LIMIT: u64 = 1_000_000;
const DEFAULT_TIMEOUT_MS: u64 = 5_000;
const CACHE_SIZE: usize = 100;
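/// Gas, timeout, and program-cache settings for decide/apply execution;
/// `mock_runtime` switches to the deterministic in-process mock used in tests.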
#[derive(Debug, Clone)]
pub struct ExecutorConfig {
pub gas_limit: u64,
pub timeout: Duration,
pub cache_programs: bool,
pub mock_runtime: bool,
}
impl Default for ExecutorConfig {
fn default() -> Self {
Self {
gas_limit: DEFAULT_GAS_LIMIT,
timeout: Duration::from_millis(DEFAULT_TIMEOUT_MS),
cache_programs: true,
mock_runtime: false,
}
}
}
impl ExecutorConfig {
pub fn with_gas_limit(mut self, limit: u64) -> Self {
self.gas_limit = limit;
self
}
pub fn with_timeout(mut self, timeout: Duration) -> Self {
self.timeout = timeout;
self
}
pub fn without_cache(mut self) -> Self {
self.cache_programs = false;
self
}
pub fn with_mock_runtime(mut self) -> Self {
self.mock_runtime = true;
self
}
}
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
pub struct ProgramHash(String);
impl ProgramHash {
pub fn new(program: &str) -> Self {
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
let mut hasher = DefaultHasher::new();
program.hash(&mut hasher);
Self(format!("{:x}", hasher.finish()))
}
}
#[derive(Debug, Clone)]
pub struct DecideResult {
pub events: Vec<JsonValue>,
}
#[derive(Debug, Clone)]
pub struct ApplyResult {
pub new_state: JsonValue,
}
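/// Executes decide/apply programs and keeps an LRU cache of recently seen
/// program sources.
///
/// Illustrative usage (mirrors the tests below; `decide_source` stands in for
/// the JS program text):
/// ```ignore
/// let executor = RuntimeExecutor::with_config(ExecutorConfig::default().with_mock_runtime());
/// let decided = executor.execute_decide(&state, &command, decide_source).await?;
/// ```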
#[derive(Debug, Clone)]
pub struct RuntimeExecutor {
config: ExecutorConfig,
program_cache: Arc<RwLock<LruCache<ProgramHash, String>>>,
}
impl RuntimeExecutor {
pub fn new() -> Self {
Self::with_config(ExecutorConfig::default())
}
pub fn with_config(config: ExecutorConfig) -> Self {
let cache_size = NonZeroUsize::new(CACHE_SIZE).unwrap();
Self {
config,
program_cache: Arc::new(RwLock::new(LruCache::new(cache_size))),
}
}
pub async fn execute_decide(
&self,
state: &JsonValue,
command: &Command,
decide_program: &str,
) -> Result<DecideResult, AggregateError> {
if self.config.mock_runtime {
let events = mock_decide(state, command)?;
return Ok(DecideResult { events });
}
if self.config.cache_programs {
let hash = ProgramHash::new(decide_program);
let mut cache = self.program_cache.write().await;
cache.put(hash.clone(), decide_program.to_string());
}
let command_json = serde_json::to_value(command).map_err(|e| {
AggregateError::DecideError(format!("Command serialization failed: {}", e))
})?;
let result = executor::execute_decide_program(
state,
&command_json,
decide_program,
self.config.gas_limit,
self.config.timeout,
)
.await?;
Ok(DecideResult { events: result })
}
pub async fn execute_apply(
&self,
state: &JsonValue,
event: &Event,
apply_program: &str,
) -> Result<ApplyResult, AggregateError> {
if self.config.mock_runtime {
let new_state = mock_apply(state, event)?;
return Ok(ApplyResult { new_state });
}
if self.config.cache_programs {
let hash = ProgramHash::new(apply_program);
let mut cache = self.program_cache.write().await;
cache.put(hash.clone(), apply_program.to_string());
}
let event_json = serde_json::to_value(event).map_err(|e| {
AggregateError::ApplyError(format!("Event serialization failed: {}", e))
})?;
let result = executor::execute_apply_program(
state,
&event_json,
apply_program,
self.config.gas_limit,
self.config.timeout,
)
.await?;
Ok(ApplyResult { new_state: result })
}
pub async fn execute_apply_raw(
&self,
state: &JsonValue,
event: &JsonValue,
apply_program: &str,
) -> Result<ApplyResult, AggregateError> {
if self.config.mock_runtime {
let _ = apply_program;
return Err(AggregateError::ApplyError(
"mock_runtime does not support execute_apply_raw".to_string(),
));
}
if self.config.cache_programs {
let hash = ProgramHash::new(apply_program);
let mut cache = self.program_cache.write().await;
cache.put(hash.clone(), apply_program.to_string());
}
let result = executor::execute_apply_program(
state,
event,
apply_program,
self.config.gas_limit,
self.config.timeout,
)
.await?;
Ok(ApplyResult { new_state: result })
}
pub fn config(&self) -> &ExecutorConfig {
&self.config
}
pub async fn cache_size(&self) -> usize {
self.program_cache.read().await.len()
}
pub async fn clear_cache(&self) {
self.program_cache.write().await.clear();
}
}
impl Default for RuntimeExecutor {
fn default() -> Self {
Self::new()
}
}
fn mock_decide(state: &JsonValue, command: &Command) -> Result<Vec<JsonValue>, AggregateError> {
let cmd_type = command
.payload
.get("type")
.and_then(|v| v.as_str())
.unwrap_or("");
let amount = command
.payload
.get("amount")
.and_then(|v| v.as_i64())
.unwrap_or(0);
match cmd_type {
"deposit" => Ok(vec![
serde_json::json!({ "type": "deposited", "amount": amount }),
]),
"withdraw" => {
let balance = state.get("balance").and_then(|v| v.as_i64()).unwrap_or(0);
if balance < amount {
Err(AggregateError::DecideError(
"Insufficient funds".to_string(),
))
} else {
Ok(vec![
serde_json::json!({ "type": "withdrawn", "amount": amount }),
])
}
}
_ => Ok(Vec::new()),
}
}
fn mock_apply(state: &JsonValue, event: &Event) -> Result<JsonValue, AggregateError> {
let mut new_state = match state {
JsonValue::Object(map) => JsonValue::Object(map.clone()),
_ => serde_json::json!({}),
};
let balance = new_state
.get("balance")
.and_then(|v| v.as_i64())
.unwrap_or(0);
let amount = event
.payload
.get("amount")
.and_then(|v| v.as_i64())
.unwrap_or(0);
let next_balance = match event.event_type.as_str() {
"deposited" => balance + amount,
"withdrawn" => balance - amount,
_ => balance,
};
if let JsonValue::Object(map) = &mut new_state {
map.insert("balance".to_string(), JsonValue::from(next_balance));
}
Ok(new_state)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::types::{AggregateId, AggregateType, TenantId, Version};
use serde_json::json;
use std::collections::HashMap;
const DECIDE_PROGRAM: &str = r#"
function decide(state, command) {
if (command.type === "deposit") {
return [{ type: "deposited", amount: command.amount }];
}
if (command.type === "withdraw") {
if (state.balance < command.amount) {
throw new Error("Insufficient funds");
}
return [{ type: "withdrawn", amount: command.amount }];
}
return [];
}
"#;
const APPLY_PROGRAM: &str = r#"
function apply(state, event) {
if (event.type === "deposited") {
state.balance = (state.balance || 0) + event.amount;
}
if (event.type === "withdrawn") {
state.balance = (state.balance || 0) - event.amount;
}
return state;
}
"#;
fn make_command(cmd_type: &str, amount: i64, tenant_id: &TenantId) -> Command {
Command {
command_id: uuid::Uuid::now_v7(),
tenant_id: tenant_id.clone(),
aggregate_id: AggregateId::new_v7(),
aggregate_type: AggregateType::from("Account"),
payload: json!({ "type": cmd_type, "amount": amount }),
metadata: HashMap::new(),
}
}
#[tokio::test]
async fn executor_has_defaults() {
let executor = RuntimeExecutor::new();
assert_eq!(executor.config().gas_limit, DEFAULT_GAS_LIMIT);
assert!(executor.config().cache_programs);
}
#[tokio::test]
async fn config_builder_works() {
let config = ExecutorConfig::default()
.with_gas_limit(500_000)
.with_timeout(Duration::from_millis(1000))
.without_cache();
assert_eq!(config.gas_limit, 500_000);
assert_eq!(config.timeout, Duration::from_millis(1000));
assert!(!config.cache_programs);
}
#[tokio::test]
async fn program_hash_is_consistent() {
let h1 = ProgramHash::new("test program");
let h2 = ProgramHash::new("test program");
assert_eq!(h1, h2);
let h3 = ProgramHash::new("different program");
assert_ne!(h1, h3);
}
#[tokio::test]
async fn decide_returns_events_for_deposit() {
let executor = RuntimeExecutor::new();
let tenant_id = TenantId::new("test-tenant");
let state = json!({ "balance": 100 });
let command = make_command("deposit", 50, &tenant_id);
let result = executor
.execute_decide(&state, &command, DECIDE_PROGRAM)
.await;
match result {
Ok(decide_result) => {
assert!(!decide_result.events.is_empty());
}
Err(AggregateError::DecideError(msg)) => {
assert!(
msg.contains("runtime")
|| msg.contains("not available")
|| msg.contains("not implemented")
);
}
Err(e) => panic!("Unexpected error: {:?}", e),
}
}
#[tokio::test]
async fn decide_rejects_invalid_withdraw() {
let executor = RuntimeExecutor::new();
let tenant_id = TenantId::new("test-tenant");
let state = json!({ "balance": 10 });
let command = make_command("withdraw", 100, &tenant_id);
let result = executor
.execute_decide(&state, &command, DECIDE_PROGRAM)
.await;
assert!(matches!(result, Err(AggregateError::DecideError(_))));
}
#[tokio::test]
async fn decide_is_deterministic() {
let executor = RuntimeExecutor::new();
let tenant_id = TenantId::new("test-tenant");
let state = json!({ "balance": 100 });
let command = make_command("deposit", 50, &tenant_id);
let r1 = executor
.execute_decide(&state, &command, DECIDE_PROGRAM)
.await;
let r2 = executor
.execute_decide(&state, &command, DECIDE_PROGRAM)
.await;
assert_eq!(r1.is_ok(), r2.is_ok());
}
#[tokio::test]
async fn apply_transitions_state() {
let executor = RuntimeExecutor::new();
let tenant_id = TenantId::new("test-tenant");
let state = json!({ "balance": 100 });
let event = Event {
event_id: uuid::Uuid::now_v7(),
tenant_id,
aggregate_id: AggregateId::new_v7(),
aggregate_type: AggregateType::from("Account"),
event_type: "deposited".to_string(),
version: Version::from(1),
payload: json!({ "amount": 50 }),
command_id: uuid::Uuid::now_v7(),
timestamp: chrono::Utc::now(),
correlation_id: None,
traceparent: None,
};
let result = executor.execute_apply(&state, &event, APPLY_PROGRAM).await;
match result {
Ok(apply_result) => {
assert!(apply_result.new_state.is_object());
}
Err(AggregateError::ApplyError(msg)) => {
assert!(
msg.contains("runtime")
|| msg.contains("not available")
|| msg.contains("not implemented")
);
}
Err(e) => panic!("Unexpected error: {:?}", e),
}
}
#[tokio::test]
async fn cache_stores_programs() {
let executor = RuntimeExecutor::new();
let tenant_id = TenantId::new("test-tenant");
let state = json!({ "balance": 100 });
let command = make_command("deposit", 50, &tenant_id);
assert_eq!(executor.cache_size().await, 0);
let _ = executor
.execute_decide(&state, &command, DECIDE_PROGRAM)
.await;
assert_eq!(executor.cache_size().await, 1);
}
#[tokio::test]
async fn clear_cache_works() {
let executor = RuntimeExecutor::new();
let tenant_id = TenantId::new("test-tenant");
let state = json!({ "balance": 100 });
let command = make_command("deposit", 50, &tenant_id);
let _ = executor
.execute_decide(&state, &command, DECIDE_PROGRAM)
.await;
assert!(executor.cache_size().await > 0);
executor.clear_cache().await;
assert_eq!(executor.cache_size().await, 0);
}
#[test]
fn executor_is_send_sync() {
fn assert_send_sync<T: Send + Sync>() {}
assert_send_sync::<RuntimeExecutor>();
}
}

View File

@@ -0,0 +1,259 @@
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::RwLock;
use std::time::Instant;
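/// Overall health: `Healthy`, `Degraded` with a list of issues, or `Unhealthy`
/// with the reasons.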
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum HealthStatus {
Healthy,
Degraded { issues: Vec<String> },
Unhealthy { reasons: Vec<String> },
}
impl HealthStatus {
pub fn is_healthy(&self) -> bool {
matches!(self, Self::Healthy)
}
pub fn is_degraded(&self) -> bool {
matches!(self, Self::Degraded { .. })
}
pub fn is_unhealthy(&self) -> bool {
matches!(self, Self::Unhealthy { .. })
}
}
#[derive(Debug, Clone)]
pub struct ComponentHealth {
pub name: String,
pub status: HealthStatus,
pub last_check: Instant,
pub details: HashMap<String, String>,
}
impl ComponentHealth {
pub fn healthy(name: impl Into<String>) -> Self {
Self {
name: name.into(),
status: HealthStatus::Healthy,
last_check: Instant::now(),
details: HashMap::new(),
}
}
pub fn degraded(name: impl Into<String>, issues: Vec<String>) -> Self {
Self {
name: name.into(),
status: HealthStatus::Degraded { issues },
last_check: Instant::now(),
details: HashMap::new(),
}
}
pub fn unhealthy(name: impl Into<String>, reasons: Vec<String>) -> Self {
Self {
name: name.into(),
status: HealthStatus::Unhealthy { reasons },
last_check: Instant::now(),
details: HashMap::new(),
}
}
pub fn with_detail(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
self.details.insert(key.into(), value.into());
self
}
}
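/// Tracks storage and stream connectivity and derives readiness (healthy or
/// degraded) and liveness (always true while the process is running).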
pub struct HealthChecker {
storage_healthy: AtomicBool,
stream_healthy: AtomicBool,
components: RwLock<HashMap<String, ComponentHealth>>,
}
impl HealthChecker {
pub fn new() -> Self {
Self {
storage_healthy: AtomicBool::new(true),
stream_healthy: AtomicBool::new(true),
components: RwLock::new(HashMap::new()),
}
}
pub fn storage_healthy(&self) -> bool {
self.storage_healthy.load(Ordering::Relaxed)
}
pub fn stream_healthy(&self) -> bool {
self.stream_healthy.load(Ordering::Relaxed)
}
pub fn set_storage_healthy(&self, healthy: bool) {
self.storage_healthy.store(healthy, Ordering::Relaxed);
self.update_component(
"storage",
healthy,
if healthy { "connected" } else { "disconnected" },
);
}
pub fn set_stream_healthy(&self, healthy: bool) {
self.stream_healthy.store(healthy, Ordering::Relaxed);
self.update_component(
"stream",
healthy,
if healthy { "connected" } else { "disconnected" },
);
}
fn update_component(&self, name: &str, healthy: bool, status: &str) {
let mut components = self.components.write().unwrap();
let health = if healthy {
ComponentHealth::healthy(name).with_detail("status", status)
} else {
ComponentHealth::unhealthy(name, vec![format!("status: {}", status)])
};
components.insert(name.to_string(), health);
}
pub fn check(&self) -> HealthStatus {
let storage = self.storage_healthy.load(Ordering::Relaxed);
let stream = self.stream_healthy.load(Ordering::Relaxed);
match (storage, stream) {
(true, true) => HealthStatus::Healthy,
(true, false) | (false, true) => {
let mut issues = Vec::new();
if !storage {
issues.push("storage disconnected".to_string());
}
if !stream {
issues.push("stream disconnected".to_string());
}
HealthStatus::Degraded { issues }
}
(false, false) => HealthStatus::Unhealthy {
reasons: vec![
"storage disconnected".to_string(),
"stream disconnected".to_string(),
],
},
}
}
pub fn is_ready(&self) -> bool {
let status = self.check();
status.is_healthy() || status.is_degraded()
}
pub fn is_live(&self) -> bool {
true
}
pub fn components(&self) -> HashMap<String, ComponentHealth> {
self.components.read().unwrap().clone()
}
}
impl Default for HealthChecker {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn health_status_checks() {
let healthy = HealthStatus::Healthy;
assert!(healthy.is_healthy());
assert!(!healthy.is_degraded());
assert!(!healthy.is_unhealthy());
let degraded = HealthStatus::Degraded {
issues: vec!["test".to_string()],
};
assert!(!degraded.is_healthy());
assert!(degraded.is_degraded());
assert!(!degraded.is_unhealthy());
let unhealthy = HealthStatus::Unhealthy {
reasons: vec!["test".to_string()],
};
assert!(!unhealthy.is_healthy());
assert!(!unhealthy.is_degraded());
assert!(unhealthy.is_unhealthy());
}
#[test]
fn component_health_builders() {
let healthy = ComponentHealth::healthy("storage");
assert_eq!(healthy.name, "storage");
assert!(healthy.status.is_healthy());
let degraded = ComponentHealth::degraded("stream", vec!["slow".to_string()]);
assert!(degraded.status.is_degraded());
let unhealthy = ComponentHealth::unhealthy("db", vec!["down".to_string()]);
assert!(unhealthy.status.is_unhealthy());
}
#[test]
fn health_checker_starts_healthy() {
let checker = HealthChecker::new();
assert!(checker.check().is_healthy());
}
#[test]
fn health_checker_storage_failure() {
let checker = HealthChecker::new();
checker.set_storage_healthy(false);
let status = checker.check();
assert!(status.is_degraded());
}
#[test]
fn health_checker_all_failures() {
let checker = HealthChecker::new();
checker.set_storage_healthy(false);
checker.set_stream_healthy(false);
let status = checker.check();
assert!(status.is_unhealthy());
}
#[test]
fn health_checker_is_ready() {
let checker = HealthChecker::new();
assert!(checker.is_ready());
checker.set_storage_healthy(false);
assert!(checker.is_ready());
}
#[test]
fn health_checker_is_live() {
let checker = HealthChecker::new();
assert!(checker.is_live());
checker.set_storage_healthy(false);
checker.set_stream_healthy(false);
assert!(checker.is_live());
}
#[test]
fn health_checker_tracks_components() {
let checker = HealthChecker::new();
checker.set_storage_healthy(true);
checker.set_stream_healthy(true);
let components = checker.components();
assert!(components.contains_key("storage"));
assert!(components.contains_key("stream"));
}
}

787
aggregate/src/server/mod.rs Normal file
View File

@@ -0,0 +1,787 @@
mod health;
pub use health::{HealthChecker, HealthStatus};
use crate::aggregate::AggregateHandler;
use crate::observability::Observability;
use crate::placement::{TenantPlacementManager, TenantStatus};
use crate::types::{AggregateError, AggregateId, AggregateType, Command, Event, TenantId};
use serde::de::DeserializeOwned;
use serde::{Deserialize, Serialize};
use std::collections::hash_map::DefaultHasher;
use std::collections::BTreeMap;
use std::collections::HashMap;
use std::hash::{Hash, Hasher};
use std::str::FromStr;
use std::sync::Arc;
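/// A command as received from the transport layer: routing identifiers, the
/// JSON payload, and raw headers. `x-correlation-id` and `traceparent` are
/// copied into command metadata; `x-tenant-id` supplies the tenant when one is
/// not given explicitly.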
#[derive(Debug, Clone)]
pub struct CommandRequest {
pub tenant_id: TenantId,
pub aggregate_id: AggregateId,
pub aggregate_type: AggregateType,
pub payload: serde_json::Value,
pub headers: HashMap<String, String>,
}
impl CommandRequest {
pub fn new(
tenant_id: TenantId,
aggregate_id: AggregateId,
aggregate_type: AggregateType,
payload: serde_json::Value,
) -> Self {
Self {
tenant_id,
aggregate_id,
aggregate_type,
payload,
headers: HashMap::new(),
}
}
pub fn with_header(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
self.headers.insert(key.into(), value.into());
self
}
pub fn into_command(self) -> Command {
let mut cmd = Command::new(
self.tenant_id,
self.aggregate_id,
self.aggregate_type,
self.payload,
);
if let Some(correlation_id) = self
.headers
.get("x-correlation-id")
.map(|s| s.trim())
.filter(|s| !s.is_empty())
{
cmd.metadata.insert(
"correlation_id".to_string(),
serde_json::Value::String(correlation_id.to_string()),
);
}
if let Some(traceparent) = self
.headers
.get("traceparent")
.map(|s| s.trim())
.filter(|s| !s.is_empty())
{
cmd.metadata.insert(
"traceparent".to_string(),
serde_json::Value::String(traceparent.to_string()),
);
}
cmd
}
}
#[derive(Debug, Clone)]
pub struct CommandResponse {
pub tenant_id: TenantId,
pub aggregate_id: AggregateId,
pub events: Vec<Event>,
}
#[derive(Debug, Clone)]
pub struct ServerConfig {
pub service_name: String,
pub validate_tenant_id: bool,
}
impl Default for ServerConfig {
fn default() -> Self {
Self {
service_name: "aggregate".to_string(),
validate_tenant_id: true,
}
}
}
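/// Validates the tenant id, opens a command span for observability, delegates
/// to the `AggregateHandler`, and records success or failure on the span.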
pub struct CommandServer {
handler: AggregateHandler,
observability: Arc<Observability>,
health_checker: HealthChecker,
config: ServerConfig,
}
impl CommandServer {
pub fn new(handler: AggregateHandler, observability: Observability) -> Self {
Self {
handler,
observability: Arc::new(observability),
health_checker: HealthChecker::new(),
config: ServerConfig::default(),
}
}
pub fn with_config(mut self, config: ServerConfig) -> Self {
self.config = config;
self
}
pub fn extract_tenant_id(&self, headers: &HashMap<String, String>) -> TenantId {
headers
.get("x-tenant-id")
.map(TenantId::new)
.unwrap_or_default()
}
pub fn validate_tenant_id(&self, tenant_id: &TenantId) -> Result<(), ServerError> {
if !self.config.validate_tenant_id {
return Ok(());
}
let id = tenant_id.as_str();
if id.is_empty() {
return Ok(());
}
if !id
.chars()
.all(|c| c.is_alphanumeric() || c == '-' || c == '_')
{
return Err(ServerError::InvalidTenantId {
tenant_id: tenant_id.clone(),
reason:
"tenant_id must contain only alphanumeric characters, hyphens, and underscores"
.to_string(),
});
}
Ok(())
}
pub async fn handle(&self, request: CommandRequest) -> Result<CommandResponse, ServerError> {
let tenant_id = request.tenant_id.clone();
let aggregate_id = request.aggregate_id.clone();
let aggregate_type = request.aggregate_type.clone();
self.validate_tenant_id(&tenant_id)?;
let correlation_id = request
.headers
.get("x-correlation-id")
.map(|s| s.trim())
.filter(|s| !s.is_empty())
.map(|s| s.to_string());
let trace_id = request
.headers
.get("traceparent")
.map(|s| s.trim())
.filter(|s| !s.is_empty())
.and_then(trace_id_from_traceparent);
let span = self.observability.start_command_span(
&aggregate_id.to_string(),
aggregate_type.as_str(),
tenant_id.as_str(),
"cmd",
correlation_id.as_deref(),
trace_id.as_deref(),
);
let command = request.into_command();
match self.handler.handle_command(command).await {
Ok(events) => {
self.observability
.record_command_success(&span, events.len());
Ok(CommandResponse {
tenant_id,
aggregate_id,
events,
})
}
Err(e) => {
self.observability.record_command_error(&span, &e);
Err(e.into())
}
}
}
pub async fn handle_raw(
&self,
tenant_id: Option<&str>,
aggregate_id: &str,
aggregate_type: &str,
payload: serde_json::Value,
headers: HashMap<String, String>,
) -> Result<CommandResponse, ServerError> {
let resolved_tenant_id = tenant_id
.map(TenantId::new)
.unwrap_or_else(|| self.extract_tenant_id(&headers));
let request = CommandRequest::new(
resolved_tenant_id,
AggregateId::from_str(aggregate_id).map_err(|e| ServerError::InvalidAggregateId {
id: aggregate_id.to_string(),
reason: e.to_string(),
})?,
AggregateType::from(aggregate_type),
payload,
)
.with_headers(headers);
self.handle(request).await
}
pub async fn health_check(&self) -> HealthStatus {
self.health_checker.check()
}
pub async fn ready_check(&self) -> bool {
self.health_checker.is_ready()
}
pub fn metrics(&self) -> String {
self.observability.export_metrics()
}
pub fn health_checker(&self) -> &HealthChecker {
&self.health_checker
}
pub fn observability(&self) -> &Arc<Observability> {
&self.observability
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthReport {
pub status: HealthStatus,
pub nats_connected: bool,
pub storage_connected: bool,
pub active_aggregates: usize,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TenantInfo {
pub tenant_id: TenantId,
pub aggregate_count: usize,
pub last_activity: chrono::DateTime<chrono::Utc>,
}
#[derive(Debug, Clone)]
pub struct AdminResponse {
status: u16,
body: String,
}
impl AdminResponse {
pub fn status(&self) -> AdminStatus {
AdminStatus { code: self.status }
}
pub async fn text(&self) -> String {
self.body.clone()
}
pub async fn json<T: DeserializeOwned>(&self) -> T {
serde_json::from_str(&self.body).unwrap()
}
}
#[derive(Debug, Clone)]
pub struct AdminStatus {
code: u16,
}
impl AdminStatus {
pub fn is_success(&self) -> bool {
(200..300).contains(&self.code)
}
}
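/// Consistent-hash ring with virtual-node replicas, used to route tenants to
/// nodes deterministically while limiting movement when nodes are added or
/// removed.
///
/// Illustrative usage (mirrors the tests below):
/// ```ignore
/// let mut ring = HashRing::new(100);
/// ring.add_node("node-a");
/// ring.add_node("node-b");
/// let node = ring.route("tenant-a");
/// ```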
#[derive(Debug, Clone)]
pub struct HashRing {
replicas: usize,
ring: BTreeMap<u64, String>,
}
impl HashRing {
pub fn new(replicas: usize) -> Self {
Self {
replicas: replicas.max(1),
ring: BTreeMap::new(),
}
}
pub fn add_node(&mut self, node: impl Into<String>) {
let node = node.into();
for i in 0..self.replicas {
let key = Self::hash(&(node.as_str(), i));
self.ring.insert(key, node.clone());
}
}
pub fn remove_node(&mut self, node: &str) {
let keys: Vec<u64> = self
.ring
.iter()
.filter_map(|(k, v)| if v == node { Some(*k) } else { None })
.collect();
for k in keys {
self.ring.remove(&k);
}
}
pub fn route(&self, tenant_id: &str) -> Option<&str> {
if self.ring.is_empty() {
return None;
}
let h = Self::hash(&tenant_id);
let (_, node) = self
.ring
.range(h..)
.next()
.or_else(|| self.ring.iter().next())?;
Some(node.as_str())
}
fn hash<T: Hash>(value: &T) -> u64 {
let mut hasher = DefaultHasher::new();
value.hash(&mut hasher);
hasher.finish()
}
}
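/// In-process admin surface: GET `/health`, `/ready`, `/metrics`, and
/// `/admin/tenants`, plus POST `/admin/drain` and `/admin/reload` for
/// placement changes.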
pub struct AdminServer {
observability: Arc<Observability>,
health_checker: Arc<HealthChecker>,
shard_id: String,
placement: Arc<TenantPlacementManager>,
}
impl AdminServer {
pub fn new(
observability: Observability,
health_checker: HealthChecker,
shard_id: String,
) -> Self {
let observability = Arc::new(observability);
let placement = Arc::new(TenantPlacementManager::new(observability.clone()));
Self {
observability,
health_checker: Arc::new(health_checker),
shard_id,
placement,
}
}
#[cfg(test)]
pub async fn new_test() -> Self {
let health = HealthChecker::new();
health.set_storage_healthy(true);
health.set_stream_healthy(true);
let server = Self::new(Observability::default(), health, "test-shard".to_string());
let span = server.observability.start_command_span(
"agg-1",
"Account",
"test-tenant",
"cmd-1",
None,
None,
);
server.observability.record_command_success(&span, 1);
server
.placement
.set_hosted_tenants(vec!["test-tenant".to_string()])
.await;
server
}
pub fn placement_manager(&self) -> Arc<TenantPlacementManager> {
self.placement.clone()
}
pub fn observability(&self) -> Arc<Observability> {
self.observability.clone()
}
pub fn health_checker(&self) -> &HealthChecker {
&self.health_checker
}
pub async fn get(&self, path: &str) -> AdminResponse {
match path {
"/health" => {
let report = self.health_report().await;
AdminResponse {
status: 200,
body: serde_json::to_string(&report).unwrap(),
}
}
"/ready" => AdminResponse {
status: 200,
body: serde_json::to_string(&self.health_checker.is_ready()).unwrap(),
},
"/metrics" => AdminResponse {
status: 200,
body: self.observability.export_metrics(),
},
"/admin/tenants" => {
let list: Vec<TenantStatus> = self.placement.all_statuses().await;
AdminResponse {
status: 200,
body: serde_json::to_string(&list).unwrap(),
}
}
_ => AdminResponse {
status: 404,
body: "not found".to_string(),
},
}
}
pub async fn post(&self, path: &str, body: serde_json::Value) -> AdminResponse {
match path {
"/admin/drain" => {
if let Some(tenant_id) = body.get("tenant_id").and_then(|v| v.as_str()) {
let tenant_id = TenantId::new(tenant_id);
self.placement.drain_tenant(&tenant_id).await;
self.placement.wait_drained(&tenant_id).await;
}
AdminResponse {
status: 200,
body: "{}".to_string(),
}
}
"/admin/reload" => {
if let Some(arr) = body.get("hosted_tenants").and_then(|v| v.as_array()) {
let tenants = arr
.iter()
.filter_map(|v| v.as_str().map(|s| s.to_string()))
.collect::<Vec<_>>();
self.placement.set_hosted_tenants(tenants).await;
}
if let Some(map) = body.get("placement").and_then(|v| v.as_object()) {
let placement = map
.iter()
.filter_map(|(k, v)| v.as_str().map(|s| (k.clone(), s.to_string())))
.collect::<HashMap<_, _>>();
self.placement
.apply_placement_map(&self.shard_id, &placement)
.await;
}
AdminResponse {
status: 200,
body: "{}".to_string(),
}
}
_ => AdminResponse {
status: 404,
body: "not found".to_string(),
},
}
}
pub async fn get_hosted_tenants(&self) -> Vec<TenantId> {
self.placement.hosted_tenants().await
}
async fn health_report(&self) -> HealthReport {
let active_aggregates = self.placement.hosted_tenants().await.len();
HealthReport {
status: self.health_checker.check(),
nats_connected: self.health_checker.stream_healthy(),
storage_connected: self.health_checker.storage_healthy(),
active_aggregates,
}
}
}
#[derive(Debug, Clone)]
pub enum ServerError {
InvalidTenantId { tenant_id: TenantId, reason: String },
InvalidAggregateId { id: String, reason: String },
AggregateError(AggregateError),
}
impl std::fmt::Display for ServerError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::InvalidTenantId { tenant_id, reason } => {
write!(f, "invalid tenant_id '{}': {}", tenant_id.as_str(), reason)
}
Self::InvalidAggregateId { id, reason } => {
write!(f, "invalid aggregate_id '{}': {}", id, reason)
}
Self::AggregateError(e) => write!(f, "{}", e),
}
}
}
impl std::error::Error for ServerError {}
impl From<AggregateError> for ServerError {
fn from(e: AggregateError) -> Self {
Self::AggregateError(e)
}
}
impl CommandRequest {
pub fn with_headers(mut self, headers: HashMap<String, String>) -> Self {
self.headers = headers;
self
}
}
fn trace_id_from_traceparent(traceparent: &str) -> Option<String> {
shared::trace_id_from_traceparent(traceparent).map(|s| s.to_string())
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn command_request_builder() {
let req = CommandRequest::new(
TenantId::new("tenant-a"),
AggregateId::new_v7(),
AggregateType::from("Account"),
serde_json::json!({"type": "deposit", "amount": 100}),
)
.with_header("x-request-id", "req-123");
assert_eq!(req.tenant_id.as_str(), "tenant-a");
assert_eq!(
req.headers.get("x-request-id"),
Some(&"req-123".to_string())
);
}
#[test]
fn extract_tenant_id_from_header() {
let _config = ServerConfig::default();
let mut headers = HashMap::new();
headers.insert("x-tenant-id".to_string(), "acme-corp".to_string());
let tenant_id = extract_tenant_id_static(&headers);
assert_eq!(tenant_id.as_str(), "acme-corp");
}
#[test]
fn extract_tenant_id_defaults_empty() {
let headers = HashMap::new();
let tenant_id = extract_tenant_id_static(&headers);
assert!(tenant_id.as_str().is_empty());
}
#[test]
fn validate_tenant_id_accepts_valid() {
assert!(validate_tenant_id_static(&TenantId::new("acme-corp")).is_ok());
assert!(validate_tenant_id_static(&TenantId::new("tenant_123")).is_ok());
assert!(validate_tenant_id_static(&TenantId::new("my-tenant")).is_ok());
}
#[test]
fn validate_tenant_id_rejects_invalid() {
assert!(validate_tenant_id_static(&TenantId::new("tenant@corp")).is_err());
assert!(validate_tenant_id_static(&TenantId::new("tenant name")).is_err());
}
#[test]
fn server_config_defaults() {
let config = ServerConfig::default();
assert_eq!(config.service_name, "aggregate");
assert!(config.validate_tenant_id);
}
#[test]
fn server_error_display() {
let err = ServerError::InvalidTenantId {
tenant_id: TenantId::new("bad@id"),
reason: "invalid characters".to_string(),
};
assert!(err.to_string().contains("bad@id"));
}
fn extract_tenant_id_static(headers: &HashMap<String, String>) -> TenantId {
headers
.get("x-tenant-id")
.map(TenantId::new)
.unwrap_or_default()
}
fn validate_tenant_id_static(tenant_id: &TenantId) -> Result<(), ServerError> {
let id = tenant_id.as_str();
if id.is_empty() {
return Ok(());
}
if !id
.chars()
.all(|c| c.is_alphanumeric() || c == '-' || c == '_')
{
return Err(ServerError::InvalidTenantId {
tenant_id: tenant_id.clone(),
reason:
"tenant_id must contain only alphanumeric characters, hyphens, and underscores"
.to_string(),
});
}
Ok(())
}
#[tokio::test]
async fn admin_health_endpoint_returns_status() {
let server = AdminServer::new_test().await;
let resp = server.get("/health").await;
assert!(resp.status().is_success());
let health: HealthReport = resp.json().await;
assert!(health.nats_connected);
assert!(health.storage_connected);
}
#[tokio::test]
async fn admin_ready_endpoint_returns_success() {
let server = AdminServer::new_test().await;
let resp = server.get("/ready").await;
assert!(resp.status().is_success());
}
#[tokio::test]
async fn admin_metrics_endpoint_prometheus_format() {
let server = AdminServer::new_test().await;
let resp = server.get("/metrics").await;
let body = resp.text().await;
assert!(body.contains("aggregate_commands_total"));
assert!(body.contains("tenant_id"));
}
#[tokio::test]
async fn admin_tenants_list_returns_hosted_tenants() {
let server = AdminServer::new_test().await;
let resp = server.get("/admin/tenants").await;
let tenants: Vec<TenantStatus> = resp.json().await;
assert!(tenants
.iter()
.any(|t| t.tenant_id == TenantId::new("test-tenant")));
}
#[tokio::test]
async fn admin_drain_waits_for_in_flight_commands() {
use std::time::{Duration, Instant};
let server = AdminServer::new_test().await;
let tenant_id = TenantId::new("test-tenant");
let guard = server
.placement_manager()
.begin_command(&tenant_id)
.await
.unwrap();
tokio::spawn(async move {
tokio::time::sleep(Duration::from_millis(50)).await;
drop(guard);
});
let start = Instant::now();
let resp = server
.post(
"/admin/drain",
serde_json::json!({"tenant_id": "test-tenant"}),
)
.await;
assert!(start.elapsed() < Duration::from_secs(5));
assert!(resp.status().is_success());
server.placement_manager().wait_drained(&tenant_id).await;
let status = server.placement_manager().tenant_status(&tenant_id).await;
assert!(status.draining);
assert!(!status.accepting);
assert_eq!(status.in_flight, 0);
}
#[tokio::test]
async fn admin_config_reload_updates_routing() {
let server = AdminServer::new_test().await;
let resp = server
.post(
"/admin/reload",
serde_json::json!({"hosted_tenants": ["new-tenant"]}),
)
.await;
assert!(resp.status().is_success());
let tenants = server.get_hosted_tenants().await;
assert!(tenants.contains(&TenantId::new("new-tenant")));
}
#[test]
fn admin_server_is_send() {
fn assert_send<T: Send>() {}
assert_send::<AdminServer>();
}
#[test]
fn hash_ring_routes_deterministically() {
let mut ring = HashRing::new(100);
ring.add_node("node-a");
ring.add_node("node-b");
ring.add_node("node-c");
let r1 = ring.route("tenant-a").unwrap().to_string();
let r2 = ring.route("tenant-a").unwrap().to_string();
assert_eq!(r1, r2);
}
#[test]
fn hash_ring_distributes_tenants_evenly() {
let mut ring = HashRing::new(200);
ring.add_node("node-a");
ring.add_node("node-b");
ring.add_node("node-c");
let mut counts: HashMap<String, usize> = HashMap::new();
for i in 0..3000 {
let tenant = format!("tenant-{}", i);
let node = ring.route(&tenant).unwrap().to_string();
*counts.entry(node).or_insert(0) += 1;
}
let avg = 3000.0 / 3.0;
for c in counts.values() {
let diff = (*c as f64 - avg).abs() / avg;
assert!(diff < 0.25);
}
}
#[test]
fn hash_ring_rebalances_on_node_add() {
let mut ring = HashRing::new(200);
ring.add_node("node-a");
ring.add_node("node-b");
let mut before: HashMap<String, String> = HashMap::new();
for i in 0..2000 {
let tenant = format!("tenant-{}", i);
before.insert(tenant.clone(), ring.route(&tenant).unwrap().to_string());
}
ring.add_node("node-c");
let mut moved = 0usize;
for (tenant, old) in before {
let new = ring.route(&tenant).unwrap();
if new != old {
moved += 1;
}
}
assert!(moved > 0);
assert!(moved < 2000);
}
}

View File

@@ -0,0 +1,216 @@
use std::time::{Duration, Instant};
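/// Breaker states: `Closed` (normal operation), `Open` (failing fast), and
/// `HalfOpen` (probing after the reset timeout).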
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CircuitState {
Closed,
Open,
HalfOpen,
}
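/// Failure-counting circuit breaker: opens once the failure count reaches
/// `failure_threshold` (successes reset the count), reports half-open after
/// `reset_timeout`, closes again after `half_open_threshold` successful
/// probes, and reopens on any half-open failure.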
#[derive(Debug, Clone)]
pub struct CircuitBreaker {
state: CircuitState,
failure_count: u32,
failure_threshold: u32,
reset_timeout: Duration,
last_failure_time: Option<Instant>,
half_open_successes: u32,
half_open_threshold: u32,
}
impl CircuitBreaker {
pub fn new() -> Self {
Self {
state: CircuitState::Closed,
failure_count: 0,
failure_threshold: 5,
reset_timeout: Duration::from_secs(30),
last_failure_time: None,
half_open_successes: 0,
half_open_threshold: 3,
}
}
pub fn with_failure_threshold(mut self, threshold: u32) -> Self {
self.failure_threshold = threshold;
self
}
pub fn with_reset_timeout(mut self, timeout: Duration) -> Self {
self.reset_timeout = timeout;
self
}
pub fn with_half_open_threshold(mut self, threshold: u32) -> Self {
self.half_open_threshold = threshold;
self
}
pub fn state(&self) -> CircuitState {
if self.state == CircuitState::Open {
if let Some(last_failure) = self.last_failure_time {
if last_failure.elapsed() >= self.reset_timeout {
return CircuitState::HalfOpen;
}
}
}
self.state
}
pub fn is_open(&self) -> bool {
matches!(self.state(), CircuitState::Open)
}
pub fn is_closed(&self) -> bool {
matches!(self.state(), CircuitState::Closed)
}
pub fn record_success(&mut self) {
match self.state() {
CircuitState::Closed => {
self.failure_count = 0;
}
CircuitState::HalfOpen => {
self.half_open_successes += 1;
if self.half_open_successes >= self.half_open_threshold {
self.state = CircuitState::Closed;
self.failure_count = 0;
self.half_open_successes = 0;
self.last_failure_time = None;
}
}
CircuitState::Open => {}
}
}
pub fn record_failure(&mut self) {
self.last_failure_time = Some(Instant::now());
match self.state() {
CircuitState::Closed => {
self.failure_count += 1;
if self.failure_count >= self.failure_threshold {
self.state = CircuitState::Open;
}
}
CircuitState::HalfOpen => {
self.state = CircuitState::Open;
self.half_open_successes = 0;
}
CircuitState::Open => {}
}
}
pub fn reset(&mut self) {
self.state = CircuitState::Closed;
self.failure_count = 0;
self.last_failure_time = None;
self.half_open_successes = 0;
}
}
impl Default for CircuitBreaker {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::thread::sleep;
#[test]
fn circuit_breaker_starts_closed() {
let cb = CircuitBreaker::new();
assert!(cb.is_closed());
assert!(!cb.is_open());
}
#[test]
fn circuit_breaker_opens_after_threshold() {
let mut cb = CircuitBreaker::new().with_failure_threshold(3);
cb.record_failure();
assert!(cb.is_closed());
cb.record_failure();
assert!(cb.is_closed());
cb.record_failure();
assert!(cb.is_open());
}
#[test]
fn circuit_breaker_resets_after_timeout() {
let mut cb = CircuitBreaker::new()
.with_failure_threshold(1)
.with_reset_timeout(Duration::from_millis(10));
cb.record_failure();
assert!(cb.is_open());
sleep(Duration::from_millis(15));
assert_eq!(cb.state(), CircuitState::HalfOpen);
}
#[test]
fn circuit_breaker_closes_after_half_open_successes() {
let mut cb = CircuitBreaker::new()
.with_failure_threshold(1)
.with_reset_timeout(Duration::from_millis(10))
.with_half_open_threshold(2);
cb.record_failure();
assert!(cb.is_open());
sleep(Duration::from_millis(15));
assert_eq!(cb.state(), CircuitState::HalfOpen);
cb.record_success();
assert_eq!(cb.state(), CircuitState::HalfOpen);
cb.record_success();
assert!(cb.is_closed());
}
#[test]
fn circuit_breaker_reopens_on_half_open_failure() {
let mut cb = CircuitBreaker::new()
.with_failure_threshold(1)
.with_reset_timeout(Duration::from_millis(10));
cb.record_failure();
assert!(cb.is_open());
sleep(Duration::from_millis(15));
assert_eq!(cb.state(), CircuitState::HalfOpen);
cb.record_failure();
assert!(cb.is_open());
}
#[test]
fn circuit_breaker_success_resets_failure_count() {
let mut cb = CircuitBreaker::new().with_failure_threshold(3);
cb.record_failure();
cb.record_failure();
cb.record_success();
assert!(cb.is_closed());
cb.record_failure();
assert!(cb.is_closed());
}
#[test]
fn circuit_breaker_manual_reset() {
let mut cb = CircuitBreaker::new().with_failure_threshold(1);
cb.record_failure();
assert!(cb.is_open());
cb.reset();
assert!(cb.is_closed());
}
}

View File

@@ -0,0 +1,422 @@
mod circuit_breaker;
pub use circuit_breaker::CircuitBreaker;
use crate::types::{AggregateError, AggregateId, AggregateType, Snapshot, TenantId, Version};
use edge_storage::{AggregateStore, Config as EdgeConfig, EdgeStorage, WriteResult, Writer};
use std::sync::Arc;
use tokio::sync::RwLock;
pub struct StorageClient {
storage: Arc<EdgeStorage>,
aggregate_store: AggregateStore,
writer: Arc<Writer>,
circuit_breaker: RwLock<CircuitBreaker>,
}
impl std::fmt::Debug for StorageClient {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("StorageClient")
.field("circuit_breaker", &self.circuit_breaker)
.finish_non_exhaustive()
}
}
impl StorageClient {
pub fn open(storage_path: impl Into<String>) -> Result<Self, StorageInitError> {
let config = EdgeConfig::new(storage_path.into());
let storage = EdgeStorage::open(config)?;
let writer = Arc::new(Writer::new(storage.db().clone(), &EdgeConfig::default()));
let aggregate_store = AggregateStore::new(storage.db().clone(), writer.clone());
Ok(Self {
storage: Arc::new(storage),
aggregate_store,
writer,
circuit_breaker: RwLock::new(CircuitBreaker::new()),
})
}
#[cfg(test)]
pub fn in_memory() -> Self {
use tempfile::tempdir;
let dir = tempdir().expect("failed to create temp dir");
let path = dir.path().join("test.mdbx");
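        // Intentionally leak the TempDir so the backing directory is not deleted
        // while the returned client is still using it (test-only helper).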
std::mem::forget(dir);
Self::open(path.to_string_lossy().to_string()).expect("failed to open in-memory storage")
}
pub fn with_circuit_breaker(mut self, cb: CircuitBreaker) -> Self {
self.circuit_breaker = RwLock::new(cb);
self
}
pub async fn get_snapshot(
&self,
tenant_id: &TenantId,
aggregate_id: &AggregateId,
) -> Result<Option<Snapshot>, AggregateError> {
self.check_circuit().await?;
let key = Self::build_key(tenant_id, aggregate_id);
match self.aggregate_store.get_latest_snapshot(&key) {
Ok(Some((version, data))) => {
let snapshot = self
.decode_snapshot(tenant_id, aggregate_id, version, &data)
.map_err(|e| AggregateError::StorageError(e.to_string()))?;
self.record_success().await;
Ok(Some(snapshot))
}
Ok(None) => {
self.record_success().await;
Ok(None)
}
Err(e) => {
self.record_failure().await;
Err(AggregateError::StorageError(e.to_string()))
}
}
}
pub async fn get_snapshot_at_version(
&self,
tenant_id: &TenantId,
aggregate_id: &AggregateId,
version: Version,
) -> Result<Option<Snapshot>, AggregateError> {
self.check_circuit().await?;
let key = Self::build_key(tenant_id, aggregate_id);
match self.aggregate_store.get_snapshot(&key, version.as_u64()) {
Ok(Some(data)) => {
let snapshot = self
.decode_snapshot(tenant_id, aggregate_id, version.as_u64(), &data)
.map_err(|e| AggregateError::StorageError(e.to_string()))?;
self.record_success().await;
Ok(Some(snapshot))
}
Ok(None) => {
self.record_success().await;
Ok(None)
}
Err(e) => {
self.record_failure().await;
Err(AggregateError::StorageError(e.to_string()))
}
}
}
pub async fn put_snapshot(&self, snapshot: &Snapshot) -> Result<(), AggregateError> {
self.check_circuit().await?;
let key = Self::build_key(&snapshot.tenant_id, &snapshot.aggregate_id);
let data = serde_json::to_vec(&snapshot.state)
.map_err(|e| AggregateError::StorageError(e.to_string()))?;
let result = self
.aggregate_store
.put_snapshot_sync(&key, snapshot.version.as_u64(), &data)
.map_err(|e| {
self.record_failure_sync();
AggregateError::StorageError(e.to_string())
})?;
match result {
WriteResult::Success => {
self.record_success().await;
Ok(())
}
WriteResult::VersionConflict {
aggregate_id: _,
version,
} => {
self.record_success().await;
Err(AggregateError::VersionConflict {
expected: Version::from(version).increment(),
actual: Version::from(version),
})
}
WriteResult::Error(e) => {
self.record_failure().await;
Err(AggregateError::StorageError(e))
}
}
}
pub async fn get_latest_version(
&self,
tenant_id: &TenantId,
aggregate_id: &AggregateId,
) -> Result<Option<Version>, AggregateError> {
self.check_circuit().await?;
let key = Self::build_key(tenant_id, aggregate_id);
match self.aggregate_store.get_latest_version(&key) {
Ok(Some(v)) => {
self.record_success().await;
Ok(Some(Version::from(v)))
}
Ok(None) => {
self.record_success().await;
Ok(None)
}
Err(e) => {
self.record_failure().await;
Err(AggregateError::StorageError(e.to_string()))
}
}
}
pub async fn delete_snapshot(
&self,
_tenant_id: &TenantId,
_aggregate_id: &AggregateId,
) -> Result<(), AggregateError> {
self.check_circuit().await?;
self.record_success().await;
Err(AggregateError::StorageError(
"Snapshot deletion not supported in event-sourced system".to_string(),
))
}
fn build_key(tenant_id: &TenantId, aggregate_id: &AggregateId) -> Vec<u8> {
format!("{}:{}", tenant_id.as_str(), aggregate_id).into_bytes()
}
fn decode_snapshot(
&self,
tenant_id: &TenantId,
aggregate_id: &AggregateId,
version: u64,
data: &[u8],
) -> Result<Snapshot, serde_json::Error> {
let state = serde_json::from_slice(data)?;
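        // Only the aggregate state is persisted in the snapshot blob, so the
        // aggregate type cannot be recovered here and is filled in as "Unknown".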
Ok(Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::new("Unknown"),
Version::from(version),
state,
))
}
async fn check_circuit(&self) -> Result<(), AggregateError> {
let cb = self.circuit_breaker.read().await;
if cb.is_open() {
return Err(AggregateError::StorageError(
"Circuit breaker is open".to_string(),
));
}
Ok(())
}
async fn record_success(&self) {
let mut cb = self.circuit_breaker.write().await;
cb.record_success();
}
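    // Best-effort failure recording for synchronous error paths; silently
    // skips the update if the breaker lock is currently held.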
fn record_failure_sync(&self) {
if let Ok(mut cb) = self.circuit_breaker.try_write() {
cb.record_failure();
}
}
async fn record_failure(&self) {
let mut cb = self.circuit_breaker.write().await;
cb.record_failure();
}
pub fn storage(&self) -> &Arc<EdgeStorage> {
&self.storage
}
}
impl Clone for StorageClient {
fn clone(&self) -> Self {
Self {
storage: self.storage.clone(),
aggregate_store: self.aggregate_store.clone(),
writer: self.writer.clone(),
circuit_breaker: RwLock::new(CircuitBreaker::new()),
}
}
}
#[derive(Debug, thiserror::Error)]
pub enum StorageInitError {
#[error("Failed to open storage: {0}")]
OpenError(#[from] edge_storage::Error),
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
use tempfile::tempdir;
fn create_test_client() -> (tempfile::TempDir, StorageClient) {
let dir = tempdir().unwrap();
let path = dir.path().join("test.mdbx");
let client = StorageClient::open(path.to_string_lossy().to_string()).unwrap();
(dir, client)
}
#[test]
fn storage_client_open() {
let (_dir, _client) = create_test_client();
}
#[tokio::test]
async fn storage_client_put_get_snapshot() {
let (_dir, client) = create_test_client();
let tenant_id = TenantId::new("acme-corp");
let aggregate_id = AggregateId::new_v7();
let snapshot = Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::new("Account"),
Version::from(1),
json!({"balance": 100}),
);
client.put_snapshot(&snapshot).await.unwrap();
let retrieved = client
.get_snapshot(&tenant_id, &aggregate_id)
.await
.unwrap();
assert!(retrieved.is_some());
let retrieved = retrieved.unwrap();
assert_eq!(retrieved.version, Version::from(1));
assert_eq!(retrieved.state, json!({"balance": 100}));
}
#[tokio::test]
async fn storage_client_version_conflict() {
let (_dir, client) = create_test_client();
let tenant_id = TenantId::new("acme-corp");
let aggregate_id = AggregateId::new_v7();
let snapshot_v1 = Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::new("Account"),
Version::from(1),
json!({"balance": 100}),
);
client.put_snapshot(&snapshot_v1).await.unwrap();
let snapshot_v1_again = Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::new("Account"),
Version::from(1),
json!({"balance": 200}),
);
let result = client.put_snapshot(&snapshot_v1_again).await;
assert!(matches!(
result,
Err(AggregateError::VersionConflict { .. })
));
}
#[tokio::test]
async fn storage_client_latest_version() {
let (_dir, client) = create_test_client();
let tenant_id = TenantId::new("acme-corp");
let aggregate_id = AggregateId::new_v7();
let version = client
.get_latest_version(&tenant_id, &aggregate_id)
.await
.unwrap();
assert!(version.is_none());
let snapshot_v1 = Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::new("Account"),
Version::from(1),
json!({"balance": 100}),
);
client.put_snapshot(&snapshot_v1).await.unwrap();
let version = client
.get_latest_version(&tenant_id, &aggregate_id)
.await
.unwrap();
assert_eq!(version, Some(Version::from(1)));
let snapshot_v3 = Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::new("Account"),
Version::from(3),
json!({"balance": 300}),
);
client.put_snapshot(&snapshot_v3).await.unwrap();
let version = client
.get_latest_version(&tenant_id, &aggregate_id)
.await
.unwrap();
assert_eq!(version, Some(Version::from(3)));
}
#[tokio::test]
async fn storage_client_isolation() {
let (_dir, client) = create_test_client();
let tenant_a = TenantId::new("tenant-a");
let tenant_b = TenantId::new("tenant-b");
let aggregate_id = AggregateId::new_v7();
let snapshot_a = Snapshot::new(
tenant_a.clone(),
aggregate_id.clone(),
AggregateType::new("Account"),
Version::from(1),
json!({"owner": "A"}),
);
client.put_snapshot(&snapshot_a).await.unwrap();
let snapshot_b = Snapshot::new(
tenant_b.clone(),
aggregate_id.clone(),
AggregateType::new("Account"),
Version::from(1),
json!({"owner": "B"}),
);
client.put_snapshot(&snapshot_b).await.unwrap();
let retrieved_a = client
.get_snapshot(&tenant_a, &aggregate_id)
.await
.unwrap()
.unwrap();
let retrieved_b = client
.get_snapshot(&tenant_b, &aggregate_id)
.await
.unwrap()
.unwrap();
assert_eq!(retrieved_a.state["owner"], "A");
assert_eq!(retrieved_b.state["owner"], "B");
}
#[test]
fn storage_client_is_send_sync() {
fn assert_send_sync<T: Send + Sync>() {}
assert_send_sync::<StorageClient>();
}
}

View File

@@ -0,0 +1,284 @@
use std::time::{Duration, Instant};
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CircuitState {
Closed,
Open,
HalfOpen,
}
#[derive(Debug, Clone)]
pub struct CircuitBreaker {
state: CircuitState,
failure_count: u32,
failure_threshold: u32,
reset_timeout: Duration,
last_failure_time: Option<Instant>,
half_open_successes: u32,
half_open_threshold: u32,
consecutive_successes: u32,
}
impl CircuitBreaker {
pub fn new() -> Self {
Self {
state: CircuitState::Closed,
failure_count: 0,
failure_threshold: 5,
reset_timeout: Duration::from_secs(30),
last_failure_time: None,
half_open_successes: 0,
half_open_threshold: 3,
consecutive_successes: 0,
}
}
pub fn with_failure_threshold(mut self, threshold: u32) -> Self {
self.failure_threshold = threshold;
self
}
pub fn with_reset_timeout(mut self, timeout: Duration) -> Self {
self.reset_timeout = timeout;
self
}
pub fn with_half_open_threshold(mut self, threshold: u32) -> Self {
self.half_open_threshold = threshold;
self
}
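    /// Effective state of the breaker: an Open breaker is reported as HalfOpen
    /// once `reset_timeout` has elapsed since the last failure. The transition
    /// is computed lazily and does not mutate `self.state`.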
pub fn state(&self) -> CircuitState {
if self.state == CircuitState::Open {
if let Some(last_failure) = self.last_failure_time {
if last_failure.elapsed() >= self.reset_timeout {
return CircuitState::HalfOpen;
}
}
}
self.state
}
pub fn is_open(&self) -> bool {
matches!(self.state(), CircuitState::Open)
}
pub fn is_closed(&self) -> bool {
matches!(self.state(), CircuitState::Closed)
}
pub fn is_half_open(&self) -> bool {
matches!(self.state(), CircuitState::HalfOpen)
}
pub fn failure_count(&self) -> u32 {
self.failure_count
}
pub fn consecutive_successes(&self) -> u32 {
self.consecutive_successes
}
pub fn record_success(&mut self) {
self.consecutive_successes += 1;
match self.state() {
CircuitState::Closed => {
self.failure_count = 0;
}
CircuitState::HalfOpen => {
self.half_open_successes += 1;
if self.half_open_successes >= self.half_open_threshold {
self.state = CircuitState::Closed;
self.failure_count = 0;
self.half_open_successes = 0;
self.last_failure_time = None;
}
}
CircuitState::Open => {}
}
}
pub fn record_failure(&mut self) {
self.consecutive_successes = 0;
self.last_failure_time = Some(Instant::now());
match self.state() {
CircuitState::Closed => {
self.failure_count += 1;
if self.failure_count >= self.failure_threshold {
self.state = CircuitState::Open;
}
}
CircuitState::HalfOpen => {
self.state = CircuitState::Open;
self.half_open_successes = 0;
}
CircuitState::Open => {}
}
}
pub fn reset(&mut self) {
self.state = CircuitState::Closed;
self.failure_count = 0;
self.last_failure_time = None;
self.half_open_successes = 0;
self.consecutive_successes = 0;
}
pub fn time_until_reset(&self) -> Option<Duration> {
if self.state == CircuitState::Open {
self.last_failure_time.map(|t| {
let elapsed = t.elapsed();
if elapsed < self.reset_timeout {
self.reset_timeout - elapsed
} else {
Duration::ZERO
}
})
} else {
None
}
}
}
impl Default for CircuitBreaker {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::thread::sleep;
#[test]
fn circuit_breaker_starts_closed() {
let cb = CircuitBreaker::new();
assert!(cb.is_closed());
assert!(!cb.is_open());
}
#[test]
fn circuit_breaker_opens_after_threshold() {
let mut cb = CircuitBreaker::new().with_failure_threshold(3);
cb.record_failure();
assert!(cb.is_closed());
cb.record_failure();
assert!(cb.is_closed());
cb.record_failure();
assert!(cb.is_open());
}
#[test]
fn circuit_breaker_resets_after_timeout() {
let mut cb = CircuitBreaker::new()
.with_failure_threshold(1)
.with_reset_timeout(Duration::from_millis(10));
cb.record_failure();
assert!(cb.is_open());
sleep(Duration::from_millis(15));
assert!(cb.is_half_open());
}
#[test]
fn circuit_breaker_closes_after_half_open_successes() {
let mut cb = CircuitBreaker::new()
.with_failure_threshold(1)
.with_reset_timeout(Duration::from_millis(10))
.with_half_open_threshold(2);
cb.record_failure();
assert!(cb.is_open());
sleep(Duration::from_millis(15));
assert!(cb.is_half_open());
cb.record_success();
assert!(cb.is_half_open());
cb.record_success();
assert!(cb.is_closed());
}
#[test]
fn circuit_breaker_reopens_on_half_open_failure() {
let mut cb = CircuitBreaker::new()
.with_failure_threshold(1)
.with_reset_timeout(Duration::from_millis(10));
cb.record_failure();
assert!(cb.is_open());
sleep(Duration::from_millis(15));
assert!(cb.is_half_open());
cb.record_failure();
assert!(cb.is_open());
}
#[test]
fn circuit_breaker_success_resets_failure_count() {
let mut cb = CircuitBreaker::new().with_failure_threshold(3);
cb.record_failure();
cb.record_failure();
cb.record_success();
assert!(cb.is_closed());
assert_eq!(cb.failure_count(), 0);
cb.record_failure();
assert!(cb.is_closed());
}
#[test]
fn circuit_breaker_manual_reset() {
let mut cb = CircuitBreaker::new().with_failure_threshold(1);
cb.record_failure();
assert!(cb.is_open());
cb.reset();
assert!(cb.is_closed());
assert_eq!(cb.failure_count(), 0);
}
#[test]
fn circuit_breaker_tracks_consecutive_successes() {
let mut cb = CircuitBreaker::new();
assert_eq!(cb.consecutive_successes(), 0);
cb.record_success();
assert_eq!(cb.consecutive_successes(), 1);
cb.record_success();
assert_eq!(cb.consecutive_successes(), 2);
cb.record_failure();
assert_eq!(cb.consecutive_successes(), 0);
}
#[test]
fn circuit_breaker_time_until_reset() {
let mut cb = CircuitBreaker::new()
.with_failure_threshold(1)
.with_reset_timeout(Duration::from_millis(100));
assert!(cb.time_until_reset().is_none());
cb.record_failure();
let remaining = cb.time_until_reset();
assert!(remaining.is_some());
assert!(remaining.unwrap() <= Duration::from_millis(100));
cb.reset();
assert!(cb.time_until_reset().is_none());
}
}

627
aggregate/src/stream/mod.rs Normal file
View File

@@ -0,0 +1,627 @@
mod circuit_breaker;
pub use circuit_breaker::CircuitBreaker;
use crate::types::{AggregateError, AggregateId, AggregateType, Event, TenantId, Version};
use async_nats::jetstream::{
self, consumer::pull::Config as PullConfig, consumer::AckPolicy, consumer::DeliverPolicy,
consumer::ReplayPolicy, stream::Config as StreamConfig,
};
use futures::stream::{Stream, StreamExt};
use serde_json;
use std::collections::HashMap;
use std::pin::Pin;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::broadcast;
use tokio::sync::RwLock;
use tokio::time::Instant;
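/// Single JetStream stream that captures every aggregate event; consumers
/// filter it by per-tenant/type/id subjects.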
const AGGREGATE_STREAM_NAME: &str = "AGGREGATE_EVENTS";
#[derive(Debug)]
pub struct StreamConfigSettings {
pub max_messages: i64,
pub max_bytes: i64,
pub max_age: Duration,
pub duplicate_window: Duration,
}
impl Default for StreamConfigSettings {
fn default() -> Self {
Self {
max_messages: 10_000_000,
max_bytes: -1,
max_age: Duration::from_secs(365 * 24 * 60 * 60),
duplicate_window: Duration::from_secs(120),
}
}
}
#[derive(Debug, Clone)]
pub struct StreamClient {
backend: StreamBackend,
circuit_breaker: Arc<RwLock<CircuitBreaker>>,
}
#[derive(Debug, Clone)]
#[allow(dead_code)]
enum StreamBackend {
JetStream(jetstream::Context),
InMemory(Arc<InMemoryStream>),
}
#[derive(Debug)]
struct InMemoryStream {
events_by_tenant_aggregate: RwLock<HashMap<(String, String), Vec<Event>>>,
updates: broadcast::Sender<Event>,
}
impl StreamClient {
pub async fn new(nats_url: impl Into<String>) -> Result<Self, AggregateError> {
let url = nats_url.into();
let client = async_nats::connect(&url).await.map_err(|e| {
AggregateError::StreamError(format!("Failed to connect to NATS: {}", e))
})?;
let jetstream = jetstream::new(client.clone());
Ok(Self {
backend: StreamBackend::JetStream(jetstream),
circuit_breaker: Arc::new(RwLock::new(CircuitBreaker::new())),
})
}
#[cfg(test)]
pub fn in_memory() -> Self {
let (updates, _) = broadcast::channel(1024);
Self {
backend: StreamBackend::InMemory(Arc::new(InMemoryStream {
events_by_tenant_aggregate: RwLock::new(HashMap::new()),
updates,
})),
circuit_breaker: Arc::new(RwLock::new(CircuitBreaker::new())),
}
}
pub async fn with_circuit_breaker(mut self, cb: CircuitBreaker) -> Self {
self.circuit_breaker = Arc::new(RwLock::new(cb));
self
}
pub async fn setup_stream(&self) -> Result<jetstream::stream::Stream, AggregateError> {
self.setup_stream_with_settings(StreamConfigSettings::default())
.await
}
pub async fn setup_stream_with_settings(
&self,
settings: StreamConfigSettings,
) -> Result<jetstream::stream::Stream, AggregateError> {
let jetstream = match &self.backend {
StreamBackend::JetStream(ctx) => ctx.clone(),
StreamBackend::InMemory(_) => {
return Err(AggregateError::StreamError(
"setup_stream not supported for in-memory stream".to_string(),
));
}
};
let config = StreamConfig {
name: AGGREGATE_STREAM_NAME.to_string(),
subjects: vec!["tenant.*.aggregate.*.*".to_string()],
max_messages: settings.max_messages,
max_bytes: settings.max_bytes,
max_age: settings.max_age,
duplicate_window: settings.duplicate_window,
..Default::default()
};
let stream = jetstream
.get_or_create_stream(config)
.await
.map_err(|e| AggregateError::StreamError(format!("Failed to create stream: {}", e)))?;
Ok(stream)
}
pub async fn publish_events(&self, events: Vec<Event>) -> Result<(), AggregateError> {
if events.is_empty() {
return Ok(());
}
if self.circuit_breaker.read().await.is_open() {
return Err(AggregateError::StreamError(
"Circuit breaker is open".to_string(),
));
}
match &self.backend {
StreamBackend::JetStream(jetstream) => {
for event in &events {
let subject =
build_subject(&event.tenant_id, &event.aggregate_type, &event.aggregate_id);
let payload = serde_json::to_vec(event).map_err(|e| {
AggregateError::StreamError(format!("Serialization error: {}", e))
})?;
let mut headers = async_nats::HeaderMap::new();
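                    // `Nats-Msg-Id` lets JetStream drop duplicate publishes of the
                    // same event within the stream's `duplicate_window`.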
headers.insert("Nats-Msg-Id", event.event_id.to_string().as_str());
headers.insert("aggregate-version", event.version.to_string().as_str());
headers.insert("tenant-id", event.tenant_id.as_str());
headers.insert("aggregate-type", event.aggregate_type.as_str());
headers.insert("event-type", event.event_type.as_str());
if let Some(correlation_id) = event.correlation_id.as_deref() {
headers.insert("x-correlation-id", correlation_id);
headers.insert("correlation-id", correlation_id);
}
if let Some(traceparent) = event.traceparent.as_deref() {
headers.insert("traceparent", traceparent);
if let Some(trace_id) = shared::trace_id_from_traceparent(traceparent) {
headers.insert("trace-id", trace_id);
}
}
let result = jetstream
.publish_with_headers(subject.clone(), headers.clone(), payload.into())
.await;
match result {
Ok(_) => {
self.circuit_breaker.write().await.record_success();
}
Err(e) => {
self.circuit_breaker.write().await.record_failure();
return Err(AggregateError::StreamError(format!(
"Failed to publish event: {}",
e
)));
}
}
}
}
StreamBackend::InMemory(mem) => {
for event in events {
let key = (
event.tenant_id.as_str().to_string(),
event.aggregate_id.to_string(),
);
let mut map = mem.events_by_tenant_aggregate.write().await;
let bucket = map.entry(key).or_default();
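                    // In-memory backend: skip events whose command_id was already
                    // recorded and enforce strictly increasing, gap-free versions.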
if bucket.iter().any(|e| e.command_id == event.command_id) {
continue;
}
let current_max = bucket.last().map(|e| e.version.as_u64()).unwrap_or(0);
let expected = current_max + 1;
if event.version.as_u64() != expected {
return Err(AggregateError::VersionConflict {
expected: Version::from(current_max).increment(),
actual: event.version,
});
}
bucket.push(event.clone());
bucket.sort_by_key(|e| e.version);
let _ = mem.updates.send(event);
}
self.circuit_breaker.write().await.record_success();
}
}
Ok(())
}
pub async fn fetch_events(
&self,
tenant_id: &TenantId,
aggregate_id: &AggregateId,
after_version: Version,
) -> Result<Vec<Event>, AggregateError> {
if self.circuit_breaker.read().await.is_open() {
return Err(AggregateError::StreamError(
"Circuit breaker is open".to_string(),
));
}
match &self.backend {
StreamBackend::JetStream(jetstream) => {
let stream = jetstream
.get_stream(AGGREGATE_STREAM_NAME)
.await
.map_err(|e| AggregateError::StreamError(format!("Stream not found: {}", e)))?;
let subject = format!("tenant.{}.aggregate.*.{}", tenant_id.as_str(), aggregate_id);
let consumer_name = format!(
"fetch_{}_{}_{}",
tenant_id.as_str(),
aggregate_id,
uuid::Uuid::now_v7()
);
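                // A uniquely named consumer is created for this fetch and deleted
                // again once replay completes.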
let consumer_config = PullConfig {
durable_name: Some(consumer_name.clone()),
filter_subject: subject.clone(),
deliver_policy: DeliverPolicy::All,
ack_policy: AckPolicy::Explicit,
replay_policy: ReplayPolicy::Instant,
..Default::default()
};
let consumer = stream
.get_or_create_consumer(&consumer_name, consumer_config)
.await
.map_err(|e| {
AggregateError::StreamError(format!("Consumer creation failed: {}", e))
})?;
let mut events = Vec::new();
let mut messages = consumer.messages().await.map_err(|e| {
AggregateError::StreamError(format!("Message stream error: {}", e))
})?;
let idle_timeout = Duration::from_millis(250);
let max_total_wait = Duration::from_secs(2);
let started = Instant::now();
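                // Replay messages until the consumer goes idle or the overall time
                // budget is spent.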
loop {
if started.elapsed() >= max_total_wait {
break;
}
match tokio::time::timeout(idle_timeout, messages.next()).await {
Ok(Some(Ok(msg))) => {
let event: Event =
serde_json::from_slice(&msg.payload).map_err(|e| {
AggregateError::StreamError(format!(
"Deserialization error: {}",
e
))
})?;
if event.version > after_version {
events.push(event);
}
msg.ack().await.ok();
}
Ok(Some(Err(e))) => {
return Err(AggregateError::StreamError(format!(
"Message error: {}",
e
)));
}
Ok(None) => break,
Err(_) => break,
}
}
let _ = stream.delete_consumer(&consumer_name).await;
events.sort_by_key(|e| e.version);
self.circuit_breaker.write().await.record_success();
Ok(events)
}
StreamBackend::InMemory(mem) => {
let key = (tenant_id.as_str().to_string(), aggregate_id.to_string());
let map = mem.events_by_tenant_aggregate.read().await;
let mut out = map
.get(&key)
.map(|bucket| {
bucket
.iter()
.filter(|e| e.version > after_version)
.cloned()
.collect::<Vec<_>>()
})
.unwrap_or_default();
out.sort_by_key(|e| e.version);
self.circuit_breaker.write().await.record_success();
Ok(out)
}
}
}
pub async fn subscribe_to_events(
&self,
tenant_id: TenantId,
aggregate_type: AggregateType,
aggregate_id: AggregateId,
) -> Result<Pin<Box<dyn Stream<Item = Event> + Send>>, AggregateError> {
match &self.backend {
StreamBackend::JetStream(jetstream) => {
let subject = format!(
"tenant.{}.aggregate.{}.{}",
tenant_id.as_str(),
aggregate_type.as_str(),
aggregate_id
);
let stream = jetstream
.get_stream(AGGREGATE_STREAM_NAME)
.await
.map_err(|e| AggregateError::StreamError(format!("Stream not found: {}", e)))?;
let consumer_name = format!("sub_{}_{}", tenant_id.as_str(), aggregate_id);
let consumer_config = PullConfig {
filter_subject: subject,
deliver_policy: DeliverPolicy::New,
..Default::default()
};
let consumer = stream
.get_or_create_consumer(&consumer_name, consumer_config)
.await
.map_err(|e| {
AggregateError::StreamError(format!("Consumer creation failed: {}", e))
})?;
let messages = consumer.messages().await.map_err(|e| {
AggregateError::StreamError(format!("Message stream error: {}", e))
})?;
let event_stream = messages.filter_map(move |msg| async move {
match msg {
Ok(m) => {
let event: Result<Event, _> = serde_json::from_slice(&m.payload);
match event {
Ok(e) => {
m.ack().await.ok();
Some(e)
}
Err(_) => None,
}
}
Err(_) => None,
}
});
let boxed: Pin<Box<dyn Stream<Item = Event> + Send>> = Box::pin(event_stream);
Ok(boxed)
}
StreamBackend::InMemory(mem) => {
let tenant_id = tenant_id.as_str().to_string();
let aggregate_type = aggregate_type.as_str().to_string();
let aggregate_id = aggregate_id.to_string();
let receiver = mem.updates.subscribe();
let boxed: Pin<Box<dyn Stream<Item = Event> + Send>> =
Box::pin(futures::stream::unfold(
(receiver, tenant_id, aggregate_type, aggregate_id),
|(mut receiver, tenant_id, aggregate_type, aggregate_id)| async move {
loop {
match receiver.recv().await {
Ok(event) => {
if event.tenant_id.as_str() == tenant_id
&& event.aggregate_type.as_str() == aggregate_type
&& event.aggregate_id.to_string() == aggregate_id
{
return Some((
event,
(receiver, tenant_id, aggregate_type, aggregate_id),
));
}
}
Err(broadcast::error::RecvError::Lagged(_)) => continue,
Err(broadcast::error::RecvError::Closed) => return None,
}
}
},
));
Ok(boxed)
}
}
}
pub async fn get_stream_info(&self) -> Result<Option<jetstream::stream::Info>, AggregateError> {
match &self.backend {
StreamBackend::JetStream(jetstream) => {
match jetstream.get_stream(AGGREGATE_STREAM_NAME).await {
Ok(mut stream) => {
let info = stream.info().await.map_err(|e| {
AggregateError::StreamError(format!("Stream info error: {}", e))
})?;
Ok(Some(info.clone()))
}
Err(_) => Ok(None),
}
}
StreamBackend::InMemory(_) => Ok(None),
}
}
pub async fn health_check(&self) -> Result<bool, AggregateError> {
match &self.backend {
StreamBackend::JetStream(jetstream) => {
match jetstream.get_stream(AGGREGATE_STREAM_NAME).await {
Ok(_) => {
self.circuit_breaker.write().await.record_success();
Ok(true)
}
Err(e) => {
self.circuit_breaker.write().await.record_failure();
Err(AggregateError::StreamError(format!(
"Health check failed: {}",
e
)))
}
}
}
StreamBackend::InMemory(_) => {
self.circuit_breaker.write().await.record_success();
Ok(true)
}
}
}
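    /// Snapshot of the current breaker state for synchronous callers. Note that
    /// this blocks on the async lock and is not intended to be called from
    /// inside an async task.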
pub fn circuit_breaker_state(&self) -> circuit_breaker::CircuitState {
futures::executor::block_on(async { self.circuit_breaker.read().await.state() })
}
pub async fn delete_consumer(
&self,
tenant_id: &TenantId,
aggregate_id: &AggregateId,
) -> Result<(), AggregateError> {
let consumer_name = format!("sub_{}_{}", tenant_id.as_str(), aggregate_id);
match &self.backend {
StreamBackend::JetStream(jetstream) => {
let stream = jetstream
.get_stream(AGGREGATE_STREAM_NAME)
.await
.map_err(|e| AggregateError::StreamError(format!("Stream not found: {}", e)))?;
stream.delete_consumer(&consumer_name).await.map_err(|e| {
AggregateError::StreamError(format!("Consumer deletion failed: {}", e))
})?;
Ok(())
}
StreamBackend::InMemory(_) => Ok(()),
}
}
}
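/// Per-event subject, e.g. `tenant.acme-corp.aggregate.Account.<aggregate-id>`,
/// matching the stream's `tenant.*.aggregate.*.*` subject filter.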
pub fn build_subject(
tenant_id: &TenantId,
aggregate_type: &AggregateType,
aggregate_id: &AggregateId,
) -> String {
format!(
"tenant.{}.aggregate.{}.{}",
tenant_id.as_str(),
aggregate_type.as_str(),
aggregate_id
)
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
#[test]
fn stream_client_is_send_sync() {
fn assert_send_sync<T: Send + Sync>() {}
assert_send_sync::<StreamClient>();
}
#[test]
fn subject_naming_includes_tenant() {
let tenant_id = TenantId::new("acme-corp");
let aggregate_type = AggregateType::from("Account");
let aggregate_id = AggregateId::new_v7();
let subject = build_subject(&tenant_id, &aggregate_type, &aggregate_id);
assert!(subject.starts_with("tenant.acme-corp.aggregate."));
}
#[test]
fn stream_config_settings_defaults() {
let settings = StreamConfigSettings::default();
assert_eq!(settings.max_messages, 10_000_000);
}
#[test]
fn circuit_breaker_accessible() {
let rt = tokio::runtime::Runtime::new().unwrap();
rt.block_on(async {
let cb = CircuitBreaker::new();
assert!(cb.is_closed());
});
}
#[tokio::test]
async fn publish_and_fetch_events_with_tenant() {
let stream = StreamClient::in_memory();
let tenant_id = TenantId::new("tenant-a");
let aggregate_id = AggregateId::new_v7();
let aggregate_type = AggregateType::from("Account");
let e1 = Event::new(
tenant_id.clone(),
aggregate_id.clone(),
aggregate_type.clone(),
Version::from(1),
"deposited",
json!({"amount": 10}),
uuid::Uuid::now_v7(),
);
let e2 = Event::new(
tenant_id.clone(),
aggregate_id.clone(),
aggregate_type.clone(),
Version::from(2),
"deposited",
json!({"amount": 20}),
uuid::Uuid::now_v7(),
);
stream.publish_events(vec![e1, e2]).await.unwrap();
let fetched = stream
.fetch_events(&tenant_id, &aggregate_id, Version::initial())
.await
.unwrap();
assert_eq!(fetched.len(), 2);
}
#[tokio::test]
async fn fetch_with_version_filter() {
let stream = StreamClient::in_memory();
let tenant_id = TenantId::new("tenant-a");
let aggregate_id = AggregateId::new_v7();
let aggregate_type = AggregateType::from("Account");
let mut events = Vec::new();
for v in 1..=4 {
events.push(Event::new(
tenant_id.clone(),
aggregate_id.clone(),
aggregate_type.clone(),
Version::from(v),
"deposited",
json!({"amount": v}),
uuid::Uuid::now_v7(),
));
}
stream.publish_events(events).await.unwrap();
let fetched = stream
.fetch_events(&tenant_id, &aggregate_id, Version::from(2))
.await
.unwrap();
assert_eq!(fetched.len(), 2);
assert!(fetched.iter().all(|e| e.version > Version::from(2)));
}
#[tokio::test]
async fn tenant_isolation_fetch_returns_empty() {
let stream = StreamClient::in_memory();
let tenant_a = TenantId::new("tenant-a");
let tenant_b = TenantId::new("tenant-b");
let aggregate_id = AggregateId::new_v7();
let aggregate_type = AggregateType::from("Account");
let e1 = Event::new(
tenant_a.clone(),
aggregate_id.clone(),
aggregate_type.clone(),
Version::from(1),
"deposited",
json!({"amount": 10}),
uuid::Uuid::now_v7(),
);
stream.publish_events(vec![e1]).await.unwrap();
let fetched = stream
.fetch_events(&tenant_b, &aggregate_id, Version::initial())
.await
.unwrap();
assert!(fetched.is_empty());
}
}

332
aggregate/src/swarm.rs Normal file
View File

@@ -0,0 +1,332 @@
use futures::StreamExt;
use serde::{Deserialize, Serialize};
use thiserror::Error;
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct TenantPlacementConfig {
pub virtual_nodes_per_node: usize,
pub nodes: Vec<NodePlacement>,
pub tenants: std::collections::HashMap<String, String>,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct NodePlacement {
pub node_id: String,
pub tenant_range: String,
}
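/// Renders a Docker Swarm placement constraint that pins a service to nodes
/// labelled with the given tenant range, e.g. `node.labels.tenant_range == 00-3f`.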
pub fn placement_constraint_for_tenant_range(tenant_range: &str) -> String {
format!("node.labels.tenant_range == {}", tenant_range)
}
pub fn placement_constraints_for_node(node: &NodePlacement) -> Vec<String> {
vec![placement_constraint_for_tenant_range(&node.tenant_range)]
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MigrationPlan {
pub tenant_id: String,
pub from_node: String,
pub to_node: String,
pub actions: Vec<MigrationAction>,
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MigrationAction {
DrainTenant { tenant_id: String },
UpdatePlacement { tenant_id: String, node_id: String },
ReloadConfig,
}
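/// Ordered plan for moving a tenant between nodes: drain it on the source node,
/// point placement at the target node, then reload configuration.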
pub fn plan_graceful_tenant_migration(
tenant_id: impl Into<String>,
from_node: impl Into<String>,
to_node: impl Into<String>,
) -> MigrationPlan {
let tenant_id = tenant_id.into();
let from_node = from_node.into();
let to_node = to_node.into();
MigrationPlan {
tenant_id: tenant_id.clone(),
from_node,
to_node: to_node.clone(),
actions: vec![
MigrationAction::DrainTenant {
tenant_id: tenant_id.clone(),
},
MigrationAction::UpdatePlacement {
tenant_id,
node_id: to_node,
},
MigrationAction::ReloadConfig,
],
}
}
#[derive(Debug, Error)]
pub enum TenantPlacementKvError {
#[error("NATS connection error: {0}")]
Connection(String),
#[error("KV error: {0}")]
Kv(String),
#[error("Config parse error: {0}")]
Parse(String),
#[error("Unsupported key operation")]
UnsupportedOperation,
}
#[derive(Debug, Clone)]
pub struct TenantPlacementKvClient {
kv: async_nats::jetstream::kv::Store,
}
impl TenantPlacementKvClient {
pub async fn connect(
nats_url: impl Into<String>,
bucket: impl Into<String>,
) -> Result<Self, TenantPlacementKvError> {
Self::connect_with_timeout(nats_url, bucket, std::time::Duration::from_secs(2)).await
}
pub async fn connect_with_timeout(
nats_url: impl Into<String>,
bucket: impl Into<String>,
timeout: std::time::Duration,
) -> Result<Self, TenantPlacementKvError> {
let nats_url = nats_url.into();
let bucket = bucket.into();
let client = tokio::time::timeout(timeout, async_nats::connect(nats_url))
.await
.map_err(|_| TenantPlacementKvError::Connection("connect timeout".to_string()))?
.map_err(|e| TenantPlacementKvError::Connection(e.to_string()))?;
let jetstream = async_nats::jetstream::new(client);
let kv = match jetstream.get_key_value(&bucket).await {
Ok(kv) => kv,
Err(_) => jetstream
.create_key_value(async_nats::jetstream::kv::Config {
bucket: bucket.clone(),
..Default::default()
})
.await
.map_err(|e| TenantPlacementKvError::Kv(e.to_string()))?,
};
Ok(Self { kv })
}
pub async fn get_json(
&self,
key: &str,
) -> Result<Option<serde_json::Value>, TenantPlacementKvError> {
let entry = self
.kv
.entry(key)
.await
.map_err(|e| TenantPlacementKvError::Kv(e.to_string()))?;
match entry {
Some(entry) => serde_json::from_slice::<serde_json::Value>(&entry.value)
.map(Some)
.map_err(|e| TenantPlacementKvError::Parse(e.to_string())),
None => Ok(None),
}
}
pub async fn put_json(
&self,
key: &str,
value: &serde_json::Value,
) -> Result<(), TenantPlacementKvError> {
let bytes =
serde_json::to_vec(value).map_err(|e| TenantPlacementKvError::Parse(e.to_string()))?;
self.kv
.put(key, bytes.into())
.await
.map_err(|e| TenantPlacementKvError::Kv(e.to_string()))?;
Ok(())
}
pub async fn watch_json(
&self,
pattern: &str,
) -> Result<
std::pin::Pin<
Box<
dyn futures::Stream<Item = Result<serde_json::Value, TenantPlacementKvError>>
+ Send,
>,
>,
TenantPlacementKvError,
> {
let watch = self
.kv
.watch(pattern)
.await
.map_err(|e| TenantPlacementKvError::Kv(e.to_string()))?;
Ok(Box::pin(watch.filter_map(|entry| async move {
match entry {
Ok(entry) => match entry.operation {
async_nats::jetstream::kv::Operation::Put => {
match serde_json::from_slice::<serde_json::Value>(&entry.value) {
Ok(v) => Some(Ok(v)),
Err(e) => Some(Err(TenantPlacementKvError::Parse(e.to_string()))),
}
}
async_nats::jetstream::kv::Operation::Delete
| async_nats::jetstream::kv::Operation::Purge => None,
},
Err(e) => Some(Err(TenantPlacementKvError::Kv(e.to_string()))),
}
})))
}
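    /// Tries to read the placement document from NATS KV using a short connect
    /// timeout; on any failure it falls back to the local file, parsed as JSON
    /// when the path ends in `.json` and as YAML otherwise.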
pub async fn load_config_with_fallback(
nats_url: impl Into<String>,
bucket: impl Into<String>,
key: &str,
fallback_path: &str,
) -> Result<serde_json::Value, TenantPlacementKvError> {
let try_kv = match Self::connect_with_timeout(
nats_url,
bucket,
std::time::Duration::from_millis(300),
)
.await
{
Ok(client) => match client.get_json(key).await {
Ok(Some(v)) => Ok(v),
Ok(None) => Err(TenantPlacementKvError::Kv("missing key".to_string())),
Err(e) => Err(e),
},
Err(e) => Err(e),
};
match try_kv {
Ok(v) => Ok(v),
Err(_) => {
let raw = std::fs::read_to_string(fallback_path)
.map_err(|e| TenantPlacementKvError::Kv(e.to_string()))?;
if fallback_path.ends_with(".json") {
serde_json::from_str(&raw)
.map_err(|e| TenantPlacementKvError::Parse(e.to_string()))
} else {
let yaml: serde_yaml::Value = serde_yaml::from_str(&raw)
.map_err(|e| TenantPlacementKvError::Parse(e.to_string()))?;
let json = serde_json::to_value(yaml)
.map_err(|e| TenantPlacementKvError::Parse(e.to_string()))?;
Ok(json)
}
}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use futures::StreamExt;
#[test]
fn stack_file_is_valid_yaml() {
let raw = std::fs::read_to_string("../swarm/stacks/platform.yml").unwrap();
let _: serde_yaml::Value = serde_yaml::from_str(&raw).unwrap();
}
#[test]
fn stack_services_count() {
let raw = std::fs::read_to_string("../swarm/stacks/platform.yml").unwrap();
let doc: serde_yaml::Value = serde_yaml::from_str(&raw).unwrap();
let services = doc.get("services").and_then(|v| v.as_mapping()).unwrap();
assert!(services.contains_key(serde_yaml::Value::String("nats".to_string())));
assert!(services.contains_key(serde_yaml::Value::String("gateway".to_string())));
assert!(services.contains_key(serde_yaml::Value::String("aggregate".to_string())));
}
#[test]
fn tenant_placement_config_loads() {
let raw = std::fs::read_to_string("../swarm/tenant-placement.yaml").unwrap();
let cfg: TenantPlacementConfig = serde_yaml::from_str(&raw).unwrap();
assert_eq!(cfg.virtual_nodes_per_node, 200);
assert!(cfg.nodes.iter().any(|n| n.node_id == "node-a"));
assert_eq!(cfg.tenants.get("tenant-a").unwrap(), "node-a");
}
#[test]
fn placement_constraint_generated_correctly() {
let node = NodePlacement {
node_id: "node-a".to_string(),
tenant_range: "00-3f".to_string(),
};
let constraints = placement_constraints_for_node(&node);
assert_eq!(constraints, vec!["node.labels.tenant_range == 00-3f"]);
}
#[test]
fn graceful_tenant_migration_plan_is_ordered() {
let plan = plan_graceful_tenant_migration("tenant-a", "node-a", "node-b");
assert_eq!(plan.tenant_id, "tenant-a");
assert_eq!(
plan.actions,
vec![
MigrationAction::DrainTenant {
tenant_id: "tenant-a".to_string(),
},
MigrationAction::UpdatePlacement {
tenant_id: "tenant-a".to_string(),
node_id: "node-b".to_string(),
},
MigrationAction::ReloadConfig,
]
);
}
#[tokio::test]
async fn tenant_placement_kv_falls_back_to_local_file() {
let tmp = tempfile::tempdir().unwrap();
let path = tmp.path().join("placement.yaml");
std::fs::write(
&path,
r#"
virtual_nodes_per_node: 100
nodes:
- node_id: "node-a"
tenant_range: "00-ff"
tenants:
tenant-a: "node-a"
"#,
)
.unwrap();
let cfg = TenantPlacementKvClient::load_config_with_fallback(
"nats://127.0.0.1:1",
"TENANT_PLACEMENT",
"placement",
path.to_string_lossy().as_ref(),
)
.await
.unwrap();
assert_eq!(cfg["virtual_nodes_per_node"], 100);
assert_eq!(cfg["tenants"]["tenant-a"], "node-a");
}
#[tokio::test]
async fn tenant_placement_kv_watch_returns_stream() {
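        // Without a reachable NATS server this can only assert that connecting
        // fails fast; the stream shape is exercised with an empty placeholder below.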
let result = TenantPlacementKvClient::connect_with_timeout(
"nats://127.0.0.1:1",
"TENANT_PLACEMENT",
std::time::Duration::from_millis(50),
)
.await;
assert!(result.is_err());
let mut stream =
futures::stream::empty::<Result<serde_json::Value, TenantPlacementKvError>>();
assert!(stream.next().await.is_none());
}
}

View File

@@ -0,0 +1,65 @@
use crate::types::{AggregateId, AggregateType, TenantId};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::collections::HashMap;
use uuid::Uuid;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Command {
pub tenant_id: TenantId,
pub command_id: Uuid,
pub aggregate_id: AggregateId,
pub aggregate_type: AggregateType,
pub payload: Value,
pub metadata: HashMap<String, Value>,
}
impl Command {
pub fn new(
tenant_id: TenantId,
aggregate_id: AggregateId,
aggregate_type: AggregateType,
payload: Value,
) -> Self {
Self {
tenant_id,
command_id: Uuid::now_v7(),
aggregate_id,
aggregate_type,
payload,
metadata: HashMap::new(),
}
}
pub fn with_metadata(mut self, key: impl Into<String>, value: Value) -> Self {
self.metadata.insert(key.into(), value);
self
}
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
#[test]
fn command_serialization() {
let cmd = Command::new(
TenantId::new("acme-corp"),
AggregateId::new_v7(),
AggregateType::new("Account"),
json!({"type": "deposit", "amount": 100}),
);
let json = serde_json::to_string(&cmd).unwrap();
let decoded: Command = serde_json::from_str(&json).unwrap();
assert_eq!(cmd.command_id, decoded.command_id);
assert_eq!(cmd.aggregate_id, decoded.aggregate_id);
assert_eq!(cmd.tenant_id, decoded.tenant_id);
}
#[test]
fn command_is_send_sync() {
fn assert_send_sync<T: Send + Sync>() {}
assert_send_sync::<Command>();
}
}

View File

@@ -0,0 +1,58 @@
use crate::types::{AggregateId, TenantId, Version};
use thiserror::Error;
#[derive(Debug, Clone, Error)]
pub enum AggregateError {
#[error("Tenant access denied for tenant: {tenant_id}")]
TenantAccessDenied { tenant_id: TenantId },
#[error("Tenant not hosted on this shard: {tenant_id}")]
TenantNotHosted { tenant_id: TenantId },
#[error("Tenant is draining: {tenant_id}")]
TenantDraining { tenant_id: TenantId },
#[error("Validation error: {0}")]
ValidationError(String),
#[error("Version conflict: expected {expected}, actual {actual}")]
VersionConflict { expected: Version, actual: Version },
#[error("Storage error: {0}")]
StorageError(String),
#[error("Stream error: {0}")]
StreamError(String),
#[error("Rehydration error: {0}")]
RehydrationError(String),
#[error("Decide error: {0}")]
DecideError(String),
#[error("Apply error: {0}")]
ApplyError(String),
#[error("Not found: {0}")]
NotFound(AggregateId),
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn error_implements_traits() {
let err = AggregateError::TenantAccessDenied {
tenant_id: TenantId::new("other"),
};
let _ = format!("{}", err);
let _: &dyn std::error::Error = &err;
}
#[test]
fn error_is_send_sync() {
fn assert_send_sync<T: Send + Sync>() {}
assert_send_sync::<AggregateError>();
}
}

View File

@@ -0,0 +1,78 @@
use crate::types::{AggregateId, AggregateType, TenantId, Version};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use uuid::Uuid;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Event {
pub tenant_id: TenantId,
pub event_id: Uuid,
pub aggregate_id: AggregateId,
pub aggregate_type: AggregateType,
pub version: Version,
pub event_type: String,
pub payload: Value,
pub command_id: Uuid,
pub timestamp: DateTime<Utc>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub correlation_id: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub traceparent: Option<String>,
}
impl Event {
pub fn new(
tenant_id: TenantId,
aggregate_id: AggregateId,
aggregate_type: AggregateType,
version: Version,
event_type: impl Into<String>,
payload: Value,
command_id: Uuid,
) -> Self {
Self {
tenant_id,
event_id: Uuid::now_v7(),
aggregate_id,
aggregate_type,
version,
event_type: event_type.into(),
payload,
command_id,
timestamp: Utc::now(),
correlation_id: None,
traceparent: None,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
#[test]
fn event_serialization() {
let event = Event::new(
TenantId::new("acme-corp"),
AggregateId::new_v7(),
AggregateType::new("Account"),
Version::from(1),
"Deposited",
json!({"amount": 100}),
Uuid::now_v7(),
);
let json = serde_json::to_string(&event).unwrap();
let decoded: Event = serde_json::from_str(&json).unwrap();
assert_eq!(event.event_id, decoded.event_id);
assert_eq!(event.version, decoded.version);
assert_eq!(event.tenant_id, decoded.tenant_id);
}
#[test]
fn event_is_send_sync() {
fn assert_send_sync<T: Send + Sync>() {}
assert_send_sync::<Event>();
}
}

157
aggregate/src/types/id.rs Normal file
View File

@@ -0,0 +1,157 @@
use serde::{Deserialize, Serialize};
use std::fmt;
use std::str::FromStr;
pub type TenantId = shared::TenantId;
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct AggregateId(uuid::Uuid);
impl AggregateId {
pub fn new_v7() -> Self {
Self(uuid::Uuid::now_v7())
}
pub fn from_uuid(uuid: uuid::Uuid) -> Self {
Self(uuid)
}
pub fn as_uuid(&self) -> &uuid::Uuid {
&self.0
}
}
impl fmt::Display for AggregateId {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.0)
}
}
impl FromStr for AggregateId {
type Err = uuid::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(Self(uuid::Uuid::parse_str(s)?))
}
}
impl Default for AggregateId {
fn default() -> Self {
Self::new_v7()
}
}
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct AggregateType(String);
impl AggregateType {
pub fn new(ty: impl Into<String>) -> Self {
Self(ty.into())
}
pub fn as_str(&self) -> &str {
&self.0
}
}
impl fmt::Display for AggregateType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.0)
}
}
impl From<&str> for AggregateType {
fn from(s: &str) -> Self {
Self(s.to_string())
}
}
impl From<String> for AggregateType {
fn from(s: String) -> Self {
Self(s)
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub struct Version(u64);
impl Version {
pub const fn initial() -> Self {
Self(0)
}
pub const fn from_u64(v: u64) -> Self {
Self(v)
}
pub const fn as_u64(self) -> u64 {
self.0
}
pub fn increment(self) -> Self {
Self(self.0 + 1)
}
}
impl Default for Version {
fn default() -> Self {
Self::initial()
}
}
impl From<u64> for Version {
fn from(v: u64) -> Self {
Self(v)
}
}
impl fmt::Display for Version {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.0)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn tenant_id_serialization_roundtrip() {
let id = TenantId::new("acme-corp");
let json = serde_json::to_string(&id).unwrap();
let decoded: TenantId = serde_json::from_str(&json).unwrap();
assert_eq!(id, decoded);
}
#[test]
fn tenant_id_default() {
let id = TenantId::default();
assert!(id.is_empty());
}
#[test]
fn aggregate_id_serialization_roundtrip() {
let id = AggregateId::new_v7();
let json = serde_json::to_string(&id).unwrap();
let decoded: AggregateId = serde_json::from_str(&json).unwrap();
assert_eq!(id, decoded);
}
#[test]
fn version_increment() {
let v = Version::initial();
assert_eq!(v.as_u64(), 0);
let v2 = v.increment();
assert_eq!(v2.as_u64(), 1);
assert_eq!(v.as_u64(), 0);
}
#[test]
fn types_are_send_sync() {
fn assert_send_sync<T: Send + Sync>() {}
assert_send_sync::<TenantId>();
assert_send_sync::<AggregateId>();
assert_send_sync::<AggregateType>();
assert_send_sync::<Version>();
}
}

View File

@@ -0,0 +1,61 @@
use crate::types::AggregateType;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProgramRef {
pub decide_program: String,
pub apply_program: String,
}
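/// Describes one aggregate type: the decide/apply program references plus an
/// optional snapshot threshold for that type.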
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AggregateManifest {
pub aggregate_type: AggregateType,
pub programs: ProgramRef,
pub snapshot_threshold: Option<u64>,
}
impl AggregateManifest {
pub fn new(aggregate_type: AggregateType, programs: ProgramRef) -> Self {
Self {
aggregate_type,
programs,
snapshot_threshold: None,
}
}
pub fn with_snapshot_threshold(mut self, threshold: u64) -> Self {
self.snapshot_threshold = Some(threshold);
self
}
}
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ManifestRegistry {
aggregates: HashMap<String, AggregateManifest>,
}
impl ManifestRegistry {
pub fn new() -> Self {
Self {
aggregates: HashMap::new(),
}
}
pub fn register(&mut self, manifest: AggregateManifest) {
self.aggregates
.insert(manifest.aggregate_type.as_str().to_string(), manifest);
}
pub fn get(&self, aggregate_type: &AggregateType) -> Option<&AggregateManifest> {
self.aggregates.get(aggregate_type.as_str())
}
pub fn load_from_yaml(yaml: &str) -> Result<Self, serde_yaml::Error> {
serde_yaml::from_str(yaml)
}
pub fn load_from_json(json: &str) -> Result<Self, serde_json::Error> {
serde_json::from_str(json)
}
}

View File

@@ -0,0 +1,13 @@
mod command;
mod error;
mod event;
mod id;
mod manifest;
mod snapshot;
pub use command::*;
pub use error::*;
pub use event::*;
pub use id::*;
pub use manifest::*;
pub use snapshot::*;

View File

@@ -0,0 +1,61 @@
use crate::types::{AggregateId, AggregateType, TenantId, Version};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use serde_json::Value;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Snapshot {
pub tenant_id: TenantId,
pub aggregate_id: AggregateId,
pub aggregate_type: AggregateType,
pub version: Version,
pub state: Value,
pub created_at: DateTime<Utc>,
}
impl Snapshot {
pub fn new(
tenant_id: TenantId,
aggregate_id: AggregateId,
aggregate_type: AggregateType,
version: Version,
state: Value,
) -> Self {
Self {
tenant_id,
aggregate_id,
aggregate_type,
version,
state,
created_at: Utc::now(),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
#[test]
fn snapshot_serialization() {
let snap = Snapshot::new(
TenantId::new("acme-corp"),
AggregateId::new_v7(),
AggregateType::new("Account"),
Version::from(5),
json!({"balance": 100}),
);
let json = serde_json::to_string(&snap).unwrap();
let decoded: Snapshot = serde_json::from_str(&json).unwrap();
assert_eq!(snap.aggregate_id, decoded.aggregate_id);
assert_eq!(snap.version, decoded.version);
assert_eq!(snap.tenant_id, decoded.tenant_id);
}
#[test]
fn snapshot_is_send_sync() {
fn assert_send_sync<T: Send + Sync>() {}
assert_send_sync::<Snapshot>();
}
}

View File

@@ -0,0 +1,682 @@
use aggregate::observability::Observability;
use aggregate::runtime::RuntimeExecutor;
#[cfg(feature = "runtime-v8")]
use aggregate::runtime::{execute_apply_program, execute_decide_program};
use aggregate::server::{CommandRequest, HealthChecker};
use aggregate::storage::StorageClient;
use aggregate::types::{
AggregateError, AggregateId, AggregateType, Command, Event, TenantId, Version,
};
use serde_json::json;
use std::time::Duration;
use tempfile::TempDir;
fn create_test_storage() -> (TempDir, StorageClient) {
let dir = TempDir::new().expect("failed to create temp dir");
let path = dir.path().join("test.mdbx");
let storage =
StorageClient::open(path.to_string_lossy().to_string()).expect("failed to open storage");
(dir, storage)
}
#[cfg(feature = "runtime-v8")]
fn create_test_decide_program() -> &'static str {
r#"
function decide(state, command) {
if (command.type === "deposit") {
return [{ type: "deposited", amount: command.amount }];
}
if (command.type === "withdraw") {
if (state.balance < command.amount) {
return [{ type: "error", message: "insufficient funds" }];
}
return [{ type: "withdrawn", amount: command.amount }];
}
if (command.type === "open_account") {
return [{ type: "account_opened", initial_balance: command.initial_balance || 0 }];
}
return [];
}
"#
}
#[cfg(feature = "runtime-v8")]
fn create_test_apply_program() -> &'static str {
r#"
function apply(state, event) {
if (event.type === "account_opened") {
return { balance: event.initial_balance };
}
if (event.type === "deposited") {
return { balance: (state.balance || 0) + event.amount };
}
if (event.type === "withdrawn") {
return { balance: state.balance - event.amount };
}
return state;
}
"#
}
#[test]
fn storage_tenant_isolation() {
let (_dir, storage) = create_test_storage();
let rt = tokio::runtime::Runtime::new().unwrap();
rt.block_on(async {
let tenant_a = TenantId::new("tenant-a");
let tenant_b = TenantId::new("tenant-b");
let aggregate_id = AggregateId::new_v7();
use aggregate::types::Snapshot;
let snapshot_a = Snapshot::new(
tenant_a.clone(),
aggregate_id.clone(),
AggregateType::from("Account"),
Version::from(1),
json!({"balance": 100}),
);
storage.put_snapshot(&snapshot_a).await.unwrap();
let result_a = storage
.get_snapshot(&tenant_a, &aggregate_id)
.await
.unwrap();
let result_b = storage
.get_snapshot(&tenant_b, &aggregate_id)
.await
.unwrap();
assert!(result_a.is_some());
assert!(result_b.is_none());
});
}
#[test]
fn storage_version_conflict() {
let (_dir, storage) = create_test_storage();
let rt = tokio::runtime::Runtime::new().unwrap();
rt.block_on(async {
let tenant_id = TenantId::new("tenant-a");
let aggregate_id = AggregateId::new_v7();
use aggregate::types::Snapshot;
let snapshot_v1 = Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::from("Account"),
Version::from(1),
json!({"balance": 100}),
);
storage.put_snapshot(&snapshot_v1).await.unwrap();
let result = storage.put_snapshot(&snapshot_v1).await;
assert!(matches!(
result,
Err(AggregateError::VersionConflict { .. })
));
});
}
#[test]
fn storage_latest_version() {
let (_dir, storage) = create_test_storage();
let rt = tokio::runtime::Runtime::new().unwrap();
rt.block_on(async {
let tenant_id = TenantId::new("tenant-a");
let aggregate_id = AggregateId::new_v7();
let version = storage
.get_latest_version(&tenant_id, &aggregate_id)
.await
.unwrap();
assert!(version.is_none());
use aggregate::types::Snapshot;
let snapshot_v1 = Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::from("Account"),
Version::from(1),
json!({"balance": 100}),
);
storage.put_snapshot(&snapshot_v1).await.unwrap();
let version = storage
.get_latest_version(&tenant_id, &aggregate_id)
.await
.unwrap();
assert_eq!(version, Some(Version::from(1)));
let snapshot_v3 = Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::from("Account"),
Version::from(3),
json!({"balance": 300}),
);
storage.put_snapshot(&snapshot_v3).await.unwrap();
let version = storage
.get_latest_version(&tenant_id, &aggregate_id)
.await
.unwrap();
assert_eq!(version, Some(Version::from(3)));
});
}
#[test]
fn storage_none_for_nonexistent_aggregate() {
let (_dir, storage) = create_test_storage();
let rt = tokio::runtime::Runtime::new().unwrap();
rt.block_on(async {
let tenant_id = TenantId::new("tenant-a");
let aggregate_id = AggregateId::new_v7();
let snapshot = storage
.get_snapshot(&tenant_id, &aggregate_id)
.await
.unwrap();
assert!(snapshot.is_none());
});
}
#[cfg(feature = "runtime-v8")]
#[test]
fn runtime_decide_deposit() {
let rt = tokio::runtime::Runtime::new().unwrap();
rt.block_on(async {
let state = json!({"balance": 100});
let command = json!({"type": "deposit", "amount": 50});
let events = execute_decide_program(
&state,
&command,
create_test_decide_program(),
1_000_000,
Duration::from_secs(5),
)
.await
.unwrap();
assert_eq!(events.len(), 1);
assert_eq!(events[0]["type"], "deposited");
assert_eq!(events[0]["amount"], 50);
});
}
#[cfg(feature = "runtime-v8")]
#[test]
fn runtime_decide_withdraw_insufficient() {
let rt = tokio::runtime::Runtime::new().unwrap();
rt.block_on(async {
let state = json!({"balance": 10});
let command = json!({"type": "withdraw", "amount": 100});
let events = execute_decide_program(
&state,
&command,
create_test_decide_program(),
1_000_000,
Duration::from_secs(5),
)
.await
.unwrap();
assert_eq!(events.len(), 1);
assert_eq!(events[0]["type"], "error");
});
}
#[cfg(feature = "runtime-v8")]
#[test]
fn runtime_apply_transitions_state() {
let rt = tokio::runtime::Runtime::new().unwrap();
rt.block_on(async {
let state = json!({"balance": 100});
let event = json!({"type": "deposited", "amount": 50});
let new_state = execute_apply_program(
&state,
&event,
create_test_apply_program(),
1_000_000,
Duration::from_secs(5),
)
.await
.unwrap();
assert_eq!(new_state["balance"], 150);
});
}
#[cfg(feature = "runtime-v8")]
#[test]
fn runtime_determinism() {
let rt = tokio::runtime::Runtime::new().unwrap();
rt.block_on(async {
let state = json!({"balance": 100});
let command = json!({"type": "deposit", "amount": 50});
let r1 = execute_decide_program(
&state,
&command,
create_test_decide_program(),
1_000_000,
Duration::from_secs(5),
)
.await
.unwrap();
let r2 = execute_decide_program(
&state,
&command,
create_test_decide_program(),
1_000_000,
Duration::from_secs(5),
)
.await
.unwrap();
assert_eq!(r1, r2);
});
}
#[test]
fn command_request_tenant_extraction() {
let tenant_id = TenantId::new("acme-corp");
let aggregate_id = AggregateId::new_v7();
let request = CommandRequest::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::from("Account"),
json!({"type": "deposit", "amount": 100}),
)
.with_header("x-request-id", "req-123")
.with_header("x-tenant-id", "override-tenant");
assert_eq!(request.tenant_id, tenant_id);
assert_eq!(
request.headers.get("x-request-id"),
Some(&"req-123".to_string())
);
}
#[test]
fn health_checker_tracks_state() {
let checker = HealthChecker::new();
let status = checker.check();
assert!(status.is_healthy());
assert!(checker.is_ready());
assert!(checker.is_live());
checker.set_storage_healthy(false);
checker.set_stream_healthy(false);
assert!(!checker.is_ready());
checker.set_storage_healthy(true);
checker.set_stream_healthy(true);
assert!(checker.is_ready());
}
#[test]
fn observability_metrics_export() {
let obs = Observability::default();
let span = obs.start_command_span("agg-123", "Account", "tenant-a", "deposit", None, None);
obs.record_command_success(&span, 2);
let metrics = obs.export_metrics();
assert!(metrics.contains("commands_total"));
assert!(metrics.contains("command_duration"));
}
#[test]
fn version_increment_and_ordering() {
let v0 = Version::initial();
assert_eq!(v0.as_u64(), 0);
let v1 = v0.increment();
assert_eq!(v1.as_u64(), 1);
assert_eq!(v0.as_u64(), 0);
let v2 = v1.increment();
assert_eq!(v2.as_u64(), 2);
assert!(v0 < v1);
assert!(v1 < v2);
}
#[test]
fn tenant_id_validation() {
let valid_ids = vec!["acme-corp", "tenant_123", "my-tenant", "Tenant1"];
let invalid_ids = vec!["tenant@corp", "tenant name", "tenant/id"];
for id in valid_ids {
let tenant_id = TenantId::new(id);
let chars_valid = tenant_id
.as_str()
.chars()
.all(|c| c.is_alphanumeric() || c == '-' || c == '_');
assert!(chars_valid, "Expected {} to be valid", id);
}
for id in invalid_ids {
let tenant_id = TenantId::new(id);
let chars_valid = tenant_id
.as_str()
.chars()
.all(|c| c.is_alphanumeric() || c == '-' || c == '_');
assert!(!chars_valid, "Expected {} to be invalid", id);
}
}
#[test]
fn aggregate_id_generation() {
let id1 = AggregateId::new_v7();
let id2 = AggregateId::new_v7();
assert_ne!(id1, id2);
let display = format!("{}", id1);
assert!(!display.is_empty());
}
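// Constructor round-trips: Event, Command and Snapshot keep the tenant,
// aggregate, version and payload they were built with.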
#[test]
fn event_creation() {
let tenant_id = TenantId::new("tenant-a");
let aggregate_id = AggregateId::new_v7();
let command_id = uuid::Uuid::now_v7();
let event = Event::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::from("Account"),
Version::from(1),
"deposited".to_string(),
json!({"amount": 100}),
command_id,
);
assert_eq!(event.tenant_id, tenant_id);
assert_eq!(event.aggregate_id, aggregate_id);
assert_eq!(event.version, Version::from(1));
assert_eq!(event.event_type, "deposited");
}
#[test]
fn command_creation() {
let tenant_id = TenantId::new("tenant-a");
let aggregate_id = AggregateId::new_v7();
let command = Command::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::from("Account"),
json!({"type": "deposit", "amount": 100}),
);
assert_eq!(command.tenant_id, tenant_id);
assert_eq!(command.aggregate_id, aggregate_id);
assert_eq!(command.payload["type"], "deposit");
}
#[test]
fn snapshot_creation() {
let tenant_id = TenantId::new("tenant-a");
let aggregate_id = AggregateId::new_v7();
let snapshot = aggregate::types::Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::from("Account"),
Version::from(5),
json!({"balance": 500}),
);
assert_eq!(snapshot.tenant_id, tenant_id);
assert_eq!(snapshot.aggregate_id, aggregate_id);
assert_eq!(snapshot.version, Version::from(5));
assert_eq!(snapshot.state["balance"], 500);
}
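// Circuit breaker lifecycle: three recorded failures trip it open, and after
// the reset timeout it is neither closed nor open (i.e. half-open).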
#[test]
fn circuit_breaker_pattern() {
use aggregate::storage::CircuitBreaker;
let mut cb = CircuitBreaker::new()
.with_failure_threshold(3)
.with_reset_timeout(Duration::from_millis(50));
assert!(cb.is_closed());
cb.record_failure();
cb.record_failure();
cb.record_failure();
assert!(cb.is_open());
std::thread::sleep(Duration::from_millis(60));
assert!(!cb.is_closed());
assert!(!cb.is_open());
}
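// Error and domain types must be Send + Sync so they can cross task and
// thread boundaries.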
#[test]
fn error_types_are_send_sync() {
fn assert_send_sync<T: Send + Sync>() {}
assert_send_sync::<AggregateError>();
assert_send_sync::<aggregate::server::ServerError>();
}
#[test]
fn all_types_are_send_sync() {
fn assert_send_sync<T: Send + Sync>() {}
assert_send_sync::<TenantId>();
assert_send_sync::<AggregateId>();
assert_send_sync::<AggregateType>();
assert_send_sync::<Version>();
assert_send_sync::<Command>();
assert_send_sync::<Event>();
assert_send_sync::<StorageClient>();
assert_send_sync::<RuntimeExecutor>();
assert_send_sync::<Observability>();
assert_send_sync::<HealthChecker>();
}
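// Ten tasks writing and reading snapshots concurrently across three tenants
// must all succeed without interfering with one another.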
#[test]
fn concurrent_storage_operations() {
let (_dir, storage) = create_test_storage();
let rt = tokio::runtime::Runtime::new().unwrap();
rt.block_on(async {
use aggregate::types::Snapshot;
use std::sync::Arc;
use tokio::task::JoinSet;
let storage = Arc::new(storage);
let mut tasks = JoinSet::new();
for i in 0..10 {
let storage = storage.clone();
tasks.spawn(async move {
let tenant_id = TenantId::new(format!("tenant-{}", i % 3));
let aggregate_id = AggregateId::new_v7();
let snapshot = Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::from("Account"),
Version::from(1),
json!({"balance": i * 100}),
);
storage.put_snapshot(&snapshot).await.unwrap();
let loaded = storage
.get_snapshot(&tenant_id, &aggregate_id)
.await
.unwrap();
assert!(loaded.is_some());
loaded.unwrap()
});
}
let mut results = Vec::new();
while let Some(result) = tasks.join_next().await {
results.push(result.unwrap());
}
assert_eq!(results.len(), 10);
});
}
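// The same aggregate id stored under two different tenants must stay
// isolated: each tenant reads back only its own state.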
#[test]
fn tenant_isolation_e2e() {
let (_dir, storage) = create_test_storage();
let rt = tokio::runtime::Runtime::new().unwrap();
rt.block_on(async {
use aggregate::types::Snapshot;
let tenant_a = TenantId::new("tenant-a");
let tenant_b = TenantId::new("tenant-b");
let aggregate_id = AggregateId::new_v7();
let snapshot_a = Snapshot::new(
tenant_a.clone(),
aggregate_id.clone(),
AggregateType::from("Account"),
Version::from(1),
json!({"balance": 1000, "owner": "Alice"}),
);
let snapshot_b = Snapshot::new(
tenant_b.clone(),
aggregate_id.clone(),
AggregateType::from("Account"),
Version::from(1),
json!({"balance": 500, "owner": "Bob"}),
);
storage.put_snapshot(&snapshot_a).await.unwrap();
storage.put_snapshot(&snapshot_b).await.unwrap();
let loaded_a = storage
.get_snapshot(&tenant_a, &aggregate_id)
.await
.unwrap()
.unwrap();
let loaded_b = storage
.get_snapshot(&tenant_b, &aggregate_id)
.await
.unwrap()
.unwrap();
assert_eq!(loaded_a.state["owner"], "Alice");
assert_eq!(loaded_a.state["balance"], 1000);
assert_eq!(loaded_b.state["owner"], "Bob");
assert_eq!(loaded_b.state["balance"], 500);
});
}
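// Successive snapshots for one aggregate: get_snapshot and get_latest_version
// must return the most recent version and its state.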
#[test]
fn bank_account_full_scenario() {
let (_dir, storage) = create_test_storage();
let rt = tokio::runtime::Runtime::new().unwrap();
rt.block_on(async {
use aggregate::types::Snapshot;
let tenant_id = TenantId::new("bank-test");
let aggregate_id = AggregateId::new_v7();
let snapshot_v1 = Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::from("BankAccount"),
Version::from(1),
json!({"balance": 0}),
);
storage.put_snapshot(&snapshot_v1).await.unwrap();
let snapshot_v2 = Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::from("BankAccount"),
Version::from(2),
json!({"balance": 100}),
);
storage.put_snapshot(&snapshot_v2).await.unwrap();
let snapshot_v3 = Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::from("BankAccount"),
Version::from(3),
json!({"balance": 50}),
);
storage.put_snapshot(&snapshot_v3).await.unwrap();
let loaded = storage
.get_snapshot(&tenant_id, &aggregate_id)
.await
.unwrap()
.unwrap();
assert_eq!(loaded.version, Version::from(3));
assert_eq!(loaded.state["balance"], 50);
let version = storage
.get_latest_version(&tenant_id, &aggregate_id)
.await
.unwrap();
assert_eq!(version, Some(Version::from(3)));
});
}
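// Snapshots must be written with strictly increasing versions; re-writing an
// existing version is rejected with a VersionConflict error.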
#[test]
fn version_sequence_integrity() {
let (_dir, storage) = create_test_storage();
let rt = tokio::runtime::Runtime::new().unwrap();
rt.block_on(async {
use aggregate::types::Snapshot;
let tenant_id = TenantId::new("version-test");
let aggregate_id = AggregateId::new_v7();
for v in 1..=5 {
let snapshot = Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::from("Counter"),
Version::from(v),
json!({"count": v}),
);
storage.put_snapshot(&snapshot).await.unwrap();
}
let loaded = storage
.get_snapshot(&tenant_id, &aggregate_id)
.await
.unwrap()
.unwrap();
assert_eq!(loaded.version, Version::from(5));
assert_eq!(loaded.state["count"], 5);
let duplicate = Snapshot::new(
tenant_id.clone(),
aggregate_id.clone(),
AggregateType::from("Counter"),
Version::from(5),
json!({"count": 999}),
);
let result = storage.put_snapshot(&duplicate).await;
assert!(matches!(
result,
Err(AggregateError::VersionConflict { .. })
));
});
}