transport: complete M0–M7
shared: add stream+consumer policy helpers; NATS context header builder
aggregate/runner/projection: centralize stream validation and header usage; set bounded consumer params
projection: add QueryService gRPC and wire into main; settings include PROJECTION_GRPC_ADDR
gateway: gRPC routing to Projection/Runner with deadlines; bounded read-only retries; pooled gRPC channels (bounded LRU+TTL); admin proxy forwards to gRPC; probes use concurrency limiter + TTL cache
runner: add RunnerAdmin gRPC server (drain, status, reload) and wire into main; settings include RUNNER_GRPC_ADDR
tests: add gateway authz for runner admin, projection tenant isolation, runner admin drain semantics
docs: update TRANSPORT_DEVELOPMENT_PLAN to reflect completed milestones and details
This commit is contained in:
@@ -2,7 +2,8 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let protoc = protoc_bin_vendored::protoc_bin_path()?;
|
||||
std::env::set_var("PROTOC", protoc);
|
||||
|
||||
tonic_build::configure().compile_protos(&["proto/aggregate.proto"], &["proto"])?;
|
||||
tonic_build::configure()
|
||||
.compile_protos(&["proto/aggregate.proto", "proto/admin.proto"], &["proto"])?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
25
runner/proto/admin.proto
Normal file
25
runner/proto/admin.proto
Normal file
@@ -0,0 +1,25 @@
|
||||
// Administrative gRPC API exposed by the runner (drain, drain status, reload).
syntax = "proto3";
|
||||
|
||||
package runner.admin.v1;
|
||||
|
||||
// Operator-facing control surface of a runner process.
service RunnerAdmin {
|
||||
// Starts draining. With a tenant_id only that tenant is drained; with an
// empty tenant_id the server starts a runner-wide drain.
rpc Drain(DrainRequest) returns (AdminResponse);
|
||||
// Reports the current drain progress of a single tenant.
rpc DrainStatus(DrainStatusRequest) returns (AdminResponse);
|
||||
// Asks the runner to reload.
rpc Reload(ReloadRequest) returns (AdminResponse);
|
||||
}
|
||||
|
||||
// Parameters of RunnerAdmin.Drain.
message DrainRequest {
|
||||
// Tenant to drain; empty selects the runner-wide drain.
string tenant_id = 1;
|
||||
// How long (milliseconds) the call may wait for the tenant to finish
// draining before answering; 0 answers immediately.
uint64 wait_ms = 2;
|
||||
}
|
||||
|
||||
// Parameters of RunnerAdmin.DrainStatus.
message DrainStatusRequest {
|
||||
// Tenant whose drain progress is requested; the server rejects an empty value.
string tenant_id = 1;
|
||||
}
|
||||
|
||||
// Parameters of RunnerAdmin.Reload (none).
message ReloadRequest {}
|
||||
|
||||
// Common reply of all RunnerAdmin calls.
message AdminResponse {
|
||||
// HTTP-style status code describing the outcome (e.g. 200, 202, 400).
uint32 http_status = 1;
|
||||
// JSON-encoded response body.
string json = 2;
|
||||
}
|
||||
@@ -45,6 +45,7 @@ pub struct Settings {
|
||||
pub effect_retry_backoff_ms: u64,
|
||||
|
||||
pub http_addr: String,
|
||||
pub grpc_addr: String,
|
||||
|
||||
pub test_saga_crash_after_commit: bool,
|
||||
pub test_effect_crash_after_dedupe_before_ack: bool,
|
||||
@@ -79,8 +80,12 @@ impl Default for Settings {
|
||||
workflow_commands_stream: "WORKFLOW_COMMANDS".to_string(),
|
||||
workflow_events_stream: "WORKFLOW_EVENTS".to_string(),
|
||||
|
||||
saga_trigger_subject_filters: vec!["tenant.*.aggregate.*.*".to_string()],
|
||||
effect_command_subject_filters: vec!["tenant.*.effect.*.*".to_string()],
|
||||
saga_trigger_subject_filters: vec![
|
||||
shared::NATS_SUBJECT_AGGREGATE_EVENTS_ALL.to_string()
|
||||
],
|
||||
effect_command_subject_filters: vec![
|
||||
shared::NATS_SUBJECT_EFFECT_COMMANDS_ALL.to_string()
|
||||
],
|
||||
|
||||
consumer_durable_prefix: "runner".to_string(),
|
||||
deliver_group: None,
|
||||
@@ -104,6 +109,7 @@ impl Default for Settings {
|
||||
effect_retry_backoff_ms: 250,
|
||||
|
||||
http_addr: "0.0.0.0:8080".to_string(),
|
||||
grpc_addr: "0.0.0.0:9091".to_string(),
|
||||
|
||||
test_saga_crash_after_commit: false,
|
||||
test_effect_crash_after_dedupe_before_ack: false,
|
||||
@@ -350,6 +356,11 @@ impl Settings {
|
||||
self.http_addr = addr;
|
||||
}
|
||||
}
|
||||
if let Ok(addr) = std::env::var("RUNNER_GRPC_ADDR") {
|
||||
if !addr.trim().is_empty() {
|
||||
self.grpc_addr = addr;
|
||||
}
|
||||
}
|
||||
|
||||
if let Ok(v) = std::env::var("RUNNER_TEST_SAGA_CRASH_AFTER_COMMIT") {
|
||||
self.test_saga_crash_after_commit =
|
||||
@@ -375,6 +386,12 @@ impl Settings {
|
||||
if self.aggregate_events_stream.is_empty() {
|
||||
return Err("Aggregate events stream name is required".to_string());
|
||||
}
|
||||
if self.http_addr.trim().is_empty() {
|
||||
return Err("HTTP addr is required".to_string());
|
||||
}
|
||||
if self.grpc_addr.trim().is_empty() {
|
||||
return Err("gRPC addr is required".to_string());
|
||||
}
|
||||
if matches!(self.mode, RunnerMode::Saga | RunnerMode::Combined)
|
||||
&& self.saga_trigger_subject_filters.is_empty()
|
||||
{
|
||||
@@ -388,6 +405,9 @@ impl Settings {
|
||||
if self.consumer_durable_prefix.trim().is_empty() {
|
||||
return Err("Consumer durable prefix is required".to_string());
|
||||
}
|
||||
if self.deliver_group.is_some() {
|
||||
return Err("deliver_group is not supported with pull consumers".to_string());
|
||||
}
|
||||
if self.max_in_flight == 0 {
|
||||
return Err("Max in-flight must be > 0".to_string());
|
||||
}
|
||||
@@ -479,6 +499,17 @@ mod tests {
|
||||
std::env::remove_var("RUNNER_TENANT_ALLOWLIST");
|
||||
}
|
||||
|
||||
/// `Settings::validate` must refuse a configured `deliver_group`, since the
/// validation reports it as unsupported with pull consumers.
#[test]
fn deliver_group_is_rejected_with_pull_consumers() {
    let with_deliver_group = Settings {
        deliver_group: Some(String::from("g1")),
        saga_manifest_path: String::from("runner/config/sagas.yaml"),
        effects_manifest_path: String::from("runner/config/effects.yaml"),
        ..Default::default()
    };

    let outcome = with_deliver_group.validate();
    assert!(outcome.is_err());
}
|
||||
|
||||
#[test]
|
||||
fn settings_validation_catches_missing_required() {
|
||||
let settings = Settings {
|
||||
|
||||
@@ -221,7 +221,7 @@ async fn run_effect_worker_single(
|
||||
.effect_command_subject_filters
|
||||
.first()
|
||||
.cloned()
|
||||
.unwrap_or_else(|| "tenant.*.effect.*.*".to_string());
|
||||
.unwrap_or_else(|| shared::NATS_SUBJECT_EFFECT_COMMANDS_ALL.to_string());
|
||||
|
||||
let consumer = jetstream
|
||||
.effect_command_consumer(
|
||||
@@ -326,7 +326,7 @@ async fn run_effect_worker_for_tenant(
|
||||
draining: Arc<AtomicBool>,
|
||||
) -> Result<(), RunnerError> {
|
||||
let durable_name = format!("{}_effects_{}", settings.consumer_durable_prefix, tenant);
|
||||
let filter_subject = format!("tenant.{}.effect.*.*", tenant);
|
||||
let filter_subject = shared::nats_filter_subject_effect_for_tenant(&tenant);
|
||||
|
||||
let consumer = jetstream
|
||||
.effect_command_consumer(
|
||||
@@ -467,11 +467,7 @@ enum ProcessDecision {
|
||||
}
|
||||
|
||||
trait EffectResultPublisher: Send + Sync {
|
||||
fn publish(
|
||||
&self,
|
||||
subject: String,
|
||||
result: EffectResultEnvelope,
|
||||
) -> BoxFuture<'static, Result<(), RunnerError>>;
|
||||
fn publish(&self, result: EffectResultEnvelope) -> BoxFuture<'static, Result<(), RunnerError>>;
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
@@ -486,13 +482,9 @@ impl JetStreamPublisher {
|
||||
}
|
||||
|
||||
impl EffectResultPublisher for JetStreamPublisher {
|
||||
fn publish(
|
||||
&self,
|
||||
subject: String,
|
||||
result: EffectResultEnvelope,
|
||||
) -> BoxFuture<'static, Result<(), RunnerError>> {
|
||||
fn publish(&self, result: EffectResultEnvelope) -> BoxFuture<'static, Result<(), RunnerError>> {
|
||||
let jetstream = self.jetstream.clone();
|
||||
Box::pin(async move { jetstream.publish_effect_result(subject, &result).await })
|
||||
Box::pin(async move { jetstream.publish_effect_result(&result).await })
|
||||
}
|
||||
}
|
||||
|
||||
@@ -564,14 +556,7 @@ async fn publish_and_mark(
|
||||
result.metadata.trace_id = cmd.metadata.trace_id.clone();
|
||||
}
|
||||
|
||||
let subject = format!(
|
||||
"tenant.{}.effect_result.{}.{}",
|
||||
cmd.tenant_id.as_str(),
|
||||
cmd.effect_name.as_str(),
|
||||
cmd.command_id.as_str()
|
||||
);
|
||||
|
||||
if let Err(e) = publisher.publish(subject, result).await {
|
||||
if let Err(e) = publisher.publish(result).await {
|
||||
metrics.inc_effect_publish_failed();
|
||||
return Err(e);
|
||||
}
|
||||
@@ -690,7 +675,6 @@ mod tests {
|
||||
impl EffectResultPublisher for FakePublisher {
|
||||
fn publish(
|
||||
&self,
|
||||
_subject: String,
|
||||
_result: EffectResultEnvelope,
|
||||
) -> BoxFuture<'static, Result<(), RunnerError>> {
|
||||
let fail = self.fail;
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
pub const TENANT_ID_METADATA_KEY: &str = "x-tenant-id";
|
||||
pub const CORRELATION_ID_METADATA_KEY: &str = "x-correlation-id";
|
||||
pub const TRACEPARENT_METADATA_KEY: &str = "traceparent";
|
||||
pub const TENANT_ID_METADATA_KEY: &str = shared::HEADER_X_TENANT_ID;
|
||||
pub const CORRELATION_ID_METADATA_KEY: &str = shared::HEADER_X_CORRELATION_ID;
|
||||
pub const TRACEPARENT_METADATA_KEY: &str = shared::HEADER_TRACEPARENT;
|
||||
|
||||
pub mod proto {
|
||||
tonic::include_proto!("aggregate.gateway.v1");
|
||||
@@ -47,7 +47,7 @@ impl GatewayClient {
|
||||
let correlation_id = grpc_request
|
||||
.get_ref()
|
||||
.metadata
|
||||
.get("x-correlation-id")
|
||||
.get(shared::HEADER_X_CORRELATION_ID)
|
||||
.or_else(|| grpc_request.get_ref().metadata.get("correlation_id"))
|
||||
.map(|s| s.trim())
|
||||
.filter(|s| !s.is_empty())
|
||||
@@ -68,7 +68,7 @@ impl GatewayClient {
|
||||
let traceparent = grpc_request
|
||||
.get_ref()
|
||||
.metadata
|
||||
.get("traceparent")
|
||||
.get(shared::HEADER_TRACEPARENT)
|
||||
.map(|s| s.trim())
|
||||
.filter(|s| !s.is_empty())
|
||||
.map(|s| s.to_string())
|
||||
@@ -78,10 +78,8 @@ impl GatewayClient {
|
||||
.metadata
|
||||
.get("trace_id")
|
||||
.map(|s| s.trim())
|
||||
.filter(|s| s.len() == 32 && s.chars().all(|c| c.is_ascii_hexdigit()))
|
||||
.map(|trace_id| {
|
||||
let span_id = uuid::Uuid::new_v4().simple().to_string()[..16].to_string();
|
||||
format!("00-{trace_id}-{span_id}-01")
|
||||
.and_then(|trace_id| {
|
||||
shared::traceparent_from_trace_id(&shared::TraceId::new(trace_id))
|
||||
})
|
||||
});
|
||||
if let Some(traceparent) = traceparent {
|
||||
|
||||
292
runner/src/grpc_admin.rs
Normal file
292
runner/src/grpc_admin.rs
Normal file
@@ -0,0 +1,292 @@
|
||||
use crate::http::AppState;
|
||||
use axum::http::StatusCode;
|
||||
use serde_json::json;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
/// Code generated from the `runner.admin.v1` protobuf package
/// (`RunnerAdmin` service plus its request/response messages).
pub mod proto {
|
||||
tonic::include_proto!("runner.admin.v1");
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct RunnerAdminService {
|
||||
state: Arc<AppState>,
|
||||
}
|
||||
|
||||
impl RunnerAdminService {
|
||||
pub fn new(state: Arc<AppState>) -> Self {
|
||||
Self { state }
|
||||
}
|
||||
}
|
||||
|
||||
#[tonic::async_trait]
|
||||
impl proto::runner_admin_server::RunnerAdmin for RunnerAdminService {
|
||||
async fn drain(
|
||||
&self,
|
||||
request: tonic::Request<proto::DrainRequest>,
|
||||
) -> Result<tonic::Response<proto::AdminResponse>, tonic::Status> {
|
||||
let md_tenant = request
|
||||
.metadata()
|
||||
.get(shared::HEADER_X_TENANT_ID)
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.map(|s| s.trim())
|
||||
.filter(|s| !s.is_empty())
|
||||
.map(|s| s.to_string());
|
||||
|
||||
let req = request.into_inner();
|
||||
let tenant_id = req.tenant_id.trim().to_string();
|
||||
if tenant_id.is_empty() {
|
||||
self.state.start_draining();
|
||||
return Ok(tonic::Response::new(proto::AdminResponse {
|
||||
http_status: StatusCode::OK.as_u16() as u32,
|
||||
json: json!({ "ok": true, "draining": true }).to_string(),
|
||||
}));
|
||||
}
|
||||
|
||||
if let Some(md_tenant) = md_tenant.as_deref() {
|
||||
if md_tenant != tenant_id {
|
||||
return Err(tonic::Status::permission_denied("tenant mismatch"));
|
||||
}
|
||||
}
|
||||
|
||||
self.state.tenant_gate.start_draining(&tenant_id);
|
||||
let wait_ms = req.wait_ms;
|
||||
if wait_ms > 0 {
|
||||
let deadline = tokio::time::Instant::now() + Duration::from_millis(wait_ms);
|
||||
loop {
|
||||
let status = tenant_drain_state(&self.state, &tenant_id);
|
||||
if status.drained {
|
||||
break;
|
||||
}
|
||||
if tokio::time::Instant::now() >= deadline {
|
||||
break;
|
||||
}
|
||||
let remaining = deadline.saturating_duration_since(tokio::time::Instant::now());
|
||||
let _ = self
|
||||
.state
|
||||
.tenant_gate
|
||||
.wait_inflight_zero(&tenant_id, remaining.min(Duration::from_millis(250)))
|
||||
.await;
|
||||
tokio::time::sleep(Duration::from_millis(25)).await;
|
||||
}
|
||||
}
|
||||
|
||||
let resp = tenant_drain_status(&self.state, &tenant_id);
|
||||
Ok(tonic::Response::new(resp))
|
||||
}
|
||||
|
||||
async fn drain_status(
|
||||
&self,
|
||||
request: tonic::Request<proto::DrainStatusRequest>,
|
||||
) -> Result<tonic::Response<proto::AdminResponse>, tonic::Status> {
|
||||
let md_tenant = request
|
||||
.metadata()
|
||||
.get(shared::HEADER_X_TENANT_ID)
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.map(|s| s.trim())
|
||||
.filter(|s| !s.is_empty())
|
||||
.map(|s| s.to_string());
|
||||
|
||||
let tenant_id = request.into_inner().tenant_id.trim().to_string();
|
||||
if tenant_id.is_empty() {
|
||||
return Ok(tonic::Response::new(proto::AdminResponse {
|
||||
http_status: StatusCode::BAD_REQUEST.as_u16() as u32,
|
||||
json: json!({ "ok": false, "error": "tenant_id required" }).to_string(),
|
||||
}));
|
||||
}
|
||||
if let Some(md_tenant) = md_tenant.as_deref() {
|
||||
if md_tenant != tenant_id {
|
||||
return Err(tonic::Status::permission_denied("tenant mismatch"));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(tonic::Response::new(tenant_drain_status(
|
||||
&self.state,
|
||||
&tenant_id,
|
||||
)))
|
||||
}
|
||||
|
||||
async fn reload(
|
||||
&self,
|
||||
_request: tonic::Request<proto::ReloadRequest>,
|
||||
) -> Result<tonic::Response<proto::AdminResponse>, tonic::Status> {
|
||||
self.state.notify_reload();
|
||||
Ok(tonic::Response::new(proto::AdminResponse {
|
||||
http_status: StatusCode::OK.as_u16() as u32,
|
||||
json: json!({ "ok": true }).to_string(),
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
fn tenant_drain_status(state: &AppState, tenant_id: &str) -> proto::AdminResponse {
|
||||
let status = tenant_drain_state(state, tenant_id);
|
||||
let code = if status.drained {
|
||||
StatusCode::OK
|
||||
} else {
|
||||
StatusCode::ACCEPTED
|
||||
};
|
||||
|
||||
proto::AdminResponse {
|
||||
http_status: code.as_u16() as u32,
|
||||
json: json!({
|
||||
"ok": true,
|
||||
"tenant_id": tenant_id,
|
||||
"draining_tenant": state.tenant_gate.is_draining(tenant_id),
|
||||
"assigned": state.tenant_gate.is_assigned(tenant_id),
|
||||
"in_flight": status.in_flight,
|
||||
"outbox_items": status.outbox_items,
|
||||
"drained": status.drained
|
||||
})
|
||||
.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
struct TenantDrainState {
|
||||
in_flight: usize,
|
||||
outbox_items: usize,
|
||||
drained: bool,
|
||||
}
|
||||
|
||||
fn tenant_drain_state(state: &AppState, tenant_id: &str) -> TenantDrainState {
|
||||
let in_flight = state.tenant_gate.inflight_count(tenant_id);
|
||||
let outbox_items = state
|
||||
.storage
|
||||
.list_outbox_prefix(&crate::types::TenantId::new(tenant_id.to_string()), 50_000)
|
||||
.map(|v| v.len())
|
||||
.unwrap_or(0);
|
||||
TenantDrainState {
|
||||
in_flight,
|
||||
outbox_items,
|
||||
drained: in_flight == 0 && outbox_items == 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn serve(
|
||||
addr: std::net::SocketAddr,
|
||||
state: Arc<AppState>,
|
||||
shutdown: impl std::future::Future<Output = ()> + Send + 'static,
|
||||
) -> Result<(), crate::types::RunnerError> {
|
||||
tonic::transport::Server::builder()
|
||||
.add_service(proto::runner_admin_server::RunnerAdminServer::new(
|
||||
RunnerAdminService::new(state),
|
||||
))
|
||||
.serve_with_shutdown(addr, shutdown)
|
||||
.await
|
||||
.map_err(|e| crate::types::RunnerError::StreamError(e.to_string()))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::proto::runner_admin_server::RunnerAdmin as _;
    use super::*;
    use crate::tenant_placement::TenantGate;
    use crate::types::{
        CommandId, EffectCommandEnvelope, EffectName, MessageMetadata, TenantId, WorkId, WorkItem,
    };
    use std::sync::atomic::AtomicBool;

    /// Builds a service over fresh default state backed by in-memory storage.
    /// The storage handle is returned as well so tests can seed it.
    fn service_with_storage() -> (RunnerAdminService, crate::storage::KvClient) {
        let storage = crate::storage::KvClient::in_memory();
        let state = Arc::new(crate::http::AppState::new(
            crate::Settings::default(),
            Arc::new(AtomicBool::new(false)),
            Arc::new(TenantGate::new(None)),
            Arc::new(crate::observability::Metrics::default()),
            storage.clone(),
            Arc::new(tokio::sync::Notify::new()),
        ));
        (RunnerAdminService::new(state), storage)
    }

    /// A metadata tenant that differs from the body tenant is refused.
    #[tokio::test]
    async fn rejects_tenant_mismatch() {
        let (svc, _storage) = service_with_storage();

        let mut req = tonic::Request::new(proto::DrainStatusRequest {
            tenant_id: "tenant-a".to_string(),
        });
        req.metadata_mut().insert(
            shared::HEADER_X_TENANT_ID,
            tonic::metadata::MetadataValue::try_from("tenant-b").unwrap(),
        );

        let status = svc.drain_status(req).await.unwrap_err();
        assert_eq!(status.code(), tonic::Code::PermissionDenied);
    }

    /// A pending outbox item keeps the tenant in the "not drained" (202) state.
    #[tokio::test]
    async fn drain_status_reflects_outbox_backlog() {
        let (svc, storage) = service_with_storage();

        let tenant = TenantId::new("tenant-a");
        let pending = WorkItem::EffectCommand(EffectCommandEnvelope {
            tenant_id: tenant.clone(),
            command_id: CommandId::new("c1"),
            effect_name: EffectName::new("noop"),
            payload: serde_json::json!({"ok": true}),
            metadata: MessageMetadata::default(),
        });
        let _key = storage
            .put_outbox_item(&tenant, "effect", &WorkId::new_v7(), &pending)
            .unwrap();

        let request = tonic::Request::new(proto::DrainStatusRequest {
            tenant_id: tenant.as_str().to_string(),
        });
        let reply = svc.drain_status(request).await.unwrap().into_inner();

        assert_eq!(reply.http_status, 202);
        let body: serde_json::Value = serde_json::from_str(&reply.json).unwrap();
        assert_eq!(body["tenant_id"], tenant.as_str());
        assert_eq!(body["drained"], false);
    }

    /// With no backlog, `Drain` without waiting already reports the tenant as drained.
    #[tokio::test]
    async fn drain_wait_zero_returns_drained_when_no_backlog() {
        let (svc, _storage) = service_with_storage();

        let request = tonic::Request::new(proto::DrainRequest {
            tenant_id: "tenant-a".to_string(),
            wait_ms: 0,
        });
        let reply = svc.drain(request).await.unwrap().into_inner();

        assert_eq!(reply.http_status, 200);
        let body: serde_json::Value = serde_json::from_str(&reply.json).unwrap();
        assert_eq!(body["tenant_id"], "tenant-a");
        assert_eq!(body["drained"], true);
    }
}
|
||||
@@ -19,7 +19,7 @@ use std::time::Duration;
|
||||
pub struct AppState {
|
||||
pub settings: Settings,
|
||||
draining: Arc<AtomicBool>,
|
||||
tenant_gate: Arc<TenantGate>,
|
||||
pub(crate) tenant_gate: Arc<TenantGate>,
|
||||
pub metrics: Arc<Metrics>,
|
||||
pub storage: KvClient,
|
||||
reload: Arc<tokio::sync::Notify>,
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
pub mod config;
|
||||
pub mod effects;
|
||||
pub mod gateway;
|
||||
pub mod grpc_admin;
|
||||
pub mod http;
|
||||
pub mod observability;
|
||||
pub mod outbox;
|
||||
|
||||
@@ -83,6 +83,16 @@ async fn serve() {
|
||||
.await
|
||||
});
|
||||
|
||||
let grpc_addr: std::net::SocketAddr = settings.grpc_addr.parse().unwrap();
|
||||
let grpc_shutdown = shutdown.clone();
|
||||
let grpc_state = state.clone();
|
||||
let grpc_task = tokio::spawn(async move {
|
||||
runner::grpc_admin::serve(grpc_addr, grpc_state, async move {
|
||||
grpc_shutdown.notified().await
|
||||
})
|
||||
.await
|
||||
});
|
||||
|
||||
let signal_shutdown = shutdown.clone();
|
||||
let signal_draining = draining.clone();
|
||||
tokio::spawn(async move {
|
||||
@@ -278,6 +288,7 @@ async fn serve() {
|
||||
draining.store(true, Ordering::Relaxed);
|
||||
shutdown.notify_waiters();
|
||||
let _ = http_task.await;
|
||||
let _ = grpc_task.await;
|
||||
|
||||
if let Some(e) = failed {
|
||||
tracing::error!(error = %e, "Runner terminated with error");
|
||||
|
||||
@@ -225,7 +225,7 @@ async fn run_saga_worker_single(
|
||||
.saga_trigger_subject_filters
|
||||
.first()
|
||||
.cloned()
|
||||
.unwrap_or_else(|| "tenant.*.aggregate.*.*".to_string());
|
||||
.unwrap_or_else(|| shared::NATS_SUBJECT_AGGREGATE_EVENTS_ALL.to_string());
|
||||
|
||||
let consumer = jetstream
|
||||
.saga_trigger_consumer(
|
||||
@@ -296,7 +296,7 @@ async fn run_saga_worker_for_tenant(
|
||||
draining: Arc<AtomicBool>,
|
||||
) -> Result<(), RunnerError> {
|
||||
let durable_name = format!("{}_saga_{}", settings.consumer_durable_prefix, tenant);
|
||||
let filter_subject = format!("tenant.{}.aggregate.*.*", tenant);
|
||||
let filter_subject = shared::nats_filter_subject_aggregate_for_tenant(&tenant);
|
||||
|
||||
let consumer = jetstream
|
||||
.saga_trigger_consumer(
|
||||
|
||||
@@ -31,19 +31,15 @@ impl JetStreamClient {
|
||||
|
||||
let jetstream = jetstream::new(client);
|
||||
|
||||
let aggregate_events_subjects = if settings.saga_trigger_subject_filters.is_empty() {
|
||||
vec!["tenant.*.aggregate.*.*".to_string()]
|
||||
} else {
|
||||
settings.saga_trigger_subject_filters.clone()
|
||||
};
|
||||
let aggregate_events_subjects = vec![shared::NATS_SUBJECT_AGGREGATE_EVENTS_ALL.to_string()];
|
||||
let workflow_commands_subjects = vec![
|
||||
"tenant.*.effect.*.*".to_string(),
|
||||
"tenant.*.workflow.*.*".to_string(),
|
||||
shared::NATS_SUBJECT_EFFECT_COMMANDS_ALL.to_string(),
|
||||
shared::NATS_SUBJECT_WORKFLOW_COMMANDS_ALL.to_string(),
|
||||
];
|
||||
|
||||
let workflow_events_subjects = vec![
|
||||
"tenant.*.effect_result.*.*".to_string(),
|
||||
"tenant.*.workflow_event.*.*".to_string(),
|
||||
shared::NATS_SUBJECT_EFFECT_RESULTS_ALL.to_string(),
|
||||
shared::NATS_SUBJECT_WORKFLOW_EVENTS_ALL.to_string(),
|
||||
];
|
||||
|
||||
let mut last_err = None;
|
||||
@@ -83,15 +79,20 @@ impl JetStreamClient {
|
||||
settings: &Settings,
|
||||
options: ConsumerOptions,
|
||||
) -> Result<jetstream::consumer::PullConsumer, StreamInitError> {
|
||||
let policy = shared::consumer_policy_from_parts(
|
||||
settings.ack_timeout_ms,
|
||||
settings.max_in_flight,
|
||||
settings.max_deliver,
|
||||
);
|
||||
let consumer_config = PullConfig {
|
||||
durable_name: Some(options.durable_name.clone()),
|
||||
deliver_policy: options.deliver_policy,
|
||||
ack_policy: AckPolicy::Explicit,
|
||||
ack_wait: std::time::Duration::from_millis(settings.ack_timeout_ms),
|
||||
ack_wait: policy.ack_wait,
|
||||
filter_subject: options.filter_subject,
|
||||
replay_policy: ReplayPolicy::Instant,
|
||||
max_ack_pending: settings.max_in_flight as i64,
|
||||
max_deliver: settings.max_deliver,
|
||||
max_ack_pending: policy.max_ack_pending,
|
||||
max_deliver: policy.max_deliver,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
@@ -106,15 +107,20 @@ impl JetStreamClient {
|
||||
settings: &Settings,
|
||||
options: ConsumerOptions,
|
||||
) -> Result<jetstream::consumer::PullConsumer, StreamInitError> {
|
||||
let policy = shared::consumer_policy_from_parts(
|
||||
settings.ack_timeout_ms,
|
||||
settings.max_in_flight,
|
||||
settings.max_deliver,
|
||||
);
|
||||
let consumer_config = PullConfig {
|
||||
durable_name: Some(options.durable_name.clone()),
|
||||
deliver_policy: options.deliver_policy,
|
||||
ack_policy: AckPolicy::Explicit,
|
||||
ack_wait: std::time::Duration::from_millis(settings.ack_timeout_ms),
|
||||
ack_wait: policy.ack_wait,
|
||||
filter_subject: options.filter_subject,
|
||||
replay_policy: ReplayPolicy::Instant,
|
||||
max_ack_pending: settings.max_in_flight as i64,
|
||||
max_deliver: settings.max_deliver,
|
||||
max_ack_pending: policy.max_ack_pending,
|
||||
max_deliver: policy.max_deliver,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
@@ -126,38 +132,16 @@ impl JetStreamClient {
|
||||
|
||||
pub async fn publish_effect_result(
|
||||
&self,
|
||||
subject: String,
|
||||
result: &EffectResultEnvelope,
|
||||
) -> Result<(), RunnerError> {
|
||||
let subject = shared::nats_subject_effect_result(
|
||||
result.tenant_id.as_str(),
|
||||
result.effect_name.as_str(),
|
||||
result.command_id.as_str(),
|
||||
);
|
||||
let payload =
|
||||
serde_json::to_vec(result).map_err(|e| RunnerError::DecodeError(e.to_string()))?;
|
||||
let mut headers = async_nats::HeaderMap::new();
|
||||
headers.insert("tenant-id", result.tenant_id.as_str());
|
||||
headers.insert("command-id", result.command_id.as_str());
|
||||
headers.insert("effect-name", result.effect_name.as_str());
|
||||
if let Some(correlation_id) = result.metadata.correlation_id.as_ref() {
|
||||
headers.insert("x-correlation-id", correlation_id.as_str());
|
||||
headers.insert("correlation-id", correlation_id.as_str());
|
||||
}
|
||||
if let Some(trace_id) = result.metadata.trace_id.as_ref() {
|
||||
headers.insert("trace-id", trace_id.as_str());
|
||||
if let Some(traceparent) = shared::traceparent_from_trace_id(trace_id) {
|
||||
headers.insert("traceparent", traceparent.as_str());
|
||||
}
|
||||
}
|
||||
if let Some(traceparent) = result
|
||||
.metadata
|
||||
.extra
|
||||
.get("traceparent")
|
||||
.and_then(|v| v.as_str())
|
||||
{
|
||||
headers.insert("traceparent", traceparent);
|
||||
if result.metadata.trace_id.is_none() {
|
||||
if let Some(trace_id) = shared::trace_id_from_traceparent(traceparent) {
|
||||
headers.insert("trace-id", trace_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
let headers = build_effect_result_headers(result);
|
||||
|
||||
self.jetstream
|
||||
.publish_with_headers(subject, headers, payload.into())
|
||||
@@ -170,43 +154,15 @@ impl JetStreamClient {
|
||||
&self,
|
||||
cmd: &EffectCommandEnvelope,
|
||||
) -> Result<(), RunnerError> {
|
||||
let subject = format!(
|
||||
"tenant.{}.effect.{}.{}",
|
||||
let subject = shared::nats_subject_effect_command(
|
||||
cmd.tenant_id.as_str(),
|
||||
cmd.effect_name.as_str(),
|
||||
cmd.command_id.as_str()
|
||||
cmd.command_id.as_str(),
|
||||
);
|
||||
|
||||
let payload =
|
||||
serde_json::to_vec(cmd).map_err(|e| RunnerError::DecodeError(e.to_string()))?;
|
||||
let mut headers = async_nats::HeaderMap::new();
|
||||
headers.insert("Nats-Msg-Id", cmd.command_id.as_str());
|
||||
headers.insert("tenant-id", cmd.tenant_id.as_str());
|
||||
headers.insert("command-id", cmd.command_id.as_str());
|
||||
headers.insert("effect-name", cmd.effect_name.as_str());
|
||||
if let Some(correlation_id) = cmd.metadata.correlation_id.as_ref() {
|
||||
headers.insert("x-correlation-id", correlation_id.as_str());
|
||||
headers.insert("correlation-id", correlation_id.as_str());
|
||||
}
|
||||
if let Some(trace_id) = cmd.metadata.trace_id.as_ref() {
|
||||
headers.insert("trace-id", trace_id.as_str());
|
||||
if let Some(traceparent) = shared::traceparent_from_trace_id(trace_id) {
|
||||
headers.insert("traceparent", traceparent.as_str());
|
||||
}
|
||||
}
|
||||
if let Some(traceparent) = cmd
|
||||
.metadata
|
||||
.extra
|
||||
.get("traceparent")
|
||||
.and_then(|v| v.as_str())
|
||||
{
|
||||
headers.insert("traceparent", traceparent);
|
||||
if cmd.metadata.trace_id.is_none() {
|
||||
if let Some(trace_id) = shared::trace_id_from_traceparent(traceparent) {
|
||||
headers.insert("trace-id", trace_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
let headers = build_effect_command_headers(cmd);
|
||||
|
||||
self.jetstream
|
||||
.publish_with_headers(subject, headers, payload.into())
|
||||
@@ -216,6 +172,120 @@ impl JetStreamClient {
|
||||
}
|
||||
}
|
||||
|
||||
fn build_effect_command_headers(cmd: &EffectCommandEnvelope) -> async_nats::HeaderMap {
|
||||
let mut headers = async_nats::HeaderMap::new();
|
||||
|
||||
let effect_name = cmd.effect_name.as_str().to_string();
|
||||
|
||||
let ctx = shared::nats_context_headers_required(
|
||||
cmd.tenant_id.as_str(),
|
||||
Some(cmd.command_id.as_str()),
|
||||
cmd.metadata.correlation_id.as_ref().map(|v| v.as_str()),
|
||||
cmd.metadata
|
||||
.extra
|
||||
.get(shared::HEADER_TRACEPARENT)
|
||||
.and_then(|v| v.as_str()),
|
||||
cmd.metadata.trace_id.as_ref().map(|v| v.as_str()),
|
||||
);
|
||||
for (k, v) in ctx {
|
||||
headers.insert(k, v);
|
||||
}
|
||||
|
||||
headers.insert("command-id", cmd.command_id.as_str().to_string());
|
||||
headers.insert("effect-name", effect_name);
|
||||
|
||||
headers
|
||||
}
|
||||
|
||||
fn build_effect_result_headers(result: &EffectResultEnvelope) -> async_nats::HeaderMap {
|
||||
let mut headers = async_nats::HeaderMap::new();
|
||||
|
||||
let effect_name = result.effect_name.as_str().to_string();
|
||||
|
||||
let ctx = shared::nats_context_headers_required(
|
||||
result.tenant_id.as_str(),
|
||||
Some(result.command_id.as_str()),
|
||||
result.metadata.correlation_id.as_ref().map(|v| v.as_str()),
|
||||
result
|
||||
.metadata
|
||||
.extra
|
||||
.get(shared::HEADER_TRACEPARENT)
|
||||
.and_then(|v| v.as_str()),
|
||||
result.metadata.trace_id.as_ref().map(|v| v.as_str()),
|
||||
);
|
||||
for (k, v) in ctx {
|
||||
headers.insert(k, v);
|
||||
}
|
||||
|
||||
headers.insert("command-id", result.command_id.as_str().to_string());
|
||||
headers.insert("effect-name", effect_name);
|
||||
|
||||
headers
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{CommandId, EffectName, MessageMetadata, TenantId};
    use chrono::Utc;

    /// Asserts that every context header the effect builders must emit is present.
    fn assert_required_context(headers: &async_nats::HeaderMap) {
        assert!(headers.get(shared::NATS_HEADER_TENANT_ID).is_some());
        assert!(headers.get(shared::NATS_HEADER_NATS_MSG_ID).is_some());
        assert!(headers.get(shared::HEADER_X_CORRELATION_ID).is_some());
        assert!(headers.get(shared::NATS_HEADER_CORRELATION_ID).is_some());
        assert!(headers.get(shared::HEADER_TRACEPARENT).is_some());
        assert!(headers.get(shared::HEADER_TRACE_ID).is_some());
    }

    /// Two-subject stream config used as the expected side of the validation tests.
    fn two_subject_config() -> StreamConfig {
        stream_policy_config("S", vec!["a".to_string(), "b".to_string()])
    }

    #[test]
    fn effect_command_headers_include_required_context() {
        let cmd = EffectCommandEnvelope {
            tenant_id: TenantId::new("t1"),
            command_id: CommandId::new("c1"),
            effect_name: EffectName::new("send_email"),
            payload: serde_json::json!({"x": 1}),
            metadata: MessageMetadata::default(),
        };

        assert_required_context(&build_effect_command_headers(&cmd));
    }

    #[test]
    fn effect_result_headers_include_required_context() {
        let result = EffectResultEnvelope {
            tenant_id: TenantId::new("t1"),
            command_id: CommandId::new("c1"),
            effect_name: EffectName::new("send_email"),
            result_type: crate::types::EffectResultType::Succeeded,
            payload: serde_json::json!({"ok": true}),
            timestamp: Utc::now(),
            metadata: MessageMetadata::default(),
        };

        assert_required_context(&build_effect_result_headers(&result));
    }

    #[test]
    fn stream_config_validation_allows_subject_superset() {
        let expected = two_subject_config();
        let mut actual = expected.clone();
        actual.subjects.push("c".to_string());

        validate_stream_config(&expected, &actual).unwrap();
    }

    #[test]
    fn stream_config_validation_rejects_missing_subject() {
        let expected = two_subject_config();
        let mut actual = expected.clone();
        actual.subjects.retain(|subject| subject != "b");

        let outcome = validate_stream_config(&expected, &actual);
        assert!(outcome.is_err());
    }
}
|
||||
|
||||
async fn try_init_streams(
|
||||
jetstream: &jetstream::Context,
|
||||
settings: &Settings,
|
||||
@@ -263,18 +333,54 @@ async fn ensure_stream(
|
||||
name: &str,
|
||||
subjects: Vec<String>,
|
||||
) -> Result<jetstream::stream::Stream, StreamInitError> {
|
||||
let config = StreamConfig {
|
||||
name: name.to_string(),
|
||||
subjects,
|
||||
max_messages: 10_000_000,
|
||||
max_bytes: -1,
|
||||
max_age: std::time::Duration::from_secs(365 * 24 * 60 * 60),
|
||||
duplicate_window: std::time::Duration::from_secs(120),
|
||||
..Default::default()
|
||||
};
|
||||
jetstream
|
||||
.get_or_create_stream(config)
|
||||
let expected = stream_policy_config(name, subjects);
|
||||
let mut stream = jetstream
|
||||
.get_or_create_stream(expected.clone())
|
||||
.await
|
||||
.map_err(|e| StreamInitError::Stream(e.to_string()))?;
|
||||
|
||||
let info = stream
|
||||
.info()
|
||||
.await
|
||||
.map_err(|e| StreamInitError::Stream(e.to_string()))?;
|
||||
validate_stream_config(&expected, &info.config)?;
|
||||
Ok(stream)
|
||||
}
|
||||
|
||||
fn stream_policy_config(name: &str, subjects: Vec<String>) -> StreamConfig {
|
||||
let policy = shared::stream_policy_defaults(name.to_string(), subjects);
|
||||
StreamConfig {
|
||||
name: policy.name,
|
||||
subjects: policy.subjects,
|
||||
max_messages: policy.max_messages,
|
||||
max_bytes: policy.max_bytes,
|
||||
max_age: policy.max_age,
|
||||
duplicate_window: policy.duplicate_window,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
fn validate_stream_config(
|
||||
expected: &StreamConfig,
|
||||
actual: &StreamConfig,
|
||||
) -> Result<(), StreamInitError> {
|
||||
let expected = shared::stream_policy_from_parts(
|
||||
expected.name.as_str(),
|
||||
expected.subjects.clone(),
|
||||
expected.max_messages,
|
||||
expected.max_bytes,
|
||||
expected.max_age,
|
||||
expected.duplicate_window,
|
||||
);
|
||||
let actual = shared::stream_policy_from_parts(
|
||||
actual.name.as_str(),
|
||||
actual.subjects.clone(),
|
||||
actual.max_messages,
|
||||
actual.max_bytes,
|
||||
actual.max_age,
|
||||
actual.duplicate_window,
|
||||
);
|
||||
shared::validate_stream_policy(&expected, &actual)
|
||||
.map_err(|e| StreamInitError::Stream(e.to_string()))
|
||||
}
|
||||
|
||||
|
||||
@@ -44,9 +44,6 @@ fn jetstream_connects_and_can_publish_effect_result() {
|
||||
metadata: MessageMetadata::default(),
|
||||
};
|
||||
|
||||
runner_js
|
||||
.publish_effect_result("tenant.t1.effect_result.noop.c1".to_string(), &result)
|
||||
.await
|
||||
.unwrap();
|
||||
runner_js.publish_effect_result(&result).await.unwrap();
|
||||
});
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user