// Change summary (from the introducing commit):
// - shared: add stream+consumer policy helpers; NATS context header builder
// - aggregate/runner/projection: centralize stream validation and header usage; set bounded consumer params
// - projection: add QueryService gRPC and wire into main; settings include PROJECTION_GRPC_ADDR
// - gateway: gRPC routing to Projection/Runner with deadlines; bounded read-only retries;
//   pooled gRPC channels (bounded LRU+TTL); admin proxy forwards to gRPC; probes use
//   concurrency limiter + TTL cache
// - runner: add RunnerAdmin gRPC server (drain, status, reload) and wire into main; settings include RUNNER_GRPC_ADDR
// - tests: add gateway authz for runner admin, projection tenant isolation, runner admin drain semantics
// - docs: update TRANSPORT_DEVELOPMENT_PLAN to reflect completed milestones and details
use runner::config::Settings;
|
|
use runner::effects::run_effect_worker;
|
|
use runner::http;
|
|
use runner::observability::Observability;
|
|
use runner::outbox::OutboxRelay;
|
|
use runner::saga::{run_saga_worker, SagaPrograms, SagaRuntime};
|
|
use runner::schedule::Scheduler;
|
|
use runner::storage::KvClient;
|
|
use runner::stream::JetStreamClient;
|
|
use runner::tenant_placement::{start_tenant_filter, TenantGate};
|
|
use std::sync::atomic::{AtomicBool, Ordering};
|
|
use std::sync::Arc;
|
|
|
|
#[tokio::main]
|
|
async fn main() {
|
|
match std::env::args().nth(1).as_deref() {
|
|
Some("-h") | Some("--help") => {
|
|
print_help();
|
|
return;
|
|
}
|
|
Some("serve") | None => serve().await,
|
|
Some(other) => {
|
|
eprintln!("Unknown command: {}", other);
|
|
print_help();
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Runs the runner service: loads and validates settings, initializes
/// observability, opens storage, starts the HTTP and RunnerAdmin gRPC
/// servers, installs signal handling, spawns the mode-specific worker
/// tasks, and exits non-zero if any worker fails.
async fn serve() {
    let settings = load_settings();
    // Fail fast on invalid configuration. Exit code 2 distinguishes
    // configuration errors from runtime failures (which exit 1 below).
    if let Err(e) = settings.validate() {
        eprintln!("Invalid configuration: {}", e);
        std::process::exit(2);
    }

    let observability = Observability::default();
    observability.init();
    let metrics = observability.metrics();

    tracing::info!(settings = ?settings, "Runner starting");

    // Coordination primitives shared by every task:
    // - `shutdown`: notified once to stop all workers and servers,
    // - `reload`: asks the effect worker to re-read its configuration,
    // - `draining`: flipped before shutdown so readiness probes fail first.
    let shutdown = Arc::new(tokio::sync::Notify::new());
    let reload = Arc::new(tokio::sync::Notify::new());
    let draining = Arc::new(AtomicBool::new(false));

    // Local KV storage is mandatory; without it the runner cannot operate.
    let storage = match KvClient::open(settings.storage_path.clone()) {
        Ok(s) => s,
        Err(e) => {
            eprintln!("Failed to open storage: {}", e);
            std::process::exit(1);
        }
    };

    // Tenant placement is best-effort: on failure we log and continue with
    // `None`, i.e. no tenant filtering rather than refusing to start.
    let tenant_filter = match start_tenant_filter(&settings).await {
        Ok(v) => v,
        Err(e) => {
            tracing::error!(error = %e, "Failed to initialize tenant filter");
            None
        }
    };

    let tenant_gate = Arc::new(TenantGate::new(tenant_filter.clone()));

    // Shared application state handed to both the HTTP and gRPC servers.
    let state = Arc::new(http::AppState::new(
        settings.clone(),
        draining.clone(),
        tenant_gate.clone(),
        metrics.clone(),
        storage.clone(),
        reload.clone(),
    ));

    // NOTE(review): a bad bind address panics via `unwrap()` here instead of
    // printing a clean error like the storage path does — confirm intended.
    let http_listener = tokio::net::TcpListener::bind(settings.http_addr.as_str())
        .await
        .unwrap();

    // HTTP server; stops when `shutdown` is notified.
    let http_shutdown = shutdown.clone();
    let http_state = state.clone();
    let http_task = tokio::spawn(async move {
        http::serve(http_listener, http_state, async move {
            http_shutdown.notified().await
        })
        .await
    });

    // RunnerAdmin gRPC server (drain/status/reload); same shutdown signal.
    // NOTE(review): `parse().unwrap()` panics on a malformed RUNNER_GRPC_ADDR.
    let grpc_addr: std::net::SocketAddr = settings.grpc_addr.parse().unwrap();
    let grpc_shutdown = shutdown.clone();
    let grpc_state = state.clone();
    let grpc_task = tokio::spawn(async move {
        runner::grpc_admin::serve(grpc_addr, grpc_state, async move {
            grpc_shutdown.notified().await
        })
        .await
    });

    // Signal handler: on SIGTERM/SIGINT/Ctrl-C, flip `draining` first so
    // probes report not-ready, then wake every shutdown waiter.
    let signal_shutdown = shutdown.clone();
    let signal_draining = draining.clone();
    tokio::spawn(async move {
        #[cfg(unix)]
        {
            use tokio::signal::unix::{signal, SignalKind};
            // `.ok()`: if a signal stream cannot be installed we still have
            // the ctrl_c branch, rather than aborting the handler task.
            let mut sigterm = signal(SignalKind::terminate()).ok();
            let mut sigint = signal(SignalKind::interrupt()).ok();
            tokio::select! {
                _ = tokio::signal::ctrl_c() => {},
                _ = async { if let Some(s) = &mut sigterm { let _ = s.recv().await; } } => {},
                _ = async { if let Some(s) = &mut sigint { let _ = s.recv().await; } } => {},
            }
        }

        #[cfg(not(unix))]
        {
            // Non-Unix platforms only get Ctrl-C.
            let _ = tokio::signal::ctrl_c().await;
        }

        signal_draining.store(true, Ordering::Relaxed);
        signal_shutdown.notify_waiters();
    });

    // Worker tasks spawned according to the configured runner mode.
    let mut tasks = Vec::new();

    match settings.mode {
        // Saga mode: saga worker + outbox relay + scheduler.
        runner::config::RunnerMode::Saga => {
            // The saga manifest is required in this mode; a load failure
            // is fatal at startup.
            let programs = Arc::new(match SagaPrograms::load(&settings) {
                Ok(p) => p,
                Err(e) => {
                    tracing::error!(error = %e, "Failed to load saga manifest/programs");
                    std::process::exit(1);
                }
            });
            let saga_runtime = SagaRuntime::default();
            tasks.push(tokio::spawn(run_saga_worker(
                settings.clone(),
                storage.clone(),
                programs.clone(),
                saga_runtime.clone(),
                metrics.clone(),
                tenant_gate.clone(),
                tenant_filter.clone(),
                shutdown.clone(),
                draining.clone(),
            )));
            // Outbox relay: connects to JetStream lazily inside the task so
            // a broker outage surfaces as a task error, not a startup abort.
            let outbox_settings = settings.clone();
            let outbox_storage = storage.clone();
            let outbox_shutdown = shutdown.clone();
            let outbox_draining = draining.clone();
            let outbox_metrics = metrics.clone();
            let outbox_tenant_gate = tenant_gate.clone();
            tasks.push(tokio::spawn(async move {
                let js = JetStreamClient::connect(&outbox_settings)
                    .await
                    .map_err(|e| runner::types::RunnerError::StreamError(e.to_string()))?;
                OutboxRelay
                    .run(
                        outbox_settings,
                        outbox_storage,
                        js,
                        outbox_metrics,
                        outbox_tenant_gate,
                        outbox_shutdown,
                        outbox_draining,
                    )
                    .await
            }));
            // Scheduler: drives timed/retry saga work; consumes the shared
            // `programs` and `saga_runtime` by move (last users in this arm).
            let scheduler_settings = settings.clone();
            let scheduler_storage = storage.clone();
            let scheduler_shutdown = shutdown.clone();
            let scheduler_draining = draining.clone();
            let scheduler_metrics = metrics.clone();
            let scheduler_tenant_gate = tenant_gate.clone();
            tasks.push(tokio::spawn(async move {
                Scheduler
                    .run(
                        scheduler_settings,
                        scheduler_storage,
                        programs,
                        saga_runtime,
                        scheduler_metrics,
                        scheduler_tenant_gate,
                        scheduler_shutdown,
                        scheduler_draining,
                    )
                    .await
            }));
        }
        // Effect mode: only the effect worker (honors `reload`).
        runner::config::RunnerMode::Effect => {
            tasks.push(tokio::spawn(run_effect_worker(
                settings.clone(),
                storage.clone(),
                metrics.clone(),
                tenant_gate.clone(),
                tenant_filter.clone(),
                reload.clone(),
                shutdown.clone(),
                draining.clone(),
            )));
        }
        // Combined mode: saga + effect workers plus outbox relay and
        // scheduler in one process.
        // NOTE(review): this arm duplicates the Saga-mode spawning almost
        // verbatim — candidate for extraction into shared helpers.
        runner::config::RunnerMode::Combined => {
            let programs = Arc::new(match SagaPrograms::load(&settings) {
                Ok(p) => p,
                Err(e) => {
                    tracing::error!(error = %e, "Failed to load saga manifest/programs");
                    std::process::exit(1);
                }
            });
            let saga_runtime = SagaRuntime::default();
            tasks.push(tokio::spawn(run_saga_worker(
                settings.clone(),
                storage.clone(),
                programs.clone(),
                saga_runtime.clone(),
                metrics.clone(),
                tenant_gate.clone(),
                tenant_filter.clone(),
                shutdown.clone(),
                draining.clone(),
            )));
            tasks.push(tokio::spawn(run_effect_worker(
                settings.clone(),
                storage.clone(),
                metrics.clone(),
                tenant_gate.clone(),
                tenant_filter.clone(),
                reload.clone(),
                shutdown.clone(),
                draining.clone(),
            )));
            let outbox_settings = settings.clone();
            let outbox_storage = storage.clone();
            let outbox_shutdown = shutdown.clone();
            let outbox_draining = draining.clone();
            let outbox_metrics = metrics.clone();
            let outbox_tenant_gate = tenant_gate.clone();
            tasks.push(tokio::spawn(async move {
                let js = JetStreamClient::connect(&outbox_settings)
                    .await
                    .map_err(|e| runner::types::RunnerError::StreamError(e.to_string()))?;
                OutboxRelay
                    .run(
                        outbox_settings,
                        outbox_storage,
                        js,
                        outbox_metrics,
                        outbox_tenant_gate,
                        outbox_shutdown,
                        outbox_draining,
                    )
                    .await
            }));
            let scheduler_settings = settings.clone();
            let scheduler_storage = storage.clone();
            let scheduler_shutdown = shutdown.clone();
            let scheduler_draining = draining.clone();
            let scheduler_metrics = metrics.clone();
            let scheduler_tenant_gate = tenant_gate.clone();
            tasks.push(tokio::spawn(async move {
                Scheduler
                    .run(
                        scheduler_settings,
                        scheduler_storage,
                        programs,
                        saga_runtime,
                        scheduler_metrics,
                        scheduler_tenant_gate,
                        scheduler_shutdown,
                        scheduler_draining,
                    )
                    .await
            }));
        }
    }

    // Await workers in spawn order; the first failure (worker error or
    // panicked/cancelled JoinHandle) stops the wait and triggers shutdown.
    // Remaining tasks are stopped via the shutdown notification below.
    let mut failed = None;
    for task in tasks {
        match task.await {
            Ok(Ok(())) => {}
            Ok(Err(e)) => {
                failed = Some(e);
                break;
            }
            Err(e) => {
                // JoinError (panic or cancellation) is wrapped into the
                // runner's own error type for uniform reporting.
                failed = Some(runner::types::RunnerError::RuntimeError(e.to_string()));
                break;
            }
        }
    }

    // Drain-then-shutdown: mark not-ready, wake the servers, and wait for
    // both to finish before deciding the exit code.
    draining.store(true, Ordering::Relaxed);
    shutdown.notify_waiters();
    let _ = http_task.await;
    let _ = grpc_task.await;

    if let Some(e) = failed {
        tracing::error!(error = %e, "Runner terminated with error");
        std::process::exit(1);
    }
}
|
|
|
|
/// Writes CLI usage information to stdout.
fn print_help() {
    // Keep the usage text in one place; `println!` appends the final newline.
    let usage = "runner\n\nUSAGE:\n runner [COMMAND]\n\nCOMMANDS:\n serve Start the HTTP server (default)\n\nOPTIONS:\n -h, --help Print help\n";
    println!("{}", usage);
}
|
|
|
|
fn load_settings() -> Settings {
|
|
if let Ok(path) = std::env::var("RUNNER_CONFIG_PATH") {
|
|
if let Ok(settings) = Settings::load_from_file_with_env_overrides(path) {
|
|
return settings;
|
|
}
|
|
}
|
|
|
|
Settings::from_env().unwrap_or_default()
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    // Smoke test pinning two defaults of `runner::Settings`: the
    // aggregate-events stream name and the presence of the wildcard
    // saga-trigger subject filter the workers rely on.
    #[test]
    fn test_harness_runs() {
        let settings = runner::Settings::default();
        assert_eq!(settings.aggregate_events_stream, "AGGREGATE_EVENTS");
        assert!(settings
            .saga_trigger_subject_filters
            .iter()
            .any(|s| s == "tenant.*.aggregate.*.*"));
    }
}
|