Files
cloudlysis/runner/src/main.rs
Vlad Durnea 90c307016d
Some checks failed
ci / rust (push) Failing after 2m21s
ci / ui (push) Failing after 28s
images / build-and-push (push) Failing after 18s
transport: complete M0–M7
shared: add stream+consumer policy helpers; NATS context header builder

aggregate/runner/projection: centralize stream validation and header usage; set bounded consumer params

projection: add QueryService gRPC and wire into main; settings include PROJECTION_GRPC_ADDR

gateway: gRPC routing to Projection/Runner with deadlines; bounded read-only retries; pooled gRPC channels (bounded LRU+TTL); admin proxy forwards to gRPC; probes use concurrency limiter + TTL cache

runner: add RunnerAdmin gRPC server (drain, status, reload) and wire into main; settings include RUNNER_GRPC_ADDR

tests: add gateway authz for runner admin, projection tenant isolation, runner admin drain semantics

docs: update TRANSPORT_DEVELOPMENT_PLAN to reflect completed milestones and details
2026-03-30 14:24:14 +03:00

327 lines
11 KiB
Rust

use runner::config::Settings;
use runner::effects::run_effect_worker;
use runner::http;
use runner::observability::Observability;
use runner::outbox::OutboxRelay;
use runner::saga::{run_saga_worker, SagaPrograms, SagaRuntime};
use runner::schedule::Scheduler;
use runner::storage::KvClient;
use runner::stream::JetStreamClient;
use runner::tenant_placement::{start_tenant_filter, TenantGate};
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
#[tokio::main]
async fn main() {
    // CLI entry point: `runner [COMMAND]`. With no subcommand it defaults
    // to `serve`; `-h`/`--help` prints usage and exits successfully.
    match std::env::args().nth(1).as_deref() {
        Some("-h") | Some("--help") => {
            print_help();
            return;
        }
        Some("serve") | None => serve().await,
        Some(other) => {
            // Fix: exit with a non-zero status on an unknown command so
            // callers and scripts can detect the misuse (previously the
            // process printed the error but still exited 0).
            eprintln!("Unknown command: {}", other);
            print_help();
            std::process::exit(2);
        }
    }
}
/// Runs the runner service: validates configuration, starts the HTTP and
/// gRPC admin servers, installs signal handling, spawns the mode-specific
/// worker tasks (saga / effect / combined), then waits for completion and
/// performs a coordinated drain + shutdown.
///
/// Exit codes: 2 for configuration errors, 1 for runtime/startup failures.
async fn serve() {
    let settings = load_settings();
    if let Err(e) = settings.validate() {
        eprintln!("Invalid configuration: {}", e);
        std::process::exit(2);
    }
    let observability = Observability::default();
    observability.init();
    let metrics = observability.metrics();
    tracing::info!(settings = ?settings, "Runner starting");

    // Coordination primitives shared by every task:
    // - `shutdown` wakes servers/workers for graceful termination,
    // - `reload` signals the effect worker to re-read configuration,
    // - `draining` flips readiness to "draining" during shutdown.
    let shutdown = Arc::new(tokio::sync::Notify::new());
    let reload = Arc::new(tokio::sync::Notify::new());
    let draining = Arc::new(AtomicBool::new(false));

    let storage = match KvClient::open(settings.storage_path.clone()) {
        Ok(s) => s,
        Err(e) => {
            eprintln!("Failed to open storage: {}", e);
            std::process::exit(1);
        }
    };

    // Tenant placement is best-effort: an initialization failure degrades
    // to "no filter" (logged) rather than aborting startup.
    let tenant_filter = match start_tenant_filter(&settings).await {
        Ok(v) => v,
        Err(e) => {
            tracing::error!(error = %e, "Failed to initialize tenant filter");
            None
        }
    };
    let tenant_gate = Arc::new(TenantGate::new(tenant_filter.clone()));

    let state = Arc::new(http::AppState::new(
        settings.clone(),
        draining.clone(),
        tenant_gate.clone(),
        metrics.clone(),
        storage.clone(),
        reload.clone(),
    ));

    // HTTP server. Fix: bind failures (port in use, bad address) now exit
    // with a diagnostic instead of panicking via `unwrap`.
    let http_listener = match tokio::net::TcpListener::bind(settings.http_addr.as_str()).await {
        Ok(listener) => listener,
        Err(e) => {
            eprintln!("Failed to bind HTTP listener on {}: {}", settings.http_addr, e);
            std::process::exit(1);
        }
    };
    let http_shutdown = shutdown.clone();
    let http_state = state.clone();
    let http_task = tokio::spawn(async move {
        http::serve(http_listener, http_state, async move {
            http_shutdown.notified().await
        })
        .await
    });

    // gRPC admin server. Fix: a malformed RUNNER_GRPC_ADDR now exits with a
    // diagnostic instead of panicking via `unwrap` (it is a config error,
    // hence exit code 2 to match `validate`).
    let grpc_addr: std::net::SocketAddr = match settings.grpc_addr.parse() {
        Ok(addr) => addr,
        Err(e) => {
            eprintln!("Invalid gRPC address {:?}: {}", settings.grpc_addr, e);
            std::process::exit(2);
        }
    };
    let grpc_shutdown = shutdown.clone();
    let grpc_state = state.clone();
    let grpc_task = tokio::spawn(async move {
        runner::grpc_admin::serve(grpc_addr, grpc_state, async move {
            grpc_shutdown.notified().await
        })
        .await
    });

    // Translate SIGTERM/SIGINT (or Ctrl-C on non-unix platforms) into
    // drain + shutdown notifications for all tasks.
    let signal_shutdown = shutdown.clone();
    let signal_draining = draining.clone();
    tokio::spawn(async move {
        #[cfg(unix)]
        {
            use tokio::signal::unix::{signal, SignalKind};
            let mut sigterm = signal(SignalKind::terminate()).ok();
            let mut sigint = signal(SignalKind::interrupt()).ok();
            tokio::select! {
                _ = tokio::signal::ctrl_c() => {},
                _ = async { if let Some(s) = &mut sigterm { let _ = s.recv().await; } } => {},
                _ = async { if let Some(s) = &mut sigint { let _ = s.recv().await; } } => {},
            }
        }
        #[cfg(not(unix))]
        {
            let _ = tokio::signal::ctrl_c().await;
        }
        signal_draining.store(true, Ordering::Relaxed);
        signal_shutdown.notify_waiters();
    });

    let mut tasks = Vec::new();
    match settings.mode {
        runner::config::RunnerMode::Effect => {
            tasks.push(tokio::spawn(run_effect_worker(
                settings.clone(),
                storage.clone(),
                metrics.clone(),
                tenant_gate.clone(),
                tenant_filter.clone(),
                reload.clone(),
                shutdown.clone(),
                draining.clone(),
            )));
        }
        // Saga and Combined share the saga worker, outbox relay, and
        // scheduler; Combined additionally runs the effect worker. This arm
        // replaces the previous copy-pasted duplication of the outbox and
        // scheduler spawns across both modes. Spawn order per mode matches
        // the original (saga, [effect], outbox, scheduler).
        runner::config::RunnerMode::Saga | runner::config::RunnerMode::Combined => {
            let programs = Arc::new(match SagaPrograms::load(&settings) {
                Ok(p) => p,
                Err(e) => {
                    tracing::error!(error = %e, "Failed to load saga manifest/programs");
                    std::process::exit(1);
                }
            });
            let saga_runtime = SagaRuntime::default();
            tasks.push(tokio::spawn(run_saga_worker(
                settings.clone(),
                storage.clone(),
                programs.clone(),
                saga_runtime.clone(),
                metrics.clone(),
                tenant_gate.clone(),
                tenant_filter.clone(),
                shutdown.clone(),
                draining.clone(),
            )));
            if matches!(settings.mode, runner::config::RunnerMode::Combined) {
                tasks.push(tokio::spawn(run_effect_worker(
                    settings.clone(),
                    storage.clone(),
                    metrics.clone(),
                    tenant_gate.clone(),
                    tenant_filter.clone(),
                    reload.clone(),
                    shutdown.clone(),
                    draining.clone(),
                )));
            }
            // Outbox relay: JetStream is connected inside the task so a
            // broker outage surfaces as a worker error (and coordinated
            // shutdown) rather than a startup panic.
            let outbox_settings = settings.clone();
            let outbox_storage = storage.clone();
            let outbox_shutdown = shutdown.clone();
            let outbox_draining = draining.clone();
            let outbox_metrics = metrics.clone();
            let outbox_tenant_gate = tenant_gate.clone();
            tasks.push(tokio::spawn(async move {
                let js = JetStreamClient::connect(&outbox_settings)
                    .await
                    .map_err(|e| runner::types::RunnerError::StreamError(e.to_string()))?;
                OutboxRelay
                    .run(
                        outbox_settings,
                        outbox_storage,
                        js,
                        outbox_metrics,
                        outbox_tenant_gate,
                        outbox_shutdown,
                        outbox_draining,
                    )
                    .await
            }));
            // Scheduler takes ownership of `programs`/`saga_runtime`
            // (last users; no clone needed).
            let scheduler_settings = settings.clone();
            let scheduler_storage = storage.clone();
            let scheduler_shutdown = shutdown.clone();
            let scheduler_draining = draining.clone();
            let scheduler_metrics = metrics.clone();
            let scheduler_tenant_gate = tenant_gate.clone();
            tasks.push(tokio::spawn(async move {
                Scheduler
                    .run(
                        scheduler_settings,
                        scheduler_storage,
                        programs,
                        saga_runtime,
                        scheduler_metrics,
                        scheduler_tenant_gate,
                        scheduler_shutdown,
                        scheduler_draining,
                    )
                    .await
            }));
        }
    }

    // Await worker tasks; the first failure (worker error or panic/join
    // error) stops the wait and triggers a coordinated shutdown of the
    // remaining tasks and both servers.
    let mut failed = None;
    for task in tasks {
        match task.await {
            Ok(Ok(())) => {}
            Ok(Err(e)) => {
                failed = Some(e);
                break;
            }
            Err(e) => {
                failed = Some(runner::types::RunnerError::RuntimeError(e.to_string()));
                break;
            }
        }
    }
    draining.store(true, Ordering::Relaxed);
    shutdown.notify_waiters();
    let _ = http_task.await;
    let _ = grpc_task.await;
    if let Some(e) = failed {
        tracing::error!(error = %e, "Runner terminated with error");
        std::process::exit(1);
    }
}
/// Prints CLI usage for the `runner` binary to stdout.
fn print_help() {
    // The usage text is kept as a single constant so the exact output
    // (including trailing blank line) is easy to audit.
    const USAGE: &str = "runner\n\nUSAGE:\n runner [COMMAND]\n\nCOMMANDS:\n serve Start the HTTP server (default)\n\nOPTIONS:\n -h, --help Print help\n";
    println!("{}", USAGE);
}
/// Resolves runner settings.
///
/// Precedence: a config file named by `RUNNER_CONFIG_PATH` (with env
/// overrides applied) wins; otherwise settings come from the environment,
/// falling back to defaults.
///
/// Fix: previously a set-but-unloadable `RUNNER_CONFIG_PATH` was silently
/// ignored and the process continued on env/default settings — a
/// misconfiguration hazard. An explicitly requested config that fails to
/// load is now fatal (exit code 2, matching `validate` failures).
fn load_settings() -> Settings {
    if let Ok(path) = std::env::var("RUNNER_CONFIG_PATH") {
        match Settings::load_from_file_with_env_overrides(path) {
            Ok(settings) => return settings,
            Err(e) => {
                // tracing is not initialized yet at this point in startup,
                // so report directly on stderr.
                eprintln!("Failed to load config from RUNNER_CONFIG_PATH: {}", e);
                std::process::exit(2);
            }
        }
    }
    // No explicit config file: env-derived settings, defaulting on error
    // (deliberate best-effort, preserved from the original behavior).
    Settings::from_env().unwrap_or_default()
}
// Smoke tests for the runner's default configuration.
#[cfg(test)]
mod tests {
    // Verifies that `Settings::default()` names the expected aggregate
    // event stream and that the saga trigger subject filters include the
    // per-tenant aggregate wildcard pattern.
    #[test]
    fn test_harness_runs() {
        let settings = runner::Settings::default();
        assert_eq!(settings.aggregate_events_stream, "AGGREGATE_EVENTS");
        assert!(settings
            .saga_trigger_subject_filters
            .iter()
            .any(|s| s == "tenant.*.aggregate.*.*"));
    }
}