Monorepo consolidation: workspace, shared types, transport plans, docker/swarm assets
This commit is contained in:
417
control/api/src/admin.rs
Normal file
417
control/api/src/admin.rs
Normal file
@@ -0,0 +1,417 @@
|
||||
use crate::{
|
||||
AppState, RequestIds,
|
||||
auth::{Principal, has_permission},
|
||||
fleet,
|
||||
job_engine::{JobEngine, StartJobError},
|
||||
jobs::{Job, JobStatus, JobStep},
|
||||
placement::{PlacementResponse, ServiceKind},
|
||||
swarm::{SwarmService, SwarmTask},
|
||||
};
|
||||
use axum::{
|
||||
Json, Router,
|
||||
extract::{Extension, Path, State},
|
||||
http::{HeaderMap, StatusCode},
|
||||
response::IntoResponse,
|
||||
routing::{get, post},
|
||||
};
|
||||
use serde::Deserialize;
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
use uuid::Uuid;
|
||||
|
||||
const HEADER_IDEMPOTENCY_KEY: &str = "idempotency-key";
|
||||
const HEADER_TENANT_ID: &str = "x-tenant-id";
|
||||
|
||||
/// Build the admin API router.
///
/// Every handler assumes the auth middleware has already injected a
/// `Principal` extension; each handler enforces its own `control:read` /
/// `control:write` permission check.
pub fn admin_router() -> Router<AppState> {
    Router::new()
        // Identity / platform introspection.
        .route("/whoami", get(whoami))
        .route("/platform/info", get(platform_info))
        .route("/fleet/snapshot", get(fleet_snapshot))
        .route("/tenants", get(list_tenants))
        .route("/placement/{kind}", get(get_placement))
        .route("/tenants/echo", get(tenant_echo))
        // Job lifecycle.
        .route("/jobs/echo", post(create_echo_job))
        .route("/jobs/{job_id}", get(get_job))
        .route("/jobs/{job_id}/cancel", post(cancel_job))
        .route("/jobs/tenant/drain", post(start_tenant_drain))
        .route("/jobs/tenant/migrate", post(start_tenant_migrate))
        .route("/plan/tenant/migrate", post(plan_tenant_migrate))
        // Observability.
        .route("/audit", get(list_audit))
        .route("/swarm/services", get(list_swarm_services))
        .route("/swarm/services/{name}/tasks", get(list_swarm_tasks))
}
|
||||
|
||||
async fn whoami(Extension(principal): Extension<Principal>) -> impl IntoResponse {
|
||||
if !has_permission(&principal, "control:read") {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
|
||||
(
|
||||
StatusCode::OK,
|
||||
Json(serde_json::json!({
|
||||
"sub": principal.sub,
|
||||
"session_id": principal.session_id,
|
||||
"permissions": principal.permissions,
|
||||
})),
|
||||
)
|
||||
.into_response()
|
||||
}
|
||||
|
||||
async fn platform_info(Extension(principal): Extension<Principal>) -> impl IntoResponse {
|
||||
if !has_permission(&principal, "control:read") {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
|
||||
(
|
||||
StatusCode::OK,
|
||||
Json(serde_json::json!({
|
||||
"service": "control-api",
|
||||
})),
|
||||
)
|
||||
.into_response()
|
||||
}
|
||||
|
||||
async fn fleet_snapshot(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
Extension(request_ids): Extension<RequestIds>,
|
||||
) -> impl IntoResponse {
|
||||
if !has_permission(&principal, "control:read") {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
|
||||
let services =
|
||||
fleet::snapshot_with_context(&state.http, &state.fleet_services, Some(&request_ids)).await;
|
||||
(
|
||||
StatusCode::OK,
|
||||
Json(serde_json::json!({ "services": services })),
|
||||
)
|
||||
.into_response()
|
||||
}
|
||||
|
||||
async fn get_placement(
|
||||
State(state): State<AppState>,
|
||||
Path(kind): Path<String>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
) -> impl IntoResponse {
|
||||
if !has_permission(&principal, "control:read") {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
|
||||
let kind = match kind.as_str() {
|
||||
"aggregate" => ServiceKind::Aggregate,
|
||||
"projection" => ServiceKind::Projection,
|
||||
"runner" => ServiceKind::Runner,
|
||||
_ => return StatusCode::NOT_FOUND.into_response(),
|
||||
};
|
||||
|
||||
let resp: PlacementResponse = state.placement.get_for_kind(kind);
|
||||
|
||||
(StatusCode::OK, Json(resp)).into_response()
|
||||
}
|
||||
|
||||
async fn list_tenants(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
) -> impl IntoResponse {
|
||||
if !has_permission(&principal, "control:read") {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
|
||||
let tenants = state.placement.tenant_summaries();
|
||||
(
|
||||
StatusCode::OK,
|
||||
Json(serde_json::json!({ "tenants": tenants })),
|
||||
)
|
||||
.into_response()
|
||||
}
|
||||
|
||||
async fn tenant_echo(
|
||||
headers: HeaderMap,
|
||||
Extension(principal): Extension<Principal>,
|
||||
) -> impl IntoResponse {
|
||||
if !has_permission(&principal, "control:read") {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
|
||||
let tenant_id = headers
|
||||
.get(HEADER_TENANT_ID)
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.ok_or(StatusCode::BAD_REQUEST)
|
||||
.and_then(|s| Uuid::parse_str(s).map_err(|_| StatusCode::BAD_REQUEST));
|
||||
|
||||
match tenant_id {
|
||||
Ok(tenant_id) => (
|
||||
StatusCode::OK,
|
||||
Json(serde_json::json!({
|
||||
"tenant_id": tenant_id,
|
||||
})),
|
||||
)
|
||||
.into_response(),
|
||||
Err(status) => status.into_response(),
|
||||
}
|
||||
}
|
||||
|
||||
async fn create_echo_job(
|
||||
State(state): State<AppState>,
|
||||
headers: HeaderMap,
|
||||
Extension(principal): Extension<Principal>,
|
||||
) -> impl IntoResponse {
|
||||
if !has_permission(&principal, "control:write") {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
|
||||
let key = headers
|
||||
.get(HEADER_IDEMPOTENCY_KEY)
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.ok_or(StatusCode::BAD_REQUEST);
|
||||
|
||||
let key = match key {
|
||||
Ok(k) if !k.is_empty() => k,
|
||||
_ => return StatusCode::BAD_REQUEST.into_response(),
|
||||
};
|
||||
|
||||
let now = now_ms();
|
||||
let job_id = Uuid::new_v4();
|
||||
let job = Job {
|
||||
job_id,
|
||||
status: JobStatus::Succeeded,
|
||||
steps: vec![JobStep {
|
||||
name: "echo".to_string(),
|
||||
status: JobStatus::Succeeded,
|
||||
attempts: 1,
|
||||
error: None,
|
||||
}],
|
||||
error: None,
|
||||
created_at_ms: now,
|
||||
started_at_ms: Some(now),
|
||||
finished_at_ms: Some(now),
|
||||
};
|
||||
|
||||
let job_id = state.jobs.insert_idempotent(key, job);
|
||||
state.audit.record(crate::audit::AuditEvent {
|
||||
ts_ms: now,
|
||||
principal_sub: principal.sub.clone(),
|
||||
action: "job.echo".to_string(),
|
||||
tenant_id: None,
|
||||
reason: "echo".to_string(),
|
||||
job_id: Some(job_id),
|
||||
});
|
||||
(
|
||||
StatusCode::OK,
|
||||
Json(serde_json::json!({
|
||||
"job_id": job_id,
|
||||
})),
|
||||
)
|
||||
.into_response()
|
||||
}
|
||||
|
||||
async fn get_job(
|
||||
State(state): State<AppState>,
|
||||
Path(job_id): Path<Uuid>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
) -> impl IntoResponse {
|
||||
if !has_permission(&principal, "control:read") {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
|
||||
match state.jobs.get(job_id) {
|
||||
Some(job) => (StatusCode::OK, Json(job)).into_response(),
|
||||
None => StatusCode::NOT_FOUND.into_response(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Request body for POST /jobs/tenant/drain.
#[derive(Debug, Deserialize)]
struct TenantDrainRequest {
    // Tenant whose workload should be drained.
    tenant_id: Uuid,
    // Free-text operator justification, recorded in the audit log.
    reason: String,
}

/// Request body for POST /jobs/tenant/migrate and /plan/tenant/migrate.
#[derive(Debug, Deserialize)]
struct TenantMigrateRequest {
    // Tenant to move.
    tenant_id: Uuid,
    // Runner the tenant should be placed on after migration.
    runner_target: String,
    // Free-text operator justification, recorded in the audit log.
    reason: String,
}
|
||||
|
||||
async fn start_tenant_drain(
|
||||
State(state): State<AppState>,
|
||||
headers: HeaderMap,
|
||||
Extension(principal): Extension<Principal>,
|
||||
Json(body): Json<TenantDrainRequest>,
|
||||
) -> impl IntoResponse {
|
||||
if !has_permission(&principal, "control:write") {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
|
||||
let key = headers
|
||||
.get(HEADER_IDEMPOTENCY_KEY)
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.ok_or(StatusCode::BAD_REQUEST);
|
||||
let key = match key {
|
||||
Ok(k) if !k.is_empty() => k,
|
||||
_ => return StatusCode::BAD_REQUEST.into_response(),
|
||||
};
|
||||
|
||||
let engine = JobEngine::new(
|
||||
state.jobs.clone(),
|
||||
state.audit.clone(),
|
||||
state.tenant_locks.clone(),
|
||||
);
|
||||
let job_id = match engine.start_tenant_drain(
|
||||
state.clone(),
|
||||
&principal,
|
||||
body.tenant_id,
|
||||
body.reason,
|
||||
key,
|
||||
) {
|
||||
Ok(id) => id,
|
||||
Err(StartJobError::TenantLocked) => return StatusCode::CONFLICT.into_response(),
|
||||
};
|
||||
|
||||
(
|
||||
StatusCode::OK,
|
||||
Json(serde_json::json!({ "job_id": job_id })),
|
||||
)
|
||||
.into_response()
|
||||
}
|
||||
|
||||
async fn start_tenant_migrate(
|
||||
State(state): State<AppState>,
|
||||
headers: HeaderMap,
|
||||
Extension(principal): Extension<Principal>,
|
||||
Json(body): Json<TenantMigrateRequest>,
|
||||
) -> impl IntoResponse {
|
||||
if !has_permission(&principal, "control:write") {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
|
||||
let key = headers
|
||||
.get(HEADER_IDEMPOTENCY_KEY)
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.ok_or(StatusCode::BAD_REQUEST);
|
||||
let key = match key {
|
||||
Ok(k) if !k.is_empty() => k,
|
||||
_ => return StatusCode::BAD_REQUEST.into_response(),
|
||||
};
|
||||
|
||||
let engine = JobEngine::new(
|
||||
state.jobs.clone(),
|
||||
state.audit.clone(),
|
||||
state.tenant_locks.clone(),
|
||||
);
|
||||
let job_id = match engine.start_tenant_migrate(
|
||||
state.clone(),
|
||||
&principal,
|
||||
body.tenant_id,
|
||||
body.runner_target,
|
||||
body.reason,
|
||||
key,
|
||||
) {
|
||||
Ok(id) => id,
|
||||
Err(StartJobError::TenantLocked) => return StatusCode::CONFLICT.into_response(),
|
||||
};
|
||||
|
||||
(
|
||||
StatusCode::OK,
|
||||
Json(serde_json::json!({ "job_id": job_id })),
|
||||
)
|
||||
.into_response()
|
||||
}
|
||||
|
||||
async fn cancel_job(
|
||||
State(state): State<AppState>,
|
||||
Path(job_id): Path<Uuid>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
) -> impl IntoResponse {
|
||||
if !has_permission(&principal, "control:write") {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
|
||||
if state.jobs.request_cancel(job_id) {
|
||||
state.audit.record(crate::audit::AuditEvent {
|
||||
ts_ms: now_ms(),
|
||||
principal_sub: principal.sub.clone(),
|
||||
action: "job.cancel".to_string(),
|
||||
tenant_id: None,
|
||||
reason: "cancel requested".to_string(),
|
||||
job_id: Some(job_id),
|
||||
});
|
||||
StatusCode::OK.into_response()
|
||||
} else {
|
||||
StatusCode::NOT_FOUND.into_response()
|
||||
}
|
||||
}
|
||||
|
||||
/// Current wall-clock time as milliseconds since the Unix epoch.
///
/// Falls back to 0 if the system clock reads before the epoch.
fn now_ms() -> u64 {
    let since_epoch = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default();
    since_epoch.as_millis() as u64
}
|
||||
|
||||
async fn list_audit(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
) -> impl IntoResponse {
|
||||
if !has_permission(&principal, "control:read") {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
|
||||
let events = state.audit.list_recent(200);
|
||||
(
|
||||
StatusCode::OK,
|
||||
Json(serde_json::json!({ "events": events })),
|
||||
)
|
||||
.into_response()
|
||||
}
|
||||
|
||||
async fn plan_tenant_migrate(
|
||||
Extension(principal): Extension<Principal>,
|
||||
Json(body): Json<TenantMigrateRequest>,
|
||||
) -> impl IntoResponse {
|
||||
if !has_permission(&principal, "control:write") {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
|
||||
let _ = (body.tenant_id, body.runner_target, body.reason);
|
||||
(
|
||||
StatusCode::OK,
|
||||
Json(serde_json::json!({
|
||||
"steps": ["preflight", "drain", "update_placement", "reload", "verify"]
|
||||
})),
|
||||
)
|
||||
.into_response()
|
||||
}
|
||||
|
||||
async fn list_swarm_services(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
) -> impl IntoResponse {
|
||||
if !has_permission(&principal, "control:read") {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
|
||||
let services: Vec<SwarmService> = state.swarm.list_services();
|
||||
(
|
||||
StatusCode::OK,
|
||||
Json(serde_json::json!({ "services": services })),
|
||||
)
|
||||
.into_response()
|
||||
}
|
||||
|
||||
async fn list_swarm_tasks(
|
||||
State(state): State<AppState>,
|
||||
Path(name): Path<String>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
) -> impl IntoResponse {
|
||||
if !has_permission(&principal, "control:read") {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
|
||||
let tasks: Vec<SwarmTask> = state.swarm.list_tasks(&name);
|
||||
(
|
||||
StatusCode::OK,
|
||||
Json(serde_json::json!({ "service": name, "tasks": tasks })),
|
||||
)
|
||||
.into_response()
|
||||
}
|
||||
31
control/api/src/audit.rs
Normal file
31
control/api/src/audit.rs
Normal file
@@ -0,0 +1,31 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::sync::{Arc, Mutex};
|
||||
use uuid::Uuid;
|
||||
|
||||
/// One entry in the admin audit trail: who did what to which tenant/job,
/// when, and why.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AuditEvent {
    // Wall-clock timestamp, milliseconds since the Unix epoch.
    pub ts_ms: u64,
    // `sub` of the principal that performed the action.
    pub principal_sub: String,
    // Action identifier, e.g. "tenant.drain" or "job.cancel".
    pub action: String,
    // Affected tenant, when the action is tenant-scoped.
    pub tenant_id: Option<Uuid>,
    // Operator-supplied justification.
    pub reason: String,
    // Associated job, when the action created or affected one.
    pub job_id: Option<Uuid>,
}

/// In-memory, append-only audit log shared across handlers via `Arc`.
/// Cloning the store clones the handle, not the events.
#[derive(Clone, Default)]
pub struct AuditStore {
    inner: Arc<Mutex<Vec<AuditEvent>>>,
}
|
||||
|
||||
impl AuditStore {
|
||||
pub fn record(&self, event: AuditEvent) {
|
||||
let mut events = self.inner.lock().expect("audit lock poisoned");
|
||||
events.push(event);
|
||||
}
|
||||
|
||||
pub fn list_recent(&self, limit: usize) -> Vec<AuditEvent> {
|
||||
let events = self.inner.lock().expect("audit lock poisoned");
|
||||
let start = events.len().saturating_sub(limit);
|
||||
events[start..].to_vec()
|
||||
}
|
||||
}
|
||||
78
control/api/src/auth.rs
Normal file
78
control/api/src/auth.rs
Normal file
@@ -0,0 +1,78 @@
|
||||
use crate::AppState;
|
||||
use axum::{
|
||||
extract::State,
|
||||
http::{Request, StatusCode},
|
||||
middleware::Next,
|
||||
response::{IntoResponse, Response},
|
||||
};
|
||||
use jsonwebtoken::{Algorithm, DecodingKey, Validation, decode};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Authentication configuration for the control API.
#[derive(Clone)]
pub struct AuthConfig {
    // HS256 signing secret for JWT verification. `None` disables auth
    // entirely (requests fail with 503).
    pub hs256_secret: Option<Vec<u8>>,
}

/// The authenticated caller, injected as a request extension by
/// `auth_middleware` and consumed by handlers for permission checks.
#[derive(Clone, Debug)]
pub struct Principal {
    // Subject claim from the verified token.
    pub sub: String,
    // Session identifier carried by the token.
    pub session_id: String,
    // Granted permission strings, e.g. "control:read".
    pub permissions: Vec<String>,
}

/// JWT claim set this service expects; mirrors `Principal` plus `exp`.
#[derive(Debug, Serialize, Deserialize)]
struct Claims {
    sub: String,
    session_id: String,
    permissions: Vec<String>,
    // Expiry, seconds since the Unix epoch; validated by `jsonwebtoken`.
    exp: usize,
}
|
||||
|
||||
/// Axum middleware: authenticate the request's `Authorization` header and,
/// on success, expose the resulting `Principal` to handlers as a request
/// extension. Failures short-circuit with the status from `authenticate`.
pub async fn auth_middleware(
    State(state): State<AppState>,
    mut req: Request<axum::body::Body>,
    next: Next,
) -> Response {
    match authenticate(
        &state.auth,
        req.headers().get(axum::http::header::AUTHORIZATION),
    ) {
        Ok(principal) => {
            req.extensions_mut().insert(principal);
            next.run(req).await
        }
        Err(status) => status.into_response(),
    }
}
|
||||
|
||||
/// Validate a `Bearer` JWT (HS256) and convert its claims to a `Principal`.
///
/// Status mapping: 503 when no signing secret is configured; 401 for a
/// missing header, non-ASCII header, missing `Bearer ` prefix, or any
/// token that fails signature/claims validation.
fn authenticate(
    cfg: &AuthConfig,
    auth_header: Option<&axum::http::HeaderValue>,
) -> Result<Principal, StatusCode> {
    // No secret configured means auth cannot work at all — surface as 503,
    // not 401, so clients can tell misconfiguration from a bad token.
    let secret = cfg
        .hs256_secret
        .as_ref()
        .ok_or(StatusCode::SERVICE_UNAVAILABLE)?;
    let header = auth_header.ok_or(StatusCode::UNAUTHORIZED)?;
    let header_str = header.to_str().map_err(|_| StatusCode::UNAUTHORIZED)?;

    // NOTE(review): the prefix match is case-sensitive; RFC 7235 treats
    // auth schemes as case-insensitive — confirm whether "bearer ..."
    // should also be accepted.
    let token = header_str
        .strip_prefix("Bearer ")
        .ok_or(StatusCode::UNAUTHORIZED)?;

    let mut validation = Validation::new(Algorithm::HS256);
    // Require the `exp` claim so tokens without an expiry are rejected.
    validation.required_spec_claims.insert("exp".to_string());

    let data = decode::<Claims>(token, &DecodingKey::from_secret(secret), &validation)
        .map_err(|_| StatusCode::UNAUTHORIZED)?;

    Ok(Principal {
        sub: data.claims.sub,
        session_id: data.claims.session_id,
        permissions: data.claims.permissions,
    })
}
|
||||
|
||||
pub fn has_permission(principal: &Principal, permission: &str) -> bool {
|
||||
principal.permissions.iter().any(|p| p == permission)
|
||||
}
|
||||
57
control/api/src/build_info.rs
Normal file
57
control/api/src/build_info.rs
Normal file
@@ -0,0 +1,57 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Build identity of one service, as parsed from a Prometheus
/// `*_build_info` metric's labels.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct BuildInfo {
    // `service` label value.
    pub service: String,
    // `version` label value.
    pub version: String,
    // `git_sha` label value.
    pub git_sha: String,
}
|
||||
|
||||
pub fn extract_build_info(metrics: &str) -> Vec<BuildInfo> {
|
||||
let mut out = Vec::new();
|
||||
for line in metrics.lines() {
|
||||
let line = line.trim();
|
||||
if line.is_empty() || line.starts_with('#') {
|
||||
continue;
|
||||
}
|
||||
let Some((metric_and_labels, value)) = line.split_once(' ') else {
|
||||
continue;
|
||||
};
|
||||
if value.trim() != "1" {
|
||||
continue;
|
||||
}
|
||||
if !metric_and_labels.ends_with('}') {
|
||||
continue;
|
||||
}
|
||||
let Some((name, labels)) = metric_and_labels.split_once('{') else {
|
||||
continue;
|
||||
};
|
||||
if !name.ends_with("_build_info") {
|
||||
continue;
|
||||
}
|
||||
let labels = labels.trim_end_matches('}');
|
||||
let mut service = None;
|
||||
let mut version = None;
|
||||
let mut git_sha = None;
|
||||
for part in labels.split(',') {
|
||||
let Some((k, v)) = part.split_once('=') else {
|
||||
continue;
|
||||
};
|
||||
let v = v.trim().trim_matches('"');
|
||||
match k.trim() {
|
||||
"service" => service = Some(v.to_string()),
|
||||
"version" => version = Some(v.to_string()),
|
||||
"git_sha" => git_sha = Some(v.to_string()),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
if let (Some(service), Some(version), Some(git_sha)) = (service, version, git_sha) {
|
||||
out.push(BuildInfo {
|
||||
service,
|
||||
version,
|
||||
git_sha,
|
||||
});
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
42
control/api/src/deployments.rs
Normal file
42
control/api/src/deployments.rs
Normal file
@@ -0,0 +1,42 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Payload for Grafana's annotation-creation API.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct GrafanaAnnotation {
    // Annotation timestamp, milliseconds since the Unix epoch.
    pub time: i64,
    // Tag strings, e.g. "deploy", "service:control-api".
    pub tags: Vec<String>,
    // Human-readable annotation text.
    pub text: String,
}
|
||||
|
||||
pub fn build_grafana_deploy_annotation(args: DeployAnnotationArgs) -> GrafanaAnnotation {
|
||||
let mut tags = vec![
|
||||
"cloudlysis".to_string(),
|
||||
"deploy".to_string(),
|
||||
format!("service:{}", args.service),
|
||||
];
|
||||
if let Some(v) = args.version {
|
||||
tags.push(format!("version:{v}"));
|
||||
}
|
||||
if let Some(sha) = args.git_sha {
|
||||
tags.push(format!("git_sha:{sha}"));
|
||||
}
|
||||
|
||||
let text = match (args.version, args.git_sha) {
|
||||
(Some(v), Some(sha)) => format!("deploy {} v={} git_sha={sha}", args.service, v),
|
||||
(Some(v), None) => format!("deploy {} v={}", args.service, v),
|
||||
(None, Some(sha)) => format!("deploy {} git_sha={sha}", args.service),
|
||||
(None, None) => format!("deploy {}", args.service),
|
||||
};
|
||||
|
||||
GrafanaAnnotation {
|
||||
time: args.time_ms,
|
||||
tags,
|
||||
text,
|
||||
}
|
||||
}
|
||||
|
||||
/// Inputs for [`build_grafana_deploy_annotation`].
pub struct DeployAnnotationArgs<'a> {
    // Service being deployed (always tagged).
    pub service: &'a str,
    // Release version, when known.
    pub version: Option<&'a str>,
    // Git commit SHA, when known.
    pub git_sha: Option<&'a str>,
    // Deploy timestamp, milliseconds since the Unix epoch.
    pub time_ms: i64,
}
|
||||
67
control/api/src/fleet.rs
Normal file
67
control/api/src/fleet.rs
Normal file
@@ -0,0 +1,67 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::time::Duration;
|
||||
|
||||
use crate::RequestIds;
|
||||
|
||||
/// One service the control plane knows how to probe.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct FleetService {
    // Display name used in snapshots.
    pub name: String,
    // HTTP base URL; probe paths are appended to it.
    pub base_url: String,
}

/// Probe results for one service: whether each of the standard endpoints
/// (`/health`, `/ready`, `/metrics`) answered with a 2xx.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct FleetServiceSnapshot {
    pub name: String,
    pub base_url: String,
    pub health_ok: bool,
    pub ready_ok: bool,
    pub metrics_ok: bool,
}
|
||||
|
||||
/// Probe every fleet service without request-context propagation.
///
/// Thin wrapper over `snapshot_with_context` with `ctx = None`.
pub async fn snapshot(
    client: &reqwest::Client,
    services: &[FleetService],
) -> Vec<FleetServiceSnapshot> {
    snapshot_with_context(client, services, None).await
}
|
||||
|
||||
pub async fn snapshot_with_context(
|
||||
client: &reqwest::Client,
|
||||
services: &[FleetService],
|
||||
ctx: Option<&RequestIds>,
|
||||
) -> Vec<FleetServiceSnapshot> {
|
||||
let mut out = Vec::with_capacity(services.len());
|
||||
for svc in services {
|
||||
let base = svc.base_url.trim_end_matches('/');
|
||||
let health_ok = get_ok(client, &format!("{base}/health"), ctx).await;
|
||||
let ready_ok = get_ok(client, &format!("{base}/ready"), ctx).await;
|
||||
let metrics_ok = get_ok(client, &format!("{base}/metrics"), ctx).await;
|
||||
out.push(FleetServiceSnapshot {
|
||||
name: svc.name.clone(),
|
||||
base_url: svc.base_url.clone(),
|
||||
health_ok,
|
||||
ready_ok,
|
||||
metrics_ok,
|
||||
});
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
async fn get_ok(client: &reqwest::Client, url: &str, ctx: Option<&RequestIds>) -> bool {
|
||||
let mut req = client.get(url).timeout(Duration::from_secs(2));
|
||||
if let Some(ctx) = ctx {
|
||||
req = req.header("x-request-id", &ctx.request_id);
|
||||
if let Some(cid) = &ctx.correlation_id {
|
||||
req = req.header("x-correlation-id", cid);
|
||||
}
|
||||
if let Some(tp) = &ctx.traceparent {
|
||||
req = req.header("traceparent", tp);
|
||||
}
|
||||
}
|
||||
|
||||
let res = req.send().await;
|
||||
match res {
|
||||
Ok(r) => r.status().is_success(),
|
||||
Err(_) => false,
|
||||
}
|
||||
}
|
||||
348
control/api/src/job_engine.rs
Normal file
348
control/api/src/job_engine.rs
Normal file
@@ -0,0 +1,348 @@
|
||||
use crate::{
|
||||
AppState, Principal,
|
||||
audit::{AuditEvent, AuditStore},
|
||||
fleet,
|
||||
jobs::{Job, JobStatus, JobStep, JobStore},
|
||||
};
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
sync::{Arc, Mutex},
|
||||
time::{Duration, SystemTime, UNIX_EPOCH},
|
||||
};
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Per-tenant mutual exclusion: maps a locked tenant to the job id that
/// holds its lock. Cloning shares the underlying map via `Arc`.
#[derive(Clone, Default)]
pub struct TenantLocks {
    inner: Arc<Mutex<HashMap<Uuid, Uuid>>>,
}
|
||||
|
||||
impl TenantLocks {
|
||||
pub fn try_lock(&self, tenant_id: Uuid, job_id: Uuid) -> bool {
|
||||
let mut map = self.inner.lock().expect("tenant locks poisoned");
|
||||
if map.contains_key(&tenant_id) {
|
||||
return false;
|
||||
}
|
||||
map.insert(tenant_id, job_id);
|
||||
true
|
||||
}
|
||||
|
||||
pub fn unlock(&self, tenant_id: Uuid, job_id: Uuid) {
|
||||
let mut map = self.inner.lock().expect("tenant locks poisoned");
|
||||
if map.get(&tenant_id).copied() == Some(job_id) {
|
||||
map.remove(&tenant_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Executes multi-step tenant jobs in the background.
///
/// Cheap to clone: all fields are shared handles, so a clone can be moved
/// into a spawned task.
#[derive(Clone)]
pub struct JobEngine {
    // Job records and idempotency mapping.
    pub jobs: JobStore,
    // Audit trail for job starts.
    pub audit: AuditStore,
    // Per-tenant mutual exclusion.
    pub tenant_locks: TenantLocks,
    // Upper bound on the wall-clock time of a single step.
    pub step_timeout: Duration,
}
|
||||
|
||||
impl JobEngine {
    /// Build an engine over the shared stores; steps time out after 500 ms.
    pub fn new(jobs: JobStore, audit: AuditStore, tenant_locks: TenantLocks) -> Self {
        Self {
            jobs,
            audit,
            tenant_locks,
            step_timeout: Duration::from_millis(500),
        }
    }

    /// Start an async tenant-drain job (steps: preflight → drain → verify).
    ///
    /// Idempotent on `idempotency_key`: a repeated key returns the original
    /// job id without spawning new work. Fails with `TenantLocked` when
    /// another job currently holds the tenant's lock. The steps run on a
    /// spawned tokio task; this call returns immediately.
    pub fn start_tenant_drain(
        &self,
        state: AppState,
        principal: &Principal,
        tenant_id: Uuid,
        reason: String,
        idempotency_key: &str,
    ) -> Result<Uuid, StartJobError> {
        // Replay: the key was already used — hand back the existing job.
        if let Some(existing) = self.jobs.get_idempotent(idempotency_key) {
            return Ok(existing);
        }

        let job_id = Uuid::new_v4();
        // One mutating job per tenant at a time.
        if !self.tenant_locks.try_lock(tenant_id, job_id) {
            return Err(StartJobError::TenantLocked);
        }

        let now = now_ms();
        let job = Job {
            job_id,
            status: JobStatus::Pending,
            steps: vec![step("preflight"), step("drain"), step("verify")],
            error: None,
            created_at_ms: now,
            started_at_ms: None,
            finished_at_ms: None,
        };

        // insert_idempotent may return a different id if a request with the
        // same key raced us; `inserted` is the authoritative id from here.
        let inserted = self.jobs.insert_idempotent(idempotency_key, job);
        self.audit.record(AuditEvent {
            ts_ms: now,
            principal_sub: principal.sub.clone(),
            action: "tenant.drain".to_string(),
            tenant_id: Some(tenant_id),
            reason,
            job_id: Some(inserted),
        });

        // Execute the steps in the background.
        let engine = self.clone();
        tokio::spawn(async move {
            engine
                .run_job(state, inserted, Some(tenant_id), RunSpec::Drain)
                .await;
        });

        Ok(inserted)
    }

    /// Start an async tenant-migrate job
    /// (preflight → drain → update_placement → reload → verify).
    ///
    /// Same idempotency and tenant-locking semantics as
    /// [`Self::start_tenant_drain`].
    pub fn start_tenant_migrate(
        &self,
        state: AppState,
        principal: &Principal,
        tenant_id: Uuid,
        runner_target: String,
        reason: String,
        idempotency_key: &str,
    ) -> Result<Uuid, StartJobError> {
        if let Some(existing) = self.jobs.get_idempotent(idempotency_key) {
            return Ok(existing);
        }

        let job_id = Uuid::new_v4();
        if !self.tenant_locks.try_lock(tenant_id, job_id) {
            return Err(StartJobError::TenantLocked);
        }

        let now = now_ms();
        let job = Job {
            job_id,
            status: JobStatus::Pending,
            steps: vec![
                step("preflight"),
                step("drain"),
                step("update_placement"),
                step("reload"),
                step("verify"),
            ],
            error: None,
            created_at_ms: now,
            started_at_ms: None,
            finished_at_ms: None,
        };

        let inserted = self.jobs.insert_idempotent(idempotency_key, job);
        self.audit.record(AuditEvent {
            ts_ms: now,
            principal_sub: principal.sub.clone(),
            action: "tenant.migrate".to_string(),
            tenant_id: Some(tenant_id),
            reason,
            job_id: Some(inserted),
        });

        let engine = self.clone();
        tokio::spawn(async move {
            engine
                .run_job(
                    state,
                    inserted,
                    Some(tenant_id),
                    RunSpec::Migrate { runner_target },
                )
                .await;
        });

        Ok(inserted)
    }

    /// Drive a job's steps to completion, honouring cancellation and the
    /// per-step timeout, then release the tenant lock (if any).
    async fn run_job(&self, state: AppState, job_id: Uuid, tenant_id: Option<Uuid>, spec: RunSpec) {
        self.jobs.update(job_id, |j| {
            j.status = JobStatus::Running;
            j.started_at_ms = Some(now_ms());
        });

        let mut ok = true;
        for idx in 0.. {
            // Cooperative cancellation, checked between steps only — a step
            // already in flight is allowed to finish (or time out).
            if self.jobs.cancel_requested(job_id) {
                ok = false;
                self.jobs.update(job_id, |j| {
                    j.status = JobStatus::Cancelled;
                    j.finished_at_ms = Some(now_ms());
                    j.error = Some("cancelled".to_string());
                    // Any step not yet finished is marked cancelled too.
                    for step in &mut j.steps {
                        if step.status == JobStatus::Pending || step.status == JobStatus::Running {
                            step.status = JobStatus::Cancelled;
                        }
                    }
                });
                break;
            }

            // Look up the next step; running past the end (or a vanished
            // job) terminates the loop with ok still true.
            let step_name = {
                let Some(job) = self.jobs.get(job_id) else {
                    break;
                };
                let Some(step) = job.steps.get(idx) else {
                    break;
                };
                step.name.clone()
            };

            self.jobs.update(job_id, |j| {
                if let Some(step) = j.steps.get_mut(idx) {
                    step.status = JobStatus::Running;
                    step.attempts += 1;
                }
            });

            // Bound each step by the engine's timeout.
            let r = tokio::time::timeout(
                self.step_timeout,
                run_step(&state, &spec, &step_name, tenant_id),
            )
            .await;
            match r {
                Ok(Ok(())) => {
                    self.jobs.update(job_id, |j| {
                        if let Some(step) = j.steps.get_mut(idx) {
                            step.status = JobStatus::Succeeded;
                            step.error = None;
                        }
                    });
                }
                // Step reported a failure: fail the step and the whole job.
                Ok(Err(e)) => {
                    ok = false;
                    self.jobs.update(job_id, |j| {
                        if let Some(step) = j.steps.get_mut(idx) {
                            step.status = JobStatus::Failed;
                            step.error = Some(e.clone());
                        }
                        j.status = JobStatus::Failed;
                        j.error = Some(e);
                        j.finished_at_ms = Some(now_ms());
                    });
                    break;
                }
                // Timeout elapsed before the step returned.
                Err(_) => {
                    ok = false;
                    self.jobs.update(job_id, |j| {
                        if let Some(step) = j.steps.get_mut(idx) {
                            step.status = JobStatus::Failed;
                            step.error = Some("step timeout".to_string());
                        }
                        j.status = JobStatus::Failed;
                        j.error = Some("step timeout".to_string());
                        j.finished_at_ms = Some(now_ms());
                    });
                    break;
                }
            }

            if !ok {
                break;
            }

            // Stop once the last step has run (treat a vanished job as done).
            let done = match self.jobs.get(job_id) {
                Some(job) => idx + 1 >= job.steps.len(),
                None => true,
            };
            if done {
                break;
            }
        }

        if ok {
            self.jobs.update(job_id, |j| {
                j.status = JobStatus::Succeeded;
                j.finished_at_ms = Some(now_ms());
            });
        }

        // Always release the tenant lock, success or failure.
        if let Some(tid) = tenant_id {
            self.tenant_locks.unlock(tid, job_id);
        }
    }
}
|
||||
|
||||
/// Reasons a job may fail to start.
#[derive(Debug)]
pub enum StartJobError {
    // Another job currently holds this tenant's lock.
    TenantLocked,
}

/// What kind of work a running job performs; selects the behaviour of the
/// shared step dispatcher in `run_step`.
#[derive(Clone)]
enum RunSpec {
    Drain,
    Migrate { runner_target: String },
}
|
||||
|
||||
/// Create a fresh, not-yet-run job step with the given name.
fn step(name: &str) -> JobStep {
    JobStep {
        name: name.to_string(),
        status: JobStatus::Pending,
        attempts: 0,
        error: None,
    }
}
|
||||
|
||||
/// Current wall-clock time as milliseconds since the Unix epoch.
///
/// Falls back to 0 if the system clock reads before the epoch.
fn now_ms() -> u64 {
    let since_epoch = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default();
    since_epoch.as_millis() as u64
}
|
||||
|
||||
/// Execute one named job step against live state.
///
/// Unknown step names are treated as no-ops so drain and migrate jobs can
/// share this dispatcher. Returns a human-readable error string on failure.
async fn run_step(
    state: &AppState,
    spec: &RunSpec,
    step: &str,
    tenant_id: Option<Uuid>,
) -> Result<(), String> {
    match step {
        "preflight" => {
            // Refuse to start mutating work unless every fleet service
            // reports ready.
            let snapshots = fleet::snapshot(&state.http, &state.fleet_services).await;
            if snapshots.iter().any(|s| !s.ready_ok) {
                return Err("preflight failed: fleet not ready".to_string());
            }
            Ok(())
        }
        "drain" => {
            // Drain is currently simulated with a short sleep.
            tokio::time::sleep(Duration::from_millis(50)).await;
            Ok(())
        }
        // Placement is only touched for migrations; a drain job passing
        // through here is a no-op.
        "update_placement" => match spec {
            RunSpec::Migrate { runner_target } => {
                let tenant_id = tenant_id.ok_or_else(|| "missing tenant_id".to_string())?;
                state
                    .placement
                    .update_runner_target(tenant_id, runner_target.clone())
                    .map(|_| ())
            }
            _ => Ok(()),
        },
        "reload" => {
            // NOTE(review): only re-reads the summaries; presumably a real
            // reload hook lands here later — confirm intent.
            let _ = state.placement.tenant_summaries();
            Ok(())
        }
        // Verify the migration landed: the tenant's runner targets must now
        // contain the requested target.
        "verify" => match spec {
            RunSpec::Migrate { runner_target } => {
                let tenant_id = tenant_id.ok_or_else(|| "missing tenant_id".to_string())?;
                let summaries = state.placement.tenant_summaries();
                let found = summaries
                    .iter()
                    .find(|t| t.tenant_id == tenant_id)
                    .map(|t| t.runner_targets.iter().any(|x| x == runner_target))
                    .unwrap_or(false);
                if !found {
                    return Err("verify failed: placement not updated".to_string());
                }
                Ok(())
            }
            _ => Ok(()),
        },
        _ => Ok(()),
    }
}
|
||||
122
control/api/src/jobs.rs
Normal file
122
control/api/src/jobs.rs
Normal file
@@ -0,0 +1,122 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
sync::{
|
||||
Arc, Mutex,
|
||||
atomic::{AtomicBool, Ordering},
|
||||
},
|
||||
};
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Lifecycle state of a job or of an individual step.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum JobStatus {
    Pending,
    Running,
    Succeeded,
    Failed,
    Cancelled,
}

/// A multi-step background job as exposed over the API.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Job {
    pub job_id: Uuid,
    pub status: JobStatus,
    // Ordered steps; executed front to back.
    pub steps: Vec<JobStep>,
    // Job-level failure message, when the job failed or was cancelled.
    pub error: Option<String>,
    // Timestamps in milliseconds since the Unix epoch.
    pub created_at_ms: u64,
    pub started_at_ms: Option<u64>,
    pub finished_at_ms: Option<u64>,
}

/// One named unit of work inside a job.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct JobStep {
    pub name: String,
    pub status: JobStatus,
    // Number of times execution of this step was started.
    pub attempts: u32,
    // Step-level failure message, when the step failed.
    pub error: Option<String>,
}
|
||||
|
||||
/// Internal per-job record: the mutable job state plus a cancel flag that
/// can be set without taking the job mutex.
struct JobRecord {
    // Mutable job snapshot; updated via `JobStore::update`.
    job: Mutex<Job>,
    // Set by `request_cancel`, polled by the engine via `cancel_requested`.
    cancel: AtomicBool,
}
|
||||
|
||||
/// Cheaply cloneable, thread-safe in-memory store of jobs and their
/// idempotency-key registrations. All clones share the same state.
#[derive(Clone, Default)]
pub struct JobStore {
    inner: Arc<Inner>,
}
|
||||
|
||||
/// Shared state behind [`JobStore`].
#[derive(Default)]
struct Inner {
    // All known jobs, keyed by job id.
    jobs: Mutex<HashMap<Uuid, Arc<JobRecord>>>,
    // Idempotency key -> job id, so repeated submissions reuse one job.
    idempotency: Mutex<HashMap<String, Uuid>>,
}
|
||||
|
||||
impl JobStore {
|
||||
pub fn get(&self, job_id: Uuid) -> Option<Job> {
|
||||
let jobs = self.inner.jobs.lock().ok()?;
|
||||
let rec = jobs.get(&job_id)?.clone();
|
||||
rec.job.lock().ok().map(|j| j.clone())
|
||||
}
|
||||
|
||||
pub fn get_idempotent(&self, key: &str) -> Option<Uuid> {
|
||||
let map = self.inner.idempotency.lock().ok()?;
|
||||
map.get(key).copied()
|
||||
}
|
||||
|
||||
pub fn insert_idempotent(&self, key: &str, job: Job) -> Uuid {
|
||||
let mut idempotency = self
|
||||
.inner
|
||||
.idempotency
|
||||
.lock()
|
||||
.expect("idempotency lock poisoned");
|
||||
if let Some(existing) = idempotency.get(key) {
|
||||
return *existing;
|
||||
}
|
||||
|
||||
let job_id = job.job_id;
|
||||
let rec = Arc::new(JobRecord {
|
||||
job: Mutex::new(job),
|
||||
cancel: AtomicBool::new(false),
|
||||
});
|
||||
self.inner
|
||||
.jobs
|
||||
.lock()
|
||||
.expect("jobs lock poisoned")
|
||||
.insert(job_id, rec);
|
||||
|
||||
idempotency.insert(key.to_string(), job_id);
|
||||
job_id
|
||||
}
|
||||
|
||||
pub fn request_cancel(&self, job_id: Uuid) -> bool {
|
||||
let jobs = self.inner.jobs.lock().expect("jobs lock poisoned");
|
||||
let Some(rec) = jobs.get(&job_id) else {
|
||||
return false;
|
||||
};
|
||||
rec.cancel.store(true, Ordering::SeqCst);
|
||||
true
|
||||
}
|
||||
|
||||
pub fn cancel_requested(&self, job_id: Uuid) -> bool {
|
||||
let jobs = self.inner.jobs.lock().expect("jobs lock poisoned");
|
||||
let Some(rec) = jobs.get(&job_id) else {
|
||||
return false;
|
||||
};
|
||||
rec.cancel.load(Ordering::SeqCst)
|
||||
}
|
||||
|
||||
pub fn update<F>(&self, job_id: Uuid, f: F) -> bool
|
||||
where
|
||||
F: FnOnce(&mut Job),
|
||||
{
|
||||
let jobs = self.inner.jobs.lock().expect("jobs lock poisoned");
|
||||
let Some(rec) = jobs.get(&job_id) else {
|
||||
return false;
|
||||
};
|
||||
let mut job = rec.job.lock().expect("job lock poisoned");
|
||||
f(&mut job);
|
||||
true
|
||||
}
|
||||
}
|
||||
692
control/api/src/lib.rs
Normal file
692
control/api/src/lib.rs
Normal file
@@ -0,0 +1,692 @@
|
||||
mod admin;
|
||||
mod audit;
|
||||
mod auth;
|
||||
mod build_info;
|
||||
mod deployments;
|
||||
mod fleet;
|
||||
mod job_engine;
|
||||
mod jobs;
|
||||
mod placement;
|
||||
mod swarm;
|
||||
|
||||
pub use audit::AuditStore;
|
||||
pub use auth::{AuthConfig, Principal};
|
||||
use axum::{
|
||||
Router,
|
||||
extract::State,
|
||||
http::{HeaderName, HeaderValue, Request, StatusCode},
|
||||
middleware::{Next, from_fn, from_fn_with_state},
|
||||
response::{IntoResponse, Response},
|
||||
routing::get,
|
||||
};
|
||||
pub use build_info::{BuildInfo, extract_build_info};
|
||||
pub use deployments::{DeployAnnotationArgs, GrafanaAnnotation, build_grafana_deploy_annotation};
|
||||
pub use fleet::FleetService;
|
||||
pub use job_engine::TenantLocks;
|
||||
pub use jobs::JobStore;
|
||||
use metrics_exporter_prometheus::PrometheusHandle;
|
||||
pub use placement::PlacementStore;
|
||||
pub use placement::ServiceKind;
|
||||
use std::time::Instant;
|
||||
pub use swarm::SwarmStore;
|
||||
use tower_http::trace::TraceLayer;
|
||||
use tracing::{Span, field};
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Shared application state injected into every handler. Cheap to clone:
/// every field is either a handle or internally reference-counted.
#[derive(Clone)]
pub struct AppState {
    /// Renders the Prometheus exposition text for `/metrics`.
    pub prometheus: PrometheusHandle,
    /// JWT verification configuration for the admin routes.
    pub auth: AuthConfig,
    /// In-memory job store (see `jobs.rs`).
    pub jobs: JobStore,
    /// Append-only audit log store.
    pub audit: AuditStore,
    /// Per-tenant mutation locks used by the job engine.
    pub tenant_locks: TenantLocks,
    /// Outbound HTTP client for fleet health checks etc.
    pub http: reqwest::Client,
    /// File-backed tenant placement store.
    pub placement: PlacementStore,
    /// Services polled during fleet snapshots / migrate preflight.
    pub fleet_services: Vec<FleetService>,
    /// File-backed view of Docker Swarm services/tasks.
    pub swarm: SwarmStore,
}
|
||||
|
||||
/// Correlation identifiers extracted (or generated) per request by
/// `request_id_middleware` and exposed to handlers via request extensions.
#[derive(Clone, Debug)]
pub struct RequestIds {
    /// Always present: incoming `x-request-id` or a freshly generated UUID.
    pub request_id: String,
    /// Incoming `x-correlation-id`, if the client sent one.
    pub correlation_id: Option<String>,
    /// Incoming W3C `traceparent`, if the client sent one.
    pub traceparent: Option<String>,
}
|
||||
|
||||
/// Per-request id header; echoed back on every response.
const HEADER_REQUEST_ID: HeaderName = HeaderName::from_static("x-request-id");
/// Client-supplied correlation id; echoed back when present.
const HEADER_CORRELATION_ID: HeaderName = HeaderName::from_static("x-correlation-id");
/// W3C trace context header; captured but not mutated.
const HEADER_TRACEPARENT: HeaderName = HeaderName::from_static("traceparent");
|
||||
|
||||
pub fn build_app(state: AppState) -> Router {
|
||||
let trace = TraceLayer::new_for_http()
|
||||
.make_span_with(|req: &Request<_>| {
|
||||
let request_id = req
|
||||
.headers()
|
||||
.get(&HEADER_REQUEST_ID)
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.unwrap_or("")
|
||||
.to_owned();
|
||||
|
||||
let correlation_id = req
|
||||
.headers()
|
||||
.get(&HEADER_CORRELATION_ID)
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.unwrap_or("")
|
||||
.to_owned();
|
||||
|
||||
tracing::info_span!(
|
||||
"http_request",
|
||||
request.method = %req.method(),
|
||||
request.path = %req.uri().path(),
|
||||
request_id = %request_id,
|
||||
correlation_id = %correlation_id,
|
||||
trace_id = "",
|
||||
status = field::Empty,
|
||||
duration_ms = field::Empty,
|
||||
)
|
||||
})
|
||||
.on_response(
|
||||
|res: &Response, latency: std::time::Duration, span: &Span| {
|
||||
span.record("status", field::display(res.status()));
|
||||
span.record("duration_ms", field::display(latency.as_millis()));
|
||||
tracing::info!("response");
|
||||
},
|
||||
);
|
||||
|
||||
let admin =
|
||||
admin::admin_router().layer(from_fn_with_state(state.clone(), auth::auth_middleware));
|
||||
|
||||
Router::new()
|
||||
.route("/health", get(health))
|
||||
.route("/ready", get(ready))
|
||||
.route("/metrics", get(metrics))
|
||||
.nest("/admin/v1", admin)
|
||||
.with_state(state)
|
||||
.layer(trace)
|
||||
.layer(from_fn(request_id_middleware))
|
||||
}
|
||||
|
||||
/// Liveness probe: always `200 ok` while the process is running.
async fn health() -> impl IntoResponse {
    (StatusCode::OK, "ok")
}
|
||||
|
||||
/// Readiness probe: always `200 ready` (no dependency checks yet).
async fn ready() -> impl IntoResponse {
    (StatusCode::OK, "ready")
}
|
||||
|
||||
/// Prometheus scrape endpoint: renders the current metrics registry.
async fn metrics(State(state): State<AppState>) -> impl IntoResponse {
    (StatusCode::OK, state.prometheus.render())
}
|
||||
|
||||
async fn request_id_middleware(mut req: Request<axum::body::Body>, next: Next) -> Response {
|
||||
let request_id = req
|
||||
.headers()
|
||||
.get(&HEADER_REQUEST_ID)
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.map(|s| s.to_owned())
|
||||
.unwrap_or_else(|| Uuid::new_v4().to_string());
|
||||
|
||||
let correlation_id = req
|
||||
.headers()
|
||||
.get(&HEADER_CORRELATION_ID)
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.map(|s| s.to_owned());
|
||||
|
||||
let traceparent = req
|
||||
.headers()
|
||||
.get(&HEADER_TRACEPARENT)
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.map(|s| s.to_owned());
|
||||
|
||||
if req.headers().get(&HEADER_REQUEST_ID).is_none()
|
||||
&& let Ok(v) = HeaderValue::from_str(&request_id)
|
||||
{
|
||||
req.headers_mut().insert(HEADER_REQUEST_ID.clone(), v);
|
||||
}
|
||||
|
||||
req.extensions_mut().insert(RequestIds {
|
||||
request_id: request_id.clone(),
|
||||
correlation_id: correlation_id.clone(),
|
||||
traceparent: traceparent.clone(),
|
||||
});
|
||||
|
||||
let start = Instant::now();
|
||||
let mut res = next.run(req).await;
|
||||
|
||||
if let Ok(v) = HeaderValue::from_str(&request_id) {
|
||||
res.headers_mut().insert(HEADER_REQUEST_ID.clone(), v);
|
||||
}
|
||||
|
||||
if let Some(correlation_id) = correlation_id
|
||||
&& let Ok(v) = HeaderValue::from_str(&correlation_id)
|
||||
{
|
||||
res.headers_mut().insert(HEADER_CORRELATION_ID.clone(), v);
|
||||
}
|
||||
|
||||
metrics::histogram!("http_request_duration_ms").record(start.elapsed().as_millis() as f64);
|
||||
res
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    //! Integration tests that drive the full router with `tower::ServiceExt`
    //! (no live socket). JWTs are signed with a fixed test secret; the
    //! placement file is copied to a per-test temp path so mutations do not
    //! touch the checked-in fixture.
    use super::*;
    use crate::jobs::JobStatus;
    use axum::{
        body::Body,
        http::{Request, StatusCode, header},
    };
    use jsonwebtoken::{EncodingKey, Header, encode};
    use metrics_exporter_prometheus::PrometheusBuilder;
    use serde::Serialize;
    use std::fs;
    use std::path::PathBuf;
    use std::sync::OnceLock;
    use tower::ServiceExt;
    use uuid::Uuid;

    // The prometheus recorder is process-global and can only be installed
    // once, so all tests share one handle.
    static HANDLE: OnceLock<PrometheusHandle> = OnceLock::new();

    /// Minimal claim set matching what the auth middleware reads.
    #[derive(Serialize)]
    struct TestClaims {
        sub: String,
        session_id: String,
        permissions: Vec<String>,
        exp: usize,
    }

    /// App with the default (empty) fleet list.
    fn test_app() -> Router {
        test_app_with_fleet(vec![])
    }

    /// Build a fully wired app with the given fleet services and otherwise
    /// default in-memory state.
    fn test_app_with_fleet(fleet_services: Vec<FleetService>) -> Router {
        let handle = HANDLE
            .get_or_init(|| {
                PrometheusBuilder::new()
                    .install_recorder()
                    .expect("failed to install prometheus recorder")
            })
            .clone();

        let placement_path = temp_placement_file();

        build_app(AppState {
            prometheus: handle,
            auth: AuthConfig {
                hs256_secret: Some(b"test_secret".to_vec()),
            },
            jobs: JobStore::default(),
            audit: AuditStore::default(),
            tenant_locks: TenantLocks::default(),
            http: reqwest::Client::new(),
            placement: PlacementStore::new(placement_path),
            fleet_services,
            swarm: SwarmStore::new(repo_root().join("swarm/dev.json")),
        })
    }

    /// Repo root, assuming this crate lives at `<root>/control/api`.
    fn repo_root() -> PathBuf {
        PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .parent()
            .and_then(|p| p.parent())
            .expect("api crate should live under repo root")
            .to_path_buf()
    }

    /// Copy `placement/dev.json` to a unique temp path so tests that write
    /// placement state cannot interfere with each other or the fixture.
    fn temp_placement_file() -> PathBuf {
        let root = repo_root();
        let src = root.join("placement/dev.json");
        let mut dst = std::env::temp_dir();
        dst.push(format!(
            "cloudlysis-control-placement-{}-{}.json",
            std::process::id(),
            Uuid::new_v4()
        ));
        let raw = fs::read_to_string(src).expect("missing placement/dev.json");
        fs::write(&dst, raw).expect("failed to write temp placement file");
        dst
    }

    // Compile-time check helper: only builds if T: Send + Sync.
    fn assert_send_sync<T: Send + Sync>() {}

    #[test]
    fn core_state_types_are_send_sync() {
        assert_send_sync::<AppState>();
        assert_send_sync::<JobStore>();
        assert_send_sync::<AuthConfig>();
    }

    #[tokio::test]
    async fn health_returns_200() {
        let res = test_app()
            .oneshot(
                Request::builder()
                    .uri("/health")
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();

        assert_eq!(res.status(), StatusCode::OK);
    }

    #[tokio::test]
    async fn ready_returns_200() {
        let res = test_app()
            .oneshot(
                Request::builder()
                    .uri("/ready")
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();

        assert_eq!(res.status(), StatusCode::OK);
    }

    #[tokio::test]
    async fn metrics_returns_200() {
        let res = test_app()
            .oneshot(
                Request::builder()
                    .uri("/metrics")
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();

        assert_eq!(res.status(), StatusCode::OK);
    }

    /// Sign a short-lived HS256 token with the given permissions.
    fn make_token(perms: &[&str]) -> String {
        let exp = (std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .as_secs()
            + 60) as usize;

        encode(
            &Header::default(),
            &TestClaims {
                sub: "user_1".to_string(),
                session_id: "sess_1".to_string(),
                permissions: perms.iter().map(|p| (*p).to_string()).collect(),
                exp,
            },
            &EncodingKey::from_secret(b"test_secret"),
        )
        .unwrap()
    }

    #[tokio::test]
    async fn unauthorized_admin_calls_return_401() {
        // No Authorization header at all -> 401 from the auth middleware.
        let res = test_app()
            .oneshot(
                Request::builder()
                    .uri("/admin/v1/platform/info")
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();

        assert_eq!(res.status(), StatusCode::UNAUTHORIZED);
    }

    #[tokio::test]
    async fn forbidden_admin_calls_return_403() {
        // Valid token, but read-only permissions on a write endpoint.
        let token = make_token(&["control:read"]);
        let res = test_app()
            .oneshot(
                Request::builder()
                    .uri("/admin/v1/jobs/echo")
                    .method("POST")
                    .header(header::AUTHORIZATION, format!("Bearer {token}"))
                    .header("idempotency-key", "k1")
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();

        assert_eq!(res.status(), StatusCode::FORBIDDEN);
    }

    #[tokio::test]
    async fn tenant_scoped_endpoints_require_x_tenant_id() {
        // Missing x-tenant-id header -> 400.
        let token = make_token(&["control:read"]);
        let res = test_app()
            .oneshot(
                Request::builder()
                    .uri("/admin/v1/tenants/echo")
                    .header(header::AUTHORIZATION, format!("Bearer {token}"))
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();

        assert_eq!(res.status(), StatusCode::BAD_REQUEST);
    }

    #[tokio::test]
    async fn job_create_is_idempotent() {
        // Two POSTs with the same idempotency key must yield the same job id.
        let token = make_token(&["control:write"]);
        let app = test_app();
        let res1 = app
            .clone()
            .oneshot(
                Request::builder()
                    .uri("/admin/v1/jobs/echo")
                    .method("POST")
                    .header(header::AUTHORIZATION, format!("Bearer {token}"))
                    .header("idempotency-key", "same-key")
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();

        assert_eq!(res1.status(), StatusCode::OK);
        let body1 = axum::body::to_bytes(res1.into_body(), 1024 * 1024)
            .await
            .unwrap();
        let v1: serde_json::Value = serde_json::from_slice(&body1).unwrap();
        let id1 = Uuid::parse_str(v1.get("job_id").unwrap().as_str().unwrap()).unwrap();

        let res2 = app
            .clone()
            .oneshot(
                Request::builder()
                    .uri("/admin/v1/jobs/echo")
                    .method("POST")
                    .header(header::AUTHORIZATION, format!("Bearer {token}"))
                    .header("idempotency-key", "same-key")
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();

        assert_eq!(res2.status(), StatusCode::OK);
        let body2 = axum::body::to_bytes(res2.into_body(), 1024 * 1024)
            .await
            .unwrap();
        let v2: serde_json::Value = serde_json::from_slice(&body2).unwrap();
        let id2 = Uuid::parse_str(v2.get("job_id").unwrap().as_str().unwrap()).unwrap();

        assert_eq!(id1, id2);
    }

    /// Poll the job endpoint until the job leaves Pending/Running, or give
    /// up after 500ms and report Failed.
    async fn wait_for_terminal_status(app: Router, job_id: Uuid) -> JobStatus {
        let start = tokio::time::Instant::now();
        loop {
            let res = app
                .clone()
                .oneshot(
                    Request::builder()
                        .uri(format!("/admin/v1/jobs/{job_id}"))
                        .header(
                            header::AUTHORIZATION,
                            format!("Bearer {}", make_token(&["control:read"])),
                        )
                        .body(Body::empty())
                        .unwrap(),
                )
                .await
                .unwrap();

            if res.status() == StatusCode::OK {
                let body = axum::body::to_bytes(res.into_body(), 1024 * 1024)
                    .await
                    .unwrap();
                let job: crate::jobs::Job = serde_json::from_slice(&body).unwrap();
                if job.status != JobStatus::Pending && job.status != JobStatus::Running {
                    return job.status;
                }
            }

            if start.elapsed() > std::time::Duration::from_millis(500) {
                return JobStatus::Failed;
            }
            tokio::time::sleep(std::time::Duration::from_millis(10)).await;
        }
    }

    #[tokio::test]
    async fn tenant_job_idempotency_does_not_duplicate_effects() {
        let token = make_token(&["control:write", "control:read"]);
        let app = test_app();
        let tenant_id = Uuid::new_v4();

        let body = serde_json::json!({
            "tenant_id": tenant_id,
            "reason": "test",
        });

        let res1 = app
            .clone()
            .oneshot(
                Request::builder()
                    .uri("/admin/v1/jobs/tenant/drain")
                    .method("POST")
                    .header(header::AUTHORIZATION, format!("Bearer {token}"))
                    .header("idempotency-key", "same-key")
                    .header(header::CONTENT_TYPE, "application/json")
                    .body(Body::from(body.to_string()))
                    .unwrap(),
            )
            .await
            .unwrap();
        assert_eq!(res1.status(), StatusCode::OK);

        let res2 = app
            .clone()
            .oneshot(
                Request::builder()
                    .uri("/admin/v1/jobs/tenant/drain")
                    .method("POST")
                    .header(header::AUTHORIZATION, format!("Bearer {token}"))
                    .header("idempotency-key", "same-key")
                    .header(header::CONTENT_TYPE, "application/json")
                    .body(Body::from(body.to_string()))
                    .unwrap(),
            )
            .await
            .unwrap();
        assert_eq!(res2.status(), StatusCode::OK);

        let b1 = axum::body::to_bytes(res1.into_body(), 1024 * 1024)
            .await
            .unwrap();
        let b2 = axum::body::to_bytes(res2.into_body(), 1024 * 1024)
            .await
            .unwrap();
        let v1: serde_json::Value = serde_json::from_slice(&b1).unwrap();
        let v2: serde_json::Value = serde_json::from_slice(&b2).unwrap();
        assert_eq!(v1.get("job_id"), v2.get("job_id"));
    }

    #[tokio::test]
    async fn tenant_lock_prevents_concurrent_mutations() {
        // A second mutating job on the same tenant must be rejected with 409
        // while the first is still holding the tenant lock.
        let token = make_token(&["control:write", "control:read"]);
        let app = test_app();
        let tenant_id = Uuid::new_v4();

        let res1 = app
            .clone()
            .oneshot(
                Request::builder()
                    .uri("/admin/v1/jobs/tenant/drain")
                    .method("POST")
                    .header(header::AUTHORIZATION, format!("Bearer {token}"))
                    .header("idempotency-key", "k1")
                    .header(header::CONTENT_TYPE, "application/json")
                    .body(Body::from(
                        serde_json::json!({ "tenant_id": tenant_id, "reason": "r" }).to_string(),
                    ))
                    .unwrap(),
            )
            .await
            .unwrap();
        assert_eq!(res1.status(), StatusCode::OK);

        let res2 = app
            .clone()
            .oneshot(
                Request::builder()
                    .uri("/admin/v1/jobs/tenant/migrate")
                    .method("POST")
                    .header(header::AUTHORIZATION, format!("Bearer {token}"))
                    .header("idempotency-key", "k2")
                    .header(header::CONTENT_TYPE, "application/json")
                    .body(Body::from(
                        serde_json::json!({
                            "tenant_id": tenant_id,
                            "runner_target": "node-2",
                            "reason": "r2"
                        })
                        .to_string(),
                    ))
                    .unwrap(),
            )
            .await
            .unwrap();

        assert_eq!(res2.status(), StatusCode::CONFLICT);
    }

    #[tokio::test]
    async fn migrate_preflight_fails_when_fleet_not_ready() {
        // Fleet contains an unreachable service, so the preflight step (and
        // therefore the job) must end up Failed.
        let token = make_token(&["control:write", "control:read"]);
        let app = test_app_with_fleet(vec![FleetService {
            name: "unreachable".to_string(),
            base_url: "http://127.0.0.1:1".to_string(),
        }]);

        let tenant_id = Uuid::new_v4();
        let res = app
            .clone()
            .oneshot(
                Request::builder()
                    .uri("/admin/v1/jobs/tenant/migrate")
                    .method("POST")
                    .header(header::AUTHORIZATION, format!("Bearer {token}"))
                    .header("idempotency-key", "k3")
                    .header(header::CONTENT_TYPE, "application/json")
                    .body(Body::from(
                        serde_json::json!({
                            "tenant_id": tenant_id,
                            "runner_target": "node-2",
                            "reason": "r"
                        })
                        .to_string(),
                    ))
                    .unwrap(),
            )
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);

        let body = axum::body::to_bytes(res.into_body(), 1024 * 1024)
            .await
            .unwrap();
        let v: serde_json::Value = serde_json::from_slice(&body).unwrap();
        let job_id = Uuid::parse_str(v.get("job_id").unwrap().as_str().unwrap()).unwrap();

        let status = wait_for_terminal_status(app, job_id).await;
        assert_eq!(status, JobStatus::Failed);
    }

    #[tokio::test]
    async fn cancel_marks_job_cancelled() {
        let token = make_token(&["control:write", "control:read"]);
        let app = test_app();
        let tenant_id = Uuid::new_v4();

        let res = app
            .clone()
            .oneshot(
                Request::builder()
                    .uri("/admin/v1/jobs/tenant/migrate")
                    .method("POST")
                    .header(header::AUTHORIZATION, format!("Bearer {token}"))
                    .header("idempotency-key", "k4")
                    .header(header::CONTENT_TYPE, "application/json")
                    .body(Body::from(
                        serde_json::json!({
                            "tenant_id": tenant_id,
                            "runner_target": "node-2",
                            "reason": "r"
                        })
                        .to_string(),
                    ))
                    .unwrap(),
            )
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);

        let body = axum::body::to_bytes(res.into_body(), 1024 * 1024)
            .await
            .unwrap();
        let v: serde_json::Value = serde_json::from_slice(&body).unwrap();
        let job_id = Uuid::parse_str(v.get("job_id").unwrap().as_str().unwrap()).unwrap();

        let res = app
            .clone()
            .oneshot(
                Request::builder()
                    .uri(format!("/admin/v1/jobs/{job_id}/cancel"))
                    .method("POST")
                    .header(header::AUTHORIZATION, format!("Bearer {token}"))
                    .body(Body::empty())
                    .unwrap(),
            )
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);

        let status = wait_for_terminal_status(app, job_id).await;
        assert_eq!(status, JobStatus::Cancelled);
    }

    #[tokio::test]
    async fn migration_plan_is_deterministic() {
        // The dry-run plan always lists the same steps in the same order.
        let token = make_token(&["control:write"]);
        let app = test_app();
        let tenant_id = Uuid::new_v4();

        let res = app
            .oneshot(
                Request::builder()
                    .uri("/admin/v1/plan/tenant/migrate")
                    .method("POST")
                    .header(header::AUTHORIZATION, format!("Bearer {token}"))
                    .header(header::CONTENT_TYPE, "application/json")
                    .body(Body::from(
                        serde_json::json!({
                            "tenant_id": tenant_id,
                            "runner_target": "node-2",
                            "reason": "r"
                        })
                        .to_string(),
                    ))
                    .unwrap(),
            )
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);

        let body = axum::body::to_bytes(res.into_body(), 1024 * 1024)
            .await
            .unwrap();
        let v: serde_json::Value = serde_json::from_slice(&body).unwrap();
        assert_eq!(
            v.get("steps").unwrap(),
            &serde_json::json!(["preflight", "drain", "update_placement", "reload", "verify"])
        );
    }
}
|
||||
109
control/api/src/main.rs
Normal file
109
control/api/src/main.rs
Normal file
@@ -0,0 +1,109 @@
|
||||
use clap::Parser;
|
||||
use metrics_exporter_prometheus::PrometheusBuilder;
|
||||
use std::net::SocketAddr;
|
||||
use tracing_subscriber::EnvFilter;
|
||||
|
||||
/// Command-line arguments for the control-api binary.
#[derive(Parser, Debug)]
#[command(name = "control-api")]
struct Args {
    /// Listen address; overridable via `CONTROL_API_ADDR`.
    #[arg(long, env = "CONTROL_API_ADDR", default_value = "127.0.0.1:8080")]
    addr: SocketAddr,
}
|
||||
|
||||
/// Process entry point: initializes tracing and metrics, assembles
/// [`api::AppState`] from environment variables, and serves the app until
/// Ctrl-C. Any setup failure is fatal (expect/panic) by design.
#[tokio::main]
async fn main() {
    let args = Args::parse();

    // RUST_LOG-style filtering, defaulting to "info" when unset/invalid.
    tracing_subscriber::fmt()
        .with_env_filter(
            EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")),
        )
        .init();

    // Global Prometheus recorder; bucket boundaries are in milliseconds to
    // match the `http_request_duration_ms` histogram.
    let recorder = PrometheusBuilder::new()
        .set_buckets(&[
            1.0, 2.5, 5.0, 10.0, 25.0, 50.0, 100.0, 250.0, 500.0, 1000.0, 2500.0, 5000.0,
        ])
        .expect("invalid prometheus buckets")
        .install_recorder()
        .expect("failed to install prometheus recorder");

    let http = reqwest::Client::builder()
        .user_agent("cloudlysis-control-api")
        .build()
        .expect("failed to build http client");

    // File paths are relative to the working directory unless overridden.
    let placement_path = std::env::var("CONTROL_PLACEMENT_PATH")
        .ok()
        .unwrap_or_else(|| "placement/dev.json".to_string())
        .into();

    let swarm_path = std::env::var("CONTROL_SWARM_STATE_PATH")
        .ok()
        .unwrap_or_else(|| "swarm/dev.json".to_string())
        .into();

    let self_url = std::env::var("CONTROL_SELF_URL")
        .ok()
        .unwrap_or_else(|| "http://127.0.0.1:8080".to_string());

    // The fleet always includes this API itself; extra services come from
    // CONTROL_FLEET_SERVICES as "name=url,name=url" pairs.
    let mut fleet_services = vec![api::FleetService {
        name: "control-api".to_string(),
        base_url: self_url,
    }];
    if let Ok(spec) = std::env::var("CONTROL_FLEET_SERVICES") {
        fleet_services.extend(parse_fleet_services(&spec));
    }

    let app = api::build_app(api::AppState {
        prometheus: recorder,
        auth: api::AuthConfig {
            // When unset, token verification is disabled/fails closed per
            // the auth middleware's handling of a missing secret.
            hs256_secret: std::env::var("CONTROL_GATEWAY_JWT_HS256_SECRET")
                .ok()
                .map(|s| s.into_bytes()),
        },
        jobs: api::JobStore::default(),
        audit: api::AuditStore::default(),
        tenant_locks: api::TenantLocks::default(),
        http,
        placement: api::PlacementStore::new(placement_path),
        fleet_services,
        swarm: api::SwarmStore::new(swarm_path),
    });

    let listener = tokio::net::TcpListener::bind(args.addr)
        .await
        .expect("failed to bind");

    tracing::info!(addr = %args.addr, "control api listening");

    axum::serve(listener, app)
        .with_graceful_shutdown(shutdown_signal())
        .await
        .expect("server failed");
}
|
||||
|
||||
/// Resolves on Ctrl-C (SIGINT); signal-registration errors are ignored,
/// which simply means graceful shutdown would never trigger.
async fn shutdown_signal() {
    let _ = tokio::signal::ctrl_c().await;
}
|
||||
|
||||
/// Parse a `name=url,name=url` list into fleet service entries.
///
/// Whitespace around entries, names, and URLs is trimmed; entries that are
/// empty, lack an `=`, or have an empty name or URL are silently skipped.
fn parse_fleet_services(spec: &str) -> Vec<api::FleetService> {
    let mut services = Vec::new();
    for entry in spec.split(',') {
        // An empty or malformed entry has no '=' to split on and is dropped.
        let Some((name, url)) = entry.trim().split_once('=') else {
            continue;
        };
        let name = name.trim();
        let url = url.trim();
        if name.is_empty() || url.is_empty() {
            continue;
        }
        services.push(api::FleetService {
            name: name.to_string(),
            base_url: url.to_string(),
        });
    }
    services
}
|
||||
227
control/api/src/placement.rs
Normal file
227
control/api/src/placement.rs
Normal file
@@ -0,0 +1,227 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::{
|
||||
collections::BTreeMap,
|
||||
fs,
|
||||
path::{Path, PathBuf},
|
||||
sync::{Arc, RwLock},
|
||||
time::SystemTime,
|
||||
};
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Which placement section a service is asking about.
/// Serialized in snake_case (used as a URL path segment by the admin API).
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ServiceKind {
    Aggregate,
    Projection,
    Runner,
}
|
||||
|
||||
/// On-disk JSON schema of the placement file. Every section is optional so
/// partially populated files still parse.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct PlacementFile {
    /// Opaque revision tag; defaults to "dev" when absent.
    pub revision: Option<String>,
    pub aggregate_placement: Option<PlacementKind>,
    pub projection_placement: Option<PlacementKind>,
    pub runner_placement: Option<PlacementKind>,
}
|
||||
|
||||
/// One section of the placement file: the tenant placements for a single
/// service kind.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct PlacementKind {
    pub placements: Vec<TenantPlacement>,
}
|
||||
|
||||
/// Target assignment for one tenant within a placement section.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct TenantPlacement {
    pub tenant_id: Uuid,
    /// Node/target names this tenant is placed on for the section's kind.
    pub targets: Vec<String>,
}
|
||||
|
||||
/// API response for a single service kind's placement view.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct PlacementResponse {
    /// Kind this response was computed for.
    pub kind: ServiceKind,
    /// Revision of the backing file ("dev" when the file had none).
    pub revision: String,
    /// Per-tenant placements for `kind`; empty when the section is absent.
    pub placements: Vec<TenantPlacement>,
}
|
||||
|
||||
impl PlacementFile {
|
||||
pub fn load(path: &Path) -> Option<Self> {
|
||||
let raw = fs::read_to_string(path).ok()?;
|
||||
serde_json::from_str(&raw).ok()
|
||||
}
|
||||
|
||||
pub fn for_kind(&self, kind: ServiceKind) -> PlacementResponse {
|
||||
let revision = self.revision.clone().unwrap_or_else(|| "dev".to_string());
|
||||
let placements = match kind {
|
||||
ServiceKind::Aggregate => self
|
||||
.aggregate_placement
|
||||
.as_ref()
|
||||
.map(|p| p.placements.clone())
|
||||
.unwrap_or_default(),
|
||||
ServiceKind::Projection => self
|
||||
.projection_placement
|
||||
.as_ref()
|
||||
.map(|p| p.placements.clone())
|
||||
.unwrap_or_default(),
|
||||
ServiceKind::Runner => self
|
||||
.runner_placement
|
||||
.as_ref()
|
||||
.map(|p| p.placements.clone())
|
||||
.unwrap_or_default(),
|
||||
};
|
||||
|
||||
PlacementResponse {
|
||||
kind,
|
||||
revision,
|
||||
placements,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Shared, file-backed placement store. Clones share one cache guarded by
/// an `RwLock`; reads may reload from disk when the file's mtime changes.
#[derive(Clone)]
pub struct PlacementStore {
    inner: Arc<RwLock<Inner>>,
}
|
||||
|
||||
/// Mutable state behind [`PlacementStore`].
struct Inner {
    // Path of the backing JSON file.
    path: PathBuf,
    // mtime observed at the last (re)load; None forces a reload.
    last_modified: Option<SystemTime>,
    // Parsed file contents; None when the file is missing or invalid.
    cached: Option<PlacementFile>,
}
|
||||
|
||||
impl PlacementStore {
    /// Create a store over `path`; nothing is read until first access.
    pub fn new(path: PathBuf) -> Self {
        Self {
            inner: Arc::new(RwLock::new(Inner {
                path,
                last_modified: None,
                cached: None,
            })),
        }
    }

    /// Placement view for `kind`, reloading from disk first if the file
    /// changed. Falls back to an empty "dev" response when the file is
    /// missing or unparseable.
    // NOTE: takes the write lock even for reads because reloading mutates
    // the cache; reads therefore serialize against each other.
    pub fn get_for_kind(&self, kind: ServiceKind) -> PlacementResponse {
        let mut inner = self.inner.write().expect("placement lock poisoned");
        inner.reload_if_changed();
        match inner.cached.as_ref() {
            Some(p) => p.for_kind(kind),
            None => PlacementResponse {
                kind,
                revision: "dev".to_string(),
                placements: vec![],
            },
        }
    }

    /// Merge all three sections into per-tenant summaries, ordered by
    /// tenant id (BTreeMap keeps the output deterministic).
    pub fn tenant_summaries(&self) -> Vec<TenantSummary> {
        let mut inner = self.inner.write().expect("placement lock poisoned");
        inner.reload_if_changed();

        let Some(p) = inner.cached.as_ref() else {
            return vec![];
        };

        let mut map: BTreeMap<Uuid, TenantSummary> = BTreeMap::new();

        for (kind, placements) in [
            (
                ServiceKind::Aggregate,
                p.for_kind(ServiceKind::Aggregate).placements,
            ),
            (
                ServiceKind::Projection,
                p.for_kind(ServiceKind::Projection).placements,
            ),
            (
                ServiceKind::Runner,
                p.for_kind(ServiceKind::Runner).placements,
            ),
        ] {
            for tp in placements {
                let entry = map.entry(tp.tenant_id).or_insert_with(|| TenantSummary {
                    tenant_id: tp.tenant_id,
                    aggregate_targets: vec![],
                    projection_targets: vec![],
                    runner_targets: vec![],
                });
                match kind {
                    ServiceKind::Aggregate => entry.aggregate_targets = tp.targets,
                    ServiceKind::Projection => entry.projection_targets = tp.targets,
                    ServiceKind::Runner => entry.runner_targets = tp.targets,
                }
            }
        }

        map.into_values().collect()
    }

    /// Set the tenant's runner placement to exactly `[runner_target]`,
    /// bump the revision, and persist atomically (write temp file, then
    /// rename over the original). Returns the new revision string.
    pub fn update_runner_target(
        &self,
        tenant_id: Uuid,
        runner_target: String,
    ) -> Result<String, String> {
        let mut inner = self.inner.write().expect("placement lock poisoned");
        inner.reload_if_changed();

        // Start from the current file, or an empty skeleton when absent.
        let mut file = inner.cached.clone().unwrap_or(PlacementFile {
            revision: Some("dev".to_string()),
            aggregate_placement: Some(PlacementKind { placements: vec![] }),
            projection_placement: Some(PlacementKind { placements: vec![] }),
            runner_placement: Some(PlacementKind { placements: vec![] }),
        });

        let mut runner = file
            .runner_placement
            .take()
            .unwrap_or(PlacementKind { placements: vec![] });

        // Replace the tenant's targets in place, or append a new entry.
        if let Some(existing) = runner
            .placements
            .iter_mut()
            .find(|p| p.tenant_id == tenant_id)
        {
            existing.targets = vec![runner_target];
        } else {
            runner.placements.push(TenantPlacement {
                tenant_id,
                targets: vec![runner_target],
            });
        }

        // Keep the file diff-friendly and deterministic.
        runner.placements.sort_by_key(|p| p.tenant_id);
        file.runner_placement = Some(runner);

        let revision = format!("rev-{}", Uuid::new_v4());
        file.revision = Some(revision.clone());

        // Atomic persist: write a sibling temp file, then rename over the
        // target so readers never observe a half-written file.
        let raw = serde_json::to_string_pretty(&file).map_err(|e| e.to_string())?;
        let tmp = inner.path.with_extension("json.tmp");
        fs::write(&tmp, raw).map_err(|e| e.to_string())?;
        fs::rename(&tmp, &inner.path).map_err(|e| e.to_string())?;

        // Clearing last_modified forces the next read to re-stat the file;
        // the in-memory cache is updated immediately so this write is
        // visible even before that.
        inner.last_modified = None;
        inner.cached = Some(file);

        Ok(revision)
    }
}
|
||||
|
||||
impl Inner {
|
||||
fn reload_if_changed(&mut self) {
|
||||
let meta = fs::metadata(&self.path).ok();
|
||||
let modified = meta.and_then(|m| m.modified().ok());
|
||||
|
||||
if self.cached.is_some() && modified.is_some() && modified == self.last_modified {
|
||||
return;
|
||||
}
|
||||
|
||||
self.last_modified = modified;
|
||||
self.cached = PlacementFile::load(&self.path);
|
||||
}
|
||||
}
|
||||
|
||||
/// Per-tenant roll-up of placement targets across all three service kinds,
/// built by merging the per-kind placement lists.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct TenantSummary {
    /// Tenant this summary describes.
    pub tenant_id: Uuid,
    /// Targets assigned to the tenant's aggregate service (empty when none).
    pub aggregate_targets: Vec<String>,
    /// Targets assigned to the tenant's projection service (empty when none).
    pub projection_targets: Vec<String>,
    /// Targets assigned to the tenant's runner service (empty when none).
    pub runner_targets: Vec<String>,
}
|
||||
62
control/api/src/swarm.rs
Normal file
62
control/api/src/swarm.rs
Normal file
@@ -0,0 +1,62 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::{fs, path::Path};
|
||||
|
||||
/// One service entry from the swarm state file.
/// All fields except `name` are optional — absent data deserializes to `None`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SwarmService {
    /// Service name; used as the lookup key when listing tasks.
    pub name: String,
    /// Container image reference, if recorded.
    pub image: Option<String>,
    /// Scheduling mode string, if recorded (format not enforced here).
    pub mode: Option<String>,
    /// Replica description, if recorded — kept as a raw string, not parsed.
    pub replicas: Option<String>,
    /// Last-updated timestamp, if recorded — kept as a raw string.
    pub updated_at: Option<String>,
}
|
||||
|
||||
/// One task entry from the swarm state file.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SwarmTask {
    /// Task identifier.
    pub id: String,
    /// Name of the owning service; matched against `SwarmService::name`
    /// when filtering tasks by service.
    pub service: String,
    /// Node the task is placed on, if recorded.
    pub node: Option<String>,
    /// Desired state string, if recorded (kept raw, not an enum).
    pub desired_state: Option<String>,
    /// Currently observed state string, if recorded.
    pub current_state: Option<String>,
    /// Error message for the task, if any was recorded.
    pub error: Option<String>,
}
|
||||
|
||||
/// On-disk snapshot of swarm state: the full service and task lists,
/// deserialized as a single JSON document.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SwarmStateFile {
    /// All known services.
    pub services: Vec<SwarmService>,
    /// All known tasks, across every service.
    pub tasks: Vec<SwarmTask>,
}
|
||||
|
||||
/// Read-only accessor for a swarm state file; re-reads the file on every
/// query and treats a missing or unparsable file as empty state.
#[derive(Clone)]
pub struct SwarmStore {
    // Location of the JSON state file read by list_services/list_tasks.
    path: std::path::PathBuf,
}
|
||||
|
||||
impl SwarmStore {
|
||||
pub fn new(path: std::path::PathBuf) -> Self {
|
||||
Self { path }
|
||||
}
|
||||
|
||||
pub fn list_services(&self) -> Vec<SwarmService> {
|
||||
self.load().map(|s| s.services).unwrap_or_default()
|
||||
}
|
||||
|
||||
pub fn list_tasks(&self, service_name: &str) -> Vec<SwarmTask> {
|
||||
self.load()
|
||||
.map(|s| {
|
||||
s.tasks
|
||||
.into_iter()
|
||||
.filter(|t| t.service == service_name)
|
||||
.collect()
|
||||
})
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
fn load(&self) -> Option<SwarmStateFile> {
|
||||
load_state(&self.path)
|
||||
}
|
||||
}
|
||||
|
||||
fn load_state(path: &Path) -> Option<SwarmStateFile> {
|
||||
let raw = fs::read_to_string(path).ok()?;
|
||||
serde_json::from_str(&raw).ok()
|
||||
}
|
||||
Reference in New Issue
Block a user