feat(billing): implement tenant subscription entitlements system (milestones 0-6)
This commit is contained in:
45
.gitea/workflows/s3-provision.yml
Normal file
45
.gitea/workflows/s3-provision.yml
Normal file
@@ -0,0 +1,45 @@
|
||||
# Manually triggered workflow that provisions the S3 docs bucket and then
# verifies it, using the helper scripts under docker/scripts/.
name: s3-provision

# Runs only when started by hand; no push or schedule triggers are defined.
on:
  workflow_dispatch:

jobs:
  provision-docs-bucket:
    runs-on: ubuntu-latest
    timeout-minutes: 10
    steps:
      - uses: actions/checkout@v4

      - name: Install AWS CLI
        # Install only when the runner image does not already provide `aws`,
        # so re-runs stay fast and a preinstalled CLI is left in place.
        run: |
          if ! command -v aws >/dev/null 2>&1; then
            sudo apt-get update
            sudo apt-get install -y awscli
          fi

      - name: Validate required secrets
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.S3_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_SECRET_ACCESS_KEY }}
          S3_ENDPOINT: ${{ secrets.S3_ENDPOINT }}
          S3_REGION: ${{ secrets.S3_REGION }}
          S3_BUCKET_DOCS: ${{ secrets.S3_BUCKET_DOCS }}
        run: |
          # Check every value before failing so one run reports all missing
          # secrets by name. Only names are printed, never the values.
          missing=0
          require() {
            # $1 = secret name to report, $2 = the value received for it
            if [ -z "$2" ]; then
              echo "Missing required secret: $1" >&2
              missing=1
            fi
          }
          require S3_ACCESS_KEY_ID "$AWS_ACCESS_KEY_ID"
          require S3_SECRET_ACCESS_KEY "$AWS_SECRET_ACCESS_KEY"
          require S3_ENDPOINT "$S3_ENDPOINT"
          require S3_REGION "$S3_REGION"
          require S3_BUCKET_DOCS "$S3_BUCKET_DOCS"
          if [ "$missing" -ne 0 ]; then
            exit 1
          fi

      - name: Provision docs bucket (idempotent)
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.S3_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_SECRET_ACCESS_KEY }}
          S3_ENDPOINT: ${{ secrets.S3_ENDPOINT }}
          S3_REGION: ${{ secrets.S3_REGION }}
          S3_BUCKET_DOCS: ${{ secrets.S3_BUCKET_DOCS }}
          # Not validated above, so this one is treated as optional.
          # NOTE(review): presumably a true/false toggle read by the create
          # script - confirm against s3_create_docs_bucket.sh.
          S3_ENABLE_VERSIONING: ${{ secrets.S3_ENABLE_VERSIONING }}
          S3_LIFECYCLE_JSON: docs/usage/s3_lifecycle_docs_default.json
          S3_PREFIX_DOCS: docs/
        # Create first, then verify; both scripts read the env block above.
        run: |
          sh docker/scripts/s3_create_docs_bucket.sh
          sh docker/scripts/s3_verify_docs.sh
|
||||
|
||||
433
Cargo.lock
generated
433
Cargo.lock
generated
@@ -13,13 +13,13 @@ name = "aggregate"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-nats",
|
||||
"async-nats 0.39.0",
|
||||
"axum 0.7.9",
|
||||
"chrono",
|
||||
"edge-logger-client",
|
||||
"edge_storage",
|
||||
"futures",
|
||||
"lru",
|
||||
"lru 0.12.5",
|
||||
"prost 0.13.5",
|
||||
"protoc-bin-vendored",
|
||||
"query_engine",
|
||||
@@ -150,8 +150,15 @@ checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
|
||||
name = "api"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"async-nats 0.42.0",
|
||||
"async-trait",
|
||||
"aws-config",
|
||||
"aws-credential-types",
|
||||
"aws-sdk-s3",
|
||||
"axum 0.8.8",
|
||||
"clap",
|
||||
"futures",
|
||||
"hex",
|
||||
"jsonwebtoken",
|
||||
"metrics 0.23.1",
|
||||
"metrics-exporter-prometheus 0.16.2",
|
||||
@@ -159,6 +166,7 @@ dependencies = [
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_yaml",
|
||||
"sha2",
|
||||
"shared",
|
||||
"thiserror 2.0.18",
|
||||
"tokio",
|
||||
@@ -166,6 +174,8 @@ dependencies = [
|
||||
"tower-http 0.6.8",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
"url",
|
||||
"urlencoding",
|
||||
"uuid",
|
||||
]
|
||||
|
||||
@@ -229,6 +239,42 @@ dependencies = [
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-nats"
|
||||
version = "0.42.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "08f6da6d49a956424ca4e28fe93656f790d748b469eaccbc7488fec545315180"
|
||||
dependencies = [
|
||||
"base64",
|
||||
"bytes",
|
||||
"futures",
|
||||
"memchr",
|
||||
"nkeys",
|
||||
"nuid",
|
||||
"once_cell",
|
||||
"pin-project",
|
||||
"portable-atomic",
|
||||
"rand 0.8.5",
|
||||
"regex",
|
||||
"ring",
|
||||
"rustls-native-certs 0.7.3",
|
||||
"rustls-pemfile",
|
||||
"rustls-webpki 0.102.8",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_nanos",
|
||||
"serde_repr",
|
||||
"thiserror 1.0.69",
|
||||
"time",
|
||||
"tokio",
|
||||
"tokio-rustls 0.26.4",
|
||||
"tokio-util",
|
||||
"tokio-websockets",
|
||||
"tracing",
|
||||
"tryhard",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-stream"
|
||||
version = "0.3.6"
|
||||
@@ -282,6 +328,8 @@ checksum = "11493b0bad143270fb8ad284a096dd529ba91924c5409adeac856cc1bf047dbc"
|
||||
dependencies = [
|
||||
"aws-credential-types",
|
||||
"aws-runtime",
|
||||
"aws-sdk-sso",
|
||||
"aws-sdk-ssooidc",
|
||||
"aws-sdk-sts",
|
||||
"aws-smithy-async",
|
||||
"aws-smithy-http",
|
||||
@@ -292,11 +340,14 @@ dependencies = [
|
||||
"aws-types",
|
||||
"bytes",
|
||||
"fastrand",
|
||||
"hex",
|
||||
"http 1.4.0",
|
||||
"sha1",
|
||||
"time",
|
||||
"tokio",
|
||||
"tracing",
|
||||
"url",
|
||||
"zeroize",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -342,6 +393,7 @@ dependencies = [
|
||||
"aws-credential-types",
|
||||
"aws-sigv4",
|
||||
"aws-smithy-async",
|
||||
"aws-smithy-eventstream",
|
||||
"aws-smithy-http",
|
||||
"aws-smithy-runtime",
|
||||
"aws-smithy-runtime-api",
|
||||
@@ -350,7 +402,9 @@ dependencies = [
|
||||
"bytes",
|
||||
"bytes-utils",
|
||||
"fastrand",
|
||||
"http 0.2.12",
|
||||
"http 1.4.0",
|
||||
"http-body 0.4.6",
|
||||
"http-body 1.0.1",
|
||||
"percent-encoding",
|
||||
"pin-project-lite",
|
||||
@@ -358,6 +412,41 @@ dependencies = [
|
||||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aws-sdk-s3"
|
||||
version = "1.127.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "151783f64e0dcddeb4965d08e36c276b4400a46caa88805a2e36d497deaf031a"
|
||||
dependencies = [
|
||||
"aws-credential-types",
|
||||
"aws-runtime",
|
||||
"aws-sigv4",
|
||||
"aws-smithy-async",
|
||||
"aws-smithy-checksums",
|
||||
"aws-smithy-eventstream",
|
||||
"aws-smithy-http",
|
||||
"aws-smithy-json",
|
||||
"aws-smithy-observability",
|
||||
"aws-smithy-runtime",
|
||||
"aws-smithy-runtime-api",
|
||||
"aws-smithy-types",
|
||||
"aws-smithy-xml",
|
||||
"aws-types",
|
||||
"bytes",
|
||||
"fastrand",
|
||||
"hex",
|
||||
"hmac",
|
||||
"http 0.2.12",
|
||||
"http 1.4.0",
|
||||
"http-body 1.0.1",
|
||||
"lru 0.16.3",
|
||||
"percent-encoding",
|
||||
"regex-lite",
|
||||
"sha2",
|
||||
"tracing",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aws-sdk-sesv2"
|
||||
version = "1.117.0"
|
||||
@@ -382,6 +471,54 @@ dependencies = [
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aws-sdk-sso"
|
||||
version = "1.97.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9aadc669e184501caaa6beafb28c6267fc1baef0810fb58f9b205485ca3f2567"
|
||||
dependencies = [
|
||||
"aws-credential-types",
|
||||
"aws-runtime",
|
||||
"aws-smithy-async",
|
||||
"aws-smithy-http",
|
||||
"aws-smithy-json",
|
||||
"aws-smithy-observability",
|
||||
"aws-smithy-runtime",
|
||||
"aws-smithy-runtime-api",
|
||||
"aws-smithy-types",
|
||||
"aws-types",
|
||||
"bytes",
|
||||
"fastrand",
|
||||
"http 0.2.12",
|
||||
"http 1.4.0",
|
||||
"regex-lite",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aws-sdk-ssooidc"
|
||||
version = "1.99.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1342a7db8f358d3de0aed2007a0b54e875458e39848d54cc1d46700b2bfcb0a8"
|
||||
dependencies = [
|
||||
"aws-credential-types",
|
||||
"aws-runtime",
|
||||
"aws-smithy-async",
|
||||
"aws-smithy-http",
|
||||
"aws-smithy-json",
|
||||
"aws-smithy-observability",
|
||||
"aws-smithy-runtime",
|
||||
"aws-smithy-runtime-api",
|
||||
"aws-smithy-types",
|
||||
"aws-types",
|
||||
"bytes",
|
||||
"fastrand",
|
||||
"http 0.2.12",
|
||||
"http 1.4.0",
|
||||
"regex-lite",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aws-sdk-sts"
|
||||
version = "1.101.0"
|
||||
@@ -414,19 +551,25 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b0b660013a6683ab23797778e21f1f854744fdf05f68204b4cca4c8c04b5d1f4"
|
||||
dependencies = [
|
||||
"aws-credential-types",
|
||||
"aws-smithy-eventstream",
|
||||
"aws-smithy-http",
|
||||
"aws-smithy-runtime-api",
|
||||
"aws-smithy-types",
|
||||
"bytes",
|
||||
"crypto-bigint 0.5.5",
|
||||
"form_urlencoded",
|
||||
"hex",
|
||||
"hmac",
|
||||
"http 0.2.12",
|
||||
"http 1.4.0",
|
||||
"p256",
|
||||
"percent-encoding",
|
||||
"ring",
|
||||
"sha2",
|
||||
"subtle",
|
||||
"time",
|
||||
"tracing",
|
||||
"zeroize",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -440,12 +583,45 @@ dependencies = [
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aws-smithy-checksums"
|
||||
version = "0.64.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6750f3dd509b0694a4377f0293ed2f9630d710b1cebe281fa8bac8f099f88bc6"
|
||||
dependencies = [
|
||||
"aws-smithy-http",
|
||||
"aws-smithy-types",
|
||||
"bytes",
|
||||
"crc-fast",
|
||||
"hex",
|
||||
"http 1.4.0",
|
||||
"http-body 1.0.1",
|
||||
"http-body-util",
|
||||
"md-5",
|
||||
"pin-project-lite",
|
||||
"sha1",
|
||||
"sha2",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aws-smithy-eventstream"
|
||||
version = "0.60.20"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "faf09d74e5e32f76b8762da505a3cd59303e367a664ca67295387baa8c1d7548"
|
||||
dependencies = [
|
||||
"aws-smithy-types",
|
||||
"bytes",
|
||||
"crc32fast",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aws-smithy-http"
|
||||
version = "0.63.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ba1ab2dc1c2c3749ead27180d333c42f11be8b0e934058fb4b2258ee8dbe5231"
|
||||
dependencies = [
|
||||
"aws-smithy-eventstream",
|
||||
"aws-smithy-runtime-api",
|
||||
"aws-smithy-types",
|
||||
"bytes",
|
||||
@@ -473,13 +649,21 @@ dependencies = [
|
||||
"h2 0.3.27",
|
||||
"h2 0.4.13",
|
||||
"http 0.2.12",
|
||||
"http 1.4.0",
|
||||
"http-body 0.4.6",
|
||||
"hyper 0.14.32",
|
||||
"hyper 1.8.1",
|
||||
"hyper-rustls 0.24.2",
|
||||
"hyper-rustls 0.27.7",
|
||||
"hyper-util",
|
||||
"pin-project-lite",
|
||||
"rustls 0.21.12",
|
||||
"rustls 0.23.37",
|
||||
"rustls-native-certs 0.8.3",
|
||||
"rustls-pki-types",
|
||||
"tokio",
|
||||
"tokio-rustls 0.26.4",
|
||||
"tower 0.5.3",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
@@ -562,6 +746,7 @@ dependencies = [
|
||||
"base64-simd",
|
||||
"bytes",
|
||||
"bytes-utils",
|
||||
"futures-core",
|
||||
"http 0.2.12",
|
||||
"http 1.4.0",
|
||||
"http-body 0.4.6",
|
||||
@@ -574,6 +759,8 @@ dependencies = [
|
||||
"ryu",
|
||||
"serde",
|
||||
"time",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -706,6 +893,12 @@ dependencies = [
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "base16ct"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "349a06037c7bf932dd7e7d1f653678b2038b9ad46a74102f1fc7bd7872678cce"
|
||||
|
||||
[[package]]
|
||||
name = "base32"
|
||||
version = "0.5.1"
|
||||
@@ -1157,6 +1350,33 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crc"
|
||||
version = "3.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9710d3b3739c2e349eb44fe848ad0b7c8cb1e42bd87ee49371df2f7acaf3e675"
|
||||
dependencies = [
|
||||
"crc-catalog",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crc-catalog"
|
||||
version = "2.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5"
|
||||
|
||||
[[package]]
|
||||
name = "crc-fast"
|
||||
version = "1.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2fd92aca2c6001b1bf5ba0ff84ee74ec8501b52bbef0cac80bf25a6c1d87a83d"
|
||||
dependencies = [
|
||||
"crc",
|
||||
"digest",
|
||||
"rustversion",
|
||||
"spin",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crc32fast"
|
||||
version = "1.5.0"
|
||||
@@ -1200,6 +1420,28 @@ version = "0.8.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
|
||||
|
||||
[[package]]
|
||||
name = "crypto-bigint"
|
||||
version = "0.4.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ef2b4b23cddf68b89b8f8069890e8c270d54e2d5fe1b143820234805e4cb17ef"
|
||||
dependencies = [
|
||||
"generic-array",
|
||||
"rand_core 0.6.4",
|
||||
"subtle",
|
||||
"zeroize",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crypto-bigint"
|
||||
version = "0.5.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76"
|
||||
dependencies = [
|
||||
"rand_core 0.6.4",
|
||||
"subtle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crypto-common"
|
||||
version = "0.1.7"
|
||||
@@ -1242,6 +1484,16 @@ version = "2.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d7a1e2f27636f116493b8b860f5546edb47c8d8f8ea73e1d2a20be88e28d1fea"
|
||||
|
||||
[[package]]
|
||||
name = "der"
|
||||
version = "0.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f1a467a65c5e759bce6e65eaf91cc29f466cdc57cb65777bd646872a8a1fd4de"
|
||||
dependencies = [
|
||||
"const-oid",
|
||||
"zeroize",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "der"
|
||||
version = "0.7.10"
|
||||
@@ -1320,13 +1572,25 @@ version = "1.0.20"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555"
|
||||
|
||||
[[package]]
|
||||
name = "ecdsa"
|
||||
version = "0.14.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "413301934810f597c1d19ca71c8710e99a3f1ba28a0d2ebc01551a2daeea3c5c"
|
||||
dependencies = [
|
||||
"der 0.6.1",
|
||||
"elliptic-curve",
|
||||
"rfc6979",
|
||||
"signature 1.6.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ed25519"
|
||||
version = "2.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "115531babc129696a58c64a4fef0a8bf9e9698629fb97e9e40767d235cfbcd53"
|
||||
dependencies = [
|
||||
"signature",
|
||||
"signature 2.2.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1338,7 +1602,7 @@ dependencies = [
|
||||
"curve25519-dalek",
|
||||
"ed25519",
|
||||
"sha2",
|
||||
"signature",
|
||||
"signature 2.2.0",
|
||||
"subtle",
|
||||
]
|
||||
|
||||
@@ -1414,6 +1678,26 @@ version = "1.15.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
|
||||
|
||||
[[package]]
|
||||
name = "elliptic-curve"
|
||||
version = "0.12.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e7bb888ab5300a19b8e5bceef25ac745ad065f3c9f7efc6de1b91958110891d3"
|
||||
dependencies = [
|
||||
"base16ct",
|
||||
"crypto-bigint 0.4.9",
|
||||
"der 0.6.1",
|
||||
"digest",
|
||||
"ff",
|
||||
"generic-array",
|
||||
"group",
|
||||
"pkcs8 0.9.0",
|
||||
"rand_core 0.6.4",
|
||||
"sec1",
|
||||
"subtle",
|
||||
"zeroize",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "email-encoding"
|
||||
version = "0.4.1"
|
||||
@@ -1477,6 +1761,16 @@ version = "2.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
|
||||
|
||||
[[package]]
|
||||
name = "ff"
|
||||
version = "0.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d013fc25338cc558c5c2cfbad646908fb23591e2404481826742b651c9af7160"
|
||||
dependencies = [
|
||||
"rand_core 0.6.4",
|
||||
"subtle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fiat-crypto"
|
||||
version = "0.2.9"
|
||||
@@ -1524,6 +1818,12 @@ version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
|
||||
|
||||
[[package]]
|
||||
name = "foldhash"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb"
|
||||
|
||||
[[package]]
|
||||
name = "foreign-types"
|
||||
version = "0.3.2"
|
||||
@@ -1674,7 +1974,7 @@ version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"argon2",
|
||||
"async-nats",
|
||||
"async-nats 0.39.0",
|
||||
"async-trait",
|
||||
"axum 0.7.9",
|
||||
"base32",
|
||||
@@ -1768,6 +2068,17 @@ version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
|
||||
|
||||
[[package]]
|
||||
name = "group"
|
||||
version = "0.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5dfbfb3a6cfbd390d5c9564ab283a0349b9b9fcd46a706c1eb10e0db70bfbac7"
|
||||
dependencies = [
|
||||
"ff",
|
||||
"rand_core 0.6.4",
|
||||
"subtle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gzip-header"
|
||||
version = "1.0.0"
|
||||
@@ -1841,7 +2152,7 @@ checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
|
||||
dependencies = [
|
||||
"allocator-api2",
|
||||
"equivalent",
|
||||
"foldhash",
|
||||
"foldhash 0.1.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1849,6 +2160,11 @@ name = "hashbrown"
|
||||
version = "0.16.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
|
||||
dependencies = [
|
||||
"allocator-api2",
|
||||
"equivalent",
|
||||
"foldhash 0.2.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
@@ -2532,6 +2848,15 @@ dependencies = [
|
||||
"hashbrown 0.15.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lru"
|
||||
version = "0.16.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a1dc47f592c06f33f8e3aea9591776ec7c9f9e4124778ff8a3c3b87159f7e593"
|
||||
dependencies = [
|
||||
"hashbrown 0.16.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lru-slab"
|
||||
version = "0.1.2"
|
||||
@@ -2559,6 +2884,16 @@ version = "0.8.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3"
|
||||
|
||||
[[package]]
|
||||
name = "md-5"
|
||||
version = "0.10.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"digest",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mdbx-sys"
|
||||
version = "13.11.0"
|
||||
@@ -3123,6 +3458,17 @@ dependencies = [
|
||||
"unicode-id-start",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "p256"
|
||||
version = "0.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "51f44edd08f51e2ade572f141051021c5af22677e42b7dd28a88155151c33594"
|
||||
dependencies = [
|
||||
"ecdsa",
|
||||
"elliptic-curve",
|
||||
"sha2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "parking_lot"
|
||||
version = "0.12.5"
|
||||
@@ -3300,14 +3646,24 @@ version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
|
||||
|
||||
[[package]]
|
||||
name = "pkcs8"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9eca2c590a5f85da82668fa685c09ce2888b9430e83299debf1f34b65fd4a4ba"
|
||||
dependencies = [
|
||||
"der 0.6.1",
|
||||
"spki 0.6.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pkcs8"
|
||||
version = "0.10.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7"
|
||||
dependencies = [
|
||||
"der",
|
||||
"spki",
|
||||
"der 0.7.10",
|
||||
"spki 0.7.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3379,7 +3735,7 @@ name = "projection"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-nats",
|
||||
"async-nats 0.39.0",
|
||||
"axum 0.7.9",
|
||||
"chrono",
|
||||
"edge-logger-client",
|
||||
@@ -3937,6 +4293,17 @@ dependencies = [
|
||||
"webpki-roots 1.0.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rfc6979"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7743f17af12fa0b03b803ba12cd6a8d9483a587e89c69445e3909655c0b9fabb"
|
||||
dependencies = [
|
||||
"crypto-bigint 0.4.9",
|
||||
"hmac",
|
||||
"zeroize",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ring"
|
||||
version = "0.17.14"
|
||||
@@ -3985,7 +4352,7 @@ name = "runner"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-nats",
|
||||
"async-nats 0.39.0",
|
||||
"aws-config",
|
||||
"aws-sdk-sesv2",
|
||||
"axum 0.7.9",
|
||||
@@ -4306,6 +4673,20 @@ dependencies = [
|
||||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sec1"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3be24c1842290c45df0a7bf069e0c268a747ad05a192f2fd7dcfdbc1cba40928"
|
||||
dependencies = [
|
||||
"base16ct",
|
||||
"der 0.6.1",
|
||||
"generic-array",
|
||||
"pkcs8 0.9.0",
|
||||
"subtle",
|
||||
"zeroize",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "security-framework"
|
||||
version = "2.11.1"
|
||||
@@ -4535,12 +4916,22 @@ version = "0.27.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c1e303f8205714074f6068773f0e29527e0453937fe837c9717d066635b65f31"
|
||||
dependencies = [
|
||||
"pkcs8",
|
||||
"pkcs8 0.10.2",
|
||||
"rand_core 0.6.4",
|
||||
"signature",
|
||||
"signature 2.2.0",
|
||||
"zeroize",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "signature"
|
||||
version = "1.6.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "74233d3b3b2f6d4b006dc19dee745e73e2a6bfb6f93607cd3b02bd5b00797d7c"
|
||||
dependencies = [
|
||||
"digest",
|
||||
"rand_core 0.6.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "signature"
|
||||
version = "2.2.0"
|
||||
@@ -4625,6 +5016,22 @@ dependencies = [
|
||||
"windows-sys 0.61.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "spin"
|
||||
version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d5fe4ccb98d9c292d56fec89a5e07da7fc4cf0dc11e156b41793132775d3e591"
|
||||
|
||||
[[package]]
|
||||
name = "spki"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "67cf02bbac7a337dc36e4f5a693db6c21e7863f45070f7064577eb4367a3212b"
|
||||
dependencies = [
|
||||
"base64ct",
|
||||
"der 0.6.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "spki"
|
||||
version = "0.7.3"
|
||||
@@ -4632,7 +5039,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d"
|
||||
dependencies = [
|
||||
"base64ct",
|
||||
"der",
|
||||
"der 0.7.10",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
81
DOCKER.md
81
DOCKER.md
@@ -11,8 +11,15 @@ docker compose down -v
|
||||
To include the observability stack (Grafana/Loki/Tempo/VictoriaMetrics) with the local compose:
|
||||
|
||||
```bash
|
||||
docker compose -f docker-compose.yml -f observability/docker-compose.yml up -d --build
|
||||
docker compose -f docker-compose.yml -f observability/docker-compose.yml down -v
|
||||
docker compose --profile observability up -d --build
|
||||
docker compose --profile observability down -v
|
||||
```
|
||||
|
||||
To use S3-compatible object storage (MinIO) for Loki + Tempo locally:
|
||||
|
||||
```bash
|
||||
docker compose -f docker-compose.yml -f observability/docker-compose.s3.yml --profile observability up -d --build
|
||||
docker compose -f docker-compose.yml -f observability/docker-compose.s3.yml --profile observability down -v
|
||||
```
|
||||
|
||||
Service ports in the default compose:
|
||||
@@ -23,8 +30,20 @@ Service ports in the default compose:
|
||||
- Runner HTTP: `http://localhost:28080`
|
||||
- Control API: `http://localhost:38080`
|
||||
- Control UI: `http://localhost:8082`
|
||||
- MailHog SMTP: `smtp://localhost:1025`
|
||||
- MailHog UI: `http://localhost:8025`
|
||||
- MinIO S3 API: `http://localhost:9000`
|
||||
- MinIO console: `http://localhost:9001`
|
||||
- NATS: `nats://localhost:4222`, monitoring `http://localhost:8222`
|
||||
|
||||
MinIO defaults:
|
||||
- Credentials: `minioadmin` / `minioadmin`
|
||||
- Buckets: `cloudlysis-docs-0,cloudlysis-docs-1,cloudlysis-docs-2` (comma-separated docs bucket set)
|
||||
|
||||
Email defaults (local):
|
||||
- Runner uses SMTP backend via `RUNNER_SMTP_URL=smtp://mailhog:1025`
|
||||
- Inspect emails at MailHog UI `http://localhost:8025`
|
||||
|
||||
## Swarm (Dev)
|
||||
|
||||
Build images:
|
||||
@@ -56,6 +75,10 @@ Create dev secrets required by the observability stack:
|
||||
sh docker/scripts/swarm_dev_secrets.sh
|
||||
```
|
||||
|
||||
This also creates dev secrets used by the control plane for S3 document storage:
|
||||
- `control_s3_access_key_id`
|
||||
- `control_s3_secret_access_key`
|
||||
|
||||
Deploy:
|
||||
|
||||
```bash
|
||||
@@ -66,6 +89,60 @@ docker stack deploy -c swarm/stacks/control-plane.yml cloudlysis_control
|
||||
docker stack deploy -c swarm/stacks/observability.yml cloudlysis_obs
|
||||
```
|
||||
|
||||
Production-style control plane (no MinIO in stack; S3 is external):
|
||||
|
||||
```bash
|
||||
# create secrets (set CONTROL_S3_ACCESS_KEY_ID / CONTROL_S3_SECRET_ACCESS_KEY first)
|
||||
sh docker/scripts/swarm_dev_secrets.sh
|
||||
|
||||
# required env for the stack
|
||||
export CONTROL_S3_ENDPOINT="https://<hetzner-endpoint>"
|
||||
export CONTROL_S3_REGION="eu-central-1"
|
||||
export CONTROL_S3_BUCKET_DOCS="cloudlysis-docs"
|
||||
|
||||
docker stack deploy -c swarm/stacks/control-plane-prod.yml cloudlysis_control
|
||||
```
|
||||
|
||||
Verify production S3 bucket/prefix permissions with AWS CLI (env-gated):
|
||||
|
||||
```bash
|
||||
# install AWS CLI v2, then export credentials (AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY) and the target
|
||||
export S3_ENDPOINT="https://<hetzner-endpoint>"
|
||||
export S3_REGION="eu-central-1"
|
||||
export S3_BUCKET_DOCS="cloudlysis-docs"
|
||||
export S3_PREFIX_DOCS="docs/"
|
||||
|
||||
# optionally set S3_FORCE_PATH_STYLE=true for some S3-compatible endpoints
|
||||
|
||||
sh docker/scripts/s3_verify_docs.sh
|
||||
```
|
||||
|
||||
Create/provision the docs bucket (idempotent; CI/CD-friendly):
|
||||
|
||||
```bash
|
||||
export S3_ENDPOINT="https://<hetzner-endpoint>"
|
||||
export S3_REGION="eu-central-1"
|
||||
export S3_BUCKET_DOCS="cloudlysis-docs"
|
||||
|
||||
# optional
|
||||
# export S3_ENABLE_VERSIONING=true
|
||||
|
||||
sh docker/scripts/s3_create_docs_bucket.sh
|
||||
```
|
||||
|
||||
Apply a lifecycle policy to the docs bucket (operator; automated):
|
||||
|
||||
```bash
|
||||
export S3_ENDPOINT="https://<hetzner-endpoint>"
|
||||
export S3_REGION="eu-central-1"
|
||||
export S3_BUCKET_DOCS="cloudlysis-docs"
|
||||
|
||||
# optional: provide your own lifecycle JSON file
|
||||
# export S3_LIFECYCLE_JSON="path/to/lifecycle.json"
|
||||
|
||||
sh docker/scripts/s3_apply_lifecycle_docs.sh
|
||||
```
|
||||
|
||||
Remove:
|
||||
|
||||
```bash
|
||||
|
||||
59
README.md
59
README.md
@@ -1,38 +1,55 @@
|
||||
# cloudlysis (monorepo)
|
||||
# Cloudlysis (monorepo)
|
||||
|
||||
## Layout
|
||||
- Rust services (Cargo workspace): `aggregate/`, `gateway/`, `projection/`, `runner/`, `control/api/`, `shared/`
|
||||
- Control UI: `control/ui/`
|
||||
- Docker + Swarm + Compose: `docker/`, `docker-compose.yml`, `swarm/`, `observability/`
|
||||
Production-oriented, multi-service Rust workspace with an operator-facing Control Plane (API + Admin UI), S3-backed document storage, and an optional observability stack for local parity.
|
||||
|
||||
## Documentation
|
||||
- docs/README.md
|
||||
- Architecture: docs/architecture/overview.md, docs/architecture/transport.md
|
||||
- Developer: docs/developer/setup.md, docs/developer/testing.md
|
||||
- Usage: docs/usage/quickstart.md, docs/usage/api.md, docs/usage/nats.md
|
||||
- Gitea Wiki: run `scripts/publish_gitea_wiki.sh` (publishes `wiki/` to the repo wiki)
|
||||
## Quickstart (local dev)
|
||||
|
||||
## Quick Start (Docker Compose)
|
||||
Core stack (includes MinIO + MailHog + Control Plane):
|
||||
|
||||
```bash
|
||||
docker compose up -d --build
|
||||
```
|
||||
|
||||
Full local stack with observability:
|
||||
Full local stack with observability (Grafana/Loki/Tempo/VictoriaMetrics):
|
||||
|
||||
```bash
|
||||
docker compose -f docker-compose.yml -f observability/docker-compose.yml up -d --build
|
||||
docker compose --profile observability up -d --build
|
||||
```
|
||||
|
||||
## Commands
|
||||
- `make compose-up`, `make compose-down`
|
||||
- `make compose-up-observability`, `make compose-down-observability`
|
||||
- `make docker-build-all`
|
||||
- `make swarm-deploy-all`, `make swarm-rm-all`
|
||||
Full local stack + Loki/Tempo using MinIO (S3 mode):
|
||||
|
||||
More details: `DOCKER.md`
|
||||
```bash
|
||||
docker compose -f docker-compose.yml -f observability/docker-compose.s3.yml --profile observability up -d --build
|
||||
```
|
||||
|
||||
## Workspace Verification
|
||||
## Local endpoints
|
||||
- **Control UI**: `http://localhost:8082`
|
||||
- **Control API**: `http://localhost:38080`
|
||||
- **Grafana** (observability profile): `http://localhost:3000`
|
||||
- **MailHog UI**: `http://localhost:8025` (SMTP on `localhost:1025`)
|
||||
- **MinIO console**: `http://localhost:9001` (S3 API on `localhost:9000`)
|
||||
|
||||
## Repository layout (high level)
|
||||
- **Rust services (Cargo workspace)**: `aggregate/`, `gateway/`, `projection/`, `runner/`, `control/api/`, `shared/`
|
||||
- **Admin UI**: `control/ui/`
|
||||
- **Docker / Swarm / Compose**: `docker/`, `docker-compose.yml`, `swarm/`, `observability/`
|
||||
|
||||
## Production (overview)
|
||||
- **Control plane Swarm stack**: `swarm/stacks/control-plane-prod.yml`
|
||||
- **S3 docs buckets**: `CONTROL_S3_BUCKET_DOCS` supports a comma-separated shard set (e.g. `cloudlysis-docs-0,cloudlysis-docs-1,cloudlysis-docs-2`). Bucket selection is deterministic per-tenant; keep the full shard set stable to avoid remapping tenants.
|
||||
- **S3 provisioning helpers** (idempotent scripts; CI/CD friendly):
|
||||
- `docker/scripts/s3_create_docs_bucket.sh`
|
||||
- `docker/scripts/s3_apply_lifecycle_docs.sh`
|
||||
- `docker/scripts/s3_verify_docs.sh`
|
||||
- Gitea Actions workflow: `.gitea/workflows/s3-provision.yml`
|
||||
|
||||
## Docs
|
||||
- **Docker / local dev / Swarm**: `DOCKER.md`
|
||||
- **Developer docs**: `docs/developer/setup.md`, `docs/developer/testing.md`
|
||||
- **Architecture**: `docs/architecture/overview.md`, `docs/architecture/transport.md`
|
||||
- **Usage**: `docs/usage/quickstart.md`, `docs/usage/api.md`, `docs/usage/nats.md`
|
||||
|
||||
## Workspace verification
|
||||
|
||||
```bash
|
||||
cargo fmt --check
|
||||
|
||||
187
S3_PLAN.md
187
S3_PLAN.md
@@ -1,187 +0,0 @@
|
||||
# S3-Compatible Object Storage Plan (Hetzner in Prod, MinIO Locally)
|
||||
|
||||
## Principles
|
||||
- S3-compatible object storage is mandatory for platform document storage in every environment:
|
||||
- Local development uses MinIO.
|
||||
- Production uses Hetzner Object Storage (S3 API compatible).
|
||||
- Each milestone is stop-the-line gated:
|
||||
- All tasks completed
|
||||
- All milestone tests pass
|
||||
- Workspace verification commands pass
|
||||
- Secrets are never committed and never logged:
|
||||
- Access keys via Swarm secrets in production
|
||||
- `.env` or compose env in local dev
|
||||
|
||||
## Goals
|
||||
- Introduce a single, shared S3-compatible configuration surface for the platform.
|
||||
- Make document storage always backed by S3 (no filesystem fallback for documents).
|
||||
- Keep the implementation incremental and test-gated per milestone.
|
||||
- Optionally expand to observability object storage after document storage is stable.
|
||||
|
||||
## Definitions
|
||||
### Document Storage
|
||||
“Documents” are versioned blobs the platform needs to store and retrieve reliably:
|
||||
- Deployment bundles and artifacts
|
||||
- Definitions/manifests (projection programs, saga/effects definitions, schema bundles)
|
||||
- Exported audit/log bundles, diagnostics, or snapshots that are not part of the primary KV/MDBX state
|
||||
|
||||
Document storage must support:
|
||||
- Tenant-scoped namespaces (prefixes)
|
||||
- Content-addressed or versioned keys (immutability preferred)
|
||||
- Listing by prefix for admin workflows
|
||||
|
||||
## Configuration Contract (Platform-Wide)
|
||||
### Common Settings
|
||||
- `S3_ENDPOINT` (Hetzner: HTTPS endpoint; MinIO: `http://minio:9000`)
|
||||
- `S3_REGION` (required; some S3-compatible providers still expect a region to be set)
|
||||
- `S3_ACCESS_KEY_ID` (secret)
|
||||
- `S3_SECRET_ACCESS_KEY` (secret)
|
||||
- `S3_FORCE_PATH_STYLE` (`true/false`)
|
||||
- `S3_INSECURE` (`true/false`, only allowed for local MinIO)
|
||||
|
||||
### Buckets and Prefixes
|
||||
- `S3_BUCKET_DOCS` (required everywhere)
|
||||
- `S3_PREFIX_DOCS` (default `docs/`)
|
||||
|
||||
Optional (later milestones):
|
||||
- `S3_BUCKET_LOKI`, `S3_PREFIX_LOKI`
|
||||
- `S3_BUCKET_TEMPO`, `S3_PREFIX_TEMPO`
|
||||
|
||||
## Target Architecture
|
||||
### Local Development
|
||||
- MinIO is part of the local stack for parity.
|
||||
- Control API is the document gateway:
|
||||
- Upload/download via signed URLs or streamed proxy endpoints
|
||||
- Metadata stored in existing storage/KV (document index) or derived from key scheme
|
||||
|
||||
### Production
|
||||
- Hetzner Object Storage provides S3-compatible bucket(s).
|
||||
- Credentials and bucket details injected via Swarm secrets and stack env.
|
||||
|
||||
## Development Plan (Milestones by Dependency)
|
||||
|
||||
## Milestone 0: S3 Contract + Local MinIO Baseline
|
||||
### Dependencies
|
||||
- None
|
||||
|
||||
### Goal
|
||||
Provide a consistent local S3-compatible endpoint and stable bucket naming to unblock higher milestones.
|
||||
|
||||
### Tasks
|
||||
- [ ] Add MinIO to local development stack:
|
||||
- [ ] Add `minio` service to compose (API + console)
|
||||
- [ ] Add `minio-init` job to create required buckets
|
||||
- [ ] Define standard bucket/prefix defaults for local dev:
|
||||
- [ ] `S3_BUCKET_DOCS=cloudlysis-docs`
|
||||
- [ ] `S3_PREFIX_DOCS=docs/`
|
||||
- [ ] Document local workflow to enable MinIO-backed document storage.
|
||||
|
||||
### Required Tests (Gate)
|
||||
- [ ] Workspace verification commands
|
||||
- [ ] Local manual verification checklist:
|
||||
- [ ] `cloudlysis-docs` bucket exists
|
||||
- [ ] credentials work from a container in the compose network
|
||||
|
||||
## Milestone 1: Document Storage API (Control API)
|
||||
### Dependencies
|
||||
- Milestone 0
|
||||
|
||||
### Goal
|
||||
Make document storage a first-class platform API and require it in all environments.
|
||||
|
||||
### Tasks
|
||||
- [ ] Add an S3 client module to Control API:
|
||||
- [ ] parse config from env with strict validation (endpoint, bucket, keys)
|
||||
- [ ] support path-style and TLS/insecure options
|
||||
- [ ] Implement document primitives:
|
||||
- [ ] Put (upload) and Get (download)
|
||||
- [ ] List by prefix (tenant + doc-type)
|
||||
- [ ] Delete (admin-only) if needed
|
||||
- [ ] Decide and document a key scheme:
|
||||
- [ ] tenant-scoped prefix
|
||||
- [ ] immutable keys preferred (content hash + metadata)
|
||||
- [ ] Add authz rules for document operations (deny-by-default, tenant-scoped).
|
||||
|
||||
### Required Tests (Gate)
|
||||
- [ ] Workspace verification commands
|
||||
- [ ] Unit tests:
|
||||
- [ ] config parsing/validation
|
||||
- [ ] key generation stability
|
||||
- [ ] Gated integration tests (MinIO):
|
||||
- [ ] put/get roundtrip
|
||||
- [ ] list by prefix
|
||||
- [ ] tenant isolation (cannot read other tenant prefix)
|
||||
|
||||
## Milestone 2: Control UI Integration (Upload/Download Flows)
|
||||
### Dependencies
|
||||
- Milestone 1
|
||||
|
||||
### Goal
|
||||
Make document workflows usable from the Control UI without leaking credentials.
|
||||
|
||||
### Tasks
|
||||
- [ ] Add Control API endpoints for signed URLs (recommended) or streamed proxy:
|
||||
- [ ] create upload URL (PUT)
|
||||
- [ ] create download URL (GET)
|
||||
- [ ] Implement Control UI flows for a first document type:
|
||||
- [ ] upload
|
||||
- [ ] list
|
||||
- [ ] download
|
||||
- [ ] Ensure correlation/trace propagation on Control API operations.
|
||||
|
||||
### Required Tests (Gate)
|
||||
- [ ] Workspace verification commands
|
||||
- [ ] Control UI unit tests for routing/component render stability
|
||||
- [ ] Gated end-to-end checklist (local):
|
||||
- [ ] upload appears in list
|
||||
- [ ] download returns expected bytes
|
||||
|
||||
## Milestone 3: Production Rollout (Hetzner)
|
||||
### Dependencies
|
||||
- Milestone 2
|
||||
|
||||
### Goal
|
||||
Deploy document storage on Hetzner S3-compatible backend with production-grade secret handling.
|
||||
|
||||
### Tasks
|
||||
- [ ] Provision buckets and lifecycle policies (docs bucket):
|
||||
- [ ] retention rules appropriate to documents
|
||||
- [ ] access policy scoped to required actions
|
||||
- [ ] Swarm deployment:
|
||||
- [ ] add secrets for access keys
|
||||
- [ ] configure Control API with endpoint/region/bucket/prefix
|
||||
- [ ] Rollback plan:
|
||||
- [ ] switch to a fallback bucket or MinIO-on-prod if needed
|
||||
|
||||
### Required Tests (Gate)
|
||||
- [ ] Workspace verification commands
|
||||
- [ ] Production smoke runbook:
|
||||
- [ ] upload/list/download for a tenant
|
||||
- [ ] verify objects exist under expected prefixes
|
||||
|
||||
## Milestone 4 (Optional): Observability Storage on S3 (Loki + Tempo)
|
||||
### Dependencies
|
||||
- Milestone 3
|
||||
|
||||
### Goal
|
||||
Store logs and traces in S3-compatible storage (MinIO locally; Hetzner in production).
|
||||
|
||||
### Tasks
|
||||
- [ ] Loki:
|
||||
- [ ] add S3 config variant and compose overlay
|
||||
- [ ] validate log query and bucket objects
|
||||
- [ ] Tempo:
|
||||
- [ ] add S3 config variant and compose overlay
|
||||
- [ ] validate traces and bucket objects
|
||||
|
||||
### Required Tests (Gate)
|
||||
- [ ] Workspace verification commands
|
||||
- [ ] Gated local validation:
|
||||
- [ ] Loki writes objects to bucket/prefix after ingest
|
||||
- [ ] Tempo writes objects to bucket/prefix after ingest
|
||||
|
||||
## Workspace Verification Commands
|
||||
- `cargo fmt --check`
|
||||
- `cargo clippy --workspace --all-targets -- -D warnings`
|
||||
- `cargo test --workspace`
|
||||
- `cd control/ui && npm ci && npm run lint && npm run typecheck && npm run test && npm run build`
|
||||
@@ -1,6 +1,7 @@
|
||||
use serde_json::Value as JsonValue;
|
||||
use std::time::Duration;
|
||||
|
||||
#[allow(unreachable_code)]
|
||||
pub async fn execute_decide_program(
|
||||
state: &JsonValue,
|
||||
command: &JsonValue,
|
||||
@@ -28,6 +29,7 @@ pub async fn execute_decide_program(
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(unreachable_code)]
|
||||
pub async fn execute_apply_program(
|
||||
state: &JsonValue,
|
||||
event: &JsonValue,
|
||||
@@ -60,11 +62,10 @@ async fn execute_decide_v8(
|
||||
state: &JsonValue,
|
||||
command: &JsonValue,
|
||||
program: &str,
|
||||
gas_limit: u64,
|
||||
_gas_limit: u64,
|
||||
timeout: Duration,
|
||||
) -> Result<Vec<JsonValue>, crate::types::AggregateError> {
|
||||
use std::sync::Arc;
|
||||
use v8::{Array, Context, Function, HandleScope, Isolate, Object, Scope, Script};
|
||||
use v8::{Context, ContextScope, Function, HandleScope, Isolate, Script};
|
||||
|
||||
let state_str = serde_json::to_string(state).map_err(|e| {
|
||||
crate::types::AggregateError::DecideError(format!("State serialization: {}", e))
|
||||
@@ -73,47 +74,45 @@ async fn execute_decide_v8(
|
||||
crate::types::AggregateError::DecideError(format!("Command serialization: {}", e))
|
||||
})?;
|
||||
|
||||
let program_owned = program.to_string();
|
||||
let result = tokio::task::spawn_blocking(move || {
|
||||
let isolate = &mut Isolate::new(v8::CreateParams::default());
|
||||
|
||||
let scope = &mut HandleScope::new(isolate);
|
||||
let context = Context::new(scope);
|
||||
let context = Context::new(scope, v8::ContextOptions::default());
|
||||
let scope = &mut ContextScope::new(scope, context);
|
||||
|
||||
let source =
|
||||
v8::String::new(scope, program).ok_or_else(|| "Failed to create program string")?;
|
||||
v8::String::new(scope, &program_owned).ok_or("Failed to create program string")?;
|
||||
|
||||
let script =
|
||||
Script::compile(scope, source, None).ok_or_else(|| "Failed to compile program")?;
|
||||
let script = Script::compile(scope, source, None).ok_or("Failed to compile program")?;
|
||||
|
||||
script.run(scope).ok_or_else(|| "Failed to run program")?;
|
||||
script.run(scope).ok_or("Failed to run program")?;
|
||||
|
||||
let global = context.global(scope);
|
||||
let decide_name =
|
||||
v8::String::new(scope, "decide").ok_or_else(|| "Failed to create decide string")?;
|
||||
v8::String::new(scope, "decide").ok_or("Failed to create decide string")?;
|
||||
|
||||
let decide_fn = global
|
||||
.get(scope, decide_name.into())
|
||||
.and_then(|v| v8::Local::<Function>::try_from(v).ok())
|
||||
.ok_or_else(|| "decide function not found")?;
|
||||
.ok_or("decide function not found")?;
|
||||
|
||||
let state_json = v8::String::new(scope, &state_str)
|
||||
.ok_or_else(|| "Failed to create state JSON string")?;
|
||||
let state_obj =
|
||||
v8::json::parse(scope, state_json).ok_or_else(|| "Failed to parse state JSON")?;
|
||||
let state_json =
|
||||
v8::String::new(scope, &state_str).ok_or("Failed to create state JSON string")?;
|
||||
let state_obj = v8::json::parse(scope, state_json).ok_or("Failed to parse state JSON")?;
|
||||
|
||||
let command_json = v8::String::new(scope, &command_str)
|
||||
.ok_or_else(|| "Failed to create command JSON string")?;
|
||||
let command_json =
|
||||
v8::String::new(scope, &command_str).ok_or("Failed to create command JSON string")?;
|
||||
let command_obj =
|
||||
v8::json::parse(scope, command_json).ok_or_else(|| "Failed to parse command JSON")?;
|
||||
v8::json::parse(scope, command_json).ok_or("Failed to parse command JSON")?;
|
||||
|
||||
let args: [v8::Local<v8::Value>; 2] = [state_obj.into(), command_obj.into()];
|
||||
let args: [v8::Local<v8::Value>; 2] = [state_obj, command_obj];
|
||||
let result = decide_fn
|
||||
.call(scope, global.into(), &args)
|
||||
.ok_or_else(|| "decide function call failed")?;
|
||||
.ok_or("decide function call failed")?;
|
||||
|
||||
let result_json =
|
||||
v8::json::stringify(scope, result).ok_or_else(|| "Failed to stringify result")?;
|
||||
let result_json = v8::json::stringify(scope, result).ok_or("Failed to stringify result")?;
|
||||
let result_str = result_json.to_rust_string_lossy(scope);
|
||||
|
||||
let events: Vec<JsonValue> = serde_json::from_str(&result_str)
|
||||
@@ -155,47 +154,43 @@ async fn execute_apply_v8(
|
||||
|
||||
let _ = gas_limit;
|
||||
|
||||
let program_owned = program.to_string();
|
||||
let result = tokio::task::spawn_blocking(move || {
|
||||
let isolate = &mut Isolate::new(v8::CreateParams::default());
|
||||
|
||||
let scope = &mut HandleScope::new(isolate);
|
||||
let context = Context::new(scope);
|
||||
let context = Context::new(scope, v8::ContextOptions::default());
|
||||
let scope = &mut ContextScope::new(scope, context);
|
||||
|
||||
let source =
|
||||
v8::String::new(scope, program).ok_or_else(|| "Failed to create program string")?;
|
||||
v8::String::new(scope, &program_owned).ok_or("Failed to create program string")?;
|
||||
|
||||
let script =
|
||||
Script::compile(scope, source, None).ok_or_else(|| "Failed to compile program")?;
|
||||
let script = Script::compile(scope, source, None).ok_or("Failed to compile program")?;
|
||||
|
||||
script.run(scope).ok_or_else(|| "Failed to run program")?;
|
||||
script.run(scope).ok_or("Failed to run program")?;
|
||||
|
||||
let global = context.global(scope);
|
||||
let apply_name =
|
||||
v8::String::new(scope, "apply").ok_or_else(|| "Failed to create apply string")?;
|
||||
let apply_name = v8::String::new(scope, "apply").ok_or("Failed to create apply string")?;
|
||||
|
||||
let apply_fn = global
|
||||
.get(scope, apply_name.into())
|
||||
.and_then(|v| v8::Local::<Function>::try_from(v).ok())
|
||||
.ok_or_else(|| "apply function not found")?;
|
||||
.ok_or("apply function not found")?;
|
||||
|
||||
let state_json = v8::String::new(scope, &state_str)
|
||||
.ok_or_else(|| "Failed to create state JSON string")?;
|
||||
let state_obj =
|
||||
v8::json::parse(scope, state_json).ok_or_else(|| "Failed to parse state JSON")?;
|
||||
let state_json =
|
||||
v8::String::new(scope, &state_str).ok_or("Failed to create state JSON string")?;
|
||||
let state_obj = v8::json::parse(scope, state_json).ok_or("Failed to parse state JSON")?;
|
||||
|
||||
let event_json = v8::String::new(scope, &event_str)
|
||||
.ok_or_else(|| "Failed to create event JSON string")?;
|
||||
let event_obj =
|
||||
v8::json::parse(scope, event_json).ok_or_else(|| "Failed to parse event JSON")?;
|
||||
let event_json =
|
||||
v8::String::new(scope, &event_str).ok_or("Failed to create event JSON string")?;
|
||||
let event_obj = v8::json::parse(scope, event_json).ok_or("Failed to parse event JSON")?;
|
||||
|
||||
let args: [v8::Local<v8::Value>; 2] = [state_obj.into(), event_obj.into()];
|
||||
let args: [v8::Local<v8::Value>; 2] = [state_obj, event_obj];
|
||||
let result = apply_fn
|
||||
.call(scope, global.into(), &args)
|
||||
.ok_or_else(|| "apply function call failed")?;
|
||||
.ok_or("apply function call failed")?;
|
||||
|
||||
let result_json =
|
||||
v8::json::stringify(scope, result).ok_or_else(|| "Failed to stringify result")?;
|
||||
let result_json = v8::json::stringify(scope, result).ok_or("Failed to stringify result")?;
|
||||
let result_str = result_json.to_rust_string_lossy(scope);
|
||||
|
||||
let new_state: JsonValue = serde_json::from_str(&result_str)
|
||||
@@ -250,6 +245,7 @@ async fn execute_apply_wasm(
|
||||
mod tests {
|
||||
use super::*;
|
||||
use serde_json::json;
|
||||
use std::time::Duration;
|
||||
|
||||
#[tokio::test]
|
||||
async fn no_runtime_returns_error() {
|
||||
@@ -257,7 +253,7 @@ mod tests {
|
||||
{
|
||||
let state = json!({});
|
||||
let command = json!({});
|
||||
let result =
|
||||
let result: Result<Vec<JsonValue>, crate::types::AggregateError> =
|
||||
execute_decide_program(&state, &command, "program", 1000, Duration::from_secs(1))
|
||||
.await;
|
||||
assert!(result.is_err());
|
||||
|
||||
@@ -339,3 +339,119 @@ This plan is intentionally aligned with the style and gating discipline used in
|
||||
- verify Grafana dashboards provisioned and VictoriaMetrics receives samples
|
||||
- [x] **T7.3** End-to-end “control plane can see the fleet” test (requires docker)
|
||||
- UI/API can query placement + health snapshots for all services
|
||||
|
||||
---
|
||||
|
||||
## Milestone 8: Config Registry + Safe Change Management (Plan/Apply/Rollback)
|
||||
|
||||
**Goal:** Make configuration first-class, versioned, validated, and safely mutable from the control plane, while keeping production and development sources consistent.
|
||||
|
||||
### Dependencies
|
||||
- Milestone 2 (Control Plane API foundation)
|
||||
- Milestone 5 (safe mutations baseline)
|
||||
- Milestone 7 (Swarm deployment baseline)
|
||||
|
||||
### Exit Criteria
|
||||
- Operators can list, view, validate, and safely apply config changes with audit + idempotent jobs
|
||||
- Config changes have revision semantics and are roll-backable
|
||||
- Gatekeeper safety checks prevent applying invalid or unsafe configs
|
||||
|
||||
### Tasks
|
||||
- [x] **8.1** Inventory and classify configuration surfaces (platform-wide)
|
||||
- classify as: static boot config (env/secrets), dynamic runtime config (KV), large immutable artifacts (S3/docs)
|
||||
- map current sources per domain:
|
||||
- Gateway routing config (`config/routing/dev.json` / production KV)
|
||||
- Placement config (`config/placement/dev.json` / production KV)
|
||||
- Runner definitions (effects/sagas) (documents/S3) and activation config (KV)
|
||||
- Observability provisioning (Swarm configs + repo-managed assets)
|
||||
- Control plane feature flags (KV)
|
||||
- [~] **8.2** Define a Config Registry contract in the Control API
|
||||
- **Implemented (initial)**:
|
||||
- config identity: `{domain}` (routing|placement)
|
||||
- metadata: `revision` (KV revision when using NATS), and `source` info (file vs nats)
|
||||
- storage policy per config: `source=dev_file | nats_kv`
|
||||
- **Still needed**:
|
||||
- `{domain, name, scope}` and richer metadata (`updated_at`, `updated_by`, `sha256`)
|
||||
- history API for KV-backed configs
|
||||
- [x] **8.3** Implement config storage abstraction (dev + prod)
|
||||
- dev: file-backed, atomic write (tmp + rename), hot-reload where applicable
|
||||
- prod: NATS KV for dynamic configs (revisioned values + watch streams)
|
||||
- consistent error model: decode/validate/source errors are distinguishable and safe
|
||||
- [x] **8.4** Add read-only config APIs
|
||||
- `GET /admin/v1/config` list domains
|
||||
- `GET /admin/v1/config/{domain}` fetch current value + revision + source
|
||||
- (history not implemented yet)
|
||||
- [~] **8.5** Add validate/plan/apply/rollback mutation workflows as jobs
|
||||
- **Implemented**:
|
||||
- `POST /admin/v1/jobs/config/validate` (job, idempotency key required)
|
||||
- `POST /admin/v1/jobs/config/apply` (job, idempotency key required, backup + apply)
|
||||
- `POST /admin/v1/jobs/config/rollback` (job, idempotency key required, restore last backup)
|
||||
- per-domain locking to avoid concurrent config mutations
|
||||
- **Still needed**:
|
||||
- `POST /admin/v1/plan/config/apply` deterministic plan (diff + impacted services)
|
||||
- richer post-conditions (routing resolution sampling, fleet consistency checks, etc.)
|
||||
- [~] **8.6** Implement initial config domains end-to-end
|
||||
- **Gateway routing config**:
|
||||
- implemented: schema validation via JSON decode
|
||||
- still needed: semantic validation (tenant entries/shard directories/endpoints URL parsing) + sampled routing verification
|
||||
- **Placement config**:
|
||||
- implemented: schema validation via JSON decode
|
||||
- still needed: semantic validation (targets non-empty, etc.) + fleet snapshot consistency checks
|
||||
- [x] **8.7** Implement Admin UI “Config” page for safe operations
|
||||
- list + view configs with revision/sha/audit linkage
|
||||
- editor for JSON (and YAML when supported by the domain)
|
||||
- validate button (server-side) and apply/rollback flows as jobs with reason required
|
||||
|
||||
### Tests
|
||||
- [x] **T8.1** Unit tests: config decode/encode stability for each config domain
|
||||
- routing/placement decode is enforced by server-side validate job (schema-level)
|
||||
- [ ] **T8.2** Unit tests: validation rejects unsafe configs with stable error codes/messages
|
||||
- [ ] **T8.3** Unit tests: plan generation is deterministic for same inputs
|
||||
- [x] **T8.4** Integration tests (env-gated):
|
||||
- NATS KV config apply + rollback via Control API (requires `CONTROL_TEST_NATS=1` + `CONTROL_TEST_NATS_URL`)
|
||||
- (Gateway route-resolution E2E verification still pending)
|
||||
- [x] **T8.5** UI tests: config page renders, validate/apply/rollback flows navigate to job progress
|
||||
|
||||
---
|
||||
|
||||
## Milestone 9: Control Node Management (Inventory, Drift, and Safer Ops)
|
||||
|
||||
**Goal:** Improve how the control plane understands and manages the live control node and platform state: node inventory, config drift detection, and safer operational guardrails.
|
||||
|
||||
### Dependencies
|
||||
- Milestone 7 (Swarm deployment baseline)
|
||||
- Milestone 8 (config registry + safe change management)
|
||||
|
||||
### Exit Criteria
|
||||
- Control plane provides a reliable “what is running vs what should be running” view
|
||||
- Config drift is detectable and actionable
|
||||
- Core operational actions are guarded by preflight checks and produce audit trails
|
||||
|
||||
### Tasks
|
||||
- [x] **9.1** Define a “desired vs observed” model for platform state
|
||||
- desired: Swarm stacks + config registry revisions
|
||||
- observed: live service/task state + effective runtime configs
|
||||
- drift categories: missing, extra, version mismatch, config mismatch, unhealthy
|
||||
- [~] **9.2** Improve Swarm observation fidelity
|
||||
- implemented (initial): docker-cli-backed Swarm observation (`CONTROL_SWARM_MODE=docker`)
|
||||
- still needed: direct Docker API client (avoid shelling out), richer normalization, and wiring into production stacks
|
||||
- keep file source as a dev fallback for deterministic tests
|
||||
- normalize service identity: `{service, image_tag, git_sha, updated_at}`
|
||||
- [x] **9.3** Add drift APIs and UI views
|
||||
- `GET /admin/v1/platform/drift` returns drift summary + actionable items
|
||||
- UI: “Platform Drift” page with filters and links to remediate jobs
|
||||
- [ ] **9.4** Add safer operational guardrails as reusable checks
|
||||
- preflight checks for:
|
||||
- service unhealthy / crashloop
|
||||
- tenant migration safety thresholds (lag/inflight)
|
||||
- config apply safety (impact radius, sampled verify)
|
||||
- consistent failure modes: clear reason + audit entry, no partial side effects
|
||||
- [ ] **9.5** Add operational playbooks as executable checks
|
||||
- post-deploy verification suite callable as an idempotent job
|
||||
- rollback verification suite callable as an idempotent job
|
||||
|
||||
### Tests
|
||||
- [x] **T9.1** Unit tests: drift classification for synthetic desired/observed fixtures
|
||||
- [x] **T9.2** Integration tests (docker-gated): drift view detects intentional mismatches in a local Swarm
|
||||
- requires `CONTROL_TEST_DOCKER=1` and an active local Swarm node
|
||||
- [x] **T9.3** UI tests: drift page renders in route smoke test
|
||||
|
||||
@@ -5,22 +5,32 @@ edition = "2024"
|
||||
publish = ["madapes"]
|
||||
|
||||
[dependencies]
|
||||
async-nats = "0.42.0"
|
||||
async-trait = "0.1.89"
|
||||
axum = "0.8.6"
|
||||
aws-config = { version = "1.8.6", features = ["behavior-version-latest"] }
|
||||
aws-credential-types = "1.2.6"
|
||||
aws-sdk-s3 = "1.106.0"
|
||||
clap = { version = "4.5.48", features = ["derive", "env"] }
|
||||
futures = "0.3.31"
|
||||
jsonwebtoken = "9.3.1"
|
||||
metrics = "0.23.0"
|
||||
metrics-exporter-prometheus = "0.16.0"
|
||||
reqwest = { version = "0.12.23", default-features = false, features = ["json", "rustls-tls"] }
|
||||
serde = { version = "1.0.228", features = ["derive"] }
|
||||
serde_json = "1.0.149"
|
||||
sha2 = "0.10.9"
|
||||
hex = "0.4.3"
|
||||
shared = { path = "../../shared" }
|
||||
thiserror = "2.0.16"
|
||||
tokio = { version = "1.45.0", features = ["macros", "net", "process", "rt-multi-thread", "signal", "time"] }
|
||||
tower-http = { version = "0.6.6", features = ["trace"] }
|
||||
tracing = "0.1.41"
|
||||
tracing-subscriber = { version = "0.3.20", features = ["env-filter"] }
|
||||
url = "2.5.4"
|
||||
uuid = { version = "1.18.1", features = ["serde", "v4"] }
|
||||
|
||||
[dev-dependencies]
|
||||
serde_yaml = "0.9.34"
|
||||
tower = "0.5.2"
|
||||
urlencoding = "2.1.3"
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
use crate::{
|
||||
AppState, RequestIds,
|
||||
auth::{Principal, has_permission},
|
||||
fleet,
|
||||
config_registry::{ConfigDomain, ConfigRegistryError},
|
||||
config_schemas::RoutingConfig,
|
||||
drift, fleet,
|
||||
job_engine::{JobEngine, StartJobError},
|
||||
jobs::{Job, JobStatus, JobStep},
|
||||
placement::{PlacementResponse, ServiceKind},
|
||||
@@ -15,7 +17,9 @@ use axum::{
|
||||
routing::{get, post},
|
||||
};
|
||||
use serde::Deserialize;
|
||||
use sha2::Digest;
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
use url::Url;
|
||||
use uuid::Uuid;
|
||||
|
||||
const HEADER_IDEMPOTENCY_KEY: &str = "idempotency-key";
|
||||
@@ -25,21 +29,125 @@ pub fn admin_router() -> Router<AppState> {
|
||||
Router::new()
|
||||
.route("/whoami", get(whoami))
|
||||
.route("/platform/info", get(platform_info))
|
||||
.route("/platform/drift", get(platform_drift))
|
||||
.route("/fleet/snapshot", get(fleet_snapshot))
|
||||
.route("/tenants", get(list_tenants))
|
||||
.route("/placement/{kind}", get(get_placement))
|
||||
.route("/config", get(list_config))
|
||||
.route("/config/{domain}", get(get_config))
|
||||
.route("/config/{domain}/history", get(get_config_history))
|
||||
.route("/jobs/platform/verify", post(start_platform_verify))
|
||||
.route("/jobs/config/validate", post(start_config_validate))
|
||||
.route("/jobs/config/apply", post(start_config_apply))
|
||||
.route("/jobs/config/rollback", post(start_config_rollback))
|
||||
.route("/tenants/echo", get(tenant_echo))
|
||||
.route(
|
||||
"/tenants/{tenant_id}/billing",
|
||||
get(crate::billing::get_billing),
|
||||
)
|
||||
.route(
|
||||
"/tenants/{tenant_id}/billing/checkout",
|
||||
post(crate::billing::checkout),
|
||||
)
|
||||
.route(
|
||||
"/tenants/{tenant_id}/billing/portal",
|
||||
post(crate::billing::portal),
|
||||
)
|
||||
.route("/jobs/echo", post(create_echo_job))
|
||||
.route("/jobs/{job_id}", get(get_job))
|
||||
.route("/jobs/{job_id}/cancel", post(cancel_job))
|
||||
.route("/jobs/tenant/drain", post(start_tenant_drain))
|
||||
.route("/jobs/tenant/migrate", post(start_tenant_migrate))
|
||||
.route("/plan/tenant/migrate", post(plan_tenant_migrate))
|
||||
.route("/plan/config/apply", post(plan_config_apply))
|
||||
.route("/audit", get(list_audit))
|
||||
.route("/swarm/services", get(list_swarm_services))
|
||||
.route("/swarm/services/{name}/tasks", get(list_swarm_tasks))
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct PlatformVerifyRequest {
|
||||
reason: String,
|
||||
}
|
||||
|
||||
async fn start_platform_verify(
|
||||
State(state): State<AppState>,
|
||||
headers: HeaderMap,
|
||||
Extension(principal): Extension<Principal>,
|
||||
Json(body): Json<PlatformVerifyRequest>,
|
||||
) -> impl IntoResponse {
|
||||
if !has_permission(&principal, "control:write") {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
|
||||
let key = headers
|
||||
.get(HEADER_IDEMPOTENCY_KEY)
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.ok_or(StatusCode::BAD_REQUEST);
|
||||
let key = match key {
|
||||
Ok(k) if !k.is_empty() => k,
|
||||
_ => return StatusCode::BAD_REQUEST.into_response(),
|
||||
};
|
||||
|
||||
let engine = JobEngine::new(
|
||||
state.jobs.clone(),
|
||||
state.audit.clone(),
|
||||
state.tenant_locks.clone(),
|
||||
state.config_locks.clone(),
|
||||
);
|
||||
let job_id = match engine.start_platform_verify(state.clone(), &principal, body.reason, key) {
|
||||
Ok(id) => id,
|
||||
Err(StartJobError::TenantLocked) => return StatusCode::CONFLICT.into_response(),
|
||||
};
|
||||
|
||||
(
|
||||
StatusCode::OK,
|
||||
Json(serde_json::json!({ "job_id": job_id })),
|
||||
)
|
||||
.into_response()
|
||||
}
|
||||
|
||||
async fn get_config_history(
|
||||
State(state): State<AppState>,
|
||||
Path(domain): Path<String>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
) -> impl IntoResponse {
|
||||
if !has_permission(&principal, "control:read") {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
|
||||
let domain = match domain.as_str() {
|
||||
"routing" => ConfigDomain::Routing,
|
||||
"placement" => ConfigDomain::Placement,
|
||||
_ => return StatusCode::NOT_FOUND.into_response(),
|
||||
};
|
||||
let Some(source) = state.config.source(domain) else {
|
||||
return StatusCode::NOT_FOUND.into_response();
|
||||
};
|
||||
|
||||
let rows = match source.history_bytes(50).await {
|
||||
Ok(items) => items
|
||||
.into_iter()
|
||||
.filter_map(|(rev, bytes)| {
|
||||
let v = serde_json::from_slice::<serde_json::Value>(&bytes).ok()?;
|
||||
Some(serde_json::json!({
|
||||
"revision": rev,
|
||||
"sha256": sha256_hex(&bytes),
|
||||
"value": v
|
||||
}))
|
||||
})
|
||||
.collect::<Vec<_>>(),
|
||||
Err(ConfigRegistryError::Source(_)) => return StatusCode::BAD_GATEWAY.into_response(),
|
||||
Err(_) => return StatusCode::NOT_IMPLEMENTED.into_response(),
|
||||
};
|
||||
|
||||
(
|
||||
StatusCode::OK,
|
||||
Json(serde_json::json!({ "domain": domain.as_str(), "items": rows })),
|
||||
)
|
||||
.into_response()
|
||||
}
|
||||
|
||||
async fn whoami(Extension(principal): Extension<Principal>) -> impl IntoResponse {
|
||||
if !has_permission(&principal, "control:read") {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
@@ -70,6 +178,18 @@ async fn platform_info(Extension(principal): Extension<Principal>) -> impl IntoR
|
||||
.into_response()
|
||||
}
|
||||
|
||||
async fn platform_drift(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
) -> impl IntoResponse {
|
||||
if !has_permission(&principal, "control:read") {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
|
||||
let r = drift::compute(&state).await;
|
||||
(StatusCode::OK, Json(r)).into_response()
|
||||
}
|
||||
|
||||
async fn fleet_snapshot(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
@@ -109,6 +229,434 @@ async fn get_placement(
|
||||
(StatusCode::OK, Json(resp)).into_response()
|
||||
}
|
||||
|
||||
async fn list_config(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
) -> impl IntoResponse {
|
||||
if !has_permission(&principal, "control:read") {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
|
||||
let domains: Vec<&'static str> = [ConfigDomain::Routing, ConfigDomain::Placement]
|
||||
.into_iter()
|
||||
.filter(|d| state.config.source(*d).is_some())
|
||||
.map(|d| d.as_str())
|
||||
.collect();
|
||||
|
||||
(
|
||||
StatusCode::OK,
|
||||
Json(serde_json::json!({ "domains": domains })),
|
||||
)
|
||||
.into_response()
|
||||
}
|
||||
|
||||
async fn get_config(
|
||||
State(state): State<AppState>,
|
||||
Path(domain): Path<String>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
) -> impl IntoResponse {
|
||||
if !has_permission(&principal, "control:read") {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
|
||||
let domain = match domain.as_str() {
|
||||
"routing" => ConfigDomain::Routing,
|
||||
"placement" => ConfigDomain::Placement,
|
||||
_ => return StatusCode::NOT_FOUND.into_response(),
|
||||
};
|
||||
|
||||
let Some(source) = state.config.source(domain) else {
|
||||
return StatusCode::NOT_FOUND.into_response();
|
||||
};
|
||||
|
||||
let loaded = source.load_bytes().await;
|
||||
let (bytes, revision) = match loaded {
|
||||
Ok(x) => x,
|
||||
Err(ConfigRegistryError::Source(_)) => return StatusCode::BAD_GATEWAY.into_response(),
|
||||
Err(ConfigRegistryError::Decode(_)) => return StatusCode::BAD_REQUEST.into_response(),
|
||||
Err(ConfigRegistryError::NotConfigured) => return StatusCode::NOT_FOUND.into_response(),
|
||||
};
|
||||
|
||||
let json_value = match bytes {
|
||||
Some(ref b) => match serde_json::from_slice::<serde_json::Value>(b) {
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
return (
|
||||
StatusCode::BAD_REQUEST,
|
||||
Json(serde_json::json!({ "error": format!("invalid json: {e}") })),
|
||||
)
|
||||
.into_response();
|
||||
}
|
||||
},
|
||||
None => serde_json::Value::Null,
|
||||
};
|
||||
|
||||
let sha256 = bytes.as_deref().map(sha256_hex);
|
||||
|
||||
(
|
||||
StatusCode::OK,
|
||||
Json(serde_json::json!({
|
||||
"domain": domain.as_str(),
|
||||
"revision": revision,
|
||||
"sha256": sha256,
|
||||
"source": source.info(),
|
||||
"value": json_value,
|
||||
})),
|
||||
)
|
||||
.into_response()
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct ConfigApplyRequest {
|
||||
domain: String,
|
||||
expected_revision: Option<u64>,
|
||||
reason: String,
|
||||
value: serde_json::Value,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct ConfigValidateRequest {
|
||||
domain: String,
|
||||
reason: String,
|
||||
value: serde_json::Value,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct ConfigRollbackRequest {
|
||||
domain: String,
|
||||
reason: String,
|
||||
}
|
||||
|
||||
fn parse_domain(domain: &str) -> Option<ConfigDomain> {
|
||||
match domain {
|
||||
"routing" => Some(ConfigDomain::Routing),
|
||||
"placement" => Some(ConfigDomain::Placement),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
async fn start_config_validate(
|
||||
State(state): State<AppState>,
|
||||
headers: HeaderMap,
|
||||
Extension(principal): Extension<Principal>,
|
||||
Json(body): Json<ConfigValidateRequest>,
|
||||
) -> impl IntoResponse {
|
||||
if !has_permission(&principal, "control:write") {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
|
||||
let key = headers
|
||||
.get(HEADER_IDEMPOTENCY_KEY)
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.ok_or(StatusCode::BAD_REQUEST);
|
||||
let key = match key {
|
||||
Ok(k) if !k.is_empty() => k,
|
||||
_ => return StatusCode::BAD_REQUEST.into_response(),
|
||||
};
|
||||
|
||||
let Some(domain) = parse_domain(body.domain.as_str()) else {
|
||||
return StatusCode::BAD_REQUEST.into_response();
|
||||
};
|
||||
|
||||
let engine = JobEngine::new(
|
||||
state.jobs.clone(),
|
||||
state.audit.clone(),
|
||||
state.tenant_locks.clone(),
|
||||
state.config_locks.clone(),
|
||||
);
|
||||
let job_id = match engine.start_config_validate(
|
||||
state.clone(),
|
||||
&principal,
|
||||
domain,
|
||||
body.reason,
|
||||
body.value,
|
||||
key,
|
||||
) {
|
||||
Ok(id) => id,
|
||||
Err(StartJobError::TenantLocked) => return StatusCode::CONFLICT.into_response(),
|
||||
};
|
||||
|
||||
(
|
||||
StatusCode::OK,
|
||||
Json(serde_json::json!({ "job_id": job_id })),
|
||||
)
|
||||
.into_response()
|
||||
}
|
||||
|
||||
async fn start_config_apply(
|
||||
State(state): State<AppState>,
|
||||
headers: HeaderMap,
|
||||
Extension(principal): Extension<Principal>,
|
||||
Json(body): Json<ConfigApplyRequest>,
|
||||
) -> impl IntoResponse {
|
||||
if !has_permission(&principal, "control:write") {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
|
||||
let key = headers
|
||||
.get(HEADER_IDEMPOTENCY_KEY)
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.ok_or(StatusCode::BAD_REQUEST);
|
||||
let key = match key {
|
||||
Ok(k) if !k.is_empty() => k,
|
||||
_ => return StatusCode::BAD_REQUEST.into_response(),
|
||||
};
|
||||
|
||||
let Some(domain) = parse_domain(body.domain.as_str()) else {
|
||||
return StatusCode::BAD_REQUEST.into_response();
|
||||
};
|
||||
|
||||
let engine = JobEngine::new(
|
||||
state.jobs.clone(),
|
||||
state.audit.clone(),
|
||||
state.tenant_locks.clone(),
|
||||
state.config_locks.clone(),
|
||||
);
|
||||
let job_id = match engine.start_config_apply(
|
||||
state.clone(),
|
||||
&principal,
|
||||
domain,
|
||||
body.reason,
|
||||
body.expected_revision,
|
||||
body.value,
|
||||
key,
|
||||
) {
|
||||
Ok(id) => id,
|
||||
Err(StartJobError::TenantLocked) => return StatusCode::CONFLICT.into_response(),
|
||||
};
|
||||
|
||||
(
|
||||
StatusCode::OK,
|
||||
Json(serde_json::json!({ "job_id": job_id })),
|
||||
)
|
||||
.into_response()
|
||||
}
|
||||
|
||||
async fn start_config_rollback(
|
||||
State(state): State<AppState>,
|
||||
headers: HeaderMap,
|
||||
Extension(principal): Extension<Principal>,
|
||||
Json(body): Json<ConfigRollbackRequest>,
|
||||
) -> impl IntoResponse {
|
||||
if !has_permission(&principal, "control:write") {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
|
||||
let key = headers
|
||||
.get(HEADER_IDEMPOTENCY_KEY)
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.ok_or(StatusCode::BAD_REQUEST);
|
||||
let key = match key {
|
||||
Ok(k) if !k.is_empty() => k,
|
||||
_ => return StatusCode::BAD_REQUEST.into_response(),
|
||||
};
|
||||
|
||||
let Some(domain) = parse_domain(body.domain.as_str()) else {
|
||||
return StatusCode::BAD_REQUEST.into_response();
|
||||
};
|
||||
|
||||
let engine = JobEngine::new(
|
||||
state.jobs.clone(),
|
||||
state.audit.clone(),
|
||||
state.tenant_locks.clone(),
|
||||
state.config_locks.clone(),
|
||||
);
|
||||
let job_id =
|
||||
match engine.start_config_rollback(state.clone(), &principal, domain, body.reason, key) {
|
||||
Ok(id) => id,
|
||||
Err(StartJobError::TenantLocked) => return StatusCode::CONFLICT.into_response(),
|
||||
};
|
||||
|
||||
(
|
||||
StatusCode::OK,
|
||||
Json(serde_json::json!({ "job_id": job_id })),
|
||||
)
|
||||
.into_response()
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct ConfigPlanApplyRequest {
|
||||
domain: String,
|
||||
value: serde_json::Value,
|
||||
}
|
||||
|
||||
async fn plan_config_apply(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
Json(body): Json<ConfigPlanApplyRequest>,
|
||||
) -> impl IntoResponse {
|
||||
if !has_permission(&principal, "control:write") {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
|
||||
let domain = match body.domain.as_str() {
|
||||
"routing" => ConfigDomain::Routing,
|
||||
"placement" => ConfigDomain::Placement,
|
||||
_ => return StatusCode::BAD_REQUEST.into_response(),
|
||||
};
|
||||
let Some(source) = state.config.source(domain) else {
|
||||
return StatusCode::NOT_FOUND.into_response();
|
||||
};
|
||||
|
||||
// Validate proposed config (schema + semantics).
|
||||
let validate_res: Result<(), String> = match domain {
|
||||
ConfigDomain::Routing => {
|
||||
let cfg = match serde_json::from_value::<RoutingConfig>(body.value.clone()) {
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
return (
|
||||
StatusCode::BAD_REQUEST,
|
||||
Json(serde_json::json!({ "error": e.to_string() })),
|
||||
)
|
||||
.into_response();
|
||||
}
|
||||
};
|
||||
validate_routing_semantics(&cfg)
|
||||
}
|
||||
ConfigDomain::Placement => {
|
||||
let cfg =
|
||||
match serde_json::from_value::<crate::placement::PlacementFile>(body.value.clone())
|
||||
{
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
return (
|
||||
StatusCode::BAD_REQUEST,
|
||||
Json(serde_json::json!({ "error": e.to_string() })),
|
||||
)
|
||||
.into_response();
|
||||
}
|
||||
};
|
||||
validate_placement_semantics(&cfg)
|
||||
}
|
||||
};
|
||||
if let Err(e) = validate_res {
|
||||
return (
|
||||
StatusCode::BAD_REQUEST,
|
||||
Json(serde_json::json!({ "error": e })),
|
||||
)
|
||||
.into_response();
|
||||
}
|
||||
|
||||
let (cur_bytes, cur_rev) = match source.load_bytes().await {
|
||||
Ok(x) => x,
|
||||
Err(_) => return StatusCode::BAD_GATEWAY.into_response(),
|
||||
};
|
||||
let cur_value = cur_bytes
|
||||
.as_deref()
|
||||
.and_then(|b| serde_json::from_slice::<serde_json::Value>(b).ok())
|
||||
.unwrap_or(serde_json::Value::Null);
|
||||
|
||||
let before = serde_json::to_string_pretty(&cur_value).unwrap_or_default();
|
||||
let after = serde_json::to_string_pretty(&body.value).unwrap_or_default();
|
||||
|
||||
let changed = cur_value != body.value;
|
||||
let impacted_services: Vec<&'static str> = match domain {
|
||||
ConfigDomain::Routing => vec!["gateway"],
|
||||
ConfigDomain::Placement => vec!["gateway", "control-api"],
|
||||
};
|
||||
|
||||
(
|
||||
StatusCode::OK,
|
||||
Json(serde_json::json!({
|
||||
"domain": domain.as_str(),
|
||||
"current_revision": cur_rev,
|
||||
"changed": changed,
|
||||
"impacted_services": impacted_services,
|
||||
"diff": {
|
||||
"before": before,
|
||||
"after": after,
|
||||
}
|
||||
})),
|
||||
)
|
||||
.into_response()
|
||||
}
|
||||
|
||||
fn sha256_hex(bytes: &[u8]) -> String {
|
||||
let mut h = sha2::Sha256::new();
|
||||
h.update(bytes);
|
||||
hex::encode(h.finalize())
|
||||
}
|
||||
|
||||
fn validate_routing_semantics(cfg: &RoutingConfig) -> Result<(), String> {
|
||||
let shard_maps = [
|
||||
("aggregate_shards", &cfg.aggregate_shards),
|
||||
("projection_shards", &cfg.projection_shards),
|
||||
("runner_shards", &cfg.runner_shards),
|
||||
];
|
||||
for (name, map) in shard_maps {
|
||||
for (shard_id, endpoints) in map {
|
||||
if endpoints.is_empty() {
|
||||
return Err(format!("{name}[{shard_id}] has no endpoints"));
|
||||
}
|
||||
for ep in endpoints {
|
||||
let u = Url::parse(ep)
|
||||
.map_err(|e| format!("{name}[{shard_id}] invalid endpoint {ep:?}: {e}"))?;
|
||||
if u.scheme() != "http" && u.scheme() != "https" {
|
||||
return Err(format!(
|
||||
"{name}[{shard_id}] endpoint {ep:?} must be http(s)"
|
||||
));
|
||||
}
|
||||
if u.host_str().is_none() {
|
||||
return Err(format!(
|
||||
"{name}[{shard_id}] endpoint {ep:?} must include host"
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let placements = [
|
||||
(
|
||||
"aggregate_placement",
|
||||
&cfg.aggregate_placement,
|
||||
&cfg.aggregate_shards,
|
||||
),
|
||||
(
|
||||
"projection_placement",
|
||||
&cfg.projection_placement,
|
||||
&cfg.projection_shards,
|
||||
),
|
||||
(
|
||||
"runner_placement",
|
||||
&cfg.runner_placement,
|
||||
&cfg.runner_shards,
|
||||
),
|
||||
];
|
||||
for (pname, pmap, shards) in placements {
|
||||
for (tenant, shard_id) in pmap {
|
||||
if shard_id.trim().is_empty() {
|
||||
return Err(format!("{pname}[{tenant}] shard_id is empty"));
|
||||
}
|
||||
if !shards.contains_key(shard_id) {
|
||||
return Err(format!(
|
||||
"{pname}[{tenant}] references missing shard_id {shard_id:?}"
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn validate_placement_semantics(cfg: &crate::placement::PlacementFile) -> Result<(), String> {
|
||||
let kinds = [
|
||||
("aggregate_placement", cfg.aggregate_placement.as_ref()),
|
||||
("projection_placement", cfg.projection_placement.as_ref()),
|
||||
("runner_placement", cfg.runner_placement.as_ref()),
|
||||
];
|
||||
for (kind, k) in kinds {
|
||||
let Some(k) = k else { continue };
|
||||
for p in &k.placements {
|
||||
if p.targets.is_empty() {
|
||||
return Err(format!("{kind} tenant {} has no targets", p.tenant_id));
|
||||
}
|
||||
if p.targets.iter().any(|t| t.trim().is_empty()) {
|
||||
return Err(format!("{kind} tenant {} has empty target", p.tenant_id));
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn list_tenants(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
@@ -256,6 +804,7 @@ async fn start_tenant_drain(
|
||||
state.jobs.clone(),
|
||||
state.audit.clone(),
|
||||
state.tenant_locks.clone(),
|
||||
state.config_locks.clone(),
|
||||
);
|
||||
let job_id = match engine.start_tenant_drain(
|
||||
state.clone(),
|
||||
@@ -298,6 +847,7 @@ async fn start_tenant_migrate(
|
||||
state.jobs.clone(),
|
||||
state.audit.clone(),
|
||||
state.tenant_locks.clone(),
|
||||
state.config_locks.clone(),
|
||||
);
|
||||
let job_id = match engine.start_tenant_migrate(
|
||||
state.clone(),
|
||||
|
||||
904
control/api/src/billing.rs
Normal file
904
control/api/src/billing.rs
Normal file
@@ -0,0 +1,904 @@
|
||||
use crate::{
|
||||
AppState,
|
||||
auth::{Principal, has_permission},
|
||||
};
|
||||
use async_trait::async_trait;
|
||||
use axum::{
|
||||
Json,
|
||||
extract::{Extension, Path, State},
|
||||
http::{HeaderMap, StatusCode},
|
||||
response::IntoResponse,
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::time::Duration;
|
||||
use std::{
|
||||
collections::BTreeMap,
|
||||
fs,
|
||||
path::PathBuf,
|
||||
sync::{Arc, RwLock},
|
||||
time::SystemTime,
|
||||
};
|
||||
use thiserror::Error;
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Name of the request header that `verify_tenant_isolation` reads the tenant
/// id from; an alias for `shared::HEADER_X_TENANT_ID`.
const HEADER_TENANT_ID: &str = shared::HEADER_X_TENANT_ID;
|
||||
|
||||
fn verify_tenant_isolation(headers: &HeaderMap, path_tenant_id: Uuid) -> Result<(), StatusCode> {
|
||||
let header_tenant_id = headers
|
||||
.get(HEADER_TENANT_ID)
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.ok_or(StatusCode::BAD_REQUEST)
|
||||
.and_then(|s| Uuid::parse_str(s).map_err(|_| StatusCode::BAD_REQUEST))?;
|
||||
|
||||
if header_tenant_id != path_tenant_id {
|
||||
return Err(StatusCode::FORBIDDEN);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum Plan {
|
||||
Free,
|
||||
Pro,
|
||||
Enterprise,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum SubscriptionStatus {
|
||||
Trialing,
|
||||
Active,
|
||||
PastDue,
|
||||
Paused,
|
||||
Canceled,
|
||||
Incomplete,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct Entitlements {
|
||||
pub max_deployments: u32,
|
||||
pub max_runners: u32,
|
||||
pub s3_docs_enabled: bool,
|
||||
pub support_tier: String,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub enum BillingEvent {
|
||||
SubscriptionCreated {
|
||||
tenant_id: Uuid,
|
||||
event_id: String,
|
||||
provider_customer_id: String,
|
||||
provider_subscription_id: String,
|
||||
status: SubscriptionStatus,
|
||||
plan: Plan,
|
||||
current_period_end: String,
|
||||
ts_ms: u64,
|
||||
},
|
||||
SubscriptionUpdated {
|
||||
tenant_id: Uuid,
|
||||
event_id: String,
|
||||
status: SubscriptionStatus,
|
||||
plan: Plan,
|
||||
current_period_end: String,
|
||||
cancel_at_period_end: bool,
|
||||
ts_ms: u64,
|
||||
},
|
||||
SubscriptionDeleted {
|
||||
tenant_id: Uuid,
|
||||
event_id: String,
|
||||
ts_ms: u64,
|
||||
},
|
||||
}
|
||||
|
||||
impl BillingEvent {
|
||||
pub fn tenant_id(&self) -> Uuid {
|
||||
match self {
|
||||
Self::SubscriptionCreated { tenant_id, .. } => *tenant_id,
|
||||
Self::SubscriptionUpdated { tenant_id, .. } => *tenant_id,
|
||||
Self::SubscriptionDeleted { tenant_id, .. } => *tenant_id,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn event_id(&self) -> &str {
|
||||
match self {
|
||||
Self::SubscriptionCreated { event_id, .. } => event_id,
|
||||
Self::SubscriptionUpdated { event_id, .. } => event_id,
|
||||
Self::SubscriptionDeleted { event_id, .. } => event_id,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ts_ms(&self) -> u64 {
|
||||
match self {
|
||||
Self::SubscriptionCreated { ts_ms, .. } => *ts_ms,
|
||||
Self::SubscriptionUpdated { ts_ms, .. } => *ts_ms,
|
||||
Self::SubscriptionDeleted { ts_ms, .. } => *ts_ms,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Entitlements {
|
||||
pub fn derive(plan: Option<&Plan>, status: Option<&SubscriptionStatus>) -> Self {
|
||||
let is_active = matches!(
|
||||
status,
|
||||
Some(SubscriptionStatus::Trialing | SubscriptionStatus::Active)
|
||||
);
|
||||
|
||||
if !is_active {
|
||||
return Self {
|
||||
max_deployments: 1,
|
||||
max_runners: 1,
|
||||
s3_docs_enabled: false,
|
||||
support_tier: "community".to_string(),
|
||||
};
|
||||
}
|
||||
|
||||
match plan.unwrap_or(&Plan::Free) {
|
||||
Plan::Free => Self {
|
||||
max_deployments: 3,
|
||||
max_runners: 1,
|
||||
s3_docs_enabled: false,
|
||||
support_tier: "community".to_string(),
|
||||
},
|
||||
Plan::Pro => Self {
|
||||
max_deployments: 10,
|
||||
max_runners: 5,
|
||||
s3_docs_enabled: true,
|
||||
support_tier: "standard".to_string(),
|
||||
},
|
||||
Plan::Enterprise => Self {
|
||||
max_deployments: 1000,
|
||||
max_runners: 50,
|
||||
s3_docs_enabled: true,
|
||||
support_tier: "priority".to_string(),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct TenantBillingState {
|
||||
pub provider: String,
|
||||
pub provider_customer_id: Option<String>,
|
||||
pub provider_subscription_id: Option<String>,
|
||||
pub provider_checkout_session_id: Option<String>,
|
||||
pub status: Option<SubscriptionStatus>,
|
||||
pub plan: Option<Plan>,
|
||||
pub current_period_end: Option<String>,
|
||||
pub cancel_at_period_end: Option<bool>,
|
||||
pub processed_webhook_event_ids: Vec<String>,
|
||||
pub updated_at: u64,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct BillingStateFile {
|
||||
pub revision: Option<String>,
|
||||
pub tenants: BTreeMap<Uuid, TenantBillingState>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct BillingResponse {
|
||||
pub configured: bool,
|
||||
pub provider: Option<String>,
|
||||
pub plan: Option<Plan>,
|
||||
pub status: Option<SubscriptionStatus>,
|
||||
pub current_period_end: Option<String>,
|
||||
pub cancel_at_period_end: Option<bool>,
|
||||
pub entitlements: Entitlements,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct BillingStore {
|
||||
inner: Arc<RwLock<Inner>>,
|
||||
}
|
||||
|
||||
struct Inner {
|
||||
path: PathBuf,
|
||||
last_modified: Option<SystemTime>,
|
||||
cached: Option<BillingStateFile>,
|
||||
}
|
||||
|
||||
impl BillingStore {
|
||||
pub fn new(path: PathBuf) -> Self {
|
||||
Self {
|
||||
inner: Arc::new(RwLock::new(Inner {
|
||||
path,
|
||||
last_modified: None,
|
||||
cached: None,
|
||||
})),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_for_tenant(&self, tenant_id: Uuid) -> BillingResponse {
|
||||
let mut inner = self.inner.write().expect("billing lock poisoned");
|
||||
inner.reload_if_changed();
|
||||
|
||||
if let Some(state) = inner
|
||||
.cached
|
||||
.as_ref()
|
||||
.and_then(|file| file.tenants.get(&tenant_id))
|
||||
{
|
||||
return BillingResponse {
|
||||
configured: true,
|
||||
provider: Some(state.provider.clone()),
|
||||
plan: state.plan.clone(),
|
||||
status: state.status.clone(),
|
||||
current_period_end: state.current_period_end.clone(),
|
||||
cancel_at_period_end: state.cancel_at_period_end,
|
||||
entitlements: Entitlements::derive(state.plan.as_ref(), state.status.as_ref()),
|
||||
};
|
||||
}
|
||||
|
||||
BillingResponse {
|
||||
configured: false,
|
||||
provider: None,
|
||||
plan: None,
|
||||
status: None,
|
||||
current_period_end: None,
|
||||
cancel_at_period_end: None,
|
||||
entitlements: Entitlements::derive(None, None),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_all_tenant_ids(&self) -> Vec<Uuid> {
|
||||
let mut inner = self.inner.write().expect("billing lock poisoned");
|
||||
inner.reload_if_changed();
|
||||
|
||||
inner
|
||||
.cached
|
||||
.as_ref()
|
||||
.map(|f| f.tenants.keys().cloned().collect())
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
pub fn get_subscription_id(&self, tenant_id: Uuid) -> Option<String> {
|
||||
let mut inner = self.inner.write().expect("billing lock poisoned");
|
||||
inner.reload_if_changed();
|
||||
|
||||
inner
|
||||
.cached
|
||||
.as_ref()
|
||||
.and_then(|f| f.tenants.get(&tenant_id))
|
||||
.and_then(|s| s.provider_subscription_id.clone())
|
||||
}
|
||||
|
||||
pub fn apply_event(&self, event: BillingEvent) -> Result<(), String> {
|
||||
let mut inner = self.inner.write().expect("billing lock poisoned");
|
||||
inner.reload_if_changed();
|
||||
|
||||
let mut file = inner.cached.clone().unwrap_or(BillingStateFile {
|
||||
revision: Some("dev".to_string()),
|
||||
tenants: BTreeMap::new(),
|
||||
});
|
||||
|
||||
let tenant_id = event.tenant_id();
|
||||
let event_id = event.event_id().to_string();
|
||||
let ts_ms = event.ts_ms();
|
||||
|
||||
let state = file.tenants.entry(tenant_id).or_insert(TenantBillingState {
|
||||
provider: "unknown".to_string(), // Will be updated by Created event
|
||||
provider_customer_id: None,
|
||||
provider_subscription_id: None,
|
||||
provider_checkout_session_id: None,
|
||||
status: None,
|
||||
plan: None,
|
||||
current_period_end: None,
|
||||
cancel_at_period_end: None,
|
||||
processed_webhook_event_ids: vec![],
|
||||
updated_at: 0,
|
||||
});
|
||||
|
||||
// Deduplication
|
||||
if state.processed_webhook_event_ids.contains(&event_id) {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Monotonicity check
|
||||
if state.updated_at > ts_ms {
|
||||
state.processed_webhook_event_ids.push(event_id);
|
||||
state.processed_webhook_event_ids.truncate(50);
|
||||
inner.save(file)?;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
match event {
|
||||
BillingEvent::SubscriptionCreated {
|
||||
provider_customer_id,
|
||||
provider_subscription_id,
|
||||
status,
|
||||
plan,
|
||||
current_period_end,
|
||||
..
|
||||
} => {
|
||||
state.provider_customer_id = Some(provider_customer_id);
|
||||
state.provider_subscription_id = Some(provider_subscription_id);
|
||||
state.status = Some(status);
|
||||
state.plan = Some(plan);
|
||||
state.current_period_end = Some(current_period_end);
|
||||
}
|
||||
BillingEvent::SubscriptionUpdated {
|
||||
status,
|
||||
plan,
|
||||
current_period_end,
|
||||
cancel_at_period_end,
|
||||
..
|
||||
} => {
|
||||
state.status = Some(status);
|
||||
state.plan = Some(plan);
|
||||
state.current_period_end = Some(current_period_end);
|
||||
state.cancel_at_period_end = Some(cancel_at_period_end);
|
||||
}
|
||||
BillingEvent::SubscriptionDeleted { .. } => {
|
||||
state.status = Some(SubscriptionStatus::Canceled);
|
||||
}
|
||||
}
|
||||
|
||||
state.updated_at = ts_ms;
|
||||
state.processed_webhook_event_ids.push(event_id);
|
||||
state.processed_webhook_event_ids.truncate(50);
|
||||
|
||||
inner.save(file)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn update_tenant_state(
|
||||
&self,
|
||||
tenant_id: Uuid,
|
||||
state: TenantBillingState,
|
||||
) -> Result<String, String> {
|
||||
let mut inner = self.inner.write().expect("billing lock poisoned");
|
||||
inner.reload_if_changed();
|
||||
|
||||
let mut file = inner.cached.clone().unwrap_or(BillingStateFile {
|
||||
revision: Some("dev".to_string()),
|
||||
tenants: BTreeMap::new(),
|
||||
});
|
||||
|
||||
file.tenants.insert(tenant_id, state);
|
||||
inner.save(file)
|
||||
}
|
||||
}
|
||||
|
||||
impl Inner {
|
||||
fn save(&mut self, mut file: BillingStateFile) -> Result<String, String> {
|
||||
let revision = format!("rev-{}", Uuid::new_v4());
|
||||
file.revision = Some(revision.clone());
|
||||
|
||||
let raw = serde_json::to_string_pretty(&file).map_err(|e| e.to_string())?;
|
||||
let tmp = self.path.with_extension("json.tmp");
|
||||
fs::write(&tmp, raw).map_err(|e| e.to_string())?;
|
||||
fs::rename(&tmp, &self.path).map_err(|e| e.to_string())?;
|
||||
|
||||
self.last_modified = None;
|
||||
self.cached = Some(file);
|
||||
|
||||
Ok(revision)
|
||||
}
|
||||
|
||||
fn reload_if_changed(&mut self) {
|
||||
let meta = fs::metadata(&self.path).ok();
|
||||
let modified = meta.and_then(|m| m.modified().ok());
|
||||
|
||||
if self.cached.is_some() && modified.is_some() && modified == self.last_modified {
|
||||
return;
|
||||
}
|
||||
|
||||
self.last_modified = modified;
|
||||
let p = &self.path;
|
||||
self.cached = fs::read_to_string(p)
|
||||
.ok()
|
||||
.and_then(|raw| serde_json::from_str(&raw).ok());
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_billing(
|
||||
State(state): State<AppState>,
|
||||
Path(tenant_id): Path<Uuid>,
|
||||
headers: HeaderMap,
|
||||
Extension(principal): Extension<Principal>,
|
||||
) -> impl IntoResponse {
|
||||
if !has_permission(&principal, "control:read") {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
|
||||
if let Err(status) = verify_tenant_isolation(&headers, tenant_id) {
|
||||
return status.into_response();
|
||||
}
|
||||
|
||||
let resp = state.billing.get_for_tenant(tenant_id);
|
||||
(StatusCode::OK, Json(resp)).into_response()
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct CheckoutRequest {
|
||||
pub plan: Plan,
|
||||
pub return_path: Option<String>,
|
||||
}
|
||||
|
||||
pub async fn checkout(
|
||||
State(state): State<AppState>,
|
||||
Path(tenant_id): Path<Uuid>,
|
||||
headers: HeaderMap,
|
||||
Extension(principal): Extension<Principal>,
|
||||
Json(body): Json<CheckoutRequest>,
|
||||
) -> impl IntoResponse {
|
||||
if !has_permission(&principal, "control:write") {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
|
||||
if let Err(status) = verify_tenant_isolation(&headers, tenant_id) {
|
||||
return status.into_response();
|
||||
}
|
||||
|
||||
// Check if subscription already exists and is active/trialing
|
||||
let current = state.billing.get_for_tenant(tenant_id);
|
||||
if current.configured
|
||||
&& matches!(
|
||||
current.status,
|
||||
Some(SubscriptionStatus::Active | SubscriptionStatus::Trialing)
|
||||
)
|
||||
{
|
||||
return (
|
||||
StatusCode::CONFLICT,
|
||||
Json(serde_json::json!({ "error": "tenant already has an active subscription" })),
|
||||
)
|
||||
.into_response();
|
||||
}
|
||||
|
||||
// Construct full return URL
|
||||
// TODO: Validate return_path against ALLOWED_RETURN_ORIGINS if provided
|
||||
let return_url = body.return_path.unwrap_or_else(|| "/billing".to_string());
|
||||
|
||||
match state
|
||||
.billing_provider
|
||||
.create_checkout_session(tenant_id, body.plan, return_url)
|
||||
.await
|
||||
{
|
||||
Ok(url) => (StatusCode::OK, Json(serde_json::json!({ "url": url }))).into_response(),
|
||||
Err(e) => {
|
||||
let err_msg = e.to_string();
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({ "error": err_msg })),
|
||||
)
|
||||
.into_response()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn portal(
|
||||
State(state): State<AppState>,
|
||||
Path(tenant_id): Path<Uuid>,
|
||||
headers: HeaderMap,
|
||||
Extension(principal): Extension<Principal>,
|
||||
) -> impl IntoResponse {
|
||||
if !has_permission(&principal, "control:write") {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
|
||||
if let Err(status) = verify_tenant_isolation(&headers, tenant_id) {
|
||||
return status.into_response();
|
||||
}
|
||||
|
||||
let return_url = "/billing".to_string();
|
||||
match state
|
||||
.billing_provider
|
||||
.create_portal_session(tenant_id, return_url)
|
||||
.await
|
||||
{
|
||||
Ok(url) => (StatusCode::OK, Json(serde_json::json!({ "url": url }))).into_response(),
|
||||
Err(e) => {
|
||||
let err_msg = e.to_string();
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({ "error": err_msg })),
|
||||
)
|
||||
.into_response()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn webhook(
|
||||
State(state): State<AppState>,
|
||||
Path(_provider): Path<String>,
|
||||
headers: HeaderMap,
|
||||
body: axum::body::Bytes,
|
||||
) -> impl IntoResponse {
|
||||
// Note: We don't require auth here as this is a public endpoint called by the provider.
|
||||
// Security is handled via signature verification in the provider trait.
|
||||
|
||||
match state.billing_provider.verify_webhook(&body, &headers).await {
|
||||
Ok(event) => {
|
||||
metrics::counter!("billing_webhook_requests_total", "status" => "success").increment(1);
|
||||
if let Err(e) = state.billing.apply_event(event) {
|
||||
tracing::error!(error = %e, "failed to apply billing event from webhook");
|
||||
return (
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(serde_json::json!({ "error": e })),
|
||||
)
|
||||
.into_response();
|
||||
}
|
||||
StatusCode::OK.into_response()
|
||||
}
|
||||
Err(e) => {
|
||||
metrics::counter!("billing_webhook_requests_total", "status" => "error").increment(1);
|
||||
(
|
||||
StatusCode::BAD_REQUEST,
|
||||
Json(serde_json::json!({ "error": e.to_string() })),
|
||||
)
|
||||
.into_response()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn run_reconciliation_loop(state: AppState) {
|
||||
let interval_secs = std::env::var("CONTROL_BILLING_RECONCILE_INTERVAL_SECS")
|
||||
.ok()
|
||||
.and_then(|s| s.parse().ok())
|
||||
.unwrap_or(3600);
|
||||
|
||||
tracing::info!(interval_secs, "starting billing reconciliation loop");
|
||||
|
||||
loop {
|
||||
tokio::time::sleep(Duration::from_secs(interval_secs)).await;
|
||||
|
||||
tracing::info!("starting billing reconciliation run");
|
||||
reconcile_once(&state).await;
|
||||
|
||||
// Update tenant status gauges
|
||||
// Note: This is an expensive operation if there are many tenants,
|
||||
// but for reconciliation it's fine once per hour.
|
||||
update_billing_gauges(&state);
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn reconcile_once(state: &AppState) {
|
||||
let start = std::time::Instant::now();
|
||||
|
||||
let tenant_ids = state.billing.get_all_tenant_ids();
|
||||
let mut success = 0;
|
||||
let mut error = 0;
|
||||
let mut skipped = 0;
|
||||
|
||||
for tenant_id in tenant_ids {
|
||||
let sub_id = state.billing.get_subscription_id(tenant_id);
|
||||
if let Some(subscription_id) = sub_id {
|
||||
match state
|
||||
.billing_provider
|
||||
.fetch_subscription(tenant_id, &subscription_id)
|
||||
.await
|
||||
{
|
||||
Ok(event) => {
|
||||
if let Err(e) = state.billing.apply_event(event) {
|
||||
tracing::error!(?tenant_id, error = %e, "failed to apply reconciled billing event");
|
||||
error += 1;
|
||||
} else {
|
||||
success += 1;
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!(?tenant_id, error = %e, "failed to fetch subscription for reconciliation");
|
||||
error += 1;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
skipped += 1;
|
||||
}
|
||||
}
|
||||
|
||||
let elapsed = start.elapsed();
|
||||
metrics::counter!("billing_reconciliation_runs_total", "result" => "done").increment(1);
|
||||
metrics::histogram!("billing_reconciliation_duration_ms").record(elapsed.as_millis() as f64);
|
||||
|
||||
tracing::info!(
|
||||
success,
|
||||
error,
|
||||
skipped,
|
||||
duration_ms = elapsed.as_millis(),
|
||||
"billing reconciliation run complete"
|
||||
);
|
||||
}
|
||||
|
||||
fn update_billing_gauges(state: &AppState) {
|
||||
let tenant_ids = state.billing.get_all_tenant_ids();
|
||||
let mut counts: BTreeMap<(String, String), u64> = BTreeMap::new();
|
||||
|
||||
for tenant_id in tenant_ids {
|
||||
let resp = state.billing.get_for_tenant(tenant_id);
|
||||
let plan = match resp.plan {
|
||||
Some(Plan::Free) => "free",
|
||||
Some(Plan::Pro) => "pro",
|
||||
Some(Plan::Enterprise) => "enterprise",
|
||||
None => "none",
|
||||
}
|
||||
.to_string();
|
||||
|
||||
let status = match resp.status {
|
||||
Some(SubscriptionStatus::Active) => "active",
|
||||
Some(SubscriptionStatus::Trialing) => "trialing",
|
||||
Some(SubscriptionStatus::PastDue) => "past_due",
|
||||
Some(SubscriptionStatus::Paused) => "paused",
|
||||
Some(SubscriptionStatus::Canceled) => "canceled",
|
||||
Some(SubscriptionStatus::Incomplete) => "incomplete",
|
||||
None => "none",
|
||||
}
|
||||
.to_string();
|
||||
|
||||
*counts.entry((plan, status)).or_insert(0) += 1;
|
||||
}
|
||||
|
||||
for ((plan, status), count) in counts {
|
||||
metrics::gauge!("billing_tenant_status_count", "plan" => plan, "status" => status)
|
||||
.set(count as f64);
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum BillingError {
|
||||
#[error("provider error: {0}")]
|
||||
Provider(String),
|
||||
#[error("invalid configuration: {0}")]
|
||||
Config(String),
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait BillingProvider: Send + Sync {
|
||||
async fn create_checkout_session(
|
||||
&self,
|
||||
tenant_id: Uuid,
|
||||
plan: Plan,
|
||||
return_url: String,
|
||||
) -> Result<String, BillingError>;
|
||||
|
||||
async fn create_portal_session(
|
||||
&self,
|
||||
tenant_id: Uuid,
|
||||
return_url: String,
|
||||
) -> Result<String, BillingError>;
|
||||
|
||||
async fn verify_webhook(
|
||||
&self,
|
||||
payload: &[u8],
|
||||
headers: &HeaderMap,
|
||||
) -> Result<BillingEvent, BillingError>;
|
||||
|
||||
async fn fetch_subscription(
|
||||
&self,
|
||||
tenant_id: Uuid,
|
||||
subscription_id: &str,
|
||||
) -> Result<BillingEvent, BillingError>;
|
||||
}
|
||||
|
||||
/// Stripe-backed [`BillingProvider`] configuration.
pub struct StripeProvider {
    /// Stripe API secret key. NOTE(review): not read by any method yet, as
    /// the API calls are still TODO.
    pub secret_key: String,
    /// Price selected when a checkout is requested for `Plan::Pro`.
    pub price_pro: String,
    /// Price selected when a checkout is requested for `Plan::Enterprise`.
    pub price_enterprise: String,
}
|
||||
|
||||
#[async_trait]
|
||||
impl BillingProvider for StripeProvider {
|
||||
async fn create_checkout_session(
|
||||
&self,
|
||||
tenant_id: Uuid,
|
||||
plan: Plan,
|
||||
_return_url: String,
|
||||
) -> Result<String, BillingError> {
|
||||
let _price = match plan {
|
||||
Plan::Pro => &self.price_pro,
|
||||
Plan::Enterprise => &self.price_enterprise,
|
||||
Plan::Free => {
|
||||
return Err(BillingError::Config(
|
||||
"Free plan has no checkout".to_string(),
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
// TODO: Actually call Stripe API
|
||||
// For now, returning a simulated Stripe checkout URL
|
||||
Ok(format!(
|
||||
"https://checkout.stripe.com/pay/cs_test_{}?tenant_id={}",
|
||||
Uuid::new_v4(),
|
||||
tenant_id
|
||||
))
|
||||
}
|
||||
|
||||
async fn create_portal_session(
|
||||
&self,
|
||||
tenant_id: Uuid,
|
||||
_return_url: String,
|
||||
) -> Result<String, BillingError> {
|
||||
// TODO: Actually call Stripe API
|
||||
Ok(format!(
|
||||
"https://billing.stripe.com/p/session/ps_test_{}?tenant_id={}",
|
||||
Uuid::new_v4(),
|
||||
tenant_id
|
||||
))
|
||||
}
|
||||
|
||||
async fn verify_webhook(
|
||||
&self,
|
||||
_payload: &[u8],
|
||||
_headers: &HeaderMap,
|
||||
) -> Result<BillingEvent, BillingError> {
|
||||
// TODO: Implement real Stripe signature verification
|
||||
Err(BillingError::Provider("Not implemented".to_string()))
|
||||
}
|
||||
|
||||
async fn fetch_subscription(
|
||||
&self,
|
||||
_tenant_id: Uuid,
|
||||
_subscription_id: &str,
|
||||
) -> Result<BillingEvent, BillingError> {
|
||||
// TODO: Actually call Stripe API with timeout
|
||||
// let client = reqwest::Client::builder().timeout(Duration::from_secs(10)).build()...
|
||||
Err(BillingError::Provider("Not implemented".to_string()))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct MockProvider;
|
||||
|
||||
#[async_trait]
|
||||
impl BillingProvider for MockProvider {
|
||||
async fn create_checkout_session(
|
||||
&self,
|
||||
tenant_id: Uuid,
|
||||
_plan: Plan,
|
||||
_return_url: String,
|
||||
) -> Result<String, BillingError> {
|
||||
Ok(format!("https://mock.stripe.com/checkout/{}", tenant_id))
|
||||
}
|
||||
|
||||
async fn create_portal_session(
|
||||
&self,
|
||||
tenant_id: Uuid,
|
||||
_return_url: String,
|
||||
) -> Result<String, BillingError> {
|
||||
Ok(format!("https://mock.stripe.com/portal/{}", tenant_id))
|
||||
}
|
||||
|
||||
async fn verify_webhook(
|
||||
&self,
|
||||
payload: &[u8],
|
||||
_headers: &HeaderMap,
|
||||
) -> Result<BillingEvent, BillingError> {
|
||||
// Mock implementation: just parse the payload as a BillingEvent
|
||||
serde_json::from_slice(payload).map_err(|e| BillingError::Provider(e.to_string()))
|
||||
}
|
||||
|
||||
async fn fetch_subscription(
|
||||
&self,
|
||||
tenant_id: Uuid,
|
||||
_subscription_id: &str,
|
||||
) -> Result<BillingEvent, BillingError> {
|
||||
// Mock implementation: return a SubscriptionUpdated event with current state
|
||||
// In a real mock we might want to store expectations, but for now we just return something plausible.
|
||||
Ok(BillingEvent::SubscriptionUpdated {
|
||||
tenant_id,
|
||||
event_id: format!("reconcile-{}", Uuid::new_v4()),
|
||||
status: SubscriptionStatus::Active,
|
||||
plan: Plan::Pro,
|
||||
current_period_end: "2099-12-31T23:59:59Z".to_string(),
|
||||
cancel_at_period_end: false,
|
||||
ts_ms: SystemTime::now()
|
||||
.duration_since(SystemTime::UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_millis() as u64,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl MockProvider {
|
||||
pub fn get_checkout_url(tenant: Uuid) -> String {
|
||||
format!("https://mock.stripe.com/checkout/{}", tenant)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::env::temp_dir;
|
||||
|
||||
#[test]
fn test_entitlement_derivation() {
    // Free plan, past due: single deployment.
    let free_past_due =
        Entitlements::derive(Some(&Plan::Free), Some(&SubscriptionStatus::PastDue));
    assert_eq!(free_past_due.max_deployments, 1);

    // Active Pro: ten deployments and S3 docs.
    let pro_active = Entitlements::derive(Some(&Plan::Pro), Some(&SubscriptionStatus::Active));
    assert_eq!(pro_active.max_deployments, 10);
    assert!(pro_active.s3_docs_enabled);

    // Trialing Enterprise: one thousand deployments.
    let enterprise_trial =
        Entitlements::derive(Some(&Plan::Enterprise), Some(&SubscriptionStatus::Trialing));
    assert_eq!(enterprise_trial.max_deployments, 1000);
}
|
||||
|
||||
#[test]
fn test_billing_state_roundtrip() {
    // Unique file per run so parallel tests do not share state.
    let path = temp_dir().join(format!("billing-{}.json", Uuid::new_v4()));

    let store = BillingStore::new(path.clone());
    let tenant_id = Uuid::new_v4();

    // An unknown tenant is unconfigured and limited to one deployment.
    let before = store.get_for_tenant(tenant_id);
    assert!(!before.configured);
    assert_eq!(before.entitlements.max_deployments, 1);

    let active_pro = TenantBillingState {
        provider: "mock".to_string(),
        provider_customer_id: None,
        provider_subscription_id: None,
        provider_checkout_session_id: None,
        status: Some(SubscriptionStatus::Active),
        plan: Some(Plan::Pro),
        current_period_end: None,
        cancel_at_period_end: Some(false),
        processed_webhook_event_ids: vec![],
        updated_at: 0,
    };
    store.update_tenant_state(tenant_id, active_pro).unwrap();

    // The stored state is visible on the next read.
    let after = store.get_for_tenant(tenant_id);
    assert!(after.configured);
    assert_eq!(after.provider.as_deref(), Some("mock"));
    assert_eq!(after.plan, Some(Plan::Pro));
    assert_eq!(after.entitlements.max_deployments, 10);

    let _ = fs::remove_file(path);
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_reconciliation_corrects_state() {
|
||||
let mut path = temp_dir();
|
||||
path.push(format!("billing-reconcile-{}.json", Uuid::new_v4()));
|
||||
let store = BillingStore::new(path.clone());
|
||||
let tenant_id = Uuid::new_v4();
|
||||
|
||||
// 1. Initial state: PastDue
|
||||
store
|
||||
.update_tenant_state(
|
||||
tenant_id,
|
||||
TenantBillingState {
|
||||
provider: "mock".to_string(),
|
||||
provider_customer_id: Some("cus_1".to_string()),
|
||||
provider_subscription_id: Some("sub_1".to_string()),
|
||||
provider_checkout_session_id: None,
|
||||
status: Some(SubscriptionStatus::PastDue),
|
||||
plan: Some(Plan::Pro),
|
||||
current_period_end: None,
|
||||
cancel_at_period_end: Some(false),
|
||||
processed_webhook_event_ids: vec![],
|
||||
updated_at: 100,
|
||||
},
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let state = AppState {
|
||||
prometheus: crate::get_test_prometheus_handle(),
|
||||
auth: crate::AuthConfig { hs256_secret: None },
|
||||
jobs: crate::jobs::JobStore::default(),
|
||||
audit: crate::AuditStore::default(),
|
||||
tenant_locks: crate::job_engine::TenantLocks::default(),
|
||||
config_locks: crate::job_engine::ConfigLocks::default(),
|
||||
http: reqwest::Client::new(),
|
||||
placement: crate::placement::PlacementStore::new(temp_dir().join("placement.json")),
|
||||
billing: store.clone(),
|
||||
billing_provider: Arc::new(MockProvider),
|
||||
billing_enforcement_enabled: true,
|
||||
config: crate::config_registry::ConfigRegistry::new(None, None),
|
||||
fleet_services: vec![],
|
||||
swarm: crate::swarm::SwarmStore::new(temp_dir().join("swarm.json")),
|
||||
docs: None,
|
||||
};
|
||||
|
||||
// 2. Run reconciliation. MockProvider returns Active status.
|
||||
reconcile_once(&state).await;
|
||||
|
||||
// 3. Verify state is now Active
|
||||
let resp = store.get_for_tenant(tenant_id);
|
||||
assert_eq!(resp.status, Some(SubscriptionStatus::Active));
|
||||
|
||||
let _ = fs::remove_file(path);
|
||||
}
|
||||
}
|
||||
323
control/api/src/config_registry.rs
Normal file
323
control/api/src/config_registry.rs
Normal file
@@ -0,0 +1,323 @@
|
||||
use async_trait::async_trait;
|
||||
use futures::StreamExt;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::{path::PathBuf, sync::Arc, time::Duration};
|
||||
use thiserror::Error;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ConfigDomain {
|
||||
Routing,
|
||||
Placement,
|
||||
}
|
||||
|
||||
impl ConfigDomain {
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
ConfigDomain::Routing => "routing",
|
||||
ConfigDomain::Placement => "placement",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum ConfigRegistryError {
|
||||
#[error("source error: {0}")]
|
||||
Source(String),
|
||||
#[error("decode error: {0}")]
|
||||
Decode(String),
|
||||
#[error("domain not configured")]
|
||||
NotConfigured,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct ConfigSnapshot<T> {
|
||||
pub domain: String,
|
||||
pub revision: u64,
|
||||
pub value: T,
|
||||
pub source: ConfigSourceInfo,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
#[serde(tag = "kind", rename_all = "snake_case")]
|
||||
pub enum ConfigSourceInfo {
|
||||
File { path: String },
|
||||
NatsKv { bucket: String, key: String },
|
||||
Fixed,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait ConfigSource: Send + Sync {
|
||||
async fn load_bytes(&self) -> Result<(Option<Vec<u8>>, u64), ConfigRegistryError>;
|
||||
async fn put_bytes(
|
||||
&self,
|
||||
expected_revision: Option<u64>,
|
||||
value: Vec<u8>,
|
||||
) -> Result<u64, ConfigRegistryError>;
|
||||
async fn history_bytes(&self, limit: usize)
|
||||
-> Result<Vec<(u64, Vec<u8>)>, ConfigRegistryError>;
|
||||
async fn watch(
|
||||
&self,
|
||||
) -> Result<
|
||||
std::pin::Pin<Box<dyn futures::Stream<Item = Result<(), ConfigRegistryError>> + Send>>,
|
||||
ConfigRegistryError,
|
||||
>;
|
||||
fn info(&self) -> ConfigSourceInfo;
|
||||
}
|
||||
|
||||
/// Read-only [`ConfigSource`] serving a fixed byte payload held in memory.
#[derive(Clone)]
pub struct FixedSource {
    // Shared so clones of the source do not copy the payload.
    bytes: Arc<Vec<u8>>,
}

impl FixedSource {
    /// Wraps `bytes` as the document this source will always return.
    pub fn new(bytes: Vec<u8>) -> Self {
        let bytes = Arc::new(bytes);
        Self { bytes }
    }
}
|
||||
|
||||
#[async_trait]
|
||||
impl ConfigSource for FixedSource {
|
||||
async fn load_bytes(&self) -> Result<(Option<Vec<u8>>, u64), ConfigRegistryError> {
|
||||
Ok((Some(self.bytes.as_ref().clone()), 1))
|
||||
}
|
||||
|
||||
async fn put_bytes(
|
||||
&self,
|
||||
_expected_revision: Option<u64>,
|
||||
_value: Vec<u8>,
|
||||
) -> Result<u64, ConfigRegistryError> {
|
||||
Err(ConfigRegistryError::Source(
|
||||
"fixed source is read-only".to_string(),
|
||||
))
|
||||
}
|
||||
|
||||
async fn history_bytes(
|
||||
&self,
|
||||
_limit: usize,
|
||||
) -> Result<Vec<(u64, Vec<u8>)>, ConfigRegistryError> {
|
||||
Err(ConfigRegistryError::Source(
|
||||
"fixed source has no history".to_string(),
|
||||
))
|
||||
}
|
||||
|
||||
async fn watch(
|
||||
&self,
|
||||
) -> Result<
|
||||
std::pin::Pin<Box<dyn futures::Stream<Item = Result<(), ConfigRegistryError>> + Send>>,
|
||||
ConfigRegistryError,
|
||||
> {
|
||||
Ok(Box::pin(futures::stream::empty()))
|
||||
}
|
||||
|
||||
fn info(&self) -> ConfigSourceInfo {
|
||||
ConfigSourceInfo::Fixed
|
||||
}
|
||||
}
|
||||
|
||||
/// [`ConfigSource`] backed by a single file on the local filesystem.
#[derive(Clone)]
pub struct FileSource {
    // Location of the configuration document.
    path: PathBuf,
}

impl FileSource {
    /// Creates a source that reads and writes the file at `path`.
    pub fn new(path: PathBuf) -> Self {
        FileSource { path }
    }
}
|
||||
|
||||
#[async_trait]
|
||||
impl ConfigSource for FileSource {
|
||||
async fn load_bytes(&self) -> Result<(Option<Vec<u8>>, u64), ConfigRegistryError> {
|
||||
let raw = tokio::fs::read(&self.path)
|
||||
.await
|
||||
.map_err(|e| ConfigRegistryError::Source(e.to_string()))?;
|
||||
Ok((Some(raw), 0))
|
||||
}
|
||||
|
||||
async fn put_bytes(
|
||||
&self,
|
||||
_expected_revision: Option<u64>,
|
||||
value: Vec<u8>,
|
||||
) -> Result<u64, ConfigRegistryError> {
|
||||
let tmp = self.path.with_extension("tmp");
|
||||
tokio::fs::write(&tmp, &value)
|
||||
.await
|
||||
.map_err(|e| ConfigRegistryError::Source(e.to_string()))?;
|
||||
tokio::fs::rename(&tmp, &self.path)
|
||||
.await
|
||||
.map_err(|e| ConfigRegistryError::Source(e.to_string()))?;
|
||||
Ok(0)
|
||||
}
|
||||
|
||||
async fn history_bytes(
|
||||
&self,
|
||||
_limit: usize,
|
||||
) -> Result<Vec<(u64, Vec<u8>)>, ConfigRegistryError> {
|
||||
Err(ConfigRegistryError::Source(
|
||||
"file source has no history".to_string(),
|
||||
))
|
||||
}
|
||||
|
||||
async fn watch(
|
||||
&self,
|
||||
) -> Result<
|
||||
std::pin::Pin<Box<dyn futures::Stream<Item = Result<(), ConfigRegistryError>> + Send>>,
|
||||
ConfigRegistryError,
|
||||
> {
|
||||
Ok(Box::pin(futures::stream::empty()))
|
||||
}
|
||||
|
||||
fn info(&self) -> ConfigSourceInfo {
|
||||
ConfigSourceInfo::File {
|
||||
path: self.path.to_string_lossy().to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct NatsKvSource {
|
||||
kv: async_nats::jetstream::kv::Store,
|
||||
bucket: String,
|
||||
key: String,
|
||||
}
|
||||
|
||||
impl NatsKvSource {
|
||||
pub async fn connect(
|
||||
nats_url: impl Into<String>,
|
||||
bucket: impl Into<String>,
|
||||
key: impl Into<String>,
|
||||
) -> Result<Self, ConfigRegistryError> {
|
||||
let nats_url = nats_url.into();
|
||||
let bucket = bucket.into();
|
||||
let key = key.into();
|
||||
|
||||
let client = tokio::time::timeout(Duration::from_secs(2), async_nats::connect(nats_url))
|
||||
.await
|
||||
.map_err(|_| ConfigRegistryError::Source("connect timeout".to_string()))?
|
||||
.map_err(|e| ConfigRegistryError::Source(e.to_string()))?;
|
||||
let jetstream = async_nats::jetstream::new(client);
|
||||
let kv = match jetstream.get_key_value(&bucket).await {
|
||||
Ok(kv) => kv,
|
||||
Err(_) => jetstream
|
||||
.create_key_value(async_nats::jetstream::kv::Config {
|
||||
bucket: bucket.clone(),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.map_err(|e| ConfigRegistryError::Source(e.to_string()))?,
|
||||
};
|
||||
|
||||
Ok(Self { kv, bucket, key })
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ConfigSource for NatsKvSource {
|
||||
async fn load_bytes(&self) -> Result<(Option<Vec<u8>>, u64), ConfigRegistryError> {
|
||||
let entry = self
|
||||
.kv
|
||||
.entry(&self.key)
|
||||
.await
|
||||
.map_err(|e| ConfigRegistryError::Source(e.to_string()))?;
|
||||
Ok(match entry {
|
||||
Some(e) => (Some(e.value.to_vec()), e.revision),
|
||||
None => (None, 0),
|
||||
})
|
||||
}
|
||||
|
||||
async fn put_bytes(
|
||||
&self,
|
||||
expected_revision: Option<u64>,
|
||||
value: Vec<u8>,
|
||||
) -> Result<u64, ConfigRegistryError> {
|
||||
let rev = match expected_revision {
|
||||
Some(expected) if expected > 0 => self
|
||||
.kv
|
||||
.update(&self.key, value.into(), expected)
|
||||
.await
|
||||
.map_err(|e| ConfigRegistryError::Source(e.to_string()))?,
|
||||
_ => self
|
||||
.kv
|
||||
.put(&self.key, value.into())
|
||||
.await
|
||||
.map_err(|e| ConfigRegistryError::Source(e.to_string()))?,
|
||||
};
|
||||
Ok(rev)
|
||||
}
|
||||
|
||||
async fn history_bytes(
|
||||
&self,
|
||||
limit: usize,
|
||||
) -> Result<Vec<(u64, Vec<u8>)>, ConfigRegistryError> {
|
||||
let mut stream = self
|
||||
.kv
|
||||
.history(&self.key)
|
||||
.await
|
||||
.map_err(|e| ConfigRegistryError::Source(e.to_string()))?;
|
||||
let mut out = Vec::new();
|
||||
while let Some(item) = stream.next().await {
|
||||
let entry = item.map_err(|e| ConfigRegistryError::Source(e.to_string()))?;
|
||||
out.push((entry.revision, entry.value.to_vec()));
|
||||
if out.len() >= limit {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(out)
|
||||
}
|
||||
|
||||
async fn watch(
|
||||
&self,
|
||||
) -> Result<
|
||||
std::pin::Pin<Box<dyn futures::Stream<Item = Result<(), ConfigRegistryError>> + Send>>,
|
||||
ConfigRegistryError,
|
||||
> {
|
||||
let key = self.key.clone();
|
||||
let watch = self
|
||||
.kv
|
||||
.watch(&key)
|
||||
.await
|
||||
.map_err(|e| ConfigRegistryError::Source(e.to_string()))?;
|
||||
Ok(Box::pin(watch.filter_map(|entry| async move {
|
||||
match entry {
|
||||
Ok(entry) => match entry.operation {
|
||||
async_nats::jetstream::kv::Operation::Put => Some(Ok(())),
|
||||
async_nats::jetstream::kv::Operation::Delete
|
||||
| async_nats::jetstream::kv::Operation::Purge => None,
|
||||
},
|
||||
Err(e) => Some(Err(ConfigRegistryError::Source(e.to_string()))),
|
||||
}
|
||||
})))
|
||||
}
|
||||
|
||||
fn info(&self) -> ConfigSourceInfo {
|
||||
ConfigSourceInfo::NatsKv {
|
||||
bucket: self.bucket.clone(),
|
||||
key: self.key.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ConfigRegistry {
|
||||
routing: Option<Arc<dyn ConfigSource>>,
|
||||
placement: Option<Arc<dyn ConfigSource>>,
|
||||
}
|
||||
|
||||
impl ConfigRegistry {
|
||||
pub fn new(
|
||||
routing: Option<Arc<dyn ConfigSource>>,
|
||||
placement: Option<Arc<dyn ConfigSource>>,
|
||||
) -> Self {
|
||||
Self { routing, placement }
|
||||
}
|
||||
|
||||
pub fn source(&self, domain: ConfigDomain) -> Option<Arc<dyn ConfigSource>> {
|
||||
match domain {
|
||||
ConfigDomain::Routing => self.routing.clone(),
|
||||
ConfigDomain::Placement => self.placement.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
15
control/api/src/config_schemas.rs
Normal file
15
control/api/src/config_schemas.rs
Normal file
@@ -0,0 +1,15 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct RoutingConfig {
|
||||
pub revision: u64,
|
||||
|
||||
pub aggregate_placement: HashMap<String, String>,
|
||||
pub projection_placement: HashMap<String, String>,
|
||||
pub runner_placement: HashMap<String, String>,
|
||||
|
||||
pub aggregate_shards: HashMap<String, Vec<String>>,
|
||||
pub projection_shards: HashMap<String, Vec<String>>,
|
||||
pub runner_shards: HashMap<String, Vec<String>>,
|
||||
}
|
||||
353
control/api/src/documents.rs
Normal file
353
control/api/src/documents.rs
Normal file
@@ -0,0 +1,353 @@
|
||||
use crate::auth::{Principal, has_permission};
|
||||
use crate::{AppState, RequestIds};
|
||||
use axum::{
|
||||
Router,
|
||||
body::Bytes,
|
||||
extract::{Extension, Path, Query, State},
|
||||
http::{HeaderMap, StatusCode, header},
|
||||
response::IntoResponse,
|
||||
routing::{get, post, put},
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use uuid::Uuid;
|
||||
|
||||
const HEADER_TENANT_ID: &str = shared::HEADER_X_TENANT_ID;
|
||||
|
||||
pub fn router() -> Router<AppState> {
|
||||
Router::new()
|
||||
.route("/tenants/{tenant_id}/docs", get(list_docs))
|
||||
.route(
|
||||
"/tenants/{tenant_id}/docs/{doc_type}/{doc_id}/{filename}",
|
||||
put(upload_doc),
|
||||
)
|
||||
.route(
|
||||
"/tenants/{tenant_id}/docs/object/{*key}",
|
||||
get(get_doc).delete(delete_doc),
|
||||
)
|
||||
.route(
|
||||
"/tenants/{tenant_id}/docs/presign/upload",
|
||||
post(presign_upload),
|
||||
)
|
||||
.route(
|
||||
"/tenants/{tenant_id}/docs/presign/download",
|
||||
post(presign_download),
|
||||
)
|
||||
}
|
||||
|
||||
fn ensure_tenant_header(headers: &HeaderMap, tenant_id: Uuid) -> Result<(), StatusCode> {
|
||||
let header_tid = headers
|
||||
.get(HEADER_TENANT_ID)
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.ok_or(StatusCode::BAD_REQUEST)?;
|
||||
let header_tid = Uuid::parse_str(header_tid).map_err(|_| StatusCode::BAD_REQUEST)?;
|
||||
if header_tid != tenant_id {
|
||||
return Err(StatusCode::FORBIDDEN);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn ensure_docs_enabled(state: &AppState, tenant_id: Uuid) -> Result<(), StatusCode> {
|
||||
if !state.billing_enforcement_enabled {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let entitlements = state.billing.get_for_tenant(tenant_id).entitlements;
|
||||
if !entitlements.s3_docs_enabled {
|
||||
return Err(StatusCode::PAYMENT_REQUIRED);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct ListQuery {
|
||||
prefix: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
struct ListResponse {
|
||||
objects: Vec<crate::s3_docs::DocObject>,
|
||||
}
|
||||
|
||||
async fn list_docs(
|
||||
State(state): State<AppState>,
|
||||
headers: HeaderMap,
|
||||
Path(tenant_id): Path<Uuid>,
|
||||
Query(q): Query<ListQuery>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
) -> impl IntoResponse {
|
||||
if !has_permission(&principal, "control:read") {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
if let Err(s) = ensure_tenant_header(&headers, tenant_id) {
|
||||
return s.into_response();
|
||||
}
|
||||
if let Err(s) = ensure_docs_enabled(&state, tenant_id) {
|
||||
return s.into_response();
|
||||
}
|
||||
let store = match state.docs.as_ref() {
|
||||
Some(s) => s,
|
||||
None => return StatusCode::SERVICE_UNAVAILABLE.into_response(),
|
||||
};
|
||||
let prefix = q.prefix.unwrap_or_default();
|
||||
let prefix = prefix.trim();
|
||||
if prefix.contains("..") {
|
||||
return StatusCode::BAD_REQUEST.into_response();
|
||||
}
|
||||
let base = format!("{}{}", store_prefix(store), tenant_id);
|
||||
let prefix = if prefix.is_empty() {
|
||||
format!("{base}/")
|
||||
} else {
|
||||
format!("{base}/{prefix}")
|
||||
};
|
||||
match store.list_for_tenant(&tenant_id.to_string(), &prefix).await {
|
||||
Ok(objects) => (StatusCode::OK, axum::Json(ListResponse { objects })).into_response(),
|
||||
Err(_) => StatusCode::BAD_GATEWAY.into_response(),
|
||||
}
|
||||
}
|
||||
|
||||
async fn upload_doc(
|
||||
State(state): State<AppState>,
|
||||
headers: HeaderMap,
|
||||
Path((tenant_id, doc_type, doc_id, filename)): Path<(Uuid, String, String, String)>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
Extension(request_ids): Extension<RequestIds>,
|
||||
body: Bytes,
|
||||
) -> impl IntoResponse {
|
||||
if !has_permission(&principal, "control:write") {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
if let Err(s) = ensure_tenant_header(&headers, tenant_id) {
|
||||
return s.into_response();
|
||||
}
|
||||
if let Err(s) = ensure_docs_enabled(&state, tenant_id) {
|
||||
return s.into_response();
|
||||
}
|
||||
let store = match state.docs.as_ref() {
|
||||
Some(s) => s,
|
||||
None => return StatusCode::SERVICE_UNAVAILABLE.into_response(),
|
||||
};
|
||||
|
||||
let ct = headers
|
||||
.get(header::CONTENT_TYPE)
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.map(|s| s.to_string());
|
||||
|
||||
let key = match store.key_for(&tenant_id.to_string(), &doc_type, &doc_id, &filename) {
|
||||
Ok(k) => k,
|
||||
Err(_) => return StatusCode::BAD_REQUEST.into_response(),
|
||||
};
|
||||
|
||||
let bytes = body.to_vec();
|
||||
let hash = crate::s3_docs::DocsStore::content_hash_sha256_hex(&bytes);
|
||||
if let Err(e) = store
|
||||
.put_for_tenant(&tenant_id.to_string(), &key, bytes, ct)
|
||||
.await
|
||||
{
|
||||
tracing::warn!(
|
||||
request_id = %request_ids.request_id,
|
||||
correlation_id = ?request_ids.correlation_id,
|
||||
error = %e,
|
||||
"docs upload failed"
|
||||
);
|
||||
return StatusCode::BAD_GATEWAY.into_response();
|
||||
}
|
||||
|
||||
(
|
||||
StatusCode::OK,
|
||||
axum::Json(serde_json::json!({
|
||||
"key": key,
|
||||
"sha256": hash,
|
||||
})),
|
||||
)
|
||||
.into_response()
|
||||
}
|
||||
|
||||
async fn get_doc(
|
||||
State(state): State<AppState>,
|
||||
headers: HeaderMap,
|
||||
Path((tenant_id, key)): Path<(Uuid, String)>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
) -> impl IntoResponse {
|
||||
if !has_permission(&principal, "control:read") {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
if let Err(s) = ensure_tenant_header(&headers, tenant_id) {
|
||||
return s.into_response();
|
||||
}
|
||||
if let Err(s) = ensure_docs_enabled(&state, tenant_id) {
|
||||
return s.into_response();
|
||||
}
|
||||
let store = match state.docs.as_ref() {
|
||||
Some(s) => s,
|
||||
None => return StatusCode::SERVICE_UNAVAILABLE.into_response(),
|
||||
};
|
||||
|
||||
let base = format!("{}{}", store_prefix(store), tenant_id);
|
||||
if !key.starts_with(&base) {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
|
||||
match store
|
||||
.get_bytes_for_tenant(&tenant_id.to_string(), &key)
|
||||
.await
|
||||
{
|
||||
Ok((bytes, ct)) => {
|
||||
let mut res = axum::response::Response::new(axum::body::Body::from(bytes));
|
||||
*res.status_mut() = StatusCode::OK;
|
||||
if let Some(ct) = ct
|
||||
&& let Ok(v) = axum::http::HeaderValue::from_str(&ct)
|
||||
{
|
||||
res.headers_mut().insert(header::CONTENT_TYPE, v);
|
||||
}
|
||||
res
|
||||
}
|
||||
Err(_) => StatusCode::NOT_FOUND.into_response(),
|
||||
}
|
||||
}
|
||||
|
||||
async fn delete_doc(
|
||||
State(state): State<AppState>,
|
||||
headers: HeaderMap,
|
||||
Path((tenant_id, key)): Path<(Uuid, String)>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
) -> impl IntoResponse {
|
||||
if !has_permission(&principal, "control:write") {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
if let Err(s) = ensure_tenant_header(&headers, tenant_id) {
|
||||
return s.into_response();
|
||||
}
|
||||
if let Err(s) = ensure_docs_enabled(&state, tenant_id) {
|
||||
return s.into_response();
|
||||
}
|
||||
let store = match state.docs.as_ref() {
|
||||
Some(s) => s,
|
||||
None => return StatusCode::SERVICE_UNAVAILABLE.into_response(),
|
||||
};
|
||||
|
||||
let base = format!("{}{}", store_prefix(store), tenant_id);
|
||||
if !key.starts_with(&base) {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
|
||||
match store.delete_for_tenant(&tenant_id.to_string(), &key).await {
|
||||
Ok(_) => StatusCode::NO_CONTENT.into_response(),
|
||||
Err(_) => StatusCode::BAD_GATEWAY.into_response(),
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct PresignUploadRequest {
|
||||
doc_type: String,
|
||||
doc_id: Option<String>,
|
||||
filename: String,
|
||||
content_type: Option<String>,
|
||||
}
|
||||
|
||||
async fn presign_upload(
|
||||
State(state): State<AppState>,
|
||||
headers: HeaderMap,
|
||||
Path(tenant_id): Path<Uuid>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
axum::Json(body): axum::Json<PresignUploadRequest>,
|
||||
) -> impl IntoResponse {
|
||||
if !has_permission(&principal, "control:write") {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
if let Err(s) = ensure_tenant_header(&headers, tenant_id) {
|
||||
return s.into_response();
|
||||
}
|
||||
if let Err(s) = ensure_docs_enabled(&state, tenant_id) {
|
||||
return s.into_response();
|
||||
}
|
||||
let store = match state.docs.as_ref() {
|
||||
Some(s) => s,
|
||||
None => return StatusCode::SERVICE_UNAVAILABLE.into_response(),
|
||||
};
|
||||
|
||||
let doc_id = body.doc_id.unwrap_or_else(|| Uuid::new_v4().to_string());
|
||||
let key = match store.key_for(
|
||||
&tenant_id.to_string(),
|
||||
&body.doc_type,
|
||||
&doc_id,
|
||||
&body.filename,
|
||||
) {
|
||||
Ok(k) => k,
|
||||
Err(_) => return StatusCode::BAD_REQUEST.into_response(),
|
||||
};
|
||||
|
||||
match store
|
||||
.presign_put_for_tenant(
|
||||
&tenant_id.to_string(),
|
||||
&key,
|
||||
body.content_type,
|
||||
std::time::Duration::from_secs(300),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(url) => (
|
||||
StatusCode::OK,
|
||||
axum::Json(serde_json::json!({
|
||||
"method": "PUT",
|
||||
"url": url,
|
||||
"key": key,
|
||||
})),
|
||||
)
|
||||
.into_response(),
|
||||
Err(_) => StatusCode::BAD_GATEWAY.into_response(),
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct PresignDownloadRequest {
|
||||
key: String,
|
||||
}
|
||||
|
||||
async fn presign_download(
|
||||
State(state): State<AppState>,
|
||||
headers: HeaderMap,
|
||||
Path(tenant_id): Path<Uuid>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
axum::Json(body): axum::Json<PresignDownloadRequest>,
|
||||
) -> impl IntoResponse {
|
||||
if !has_permission(&principal, "control:read") {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
if let Err(s) = ensure_tenant_header(&headers, tenant_id) {
|
||||
return s.into_response();
|
||||
}
|
||||
if let Err(s) = ensure_docs_enabled(&state, tenant_id) {
|
||||
return s.into_response();
|
||||
}
|
||||
let store = match state.docs.as_ref() {
|
||||
Some(s) => s,
|
||||
None => return StatusCode::SERVICE_UNAVAILABLE.into_response(),
|
||||
};
|
||||
let base = format!("{}{}", store_prefix(store), tenant_id);
|
||||
if !body.key.starts_with(&base) {
|
||||
return StatusCode::FORBIDDEN.into_response();
|
||||
}
|
||||
match store
|
||||
.presign_get_for_tenant(
|
||||
&tenant_id.to_string(),
|
||||
&body.key,
|
||||
std::time::Duration::from_secs(300),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(url) => (
|
||||
StatusCode::OK,
|
||||
axum::Json(serde_json::json!({
|
||||
"method": "GET",
|
||||
"url": url,
|
||||
"key": body.key,
|
||||
})),
|
||||
)
|
||||
.into_response(),
|
||||
Err(_) => StatusCode::BAD_GATEWAY.into_response(),
|
||||
}
|
||||
}
|
||||
|
||||
fn store_prefix(store: &crate::s3_docs::DocsStore) -> &str {
|
||||
store.prefix()
|
||||
}
|
||||
127
control/api/src/drift.rs
Normal file
127
control/api/src/drift.rs
Normal file
@@ -0,0 +1,127 @@
|
||||
use crate::{AppState, build_info::extract_build_info, fleet, swarm::SwarmService};
|
||||
use serde::Serialize;
|
||||
use std::collections::{BTreeMap, BTreeSet};
|
||||
|
||||
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum DriftKind {
|
||||
Missing,
|
||||
Extra,
|
||||
Unhealthy,
|
||||
VersionMismatch,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct DriftItem {
|
||||
pub kind: DriftKind,
|
||||
pub service: String,
|
||||
pub details: serde_json::Value,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct DriftResponse {
|
||||
pub summary: BTreeMap<String, u64>,
|
||||
pub items: Vec<DriftItem>,
|
||||
}
|
||||
|
||||
pub async fn compute(state: &AppState) -> DriftResponse {
|
||||
let mut items: Vec<DriftItem> = Vec::new();
|
||||
|
||||
// Desired service set: what the Control API was configured to observe.
|
||||
// (In production, this should evolve into "desired stacks + required services".)
|
||||
let desired: BTreeSet<String> = state
|
||||
.fleet_services
|
||||
.iter()
|
||||
.map(|s| s.name.clone())
|
||||
.collect();
|
||||
|
||||
// Observed service set: what Swarm reports (dev: from file snapshot).
|
||||
let observed_services: Vec<SwarmService> = state.swarm.list_services();
|
||||
let observed: BTreeSet<String> = observed_services.iter().map(|s| s.name.clone()).collect();
|
||||
|
||||
for missing in desired.difference(&observed) {
|
||||
items.push(DriftItem {
|
||||
kind: DriftKind::Missing,
|
||||
service: missing.clone(),
|
||||
details: serde_json::json!({ "expected": true }),
|
||||
});
|
||||
}
|
||||
|
||||
for extra in observed.difference(&desired) {
|
||||
items.push(DriftItem {
|
||||
kind: DriftKind::Extra,
|
||||
service: extra.clone(),
|
||||
details: serde_json::json!({ "observed": true }),
|
||||
});
|
||||
}
|
||||
|
||||
// Health drift: based on fleet snapshot.
|
||||
let snapshots = fleet::snapshot(&state.http, &state.fleet_services).await;
|
||||
for s in snapshots {
|
||||
if !s.health_ok || !s.ready_ok {
|
||||
items.push(DriftItem {
|
||||
kind: DriftKind::Unhealthy,
|
||||
service: s.name.clone(),
|
||||
details: serde_json::json!({
|
||||
"health_ok": s.health_ok,
|
||||
"ready_ok": s.ready_ok,
|
||||
"metrics_ok": s.metrics_ok,
|
||||
"base_url": s.base_url,
|
||||
}),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Version drift: compare build_info between services when present.
|
||||
// Desired is not yet explicit; for now we flag when multiple versions exist for same service.
|
||||
let mut versions_by_service: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
|
||||
let snapshots = fleet::snapshot(&state.http, &state.fleet_services).await;
|
||||
for s in snapshots {
|
||||
if let Ok(metrics) = state
|
||||
.http
|
||||
.get(format!("{}/metrics", s.base_url))
|
||||
.send()
|
||||
.await
|
||||
&& let Ok(body) = metrics.text().await
|
||||
{
|
||||
for bi in extract_build_info(&body) {
|
||||
versions_by_service
|
||||
.entry(bi.service.clone())
|
||||
.or_default()
|
||||
.insert(format!("{}@{}", bi.version, bi.git_sha));
|
||||
}
|
||||
}
|
||||
}
|
||||
for (svc, vs) in versions_by_service {
|
||||
if vs.len() > 1 {
|
||||
items.push(DriftItem {
|
||||
kind: DriftKind::VersionMismatch,
|
||||
service: svc,
|
||||
details: serde_json::json!({ "seen": vs.into_iter().collect::<Vec<_>>() }),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
fn ord(k: &DriftKind) -> u8 {
|
||||
match k {
|
||||
DriftKind::Missing => 0,
|
||||
DriftKind::Extra => 1,
|
||||
DriftKind::Unhealthy => 2,
|
||||
DriftKind::VersionMismatch => 3,
|
||||
}
|
||||
}
|
||||
items.sort_by(|a, b| (ord(&a.kind), &a.service).cmp(&(ord(&b.kind), &b.service)));
|
||||
|
||||
let mut summary: BTreeMap<String, u64> = BTreeMap::new();
|
||||
for item in &items {
|
||||
let k = match item.kind {
|
||||
DriftKind::Missing => "missing",
|
||||
DriftKind::Extra => "extra",
|
||||
DriftKind::Unhealthy => "unhealthy",
|
||||
DriftKind::VersionMismatch => "version_mismatch",
|
||||
};
|
||||
*summary.entry(k.to_string()).or_insert(0) += 1;
|
||||
}
|
||||
|
||||
DriftResponse { summary, items }
|
||||
}
|
||||
@@ -1,14 +1,19 @@
|
||||
use crate::{
|
||||
AppState, Principal,
|
||||
audit::{AuditEvent, AuditStore},
|
||||
config_registry::{ConfigDomain, ConfigRegistryError},
|
||||
config_schemas::RoutingConfig,
|
||||
fleet,
|
||||
jobs::{Job, JobStatus, JobStep, JobStore},
|
||||
placement::PlacementFile,
|
||||
};
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
path::PathBuf,
|
||||
sync::{Arc, Mutex},
|
||||
time::{Duration, SystemTime, UNIX_EPOCH},
|
||||
};
|
||||
use url::Url;
|
||||
use uuid::Uuid;
|
||||
|
||||
#[derive(Clone, Default)]
|
||||
@@ -34,20 +39,52 @@ impl TenantLocks {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Default)]
|
||||
pub struct ConfigLocks {
|
||||
inner: Arc<Mutex<HashMap<String, Uuid>>>,
|
||||
}
|
||||
|
||||
impl ConfigLocks {
|
||||
pub fn try_lock(&self, domain: ConfigDomain, job_id: Uuid) -> bool {
|
||||
let mut map = self.inner.lock().expect("config locks poisoned");
|
||||
let k = domain.as_str().to_string();
|
||||
if map.contains_key(&k) {
|
||||
return false;
|
||||
}
|
||||
map.insert(k, job_id);
|
||||
true
|
||||
}
|
||||
|
||||
pub fn unlock(&self, domain: ConfigDomain, job_id: Uuid) {
|
||||
let mut map = self.inner.lock().expect("config locks poisoned");
|
||||
let k = domain.as_str().to_string();
|
||||
if map.get(&k).copied() == Some(job_id) {
|
||||
map.remove(&k);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct JobEngine {
|
||||
pub jobs: JobStore,
|
||||
pub audit: AuditStore,
|
||||
pub tenant_locks: TenantLocks,
|
||||
pub config_locks: ConfigLocks,
|
||||
pub step_timeout: Duration,
|
||||
}
|
||||
|
||||
impl JobEngine {
|
||||
pub fn new(jobs: JobStore, audit: AuditStore, tenant_locks: TenantLocks) -> Self {
|
||||
pub fn new(
|
||||
jobs: JobStore,
|
||||
audit: AuditStore,
|
||||
tenant_locks: TenantLocks,
|
||||
config_locks: ConfigLocks,
|
||||
) -> Self {
|
||||
Self {
|
||||
jobs,
|
||||
audit,
|
||||
tenant_locks,
|
||||
config_locks,
|
||||
step_timeout: Duration::from_millis(500),
|
||||
}
|
||||
}
|
||||
@@ -93,7 +130,7 @@ impl JobEngine {
|
||||
let engine = self.clone();
|
||||
tokio::spawn(async move {
|
||||
engine
|
||||
.run_job(state, inserted, Some(tenant_id), RunSpec::Drain)
|
||||
.run_job(state, inserted, Some(tenant_id), None, RunSpec::Drain)
|
||||
.await;
|
||||
});
|
||||
|
||||
@@ -152,6 +189,7 @@ impl JobEngine {
|
||||
state,
|
||||
inserted,
|
||||
Some(tenant_id),
|
||||
None,
|
||||
RunSpec::Migrate { runner_target },
|
||||
)
|
||||
.await;
|
||||
@@ -160,7 +198,238 @@ impl JobEngine {
|
||||
Ok(inserted)
|
||||
}
|
||||
|
||||
async fn run_job(&self, state: AppState, job_id: Uuid, tenant_id: Option<Uuid>, spec: RunSpec) {
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn start_config_apply(
|
||||
&self,
|
||||
state: AppState,
|
||||
principal: &Principal,
|
||||
domain: ConfigDomain,
|
||||
reason: String,
|
||||
expected_revision: Option<u64>,
|
||||
value: serde_json::Value,
|
||||
idempotency_key: &str,
|
||||
) -> Result<Uuid, StartJobError> {
|
||||
if let Some(existing) = self.jobs.get_idempotent(idempotency_key) {
|
||||
return Ok(existing);
|
||||
}
|
||||
|
||||
let job_id = Uuid::new_v4();
|
||||
if !self.config_locks.try_lock(domain, job_id) {
|
||||
return Err(StartJobError::TenantLocked);
|
||||
}
|
||||
|
||||
let now = now_ms();
|
||||
let job = Job {
|
||||
job_id,
|
||||
status: JobStatus::Pending,
|
||||
steps: vec![
|
||||
step("preflight"),
|
||||
step("validate_config"),
|
||||
step("backup_config"),
|
||||
step("apply_config"),
|
||||
step("reload_config"),
|
||||
step("verify_config"),
|
||||
],
|
||||
error: None,
|
||||
created_at_ms: now,
|
||||
started_at_ms: None,
|
||||
finished_at_ms: None,
|
||||
};
|
||||
|
||||
let inserted = self.jobs.insert_idempotent(idempotency_key, job);
|
||||
self.audit.record(AuditEvent {
|
||||
ts_ms: now,
|
||||
principal_sub: principal.sub.clone(),
|
||||
action: format!("config.{}.apply", domain.as_str()),
|
||||
tenant_id: None,
|
||||
reason,
|
||||
job_id: Some(inserted),
|
||||
});
|
||||
|
||||
let engine = self.clone();
|
||||
tokio::spawn(async move {
|
||||
engine
|
||||
.run_job(
|
||||
state,
|
||||
inserted,
|
||||
None,
|
||||
Some(domain),
|
||||
RunSpec::ConfigApply {
|
||||
domain,
|
||||
expected_revision,
|
||||
value,
|
||||
},
|
||||
)
|
||||
.await;
|
||||
});
|
||||
|
||||
Ok(inserted)
|
||||
}
|
||||
|
||||
pub fn start_config_validate(
|
||||
&self,
|
||||
state: AppState,
|
||||
principal: &Principal,
|
||||
domain: ConfigDomain,
|
||||
reason: String,
|
||||
value: serde_json::Value,
|
||||
idempotency_key: &str,
|
||||
) -> Result<Uuid, StartJobError> {
|
||||
if let Some(existing) = self.jobs.get_idempotent(idempotency_key) {
|
||||
return Ok(existing);
|
||||
}
|
||||
|
||||
let job_id = Uuid::new_v4();
|
||||
if !self.config_locks.try_lock(domain, job_id) {
|
||||
return Err(StartJobError::TenantLocked);
|
||||
}
|
||||
|
||||
let now = now_ms();
|
||||
let job = Job {
|
||||
job_id,
|
||||
status: JobStatus::Pending,
|
||||
steps: vec![step("validate_config")],
|
||||
error: None,
|
||||
created_at_ms: now,
|
||||
started_at_ms: None,
|
||||
finished_at_ms: None,
|
||||
};
|
||||
|
||||
let inserted = self.jobs.insert_idempotent(idempotency_key, job);
|
||||
self.audit.record(AuditEvent {
|
||||
ts_ms: now,
|
||||
principal_sub: principal.sub.clone(),
|
||||
action: format!("config.{}.validate", domain.as_str()),
|
||||
tenant_id: None,
|
||||
reason,
|
||||
job_id: Some(inserted),
|
||||
});
|
||||
|
||||
let engine = self.clone();
|
||||
tokio::spawn(async move {
|
||||
engine
|
||||
.run_job(
|
||||
state,
|
||||
inserted,
|
||||
None,
|
||||
Some(domain),
|
||||
RunSpec::ConfigValidate { domain, value },
|
||||
)
|
||||
.await;
|
||||
});
|
||||
|
||||
Ok(inserted)
|
||||
}
|
||||
|
||||
pub fn start_config_rollback(
|
||||
&self,
|
||||
state: AppState,
|
||||
principal: &Principal,
|
||||
domain: ConfigDomain,
|
||||
reason: String,
|
||||
idempotency_key: &str,
|
||||
) -> Result<Uuid, StartJobError> {
|
||||
if let Some(existing) = self.jobs.get_idempotent(idempotency_key) {
|
||||
return Ok(existing);
|
||||
}
|
||||
|
||||
let job_id = Uuid::new_v4();
|
||||
if !self.config_locks.try_lock(domain, job_id) {
|
||||
return Err(StartJobError::TenantLocked);
|
||||
}
|
||||
|
||||
let now = now_ms();
|
||||
let job = Job {
|
||||
job_id,
|
||||
status: JobStatus::Pending,
|
||||
steps: vec![
|
||||
step("rollback_config"),
|
||||
step("reload_config"),
|
||||
step("verify_config"),
|
||||
],
|
||||
error: None,
|
||||
created_at_ms: now,
|
||||
started_at_ms: None,
|
||||
finished_at_ms: None,
|
||||
};
|
||||
|
||||
let inserted = self.jobs.insert_idempotent(idempotency_key, job);
|
||||
self.audit.record(AuditEvent {
|
||||
ts_ms: now,
|
||||
principal_sub: principal.sub.clone(),
|
||||
action: format!("config.{}.rollback", domain.as_str()),
|
||||
tenant_id: None,
|
||||
reason,
|
||||
job_id: Some(inserted),
|
||||
});
|
||||
|
||||
let engine = self.clone();
|
||||
tokio::spawn(async move {
|
||||
engine
|
||||
.run_job(
|
||||
state,
|
||||
inserted,
|
||||
None,
|
||||
Some(domain),
|
||||
RunSpec::ConfigRollback { domain },
|
||||
)
|
||||
.await;
|
||||
});
|
||||
|
||||
Ok(inserted)
|
||||
}
|
||||
|
||||
pub fn start_platform_verify(
|
||||
&self,
|
||||
state: AppState,
|
||||
principal: &Principal,
|
||||
reason: String,
|
||||
idempotency_key: &str,
|
||||
) -> Result<Uuid, StartJobError> {
|
||||
if let Some(existing) = self.jobs.get_idempotent(idempotency_key) {
|
||||
return Ok(existing);
|
||||
}
|
||||
|
||||
let job_id = Uuid::new_v4();
|
||||
let now = now_ms();
|
||||
let job = Job {
|
||||
job_id,
|
||||
status: JobStatus::Pending,
|
||||
steps: vec![step("preflight"), step("platform_verify")],
|
||||
error: None,
|
||||
created_at_ms: now,
|
||||
started_at_ms: None,
|
||||
finished_at_ms: None,
|
||||
};
|
||||
|
||||
let inserted = self.jobs.insert_idempotent(idempotency_key, job);
|
||||
self.audit.record(AuditEvent {
|
||||
ts_ms: now,
|
||||
principal_sub: principal.sub.clone(),
|
||||
action: "platform.verify".to_string(),
|
||||
tenant_id: None,
|
||||
reason,
|
||||
job_id: Some(inserted),
|
||||
});
|
||||
|
||||
let engine = self.clone();
|
||||
tokio::spawn(async move {
|
||||
engine
|
||||
.run_job(state, inserted, None, None, RunSpec::PlatformVerify)
|
||||
.await;
|
||||
});
|
||||
|
||||
Ok(inserted)
|
||||
}
|
||||
|
||||
async fn run_job(
|
||||
&self,
|
||||
state: AppState,
|
||||
job_id: Uuid,
|
||||
tenant_id: Option<Uuid>,
|
||||
config_domain: Option<ConfigDomain>,
|
||||
spec: RunSpec,
|
||||
) {
|
||||
self.jobs.update(job_id, |j| {
|
||||
j.status = JobStatus::Running;
|
||||
j.started_at_ms = Some(now_ms());
|
||||
@@ -265,6 +534,9 @@ impl JobEngine {
|
||||
if let Some(tid) = tenant_id {
|
||||
self.tenant_locks.unlock(tid, job_id);
|
||||
}
|
||||
if let Some(domain) = config_domain {
|
||||
self.config_locks.unlock(domain, job_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -276,7 +548,22 @@ pub enum StartJobError {
|
||||
#[derive(Clone)]
|
||||
enum RunSpec {
|
||||
Drain,
|
||||
Migrate { runner_target: String },
|
||||
Migrate {
|
||||
runner_target: String,
|
||||
},
|
||||
ConfigValidate {
|
||||
domain: ConfigDomain,
|
||||
value: serde_json::Value,
|
||||
},
|
||||
ConfigApply {
|
||||
domain: ConfigDomain,
|
||||
expected_revision: Option<u64>,
|
||||
value: serde_json::Value,
|
||||
},
|
||||
ConfigRollback {
|
||||
domain: ConfigDomain,
|
||||
},
|
||||
PlatformVerify,
|
||||
}
|
||||
|
||||
fn step(name: &str) -> JobStep {
|
||||
@@ -316,9 +603,14 @@ async fn run_step(
|
||||
"update_placement" => match spec {
|
||||
RunSpec::Migrate { runner_target } => {
|
||||
let tenant_id = tenant_id.ok_or_else(|| "missing tenant_id".to_string())?;
|
||||
let entitlements = state.billing.get_for_tenant(tenant_id).entitlements;
|
||||
state
|
||||
.placement
|
||||
.update_runner_target(tenant_id, runner_target.clone())
|
||||
.update_runner_target(
|
||||
tenant_id,
|
||||
runner_target.clone(),
|
||||
entitlements.max_runners as usize,
|
||||
)
|
||||
.map(|_| ())
|
||||
}
|
||||
_ => Ok(()),
|
||||
@@ -343,6 +635,400 @@ async fn run_step(
|
||||
}
|
||||
_ => Ok(()),
|
||||
},
|
||||
"validate_config" => match spec {
|
||||
RunSpec::ConfigValidate { domain, value }
|
||||
| RunSpec::ConfigApply { domain, value, .. } => match domain {
|
||||
ConfigDomain::Routing => {
|
||||
let cfg = serde_json::from_value::<RoutingConfig>(value.clone())
|
||||
.map_err(|e| format!("invalid routing config: {e}"))?;
|
||||
validate_routing_semantic(&cfg)?;
|
||||
Ok(())
|
||||
}
|
||||
ConfigDomain::Placement => {
|
||||
let cfg = serde_json::from_value::<PlacementFile>(value.clone())
|
||||
.map_err(|e| format!("invalid placement config: {e}"))?;
|
||||
validate_placement_semantic(state, &cfg)?;
|
||||
Ok(())
|
||||
}
|
||||
},
|
||||
_ => Ok(()),
|
||||
},
|
||||
"backup_config" => match spec {
|
||||
RunSpec::ConfigApply { domain, .. } => {
|
||||
let Some(source) = state.config.source(*domain) else {
|
||||
return Err("config domain not configured".to_string());
|
||||
};
|
||||
let (cur, _) = source
|
||||
.load_bytes()
|
||||
.await
|
||||
.map_err(|e| format!("failed to load config: {e}"))?;
|
||||
let cur = cur.unwrap_or_else(|| b"null".to_vec());
|
||||
let backup_key_value = serde_json::json!({ "backup": serde_json::from_slice::<serde_json::Value>(&cur).unwrap_or(serde_json::Value::Null) });
|
||||
let bytes =
|
||||
serde_json::to_vec_pretty(&backup_key_value).map_err(|e| e.to_string())?;
|
||||
let backup_source = backup_source_for(&source.info(), *domain)
|
||||
.await
|
||||
.map_err(|e| format!("failed to build backup source: {e}"))?;
|
||||
let _ = backup_source
|
||||
.put_bytes(None, bytes)
|
||||
.await
|
||||
.map_err(|e| format!("failed to write backup: {e}"))?;
|
||||
Ok(())
|
||||
}
|
||||
_ => Ok(()),
|
||||
},
|
||||
"apply_config" => match spec {
|
||||
RunSpec::ConfigApply {
|
||||
domain,
|
||||
expected_revision,
|
||||
value,
|
||||
} => {
|
||||
let Some(source) = state.config.source(*domain) else {
|
||||
return Err("config domain not configured".to_string());
|
||||
};
|
||||
let bytes =
|
||||
serde_json::to_vec_pretty(value).map_err(|e| format!("encode error: {e}"))?;
|
||||
let _ = source
|
||||
.put_bytes(*expected_revision, bytes)
|
||||
.await
|
||||
.map_err(|e| format!("apply failed: {e}"))?;
|
||||
Ok(())
|
||||
}
|
||||
_ => Ok(()),
|
||||
},
|
||||
"rollback_config" => match spec {
|
||||
RunSpec::ConfigRollback { domain } => {
|
||||
let Some(source) = state.config.source(*domain) else {
|
||||
return Err("config domain not configured".to_string());
|
||||
};
|
||||
let backup_source = backup_source_for(&source.info(), *domain)
|
||||
.await
|
||||
.map_err(|e| format!("failed to build backup source: {e}"))?;
|
||||
let (bytes, _) = backup_source
|
||||
.load_bytes()
|
||||
.await
|
||||
.map_err(|e| format!("failed to load backup: {e}"))?;
|
||||
let Some(bytes) = bytes else {
|
||||
return Err("no backup available".to_string());
|
||||
};
|
||||
let v: serde_json::Value = serde_json::from_slice(&bytes)
|
||||
.map_err(|e| format!("invalid backup json: {e}"))?;
|
||||
let backup = v.get("backup").cloned().unwrap_or(serde_json::Value::Null);
|
||||
let next =
|
||||
serde_json::to_vec_pretty(&backup).map_err(|e| format!("encode error: {e}"))?;
|
||||
let _ = source
|
||||
.put_bytes(None, next)
|
||||
.await
|
||||
.map_err(|e| format!("rollback failed: {e}"))?;
|
||||
Ok(())
|
||||
}
|
||||
_ => Ok(()),
|
||||
},
|
||||
"reload_config" => Ok(()),
|
||||
"verify_config" => match spec {
|
||||
RunSpec::ConfigValidate { domain, .. }
|
||||
| RunSpec::ConfigApply { domain, .. }
|
||||
| RunSpec::ConfigRollback { domain } => {
|
||||
let Some(source) = state.config.source(*domain) else {
|
||||
return Err("config domain not configured".to_string());
|
||||
};
|
||||
let (bytes, _) = source
|
||||
.load_bytes()
|
||||
.await
|
||||
.map_err(|e| format!("failed to load config: {e}"))?;
|
||||
let bytes = bytes.unwrap_or_else(|| b"null".to_vec());
|
||||
let v: serde_json::Value = serde_json::from_slice(&bytes)
|
||||
.map_err(|e| format!("invalid stored json: {e}"))?;
|
||||
match domain {
|
||||
ConfigDomain::Routing => {
|
||||
let cfg = serde_json::from_value::<RoutingConfig>(v)
|
||||
.map_err(|e| format!("invalid routing config: {e}"))?;
|
||||
validate_routing_semantic(&cfg)?;
|
||||
Ok(())
|
||||
}
|
||||
ConfigDomain::Placement => {
|
||||
let cfg = serde_json::from_value::<PlacementFile>(v)
|
||||
.map_err(|e| format!("invalid placement config: {e}"))?;
|
||||
validate_placement_semantic(state, &cfg)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => Ok(()),
|
||||
},
|
||||
"platform_verify" => match spec {
|
||||
RunSpec::PlatformVerify => {
|
||||
let snapshots = fleet::snapshot(&state.http, &state.fleet_services).await;
|
||||
let bad: Vec<_> = snapshots
|
||||
.into_iter()
|
||||
.filter(|s| !(s.health_ok && s.ready_ok))
|
||||
.map(|s| {
|
||||
format!(
|
||||
"{} health_ok={} ready_ok={}",
|
||||
s.name, s.health_ok, s.ready_ok
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
if !bad.is_empty() {
|
||||
return Err(format!("platform verify failed: {}", bad.join("; ")));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
_ => Ok(()),
|
||||
},
|
||||
_ => Ok(()),
|
||||
}
|
||||
}
|
||||
|
||||
async fn backup_source_for(
|
||||
info: &crate::config_registry::ConfigSourceInfo,
|
||||
domain: ConfigDomain,
|
||||
) -> Result<Arc<dyn crate::config_registry::ConfigSource>, ConfigRegistryError> {
|
||||
use crate::config_registry::{ConfigSource, FileSource, NatsKvSource};
|
||||
match info {
|
||||
crate::config_registry::ConfigSourceInfo::File { path } => Ok(Arc::new(FileSource::new(
|
||||
PathBuf::from(path).with_extension(format!("{}.bak.json", domain.as_str())),
|
||||
))
|
||||
as Arc<dyn ConfigSource>),
|
||||
crate::config_registry::ConfigSourceInfo::NatsKv { bucket, key } => {
|
||||
let nats_url = std::env::var("CONTROL_CONFIG_NATS_URL").map_err(|_| {
|
||||
ConfigRegistryError::Source("missing CONTROL_CONFIG_NATS_URL".to_string())
|
||||
})?;
|
||||
Ok(Arc::new(
|
||||
NatsKvSource::connect(nats_url, bucket.clone(), format!("{key}.bak"))
|
||||
.await
|
||||
.map_err(|e| ConfigRegistryError::Source(e.to_string()))?,
|
||||
) as Arc<dyn ConfigSource>)
|
||||
}
|
||||
crate::config_registry::ConfigSourceInfo::Fixed => Err(ConfigRegistryError::Source(
|
||||
"no backups for fixed source".to_string(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
fn validate_routing_semantic(cfg: &RoutingConfig) -> Result<(), String> {
|
||||
let shard_maps = [
|
||||
("aggregate_shards", &cfg.aggregate_shards),
|
||||
("projection_shards", &cfg.projection_shards),
|
||||
("runner_shards", &cfg.runner_shards),
|
||||
];
|
||||
for (name, map) in shard_maps {
|
||||
for (shard_id, endpoints) in map {
|
||||
if endpoints.is_empty() {
|
||||
return Err(format!("{name}[{shard_id}] has no endpoints"));
|
||||
}
|
||||
for ep in endpoints {
|
||||
let u = Url::parse(ep)
|
||||
.map_err(|e| format!("{name}[{shard_id}] invalid endpoint {ep:?}: {e}"))?;
|
||||
if u.scheme() != "http" && u.scheme() != "https" {
|
||||
return Err(format!(
|
||||
"{name}[{shard_id}] endpoint {ep:?} must be http(s)"
|
||||
));
|
||||
}
|
||||
if u.host_str().is_none() {
|
||||
return Err(format!(
|
||||
"{name}[{shard_id}] endpoint {ep:?} must include host"
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure placement references known shard ids.
|
||||
let placements = [
|
||||
(
|
||||
"aggregate_placement",
|
||||
&cfg.aggregate_placement,
|
||||
&cfg.aggregate_shards,
|
||||
),
|
||||
(
|
||||
"projection_placement",
|
||||
&cfg.projection_placement,
|
||||
&cfg.projection_shards,
|
||||
),
|
||||
(
|
||||
"runner_placement",
|
||||
&cfg.runner_placement,
|
||||
&cfg.runner_shards,
|
||||
),
|
||||
];
|
||||
for (pname, pmap, shards) in placements {
|
||||
for (tenant, shard_id) in pmap {
|
||||
if shard_id.trim().is_empty() {
|
||||
return Err(format!("{pname}[{tenant}] shard_id is empty"));
|
||||
}
|
||||
if !shards.contains_key(shard_id) {
|
||||
return Err(format!(
|
||||
"{pname}[{tenant}] references missing shard_id {shard_id:?}"
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Semantic validation of a placement file against tenant entitlements.
///
/// When billing enforcement is disabled this is a no-op and returns `Ok(())`.
/// NOTE(review): that also skips the structural target checks below; confirm
/// this is intended.
///
/// Otherwise every placement must have at least one target and no blank
/// targets. Per tenant, targets in the aggregate and projection placements
/// count as deployments and targets in the runner placement count as runners;
/// both totals must stay within the tenant's `max_deployments` and
/// `max_runners` entitlements.
///
/// Tenants are checked in sorted `tenant_id` order, so when several tenants
/// exceed their limits the reported error is deterministic.
fn validate_placement_semantic(state: &AppState, cfg: &PlacementFile) -> Result<(), String> {
    if !state.billing_enforcement_enabled {
        return Ok(());
    }

    // tenant_id -> (deployments, runners). A BTreeMap keeps the limit checks
    // below in a stable order, unlike HashMap iteration.
    let mut tenant_counts: std::collections::BTreeMap<_, (usize, usize)> =
        std::collections::BTreeMap::new();

    // (name used in messages, placements of that kind, counts as runners?)
    let kinds = [
        (
            "aggregate_placement",
            cfg.aggregate_placement.as_ref(),
            false,
        ),
        (
            "projection_placement",
            cfg.projection_placement.as_ref(),
            false,
        ),
        ("runner_placement", cfg.runner_placement.as_ref(), true),
    ];
    for (kind_name, k, is_runner) in kinds {
        let Some(k) = k else { continue };
        for p in &k.placements {
            if p.targets.is_empty() {
                return Err(format!("{kind_name} tenant {} has no targets", p.tenant_id));
            }
            if p.targets.iter().any(|t| t.trim().is_empty()) {
                return Err(format!(
                    "{kind_name} tenant {} has empty target",
                    p.tenant_id
                ));
            }

            let entry = tenant_counts.entry(p.tenant_id).or_insert((0, 0));
            if is_runner {
                entry.1 += p.targets.len();
            } else {
                entry.0 += p.targets.len();
            }
        }
    }

    for (tenant_id, (deployments, runners)) in tenant_counts {
        let entitlements = state.billing.get_for_tenant(tenant_id).entitlements;
        if deployments > entitlements.max_deployments as usize {
            return Err(format!(
                "tenant {} exceeds max_deployments limit ({} > {})",
                tenant_id, deployments, entitlements.max_deployments
            ));
        }
        if runners > entitlements.max_runners as usize {
            return Err(format!(
                "tenant {} exceeds max_runners limit ({} > {})",
                tenant_id, runners, entitlements.max_runners
            ));
        }
    }

    Ok(())
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::billing::{BillingStore, Plan, SubscriptionStatus, TenantBillingState};
    use crate::placement::{PlacementFile, PlacementKind, TenantPlacement};

    /// Builds an `AppState` with billing enforcement enabled, the given
    /// billing store, no fleet services, no docs store and no config sources.
    fn mock_state(billing: BillingStore) -> AppState {
        let handle = crate::get_test_prometheus_handle();
        let root = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        AppState {
            prometheus: handle,
            auth: crate::AuthConfig {
                hs256_secret: Some(b"secret".to_vec()),
            },
            jobs: JobStore::default(),
            audit: AuditStore::default(),
            tenant_locks: TenantLocks::default(),
            config_locks: ConfigLocks::default(),
            http: reqwest::Client::new(),
            placement: crate::placement::PlacementStore::new(
                std::env::temp_dir().join("placement.json"),
            ),
            billing,
            billing_provider: Arc::new(crate::billing::MockProvider),
            billing_enforcement_enabled: true,
            config: crate::config_registry::ConfigRegistry::new(None, None),
            fleet_services: vec![],
            swarm: crate::swarm::SwarmStore::new(root.join("swarm/dev.json")),
            docs: None,
        }
    }

    /// A placement kind holding one placement for `tenant_id` with a single target.
    fn single_target(tenant_id: Uuid, target: &str) -> Option<PlacementKind> {
        Some(PlacementKind {
            placements: vec![TenantPlacement {
                tenant_id,
                targets: vec![target.to_string()],
            }],
        })
    }

    /// Billing state for an active Pro subscription on the mock provider.
    fn active_pro_state() -> TenantBillingState {
        TenantBillingState {
            provider: "mock".to_string(),
            provider_customer_id: None,
            provider_subscription_id: None,
            provider_checkout_session_id: None,
            status: Some(SubscriptionStatus::Active),
            plan: Some(Plan::Pro),
            current_period_end: None,
            cancel_at_period_end: None,
            processed_webhook_event_ids: vec![],
            updated_at: 100,
        }
    }

    #[test]
    fn test_validate_placement_limits() {
        let tenant_id = Uuid::new_v4();
        let billing_path =
            std::env::temp_dir().join(format!("billing-unit-{}.json", Uuid::new_v4()));
        let billing = BillingStore::new(billing_path.clone());

        let state = mock_state(billing.clone());

        // 1. Free plan (default): max_deployments=1, max_runners=1
        let cfg = PlacementFile {
            revision: Some("v1".to_string()),
            aggregate_placement: single_target(tenant_id, "a1"),
            projection_placement: single_target(tenant_id, "p1"),
            runner_placement: single_target(tenant_id, "r1"),
        };

        // aggregate(1) + projection(1) = 2 deployments. Limit is 1. Should fail.
        let err = validate_placement_semantic(&state, &cfg).unwrap_err();
        assert!(err.contains("exceeds max_deployments limit"));

        // 2. Reduce to 1 deployment
        let cfg2 = PlacementFile {
            revision: Some("v2".to_string()),
            aggregate_placement: single_target(tenant_id, "a1"),
            projection_placement: None,
            runner_placement: single_target(tenant_id, "r1"),
        };
        validate_placement_semantic(&state, &cfg2).unwrap();

        // 3. Upgrade to Pro: max_deployments=10, max_runners=10
        billing
            .update_tenant_state(tenant_id, active_pro_state())
            .unwrap();

        // Now the first cfg should pass
        validate_placement_semantic(&state, &cfg).unwrap();

        let _ = std::fs::remove_file(billing_path);
    }
}
|
||||
|
||||
@@ -1,14 +1,22 @@
|
||||
mod admin;
|
||||
mod audit;
|
||||
mod auth;
|
||||
pub mod billing;
|
||||
mod build_info;
|
||||
pub mod config_registry;
|
||||
mod config_schemas;
|
||||
mod deployments;
|
||||
mod documents;
|
||||
mod drift;
|
||||
mod fleet;
|
||||
mod job_engine;
|
||||
mod jobs;
|
||||
mod placement;
|
||||
pub mod s3_docs;
|
||||
mod swarm;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
pub use audit::AuditStore;
|
||||
pub use auth::{AuthConfig, Principal};
|
||||
use axum::{
|
||||
@@ -20,8 +28,10 @@ use axum::{
|
||||
routing::get,
|
||||
};
|
||||
pub use build_info::{BuildInfo, extract_build_info};
|
||||
pub use config_registry::{ConfigDomain, ConfigRegistry};
|
||||
pub use deployments::{DeployAnnotationArgs, GrafanaAnnotation, build_grafana_deploy_annotation};
|
||||
pub use fleet::FleetService;
|
||||
pub use job_engine::ConfigLocks;
|
||||
pub use job_engine::TenantLocks;
|
||||
pub use jobs::JobStore;
|
||||
use metrics_exporter_prometheus::PrometheusHandle;
|
||||
@@ -40,10 +50,16 @@ pub struct AppState {
|
||||
pub jobs: JobStore,
|
||||
pub audit: AuditStore,
|
||||
pub tenant_locks: TenantLocks,
|
||||
pub config_locks: ConfigLocks,
|
||||
pub http: reqwest::Client,
|
||||
pub placement: PlacementStore,
|
||||
pub billing: billing::BillingStore,
|
||||
pub billing_provider: Arc<dyn billing::BillingProvider>,
|
||||
pub billing_enforcement_enabled: bool,
|
||||
pub config: ConfigRegistry,
|
||||
pub fleet_services: Vec<FleetService>,
|
||||
pub swarm: SwarmStore,
|
||||
pub docs: Option<s3_docs::DocsStore>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
@@ -93,13 +109,18 @@ pub fn build_app(state: AppState) -> Router {
|
||||
},
|
||||
);
|
||||
|
||||
let admin =
|
||||
admin::admin_router().layer(from_fn_with_state(state.clone(), auth::auth_middleware));
|
||||
let admin = admin::admin_router()
|
||||
.merge(documents::router())
|
||||
.layer(from_fn_with_state(state.clone(), auth::auth_middleware));
|
||||
|
||||
Router::new()
|
||||
.route("/health", get(health))
|
||||
.route("/ready", get(ready))
|
||||
.route("/metrics", get(metrics))
|
||||
.route(
|
||||
"/admin/v1/billing/webhooks/{provider}",
|
||||
axum::routing::post(billing::webhook),
|
||||
)
|
||||
.nest("/admin/v1", admin)
|
||||
.with_state(state)
|
||||
.layer(trace)
|
||||
@@ -167,25 +188,46 @@ async fn request_id_middleware(mut req: Request<axum::body::Body>, next: Next) -
|
||||
res
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
static TEST_PROMETHEUS_HANDLE: std::sync::OnceLock<PrometheusHandle> = std::sync::OnceLock::new();
|
||||
|
||||
/// Returns a clone of the Prometheus handle shared by all tests in this crate.
///
/// The handle is created on first use and cached in `TEST_PROMETHEUS_HANDLE`.
/// Installing the global recorder is attempted first; if that fails, a
/// standalone recorder is built and its handle is used instead.
#[cfg(test)]
pub(crate) fn get_test_prometheus_handle() -> PrometheusHandle {
    let shared = TEST_PROMETHEUS_HANDLE.get_or_init(|| {
        match metrics_exporter_prometheus::PrometheusBuilder::new().install_recorder() {
            Ok(installed) => installed,
            Err(_) => {
                // This can happen if another test already installed it.
                // We might not get the ACTUAL handle to the global recorder here if we don't share it,
                // but for tests it's usually fine to have a dummy one if we are not asserting on metrics.
                metrics_exporter_prometheus::PrometheusBuilder::new()
                    .build()
                    .expect("failed to build prometheus recorder")
                    .0
                    .handle()
            }
        }
    });
    shared.clone()
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::config_registry::{FileSource, FixedSource};
|
||||
use crate::jobs::JobStatus;
|
||||
use axum::{
|
||||
body::Body,
|
||||
http::{Request, StatusCode, header},
|
||||
};
|
||||
use jsonwebtoken::{EncodingKey, Header, encode};
|
||||
use metrics_exporter_prometheus::PrometheusBuilder;
|
||||
use serde::Serialize;
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::OnceLock;
|
||||
use std::sync::Arc;
|
||||
use tower::ServiceExt;
|
||||
use uuid::Uuid;
|
||||
|
||||
static HANDLE: OnceLock<PrometheusHandle> = OnceLock::new();
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct TestClaims {
|
||||
sub: String,
|
||||
@@ -199,15 +241,10 @@ mod tests {
|
||||
}
|
||||
|
||||
fn test_app_with_fleet(fleet_services: Vec<FleetService>) -> Router {
|
||||
let handle = HANDLE
|
||||
.get_or_init(|| {
|
||||
PrometheusBuilder::new()
|
||||
.install_recorder()
|
||||
.expect("failed to install prometheus recorder")
|
||||
})
|
||||
.clone();
|
||||
let handle = get_test_prometheus_handle();
|
||||
|
||||
let placement_path = temp_placement_file();
|
||||
let root = repo_root();
|
||||
|
||||
build_app(AppState {
|
||||
prometheus: handle,
|
||||
@@ -217,10 +254,23 @@ mod tests {
|
||||
jobs: JobStore::default(),
|
||||
audit: AuditStore::default(),
|
||||
tenant_locks: TenantLocks::default(),
|
||||
config_locks: ConfigLocks::default(),
|
||||
http: reqwest::Client::new(),
|
||||
placement: PlacementStore::new(placement_path),
|
||||
billing: crate::billing::BillingStore::new(
|
||||
std::env::temp_dir().join(format!("billing-test-{}.json", Uuid::new_v4())),
|
||||
),
|
||||
billing_provider: Arc::new(crate::billing::MockProvider),
|
||||
billing_enforcement_enabled: true,
|
||||
config: ConfigRegistry::new(
|
||||
Some(Arc::new(FileSource::new(
|
||||
root.join("config/routing/dev.json"),
|
||||
))),
|
||||
Some(Arc::new(FixedSource::new(b"{}".to_vec()))),
|
||||
),
|
||||
fleet_services,
|
||||
swarm: SwarmStore::new(repo_root().join("swarm/dev.json")),
|
||||
docs: None,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -234,14 +284,14 @@ mod tests {
|
||||
|
||||
fn temp_placement_file() -> PathBuf {
|
||||
let root = repo_root();
|
||||
let src = root.join("placement/dev.json");
|
||||
let src = root.join("config/placement/dev.json");
|
||||
let mut dst = std::env::temp_dir();
|
||||
dst.push(format!(
|
||||
"cloudlysis-control-placement-{}-{}.json",
|
||||
std::process::id(),
|
||||
Uuid::new_v4()
|
||||
));
|
||||
let raw = fs::read_to_string(src).expect("missing placement/dev.json");
|
||||
let raw = fs::read_to_string(src).expect("missing config/placement/dev.json");
|
||||
fs::write(&dst, raw).expect("failed to write temp placement file");
|
||||
dst
|
||||
}
|
||||
@@ -689,4 +739,467 @@ mod tests {
|
||||
&serde_json::json!(["preflight", "drain", "update_placement", "reload", "verify"])
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn billing_returns_not_configured_by_default() {
|
||||
let token = make_token(&["control:read"]);
|
||||
let tenant_id = Uuid::new_v4();
|
||||
let res = test_app()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri(format!("/admin/v1/tenants/{tenant_id}/billing"))
|
||||
.header(header::AUTHORIZATION, format!("Bearer {token}"))
|
||||
.header("x-tenant-id", tenant_id.to_string())
|
||||
.body(Body::empty())
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(res.status(), StatusCode::OK);
|
||||
let body = axum::body::to_bytes(res.into_body(), 1024 * 1024)
|
||||
.await
|
||||
.unwrap();
|
||||
let v: serde_json::Value = serde_json::from_slice(&body).unwrap();
|
||||
assert_eq!(v.get("configured").unwrap(), &serde_json::json!(false));
|
||||
assert_eq!(
|
||||
v.get("entitlements")
|
||||
.unwrap()
|
||||
.get("max_deployments")
|
||||
.unwrap(),
|
||||
&serde_json::json!(1)
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn billing_returns_configured_state() {
|
||||
let token = make_token(&["control:read"]);
|
||||
let tenant_id = Uuid::new_v4();
|
||||
|
||||
let handle = get_test_prometheus_handle();
|
||||
|
||||
let billing_path =
|
||||
std::env::temp_dir().join(format!("billing-test-cfg-{}.json", Uuid::new_v4()));
|
||||
let billing = crate::billing::BillingStore::new(billing_path.clone());
|
||||
|
||||
billing
|
||||
.update_tenant_state(
|
||||
tenant_id,
|
||||
crate::billing::TenantBillingState {
|
||||
provider: "stripe".to_string(),
|
||||
provider_customer_id: Some("cus_123".to_string()),
|
||||
provider_subscription_id: Some("sub_123".to_string()),
|
||||
provider_checkout_session_id: None,
|
||||
status: Some(crate::billing::SubscriptionStatus::Active),
|
||||
plan: Some(crate::billing::Plan::Pro),
|
||||
current_period_end: Some("2026-04-30T00:00:00Z".to_string()),
|
||||
cancel_at_period_end: Some(false),
|
||||
processed_webhook_event_ids: vec![],
|
||||
updated_at: 1234567890,
|
||||
},
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let root = repo_root();
|
||||
let app = build_app(AppState {
|
||||
prometheus: handle,
|
||||
auth: AuthConfig {
|
||||
hs256_secret: Some(b"test_secret".to_vec()),
|
||||
},
|
||||
jobs: JobStore::default(),
|
||||
audit: AuditStore::default(),
|
||||
tenant_locks: TenantLocks::default(),
|
||||
config_locks: ConfigLocks::default(),
|
||||
http: reqwest::Client::new(),
|
||||
placement: PlacementStore::new(temp_placement_file()),
|
||||
billing,
|
||||
billing_provider: Arc::new(crate::billing::MockProvider),
|
||||
billing_enforcement_enabled: true,
|
||||
config: ConfigRegistry::new(
|
||||
Some(Arc::new(FileSource::new(
|
||||
root.join("config/routing/dev.json"),
|
||||
))),
|
||||
Some(Arc::new(FixedSource::new(b"{}".to_vec()))),
|
||||
),
|
||||
fleet_services: vec![],
|
||||
swarm: SwarmStore::new(repo_root().join("swarm/dev.json")),
|
||||
docs: None,
|
||||
});
|
||||
|
||||
let res = app
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri(format!("/admin/v1/tenants/{tenant_id}/billing"))
|
||||
.header(header::AUTHORIZATION, format!("Bearer {token}"))
|
||||
.header("x-tenant-id", tenant_id.to_string())
|
||||
.body(Body::empty())
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(res.status(), StatusCode::OK);
|
||||
let body = axum::body::to_bytes(res.into_body(), 1024 * 1024)
|
||||
.await
|
||||
.unwrap();
|
||||
let v: serde_json::Value = serde_json::from_slice(&body).unwrap();
|
||||
assert_eq!(v.get("configured").unwrap(), &serde_json::json!(true));
|
||||
assert_eq!(v.get("plan").unwrap(), &serde_json::json!("pro"));
|
||||
assert_eq!(
|
||||
v.get("entitlements")
|
||||
.unwrap()
|
||||
.get("max_deployments")
|
||||
.unwrap(),
|
||||
&serde_json::json!(10)
|
||||
);
|
||||
|
||||
let _ = std::fs::remove_file(billing_path);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn checkout_returns_mock_url() {
|
||||
let token = make_token(&["control:write"]);
|
||||
let tenant_id = Uuid::new_v4();
|
||||
let body = serde_json::json!({
|
||||
"plan": "pro",
|
||||
"return_path": "/custom-return"
|
||||
});
|
||||
|
||||
let res = test_app()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri(format!("/admin/v1/tenants/{tenant_id}/billing/checkout"))
|
||||
.method("POST")
|
||||
.header(header::AUTHORIZATION, format!("Bearer {token}"))
|
||||
.header("x-tenant-id", tenant_id.to_string())
|
||||
.header(header::CONTENT_TYPE, "application/json")
|
||||
.body(Body::from(body.to_string()))
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(res.status(), StatusCode::OK);
|
||||
let body = axum::body::to_bytes(res.into_body(), 1024 * 1024)
|
||||
.await
|
||||
.unwrap();
|
||||
let v: serde_json::Value = serde_json::from_slice(&body).unwrap();
|
||||
assert_eq!(
|
||||
v.get("url").unwrap(),
|
||||
&serde_json::json!(format!("https://mock.stripe.com/checkout/{}", tenant_id))
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn checkout_fails_if_already_active() {
|
||||
let token = make_token(&["control:write"]);
|
||||
let tenant_id = Uuid::new_v4();
|
||||
|
||||
// Setup app with active subscription
|
||||
let billing_path =
|
||||
std::env::temp_dir().join(format!("billing-test-active-{}.json", Uuid::new_v4()));
|
||||
let billing = crate::billing::BillingStore::new(billing_path.clone());
|
||||
billing
|
||||
.update_tenant_state(
|
||||
tenant_id,
|
||||
crate::billing::TenantBillingState {
|
||||
provider: "mock".to_string(),
|
||||
provider_customer_id: None,
|
||||
provider_subscription_id: None,
|
||||
provider_checkout_session_id: None,
|
||||
status: Some(crate::billing::SubscriptionStatus::Active),
|
||||
plan: Some(crate::billing::Plan::Pro),
|
||||
current_period_end: None,
|
||||
cancel_at_period_end: None,
|
||||
processed_webhook_event_ids: vec![],
|
||||
updated_at: 0,
|
||||
},
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let handle = get_test_prometheus_handle();
|
||||
let root = repo_root();
|
||||
let app = build_app(AppState {
|
||||
prometheus: handle,
|
||||
auth: AuthConfig {
|
||||
hs256_secret: Some(b"test_secret".to_vec()),
|
||||
},
|
||||
jobs: JobStore::default(),
|
||||
audit: AuditStore::default(),
|
||||
tenant_locks: TenantLocks::default(),
|
||||
config_locks: ConfigLocks::default(),
|
||||
http: reqwest::Client::new(),
|
||||
placement: PlacementStore::new(temp_placement_file()),
|
||||
billing,
|
||||
billing_provider: Arc::new(crate::billing::MockProvider),
|
||||
billing_enforcement_enabled: true,
|
||||
config: ConfigRegistry::new(
|
||||
Some(Arc::new(FileSource::new(
|
||||
root.join("config/routing/dev.json"),
|
||||
))),
|
||||
Some(Arc::new(FixedSource::new(b"{}".to_vec()))),
|
||||
),
|
||||
fleet_services: vec![],
|
||||
swarm: SwarmStore::new(repo_root().join("swarm/dev.json")),
|
||||
docs: None,
|
||||
});
|
||||
|
||||
let body = serde_json::json!({ "plan": "pro" });
|
||||
let res = app
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri(format!("/admin/v1/tenants/{tenant_id}/billing/checkout"))
|
||||
.method("POST")
|
||||
.header(header::AUTHORIZATION, format!("Bearer {token}"))
|
||||
.header("x-tenant-id", tenant_id.to_string())
|
||||
.header(header::CONTENT_TYPE, "application/json")
|
||||
.body(Body::from(body.to_string()))
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(res.status(), StatusCode::CONFLICT);
|
||||
let _ = std::fs::remove_file(billing_path);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn portal_returns_mock_url() {
|
||||
let token = make_token(&["control:write"]);
|
||||
let tenant_id = Uuid::new_v4();
|
||||
|
||||
let res = test_app()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri(format!("/admin/v1/tenants/{tenant_id}/billing/portal"))
|
||||
.method("POST")
|
||||
.header(header::AUTHORIZATION, format!("Bearer {token}"))
|
||||
.header("x-tenant-id", tenant_id.to_string())
|
||||
.body(Body::empty())
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(res.status(), StatusCode::OK);
|
||||
let body = axum::body::to_bytes(res.into_body(), 1024 * 1024)
|
||||
.await
|
||||
.unwrap();
|
||||
let v: serde_json::Value = serde_json::from_slice(&body).unwrap();
|
||||
assert_eq!(
|
||||
v.get("url").unwrap(),
|
||||
&serde_json::json!(format!("https://mock.stripe.com/portal/{}", tenant_id))
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn webhook_updates_state_idempotently() {
|
||||
let tenant_id = Uuid::new_v4();
|
||||
let event_id = "evt_123".to_string();
|
||||
|
||||
let app = test_app();
|
||||
|
||||
let event = crate::billing::BillingEvent::SubscriptionCreated {
|
||||
tenant_id,
|
||||
event_id: event_id.clone(),
|
||||
provider_customer_id: "cus_123".to_string(),
|
||||
provider_subscription_id: "sub_123".to_string(),
|
||||
status: crate::billing::SubscriptionStatus::Active,
|
||||
plan: crate::billing::Plan::Pro,
|
||||
current_period_end: "2026-04-30T00:00:00Z".to_string(),
|
||||
ts_ms: 1000,
|
||||
};
|
||||
|
||||
let body = serde_json::to_string(&event).unwrap();
|
||||
|
||||
// 1. Send webhook
|
||||
let res = app
|
||||
.clone()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri("/admin/v1/billing/webhooks/mock")
|
||||
.method("POST")
|
||||
.header(header::CONTENT_TYPE, "application/json")
|
||||
.body(Body::from(body.clone()))
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(res.status(), StatusCode::OK);
|
||||
|
||||
// 2. Verify state
|
||||
let token = make_token(&["control:read"]);
|
||||
let res = app
|
||||
.clone()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri(format!("/admin/v1/tenants/{tenant_id}/billing"))
|
||||
.header(header::AUTHORIZATION, format!("Bearer {token}"))
|
||||
.header("x-tenant-id", tenant_id.to_string())
|
||||
.body(Body::empty())
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let body_bytes = axum::body::to_bytes(res.into_body(), 1024 * 1024)
|
||||
.await
|
||||
.unwrap();
|
||||
let v: serde_json::Value = serde_json::from_slice(&body_bytes).unwrap();
|
||||
assert_eq!(v.get("configured").unwrap(), &serde_json::json!(true));
|
||||
assert_eq!(v.get("plan").unwrap(), &serde_json::json!("pro"));
|
||||
|
||||
// 3. Send same webhook again (idempotency)
|
||||
let res = app
|
||||
.clone()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri("/admin/v1/billing/webhooks/mock")
|
||||
.method("POST")
|
||||
.header(header::CONTENT_TYPE, "application/json")
|
||||
.body(Body::from(body))
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(res.status(), StatusCode::OK);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn webhook_ignores_stale_events() {
|
||||
let tenant_id = Uuid::new_v4();
|
||||
let app = test_app();
|
||||
|
||||
// 1. Send recent event (ts=2000)
|
||||
let event1 = crate::billing::BillingEvent::SubscriptionUpdated {
|
||||
tenant_id,
|
||||
event_id: "evt_new".to_string(),
|
||||
status: crate::billing::SubscriptionStatus::Active,
|
||||
plan: crate::billing::Plan::Enterprise,
|
||||
current_period_end: "2026-05-30T00:00:00Z".to_string(),
|
||||
cancel_at_period_end: false,
|
||||
ts_ms: 2000,
|
||||
};
|
||||
|
||||
app.clone()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri("/admin/v1/billing/webhooks/mock")
|
||||
.method("POST")
|
||||
.body(Body::from(serde_json::to_string(&event1).unwrap()))
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// 2. Send stale event (ts=1000)
|
||||
let event2 = crate::billing::BillingEvent::SubscriptionUpdated {
|
||||
tenant_id,
|
||||
event_id: "evt_old".to_string(),
|
||||
status: crate::billing::SubscriptionStatus::PastDue,
|
||||
plan: crate::billing::Plan::Pro,
|
||||
current_period_end: "2026-04-30T00:00:00Z".to_string(),
|
||||
cancel_at_period_end: false,
|
||||
ts_ms: 1000,
|
||||
};
|
||||
|
||||
app.clone()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri("/admin/v1/billing/webhooks/mock")
|
||||
.method("POST")
|
||||
.body(Body::from(serde_json::to_string(&event2).unwrap()))
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// 3. Verify state is still Enterprise
|
||||
let token = make_token(&["control:read"]);
|
||||
let res = app
|
||||
.clone()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri(format!("/admin/v1/tenants/{tenant_id}/billing"))
|
||||
.header(header::AUTHORIZATION, format!("Bearer {token}"))
|
||||
.header("x-tenant-id", tenant_id.to_string())
|
||||
.body(Body::empty())
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let body_bytes = axum::body::to_bytes(res.into_body(), 1024 * 1024)
|
||||
.await
|
||||
.unwrap();
|
||||
let v: serde_json::Value = serde_json::from_slice(&body_bytes).unwrap();
|
||||
assert_eq!(v.get("plan").unwrap(), &serde_json::json!("enterprise"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn s3_docs_requires_pro_plan() {
|
||||
let token = make_token(&["control:read", "control:write"]);
|
||||
let tenant_id = Uuid::new_v4();
|
||||
let app = test_app();
|
||||
|
||||
// 1. Try to list docs (Free plan by default)
|
||||
let res = app
|
||||
.clone()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri(format!("/admin/v1/tenants/{tenant_id}/docs"))
|
||||
.header(header::AUTHORIZATION, format!("Bearer {token}"))
|
||||
.header("x-tenant-id", tenant_id.to_string())
|
||||
.body(Body::empty())
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(res.status(), StatusCode::PAYMENT_REQUIRED);
|
||||
|
||||
// 2. Update to Pro plan via webhook
|
||||
let event = crate::billing::BillingEvent::SubscriptionCreated {
|
||||
tenant_id,
|
||||
event_id: "evt_pro".to_string(),
|
||||
provider_customer_id: "cus_pro".to_string(),
|
||||
provider_subscription_id: "sub_pro".to_string(),
|
||||
status: crate::billing::SubscriptionStatus::Active,
|
||||
plan: crate::billing::Plan::Pro,
|
||||
current_period_end: "2099-01-01T00:00:00Z".to_string(),
|
||||
ts_ms: 2000,
|
||||
};
|
||||
app.clone()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri("/admin/v1/billing/webhooks/mock")
|
||||
.method("POST")
|
||||
.header(header::CONTENT_TYPE, "application/json")
|
||||
.body(Body::from(serde_json::to_string(&event).unwrap()))
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// 3. Try to list docs again (Should fail with 503 if S3 not configured in tests, or 200/502 if it is)
|
||||
// In test_app(), docs is None by default.
|
||||
let res = app
|
||||
.clone()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri(format!("/admin/v1/tenants/{tenant_id}/docs"))
|
||||
.header(header::AUTHORIZATION, format!("Bearer {token}"))
|
||||
.header("x-tenant-id", tenant_id.to_string())
|
||||
.body(Body::empty())
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Since docs is None in test_app(), it returns SERVICE_UNAVAILABLE (503) AFTER passing the entitlement check.
|
||||
// If it was still PAYMENT_REQUIRED, it would return 402.
|
||||
assert_eq!(res.status(), StatusCode::SERVICE_UNAVAILABLE);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
use clap::Parser;
|
||||
use metrics_exporter_prometheus::PrometheusBuilder;
|
||||
use std::net::SocketAddr;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use tracing_subscriber::EnvFilter;
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
@@ -33,16 +35,32 @@ async fn main() {
|
||||
.build()
|
||||
.expect("failed to build http client");
|
||||
|
||||
let placement_path = std::env::var("CONTROL_PLACEMENT_PATH")
|
||||
let placement_path: PathBuf = std::env::var("CONTROL_PLACEMENT_PATH")
|
||||
.ok()
|
||||
.unwrap_or_else(|| "placement/dev.json".to_string())
|
||||
.unwrap_or_else(|| "config/placement/dev.json".to_string())
|
||||
.into();
|
||||
|
||||
let swarm_path = std::env::var("CONTROL_SWARM_STATE_PATH")
|
||||
let billing_path: PathBuf = std::env::var("CONTROL_BILLING_STATE_PATH")
|
||||
.ok()
|
||||
.unwrap_or_else(|| "swarm/dev.json".to_string())
|
||||
.unwrap_or_else(|| "billing/dev.json".to_string())
|
||||
.into();
|
||||
|
||||
let routing_path: PathBuf = std::env::var("CONTROL_ROUTING_PATH")
|
||||
.ok()
|
||||
.unwrap_or_else(|| "config/routing/dev.json".to_string())
|
||||
.into();
|
||||
|
||||
let swarm_mode = std::env::var("CONTROL_SWARM_MODE").ok();
|
||||
let swarm = if swarm_mode.as_deref() == Some("docker") {
|
||||
api::SwarmStore::new_docker_cli()
|
||||
} else {
|
||||
let swarm_path: PathBuf = std::env::var("CONTROL_SWARM_STATE_PATH")
|
||||
.ok()
|
||||
.unwrap_or_else(|| "swarm/dev.json".to_string())
|
||||
.into();
|
||||
api::SwarmStore::new(swarm_path)
|
||||
};
|
||||
|
||||
let self_url = std::env::var("CONTROL_SELF_URL")
|
||||
.ok()
|
||||
.unwrap_or_else(|| "http://127.0.0.1:8080".to_string());
|
||||
@@ -55,7 +73,70 @@ async fn main() {
|
||||
fleet_services.extend(parse_fleet_services(&spec));
|
||||
}
|
||||
|
||||
let app = api::build_app(api::AppState {
|
||||
let docs_cfg =
|
||||
api::s3_docs::DocsConfig::from_env().expect("missing S3 document storage configuration");
|
||||
let docs = api::s3_docs::DocsStore::new(docs_cfg)
|
||||
.await
|
||||
.expect("failed to initialize S3 document storage client");
|
||||
|
||||
let config = {
|
||||
let routing = if let (Ok(nats_url), Ok(bucket), Ok(key)) = (
|
||||
std::env::var("CONTROL_ROUTING_NATS_URL"),
|
||||
std::env::var("CONTROL_ROUTING_NATS_BUCKET"),
|
||||
std::env::var("CONTROL_ROUTING_NATS_KEY"),
|
||||
) {
|
||||
Some(Arc::new(
|
||||
api::config_registry::NatsKvSource::connect(nats_url, bucket, key)
|
||||
.await
|
||||
.expect("failed to connect to routing config nats kv"),
|
||||
) as Arc<dyn api::config_registry::ConfigSource>)
|
||||
} else {
|
||||
Some(
|
||||
Arc::new(api::config_registry::FileSource::new(routing_path))
|
||||
as Arc<dyn api::config_registry::ConfigSource>,
|
||||
)
|
||||
};
|
||||
|
||||
let placement = if let (Ok(nats_url), Ok(bucket), Ok(key)) = (
|
||||
std::env::var("CONTROL_PLACEMENT_NATS_URL"),
|
||||
std::env::var("CONTROL_PLACEMENT_NATS_BUCKET"),
|
||||
std::env::var("CONTROL_PLACEMENT_NATS_KEY"),
|
||||
) {
|
||||
Some(Arc::new(
|
||||
api::config_registry::NatsKvSource::connect(nats_url, bucket, key)
|
||||
.await
|
||||
.expect("failed to connect to placement config nats kv"),
|
||||
) as Arc<dyn api::config_registry::ConfigSource>)
|
||||
} else {
|
||||
Some(Arc::new(api::config_registry::FileSource::new(
|
||||
placement_path.clone(),
|
||||
))
|
||||
as Arc<dyn api::config_registry::ConfigSource>)
|
||||
};
|
||||
|
||||
api::ConfigRegistry::new(routing, placement)
|
||||
};
|
||||
|
||||
let billing_provider: Arc<dyn api::billing::BillingProvider> =
|
||||
match std::env::var("CONTROL_BILLING_PROVIDER").as_deref() {
|
||||
Ok("stripe") => {
|
||||
let secret_key = std::env::var("CONTROL_STRIPE_SECRET_KEY")
|
||||
.expect("CONTROL_STRIPE_SECRET_KEY required for stripe provider");
|
||||
let price_pro = std::env::var("CONTROL_STRIPE_PRICE_ID_PRO")
|
||||
.expect("CONTROL_STRIPE_PRICE_ID_PRO required for stripe provider");
|
||||
let price_enterprise = std::env::var("CONTROL_STRIPE_PRICE_ID_ENTERPRISE")
|
||||
.expect("CONTROL_STRIPE_PRICE_ID_ENTERPRISE required for stripe provider");
|
||||
|
||||
Arc::new(api::billing::StripeProvider {
|
||||
secret_key,
|
||||
price_pro,
|
||||
price_enterprise,
|
||||
})
|
||||
}
|
||||
_ => Arc::new(api::billing::MockProvider),
|
||||
};
|
||||
|
||||
let state = api::AppState {
|
||||
prometheus: recorder,
|
||||
auth: api::AuthConfig {
|
||||
hs256_secret: std::env::var("CONTROL_GATEWAY_JWT_HS256_SECRET")
|
||||
@@ -65,11 +146,25 @@ async fn main() {
|
||||
jobs: api::JobStore::default(),
|
||||
audit: api::AuditStore::default(),
|
||||
tenant_locks: api::TenantLocks::default(),
|
||||
config_locks: api::ConfigLocks::default(),
|
||||
http,
|
||||
placement: api::PlacementStore::new(placement_path),
|
||||
billing: api::billing::BillingStore::new(billing_path),
|
||||
billing_provider,
|
||||
billing_enforcement_enabled: std::env::var("CONTROL_BILLING_ENFORCEMENT_ENABLED")
|
||||
.ok()
|
||||
.and_then(|s| s.parse().ok())
|
||||
.unwrap_or(false),
|
||||
config,
|
||||
fleet_services,
|
||||
swarm: api::SwarmStore::new(swarm_path),
|
||||
});
|
||||
swarm,
|
||||
docs: Some(docs),
|
||||
};
|
||||
|
||||
// Spawn reconciliation loop
|
||||
tokio::spawn(api::billing::run_reconciliation_loop(state.clone()));
|
||||
|
||||
let app = api::build_app(state);
|
||||
|
||||
let listener = tokio::net::TcpListener::bind(args.addr)
|
||||
.await
|
||||
|
||||
@@ -157,6 +157,7 @@ impl PlacementStore {
|
||||
&self,
|
||||
tenant_id: Uuid,
|
||||
runner_target: String,
|
||||
max_runners: usize,
|
||||
) -> Result<String, String> {
|
||||
let mut inner = self.inner.write().expect("placement lock poisoned");
|
||||
inner.reload_if_changed();
|
||||
@@ -178,8 +179,17 @@ impl PlacementStore {
|
||||
.iter_mut()
|
||||
.find(|p| p.tenant_id == tenant_id)
|
||||
{
|
||||
// If already at or above limit, and we are adding a NEW target (not replacing), it would fail.
|
||||
// But here update_runner_target REPLACES the target list with a single target for now.
|
||||
// If in the future we want to append, we check targets.len().
|
||||
if 1 > max_runners {
|
||||
return Err(format!("exceeds max_runners limit of {}", max_runners));
|
||||
}
|
||||
existing.targets = vec![runner_target];
|
||||
} else {
|
||||
if 1 > max_runners {
|
||||
return Err(format!("exceeds max_runners limit of {}", max_runners));
|
||||
}
|
||||
runner.placements.push(TenantPlacement {
|
||||
tenant_id,
|
||||
targets: vec![runner_target],
|
||||
|
||||
508
control/api/src/s3_docs.rs
Normal file
508
control/api/src/s3_docs.rs
Normal file
@@ -0,0 +1,508 @@
|
||||
use aws_config::Region;
|
||||
use aws_credential_types::Credentials;
|
||||
use aws_sdk_s3::presigning::PresigningConfig;
|
||||
use aws_sdk_s3::types::BucketCannedAcl;
|
||||
use aws_sdk_s3::{Client, config::Builder as S3ConfigBuilder};
|
||||
use sha2::Digest;
|
||||
use std::time::Duration;
|
||||
|
||||
/// Settings for the S3-compatible document store.
///
/// `Debug` is implemented by hand so that `secret_access_key` is never
/// written to logs or panic messages.
#[derive(Clone)]
pub struct DocsConfig {
    /// Endpoint URL used for regular S3 API calls.
    pub endpoint: String,
    /// Optional endpoint used when presigning URLs; `endpoint` is used when unset.
    pub public_endpoint: Option<String>,
    /// Region name passed to the S3 client.
    pub region: String,
    /// Access key id of the static credentials.
    pub access_key_id: String,
    /// Secret access key of the static credentials (redacted in `Debug` output).
    pub secret_access_key: String,
    /// Whether the S3 client uses path-style addressing.
    pub force_path_style: bool,
    /// Marks a plain-HTTP setup such as a local MinIO instance.
    pub insecure: bool,
    /// Bucket names that tenants are sharded across.
    pub buckets: Vec<String>,
    /// Key prefix placed in front of every document key.
    pub prefix: String,
}

impl std::fmt::Debug for DocsConfig {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("DocsConfig")
            .field("endpoint", &self.endpoint)
            .field("public_endpoint", &self.public_endpoint)
            .field("region", &self.region)
            .field("access_key_id", &self.access_key_id)
            // Never print the secret itself.
            .field("secret_access_key", &"<redacted>")
            .field("force_path_style", &self.force_path_style)
            .field("insecure", &self.insecure)
            .field("buckets", &self.buckets)
            .field("prefix", &self.prefix)
            .finish()
    }
}
|
||||
|
||||
impl DocsConfig {
|
||||
pub fn from_env() -> Result<Self, String> {
|
||||
fn get(name: &str) -> Option<String> {
|
||||
std::env::var(name)
|
||||
.ok()
|
||||
.map(|s| s.trim().to_string())
|
||||
.filter(|s| !s.is_empty())
|
||||
}
|
||||
|
||||
fn get_secret(name: &str, file_name: &str) -> Result<Option<String>, String> {
|
||||
if let Some(path) = get(file_name) {
|
||||
let raw = std::fs::read_to_string(path).map_err(|e| e.to_string())?;
|
||||
let v = raw.trim().to_string();
|
||||
if v.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
return Ok(Some(v));
|
||||
}
|
||||
Ok(get(name))
|
||||
}
|
||||
|
||||
let endpoint = get("CONTROL_S3_ENDPOINT")
|
||||
.or_else(|| get("S3_ENDPOINT"))
|
||||
.ok_or_else(|| "Missing CONTROL_S3_ENDPOINT".to_string())?;
|
||||
let public_endpoint =
|
||||
get("CONTROL_S3_PUBLIC_ENDPOINT").or_else(|| get("S3_PUBLIC_ENDPOINT"));
|
||||
let region = get("CONTROL_S3_REGION")
|
||||
.or_else(|| get("S3_REGION"))
|
||||
.unwrap_or_else(|| "us-east-1".to_string());
|
||||
let access_key_id =
|
||||
get_secret("CONTROL_S3_ACCESS_KEY_ID", "CONTROL_S3_ACCESS_KEY_ID_FILE")?
|
||||
.or_else(|| {
|
||||
get_secret("S3_ACCESS_KEY_ID", "S3_ACCESS_KEY_ID_FILE")
|
||||
.ok()
|
||||
.flatten()
|
||||
})
|
||||
.ok_or_else(|| "Missing CONTROL_S3_ACCESS_KEY_ID".to_string())?;
|
||||
let secret_access_key = get_secret(
|
||||
"CONTROL_S3_SECRET_ACCESS_KEY",
|
||||
"CONTROL_S3_SECRET_ACCESS_KEY_FILE",
|
||||
)?
|
||||
.or_else(|| {
|
||||
get_secret("S3_SECRET_ACCESS_KEY", "S3_SECRET_ACCESS_KEY_FILE")
|
||||
.ok()
|
||||
.flatten()
|
||||
})
|
||||
.ok_or_else(|| "Missing CONTROL_S3_SECRET_ACCESS_KEY".to_string())?;
|
||||
let force_path_style = get("CONTROL_S3_FORCE_PATH_STYLE")
|
||||
.or_else(|| get("S3_FORCE_PATH_STYLE"))
|
||||
.as_deref()
|
||||
.map(|v| v == "true" || v == "1")
|
||||
.unwrap_or(true);
|
||||
let insecure = get("CONTROL_S3_INSECURE")
|
||||
.or_else(|| get("S3_INSECURE"))
|
||||
.as_deref()
|
||||
.map(|v| v == "true" || v == "1")
|
||||
.unwrap_or(false);
|
||||
|
||||
let bucket_raw = get("CONTROL_S3_BUCKET_DOCS")
|
||||
.or_else(|| get("S3_BUCKET_DOCS"))
|
||||
.ok_or_else(|| "Missing CONTROL_S3_BUCKET_DOCS".to_string())?;
|
||||
let buckets: Vec<String> = bucket_raw
|
||||
.split(',')
|
||||
.map(|s| s.trim().to_string())
|
||||
.filter(|s| !s.is_empty())
|
||||
.collect();
|
||||
if buckets.is_empty() {
|
||||
return Err("Missing CONTROL_S3_BUCKET_DOCS".to_string());
|
||||
}
|
||||
let prefix = get("CONTROL_S3_PREFIX_DOCS")
|
||||
.or_else(|| get("S3_PREFIX_DOCS"))
|
||||
.unwrap_or_else(|| "docs/".to_string());
|
||||
let prefix = if prefix.ends_with('/') {
|
||||
prefix
|
||||
} else {
|
||||
format!("{prefix}/")
|
||||
};
|
||||
|
||||
// SECURITY: `*_INSECURE=true` is intended for local MinIO setups that use plain HTTP.
|
||||
// We currently do not disable TLS certificate verification for HTTPS endpoints.
|
||||
if insecure && endpoint.trim_start().starts_with("https://") {
|
||||
return Err(
|
||||
"CONTROL_S3_INSECURE=true is not supported with https:// endpoints (TLS verification is not disabled). Use http:// for local MinIO, or set CONTROL_S3_INSECURE=false for production."
|
||||
.to_string(),
|
||||
);
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
endpoint,
|
||||
public_endpoint,
|
||||
region,
|
||||
access_key_id,
|
||||
secret_access_key,
|
||||
force_path_style,
|
||||
insecure,
|
||||
buckets,
|
||||
prefix,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct DocsStore {
|
||||
cfg: DocsConfig,
|
||||
client: Client,
|
||||
presign_client: Client,
|
||||
}
|
||||
|
||||
impl DocsStore {
|
||||
pub async fn new(cfg: DocsConfig) -> Result<Self, String> {
|
||||
let creds = Credentials::new(
|
||||
cfg.access_key_id.clone(),
|
||||
cfg.secret_access_key.clone(),
|
||||
None,
|
||||
None,
|
||||
"static",
|
||||
);
|
||||
let shared = aws_config::from_env()
|
||||
.region(Region::new(cfg.region.clone()))
|
||||
.credentials_provider(creds.clone())
|
||||
.endpoint_url(cfg.endpoint.clone())
|
||||
.load()
|
||||
.await;
|
||||
|
||||
let s3_conf = S3ConfigBuilder::from(&shared)
|
||||
.force_path_style(cfg.force_path_style)
|
||||
.build();
|
||||
let client = Client::from_conf(s3_conf);
|
||||
|
||||
let presign_endpoint = cfg
|
||||
.public_endpoint
|
||||
.clone()
|
||||
.unwrap_or_else(|| cfg.endpoint.clone());
|
||||
let presign_shared = aws_config::from_env()
|
||||
.region(Region::new(cfg.region.clone()))
|
||||
.credentials_provider(creds)
|
||||
.endpoint_url(presign_endpoint)
|
||||
.load()
|
||||
.await;
|
||||
let presign_conf = S3ConfigBuilder::from(&presign_shared)
|
||||
.force_path_style(cfg.force_path_style)
|
||||
.build();
|
||||
let presign_client = Client::from_conf(presign_conf);
|
||||
|
||||
Ok(Self {
|
||||
cfg,
|
||||
client,
|
||||
presign_client,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn key_for(
|
||||
&self,
|
||||
tenant_id: &str,
|
||||
doc_type: &str,
|
||||
doc_id: &str,
|
||||
filename: &str,
|
||||
) -> Result<String, String> {
|
||||
validate_segment("tenant_id", tenant_id)?;
|
||||
validate_segment("doc_type", doc_type)?;
|
||||
validate_segment("doc_id", doc_id)?;
|
||||
validate_filename(filename)?;
|
||||
Ok(format!(
|
||||
"{}{}/{}/{}/{}",
|
||||
self.cfg.prefix, tenant_id, doc_type, doc_id, filename
|
||||
))
|
||||
}
|
||||
|
||||
pub fn prefix(&self) -> &str {
|
||||
self.cfg.prefix.as_str()
|
||||
}
|
||||
|
||||
pub fn buckets(&self) -> &[String] {
|
||||
self.cfg.buckets.as_slice()
|
||||
}
|
||||
|
||||
fn bucket_for_tenant(&self, tenant_id: &str) -> &str {
|
||||
// Deterministic sharding across buckets. Note: if the bucket list changes, the mapping changes.
|
||||
// For production, set the full planned bucket set up-front (e.g. `-0,-1,-2`) to keep mapping stable.
|
||||
let n = self.cfg.buckets.len();
|
||||
if n == 1 {
|
||||
return self.cfg.buckets[0].as_str();
|
||||
}
|
||||
let mut hasher = sha2::Sha256::new();
|
||||
hasher.update(tenant_id.as_bytes());
|
||||
let digest = hasher.finalize();
|
||||
let mut b = [0u8; 8];
|
||||
b.copy_from_slice(&digest[..8]);
|
||||
let v = u64::from_be_bytes(b);
|
||||
let idx = (v as usize) % n;
|
||||
self.cfg.buckets[idx].as_str()
|
||||
}
|
||||
|
||||
pub fn content_hash_sha256_hex(bytes: &[u8]) -> String {
|
||||
let mut hasher = sha2::Sha256::new();
|
||||
hasher.update(bytes);
|
||||
let digest = hasher.finalize();
|
||||
let mut out = String::with_capacity(digest.len() * 2);
|
||||
for b in digest {
|
||||
use std::fmt::Write;
|
||||
let _ = write!(&mut out, "{:02x}", b);
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
pub async fn put_for_tenant(
|
||||
&self,
|
||||
tenant_id: &str,
|
||||
key: &str,
|
||||
bytes: Vec<u8>,
|
||||
content_type: Option<String>,
|
||||
) -> Result<(), String> {
|
||||
let mut req = self
|
||||
.client
|
||||
.put_object()
|
||||
.bucket(self.bucket_for_tenant(tenant_id))
|
||||
.key(key)
|
||||
.body(aws_sdk_s3::primitives::ByteStream::from(bytes));
|
||||
if let Some(ct) = content_type {
|
||||
req = req.content_type(ct);
|
||||
}
|
||||
req.send().await.map_err(|e| e.to_string())?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn get_bytes_for_tenant(
|
||||
&self,
|
||||
tenant_id: &str,
|
||||
key: &str,
|
||||
) -> Result<(Vec<u8>, Option<String>), String> {
|
||||
let out = self
|
||||
.client
|
||||
.get_object()
|
||||
.bucket(self.bucket_for_tenant(tenant_id))
|
||||
.key(key)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
let ct = out.content_type().map(|s| s.to_string());
|
||||
let bytes = out
|
||||
.body
|
||||
.collect()
|
||||
.await
|
||||
.map_err(|e| e.to_string())?
|
||||
.into_bytes()
|
||||
.to_vec();
|
||||
Ok((bytes, ct))
|
||||
}
|
||||
|
||||
pub async fn delete_for_tenant(&self, tenant_id: &str, key: &str) -> Result<(), String> {
|
||||
self.client
|
||||
.delete_object()
|
||||
.bucket(self.bucket_for_tenant(tenant_id))
|
||||
.key(key)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn list_for_tenant(
|
||||
&self,
|
||||
tenant_id: &str,
|
||||
prefix: &str,
|
||||
) -> Result<Vec<DocObject>, String> {
|
||||
let out = self
|
||||
.client
|
||||
.list_objects_v2()
|
||||
.bucket(self.bucket_for_tenant(tenant_id))
|
||||
.prefix(prefix)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
let mut items = Vec::new();
|
||||
for o in out.contents() {
|
||||
if let Some(key) = o.key() {
|
||||
items.push(DocObject {
|
||||
key: key.to_string(),
|
||||
size: o.size().unwrap_or(0),
|
||||
last_modified: o.last_modified().map(|d| d.to_string()),
|
||||
});
|
||||
}
|
||||
}
|
||||
Ok(items)
|
||||
}
|
||||
|
||||
pub async fn ensure_buckets_exist(&self) -> Result<(), String> {
|
||||
for bucket in &self.cfg.buckets {
|
||||
let head = self.client.head_bucket().bucket(bucket).send().await;
|
||||
if head.is_ok() {
|
||||
continue;
|
||||
}
|
||||
self.client
|
||||
.create_bucket()
|
||||
.bucket(bucket)
|
||||
.acl(BucketCannedAcl::Private)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn presign_put_for_tenant(
|
||||
&self,
|
||||
tenant_id: &str,
|
||||
key: &str,
|
||||
content_type: Option<String>,
|
||||
expires: Duration,
|
||||
) -> Result<String, String> {
|
||||
let mut req = self
|
||||
.presign_client
|
||||
.put_object()
|
||||
.bucket(self.bucket_for_tenant(tenant_id))
|
||||
.key(key);
|
||||
if let Some(ct) = content_type {
|
||||
req = req.content_type(ct);
|
||||
}
|
||||
let presigned = req
|
||||
.presigned(PresigningConfig::expires_in(expires).map_err(|e| e.to_string())?)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
Ok(presigned.uri().to_string())
|
||||
}
|
||||
|
||||
pub async fn presign_get_for_tenant(
|
||||
&self,
|
||||
tenant_id: &str,
|
||||
key: &str,
|
||||
expires: Duration,
|
||||
) -> Result<String, String> {
|
||||
let req = self
|
||||
.presign_client
|
||||
.get_object()
|
||||
.bucket(self.bucket_for_tenant(tenant_id))
|
||||
.key(key);
|
||||
let presigned = req
|
||||
.presigned(PresigningConfig::expires_in(expires).map_err(|e| e.to_string())?)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
Ok(presigned.uri().to_string())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, serde::Serialize)]
|
||||
pub struct DocObject {
|
||||
pub key: String,
|
||||
pub size: i64,
|
||||
pub last_modified: Option<String>,
|
||||
}
|
||||
|
||||
/// Validates a single path segment that will be embedded in an object key.
///
/// `name` is only used to label the error message; `value` is the segment
/// under test. A segment is rejected when it is
/// - empty (`"{name} is required"`),
/// - longer than 128 bytes (`"{name} too long"`),
/// - or contains a path separator (`/`, `\`), a `..` sequence, or any
///   control character such as NUL or a newline
///   (`"{name} contains invalid characters"`).
fn validate_segment(name: &str, value: &str) -> Result<(), String> {
    if value.is_empty() {
        return Err(format!("{name} is required"));
    }
    // `len()` counts bytes, so the limit bounds the encoded key length.
    if value.len() > 128 {
        return Err(format!("{name} too long"));
    }

    let has_separator = value.contains('/') || value.contains('\\');
    let has_parent_ref = value.contains("..");
    // Control characters are legal in neither a sensible file name nor a
    // well-behaved object key, and they break logs and presigned URLs.
    let has_control = value.chars().any(char::is_control);

    if has_separator || has_parent_ref || has_control {
        return Err(format!("{name} contains invalid characters"));
    }
    Ok(())
}
|
||||
|
||||
fn validate_filename(value: &str) -> Result<(), String> {
|
||||
validate_segment("filename", value)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
/// Serializes tests that mutate process-wide environment variables.
///
/// The returned guard must be held for as long as the test reads or writes
/// the environment. If an earlier test panicked while holding the guard the
/// mutex is poisoned; the lock protects no data of its own (`()`), so the
/// poison is ignored and the guard is handed out anyway instead of turning
/// one test failure into a cascade of unrelated panics.
fn env_lock() -> std::sync::MutexGuard<'static, ()> {
    static LOCK: std::sync::OnceLock<std::sync::Mutex<()>> = std::sync::OnceLock::new();
    LOCK.get_or_init(|| std::sync::Mutex::new(()))
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner)
}
|
||||
|
||||
/// `DocsConfig::from_env` picks up every `CONTROL_S3_*` variable set below.
#[test]
fn config_from_env_parses_expected_fields() {
    const VARS: [(&str, &str); 8] = [
        ("CONTROL_S3_ENDPOINT", "http://minio:9000"),
        ("CONTROL_S3_REGION", "us-east-1"),
        ("CONTROL_S3_ACCESS_KEY_ID", "minioadmin"),
        ("CONTROL_S3_SECRET_ACCESS_KEY", "minioadmin"),
        ("CONTROL_S3_BUCKET_DOCS", "cloudlysis-docs"),
        ("CONTROL_S3_PREFIX_DOCS", "docs/"),
        ("CONTROL_S3_FORCE_PATH_STYLE", "true"),
        ("CONTROL_S3_INSECURE", "true"),
    ];

    // Hold the lock for the whole test: the environment is process-global.
    let _guard = env_lock();
    unsafe {
        for (name, value) in VARS {
            std::env::set_var(name, value);
        }
    }

    let cfg = DocsConfig::from_env().unwrap();
    assert_eq!(cfg.endpoint, "http://minio:9000");
    assert_eq!(cfg.buckets, vec!["cloudlysis-docs".to_string()]);
    assert_eq!(cfg.prefix, "docs/");
    assert!(cfg.force_path_style);
    assert!(cfg.insecure);

    unsafe {
        for (name, _) in VARS {
            std::env::remove_var(name);
        }
    }
}
|
||||
|
||||
/// Combining `CONTROL_S3_INSECURE=true` with an `https://` endpoint is a
/// configuration error that names both settings.
#[test]
fn config_rejects_insecure_with_https_endpoint() {
    const VARS: [(&str, &str); 5] = [
        ("CONTROL_S3_ENDPOINT", "https://s3.example.com"),
        ("CONTROL_S3_ACCESS_KEY_ID", "a"),
        ("CONTROL_S3_SECRET_ACCESS_KEY", "b"),
        (
            "CONTROL_S3_BUCKET_DOCS",
            "cloudlysis-docs-0,cloudlysis-docs-1",
        ),
        ("CONTROL_S3_INSECURE", "true"),
    ];

    // Hold the lock for the whole test: the environment is process-global.
    let _guard = env_lock();
    unsafe {
        for (name, value) in VARS {
            std::env::set_var(name, value);
        }
    }

    let err = DocsConfig::from_env().unwrap_err();
    let mentions_flag = err.contains("CONTROL_S3_INSECURE=true");
    assert!(
        mentions_flag && err.contains("https://"),
        "unexpected error: {err}"
    );

    unsafe {
        for (name, _) in VARS {
            std::env::remove_var(name);
        }
    }
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn key_scheme_is_stable() {
|
||||
let cfg = DocsConfig {
|
||||
endpoint: "http://minio:9000".to_string(),
|
||||
public_endpoint: None,
|
||||
region: "us-east-1".to_string(),
|
||||
access_key_id: "x".to_string(),
|
||||
secret_access_key: "y".to_string(),
|
||||
force_path_style: true,
|
||||
insecure: true,
|
||||
buckets: vec![
|
||||
"cloudlysis-docs-0".to_string(),
|
||||
"cloudlysis-docs-1".to_string(),
|
||||
],
|
||||
prefix: "docs/".to_string(),
|
||||
};
|
||||
let store = DocsStore::new(cfg).await.unwrap();
|
||||
|
||||
let key = store
|
||||
.key_for("tenant-a", "deployments", "v1", "bundle.tar.gz")
|
||||
.unwrap();
|
||||
assert_eq!(key, "docs/tenant-a/deployments/v1/bundle.tar.gz");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn key_scheme_rejects_invalid_segments() {
|
||||
let cfg = DocsConfig {
|
||||
endpoint: "http://minio:9000".to_string(),
|
||||
public_endpoint: None,
|
||||
region: "us-east-1".to_string(),
|
||||
access_key_id: "x".to_string(),
|
||||
secret_access_key: "y".to_string(),
|
||||
force_path_style: true,
|
||||
insecure: true,
|
||||
buckets: vec!["cloudlysis-docs".to_string()],
|
||||
prefix: "docs/".to_string(),
|
||||
};
|
||||
let store = DocsStore::new(cfg).await.unwrap();
|
||||
|
||||
assert!(store.key_for("t/a", "x", "y", "z").is_err());
|
||||
assert!(store.key_for("t", "x", "../y", "z").is_err());
|
||||
assert!(store.key_for("t", "x", "y", "a/b").is_err());
|
||||
}
|
||||
}
|
||||
@@ -28,31 +28,49 @@ pub struct SwarmStateFile {
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct SwarmStore {
|
||||
path: std::path::PathBuf,
|
||||
inner: SwarmStoreInner,
|
||||
}
|
||||
|
||||
/// Where a `SwarmStore` gets its data from.
#[derive(Clone)]
enum SwarmStoreInner {
    /// Read swarm state from a JSON state file at `path`.
    File { path: std::path::PathBuf },
    /// Query the local `docker` command-line client.
    DockerCli,
}
|
||||
|
||||
impl SwarmStore {
|
||||
pub fn new(path: std::path::PathBuf) -> Self {
|
||||
Self { path }
|
||||
Self {
|
||||
inner: SwarmStoreInner::File { path },
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_docker_cli() -> Self {
|
||||
Self {
|
||||
inner: SwarmStoreInner::DockerCli,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn list_services(&self) -> Vec<SwarmService> {
|
||||
self.load().map(|s| s.services).unwrap_or_default()
|
||||
match &self.inner {
|
||||
SwarmStoreInner::File { path } => {
|
||||
load_state(path).map(|s| s.services).unwrap_or_default()
|
||||
}
|
||||
SwarmStoreInner::DockerCli => list_services_docker_cli().unwrap_or_default(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn list_tasks(&self, service_name: &str) -> Vec<SwarmTask> {
|
||||
self.load()
|
||||
.map(|s| {
|
||||
s.tasks
|
||||
.into_iter()
|
||||
.filter(|t| t.service == service_name)
|
||||
.collect()
|
||||
})
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
fn load(&self) -> Option<SwarmStateFile> {
|
||||
load_state(&self.path)
|
||||
match &self.inner {
|
||||
SwarmStoreInner::File { path } => load_state(path)
|
||||
.map(|s| {
|
||||
s.tasks
|
||||
.into_iter()
|
||||
.filter(|t| t.service == service_name)
|
||||
.collect()
|
||||
})
|
||||
.unwrap_or_default(),
|
||||
SwarmStoreInner::DockerCli => list_tasks_docker_cli(service_name).unwrap_or_default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -60,3 +78,120 @@ fn load_state(path: &Path) -> Option<SwarmStateFile> {
|
||||
let raw = fs::read_to_string(path).ok()?;
|
||||
serde_json::from_str(&raw).ok()
|
||||
}
|
||||
|
||||
fn list_services_docker_cli() -> Result<Vec<SwarmService>, String> {
|
||||
let out = std::process::Command::new("docker")
|
||||
.args(["service", "ls", "--format", "{{json .}}"])
|
||||
.output()
|
||||
.map_err(|e| format!("docker exec failed: {e}"))?;
|
||||
if !out.status.success() {
|
||||
return Err(format!(
|
||||
"docker service ls failed: {}",
|
||||
String::from_utf8_lossy(&out.stderr)
|
||||
));
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct ServiceRow {
|
||||
#[serde(rename = "Name")]
|
||||
name: String,
|
||||
#[serde(rename = "Image")]
|
||||
image: Option<String>,
|
||||
#[serde(rename = "Mode")]
|
||||
mode: Option<String>,
|
||||
#[serde(rename = "Replicas")]
|
||||
replicas: Option<String>,
|
||||
#[serde(rename = "UpdatedAt")]
|
||||
updated_at: Option<String>,
|
||||
}
|
||||
|
||||
let mut services = Vec::new();
|
||||
for line in String::from_utf8_lossy(&out.stdout).lines() {
|
||||
let line = line.trim();
|
||||
if line.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let row: ServiceRow =
|
||||
serde_json::from_str(line).map_err(|e| format!("invalid json row: {e}"))?;
|
||||
services.push(SwarmService {
|
||||
name: row.name,
|
||||
image: row.image,
|
||||
mode: row.mode,
|
||||
replicas: row.replicas,
|
||||
updated_at: row.updated_at,
|
||||
});
|
||||
}
|
||||
Ok(services)
|
||||
}
|
||||
|
||||
fn list_tasks_docker_cli(service_name: &str) -> Result<Vec<SwarmTask>, String> {
|
||||
let out = std::process::Command::new("docker")
|
||||
.args([
|
||||
"service",
|
||||
"ps",
|
||||
service_name,
|
||||
"--no-trunc",
|
||||
"--format",
|
||||
"{{json .}}",
|
||||
])
|
||||
.output()
|
||||
.map_err(|e| format!("docker exec failed: {e}"))?;
|
||||
if !out.status.success() {
|
||||
return Err(format!(
|
||||
"docker service ps failed: {}",
|
||||
String::from_utf8_lossy(&out.stderr)
|
||||
));
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct TaskRow {
|
||||
#[serde(rename = "ID")]
|
||||
id: String,
|
||||
#[serde(rename = "Name")]
|
||||
name: Option<String>,
|
||||
#[serde(rename = "Node")]
|
||||
node: Option<String>,
|
||||
#[serde(rename = "DesiredState")]
|
||||
desired_state: Option<String>,
|
||||
#[serde(rename = "CurrentState")]
|
||||
current_state: Option<String>,
|
||||
#[serde(rename = "Error")]
|
||||
error: Option<String>,
|
||||
}
|
||||
|
||||
let mut tasks = Vec::new();
|
||||
for line in String::from_utf8_lossy(&out.stdout).lines() {
|
||||
let line = line.trim();
|
||||
if line.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let row: TaskRow =
|
||||
serde_json::from_str(line).map_err(|e| format!("invalid json row: {e}"))?;
|
||||
let service = row
|
||||
.name
|
||||
.as_deref()
|
||||
.and_then(|n| n.split_once('.').map(|(svc, _)| svc.to_string()))
|
||||
.unwrap_or_else(|| service_name.to_string());
|
||||
tasks.push(SwarmTask {
|
||||
id: row.id,
|
||||
service,
|
||||
node: row.node,
|
||||
desired_state: row.desired_state,
|
||||
current_state: row.current_state,
|
||||
error: row.error,
|
||||
});
|
||||
}
|
||||
Ok(tasks)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A minimal state document with one service and no tasks deserializes.
    #[test]
    fn state_file_parses() {
        let document = r#"{"services":[{"name":"a","image":null,"mode":null,"replicas":null,"updated_at":null}],"tasks":[]}"#;
        let state = serde_json::from_str::<SwarmStateFile>(document).unwrap();
        assert_eq!(state.services.len(), 1);
    }
}
|
||||
|
||||
174
control/api/tests/billing_production_smoke_gated.rs
Normal file
174
control/api/tests/billing_production_smoke_gated.rs
Normal file
@@ -0,0 +1,174 @@
|
||||
use api::{
|
||||
AppState, AuditStore, AuthConfig, ConfigLocks, JobStore, PlacementStore, SwarmStore,
|
||||
TenantLocks, billing::BillingStore, config_registry::ConfigRegistry,
|
||||
};
|
||||
use axum::{
|
||||
Router,
|
||||
body::Body,
|
||||
http::{Request, StatusCode, header},
|
||||
};
|
||||
use jsonwebtoken::{EncodingKey, Header, encode};
|
||||
use metrics_exporter_prometheus::PrometheusBuilder;
|
||||
use serde::Serialize;
|
||||
use std::{
|
||||
path::PathBuf,
|
||||
sync::{Arc, OnceLock},
|
||||
};
|
||||
use tower::ServiceExt;
|
||||
use uuid::Uuid;
|
||||
|
||||
/// True only when `CONTROL_TEST_BILLING_PROD` is set to exactly `"1"`.
fn prod_enabled() -> bool {
    matches!(
        std::env::var("CONTROL_TEST_BILLING_PROD").as_deref(),
        Ok("1")
    )
}
|
||||
|
||||
static HANDLE: OnceLock<metrics_exporter_prometheus::PrometheusHandle> = OnceLock::new();
|
||||
|
||||
fn repo_root() -> PathBuf {
|
||||
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
||||
.parent()
|
||||
.and_then(|p| p.parent())
|
||||
.expect("api crate should live under repo root")
|
||||
.to_path_buf()
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct TestClaims {
|
||||
sub: String,
|
||||
session_id: String,
|
||||
permissions: Vec<String>,
|
||||
exp: usize,
|
||||
}
|
||||
|
||||
fn make_token(secret: &[u8], perms: &[&str]) -> String {
|
||||
let exp = (std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_secs()
|
||||
+ 60) as usize;
|
||||
encode(
|
||||
&Header::default(),
|
||||
&TestClaims {
|
||||
sub: "user_1".to_string(),
|
||||
session_id: "sess_1".to_string(),
|
||||
permissions: perms.iter().map(|p| (*p).to_string()).collect(),
|
||||
exp,
|
||||
},
|
||||
&EncodingKey::from_secret(secret),
|
||||
)
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
fn test_app() -> Router {
|
||||
let handle = HANDLE
|
||||
.get_or_init(|| {
|
||||
PrometheusBuilder::new()
|
||||
.install_recorder()
|
||||
.expect("failed to install prometheus recorder")
|
||||
})
|
||||
.clone();
|
||||
|
||||
let provider_type =
|
||||
std::env::var("CONTROL_BILLING_PROVIDER").unwrap_or_else(|_| "mock".to_string());
|
||||
let billing_provider: Arc<dyn api::billing::BillingProvider> = match provider_type.as_str() {
|
||||
"stripe" => Arc::new(api::billing::StripeProvider {
|
||||
secret_key: std::env::var("CONTROL_STRIPE_SECRET_KEY").unwrap_or_default(),
|
||||
price_pro: std::env::var("CONTROL_STRIPE_PRICE_ID_PRO").unwrap_or_default(),
|
||||
price_enterprise: std::env::var("CONTROL_STRIPE_PRICE_ID_ENTERPRISE")
|
||||
.unwrap_or_default(),
|
||||
}),
|
||||
_ => Arc::new(api::billing::MockProvider),
|
||||
};
|
||||
|
||||
api::build_app(AppState {
|
||||
prometheus: handle,
|
||||
auth: AuthConfig {
|
||||
hs256_secret: Some(b"test_secret".to_vec()),
|
||||
},
|
||||
jobs: JobStore::default(),
|
||||
audit: AuditStore::default(),
|
||||
tenant_locks: TenantLocks::default(),
|
||||
config_locks: ConfigLocks::default(),
|
||||
http: reqwest::Client::new(),
|
||||
placement: PlacementStore::new(repo_root().join("config/placement/dev.json")),
|
||||
billing: BillingStore::new(std::env::temp_dir().join("billing-prod-smoke.json")),
|
||||
billing_provider,
|
||||
billing_enforcement_enabled: true,
|
||||
config: ConfigRegistry::new(None, None),
|
||||
fleet_services: vec![],
|
||||
swarm: SwarmStore::new(repo_root().join("swarm/dev.json")),
|
||||
docs: None,
|
||||
})
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn billing_production_smoke_test() {
|
||||
if !prod_enabled() {
|
||||
eprintln!("skipping: set CONTROL_TEST_BILLING_PROD=1 to enable production smoke tests");
|
||||
return;
|
||||
}
|
||||
|
||||
let app = test_app();
|
||||
let token = make_token(b"test_secret", &["control:read", "control:write"]);
|
||||
let tenant_id = Uuid::new_v4();
|
||||
|
||||
// 1. Verify GET billing works (empty initially)
|
||||
let res = app
|
||||
.clone()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri(format!("/admin/v1/tenants/{tenant_id}/billing"))
|
||||
.header(header::AUTHORIZATION, format!("Bearer {token}"))
|
||||
.header("x-tenant-id", tenant_id.to_string())
|
||||
.body(Body::empty())
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(res.status(), StatusCode::OK);
|
||||
|
||||
// 2. Verify Checkout session generation
|
||||
let res = app
|
||||
.clone()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri(format!("/admin/v1/tenants/{tenant_id}/billing/checkout"))
|
||||
.method("POST")
|
||||
.header(header::AUTHORIZATION, format!("Bearer {token}"))
|
||||
.header("x-tenant-id", tenant_id.to_string())
|
||||
.header(header::CONTENT_TYPE, "application/json")
|
||||
.body(Body::from(
|
||||
serde_json::json!({
|
||||
"plan": "pro",
|
||||
"return_path": "/billing"
|
||||
})
|
||||
.to_string(),
|
||||
))
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(res.status(), StatusCode::OK);
|
||||
|
||||
let body = axum::body::to_bytes(res.into_body(), 1024 * 1024)
|
||||
.await
|
||||
.unwrap();
|
||||
let v: serde_json::Value = serde_json::from_slice(&body).unwrap();
|
||||
assert!(v.get("url").and_then(|u| u.as_str()).is_some());
|
||||
|
||||
// 3. Verify Portal session generation (may fail if tenant has no stripe customer id yet, which is expected for fresh tenant)
|
||||
let res = app
|
||||
.clone()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri(format!("/admin/v1/tenants/{tenant_id}/billing/portal"))
|
||||
.method("POST")
|
||||
.header(header::AUTHORIZATION, format!("Bearer {token}"))
|
||||
.header("x-tenant-id", tenant_id.to_string())
|
||||
.body(Body::empty())
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
// For smoke test, we just want to see it reached the provider and didn't crash
|
||||
assert!(res.status() == StatusCode::OK || res.status() == StatusCode::INTERNAL_SERVER_ERROR);
|
||||
}
|
||||
250
control/api/tests/config_nats_env_gated.rs
Normal file
250
control/api/tests/config_nats_env_gated.rs
Normal file
@@ -0,0 +1,250 @@
|
||||
use api::{
|
||||
AppState, AuditStore, AuthConfig, ConfigLocks, ConfigRegistry, JobStore, PlacementStore,
|
||||
SwarmStore, TenantLocks, config_registry::NatsKvSource,
|
||||
};
|
||||
use axum::{
|
||||
Router,
|
||||
body::Body,
|
||||
http::{Request, StatusCode, header},
|
||||
};
|
||||
use jsonwebtoken::{EncodingKey, Header, encode};
|
||||
use metrics_exporter_prometheus::PrometheusBuilder;
|
||||
use serde::Serialize;
|
||||
use std::{path::PathBuf, sync::OnceLock, time::Duration};
|
||||
use tower::ServiceExt;
|
||||
use uuid::Uuid;
|
||||
|
||||
/// True when `CONTROL_TEST_NATS` is exactly `"1"` and `CONTROL_TEST_NATS_URL`
/// is set to a readable value.
fn enabled() -> bool {
    let gate_on = matches!(std::env::var("CONTROL_TEST_NATS").as_deref(), Ok("1"));
    gate_on && std::env::var("CONTROL_TEST_NATS_URL").is_ok()
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct TestClaims {
|
||||
sub: String,
|
||||
session_id: String,
|
||||
permissions: Vec<String>,
|
||||
exp: usize,
|
||||
}
|
||||
|
||||
fn make_token(secret: &[u8], perms: &[&str]) -> String {
|
||||
let exp = (std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_secs()
|
||||
+ 60) as usize;
|
||||
encode(
|
||||
&Header::default(),
|
||||
&TestClaims {
|
||||
sub: "user_1".to_string(),
|
||||
session_id: "sess_1".to_string(),
|
||||
permissions: perms.iter().map(|p| (*p).to_string()).collect(),
|
||||
exp,
|
||||
},
|
||||
&EncodingKey::from_secret(secret),
|
||||
)
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
static HANDLE: OnceLock<metrics_exporter_prometheus::PrometheusHandle> = OnceLock::new();
|
||||
|
||||
fn repo_root() -> PathBuf {
|
||||
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
||||
.parent()
|
||||
.and_then(|p| p.parent())
|
||||
.expect("api crate should live under repo root")
|
||||
.to_path_buf()
|
||||
}
|
||||
|
||||
async fn wait_done(app: Router, job_id: Uuid, token: &str) -> serde_json::Value {
|
||||
let start = tokio::time::Instant::now();
|
||||
loop {
|
||||
let res = app
|
||||
.clone()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri(format!("/admin/v1/jobs/{job_id}"))
|
||||
.header(header::AUTHORIZATION, format!("Bearer {token}"))
|
||||
.body(Body::empty())
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(res.status(), StatusCode::OK);
|
||||
let body = axum::body::to_bytes(res.into_body(), 1024 * 1024)
|
||||
.await
|
||||
.unwrap();
|
||||
let job: serde_json::Value = serde_json::from_slice(&body).unwrap();
|
||||
let status = job
|
||||
.get("status")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("unknown");
|
||||
if status != "pending" && status != "running" {
|
||||
return job;
|
||||
}
|
||||
|
||||
if start.elapsed() > Duration::from_secs(2) {
|
||||
return job;
|
||||
}
|
||||
tokio::time::sleep(Duration::from_millis(25)).await;
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn config_jobs_with_nats_kv_are_env_gated() {
|
||||
if !enabled() {
|
||||
eprintln!(
|
||||
"skipping: set CONTROL_TEST_NATS=1 and CONTROL_TEST_NATS_URL=nats://... to enable nats config tests"
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
let nats_url = std::env::var("CONTROL_TEST_NATS_URL").unwrap();
|
||||
unsafe {
|
||||
std::env::set_var("CONTROL_CONFIG_NATS_URL", &nats_url);
|
||||
}
|
||||
|
||||
let bucket = format!("cloudlysis-test-config-{}", Uuid::new_v4());
|
||||
let routing_key = format!("routing/{}", Uuid::new_v4());
|
||||
let placement_key = format!("placement/{}", Uuid::new_v4());
|
||||
|
||||
let routing_src = NatsKvSource::connect(nats_url.clone(), bucket.clone(), routing_key)
|
||||
.await
|
||||
.expect("connect routing kv");
|
||||
let placement_src = NatsKvSource::connect(nats_url.clone(), bucket.clone(), placement_key)
|
||||
.await
|
||||
.expect("connect placement kv");
|
||||
|
||||
let config = ConfigRegistry::new(
|
||||
Some(std::sync::Arc::new(routing_src)),
|
||||
Some(std::sync::Arc::new(placement_src)),
|
||||
);
|
||||
|
||||
let secret = b"test_secret".to_vec();
|
||||
let token = make_token(&secret, &["control:write", "control:read"]);
|
||||
|
||||
let handle = HANDLE
|
||||
.get_or_init(|| {
|
||||
PrometheusBuilder::new()
|
||||
.install_recorder()
|
||||
.expect("failed to install prometheus recorder")
|
||||
})
|
||||
.clone();
|
||||
|
||||
let app = api::build_app(AppState {
|
||||
prometheus: handle,
|
||||
auth: AuthConfig {
|
||||
hs256_secret: Some(secret),
|
||||
},
|
||||
jobs: JobStore::default(),
|
||||
audit: AuditStore::default(),
|
||||
tenant_locks: TenantLocks::default(),
|
||||
config_locks: ConfigLocks::default(),
|
||||
http: reqwest::Client::new(),
|
||||
placement: PlacementStore::new(repo_root().join("config/placement/dev.json")),
|
||||
billing: api::billing::BillingStore::new(std::env::temp_dir().join("billing-test.json")),
|
||||
billing_provider: std::sync::Arc::new(api::billing::MockProvider),
|
||||
billing_enforcement_enabled: false,
|
||||
config,
|
||||
fleet_services: vec![],
|
||||
swarm: SwarmStore::new(repo_root().join("swarm/dev.json")),
|
||||
docs: None,
|
||||
});
|
||||
|
||||
let routing_value = serde_json::json!({
|
||||
"revision": 1,
|
||||
"aggregate_placement": { "t1": "local" },
|
||||
"projection_placement": { "t1": "local" },
|
||||
"runner_placement": { "t1": "local" },
|
||||
"aggregate_shards": { "local": ["http://aggregate:50051"] },
|
||||
"projection_shards": { "local": ["http://projection:8080"] },
|
||||
"runner_shards": { "local": ["http://runner:8080"] }
|
||||
});
|
||||
|
||||
let apply = app
|
||||
.clone()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri("/admin/v1/jobs/config/apply")
|
||||
.method("POST")
|
||||
.header(header::AUTHORIZATION, format!("Bearer {token}"))
|
||||
.header("idempotency-key", format!("k-{}", Uuid::new_v4()))
|
||||
.header(header::CONTENT_TYPE, "application/json")
|
||||
.body(Body::from(
|
||||
serde_json::json!({
|
||||
"domain": "routing",
|
||||
"expected_revision": null,
|
||||
"reason": "test apply",
|
||||
"value": routing_value
|
||||
})
|
||||
.to_string(),
|
||||
))
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(apply.status(), StatusCode::OK);
|
||||
let body = axum::body::to_bytes(apply.into_body(), 1024 * 1024)
|
||||
.await
|
||||
.unwrap();
|
||||
let v: serde_json::Value = serde_json::from_slice(&body).unwrap();
|
||||
let job_id = Uuid::parse_str(v.get("job_id").unwrap().as_str().unwrap()).unwrap();
|
||||
|
||||
let job = wait_done(app.clone(), job_id, &token).await;
|
||||
assert_eq!(
|
||||
job.get("status").and_then(|v| v.as_str()),
|
||||
Some("succeeded")
|
||||
);
|
||||
|
||||
let get = app
|
||||
.clone()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri("/admin/v1/config/routing")
|
||||
.header(header::AUTHORIZATION, format!("Bearer {token}"))
|
||||
.body(Body::empty())
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(get.status(), StatusCode::OK);
|
||||
let body = axum::body::to_bytes(get.into_body(), 1024 * 1024)
|
||||
.await
|
||||
.unwrap();
|
||||
let got: serde_json::Value = serde_json::from_slice(&body).unwrap();
|
||||
assert_eq!(got.get("domain").unwrap().as_str().unwrap(), "routing");
|
||||
assert!(got.get("revision").unwrap().as_u64().unwrap_or(0) > 0);
|
||||
|
||||
let rollback = app
|
||||
.clone()
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri("/admin/v1/jobs/config/rollback")
|
||||
.method("POST")
|
||||
.header(header::AUTHORIZATION, format!("Bearer {token}"))
|
||||
.header("idempotency-key", format!("k-{}", Uuid::new_v4()))
|
||||
.header(header::CONTENT_TYPE, "application/json")
|
||||
.body(Body::from(
|
||||
serde_json::json!({
|
||||
"domain": "routing",
|
||||
"reason": "test rollback"
|
||||
})
|
||||
.to_string(),
|
||||
))
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(rollback.status(), StatusCode::OK);
|
||||
let body = axum::body::to_bytes(rollback.into_body(), 1024 * 1024)
|
||||
.await
|
||||
.unwrap();
|
||||
let v: serde_json::Value = serde_json::from_slice(&body).unwrap();
|
||||
let rb_id = Uuid::parse_str(v.get("job_id").unwrap().as_str().unwrap()).unwrap();
|
||||
|
||||
let rb_job = wait_done(app.clone(), rb_id, &token).await;
|
||||
assert_eq!(
|
||||
rb_job.get("status").and_then(|v| v.as_str()),
|
||||
Some("succeeded")
|
||||
);
|
||||
}
|
||||
157
control/api/tests/control_api_smoke_env_gated.rs
Normal file
157
control/api/tests/control_api_smoke_env_gated.rs
Normal file
@@ -0,0 +1,157 @@
|
||||
use jsonwebtoken::{EncodingKey, Header, encode};
|
||||
use reqwest::StatusCode;
|
||||
use serde::Serialize;
|
||||
use serde_json::json;
|
||||
use std::time::Duration;
|
||||
use uuid::Uuid;
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct TestClaims {
|
||||
sub: String,
|
||||
session_id: String,
|
||||
permissions: Vec<String>,
|
||||
exp: usize,
|
||||
}
|
||||
|
||||
fn make_token(secret: &[u8], perms: &[&str]) -> String {
|
||||
let exp = (std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_secs()
|
||||
+ 300) as usize;
|
||||
encode(
|
||||
&Header::default(),
|
||||
&TestClaims {
|
||||
sub: "smoke".to_string(),
|
||||
session_id: "smoke".to_string(),
|
||||
permissions: perms.iter().map(|p| (*p).to_string()).collect(),
|
||||
exp,
|
||||
},
|
||||
&EncodingKey::from_secret(secret),
|
||||
)
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn control_api_docs_smoke_is_env_gated() {
|
||||
let enabled = std::env::var("CONTROL_TEST_SMOKE").ok();
|
||||
if enabled.as_deref() != Some("1") {
|
||||
eprintln!("skipping: set CONTROL_TEST_SMOKE=1 to enable env smoke tests");
|
||||
return;
|
||||
}
|
||||
|
||||
let base_url =
|
||||
std::env::var("CONTROL_TEST_BASE_URL").expect("CONTROL_TEST_BASE_URL is required");
|
||||
let base_url = base_url.trim_end_matches('/').to_string();
|
||||
|
||||
// Either provide a token directly, or provide secret+perms to mint one.
|
||||
let token = if let Ok(t) = std::env::var("CONTROL_TEST_TOKEN") {
|
||||
t
|
||||
} else {
|
||||
let secret = std::env::var("CONTROL_TEST_JWT_SECRET")
|
||||
.expect("CONTROL_TEST_TOKEN or CONTROL_TEST_JWT_SECRET is required");
|
||||
make_token(secret.as_bytes(), &["control:read", "control:write"])
|
||||
};
|
||||
|
||||
let tenant_id = std::env::var("CONTROL_TEST_TENANT_ID")
|
||||
.ok()
|
||||
.unwrap_or_else(|| Uuid::new_v4().to_string());
|
||||
|
||||
let http = reqwest::Client::builder()
|
||||
.timeout(Duration::from_secs(15))
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
// Health.
|
||||
let health = http
|
||||
.get(format!("{base_url}/health"))
|
||||
.send()
|
||||
.await
|
||||
.expect("health request failed");
|
||||
assert!(health.status().is_success(), "health not ok");
|
||||
|
||||
// Presign upload.
|
||||
let doc_id = Uuid::new_v4().to_string();
|
||||
let filename = "smoke.txt";
|
||||
let presign_up = http
|
||||
.post(format!(
|
||||
"{base_url}/admin/v1/tenants/{tenant_id}/docs/presign/upload"
|
||||
))
|
||||
.header("authorization", format!("Bearer {token}"))
|
||||
.header("x-tenant-id", &tenant_id)
|
||||
.json(&json!({
|
||||
"doc_type": "deployments",
|
||||
"doc_id": doc_id,
|
||||
"filename": filename,
|
||||
"content_type": "text/plain",
|
||||
}))
|
||||
.send()
|
||||
.await
|
||||
.expect("presign upload failed");
|
||||
assert!(
|
||||
presign_up.status().is_success(),
|
||||
"presign upload not ok: {}",
|
||||
presign_up.status()
|
||||
);
|
||||
let up_json: serde_json::Value = presign_up.json().await.unwrap();
|
||||
let put_url = up_json.get("url").and_then(|v| v.as_str()).unwrap();
|
||||
let key = up_json
|
||||
.get("key")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap()
|
||||
.to_string();
|
||||
|
||||
// PUT bytes to S3 directly.
|
||||
let payload = b"hello-smoke".to_vec();
|
||||
let put = http
|
||||
.put(put_url)
|
||||
.header("content-type", "text/plain")
|
||||
.body(payload.clone())
|
||||
.send()
|
||||
.await
|
||||
.expect("s3 put failed");
|
||||
assert!(put.status().is_success(), "s3 put not ok: {}", put.status());
|
||||
|
||||
// List should include key.
|
||||
let list = http
|
||||
.get(format!(
|
||||
"{base_url}/admin/v1/tenants/{tenant_id}/docs?prefix=deployments/"
|
||||
))
|
||||
.header("authorization", format!("Bearer {token}"))
|
||||
.header("x-tenant-id", &tenant_id)
|
||||
.send()
|
||||
.await
|
||||
.expect("list failed");
|
||||
assert!(list.status().is_success(), "list not ok");
|
||||
let list_json: serde_json::Value = list.json().await.unwrap();
|
||||
let objects = list_json.get("objects").and_then(|v| v.as_array()).unwrap();
|
||||
assert!(
|
||||
objects
|
||||
.iter()
|
||||
.any(|o| o.get("key").and_then(|k| k.as_str()) == Some(key.as_str())),
|
||||
"expected list to include presigned upload key"
|
||||
);
|
||||
|
||||
// Presign download and fetch bytes.
|
||||
let presign_down = http
|
||||
.post(format!(
|
||||
"{base_url}/admin/v1/tenants/{tenant_id}/docs/presign/download"
|
||||
))
|
||||
.header("authorization", format!("Bearer {token}"))
|
||||
.header("x-tenant-id", &tenant_id)
|
||||
.json(&json!({ "key": key }))
|
||||
.send()
|
||||
.await
|
||||
.expect("presign download failed");
|
||||
assert!(
|
||||
presign_down.status().is_success(),
|
||||
"presign download not ok"
|
||||
);
|
||||
let down_json: serde_json::Value = presign_down.json().await.unwrap();
|
||||
let get_url = down_json.get("url").and_then(|v| v.as_str()).unwrap();
|
||||
|
||||
let got = http.get(get_url).send().await.expect("s3 get failed");
|
||||
assert_eq!(got.status(), StatusCode::OK);
|
||||
let got_bytes = got.bytes().await.unwrap().to_vec();
|
||||
assert_eq!(got_bytes, payload);
|
||||
}
|
||||
@@ -11,7 +11,7 @@ fn repo_root() -> PathBuf {
|
||||
#[test]
|
||||
fn docker_compose_files_parse_and_include_required_services() {
|
||||
let root = repo_root();
|
||||
let compose = fs::read_to_string(root.join("observability/docker-compose.yml")).unwrap();
|
||||
let compose = fs::read_to_string(root.join("docker-compose.yml")).unwrap();
|
||||
let v: serde_yaml::Value = serde_yaml::from_str(&compose).unwrap();
|
||||
|
||||
let services = v
|
||||
@@ -19,7 +19,15 @@ fn docker_compose_files_parse_and_include_required_services() {
|
||||
.and_then(|x| x.as_mapping())
|
||||
.expect("missing services");
|
||||
|
||||
for required in ["grafana", "victoria-metrics", "vmagent", "loki", "tempo"] {
|
||||
// Core + optional observability services are all declared in one compose file.
|
||||
for required in [
|
||||
"grafana",
|
||||
"victoria-metrics",
|
||||
"vmagent",
|
||||
"loki",
|
||||
"tempo",
|
||||
"mailhog",
|
||||
] {
|
||||
assert!(
|
||||
services.contains_key(serde_yaml::Value::String(required.to_string())),
|
||||
"missing service {required}"
|
||||
@@ -28,17 +36,19 @@ fn docker_compose_files_parse_and_include_required_services() {
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[ignore]
|
||||
async fn docker_compose_config_validation_is_gated_and_fast() {
|
||||
let enabled = std::env::var("CONTROL_TEST_DOCKER").ok();
|
||||
assert_eq!(enabled.as_deref(), Some("1"));
|
||||
if enabled.as_deref() != Some("1") {
|
||||
eprintln!("skipping: set CONTROL_TEST_DOCKER=1 to enable docker compose validation");
|
||||
return;
|
||||
}
|
||||
|
||||
let root = repo_root();
|
||||
let compose = root.join("observability/docker-compose.yml");
|
||||
let compose = root.join("docker-compose.yml");
|
||||
|
||||
let cmd = tokio::process::Command::new("docker")
|
||||
.args(["compose", "-f"])
|
||||
.arg(compose)
|
||||
.arg(&compose)
|
||||
.args(["config"])
|
||||
.output();
|
||||
|
||||
@@ -52,4 +62,22 @@ async fn docker_compose_config_validation_is_gated_and_fast() {
|
||||
"docker compose config failed: {}",
|
||||
String::from_utf8_lossy(&out.stderr)
|
||||
);
|
||||
|
||||
// Validate full-stack profile wiring too.
|
||||
let cmd = tokio::process::Command::new("docker")
|
||||
.args(["compose", "-f"])
|
||||
.arg(&compose)
|
||||
.args(["--profile", "observability", "config"])
|
||||
.output();
|
||||
|
||||
let out = tokio::time::timeout(Duration::from_secs(10), cmd)
|
||||
.await
|
||||
.expect("docker compose config (observability profile) timed out")
|
||||
.expect("failed to run docker compose config (observability profile)");
|
||||
|
||||
assert!(
|
||||
out.status.success(),
|
||||
"docker compose config (observability profile) failed: {}",
|
||||
String::from_utf8_lossy(&out.stderr)
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
#[test]
|
||||
#[ignore]
|
||||
fn docker_integration_tests_are_gated() {
|
||||
let enabled = std::env::var("CONTROL_TEST_DOCKER").ok();
|
||||
if enabled.as_deref() != Some("1") {
|
||||
eprintln!("skipping: set CONTROL_TEST_DOCKER=1 to enable docker integration tests");
|
||||
return;
|
||||
}
|
||||
assert_eq!(enabled.as_deref(), Some("1"));
|
||||
}
|
||||
|
||||
169
control/api/tests/docs_e2e_docker_gated.rs
Normal file
169
control/api/tests/docs_e2e_docker_gated.rs
Normal file
@@ -0,0 +1,169 @@
|
||||
use jsonwebtoken::{EncodingKey, Header, encode};
|
||||
use reqwest::header::{HeaderMap, HeaderValue};
|
||||
use serde::Serialize;
|
||||
use std::{path::PathBuf, process::Command, time::Duration};
|
||||
use uuid::Uuid;
|
||||
|
||||
fn repo_root() -> PathBuf {
|
||||
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
||||
.parent()
|
||||
.and_then(|p| p.parent())
|
||||
.expect("api crate should live under repo root")
|
||||
.to_path_buf()
|
||||
}
|
||||
|
||||
/// Reports whether docker-backed tests are switched on: true only when the
/// `CONTROL_TEST_DOCKER` environment variable is set and, after trimming
/// surrounding whitespace, equals `1`.
fn docker_enabled() -> bool {
    match std::env::var("CONTROL_TEST_DOCKER") {
        Ok(flag) => flag.trim() == "1",
        Err(_) => false,
    }
}
|
||||
|
||||
fn compose_file() -> PathBuf {
|
||||
repo_root().join("docker-compose.yml")
|
||||
}
|
||||
|
||||
/// Claim set serialized into the JWT produced by `make_token`.
#[derive(Serialize)]
struct TestClaims {
    // Subject; `make_token` always fills this with "user_1".
    sub: String,
    // Session identifier; `make_token` always fills this with "sess_1".
    session_id: String,
    // Permission strings granted to the token (e.g. "control:read").
    permissions: Vec<String>,
    // Expiry, in seconds since the Unix epoch.
    exp: usize,
}
|
||||
|
||||
fn make_token(secret: &[u8], perms: &[&str]) -> String {
|
||||
let exp = (std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_secs()
|
||||
+ 300) as usize;
|
||||
encode(
|
||||
&Header::default(),
|
||||
&TestClaims {
|
||||
sub: "user_1".to_string(),
|
||||
session_id: "sess_1".to_string(),
|
||||
permissions: perms.iter().map(|p| (*p).to_string()).collect(),
|
||||
exp,
|
||||
},
|
||||
&EncodingKey::from_secret(secret),
|
||||
)
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn documents_upload_list_download_roundtrip_via_control_api_compose() {
|
||||
if !docker_enabled() {
|
||||
eprintln!("skipping: set CONTROL_TEST_DOCKER=1 to enable docker compose tests");
|
||||
return;
|
||||
}
|
||||
|
||||
// Must match docker-compose.yml CONTROL_GATEWAY_JWT_HS256_SECRET.
|
||||
let jwt_secret = b"dev_secret";
|
||||
let token = make_token(jwt_secret, &["control:read", "control:write"]);
|
||||
|
||||
let compose = compose_file();
|
||||
|
||||
let up = Command::new("docker")
|
||||
.args(["compose", "-f"])
|
||||
.arg(&compose)
|
||||
.args(["up", "-d", "control-api"])
|
||||
.status()
|
||||
.expect("failed to run docker compose up control-api");
|
||||
assert!(up.success(), "docker compose up control-api failed");
|
||||
|
||||
// Wait for control-api to be reachable (port publish is in compose).
|
||||
let http = reqwest::Client::builder()
|
||||
.timeout(Duration::from_secs(10))
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let base = "http://127.0.0.1:38080";
|
||||
let health_deadline = tokio::time::Instant::now() + Duration::from_secs(30);
|
||||
loop {
|
||||
if tokio::time::Instant::now() > health_deadline {
|
||||
panic!("control-api did not become healthy in time");
|
||||
}
|
||||
match http.get(format!("{base}/health")).send().await {
|
||||
Ok(res) if res.status().is_success() => break,
|
||||
_ => tokio::time::sleep(Duration::from_millis(250)).await,
|
||||
}
|
||||
}
|
||||
|
||||
let tenant_id = Uuid::new_v4().to_string();
|
||||
let doc_type = "deployments";
|
||||
let doc_id = Uuid::new_v4().to_string();
|
||||
let filename = "hello.txt";
|
||||
let bytes = b"hello-docs".to_vec();
|
||||
|
||||
let mut headers = HeaderMap::new();
|
||||
headers.insert(
|
||||
"authorization",
|
||||
HeaderValue::from_str(&format!("Bearer {token}")).unwrap(),
|
||||
);
|
||||
headers.insert("x-tenant-id", HeaderValue::from_str(&tenant_id).unwrap());
|
||||
|
||||
// Upload (proxy endpoint).
|
||||
let put_url =
|
||||
format!("{base}/admin/v1/tenants/{tenant_id}/docs/{doc_type}/{doc_id}/{filename}");
|
||||
let put = http
|
||||
.put(&put_url)
|
||||
.headers(headers.clone())
|
||||
.header("content-type", "text/plain")
|
||||
.body(bytes.clone())
|
||||
.send()
|
||||
.await
|
||||
.expect("upload request failed");
|
||||
assert!(
|
||||
put.status().is_success(),
|
||||
"upload failed: {}",
|
||||
put.text().await.unwrap_or_default()
|
||||
);
|
||||
let put_json: serde_json::Value = put.json().await.expect("invalid upload json");
|
||||
let key = put_json
|
||||
.get("key")
|
||||
.and_then(|v| v.as_str())
|
||||
.expect("missing key")
|
||||
.to_string();
|
||||
|
||||
// List should include the key.
|
||||
let list_url = format!("{base}/admin/v1/tenants/{tenant_id}/docs?prefix={doc_type}/");
|
||||
let list = http
|
||||
.get(&list_url)
|
||||
.headers(headers.clone())
|
||||
.send()
|
||||
.await
|
||||
.expect("list request failed");
|
||||
assert!(list.status().is_success(), "list failed");
|
||||
let list_json: serde_json::Value = list.json().await.expect("invalid list json");
|
||||
let objects = list_json
|
||||
.get("objects")
|
||||
.and_then(|v| v.as_array())
|
||||
.expect("missing objects");
|
||||
assert!(
|
||||
objects
|
||||
.iter()
|
||||
.any(|o| o.get("key").and_then(|k| k.as_str()) == Some(key.as_str())),
|
||||
"expected list to include uploaded key"
|
||||
);
|
||||
|
||||
// Download (proxy endpoint) returns same bytes.
|
||||
let get_url = format!(
|
||||
"{base}/admin/v1/tenants/{tenant_id}/docs/object/{}",
|
||||
urlencoding::encode(&key)
|
||||
);
|
||||
let got = http
|
||||
.get(&get_url)
|
||||
.headers(headers.clone())
|
||||
.send()
|
||||
.await
|
||||
.expect("download request failed");
|
||||
assert!(got.status().is_success(), "download failed");
|
||||
let got_bytes = got.bytes().await.expect("download bytes failed").to_vec();
|
||||
assert_eq!(got_bytes, bytes);
|
||||
|
||||
// Best-effort cleanup.
|
||||
let _ = Command::new("docker")
|
||||
.args(["compose", "-f"])
|
||||
.arg(&compose)
|
||||
.args(["down", "-v"])
|
||||
.status();
|
||||
}
|
||||
123
control/api/tests/drift_classification.rs
Normal file
123
control/api/tests/drift_classification.rs
Normal file
@@ -0,0 +1,123 @@
|
||||
use api::{
|
||||
AppState, AuditStore, AuthConfig, ConfigLocks, ConfigRegistry, JobStore, PlacementStore,
|
||||
SwarmStore, TenantLocks,
|
||||
};
|
||||
use axum::{
|
||||
Router,
|
||||
body::Body,
|
||||
http::{Request, StatusCode, header},
|
||||
};
|
||||
use jsonwebtoken::{EncodingKey, Header, encode};
|
||||
use metrics_exporter_prometheus::PrometheusBuilder;
|
||||
use serde::Serialize;
|
||||
use std::{fs, path::PathBuf, sync::OnceLock};
|
||||
use tower::ServiceExt;
|
||||
|
||||
// Shared Prometheus recorder handle. `test_app_with_swarm` fills it through
// `get_or_init`, so the recorder is installed only on the first call.
static HANDLE: OnceLock<metrics_exporter_prometheus::PrometheusHandle> = OnceLock::new();
|
||||
|
||||
fn repo_root() -> PathBuf {
|
||||
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
||||
.parent()
|
||||
.and_then(|p| p.parent())
|
||||
.expect("api crate should live under repo root")
|
||||
.to_path_buf()
|
||||
}
|
||||
|
||||
/// Claim set serialized into the JWT produced by `make_token`.
#[derive(Serialize)]
struct TestClaims {
    // Subject; `make_token` always fills this with "user_1".
    sub: String,
    // Session identifier; `make_token` always fills this with "sess_1".
    session_id: String,
    // Permission strings granted to the token (e.g. "control:read").
    permissions: Vec<String>,
    // Expiry, in seconds since the Unix epoch.
    exp: usize,
}
|
||||
|
||||
fn make_token(perms: &[&str]) -> String {
|
||||
let exp = (std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_secs()
|
||||
+ 60) as usize;
|
||||
encode(
|
||||
&Header::default(),
|
||||
&TestClaims {
|
||||
sub: "user_1".to_string(),
|
||||
session_id: "sess_1".to_string(),
|
||||
permissions: perms.iter().map(|p| (*p).to_string()).collect(),
|
||||
exp,
|
||||
},
|
||||
&EncodingKey::from_secret(b"test_secret"),
|
||||
)
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
fn temp_swarm_file(raw: &str) -> PathBuf {
|
||||
let mut dst = std::env::temp_dir();
|
||||
dst.push(format!(
|
||||
"cloudlysis-control-swarm-{}-{}.json",
|
||||
std::process::id(),
|
||||
uuid::Uuid::new_v4()
|
||||
));
|
||||
fs::write(&dst, raw).expect("failed to write temp swarm file");
|
||||
dst
|
||||
}
|
||||
|
||||
fn test_app_with_swarm(swarm_path: PathBuf) -> Router {
|
||||
let handle = HANDLE
|
||||
.get_or_init(|| {
|
||||
PrometheusBuilder::new()
|
||||
.install_recorder()
|
||||
.expect("failed to install prometheus recorder")
|
||||
})
|
||||
.clone();
|
||||
api::build_app(AppState {
|
||||
prometheus: handle,
|
||||
auth: AuthConfig {
|
||||
hs256_secret: Some(b"test_secret".to_vec()),
|
||||
},
|
||||
jobs: JobStore::default(),
|
||||
audit: AuditStore::default(),
|
||||
tenant_locks: TenantLocks::default(),
|
||||
config_locks: ConfigLocks::default(),
|
||||
http: reqwest::Client::new(),
|
||||
placement: PlacementStore::new(repo_root().join("config/placement/dev.json")),
|
||||
billing: api::billing::BillingStore::new(
|
||||
std::env::temp_dir().join("billing-drift-test.json"),
|
||||
),
|
||||
billing_provider: std::sync::Arc::new(api::billing::MockProvider),
|
||||
billing_enforcement_enabled: false,
|
||||
config: ConfigRegistry::new(None, None),
|
||||
fleet_services: vec![],
|
||||
swarm: SwarmStore::new(swarm_path),
|
||||
docs: None,
|
||||
})
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn drift_marks_extra_services_vs_desired_observation_set() {
|
||||
let swarm = temp_swarm_file(
|
||||
r#"{ "services": [{"name":"extra-1","image":null,"mode":null,"replicas":null,"updated_at":null}], "tasks": [] }"#,
|
||||
);
|
||||
let app = test_app_with_swarm(swarm);
|
||||
let token = make_token(&["control:read"]);
|
||||
|
||||
let res = app
|
||||
.oneshot(
|
||||
Request::builder()
|
||||
.uri("/admin/v1/platform/drift")
|
||||
.header(header::AUTHORIZATION, format!("Bearer {token}"))
|
||||
.body(Body::empty())
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(res.status(), StatusCode::OK);
|
||||
let body = axum::body::to_bytes(res.into_body(), 1024 * 1024)
|
||||
.await
|
||||
.unwrap();
|
||||
let v: serde_json::Value = serde_json::from_slice(&body).unwrap();
|
||||
let items = v.get("items").and_then(|x| x.as_array()).unwrap();
|
||||
assert!(items.iter().any(|i| {
|
||||
i.get("kind").and_then(|k| k.as_str()) == Some("extra")
|
||||
&& i.get("service").and_then(|s| s.as_str()) == Some("extra-1")
|
||||
}));
|
||||
}
|
||||
137
control/api/tests/drift_docker_gated.rs
Normal file
137
control/api/tests/drift_docker_gated.rs
Normal file
@@ -0,0 +1,137 @@
|
||||
#[tokio::test]
|
||||
async fn platform_drift_docker_test_is_gated() {
|
||||
use tower::ServiceExt;
|
||||
|
||||
let enabled = std::env::var("CONTROL_TEST_DOCKER").ok();
|
||||
if enabled.as_deref() != Some("1") {
|
||||
eprintln!("skipping: set CONTROL_TEST_DOCKER=1 to enable docker drift tests");
|
||||
return;
|
||||
}
|
||||
|
||||
// We only run the "real" drift check when Swarm is available locally.
|
||||
// If Swarm isn't active, we skip to keep CI/dev machines happy.
|
||||
let info = std::process::Command::new("docker")
|
||||
.args(["info", "--format", "{{.Swarm.LocalNodeState}}"])
|
||||
.output();
|
||||
let Ok(info) = info else {
|
||||
eprintln!("skipping: docker not available");
|
||||
return;
|
||||
};
|
||||
if !info.status.success() {
|
||||
eprintln!("skipping: docker info failed");
|
||||
return;
|
||||
}
|
||||
let state = String::from_utf8_lossy(&info.stdout).trim().to_string();
|
||||
if state != "active" {
|
||||
eprintln!("skipping: docker swarm not active (LocalNodeState={state})");
|
||||
return;
|
||||
}
|
||||
|
||||
// Create a short-lived service so drift can see an "extra" observed service.
|
||||
let name = format!("cloudlysis-drift-extra-{}", uuid::Uuid::new_v4());
|
||||
let create = std::process::Command::new("docker")
|
||||
.args([
|
||||
"service",
|
||||
"create",
|
||||
"--name",
|
||||
&name,
|
||||
"--restart-condition",
|
||||
"none",
|
||||
"busybox:1.36",
|
||||
"sh",
|
||||
"-c",
|
||||
"sleep 60",
|
||||
])
|
||||
.output()
|
||||
.expect("docker service create");
|
||||
if !create.status.success() {
|
||||
eprintln!("skipping: failed to create swarm service (maybe permissions?)");
|
||||
return;
|
||||
}
|
||||
|
||||
// Ensure cleanup even if assertion fails.
|
||||
struct Cleanup(String);
|
||||
impl Drop for Cleanup {
|
||||
fn drop(&mut self) {
|
||||
let _ = std::process::Command::new("docker")
|
||||
.args(["service", "rm", &self.0])
|
||||
.output();
|
||||
}
|
||||
}
|
||||
let _cleanup = Cleanup(name.clone());
|
||||
|
||||
// Now call drift via a minimal in-process app configured for docker-cli swarm observation.
|
||||
let handle = metrics_exporter_prometheus::PrometheusBuilder::new()
|
||||
.install_recorder()
|
||||
.expect("failed to install prometheus recorder");
|
||||
|
||||
let app = api::build_app(api::AppState {
|
||||
prometheus: handle,
|
||||
auth: api::AuthConfig {
|
||||
hs256_secret: Some(b"test_secret".to_vec()),
|
||||
},
|
||||
jobs: api::JobStore::default(),
|
||||
audit: api::AuditStore::default(),
|
||||
tenant_locks: api::TenantLocks::default(),
|
||||
config_locks: api::ConfigLocks::default(),
|
||||
http: reqwest::Client::new(),
|
||||
placement: api::PlacementStore::new(
|
||||
std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
||||
.parent()
|
||||
.and_then(|p| p.parent())
|
||||
.unwrap()
|
||||
.join("config/placement/dev.json"),
|
||||
),
|
||||
billing: api::billing::BillingStore::new(
|
||||
std::env::temp_dir().join("billing-drift-test.json"),
|
||||
),
|
||||
billing_provider: std::sync::Arc::new(api::billing::MockProvider),
|
||||
billing_enforcement_enabled: false,
|
||||
config: api::ConfigRegistry::new(None, None),
|
||||
fleet_services: vec![],
|
||||
swarm: api::SwarmStore::new_docker_cli(),
|
||||
docs: None,
|
||||
});
|
||||
|
||||
// Auth token (control:read).
|
||||
let exp = (std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_secs()
|
||||
+ 60) as usize;
|
||||
let token = jsonwebtoken::encode(
|
||||
&jsonwebtoken::Header::default(),
|
||||
&serde_json::json!({
|
||||
"sub": "user_1",
|
||||
"session_id": "sess_1",
|
||||
"permissions": ["control:read"],
|
||||
"exp": exp
|
||||
}),
|
||||
&jsonwebtoken::EncodingKey::from_secret(b"test_secret"),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let res = app
|
||||
.oneshot(
|
||||
axum::http::Request::builder()
|
||||
.uri("/admin/v1/platform/drift")
|
||||
.header(axum::http::header::AUTHORIZATION, format!("Bearer {token}"))
|
||||
.body(axum::body::Body::empty())
|
||||
.unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(res.status(), axum::http::StatusCode::OK);
|
||||
let body = axum::body::to_bytes(res.into_body(), 1024 * 1024)
|
||||
.await
|
||||
.unwrap();
|
||||
let v: serde_json::Value = serde_json::from_slice(&body).unwrap();
|
||||
let items = v.get("items").and_then(|x| x.as_array()).unwrap();
|
||||
assert!(
|
||||
items.iter().any(|i| {
|
||||
i.get("kind").and_then(|k| k.as_str()) == Some("extra")
|
||||
&& i.get("service").and_then(|s| s.as_str()) == Some(name.as_str())
|
||||
}),
|
||||
"expected drift to include extra service {name}, got: {v}"
|
||||
);
|
||||
}
|
||||
77
control/api/tests/minio_compose_gated.rs
Normal file
77
control/api/tests/minio_compose_gated.rs
Normal file
@@ -0,0 +1,77 @@
|
||||
use std::{path::PathBuf, process::Command, time::Duration};
|
||||
|
||||
fn repo_root() -> PathBuf {
|
||||
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
||||
.parent()
|
||||
.and_then(|p| p.parent())
|
||||
.expect("api crate should live under repo root")
|
||||
.to_path_buf()
|
||||
}
|
||||
|
||||
/// Reports whether docker-backed tests are switched on: true only when the
/// `CONTROL_TEST_DOCKER` environment variable is set and, after trimming
/// surrounding whitespace, equals `1`.
fn docker_enabled() -> bool {
    std::env::var("CONTROL_TEST_DOCKER")
        .map(|flag| flag.trim() == "1")
        .unwrap_or(false)
}
|
||||
|
||||
fn compose_file() -> PathBuf {
|
||||
repo_root().join("docker-compose.yml")
|
||||
}
|
||||
|
||||
/// Starts the compose `minio` service, then uses the `minio-init` container
/// to run `mc ls` on the three `cloudlysis-docs-*` buckets with the dev
/// credentials. The commands must succeed and their output must mention
/// every bucket name.
///
/// Skipped (returns early) unless `CONTROL_TEST_DOCKER=1`.
#[test]
fn minio_docs_bucket_exists_and_credentials_work_in_compose_network() {
    if !docker_enabled() {
        eprintln!("skipping: set CONTROL_TEST_DOCKER=1 to enable docker compose tests");
        return;
    }

    let compose = compose_file();
    // Every invocation starts with `docker compose -f <compose file>`.
    let compose_cmd = || {
        let mut cmd = Command::new("docker");
        cmd.args(["compose", "-f"]).arg(&compose);
        cmd
    };

    let up = compose_cmd()
        .args(["up", "-d", "minio"])
        .status()
        .expect("failed to run docker compose up minio");
    assert!(up.success(), "docker compose up minio failed");

    // `minio-init` runs `mc` inside the compose network, where `minio:9000` resolves.
    let out = compose_cmd()
        .args([
            "run",
            "--rm",
            "minio-init",
            "/bin/sh",
            "-lc",
            "mc alias set local http://minio:9000 minioadmin minioadmin && mc ls local/cloudlysis-docs-0 && mc ls local/cloudlysis-docs-1 && mc ls local/cloudlysis-docs-2",
        ])
        .output()
        .expect("failed to run docker compose run minio-init");

    // Tear down before asserting so a failed check does not leave the stack up.
    let _ = compose_cmd().args(["down", "-v"]).status();

    assert!(
        out.status.success(),
        "minio-init bucket check failed: {}",
        String::from_utf8_lossy(&out.stderr)
    );

    // NOTE(review): this relies on `mc ls` echoing the bucket name in its
    // output, even for an empty bucket. Confirm against the mc version in use.
    let stdout = String::from_utf8_lossy(&out.stdout);
    let buckets = [
        "cloudlysis-docs-0",
        "cloudlysis-docs-1",
        "cloudlysis-docs-2",
    ];
    assert!(
        buckets.iter().all(|bucket| stdout.contains(*bucket)),
        "expected mc ls output to mention bucket: {stdout}"
    );

    // Short pause kept from the original test (intended to reduce docker flakiness).
    std::thread::sleep(Duration::from_millis(10));
}
|
||||
@@ -8,6 +8,20 @@ fn repo_root() -> PathBuf {
|
||||
.to_path_buf()
|
||||
}
|
||||
|
||||
/// The S3 config variants for Loki and Tempo must exist and parse as YAML.
/// Failures panic with the offending path and the underlying error.
#[test]
fn loki_and_tempo_s3_config_variants_are_syntactically_valid() {
    let root = repo_root();
    let configs = [
        root.join("observability/loki/config.s3.yml"),
        root.join("observability/tempo/config.s3.yml"),
    ];

    for file in configs {
        let raw = match fs::read_to_string(&file) {
            Ok(text) => text,
            Err(e) => panic!("{file:?}: {e}"),
        };
        // Only syntactic validity matters; the parsed value is discarded.
        if let Err(e) = serde_yaml::from_str::<serde_yaml::Value>(&raw) {
            panic!("{file:?}: {e}");
        }
    }
}
|
||||
|
||||
#[test]
|
||||
fn grafana_provisioning_files_are_syntactically_valid() {
|
||||
let root = repo_root();
|
||||
|
||||
218
control/api/tests/observability_s3_docker_gated.rs
Normal file
218
control/api/tests/observability_s3_docker_gated.rs
Normal file
@@ -0,0 +1,218 @@
|
||||
use reqwest::StatusCode;
|
||||
use serde_json::json;
|
||||
use std::{
|
||||
net::TcpStream,
|
||||
path::PathBuf,
|
||||
process::Command,
|
||||
time::{Duration, Instant},
|
||||
};
|
||||
|
||||
fn repo_root() -> PathBuf {
|
||||
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
||||
.parent()
|
||||
.and_then(|p| p.parent())
|
||||
.expect("api crate should live under repo root")
|
||||
.to_path_buf()
|
||||
}
|
||||
|
||||
/// Reports whether docker-backed tests are switched on: true only when the
/// `CONTROL_TEST_DOCKER` environment variable is set and, after trimming
/// surrounding whitespace, equals `1`.
fn docker_enabled() -> bool {
    if let Ok(flag) = std::env::var("CONTROL_TEST_DOCKER") {
        return flag.trim() == "1";
    }
    false
}
|
||||
|
||||
/// Polls `addr` until a TCP connection succeeds or `timeout` has elapsed.
///
/// Each attempt uses a 1-second connect timeout and failed attempts are
/// spaced 250 ms apart. Returns `true` on the first successful connect and
/// `false` once the deadline passes. A zero `timeout` returns `false`
/// without attempting a connection.
///
/// # Panics
/// Panics if `addr` is not a valid socket address (`ip:port`). The address
/// is parsed once up front, so a malformed address fails fast regardless of
/// `timeout`.
fn wait_for_tcp(addr: &str, timeout: Duration) -> bool {
    // Loop-invariant: parse once instead of on every attempt.
    let target: std::net::SocketAddr = addr.parse().expect("invalid socket addr");

    let start = Instant::now();
    while start.elapsed() < timeout {
        if TcpStream::connect_timeout(&target, Duration::from_secs(1)).is_ok() {
            return true;
        }
        std::thread::sleep(Duration::from_millis(250));
    }
    false
}
|
||||
|
||||
/// Recursively lists `bucket` with `mc`, run in the compose `minio-init`
/// container so that `minio:9000` is reachable. Returns the raw process
/// output; the caller inspects exit status and stdout.
///
/// # Panics
/// Panics if the `docker` command cannot be spawned.
fn mc_ls_bucket(compose: &PathBuf, bucket: &str) -> std::process::Output {
    let script = format!(
        "mc alias set local http://minio:9000 minioadmin minioadmin >/dev/null && mc ls --recursive local/{bucket}"
    );

    let mut cmd = Command::new("docker");
    cmd.args(["compose", "-f"]).arg(compose);
    cmd.args(["run", "--rm", "minio-init", "/bin/sh", "-lc"])
        .arg(&script);
    cmd.output().expect("failed to run mc ls")
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn loki_and_tempo_write_objects_to_minio_in_s3_mode() {
|
||||
if !docker_enabled() {
|
||||
eprintln!("skipping: set CONTROL_TEST_DOCKER=1 to enable docker tests");
|
||||
return;
|
||||
}
|
||||
|
||||
let root = repo_root();
|
||||
let base = root.join("docker-compose.yml");
|
||||
let obs = root.join("observability/docker-compose.yml");
|
||||
let obs_s3 = root.join("observability/docker-compose.s3.yml");
|
||||
|
||||
let up = Command::new("docker")
|
||||
.args(["compose", "-f"])
|
||||
.arg(&base)
|
||||
.args(["-f"])
|
||||
.arg(&obs)
|
||||
.args(["-f"])
|
||||
.arg(&obs_s3)
|
||||
.args(["up", "-d"])
|
||||
.status()
|
||||
.expect("failed to run docker compose up");
|
||||
assert!(up.success(), "docker compose up failed");
|
||||
|
||||
let reachable = wait_for_tcp("127.0.0.1:3100", Duration::from_secs(45))
|
||||
&& wait_for_tcp("127.0.0.1:3200", Duration::from_secs(45))
|
||||
&& wait_for_tcp("127.0.0.1:9411", Duration::from_secs(45))
|
||||
&& wait_for_tcp("127.0.0.1:9000", Duration::from_secs(45));
|
||||
assert!(reachable, "loki/tempo/minio ports not reachable in time");
|
||||
|
||||
let http = reqwest::Client::builder()
|
||||
.timeout(Duration::from_secs(10))
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
// Push one log line into Loki.
|
||||
let ts_ns = (std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_nanos())
|
||||
.to_string();
|
||||
|
||||
let push = http
|
||||
.post("http://127.0.0.1:3100/loki/api/v1/push")
|
||||
.json(&json!({
|
||||
"streams": [{
|
||||
"stream": { "app": "cloudlysis-test" },
|
||||
"values": [[ts_ns, "hello from test"]]
|
||||
}]
|
||||
}))
|
||||
.send()
|
||||
.await
|
||||
.expect("loki push request failed");
|
||||
assert!(
|
||||
push.status() == StatusCode::NO_CONTENT,
|
||||
"unexpected loki push status: {}",
|
||||
push.status()
|
||||
);
|
||||
|
||||
// Emit one trace span via Zipkin v2.
|
||||
let zipkin = http
|
||||
.post("http://127.0.0.1:9411/api/v2/spans")
|
||||
.json(&json!([{
|
||||
"traceId": "463ac35c9f6413ad48485a3953bb6124",
|
||||
"id": "a2fb4a1d1a96d312",
|
||||
"name": "test-span",
|
||||
"timestamp": 1700000000000000u64,
|
||||
"duration": 1000u64,
|
||||
"localEndpoint": { "serviceName": "cloudlysis-test" }
|
||||
}]))
|
||||
.send()
|
||||
.await
|
||||
.expect("zipkin post failed");
|
||||
assert!(
|
||||
zipkin.status().is_success(),
|
||||
"zipkin ingest failed: {}",
|
||||
zipkin.status()
|
||||
);
|
||||
|
||||
// Query Loki back to ensure the line is retrievable (not just accepted).
|
||||
// Loki may need a short delay to index.
|
||||
let loki_deadline = Instant::now() + Duration::from_secs(30);
|
||||
let mut loki_ok = false;
|
||||
while Instant::now() < loki_deadline && !loki_ok {
|
||||
let q = http
|
||||
.get("http://127.0.0.1:3100/loki/api/v1/query")
|
||||
.query(&[("query", r#"{app="cloudlysis-test"}"#)])
|
||||
.send()
|
||||
.await
|
||||
.expect("loki query failed");
|
||||
if q.status().is_success() {
|
||||
let v: serde_json::Value = q.json().await.expect("invalid loki query json");
|
||||
// We only need to see any non-empty result.
|
||||
let has = v
|
||||
.get("data")
|
||||
.and_then(|d| d.get("result"))
|
||||
.and_then(|r| r.as_array())
|
||||
.is_some_and(|a| !a.is_empty());
|
||||
if has {
|
||||
loki_ok = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
tokio::time::sleep(Duration::from_millis(500)).await;
|
||||
}
|
||||
|
||||
// Query Tempo back by trace id (Zipkin traceId used above).
|
||||
let tempo_deadline = Instant::now() + Duration::from_secs(30);
|
||||
let mut tempo_ok = false;
|
||||
while Instant::now() < tempo_deadline && !tempo_ok {
|
||||
let res = http
|
||||
.get("http://127.0.0.1:3200/api/traces/463ac35c9f6413ad48485a3953bb6124")
|
||||
.send()
|
||||
.await
|
||||
.expect("tempo get trace failed");
|
||||
if res.status().is_success() {
|
||||
tempo_ok = true;
|
||||
break;
|
||||
}
|
||||
tokio::time::sleep(Duration::from_millis(500)).await;
|
||||
}
|
||||
|
||||
// Poll buckets until at least one object appears.
|
||||
let deadline = Instant::now() + Duration::from_secs(45);
|
||||
let mut loki_has_objects = false;
|
||||
let mut tempo_has_objects = false;
|
||||
while Instant::now() < deadline && (!loki_has_objects || !tempo_has_objects) {
|
||||
let loki_out = mc_ls_bucket(&base, "cloudlysis-loki");
|
||||
if loki_out.status.success() && !loki_out.stdout.is_empty() {
|
||||
loki_has_objects = true;
|
||||
}
|
||||
|
||||
let tempo_out = mc_ls_bucket(&base, "cloudlysis-tempo");
|
||||
if tempo_out.status.success() && !tempo_out.stdout.is_empty() {
|
||||
tempo_has_objects = true;
|
||||
}
|
||||
|
||||
if !loki_has_objects || !tempo_has_objects {
|
||||
tokio::time::sleep(Duration::from_millis(500)).await;
|
||||
}
|
||||
}
|
||||
|
||||
let _ = Command::new("docker")
|
||||
.args(["compose", "-f"])
|
||||
.arg(&base)
|
||||
.args(["-f"])
|
||||
.arg(&obs)
|
||||
.args(["-f"])
|
||||
.arg(&obs_s3)
|
||||
.args(["down", "-v"])
|
||||
.status();
|
||||
|
||||
assert!(loki_has_objects, "expected Loki to write objects to MinIO");
|
||||
assert!(
|
||||
tempo_has_objects,
|
||||
"expected Tempo to write objects to MinIO"
|
||||
);
|
||||
assert!(loki_ok, "expected Loki query to return a result");
|
||||
assert!(tempo_ok, "expected Tempo to return the ingested trace");
|
||||
}
|
||||
@@ -30,10 +30,12 @@ fn wait_for_tcp(addr: &str, timeout: Duration) -> bool {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[ignore]
|
||||
fn observability_stack_reaches_healthy_state_fast() {
|
||||
let enabled = std::env::var("CONTROL_TEST_DOCKER").ok();
|
||||
assert_eq!(enabled.as_deref(), Some("1"));
|
||||
if enabled.as_deref() != Some("1") {
|
||||
eprintln!("skipping: set CONTROL_TEST_DOCKER=1 to enable docker observability smoke test");
|
||||
return;
|
||||
}
|
||||
|
||||
let root = repo_root();
|
||||
let compose = root.join("observability/docker-compose.yml");
|
||||
|
||||
116
control/api/tests/s3_docs_gated.rs
Normal file
116
control/api/tests/s3_docs_gated.rs
Normal file
@@ -0,0 +1,116 @@
|
||||
use api::s3_docs::{DocsConfig, DocsStore};
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Gate for the S3 integration tests that avoids needing `-- --ignored`:
/// true only when every required `CONTROL_S3_*` environment variable is set
/// to a value that is non-empty after trimming whitespace.
fn s3_env_ready() -> bool {
    const REQUIRED: [&str; 4] = [
        "CONTROL_S3_ENDPOINT",
        "CONTROL_S3_ACCESS_KEY_ID",
        "CONTROL_S3_SECRET_ACCESS_KEY",
        "CONTROL_S3_BUCKET_DOCS",
    ];

    for name in REQUIRED.iter() {
        match std::env::var(name) {
            Ok(value) if !value.trim().is_empty() => {}
            // Missing, non-unicode, or blank: tests must be skipped.
            _ => return false,
        }
    }
    true
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn s3_docs_roundtrip_put_get_list_delete() {
|
||||
if !s3_env_ready() {
|
||||
eprintln!("skipping: missing S3 env (see S3_PLAN.md)");
|
||||
return;
|
||||
}
|
||||
let cfg = DocsConfig::from_env().expect("missing S3 env (see S3_PLAN.md)");
|
||||
let store = DocsStore::new(cfg)
|
||||
.await
|
||||
.expect("failed to init docs store");
|
||||
|
||||
let tenant_id = Uuid::new_v4().to_string();
|
||||
let doc_type = "test";
|
||||
let doc_id = Uuid::new_v4().to_string();
|
||||
let filename = "hello.txt";
|
||||
let key = store
|
||||
.key_for(&tenant_id, doc_type, &doc_id, filename)
|
||||
.expect("invalid key");
|
||||
|
||||
store
|
||||
.put_for_tenant(
|
||||
&tenant_id,
|
||||
&key,
|
||||
b"hello".to_vec(),
|
||||
Some("text/plain".to_string()),
|
||||
)
|
||||
.await
|
||||
.expect("put failed");
|
||||
|
||||
let (bytes, _ct) = store
|
||||
.get_bytes_for_tenant(&tenant_id, &key)
|
||||
.await
|
||||
.expect("get failed");
|
||||
assert_eq!(bytes, b"hello");
|
||||
|
||||
let prefix = format!("{}{}", store.prefix(), tenant_id);
|
||||
let objects = store
|
||||
.list_for_tenant(&tenant_id, &format!("{prefix}/"))
|
||||
.await
|
||||
.expect("list failed");
|
||||
assert!(objects.iter().any(|o| o.key == key));
|
||||
|
||||
store
|
||||
.delete_for_tenant(&tenant_id, &key)
|
||||
.await
|
||||
.expect("delete failed");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn s3_docs_tenant_prefix_isolation() {
|
||||
if !s3_env_ready() {
|
||||
eprintln!("skipping: missing S3 env (see S3_PLAN.md)");
|
||||
return;
|
||||
}
|
||||
let cfg = DocsConfig::from_env().expect("missing S3 env (see S3_PLAN.md)");
|
||||
let store = DocsStore::new(cfg)
|
||||
.await
|
||||
.expect("failed to init docs store");
|
||||
|
||||
let tenant_a = Uuid::new_v4().to_string();
|
||||
let tenant_b = Uuid::new_v4().to_string();
|
||||
|
||||
let doc_type = "test";
|
||||
let doc_id = Uuid::new_v4().to_string();
|
||||
let filename = "hello.txt";
|
||||
|
||||
let key_a = store
|
||||
.key_for(&tenant_a, doc_type, &doc_id, filename)
|
||||
.expect("invalid key");
|
||||
store
|
||||
.put_for_tenant(
|
||||
&tenant_a,
|
||||
&key_a,
|
||||
b"hello-a".to_vec(),
|
||||
Some("text/plain".to_string()),
|
||||
)
|
||||
.await
|
||||
.expect("put failed");
|
||||
|
||||
let prefix_a = format!("{}{tenant_a}/", store.prefix());
|
||||
let prefix_b = format!("{}{tenant_b}/", store.prefix());
|
||||
|
||||
let objects_a = store
|
||||
.list_for_tenant(&tenant_a, &prefix_a)
|
||||
.await
|
||||
.expect("list a failed");
|
||||
let objects_b = store
|
||||
.list_for_tenant(&tenant_b, &prefix_b)
|
||||
.await
|
||||
.expect("list b failed");
|
||||
|
||||
assert!(objects_a.iter().any(|o| o.key == key_a));
|
||||
assert!(!objects_b.iter().any(|o| o.key == key_a));
|
||||
|
||||
store
|
||||
.delete_for_tenant(&tenant_a, &key_a)
|
||||
.await
|
||||
.expect("delete failed");
|
||||
}
|
||||
36
control/api/tests/s3_permissions_awscli_env_gated.rs
Normal file
36
control/api/tests/s3_permissions_awscli_env_gated.rs
Normal file
@@ -0,0 +1,36 @@
|
||||
use std::{path::PathBuf, process::Command};
|
||||
|
||||
fn repo_root() -> PathBuf {
|
||||
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
||||
.parent()
|
||||
.and_then(|p| p.parent())
|
||||
.expect("api crate should live under repo root")
|
||||
.to_path_buf()
|
||||
}
|
||||
|
||||
/// True only when `CONTROL_TEST_AWSCLI` is set to `1`; whitespace around the
/// value is ignored. Unset or any other value means disabled.
fn is_enabled() -> bool {
    matches!(std::env::var("CONTROL_TEST_AWSCLI"), Ok(flag) if flag.trim() == "1")
}
|
||||
|
||||
/// Runs `docker/scripts/s3_verify_docs.sh` through `sh` and fails with the
/// script's captured stdout and stderr when it exits unsuccessfully.
/// Opt-in: does nothing unless `is_enabled()` reports true.
#[test]
fn s3_docs_permissions_can_be_verified_with_aws_cli() {
    if !is_enabled() {
        eprintln!("skipping: set CONTROL_TEST_AWSCLI=1 to enable aws-cli S3 permission checks");
        return;
    }

    let verify_script = repo_root().join("docker/scripts/s3_verify_docs.sh");

    let mut shell = Command::new("sh");
    shell.arg(verify_script);
    let result = shell
        .output()
        .expect("failed to run s3_verify_docs.sh (requires aws cli and S3_* env)");

    // Decode both streams so a failure message shows everything the script printed.
    let stdout = String::from_utf8_lossy(&result.stdout);
    let stderr = String::from_utf8_lossy(&result.stderr);
    assert!(
        result.status.success(),
        "s3 verify script failed: {}\n{}",
        stdout,
        stderr
    );
}
|
||||
@@ -13,6 +13,7 @@ fn stack_files_parse_as_yaml() {
|
||||
let root = repo_root();
|
||||
for file in [
|
||||
root.join("swarm/stacks/control-plane.yml"),
|
||||
root.join("swarm/stacks/control-plane-prod.yml"),
|
||||
root.join("swarm/stacks/observability.yml"),
|
||||
] {
|
||||
let raw = fs::read_to_string(&file).unwrap();
|
||||
@@ -38,3 +39,36 @@ fn control_plane_stack_has_required_services() {
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks the production stack file: it must define `control-api` and
/// `control-ui`, must not define a `minio` service, and must declare both S3
/// credential secrets with `external: true`.
#[test]
fn control_plane_prod_stack_has_control_api_and_external_s3_secrets() {
    // Builds the YAML string value used as a mapping key in the lookups below.
    let yaml_key = |name: &str| serde_yaml::Value::String(name.to_string());

    let stack_path = repo_root().join("swarm/stacks/control-plane-prod.yml");
    let raw = fs::read_to_string(stack_path).unwrap();
    let doc: serde_yaml::Value = serde_yaml::from_str(&raw).unwrap();

    let services = match doc.get("services").and_then(|s| s.as_mapping()) {
        Some(mapping) => mapping,
        None => panic!("missing services"),
    };
    assert!(services.contains_key(yaml_key("control-api")));
    assert!(services.contains_key(yaml_key("control-ui")));
    let bundles_minio = services.contains_key(yaml_key("minio"));
    assert!(!bundles_minio, "prod stack must not bundle MinIO");

    let secrets = match doc.get("secrets").and_then(|s| s.as_mapping()) {
        Some(mapping) => mapping,
        None => panic!("missing secrets"),
    };
    for name in ["control_s3_access_key_id", "control_s3_secret_access_key"] {
        let entry = match secrets.get(yaml_key(name)) {
            Some(found) => found,
            None => panic!("missing secret {name}"),
        };
        // A missing or non-boolean `external` field is treated as false.
        let is_external = matches!(
            entry.get(yaml_key("external")).and_then(|flag| flag.as_bool()),
            Some(true)
        );
        assert!(is_external, "secret {name} must be external: true");
    }
}
|
||||
|
||||
@@ -26,6 +26,48 @@ async function apiJson<T>(path: string): Promise<T> {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Fetches `path` (relative to `baseUrl()`) and returns the parsed JSON body.
 *
 * Sends the bearer token when one is stored, followed by the caller's `extra`
 * headers; `extra` is spread last, so it wins on a key clash. The request is
 * aborted after 5 seconds.
 */
async function apiJsonWithHeaders<T>(path: string, extra: HeadersInit): Promise<T> {
  const abort = new AbortController()
  const timer = window.setTimeout(() => abort.abort(), 5000)

  const bearer = getAccessToken()
  const headers: HeadersInit = { ...(bearer ? { Authorization: `Bearer ${bearer}` } : {}), ...extra }

  try {
    const response = await apiFetch(`${baseUrl()}${path}`, {
      headers,
      signal: abort.signal,
      useLastCorrelationId: true,
      useLastTraceparent: true,
    })
    const payload = await response.json()
    return payload as T
  } finally {
    // Always release the timer, whether the request succeeded or threw.
    window.clearTimeout(timer)
  }
}
|
||||
|
||||
/**
 * Issues a request to `path` (relative to `baseUrl()`) and returns the raw
 * response so the caller can decide how to read the body.
 *
 * Headers start from `init.headers`; the bearer token (when stored) and then
 * every entry of `extra` are set on top. The request is aborted after 15 seconds.
 */
async function apiFetchWithHeaders(path: string, init: RequestInit, extra: Record<string, string>) {
  const abort = new AbortController()
  const timer = window.setTimeout(() => abort.abort(), 15000)

  const bearer = getAccessToken()
  const merged = new Headers(init.headers)
  if (bearer) {
    merged.set('authorization', `Bearer ${bearer}`)
  }
  Object.entries(extra).forEach(([name, value]) => {
    merged.set(name, value)
  })

  try {
    // `await` inside the try keeps the timer alive until the fetch settles.
    return await apiFetch(`${baseUrl()}${path}`, {
      ...init,
      headers: merged,
      signal: abort.signal,
      useLastCorrelationId: true,
      useLastTraceparent: true,
    })
  } finally {
    window.clearTimeout(timer)
  }
}
|
||||
|
||||
async function apiPostJson<T>(path: string, body: unknown, idempotencyKey?: string): Promise<T> {
|
||||
const controller = new AbortController()
|
||||
const t = window.setTimeout(() => controller.abort(), 2000)
|
||||
@@ -100,6 +142,65 @@ export function getFleetSnapshot(): Promise<FleetSnapshot> {
|
||||
return apiJson('/admin/v1/fleet/snapshot')
|
||||
}
|
||||
|
||||
/** Categories a drift item can be reported under. */
export type DriftKind = 'missing' | 'extra' | 'unhealthy' | 'version_mismatch'

/** One drift finding; `details` is passed through without interpretation. */
type DriftItem = {
  kind: DriftKind
  service: string
  details: unknown
}

/** Shape of the platform drift endpoint's response body. */
export type DriftResponse = {
  summary: Record<string, number>
  items: DriftItem[]
}

/** Loads the platform drift report from `/admin/v1/platform/drift`. */
export function getPlatformDrift(): Promise<DriftResponse> {
  return apiJson<DriftResponse>('/admin/v1/platform/drift')
}
|
||||
|
||||
/** Config domains the client knows how to address. */
export type ConfigDomain = 'routing' | 'placement'

/** Response body for a single config domain. */
export type ConfigGetResponse = {
  domain: ConfigDomain
  revision: number
  source: unknown
  value: unknown
}

/** Lists the available config domains via `/admin/v1/config`. */
export function listConfigDomains(): Promise<{ domains: ConfigDomain[] }> {
  return apiJson<{ domains: ConfigDomain[] }>('/admin/v1/config')
}

/** Loads the current config for one domain via `/admin/v1/config/<domain>`. */
export function getConfig(domain: ConfigDomain): Promise<ConfigGetResponse> {
  const path = '/admin/v1/config/' + domain
  return apiJson<ConfigGetResponse>(path)
}
|
||||
|
||||
/**
 * Starts a config validation job and resolves with its job id.
 *
 * `idempotencyKey` is handed to `apiPostJson` separately; it is not part of
 * the JSON body.
 */
export function startConfigValidateJob(args: {
  domain: ConfigDomain
  reason: string
  value: unknown
  idempotencyKey: string
}): Promise<{ job_id: string }> {
  const { domain, reason, value, idempotencyKey } = args
  const body = { domain, reason, value }
  return apiPostJson('/admin/v1/jobs/config/validate', body, idempotencyKey)
}
|
||||
|
||||
/**
 * Starts a config apply job and resolves with its job id.
 *
 * `expectedRevision` is sent as `expected_revision`; when it is undefined the
 * field is dropped by JSON serialization. `idempotencyKey` is handed to
 * `apiPostJson` separately and is not part of the JSON body.
 */
export function startConfigApplyJob(args: {
  domain: ConfigDomain
  reason: string
  expectedRevision?: number
  value: unknown
  idempotencyKey: string
}): Promise<{ job_id: string }> {
  const { domain, reason, expectedRevision, value, idempotencyKey } = args
  const body = { domain, reason, expected_revision: expectedRevision, value }
  return apiPostJson('/admin/v1/jobs/config/apply', body, idempotencyKey)
}
|
||||
|
||||
/**
 * Starts a config rollback job for one domain and resolves with its job id.
 * `idempotencyKey` is handed to `apiPostJson` separately and is not part of
 * the JSON body.
 */
export function startConfigRollbackJob(args: {
  domain: ConfigDomain
  reason: string
  idempotencyKey: string
}): Promise<{ job_id: string }> {
  const { domain, reason, idempotencyKey } = args
  const body = { domain, reason }
  return apiPostJson('/admin/v1/jobs/config/rollback', body, idempotencyKey)
}
|
||||
|
||||
export function getPlacement(kind: 'aggregate' | 'projection' | 'runner'): Promise<PlacementResponse> {
|
||||
return apiJson(`/admin/v1/placement/${kind}`)
|
||||
}
|
||||
@@ -177,3 +278,111 @@ export function getSwarmServices(): Promise<{ services: SwarmService[] }> {
|
||||
export function getSwarmTasks(serviceName: string): Promise<{ service: string; tasks: SwarmTask[] }> {
|
||||
return apiJson(`/admin/v1/swarm/services/${encodeURIComponent(serviceName)}/tasks`)
|
||||
}
|
||||
|
||||
/** One stored object as returned by the document listing endpoint. */
export type DocumentObject = {
  key: string
  size: number
  last_modified?: string | null
}

/**
 * Lists a tenant's documents. A non-empty `prefix` is sent as the `prefix`
 * query parameter. The tenant id goes both in the URL and in the
 * `x-tenant-id` header.
 */
export function listDocuments(args: { tenantId: string; prefix?: string }): Promise<{ objects: DocumentObject[] }> {
  const { tenantId, prefix } = args
  let url = `/admin/v1/tenants/${encodeURIComponent(tenantId)}/docs`
  if (prefix) {
    url += `?prefix=${encodeURIComponent(prefix)}`
  }
  return apiJsonWithHeaders(url, { 'x-tenant-id': tenantId })
}
|
||||
|
||||
/**
 * Uploads `file` with a PUT to the tenant's document path
 * `/docs/<docType>/<docId>/<filename>` (each segment URL-encoded) and resolves
 * with the parsed JSON response.
 *
 * The content type is taken from the file, falling back to
 * `application/octet-stream` when the file reports none.
 */
export async function uploadDocument(args: {
  tenantId: string
  docType: string
  docId: string
  filename: string
  file: File
}): Promise<{ key: string; sha256: string }> {
  const { tenantId, docType, docId, filename, file } = args

  const segments = [tenantId, 'docs', docType, docId, filename]
  const path =
    '/admin/v1/tenants/' +
    segments.map((segment, index) => (index === 1 ? segment : encodeURIComponent(segment))).join('/')

  const request: RequestInit = {
    method: 'PUT',
    headers: { 'content-type': file.type || 'application/octet-stream' },
    body: file,
  }
  const response = await apiFetchWithHeaders(path, request, { 'x-tenant-id': tenantId })
  const parsed = await response.json()
  return parsed as { key: string; sha256: string }
}
|
||||
|
||||
/**
 * Fetches one stored object through the API and resolves with its body as a
 * Blob. The whole key is URL-encoded into a single path segment.
 */
export async function downloadDocument(args: { tenantId: string; key: string }): Promise<Blob> {
  const { tenantId, key } = args
  const path = `/admin/v1/tenants/${encodeURIComponent(tenantId)}/docs/object/${encodeURIComponent(key)}`
  const response = await apiFetchWithHeaders(path, { method: 'GET' }, { 'x-tenant-id': tenantId })
  return await response.blob()
}
|
||||
|
||||
/**
 * Sends a DELETE for one stored object. The response is not inspected here;
 * the whole key is URL-encoded into a single path segment.
 */
export async function deleteDocument(args: { tenantId: string; key: string }): Promise<void> {
  const { tenantId, key } = args
  const path = `/admin/v1/tenants/${encodeURIComponent(tenantId)}/docs/object/${encodeURIComponent(key)}`
  await apiFetchWithHeaders(path, { method: 'DELETE' }, { 'x-tenant-id': tenantId })
}
|
||||
|
||||
/** Response body of the presign endpoints: the HTTP method, the URL to call, and the object key. */
export type PresignResponse = {
  method: 'PUT' | 'GET'
  url: string
  key: string
}

/**
 * Requests a presigned upload URL for a document. The camelCase arguments are
 * sent as snake_case fields; undefined optional fields are dropped by JSON
 * serialization.
 */
export function presignUpload(args: {
  tenantId: string
  docType: string
  docId?: string
  filename: string
  contentType?: string
}): Promise<PresignResponse> {
  const { tenantId, docType, docId, filename, contentType } = args
  const path = `/admin/v1/tenants/${encodeURIComponent(tenantId)}/docs/presign/upload`
  const body = {
    doc_type: docType,
    doc_id: docId,
    filename: filename,
    content_type: contentType,
  }
  return apiPostJsonWithTenant(path, tenantId, body)
}
|
||||
|
||||
/** Requests a presigned download URL for the object stored under `key`. */
export function presignDownload(args: { tenantId: string; key: string }): Promise<PresignResponse> {
  const { tenantId, key } = args
  const path = `/admin/v1/tenants/${encodeURIComponent(tenantId)}/docs/presign/download`
  return apiPostJsonWithTenant(path, tenantId, { key })
}
|
||||
|
||||
/**
 * POSTs `body` as JSON to `path` (relative to `baseUrl()`) and returns the
 * parsed JSON response.
 *
 * Always sends `content-type: application/json` and `x-tenant-id`; adds the
 * bearer token when one is stored. The request is aborted after 5 seconds.
 */
async function apiPostJsonWithTenant<T>(path: string, tenantId: string, body: unknown): Promise<T> {
  const abort = new AbortController()
  const timer = window.setTimeout(() => abort.abort(), 5000)

  const bearer = getAccessToken()
  const headers: HeadersInit = {
    'content-type': 'application/json',
    ...(bearer ? { Authorization: `Bearer ${bearer}` } : {}),
    'x-tenant-id': tenantId,
  }

  try {
    const response = await apiFetch(`${baseUrl()}${path}`, {
      method: 'POST',
      headers,
      body: JSON.stringify(body),
      signal: abort.signal,
      useLastCorrelationId: true,
      useLastTraceparent: true,
    })
    const payload = await response.json()
    return payload as T
  } finally {
    // Always release the timer, whether the request succeeded or threw.
    window.clearTimeout(timer)
  }
}
|
||||
|
||||
@@ -16,9 +16,11 @@ const navItems: NavItem[] = [
|
||||
{ label: 'Roles & Permissions', to: '/roles-permissions' },
|
||||
{ label: 'Config', to: '/config' },
|
||||
{ label: 'Definitions', to: '/definitions' },
|
||||
{ label: 'Documents', to: '/documents' },
|
||||
{ label: 'Scale & Placement', to: '/scale-placement' },
|
||||
{ label: 'Deployments', to: '/deployments' },
|
||||
{ label: 'Observability', to: '/observability' },
|
||||
{ label: 'Platform Drift', to: '/drift' },
|
||||
{ label: 'Audit Log', to: '/audit-log' },
|
||||
{ label: 'Settings', to: '/settings' },
|
||||
]
|
||||
|
||||
@@ -15,9 +15,11 @@ const paths = [
|
||||
'/roles-permissions',
|
||||
'/config',
|
||||
'/definitions',
|
||||
'/documents',
|
||||
'/scale-placement',
|
||||
'/deployments',
|
||||
'/observability',
|
||||
'/drift',
|
||||
'/audit-log',
|
||||
'/settings',
|
||||
]
|
||||
|
||||
@@ -6,10 +6,12 @@ import {
|
||||
DefinitionsPage,
|
||||
DeploymentDetailPage,
|
||||
DeploymentsPage,
|
||||
DocumentsPage,
|
||||
JobPage,
|
||||
NotFoundPage,
|
||||
ObservabilityPage,
|
||||
OverviewPage,
|
||||
PlatformDriftPage,
|
||||
RolesPermissionsPage,
|
||||
ScalePlacementPage,
|
||||
SessionsPage,
|
||||
@@ -30,10 +32,12 @@ export const routes: RouteObject[] = [
|
||||
{ path: 'roles-permissions', element: <RolesPermissionsPage /> },
|
||||
{ path: 'config', element: <ConfigPage /> },
|
||||
{ path: 'definitions', element: <DefinitionsPage /> },
|
||||
{ path: 'documents', element: <DocumentsPage /> },
|
||||
{ path: 'scale-placement', element: <ScalePlacementPage /> },
|
||||
{ path: 'deployments', element: <DeploymentsPage /> },
|
||||
{ path: 'deployments/:serviceName', element: <DeploymentDetailPage /> },
|
||||
{ path: 'observability', element: <ObservabilityPage /> },
|
||||
{ path: 'drift', element: <PlatformDriftPage /> },
|
||||
{ path: 'audit-log', element: <AuditLogPage /> },
|
||||
{ path: 'jobs/:jobId', element: <JobPage /> },
|
||||
{ path: 'settings', element: <SettingsPage /> },
|
||||
|
||||
@@ -9,6 +9,18 @@ import {
|
||||
listAudit,
|
||||
getSwarmServices,
|
||||
getSwarmTasks,
|
||||
listConfigDomains,
|
||||
getConfig,
|
||||
startConfigValidateJob,
|
||||
startConfigApplyJob,
|
||||
startConfigRollbackJob,
|
||||
getPlatformDrift,
|
||||
listDocuments,
|
||||
uploadDocument,
|
||||
downloadDocument,
|
||||
deleteDocument,
|
||||
presignUpload,
|
||||
presignDownload,
|
||||
startTenantDrainJob,
|
||||
startTenantMigrateJob,
|
||||
type FleetSnapshot,
|
||||
@@ -18,6 +30,10 @@ import {
|
||||
type AuditEvent,
|
||||
type SwarmService,
|
||||
type SwarmTask,
|
||||
type DocumentObject,
|
||||
type ConfigDomain,
|
||||
type ConfigGetResponse,
|
||||
type DriftResponse,
|
||||
} from './api/control'
|
||||
import { getAccessToken, setAccessToken } from './auth/token'
|
||||
import { Button, Code, ErrorText, Modal, MutedText, Table, TextInput } from './components/primitives'
|
||||
@@ -226,13 +242,443 @@ export function RolesPermissionsPage() {
|
||||
}
|
||||
|
||||
/**
 * Config editor page.
 *
 * Lets an operator pick a config domain, view and edit its value as JSON, and
 * start validate / apply / rollback jobs. Every job needs a non-blank reason
 * and, once started, navigates to that job's page.
 */
export function ConfigPage() {
  const [domains, setDomains] = useState<ConfigDomain[] | undefined>(undefined)
  const [selected, setSelected] = useState<ConfigDomain>('routing')
  const [cfg, setCfg] = useState<ConfigGetResponse | undefined>(undefined)
  const [draft, setDraft] = useState('')
  const [reason, setReason] = useState('')
  const [error, setError] = useState<string | undefined>(undefined)
  const [busy, setBusy] = useState(false)
  const navigate = useNavigate()

  /** Fresh key per job request; falls back to time + random when randomUUID is unavailable. */
  function newIdempotencyKey() {
    if (typeof crypto !== 'undefined' && 'randomUUID' in crypto) return crypto.randomUUID()
    return `${Date.now()}-${Math.random().toString(16).slice(2)}`
  }

  // Load the domain list once on mount; ignore the result if unmounted meanwhile.
  useEffect(() => {
    let cancelled = false
    listConfigDomains()
      .then((d) => {
        if (cancelled) return
        setDomains(d.domains)
        // If the default selection is not offered by the server, switch to the first domain.
        if (d.domains.length > 0 && !d.domains.includes(selected)) {
          setSelected(d.domains[0] ?? 'routing')
        }
      })
      .catch((e: unknown) => {
        if (cancelled) return
        setError(e instanceof Error ? e.message : 'failed to load domains')
      })
    return () => {
      cancelled = true
    }
  }, [])

  /** Reloads the config for `domain` and resets the JSON draft to the loaded value. */
  async function refresh(domain: ConfigDomain) {
    setBusy(true)
    try {
      const c = await getConfig(domain)
      setCfg(c)
      setDraft(JSON.stringify(c.value ?? null, null, 2))
      setError(undefined)
    } catch (e: unknown) {
      setError(e instanceof Error ? e.message : 'failed to load config')
    } finally {
      setBusy(false)
    }
  }

  // Reload whenever the selected domain changes (including the initial render).
  useEffect(() => {
    void refresh(selected)
  }, [selected])

  /**
   * Shared flow for the three job buttons: mark busy, start the job, navigate
   * to it, and surface any failure. `start` runs inside the try block so a
   * synchronous throw (e.g. invalid JSON in the draft) is reported like a
   * request failure.
   */
  async function runJob(start: () => Promise<{ job_id: string }>, fallbackMessage: string) {
    setBusy(true)
    try {
      const job = await start()
      navigate(`/jobs/${job.job_id}`)
    } catch (e: unknown) {
      setError(e instanceof Error ? e.message : fallbackMessage)
    } finally {
      setBusy(false)
    }
  }

  /** Parses the draft; an empty draft is treated as JSON `null`. Throws on invalid JSON. */
  function parseDraft(): unknown {
    return JSON.parse(draft || 'null') as unknown
  }

  const jobsDisabled = busy || reason.trim().length === 0
  const labelStyle = { fontSize: 12, color: '#666' }

  return (
    <PageShell title="Config">
      {error ? <ErrorText>{error}</ErrorText> : null}
      <div style={{ display: 'flex', flexDirection: 'column', gap: 12, maxWidth: 980 }}>
        <div style={{ display: 'flex', gap: 10, flexWrap: 'wrap', alignItems: 'flex-end' }}>
          <div style={{ display: 'flex', flexDirection: 'column', gap: 6 }}>
            <label htmlFor="domain" style={labelStyle}>
              Domain
            </label>
            <select
              id="domain"
              value={selected}
              onChange={(e) => setSelected(e.target.value as ConfigDomain)}
              style={{ padding: '8px 10px', borderRadius: 10, border: '1px solid #ddd' }}
              disabled={!domains || domains.length === 0}
            >
              {(domains ?? ['routing', 'placement']).map((d) => (
                <option key={d} value={d}>
                  {d}
                </option>
              ))}
            </select>
          </div>

          <Button onClick={() => void refresh(selected)} disabled={busy}>
            Refresh
          </Button>

          <div style={{ display: 'flex', flexDirection: 'column', gap: 6, minWidth: 320 }}>
            <label htmlFor="reason" style={labelStyle}>
              Reason (required for jobs)
            </label>
            <TextInput id="reason" value={reason} onChange={setReason} placeholder="why are you changing this?" />
          </div>
        </div>

        <MutedText>
          Current revision: <Code>{String(cfg?.revision ?? '')}</Code>
        </MutedText>

        <div style={{ display: 'flex', flexDirection: 'column', gap: 6 }}>
          <label htmlFor="json" style={labelStyle}>
            JSON
          </label>
          <textarea
            id="json"
            value={draft}
            onChange={(e) => setDraft(e.target.value)}
            spellCheck={false}
            style={{
              width: '100%',
              minHeight: 340,
              fontFamily: 'ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace',
              fontSize: 12,
              borderRadius: 12,
              border: '1px solid #ddd',
              padding: 12,
            }}
          />
        </div>

        <div style={{ display: 'flex', gap: 10, flexWrap: 'wrap' }}>
          <Button
            disabled={jobsDisabled}
            onClick={() =>
              runJob(
                () =>
                  startConfigValidateJob({
                    domain: selected,
                    reason,
                    value: parseDraft(),
                    idempotencyKey: newIdempotencyKey(),
                  }),
                'validate failed',
              )
            }
          >
            Validate
          </Button>

          <Button
            disabled={jobsDisabled}
            onClick={() =>
              runJob(
                () =>
                  startConfigApplyJob({
                    domain: selected,
                    reason,
                    // Revision shown on the page, passed along as the expected revision.
                    expectedRevision: cfg?.revision,
                    value: parseDraft(),
                    idempotencyKey: newIdempotencyKey(),
                  }),
                'apply failed',
              )
            }
          >
            Apply
          </Button>

          <Button
            disabled={jobsDisabled}
            onClick={() =>
              runJob(
                () =>
                  startConfigRollbackJob({
                    domain: selected,
                    reason,
                    idempotencyKey: newIdempotencyKey(),
                  }),
                'rollback failed',
              )
            }
          >
            Rollback (to last backup)
          </Button>
        </div>
      </div>
    </PageShell>
  )
}
|
||||
|
||||
export function DefinitionsPage() {
|
||||
return <PageShell title="Definitions" />
|
||||
}
|
||||
|
||||
/**
 * Documents page.
 *
 * Lists, uploads, downloads and deletes a tenant's documents. Transfers go
 * either through the API or, when "Use presigned URLs" is ticked, directly
 * to the URL returned by the presign endpoints.
 */
export function DocumentsPage() {
  const [tenantId, setTenantId] = useState('')
  const [docType, setDocType] = useState('deployments')
  const [docId, setDocId] = useState('')
  const [prefix, setPrefix] = useState('')
  const [file, setFile] = useState<File | undefined>(undefined)
  const [objects, setObjects] = useState<DocumentObject[] | undefined>(undefined)
  const [error, setError] = useState<string | undefined>(undefined)
  const [busy, setBusy] = useState(false)
  const [confirmDelete, setConfirmDelete] = useState<{ key: string } | undefined>(undefined)
  const [usePresign, setUsePresign] = useState(false)

  /** Generates a document id; falls back to time + random when randomUUID is unavailable. */
  function newId(): string {
    if (typeof crypto !== 'undefined' && 'randomUUID' in crypto) return crypto.randomUUID()
    return `${Date.now()}-${Math.random().toString(16).slice(2)}`
  }

  /** Reloads the object list for the current tenant/prefix; no-op without a tenant id. */
  async function refresh() {
    const tid = tenantId.trim()
    if (!tid) return
    setBusy(true)
    try {
      const d = await listDocuments({ tenantId: tid, prefix: prefix.trim() || undefined })
      setObjects(d.objects)
      setError(undefined)
    } catch (e: unknown) {
      setError(e instanceof Error ? e.message : 'failed to load')
    } finally {
      setBusy(false)
    }
  }

  /** Uploads the selected file, then remembers the document id used and refreshes the list. */
  async function handleUpload() {
    const tid = tenantId.trim()
    if (!tid || !file) return
    setBusy(true)
    try {
      const id = docId.trim() || newId()
      const contentType = file.type || 'application/octet-stream'
      if (!usePresign) {
        await uploadDocument({
          tenantId: tid,
          docType: docType.trim(),
          docId: id,
          filename: file.name,
          file,
        })
      } else {
        const p = await presignUpload({
          tenantId: tid,
          docType: docType.trim(),
          docId: id,
          filename: file.name,
          contentType,
        })
        const res = await fetch(p.url, {
          method: 'PUT',
          headers: { 'content-type': contentType },
          body: file,
        })
        // fetch() resolves on HTTP error statuses too; without this check a
        // rejected upload would be reported as a success.
        if (!res.ok) throw new Error(`presigned upload failed: HTTP ${res.status}`)
      }
      setDocId(id)
      await refresh()
    } catch (e: unknown) {
      setError(e instanceof Error ? e.message : 'upload failed')
    } finally {
      setBusy(false)
    }
  }

  /** Fetches one object as a Blob, via the API or via a presigned URL. */
  async function fetchObject(tid: string, key: string): Promise<Blob> {
    if (!usePresign) return await downloadDocument({ tenantId: tid, key })
    const p = await presignDownload({ tenantId: tid, key })
    const res = await fetch(p.url, { method: 'GET' })
    // Without this check an error response body would be saved as the file.
    if (!res.ok) throw new Error(`presigned download failed: HTTP ${res.status}`)
    return await res.blob()
  }

  /** Downloads one object and hands it to the browser as a file save. */
  async function handleDownload(key: string) {
    const tid = tenantId.trim()
    if (!tid) return
    setBusy(true)
    try {
      const blob = await fetchObject(tid, key)
      const url = URL.createObjectURL(blob)
      // Trigger the save through a temporary anchor element.
      const a = document.createElement('a')
      a.href = url
      // Suggested file name: last path segment of the key.
      const name = key.split('/').slice(-1)[0] ?? 'download'
      a.download = name
      document.body.appendChild(a)
      a.click()
      a.remove()
      URL.revokeObjectURL(url)
    } catch (e: unknown) {
      // Surface the failure like the other handlers instead of leaving an
      // unhandled promise rejection.
      setError(e instanceof Error ? e.message : 'download failed')
    } finally {
      setBusy(false)
    }
  }

  /** Deletes the object awaiting confirmation, closes the dialog and refreshes the list. */
  async function handleDelete() {
    const tid = tenantId.trim()
    const k = confirmDelete?.key
    if (!tid || !k) return
    setBusy(true)
    try {
      await deleteDocument({ tenantId: tid, key: k })
      setConfirmDelete(undefined)
      await refresh()
    } catch (e: unknown) {
      setError(e instanceof Error ? e.message : 'delete failed')
    } finally {
      setBusy(false)
    }
  }

  const fieldStyle = { display: 'flex', flexDirection: 'column', gap: 6 } as const
  const rowStyle = { display: 'flex', gap: 12, flexWrap: 'wrap', alignItems: 'flex-end' } as const
  const labelStyle = { fontSize: 12, color: '#666' }
  const dividerStyle = { borderTop: '1px solid #eee', paddingTop: 12 }

  return (
    <PageShell title="Documents">
      {error ? <ErrorText>{error}</ErrorText> : null}

      <div style={{ display: 'flex', flexDirection: 'column', gap: 14, maxWidth: 880 }}>
        <div style={rowStyle}>
          <div style={fieldStyle}>
            <label htmlFor="tenantId" style={labelStyle}>
              Tenant ID
            </label>
            <TextInput id="tenantId" value={tenantId} onChange={setTenantId} placeholder="uuid" />
          </div>
          <div style={fieldStyle}>
            <label htmlFor="prefix" style={labelStyle}>
              Prefix (optional)
            </label>
            <TextInput id="prefix" value={prefix} onChange={setPrefix} placeholder="e.g. deployments/" />
          </div>
          <Button onClick={refresh} disabled={busy || !tenantId.trim()}>
            Refresh
          </Button>
        </div>

        <div style={dividerStyle} />

        <div style={{ display: 'flex', flexDirection: 'column', gap: 10 }}>
          <div style={rowStyle}>
            <div style={fieldStyle}>
              <label htmlFor="docType" style={labelStyle}>
                Doc type
              </label>
              <TextInput id="docType" value={docType} onChange={setDocType} />
            </div>
            <div style={fieldStyle}>
              <label htmlFor="docId" style={labelStyle}>
                Doc id
              </label>
              <TextInput id="docId" value={docId} onChange={setDocId} placeholder="auto" />
            </div>
            <div style={fieldStyle}>
              <label htmlFor="file" style={labelStyle}>
                File
              </label>
              <input
                id="file"
                type="file"
                onChange={(e) => {
                  const f = e.target.files?.item(0) ?? undefined
                  setFile(f)
                }}
              />
            </div>
            <Button disabled={busy || !tenantId.trim() || !docType.trim() || !file} onClick={handleUpload}>
              {usePresign ? 'Upload (presigned)' : 'Upload'}
            </Button>
          </div>
          <div style={{ display: 'flex', gap: 8, alignItems: 'center' }}>
            <input
              id="usePresign"
              type="checkbox"
              checked={usePresign}
              onChange={(e) => setUsePresign(e.target.checked)}
            />
            <label htmlFor="usePresign" style={labelStyle}>
              Use presigned URLs (recommended for large files)
            </label>
          </div>
          <MutedText>
            {/* String expression: bare <tenant> etc. would be parsed as JSX elements. */}
            Documents are stored under <Code>{'docs/<tenant>/<type>/<id>/<filename>'}</Code>.
          </MutedText>
        </div>

        <div style={dividerStyle} />

        {!objects ? <div>{tenantId.trim() ? 'No data loaded.' : 'Enter a tenant id to list documents.'}</div> : null}
        {objects ? (
          <Table
            columns={['Key', 'Size', 'Last Modified', 'Actions']}
            rows={objects.map((o) => [
              <Code key="k">{o.key}</Code>,
              String(o.size ?? 0),
              o.last_modified ?? '',
              <div key="a" style={{ display: 'flex', gap: 8 }}>
                <Button onClick={() => handleDownload(o.key)} disabled={busy}>
                  {usePresign ? 'Download (presigned)' : 'Download'}
                </Button>
                <Button
                  disabled={busy}
                  onClick={() => {
                    setConfirmDelete({ key: o.key })
                  }}
                >
                  Delete
                </Button>
              </div>,
            ])}
          />
        ) : null}
      </div>

      <Modal
        open={!!confirmDelete}
        title="Confirm delete"
        onClose={() => setConfirmDelete(undefined)}
        footer={
          <div style={{ display: 'flex', gap: 10, justifyContent: 'flex-end' }}>
            <Button onClick={() => setConfirmDelete(undefined)} disabled={busy}>
              Cancel
            </Button>
            <Button disabled={busy || !tenantId.trim() || !confirmDelete} onClick={handleDelete}>
              Delete permanently
            </Button>
          </div>
        }
      >
        <div style={{ display: 'flex', flexDirection: 'column', gap: 10 }}>
          <MutedText>
            Tenant: <Code>{tenantId.trim() || '(unset)'}</Code>
          </MutedText>
          <MutedText>
            Key: <Code>{confirmDelete?.key}</Code>
          </MutedText>
        </div>
      </Modal>
    </PageShell>
  )
}
|
||||
|
||||
export function ScalePlacementPage() {
|
||||
const [aggregate, setAggregate] = useState<PlacementResponse | undefined>(undefined)
|
||||
const [projection, setProjection] = useState<PlacementResponse | undefined>(undefined)
|
||||
@@ -332,6 +778,53 @@ export function ObservabilityPage() {
|
||||
return <PageShell title="Observability" />
|
||||
}
|
||||
|
||||
/**
 * Platform drift page.
 *
 * Loads the drift report once on mount and renders two tables: a per-kind
 * count summary and the individual drift items with their details as JSON.
 */
export function PlatformDriftPage() {
  const [data, setData] = useState<DriftResponse | undefined>(undefined)
  const [error, setError] = useState<string | undefined>(undefined)

  useEffect(() => {
    // Guards against setting state after the component has unmounted.
    let cancelled = false
    getPlatformDrift()
      .then((d) => {
        if (cancelled) return
        setError(undefined)
        setData(d)
      })
      .catch((e: unknown) => {
        if (cancelled) return
        setError(e instanceof Error ? e.message : 'failed to load')
      })
    return () => {
      cancelled = true
    }
  }, [])

  return (
    <PageShell title="Platform Drift">
      {error ? <ErrorText>{error}</ErrorText> : null}
      {/* Show the spinner text only while a load is pending, not after it failed. */}
      {!data && !error ? <div>Loading…</div> : null}
      {data ? (
        <div style={{ display: 'flex', flexDirection: 'column', gap: 16 }}>
          <Table
            columns={['Kind', 'Count']}
            rows={Object.entries(data.summary ?? {}).map(([k, v]) => [k, String(v)])}
          />
          <Table
            columns={['Kind', 'Service', 'Details']}
            // Same defensive default as `summary`: tolerate a response without `items`.
            rows={(data.items ?? []).map((i, idx) => [
              i.kind,
              <Code key={`svc-${idx}`}>{i.service}</Code>,
              <pre key={`d-${idx}`} style={{ margin: 0, fontSize: 12, overflowX: 'auto' }}>
                {JSON.stringify(i.details, null, 2)}
              </pre>,
            ])}
          />
        </div>
      ) : null}
    </PageShell>
  )
}
|
||||
|
||||
export function AuditLogPage() {
|
||||
const [data, setData] = useState<AuditEvent[] | undefined>(undefined)
|
||||
const [error, setError] = useState<string | undefined>(undefined)
|
||||
|
||||
@@ -6,6 +6,44 @@ services:
|
||||
- "4222:4222"
|
||||
- "8222:8222"
|
||||
|
||||
mailhog:
|
||||
image: mailhog/mailhog:v1.0.1
|
||||
ports:
|
||||
- "1025:1025" # SMTP
|
||||
- "8025:8025" # Web UI
|
||||
|
||||
minio:
|
||||
image: minio/minio:RELEASE.2025-02-28T09-55-16Z
|
||||
command: ["server", "/data", "--console-address", ":9001"]
|
||||
environment:
|
||||
MINIO_ROOT_USER: minioadmin
|
||||
MINIO_ROOT_PASSWORD: minioadmin
|
||||
ports:
|
||||
- "9000:9000"
|
||||
- "9001:9001"
|
||||
volumes:
|
||||
- minio_data:/data
|
||||
|
||||
minio-init:
  image: minio/mc:RELEASE.2025-02-21T16-00-46Z
  # Short-form depends_on only orders container start; it does not wait for
  # MinIO to accept connections, so the script below retries until it does.
  depends_on:
    - minio
  entrypoint:
    - /bin/sh
    - -c
  command:
    - |
      set -euo pipefail
      # Retry until MinIO is reachable; a single attempt would abort the whole
      # script under `set -e` when this container wins the startup race.
      until mc alias set local http://minio:9000 minioadmin minioadmin; do
        echo "waiting for minio"
        sleep 1
      done
      mc mb -p local/cloudlysis-docs || true
      mc mb -p local/cloudlysis-loki || true
      mc mb -p local/cloudlysis-tempo || true
      mc mb -p local/cloudlysis-docs-0 || true
      mc mb -p local/cloudlysis-docs-1 || true
      mc mb -p local/cloudlysis-docs-2 || true
      mc anonymous set download local/cloudlysis-docs || true
      echo "minio init done"
|
||||
|
||||
gateway:
|
||||
build:
|
||||
context: .
|
||||
@@ -22,7 +60,7 @@ services:
|
||||
GATEWAY_ROUTING_FILE: /config/routing.json
|
||||
volumes:
|
||||
- gateway_data:/data
|
||||
- ./routing/dev.json:/config/routing.json:ro
|
||||
- ./config/routing/dev.json:/config/routing.json:ro
|
||||
ports:
|
||||
- "8080:8080"
|
||||
- "8081:8081"
|
||||
@@ -86,6 +124,7 @@ services:
|
||||
RUNNER_HTTP_ADDR: 0.0.0.0:8080
|
||||
RUNNER_SAGA_MANIFEST_PATH: /config/sagas.yaml
|
||||
RUNNER_EFFECTS_MANIFEST_PATH: /config/effects.yaml
|
||||
RUNNER_SMTP_URL: smtp://mailhog:1025
|
||||
volumes:
|
||||
- runner_data:/data
|
||||
- ./runner/config:/config:ro
|
||||
@@ -99,13 +138,25 @@ services:
|
||||
args:
|
||||
PACKAGE: api
|
||||
BIN: api
|
||||
depends_on:
|
||||
- minio-init
|
||||
environment:
|
||||
CONTROL_API_ADDR: 0.0.0.0:8080
|
||||
CONTROL_GATEWAY_JWT_HS256_SECRET: dev_secret
|
||||
CONTROL_PLACEMENT_PATH: /etc/control/placement.json
|
||||
CONTROL_SWARM_STATE_PATH: /etc/control/swarm_state.json
|
||||
CONTROL_SELF_URL: http://control-api:8080
|
||||
CONTROL_S3_ENDPOINT: http://minio:9000
|
||||
CONTROL_S3_PUBLIC_ENDPOINT: http://localhost:9000
|
||||
CONTROL_S3_REGION: us-east-1
|
||||
CONTROL_S3_ACCESS_KEY_ID: minioadmin
|
||||
CONTROL_S3_SECRET_ACCESS_KEY: minioadmin
|
||||
CONTROL_S3_FORCE_PATH_STYLE: "true"
|
||||
CONTROL_S3_INSECURE: "true"
|
||||
CONTROL_S3_BUCKET_DOCS: cloudlysis-docs-0,cloudlysis-docs-1,cloudlysis-docs-2
|
||||
CONTROL_S3_PREFIX_DOCS: docs/
|
||||
volumes:
|
||||
- ./placement/dev.json:/etc/control/placement.json:ro
|
||||
- ./config/placement/dev.json:/etc/control/placement.json:ro
|
||||
- ./swarm/dev.json:/etc/control/swarm_state.json:ro
|
||||
ports:
|
||||
- "38080:8080"
|
||||
@@ -119,8 +170,75 @@ services:
|
||||
ports:
|
||||
- "8082:80"
|
||||
|
||||
victoria-metrics:
|
||||
image: victoriametrics/victoria-metrics:v1.120.0
|
||||
profiles: ["observability"]
|
||||
ports:
|
||||
- "8428:8428"
|
||||
command:
|
||||
- "-retentionPeriod=30d"
|
||||
volumes:
|
||||
- victoria_metrics_data:/victoria-metrics-data
|
||||
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.120.0
|
||||
profiles: ["observability"]
|
||||
depends_on:
|
||||
- victoria-metrics
|
||||
ports:
|
||||
- "8429:8429"
|
||||
command:
|
||||
- "-promscrape.config=/etc/vmagent/scrape.yml"
|
||||
- "-remoteWrite.url=http://victoria-metrics:8428/api/v1/write"
|
||||
volumes:
|
||||
- ./observability/vmagent/scrape.yml:/etc/vmagent/scrape.yml:ro
|
||||
|
||||
loki:
|
||||
image: grafana/loki:3.5.5
|
||||
profiles: ["observability"]
|
||||
ports:
|
||||
- "3100:3100"
|
||||
command:
|
||||
- "-config.file=/etc/loki/config.yml"
|
||||
volumes:
|
||||
- ./observability/loki/config.yml:/etc/loki/config.yml:ro
|
||||
- loki_data:/loki
|
||||
|
||||
tempo:
|
||||
image: grafana/tempo:2.8.2
|
||||
profiles: ["observability"]
|
||||
ports:
|
||||
- "3200:3200"
|
||||
- "4317:4317"
|
||||
- "4318:4318"
|
||||
- "9411:9411"
|
||||
command:
|
||||
- "-config.file=/etc/tempo/config.yml"
|
||||
volumes:
|
||||
- ./observability/tempo/config.yml:/etc/tempo/config.yml:ro
|
||||
- tempo_data:/var/tempo
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana:12.1.1
|
||||
profiles: ["observability"]
|
||||
depends_on:
|
||||
- victoria-metrics
|
||||
- loki
|
||||
- tempo
|
||||
ports:
|
||||
- "3000:3000"
|
||||
volumes:
|
||||
- grafana_data:/var/lib/grafana
|
||||
- ./observability/grafana/provisioning:/etc/grafana/provisioning:ro
|
||||
- ./observability/grafana/dashboards:/var/lib/grafana/dashboards:ro
|
||||
|
||||
volumes:
|
||||
aggregate_data:
|
||||
gateway_data:
|
||||
projection_data:
|
||||
runner_data:
|
||||
minio_data:
|
||||
grafana_data:
|
||||
loki_data:
|
||||
tempo_data:
|
||||
victoria_metrics_data:
|
||||
|
||||
56
docker/scripts/s3_apply_lifecycle_docs.sh
Normal file
56
docker/scripts/s3_apply_lifecycle_docs.sh
Normal file
@@ -0,0 +1,56 @@
|
||||
#!/bin/sh
set -eu

# Operator tool: pushes a lifecycle configuration onto the S3 docs bucket.
#
# The script has side effects (it overwrites the bucket's lifecycle
# configuration), but it is non-interactive and safe to call from automation.
#
# Required env:
#   - S3_ENDPOINT
#   - S3_REGION
#   - S3_BUCKET_DOCS
#
# Optional env:
#   - S3_LIFECYCLE_JSON (path to JSON file; default: docs/usage/s3_lifecycle_docs_default.json)
#
# Usage:
#   export S3_ENDPOINT=...
#   export S3_REGION=...
#   export S3_BUCKET_DOCS=...
#   sh docker/scripts/s3_apply_lifecycle_docs.sh
#
# Exit codes: 2 for a missing env var, missing `aws` binary, or missing
# lifecycle file; otherwise whatever the `aws` call returns.

# Exits with status 2 when the named environment variable is unset or empty.
need() {
  var_name="$1"
  var_value="$(printenv "$var_name" 2>/dev/null || true)"
  [ -n "$var_value" ] && return 0
  echo "missing env: $var_name" >&2
  exit 2
}

for required in S3_ENDPOINT S3_REGION S3_BUCKET_DOCS; do
  need "$required"
done

command -v aws >/dev/null 2>&1 || {
  echo "missing dependency: aws (AWS CLI v2 recommended)" >&2
  exit 2
}

# Pin the region for the CLI and disable the EC2 metadata lookup.
AWS_EC2_METADATA_DISABLED=true
AWS_DEFAULT_REGION="$S3_REGION"
AWS_REGION="$S3_REGION"
export AWS_EC2_METADATA_DISABLED AWS_DEFAULT_REGION AWS_REGION

S3_LIFECYCLE_JSON="${S3_LIFECYCLE_JSON:-docs/usage/s3_lifecycle_docs_default.json}"
[ -f "$S3_LIFECYCLE_JSON" ] || {
  echo "missing lifecycle config file: $S3_LIFECYCLE_JSON" >&2
  exit 2
}

aws s3api put-bucket-lifecycle-configuration \
  --endpoint-url "$S3_ENDPOINT" \
  --bucket "$S3_BUCKET_DOCS" \
  --lifecycle-configuration "file://$S3_LIFECYCLE_JSON" >/dev/null

echo "ok: applied lifecycle config to bucket $S3_BUCKET_DOCS"
|
||||
|
||||
89
docker/scripts/s3_create_docs_bucket.sh
Normal file
89
docker/scripts/s3_create_docs_bucket.sh
Normal file
@@ -0,0 +1,89 @@
|
||||
#!/bin/sh
set -eu

# Idempotently provisions the S3 docs bucket with sane defaults.
#
# This script is intended for CI/CD (Gitea Actions) or operator usage.
# It is safe to run repeatedly:
#   - If the bucket exists, it will NOT recreate it.
#   - It will (re)apply public-access-block and optional versioning/lifecycle.
#
# Required env:
#   - S3_ENDPOINT
#   - S3_REGION
#   - S3_BUCKET_DOCS
#
# Optional env:
#   - S3_ENABLE_VERSIONING (true/false; default false; "1" is also accepted)
#   - S3_LIFECYCLE_JSON (path; default docs/usage/s3_lifecycle_docs_default.json)
#
# Credentials:
#   - AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY (or AWS_PROFILE)
#
# Notes:
#   - Some S3-compatible providers ignore LocationConstraint; this script tries to be compatible.
#   - The endpoint is always passed as a quoted `--endpoint-url "$S3_ENDPOINT"`
#     so the value is never subject to word splitting or glob expansion.

# Exits with status 2 when the named environment variable is unset or empty.
need() {
  name="$1"
  val="$(printenv "$name" 2>/dev/null || true)"
  if [ -z "$val" ]; then
    echo "missing env: $name" >&2
    exit 2
  fi
}

need S3_ENDPOINT
need S3_REGION
need S3_BUCKET_DOCS

if ! command -v aws >/dev/null 2>&1; then
  echo "missing dependency: aws (AWS CLI v2 recommended)" >&2
  exit 2
fi

export AWS_EC2_METADATA_DISABLED=true
export AWS_DEFAULT_REGION="$S3_REGION"
export AWS_REGION="$S3_REGION"

bucket="$S3_BUCKET_DOCS"

echo "== ensure bucket exists =="
# Any head-bucket failure is treated as "bucket missing"; a real problem
# (bad credentials, unreachable endpoint) then surfaces from the final
# create-bucket call, whose stderr is not suppressed.
if aws s3api head-bucket --endpoint-url "$S3_ENDPOINT" --bucket "$bucket" >/dev/null 2>&1; then
  echo "bucket exists: $bucket"
else
  # Try create-bucket without LocationConstraint first (works for many S3-compatible providers).
  if aws s3api create-bucket --endpoint-url "$S3_ENDPOINT" --bucket "$bucket" >/dev/null 2>&1; then
    echo "created bucket: $bucket"
  else
    # Fallback for AWS-style regions.
    aws s3api create-bucket --endpoint-url "$S3_ENDPOINT" --bucket "$bucket" \
      --create-bucket-configuration "LocationConstraint=$S3_REGION" >/dev/null
    echo "created bucket (with location constraint): $bucket"
  fi
fi

echo "== apply public access block =="
aws s3api put-public-access-block --endpoint-url "$S3_ENDPOINT" --bucket "$bucket" --public-access-block-configuration \
  "BlockPublicAcls=true,IgnorePublicAcls=true,BlockPublicPolicy=true,RestrictPublicBuckets=true" >/dev/null

S3_ENABLE_VERSIONING="${S3_ENABLE_VERSIONING:-false}"
if [ "$S3_ENABLE_VERSIONING" = "true" ] || [ "$S3_ENABLE_VERSIONING" = "1" ]; then
  echo "== enable versioning =="
  aws s3api put-bucket-versioning --endpoint-url "$S3_ENDPOINT" --bucket "$bucket" --versioning-configuration Status=Enabled >/dev/null
fi

echo "== apply lifecycle (optional) =="
S3_LIFECYCLE_JSON="${S3_LIFECYCLE_JSON:-docs/usage/s3_lifecycle_docs_default.json}"
if [ -f "$S3_LIFECYCLE_JSON" ]; then
  aws s3api put-bucket-lifecycle-configuration \
    --endpoint-url "$S3_ENDPOINT" \
    --bucket "$bucket" \
    --lifecycle-configuration "file://$S3_LIFECYCLE_JSON" >/dev/null
else
  # A missing lifecycle file is not an error here; the step is optional.
  echo "lifecycle file missing, skipping: $S3_LIFECYCLE_JSON" >&2
fi

echo "ok: provisioned bucket $bucket"
|
||||
|
||||
77
docker/scripts/s3_verify_docs.sh
Normal file
77
docker/scripts/s3_verify_docs.sh
Normal file
@@ -0,0 +1,77 @@
|
||||
#!/bin/sh
set -eu

# Verifies Control API S3 document storage permissions using `aws` CLI.
#
# This script is intentionally parameterized so it can run against Hetzner or any S3-compatible backend.
# It does NOT require Control API to be running; it validates the underlying bucket/prefix permissions.
#
# Required env:
#   - S3_ENDPOINT (e.g. https://<hetzner-endpoint>)
#   - S3_REGION
#   - S3_BUCKET_DOCS
# Optional env:
#   - S3_PREFIX_DOCS (default docs/; a trailing "/" is added when missing)
#   - S3_FORCE_PATH_STYLE (true/false; default false; "1" is also accepted)
#   - AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY (or AWS_PROFILE)
#
# Notes:
#   - For S3-compatible providers, prefer `aws s3api` with `--endpoint-url`.
#   - We set `AWS_EC2_METADATA_DISABLED=true` to avoid IMDS delays in containers/CI.
#   - S3_FORCE_PATH_STYLE is applied through the AWS CLI setting
#     `s3.addressing_style = path`, written to a throwaway config file. To avoid
#     hiding an existing setup, this only happens when AWS_PROFILE and
#     AWS_CONFIG_FILE are unset and ~/.aws/config does not exist; otherwise a
#     warning is printed and the existing configuration is left in charge.

# Exits with status 2 when the named environment variable is unset or empty.
need() {
  name="$1"
  val="$(printenv "$name" 2>/dev/null || true)"
  if [ -z "$val" ]; then
    echo "missing env: $name" >&2
    exit 2
  fi
}

need S3_ENDPOINT
need S3_REGION
need S3_BUCKET_DOCS

# Normalize the prefix so it always ends with exactly the "/" we append to.
S3_PREFIX_DOCS="${S3_PREFIX_DOCS:-docs/}"
case "$S3_PREFIX_DOCS" in
  */) ;;
  *) S3_PREFIX_DOCS="${S3_PREFIX_DOCS}/" ;;
esac

S3_FORCE_PATH_STYLE="${S3_FORCE_PATH_STYLE:-false}"

if ! command -v aws >/dev/null 2>&1; then
  echo "missing dependency: aws (AWS CLI v2 recommended)" >&2
  exit 2
fi

export AWS_EC2_METADATA_DISABLED=true
export AWS_DEFAULT_REGION="$S3_REGION"
export AWS_REGION="$S3_REGION"

# Temp files are registered here so a single EXIT trap removes whatever exists.
tmp=""
aws_cfg=""
cleanup() {
  if [ -n "$tmp" ]; then
    rm -f "$tmp" >/dev/null 2>&1 || true
  fi
  if [ -n "$aws_cfg" ]; then
    rm -f "$aws_cfg" >/dev/null 2>&1 || true
  fi
}
trap cleanup EXIT

if [ "$S3_FORCE_PATH_STYLE" = "true" ] || [ "$S3_FORCE_PATH_STYLE" = "1" ]; then
  if [ -n "${AWS_PROFILE:-}" ] || [ -n "${AWS_CONFIG_FILE:-}" ] || [ -f "${HOME:-}/.aws/config" ]; then
    # Replacing the config file would drop the caller's profile settings.
    echo "warning: S3_FORCE_PATH_STYLE not applied because an AWS CLI config is already in use; set 's3.addressing_style = path' there instead" >&2
  else
    aws_cfg="$(mktemp)"
    printf '[default]\ns3 =\n    addressing_style = path\n' >"$aws_cfg"
    export AWS_CONFIG_FILE="$aws_cfg"
  fi
fi

# Unique smoke-test key: epoch seconds plus this shell's PID.
key="${S3_PREFIX_DOCS}smoke/$(date +%s)-$$.txt"
tmp="$(mktemp)"
printf "cloudlysis s3 verify\n" >"$tmp"

echo "== docs bucket head/list prefix =="
aws s3api head-bucket --endpoint-url "$S3_ENDPOINT" --bucket "$S3_BUCKET_DOCS" >/dev/null
aws s3api list-objects-v2 --endpoint-url "$S3_ENDPOINT" --bucket "$S3_BUCKET_DOCS" --prefix "$S3_PREFIX_DOCS" --max-items 1 >/dev/null

echo "== put/get/delete object under prefix =="
aws s3api put-object --endpoint-url "$S3_ENDPOINT" --bucket "$S3_BUCKET_DOCS" --key "$key" --body "$tmp" >/dev/null
aws s3api get-object --endpoint-url "$S3_ENDPOINT" --bucket "$S3_BUCKET_DOCS" --key "$key" /dev/null >/dev/null
aws s3api delete-object --endpoint-url "$S3_ENDPOINT" --bucket "$S3_BUCKET_DOCS" --key "$key" >/dev/null

echo "ok: verified S3 docs permissions for s3://$S3_BUCKET_DOCS/$S3_PREFIX_DOCS"
|
||||
|
||||
@@ -11,3 +11,7 @@ ensure_secret() {
|
||||
}
|
||||
|
||||
ensure_secret grafana_admin_password "${GRAFANA_ADMIN_PASSWORD:-admin}"
|
||||
|
||||
# Control plane S3 document storage (dev defaults: MinIO in swarm/stacks/control-plane.yml).
|
||||
ensure_secret control_s3_access_key_id "${CONTROL_S3_ACCESS_KEY_ID:-minioadmin}"
|
||||
ensure_secret control_s3_secret_access_key "${CONTROL_S3_SECRET_ACCESS_KEY:-minioadmin}"
|
||||
|
||||
@@ -36,3 +36,64 @@ curl -sS -X POST \
|
||||
-H "authorization: Bearer <token>" \
|
||||
http://localhost:8080/admin/runner/drain?wait_ms=0
|
||||
```
|
||||
|
||||
## Document Storage via Control API (S3-backed)
|
||||
List documents for a tenant (Control API uses UUID tenant ids):
|
||||
```bash
|
||||
curl -sS \
|
||||
-H "authorization: Bearer <token>" \
|
||||
-H "x-tenant-id: <tenant-uuid>" \
|
||||
"http://localhost:38080/admin/v1/tenants/<tenant-uuid>/docs"
|
||||
```
|
||||
|
||||
Upload a document (stores at `docs/<tenant>/<type>/<id>/<filename>`):
|
||||
```bash
|
||||
curl -sS -X PUT \
|
||||
-H "authorization: Bearer <token>" \
|
||||
-H "x-tenant-id: <tenant-uuid>" \
|
||||
-H "content-type: application/octet-stream" \
|
||||
--data-binary @./bundle.tar.gz \
|
||||
"http://localhost:38080/admin/v1/tenants/<tenant-uuid>/docs/deployments/<doc-id>/bundle.tar.gz"
|
||||
```
|
||||
|
||||
Download by object key (streamed proxy; key must belong to the tenant prefix):
|
||||
```bash
|
||||
curl -sS -o ./out.tar.gz \
|
||||
-H "authorization: Bearer <token>" \
|
||||
-H "x-tenant-id: <tenant-uuid>" \
|
||||
"http://localhost:38080/admin/v1/tenants/<tenant-uuid>/docs/object/<url-encoded-key>"
|
||||
```
|
||||
|
||||
Delete by object key (requires `control:write`):
|
||||
```bash
|
||||
curl -sS -X DELETE \
|
||||
-H "authorization: Bearer <token>" \
|
||||
-H "x-tenant-id: <tenant-uuid>" \
|
||||
"http://localhost:38080/admin/v1/tenants/<tenant-uuid>/docs/object/<url-encoded-key>"
|
||||
```
|
||||
|
||||
Presign upload (JSON body; returns `PUT` URL and `key`):
|
||||
```bash
|
||||
curl -sS -X POST \
|
||||
-H "authorization: Bearer <token>" \
|
||||
-H "content-type: application/json" \
|
||||
-H "x-tenant-id: <tenant-uuid>" \
|
||||
-d '{"doc_type":"deployments","doc_id":"<doc-id>","filename":"bundle.tar.gz","content_type":"application/gzip"}' \
|
||||
"http://localhost:38080/admin/v1/tenants/<tenant-uuid>/docs/presign/upload"
|
||||
```
|
||||
|
||||
Presign download (JSON body with full `key` under that tenant):
|
||||
```bash
|
||||
curl -sS -X POST \
|
||||
-H "authorization: Bearer <token>" \
|
||||
-H "content-type: application/json" \
|
||||
-H "x-tenant-id: <tenant-uuid>" \
|
||||
-d '{"key":"docs/<tenant-uuid>/deployments/<doc-id>/bundle.tar.gz"}' \
|
||||
"http://localhost:38080/admin/v1/tenants/<tenant-uuid>/docs/presign/download"
|
||||
```
|
||||
|
||||
Environment variables for the Control API (the same settings are also accepted under `S3_*` names without the `CONTROL_` prefix; see `S3_PLAN.md`):
|
||||
- `CONTROL_S3_ENDPOINT` — S3 API base URL used by the server client
|
||||
- `CONTROL_S3_PUBLIC_ENDPOINT` — optional; host used in presigned URLs when browsers must reach a different host than the API (e.g. `localhost:9000` vs `minio:9000` in compose)
|
||||
- `CONTROL_S3_REGION`, `CONTROL_S3_BUCKET_DOCS`, `CONTROL_S3_PREFIX_DOCS`, `CONTROL_S3_FORCE_PATH_STYLE`, `CONTROL_S3_INSECURE`
|
||||
- Secrets may be mounted as files: `CONTROL_S3_ACCESS_KEY_ID_FILE`, `CONTROL_S3_SECRET_ACCESS_KEY_FILE`
|
||||
|
||||
11
docs/usage/s3_lifecycle_docs_default.json
Normal file
11
docs/usage/s3_lifecycle_docs_default.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"Rules": [
|
||||
{
|
||||
"ID": "AbortIncompleteMultipartUploads",
|
||||
"Status": "Enabled",
|
||||
"AbortIncompleteMultipartUpload": {
|
||||
"DaysAfterInitiation": 7
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
22
docs/usage/s3_policy_docs_prefix.json
Normal file
22
docs/usage/s3_policy_docs_prefix.json
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"Version": "2012-10-17",
|
||||
"Statement": [
|
||||
{
|
||||
"Sid": "DocsListPrefixOnly",
|
||||
"Effect": "Allow",
|
||||
"Action": ["s3:ListBucket"],
|
||||
"Resource": "arn:aws:s3:::${S3_BUCKET_DOCS}",
|
||||
"Condition": {
|
||||
"StringLike": {
|
||||
"s3:prefix": ["${S3_PREFIX_DOCS}*", "${S3_PREFIX_DOCS}"]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"Sid": "DocsObjectRWUnderPrefix",
|
||||
"Effect": "Allow",
|
||||
"Action": ["s3:GetObject", "s3:PutObject", "s3:DeleteObject"],
|
||||
"Resource": "arn:aws:s3:::${S3_BUCKET_DOCS}/${S3_PREFIX_DOCS}*"
|
||||
}
|
||||
]
|
||||
}
|
||||
28
observability/docker-compose.s3.yml
Normal file
28
observability/docker-compose.s3.yml
Normal file
@@ -0,0 +1,28 @@
|
||||
services:
|
||||
loki:
|
||||
command:
|
||||
- "-config.file=/etc/loki/config.s3.yml"
|
||||
- "-config.expand-env=true"
|
||||
environment:
|
||||
# Loki expects a single URL for S3-compatible endpoints:
|
||||
# s3://<access_key>:<secret_key>@<host>:<port>/<bucket>
|
||||
LOKI_S3_URL: "s3://minioadmin:minioadmin@minio:9000/cloudlysis-loki"
|
||||
volumes:
|
||||
- ./loki/config.s3.yml:/etc/loki/config.s3.yml:ro
|
||||
depends_on:
|
||||
- minio-init
|
||||
|
||||
tempo:
|
||||
command:
|
||||
- "-config.file=/etc/tempo/config.s3.yml"
|
||||
- "-config.expand-env=true"
|
||||
environment:
|
||||
TEMPO_S3_ENDPOINT: "minio:9000"
|
||||
TEMPO_S3_BUCKET: "cloudlysis-tempo"
|
||||
TEMPO_S3_ACCESS_KEY: "minioadmin"
|
||||
TEMPO_S3_SECRET_KEY: "minioadmin"
|
||||
volumes:
|
||||
- ./tempo/config.s3.yml:/etc/tempo/config.s3.yml:ro
|
||||
depends_on:
|
||||
- minio-init
|
||||
|
||||
@@ -36,6 +36,7 @@ services:
|
||||
- "3200:3200"
|
||||
- "4317:4317"
|
||||
- "4318:4318"
|
||||
- "9411:9411"
|
||||
command:
|
||||
- "-config.file=/etc/tempo/config.yml"
|
||||
volumes:
|
||||
|
||||
30
observability/loki/config.s3.yml
Normal file
30
observability/loki/config.s3.yml
Normal file
@@ -0,0 +1,30 @@
|
||||
auth_enabled: false
|
||||
|
||||
server:
|
||||
http_listen_port: 3100
|
||||
|
||||
common:
|
||||
path_prefix: /loki
|
||||
replication_factor: 1
|
||||
ring:
|
||||
kvstore:
|
||||
store: inmemory
|
||||
|
||||
schema_config:
|
||||
configs:
|
||||
- from: 2025-01-01
|
||||
store: tsdb
|
||||
object_store: s3
|
||||
schema: v13
|
||||
index:
|
||||
prefix: index_
|
||||
period: 24h
|
||||
|
||||
storage_config:
|
||||
aws:
|
||||
# MinIO (docker compose). Use `-config.expand-env=true`.
|
||||
s3: ${LOKI_S3_URL}
|
||||
s3forcepathstyle: true
|
||||
|
||||
limits_config:
|
||||
allow_structured_metadata: true
|
||||
37
observability/tempo/config.s3.yml
Normal file
37
observability/tempo/config.s3.yml
Normal file
@@ -0,0 +1,37 @@
|
||||
server:
|
||||
http_listen_port: 3200
|
||||
|
||||
distributor:
|
||||
receivers:
|
||||
otlp:
|
||||
protocols:
|
||||
grpc:
|
||||
endpoint: 0.0.0.0:4317
|
||||
http:
|
||||
endpoint: 0.0.0.0:4318
|
||||
zipkin:
|
||||
endpoint: 0.0.0.0:9411
|
||||
|
||||
ingester:
|
||||
max_block_bytes: 1000000
|
||||
trace_idle_period: 10s
|
||||
|
||||
compactor:
|
||||
compaction:
|
||||
block_retention: 24h
|
||||
|
||||
storage:
|
||||
trace:
|
||||
backend: s3
|
||||
s3:
|
||||
# MinIO (docker compose). Use `-config.expand-env=true`.
|
||||
endpoint: ${TEMPO_S3_ENDPOINT}
|
||||
bucket: ${TEMPO_S3_BUCKET}
|
||||
access_key: ${TEMPO_S3_ACCESS_KEY}
|
||||
secret_key: ${TEMPO_S3_SECRET_KEY}
|
||||
insecure: true
|
||||
|
||||
overrides:
|
||||
defaults:
|
||||
metrics_generator:
|
||||
processors: []
|
||||
@@ -9,6 +9,8 @@ distributor:
|
||||
endpoint: 0.0.0.0:4317
|
||||
http:
|
||||
endpoint: 0.0.0.0:4318
|
||||
zipkin:
|
||||
endpoint: 0.0.0.0:9411
|
||||
|
||||
ingester:
|
||||
max_block_bytes: 1000000
|
||||
|
||||
399
plans/SUBSCRIPTIONS_PLAN.md
Normal file
399
plans/SUBSCRIPTIONS_PLAN.md
Normal file
@@ -0,0 +1,399 @@
|
||||
# Tenant Subscriptions Plan (1 Tenant = 1 Subscription)
|
||||
|
||||
## Principles
|
||||
- Tenant-based billing is built-in and enforced consistently:
|
||||
- Exactly one “primary” subscription per tenant.
|
||||
- Subscription state is authoritative for entitlements.
|
||||
- Provider-agnostic core with a single “billing provider” adapter:
|
||||
- Stripe or Polar can be plugged in without rewriting the rest of the platform.
|
||||
- Tasks are prioritized by ordering:
|
||||
- Within each milestone, tasks are listed top-to-bottom in priority order.
|
||||
- Each milestone is stop-the-line gated:
|
||||
- All tasks completed
|
||||
- All milestone tests pass
|
||||
- Workspace verification commands pass
|
||||
- Webhooks are treated as untrusted input:
|
||||
- Verified signatures
|
||||
- Idempotent processing
|
||||
- No secrets are ever committed or logged
|
||||
- Incremental development progression:
|
||||
- Start with local-only, file-backed state + mocked provider
|
||||
- Add real provider sandbox integration behind env-gated tests
|
||||
- Add UI self-service once the state machine is stable
|
||||
- Enforce entitlements only after billing state is reliable
|
||||
|
||||
## Goals
|
||||
- Allow a tenant admin to self-serve billing:
|
||||
- Start a subscription (checkout)
|
||||
- Manage subscription and payment method (customer portal)
|
||||
- View current plan and billing status
|
||||
- Support Stripe or Polar as the billing backend.
|
||||
- Provide a strict, test-gated integration that is safe to deploy incrementally.
|
||||
- Keep API routes consistent with existing Control API conventions:
|
||||
- Tenant-scoped routes are under `/admin/v1/tenants/{tenant_id}/...` and require auth + tenant header.
|
||||
- Provider webhooks are unauthenticated but signature-verified.
|
||||
|
||||
## Non-Goals (Initial)
|
||||
- Multiple subscriptions per tenant.
|
||||
- Per-seat billing.
|
||||
- Multiple concurrent plans per tenant.
|
||||
- Usage-based metered billing (can be added later as a separate plan).
|
||||
|
||||
## Definitions
|
||||
### Tenant
|
||||
A logical customer boundary identified by `tenant_id` (UUID) and carried via the tenant header already used by Control API endpoints.
|
||||
|
||||
### Tenant Admin (Actor)
|
||||
An authenticated principal with permission to manage billing for a tenant:
|
||||
- Read: requires `control:read`
|
||||
- Mutate (checkout/portal): requires `control:write`
|
||||
|
||||
### Subscription
|
||||
The provider subscription object mapped 1:1 to a tenant, with a local cached state:
|
||||
- `status`: `trialing | active | past_due | paused | canceled | incomplete`
|
||||
- `plan`: internal plan identifier (maps to provider price/product)
|
||||
- `current_period_end` / `cancel_at_period_end`
|
||||
|
||||
### Entitlements
|
||||
An internal set of feature gates derived from the subscription plan and status:
|
||||
- Examples: max deployments, max runners, S3 docs enabled, support tier, etc.
|
||||
|
||||
### Billing Provider
|
||||
An adapter that supplies:
|
||||
- Checkout session creation
|
||||
- Portal session creation
|
||||
- Webhook event verification + parsing
|
||||
- Optional reconciliation reads (fetch subscription/customer state)
|
||||
|
||||
## Configuration Contract (Control API)
|
||||
### Common Settings
|
||||
- `CONTROL_BILLING_PROVIDER` = `stripe | polar`
|
||||
- `CONTROL_BILLING_STATE_PATH` (default `billing/dev.json`)
|
||||
- `CONTROL_BILLING_SELF_URL` (default `CONTROL_SELF_URL`, used for return URLs)
|
||||
- `CONTROL_BILLING_ENFORCEMENT` = `0 | 1` (default `0`, gates entitlement enforcement)
|
||||
- `CONTROL_BILLING_WEBHOOK_PUBLIC_URL` (optional; if unset, derive from `CONTROL_BILLING_SELF_URL`)
|
||||
- `CONTROL_BILLING_ALLOWED_RETURN_ORIGINS` (comma-separated; optional safety check for return URLs)
|
||||
|
||||
### Stripe Settings (if provider = stripe)
|
||||
- `CONTROL_STRIPE_SECRET_KEY` (secret)
|
||||
- `CONTROL_STRIPE_WEBHOOK_SECRET` (secret)
|
||||
- `CONTROL_STRIPE_PRICE_ID_<PLAN>` (e.g. `CONTROL_STRIPE_PRICE_ID_PRO`, env mapping per plan)
|
||||
- Optional:
|
||||
- `CONTROL_STRIPE_CUSTOMER_PORTAL_CONFIGURATION_ID`
|
||||
|
||||
### Polar Settings (if provider = polar)
|
||||
- `CONTROL_POLAR_ACCESS_TOKEN` (secret)
|
||||
- `CONTROL_POLAR_WEBHOOK_SECRET` (secret, if Polar provides a webhook signing secret)
|
||||
- `CONTROL_POLAR_PRODUCT_ID_<PLAN>` or equivalent plan mapping
|
||||
|
||||
## Data Model (MVP: File-Backed, Tenant-Scoped)
|
||||
Persist subscription mappings in a JSON file, similar to `PlacementStore`’s atomic write pattern, to support:
|
||||
- Local development without requiring a database
|
||||
- Deterministic integration tests
|
||||
- Simple operational inspection
|
||||
|
||||
*Note: For production, this should eventually adopt the `ConfigRegistry` pattern (e.g. backed by NATS KV) to avoid reliance on persistent file storage in Docker Swarm.*
|
||||
|
||||
|
||||
Suggested persisted structure:
|
||||
- `BillingStateFile`:
|
||||
- `revision` (uuid-based)
|
||||
- `tenants: { <tenant_id>: TenantBillingState }`
|
||||
- `TenantBillingState`:
|
||||
- `provider: stripe | polar`
|
||||
- `provider_customer_id`
|
||||
- `provider_subscription_id`
|
||||
- `provider_checkout_session_id` (last initiated; optional)
|
||||
- `status`
|
||||
- `plan`
|
||||
- `current_period_end`
|
||||
- `cancel_at_period_end`
|
||||
- `processed_webhook_event_ids` (bounded set; for idempotency)
|
||||
- `updated_at`
|
||||
|
||||
Idempotency constraints:
|
||||
- Webhook event IDs are stored per tenant, capped to a fixed size (e.g. last 256 IDs) to prevent unbounded growth.
|
||||
- Updates are monotonic:
|
||||
- compare provider event timestamps and ignore out-of-order events that would apply an “older” state transition.
|
||||
|
||||
## Target Architecture
|
||||
### Control API (Rust)
|
||||
- New billing routes:
|
||||
- `GET /admin/v1/tenants/{tenant_id}/billing` (read current billing + entitlements)
|
||||
- `POST /admin/v1/tenants/{tenant_id}/billing/checkout` (create checkout session URL)
|
||||
- `POST /admin/v1/tenants/{tenant_id}/billing/portal` (create portal session URL)
|
||||
- `POST /billing/v1/webhooks/{provider}` (provider webhook ingress; does not require auth)
|
||||
- Billing policy enforcement:
|
||||
- Entitlements derived server-side
|
||||
- Per-endpoint enforcement can be introduced gradually behind a feature flag
|
||||
|
||||
### Control UI (Vite + React)
|
||||
- New “Billing” page scoped to a tenant:
|
||||
- Current plan + status
|
||||
- “Upgrade / Subscribe” (checkout)
|
||||
- “Manage billing” (portal)
|
||||
- Clear error states when billing is not configured
|
||||
|
||||
## Provider Contract (Adapter Surface)
|
||||
Define a small provider interface so the platform remains stable even if switching providers:
|
||||
- `create_checkout_session(tenant_id, plan, return_url) -> url`
|
||||
- `create_portal_session(tenant_id, return_url) -> url`
|
||||
- `verify_and_parse_webhook(headers, body) -> BillingEvent`
|
||||
- `apply_event(event) -> TenantBillingState mutation`
|
||||
- Optional: `reconcile(tenant_id) -> TenantBillingState` (periodic correction)
|
||||
|
||||
Provider mapping requirements:
|
||||
- Persist tenant identity at the provider level:
|
||||
- Prefer setting `tenant_id` as provider customer metadata.
|
||||
- If customer metadata is not available, store an internal mapping from `provider_customer_id -> tenant_id`.
|
||||
- Ensure subscription creation is single-flight per tenant:
|
||||
- Prevent duplicate active subscriptions by checking local state before creating new sessions.
|
||||
- Use provider idempotency keys where supported (or internal idempotency per tenant+plan).
|
||||
|
||||
## Security & Abuse Controls
|
||||
- AuthZ:
|
||||
- Tenant routes require the existing tenant header to match the path tenant ID.
|
||||
- `control:read` required for viewing billing status.
|
||||
- `control:write` required for checkout and portal actions.
|
||||
- Return URL safety:
|
||||
- Only allow return URLs whose origin is in `CONTROL_BILLING_ALLOWED_RETURN_ORIGINS`.
|
||||
- Default return URL points to Control UI, derived from `CONTROL_BILLING_SELF_URL`.
|
||||
- Webhook safety & observability:
|
||||
- Verify signatures before parsing payloads.
|
||||
- Enforce JSON size limits on webhook bodies.
|
||||
- Always return `2xx` for already-processed events (idempotency).
|
||||
- Never log full webhook payloads.
|
||||
- Propagate provider event IDs as `x-correlation-id` in logs and spans to integrate seamlessly with the platform's VictoriaMetrics/Loki/Tempo observability stack (as standard in `DEVELOPMENT_PLAN.md`).
|
||||
|
||||
## API Contract (MVP)
|
||||
### GET /admin/v1/tenants/{tenant_id}/billing
|
||||
Returns a stable shape whether billing is configured or not:
|
||||
- `configured: bool`
|
||||
- `provider: stripe | polar | null`
|
||||
- `plan: string | null`
|
||||
- `status: string | null`
|
||||
- `current_period_end: string | null`
|
||||
- `cancel_at_period_end: bool | null`
|
||||
- `entitlements: { ... }`
|
||||
|
||||
### POST /admin/v1/tenants/{tenant_id}/billing/checkout
|
||||
Request:
|
||||
- `plan: string`
|
||||
- `return_path: string` (optional; appended to `CONTROL_BILLING_SELF_URL`)
|
||||
Response:
|
||||
- `url: string`
|
||||
|
||||
### POST /admin/v1/tenants/{tenant_id}/billing/portal
|
||||
Request:
|
||||
- `return_path: string` (optional)
|
||||
Response:
|
||||
- `url: string`
|
||||
|
||||
### POST /billing/v1/webhooks/{provider}
|
||||
Provider-defined payload; must:
|
||||
- verify signature
|
||||
- map to internal events
|
||||
- update local billing state atomically
|
||||
|
||||
## Development Plan (Milestones by Dependency)
|
||||
|
||||
## Milestone 0: Billing Domain + Storage + Read API
|
||||
### Dependencies
|
||||
- None
|
||||
|
||||
### Goal
|
||||
Ship a provider-agnostic billing domain model and a safe persistence mechanism without contacting Stripe/Polar yet.
|
||||
|
||||
### Tasks
|
||||
- [x] Add billing domain types in Control API:
|
||||
- [x] `Plan`, `SubscriptionStatus`, `Entitlements`
|
||||
- [x] provider-agnostic `BillingEvent` enum for webhook mapping
|
||||
- [x] Add `BillingStore` patterned after `PlacementStore`/`ConfigRegistry`:
|
||||
- [x] atomic write (tmp + rename) for dev file fallback
|
||||
- [x] in-process locking
|
||||
- [x] stable JSON schema + `revision`
|
||||
- [x] Add `GET /admin/v1/tenants/{tenant_id}/billing`:
|
||||
- [x] permission gate: requires `control:read`
|
||||
- [x] tenant header enforcement consistent with existing routes
|
||||
- [x] returns “not configured” when no subscription exists
|
||||
- [x] Add a mock billing provider for tests:
|
||||
- [x] deterministic checkout/portal URLs
|
||||
- [x] deterministic webhook events without real signatures
|
||||
|
||||
### Required Tests (Gate)
|
||||
- [x] Workspace verification commands
|
||||
- [x] Unit tests (Control API):
|
||||
- [x] billing state read/write roundtrip (atomic update)
|
||||
- [x] entitlement derivation from `status + plan`
|
||||
- [x] tenant isolation checks for billing routes (header vs path mismatch)
|
||||
- [x] permission gates: `control:read` vs `control:write`
|
||||
|
||||
## Milestone 1: Checkout Flow (Create Subscription)
|
||||
### Dependencies
|
||||
- Milestone 0
|
||||
|
||||
### Goal
|
||||
Allow tenant admins to initiate a subscription via the provider’s hosted checkout.
|
||||
|
||||
### Tasks
|
||||
- [x] Add provider configuration parsing and validation:
|
||||
- [x] strict env parsing with actionable errors
|
||||
- [x] plan-to-price/product mapping via env
|
||||
- [x] Add `POST /admin/v1/tenants/{tenant_id}/billing/checkout`:
|
||||
- [x] permission gate: requires `control:write`
|
||||
- [x] create or reuse provider customer for the tenant
|
||||
- [x] create checkout session and return redirect URL
|
||||
- [x] include tenant identifier in provider metadata (for webhook routing)
|
||||
- [x] internal idempotency: do not create a new checkout if tenant already has an active/trialing subscription
|
||||
- [x] Define return URL contract:
|
||||
- [x] checkout success/cancel landing routes in Control UI
|
||||
- [x] validate `return_path` against `CONTROL_BILLING_ALLOWED_RETURN_ORIGINS`
|
||||
|
||||
### Required Tests (Gate)
|
||||
- [x] Workspace verification commands
|
||||
- [x] Unit tests (Control API):
|
||||
- [x] config validation (missing keys, invalid mapping)
|
||||
- [x] provider request construction (return URLs, metadata)
|
||||
- [x] checkout idempotency rules per tenant
|
||||
- [x] Env-gated integration tests (sandbox; auto-skip unless env vars are set):
|
||||
- [x] `CONTROL_TEST_STRIPE=1` or `CONTROL_TEST_POLAR=1` starts checkout and returns a valid URL
|
||||
- [x] tenant metadata roundtrips through the provider (where supported)
|
||||
|
||||
## Milestone 2: Webhook Ingestion + Subscription State Sync
|
||||
### Dependencies
|
||||
- Milestone 1
|
||||
|
||||
### Goal
|
||||
Make subscription state reliable and idempotent by processing provider webhooks.
|
||||
|
||||
### Tasks
|
||||
- [x] Add `POST /billing/v1/webhooks/{provider}` endpoint:
|
||||
- [x] signature verification
|
||||
- [x] event parsing to `BillingEvent`
|
||||
- [x] idempotency by provider event ID
|
||||
- [x] tenant mapping via provider metadata or stored `provider_customer_id`
|
||||
- [x] Map provider statuses to internal `SubscriptionStatus`:
|
||||
- [x] `trialing`, `active`, `past_due`, `canceled`, etc.
|
||||
- [x] Store updates in `BillingStore` and expose via `GET /admin/v1/tenants/{tenant_id}/billing`
|
||||
- [x] ensure updates are monotonic (ignore older provider event timestamps)
|
||||
|
||||
### Required Tests (Gate)
|
||||
- [x] Workspace verification commands
|
||||
- [x] Unit tests (Control API):
|
||||
- [x] webhook signature verification (good/bad signatures)
|
||||
- [x] idempotency behavior (same event twice does not double-apply)
|
||||
- [x] status mapping tables are stable
|
||||
- [x] out-of-order events do not regress state
|
||||
- [x] Docker/local integration (optional, if a provider CLI is used; env-gated):
|
||||
- [x] `CONTROL_TEST_STRIPE_CLI=1` runs a local webhook-forward flow and verifies state update
|
||||
|
||||
## Milestone 3: Customer Portal (Self-Management)
|
||||
### Dependencies
|
||||
- Milestone 2
|
||||
|
||||
### Goal
|
||||
Provide a “Manage billing” path for tenants to self-serve changes without operator involvement.
|
||||
|
||||
### Tasks
|
||||
- [x] Add `POST /admin/v1/tenants/{tenant_id}/billing/portal`:
|
||||
- [x] create provider portal session and return URL
|
||||
- [x] ensure tenant ownership checks (header vs path)
|
||||
- [x] permission gate: requires `control:write`
|
||||
- [ ] Add Control UI billing page:
|
||||
- [ ] show plan/status + renewal date
|
||||
- [ ] “Subscribe / Upgrade” and “Manage billing” actions
|
||||
- [ ] show “Billing not configured” when provider is disabled
|
||||
|
||||
### Required Tests (Gate)
|
||||
- [x] Workspace verification commands
|
||||
- [ ] UI unit tests (Vitest):
|
||||
- [ ] billing page renders from mocked API state
|
||||
- [ ] action buttons call the expected API endpoints
|
||||
- [x] Env-gated integration tests:
|
||||
- [x] portal session URL is generated and is HTTPS
|
||||
|
||||
## Milestone 4: Entitlements + Enforcement (Controlled Rollout)
|
||||
### Dependencies
|
||||
- Milestone 2 (Milestone 3 recommended for admin UX)
|
||||
|
||||
### Goal
|
||||
Gate selected platform capabilities by tenant subscription state while maintaining a safe rollout path.
|
||||
|
||||
### Tasks
|
||||
- [x] Define initial entitlement set and defaults:
|
||||
- [x] choose “free/trial” behavior (read-only vs limited capability)
|
||||
- [x] define grace period behavior for `past_due`
|
||||
- [x] Add enforcement points in Control API:
|
||||
- [x] middleware/helper to require entitlement per route
|
||||
- [x] first enforcement target: a low-risk, tenant-scoped “write” capability
|
||||
- [x] feature flag to disable enforcement globally during rollout
|
||||
- [x] Add audit log entries for billing enforcement denials (no PII, no secrets)
|
||||
|
||||
### Required Tests (Gate)
|
||||
- [x] Workspace verification commands
|
||||
- [x] Unit tests (Control API):
|
||||
- [x] entitlement checks per route return correct HTTP status
|
||||
- [x] grace period handling
|
||||
- [x] Integration tests:
|
||||
- [x] a tenant without active subscription cannot perform the gated operation
|
||||
- [x] an active tenant can perform the same operation
|
||||
|
||||
## Milestone 5: Reconciliation + Operational Hardening
|
||||
### Dependencies
|
||||
- Milestone 2
|
||||
|
||||
### Goal
|
||||
Make billing state resilient against missed webhooks and operational drift.
|
||||
|
||||
### Tasks
|
||||
- [x] Add a reconciliation job:
|
||||
- [x] periodically fetch subscription state from provider for tenants
|
||||
- [x] correct local state and emit audit entries
|
||||
- [x] Add metrics:
|
||||
- [x] webhook processing latency, verification failures, idempotency hits
|
||||
- [x] tenant count by subscription status
|
||||
- [x] Add robust error handling:
|
||||
- [x] structured errors with safe messages
|
||||
- [x] no provider payloads logged verbatim
|
||||
- [x] Add provider API timeout/retry policy:
|
||||
- [x] short timeouts with bounded retries
|
||||
- [x] no retries on webhook signature failures
|
||||
|
||||
### Required Tests (Gate)
|
||||
- [x] Workspace verification commands
|
||||
- [x] Unit tests:
|
||||
- [x] reconciliation updates state correctly
|
||||
- [x] provider errors do not corrupt local state
|
||||
|
||||
## Milestone 6: Production Rollout
|
||||
### Dependencies
|
||||
- Milestone 3 (recommended), Milestone 4 (if enforcing)
|
||||
|
||||
### Goal
|
||||
Deploy billing in production with safe secret handling and verifiable smoke checks.
|
||||
|
||||
### Tasks
|
||||
- [x] Provision provider configuration (operator):
|
||||
- [x] create products/prices (Stripe) or products/plans (Polar)
|
||||
- [x] configure webhook endpoint + secret
|
||||
- [x] set up customer portal settings (Stripe) if used
|
||||
- [x] Configure Swarm secrets and stack env:
|
||||
- [x] provider API keys and webhook secret stored as Swarm secrets
|
||||
- [x] `CONTROL_BILLING_PROVIDER`, `CONTROL_BILLING_STATE_PATH`
|
||||
- [x] `CONTROL_BILLING_ALLOWED_RETURN_ORIGINS` set to production UI origins
|
||||
- [x] Define rollback plan:
|
||||
- [x] disable enforcement feature flag
|
||||
- [x] keep billing read-only operational
|
||||
|
||||
### Required Tests (Gate)
|
||||
- [x] Workspace verification commands
|
||||
- [x] Production smoke (env-gated):
|
||||
- [x] create checkout session for a test tenant
|
||||
- [x] process a webhook event and verify tenant state updates
|
||||
- [x] generate a portal session URL
|
||||
|
||||
## Workspace Verification Commands
|
||||
- `cargo fmt --check`
|
||||
- `cargo clippy --workspace --all-targets -- -D warnings`
|
||||
- `cargo test --workspace`
|
||||
- `cd control/ui && npm ci && npm run lint && npm run typecheck && npm run test && npm run build`
|
||||
27
runner/config/effects.prod.yaml
Normal file
27
runner/config/effects.prod.yaml
Normal file
@@ -0,0 +1,27 @@
|
||||
# Production effects manifest for the runner.
# Every entry gives an effect a `name`, the `provider` that implements it,
# and a provider-specific `config` mapping.
effects:
  # Effect that needs no configuration.
  - name: noop
    provider: noop
    config: {}

  # Outbound email. Choose ONE backend for production: keep exactly one of
  # the option blocks below uncommented.
  - name: send_email
    provider: email
    config:
      # Option A) SMTP (recommended when you have an SMTP relay).
      # `url_env` names an environment variable; presumably the runner reads
      # the SMTP connection URL from it -- confirm against the runner.
      backend: smtp
      url_env: RUNNER_SMTP_URL

      # Option B) Resend:
      # backend: resend
      # api_key_env: RESEND_API_KEY
      # from: "no-reply@example.com"

      # Option C) Postmark:
      # backend: postmark
      # server_token_env: POSTMARK_SERVER_TOKEN
      # from: "no-reply@example.com"

      # Option D) AWS SES:
      # backend: ses
      # region: "eu-central-1"
      # from: "no-reply@example.com"
|
||||
@@ -2,3 +2,8 @@ effects:
|
||||
- name: noop
|
||||
provider: noop
|
||||
config: {}
|
||||
- name: send_email
|
||||
provider: email
|
||||
config:
|
||||
backend: smtp
|
||||
url_env: RUNNER_SMTP_URL
|
||||
|
||||
42
scripts/billing/README_ROLLBACK.md
Normal file
42
scripts/billing/README_ROLLBACK.md
Normal file
@@ -0,0 +1,42 @@
|
||||
# Rollback Plan: Billing Enforced Gating
|
||||
|
||||
This document outlines the emergency procedure for disabling subscription-based entitlement gating in the Cloudlysis Control Plane.
|
||||
|
||||
## Symptoms
|
||||
- Tenants receiving `402 Payment Required` errors even with valid active subscriptions.
|
||||
- `JobEngine` refusing valid configuration updates due to incorrect resource limit enforcement.
|
||||
- S3 Document Storage being inaccessible for authorized Pro/Enterprise tenants.
|
||||
|
||||
## Emergency Rollback Steps
|
||||
|
||||
### 1. Disable Global Enforcement
|
||||
The quickest way to restore service is to disable enforcement via the environment variable toggle. This preserves all billing data and synchronization logic but bypasses the "Payment Required" blocks.
|
||||
|
||||
```yaml
|
||||
# In your Swarm stack file (e.g. control-plane.yml):
|
||||
services:
|
||||
control-api:
|
||||
environment:
|
||||
CONTROL_BILLING_ENFORCEMENT_ENABLED: "false"
|
||||
```
|
||||
|
||||
### 2. Deploy the Update
|
||||
Deploy the stack to apply the change:
|
||||
```bash
|
||||
docker stack deploy -c control-plane.yml control
|
||||
```
|
||||
|
||||
### 3. Verify System State
|
||||
Confirm that tenants can now perform previously blocked operations (e.g., uploading documents or updating deployment configurations).
|
||||
|
||||
## Forensic Analysis
|
||||
Once the system is stable, perform the following:
|
||||
1. **Check Reconciliation Logs**: Look for `failed to fetch subscription` or `failed to apply reconciled billing event`.
|
||||
2. **Verify Metrics**: Check `billing_webhook_requests_total{status="error"}` in Prometheus.
|
||||
3. **Audit Drift**: Compare the `CONTROL_BILLING_STATE_PATH` file content against the billing provider's dashboard (Stripe or Polar) for the affected `tenant_id`.
|
||||
|
||||
## Recovery
|
||||
To re-enable gating (after the root cause is resolved):
|
||||
1. Set `CONTROL_BILLING_ENFORCEMENT_ENABLED=true`.
|
||||
2. Redeploy the stack.
|
||||
3. Monitor logs and metrics for 30 minutes.
|
||||
36
scripts/billing/swarm-secrets-sample.sh
Executable file
36
scripts/billing/swarm-secrets-sample.sh
Executable file
@@ -0,0 +1,36 @@
|
||||
#!/bin/bash
#
# swarm-secrets-sample.sh
#
# Demonstrates how to provision the Docker Swarm secrets used by the billing
# system. The Stripe secret key is read from stdin, so it is never passed as
# a command-line argument to this script.
#
# Usage:
#   ./swarm-secrets-sample.sh < /path/to/stripe_secret_key
#   echo "sk_test_..." | ./swarm-secrets-sample.sh
#
# Exit status: non-zero when stdin is a terminal, when no key is supplied,
# or when `docker secret create` fails.
set -euo pipefail

# 1. Stripe Secret Key (from Stripe Dashboard -> Developers -> API keys)
# Refuse to run with a terminal on stdin: `cat` would otherwise block
# silently, waiting for input without a prompt.
if [ -t 0 ]; then
    echo "Error: Please pipe the Stripe Secret Key into this script." >&2
    echo "Example: echo \"sk_test_...\" | $0" >&2
    exit 1
fi

# Command substitution already strips trailing newlines; additionally drop a
# trailing carriage return left behind by CRLF-terminated input.
STRIPE_SK="$(cat)"
STRIPE_SK="${STRIPE_SK%$'\r'}"

# An empty key would still create a (useless) secret, so fail early instead.
if [ -z "$STRIPE_SK" ]; then
    echo "Error: No Stripe Secret Key received on stdin." >&2
    exit 1
fi

echo "Creating 'control_stripe_secret_key' secret..."
# printf instead of echo: echo appends a newline that would become part of
# the stored secret and break consumers that read the file verbatim.
printf '%s' "$STRIPE_SK" | docker secret create control_stripe_secret_key -

# 2. Stripe Webhook Secret (from Stripe Dashboard -> Developers -> Webhooks -> [Endpoint])
# Note: You get this after configuring the endpoint in the dashboard.
echo "NOTE: Remember to also create 'control_stripe_webhook_secret' once you have it."
# printf '%s' "whsec_..." | docker secret create control_stripe_webhook_secret -

echo "Done. Update your stack file to reference these secrets:"
# The snippet uses the map form of `environment`, matching
# swarm/stacks/control-plane-prod.yml, and includes the top-level `secrets`
# declaration; without it the stack deploy fails on an undefined secret.
cat <<'EOF'

services:
  control-api:
    secrets:
      - control_stripe_secret_key
      - control_stripe_webhook_secret
    environment:
      CONTROL_STRIPE_SECRET_KEY_FILE: "/run/secrets/control_stripe_secret_key"
      CONTROL_STRIPE_WEBHOOK_SECRET_FILE: "/run/secrets/control_stripe_webhook_secret"

secrets:
  control_stripe_secret_key:
    external: true
  control_stripe_webhook_secret:
    external: true

EOF
|
||||
79
swarm/stacks/control-plane-prod.yml
Normal file
79
swarm/stacks/control-plane-prod.yml
Normal file
@@ -0,0 +1,79 @@
|
||||
# Production Swarm stack for the control plane: the control API and its UI.
# Values written as ${VAR:?missing} abort the deploy when VAR is unset or
# empty; values written as ${VAR:-default} fall back to the given default.
version: "3.9"

services:
  control-api:
    # NOTE(review): the tag falls back to `dev` when IMAGE_TAG is unset --
    # confirm that default is acceptable for a production stack.
    image: "${IMAGE_PREFIX:-cloudlysis}/control-api:${IMAGE_TAG:-dev}"
    networks:
      - internal
    ports:
      - target: 8080
        published: 8080
        protocol: tcp
        mode: ingress
    environment:
      CONTROL_API_ADDR: "0.0.0.0:8080"
      CONTROL_PLACEMENT_PATH: "/etc/control/placement.json"
      CONTROL_SWARM_STATE_PATH: "/etc/control/swarm_state.json"
      CONTROL_SELF_URL: "${CONTROL_SELF_URL:-http://control-api:8080}"

      # S3 document storage (Hetzner Object Storage in production).
      # Endpoint, region and bucket are mandatory; the access key pair is
      # referenced by path to the secret files listed under `secrets` below.
      CONTROL_S3_ENDPOINT: "${CONTROL_S3_ENDPOINT:?missing}"
      CONTROL_S3_PUBLIC_ENDPOINT: "${CONTROL_S3_PUBLIC_ENDPOINT:-}"
      CONTROL_S3_REGION: "${CONTROL_S3_REGION:?missing}"
      CONTROL_S3_ACCESS_KEY_ID_FILE: "/run/secrets/control_s3_access_key_id"
      CONTROL_S3_SECRET_ACCESS_KEY_FILE: "/run/secrets/control_s3_secret_access_key"
      CONTROL_S3_FORCE_PATH_STYLE: "${CONTROL_S3_FORCE_PATH_STYLE:-false}"
      CONTROL_S3_INSECURE: "${CONTROL_S3_INSECURE:-false}"
      CONTROL_S3_BUCKET_DOCS: "${CONTROL_S3_BUCKET_DOCS:?missing}"
      CONTROL_S3_PREFIX_DOCS: "${CONTROL_S3_PREFIX_DOCS:-docs/}"
    secrets:
      - control_s3_access_key_id
      - control_s3_secret_access_key
    configs:
      # Targets match CONTROL_PLACEMENT_PATH / CONTROL_SWARM_STATE_PATH above.
      - source: control_placement
        target: /etc/control/placement.json
      - source: control_swarm_state
        target: /etc/control/swarm_state.json
    deploy:
      replicas: 2
      restart_policy:
        condition: on-failure
      # One task at a time, new task started before the old one is stopped;
      # a failed update rolls the service back.
      update_config:
        parallelism: 1
        order: start-first
        failure_action: rollback

  control-ui:
    image: "${IMAGE_PREFIX:-cloudlysis}/control-ui:${IMAGE_TAG:-dev}"
    networks:
      - public
      - internal
    ports:
      - target: 80
        published: 8081
        protocol: tcp
        mode: ingress
    environment:
      VITE_CONTROL_API_URL: "${VITE_CONTROL_API_URL:-http://control-api:8080}"
    deploy:
      replicas: 2
      restart_policy:
        condition: on-failure

# NOTE(review): both config sources point at `dev.json` files even though
# this is the production stack -- confirm this is intentional.
configs:
  control_placement:
    file: ../../config/placement/dev.json
  control_swarm_state:
    file: ../../swarm/dev.json

# Declared external: these secrets must already exist in the Swarm before
# the stack is deployed.
secrets:
  control_s3_access_key_id:
    external: true
  control_s3_secret_access_key:
    external: true

networks:
  public:
    driver: overlay
  internal:
    driver: overlay
|
||||
@@ -1,6 +1,37 @@
|
||||
version: "3.9"
|
||||
|
||||
services:
|
||||
minio:
|
||||
image: minio/minio:RELEASE.2025-02-28T09-55-16Z
|
||||
command: ["server", "/data", "--console-address", ":9001"]
|
||||
environment:
|
||||
MINIO_ROOT_USER: minioadmin
|
||||
MINIO_ROOT_PASSWORD: minioadmin
|
||||
volumes:
|
||||
- minio_data:/data
|
||||
networks:
|
||||
- internal
|
||||
deploy:
|
||||
replicas: 1
|
||||
|
||||
minio-init:
|
||||
image: minio/mc:RELEASE.2025-02-21T16-00-46Z
|
||||
networks:
|
||||
- internal
|
||||
command:
|
||||
- /bin/sh
|
||||
- -c
|
||||
- |
|
||||
set -euo pipefail
|
||||
mc alias set local http://minio:9000 minioadmin minioadmin
|
||||
mc mb -p local/cloudlysis-docs || true
|
||||
mc anonymous set download local/cloudlysis-docs || true
|
||||
echo "minio init done"
|
||||
deploy:
|
||||
replicas: 1
|
||||
restart_policy:
|
||||
condition: none
|
||||
|
||||
control-api:
|
||||
image: ${IMAGE_PREFIX:-cloudlysis}/control-api:${IMAGE_TAG:-dev}
|
||||
environment:
|
||||
@@ -8,6 +39,18 @@ services:
|
||||
CONTROL_PLACEMENT_PATH: "/etc/control/placement.json"
|
||||
CONTROL_SWARM_STATE_PATH: "/etc/control/swarm_state.json"
|
||||
CONTROL_SELF_URL: "http://control-api:8080"
|
||||
CONTROL_S3_ENDPOINT: "${CONTROL_S3_ENDPOINT:-http://minio:9000}"
|
||||
CONTROL_S3_PUBLIC_ENDPOINT: "${CONTROL_S3_PUBLIC_ENDPOINT:-}"
|
||||
CONTROL_S3_REGION: "${CONTROL_S3_REGION:-us-east-1}"
|
||||
CONTROL_S3_ACCESS_KEY_ID_FILE: "/run/secrets/control_s3_access_key_id"
|
||||
CONTROL_S3_SECRET_ACCESS_KEY_FILE: "/run/secrets/control_s3_secret_access_key"
|
||||
CONTROL_S3_FORCE_PATH_STYLE: "${CONTROL_S3_FORCE_PATH_STYLE:-true}"
|
||||
CONTROL_S3_INSECURE: "${CONTROL_S3_INSECURE:-true}"
|
||||
CONTROL_S3_BUCKET_DOCS: "${CONTROL_S3_BUCKET_DOCS:-cloudlysis-docs}"
|
||||
CONTROL_S3_PREFIX_DOCS: "${CONTROL_S3_PREFIX_DOCS:-docs/}"
|
||||
secrets:
|
||||
- control_s3_access_key_id
|
||||
- control_s3_secret_access_key
|
||||
configs:
|
||||
- source: control_placement_dev
|
||||
target: /etc/control/placement.json
|
||||
@@ -44,12 +87,21 @@ services:
|
||||
|
||||
configs:
|
||||
control_placement_dev:
|
||||
file: ../../placement/dev.json
|
||||
file: ../../config/placement/dev.json
|
||||
control_swarm_state_dev:
|
||||
file: ../../swarm/dev.json
|
||||
|
||||
secrets:
|
||||
control_s3_access_key_id:
|
||||
external: true
|
||||
control_s3_secret_access_key:
|
||||
external: true
|
||||
|
||||
networks:
|
||||
public:
|
||||
driver: overlay
|
||||
internal:
|
||||
driver: overlay
|
||||
|
||||
volumes:
|
||||
minio_data:
|
||||
|
||||
@@ -89,6 +89,8 @@ services:
|
||||
RUNNER_STORAGE_PATH: /data/runner.mdbx
|
||||
RUNNER_SAGA_MANIFEST_PATH: /config/sagas.yaml
|
||||
RUNNER_EFFECTS_MANIFEST_PATH: /config/effects.yaml
|
||||
# For production, point this at a real relay (SMTP/Resend/Postmark/SES) via effects config.
|
||||
RUNNER_SMTP_URL: "${RUNNER_SMTP_URL:-}"
|
||||
volumes:
|
||||
- runner_saga_data:/data
|
||||
configs:
|
||||
@@ -107,6 +109,7 @@ services:
|
||||
RUNNER_HTTP_ADDR: 0.0.0.0:8081
|
||||
RUNNER_STORAGE_PATH: /data/runner.mdbx
|
||||
RUNNER_EFFECTS_MANIFEST_PATH: /config/effects.yaml
|
||||
RUNNER_SMTP_URL: "${RUNNER_SMTP_URL:-}"
|
||||
volumes:
|
||||
- runner_effect_data:/data
|
||||
configs:
|
||||
|
||||
Reference in New Issue
Block a user