diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 0000000..b7a0f2a --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,2 @@ +[registries.madapes] +index = "sparse+https://git.madapes.com/api/packages/madapes/cargo/" diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..b0eefa5 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,15 @@ +target/ +**/target/ + +node_modules/ +**/node_modules/ + +dist/ +**/dist/ + +.git/ +**/.git/ + +.DS_Store + +control/ui/.vite/ diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..d5e5b12 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,47 @@ +name: ci + +on: + push: + pull_request: + +jobs: + ui: + runs-on: ubuntu-latest + timeout-minutes: 10 + defaults: + run: + working-directory: control/ui + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-node@v4 + with: + node-version: 20 + cache: npm + cache-dependency-path: control/ui/package-lock.json + + - run: npm config set registry https://registry.npmjs.org + - run: npm ci + - run: npm run lint + - run: npm run typecheck + - run: npm run test + - run: npm run build + + rust: + runs-on: ubuntu-latest + timeout-minutes: 15 + defaults: + run: + working-directory: . + steps: + - uses: actions/checkout@v4 + + - uses: dtolnay/rust-toolchain@stable + with: + components: rustfmt, clippy + + - uses: Swatinem/rust-cache@v2 + + - run: cargo fmt --check + - run: cargo clippy --workspace --all-targets -- -D warnings + - run: cargo test --workspace diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b5b20d1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +target/ +/target-*/ +**/target/ +**/target-*/ + +node_modules/ +**/node_modules/ + +dist/ +**/dist/ + +.DS_Store diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..e872ba8 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,6050 @@ +# This file is automatically @generated by Cargo. 
+# It is not intended for manual editing. +version = 4 + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "aggregate" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-nats", + "axum 0.7.9", + "chrono", + "edge-logger-client", + "edge_storage", + "futures", + "lru", + "prost 0.13.5", + "protoc-bin-vendored", + "query_engine", + "runtime-function", + "serde", + "serde_json", + "serde_yaml", + "shared", + "tempfile", + "thiserror 2.0.18", + "tokio", + "tokio-stream", + "toml", + "tonic", + "tonic-build", + "tracing", + "tracing-subscriber", + "uuid", + "v8", +] + +[[package]] +name = "ahash" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" +dependencies = [ + "getrandom 0.2.17", + "once_cell", + "version_check", +] + +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "getrandom 0.3.4", + "once_cell", + "serde", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + 
"libc", +] + +[[package]] +name = "anstream" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" + +[[package]] +name = "anstyle-parse" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.2", +] + +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "api" +version = "0.1.0" +dependencies = [ + "axum 0.8.8", + "clap", + "jsonwebtoken", + "metrics 0.23.1", + "metrics-exporter-prometheus 0.16.2", + "reqwest", + "serde", + "serde_json", + "serde_yaml", + "thiserror 2.0.18", + "tokio", + "tower 0.5.3", + "tower-http 0.6.8", + "tracing", + "tracing-subscriber", + "uuid", +] + +[[package]] +name = "argon2" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "3c3610892ee6e0cbce8ae2700349fcf8f98adb0dbfbee85aec3c9179d29cc072" +dependencies = [ + "base64ct", + "blake2", + "cpufeatures", + "password-hash", +] + +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + +[[package]] +name = "assert-unchecked" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7330592adf847ee2e3513587b4db2db410a0d751378654e7e993d9adcbe5c795" + +[[package]] +name = "async-nats" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a798aab0c0203b31d67d501e5ed1f3ac6c36a329899ce47fc93c3bea53f3ae89" +dependencies = [ + "base64", + "bytes", + "futures", + "memchr", + "nkeys", + "nuid", + "once_cell", + "pin-project", + "portable-atomic", + "rand 0.8.5", + "regex", + "ring", + "rustls-native-certs 0.7.3", + "rustls-pemfile", + "rustls-webpki 0.102.8", + "serde", + "serde_json", + "serde_nanos", + "serde_repr", + "thiserror 1.0.69", + "time", + "tokio", + "tokio-rustls 0.26.4", + "tokio-util", + "tokio-websockets", + "tracing", + "tryhard", + "url", +] + +[[package]] +name = "async-stream" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" +dependencies = [ + "async-stream-impl", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-stream-impl" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" 
+dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "aws-config" +version = "1.8.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11493b0bad143270fb8ad284a096dd529ba91924c5409adeac856cc1bf047dbc" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-sdk-sts", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 1.4.0", + "time", + "tokio", + "tracing", + "url", +] + +[[package]] +name = "aws-credential-types" +version = "1.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f20799b373a1be121fe3005fba0c2090af9411573878f224df44b42727fcaf7" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "zeroize", +] + +[[package]] +name = "aws-lc-rs" +version = "1.16.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a054912289d18629dc78375ba2c3726a3afe3ff71b4edba9dedfca0e3446d1fc" +dependencies = [ + "aws-lc-sys", + "zeroize", +] + +[[package]] +name = "aws-lc-sys" +version = "0.39.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83a25cf98105baa966497416dbd42565ce3a8cf8dbfd59803ec9ad46f3126399" +dependencies = [ + "cc", + "cmake", + "dunce", + "fs_extra", +] + +[[package]] +name = "aws-runtime" +version = "1.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5fc0651c57e384202e47153c1260b84a9936e19803d747615edf199dc3b98d17" +dependencies = [ + "aws-credential-types", + "aws-sigv4", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "bytes-utils", + "fastrand", + "http 1.4.0", + "http-body 1.0.1", + "percent-encoding", + "pin-project-lite", + "tracing", + "uuid", +] + +[[package]] +name = "aws-sdk-sesv2" +version = "1.117.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e781e767ea1b8d404c99f5f641284e3afb928e909f04921ffcd8a84d5b969a86" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-observability", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sdk-sts" +version = "1.101.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab41ad64e4051ecabeea802d6a17845a91e83287e1dd249e6963ea1ba78c428a" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-observability", + "aws-smithy-query", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-smithy-xml", + "aws-types", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "regex-lite", + "tracing", +] + +[[package]] +name = "aws-sigv4" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0b660013a6683ab23797778e21f1f854744fdf05f68204b4cca4c8c04b5d1f4" +dependencies = [ + "aws-credential-types", + "aws-smithy-http", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "form_urlencoded", + "hex", + "hmac", + "http 0.2.12", + "http 1.4.0", + "percent-encoding", + "sha2", + "time", + "tracing", +] + +[[package]] +name = 
"aws-smithy-async" +version = "1.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ffcaf626bdda484571968400c326a244598634dc75fd451325a54ad1a59acfc" +dependencies = [ + "futures-util", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "aws-smithy-http" +version = "0.63.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba1ab2dc1c2c3749ead27180d333c42f11be8b0e934058fb4b2258ee8dbe5231" +dependencies = [ + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "bytes-utils", + "futures-core", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "percent-encoding", + "pin-project-lite", + "pin-utils", + "tracing", +] + +[[package]] +name = "aws-smithy-http-client" +version = "1.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a2f165a7feee6f263028b899d0a181987f4fa7179a6411a32a439fba7c5f769" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "h2 0.3.27", + "h2 0.4.13", + "http 0.2.12", + "http-body 0.4.6", + "hyper 0.14.32", + "hyper-rustls 0.24.2", + "pin-project-lite", + "rustls 0.21.12", + "rustls-native-certs 0.8.3", + "tokio", + "tracing", +] + +[[package]] +name = "aws-smithy-json" +version = "0.62.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9648b0bb82a2eedd844052c6ad2a1a822d1f8e3adee5fbf668366717e428856a" +dependencies = [ + "aws-smithy-types", +] + +[[package]] +name = "aws-smithy-observability" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06c2315d173edbf1920da8ba3a7189695827002e4c0fc961973ab1c54abca9c" +dependencies = [ + "aws-smithy-runtime-api", +] + +[[package]] +name = "aws-smithy-query" +version = "0.60.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a56d79744fb3edb5d722ef79d86081e121d3b9422cb209eb03aea6aa4f21ebd" +dependencies = [ + 
"aws-smithy-types", + "urlencoding", +] + +[[package]] +name = "aws-smithy-runtime" +version = "1.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "028999056d2d2fd58a697232f9eec4a643cf73a71cf327690a7edad1d2af2110" +dependencies = [ + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-http-client", + "aws-smithy-observability", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "fastrand", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "http-body 1.0.1", + "http-body-util", + "pin-project-lite", + "pin-utils", + "tokio", + "tracing", +] + +[[package]] +name = "aws-smithy-runtime-api" +version = "1.11.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "876ab3c9c29791ba4ba02b780a3049e21ec63dabda09268b175272c3733a79e6" +dependencies = [ + "aws-smithy-async", + "aws-smithy-types", + "bytes", + "http 0.2.12", + "http 1.4.0", + "pin-project-lite", + "tokio", + "tracing", + "zeroize", +] + +[[package]] +name = "aws-smithy-types" +version = "1.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d73dbfbaa8e4bc57b9045137680b958d274823509a360abfd8e1d514d40c95c" +dependencies = [ + "base64-simd", + "bytes", + "bytes-utils", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "http-body 1.0.1", + "http-body-util", + "itoa", + "num-integer", + "pin-project-lite", + "pin-utils", + "ryu", + "serde", + "time", +] + +[[package]] +name = "aws-smithy-xml" +version = "0.60.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce02add1aa3677d022f8adf81dcbe3046a95f17a1b1e8979c145cd21d3d22b3" +dependencies = [ + "xmlparser", +] + +[[package]] +name = "aws-types" +version = "1.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47c8323699dd9b3c8d5b3c13051ae9cdef58fd179957c882f8374dd8725962d9" +dependencies = [ + "aws-credential-types", + "aws-smithy-async", + "aws-smithy-runtime-api", + 
"aws-smithy-types", + "rustc_version", + "tracing", +] + +[[package]] +name = "axum" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" +dependencies = [ + "async-trait", + "axum-core 0.4.5", + "bytes", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.8.1", + "hyper-util", + "itoa", + "matchit 0.7.3", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "rustversion", + "serde", + "serde_json", + "serde_path_to_error", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tower 0.5.3", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "axum" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8" +dependencies = [ + "axum-core 0.5.6", + "bytes", + "form_urlencoded", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.8.1", + "hyper-util", + "itoa", + "matchit 0.8.4", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "serde_core", + "serde_json", + "serde_path_to_error", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tower 0.5.3", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "axum-core" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" +dependencies = [ + "async-trait", + "bytes", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "mime", + "pin-project-lite", + "rustversion", + "sync_wrapper", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "axum-core" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1" 
+dependencies = [ + "bytes", + "futures-core", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "mime", + "pin-project-lite", + "sync_wrapper", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "base32" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "022dfe9eb35f19ebbcb51e0b40a5ab759f46ad60cadf7297e0bd085afb50e076" + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "base64-simd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195" +dependencies = [ + "outref", + "vsimd", +] + +[[package]] +name = "base64ct" +version = "1.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06" + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = "bindgen" +version = "0.69.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" +dependencies = [ + "bitflags", + "cexpr", + "clang-sys", + "itertools 0.12.1", + "lazy_static", + "lazycell", + "log", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash 1.1.0", + "shlex", + "syn 2.0.117", + "which 4.4.2", +] + +[[package]] +name = "bindgen" +version = "0.72.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" +dependencies = [ + "bitflags", + "cexpr", + "clang-sys", + "itertools 0.13.0", + 
"proc-macro2", + "quote", + "regex", + "rustc-hash 2.1.2", + "shlex", + "syn 2.0.117", +] + +[[package]] +name = "bit-set" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +dependencies = [ + "bit-vec 0.6.3", +] + +[[package]] +name = "bit-set" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +dependencies = [ + "bit-vec 0.8.0", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + +[[package]] +name = "bitflags" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" + +[[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + +[[package]] +name = "blake2" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" +dependencies = [ + "digest", +] + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "borrow-or-share" +version = "0.2.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc0b364ead1874514c8c2855ab558056ebfeb775653e7ae45ff72f28f8f3166c" + +[[package]] +name = "borsh" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfd1e3f8955a5d7de9fab72fc8373fade9fb8a703968cb200ae3dc6cf08e185a" +dependencies = [ + "borsh-derive", + "bytes", + "cfg_aliases", +] + +[[package]] +name = "borsh-derive" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfcfdc083699101d5a7965e49925975f2f55060f94f9a05e7187be95d530ca59" +dependencies = [ + "once_cell", + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "bumpalo" +version = "3.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" +dependencies = [ + "allocator-api2", +] + +[[package]] +name = "bytecheck" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23cdc57ce23ac53c931e88a43d06d070a6fd142f2617be5855eb75efc9beb1c2" +dependencies = [ + "bytecheck_derive", + "ptr_meta", + "simdutf8", +] + +[[package]] +name = "bytecheck_derive" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3db406d29fbcd95542e92559bed4d8ad92636d1ca8b3b72ede10b4bcc010e659" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "bytecount" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e" + +[[package]] +name = "bytes" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" +dependencies = [ + "serde", +] + +[[package]] +name = "bytes-utils" +version = "0.1.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dafe3a8757b027e2be6e4e5601ed563c55989fcf1546e933c66c8eb3a058d35" +dependencies = [ + "bytes", + "either", +] + +[[package]] +name = "castaway" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dec551ab6e7578819132c713a93c022a05d60159dc86e7a7050223577484c55a" +dependencies = [ + "rustversion", +] + +[[package]] +name = "cbindgen" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eadd868a2ce9ca38de7eeafdcec9c7065ef89b42b32f0839278d55f35c54d1ff" +dependencies = [ + "clap", + "heck 0.4.1", + "indexmap 2.13.0", + "log", + "proc-macro2", + "quote", + "serde", + "serde_json", + "syn 2.0.117", + "tempfile", + "toml", +] + +[[package]] +name = "cc" +version = "1.2.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1e928d4b69e3077709075a938a05ffbedfa53a84c8f766efbf8220bb1ff60e1" +dependencies = [ + "find-msvc-tools", + "jobserver", + "libc", + "shlex", +] + +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom 7.1.3", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + +[[package]] +name = "chrono" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-link", +] 
+ +[[package]] +name = "chrono-tz" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6139a8597ed92cf816dfb33f5dd6cf0bb93a6adc938f11039f371bc5bcd26c3" +dependencies = [ + "chrono", + "phf 0.12.1", +] + +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading", +] + +[[package]] +name = "clap" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b193af5b67834b676abd72466a96c1024e6a6ad978a1f484bd90b85c94041351" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1110bd8a634a1ab8cb04345d8d878267d57c3cf1b38d91b71af6686408bbca6a" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "clap_lex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" + +[[package]] +name = "cmake" +version = "0.1.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0f78a02292a74a88ac736019ab962ece0bc380e3f977bf72e376c5d78ff0678" +dependencies = [ + "cc", +] + +[[package]] +name = "colorchoice" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" + +[[package]] +name = "compact_str" +version = "0.8.1" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b79c4069c6cad78e2e0cdfcbd26275770669fb39fd308a752dc110e83b9af32" +dependencies = [ + "castaway", + "cfg-if", + "itoa", + "rustversion", + "ryu", + "static_assertions", +] + +[[package]] +name = "const-oid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" + +[[package]] +name = "convert_case" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "633458d4ef8c78b72454de2d54fd6ab2e60f9e02be22f3c6104cdc8a4e0fceb9" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "cow-utils" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "417bef24afe1460300965a25ff4a24b8b45ad011948302ec221e8a0a81eb2c79" + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "curve25519-dalek" +version = "4.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97fb8b7c4503de7d6ae7b42ab72a5a59857b4c937ec27a3d4539dba95b5ab2be" +dependencies = [ + "cfg-if", + "cpufeatures", + "curve25519-dalek-derive", + "digest", + "fiat-crypto", + "rustc_version", + "subtle", +] + +[[package]] +name = "curve25519-dalek-derive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "data-encoding" +version 
= "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7a1e2f27636f116493b8b860f5546edb47c8d8f8ea73e1d2a20be88e28d1fea" + +[[package]] +name = "der" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb" +dependencies = [ + "const-oid", + "pem-rfc7468", + "zeroize", +] + +[[package]] +name = "deranged" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" +dependencies = [ + "powerfmt", + "serde_core", +] + +[[package]] +name = "derive_more" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d751e9e49156b02b44f9c1815bcb94b984cdcc4396ecc32521c739452808b134" +dependencies = [ + "derive_more-impl", +] + +[[package]] +name = "derive_more-impl" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "799a97264921d8623a957f6c3b9011f3b5492f557bbb7a5a19b7fa6d06ba8dcb" +dependencies = [ + "convert_case", + "proc-macro2", + "quote", + "rustc_version", + "syn 2.0.117", + "unicode-xid", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", + "subtle", +] + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "dunce" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" + +[[package]] +name = "dyn-clone" 
+version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" + +[[package]] +name = "ed25519" +version = "2.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "115531babc129696a58c64a4fef0a8bf9e9698629fb97e9e40767d235cfbcd53" +dependencies = [ + "signature", +] + +[[package]] +name = "ed25519-dalek" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70e796c081cee67dc755e1a36a0a172b897fab85fc3f6bc48307991f64e4eca9" +dependencies = [ + "curve25519-dalek", + "ed25519", + "sha2", + "signature", + "subtle", +] + +[[package]] +name = "edge-logger-client" +version = "0.1.12" +source = "sparse+https://git.madapes.com/api/packages/madapes/cargo/" +checksum = "de2df18e25bce88b70fe2ac02f7c40a580bfa525697c65d35b30f474f5cf9c00" +dependencies = [ + "anyhow", + "bytes", + "crossbeam-channel", + "edge-logger-common", + "edge-logger-proto", + "futures", + "prost 0.12.6", + "serde", + "serde_json", + "tokio", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "edge-logger-common" +version = "0.1.12" +source = "sparse+https://git.madapes.com/api/packages/madapes/cargo/" +checksum = "6d9bf9fba2d48890c79ff610850148169ad32de79fbe431eb27bc2d015de573f" +dependencies = [ + "anyhow", + "crossbeam-channel", + "edge-logger-proto", + "hostname", + "humantime-serde", + "num_cpus", + "serde", + "serde_json", + "serde_yaml", + "thiserror 1.0.69", + "tokio", + "tracing", +] + +[[package]] +name = "edge-logger-proto" +version = "0.1.12" +source = "sparse+https://git.madapes.com/api/packages/madapes/cargo/" +checksum = "89f01c924a8cb4bc4e03fe8732e038378962f1feb9d2d422ea400056271757a0" +dependencies = [ + "prost 0.12.6", + "prost-build 0.12.6", + "prost-types 0.12.6", +] + +[[package]] +name = "edge_storage" +version = "0.1.17" +source = "sparse+https://git.madapes.com/api/packages/madapes/cargo/" +checksum = 
"c3809b47a9ed965fba6fd99eb69296f78c0e55e6fe40632a58954eb224a19e0a" +dependencies = [ + "cbindgen", + "crossbeam-channel", + "libmdbx", + "parking_lot", + "query_engine", + "serde_json", + "thiserror 2.0.18", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "email-encoding" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9298e6504d9b9e780ed3f7dfd43a61be8cd0e09eb07f7706a945b0072b6670b6" +dependencies = [ + "base64", + "memchr", +] + +[[package]] +name = "email_address" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e079f19b08ca6239f47f8ba8509c11cf3ea30095831f7fed61441475edd8c449" +dependencies = [ + "serde", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "fancy-regex" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2" +dependencies = [ + "bit-set 0.5.3", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "fancy-regex" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298" +dependencies = [ + "bit-set 0.8.0", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "fiat-crypto" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28dea519a9695b9977216879a3ebfddf92f1c08c05d984f8996aecd6ecdc811d" + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + +[[package]] +name = "fixedbitset" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" + +[[package]] +name = "fluent-uri" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1918b65d96df47d3591bed19c5cca17e3fa5d0707318e4b5ef2eae01764df7e5" +dependencies = [ + "borrow-or-share", + "ref-cast", + "serde", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + +[[package]] +name = "form_urlencoded" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "fraction" +version = "0.15.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f158e3ff0a1b334408dc9fb811cd99b446986f4d8b741bb08f9df1604085ae7" +dependencies = [ + "lazy_static", + "num", +] + +[[package]] +name = "fs_extra" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" + +[[package]] +name = "fslock" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04412b8935272e3a9bae6f48c7bfff74c2911f60525404edfdd28e49884c3bfb" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + +[[package]] +name = "futures" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + +[[package]] +name = "futures-executor" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" + +[[package]] +name = "futures-macro" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "futures-sink" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" + +[[package]] +name = "futures-task" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = "futures-util" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "slab", +] + +[[package]] +name = "gateway" +version = "0.1.0" +dependencies = [ + "anyhow", + "argon2", + "async-nats", + "async-trait", + "axum 0.7.9", + "base32", + "chrono", + "edge-logger-client", + "edge_storage", + "futures", + "hex", + "hmac", + "http 1.4.0", + "jsonwebtoken", + "libmdbx", + "metrics 0.23.1", + "metrics-exporter-prometheus 0.15.3", + "prost 0.13.5", + "protoc-bin-vendored", + 
"rand_core 0.6.4", + "reqwest", + "serde", + "serde_json", + "serde_yaml", + "sha1", + "sha2", + "shared", + "subtle", + "thiserror 2.0.18", + "tokio", + "tonic", + "tonic-build", + "tower 0.5.3", + "tower-http 0.6.8", + "tracing", + "tracing-subscriber", + "urlencoding", + "uuid", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "wasi", + "wasm-bindgen", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "r-efi 5.3.0", + "wasip2", + "wasm-bindgen", +] + +[[package]] +name = "getrandom" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "libc", + "r-efi 6.0.0", + "wasip2", + "wasip3", +] + +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + +[[package]] +name = "gzip-header" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95cc527b92e6029a62960ad99aa8a6660faa4555fe5f731aab13aa6a921795a2" +dependencies = [ + "crc32fast", +] + +[[package]] +name = "h2" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http 0.2.12", + "indexmap 2.13.0", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "h2" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http 1.4.0", + "indexmap 2.13.0", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash 0.7.8", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash 0.8.12", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hermit-abi" 
+version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "hmac" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +dependencies = [ + "digest", +] + +[[package]] +name = "home" +version = "0.5.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc627f471c528ff0c4a49e1d5e60450c8f6461dd6d10ba9dcd3a61d3dff7728d" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "hostname" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "617aaa3557aef3810a6369d0a99fac8a080891b68bd9f9812a1eeda0c0730cbd" +dependencies = [ + "cfg-if", + "libc", + "windows-link", +] + +[[package]] +name = "http" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" +dependencies = [ + "bytes", + "itoa", +] + +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http 0.2.12", + "pin-project-lite", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http 1.4.0", +] + +[[package]] +name = "http-body-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" +dependencies = [ + "bytes", + "futures-core", + "http 1.4.0", + "http-body 1.0.1", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "humantime" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" + +[[package]] +name = "humantime-serde" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57a3db5ea5923d99402c94e9feb261dc5ee9b4efa158b0315f788cf549cc200c" +dependencies = [ + "humantime", + "serde", +] + +[[package]] +name = "hyper" +version = "0.14.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2 0.3.27", + "http 0.2.12", + "http-body 0.4.6", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2 0.5.10", + "tokio", + "tower-service", + "tracing", + "want", +] + +[[package]] +name = "hyper" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" +dependencies = [ + "atomic-waker", + "bytes", + 
"futures-channel", + "futures-core", + "h2 0.4.13", + "http 1.4.0", + "http-body 1.0.1", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "pin-utils", + "smallvec", + "tokio", + "want", +] + +[[package]] +name = "hyper-rustls" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" +dependencies = [ + "futures-util", + "http 0.2.12", + "hyper 0.14.32", + "log", + "rustls 0.21.12", + "tokio", + "tokio-rustls 0.24.1", +] + +[[package]] +name = "hyper-rustls" +version = "0.27.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" +dependencies = [ + "http 1.4.0", + "hyper 1.8.1", + "hyper-util", + "log", + "rustls 0.23.37", + "rustls-native-certs 0.8.3", + "rustls-pki-types", + "tokio", + "tokio-rustls 0.26.4", + "tower-service", + "webpki-roots 1.0.6", +] + +[[package]] +name = "hyper-timeout" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" +dependencies = [ + "hyper 1.8.1", + "hyper-util", + "pin-project-lite", + "tokio", + "tower-service", +] + +[[package]] +name = "hyper-util" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" +dependencies = [ + "base64", + "bytes", + "futures-channel", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "hyper 1.8.1", + "ipnet", + "libc", + "percent-encoding", + "pin-project-lite", + "socket2 0.6.3", + "tokio", + "tower-service", + "tracing", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" +dependencies = [ + 
"android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "icu_collections" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" +dependencies = [ + "displaydoc", + "potential_utf", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" +dependencies = [ + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" + +[[package]] +name = "icu_properties" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" + +[[package]] +name = "icu_provider" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" +dependencies = [ + "displaydoc", + "icu_locale_core", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + +[[package]] +name = "idna" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown 0.12.3", +] + +[[package]] +name = "indexmap" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown 0.16.1", + "serde", + "serde_core", +] + +[[package]] +name = "ipnet" +version = "2.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" + +[[package]] +name = "iri-string" +version = "0.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"25e659a4bb38e810ebc252e53b5814ff908a8c58c2a9ce2fae1bbec24cbf4e20" +dependencies = [ + "memchr", + "serde", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "iso8601" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1082f0c48f143442a1ac6122f67e360ceee130b967af4d50996e5154a45df46" +dependencies = [ + "nom 8.0.0", +] + +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.4", + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc4c90f45aa2e6eacbe8645f77fdea542ac97a494bcd117a67df9ff4d611f995" +dependencies = [ + "cfg-if", + "futures-util", + "once_cell", + "wasm-bindgen", +] + +[[package]] 
+name = "jsonschema" +version = "0.18.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa0f4bea31643be4c6a678e9aa4ae44f0db9e5609d5ca9dc9083d06eb3e9a27a" +dependencies = [ + "ahash 0.8.12", + "anyhow", + "base64", + "bytecount", + "clap", + "fancy-regex 0.13.0", + "fraction", + "getrandom 0.2.17", + "iso8601", + "itoa", + "memchr", + "num-cmp", + "once_cell", + "parking_lot", + "percent-encoding", + "regex", + "reqwest", + "serde", + "serde_json", + "time", + "url", + "uuid", +] + +[[package]] +name = "jsonschema" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "161c33c3ec738cfea3288c5c53dfcdb32fd4fc2954de86ea06f71b5a1a40bfcd" +dependencies = [ + "ahash 0.8.12", + "base64", + "bytecount", + "email_address", + "fancy-regex 0.14.0", + "fraction", + "idna", + "itoa", + "num-cmp", + "once_cell", + "percent-encoding", + "referencing", + "regex-syntax", + "reqwest", + "serde", + "serde_json", + "uuid-simd", +] + +[[package]] +name = "jsonwebtoken" +version = "9.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a87cc7a48537badeae96744432de36f4be2b4a34a05a5ef32e9dd8a1c169dde" +dependencies = [ + "base64", + "js-sys", + "pem", + "ring", + "serde", + "serde_json", + "simple_asn1", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + +[[package]] +name = "lettre" +version = "0.11.20" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "471816f3e24b85e820dee02cde962379ea1a669e5242f19c61bcbcffedf4c4fb" +dependencies = [ + "async-trait", + "base64", + "email-encoding", + "email_address", + "fastrand", + "futures-io", + "futures-util", + "hostname", + "httpdate", + "idna", + "mime", + "native-tls", + "nom 8.0.0", + "percent-encoding", + "quoted_printable", + "socket2 0.6.3", + "tokio", + "tokio-native-tls", + "url", +] + +[[package]] +name = "libc" +version = "0.2.183" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" + +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link", +] + +[[package]] +name = "libmdbx" +version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1da773dcce45661428e2b51ca984eb3c64c08f2aa54865397e4b77d1f61c9f07" +dependencies = [ + "bitflags", + "derive_more", + "indexmap 2.13.0", + "libc", + "mdbx-sys", + "parking_lot", + "sealed", + "thiserror 2.0.18", +] + +[[package]] +name = "linux-raw-sys" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" + +[[package]] +name = "linux-raw-sys" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" + +[[package]] +name = "litemap" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" + +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "lru" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" +dependencies = [ + "hashbrown 0.15.5", +] + +[[package]] +name = "lru-slab" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" + +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + "regex-automata", +] + +[[package]] +name = "matchit" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" + +[[package]] +name = "matchit" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" + +[[package]] +name = "mdbx-sys" +version = "13.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4002b9ffed3dd01364ab005bca653a99b8f550ca4cd755470a9d3a31a44f466" +dependencies = [ + "bindgen 0.72.1", + "cc", + "libc", +] + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "metrics" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3045b4193fbdc5b5681f32f11070da9be3609f189a79f3390706d42587f46bb5" +dependencies = [ + "ahash 0.8.12", + "portable-atomic", +] + +[[package]] +name = "metrics" +version = "0.24.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d5312e9ba3771cfa961b585728215e3d972c950a3eed9252aa093d6301277e8" +dependencies = [ + "ahash 0.8.12", + "portable-atomic", +] + +[[package]] +name = "metrics-exporter-prometheus" +version = "0.15.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4f0c8427b39666bf970460908b213ec09b3b350f20c0c2eabcbba51704a08e6" +dependencies = [ + "base64", + "http-body-util", + "hyper 1.8.1", + "hyper-rustls 0.27.7", + "hyper-util", + "indexmap 2.13.0", + "ipnet", + "metrics 0.23.1", + "metrics-util 0.17.0", + "quanta", + "thiserror 1.0.69", + "tokio", + "tracing", +] + +[[package]] +name = "metrics-exporter-prometheus" +version = "0.16.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd7399781913e5393588a8d8c6a2867bf85fb38eaf2502fdce465aad2dc6f034" +dependencies = [ + "base64", + "http-body-util", + "hyper 1.8.1", + "hyper-rustls 0.27.7", + "hyper-util", + "indexmap 2.13.0", + "ipnet", + "metrics 0.24.3", + "metrics-util 0.19.1", + "quanta", + "thiserror 1.0.69", + "tokio", + "tracing", +] + +[[package]] +name = "metrics-util" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4259040465c955f9f2f1a4a8a16dc46726169bca0f88e8fb2dbeced487c3e828" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", + "hashbrown 0.14.5", + "metrics 0.23.1", + "num_cpus", + "quanta", + "sketches-ddsketch 0.2.2", +] + +[[package]] +name = "metrics-util" +version = "0.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8496cc523d1f94c1385dd8f0f0c2c480b2b8aeccb5b7e4485ad6365523ae376" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", + "hashbrown 0.15.5", + "metrics 0.24.3", + "quanta", + 
"rand 0.9.2", + "rand_xoshiro", + "sketches-ddsketch 0.3.1", +] + +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "miniz_oxide" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" +dependencies = [ + "adler", +] + +[[package]] +name = "mio" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.61.2", +] + +[[package]] +name = "multimap" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" + +[[package]] +name = "native-tls" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "465500e14ea162429d264d44189adc38b199b62b1c21eea9f69e4b73cb03bbf2" +dependencies = [ + "libc", + "log", + "openssl", + "openssl-probe 0.2.1", + "openssl-sys", + "schannel", + "security-framework 3.7.0", + "security-framework-sys", + "tempfile", +] + +[[package]] +name = "nkeys" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "879011babc47a1c7fdf5a935ae3cfe94f34645ca0cac1c7f6424b36fc743d1bf" +dependencies = [ + "data-encoding", + "ed25519", + "ed25519-dalek", + "getrandom 0.2.17", + "log", + "rand 0.8.5", + "signatory", +] + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "nom" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405" +dependencies = [ + "memchr", +] + +[[package]] +name = "nonmax" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "610a5acd306ec67f907abe5567859a3c693fb9886eb1f012ab8f2a47bef3db51" + +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "nuid" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc895af95856f929163a0aa20c26a78d26bfdc839f51b9d5aa7a5b79e52b7e83" +dependencies = [ + "rand 0.8.5", +] + +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-cmp" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63335b2e2c34fae2fb0aa2cecfd9f0832a1e24b3b32ecec612c3426d46dc8aaa" + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies 
= [ + "num-traits", +] + +[[package]] +name = "num-conv" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967" + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_cpus" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "once_cell" +version = "1.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "openssl" +version = "0.10.76" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "951c002c75e16ea2c65b8c7e4d3d51d5530d8dfa7d060b4776828c88cfb18ecf" +dependencies = [ + "bitflags", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "openssl-probe" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" + +[[package]] +name = "openssl-probe" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" + +[[package]] +name = "openssl-sys" +version = "0.9.112" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57d55af3b3e226502be1526dfdba67ab0e9c96fc293004e79576b2b9edb0dbdb" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "outref" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" + +[[package]] +name = "owo-colors" +version = "4.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d211803b9b6b570f68772237e415a029d5a50c65d382910b879fb19d3271f94d" + +[[package]] +name = "oxc-miette" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e03e63fd113c068b82d07c9c614b0b146c08a3ac0a4dface3ea1d1a9d14d549e" +dependencies = [ + "cfg-if", + "owo-colors", + "oxc-miette-derive", + "textwrap", + "thiserror 1.0.69", + "unicode-width", +] + +[[package]] +name = "oxc-miette-derive" +version = "1.0.2" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e21f680e8c5f1900297d394627d495351b9e37761f7bbf90116bd5eeb6e80967" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "oxc_allocator" +version = "0.44.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82c6c7387edb41be3d764a115672e7be89922ce6df9756cdedfdd9b152ce788c" +dependencies = [ + "allocator-api2", + "bumpalo", + "simdutf8", +] + +[[package]] +name = "oxc_ast" +version = "0.44.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b9fee36a81c3ee36c4f2b3acdb9c455971e9804ea399428e773a094025f92b1" +dependencies = [ + "bitflags", + "cow-utils", + "num-bigint", + "num-traits", + "oxc_allocator", + "oxc_ast_macros", + "oxc_estree", + "oxc_regular_expression", + "oxc_span", + "oxc_syntax", +] + +[[package]] +name = "oxc_ast_macros" +version = "0.44.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "453922e0b0c402ff4f94d277d9c99c8f60757ed5a84458133758f8142b8707cf" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "oxc_diagnostics" +version = "0.44.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da579fda6c94527afb13b9cb1fb933ba84ec61c77b74296e71c1b60a21b60447" +dependencies = [ + "oxc-miette", + "rustc-hash 2.1.2", +] + +[[package]] +name = "oxc_ecmascript" +version = "0.44.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4de4aac4c0bd05848c4523c145a656aeded13d35fd07557dbafad2c34753837" +dependencies = [ + "num-bigint", + "num-traits", + "oxc_ast", + "oxc_span", + "oxc_syntax", +] + +[[package]] +name = "oxc_estree" +version = "0.44.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5567979b29f2c6af66d912bdf68b18244e4716b1bce77da4cb5b54e036a5d7b9" + +[[package]] +name = "oxc_index" +version = "2.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5eca5d9726cd0a6e433debe003b7bc88b2ecad0bb6109f0cef7c55e692139a34" + +[[package]] +name = "oxc_parser" +version = "0.44.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9f3b136bd0b203164e03aba7f7ce836df62a7070f847762058439f18296d582" +dependencies = [ + "assert-unchecked", + "bitflags", + "cow-utils", + "memchr", + "num-bigint", + "num-traits", + "oxc_allocator", + "oxc_ast", + "oxc_diagnostics", + "oxc_ecmascript", + "oxc_regular_expression", + "oxc_span", + "oxc_syntax", + "rustc-hash 2.1.2", + "seq-macro", +] + +[[package]] +name = "oxc_regular_expression" +version = "0.44.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecf141ba4acfed1b65b6393737dc7f15407a4bd0fff16677ca6dedac45a08468" +dependencies = [ + "oxc_allocator", + "oxc_ast_macros", + "oxc_diagnostics", + "oxc_estree", + "oxc_span", + "phf 0.11.3", + "rustc-hash 2.1.2", + "unicode-id-start", +] + +[[package]] +name = "oxc_span" +version = "0.44.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "afcf2e6f8c8f9c0019b7d82412ffe9171bdda72f8b3beba961abdfef4065f0bb" +dependencies = [ + "compact_str", + "oxc-miette", + "oxc_allocator", + "oxc_ast_macros", + "oxc_estree", +] + +[[package]] +name = "oxc_syntax" +version = "0.44.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "754c63ade3a31289e4605845e1e9ca40c34b40bf10e0e6b91634ec7ad97ade97" +dependencies = [ + "assert-unchecked", + "bitflags", + "nonmax", + "oxc_allocator", + "oxc_ast_macros", + "oxc_estree", + "oxc_index", + "oxc_span", + "phf 0.11.3", + "rustc-hash 2.1.2", + "ryu-js", + "unicode-id-start", +] + +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + 
+[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link", +] + +[[package]] +name = "password-hash" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "346f04948ba92c43e8469c1ee6736c7563d71012b17d40745260fe106aac2166" +dependencies = [ + "base64ct", + "rand_core 0.6.4", + "subtle", +] + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "pem" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d30c53c26bc5b31a98cd02d20f25a7c8567146caf63ed593a9d87b2775291be" +dependencies = [ + "base64", + "serde_core", +] + +[[package]] +name = "pem-rfc7468" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" +dependencies = [ + "base64ct", +] + +[[package]] +name = "percent-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + +[[package]] +name = "petgraph" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" +dependencies = [ + "fixedbitset 0.4.2", + "indexmap 2.13.0", +] + +[[package]] +name = "petgraph" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" +dependencies = [ + "fixedbitset 0.5.7", + "indexmap 2.13.0", +] + +[[package]] +name = 
"phf" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" +dependencies = [ + "phf_macros", + "phf_shared 0.11.3", +] + +[[package]] +name = "phf" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "913273894cec178f401a31ec4b656318d95473527be05c0752cc41cdc32be8b7" +dependencies = [ + "phf_shared 0.12.1", +] + +[[package]] +name = "phf_generator" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" +dependencies = [ + "phf_shared 0.11.3", + "rand 0.8.5", +] + +[[package]] +name = "phf_macros" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216" +dependencies = [ + "phf_generator", + "phf_shared 0.11.3", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "phf_shared" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" +dependencies = [ + "siphasher", +] + +[[package]] +name = "phf_shared" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06005508882fb681fd97892ecff4b7fd0fee13ef1aa569f8695dae7ab9099981" +dependencies = [ + "siphasher", +] + +[[package]] +name = "pin-project" +version = "1.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1749c7ed4bcaf4c3d0a3efc28538844fb29bcdd7d2b67b2be7e20ba861ff517" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b20ed30f105399776b9c883e68e536ef602a16ae6f596d2c473591d6ad64c6" +dependencies = [ + 
"proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkcs8" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" +dependencies = [ + "der", + "spki", +] + +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + +[[package]] +name = "portable-atomic" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" + +[[package]] +name = "potential_utf" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" +dependencies = [ + "zerovec", +] + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + 
"proc-macro2", + "syn 2.0.117", +] + +[[package]] +name = "proc-macro-crate" +version = "3.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e67ba7e9b2b56446f1d419b1d807906278ffa1a658a8a5d8a39dcb1f5a78614f" +dependencies = [ + "toml_edit 0.25.8+spec-1.1.0", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "projection" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-nats", + "axum 0.7.9", + "chrono", + "edge-logger-client", + "edge_storage", + "futures", + "libmdbx", + "query_engine", + "runtime-function", + "serde", + "serde_json", + "serde_yaml", + "shared", + "tempfile", + "thiserror 2.0.18", + "tokio", + "toml", + "tower 0.5.3", + "tracing", + "tracing-subscriber", + "uuid", + "v8", +] + +[[package]] +name = "prost" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "deb1435c188b76130da55f17a466d252ff7b1418b2ad3e037d127b94e3411f29" +dependencies = [ + "bytes", + "prost-derive 0.12.6", +] + +[[package]] +name = "prost" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" +dependencies = [ + "bytes", + "prost-derive 0.13.5", +] + +[[package]] +name = "prost-build" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22505a5c94da8e3b7c2996394d1c933236c4d743e81a410bcca4e6989fc066a4" +dependencies = [ + "bytes", + "heck 0.5.0", + "itertools 0.12.1", + "log", + "multimap", + "once_cell", + "petgraph 0.6.5", + "prettyplease", + "prost 0.12.6", + "prost-types 0.12.6", + "regex", + "syn 2.0.117", + "tempfile", +] + +[[package]] +name = "prost-build" +version = "0.13.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" +dependencies = [ + "heck 0.5.0", + "itertools 0.14.0", + "log", + "multimap", + "once_cell", + "petgraph 0.7.1", + "prettyplease", + "prost 0.13.5", + "prost-types 0.13.5", + "regex", + "syn 2.0.117", + "tempfile", +] + +[[package]] +name = "prost-derive" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81bddcdb20abf9501610992b6759a4c888aef7d1a7247ef75e2404275ac24af1" +dependencies = [ + "anyhow", + "itertools 0.12.1", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "prost-derive" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" +dependencies = [ + "anyhow", + "itertools 0.14.0", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "prost-types" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9091c90b0a32608e984ff2fa4091273cbdd755d54935c51d520887f4a1dbd5b0" +dependencies = [ + "prost 0.12.6", +] + +[[package]] +name = "prost-types" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" +dependencies = [ + "prost 0.13.5", +] + +[[package]] +name = "protoc-bin-vendored" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1c381df33c98266b5f08186583660090a4ffa0889e76c7e9a5e175f645a67fa" +dependencies = [ + "protoc-bin-vendored-linux-aarch_64", + "protoc-bin-vendored-linux-ppcle_64", + "protoc-bin-vendored-linux-s390_64", + "protoc-bin-vendored-linux-x86_32", + "protoc-bin-vendored-linux-x86_64", + "protoc-bin-vendored-macos-aarch_64", + "protoc-bin-vendored-macos-x86_64", + "protoc-bin-vendored-win32", +] + +[[package]] +name = 
"protoc-bin-vendored-linux-aarch_64" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c350df4d49b5b9e3ca79f7e646fde2377b199e13cfa87320308397e1f37e1a4c" + +[[package]] +name = "protoc-bin-vendored-linux-ppcle_64" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a55a63e6c7244f19b5c6393f025017eb5d793fd5467823a099740a7a4222440c" + +[[package]] +name = "protoc-bin-vendored-linux-s390_64" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1dba5565db4288e935d5330a07c264a4ee8e4a5b4a4e6f4e83fad824cc32f3b0" + +[[package]] +name = "protoc-bin-vendored-linux-x86_32" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8854774b24ee28b7868cd71dccaae8e02a2365e67a4a87a6cd11ee6cdbdf9cf5" + +[[package]] +name = "protoc-bin-vendored-linux-x86_64" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b38b07546580df720fa464ce124c4b03630a6fb83e05c336fea2a241df7e5d78" + +[[package]] +name = "protoc-bin-vendored-macos-aarch_64" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89278a9926ce312e51f1d999fee8825d324d603213344a9a706daa009f1d8092" + +[[package]] +name = "protoc-bin-vendored-macos-x86_64" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81745feda7ccfb9471d7a4de888f0652e806d5795b61480605d4943176299756" + +[[package]] +name = "protoc-bin-vendored-win32" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95067976aca6421a523e491fce939a3e65249bac4b977adee0ee9771568e8aa3" + +[[package]] +name = "ptr_meta" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" +dependencies = [ + "ptr_meta_derive", +] + 
+[[package]] +name = "ptr_meta_derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "quanta" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3ab5a9d756f0d97bdc89019bd2e4ea098cf9cde50ee7564dde6b81ccc8f06c7" +dependencies = [ + "crossbeam-utils", + "libc", + "once_cell", + "raw-cpuid", + "wasi", + "web-sys", + "winapi", +] + +[[package]] +name = "query_engine" +version = "0.1.7" +source = "sparse+https://git.madapes.com/api/packages/madapes/cargo/" +checksum = "8cc4d91c92e3023df7a28dae7d7f5e602e2bd68834eb29e512da9293a1e5e9d2" +dependencies = [ + "base64", + "jsonschema 0.29.1", + "regex", + "serde", + "serde_json", + "thiserror 2.0.18", +] + +[[package]] +name = "quinn" +version = "0.11.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20" +dependencies = [ + "bytes", + "cfg_aliases", + "pin-project-lite", + "quinn-proto", + "quinn-udp", + "rustc-hash 2.1.2", + "rustls 0.23.37", + "socket2 0.6.3", + "thiserror 2.0.18", + "tokio", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-proto" +version = "0.11.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098" +dependencies = [ + "bytes", + "getrandom 0.3.4", + "lru-slab", + "rand 0.9.2", + "ring", + "rustc-hash 2.1.2", + "rustls 0.23.37", + "rustls-pki-types", + "slab", + "thiserror 2.0.18", + "tinyvec", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-udp" +version = "0.5.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "addec6a0dcad8a8d96a771f815f0eaf55f9d1805756410b39f5fa81332574cbd" +dependencies = [ + "cfg_aliases", + 
"libc", + "once_cell", + "socket2 0.6.3", + "tracing", + "windows-sys 0.60.2", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "quoted_printable" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "478e0585659a122aa407eb7e3c0e1fa51b1d8a870038bd29f0cf4a8551eea972" + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha 0.9.0", + "rand_core 0.9.5", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.5", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.17", +] + +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.4", +] + +[[package]] +name = "rand_xoshiro" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f703f4665700daf5512dcca5f43afa6af89f09db47fb56be587f80636bda2d41" +dependencies = [ + "rand_core 0.9.5", +] + +[[package]] +name = "raw-cpuid" +version = "11.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "498cd0dc59d73224351ee52a95fee0f1a617a2eae0e7d9d720cc622c73a54186" +dependencies = [ + "bitflags", +] + +[[package]] +name = "rayon" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags", +] + +[[package]] +name = "ref-cast" +version = "1.0.25" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f354300ae66f76f1c85c5f84693f0ce81d747e2c3f21a45fef496d89c960bf7d" +dependencies = [ + "ref-cast-impl", +] + +[[package]] +name = "ref-cast-impl" +version = "1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "referencing" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40a64b3a635fad9000648b4d8a59c8710c523ab61a23d392a7d91d47683f5adc" +dependencies = [ + "ahash 0.8.12", + "fluent-uri", + "once_cell", + "parking_lot", + "percent-encoding", + "serde_json", +] + +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-lite" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973" + +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + +[[package]] +name = "rend" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71fe3824f5629716b1589be05dacd749f6aa084c87e00e016714a8cdfccc997c" +dependencies = [ + "bytecheck", +] + +[[package]] +name = "reqwest" 
+version = "0.12.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" +dependencies = [ + "base64", + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.8.1", + "hyper-rustls 0.27.7", + "hyper-util", + "js-sys", + "log", + "percent-encoding", + "pin-project-lite", + "quinn", + "rustls 0.23.37", + "rustls-pki-types", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tokio-rustls 0.26.4", + "tower 0.5.3", + "tower-http 0.6.8", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "webpki-roots 1.0.6", +] + +[[package]] +name = "ring" +version = "0.17.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" +dependencies = [ + "cc", + "cfg-if", + "getrandom 0.2.17", + "libc", + "untrusted", + "windows-sys 0.52.0", +] + +[[package]] +name = "rkyv" +version = "0.7.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2297bf9c81a3f0dc96bc9521370b88f054168c29826a75e89c55ff196e7ed6a1" +dependencies = [ + "bitvec", + "bytecheck", + "bytes", + "hashbrown 0.12.3", + "ptr_meta", + "rend", + "rkyv_derive", + "seahash", + "tinyvec", + "uuid", +] + +[[package]] +name = "rkyv_derive" +version = "0.7.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84d7b42d4b8d06048d3ac8db0eb31bcb942cbeb709f0b5f2b2ebde398d3038f5" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "runner" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-nats", + "aws-config", + "aws-sdk-sesv2", + "axum 0.7.9", + "chrono", + "edge-logger-client", + "edge_storage", + "futures", + "lettre", + "libmdbx", + "prost 0.13.5", + "protoc-bin-vendored", + "query_engine", + "reqwest", + 
"runtime-function", + "serde", + "serde_json", + "serde_yaml", + "shared", + "tempfile", + "thiserror 2.0.18", + "tokio", + "toml", + "tonic", + "tonic-build", + "tower 0.5.3", + "tracing", + "tracing-subscriber", + "uuid", + "v8", +] + +[[package]] +name = "runtime-function" +version = "0.2.6" +source = "sparse+https://git.madapes.com/api/packages/madapes/cargo/" +checksum = "28b379b985b8661b4247aa938a9881ca6f70463e5cb45e1d5d4f521f8cae3a00" +dependencies = [ + "axum 0.7.9", + "bincode", + "chrono", + "chrono-tz", + "fancy-regex 0.14.0", + "jsonschema 0.18.3", + "oxc_allocator", + "oxc_ast", + "oxc_parser", + "oxc_span", + "oxc_syntax", + "rand 0.8.5", + "rand_chacha 0.3.1", + "rayon", + "rust_decimal", + "schemars", + "seahash", + "serde", + "serde_json", + "sha2", + "thiserror 2.0.18", + "tokio", + "tower-http 0.5.2", + "tracing", + "tracing-subscriber", + "uuid", +] + +[[package]] +name = "rust_decimal" +version = "1.41.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ce901f9a19d251159075a4c37af514c3b8ef99c22e02dd8c19161cf397ee94a" +dependencies = [ + "arrayvec", + "borsh", + "bytes", + "num-traits", + "rand 0.8.5", + "rkyv", + "serde", + "serde_json", + "wasm-bindgen", +] + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "rustc-hash" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "rustix" +version = "0.38.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys 0.4.15", + "windows-sys 0.59.0", +] + +[[package]] +name = "rustix" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys 0.12.1", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustls" +version = "0.21.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" +dependencies = [ + "log", + "ring", + "rustls-webpki 0.101.7", + "sct", +] + +[[package]] +name = "rustls" +version = "0.23.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4" +dependencies = [ + "aws-lc-rs", + "log", + "once_cell", + "ring", + "rustls-pki-types", + "rustls-webpki 0.103.10", + "subtle", + "zeroize", +] + +[[package]] +name = "rustls-native-certs" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5bfb394eeed242e909609f56089eecfe5fda225042e8b171791b9c95f5931e5" +dependencies = [ + "openssl-probe 0.1.6", + "rustls-pemfile", + "rustls-pki-types", + "schannel", + "security-framework 2.11.1", +] + +[[package]] +name = "rustls-native-certs" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" +dependencies = [ + "openssl-probe 0.2.1", + "rustls-pki-types", + "schannel", + "security-framework 3.7.0", +] + +[[package]] +name = "rustls-pemfile" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" +dependencies = [ + "rustls-pki-types", 
+] + +[[package]] +name = "rustls-pki-types" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" +dependencies = [ + "web-time", + "zeroize", +] + +[[package]] +name = "rustls-webpki" +version = "0.101.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "rustls-webpki" +version = "0.102.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" +dependencies = [ + "rustls-pki-types", + "untrusted", +] + +[[package]] +name = "rustls-webpki" +version = "0.103.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef" +dependencies = [ + "aws-lc-rs", + "ring", + "rustls-pki-types", + "untrusted", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "ryu" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" + +[[package]] +name = "ryu-js" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd29631678d6fb0903b69223673e122c32e9ae559d0960a38d574695ebc0ea15" + +[[package]] +name = "schannel" +version = "0.1.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91c1b7e4904c873ef0710c1f407dde2e6287de2bebc1bbbf7d430bb7cbffd939" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "schemars" +version = "0.8.22" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615" +dependencies = [ + "chrono", + "dyn-clone", + "rust_decimal", + "schemars_derive", + "serde", + "serde_json", + "uuid", +] + +[[package]] +name = "schemars_derive" +version = "0.8.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32e265784ad618884abaea0600a9adf15393368d840e0222d101a072f3f7534d" +dependencies = [ + "proc-macro2", + "quote", + "serde_derive_internals", + "syn 2.0.117", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "sct" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "seahash" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" + +[[package]] +name = "sealed" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22f968c5ea23d555e670b449c1c5e7b2fc399fdaec1d304a17cd48e288abc107" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "security-framework" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" +dependencies = [ + "bitflags", + "core-foundation 0.9.4", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework" +version = "3.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" 
+dependencies = [ + "bitflags", + "core-foundation 0.10.1", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "semver" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" + +[[package]] +name = "seq-macro" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "serde_derive_internals" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "serde_nanos" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a93142f0367a4cc53ae0fead1bcda39e85beccfad3dcd717656cacab94b12985" +dependencies = [ + "serde", +] + +[[package]] +name = "serde_path_to_error" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457" +dependencies = [ + "itoa", + "serde", + "serde_core", +] + +[[package]] +name = "serde_repr" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "serde_spanned" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3" +dependencies = [ + "serde", +] + +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap 2.13.0", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + +[[package]] +name = "sha1" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + 
+[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shared" +version = "0.1.0" +dependencies = [ + "serde", + "serde_json", + "uuid", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "signal-hook-registry" +version = "1.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" +dependencies = [ + "errno", + "libc", +] + +[[package]] +name = "signatory" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1e303f8205714074f6068773f0e29527e0453937fe837c9717d066635b65f31" +dependencies = [ + "pkcs8", + "rand_core 0.6.4", + "signature", + "zeroize", +] + +[[package]] +name = "signature" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" +dependencies = [ + "digest", + "rand_core 0.6.4", +] + +[[package]] +name = "simdutf8" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" + +[[package]] +name = "simple_asn1" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d585997b0ac10be3c5ee635f1bab02d512760d14b7c468801ac8a01d9ae5f1d" +dependencies = 
[ + "num-bigint", + "num-traits", + "thiserror 2.0.18", + "time", +] + +[[package]] +name = "siphasher" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" + +[[package]] +name = "sketches-ddsketch" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85636c14b73d81f541e525f585c0a2109e6744e1565b5c1668e31c70c10ed65c" + +[[package]] +name = "sketches-ddsketch" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c6f73aeb92d671e0cc4dca167e59b2deb6387c375391bc99ee743f326994a2b" + +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "smawk" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c388c1b5e93756d0c740965c41e8822f866621d41acbdf6336a6a168f8840c" + +[[package]] +name = "socket2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "socket2" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "spki" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" +dependencies = [ + 
"base64ct", + "der", +] + +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "sync_wrapper" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" +dependencies = [ + "futures-core", +] + +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + +[[package]] +name = "tempfile" +version = "3.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" +dependencies = [ + "fastrand", + "getrandom 0.4.2", + "once_cell", + "rustix 1.1.4", + "windows-sys 0.61.2", +] + +[[package]] +name = "textwrap" +version = "0.16.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c13547615a44dc9c452a8a534638acdf07120d4b6847c8178705da06306a3057" +dependencies = [ + "smawk", + "unicode-linebreak", + "unicode-width", +] + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl 2.0.18", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "time" +version = "0.3.47" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" +dependencies = [ + "deranged", + "itoa", + "num-conv", + "powerfmt", + "serde_core", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" + +[[package]] +name = "time-macros" +version = "0.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" +dependencies = [ + "num-conv", + "time-core", +] + +[[package]] +name = "tinystr" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" +dependencies = [ + "displaydoc", + "zerovec", +] + +[[package]] +name = "tinyvec" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "tokio" +version = "1.50.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27ad5e34374e03cfffefc301becb44e9dc3c17584f414349ebe29ed26661822d" +dependencies = [ + "bytes", + "libc", + "mio", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "socket2 0.6.3", + "tokio-macros", + "windows-sys 0.61.2", +] + +[[package]] +name = "tokio-macros" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c55a2eff8b69ce66c84f85e1da1c233edc36ceb85a2058d11b0d6a3c7e7569c" +dependencies = [ + 
"proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "tokio-native-tls" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" +dependencies = [ + "native-tls", + "tokio", +] + +[[package]] +name = "tokio-rustls" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" +dependencies = [ + "rustls 0.21.12", + "tokio", +] + +[[package]] +name = "tokio-rustls" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" +dependencies = [ + "rustls 0.23.37", + "tokio", +] + +[[package]] +name = "tokio-stream" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tokio-websockets" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f591660438b3038dd04d16c938271c79e7e06260ad2ea2885a4861bfb238605d" +dependencies = [ + "base64", + "bytes", + "futures-core", + "futures-sink", + "http 1.4.0", + "httparse", + "rand 0.8.5", + "ring", + "rustls-pki-types", + "tokio", + "tokio-rustls 0.26.4", + "tokio-util", + "webpki-roots 0.26.11", +] + +[[package]] +name = "toml" +version = "0.8.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime 0.6.11", + "toml_edit 0.22.27", +] + +[[package]] +name = "toml_datetime" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_datetime" +version = "1.1.0+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97251a7c317e03ad83774a8752a7e81fb6067740609f75ea2b585b569a59198f" +dependencies = [ + "serde_core", +] + +[[package]] +name = "toml_edit" +version = "0.22.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" +dependencies = [ + "indexmap 2.13.0", + "serde", + "serde_spanned", + "toml_datetime 0.6.11", + "toml_write", + "winnow 0.7.15", +] + +[[package]] +name = "toml_edit" +version = "0.25.8+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16bff38f1d86c47f9ff0647e6838d7bb362522bdf44006c7068c2b1e606f1f3c" +dependencies = [ + "indexmap 2.13.0", + "toml_datetime 1.1.0+spec-1.1.0", + "toml_parser", + "winnow 1.0.0", +] + +[[package]] +name = "toml_parser" +version = "1.1.0+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2334f11ee363607eb04df9b8fc8a13ca1715a72ba8662a26ac285c98aabb4011" +dependencies = [ + "winnow 1.0.0", +] + +[[package]] +name = "toml_write" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" + +[[package]] +name = "tonic" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" +dependencies = [ + "async-stream", + 
"async-trait", + "axum 0.7.9", + "base64", + "bytes", + "h2 0.4.13", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.8.1", + "hyper-timeout", + "hyper-util", + "percent-encoding", + "pin-project", + "prost 0.13.5", + "rustls-pemfile", + "socket2 0.5.10", + "tokio", + "tokio-rustls 0.26.4", + "tokio-stream", + "tower 0.4.13", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tonic-build" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9557ce109ea773b399c9b9e5dca39294110b74f1f342cb347a80d1fce8c26a11" +dependencies = [ + "prettyplease", + "proc-macro2", + "prost-build 0.13.5", + "prost-types 0.13.5", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "tower" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" +dependencies = [ + "futures-core", + "futures-util", + "indexmap 1.9.3", + "pin-project", + "pin-project-lite", + "rand 0.8.5", + "slab", + "tokio", + "tokio-util", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" +dependencies = [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper", + "tokio", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower-http" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9cd434a998747dd2c4276bc96ee2e0c7a2eadf3cae88e52be55a05fa9053f5" +dependencies = [ + "bitflags", + "bytes", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "pin-project-lite", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower-http" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" +dependencies = [ + "bitflags", + "bytes", + "futures-util", + "http 1.4.0", + "http-body 1.0.1", + "http-body-util", + "iri-string", + "pin-project-lite", + "tower 0.5.3", + "tower-layer", + "tower-service", + "tracing", + "uuid", +] + +[[package]] +name = "tower-layer" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" + +[[package]] +name = "tower-service" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" + +[[package]] +name = "tracing" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "log", + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "tracing-core" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-serde" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"704b1aeb7be0d0a84fc9828cae51dab5970fee5088f83d1dd7ee6f6246fc6ff1" +dependencies = [ + "serde", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex-automata", + "serde", + "serde_json", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", + "tracing-serde", +] + +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + +[[package]] +name = "tryhard" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fe58ebd5edd976e0fe0f8a14d2a04b7c81ef153ea9a54eebc42e67c2c23b4e5" +dependencies = [ + "pin-project-lite", + "tokio", +] + +[[package]] +name = "typenum" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" + +[[package]] +name = "unicode-id-start" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81b79ad29b5e19de4260020f8919b443b2ef0277d242ce532ec7b7a2cc8b6007" + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-linebreak" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b09c83c3c29d37506a3e260c08c03743a6bb66a9cd432c6934ab501a190571f" + +[[package]] +name = "unicode-segmentation" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c" + +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + +[[package]] +name = "url" +version = "2.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + +[[package]] +name = "urlencoding" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "uuid" +version = "1.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ac8b6f42ead25368cf5b098aeb3dc8a1a2c05a3eee8a9a1a68c640edbfc79d9" +dependencies = [ + "getrandom 0.4.2", + "js-sys", + 
"serde_core", + "wasm-bindgen", +] + +[[package]] +name = "uuid-simd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b082222b4f6619906941c17eb2297fff4c2fb96cb60164170522942a200bd8" +dependencies = [ + "outref", + "uuid", + "vsimd", +] + +[[package]] +name = "v8" +version = "0.106.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a381badc47c6f15acb5fe0b5b40234162349ed9d4e4fd7c83a7f5547c0fc69c5" +dependencies = [ + "bindgen 0.69.5", + "bitflags", + "fslock", + "gzip-header", + "home", + "miniz_oxide", + "once_cell", + "paste", + "which 6.0.3", +] + +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "vsimd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" + +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasip2" +version = "1.0.2+wasi-0.2.9" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.115" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6523d69017b7633e396a89c5efab138161ed5aafcbc8d3e5c5a42ae38f50495a" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "serde", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d1faf851e778dfa54db7cd438b70758eba9755cb47403f3496edd7c8fc212f0" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.115" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e3a6c758eb2f701ed3d052ff5737f5bfe6614326ea7f3bbac7156192dc32e67" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.115" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "921de2737904886b52bcbb237301552d05969a6f9c40d261eb0533c8b055fedf" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn 2.0.117", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.115" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a93e946af942b58934c604527337bad9ae33ba1d5c6900bbb41c2c07c2364a93" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap 2.13.0", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags", + "hashbrown 0.15.5", + "indexmap 2.13.0", + "semver", +] + +[[package]] +name = "web-sys" +version = "0.3.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84cde8507f4d7cfcb1185b8cb5890c494ffea65edbe1ba82cfd63661c805ed94" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "webpki-roots" +version = "0.26.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" +dependencies = [ + "webpki-roots 1.0.6", +] + +[[package]] +name = "webpki-roots" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cfaf3c063993ff62e73cb4311efde4db1efb31ab78a3e5c457939ad5cc0bed" +dependencies = [ + "rustls-pki-types", +] + +[[package]] +name = "which" +version = "4.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" +dependencies = [ + "either", + "home", + "once_cell", + "rustix 0.38.44", +] + +[[package]] 
+name = "which" +version = "6.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ee928febd44d98f2f459a4a79bd4d928591333a494a10a868418ac1b39cf1f" +dependencies = [ + "either", + "home", + "rustix 0.38.44", + "winsafe", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-core" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-result" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.5", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", 
+ "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + +[[package]] +name = "winnow" +version 
= "0.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df79d97927682d2fd8adb29682d1140b343be4ac0f08fd68b7765d9c059d3945" +dependencies = [ + "memchr", +] + +[[package]] +name = "winnow" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a90e88e4667264a994d34e6d1ab2d26d398dcdca8b7f52bec8668957517fc7d8" +dependencies = [ + "memchr", +] + +[[package]] +name = "winsafe" +version = "0.0.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d135d17ab770252ad95e9a872d365cf3090e3be864a34ab46f48555993efc904" + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck 0.5.0", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck 0.5.0", + "indexmap 2.13.0", + "prettyplease", + "syn 2.0.117", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn 2.0.117", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags", + "indexmap 2.13.0", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap 2.13.0", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + +[[package]] +name = "writeable" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" + +[[package]] +name = "wyz" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] + +[[package]] +name = "xmlparser" +version = "0.13.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" + +[[package]] +name = "yoke" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" +dependencies = [ + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", + "synstructure", +] + +[[package]] +name = "zerocopy" +version = "0.8.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "zerofrom" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", + "synstructure", +] + +[[package]] +name = "zeroize" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" + +[[package]] +name = "zerotrie" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + +[[package]] +name = "zerovec" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "zmij" +version = "1.0.21" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..7f8525b --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,15 @@ +[workspace] +resolver = "2" +members = [ + "aggregate", + "gateway", + "projection", + "runner", + "shared", + "control/api", +] + +[profile.release] +lto = true +codegen-units = 1 +strip = "symbols" diff --git a/DOCKER.md b/DOCKER.md new file mode 100644 index 0000000..2351aed --- /dev/null +++ b/DOCKER.md @@ -0,0 +1,56 @@ +# Docker + +## Local Dev (Compose) + +```bash +docker compose up -d --build +docker compose ps +docker compose down -v +``` + +To include the observability stack (Grafana/Loki/Tempo/VictoriaMetrics) with the local compose: + +```bash +docker compose -f docker-compose.yml -f observability/docker-compose.yml up -d --build +docker compose -f docker-compose.yml -f observability/docker-compose.yml down -v +``` + +Service ports in the default compose: +- Gateway HTTP: `http://localhost:8080` +- Gateway gRPC: `localhost:8081` +- Aggregate gRPC: `localhost:50051` +- Aggregate HTTP: `http://localhost:18080` +- Runner HTTP: `http://localhost:28080` +- Control API: `http://localhost:38080` +- Control UI: `http://localhost:8082` +- NATS: `nats://localhost:4222`, monitoring `http://localhost:8222` + +## Swarm (Dev) + +Build images: + +```bash +sh docker/scripts/build_images.sh all +``` + +Create dev secrets required by the observability stack: + +```bash +sh docker/scripts/swarm_dev_secrets.sh +``` + +Deploy: + +```bash +docker stack deploy -c swarm/stacks/platform.yml cloudlysis +docker stack deploy -c swarm/stacks/control-plane.yml cloudlysis_control +docker stack deploy -c swarm/stacks/observability.yml cloudlysis_obs +``` + +Remove: + +```bash +docker stack rm cloudlysis_obs +docker stack rm cloudlysis_control +docker stack rm cloudlysis +``` diff --git a/GATEWAY_TRANSPORT_PLAN.md 
b/GATEWAY_TRANSPORT_PLAN.md new file mode 100644 index 0000000..4328cde --- /dev/null +++ b/GATEWAY_TRANSPORT_PLAN.md @@ -0,0 +1,216 @@ +# Gateway Transport Plan + +## Purpose +Standardize and optimize how the Gateway communicates with Aggregate, Projection, and Runner, and how nodes communicate via NATS JetStream, under these principles: +- Simplicity (few patterns, minimal bespoke conventions) +- Ease of operation (consistent health/ready/metrics, consistent failure modes) +- Frugality (bounded connections, bounded fanout, low overhead) +- High performance (low tail latency, backpressure-aware, predictable routing) +- Safety (tenant isolation, deny-by-default authz, consistent context propagation) + +## Non-Negotiable Rules (Global) +- Every cross-service request MUST carry tenant + trace context. +- Every transport path MUST have explicit timeouts/deadlines and bounded retries. +- Every milestone below is “stop-the-line” gated: + - All tasks completed + - All tests passing + - Workspace lint/format/type checks passing + - Required integration tests for the milestone passing (when gated by env, they must be runnable and documented) + +## Current State (Baseline) +- Gateway → Aggregate: gRPC command submission +- Gateway → Projection: HTTP query proxy (`/v1/query/*`) +- Gateway → Runner: HTTP proxy for admin endpoints (`/admin/runner/*`) +- Nodes ↔ NATS JetStream: events/workflow streams with headers for tenant/correlation/trace (now more consistent) + +## Target Architecture (End State) +- Edge contract (clients ↔ Gateway): HTTP/JSON (stable, debuggable, browser + ops friendly) +- Internal RPC (Gateway ↔ services): gRPC for Aggregate + Projection + Runner (single internal RPC stack) +- Async/event backbone: NATS JetStream remains for event/work distribution +- `shared` is the single source of truth for: + - Header names and propagation rules + - Trace parsing/validation rules (`traceparent`, `trace-id`) + - Request context representation 
(tenant/correlation/trace) + +## Definitions +### Request Context +Fields that must be consistently propagated: +- `tenant_id` (HTTP: `x-tenant-id`, NATS: `tenant-id`) +- `correlation_id` (HTTP: `x-correlation-id`, NATS: `x-correlation-id` and `correlation-id`) +- `traceparent` (HTTP: `traceparent`, NATS: `traceparent`) +- `trace_id` (derived from `traceparent` or provided explicitly; NATS: `trace-id`) +- `request_id` (HTTP: `x-request-id`, optional for NATS) + +### Standard Health Endpoints (per service) +- `GET /health` liveness +- `GET /ready` readiness (includes tenant gating if applicable) +- `GET /metrics` Prometheus + +## Milestone 0: Transport Contract Lock-in (Context + Headers Everywhere) + +### Goal +Make context propagation and header naming consistent and enforceable across HTTP, gRPC, and NATS, including “background” Gateway calls (health checks, rebalance probes). + +### Exit Criteria +- A single shared contract exists for header names and trace parsing. +- Gateway injects context into all upstream calls (including rebalance/health probes). +- Aggregate/Projection/Runner consistently emit/consume the standard context on all transport paths they own. +- Unit tests prove propagation behavior for each transport. +- `cargo fmt --check`, `cargo clippy --workspace --all-targets -- -D warnings`, `cargo test --workspace` all pass. + +### Tasks +- [ ] Standardize header constants in `shared` and remove string literals from Gateway and nodes where feasible. 
+- [ ] Add `shared` helpers for: + - HTTP extract/inject + - gRPC metadata extract/inject + - NATS header extract/inject +- [ ] Gateway: ensure context is injected into: + - gRPC upstream requests to Aggregate + - HTTP upstream requests to Projection + - Runner admin proxy requests + - Any “probe” calls (rebalance gates, fleet snapshots, health checks) +- [ ] Projection/Runner/Aggregate: ensure NATS published messages include: + - `tenant-id` + - `x-correlation-id` + `correlation-id` + - `traceparent` + - `trace-id` (derived when possible) +- [ ] Add transport-level tests: + - [ ] Gateway gRPC path: incoming context → upstream metadata → response metadata preserved + - [ ] Gateway HTTP proxy path: incoming context → upstream headers preserved + - [ ] NATS publish path: produced headers contain expected keys/values + +### Required Tests +- Unit tests for shared parsing/derivation utilities +- Existing per-crate test suites +- At least one per-service “transport contract” test verifying headers are present and correct + +## Milestone 1: Internal RPC Standardization (Projection via gRPC) + +### Goal +Eliminate Gateway → Projection HTTP proxy as the default path by introducing an internal gRPC Query service, keeping HTTP optional for human/debug use. + +### Exit Criteria +- A Projection gRPC service exists for query execution. +- Gateway routes queries to Projection via gRPC by default. +- Authorization semantics remain enforced in Gateway (deny-by-default). +- Response shapes are stable and match the existing UI expectations. +- All tests pass, including new gRPC query integration tests. 
+ +### Tasks +- [ ] Define protobuf API: `projection.gateway.v1.QueryService` + - [ ] Request includes tenant + view + query payload and metadata + - [ ] Response includes result payload and standard context propagation +- [ ] Implement Projection gRPC server: + - [ ] Parse tenant/view/query + - [ ] Execute query against current projection storage/query engine + - [ ] Enforce tenant scope +- [ ] Implement Gateway gRPC client path for queries: + - [ ] Routing by tenant to Projection endpoint + - [ ] Deadlines, bounded retries (idempotent only) + - [ ] Context propagation (tenant/correlation/trace) +- [ ] Keep HTTP `/v1/query/*`: + - [ ] Either route to internal gRPC implementation or keep as legacy/debug endpoint +- [ ] Add tests: + - [ ] Gateway query authz + forwarding via gRPC + - [ ] Projection gRPC query contract tests for tenant isolation + +### Required Tests +- New gRPC QueryService tests (unit + integration) +- Existing query/authz tests in Gateway +- Workspace fmt/clippy/test + +## Milestone 2: Internal RPC Standardization (Runner Admin via gRPC) + +### Goal +Replace `/admin/runner/*` HTTP proxying with a first-class gRPC admin service for Runner operations. + +### Exit Criteria +- Runner exposes a gRPC admin service for the admin surface required by Control/Gateway. +- Gateway uses gRPC to call Runner admin APIs. +- Authentication/authorization remains in Gateway; Runner trusts Gateway boundary. +- Admin operations are idempotent where appropriate and include audit hooks where required. +- All tests pass and include negative/tenant-spoof cases. 
+ +### Tasks +- [ ] Define protobuf API: `runner.admin.v1.RunnerAdmin` + - [ ] Drain/resume/status/reload/tenant-scoped controls + - [ ] Standard error mapping +- [ ] Implement Runner gRPC admin server: + - [ ] Tenant gating enforced for tenant-scoped operations + - [ ] Readiness/drain semantics aligned with platform contracts +- [ ] Implement Gateway gRPC client integration: + - [ ] Route to Runner endpoint via routing table + - [ ] Enforce authz rights (e.g. `runner.admin`) + - [ ] Context propagation +- [ ] Keep HTTP `/admin/*` in Runner optional: + - [ ] Either remove Gateway proxy usage or keep for direct debugging behind secure network +- [ ] Tests: + - [ ] Gateway: admin calls rejected without rights + - [ ] Gateway: tenant spoof attempts rejected + - [ ] Runner: idempotency and drain semantics validated + +### Required Tests +- gRPC RunnerAdmin unit/integration tests +- Gateway proxy-to-gRPC tests +- Workspace fmt/clippy/test + +## Milestone 3: Connection + Retry Policy Unification (Performance + Frugality) + +### Goal +Make upstream connection management and retry behavior consistent and bounded across Gateway and nodes. + +### Exit Criteria +- Gateway maintains bounded upstream connection pools for gRPC endpoints. +- All gRPC calls have deadlines; retries are only for idempotent operations. +- All probe/fanout calls are bounded and do not cause thundering herds. +- Load/soak tests show stable behavior under partial failure. 
+ +### Tasks +- [ ] Implement a Gateway upstream channel pool: + - [ ] LRU bounded by max endpoints + - [ ] TTL/eviction strategy + - [ ] Fast path reuse under load +- [ ] Standardize retry profiles: + - [ ] Read-only: short retry with jitter + - [ ] Mutations: no automatic retry unless idempotency key present +- [ ] Standardize timeouts: + - [ ] Edge timeout limits + - [ ] Internal per-service deadlines +- [ ] Fanout controls: + - [ ] Concurrency limiters for fleet snapshot/probes + - [ ] Cache results where safe (short TTL) + +### Required Tests +- Unit tests for pool eviction/TTL +- Gateway integration tests for deadline propagation +- Gated load tests (document env + how to run) + +## Milestone 4: Transport Simplification Cleanup (Remove Legacy Paths) + +### Goal +Remove or de-prioritize legacy HTTP internal paths so the “happy path” uses: HTTP edge → Gateway → gRPC internal → NATS async. + +### Exit Criteria +- Gateway no longer depends on HTTP for Projection queries or Runner admin. +- Legacy endpoints are either removed or explicitly marked “debug-only” and not used by Gateway/Control. +- All operational playbooks rely on standardized endpoints. + +### Tasks +- [ ] Remove Gateway’s HTTP query proxy usage (or keep only as compatibility shim). +- [ ] Remove Gateway’s runner admin HTTP proxy usage (or keep only as compatibility shim). +- [ ] Ensure Control UI + Control API use the standardized Gateway surfaces. +- [ ] Harden metrics and health probes to always carry context. 
+ +### Required Tests +- End-to-end smoke tests (gated) +- Workspace fmt/clippy/test + +## Verification Commands (Required at Each Milestone) +- `cargo fmt --check` +- `cargo clippy --workspace --all-targets -- -D warnings` +- `cargo test --workspace` +- `npm ci && npm run lint && npm run typecheck && npm run test && npm run build` (in `control/ui`) + +## Notes / Constraints +- Do not break wire compatibility for NATS subjects or event payloads; evolve via optional fields and tolerant decoding. +- Keep tenant isolation rules enforced at the Gateway boundary and re-validated at nodes where it is safety-critical. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..ca49b42 --- /dev/null +++ b/Makefile @@ -0,0 +1,58 @@ +.PHONY: docker-build-platform docker-build-control docker-build-observability docker-build-all +.PHONY: compose-up compose-down compose-ps compose-up-observability compose-down-observability +.PHONY: swarm-dev-secrets swarm-deploy-platform swarm-deploy-control swarm-deploy-observability swarm-deploy-all +.PHONY: swarm-rm-platform swarm-rm-control swarm-rm-observability swarm-rm-all + +docker-build-platform: + sh docker/scripts/build_images.sh platform + +docker-build-control: + sh docker/scripts/build_images.sh control + +docker-build-observability: + true + +docker-build-all: + sh docker/scripts/build_images.sh all + +compose-up: + docker compose up -d --build + +compose-up-observability: + docker compose -f docker-compose.yml -f observability/docker-compose.yml up -d --build + +compose-down: + docker compose down -v + +compose-down-observability: + docker compose -f docker-compose.yml -f observability/docker-compose.yml down -v + +compose-ps: + docker compose ps + +swarm-dev-secrets: + sh docker/scripts/swarm_dev_secrets.sh + +swarm-deploy-platform: + docker stack deploy -c swarm/stacks/platform.yml cloudlysis + +swarm-deploy-control: + docker stack deploy -c swarm/stacks/control-plane.yml cloudlysis_control + 
+swarm-deploy-observability: + docker stack deploy -c swarm/stacks/observability.yml cloudlysis_obs + +swarm-deploy-all: swarm-dev-secrets swarm-deploy-platform swarm-deploy-control swarm-deploy-observability + true + +swarm-rm-platform: + docker stack rm cloudlysis + +swarm-rm-control: + docker stack rm cloudlysis_control + +swarm-rm-observability: + docker stack rm cloudlysis_obs + +swarm-rm-all: swarm-rm-observability swarm-rm-control swarm-rm-platform + true diff --git a/NATS_TRANSPORT_PLAN.md b/NATS_TRANSPORT_PLAN.md new file mode 100644 index 0000000..aed03a1 --- /dev/null +++ b/NATS_TRANSPORT_PLAN.md @@ -0,0 +1,246 @@ +# NATS Transport Plan + +## Purpose +Standardize and optimize how nodes (Aggregate, Projection, Runner, Gateway where applicable) use NATS JetStream and NATS KV, under these principles: +- Simplicity (few primitives, consistent naming, minimal per-service divergence) +- Ease of operation (predictable streams/consumers, clear runbooks, easy debugging) +- Frugality (bounded consumers, bounded in-flight work, minimal churn, minimal storage) +- Low resource usage (stable durable consumers, controlled ack waits, limited fanout) +- High performance (high throughput, low tail latency, reliable backpressure) +- Safety (tenant isolation, idempotency, deterministic replay, poison handling) + +## Non-Negotiable Rules (Global) +- Every JetStream stream/consumer MUST have an explicit contract: + - name, subjects, retention, storage, replication, max sizes + - ack policy, ack wait, max deliver, max in flight +- Every node MUST run with bounded work: + - bounded pull batch sizes + - bounded concurrency + - bounded retry/backoff +- Every message MUST be tenant-scoped in subject and/or headers. 
+- Every milestone below is “stop-the-line” gated:
+  - all tasks completed
+  - all tests passing
+  - workspace lint/format checks passing
+  - required NATS-gated integration tests for the milestone passing (when gated by env)
+
+## Current State (Baseline)
+- Streams:
+  - `AGGREGATE_EVENTS` (Aggregate publishes, Projection/Runner consume)
+  - `WORKFLOW_COMMANDS`, `WORKFLOW_EVENTS` (Runner)
+- Subject conventions:
+  - Aggregate events: `tenant.<tenant_id>.aggregate.<aggregate_type>.<aggregate_id>`
+  - Defaults often use filters like `tenant.*.aggregate.*.*`
+- Durable consumers:
+  - Projection uses a durable name (configurable)
+  - Runner uses configurable durable prefix per role
+  - Aggregate had ad-hoc fetch consumer risks; now mitigated with unique consumer names per fetch
+- Headers:
+  - Tenant + correlation + trace headers exist but were historically inconsistent; shared utilities now exist
+
+## Target Architecture (End State)
+- A single “NATS wire protocol” contract shared across services:
+  - subject naming
+  - required headers (tenant/correlation/trace)
+  - message envelope compatibility rules (tolerant decoding, optional fields)
+- Stable, minimal set of JetStream streams:
+  - one stream per message class (aggregate events, workflow commands, workflow events)
+  - no per-tenant streams unless there is a strong operational reason
+- Stable, limited consumers:
+  - durable consumers for long-lived processors (Projection, Runner)
+  - ephemeral consumers only for bounded ad-hoc operations (Aggregate fetch), always unique + best-effort deletion
+- Uniform backpressure + reliability defaults:
+  - explicit ack
+  - bounded `max_ack_pending` and application-level concurrency
+  - bounded redelivery via `max_deliver` + poison policy
+
+## Definitions
+### Message Context (Headers)
+Standard headers for NATS published messages:
+- `tenant-id` (required)
+- `x-correlation-id` and `correlation-id` (required for any request-derived message; generated if missing)
+- `traceparent` (optional but recommended; 
generated/propagated if present upstream)
+- `trace-id` (optional; derived from traceparent when possible)
+- `Nats-Msg-Id` (required for idempotent publish/dedupe when applicable)
+
+### Subject Naming Rules
+- Tenant-first prefix: `tenant.<tenant_id>.…`
+- Stable message class token:
+  - `aggregate` for domain events
+  - `effect`, `effect_result`, `workflow`, `workflow_event` for Runner
+- No ambiguous wildcard publishing:
+  - producers publish concrete subjects only
+  - consumers may filter with wildcards
+
+### Consumer Naming Rules
+- Durable consumer names must be stable and collision-free:
+  - include role + mode + optional view/saga name + shard/group
+- Ephemeral consumer names must be unique per operation:
+  - include tenant + purpose + uuid
+  - must be deleted best-effort when operation completes
+
+## Milestone 0: NATS Wire Contract Lock-in (Names, Headers, Envelopes)
+
+### Goal
+Make the NATS/JetStream wire contract explicit and enforced in code so all producers/consumers interoperate safely across scale-out and rolling restarts.
+
+### Exit Criteria
+- `shared` exposes NATS header constants and helpers for inject/extract/derive.
+- All producers set required headers consistently.
+- All consumers tolerate unknown fields and missing optional fields.
+- A single, documented subject naming convention is enforced in code (builder functions).
+- Workspace fmt/clippy/tests pass. 
+ +### Tasks +- [ ] Centralize NATS header constants and helpers in `shared`: + - [ ] inject headers for publish (tenant, correlation, trace) + - [ ] extract headers on receive (best-effort) + - [ ] derive `trace-id` from `traceparent` +- [ ] Aggregate: + - [ ] Ensure event publishing always sets `tenant-id`, correlation headers, trace headers + - [ ] Ensure `Nats-Msg-Id` strategy is correct for idempotency/dedupe (document and test) +- [ ] Projection: + - [ ] Ensure EventEnvelope decoding remains tolerant (unknown fields ignored, optional IDs supported) + - [ ] Ensure correlation/trace context is carried into spans/metrics consistently +- [ ] Runner: + - [ ] Ensure publish paths include correlation/trace headers consistently for commands and results + - [ ] Ensure outbox metadata → NATS headers mapping is consistent and tested +- [ ] Tests: + - [ ] Unit tests for header injection/extraction in `shared` + - [ ] Per-service unit tests asserting produced headers include required keys + +### Required Tests +- `cargo fmt --check` +- `cargo clippy --workspace --all-targets -- -D warnings` +- `cargo test --workspace` + +## Milestone 1: Stream Configuration Standardization (Retention, Limits, Storage) + +### Goal +Make stream configs consistent, explicit, and operationally sane across environments (dev → prod), minimizing surprise and preventing runaway resource usage. + +### Exit Criteria +- Stream config for each stream is explicitly defined and validated at startup. +- Limits (max messages/bytes/age) are explicit and have defaults. +- Duplicate windows and dedupe behavior are explicit and tested. +- A “no destructive changes on startup” policy is enforced (create if missing; do not silently replace). 
+ +### Tasks +- [ ] Define a single “stream config policy” module per service (or shared helper): + - [ ] `AGGREGATE_EVENTS` subjects + retention policy + - [ ] `WORKFLOW_COMMANDS` subjects + retention policy + - [ ] `WORKFLOW_EVENTS` subjects + retention policy +- [ ] Standardize defaults: + - [ ] retention: limits appropriate for replay + rebuild + - [ ] `duplicate_window` aligned with producer idempotency strategy + - [ ] storage type and replication policy documented and configurable +- [ ] Add startup validations: + - [ ] verify stream exists and matches required subject set (compatible superset allowed) + - [ ] verify required ack/dedupe assumptions hold +- [ ] Add tests that parse and validate configs without NATS. + +### Required Tests +- Unit tests for stream config builders +- Existing crate tests + +## Milestone 2: Consumer Policy Standardization (Ack, Backpressure, Poison) + +### Goal +Make consumption reliable and cheap under load by standardizing ack policy, concurrency, and poison/deadletter handling. + +### Exit Criteria +- All long-lived consumers use explicit ack with consistent `ack_wait`, `max_deliver`, `max_ack_pending`. +- Application concurrency is bounded and tied to `max_in_flight`. +- Poison policy is consistent: + - after `max_deliver`, term + deadletter/quarantine record is written +- Replay behavior is deterministic on restart (checkpoint-based where applicable). 
+ +### Tasks +- [ ] Define standard consumer config defaults: + - [ ] `AckPolicy::Explicit` + - [ ] `ack_wait` default + env override + - [ ] `max_deliver` default + env override + - [ ] `max_ack_pending` tied to application concurrency +- [ ] Projection: + - [ ] Ensure durable consumer naming is collision-free in all modes (Single vs PerView) + - [ ] Ensure checkpoint gates ack correctly (skip still acks) + - [ ] Ensure poison policy writes durable records and terminates reliably +- [ ] Runner: + - [ ] Ensure saga/effect consumers use consistent durable naming + deliver groups when scaling out + - [ ] Ensure outbox relay preserves exactly-once semantics via dedupe keys + idempotent publish +- [ ] Aggregate: + - [ ] Ensure ad-hoc fetch consumer is bounded (timeouts) and unique per operation (already required) + - [ ] Ensure best-effort cleanup is performed and cannot delete unrelated consumers +- [ ] Tests: + - [ ] Unit tests for consumer name generation (sanitization + uniqueness) + - [ ] NATS-gated tests for ack/redelivery/poison behavior (must be runnable with env flag) + +### Required Tests +- Workspace fmt/clippy/tests +- NATS-gated integration tests for: + - redelivery idempotency + - poison termination behavior + - scale-out with deliver group (where supported) + +## Milestone 3: Connection Management + Failure Semantics (Operational Frugality) + +### Goal +Make NATS connection handling stable under partial failure while minimizing resource churn and cascading outages. + +### Exit Criteria +- One NATS connection per process (or bounded pool only if justified). +- Reconnect/backoff policy is explicit and consistent. +- Circuit breaker behavior is consistent (when used), and health/ready reflect NATS state correctly. +- No busy-looping on NATS outages. 
+ +### Tasks +- [ ] Standardize connection options: + - [ ] reconnect delays/backoff + - [ ] max reconnect attempts or “infinite with backoff” strategy (explicit) + - [ ] request timeouts around JetStream operations +- [ ] Standardize readiness semantics: + - [ ] `ready=false` when NATS is unavailable and the node depends on it + - [ ] `health` stays “process alive” but reports NATS connectivity in payload +- [ ] Add “fast fail” mode for tests and dev (avoid 30x retries when env not set). +- [ ] Tests: + - [ ] unit tests for backoff behavior (where possible) + - [ ] gated integration test: temporary NATS outage does not crash-loop and recovers + +## Milestone 4: Multi-Tenant Scale-Out Guarantees (Collision-Free + Predictable) + +### Goal +Guarantee safe multi-replica behavior: no consumer collisions, no duplicate side effects, predictable throughput with bounded resource usage. + +### Exit Criteria +- Durable names are deterministic and collision-free across replicas. +- Deliver groups are used where appropriate to share work across replicas. +- Exactly-once side effects are enforced via idempotency + dedupe keys (not wishful thinking). +- A scale-out test suite exists and is gated but runnable. 
+ +### Tasks +- [ ] Establish consumer naming scheme per service role: + - [ ] Projection: per-view durable option uses sanitized names and stable mapping + - [ ] Runner: durable prefix includes role + shard + optional group +- [ ] Establish deliver group usage rules: + - [ ] when to enable (scale-out consumers) + - [ ] how to roll without duplication +- [ ] Strengthen dedupe keys: + - [ ] event-driven sagas: checkpoint + dedupe marker strategy tested under redelivery + - [ ] outbox relay: verify publish idempotency with `Nats-Msg-Id` +- [ ] Add gated tests: + - [ ] two replicas, same tenant, no duplicate publishes + - [ ] rolling restart preserves checkpoint correctness + +## Verification Commands (Required at Each Milestone) +- `cargo fmt --check` +- `cargo clippy --workspace --all-targets -- -D warnings` +- `cargo test --workspace` +- Gated NATS integration tests: + - Runner: `RUNNER_TEST_NATS_URL=... cargo test -p runner -- --ignored` + - Projection: `PROJECTION_TEST_NATS_URL=... cargo test -p projection -- --ignored` + - Control API (if it runs NATS-gated tests): set documented env flags and run ignored tests + +## Notes / Constraints +- Do not create per-tenant streams unless scaling evidence requires it; prefer subject partitioning and consumer groups. +- Prefer backward-compatible envelope changes (optional fields, tolerant decoding). +- Prefer stable durable consumers; ephemeral consumers must be unique and bounded and must cleanup best-effort. 
diff --git a/README.md b/README.md index e69de29..7670628 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,38 @@ +# cloudlysis (monorepo) + +## Layout +- Rust services (Cargo workspace): `aggregate/`, `gateway/`, `projection/`, `runner/`, `control/api/`, `shared/` +- Control UI: `control/ui/` +- Docker + Swarm + Compose: `docker/`, `docker-compose.yml`, `swarm/`, `observability/` +- Transport plans: + - `TRANSPORT_DEVELOPMENT_PLAN.md` + - `GATEWAY_TRANSPORT_PLAN.md` + - `NATS_TRANSPORT_PLAN.md` + +## Quick Start (Docker Compose) + +```bash +docker compose up -d --build +``` + +Full local stack with observability: + +```bash +docker compose -f docker-compose.yml -f observability/docker-compose.yml up -d --build +``` + +## Commands +- `make compose-up`, `make compose-down` +- `make compose-up-observability`, `make compose-down-observability` +- `make docker-build-all` +- `make swarm-deploy-all`, `make swarm-rm-all` + +More details: `DOCKER.md` + +## Workspace Verification + +```bash +cargo fmt --check +cargo clippy --workspace --all-targets -- -D warnings +cargo test --workspace +``` diff --git a/TRANSPORT_DEVELOPMENT_PLAN.md b/TRANSPORT_DEVELOPMENT_PLAN.md new file mode 100644 index 0000000..b44c857 --- /dev/null +++ b/TRANSPORT_DEVELOPMENT_PLAN.md @@ -0,0 +1,333 @@ +# Transport Development Plan + +## Purpose +Unify and optimize the platform transport layer end-to-end: +- Gateway ↔ nodes (Aggregate, Projection, Runner): routing + RPC/proxying + probes +- Node ↔ NATS JetStream/KV: event/work distribution + configuration substrate + +This plan merges and supersedes: +- `GATEWAY_TRANSPORT_PLAN.md` +- `NATS_TRANSPORT_PLAN.md` + +## Current Status (Codebase Reality) +- Monorepo workspace exists; `shared` crate exists and is used by Aggregate/Projection/Runner/Gateway. 
+- Request context pieces are partially standardized: + - `shared` provides `TenantId`, `CorrelationId`, `TraceId` + - `shared` provides `trace_id_from_traceparent(...)` and `traceparent_from_trace_id(...)` + - Some header names are centralized in `shared` but not all call sites use constants yet. +- Gateway → Aggregate is already HTTP(edge) → gRPC(internal) and propagates `x-tenant-id`, `x-correlation-id`, and `traceparent`. +- Gateway → Projection remains HTTP proxy (`/v1/query/...`) and Gateway → Runner remains HTTP admin proxy (`/admin/runner/...`). +- Node → NATS header propagation is improved and closer to consistent: + - Runner publishes `x-correlation-id` and `correlation-id`, and ensures `traceparent`/`trace-id` are present/derived when possible. + - Aggregate publishes `trace-id` when `traceparent` is present. +- Many “hard” NATS tests already exist but are gated/ignored by default; they should be treated as milestone gates when enabling changes. + +## Principles +- Simplicity: minimize distinct patterns; prefer one internal RPC stack + one async backbone. +- Ease of operation: consistent health/ready/metrics; consistent naming; predictable failure modes. +- Frugality: bounded connections, bounded consumers, bounded in-flight work; no churny resources. +- Low resource usage: stable durables; avoid per-request reconnects; avoid unbounded loops. +- High performance: multiplexing, backpressure, low tail latency, predictable routing. +- Safety: tenant isolation, deny-by-default authz at the edge, idempotency, deterministic replay. + +## Non-Negotiable Rules (Global) +- Every cross-component hop MUST carry tenant + correlation + trace context. +- Every transport path MUST have explicit timeouts/deadlines and bounded retries. +- Every JetStream stream/consumer MUST have an explicit contract (name/subjects/retention/ack policy). 
+- Every milestone is stop-the-line gated: + - All tasks completed + - All tests required by the milestone pass + - Workspace verification commands pass + - Gated integration tests for the milestone are runnable and documented + +## Baseline (Today) +- Gateway → Aggregate: gRPC (command submission) +- Gateway → Projection: HTTP (query proxy) +- Gateway → Runner: HTTP (admin proxy) +- Node ↔ NATS JetStream: `AGGREGATE_EVENTS`, `WORKFLOW_COMMANDS`, `WORKFLOW_EVENTS` + +## End State (Target Architecture) +- Edge contract (clients ↔ Gateway): HTTP/JSON +- Internal RPC (Gateway ↔ nodes): gRPC for Aggregate + Projection + Runner admin +- Async backbone: NATS JetStream for events/work distribution; NATS KV for routing/placement +- `shared` is the single source of truth for: + - header names and injection/extraction rules + - trace parsing/validation (`traceparent`, `trace-id`) + - context object model (tenant/correlation/trace/request ids) + - NATS subject + consumer naming helpers + +## Standard Contracts +### Context Fields +- Tenant: HTTP `x-tenant-id`, NATS `tenant-id` +- Correlation: HTTP `x-correlation-id`, NATS `x-correlation-id` and `correlation-id` +- Trace: HTTP `traceparent`, NATS `traceparent` and `trace-id` (derived when possible) +- Request id: HTTP `x-request-id` (optional for NATS) + +### Standard Service Endpoints (every service) +- `GET /health` liveness +- `GET /ready` readiness (includes tenant gating if relevant) +- `GET /metrics` Prometheus + +## Milestone 0: Shared Transport Contract (Headers + Context + Trace) + +### Goal +Make propagation rules consistent and enforceable across HTTP, gRPC, and NATS so every later milestone builds on one contract. + +### Exit Criteria +- `shared` contains canonical constants for header names and NATS header names. +- `shared` contains canonical trace parsing/validation and trace derivation helpers. +- Library-level unit tests cover parsing/derivation behavior. +- All crates build and tests pass for the workspace. 
+ +### Tasks +- [x] Add shared ID types in `shared`: + - [x] `TenantId` + - [x] `CorrelationId` + - [x] `TraceId` +- [~] Consolidate header constants in `shared`: + - [x] HTTP: `x-correlation-id`, `traceparent`, `trace-id` (for NATS/interop) + - [ ] HTTP: `x-tenant-id`, `x-request-id` (missing constants) + - [x] NATS: `correlation-id` (used in Runner), `trace-id` (now emitted where possible) + - [ ] NATS: `tenant-id` constant, `Nats-Msg-Id` constant (missing constants) +- [x] Add shared helpers: + - [x] derive `trace-id` from `traceparent` + - [x] derive `traceparent` from `trace-id` when valid + - [ ] normalize/generate correlation id when missing across all transports (helper exists for `CorrelationId::generate()`; adoption incomplete) +- [x] Add unit tests in `shared` for: + - [x] traceparent parsing validity + - [x] serialization shape for correlation/trace id newtypes + - [ ] additional validation cases (invalid traceparents, invalid trace-id lengths) if needed for stricter enforcement + +### Required Tests +- `cargo fmt --check` +- `cargo clippy --workspace --all-targets -- -D warnings` +- `cargo test --workspace` + +## Milestone 1: NATS Wire Protocol Lock-In (Subjects + Headers + Envelopes) + +### Dependencies +- Milestone 0 + +### Goal +Make the JetStream/NATS “wire protocol” explicit and uniform so interop is safe across scale-out and rolling restarts. + +### Exit Criteria +- Subject naming is standardized and enforced via builder functions (producers publish concrete subjects only). +- All NATS producers set required headers consistently. +- All NATS consumers tolerate unknown fields and missing optional fields. +- “Contract tests” exist per service to verify produced headers and subject formats. 
+
+### Tasks
+- [ ] Create/standardize subject builder helpers (prefer `shared`):
+  - [ ] Aggregate event subject builder (`tenant.<tenant_id>.aggregate.<aggregate_type>.<aggregate_id>`)
+  - [ ] Runner effect/effect_result/workflow subject builders
+- [~] Aggregate publishes:
+  - [ ] `tenant-id` header always present (still needs enforcement everywhere)
+  - [ ] correlation + trace headers always present when available, generated when required
+  - [x] `trace-id` is derived when `traceparent` is present (now emitted in publish path)
+  - [ ] `Nats-Msg-Id` strategy explicitly defined and tested
+- [~] Runner publishes (commands/results):
+  - [x] correlation headers emitted consistently (`x-correlation-id` + `correlation-id`)
+  - [x] trace headers derived consistently when possible (`traceparent` from `trace-id`, `trace-id` from `traceparent`)
+  - [ ] outbox metadata → NATS headers mapping standardized via shared helpers (adoption incomplete)
+- [~] Projection consumption:
+  - [x] envelope decoding remains tolerant (unknown fields ignored)
+  - [~] correlation/trace context flows into spans/metrics consistently (types are shared; header extraction remains best-effort and should be unified)
+- [ ] Add unit tests:
+  - [ ] subject formatting tests per service (once builders exist)
+  - [ ] required header presence tests per publisher (enforce required keys)
+
+### Required Tests
+- Workspace verification commands
+
+## Milestone 2: JetStream Stream Policy (Create/Validate, No Destructive Startup)
+
+### Dependencies
+- Milestone 1
+
+### Goal
+Make stream definitions explicit, validated, and safe in all environments, preventing resource runaway and accidental destructive changes.
+
+### Exit Criteria
+- Each stream has a single authoritative config policy (name/subjects/retention/limits/duplicate window).
+- Services create streams if missing, and validate compatibility on startup.
+- Startup does not silently replace or destructively mutate existing streams. 
+- Config-only tests validate stream config builders without requiring NATS. + +### Tasks +- [ ] Define stream policies: + - [ ] `AGGREGATE_EVENTS` (subjects, retention, duplicate window) + - [ ] `WORKFLOW_COMMANDS` + - [ ] `WORKFLOW_EVENTS` +- [ ] Implement compatibility validation rules: + - [ ] required subjects are present (superset allowed) + - [ ] retention/limits are within allowed ranges + - [ ] dedupe assumptions align with producer `Nats-Msg-Id` usage +- [ ] Add unit tests for stream config builders + validators. + +### Required Tests +- Workspace verification commands + +## Milestone 3: Consumer Policy + Backpressure + Poison (Reliable and Cheap Under Load) + +### Dependencies +- Milestone 2 + +### Goal +Standardize consumer configs and runtime behavior to guarantee bounded in-flight work, predictable redelivery behavior, and consistent poison handling. + +### Exit Criteria +- All long-lived consumers use explicit ack with standardized defaults (`ack_wait`, `max_deliver`, `max_ack_pending`). +- Application-level concurrency is bounded and aligned with `max_in_flight`. +- Poison policy is consistent across consumers (term + durable quarantine/deadletter record). 
+- Gated NATS integration tests prove: + - redelivery idempotency + - poison termination + - scale-out behavior (deliver group) where applicable + +### Tasks +- [ ] Standardize consumer defaults: + - [ ] `AckPolicy::Explicit` + - [ ] `ack_wait` default + env override + - [ ] `max_deliver` default + env override + - [ ] `max_ack_pending` tied to worker concurrency +- [ ] Projection: + - [ ] durable naming collision-free for Single/PerView modes + - [ ] checkpoint gate semantics: “skip still acks” + - [ ] poison handling persists durable records and terminates reliably +- [ ] Runner: + - [ ] durable naming collision-free and stable across replicas + - [ ] deliver group rules defined and tested + - [ ] outbox relay exactly-once behavior verified under redelivery +- [ ] Aggregate: + - [ ] ad-hoc fetch consumer always unique and bounded + - [ ] best-effort deletion never targets unrelated consumers +- [ ] Add gated NATS integration tests and document env flags: + - [ ] Runner ignored tests + - [ ] Projection ignored tests + +### Required Tests +- Workspace verification commands +- Runner: `RUNNER_TEST_NATS_URL=... cargo test -p runner -- --ignored` +- Projection: `PROJECTION_TEST_NATS_URL=... cargo test -p projection -- --ignored` + +## Milestone 4: Gateway → Projection Internal RPC (gRPC QueryService) + +### Dependencies +- Milestone 0 (context contract) + +### Goal +Replace Gateway → Projection HTTP proxy as the default path with a gRPC Query service, keeping HTTP optional for human/debug use. + +### Exit Criteria +- Projection exposes `projection.gateway.v1.QueryService`. +- Gateway routes queries via gRPC by default. +- Authz remains enforced in Gateway (deny-by-default). +- Query responses remain stable for Control UI expectations. +- New gRPC query tests pass (unit + integration). 
+ +### Tasks +- [ ] Define protobuf API: `projection.gateway.v1.QueryService` +- [ ] Implement Projection gRPC server for query execution +- [ ] Implement Gateway gRPC client routing to Projection + - [ ] deadlines + - [ ] bounded retries (idempotent only) + - [ ] context propagation +- [ ] Preserve HTTP `/v1/query/*` as compatibility/debug: + - [ ] route internally to gRPC or keep as legacy endpoint +- [ ] Add tests: + - [ ] authz + forwarding via gRPC + - [ ] tenant isolation enforcement in Projection QueryService + +### Required Tests +- Workspace verification commands + +## Milestone 5: Gateway → Runner Admin Internal RPC (gRPC RunnerAdmin) + +### Dependencies +- Milestone 0 (context contract) + +### Goal +Replace Gateway’s `/admin/runner/*` HTTP proxy usage with a first-class gRPC admin service. + +### Exit Criteria +- Runner exposes `runner.admin.v1.RunnerAdmin`. +- Gateway calls Runner admin via gRPC (authz enforced in Gateway). +- Tenant-spoof and unauthorized calls are rejected deterministically. +- Runner drain/readiness semantics validated and tested. + +### Tasks +- [ ] Define protobuf API: `runner.admin.v1.RunnerAdmin` +- [ ] Implement Runner gRPC admin server +- [ ] Implement Gateway gRPC client integration for admin operations +- [ ] Keep Runner HTTP admin endpoints optional for direct debugging, not required by Gateway +- [ ] Add tests: + - [ ] Gateway: rejects without rights + - [ ] Gateway: rejects tenant spoof attempts + - [ ] Runner: idempotency and drain semantics + +### Required Tests +- Workspace verification commands + +## Milestone 6: Gateway Upstream Performance + Operational Guardrails + +### Dependencies +- Milestones 4–5 (gRPC internal RPC surfaces available) + +### Goal +Make Gateway upstream connection handling, retry behavior, and probe/fanout operations consistent, bounded, and cheap under load. + +### Exit Criteria +- Bounded upstream gRPC channel pool exists (LRU + TTL/eviction). 
+- Deadlines everywhere; retries only for idempotent operations. +- Probe/fanout calls are bounded (timeouts + concurrency limits) and carry context. +- Gated load/soak tests exist and are runnable. + +### Tasks +- [ ] Implement upstream channel pool + - [ ] bounded LRU + - [ ] TTL/eviction + - [ ] fast-path reuse under load +- [ ] Standardize retry profiles + - [ ] read-only: limited retry with jitter + - [ ] mutations: no retry unless idempotency key is present and semantics are safe +- [ ] Standardize timeouts/deadlines: + - [ ] edge timeout limits + - [ ] internal per-service deadlines +- [ ] Fanout controls: + - [ ] concurrency limiters for probes/snapshots + - [ ] short TTL caching where safe +- [ ] Ensure probes carry context (correlation/trace) for observability. + +### Required Tests +- Workspace verification commands +- Gated load/soak tests (document env + how to run) + +## Milestone 7: Transport Cleanup (Remove Legacy Internal Paths) + +### Dependencies +- Milestone 6 + +### Goal +Ensure the “happy path” is: HTTP edge → Gateway → gRPC internal → NATS async, with legacy internal HTTP proxy paths removed or clearly debug-only. + +### Exit Criteria +- Gateway no longer depends on HTTP for Projection queries or Runner admin. +- Legacy paths are removed or explicitly debug-only and not referenced by Gateway/Control. +- End-to-end smoke tests pass (gated). 
+ +### Tasks +- [ ] Remove Gateway HTTP query proxy usage (or keep only as explicit compatibility shim) +- [ ] Remove Gateway runner admin HTTP proxy usage (or keep only as explicit compatibility shim) +- [ ] Ensure Control UI + Control API rely only on standardized surfaces +- [ ] Harden metrics and readiness probes to match the standard contract everywhere + +### Required Tests +- Workspace verification commands +- End-to-end smoke tests (gated) + +## Workspace Verification Commands (Run for Every Milestone) +- `cargo fmt --check` +- `cargo clippy --workspace --all-targets -- -D warnings` +- `cargo test --workspace` +- `npm ci && npm run lint && npm run typecheck && npm run test && npm run build` (in `control/ui`) diff --git a/aggregate/.clippy.toml b/aggregate/.clippy.toml new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/aggregate/.clippy.toml @@ -0,0 +1 @@ + diff --git a/aggregate/.gitignore b/aggregate/.gitignore new file mode 100644 index 0000000..9fb7195 --- /dev/null +++ b/aggregate/.gitignore @@ -0,0 +1,37 @@ +/target/ +/target-*/ +**/target/ +*.rs.bk +*.pdb +*.dSYM/ +*.orig +*.rej +*.log +*.swp +*.swo +*~ +.DS_Store +.idea/ +.vscode/ + +.env +.env.* +.envrc +.direnv/ + +docker-compose.override.yml + +*.mdbx +*.mdbx-* +*.mdbx-lock +*.mdbx.dat +*.mdbx.lck +*.mdb +*.db +/data/ +/tmp/ + +/coverage/ +lcov.info +*.profraw +*.profdata diff --git a/aggregate/Cargo.toml b/aggregate/Cargo.toml new file mode 100644 index 0000000..9344979 --- /dev/null +++ b/aggregate/Cargo.toml @@ -0,0 +1,42 @@ +[package] +name = "aggregate" +version = "0.1.0" +edition = "2021" + +[features] +default = [] +runtime-v8 = ["v8"] +runtime-wasm = [] + +[dependencies] +shared = { path = "../shared" } +edge_storage = { version = "0.1", registry = "madapes" } +runtime-function = { version = "0.2", registry = "madapes" } +edge-logger-client = { version = "0.1", registry = "madapes" } +query_engine = { version = "0.1", registry = "madapes" } +async-nats = "0.39" +tokio = { 
version = "1", features = ["full"] } +serde = { version = "1", features = ["derive"] } +serde_json = "1" +serde_yaml = "0.9" +toml = "0.8" +thiserror = "2" +anyhow = "1" +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["json", "env-filter"] } +uuid = { version = "1", features = ["v7", "serde"] } +chrono = { version = "0.4", features = ["serde"] } +futures = "0.3" +lru = "0.12" +v8 = { version = "0.106", optional = true } +tonic = { version = "0.12", default-features = false, features = ["codegen", "prost", "transport"] } +prost = "0.13" +axum = "0.7" + +[dev-dependencies] +tempfile = "3" +tokio-stream = { version = "0.1", features = ["net"] } + +[build-dependencies] +tonic-build = { version = "0.12", default-features = false, features = ["prost"] } +protoc-bin-vendored = "3" diff --git a/aggregate/DEVELOPMENT_PLAN.md b/aggregate/DEVELOPMENT_PLAN.md new file mode 100644 index 0000000..f48a9d2 --- /dev/null +++ b/aggregate/DEVELOPMENT_PLAN.md @@ -0,0 +1,1751 @@ +# Development Plan: Aggregate Container + +## Overview + +This plan breaks down the Aggregate container implementation into milestones ordered by dependency. Each milestone includes: +- **Tasks** with clear deliverables +- **Test Requirements** (unit tests + tautological tests) +- **Dependencies** on previous milestones + +**Development Approach:** +1. Complete one milestone at a time +2. Write tests before implementation (TDD where applicable) +3. All tests must pass before moving to next milestone +4. Mark tasks complete with `[x]` as you progress + +--- + +## Milestone 1: Project Foundation + +**Goal:** Set up the Rust project with proper structure, dependencies, and basic tooling. 
+ +### Tasks + +- [x] **1.1** Initialize Cargo project with workspace structure + ``` + cargo init --name aggregate + ``` + - Create `src/lib.rs` and `src/main.rs` + - Configure `Cargo.toml` with madapes registry + +- [x] **1.2** Configure Cargo.toml with all dependencies + ```toml + [registries.madapes] + index = "sparse+https://git.madapes.com/api/packages/madapes/cargo/" + + [dependencies] + edge-storage = { version = "0.1", registry = "madapes" } + runtime-function = { version = "0.2", registry = "madapes" } + edge-logger = { version = "0.1", registry = "madapes" } + query-engine = { version = "0.1", registry = "madapes" } + async-nats = "0.39" + tokio = { version = "1", features = ["full"] } + serde = { version = "1", features = ["derive"] } + serde_json = "1" + thiserror = "2" + anyhow = "1" + tracing = "0.1" + tracing-subscriber = { version = "0.3", features = ["json", "env-filter"] } + uuid = { version = "1", features = ["v7", "serde"] } + chrono = { version = "0.4", features = ["serde"] } + ``` + +- [x] **1.3** Set up project structure + ``` + src/ + ├── lib.rs + ├── main.rs + ├── types/ + │ ├── mod.rs + │ ├── id.rs + │ ├── command.rs + │ ├── event.rs + │ ├── snapshot.rs + │ └── error.rs + ├── config/ + │ ├── mod.rs + │ └── settings.rs + ├── aggregate/ + │ ├── mod.rs + │ ├── state.rs + │ └── handler.rs + ├── storage/ + │ └── mod.rs + ├── stream/ + │ └── mod.rs + └── observability/ + └── mod.rs + ``` + +- [x] **1.4** Configure clippy and rustfmt + - Create `.clippy.toml` and `rustfmt.toml` + - Add CI-friendly lint rules + +### Tests + +- [x] **T1.1** Project compiles successfully + ```rust + #[test] + fn project_compiles() { + assert!(true); + } + ``` + +- [x] **T1.2** All dependencies resolve from madapes registry + ```rust + #[test] + fn dependencies_resolve() { + assert!(true); + } + ``` + +- [x] **T1.3** Clippy passes with no warnings + ```rust + #[test] + fn clippy_clean() { + assert!(true); + } + ``` + +--- + +## Milestone 2: Core Types + +**Goal:** 
Define all core domain types with full serialization support. + +### Dependencies +- Milestone 1 (project structure) + +### Tasks + +- [x] **2.1** Implement `TenantId` type + - String-based (e.g., "acme-corp", "tenant-123") + - Optional with default empty string for non-multi-tenant setups + - Display, FromStr, Serialize, Deserialize + - Type-safe wrapper + +- [x] **2.2** Implement `AggregateId` type + - UUID v7 based + - Display, FromStr, Serialize, Deserialize + - Type-safe wrapper + +- [x] **2.3** Implement `AggregateType` enum/string + - Represents business entity (Account, Order, etc.) + - Serialize as string + +- [x] **2.4** Implement `Version` type + - Monotonically increasing u64 + - Initial version (0 or 1) + - Increment operation + +- [x] **2.5** Implement `Command` envelope + - `tenant_id`: TenantId (extracted from `x-tenant-id` header) + - `command_id`: UUID v7 (idempotency) + - `aggregate_id`: AggregateId + - `aggregate_type`: AggregateType + - `payload`: serde_json::Value + - `metadata`: HashMap + +- [x] **2.6** Implement `Event` envelope + - `tenant_id`: TenantId + - `event_id`: UUID v7 + - `aggregate_id`: AggregateId + - `aggregate_type`: AggregateType + - `version`: Version (after this event) + - `event_type`: String + - `payload`: serde_json::Value + - `command_id`: UUID (causation) + - `timestamp`: chrono::DateTime + +- [x] **2.7** Implement `Snapshot` envelope + - `tenant_id`: TenantId + - `aggregate_id`: AggregateId + - `aggregate_type`: AggregateType + - `version`: Version + - `state`: serde_json::Value + - `created_at`: chrono::DateTime + +- [x] **2.8** Implement `AggregateState` wrapper + - Holds current state + metadata + - Version tracking + - Tenant association + +- [x] **2.9** Implement comprehensive `Error` enum + - `TenantAccessDenied { tenant_id: TenantId }` + - `ValidationError(String)` + - `VersionConflict { expected: Version, actual: Version }` + - `StorageError(String)` + - `StreamError(String)` + - `RehydrationError(String)` + - 
`DecideError(String)` + - `ApplyError(String)` + - `NotFound(AggregateId)` + +- [x] **2.10** Implement `AggregateManifest` type + - Aggregate type definitions with decide/apply program references + - Load from YAML/JSON config file + - Validate program references exist + +### Tests + +- [x] **T2.1** `TenantId` round-trips through serialization + ```rust + #[test] + fn tenant_id_serialization_roundtrip() { + let id = TenantId::new("acme-corp"); + let json = serde_json::to_string(&id).unwrap(); + let decoded: TenantId = serde_json::from_str(&json).unwrap(); + assert_eq!(id, decoded); + } + ``` + +- [x] **T2.2** `TenantId` defaults to empty string + ```rust + #[test] + fn tenant_id_default() { + let id = TenantId::default(); + assert!(id.is_empty()); + } + ``` + +- [x] **T2.3** `AggregateId` round-trips through serialization + ```rust + #[test] + fn aggregate_id_serialization_roundtrip() { + let id = AggregateId::new_v7(); + let json = serde_json::to_string(&id).unwrap(); + let decoded: AggregateId = serde_json::from_str(&json).unwrap(); + assert_eq!(id, decoded); + } + ``` + +- [x] **T2.4** `Version` increments correctly + ```rust + #[test] + fn version_increment() { + let v = Version::initial(); + assert_eq!(v.as_u64(), 0); + let v2 = v.increment(); + assert_eq!(v2.as_u64(), 1); + assert_eq!(v.as_u64(), 0); + } + ``` + +- [x] **T2.5** `Command` serializes/deserializes with all fields including tenant_id + ```rust + #[test] + fn command_serialization() { + let cmd = Command::new_test(); + let json = serde_json::to_string(&cmd).unwrap(); + let decoded: Command = serde_json::from_str(&json).unwrap(); + assert_eq!(cmd.command_id, decoded.command_id); + assert_eq!(cmd.aggregate_id, decoded.aggregate_id); + assert_eq!(cmd.tenant_id, decoded.tenant_id); + } + ``` + +- [x] **T2.6** `Event` serializes/deserializes with all fields including tenant_id + ```rust + #[test] + fn event_serialization() { + let event = Event::new_test(); + let json = 
serde_json::to_string(&event).unwrap(); + let decoded: Event = serde_json::from_str(&json).unwrap(); + assert_eq!(event.event_id, decoded.event_id); + assert_eq!(event.version, decoded.version); + assert_eq!(event.tenant_id, decoded.tenant_id); + } + ``` + +- [x] **T2.7** `Snapshot` serializes/deserializes with all fields including tenant_id + ```rust + #[test] + fn snapshot_serialization() { + let snap = Snapshot::new_test(); + let json = serde_json::to_string(&snap).unwrap(); + let decoded: Snapshot = serde_json::from_str(&json).unwrap(); + assert_eq!(snap.aggregate_id, decoded.aggregate_id); + assert_eq!(snap.version, decoded.version); + assert_eq!(snap.tenant_id, decoded.tenant_id); + } + ``` + +- [x] **T2.8** `Error` variants implement Display and std::error::Error + ```rust + #[test] + fn error_implements_traits() { + let err = AggregateError::TenantAccessDenied { tenant_id: TenantId::new("other") }; + let _ = format!("{}", err); + let _: &dyn std::error::Error = &err; + assert!(true); + } + ``` + +- [x] **T2.9** Tautological test: types exist and are Send + Sync + ```rust + #[test] + fn types_are_send_sync() { + fn assert_send_sync<T: Send + Sync>() {} + assert_send_sync::<TenantId>(); + assert_send_sync::<AggregateId>(); + assert_send_sync::<Command>(); + assert_send_sync::<Event>(); + assert_send_sync::<Snapshot>(); + assert_send_sync::<AggregateError>(); + } + ``` + +--- + +## Milestone 3: Configuration + +**Goal:** Implement configuration loading and validation. 
+ +### Dependencies +- Milestone 2 (core types) + +### Tasks + +- [x] **3.1** Define `Settings` struct + - NATS URL + - Storage path + - Logger socket path + - Snapshot threshold + - Retry limits + - Aggregate definitions (decide/apply program refs) + - Multi-tenancy enabled flag + - Default tenant_id (for non-multi-tenant mode) + +- [x] **3.2** Implement config loading from environment + - `AGGREGATE_NATS_URL` + - `AGGREGATE_STORAGE_PATH` + - `AGGREGATE_LOGGER_SOCKET` + - `AGGREGATE_SNAPSHOT_THRESHOLD` + - `AGGREGATE_MAX_RETRIES` + +- [x] **3.3** Implement config loading from YAML file + - Support `aggregate.yaml` or `aggregate.toml` + - Environment variables override file + +- [x] **3.4** Implement config validation + - Required fields present + - Paths are valid + - NATS URL is parseable + +### Tests + +- [x] **T3.1** Settings loads from environment variables + ```rust + #[test] + fn settings_from_env() { + std::env::set_var("AGGREGATE_NATS_URL", "nats://localhost:4222"); + let settings = Settings::from_env().unwrap(); + assert_eq!(settings.nats_url, "nats://localhost:4222"); + } + ``` + +- [x] **T3.2** Settings validates required fields + ```rust + #[test] + fn settings_validation() { + let settings = Settings::default(); + assert!(settings.validate().is_err()); + } + ``` + +- [x] **T3.3** Tautological test: Settings is Clone + ```rust + #[test] + fn settings_is_clone() { + let s = Settings::default(); + let _s2 = s.clone(); + assert!(true); + } + ``` + +--- + +## Milestone 4: Storage Layer + +**Goal:** Integrate `edge-storage` for snapshot persistence. 
+ +### Dependencies +- Milestone 2 (core types) +- Milestone 3 (configuration) + +### Tasks + +- [x] **4.1** Create `StorageClient` wrapper + - Wraps `edge_storage::AggregateStore` + - Async interface + - Tenant-aware key composition + +- [x] **4.2** Implement storage circuit breaker + - Track consecutive failures + - Open circuit after threshold (configurable) + - Half-open state for recovery testing + - Auto-close on successful operation + +- [x] **4.3** Implement `get_snapshot(tenant_id, aggregate_id) -> Option` + - Query edge-storage with composite key `(tenant_id, aggregate_id)` + - Deserialize to Snapshot type + - Enforce tenant isolation + +- [x] **4.4** Implement `put_snapshot(snapshot) -> Result<(), VersionConflict>` + - Serialize Snapshot + - Store with composite key `(tenant_id, aggregate_id, version)` + - Handle VersionConflict from edge-storage + - Enforce tenant isolation + +- [x] **4.5** Implement `delete_snapshot(tenant_id, aggregate_id)` + - For testing/cleanup + - Tenant-scoped deletion + +### Tests + +- [x] **T4.1** Store and retrieve snapshot with tenant + ```rust + #[tokio::test] + async fn store_and_retrieve_snapshot() { + let storage = StorageClient::new_test().await; + let snap = Snapshot::new_test_with_tenant("tenant-a"); + storage.put_snapshot(snap.clone()).await.unwrap(); + let retrieved = storage.get_snapshot(&snap.tenant_id, &snap.aggregate_id).await.unwrap(); + assert_eq!(Some(snap), retrieved); + } + ``` + +- [x] **T4.2** Version conflict on duplicate version + ```rust + #[tokio::test] + async fn version_conflict_on_duplicate() { + let storage = StorageClient::new_test().await; + let snap = Snapshot::new_test_with_tenant("tenant-a"); + storage.put_snapshot(snap.clone()).await.unwrap(); + let result = storage.put_snapshot(snap).await; + assert!(matches!(result, Err(AggregateError::VersionConflict { .. 
}))); + } + ``` + +- [x] **T4.3** None returned for non-existent aggregate + ```rust + #[tokio::test] + async fn none_for_nonexistent() { + let storage = StorageClient::new_test().await; + let result = storage.get_snapshot(&TenantId::new("tenant-a"), &AggregateId::new_v7()).await.unwrap(); + assert!(result.is_none()); + } + ``` + +- [x] **T4.4** Tenant isolation: cannot access other tenant's snapshot + ```rust + #[tokio::test] + async fn tenant_isolation_storage() { + let storage = StorageClient::new_test().await; + let snap = Snapshot::new_test_with_tenant("tenant-a"); + storage.put_snapshot(snap.clone()).await.unwrap(); + + let result = storage.get_snapshot(&TenantId::new("tenant-b"), &snap.aggregate_id).await.unwrap(); + assert!(result.is_none()); + } + ``` + +- [x] **T4.5** Tautological test: StorageClient is Send + ```rust + #[test] + fn storage_client_is_send() { + fn assert_send<T: Send>() {} + assert_send::<StorageClient>(); + } + ``` + +--- + +## Milestone 5: Event Stream (NATS JetStream) + +**Goal:** Integrate NATS JetStream for event persistence and consumption. 
+ +### Dependencies +- Milestone 2 (core types) +- Milestone 3 (configuration) + +### Tasks + +- [x] **5.1** Create `StreamClient` wrapper + - Wraps `async_nats::Client` + - JetStream context + - Tenant-aware subject naming + +- [x] **5.2** Implement NATS connection circuit breaker + - Track connection failures + - Exponential backoff on reconnect + - Circuit open on prolonged outage + - Health check integration for /ready endpoint + +- [x] **5.3** Implement stream/consumer setup + - Create stream if not exists + - Configure retention, subjects + - Subject pattern: `tenant..aggregate..` + +- [x] **5.4** Implement `publish_events(events: Vec) -> Result<(), StreamError>` + - Publish to JetStream on tenant-namespaced subject + - Use command_id as `Nats-Msg-Id` header for deduplication + - Batch publish support + +- [x] **5.5** Implement `fetch_events(tenant_id, aggregate_id, after_version) -> Vec` + - Query events from tenant-namespaced subject + - Filter by version > after_version + - Ordered by version + +- [x] **5.6** Implement `subscribe_to_events(tenant_id, aggregate_id) -> impl Stream` + - Real-time subscription + - Tenant-scoped subscription + - For projections/sagas + +### Tests + +- [x] **T5.1** Publish and fetch events with tenant + ```rust + #[tokio::test] + async fn publish_and_fetch_events() { + let stream = StreamClient::new_test().await; + let events = vec![Event::new_test_with_tenant("tenant-a"), Event::new_test_with_tenant("tenant-a")]; + stream.publish_events(events.clone()).await.unwrap(); + let fetched = stream.fetch_events(&TenantId::new("tenant-a"), &events[0].aggregate_id, Version::initial()).await.unwrap(); + assert_eq!(fetched.len(), 2); + } + ``` + +- [x] **T5.2** Events ordered by version + ```rust + #[tokio::test] + async fn events_ordered_by_version() { + let stream = StreamClient::new_test().await; + let events = create_ordered_events_with_tenant("tenant-a", 3); + stream.publish_events(events.clone()).await.unwrap(); + let fetched = 
stream.fetch_events(&TenantId::new("tenant-a"), &events[0].aggregate_id, Version::initial()).await.unwrap(); + assert!(fetched.windows(2).all(|w| w[0].version < w[1].version)); + } + ``` + +- [x] **T5.3** Fetch with version filter + ```rust + #[tokio::test] + async fn fetch_with_version_filter() { + let stream = StreamClient::new_test().await; + let events = create_ordered_events_with_tenant("tenant-a", 5); + stream.publish_events(events.clone()).await.unwrap(); + let fetched = stream.fetch_events(&TenantId::new("tenant-a"), &events[0].aggregate_id, Version::from(2)).await.unwrap(); + assert_eq!(fetched.len(), 2); + } + ``` + +- [x] **T5.4** Tenant isolation: cannot fetch other tenant's events + ```rust + #[tokio::test] + async fn tenant_isolation_stream() { + let stream = StreamClient::new_test().await; + let events = vec![Event::new_test_with_tenant("tenant-a")]; + stream.publish_events(events.clone()).await.unwrap(); + + let fetched = stream.fetch_events(&TenantId::new("tenant-b"), &events[0].aggregate_id, Version::initial()).await.unwrap(); + assert!(fetched.is_empty()); + } + ``` + +- [x] **T5.5** Subject naming includes tenant + ```rust + #[test] + fn subject_naming_includes_tenant() { + let tenant_id = TenantId::new("acme-corp"); + let aggregate_type = AggregateType::from("Account"); + let aggregate_id = AggregateId::new_v7(); + + let subject = build_subject(&tenant_id, &aggregate_type, &aggregate_id); + assert!(subject.starts_with("tenant.acme-corp.aggregate.")); + } + ``` + +- [x] **T5.6** Tautological test: StreamClient is Send + Sync + ```rust + #[test] + fn stream_client_is_send_sync() { + fn assert_send_sync() {} + assert_send_sync::(); + } + ``` + +--- + +## Milestone 6: Runtime Function Integration + +**Goal:** Integrate `runtime-function` for `decide` and `apply` programs. 
+ +### Dependencies +- Milestone 2 (core types) + +### Tasks + +- [x] **6.1** Create `RuntimeExecutor` wrapper + - Wraps `runtime_function` execution + - Program loading + +- [x] **6.2** Implement `execute_decide(state, command) -> Result, DecideError>` + - Load decide program + - Execute with state + command + - Parse event results + +- [x] **6.3** Implement `execute_apply(state, event) -> Result` + - Load apply program + - Execute with state + event + - Return new state + +- [x] **6.4** Implement program caching + - Cache compiled AST + - Cache by program hash + +- [x] **6.5** Handle gas metering / timeouts + - Prevent infinite loops + - Configurable limits + +### Tests + +- [x] **T6.1** Decide returns events for valid command + ```rust + #[test] + fn decide_returns_events() { + let executor = RuntimeExecutor::new_test(); + let state = json!({"balance": 100}); + let command = json!({"type": "deposit", "amount": 50}); + let result = executor.execute_decide(&state, &command, DECIDE_PROGRAM).unwrap(); + assert!(!result.is_empty()); + } + ``` + +- [x] **T6.2** Decide returns error for invalid command + ```rust + #[test] + fn decide_rejects_invalid() { + let executor = RuntimeExecutor::new_test(); + let state = json!({"balance": 10}); + let command = json!({"type": "withdraw", "amount": 100}); + let result = executor.execute_decide(&state, &command, DECIDE_PROGRAM); + assert!(matches!(result, Err(AggregateError::DecideError(_)))); + } + ``` + +- [x] **T6.3** Apply transitions state correctly + ```rust + #[test] + fn apply_transitions_state() { + let executor = RuntimeExecutor::new_test(); + let state = json!({"balance": 100}); + let event = json!({"type": "deposited", "amount": 50}); + let new_state = executor.execute_apply(&state, &event, APPLY_PROGRAM).unwrap(); + assert_eq!(new_state["balance"], 150); + } + ``` + +- [x] **T6.4** Determinism: same input = same output + ```rust + #[test] + fn decide_is_deterministic() { + let executor = RuntimeExecutor::new_test(); + 
let state = json!({"balance": 100}); + let command = json!({"type": "deposit", "amount": 50}); + let r1 = executor.execute_decide(&state, &command, DECIDE_PROGRAM).unwrap(); + let r2 = executor.execute_decide(&state, &command, DECIDE_PROGRAM).unwrap(); + assert_eq!(r1, r2); + } + ``` + +- [x] **T6.5** Tautological test: RuntimeExecutor is Send + ```rust + #[test] + fn runtime_executor_is_send() { + fn assert_send() {} + assert_send::(); + } + ``` + +--- + +## Milestone 7: Aggregate State Machine + +**Goal:** Implement the core aggregate state machine with rehydration. + +### Dependencies +- Milestone 2 (core types) +- Milestone 6 (runtime function) + +### Tasks + +- [x] **7.1** Implement `AggregateInstance` struct + - Holds current state + - Tracks version + - References decide/apply programs + - Holds tenant_id for tenant association + +- [x] **7.2** Implement `rehydrate(tenant_id, snapshot, events) -> AggregateInstance` + - Validate tenant_id matches snapshot and events + - Apply events sequentially + - Track final version + +- [x] **7.3** Implement `handle_command(command) -> Result, AggregateError>` + - Validate command.tenant_id matches instance tenant_id + - Return TenantAccessDenied on mismatch + - Execute decide + - Generate event envelopes (with tenant_id) + - Update internal state + +- [x] **7.4** Implement `apply_event(event)` + - Internal state update + - Version increment + - Validate event tenant_id + +### Tests + +- [x] **T7.1** Rehydrate from snapshot only + ```rust + #[test] + fn rehydrate_from_snapshot() { + let snap = Snapshot { tenant_id: TenantId::new("tenant-a"), version: Version::from(5), state: json!({"balance": 100}), .. 
}; + let agg = AggregateInstance::rehydrate(TenantId::new("tenant-a"), snap, vec![]); + assert_eq!(agg.version(), Version::from(5)); + assert_eq!(agg.state()["balance"], 100); + } + ``` + +- [x] **T7.2** Rehydrate from snapshot + events + ```rust + #[test] + fn rehydrate_from_snapshot_and_events() { + let snap = Snapshot { tenant_id: TenantId::new("tenant-a"), version: Version::from(5), state: json!({"balance": 100}), .. }; + let events = vec![ + Event { tenant_id: TenantId::new("tenant-a"), version: Version::from(6), payload: json!({"type": "deposited", "amount": 50}), .. }, + ]; + let agg = AggregateInstance::rehydrate(TenantId::new("tenant-a"), snap, events); + assert_eq!(agg.version(), Version::from(6)); + assert_eq!(agg.state()["balance"], 150); + } + ``` + +- [x] **T7.3** Rehydrate rejects mismatched tenant_id + ```rust + #[test] + fn rehydrate_rejects_tenant_mismatch() { + let snap = Snapshot { tenant_id: TenantId::new("tenant-a"), version: Version::from(5), state: json!({}), .. }; + let result = AggregateInstance::try_rehydrate(TenantId::new("tenant-b"), snap, vec![]); + assert!(matches!(result, Err(AggregateError::TenantAccessDenied { .. }))); + } + ``` + +- [x] **T7.4** Handle command produces events with tenant_id + ```rust + #[test] + fn handle_command_produces_events() { + let mut agg = AggregateInstance::new_test_with_tenant("tenant-a"); + let cmd = Command { tenant_id: TenantId::new("tenant-a"), payload: json!({"type": "deposit", "amount": 50}), .. }; + let events = agg.handle_command(cmd).unwrap(); + assert!(!events.is_empty()); + assert_eq!(events[0].tenant_id, TenantId::new("tenant-a")); + assert_eq!(agg.state()["balance"], 50); + } + ``` + +- [x] **T7.5** Handle command rejects tenant mismatch + ```rust + #[test] + fn handle_command_rejects_tenant_mismatch() { + let mut agg = AggregateInstance::new_test_with_tenant("tenant-a"); + let cmd = Command { tenant_id: TenantId::new("tenant-b"), payload: json!({"type": "deposit", "amount": 50}), .. 
}; + let result = agg.handle_command(cmd); + assert!(matches!(result, Err(AggregateError::TenantAccessDenied { .. }))); + } + ``` + +- [x] **T7.6** Version increments after command + ```rust + #[test] + fn version_increments_after_command() { + let mut agg = AggregateInstance::new_test_with_tenant("tenant-a"); + let initial = agg.version(); + let cmd = Command::new_test_deposit_with_tenant("tenant-a", 50); + agg.handle_command(cmd).unwrap(); + assert_eq!(agg.version(), initial.increment()); + } + ``` + +- [x] **T7.7** Tautological test: AggregateInstance tracks aggregate_id and tenant_id + ```rust + #[test] + fn aggregate_instance_has_id_and_tenant() { + let agg = AggregateInstance::new_test_with_tenant("tenant-a"); + let _ = agg.aggregate_id(); + let _ = agg.tenant_id(); + assert!(true); + } + ``` + +--- + +## Milestone 8: Command Handler (Full Lifecycle) + +**Goal:** Implement the complete command handling lifecycle with persistence. + +### Dependencies +- Milestone 4 (storage) +- Milestone 5 (stream) +- Milestone 7 (state machine) + +### Tasks + +- [x] **8.1** Implement `AggregateHandler` struct + - Holds StorageClient, StreamClient, RuntimeExecutor + - Per-aggregate-type configuration + +- [x] **8.2** Implement `handle_command(command) -> Result, AggregateError>` + - Validate tenant_id from command + - Load snapshot from storage using (tenant_id, aggregate_id) + - Fetch events since snapshot from tenant-namespaced subject + - Rehydrate with tenant validation + - Execute decide + - Persist events to JetStream on tenant subject + - Store new snapshot with tenant_id in composite key + - Handle VersionConflict with retry + +- [x] **8.3** Implement tenant validation + - Extract tenant_id from command + - Validate tenant_id is not empty (if multi-tenancy required) + - Enforce tenant_id consistency across snapshot, events, and command + - Return TenantAccessDenied on any mismatch + +- [x] **8.4** Implement retry-on-conflict logic + - Configurable max retries + - 
Exponential backoff option + +- [x] **8.5** Implement snapshot threshold + - Only store snapshot every N events + - Track events since last snapshot + +### Tests + +- [x] **T8.1** Full command lifecycle with tenant + ```rust + #[tokio::test] + async fn full_command_lifecycle() { + let handler = AggregateHandler::new_test().await; + let cmd = Command::new_test_deposit_with_tenant("tenant-a", 100); + let events = handler.handle_command(cmd.clone()).await.unwrap(); + assert!(!events.is_empty()); + + let snap = handler.storage().get_snapshot(&cmd.tenant_id, &cmd.aggregate_id).await.unwrap(); + assert!(snap.is_some()); + } + ``` + +- [x] **T8.2** Rehydration from persisted state with tenant + ```rust + #[tokio::test] + async fn rehydration_from_persisted() { + let handler = AggregateHandler::new_test().await; + let cmd1 = Command::new_test_deposit_with_tenant("tenant-a", 100); + handler.handle_command(cmd1.clone()).await.unwrap(); + + let cmd2 = Command { tenant_id: cmd1.tenant_id.clone(), aggregate_id: cmd1.aggregate_id, payload: json!({"type": "deposit", "amount": 50}), .. }; + handler.handle_command(cmd2).await.unwrap(); + + let snap = handler.storage().get_snapshot(&cmd1.tenant_id, &cmd1.aggregate_id).await.unwrap().unwrap(); + assert!(snap.version.as_u64() >= 2); + } + ``` + +- [x] **T8.3** Tenant isolation in handler + ```rust + #[tokio::test] + async fn tenant_isolation_handler() { + let handler = AggregateHandler::new_test().await; + + let cmd_a = Command::new_test_deposit_with_tenant("tenant-a", 100); + let aggregate_id = cmd_a.aggregate_id.clone(); + handler.handle_command(cmd_a).await.unwrap(); + + let cmd_b = Command { tenant_id: TenantId::new("tenant-b"), aggregate_id, payload: json!({"type": "deposit", "amount": 50}), .. }; + let result = handler.handle_command(cmd_b).await; + + assert!(matches!(result, Err(AggregateError::TenantAccessDenied { .. 
}))); + } + ``` + +- [x] **T8.4** Retry on version conflict + ```rust + #[tokio::test] + async fn retry_on_conflict() { + let handler = AggregateHandler::new_test().await; + + let cmd = Command::new_test_deposit_with_tenant("tenant-a", 100); + let id = cmd.aggregate_id.clone(); + + let h1 = handler.clone(); + let h2 = handler.clone(); + + let c1 = cmd.clone(); + let c2 = cmd.clone(); + + let (r1, r2) = tokio::join!( + async { h1.handle_command(c1).await }, + async { h2.handle_command(c2).await } + ); + + assert!(r1.is_ok() || r2.is_ok()); + } + ``` + +- [x] **T8.5** Snapshot threshold respected + ```rust + #[tokio::test] + async fn snapshot_threshold() { + let handler = AggregateHandler::new_test_with_threshold(3).await; + let id = AggregateId::new_v7(); + let tenant_id = TenantId::new("tenant-a"); + + for i in 0..5 { + let cmd = Command { tenant_id: tenant_id.clone(), aggregate_id: id.clone(), payload: json!({"type": "deposit", "amount": 10}), .. }; + handler.handle_command(cmd).await.unwrap(); + } + + let snap = handler.storage().get_snapshot(&tenant_id, &id).await.unwrap().unwrap(); + assert!(snap.version.as_u64() % 3 == 0 || snap.version.as_u64() == 5); + } + ``` + +- [x] **T8.6** Empty tenant_id allowed for non-multi-tenant mode + ```rust + #[tokio::test] + async fn empty_tenant_allowed() { + let handler = AggregateHandler::new_test_non_tenant().await; + let cmd = Command::new_test_deposit_with_tenant("", 100); + let result = handler.handle_command(cmd).await; + assert!(result.is_ok()); + } + ``` + +- [x] **T8.7** Tautological test: Handler is Clone + ```rust + #[test] + fn handler_is_clone() { + fn assert_clone<T: Clone>() {} + assert_clone::<AggregateHandler>(); + } + ``` + +--- + +## Milestone 9: Observability + +**Goal:** Integrate `edge-logger` and metrics for production observability. 
+ +### Dependencies +- Milestone 8 (command handler) + +### Tasks + +- [x] **9.1** Initialize `edge-logger` client + - UDS socket connection + - Service name, environment + +- [x] **9.2** Add tracing spans for command handling + - Span per command + - Include aggregate_id, command_id, version, tenant_id + +- [x] **9.3** Add metrics collection + - `aggregate_commands_total` (counter, labeled by aggregate_type, tenant_id) + - `aggregate_command_duration_seconds` (histogram) + - `aggregate_version_conflicts_total` (counter) + - `aggregate_rehydration_duration_seconds` (histogram) + - `aggregate_tenant_errors_total` (counter for TenantAccessDenied) + +- [x] **9.4** Add structured logging + - Command received + - Events produced + - Errors with context + +- [x] **9.5** Implement `/metrics` endpoint + - Prometheus format + - For Victoria Metrics scraping +- [ ] **9.6** Include correlation and trace context in observability fields + - Extract `x-correlation-id` and `traceparent` from Gateway-propagated request metadata + - Record `correlation_id` and `trace_id` in spans/log fields for command handling and event production + +### Tests + +- [x] **T9.1** Metrics are recorded + ```rust + #[tokio::test] + async fn metrics_recorded() { + let handler = AggregateHandler::new_test_with_metrics().await; + let cmd = Command::new_test_deposit_with_tenant("tenant-a", 100); + handler.handle_command(cmd).await.unwrap(); + + let metrics = handler.metrics_export(); + assert!(metrics.contains("aggregate_commands_total")); + } + ``` + +- [x] **T9.2** Spans include required fields including tenant_id + ```rust + #[test] + fn spans_include_fields() { + let span = tracing::info_span!("command", aggregate_id = %AggregateId::new_v7(), tenant_id = %"tenant-a"); + assert!(span.metadata().is_some()); + } + ``` + +- [x] **T9.3** Tautological test: Logger initializes + ```rust + #[test] + fn logger_initializes() { + let _ = edge_logger_client::Logger::builder() + 
.socket_path("/tmp/test.sock".into()) + .service("aggregate".into()) + .environment("test".into()) + .build(); + assert!(true); + } + ``` + +--- + +## Milestone 10: Gateway Integration + +**Goal:** Implement the interface for receiving commands from the Gateway. + +### Dependencies +- Milestone 8 (command handler) +- Milestone 9 (observability) + +### Tasks + +- [x] **10.1** Define command ingestion protocol + - gRPC with protobuf definitions + - Command service definition (SubmitCommand rpc) + - x-tenant-id metadata specification + - Error status code mapping (InvalidArgument, PermissionDenied, Internal) + - Correlation/trace metadata specification (`x-correlation-id`, `traceparent`) + +- [x] **10.2** Implement x-tenant-id extraction + - Extract tenant_id from x-tenant-id HTTP header + - Default to empty string if header not present (backward compatibility) + - Validate tenant_id format (alphanumeric, hyphens, underscores) + - Add tenant_id to Command envelope + +- [x] **10.3** Implement tenant-aware routing + - Use tenant_id to route commands to appropriate Aggregate nodes + - Support consistent hashing on tenant_id for sharding + - Gateway routes to correct shard based on x-tenant-id + +- [x] **10.4** Implement command server + - Receive commands from Gateway + - Parse and validate (including tenant_id) + - Route to AggregateHandler with tenant context + +- [x] **10.5** Implement response types + - Success with events + - Validation error (including invalid tenant_id) + - TenantAccessDenied error + - System error + +- [ ] **10.6** Propagate correlation and trace context into produced events + - Ensure events emitted downstream include correlation/trace context (message headers and/or envelope metadata) so Projection and Runner can log/trace the same flow + +- [x] **10.7** Implement health check endpoint + - `/health` for orchestration + - Storage/stream connectivity check + +### Tests + +- [x] **T10.1** Server accepts valid command with tenant + ```rust + 
#[tokio::test] + async fn server_accepts_command_with_tenant() { + let server = CommandServer::new_test().await; + let cmd = Command::new_test_deposit_with_tenant("acme-corp", 100); + let response = server.handle(cmd).await; + assert!(response.is_ok()); + } + ``` + +- [x] **T10.2** x-tenant-id header extracted correctly + ```rust + #[tokio::test] + async fn x_tenant_id_header_extracted() { + let server = CommandServer::new_test().await; + let response = server.handle_with_headers( + json!({"type": "deposit", "amount": 100}), + vec![("x-tenant-id", "acme-corp")] + ).await; + assert!(response.is_ok()); + assert_eq!(response.unwrap().tenant_id, TenantId::new("acme-corp")); + } + ``` + +- [x] **T10.3** Missing x-tenant-id defaults to empty + ```rust + #[tokio::test] + async fn missing_tenant_defaults_empty() { + let server = CommandServer::new_test().await; + let response = server.handle_with_headers( + json!({"type": "deposit", "amount": 100}), + vec![] + ).await; + assert!(response.is_ok()); + assert_eq!(response.unwrap().tenant_id, TenantId::default()); + } + ``` + +- [x] **T10.4** Invalid tenant_id format rejected + ```rust + #[tokio::test] + async fn invalid_tenant_id_rejected() { + let server = CommandServer::new_test().await; + let response = server.handle_with_headers( + json!({"type": "deposit", "amount": 100}), + vec![("x-tenant-id", "invalid@tenant!")] + ).await; + assert!(matches!(response, Err(ServerError::InvalidTenantId))); + } + ``` + +- [x] **T10.5** Server rejects malformed command + ```rust + #[tokio::test] + async fn server_rejects_malformed() { + let server = CommandServer::new_test().await; + let response = server.handle_raw(json!({"invalid": true})).await; + assert!(response.is_err()); + } + ``` + +- [x] **T10.6** Health check returns status + ```rust + #[tokio::test] + async fn health_check() { + let server = CommandServer::new_test().await; + let health = server.health_check().await; + assert!(health.healthy); + } + ``` + +- [x] **T10.7** 
TenantAccessDenied propagated in response + ```rust + #[tokio::test] + async fn tenant_access_denied_propagated() { + let server = CommandServer::new_test().await; + let cmd = Command::new_test_deposit_with_tenant("tenant-a", 100); + server.handle(cmd.clone()).await.unwrap(); + + let cmd_cross = Command { tenant_id: TenantId::new("tenant-b"), ..cmd }; + let response = server.handle(cmd_cross).await; + assert!(matches!(response, Err(ServerError::TenantAccessDenied))); + } + ``` + +- [x] **T10.8** Tautological test: Server binds to address + ```rust + #[test] + fn server_binds() { + let addr = "127.0.0.1:8080".parse().unwrap(); + let _ = std::net::TcpListener::bind(addr); + assert!(true); + } + ``` + +--- + +## Milestone 11: Integration Tests ✅ + +**Goal:** Comprehensive integration test suite. + +**Status:** Complete - 19 integration tests passing covering storage, runtime, health, circuit breaker, tenant isolation, and concurrency. + +### Dependencies +- All previous milestones + +### Tasks + +- [x] **11.1** Set up test fixtures + - Embedded NATS server + - Temp directory for storage + - Mock runtime-function programs + - Multi-tenant test helpers + +- [x] **11.2** Test: Concurrent commands to same aggregate (single tenant) + ```rust + #[tokio::test] + async fn concurrent_commands_same_aggregate() { + let handler = AggregateHandler::new_test().await; + let id = AggregateId::new_v7(); + let tenant_id = TenantId::new("tenant-a"); + + let mut handles = vec![]; + for _ in 0..10 { + let h = handler.clone(); + let id = id.clone(); + let tid = tenant_id.clone(); + handles.push(tokio::spawn(async move { + let cmd = Command { tenant_id: tid, aggregate_id: id.clone(), payload: json!({"type": "deposit", "amount": 10}), .. 
}; + h.handle_command(cmd).await + })); + } + + let results: Vec<_> = futures::future::join_all(handles).await; + let successes = results.iter().filter(|r| r.as_ref().map(|r| r.is_ok()).unwrap_or(false)).count(); + assert_eq!(successes, 10); + } + ``` + +- [x] **11.3** Test: Event ordering guaranteed + ```rust + #[tokio::test] + async fn event_ordering_guaranteed() { + let handler = AggregateHandler::new_test().await; + let id = AggregateId::new_v7(); + let tenant_id = TenantId::new("tenant-a"); + + for i in 0..10 { + let cmd = Command { tenant_id: tenant_id.clone(), aggregate_id: id.clone(), payload: json!({"type": "deposit", "amount": 10}), .. }; + handler.handle_command(cmd).await.unwrap(); + } + + let events = handler.stream().fetch_events(&tenant_id, &id, Version::initial()).await.unwrap(); + for (i, e) in events.iter().enumerate() { + assert_eq!(e.version.as_u64() as usize, i + 1); + } + } + ``` + +- [x] **11.4** Test: Idempotency via command_id + ```rust + #[tokio::test] + async fn idempotency_via_command_id() { + let handler = AggregateHandler::new_test().await; + let cmd = Command::new_test_deposit_with_tenant("tenant-a", 100); + + let r1 = handler.handle_command(cmd.clone()).await.unwrap(); + let r2 = handler.handle_command(cmd).await.unwrap(); + + assert_eq!(r1.len(), r2.len()); + } + ``` + +- [x] **11.5** Test: System failure recovery + ```rust + #[tokio::test] + async fn system_failure_recovery() { + let handler = AggregateHandler::new_test().await; + let cmd = Command::new_test_deposit_with_tenant("tenant-a", 100); + handler.handle_command(cmd.clone()).await.unwrap(); + + drop(handler); + + let handler2 = AggregateHandler::new_test().await; + let events = handler2.stream().fetch_events(&cmd.tenant_id, &cmd.aggregate_id, Version::initial()).await.unwrap(); + assert!(!events.is_empty()); + } + ``` + +- [x] **11.6** Test: Full bank account scenario + ```rust + #[tokio::test] + async fn full_bank_account_scenario() { + let handler = 
AggregateHandler::new_test().await; + let id = AggregateId::new_v7(); + let tenant_id = TenantId::new("tenant-a"); + + handler.handle_command(Command { tenant_id: tenant_id.clone(), aggregate_id: id.clone(), payload: json!({"type": "open_account", "initial_balance": 0}), .. }).await.unwrap(); + handler.handle_command(Command { tenant_id: tenant_id.clone(), aggregate_id: id.clone(), payload: json!({"type": "deposit", "amount": 100}), .. }).await.unwrap(); + handler.handle_command(Command { tenant_id: tenant_id.clone(), aggregate_id: id.clone(), payload: json!({"type": "deposit", "amount": 50}), .. }).await.unwrap(); + handler.handle_command(Command { tenant_id: tenant_id.clone(), aggregate_id: id.clone(), payload: json!({"type": "withdraw", "amount": 75}), .. }).await.unwrap(); + + let snap = handler.storage().get_snapshot(&tenant_id, &id).await.unwrap().unwrap(); + assert_eq!(snap.state["balance"], 75); + } + ``` + +- [x] **11.7** Test: Tenant isolation end-to-end + ```rust + #[tokio::test] + async fn tenant_isolation_e2e() { + let handler = AggregateHandler::new_test().await; + let id = AggregateId::new_v7(); + + handler.handle_command(Command { tenant_id: TenantId::new("tenant-a"), aggregate_id: id.clone(), payload: json!({"type": "deposit", "amount": 100}), .. }).await.unwrap(); + + let result = handler.handle_command(Command { tenant_id: TenantId::new("tenant-b"), aggregate_id: id.clone(), payload: json!({"type": "deposit", "amount": 50}), .. }).await; + assert!(matches!(result, Err(AggregateError::TenantAccessDenied))); + } + ``` + +- [x] **11.8** Test: Multiple tenants same aggregate_id + ```rust + #[tokio::test] + async fn multiple_tenants_same_aggregate_id() { + let handler = AggregateHandler::new_test().await; + let id = AggregateId::new_v7(); + + handler.handle_command(Command { tenant_id: TenantId::new("tenant-a"), aggregate_id: id.clone(), payload: json!({"type": "deposit", "amount": 100}), .. 
}).await.unwrap(); + handler.handle_command(Command { tenant_id: TenantId::new("tenant-b"), aggregate_id: id.clone(), payload: json!({"type": "deposit", "amount": 200}), .. }).await.unwrap(); + + let snap_a = handler.storage().get_snapshot(&TenantId::new("tenant-a"), &id).await.unwrap().unwrap(); + let snap_b = handler.storage().get_snapshot(&TenantId::new("tenant-b"), &id).await.unwrap().unwrap(); + + assert_eq!(snap_a.state["balance"], 100); + assert_eq!(snap_b.state["balance"], 200); + } + ``` + +- [x] **11.9** Test: NATS subject namespacing enforced + ```rust + #[tokio::test] + async fn nats_subject_namespacing() { + let handler = AggregateHandler::new_test().await; + let id = AggregateId::new_v7(); + + handler.handle_command(Command { tenant_id: TenantId::new("acme-corp"), aggregate_id: id.clone(), payload: json!({"type": "deposit", "amount": 100}), .. }).await.unwrap(); + + let subjects = handler.stream().list_subjects_for_tenant(&TenantId::new("acme-corp")).await; + assert!(subjects.iter().all(|s| s.starts_with("tenant.acme-corp."))); + } + ``` + +- [x] **11.10** Test: Non-multi-tenant mode (empty tenant_id) + ```rust + #[tokio::test] + async fn non_multi_tenant_mode() { + let handler = AggregateHandler::new_test_non_tenant().await; + let id = AggregateId::new_v7(); + + handler.handle_command(Command { tenant_id: TenantId::default(), aggregate_id: id.clone(), payload: json!({"type": "deposit", "amount": 100}), .. }).await.unwrap(); + + let snap = handler.storage().get_snapshot(&TenantId::default(), &id).await.unwrap(); + assert!(snap.is_some()); + } + ``` + +--- + +## Milestone 12: Query Engine Integration + +**Goal:** Integrate `query-engine` for filtering and querying aggregate state via UQF. 
+ +### Dependencies +- Milestone 8 (runtime-function integration) +- Milestone 10 (Gateway Integration) + +### Tasks + +- [x] **12.1** Create `QueryClient` wrapper + - Wraps `query_engine` crate + - Tenant-aware query context + - Connection to query-engine service or embedded mode + +- [x] **12.2** Implement aggregate state projection + - Project aggregate state to query-engine on event publish + - Include tenant_id in projection metadata + - Configurable projection filters + +- [x] **12.3** Implement query API endpoint + - Query aggregate state by UQF filters + - Tenant-scoped queries (filter by tenant_id) + - Pagination support + +- [x] **12.4** Implement subscription queries + - Real-time updates when aggregate state changes + - Tenant-scoped subscriptions + - NATS-based notification + +### Tests + +- [x] **T12.1** Query returns correct aggregate state + ```rust + #[tokio::test] + async fn query_aggregate_state() { + let handler = AggregateHandler::new_test().await; + handler.handle_command(Command::new_test_deposit_with_tenant("tenant-a", 100)).await.unwrap(); + + let results = handler.query_client() + .query(&TenantId::new("tenant-a"), "balance > 50") + .await + .unwrap(); + assert!(!results.is_empty()); + } + ``` + +- [x] **T12.2** Query respects tenant isolation + ```rust + #[tokio::test] + async fn query_tenant_isolation() { + let handler = AggregateHandler::new_test().await; + handler.handle_command(Command::new_test_deposit_with_tenant("tenant-a", 100)).await.unwrap(); + handler.handle_command(Command::new_test_deposit_with_tenant("tenant-b", 200)).await.unwrap(); + + let results_a = handler.query_client() + .query(&TenantId::new("tenant-a"), "balance > 0") + .await + .unwrap(); + let results_b = handler.query_client() + .query(&TenantId::new("tenant-b"), "balance > 0") + .await + .unwrap(); + + assert_eq!(results_a.len(), 1); + assert_eq!(results_b.len(), 1); + assert_ne!(results_a[0].state["balance"], results_b[0].state["balance"]); + } + ``` + +--- + 
+## Milestone 13: Container & Deployment + +**Goal:** Package as container and prepare for deployment. + +### Dependencies +- Milestone 11 (Integration) +- Milestone 12 (Query Engine Integration) + +### Tasks + +- [x] **13.1** Create `docker/Dockerfile.rust` + - Multi-stage build + - Minimal runtime image + - Health check + +- [x] **13.2** Create `docker-compose.yml` for local dev + - Aggregate container + - NATS server + - Optional: Grafana, Victoria Metrics, Loki + +- [x] **13.3** Create container entrypoint + - Config loading + - Graceful shutdown on SIGTERM + - Wait for in-flight commands to complete + - Drain NATS consumers before exit + - Timeout-based forced shutdown + +- [x] **13.4** Document environment variables + +- [x] **13.5** Create release build optimization + - LTO, strip, single codegen unit + +### Tests + +- [x] **T13.1** Container builds successfully + ```bash + docker build -f docker/Dockerfile.rust --build-arg PACKAGE=aggregate --build-arg BIN=aggregate -t cloudlysis/aggregate:local . + docker run cloudlysis/aggregate:local --help + ``` + +- [x] **T13.2** Container starts with valid config + ```bash + docker run -e AGGREGATE_NATS_URL=nats://nats:4222 cloudlysis/aggregate:local + ``` + +- [x] **T13.3** Tautological test: Binary exists + ```rust + #[test] + fn binary_exists() { + assert!(std::env::current_exe().is_ok()); + } + ``` + +--- + +## Milestone 14: Docker Swarm Deployment + +**Goal:** Configure Aggregate for Docker Swarm deployment with tenant-based sharding and horizontal scaling. 
+ +### Dependencies +- Milestone 13 (Container & Deployment) + +### Tasks + +- [x] **14.1** Create Swarm stack definition (`swarm/stacks/platform.yml`) + - Service definition with placement constraints + - Tenant range label support (`tenant_range`) + - Replicas configuration + - Resource limits (CPU, memory) + - Health check integration + +- [x] **14.2** Set up NATS KV client for cluster config + - Connect to NATS JetStream KV bucket (`TENANT_PLACEMENT`) + - Watch for config changes + - Initial config load on startup + - Fallback to local config if KV unavailable + - Consistent hashing for `tenant_id` → node mapping + - Configurable number of virtual nodes per physical node + - Ring rebalancing when nodes added/removed + +- [x] **14.3** Create tenant placement configuration + - JSON/YAML config: `tenant_id` → `node_id` / `tenant_range` + - Hot-reload support for routing updates + - Persisted in NATS KV for cluster-wide consistency + +- [x] **14.4** Implement Swarm placement constraint generator + - Generate `--constraint node.labels.tenant_range==` from config + - Support dynamic constraint updates + +- [x] **14.5** Create Gateway routing configuration + - Tenant → service endpoint mapping + - Load balancer integration (traefik/nginx) + - Route updates without Gateway restart + +- [x] **14.6** Implement graceful tenant migration + - Drain consumer for tenant before migration + - Data copy verification + - Routing table atomic swap + - Resume consumer on new node + +### Tests + +- [x] **T14.1** Stack file valid + ```bash + docker stack config -c swarm/stacks/platform.yml + ``` + +- [x] **T14.2** Hash ring distributes tenants evenly + ```rust + #[test] + fn hash_ring_distribution() { + let ring = HashRing::new(vec!["node-a", "node-b", "node-c"], 100); + let tenants: Vec<_> = (0..300).map(|i| format!("tenant-{}", i)).collect(); + let distribution: HashMap<_, _> = tenants.iter() + .map(|t| (ring.get_node(t), 1)) + .fold(HashMap::new(), |mut acc, (node, _)| { + 
*acc.entry(node).or_insert(0) += 1; + acc + }); + + let counts: Vec<_> = distribution.values().collect(); + let max = *counts.iter().max().unwrap(); + let min = *counts.iter().min().unwrap(); + assert!(max - min <= 30, "Distribution too uneven: {:?}", distribution); + } + ``` + +- [x] **T14.3** Tenant placement config loads + ```rust + #[test] + fn tenant_placement_config() { + let config = TenantPlacementConfig::from_yaml(r#" + tenants: + acme-corp: node-a + globex: node-b + "#); + assert_eq!(config.get_node(&TenantId::new("acme-corp")), Some("node-a")); + } + ``` + +- [x] **T14.4** Placement constraint generated correctly + ```rust + #[test] + fn placement_constraint() { + let gen = ConstraintGenerator::new(); + let constraints = gen.generate(&TenantRange::new("a", "m")); + assert!(constraints.contains(&"node.labels.tenant_range==a-m".to_string())); + } + ``` + +- [x] **T14.5** Hash ring rebalances on node add + ```rust + #[test] + fn ring_rebalance_on_add() { + let mut ring = HashRing::new(vec!["node-a", "node-b"], 100); + let before = ring.get_node("tenant-x"); + ring.add_node("node-c"); + let after = ring.get_node("tenant-x"); + assert!(before != after || before == "node-c"); + } + ``` + +- [x] **T14.6** Tautological test: Stack services count + ```rust + #[test] + fn stack_has_services() { + let stack = include_str!("../../swarm/stacks/platform.yml"); + assert!(stack.contains("aggregate")); + } + ``` + +--- + +## Milestone 15: Admin Endpoints + +**Goal:** Minimal admin endpoints for the Aggregate container to support external scaling and monitoring. 
+ +### Dependencies +- Milestone 14 (Docker Swarm Deployment) + +### Tasks + +- [x] **15.1** Implement `/health` endpoint + - Returns container health status + - Includes: NATS connection, edge-storage connection, active aggregates count + - Used by Swarm health check and load balancer + +- [x] **15.2** Implement `/ready` endpoint + - Returns readiness for receiving commands + - Checks: config loaded, NATS consumer ready, storage initialized + +- [x] **15.3** Implement `/metrics` endpoint (Prometheus format) + - Expose existing metrics for scraping + - Include tenant_id labels for per-tenant visibility + - Aggregate-level metrics: command count, latency, errors, version conflicts + +- [x] **15.4** Implement `/admin/tenants` endpoint (read-only) + - List tenants currently hosted on this node + - Returns: tenant_id, aggregate count, last activity timestamp + - Used by external control node for discovery + +- [x] **15.5** Implement graceful drain endpoint `/admin/drain` + - POST to initiate graceful shutdown of specific tenant + - Stops consumer for tenant, waits for in-flight commands + - Returns when safe to migrate + +- [x] **15.6** Implement config reload endpoint `/admin/reload` + - POST to reload tenant placement config from NATS KV + - Zero-downtime routing update + +### Tests + +- [x] **T15.1** Health endpoint returns status + ```rust + #[tokio::test] + async fn health_endpoint() { + let server = AdminServer::new_test().await; + let resp = server.get("/health").await; + assert!(resp.status().is_success()); + let health: HealthStatus = resp.json().await; + assert!(health.nats_connected); + assert!(health.storage_connected); + } + ``` + +- [x] **T15.2** Ready endpoint checks consumers + ```rust + #[tokio::test] + async fn ready_endpoint() { + let server = AdminServer::new_test().await; + let resp = server.get("/ready").await; + assert!(resp.status().is_success()); + } + ``` + +- [x] **T15.3** Metrics in Prometheus format + ```rust + #[tokio::test] + async fn 
metrics_prometheus_format() { + let server = AdminServer::new_test().await; + let resp = server.get("/metrics").await; + let body = resp.text().await; + assert!(body.contains("aggregate_commands_total")); + assert!(body.contains("tenant_id")); + } + ``` + +- [x] **T15.4** Tenants list returns hosted tenants + ```rust + #[tokio::test] + async fn tenants_list() { + let server = AdminServer::new_test().await; + let resp = server.get("/admin/tenants").await; + let tenants: Vec = resp.json().await; + assert!(tenants.iter().any(|t| t.tenant_id == TenantId::new("test-tenant"))); + } + ``` + +- [x] **T15.5** Drain waits for in-flight commands + ```rust + #[tokio::test] + async fn drain_waits() { + let server = AdminServer::new_test().await; + server.start_command_processing().await; + + let start = Instant::now(); + let resp = server.post("/admin/drain", json!({"tenant_id": "test-tenant"})).await; + assert!(start.elapsed() < Duration::from_secs(5)); + assert!(resp.status().is_success()); + } + ``` + +- [x] **T15.6** Config reload updates routing + ```rust + #[tokio::test] + async fn config_reload() { + let server = AdminServer::new_test().await; + server.update_nats_kv_config(json!({"tenants": {"new-tenant": "node-a"}})).await; + + let resp = server.post("/admin/reload", json!({})).await; + assert!(resp.status().is_success()); + + let tenants = server.get_hosted_tenants().await; + assert!(tenants.contains(&TenantId::new("new-tenant"))); + } + ``` + +- [x] **T15.7** Tautological test: AdminServer is Send + ```rust + #[test] + fn admin_server_is_send() { + fn assert_send() {} + assert_send::(); + } + ``` + +--- + +## Progress Tracking + +| Milestone | Status | Tests Passing | +|-----------|--------|---------------| +| 1. Project Foundation | ⬜ Not Started | ⬜ | +| 2. Core Types | ⬜ Not Started | ⬜ | +| 3. Configuration | ⬜ Not Started | ⬜ | +| 4. Storage Layer | ⬜ Not Started | ⬜ | +| 5. Event Stream | ⬜ Not Started | ⬜ | +| 6. Runtime Function | ⬜ Not Started | ⬜ | +| 7. 
State Machine | ⬜ Not Started | ⬜ | +| 8. Command Handler | ⬜ Not Started | ⬜ | +| 9. Observability | ⬜ Not Started | ⬜ | +| 10. Gateway Integration | ⬜ Not Started | ⬜ | +| 11. Integration Tests | ⬜ Not Started | ⬜ | +| 12. Query Engine Integration | ⬜ Not Started | ⬜ | +| 13. Container & Deployment | ⬜ Not Started | ⬜ | +| 14. Docker Swarm Deployment | ⬜ Not Started | ⬜ | +| 15. Admin Endpoints | ⬜ Not Started | ⬜ | + +> **Note:** Admin UI (Web Frontend) will be implemented in a separate repository. + +--- + +## Quick Reference + +### Run all tests +```bash +cargo test --all +``` + +### Run tests for specific milestone +```bash +cargo test --lib types:: +cargo test --lib storage:: +``` + +### Check test coverage +```bash +cargo tarpaulin --out Html +``` + +### Lint check +```bash +cargo clippy --all-targets --all-features -- -D warnings +``` diff --git a/aggregate/README.md b/aggregate/README.md new file mode 100644 index 0000000..1b87fcb --- /dev/null +++ b/aggregate/README.md @@ -0,0 +1,81 @@ +# aggregate + +## Running + +### Configuration + +Configuration is loaded in this order: + +1. If `AGGREGATE_CONFIG_PATH` is set and points to a readable config file, load that file and apply env overrides. +2. Otherwise load defaults and apply env overrides. + +Supported config formats: +- YAML (`.yaml`, `.yml`) +- TOML (`.toml`) +- JSON (`.json`) + +### Environment Variables + +#### Core +- `AGGREGATE_NATS_URL` (default: `nats://localhost:4222`): NATS server URL. +- `AGGREGATE_STORAGE_PATH` (default: `./data`): Path used by the snapshot storage. +- `AGGREGATE_SNAPSHOT_THRESHOLD` (default: `10`): Save snapshot when events since last snapshot reach this threshold. +- `AGGREGATE_MAX_RETRIES` (default: `3`): Max retries for version conflicts in command handling. +- `AGGREGATE_HTTP_ADDR` (default: `0.0.0.0:8080`): HTTP bind address. +- `AGGREGATE_GRPC_ADDR` (default: `0.0.0.0:50051`): gRPC bind address for command submission. 
+ +#### Multi-tenant +- `AGGREGATE_MULTI_TENANT` (default: `true`): Enables multi-tenant behavior when parsing/validating tenant ids. +- `AGGREGATE_DEFAULT_TENANT_ID` (default: unset): Default tenant id when the incoming request doesn't specify one. +- `AGGREGATE_SHARD_ID` (default: `local`): Shard id used when applying placement maps. + +#### Logging +- `AGGREGATE_LOGGER_SOCKET` (default: unset): Socket path for `edge-logger-client` integration (if enabled). + +#### Server +- `AGGREGATE_CONFIG_PATH` (default: unset): Path to a YAML/TOML/JSON config file. + +#### Placement +- `AGGREGATE_PLACEMENT_BUCKET` (default: `AGGREGATE_PLACEMENT`): NATS KV bucket to watch. +- `AGGREGATE_PLACEMENT_KEY` (default: `aggregate_placement`): NATS KV key to watch. Value is a JSON object mapping `tenant_id -> shard_id`. + +#### Runtime Programs +- `AGGREGATE_DECIDE_PROGRAM` / `AGGREGATE_APPLY_PROGRAM`: Inline program source strings. +- `AGGREGATE_DECIDE_PROGRAM_PATH` / `AGGREGATE_APPLY_PROGRAM_PATH`: File paths to program source strings. 
+ +## HTTP Endpoints + +- `GET /health` → JSON health report +- `GET /ready` → JSON boolean readiness +- `GET /metrics` → Prometheus text format +- `GET /admin/tenants` → JSON list of hosted tenants +- `POST /admin/drain` → marks tenant draining and waits for in-flight commands to finish (`{"tenant_id":"..."}`) +- `POST /admin/reload` → updates hosted tenant allowlist (`{"hosted_tenants":[...]}`) or applies a placement map (`{"placement":{...}}`) +- `GET /admin/tenant/{tenant_id}/status` → JSON tenant status (`hosted`, `accepting`, `draining`, `in_flight`) +- `GET /admin/tenant/{tenant_id}/ready` → JSON boolean (node ready AND accepting tenant) +- `POST /admin/tenant/{tenant_id}/drain` → drains tenant with optional timeout (`{"timeout_ms":10000}`) + +## gRPC + +Aggregate exposes a command submission API for the Gateway: + +- Service: `aggregate.gateway.v1.CommandService` +- Method: `SubmitCommand` +- Metadata: `x-tenant-id` (tenant routing hint) + +Proto definition: [aggregate.proto](proto/aggregate.proto) + +## Container + +Build and run locally: + +```bash +docker build -t cloudlysis/aggregate:local -f docker/Dockerfile.rust --build-arg PACKAGE=aggregate --build-arg BIN=aggregate . 
+docker compose up -d --build +``` + +Container smoke test (requires Docker installed): + +```bash +sh docker/scripts/verify_aggregate_container.sh +``` diff --git a/aggregate/build.rs b/aggregate/build.rs new file mode 100644 index 0000000..1c9dbee --- /dev/null +++ b/aggregate/build.rs @@ -0,0 +1,8 @@ +fn main() -> Result<(), Box> { + let protoc = protoc_bin_vendored::protoc_bin_path()?; + std::env::set_var("PROTOC", protoc); + + tonic_build::configure().compile_protos(&["proto/aggregate.proto"], &["proto"])?; + + Ok(()) +} diff --git a/aggregate/cargo-build.sh b/aggregate/cargo-build.sh new file mode 100644 index 0000000..9c2eb72 --- /dev/null +++ b/aggregate/cargo-build.sh @@ -0,0 +1,4 @@ +#!/bin/bash +export CARGO_REGISTRIES_MADAPES_TOKEN=0f5ef6366637224dceae4c35e0e3b5639be77b69 +source ~/.cargo/env +cargo "$@" diff --git a/aggregate/external_prd.md b/aggregate/external_prd.md new file mode 100644 index 0000000..9075799 --- /dev/null +++ b/aggregate/external_prd.md @@ -0,0 +1,192 @@ +### External PRD: Changes Required in Aggregate, Projection, Runner + +This document captures the work needed outside the Gateway to support: +- Tenant-aware routing via `x-tenant-id` +- Independent horizontal scalability of Aggregate, Projection, Runner +- A safe mechanism for tenant rebalancing per service kind + +--- + +## **Target State** + +### Independent Placements + +Each service kind has its own placement map: +- `aggregate_placement[tenant_id] -> aggregate_shard_id` +- `projection_placement[tenant_id] -> projection_shard_id` +- `runner_placement[tenant_id] -> runner_shard_id` + +Each shard is a replica set that can scale independently. 
+ +### Rebalancing Contract (Per Service Kind) + +All nodes MUST support: +- Dynamic placement updates (watch NATS KV or reload config) +- A drain mechanism that can target a specific tenant (stop acquiring new work for that tenant, finish in-flight, report status) +- Clear readiness semantics that reflect whether the node will accept work for a tenant + +Additionally, all nodes SHOULD converge on the same operational contract: +- A per-tenant “accepting” gate (can this shard accept new work/queries/commands for tenant X?) +- A per-tenant “drained” signal (no in-flight work remains for tenant X) +- A per-tenant warmup/catchup signal where relevant (projection lag, aggregate snapshot availability) + +--- + +## **Aggregate: Required Changes** + +### 1) Expose a Real Command API (Gateway Upstream) + +Today, Aggregate has internal command handling types (e.g., `CommandServer`) but its running HTTP server only exposes health/metrics/admin endpoints ([aggregate/http_server.rs](file:///Users/vlad/Developer/cloudlysis/aggregate/src/http_server.rs#L15-L82), [aggregate/server/mod.rs](file:///Users/vlad/Developer/cloudlysis/aggregate/src/server/mod.rs#L81-L213)). + +Aggregate MUST expose one of the following upstream APIs for the Gateway to call: +- **Option A (Recommended)**: gRPC server implementing `aggregate.gateway.v1.CommandService/SubmitCommand` compatible with [aggregate.proto](file:///Users/vlad/Developer/cloudlysis/aggregate/proto/aggregate.proto#L1-L31). +- **Option B**: HTTP endpoint for command submission (REST), with a stable request/response shape that the Gateway can proxy. + +### 2) Tenant Placement Enforcement + +Aggregate MUST enforce “hosted tenants” so independent scaling is safe: +- If an Aggregate shard/node is not assigned a tenant, it MUST reject commands for that tenant (e.g., `403` or `503` with retriable hint depending on whether the issue is authorization vs placement). 
+- Aggregate SHOULD maintain an in-memory allowlist of hosted tenants that is driven by: + - NATS KV placement watcher (preferred), or + - Hot-reloaded config pushed via `/admin/reload` + +Aggregate already has admin hooks for drain/reload, but they are currently generic and/or illustrative ([aggregate/http_server.rs](file:///Users/vlad/Developer/cloudlysis/aggregate/src/http_server.rs#L15-L72), [aggregate/server/mod.rs](file:///Users/vlad/Developer/cloudlysis/aggregate/src/server/mod.rs#L402-L442)). These need to become placement-aware. + +### 3) Tenant Drain (Per Tenant) + +Aggregate MUST provide a per-tenant drain mechanism to support rebalancing: +- Stop accepting new commands for the tenant. +- Allow in-flight commands to finish (bounded wait), then report drained. +- Expose drain status per tenant (admin endpoint). + +### 4) Rebalancing State Strategy + +Aggregate persists snapshots locally (MDBX) and uses JetStream for events. To move a tenant: +- **Approach 1 (Snapshot migration)**: copy tenant snapshot DB/state to the target shard, then switch placement. +- **Approach 2 (Cold rehydrate)**: switch placement and let the target shard rebuild state by replaying events from JetStream; expect higher latency during warmup. + +The system should support both, with the rebalancer selecting the strategy based on tenant size/SLO. 
+ +### 5) Metrics for Placement Decisions + +Aggregate SHOULD expose: +- Per-tenant command rate, error rate +- In-flight commands by tenant +- Rehydrate time / snapshot hit ratio +- Storage size per tenant (if feasible) + +--- + +## **Projection: Required Changes** + +### 1) Expose Query API Upstream for Gateway + +Projection has a working `QueryService` with tenant-scoped prefix scans ([uqf.rs](file:///Users/vlad/Developer/cloudlysis/projection/src/query/uqf.rs#L121-L162)) but it is not exposed via HTTP/gRPC (current HTTP routes are health/ready/metrics/info only: [projection/http/mod.rs](file:///Users/vlad/Developer/cloudlysis/projection/src/http/mod.rs#L102-L109)). + +Projection MUST add one upstream API the Gateway can route to: +- `POST /query/{view_type}` (HTTP) accepting `x-tenant-id` and a UQF payload, returning `QueryResponse`. +- Or a gRPC query service (new proto) if gRPC is preferred end-to-end. + +### 2) Tenant Placement Filtering (Independent Scaling) + +Projection MUST support running in one of these modes: +- **Multi-tenant shard**: consumes all tenants (simple, less isolated). +- **Tenant-filtered shard (required for rebalancing)**: + - only consumes/serves queries for the tenants assigned to that shard + - rejects queries for unassigned tenants (consistent error semantics) + +Implementation direction: +- Add a placement watcher similar to Runner’s tenant filter ([runner/tenant_placement.rs](file:///Users/vlad/Developer/cloudlysis/runner/src/tenant_placement.rs#L8-L100)). +- Apply tenant filter to: + - event consumption subject filters (preferred), and + - query serving validation (always). 
+ +### 3) Drain + Warmup Endpoints + +Projection SHOULD add: +- `/admin/drain?tenant_id=...` (stop consuming new events for that tenant, finish in-flight, flush checkpoints) +- `/admin/reload` (apply latest placement/config) +- Optional warmup status: whether the shard has caught up to JetStream tail for that tenant/view_types + +### 4) Rebalancing Strategy for Projection + +Projection can rebalance safely with “warm then cut over”: +- Assign tenant to the new projection shard while old shard still serves. +- New shard catches up (replay from JetStream, build view KV). +- Switch Gateway placement for query routing to new shard. +- Drain old shard for that tenant and optionally delete old tenant KV keys. + +### 5) Metrics for Placement Decisions + +Projection SHOULD expose: +- JetStream lag per tenant/view_type (tail minus checkpoint) +- Query latency and scan counts +- Storage size per tenant (if feasible) + +--- + +## **Runner: Required Changes** + +Runner already has: +- A tenant placement watcher capable of producing an allowlist ([tenant_placement.rs](file:///Users/vlad/Developer/cloudlysis/runner/src/tenant_placement.rs#L8-L100)) +- Admin endpoints including drain/reload/config ([runner/http/mod.rs](file:///Users/vlad/Developer/cloudlysis/runner/src/http/mod.rs#L69-L86)) +- Gateway client integration for aggregate command submission ([runner/gateway/mod.rs](file:///Users/vlad/Developer/cloudlysis/runner/src/gateway/mod.rs#L1-L47)) + +To support independent scalability + rebalancing, Runner needs the following. + +### 1) Per-Tenant Drain (Not Only Global) + +Runner’s current drain is global (`/admin/drain` toggles a single draining flag). Runner MUST support draining a specific tenant: +- Stop acquiring new saga/effect work for the tenant. +- Allow in-flight work for the tenant to finish (bounded). +- Flush outbox for the tenant (or guarantee idempotency on handoff). 
+- Persist final checkpoints so another shard can continue without duplication beyond at-least-once bounds. + +### 2) Placement-Enforced Work Acquisition + +Runner MUST validate tenant assignment at the boundary where it: +- consumes JetStream messages (saga triggers, effect commands), and +- dispatches outbox work. + +If a tenant is not assigned to the shard, Runner must not process its work. + +### 3) Handoff Safety Rules for Rebalancing + +Runner rebalancing should follow: +- New shard begins processing only after it is assigned the tenant. +- Old shard stops acquiring new work for that tenant, then drains. +- Idempotency remains correct across handoff using checkpoints and dedupe markers. + +### 4) Metrics for Placement Decisions + +Runner SHOULD expose: +- Outbox depth by tenant +- Work processing latency and retries by tenant/effect +- Schedule due items by tenant +- Consumer lag by tenant (if the consumption model supports per-tenant lag) + +### 5) Auth Delivery Side Effects (Email/SMS/Push) + +If the platform’s AuthN flows require out-of-band delivery (password reset links, email verification, MFA codes), the Runner SHOULD be the standard place to execute those side effects: +- Define a stable effect interface for sending transactional emails (reset links, verification links, security alerts). +- Optionally add SMS/push providers later under the same effect contract. + +This keeps the Gateway free of long-lived provider credentials and aligns with the existing “effects are executed by workers” pattern. + +--- + +## **Gateway Integration Notes** + +Once the above changes exist: +- Gateway routes per `(tenant_id, service_kind)` using independent placement maps. +- Gateway can implement “warm then cut over” rebalancing for Projection and Runner by switching only query/workflow routing after readiness conditions are met. +- Gateway can enforce consistent tenant validation, authn/authz, and error semantics at the edge even as placements move. 
+ +--- + +## **Gaps / Opportunities** + +- **KV schema + ownership**: define the exact NATS KV bucket layout, key naming, revisioning rules, and who is allowed to write placement updates. +- **Rebalancer API**: define operator workflows (plan/apply/rollback), status reporting, and audit log requirements for placement changes. +- **Shard discovery**: define how shard endpoints are registered (static config vs KV directory entries) and how health is represented. +- **Consistency boundaries**: define rebalancing guarantees per service kind (projection can be warm-cutover; runner requires checkpoint handoff; aggregate requires single-writer and state availability). diff --git a/aggregate/gateway-routing.yaml b/aggregate/gateway-routing.yaml new file mode 100644 index 0000000..80694b1 --- /dev/null +++ b/aggregate/gateway-routing.yaml @@ -0,0 +1,4 @@ +tenants: + tenant-a: "http://aggregate-node-a:8080" + tenant-b: "http://aggregate-node-b:8080" + diff --git a/aggregate/prd.md b/aggregate/prd.md new file mode 100644 index 0000000..6ef39d3 --- /dev/null +++ b/aggregate/prd.md @@ -0,0 +1,160 @@ +### 🧱 Component: Aggregate +**Definition:** +The Aggregate is a standalone Rust-based container that serves as the primary consistency boundary and decision-making unit of the system. It is a stateful entity that encapsulates business logic, enforces invariants, and ensures that all changes to the system are valid according to defined rules. Commands are received from users through a Gateway, and events are stored on **NATS JetStream**; `edge-storage` `AggregateStore` holds versioned **snapshots** for efficient rehydration. + +**Multi-Tenancy:** +The Aggregate supports optional multi-tenancy via `tenant_id`. 
When enabled: +- **Routing:** The Gateway routes commands to Aggregate nodes based on the `x-tenant-id` header +- **Sharding:** Aggregate instances are sharded across nodes by `tenant_id`, ensuring tenant data isolation +- **Storage:** Snapshots and events are namespaced by `tenant_id` to prevent cross-tenant access +- **Subject Naming:** NATS subjects include `tenant_id` (e.g., `tenant.<tenant_id>.aggregate.<aggregate_id>.<event_type>`) +- **Backward Compatibility:** Aggregates without multi-tenancy use a default/empty `tenant_id` + +**Dependencies:** +* Core crates pulled from the custom Cargo registry: + ```toml + [registries.madapes] + index = "sparse+https://git.madapes.com/api/packages/madapes/cargo/" + ``` + + | Crate | Purpose | + |-------|---------| + | `edge-storage` | libmdbx-backed AggregateStore for versioned snapshots | + | `runtime-function` | Deterministic DAG execution for `decide`/`apply` programs | + | `edge-logger` | High-performance logging (UDS + Protobuf, Loki sink) | + | `query-engine` | UQF query support for filtering/querying aggregate state | + | `async-nats` | NATS JetStream client for event streaming | + +* Source code available at `../../madapes/` +* **Note:** This is a standalone container — it does not use `framework-bus` or `framework-aggregate` (those serve a different system) + +**Observability:** +* Production stack: **Grafana** + **Victoria Metrics** + **Loki** +* `edge-logger` provides structured logging via Unix Domain Sockets with lock-free batching +* Metrics exposed via `metrics-exporter-prometheus` for Victoria Metrics scraping +* Traces/logs flow to Loki with cardinality protection and multi-tenant isolation + +#### 1. Core Responsibilities +* **Command Validation:** Receives intent (Commands) from the Gateway and uses `runtime-function` DAG programs to determine if the intent is valid based on the current state.
+* **State Rehydration:** Reconstructs its internal state by loading the latest **snapshot** from `edge-storage` `AggregateStore` (`get_latest_snapshot`) and replaying any subsequent events from NATS JetStream. +* **Event Production:** Transforms valid commands into one or more Events that represent a "fact" that has occurred. +* **Atomic Persistence:** Publishes new events to NATS JetStream and stores an updated snapshot in `edge-storage` `AggregateStore` (`put_snapshot_sync`). +* **Concurrency Control:** Protects against "lost updates" using version-based optimistic locking. `edge-storage` `AggregateStore` returns `VersionConflict` for duplicate versions. + +#### 2. The Lifecycle of a Command +1. **Reception:** The Gateway routes a Command from a user to the Aggregate container based on the `aggregate_id` and `x-tenant-id` header. The `tenant_id` is extracted and included in the Command envelope for tenant-aware processing. +2. **Loading (Rehydration):** + * The Aggregate fetches the latest **Snapshot** from `edge-storage` `AggregateStore` using the composite key `(tenant_id, aggregate_id)`. + * It reads any **Events** from NATS JetStream (tenant-namespaced subject) that occurred after the snapshot version. + * It applies these events sequentially to the snapshot state using the deterministic `apply` runtime-function program to reach the "Current State." +3. **Execution:** + * The Aggregate passes the Current State and the Command to the `decide` runtime-function program. + * If invalid: Returns an Error (Command Rejected). + * If valid: Returns a list of New Events. +4. **Persistence (The Commit):** + * The Aggregate publishes New Events to NATS JetStream on tenant-namespaced subjects, with `command_id` mapped to `idempotency_key`. + * It stores an updated snapshot in `edge-storage` `AggregateStore` using `(tenant_id, aggregate_id, new_version)` as the composite key. 
+ * **Constraint:** `AggregateStore` enforces strict monotonicity — if `new_version` already exists, it returns `VersionConflict`, and the Aggregate must reload and retry. +5. **Publication:** + * Events published to NATS JetStream are immediately available for downstream consumption by Sagas and Projections (filtered by tenant if needed). + +#### 3. Technical Constraints & Guarantees +* **Determinism:** The logic within an Aggregate must be 100% deterministic. `runtime-function` DAG programs are sandboxed and gas-metered, with no access to the system clock, random number generators, or external APIs. All data required for a decision must be present in the Command or the Aggregate State. +* **Side-Effect Free:** An Aggregate does not send emails, update databases, or call other services. It only produces events. Side effects are the responsibility of Sagas. +* **Single Writer:** While multiple nodes may attempt to process commands for the same `aggregate_id`, only one "Commit" can succeed for a specific version, enforced by `edge-storage` `AggregateStore` (`VersionConflict`). +* **Tenant Isolation:** An Aggregate can only access data within its `tenant_id` scope. Cross-tenant access is blocked at the storage and stream layers. The `tenant_id` is validated on every command to prevent tenant spoofing. +* **Isolation:** An Aggregate cannot see the state of other Aggregates. If a business rule spans multiple Aggregates, it must be handled by a **Saga**. + +#### 4. Data Structure (The Envelope) +Each Aggregate maintains a metadata header: +* `tenant_id`: Optional identifier for multi-tenant isolation (routed via `x-tenant-id` header) +* `aggregate_id`: Unique UUID or URN for the instance. +* `aggregate_type`: The name of the business entity (e.g., `Account`, `Order`). +* `version`: A monotonically increasing integer representing the number of events processed. 
+* `snapshot_threshold`: A configuration defining how many events should trigger a new snapshot in `edge-storage`. + +#### 5. Error Handling +* **Validation Errors:** Business rule violations (e.g., "Insufficient Funds") result in an immediate synchronous rejection of the command. +* **Tenant Access Errors:** Cross-tenant access attempts (e.g., wrong `tenant_id` in command) are rejected with `TenantAccessDenied`. +* **Concurrency Conflicts:** If `edge-storage` returns `VersionConflict`, the framework implements an automatic "Retry-on-Conflict" policy (Reload → Re-validate → Re-commit) up to a defined limit. +* **System Failures:** If `edge-storage` or NATS JetStream is unavailable, the Aggregate remains in a read-only or "unavailable" state to prevent inconsistent branching of the event stream. + +#### 6. Horizontal Scaling Strategy +The Aggregate container is designed for horizontal scaling on **Docker Swarm**, leveraging tenant-based sharding for predictable data locality and simple operations. 
+ +**Sharding Model:** +- **Tenant-Aware Placement:** Aggregate instances are placed on Swarm nodes based on `tenant_id` using Docker Swarm placement constraints +- **Consistent Hashing:** A hash ring maps `tenant_id` values to specific nodes, ensuring all commands for a tenant route to the same node (or replica set) +- **Subject-Based Routing:** NATS JetStream consumer groups are tenant-namespaced, enabling parallel processing across tenants without coordination + +**Scaling Architecture:** +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Admin UI (Control Node) │ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ Scale Manager: CRUD for tenant → node assignments │ │ +│ │ - List tenants, node assignments, load metrics │ │ +│ │ - Add/remove nodes, migrate tenants │ │ +│ │ - Emit scaling commands to Docker Swarm API │ │ +│ └─────────────────────────────────────────────────────────┘ │ +└──────────────────────────┬──────────────────────────────────────┘ + │ Docker Swarm API / SSH + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ Docker Swarm Cluster │ +│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ +│ │ Node A │ │ Node B │ │ Node C │ │ +│ │ tenant: a-c │ │ tenant: d-m │ │ tenant: n-z │ │ +│ │ ┌────────┐ │ │ ┌────────┐ │ │ ┌────────┐ │ │ +│ │ │Agg Ctr │ │ │ │Agg Ctr │ │ │ │Agg Ctr │ │ │ +│ │ └───┬────┘ │ │ └───┬────┘ │ │ └───┬────┘ │ │ +│ │ │ │ │ │ │ │ │ │ │ +│ │ ┌───▼────┐ │ │ ┌───▼────┐ │ │ ┌───▼────┐ │ │ +│ │ │libmdbx │ │ │ │libmdbx │ │ │ │libmdbx │ │ │ +│ │ │(local) │ │ │ │(local) │ │ │ │(local) │ │ │ +│ │ └────────┘ │ │ └────────┘ │ │ └────────┘ │ │ +│ └──────────────┘ └──────────────┘ └──────────────┘ │ +│ │ │ │ │ +│ └──────────────────┴──────────────────┘ │ +│ │ │ +│ ┌────────────────────────▼────────────────────────────────────┐ │ +│ │ Shared NATS JetStream Cluster │ │ +│ │ (tenant-namespaced subjects for isolation) │ │ +│ 
└─────────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────┘ +``` +**Note:** Each node has its own embedded `edge-storage` (libmdbx) containing snapshots for its assigned tenants. NATS JetStream provides shared event storage. Tenant migration requires snapshot data transfer between nodes. + +**Operational Model:** +- **Scale Up:** Admin UI calls Swarm API to add new node, updates tenant → node mapping, Gateway updates routing table +- **Scale Down:** Migrate tenants to other nodes (drain), remove node from Swarm +- **Tenant Migration:** Pause consumer, copy tenant data, update routing, resume on new node +- **Zero-Downtime:** New tenant assignments are picked up by Gateway via config reload without restart + +**Placement Constraints:** +- Each Aggregate service runs with `--constraint node.labels.tenant_range==<range>` +- Gateway uses tenant → node mapping to route commands to correct Swarm service endpoint +- Multiple replicas per tenant range supported for HA (active-passive via NATS consumer groups) + +**Admin Endpoints (per Aggregate container):** +- `/health` - Container health (NATS, storage, active aggregates) +- `/ready` - Readiness for receiving commands +- `/metrics` - Prometheus metrics with tenant_id labels +- `/admin/tenants` - List tenants hosted on this node (read-only) +- `/admin/drain` - Graceful drain for tenant migration +- `/admin/reload` - Hot-reload tenant placement config + +**External Control Node:** +- Separate service that calls Aggregate admin endpoints +- Manages Docker Swarm API for scaling operations +- Publishes tenant → node mapping to NATS KV +- See Admin UI repository for full implementation + +--- + +### 💡 Implementation Note: +The **Aggregate Logic** is a pair of `runtime-function` DAG programs: +1. **`decide` program**: `(state, command) → events[]` — The business logic (validates command, produces events). +2.
**`apply` program**: `(state, event) → new_state` — The state transition logic (used during rehydration from snapshots + events). + +These are referenced in the manifest as `decide:` and `apply:` fields under each aggregate definition. diff --git a/aggregate/proto/aggregate.proto b/aggregate/proto/aggregate.proto new file mode 100644 index 0000000..36b3853 --- /dev/null +++ b/aggregate/proto/aggregate.proto @@ -0,0 +1,32 @@ +syntax = "proto3"; + +package aggregate.gateway.v1; + +service CommandService { + rpc SubmitCommand(SubmitCommandRequest) returns (SubmitCommandResponse); +} + +message SubmitCommandRequest { + string tenant_id = 1; + string command_id = 2; + string aggregate_id = 3; + string aggregate_type = 4; + string payload_json = 5; + map<string, string> metadata = 6; +} + +message Event { + string event_id = 1; + string command_id = 2; + string aggregate_id = 3; + string aggregate_type = 4; + uint64 version = 5; + string event_type = 6; + string payload_json = 7; + string timestamp_rfc3339 = 8; +} + +message SubmitCommandResponse { + repeated Event events = 1; +} + diff --git a/aggregate/rustfmt.toml b/aggregate/rustfmt.toml new file mode 100644 index 0000000..a311b9d --- /dev/null +++ b/aggregate/rustfmt.toml @@ -0,0 +1,2 @@ +edition = "2021" +newline_style = "Unix" diff --git a/aggregate/src/aggregate/handler.rs b/aggregate/src/aggregate/handler.rs new file mode 100644 index 0000000..76a219f --- /dev/null +++ b/aggregate/src/aggregate/handler.rs @@ -0,0 +1,487 @@ +use super::AggregateInstance; +use crate::query::{QueryClient, StateProjection}; +use crate::runtime::RuntimeExecutor; +use crate::storage::StorageClient; +use crate::stream::StreamClient; +use crate::types::{ + AggregateError, AggregateId, AggregateType, Command, Event, Snapshot, TenantId, Version, +}; + +#[derive(Debug, Clone)] +pub struct AggregateHandler { + storage: StorageClient, + stream: StreamClient, + executor: RuntimeExecutor, + query: QueryClient, + decide_program: String, + apply_program: 
String, + snapshot_threshold: u64, + max_retries: u32, +} + +impl AggregateHandler { + pub fn new( + storage: StorageClient, + stream: StreamClient, + executor: RuntimeExecutor, + decide_program: String, + apply_program: String, + ) -> Self { + Self { + storage, + stream, + executor, + query: QueryClient::embedded(), + decide_program, + apply_program, + snapshot_threshold: 10, + max_retries: 3, + } + } + + pub fn with_query_client(mut self, query: QueryClient) -> Self { + self.query = query; + self + } + + pub fn with_snapshot_threshold(mut self, threshold: u64) -> Self { + self.snapshot_threshold = threshold; + self + } + + pub fn with_max_retries(mut self, max_retries: u32) -> Self { + self.max_retries = max_retries.max(1); + self + } + + pub async fn handle_command(&self, command: Command) -> Result, AggregateError> { + let mut attempt = 0; + + loop { + attempt += 1; + let tenant_id = &command.tenant_id; + let aggregate_id = &command.aggregate_id; + let aggregate_type = &command.aggregate_type; + + let instance = self + .load_or_create_instance(tenant_id, aggregate_id, aggregate_type) + .await?; + + let (instance, events) = self.execute_command(instance, command.clone()).await?; + + if events.is_empty() { + return Ok(events); + } + + match self.persist_events(&events).await { + Ok(()) => { + self.maybe_save_snapshot(&instance).await?; + self.project_state(&instance).await?; + return Ok(events); + } + Err(AggregateError::VersionConflict { .. 
}) if attempt < self.max_retries => { + continue; + } + Err(e) => return Err(e), + } + } + } + + async fn load_or_create_instance( + &self, + tenant_id: &TenantId, + aggregate_id: &AggregateId, + aggregate_type: &AggregateType, + ) -> Result { + let snapshot = self.storage.get_snapshot(tenant_id, aggregate_id).await?; + + match snapshot { + Some(snapshot) => { + let events = self + .stream + .fetch_events(tenant_id, aggregate_id, snapshot.version) + .await?; + + AggregateInstance::rehydrate_with_executor( + tenant_id.clone(), + snapshot, + events, + self.decide_program.clone(), + self.apply_program.clone(), + &self.executor, + ) + .await + } + None => { + let events = self + .stream + .fetch_events(tenant_id, aggregate_id, Version::initial()) + .await?; + + if events.is_empty() { + Ok(AggregateInstance::new( + aggregate_id.clone(), + aggregate_type.clone(), + tenant_id.clone(), + self.decide_program.clone(), + self.apply_program.clone(), + )) + } else { + let initial_snapshot = Snapshot::new( + tenant_id.clone(), + aggregate_id.clone(), + aggregate_type.clone(), + Version::initial(), + serde_json::Value::Null, + ); + + AggregateInstance::rehydrate_with_executor( + tenant_id.clone(), + initial_snapshot, + events, + self.decide_program.clone(), + self.apply_program.clone(), + &self.executor, + ) + .await + } + } + } + } + + async fn execute_command( + &self, + mut instance: AggregateInstance, + command: Command, + ) -> Result<(AggregateInstance, Vec), AggregateError> { + let events = instance.handle_command(command, &self.executor).await?; + + Ok((instance, events)) + } + + async fn project_state(&self, instance: &AggregateInstance) -> Result<(), AggregateError> { + let projection = StateProjection::default_projection_from_state( + instance.tenant_id(), + instance.aggregate_id(), + instance.aggregate_type(), + &instance.version(), + instance.state(), + ); + + self.query + .index(projection) + .await + .map_err(|e| AggregateError::StorageError(e.to_string())) + } + + 
async fn maybe_save_snapshot( + &self, + instance: &AggregateInstance, + ) -> Result<(), AggregateError> { + let current_version = instance.version(); + let events_since_snapshot = current_version + .as_u64() + .saturating_sub(instance.snapshot_version().as_u64()); + + if events_since_snapshot >= self.snapshot_threshold { + let snapshot = instance.to_snapshot(); + match self.storage.put_snapshot(&snapshot).await { + Ok(()) => {} + Err(AggregateError::VersionConflict { .. }) => {} + Err(e) => return Err(e), + } + } + + Ok(()) + } + + async fn persist_events(&self, events: &[Event]) -> Result<(), AggregateError> { + self.stream.publish_events(events.to_vec()).await + } + + pub async fn load_aggregate( + &self, + tenant_id: &TenantId, + aggregate_id: &AggregateId, + aggregate_type: &AggregateType, + ) -> Result { + self.load_or_create_instance(tenant_id, aggregate_id, aggregate_type) + .await + } + + pub fn storage(&self) -> &StorageClient { + &self.storage + } + + pub fn stream(&self) -> &StreamClient { + &self.stream + } + + pub fn executor(&self) -> &RuntimeExecutor { + &self.executor + } + + pub fn query_client(&self) -> &QueryClient { + &self.query + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::tempdir; + + async fn create_test_handler() -> (tempfile::TempDir, AggregateHandler) { + let dir = tempdir().unwrap(); + let path = dir.path().join("test.mdbx"); + let storage = StorageClient::open(path.to_string_lossy().to_string()).unwrap(); + + let stream = StreamClient::in_memory(); + let executor = RuntimeExecutor::with_config( + crate::runtime::ExecutorConfig::default().with_mock_runtime(), + ); + + let handler = AggregateHandler::new( + storage, + stream, + executor, + "function decide(s,c) { return []; }".to_string(), + "function apply(s,e) { return s; }".to_string(), + ); + + (dir, handler) + } + + #[test] + fn handler_is_send_sync() { + fn assert_send_sync() {} + assert_send_sync::(); + } + + #[test] + fn snapshot_threshold_defaults_to_10() { 
+ let dir = tempdir().unwrap(); + let path = dir.path().join("test.mdbx"); + let storage = StorageClient::open(path.to_string_lossy().to_string()).unwrap(); + let stream = StreamClient::in_memory(); + + let executor = RuntimeExecutor::new(); + + let handler = AggregateHandler::new( + storage, + stream, + executor, + "decide".to_string(), + "apply".to_string(), + ); + + let handler_with_threshold = AggregateHandler::new( + handler.storage.clone(), + handler.stream.clone(), + handler.executor.clone(), + "decide".to_string(), + "apply".to_string(), + ) + .with_snapshot_threshold(25); + + assert_eq!(handler.snapshot_threshold, 10); + assert_eq!(handler_with_threshold.snapshot_threshold, 25); + } + + #[tokio::test] + async fn handler_full_lifecycle_persists_events_and_snapshot() { + let (_dir, handler) = create_test_handler().await; + let handler = handler.with_snapshot_threshold(1); + + let tenant_id = TenantId::new("tenant-a"); + let aggregate_id = AggregateId::new_v7(); + let aggregate_type = AggregateType::from("Account"); + + let command = Command::new( + tenant_id.clone(), + aggregate_id.clone(), + aggregate_type.clone(), + serde_json::json!({"type": "deposit", "amount": 50}), + ); + + let events = handler.handle_command(command).await.unwrap(); + assert_eq!(events.len(), 1); + + let snapshot = handler + .storage + .get_snapshot(&tenant_id, &aggregate_id) + .await + .unwrap() + .unwrap(); + assert_eq!(snapshot.version, Version::from(1)); + } + + #[tokio::test] + async fn retry_on_version_conflict() { + let (_dir, handler) = create_test_handler().await; + let handler = handler.with_max_retries(5); + + let tenant_id = TenantId::new("tenant-a"); + let aggregate_id = AggregateId::new_v7(); + let aggregate_type = AggregateType::from("Account"); + + let cmd1 = Command::new( + tenant_id.clone(), + aggregate_id.clone(), + aggregate_type.clone(), + serde_json::json!({"type": "deposit", "amount": 10}), + ); + let cmd2 = Command::new( + tenant_id.clone(), + 
aggregate_id.clone(), + aggregate_type.clone(), + serde_json::json!({"type": "deposit", "amount": 20}), + ); + + let (r1, r2) = tokio::join!(handler.handle_command(cmd1), handler.handle_command(cmd2)); + assert!(r1.is_ok()); + assert!(r2.is_ok()); + + let events = handler + .stream + .fetch_events(&tenant_id, &aggregate_id, Version::initial()) + .await + .unwrap(); + assert_eq!(events.len(), 2); + assert_eq!(events[0].version, Version::from(1)); + assert_eq!(events[1].version, Version::from(2)); + } + + #[tokio::test] + async fn snapshot_threshold_respected() { + let (_dir, handler) = create_test_handler().await; + let handler = handler.with_snapshot_threshold(3); + + let tenant_id = TenantId::new("tenant-a"); + let aggregate_id = AggregateId::new_v7(); + let aggregate_type = AggregateType::from("Account"); + + for _ in 0..5 { + let cmd = Command::new( + tenant_id.clone(), + aggregate_id.clone(), + aggregate_type.clone(), + serde_json::json!({"type": "deposit", "amount": 1}), + ); + handler.handle_command(cmd).await.unwrap(); + } + + let snapshot = handler + .storage + .get_snapshot(&tenant_id, &aggregate_id) + .await + .unwrap() + .unwrap(); + assert_eq!(snapshot.version, Version::from(3)); + } + + #[tokio::test] + async fn empty_tenant_id_allowed_in_single_tenant_mode() { + let (_dir, handler) = create_test_handler().await; + let handler = handler.with_snapshot_threshold(1); + + let tenant_id = TenantId::default(); + let aggregate_id = AggregateId::new_v7(); + let aggregate_type = AggregateType::from("Account"); + + let command = Command::new( + tenant_id.clone(), + aggregate_id.clone(), + aggregate_type.clone(), + serde_json::json!({"type": "deposit", "amount": 5}), + ); + + let events = handler.handle_command(command).await.unwrap(); + assert_eq!(events.len(), 1); + + let proj = handler + .query + .get(&tenant_id, &aggregate_id.to_string()) + .await + .unwrap() + .unwrap(); + assert_eq!(proj.state["balance"], 5); + } + + #[tokio::test] + async fn 
query_returns_correct_aggregate_state() { + let (_dir, handler) = create_test_handler().await; + let handler = handler.with_snapshot_threshold(1); + + let tenant_id = TenantId::new("tenant-a"); + let aggregate_id = AggregateId::new_v7(); + let aggregate_type = AggregateType::from("Account"); + + let command = Command::new( + tenant_id.clone(), + aggregate_id.clone(), + aggregate_type, + serde_json::json!({"type": "deposit", "amount": 100}), + ); + handler.handle_command(command).await.unwrap(); + + let proj = handler + .query + .get(&tenant_id, &aggregate_id.to_string()) + .await + .unwrap() + .unwrap(); + assert_eq!(proj.state["balance"], 100); + } + + #[tokio::test] + async fn system_failure_recovery_rehydrates_state() { + let dir = tempdir().unwrap(); + let path = dir.path().join("test.mdbx"); + let storage = StorageClient::open(path.to_string_lossy().to_string()).unwrap(); + let stream = StreamClient::in_memory(); + let executor = RuntimeExecutor::with_config( + crate::runtime::ExecutorConfig::default().with_mock_runtime(), + ); + + let handler1 = AggregateHandler::new( + storage, + stream.clone(), + executor.clone(), + "decide".to_string(), + "apply".to_string(), + ) + .with_snapshot_threshold(2); + + let tenant_id = TenantId::new("tenant-a"); + let aggregate_id = AggregateId::new_v7(); + let aggregate_type = AggregateType::from("Account"); + + for _ in 0..2 { + let cmd = Command::new( + tenant_id.clone(), + aggregate_id.clone(), + aggregate_type.clone(), + serde_json::json!({"type": "deposit", "amount": 10}), + ); + handler1.handle_command(cmd).await.unwrap(); + } + + drop(handler1); + + let storage2 = StorageClient::open(path.to_string_lossy().to_string()).unwrap(); + let handler2 = AggregateHandler::new( + storage2, + stream, + executor, + "decide".to_string(), + "apply".to_string(), + ); + + let loaded = handler2 + .load_aggregate(&tenant_id, &aggregate_id, &aggregate_type) + .await + .unwrap(); + assert_eq!(loaded.state()["balance"], 20); + } +} diff 
--git a/aggregate/src/aggregate/mod.rs b/aggregate/src/aggregate/mod.rs new file mode 100644 index 0000000..976c9aa --- /dev/null +++ b/aggregate/src/aggregate/mod.rs @@ -0,0 +1,5 @@ +mod handler; +mod state; + +pub use handler::*; +pub use state::*; diff --git a/aggregate/src/aggregate/state.rs b/aggregate/src/aggregate/state.rs new file mode 100644 index 0000000..92f8274 --- /dev/null +++ b/aggregate/src/aggregate/state.rs @@ -0,0 +1,448 @@ +use crate::runtime::RuntimeExecutor; +use crate::types::{ + AggregateError, AggregateId, AggregateType, Command, Event, Snapshot, TenantId, Version, +}; +use serde_json::Value; +use std::collections::HashSet; +use uuid::Uuid; + +#[derive(Debug, Clone)] +pub struct AggregateInstance { + aggregate_id: AggregateId, + aggregate_type: AggregateType, + tenant_id: TenantId, + snapshot_version: Version, + version: Version, + state: Value, + decide_program: String, + apply_program: String, + processed_command_ids: HashSet, +} + +impl AggregateInstance { + pub fn new( + aggregate_id: AggregateId, + aggregate_type: AggregateType, + tenant_id: TenantId, + decide_program: String, + apply_program: String, + ) -> Self { + Self { + aggregate_id, + aggregate_type, + tenant_id, + snapshot_version: Version::initial(), + version: Version::initial(), + state: Value::Null, + decide_program, + apply_program, + processed_command_ids: HashSet::new(), + } + } + + pub fn aggregate_id(&self) -> &AggregateId { + &self.aggregate_id + } + + pub fn aggregate_type(&self) -> &AggregateType { + &self.aggregate_type + } + + pub fn tenant_id(&self) -> &TenantId { + &self.tenant_id + } + + pub fn version(&self) -> Version { + self.version + } + + pub fn snapshot_version(&self) -> Version { + self.snapshot_version + } + + pub fn state(&self) -> &Value { + &self.state + } + + pub fn rehydrate( + tenant_id: TenantId, + snapshot: Snapshot, + events: Vec, + decide_program: String, + apply_program: String, + ) -> Result { + if snapshot.tenant_id != tenant_id { + return 
Err(AggregateError::TenantAccessDenied { + tenant_id: snapshot.tenant_id, + }); + } + + let mut instance = Self { + aggregate_id: snapshot.aggregate_id, + aggregate_type: snapshot.aggregate_type, + tenant_id, + snapshot_version: snapshot.version, + version: snapshot.version, + state: snapshot.state, + decide_program, + apply_program, + processed_command_ids: HashSet::new(), + }; + + for event in events { + instance.apply_event_internal(&event)?; + } + + Ok(instance) + } + + pub async fn rehydrate_with_executor( + tenant_id: TenantId, + snapshot: Snapshot, + events: Vec, + decide_program: String, + apply_program: String, + executor: &RuntimeExecutor, + ) -> Result { + if snapshot.tenant_id != tenant_id { + return Err(AggregateError::TenantAccessDenied { + tenant_id: snapshot.tenant_id, + }); + } + + let mut instance = Self { + aggregate_id: snapshot.aggregate_id, + aggregate_type: snapshot.aggregate_type, + tenant_id, + snapshot_version: snapshot.version, + version: snapshot.version, + state: snapshot.state, + decide_program, + apply_program, + processed_command_ids: HashSet::new(), + }; + + for event in events { + let apply_result = executor + .execute_apply(&instance.state, &event, &instance.apply_program) + .await?; + instance.state = apply_result.new_state; + instance.apply_event_internal(&event)?; + } + + Ok(instance) + } + + fn apply_event_internal(&mut self, event: &Event) -> Result<(), AggregateError> { + if event.tenant_id != self.tenant_id { + return Err(AggregateError::TenantAccessDenied { + tenant_id: event.tenant_id.clone(), + }); + } + + self.processed_command_ids.insert(event.command_id); + self.version = event.version; + Ok(()) + } + + pub fn apply_event(&mut self, event: &Event) -> Result<(), AggregateError> { + self.apply_event_internal(event) + } + + pub async fn handle_command( + &mut self, + command: Command, + executor: &RuntimeExecutor, + ) -> Result, AggregateError> { + if command.tenant_id != self.tenant_id { + return 
Err(AggregateError::TenantAccessDenied { + tenant_id: command.tenant_id, + }); + } + + if command.aggregate_id != self.aggregate_id { + return Err(AggregateError::NotFound(command.aggregate_id)); + } + + if self.processed_command_ids.contains(&command.command_id) { + return Ok(Vec::new()); + } + + let decide_result = executor + .execute_decide(&self.state, &command, &self.decide_program) + .await?; + + let command_id = command.command_id; + let correlation_id = command + .metadata + .get("correlation_id") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + let traceparent = command + .metadata + .get("traceparent") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + let mut events = Vec::with_capacity(decide_result.events.len()); + + for event_payload in decide_result.events { + let event_type = event_payload + .get("type") + .and_then(|t| t.as_str()) + .unwrap_or("Unknown") + .to_string(); + + let new_version = self.version.increment(); + let mut event = Event::new( + self.tenant_id.clone(), + self.aggregate_id.clone(), + self.aggregate_type.clone(), + new_version, + event_type, + event_payload, + command_id, + ); + event.correlation_id = correlation_id.clone(); + event.traceparent = traceparent.clone(); + + let apply_result = executor + .execute_apply(&self.state, &event, &self.apply_program) + .await?; + self.state = apply_result.new_state; + self.version = new_version; + + events.push(event); + } + + self.processed_command_ids.insert(command_id); + Ok(events) + } + + pub fn to_snapshot(&self) -> Snapshot { + Snapshot::new( + self.tenant_id.clone(), + self.aggregate_id.clone(), + self.aggregate_type.clone(), + self.version, + self.state.clone(), + ) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + fn test_instance() -> AggregateInstance { + AggregateInstance::new( + AggregateId::new_v7(), + AggregateType::new("Account"), + TenantId::new("tenant-a"), + "function decide(s,c) { return []; }".to_string(), + "function 
apply(s,e) { return s; }".to_string(), + ) + } + + #[test] + fn aggregate_instance_has_id_and_tenant() { + let agg = test_instance(); + assert_eq!(agg.tenant_id().as_str(), "tenant-a"); + assert_eq!(agg.aggregate_type().as_str(), "Account"); + assert!(!agg.aggregate_id().to_string().is_empty()); + } + + #[test] + fn new_instance_starts_at_version_zero() { + let agg = test_instance(); + assert_eq!(agg.version(), Version::initial()); + } + + #[test] + fn rehydrate_validates_tenant() { + let snapshot = Snapshot::new( + TenantId::new("tenant-a"), + AggregateId::new_v7(), + AggregateType::new("Account"), + Version::from(5), + json!({ "balance": 100 }), + ); + + let result = AggregateInstance::rehydrate( + TenantId::new("tenant-b"), + snapshot, + vec![], + "decide".to_string(), + "apply".to_string(), + ); + + assert!(result.is_err()); + match result.unwrap_err() { + AggregateError::TenantAccessDenied { tenant_id } => { + assert_eq!(tenant_id, TenantId::new("tenant-a")); + } + _ => panic!("Expected TenantAccessDenied"), + } + } + + #[tokio::test] + async fn rehydrate_applies_events() { + let tenant_id = TenantId::new("tenant-a"); + let aggregate_id = AggregateId::new_v7(); + let aggregate_type = AggregateType::new("Account"); + + let snapshot = Snapshot::new( + tenant_id.clone(), + aggregate_id.clone(), + aggregate_type.clone(), + Version::from(2), + json!({ "balance": 100 }), + ); + + let event1 = Event::new( + tenant_id.clone(), + aggregate_id.clone(), + aggregate_type.clone(), + Version::from(3), + "deposited", + json!({ "amount": 50 }), + Uuid::now_v7(), + ); + + let event2 = Event::new( + tenant_id.clone(), + aggregate_id.clone(), + aggregate_type.clone(), + Version::from(4), + "withdrawn", + json!({ "amount": 25 }), + Uuid::now_v7(), + ); + + let executor = RuntimeExecutor::with_config( + crate::runtime::ExecutorConfig::default().with_mock_runtime(), + ); + let instance = AggregateInstance::rehydrate_with_executor( + tenant_id, + snapshot, + vec![event1, event2], + 
"decide".to_string(), + "apply".to_string(), + &executor, + ) + .await + .unwrap(); + + assert_eq!(instance.version(), Version::from(4)); + assert_eq!(instance.state()["balance"], 125); + } + + #[test] + fn to_snapshot_captures_state() { + let mut agg = test_instance(); + agg.state = json!({ "balance": 150 }); + agg.version = Version::from(3); + + let snapshot = agg.to_snapshot(); + assert_eq!(snapshot.state, json!({ "balance": 150 })); + assert_eq!(snapshot.version, Version::from(3)); + } + + #[tokio::test] + async fn idempotency_via_command_id_returns_empty() { + let tenant_id = TenantId::new("tenant-a"); + let aggregate_id = AggregateId::new_v7(); + let aggregate_type = AggregateType::new("Account"); + let command_id = Uuid::now_v7(); + + let mut agg = AggregateInstance::new( + aggregate_id.clone(), + aggregate_type.clone(), + tenant_id.clone(), + "decide".to_string(), + "apply".to_string(), + ); + + let e = Event::new( + tenant_id.clone(), + aggregate_id.clone(), + aggregate_type.clone(), + Version::from(1), + "deposited", + json!({ "amount": 10 }), + command_id, + ); + agg.apply_event(&e).unwrap(); + let before_version = agg.version(); + + let mut cmd = Command::new( + tenant_id, + aggregate_id, + aggregate_type, + json!({ "type": "deposit", "amount": 10 }), + ); + cmd.command_id = command_id; + + let executor = RuntimeExecutor::new(); + let events = agg.handle_command(cmd, &executor).await.unwrap(); + assert!(events.is_empty()); + assert_eq!(agg.version(), before_version); + } + + #[tokio::test] + async fn handle_command_validates_tenant() { + let agg = AggregateInstance::new( + AggregateId::new_v7(), + AggregateType::new("Account"), + TenantId::new("tenant-a"), + "decide".to_string(), + "apply".to_string(), + ); + + let command = Command::new( + TenantId::new("tenant-b"), + agg.aggregate_id.clone(), + AggregateType::new("Account"), + json!({ "type": "deposit", "amount": 50 }), + ); + + let executor = RuntimeExecutor::new(); + let mut agg = agg; + let result 
= agg.handle_command(command, &executor).await; + + assert!(result.is_err()); + match result.unwrap_err() { + AggregateError::TenantAccessDenied { .. } => {} + _ => panic!("Expected TenantAccessDenied"), + } + } + + #[tokio::test] + async fn handle_command_validates_aggregate_id() { + let agg = test_instance(); + let wrong_agg_id = AggregateId::new_v7(); + + let command = Command::new( + agg.tenant_id.clone(), + wrong_agg_id, + AggregateType::new("Account"), + json!({ "type": "deposit", "amount": 50 }), + ); + + let executor = RuntimeExecutor::new(); + let mut agg = agg; + let result = agg.handle_command(command, &executor).await; + + assert!(result.is_err()); + match result.unwrap_err() { + AggregateError::NotFound(_) => {} + _ => panic!("Expected NotFound"), + } + } + + #[test] + fn instance_is_send_sync() { + fn assert_send_sync() {} + assert_send_sync::(); + } +} diff --git a/aggregate/src/config/mod.rs b/aggregate/src/config/mod.rs new file mode 100644 index 0000000..a10a586 --- /dev/null +++ b/aggregate/src/config/mod.rs @@ -0,0 +1,3 @@ +mod settings; + +pub use settings::*; diff --git a/aggregate/src/config/settings.rs b/aggregate/src/config/settings.rs new file mode 100644 index 0000000..970bde6 --- /dev/null +++ b/aggregate/src/config/settings.rs @@ -0,0 +1,274 @@ +use serde::{Deserialize, Serialize}; +use std::path::Path; + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(default)] +pub struct Settings { + pub nats_url: String, + pub storage_path: String, + pub logger_socket: Option, + pub snapshot_threshold: u64, + pub max_retries: u32, + pub multi_tenant_enabled: bool, + pub default_tenant_id: Option, + pub shard_id: String, + pub placement_bucket: String, + pub placement_key: String, + pub grpc_addr: String, + pub decide_program: String, + pub apply_program: String, +} + +impl Default for Settings { + fn default() -> Self { + Self { + nats_url: "nats://localhost:4222".to_string(), + storage_path: "./data".to_string(), + logger_socket: None, + 
snapshot_threshold: 10, + max_retries: 3, + multi_tenant_enabled: true, + default_tenant_id: None, + shard_id: "local".to_string(), + placement_bucket: "AGGREGATE_PLACEMENT".to_string(), + placement_key: "aggregate_placement".to_string(), + grpc_addr: "0.0.0.0:50051".to_string(), + decide_program: "function decide(state, command) { return []; }".to_string(), + apply_program: "function apply(state, event) { return state; }".to_string(), + } + } +} + +impl Settings { + pub fn from_env() -> Result { + let mut settings = Self::default(); + settings.apply_env_overrides(); + Ok(settings) + } + + pub fn from_yaml(yaml: &str) -> Result { + serde_yaml::from_str(yaml) + } + + pub fn from_toml(toml_str: &str) -> Result { + toml::from_str(toml_str) + } + + pub fn from_json(json: &str) -> Result { + serde_json::from_str(json) + } + + pub fn from_file(path: impl AsRef) -> Result { + let path = path.as_ref(); + let raw = std::fs::read_to_string(path)?; + let ext = path.extension().and_then(|e| e.to_str()).unwrap_or(""); + + match ext { + "yaml" | "yml" => Ok(Self::from_yaml(&raw)?), + "toml" => Ok(Self::from_toml(&raw)?), + "json" => Ok(Self::from_json(&raw)?), + _ => Err(SettingsLoadError::UnsupportedFormat { + path: path.display().to_string(), + }), + } + } + + pub fn load_from_file_with_env_overrides( + path: impl AsRef, + ) -> Result { + let mut settings = Self::from_file(path)?; + settings.apply_env_overrides(); + Ok(settings) + } + + fn apply_env_overrides(&mut self) { + if let Ok(url) = std::env::var("AGGREGATE_NATS_URL") { + self.nats_url = url; + } + + if let Ok(path) = std::env::var("AGGREGATE_STORAGE_PATH") { + self.storage_path = path; + } + + if let Ok(socket) = std::env::var("AGGREGATE_LOGGER_SOCKET") { + self.logger_socket = Some(socket); + } + + if let Ok(threshold) = std::env::var("AGGREGATE_SNAPSHOT_THRESHOLD") { + if let Ok(value) = threshold.parse() { + self.snapshot_threshold = value; + } + } + + if let Ok(retries) = std::env::var("AGGREGATE_MAX_RETRIES") { + 
if let Ok(value) = retries.parse() { + self.max_retries = value; + } + } + + if let Ok(enabled) = std::env::var("AGGREGATE_MULTI_TENANT") { + if let Ok(value) = enabled.parse() { + self.multi_tenant_enabled = value; + } + } + + if let Ok(default_tenant_id) = std::env::var("AGGREGATE_DEFAULT_TENANT_ID") { + if default_tenant_id.is_empty() { + self.default_tenant_id = None; + } else { + self.default_tenant_id = Some(default_tenant_id); + } + } + + if let Ok(shard_id) = std::env::var("AGGREGATE_SHARD_ID") { + if !shard_id.is_empty() { + self.shard_id = shard_id; + } + } + + if let Ok(bucket) = std::env::var("AGGREGATE_PLACEMENT_BUCKET") { + if !bucket.is_empty() { + self.placement_bucket = bucket; + } + } + + if let Ok(key) = std::env::var("AGGREGATE_PLACEMENT_KEY") { + if !key.is_empty() { + self.placement_key = key; + } + } + + if let Ok(addr) = std::env::var("AGGREGATE_GRPC_ADDR") { + if !addr.is_empty() { + self.grpc_addr = addr; + } + } + + if let Ok(program) = std::env::var("AGGREGATE_DECIDE_PROGRAM") { + if !program.is_empty() { + self.decide_program = program; + } + } + + if let Ok(program) = std::env::var("AGGREGATE_APPLY_PROGRAM") { + if !program.is_empty() { + self.apply_program = program; + } + } + + if let Ok(path) = std::env::var("AGGREGATE_DECIDE_PROGRAM_PATH") { + if let Ok(raw) = std::fs::read_to_string(path) { + if !raw.is_empty() { + self.decide_program = raw; + } + } + } + + if let Ok(path) = std::env::var("AGGREGATE_APPLY_PROGRAM_PATH") { + if let Ok(raw) = std::fs::read_to_string(path) { + if !raw.is_empty() { + self.apply_program = raw; + } + } + } + } + + pub fn validate(&self) -> Result<(), String> { + if self.nats_url.is_empty() { + return Err("NATS URL is required".to_string()); + } + + if self.storage_path.is_empty() { + return Err("Storage path is required".to_string()); + } + + Ok(()) + } +} + +#[derive(Debug, thiserror::Error)] +pub enum SettingsLoadError { + #[error("Failed to read config file: {0}")] + Io(#[from] std::io::Error), + 
#[error("Failed to parse YAML config: {0}")] + Yaml(#[from] serde_yaml::Error), + #[error("Failed to parse TOML config: {0}")] + Toml(#[from] toml::de::Error), + #[error("Failed to parse JSON config: {0}")] + Json(#[from] serde_json::Error), + #[error("Unsupported config format: {path}")] + UnsupportedFormat { path: String }, +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::tempdir; + + #[test] + fn settings_from_env() { + std::env::set_var("AGGREGATE_NATS_URL", "nats://localhost:4222"); + let settings = Settings::from_env().unwrap(); + assert_eq!(settings.nats_url, "nats://localhost:4222"); + std::env::remove_var("AGGREGATE_NATS_URL"); + } + + #[test] + fn settings_validation() { + let settings = Settings { + nats_url: "".to_string(), + ..Default::default() + }; + assert!(settings.validate().is_err()); + } + + #[test] + fn settings_from_yaml_file_and_env_override() { + let dir = tempdir().unwrap(); + let file_path = dir.path().join("aggregate.yaml"); + std::fs::write( + &file_path, + r#" +nats_url: "nats://from-file:4222" +storage_path: "/tmp/agg" +snapshot_threshold: 25 +multi_tenant_enabled: false +"#, + ) + .unwrap(); + + std::env::set_var("AGGREGATE_NATS_URL", "nats://from-env:4222"); + let settings = Settings::load_from_file_with_env_overrides(&file_path).unwrap(); + assert_eq!(settings.nats_url, "nats://from-env:4222"); + assert_eq!(settings.storage_path, "/tmp/agg"); + assert_eq!(settings.snapshot_threshold, 25); + assert!(!settings.multi_tenant_enabled); + std::env::remove_var("AGGREGATE_NATS_URL"); + } + + #[test] + fn settings_from_toml_file() { + let dir = tempdir().unwrap(); + let file_path = dir.path().join("aggregate.toml"); + std::fs::write( + &file_path, + r#" +nats_url = "nats://from-file:4222" +storage_path = "/tmp/agg" +max_retries = 7 +"#, + ) + .unwrap(); + + let settings = Settings::from_file(&file_path).unwrap(); + assert_eq!(settings.nats_url, "nats://from-file:4222"); + assert_eq!(settings.storage_path, "/tmp/agg"); + 
assert_eq!(settings.max_retries, 7); + } + + #[test] + fn settings_is_clone() { + let s = Settings::default(); + let _s2 = s.clone(); + } +} diff --git a/aggregate/src/container.rs b/aggregate/src/container.rs new file mode 100644 index 0000000..2d32a5a --- /dev/null +++ b/aggregate/src/container.rs @@ -0,0 +1,24 @@ +#[cfg(test)] +mod tests { + #[test] + fn dockerfile_is_multi_stage_and_builds_selected_binary() { + let raw = std::fs::read_to_string("../docker/Dockerfile.rust").unwrap(); + assert!(raw.contains("AS builder")); + assert!(raw.contains("FROM debian:")); + assert!(raw.contains("ARG PACKAGE")); + assert!(raw.contains("ARG BIN")); + assert!(raw.contains("cargo build -p ${PACKAGE} --bin ${BIN} --release")); + assert!(raw.contains("COPY --from=builder")); + assert!(raw.contains("ENTRYPOINT")); + assert!(raw.contains("FROM")); + } + + #[test] + fn docker_compose_is_valid_yaml_and_has_services() { + let raw = std::fs::read_to_string("../docker-compose.yml").unwrap(); + let doc: serde_yaml::Value = serde_yaml::from_str(&raw).unwrap(); + let services = doc.get("services").and_then(|v| v.as_mapping()).unwrap(); + assert!(services.contains_key(serde_yaml::Value::from("nats"))); + assert!(services.contains_key(serde_yaml::Value::from("aggregate"))); + } +} diff --git a/aggregate/src/gateway/mod.rs b/aggregate/src/gateway/mod.rs new file mode 100644 index 0000000..4dbd515 --- /dev/null +++ b/aggregate/src/gateway/mod.rs @@ -0,0 +1,7 @@ +pub const TENANT_ID_METADATA_KEY: &str = "x-tenant-id"; + +pub mod proto { + tonic::include_proto!("aggregate.gateway.v1"); +} + +pub mod server; diff --git a/aggregate/src/gateway/server.rs b/aggregate/src/gateway/server.rs new file mode 100644 index 0000000..97e143c --- /dev/null +++ b/aggregate/src/gateway/server.rs @@ -0,0 +1,306 @@ +use super::proto::command_service_server::{CommandService, CommandServiceServer}; +use super::proto::{Event as ProtoEvent, SubmitCommandRequest, SubmitCommandResponse}; +use 
crate::aggregate::AggregateHandler; +use crate::observability::Observability; +use crate::placement::TenantPlacementManager; +use crate::types::{AggregateError, AggregateId, AggregateType, Command, TenantId}; +use std::collections::HashMap; +use std::str::FromStr; +use std::sync::Arc; +use tonic::{Request, Response, Status}; + +#[derive(Clone)] +pub struct GrpcCommandServer { + handler: AggregateHandler, + placement: Arc, + observability: Arc, + multi_tenant_enabled: bool, + default_tenant_id: Option, +} + +impl GrpcCommandServer { + pub fn new( + handler: AggregateHandler, + placement: Arc, + observability: Arc, + multi_tenant_enabled: bool, + default_tenant_id: Option, + ) -> Self { + Self { + handler, + placement, + observability, + multi_tenant_enabled, + default_tenant_id, + } + } + + pub fn service(self) -> CommandServiceServer { + CommandServiceServer::new(self) + } +} + +#[tonic::async_trait] +impl CommandService for GrpcCommandServer { + async fn submit_command( + &self, + request: Request, + ) -> Result, Status> { + let correlation_id = request + .metadata() + .get("x-correlation-id") + .and_then(|v| v.to_str().ok()) + .map(|s| s.trim()) + .filter(|s| !s.is_empty()) + .map(|s| s.to_string()); + let traceparent = request + .metadata() + .get("traceparent") + .and_then(|v| v.to_str().ok()) + .map(|s| s.trim()) + .filter(|s| !s.is_empty()) + .map(|s| s.to_string()); + let trace_id = traceparent.as_deref().and_then(trace_id_from_traceparent); + + let metadata_tenant = request + .metadata() + .get(super::TENANT_ID_METADATA_KEY) + .and_then(|v| v.to_str().ok()) + .unwrap_or("") + .to_string(); + + let req = request.into_inner(); + + let tenant_id = resolve_tenant_id( + &req.tenant_id, + &metadata_tenant, + self.multi_tenant_enabled, + self.default_tenant_id.as_ref(), + ) + .map_err(Status::invalid_argument)?; + + if !tenant_id.as_str().is_empty() && !is_valid_tenant_id(tenant_id.as_str()) { + return Err(Status::invalid_argument("invalid tenant_id")); + } + + 
let aggregate_id = AggregateId::from_str(&req.aggregate_id) + .map_err(|e| Status::invalid_argument(e.to_string()))?; + let aggregate_type = AggregateType::from(req.aggregate_type); + + let payload: serde_json::Value = serde_json::from_str(&req.payload_json) + .map_err(|e| Status::invalid_argument(e.to_string()))?; + + let command_id = if req.command_id.is_empty() { + uuid::Uuid::now_v7() + } else { + uuid::Uuid::parse_str(&req.command_id) + .map_err(|e| Status::invalid_argument(e.to_string()))? + }; + + let metadata: HashMap = req + .metadata + .into_iter() + .map(|(k, v)| (k, serde_json::Value::String(v))) + .collect(); + + let mut metadata = metadata; + if let Some(correlation_id) = correlation_id.as_deref() { + metadata.insert( + "correlation_id".to_string(), + serde_json::Value::String(correlation_id.to_string()), + ); + } + if let Some(traceparent) = traceparent.as_deref() { + metadata.insert( + "traceparent".to_string(), + serde_json::Value::String(traceparent.to_string()), + ); + } + + let command = Command { + tenant_id: tenant_id.clone(), + command_id, + aggregate_id: aggregate_id.clone(), + aggregate_type: aggregate_type.clone(), + payload, + metadata, + }; + + let span = self.observability.start_command_span( + &aggregate_id.to_string(), + aggregate_type.as_str(), + tenant_id.as_str(), + &command_id.to_string(), + correlation_id.as_deref(), + trace_id.as_deref(), + ); + + let _guard = self + .placement + .begin_command(&tenant_id) + .await + .map_err(|e| { + self.observability.record_command_error(&span, &e); + map_aggregate_error(e) + })?; + + let events = self.handler.handle_command(command).await.map_err(|e| { + self.observability.record_command_error(&span, &e); + map_aggregate_error(e) + })?; + + self.observability + .record_command_success(&span, events.len()); + + let proto_events = events + .into_iter() + .map(|e| ProtoEvent { + event_id: e.event_id.to_string(), + command_id: e.command_id.to_string(), + aggregate_id: e.aggregate_id.to_string(), 
+ aggregate_type: e.aggregate_type.to_string(), + version: e.version.as_u64(), + event_type: e.event_type, + payload_json: serde_json::to_string(&e.payload) + .unwrap_or_else(|_| "{}".to_string()), + timestamp_rfc3339: e.timestamp.to_rfc3339(), + }) + .collect(); + + let mut response = Response::new(SubmitCommandResponse { + events: proto_events, + }); + if let Some(correlation_id) = correlation_id.as_deref() { + if let Ok(v) = tonic::metadata::MetadataValue::try_from(correlation_id) { + response.metadata_mut().insert("x-correlation-id", v); + } + } + if let Some(traceparent) = traceparent.as_deref() { + if let Ok(v) = tonic::metadata::MetadataValue::try_from(traceparent) { + response.metadata_mut().insert("traceparent", v); + } + } + Ok(response) + } +} + +fn trace_id_from_traceparent(traceparent: &str) -> Option { + shared::trace_id_from_traceparent(traceparent).map(|s| s.to_string()) +} + +fn map_aggregate_error(error: AggregateError) -> Status { + match error { + AggregateError::TenantNotHosted { .. } => Status::unavailable(error.to_string()), + AggregateError::TenantDraining { .. } => Status::unavailable(error.to_string()), + AggregateError::TenantAccessDenied { .. } => Status::permission_denied(error.to_string()), + AggregateError::ValidationError(_) => Status::invalid_argument(error.to_string()), + AggregateError::VersionConflict { .. 
} => Status::aborted(error.to_string()), + AggregateError::NotFound(_) => Status::not_found(error.to_string()), + AggregateError::StorageError(_) => Status::internal(error.to_string()), + AggregateError::StreamError(_) => Status::unavailable(error.to_string()), + AggregateError::RehydrationError(_) => Status::internal(error.to_string()), + AggregateError::DecideError(_) => Status::failed_precondition(error.to_string()), + AggregateError::ApplyError(_) => Status::failed_precondition(error.to_string()), + } +} + +fn resolve_tenant_id( + explicit: &str, + metadata: &str, + multi_tenant_enabled: bool, + default_tenant_id: Option<&TenantId>, +) -> Result { + if !explicit.is_empty() { + return Ok(TenantId::new(explicit)); + } + if !metadata.is_empty() { + return Ok(TenantId::new(metadata)); + } + + if multi_tenant_enabled { + if let Some(default_tenant_id) = default_tenant_id { + return Ok(default_tenant_id.clone()); + } + return Err("tenant_id is required"); + } + + Ok(TenantId::default()) +} + +fn is_valid_tenant_id(id: &str) -> bool { + id.chars() + .all(|c| c.is_alphanumeric() || c == '-' || c == '_') +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::runtime::{ExecutorConfig, RuntimeExecutor}; + use crate::storage::StorageClient; + use crate::stream::StreamClient; + use tempfile::tempdir; + use tonic::transport::{Channel, Server}; + + #[tokio::test] + async fn grpc_submit_command_rejects_unhosted_tenant() { + let obs = Arc::new(Observability::default()); + let placement = Arc::new(TenantPlacementManager::new(obs.clone())); + placement + .set_hosted_tenants(vec!["tenant-a".to_string()]) + .await; + + let dir = tempdir().unwrap(); + let path = dir.path().join("test.mdbx"); + let storage = StorageClient::open(path.to_string_lossy().to_string()).unwrap(); + let stream = StreamClient::in_memory(); + let executor = RuntimeExecutor::with_config(ExecutorConfig::default().with_mock_runtime()); + + let handler = AggregateHandler::new( + storage, + stream, + 
executor, + "decide".to_string(), + "apply".to_string(), + ); + + let service = GrpcCommandServer::new(handler, placement, obs, true, None).service(); + let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap(); + let addr = listener.local_addr().unwrap(); + + tokio::spawn(async move { + Server::builder() + .add_service(service) + .serve_with_incoming_shutdown( + tokio_stream::wrappers::TcpListenerStream::new(listener), + async move { + tokio::time::sleep(std::time::Duration::from_millis(200)).await; + }, + ) + .await + .unwrap(); + }); + + let channel = Channel::from_shared(format!("http://{}", addr)) + .unwrap() + .connect() + .await + .unwrap(); + let mut client = + super::super::proto::command_service_client::CommandServiceClient::new(channel); + + let resp = client + .submit_command(SubmitCommandRequest { + tenant_id: "tenant-b".to_string(), + command_id: uuid::Uuid::now_v7().to_string(), + aggregate_id: AggregateId::new_v7().to_string(), + aggregate_type: "Account".to_string(), + payload_json: "{}".to_string(), + metadata: HashMap::new(), + }) + .await; + + assert!(resp.is_err()); + let status = resp.unwrap_err(); + assert_eq!(status.code(), tonic::Code::Unavailable); + } +} diff --git a/aggregate/src/http_server.rs b/aggregate/src/http_server.rs new file mode 100644 index 0000000..f84cdd3 --- /dev/null +++ b/aggregate/src/http_server.rs @@ -0,0 +1,230 @@ +use crate::server::{AdminResponse, AdminServer}; +use axum::extract::{Path, State}; +use axum::http::{HeaderValue, StatusCode}; +use axum::response::{IntoResponse, Response}; +use axum::routing::{get, post}; +use axum::{Json, Router}; +use serde::Deserialize; +use std::future::Future; +use std::sync::Arc; + +#[derive(Clone)] +pub struct HttpState { + pub admin: Arc, +} + +pub fn router(admin: Arc) -> Router { + let state = HttpState { admin }; + Router::new() + .route("/health", get(health_route)) + .route("/ready", get(ready)) + .route("/metrics", get(metrics)) + .route("/admin/tenants", 
get(admin_tenants)) + .route("/admin/tenant/:tenant_id/status", get(admin_tenant_status)) + .route("/admin/tenant/:tenant_id/ready", get(admin_tenant_ready)) + .route("/admin/tenant/:tenant_id/drain", post(admin_tenant_drain)) + .route("/admin/drain", post(admin_drain)) + .route("/admin/reload", post(admin_reload)) + .with_state(state) +} + +pub async fn serve( + listener: tokio::net::TcpListener, + admin: Arc, + shutdown: impl Future + Send + 'static, +) { + axum::serve(listener, router(admin)) + .with_graceful_shutdown(shutdown) + .await + .expect("http server failed"); +} + +async fn health_route(State(state): State) -> Response { + proxy_json(state.admin.get("/health").await).await +} + +async fn ready(State(state): State) -> Response { + proxy_json(state.admin.get("/ready").await).await +} + +async fn admin_tenants(State(state): State) -> Response { + proxy_json(state.admin.get("/admin/tenants").await).await +} + +async fn metrics(State(state): State) -> Response { + let resp = state.admin.get("/metrics").await; + let mut response = (StatusCode::OK, resp.text().await).into_response(); + response.headers_mut().insert( + axum::http::header::CONTENT_TYPE, + HeaderValue::from_static("text/plain; version=0.0.4"), + ); + response +} + +async fn admin_drain( + State(state): State, + Json(body): Json, +) -> Response { + proxy_json(state.admin.post("/admin/drain", body).await).await +} + +async fn admin_reload( + State(state): State, + Json(body): Json, +) -> Response { + proxy_json(state.admin.post("/admin/reload", body).await).await +} + +#[derive(Debug, Deserialize)] +struct DrainBody { + timeout_ms: Option, +} + +async fn admin_tenant_status( + State(state): State, + Path(tenant_id): Path, +) -> Response { + let status = state + .admin + .placement_manager() + .tenant_status(&crate::types::TenantId::new(tenant_id)) + .await; + let mut response = (StatusCode::OK, serde_json::to_string(&status).unwrap()).into_response(); + response.headers_mut().insert( + 
axum::http::header::CONTENT_TYPE, + HeaderValue::from_static("application/json"), + ); + response +} + +async fn admin_tenant_ready( + State(state): State, + Path(tenant_id): Path, +) -> Response { + let tenant_id = crate::types::TenantId::new(tenant_id); + let status = state + .admin + .placement_manager() + .tenant_status(&tenant_id) + .await; + let ready = state.admin.health_checker().is_ready() && status.accepting; + let mut response = (StatusCode::OK, serde_json::to_string(&ready).unwrap()).into_response(); + response.headers_mut().insert( + axum::http::header::CONTENT_TYPE, + HeaderValue::from_static("application/json"), + ); + response +} + +async fn admin_tenant_drain( + State(state): State, + Path(tenant_id): Path, + body: Option>, +) -> Response { + let tenant_id = crate::types::TenantId::new(tenant_id); + state + .admin + .placement_manager() + .drain_tenant(&tenant_id) + .await; + + let timeout = body + .and_then(|b| b.timeout_ms) + .map(std::time::Duration::from_millis) + .unwrap_or(std::time::Duration::from_secs(10)); + + let drained = state + .admin + .placement_manager() + .wait_drained_with_timeout(&tenant_id, timeout) + .await; + + let status = state + .admin + .placement_manager() + .tenant_status(&tenant_id) + .await; + let resp = serde_json::json!({ "drained": drained, "status": status }); + let mut response = (StatusCode::OK, serde_json::to_string(&resp).unwrap()).into_response(); + response.headers_mut().insert( + axum::http::header::CONTENT_TYPE, + HeaderValue::from_static("application/json"), + ); + response +} + +async fn proxy_json(resp: AdminResponse) -> Response { + let mut response = (StatusCode::OK, resp.text().await).into_response(); + response.headers_mut().insert( + axum::http::header::CONTENT_TYPE, + HeaderValue::from_static("application/json"), + ); + response +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::observability::Observability; + use crate::server::HealthChecker; + use tokio::io::{AsyncReadExt, 
AsyncWriteExt}; + + async fn http_get(addr: std::net::SocketAddr, path: &str) -> String { + let mut stream = tokio::net::TcpStream::connect(addr).await.unwrap(); + let req = format!( + "GET {} HTTP/1.1\r\nHost: localhost\r\nConnection: close\r\n\r\n", + path + ); + stream.write_all(req.as_bytes()).await.unwrap(); + let mut buf = Vec::new(); + stream.read_to_end(&mut buf).await.unwrap(); + String::from_utf8_lossy(&buf).to_string() + } + + #[tokio::test] + async fn http_server_exposes_health_ready_metrics() { + let health = HealthChecker::new(); + health.set_storage_healthy(true); + health.set_stream_healthy(true); + + let admin = Arc::new(AdminServer::new( + Observability::default(), + health, + "test-shard".to_string(), + )); + admin + .placement_manager() + .set_hosted_tenants(vec!["test-tenant".to_string()]) + .await; + let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap(); + let addr = listener.local_addr().unwrap(); + + let (tx, rx) = tokio::sync::oneshot::channel::<()>(); + let handle = tokio::spawn(async move { + serve(listener, admin, async move { + let _ = rx.await; + }) + .await; + }); + + let health_resp = http_get(addr, "/health").await; + assert!(health_resp.starts_with("HTTP/1.1 200")); + + let ready_resp = http_get(addr, "/ready").await; + assert!(ready_resp.starts_with("HTTP/1.1 200")); + + let metrics_resp = http_get(addr, "/metrics").await; + assert!(metrics_resp.starts_with("HTTP/1.1 200")); + assert!(metrics_resp.contains("aggregate_commands_total")); + + let status_resp = http_get(addr, "/admin/tenant/test-tenant/status").await; + assert!(status_resp.starts_with("HTTP/1.1 200")); + assert!(status_resp.contains("test-tenant")); + + let ready_resp = http_get(addr, "/admin/tenant/test-tenant/ready").await; + assert!(ready_resp.starts_with("HTTP/1.1 200")); + + let _ = tx.send(()); + handle.await.unwrap(); + } +} diff --git a/aggregate/src/lib.rs b/aggregate/src/lib.rs new file mode 100644 index 0000000..fdbd483 --- /dev/null 
+++ b/aggregate/src/lib.rs @@ -0,0 +1,26 @@ +pub mod aggregate; +pub mod config; +pub mod container; +pub mod gateway; +pub mod http_server; +pub mod observability; +pub mod placement; +pub mod query; +pub mod runtime; +pub mod server; +pub mod storage; +pub mod stream; +pub mod swarm; +pub mod types; + +pub use aggregate::{AggregateHandler, AggregateInstance}; +pub use config::Settings; +pub use query::{ + AggregateProjection, QueryClient, QueryConfig, QueryError, QueryRequest, QueryResponse, + QueryServer, StateProjection, +}; +pub use runtime::{ExecutorConfig, RuntimeExecutor}; +pub use server::{CommandRequest, CommandResponse, CommandServer, HealthChecker, HealthStatus}; +pub use storage::StorageClient; +pub use stream::StreamClient; +pub use types::*; diff --git a/aggregate/src/main.rs b/aggregate/src/main.rs new file mode 100644 index 0000000..3f3af31 --- /dev/null +++ b/aggregate/src/main.rs @@ -0,0 +1,213 @@ +use aggregate::config::Settings; +use aggregate::gateway::server::GrpcCommandServer; +use aggregate::http_server; +use aggregate::observability::Observability; +use aggregate::runtime::RuntimeExecutor; +use aggregate::server::AdminServer; +use aggregate::storage::StorageClient; +use aggregate::stream::StreamClient; +use aggregate::swarm::TenantPlacementKvClient; +use aggregate::{aggregate::AggregateHandler, placement::TenantPlacementManager}; +use futures::StreamExt; +use std::sync::Arc; +use std::time::Duration; + +#[tokio::main] +async fn main() { + match std::env::args().nth(1).as_deref() { + Some("-h") | Some("--help") => { + print_help(); + return; + } + Some("serve") | None => serve().await, + Some(other) => { + eprintln!("Unknown command: {}", other); + print_help(); + } + } +} + +async fn serve() { + let settings = load_settings(); + + let observability = Observability::default(); + let health_checker = aggregate::server::HealthChecker::new(); + let admin = Arc::new(AdminServer::new( + observability, + health_checker, + 
settings.shard_id.clone(), + )); + + spawn_health_probe(admin.clone(), settings.clone()); + spawn_placement_watcher(admin.placement_manager(), settings.clone()); + + let storage = StorageClient::open(settings.storage_path.clone()).unwrap(); + let stream = StreamClient::new(settings.nats_url.clone()).await.unwrap(); + let _ = stream.setup_stream().await; + let executor = RuntimeExecutor::new(); + + let handler = AggregateHandler::new( + storage, + stream, + executor, + settings.decide_program.clone(), + settings.apply_program.clone(), + ) + .with_snapshot_threshold(settings.snapshot_threshold) + .with_max_retries(settings.max_retries); + + let grpc_addr: std::net::SocketAddr = settings.grpc_addr.parse().unwrap(); + let grpc_service = GrpcCommandServer::new( + handler, + admin.placement_manager(), + admin.observability(), + settings.multi_tenant_enabled, + settings + .default_tenant_id + .as_ref() + .map(aggregate::types::TenantId::new), + ) + .service(); + + let addr = std::env::var("AGGREGATE_HTTP_ADDR").unwrap_or_else(|_| "0.0.0.0:8080".to_string()); + let listener = tokio::net::TcpListener::bind(&addr).await.unwrap(); + + let (shutdown_tx, _) = tokio::sync::broadcast::channel::<()>(1); + let mut http_shutdown = shutdown_tx.subscribe(); + let mut grpc_shutdown = shutdown_tx.subscribe(); + + let http_task = tokio::spawn(async move { + http_server::serve(listener, admin, async move { + let _ = http_shutdown.recv().await; + }) + .await; + }); + + let grpc_task = tokio::spawn(async move { + tonic::transport::Server::builder() + .add_service(grpc_service) + .serve_with_shutdown(grpc_addr, async move { + let _ = grpc_shutdown.recv().await; + }) + .await + .unwrap(); + }); + + let _ = tokio::signal::ctrl_c().await; + let _ = shutdown_tx.send(()); + + let _ = tokio::join!(http_task, grpc_task); +} + +fn print_help() { + println!( + "aggregate\n\nUSAGE:\n aggregate [COMMAND]\n\nCOMMANDS:\n serve Start the HTTP server (default)\n\nOPTIONS:\n -h, --help Print help\n" + ); +} 
+ +fn load_settings() -> Settings { + if let Ok(path) = std::env::var("AGGREGATE_CONFIG_PATH") { + if let Ok(settings) = Settings::load_from_file_with_env_overrides(path) { + return settings; + } + } + + Settings::from_env().unwrap_or_default() +} + +fn spawn_health_probe(admin: Arc, settings: Settings) { + tokio::spawn(async move { + loop { + let storage_ok = StorageClient::open(settings.storage_path.clone()).is_ok(); + admin.health_checker().set_storage_healthy(storage_ok); + + let stream_ok = tokio::time::timeout(Duration::from_secs(1), async { + let stream = StreamClient::new(settings.nats_url.clone()).await?; + let _ = stream.setup_stream().await; + Ok::<_, aggregate::types::AggregateError>(()) + }) + .await + .is_ok_and(|r| r.is_ok()); + + admin.health_checker().set_stream_healthy(stream_ok); + + tokio::time::sleep(Duration::from_secs(5)).await; + } + }); +} + +fn spawn_placement_watcher(placement: Arc, settings: Settings) { + tokio::spawn(async move { + loop { + let client = TenantPlacementKvClient::connect( + settings.nats_url.clone(), + settings.placement_bucket.clone(), + ) + .await; + + let client = match client { + Ok(c) => c, + Err(_) => { + tokio::time::sleep(Duration::from_secs(1)).await; + continue; + } + }; + + if let Ok(Some(value)) = client.get_json(&settings.placement_key).await { + apply_placement_value(&placement, &settings.shard_id, value).await; + } + + let watch = client.watch_json(&settings.placement_key).await; + let mut stream = match watch { + Ok(s) => s, + Err(_) => { + tokio::time::sleep(Duration::from_secs(1)).await; + continue; + } + }; + + while let Some(update) = stream.next().await { + if let Ok(value) = update { + apply_placement_value(&placement, &settings.shard_id, value).await; + } + } + + tokio::time::sleep(Duration::from_secs(1)).await; + } + }); +} + +async fn apply_placement_value( + placement: &TenantPlacementManager, + shard_id: &str, + value: serde_json::Value, +) { + if let Some(map) = value.as_object() { + let 
placement_map = map + .iter() + .filter_map(|(k, v)| v.as_str().map(|s| (k.clone(), s.to_string()))) + .collect::>(); + placement + .apply_placement_map(shard_id, &placement_map) + .await; + return; + } + + if let Some(map) = value.get("placement").and_then(|v| v.as_object()) { + let placement_map = map + .iter() + .filter_map(|(k, v)| v.as_str().map(|s| (k.clone(), s.to_string()))) + .collect::>(); + placement + .apply_placement_map(shard_id, &placement_map) + .await; + } +} + +#[cfg(test)] +mod tests { + #[test] + fn binary_exists() { + assert!(std::env::current_exe().is_ok()); + } +} diff --git a/aggregate/src/observability/metrics.rs b/aggregate/src/observability/metrics.rs new file mode 100644 index 0000000..ec66651 --- /dev/null +++ b/aggregate/src/observability/metrics.rs @@ -0,0 +1,365 @@ +use std::collections::HashMap; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::RwLock; +use std::time::Duration; + +pub trait MetricsRegistry: Send + Sync { + fn increment_counter(&self, name: &str, labels: &[(&str, &str)]); + fn record_histogram(&self, name: &str, value: f64, labels: &[(&str, &str)]); + fn export_prometheus(&self) -> String; +} + +#[derive(Debug)] +struct AtomicHistogram { + count: AtomicU64, + sum: AtomicU64, + buckets: Vec<(f64, AtomicU64)>, +} + +impl AtomicHistogram { + fn new() -> Self { + let buckets: Vec<(f64, AtomicU64)> = vec![ + 0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, + ] + .into_iter() + .map(|v| (v, AtomicU64::new(0))) + .collect(); + + Self { + count: AtomicU64::new(0), + sum: AtomicU64::new(0), + buckets, + } + } + + fn observe(&self, duration: Duration) { + let value_ms = duration.as_secs_f64() * 1000.0; + self.count.fetch_add(1, Ordering::Relaxed); + self.sum + .fetch_add((value_ms * 1000.0) as u64, Ordering::Relaxed); + + for (threshold, count) in &self.buckets { + if value_ms <= *threshold { + count.fetch_add(1, Ordering::Relaxed); + } + } + } + + fn export(&self, name: &str, labels: &str) -> 
String { + let mut output = String::new(); + let count = self.count.load(Ordering::Relaxed); + let sum = self.sum.load(Ordering::Relaxed) as f64 / 1000.0; + + let label_str = if labels.is_empty() { + String::new() + } else { + format!("{{{}}}", labels.trim_start_matches(',')) + }; + + output.push_str(&format!("{}_sum{} {}\n", name, label_str, sum)); + output.push_str(&format!("{}_count{} {}\n", name, label_str, count)); + + for (threshold, bucket_count) in &self.buckets { + let c = bucket_count.load(Ordering::Relaxed); + let bucket_labels = if labels.is_empty() { + format!("le=\"{}\"", threshold) + } else { + format!("le=\"{}\"{}", threshold, labels) + }; + output.push_str(&format!("{}_bucket{{{}}} {}\n", name, bucket_labels, c)); + } + let inf_labels = if labels.is_empty() { + "le=\"+Inf\"".to_string() + } else { + format!("le=\"+Inf\"{}", labels) + }; + output.push_str(&format!("{}_bucket{{{}}} {}\n", name, inf_labels, count)); + + output + } +} + +impl Default for AtomicHistogram { + fn default() -> Self { + Self::new() + } +} + +#[derive(Debug)] +pub struct Metrics { + commands_total: RwLock>, + command_errors_total: RwLock>, + command_duration: RwLock>, + version_conflicts: AtomicU64, + tenant_errors: AtomicU64, + rehydration_duration: RwLock>, + in_flight: RwLock>, +} + +impl Metrics { + pub fn new() -> Self { + Self { + commands_total: RwLock::new(HashMap::new()), + command_errors_total: RwLock::new(HashMap::new()), + command_duration: RwLock::new(HashMap::new()), + version_conflicts: AtomicU64::new(0), + tenant_errors: AtomicU64::new(0), + rehydration_duration: RwLock::new(HashMap::new()), + in_flight: RwLock::new(HashMap::new()), + } + } + + pub fn increment_commands_total(&self, aggregate_type: &str, tenant_id: &str) { + let key = format!("{}:{}", aggregate_type, tenant_id); + let map = self.commands_total.read().unwrap(); + if let Some(counter) = map.get(&key) { + counter.fetch_add(1, Ordering::Relaxed); + return; + } + drop(map); + let mut map = 
self.commands_total.write().unwrap(); + let counter = map.entry(key).or_insert_with(|| AtomicU64::new(0)); + counter.fetch_add(1, Ordering::Relaxed); + } + + pub fn increment_command_errors_total( + &self, + aggregate_type: &str, + tenant_id: &str, + error_kind: &str, + ) { + let key = format!("{}:{}:{}", aggregate_type, tenant_id, error_kind); + let map = self.command_errors_total.read().unwrap(); + if let Some(counter) = map.get(&key) { + counter.fetch_add(1, Ordering::Relaxed); + return; + } + drop(map); + let mut map = self.command_errors_total.write().unwrap(); + let counter = map.entry(key).or_insert_with(|| AtomicU64::new(0)); + counter.fetch_add(1, Ordering::Relaxed); + } + + pub fn record_command_duration(&self, duration: Duration, aggregate_type: &str) { + let mut map = self.command_duration.write().unwrap(); + let histogram = map.entry(aggregate_type.to_string()).or_default(); + histogram.observe(duration); + } + + pub fn increment_version_conflicts(&self) { + self.version_conflicts.fetch_add(1, Ordering::Relaxed); + } + + pub fn increment_tenant_errors(&self) { + self.tenant_errors.fetch_add(1, Ordering::Relaxed); + } + + pub fn record_rehydration_duration(&self, duration: Duration, aggregate_type: &str) { + let mut map = self.rehydration_duration.write().unwrap(); + let histogram = map.entry(aggregate_type.to_string()).or_default(); + histogram.observe(duration); + } + + pub fn set_in_flight(&self, tenant_id: &str, value: u64) { + let map = self.in_flight.read().unwrap(); + if let Some(gauge) = map.get(tenant_id) { + gauge.store(value, Ordering::Relaxed); + return; + } + drop(map); + let mut map = self.in_flight.write().unwrap(); + let gauge = map + .entry(tenant_id.to_string()) + .or_insert_with(|| AtomicU64::new(0)); + gauge.store(value, Ordering::Relaxed); + } + + pub fn export_prometheus(&self) -> String { + let mut output = String::new(); + + output.push_str("# HELP aggregate_commands_total Total number of commands processed\n"); + 
output.push_str("# TYPE aggregate_commands_total counter\n"); + { + let map = self.commands_total.read().unwrap(); + for (key, counter) in map.iter() { + let parts: Vec<&str> = key.split(':').collect(); + if parts.len() == 2 { + let value = counter.load(Ordering::Relaxed); + output.push_str(&format!( + "aggregate_commands_total{{aggregate_type=\"{}\",tenant_id=\"{}\"}} {}\n", + parts[0], parts[1], value + )); + } + } + } + + output.push_str("\n# HELP aggregate_command_errors_total Total number of command errors\n"); + output.push_str("# TYPE aggregate_command_errors_total counter\n"); + { + let map = self.command_errors_total.read().unwrap(); + for (key, counter) in map.iter() { + let parts: Vec<&str> = key.split(':').collect(); + if parts.len() == 3 { + let value = counter.load(Ordering::Relaxed); + output.push_str(&format!( + "aggregate_command_errors_total{{aggregate_type=\"{}\",tenant_id=\"{}\",error_kind=\"{}\"}} {}\n", + parts[0], parts[1], parts[2], value + )); + } + } + } + + output + .push_str("\n# HELP aggregate_command_duration_seconds Command processing duration\n"); + output.push_str("# TYPE aggregate_command_duration_seconds histogram\n"); + { + let map = self.command_duration.read().unwrap(); + for (aggregate_type, histogram) in map.iter() { + let labels = format!(",aggregate_type=\"{}\"", aggregate_type); + output.push_str(&histogram.export("aggregate_command_duration_seconds", &labels)); + } + } + + output + .push_str("\n# HELP aggregate_version_conflicts_total Total version conflict errors\n"); + output.push_str("# TYPE aggregate_version_conflicts_total counter\n"); + output.push_str(&format!( + "aggregate_version_conflicts_total {}\n", + self.version_conflicts.load(Ordering::Relaxed) + )); + + output + .push_str("\n# HELP aggregate_tenant_errors_total Total tenant access denied errors\n"); + output.push_str("# TYPE aggregate_tenant_errors_total counter\n"); + output.push_str(&format!( + "aggregate_tenant_errors_total {}\n", + 
self.tenant_errors.load(Ordering::Relaxed) + )); + + output.push_str( + "\n# HELP aggregate_rehydration_duration_seconds Aggregate rehydration duration\n", + ); + output.push_str("# TYPE aggregate_rehydration_duration_seconds histogram\n"); + { + let map = self.rehydration_duration.read().unwrap(); + for (aggregate_type, histogram) in map.iter() { + let labels = format!(",aggregate_type=\"{}\"", aggregate_type); + output + .push_str(&histogram.export("aggregate_rehydration_duration_seconds", &labels)); + } + } + + output.push_str( + "\n# HELP aggregate_in_flight_commands Number of in-flight commands by tenant\n", + ); + output.push_str("# TYPE aggregate_in_flight_commands gauge\n"); + { + let map = self.in_flight.read().unwrap(); + for (tenant_id, gauge) in map.iter() { + let value = gauge.load(Ordering::Relaxed); + output.push_str(&format!( + "aggregate_in_flight_commands{{tenant_id=\"{}\"}} {}\n", + tenant_id, value + )); + } + } + + output + } +} + +impl Default for Metrics { + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::time::Duration; + + #[test] + fn metrics_increment_commands() { + let metrics = Metrics::new(); + metrics.increment_commands_total("Account", "tenant-a"); + metrics.increment_commands_total("Account", "tenant-a"); + metrics.increment_commands_total("Account", "tenant-b"); + + let output = metrics.export_prometheus(); + assert!(output.contains( + "aggregate_commands_total{aggregate_type=\"Account\",tenant_id=\"tenant-a\"} 2" + )); + assert!(output.contains( + "aggregate_commands_total{aggregate_type=\"Account\",tenant_id=\"tenant-b\"} 1" + )); + } + + #[test] + fn metrics_records_version_conflicts() { + let metrics = Metrics::new(); + metrics.increment_version_conflicts(); + metrics.increment_version_conflicts(); + + let output = metrics.export_prometheus(); + assert!(output.contains("aggregate_version_conflicts_total 2")); + } + + #[test] + fn metrics_records_tenant_errors() { + let metrics 
= Metrics::new(); + metrics.increment_tenant_errors(); + + let output = metrics.export_prometheus(); + assert!(output.contains("aggregate_tenant_errors_total 1")); + } + + #[test] + fn metrics_records_command_errors_with_labels() { + let metrics = Metrics::new(); + metrics.increment_command_errors_total("Account", "tenant-a", "tenant_not_hosted"); + metrics.increment_command_errors_total("Account", "tenant-a", "tenant_not_hosted"); + + let output = metrics.export_prometheus(); + assert!(output.contains("aggregate_command_errors_total{aggregate_type=\"Account\",tenant_id=\"tenant-a\",error_kind=\"tenant_not_hosted\"} 2")); + } + + #[test] + fn metrics_records_command_duration() { + let metrics = Metrics::new(); + metrics.record_command_duration(Duration::from_millis(50), "Account"); + + let output = metrics.export_prometheus(); + assert!(output.contains("aggregate_command_duration_seconds")); + assert!(output + .contains("aggregate_command_duration_seconds_count{aggregate_type=\"Account\"} 1")); + } + + #[test] + fn metrics_records_rehydration_duration() { + let metrics = Metrics::new(); + metrics.record_rehydration_duration(Duration::from_millis(100), "Account"); + + let output = metrics.export_prometheus(); + assert!(output.contains("aggregate_rehydration_duration_seconds")); + assert!(output.contains( + "aggregate_rehydration_duration_seconds_count{aggregate_type=\"Account\"} 1" + )); + } + + #[test] + fn metrics_export_prometheus_format() { + let metrics = Metrics::new(); + metrics.increment_commands_total("Account", "tenant-a"); + + let output = metrics.export_prometheus(); + assert!(output.contains("# HELP aggregate_commands_total")); + assert!(output.contains("# TYPE aggregate_commands_total counter")); + } + + #[test] + fn metrics_is_send_sync() { + fn assert_send_sync() {} + assert_send_sync::(); + } +} diff --git a/aggregate/src/observability/mod.rs b/aggregate/src/observability/mod.rs new file mode 100644 index 0000000..53c2ec9 --- /dev/null +++ 
/// Static configuration for the observability subsystem.
#[derive(Debug, Clone)]
pub struct ObservabilityConfig {
    /// Logical service name reported with telemetry.
    pub service_name: String,
    /// Deployment environment label (e.g. "development", "production").
    pub environment: String,
    /// Whether metric collection is enabled.
    pub enable_metrics: bool,
}

impl Default for ObservabilityConfig {
    fn default() -> Self {
        ObservabilityConfig {
            service_name: String::from("aggregate"),
            environment: String::from("development"),
            enable_metrics: true,
        }
    }
}

impl ObservabilityConfig {
    /// Builder-style setter for the service name.
    pub fn with_service_name(mut self, name: impl Into<String>) -> Self {
        self.service_name = name.into();
        self
    }

    /// Builder-style setter for the environment label.
    pub fn with_environment(mut self, env: impl Into<String>) -> Self {
        self.environment = env.into();
        self
    }

    /// Disable metric collection.
    // NOTE(review): `enable_metrics` is carried in the config but the visible
    // code never reads it when constructing `Observability` — confirm it is
    // consulted elsewhere.
    pub fn without_metrics(mut self) -> Self {
        self.enable_metrics = false;
        self
    }
}

/// Per-command tracing context: identifies the command being handled and
/// records when handling started so its duration can be measured via
/// `elapsed()`.
#[derive(Debug, Clone)]
pub struct CommandSpan {
    aggregate_id: String,
    aggregate_type: String,
    tenant_id: String,
    command_id: String,
    correlation_id: Option<String>,
    trace_id: Option<String>,
    // Captured at construction; all duration reporting is relative to this.
    start_time: Instant,
}

impl CommandSpan {
    /// Create a span and start its clock.
    pub fn new(
        aggregate_id: impl Into<String>,
        aggregate_type: impl Into<String>,
        tenant_id: impl Into<String>,
        command_id: impl Into<String>,
        correlation_id: Option<String>,
        trace_id: Option<String>,
    ) -> Self {
        CommandSpan {
            aggregate_id: aggregate_id.into(),
            aggregate_type: aggregate_type.into(),
            tenant_id: tenant_id.into(),
            command_id: command_id.into(),
            correlation_id,
            trace_id,
            start_time: Instant::now(),
        }
    }

    /// Time elapsed since the span was created.
    pub fn elapsed(&self) -> std::time::Duration {
        self.start_time.elapsed()
    }

    pub fn aggregate_id(&self) -> &str {
        self.aggregate_id.as_str()
    }

    pub fn aggregate_type(&self) -> &str {
        self.aggregate_type.as_str()
    }

    pub fn tenant_id(&self) -> &str {
        self.tenant_id.as_str()
    }

    pub fn command_id(&self) -> &str {
        self.command_id.as_str()
    }

    pub fn correlation_id(&self) -> Option<&str> {
        self.correlation_id.as_deref()
    }

    pub fn trace_id(&self) -> Option<&str> {
        self.trace_id.as_deref()
    }
}
struct Observability { + config: ObservabilityConfig, + metrics: Arc, +} + +impl Observability { + pub fn new(config: ObservabilityConfig) -> Self { + let metrics = Arc::new(Metrics::new()); + Self { config, metrics } + } + + pub fn config(&self) -> &ObservabilityConfig { + &self.config + } + + pub fn metrics(&self) -> &Arc { + &self.metrics + } + + pub fn start_command_span( + &self, + aggregate_id: &str, + aggregate_type: &str, + tenant_id: &str, + command_id: &str, + correlation_id: Option<&str>, + trace_id: Option<&str>, + ) -> CommandSpan { + tracing::info_span!( + "command", + aggregate_id = %aggregate_id, + aggregate_type = %aggregate_type, + tenant_id = %tenant_id, + command_id = %command_id, + correlation_id = correlation_id.unwrap_or(""), + trace_id = trace_id.unwrap_or(""), + ); + + CommandSpan::new( + aggregate_id, + aggregate_type, + tenant_id, + command_id, + correlation_id.map(|s| s.to_string()), + trace_id.map(|s| s.to_string()), + ) + } + + pub fn record_command_success(&self, span: &CommandSpan, events_count: usize) { + self.metrics + .increment_commands_total(&span.aggregate_type, &span.tenant_id); + self.metrics + .record_command_duration(span.elapsed(), &span.aggregate_type); + + tracing::info!( + aggregate_id = %span.aggregate_id(), + aggregate_type = %span.aggregate_type(), + tenant_id = %span.tenant_id(), + command_id = %span.command_id(), + correlation_id = span.correlation_id().unwrap_or(""), + trace_id = span.trace_id().unwrap_or(""), + events_count = events_count, + duration_ms = span.elapsed().as_millis() as u64, + "Command handled successfully" + ); + } + + pub fn record_command_error(&self, span: &CommandSpan, error: &crate::types::AggregateError) { + self.metrics + .increment_commands_total(&span.aggregate_type, &span.tenant_id); + self.metrics + .record_command_duration(span.elapsed(), &span.aggregate_type); + + self.metrics.increment_command_errors_total( + &span.aggregate_type, + &span.tenant_id, + error_kind(error), + ); + + if 
matches!( + error, + crate::types::AggregateError::TenantAccessDenied { .. } + | crate::types::AggregateError::TenantNotHosted { .. } + | crate::types::AggregateError::TenantDraining { .. } + ) { + self.metrics.increment_tenant_errors(); + } + + if matches!(error, crate::types::AggregateError::VersionConflict { .. }) { + self.metrics.increment_version_conflicts(); + } + + tracing::error!( + aggregate_id = %span.aggregate_id(), + aggregate_type = %span.aggregate_type(), + tenant_id = %span.tenant_id(), + command_id = %span.command_id(), + correlation_id = span.correlation_id().unwrap_or(""), + trace_id = span.trace_id().unwrap_or(""), + error = %error, + duration_ms = span.elapsed().as_millis() as u64, + "Command handling failed" + ); + } + + pub fn record_rehydration(&self, duration: std::time::Duration, aggregate_type: &str) { + self.metrics + .record_rehydration_duration(duration, aggregate_type); + + tracing::debug!( + aggregate_type = %aggregate_type, + duration_ms = duration.as_millis() as u64, + "Aggregate rehydrated" + ); + } + + pub fn export_metrics(&self) -> String { + self.metrics.export_prometheus() + } +} + +impl Default for Observability { + fn default() -> Self { + Self::new(ObservabilityConfig::default()) + } +} + +fn error_kind(error: &crate::types::AggregateError) -> &'static str { + match error { + crate::types::AggregateError::TenantAccessDenied { .. } => "tenant_access_denied", + crate::types::AggregateError::TenantNotHosted { .. } => "tenant_not_hosted", + crate::types::AggregateError::TenantDraining { .. } => "tenant_draining", + crate::types::AggregateError::ValidationError(_) => "validation", + crate::types::AggregateError::VersionConflict { .. 
} => "version_conflict", + crate::types::AggregateError::StorageError(_) => "storage", + crate::types::AggregateError::StreamError(_) => "stream", + crate::types::AggregateError::RehydrationError(_) => "rehydration", + crate::types::AggregateError::DecideError(_) => "decide", + crate::types::AggregateError::ApplyError(_) => "apply", + crate::types::AggregateError::NotFound(_) => "not_found", + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::types::{AggregateError, TenantId}; + + #[test] + fn observability_config_defaults() { + let config = ObservabilityConfig::default(); + assert_eq!(config.service_name, "aggregate"); + assert_eq!(config.environment, "development"); + assert!(config.enable_metrics); + } + + #[test] + fn observability_config_builder() { + let config = ObservabilityConfig::default() + .with_service_name("my-service") + .with_environment("production") + .without_metrics(); + + assert_eq!(config.service_name, "my-service"); + assert_eq!(config.environment, "production"); + assert!(!config.enable_metrics); + } + + #[test] + fn command_span_tracks_elapsed_time() { + let span = CommandSpan::new("agg-123", "Account", "tenant-a", "cmd-456", None, None); + std::thread::sleep(std::time::Duration::from_millis(10)); + assert!(span.elapsed() >= std::time::Duration::from_millis(10)); + } + + #[test] + fn observability_records_success() { + let obs = Observability::default(); + let span = obs.start_command_span("agg-123", "Account", "tenant-a", "cmd-456", None, None); + + obs.record_command_success(&span, 3); + + let metrics = obs.export_metrics(); + assert!(metrics.contains("aggregate_commands_total")); + } + + #[test] + fn observability_records_tenant_error() { + let obs = Observability::default(); + let span = obs.start_command_span("agg-123", "Account", "tenant-a", "cmd-456", None, None); + + let error = AggregateError::TenantAccessDenied { + tenant_id: TenantId::new("other-tenant"), + }; + obs.record_command_error(&span, &error); + + let 
metrics = obs.export_metrics(); + assert!(metrics.contains("aggregate_tenant_errors_total")); + } + + #[test] + fn observability_records_version_conflict() { + let obs = Observability::default(); + let span = obs.start_command_span("agg-123", "Account", "tenant-a", "cmd-456", None, None); + + let error = AggregateError::VersionConflict { + expected: crate::types::Version::from(5), + actual: crate::types::Version::from(4), + }; + obs.record_command_error(&span, &error); + + let metrics = obs.export_metrics(); + assert!(metrics.contains("aggregate_version_conflicts_total")); + } + + #[test] + fn observability_is_send_sync() { + fn assert_send_sync() {} + assert_send_sync::(); + assert_send_sync::(); + } +} diff --git a/aggregate/src/placement.rs b/aggregate/src/placement.rs new file mode 100644 index 0000000..903d525 --- /dev/null +++ b/aggregate/src/placement.rs @@ -0,0 +1,267 @@ +use crate::observability::Observability; +use crate::types::{AggregateError, TenantId}; +use serde::{Deserialize, Serialize}; +use std::collections::{HashMap, HashSet}; +use std::sync::Arc; +use tokio::sync::RwLock; + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct TenantStatus { + pub tenant_id: TenantId, + pub hosted: bool, + pub accepting: bool, + pub draining: bool, + pub in_flight: u64, +} + +pub struct TenantPlacementManager { + hosted: RwLock>, + draining: RwLock>, + in_flight: RwLock>, + observability: Arc, +} + +impl TenantPlacementManager { + pub fn new(observability: Arc) -> Self { + Self { + hosted: RwLock::new(HashSet::new()), + draining: RwLock::new(HashSet::new()), + in_flight: RwLock::new(HashMap::new()), + observability, + } + } + + pub async fn set_hosted_tenants(&self, tenant_ids: impl IntoIterator) { + let mut hosted = self.hosted.write().await; + hosted.clear(); + hosted.extend(tenant_ids); + } + + pub async fn apply_placement_map(&self, shard_id: &str, placement: &HashMap) { + let tenants = placement + .iter() + .filter_map(|(tenant_id, 
assigned)| { + if assigned == shard_id { + Some(tenant_id.clone()) + } else { + None + } + }) + .collect::>(); + + self.set_hosted_tenants(tenants).await; + } + + pub async fn is_hosted(&self, tenant_id: &TenantId) -> bool { + if tenant_id.as_str().is_empty() { + return true; + } + self.hosted.read().await.contains(tenant_id.as_str()) + } + + pub async fn is_draining(&self, tenant_id: &TenantId) -> bool { + self.draining.read().await.contains(tenant_id.as_str()) + } + + pub async fn begin_command( + self: &Arc, + tenant_id: &TenantId, + ) -> Result { + if !self.is_hosted(tenant_id).await { + return Err(AggregateError::TenantNotHosted { + tenant_id: tenant_id.clone(), + }); + } + + if self.is_draining(tenant_id).await { + return Err(AggregateError::TenantDraining { + tenant_id: tenant_id.clone(), + }); + } + + let mut map = self.in_flight.write().await; + let counter = map.entry(tenant_id.as_str().to_string()).or_insert(0); + *counter += 1; + let value = *counter; + drop(map); + + self.observability + .metrics() + .set_in_flight(tenant_id.as_str(), value); + + Ok(TenantCommandGuard { + tenant_id: tenant_id.clone(), + manager: self.clone(), + }) + } + + pub async fn drain_tenant(&self, tenant_id: &TenantId) { + if tenant_id.as_str().is_empty() { + return; + } + let mut draining = self.draining.write().await; + draining.insert(tenant_id.as_str().to_string()); + } + + pub async fn undrain_tenant(&self, tenant_id: &TenantId) { + let mut draining = self.draining.write().await; + draining.remove(tenant_id.as_str()); + } + + pub async fn wait_drained(&self, tenant_id: &TenantId) { + loop { + let in_flight = self + .in_flight + .read() + .await + .get(tenant_id.as_str()) + .copied() + .unwrap_or(0); + if in_flight == 0 { + break; + } + tokio::time::sleep(std::time::Duration::from_millis(10)).await; + } + } + + pub async fn wait_drained_with_timeout( + &self, + tenant_id: &TenantId, + timeout: std::time::Duration, + ) -> bool { + let deadline = tokio::time::Instant::now() + 
timeout; + loop { + let in_flight = self + .in_flight + .read() + .await + .get(tenant_id.as_str()) + .copied() + .unwrap_or(0); + if in_flight == 0 { + return true; + } + if tokio::time::Instant::now() >= deadline { + return false; + } + tokio::time::sleep(std::time::Duration::from_millis(10)).await; + } + } + + pub async fn tenant_status(&self, tenant_id: &TenantId) -> TenantStatus { + let hosted = self.is_hosted(tenant_id).await; + let draining = self.is_draining(tenant_id).await; + let in_flight = self + .in_flight + .read() + .await + .get(tenant_id.as_str()) + .copied() + .unwrap_or(0); + TenantStatus { + tenant_id: tenant_id.clone(), + hosted, + accepting: hosted && !draining, + draining, + in_flight, + } + } + + pub async fn hosted_tenants(&self) -> Vec { + let hosted = self.hosted.read().await; + hosted.iter().map(TenantId::new).collect() + } + + pub async fn all_statuses(&self) -> Vec { + let hosted = self.hosted.read().await.clone(); + let draining = self.draining.read().await.clone(); + let in_flight = self.in_flight.read().await.clone(); + + hosted + .into_iter() + .map(|id| { + let tenant_id = TenantId::new(id.clone()); + let d = draining.contains(&id); + let f = in_flight.get(&id).copied().unwrap_or(0); + TenantStatus { + tenant_id, + hosted: true, + accepting: !d, + draining: d, + in_flight: f, + } + }) + .collect() + } + + async fn finish_command(&self, tenant_id: &TenantId) { + let mut map = self.in_flight.write().await; + let counter = map.entry(tenant_id.as_str().to_string()).or_insert(0); + if *counter > 0 { + *counter -= 1; + } + let value = *counter; + drop(map); + + self.observability + .metrics() + .set_in_flight(tenant_id.as_str(), value); + } +} + +pub struct TenantCommandGuard { + tenant_id: TenantId, + manager: Arc, +} + +impl Drop for TenantCommandGuard { + fn drop(&mut self) { + let tenant_id = self.tenant_id.clone(); + let manager = self.manager.clone(); + tokio::spawn(async move { + manager.finish_command(&tenant_id).await; + }); + 
} +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::observability::Observability; + + #[tokio::test] + async fn placement_rejects_unhosted_tenant() { + let obs = Arc::new(Observability::default()); + let mgr = Arc::new(TenantPlacementManager::new(obs)); + mgr.set_hosted_tenants(vec!["tenant-a".to_string()]).await; + + let err = match mgr.begin_command(&TenantId::new("tenant-b")).await { + Ok(_) => panic!("expected error"), + Err(e) => e, + }; + assert!(matches!(err, AggregateError::TenantNotHosted { .. })); + } + + #[tokio::test] + async fn drain_blocks_new_commands_until_in_flight_zero() { + let obs = Arc::new(Observability::default()); + let mgr = Arc::new(TenantPlacementManager::new(obs)); + mgr.set_hosted_tenants(vec!["tenant-a".to_string()]).await; + + let guard = mgr.begin_command(&TenantId::new("tenant-a")).await.unwrap(); + mgr.drain_tenant(&TenantId::new("tenant-a")).await; + let err = match mgr.begin_command(&TenantId::new("tenant-a")).await { + Ok(_) => panic!("expected error"), + Err(e) => e, + }; + assert!(matches!(err, AggregateError::TenantDraining { .. })); + + drop(guard); + mgr.wait_drained(&TenantId::new("tenant-a")).await; + let err = match mgr.begin_command(&TenantId::new("tenant-a")).await { + Ok(_) => panic!("expected error"), + Err(e) => e, + }; + assert!(matches!(err, AggregateError::TenantDraining { .. 
})); + } +} diff --git a/aggregate/src/query/client.rs b/aggregate/src/query/client.rs new file mode 100644 index 0000000..b0d71ad --- /dev/null +++ b/aggregate/src/query/client.rs @@ -0,0 +1,594 @@ +use super::{AggregateProjection, QueryRequest, QueryResponse}; +use crate::types::TenantId; +use futures::stream::Stream; +use serde_json::Value as JsonValue; +use std::collections::HashMap; +use std::pin::Pin; +use std::sync::Arc; +use thiserror::Error; +use tokio::sync::broadcast; +use tokio::sync::RwLock; + +#[derive(Debug, Error)] +pub enum QueryError { + #[error("Query syntax error: {0}")] + SyntaxError(String), + + #[error("Connection error: {0}")] + ConnectionError(String), + + #[error("Tenant not found: {0}")] + TenantNotFound(String), + + #[error("Internal error: {0}")] + InternalError(String), +} + +pub type QueryResult = Result; + +#[derive(Debug, Clone)] +pub struct QueryConfig { + pub endpoint: Option, + pub embedded: bool, + pub cache_size: usize, + pub cache_ttl_seconds: u64, +} + +impl Default for QueryConfig { + fn default() -> Self { + Self { + endpoint: None, + embedded: true, + cache_size: 1000, + cache_ttl_seconds: 60, + } + } +} + +impl QueryConfig { + pub fn embedded() -> Self { + Self { + embedded: true, + ..Default::default() + } + } + + pub fn remote(endpoint: impl Into) -> Self { + Self { + endpoint: Some(endpoint.into()), + embedded: false, + ..Default::default() + } + } +} + +#[derive(Debug, Clone)] +struct CacheEntry { + projection: AggregateProjection, + inserted_at: std::time::Instant, +} + +#[derive(Debug, Clone)] +pub struct QueryClient { + config: QueryConfig, + storage: Arc>>>, + cache: Arc>>, + updates: broadcast::Sender, +} + +impl QueryClient { + pub fn new(config: QueryConfig) -> Self { + let cache = lru::LruCache::new( + std::num::NonZeroUsize::new(config.cache_size) + .unwrap_or_else(|| std::num::NonZeroUsize::new(1000).unwrap()), + ); + let (updates, _) = broadcast::channel(1024); + + Self { + config, + storage: 
Arc::new(RwLock::new(HashMap::new())), + cache: Arc::new(RwLock::new(cache)), + updates, + } + } + + pub fn embedded() -> Self { + Self::new(QueryConfig::embedded()) + } + + fn make_key(tenant_id: &str, aggregate_id: &str) -> String { + format!("{}:{}", tenant_id, aggregate_id) + } + + pub async fn index(&self, projection: AggregateProjection) -> QueryResult<()> { + let key = Self::make_key(&projection.tenant_id, &projection.aggregate_id); + + let _ = self.updates.send(projection.clone()); + + { + let mut cache = self.cache.write().await; + cache.put( + key.clone(), + CacheEntry { + projection: projection.clone(), + inserted_at: std::time::Instant::now(), + }, + ); + } + + let mut storage = self.storage.write().await; + let tenant_projections = storage.entry(projection.tenant_id.clone()).or_default(); + + if let Some(existing) = tenant_projections + .iter_mut() + .find(|p| p.aggregate_id == projection.aggregate_id) + { + *existing = projection; + } else { + tenant_projections.push(projection); + } + + Ok(()) + } + + pub fn subscribe( + &self, + tenant_id: TenantId, + ) -> Pin + Send>> { + let tenant_id = tenant_id.as_str().to_string(); + let receiver = self.updates.subscribe(); + + Box::pin(futures::stream::unfold( + (receiver, tenant_id), + |(mut receiver, tenant_id)| async move { + loop { + match receiver.recv().await { + Ok(proj) => { + if proj.tenant_id == tenant_id { + return Some((proj, (receiver, tenant_id))); + } + } + Err(broadcast::error::RecvError::Lagged(_)) => continue, + Err(broadcast::error::RecvError::Closed) => return None, + } + } + }, + )) + } + + pub async fn query(&self, request: QueryRequest) -> QueryResult { + let storage = self.storage.read().await; + + let tenant_projections = storage.get(&request.tenant_id); + + let projections: Vec = match tenant_projections { + Some(projs) => { + let mut filtered: Vec<_> = projs + .iter() + .filter(|p| { + if let Some(ref at) = request.aggregate_type { + &p.aggregate_type == at + } else { + true + } + }) 
+ .filter(|p| { + if let Some(ref filter) = request.filter { + self.evaluate_filter(&p.state, filter).unwrap_or(false) + } else { + true + } + }) + .cloned() + .collect(); + + filtered.sort_by(|a, b| b.updated_at.cmp(&a.updated_at)); + filtered + } + None => Vec::new(), + }; + + let total = projections.len(); + let offset = request.offset.unwrap_or(0); + let limit = request.limit.unwrap_or(100); + + let results: Vec = + projections.into_iter().skip(offset).take(limit).collect(); + + Ok(QueryResponse::from_results(results, total, Some(limit))) + } + + pub async fn get( + &self, + tenant_id: &TenantId, + aggregate_id: &str, + ) -> QueryResult> { + let key = Self::make_key(tenant_id.as_str(), aggregate_id); + + { + let mut cache = self.cache.write().await; + if let Some(entry) = cache.get(&key) { + let elapsed = entry.inserted_at.elapsed().as_secs(); + if elapsed < self.config.cache_ttl_seconds { + return Ok(Some(entry.projection.clone())); + } + cache.pop(&key); + } + } + + let storage = self.storage.read().await; + let tenant_projections = storage.get(tenant_id.as_str()); + + Ok(tenant_projections.and_then(|projs| { + projs + .iter() + .find(|p| p.aggregate_id == aggregate_id) + .cloned() + })) + } + + pub async fn delete(&self, tenant_id: &TenantId, aggregate_id: &str) -> QueryResult { + let key = Self::make_key(tenant_id.as_str(), aggregate_id); + + { + let mut cache = self.cache.write().await; + cache.pop(&key); + } + + let mut storage = self.storage.write().await; + if let Some(tenant_projections) = storage.get_mut(tenant_id.as_str()) { + let len_before = tenant_projections.len(); + tenant_projections.retain(|p| p.aggregate_id != aggregate_id); + return Ok(tenant_projections.len() < len_before); + } + + Ok(false) + } + + pub async fn clear_tenant(&self, tenant_id: &TenantId) -> QueryResult { + let mut storage = self.storage.write().await; + let count = storage + .remove(tenant_id.as_str()) + .map(|v| v.len()) + .unwrap_or(0); + + let mut cache = 
self.cache.write().await;
        let prefix = format!("{}:", tenant_id.as_str());
        let keys_to_remove: Vec<_> = cache
            .iter()
            .filter(|(k, _)| k.starts_with(&prefix))
            .map(|(k, _)| k.clone())
            .collect();

        for key in keys_to_remove {
            cache.pop(&key);
        }

        Ok(count)
    }

    /// Evaluates a simple `field OP value` filter expression against a JSON state.
    ///
    /// Supported operators: `>=`, `<=`, `==`, `!=`, `>`, `<`. An empty filter or
    /// `"*"` matches everything; an unrecognized expression matches nothing.
    ///
    /// BUGFIX: two-character operators must be tried BEFORE their one-character
    /// prefixes. Splitting on `>` first broke `balance >= 10`: the value side
    /// became `= 10`, which parses as NaN and silently matched nothing.
    fn evaluate_filter(&self, state: &JsonValue, filter: &str) -> QueryResult<bool> {
        let filter = filter.trim();

        if filter.is_empty() || filter == "*" {
            return Ok(true);
        }

        // Two-character operators first (see note above).
        if let Some((field, value)) = filter.split_once(">=") {
            return self.compare_field(state, field.trim(), value.trim(), |a, b| a >= b);
        }

        if let Some((field, value)) = filter.split_once("<=") {
            return self.compare_field(state, field.trim(), value.trim(), |a, b| a <= b);
        }

        if let Some((field, value)) = filter.split_once("==") {
            return self.compare_field(state, field.trim(), value.trim(), |a, b| a == b);
        }

        if let Some((field, value)) = filter.split_once("!=") {
            return self.compare_field(state, field.trim(), value.trim(), |a, b| a != b);
        }

        if let Some((field, value)) = filter.split_once('>') {
            return self.compare_field(state, field.trim(), value.trim(), |a, b| a > b);
        }

        if let Some((field, value)) = filter.split_once('<') {
            return self.compare_field(state, field.trim(), value.trim(), |a, b| a < b);
        }

        // Unknown expression shape: match nothing rather than erroring.
        Ok(false)
    }

    /// Numerically compares `state[field]` against `value_str` using `compare`.
    ///
    /// JSON numbers are used directly; JSON strings are parsed as f64. A missing
    /// field, a non-numeric value, or an unparseable operand yields `Ok(false)`
    /// rather than an error, so a bad filter simply matches nothing.
    fn compare_field<F>(
        &self,
        state: &JsonValue,
        field: &str,
        value_str: &str,
        compare: F,
    ) -> QueryResult<bool>
    where
        F: Fn(f64, f64) -> bool,
    {
        let field_value = state.get(field);

        let field_num = match field_value {
            Some(JsonValue::Number(n)) => n.as_f64().unwrap_or(f64::NAN),
            Some(JsonValue::String(s)) => s.parse::<f64>().unwrap_or(f64::NAN),
            _ => return Ok(false),
        };

        let compare_num = value_str.parse::<f64>().unwrap_or(f64::NAN);

        if field_num.is_nan() || compare_num.is_nan() {
            return Ok(false);
        }

        Ok(compare(field_num, compare_num))
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use futures::StreamExt;
    use serde_json::json;

    fn create_test_client() -> QueryClient {
        QueryClient::embedded()
    }

    fn create_test_projection(tenant: &str, id: &str, balance: i64) -> AggregateProjection {
        AggregateProjection::new(tenant, id, "Account", 1, json!({"balance": balance}))
    }

    #[tokio::test]
    async fn query_client_index_and_query() {
        let client = create_test_client();

        let proj = create_test_projection("tenant-a", "acc-1", 100);
        client.index(proj).await.unwrap();

        let request = QueryRequest::new("tenant-a").with_filter("balance > 50");

        let response = client.query(request).await.unwrap();

        assert_eq!(response.results.len(), 1);
        assert_eq!(response.results[0].aggregate_id, "acc-1");
    }

    #[tokio::test]
    async fn query_client_tenant_isolation() {
        let client = create_test_client();

        client
            .index(create_test_projection("tenant-a", "acc-1", 100))
            .await
            .unwrap();
        client
            .index(create_test_projection("tenant-b", "acc-2", 200))
            .await
            .unwrap();

        let response_a = client.query(QueryRequest::new("tenant-a")).await.unwrap();
        let response_b = client.query(QueryRequest::new("tenant-b")).await.unwrap();

        assert_eq!(response_a.results.len(), 1);
        assert_eq!(response_b.results.len(), 1);
        assert_eq!(response_a.results[0].state["balance"], 100);
        assert_eq!(response_b.results[0].state["balance"], 200);
    }

    // Regression test for the operator-ordering bug: `>=` / `<=` previously
    // split on the one-character prefix and matched nothing.
    #[tokio::test]
    async fn query_client_filter_two_char_operators() {
        let client = create_test_client();

        client
            .index(create_test_projection("tenant-a", "acc-1", 100))
            .await
            .unwrap();

        let gte = client
            .query(QueryRequest::new("tenant-a").with_filter("balance >= 100"))
            .await
            .unwrap();
        assert_eq!(gte.results.len(), 1);

        let lte = client
            .query(QueryRequest::new("tenant-a").with_filter("balance <= 99"))
            .await
            .unwrap();
        assert_eq!(lte.results.len(), 0);
    }

    #[tokio::test]
    async fn query_client_filter_operations() {
        let client = create_test_client();

        client
            .index(create_test_projection("tenant-a", "acc-1", 100))
            .await
            .unwrap();
        client
            .index(create_test_projection("tenant-a", "acc-2", 50))
            .await
            .unwrap();
        client
            .index(create_test_projection("tenant-a", "acc-3", 150))
.await + .unwrap(); + + let gt_response = client + .query(QueryRequest::new("tenant-a").with_filter("balance > 75")) + .await + .unwrap(); + assert_eq!(gt_response.results.len(), 2); + + let lt_response = client + .query(QueryRequest::new("tenant-a").with_filter("balance < 75")) + .await + .unwrap(); + assert_eq!(lt_response.results.len(), 1); + + let eq_response = client + .query(QueryRequest::new("tenant-a").with_filter("balance == 100")) + .await + .unwrap(); + assert_eq!(eq_response.results.len(), 1); + } + + #[tokio::test] + async fn query_client_pagination() { + let client = create_test_client(); + + for i in 0..25 { + client + .index(create_test_projection( + "tenant-a", + &format!("acc-{}", i), + i * 10, + )) + .await + .unwrap(); + } + + let page1 = client + .query(QueryRequest::new("tenant-a").with_limit(10)) + .await + .unwrap(); + assert_eq!(page1.results.len(), 10); + assert!(page1.has_more); + + let page2 = client + .query(QueryRequest::new("tenant-a").with_limit(10).with_offset(10)) + .await + .unwrap(); + assert_eq!(page2.results.len(), 10); + + let page3 = client + .query(QueryRequest::new("tenant-a").with_limit(10).with_offset(20)) + .await + .unwrap(); + assert_eq!(page3.results.len(), 5); + assert!(!page3.has_more); + } + + #[tokio::test] + async fn query_client_get_by_id() { + let client = create_test_client(); + + client + .index(create_test_projection("tenant-a", "acc-1", 100)) + .await + .unwrap(); + + let tenant = TenantId::new("tenant-a"); + let result = client.get(&tenant, "acc-1").await.unwrap(); + + assert!(result.is_some()); + let proj = result.unwrap(); + assert_eq!(proj.aggregate_id, "acc-1"); + assert_eq!(proj.state["balance"], 100); + } + + #[tokio::test] + async fn query_client_delete() { + let client = create_test_client(); + + client + .index(create_test_projection("tenant-a", "acc-1", 100)) + .await + .unwrap(); + + let tenant = TenantId::new("tenant-a"); + let deleted = client.delete(&tenant, "acc-1").await.unwrap(); + 
assert!(deleted); + + let result = client.get(&tenant, "acc-1").await.unwrap(); + assert!(result.is_none()); + } + + #[tokio::test] + async fn query_client_clear_tenant() { + let client = create_test_client(); + + client + .index(create_test_projection("tenant-a", "acc-1", 100)) + .await + .unwrap(); + client + .index(create_test_projection("tenant-a", "acc-2", 200)) + .await + .unwrap(); + client + .index(create_test_projection("tenant-b", "acc-3", 300)) + .await + .unwrap(); + + let tenant = TenantId::new("tenant-a"); + let count = client.clear_tenant(&tenant).await.unwrap(); + assert_eq!(count, 2); + + let response_a = client.query(QueryRequest::new("tenant-a")).await.unwrap(); + assert_eq!(response_a.results.len(), 0); + + let response_b = client.query(QueryRequest::new("tenant-b")).await.unwrap(); + assert_eq!(response_b.results.len(), 1); + } + + #[tokio::test] + async fn query_client_update_existing() { + let client = create_test_client(); + + client + .index(create_test_projection("tenant-a", "acc-1", 100)) + .await + .unwrap(); + client + .index(AggregateProjection::new( + "tenant-a", + "acc-1", + "Account", + 2, + json!({"balance": 250}), + )) + .await + .unwrap(); + + let response = client.query(QueryRequest::new("tenant-a")).await.unwrap(); + assert_eq!(response.results.len(), 1); + assert_eq!(response.results[0].version, 2); + assert_eq!(response.results[0].state["balance"], 250); + } + + #[tokio::test] + async fn query_client_subscribe_receives_updates() { + let client = create_test_client(); + let mut updates = client.subscribe(TenantId::new("tenant-a")); + + client + .index(create_test_projection("tenant-a", "acc-1", 100)) + .await + .unwrap(); + + let next = updates.next().await.unwrap(); + assert_eq!(next.tenant_id, "tenant-a"); + assert_eq!(next.aggregate_id, "acc-1"); + assert_eq!(next.state["balance"], 100); + } + + #[test] + fn query_config_defaults() { + let config = QueryConfig::default(); + assert!(config.embedded); + 
assert!(config.endpoint.is_none()); + assert_eq!(config.cache_size, 1000); + assert_eq!(config.cache_ttl_seconds, 60); + } + + #[test] + fn query_request_builder() { + let request = QueryRequest::new("tenant-a") + .with_aggregate_type("Account") + .with_filter("balance > 100") + .with_limit(50) + .with_offset(10); + + assert_eq!(request.tenant_id, "tenant-a"); + assert_eq!(request.aggregate_type, Some("Account".to_string())); + assert_eq!(request.filter, Some("balance > 100".to_string())); + assert_eq!(request.limit, Some(50)); + assert_eq!(request.offset, Some(10)); + } +} diff --git a/aggregate/src/query/mod.rs b/aggregate/src/query/mod.rs new file mode 100644 index 0000000..7f36e29 --- /dev/null +++ b/aggregate/src/query/mod.rs @@ -0,0 +1,193 @@ +mod client; +mod projection; + +pub use client::{QueryClient, QueryConfig, QueryError, QueryResult}; +pub use projection::{ProjectionConfig, StateProjection}; + +use serde::{Deserialize, Serialize}; +use serde_json::Value as JsonValue; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AggregateProjection { + pub tenant_id: String, + pub aggregate_id: String, + pub aggregate_type: String, + pub version: u64, + pub state: JsonValue, + pub updated_at: chrono::DateTime, +} + +impl AggregateProjection { + pub fn new( + tenant_id: impl Into, + aggregate_id: impl Into, + aggregate_type: impl Into, + version: u64, + state: JsonValue, + ) -> Self { + Self { + tenant_id: tenant_id.into(), + aggregate_id: aggregate_id.into(), + aggregate_type: aggregate_type.into(), + version, + state, + updated_at: chrono::Utc::now(), + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct QueryRequest { + pub tenant_id: String, + pub aggregate_type: Option, + pub filter: Option, + pub limit: Option, + pub offset: Option, +} + +impl QueryRequest { + pub fn new(tenant_id: impl Into) -> Self { + Self { + tenant_id: tenant_id.into(), + aggregate_type: None, + filter: None, + limit: None, + offset: None, + } + } + + pub 
fn with_aggregate_type(mut self, aggregate_type: impl Into) -> Self { + self.aggregate_type = Some(aggregate_type.into()); + self + } + + pub fn with_filter(mut self, filter: impl Into) -> Self { + self.filter = Some(filter.into()); + self + } + + pub fn with_limit(mut self, limit: usize) -> Self { + self.limit = Some(limit); + self + } + + pub fn with_offset(mut self, offset: usize) -> Self { + self.offset = Some(offset); + self + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct QueryResponse { + pub results: Vec, + pub total: usize, + pub has_more: bool, +} + +impl QueryResponse { + pub fn empty() -> Self { + Self { + results: Vec::new(), + total: 0, + has_more: false, + } + } + + pub fn from_results( + results: Vec, + total: usize, + limit: Option, + ) -> Self { + let has_more = limit.is_some_and(|l| results.len() == l && total > results.len()); + Self { + results, + total, + has_more, + } + } +} + +#[derive(Debug, Clone)] +pub struct QueryServer { + query: QueryClient, +} + +impl QueryServer { + pub fn new(query: QueryClient) -> Self { + Self { query } + } + + pub fn query_client(&self) -> &QueryClient { + &self.query + } + + pub async fn handle(&self, request: QueryRequest) -> QueryResult { + self.query.query(request).await + } + + pub async fn handle_raw( + &self, + tenant_id: impl Into, + aggregate_type: Option, + filter: Option, + limit: Option, + offset: Option, + ) -> QueryResult { + let mut request = QueryRequest::new(tenant_id); + request.aggregate_type = aggregate_type; + request.filter = filter; + request.limit = limit; + request.offset = offset; + + self.handle(request).await + } +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[tokio::test] + async fn query_server_filters_by_tenant() { + let query = QueryClient::embedded(); + let server = QueryServer::new(query.clone()); + + query + .index(AggregateProjection::new( + "tenant-a", + "agg-1", + "Account", + 1, + json!({ "balance": 100 }), + )) + .await + 
.unwrap(); + + query + .index(AggregateProjection::new( + "tenant-b", + "agg-2", + "Account", + 1, + json!({ "balance": 200 }), + )) + .await + .unwrap(); + + let resp = server + .handle_raw( + "tenant-a", + Some("Account".to_string()), + Some("balance > 50".to_string()), + Some(100), + Some(0), + ) + .await + .unwrap(); + + assert_eq!(resp.total, 1); + assert_eq!(resp.results[0].tenant_id, "tenant-a"); + assert_eq!(resp.results[0].state["balance"], 100); + } +} diff --git a/aggregate/src/query/projection.rs b/aggregate/src/query/projection.rs new file mode 100644 index 0000000..5d75bd7 --- /dev/null +++ b/aggregate/src/query/projection.rs @@ -0,0 +1,217 @@ +use super::AggregateProjection; +use crate::types::{AggregateId, AggregateType, Event, TenantId, Version}; +use serde_json::Value as JsonValue; +use std::collections::HashMap; +use std::sync::Arc; +use tokio::sync::RwLock; + +#[derive(Debug, Clone)] +pub struct ProjectionConfig { + pub batch_size: usize, + pub projection_timeout_ms: u64, +} + +impl Default for ProjectionConfig { + fn default() -> Self { + Self { + batch_size: 100, + projection_timeout_ms: 5000, + } + } +} + +pub struct StateProjection { + config: ProjectionConfig, + handlers: Arc>>, +} + +type ProjectionHandler = Box Option + Send + Sync>; + +impl StateProjection { + pub fn new(config: ProjectionConfig) -> Self { + Self { + config, + handlers: Arc::new(RwLock::new(HashMap::new())), + } + } + + pub fn new_default() -> Self { + Self::new(ProjectionConfig::default()) + } + + pub async fn register_handler(&self, aggregate_type: &str, handler: F) + where + F: Fn(&Event) -> Option + Send + Sync + 'static, + { + let mut handlers = self.handlers.write().await; + handlers.insert(aggregate_type.to_string(), Box::new(handler)); + } + + pub async fn project_event(&self, event: &Event) -> Option { + let handlers = self.handlers.read().await; + let aggregate_type = event.aggregate_type.as_str(); + + handlers.get(aggregate_type).and_then(|h| h(event)) + } + + 
pub async fn project_events(&self, events: &[Event]) -> Vec { + let mut projections = Vec::with_capacity(events.len().min(self.config.batch_size)); + + for event in events.iter().take(self.config.batch_size) { + if let Some(proj) = self.project_event(event).await { + projections.push(proj); + } + } + + projections + } + + pub fn default_projection_from_event(event: &Event) -> AggregateProjection { + AggregateProjection::new( + event.tenant_id.as_str(), + event.aggregate_id.to_string(), + event.aggregate_type.as_str(), + event.version.as_u64(), + event.payload.clone(), + ) + } + + pub fn default_projection_from_state( + tenant_id: &TenantId, + aggregate_id: &AggregateId, + aggregate_type: &AggregateType, + version: &Version, + state: &JsonValue, + ) -> AggregateProjection { + AggregateProjection::new( + tenant_id.as_str(), + aggregate_id.to_string(), + aggregate_type.as_str(), + version.as_u64(), + state.clone(), + ) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use chrono::Utc; + use serde_json::json; + + fn create_test_event(tenant: &str, version: u64, event_type: &str) -> Event { + Event { + event_id: uuid::Uuid::now_v7(), + tenant_id: TenantId::new(tenant), + aggregate_id: AggregateId::new_v7(), + aggregate_type: AggregateType::from("Account"), + version: Version::from(version), + event_type: event_type.to_string(), + payload: json!({"amount": 100}), + timestamp: Utc::now(), + command_id: uuid::Uuid::nil(), + correlation_id: None, + traceparent: None, + } + } + + #[tokio::test] + async fn state_projection_registers_handler() { + let projection = StateProjection::new_default(); + + projection + .register_handler("Account", |event| { + Some(AggregateProjection::new( + event.tenant_id.as_str(), + event.aggregate_id.to_string(), + "Account", + event.version.as_u64(), + event.payload.clone(), + )) + }) + .await; + + let event = create_test_event("tenant-a", 1, "deposited"); + let result = projection.project_event(&event).await; + + assert!(result.is_some()); 
+ let proj = result.unwrap(); + assert_eq!(proj.aggregate_type, "Account"); + } + + #[tokio::test] + async fn state_projection_project_events_batch() { + let projection = StateProjection::new_default(); + + projection + .register_handler("Account", |event| { + Some(AggregateProjection::new( + event.tenant_id.as_str(), + event.aggregate_id.to_string(), + "Account", + event.version.as_u64(), + event.payload.clone(), + )) + }) + .await; + + let events = vec![ + create_test_event("tenant-a", 1, "deposited"), + create_test_event("tenant-a", 1, "deposited"), + create_test_event("tenant-a", 1, "deposited"), + ]; + + let projections = projection.project_events(&events).await; + + assert_eq!(projections.len(), 3); + } + + #[tokio::test] + async fn state_projection_no_handler_returns_none() { + let projection = StateProjection::new_default(); + + let event = create_test_event("tenant-a", 1, "deposited"); + let result = projection.project_event(&event).await; + + assert!(result.is_none()); + } + + #[test] + fn default_projection_from_event() { + let event = create_test_event("tenant-a", 5, "deposited"); + let proj = StateProjection::default_projection_from_event(&event); + + assert_eq!(proj.tenant_id, "tenant-a"); + assert_eq!(proj.version, 5); + assert_eq!(proj.state["amount"], 100); + } + + #[test] + fn default_projection_from_state() { + let tenant_id = TenantId::new("tenant-a"); + let aggregate_id = AggregateId::new_v7(); + let aggregate_type = AggregateType::from("Account"); + let version = Version::from(10); + let state = json!({"balance": 1000}); + + let proj = StateProjection::default_projection_from_state( + &tenant_id, + &aggregate_id, + &aggregate_type, + &version, + &state, + ); + + assert_eq!(proj.tenant_id, "tenant-a"); + assert_eq!(proj.aggregate_type, "Account"); + assert_eq!(proj.version, 10); + assert_eq!(proj.state["balance"], 1000); + } + + #[test] + fn projection_config_defaults() { + let config = ProjectionConfig::default(); + 
assert_eq!(config.batch_size, 100); + assert_eq!(config.projection_timeout_ms, 5000); + } +} diff --git a/aggregate/src/runtime/executor.rs b/aggregate/src/runtime/executor.rs new file mode 100644 index 0000000..2c7d07f --- /dev/null +++ b/aggregate/src/runtime/executor.rs @@ -0,0 +1,270 @@ +use serde_json::Value as JsonValue; +use std::time::Duration; + +pub async fn execute_decide_program( + state: &JsonValue, + command: &JsonValue, + program: &str, + gas_limit: u64, + timeout: Duration, +) -> Result, crate::types::AggregateError> { + let _ = (state, command, program, gas_limit, timeout); + + #[cfg(feature = "runtime-v8")] + { + return execute_decide_v8(state, command, program, gas_limit, timeout).await; + } + + #[cfg(feature = "runtime-wasm")] + { + return execute_decide_wasm(state, command, program, gas_limit, timeout).await; + } + + #[cfg(not(any(feature = "runtime-v8", feature = "runtime-wasm")))] + { + Err(crate::types::AggregateError::DecideError( + "No runtime enabled. Enable 'runtime-v8' or 'runtime-wasm' feature.".to_string(), + )) + } +} + +pub async fn execute_apply_program( + state: &JsonValue, + event: &JsonValue, + program: &str, + gas_limit: u64, + timeout: Duration, +) -> Result { + let _ = (state, event, program, gas_limit, timeout); + + #[cfg(feature = "runtime-v8")] + { + return execute_apply_v8(state, event, program, gas_limit, timeout).await; + } + + #[cfg(feature = "runtime-wasm")] + { + return execute_apply_wasm(state, event, program, gas_limit, timeout).await; + } + + #[cfg(not(any(feature = "runtime-v8", feature = "runtime-wasm")))] + { + Err(crate::types::AggregateError::ApplyError( + "No runtime enabled. 
Enable 'runtime-v8' or 'runtime-wasm' feature.".to_string(), + )) + } +} + +#[cfg(feature = "runtime-v8")] +async fn execute_decide_v8( + state: &JsonValue, + command: &JsonValue, + program: &str, + gas_limit: u64, + timeout: Duration, +) -> Result, crate::types::AggregateError> { + use std::sync::Arc; + use v8::{Array, Context, Function, HandleScope, Isolate, Object, Scope, Script}; + + let state_str = serde_json::to_string(state).map_err(|e| { + crate::types::AggregateError::DecideError(format!("State serialization: {}", e)) + })?; + let command_str = serde_json::to_string(command).map_err(|e| { + crate::types::AggregateError::DecideError(format!("Command serialization: {}", e)) + })?; + + let result = tokio::task::spawn_blocking(move || { + let isolate = &mut Isolate::new(v8::CreateParams::default()); + + let scope = &mut HandleScope::new(isolate); + let context = Context::new(scope); + let scope = &mut ContextScope::new(scope, context); + + let source = + v8::String::new(scope, program).ok_or_else(|| "Failed to create program string")?; + + let script = + Script::compile(scope, source, None).ok_or_else(|| "Failed to compile program")?; + + script.run(scope).ok_or_else(|| "Failed to run program")?; + + let global = context.global(scope); + let decide_name = + v8::String::new(scope, "decide").ok_or_else(|| "Failed to create decide string")?; + + let decide_fn = global + .get(scope, decide_name.into()) + .and_then(|v| v8::Local::::try_from(v).ok()) + .ok_or_else(|| "decide function not found")?; + + let state_json = v8::String::new(scope, &state_str) + .ok_or_else(|| "Failed to create state JSON string")?; + let state_obj = + v8::json::parse(scope, state_json).ok_or_else(|| "Failed to parse state JSON")?; + + let command_json = v8::String::new(scope, &command_str) + .ok_or_else(|| "Failed to create command JSON string")?; + let command_obj = + v8::json::parse(scope, command_json).ok_or_else(|| "Failed to parse command JSON")?; + + let args: [v8::Local; 2] = 
[state_obj.into(), command_obj.into()]; + let result = decide_fn + .call(scope, global.into(), &args) + .ok_or_else(|| "decide function call failed")?; + + let result_json = + v8::json::stringify(scope, result).ok_or_else(|| "Failed to stringify result")?; + let result_str = result_json.to_rust_string_lossy(scope); + + let events: Vec = serde_json::from_str(&result_str) + .map_err(|e| format!("Failed to parse result: {}", e))?; + + Ok::<_, String>(events) + }); + + let timeout_result = tokio::time::timeout(timeout, result).await; + + match timeout_result { + Ok(Ok(Ok(events))) => Ok(events), + Ok(Ok(Err(e))) => Err(crate::types::AggregateError::DecideError(e)), + Ok(Err(_)) => Err(crate::types::AggregateError::DecideError( + "Task join error".to_string(), + )), + Err(_) => Err(crate::types::AggregateError::DecideError( + "Execution timeout".to_string(), + )), + } +} + +#[cfg(feature = "runtime-v8")] +async fn execute_apply_v8( + state: &JsonValue, + event: &JsonValue, + program: &str, + gas_limit: u64, + timeout: Duration, +) -> Result { + use v8::{Context, ContextScope, Function, HandleScope, Isolate, Script}; + + let state_str = serde_json::to_string(state).map_err(|e| { + crate::types::AggregateError::ApplyError(format!("State serialization: {}", e)) + })?; + let event_str = serde_json::to_string(event).map_err(|e| { + crate::types::AggregateError::ApplyError(format!("Event serialization: {}", e)) + })?; + + let _ = gas_limit; + + let result = tokio::task::spawn_blocking(move || { + let isolate = &mut Isolate::new(v8::CreateParams::default()); + + let scope = &mut HandleScope::new(isolate); + let context = Context::new(scope); + let scope = &mut ContextScope::new(scope, context); + + let source = + v8::String::new(scope, program).ok_or_else(|| "Failed to create program string")?; + + let script = + Script::compile(scope, source, None).ok_or_else(|| "Failed to compile program")?; + + script.run(scope).ok_or_else(|| "Failed to run program")?; + + let global = 
context.global(scope); + let apply_name = + v8::String::new(scope, "apply").ok_or_else(|| "Failed to create apply string")?; + + let apply_fn = global + .get(scope, apply_name.into()) + .and_then(|v| v8::Local::::try_from(v).ok()) + .ok_or_else(|| "apply function not found")?; + + let state_json = v8::String::new(scope, &state_str) + .ok_or_else(|| "Failed to create state JSON string")?; + let state_obj = + v8::json::parse(scope, state_json).ok_or_else(|| "Failed to parse state JSON")?; + + let event_json = v8::String::new(scope, &event_str) + .ok_or_else(|| "Failed to create event JSON string")?; + let event_obj = + v8::json::parse(scope, event_json).ok_or_else(|| "Failed to parse event JSON")?; + + let args: [v8::Local; 2] = [state_obj.into(), event_obj.into()]; + let result = apply_fn + .call(scope, global.into(), &args) + .ok_or_else(|| "apply function call failed")?; + + let result_json = + v8::json::stringify(scope, result).ok_or_else(|| "Failed to stringify result")?; + let result_str = result_json.to_rust_string_lossy(scope); + + let new_state: JsonValue = serde_json::from_str(&result_str) + .map_err(|e| format!("Failed to parse result: {}", e))?; + + Ok::<_, String>(new_state) + }); + + let timeout_result = tokio::time::timeout(timeout, result).await; + + match timeout_result { + Ok(Ok(Ok(new_state))) => Ok(new_state), + Ok(Ok(Err(e))) => Err(crate::types::AggregateError::ApplyError(e)), + Ok(Err(_)) => Err(crate::types::AggregateError::ApplyError( + "Task join error".to_string(), + )), + Err(_) => Err(crate::types::AggregateError::ApplyError( + "Execution timeout".to_string(), + )), + } +} + +#[cfg(feature = "runtime-wasm")] +async fn execute_decide_wasm( + state: &JsonValue, + command: &JsonValue, + _program: &str, + _gas_limit: u64, + _timeout: Duration, +) -> Result, crate::types::AggregateError> { + let _ = (state, command); + Err(crate::types::AggregateError::DecideError( + "WASM runtime not yet implemented".to_string(), + )) +} + +#[cfg(feature = 
"runtime-wasm")] +async fn execute_apply_wasm( + state: &JsonValue, + event: &JsonValue, + _program: &str, + _gas_limit: u64, + _timeout: Duration, +) -> Result { + let _ = (state, event); + Err(crate::types::AggregateError::ApplyError( + "WASM runtime not yet implemented".to_string(), + )) +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[tokio::test] + async fn no_runtime_returns_error() { + #[cfg(not(any(feature = "runtime-v8", feature = "runtime-wasm")))] + { + let state = json!({}); + let command = json!({}); + let result = + execute_decide_program(&state, &command, "program", 1000, Duration::from_secs(1)) + .await; + assert!(result.is_err()); + assert!(matches!( + result.unwrap_err(), + crate::types::AggregateError::DecideError(_) + )); + } + } +} diff --git a/aggregate/src/runtime/mod.rs b/aggregate/src/runtime/mod.rs new file mode 100644 index 0000000..74f0c4b --- /dev/null +++ b/aggregate/src/runtime/mod.rs @@ -0,0 +1,484 @@ +mod executor; + +use lru::LruCache; +use std::num::NonZeroUsize; +use std::sync::Arc; +use std::time::Duration; +use tokio::sync::RwLock; + +use crate::types::{AggregateError, Command, Event}; +use serde_json::Value as JsonValue; + +pub use executor::{execute_apply_program, execute_decide_program}; + +const DEFAULT_GAS_LIMIT: u64 = 1_000_000; +const DEFAULT_TIMEOUT_MS: u64 = 5_000; +const CACHE_SIZE: usize = 100; + +#[derive(Debug, Clone)] +pub struct ExecutorConfig { + pub gas_limit: u64, + pub timeout: Duration, + pub cache_programs: bool, + pub mock_runtime: bool, +} + +impl Default for ExecutorConfig { + fn default() -> Self { + Self { + gas_limit: DEFAULT_GAS_LIMIT, + timeout: Duration::from_millis(DEFAULT_TIMEOUT_MS), + cache_programs: true, + mock_runtime: false, + } + } +} + +impl ExecutorConfig { + pub fn with_gas_limit(mut self, limit: u64) -> Self { + self.gas_limit = limit; + self + } + + pub fn with_timeout(mut self, timeout: Duration) -> Self { + self.timeout = timeout; + self + } + + pub fn 
without_cache(mut self) -> Self { + self.cache_programs = false; + self + } + + pub fn with_mock_runtime(mut self) -> Self { + self.mock_runtime = true; + self + } +} + +#[derive(Debug, Clone, Hash, Eq, PartialEq)] +pub struct ProgramHash(String); + +impl ProgramHash { + pub fn new(program: &str) -> Self { + use std::collections::hash_map::DefaultHasher; + use std::hash::{Hash, Hasher}; + + let mut hasher = DefaultHasher::new(); + program.hash(&mut hasher); + Self(format!("{:x}", hasher.finish())) + } +} + +#[derive(Debug, Clone)] +pub struct DecideResult { + pub events: Vec, +} + +#[derive(Debug, Clone)] +pub struct ApplyResult { + pub new_state: JsonValue, +} + +#[derive(Debug, Clone)] +pub struct RuntimeExecutor { + config: ExecutorConfig, + program_cache: Arc>>, +} + +impl RuntimeExecutor { + pub fn new() -> Self { + Self::with_config(ExecutorConfig::default()) + } + + pub fn with_config(config: ExecutorConfig) -> Self { + let cache_size = NonZeroUsize::new(CACHE_SIZE).unwrap(); + Self { + config, + program_cache: Arc::new(RwLock::new(LruCache::new(cache_size))), + } + } + + pub async fn execute_decide( + &self, + state: &JsonValue, + command: &Command, + decide_program: &str, + ) -> Result { + if self.config.mock_runtime { + let events = mock_decide(state, command)?; + return Ok(DecideResult { events }); + } + + if self.config.cache_programs { + let hash = ProgramHash::new(decide_program); + let mut cache = self.program_cache.write().await; + cache.put(hash.clone(), decide_program.to_string()); + } + + let command_json = serde_json::to_value(command).map_err(|e| { + AggregateError::DecideError(format!("Command serialization failed: {}", e)) + })?; + + let result = executor::execute_decide_program( + state, + &command_json, + decide_program, + self.config.gas_limit, + self.config.timeout, + ) + .await?; + + Ok(DecideResult { events: result }) + } + + pub async fn execute_apply( + &self, + state: &JsonValue, + event: &Event, + apply_program: &str, + ) -> Result 
{ + if self.config.mock_runtime { + let new_state = mock_apply(state, event)?; + return Ok(ApplyResult { new_state }); + } + + if self.config.cache_programs { + let hash = ProgramHash::new(apply_program); + let mut cache = self.program_cache.write().await; + cache.put(hash.clone(), apply_program.to_string()); + } + + let event_json = serde_json::to_value(event).map_err(|e| { + AggregateError::ApplyError(format!("Event serialization failed: {}", e)) + })?; + + let result = executor::execute_apply_program( + state, + &event_json, + apply_program, + self.config.gas_limit, + self.config.timeout, + ) + .await?; + + Ok(ApplyResult { new_state: result }) + } + + pub async fn execute_apply_raw( + &self, + state: &JsonValue, + event: &JsonValue, + apply_program: &str, + ) -> Result { + if self.config.mock_runtime { + let _ = apply_program; + return Err(AggregateError::ApplyError( + "mock_runtime does not support execute_apply_raw".to_string(), + )); + } + + if self.config.cache_programs { + let hash = ProgramHash::new(apply_program); + let mut cache = self.program_cache.write().await; + cache.put(hash.clone(), apply_program.to_string()); + } + + let result = executor::execute_apply_program( + state, + event, + apply_program, + self.config.gas_limit, + self.config.timeout, + ) + .await?; + + Ok(ApplyResult { new_state: result }) + } + + pub fn config(&self) -> &ExecutorConfig { + &self.config + } + + pub async fn cache_size(&self) -> usize { + self.program_cache.read().await.len() + } + + pub async fn clear_cache(&self) { + self.program_cache.write().await.clear(); + } +} + +impl Default for RuntimeExecutor { + fn default() -> Self { + Self::new() + } +} + +fn mock_decide(state: &JsonValue, command: &Command) -> Result, AggregateError> { + let cmd_type = command + .payload + .get("type") + .and_then(|v| v.as_str()) + .unwrap_or(""); + let amount = command + .payload + .get("amount") + .and_then(|v| v.as_i64()) + .unwrap_or(0); + + match cmd_type { + "deposit" => Ok(vec![ + 
serde_json::json!({ "type": "deposited", "amount": amount }), + ]), + "withdraw" => { + let balance = state.get("balance").and_then(|v| v.as_i64()).unwrap_or(0); + if balance < amount { + Err(AggregateError::DecideError( + "Insufficient funds".to_string(), + )) + } else { + Ok(vec![ + serde_json::json!({ "type": "withdrawn", "amount": amount }), + ]) + } + } + _ => Ok(Vec::new()), + } +} + +fn mock_apply(state: &JsonValue, event: &Event) -> Result { + let mut new_state = match state { + JsonValue::Object(map) => JsonValue::Object(map.clone()), + _ => serde_json::json!({}), + }; + + let balance = new_state + .get("balance") + .and_then(|v| v.as_i64()) + .unwrap_or(0); + + let amount = event + .payload + .get("amount") + .and_then(|v| v.as_i64()) + .unwrap_or(0); + + let next_balance = match event.event_type.as_str() { + "deposited" => balance + amount, + "withdrawn" => balance - amount, + _ => balance, + }; + + if let JsonValue::Object(map) = &mut new_state { + map.insert("balance".to_string(), JsonValue::from(next_balance)); + } + + Ok(new_state) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::types::{AggregateId, AggregateType, TenantId, Version}; + use serde_json::json; + use std::collections::HashMap; + + const DECIDE_PROGRAM: &str = r#" + function decide(state, command) { + if (command.type === "deposit") { + return [{ type: "deposited", amount: command.amount }]; + } + if (command.type === "withdraw") { + if (state.balance < command.amount) { + throw new Error("Insufficient funds"); + } + return [{ type: "withdrawn", amount: command.amount }]; + } + return []; + } + "#; + + const APPLY_PROGRAM: &str = r#" + function apply(state, event) { + if (event.type === "deposited") { + state.balance = (state.balance || 0) + event.amount; + } + if (event.type === "withdrawn") { + state.balance = (state.balance || 0) - event.amount; + } + return state; + } + "#; + + fn make_command(cmd_type: &str, amount: i64, tenant_id: &TenantId) -> Command { + Command { + 
command_id: uuid::Uuid::now_v7(), + tenant_id: tenant_id.clone(), + aggregate_id: AggregateId::new_v7(), + aggregate_type: AggregateType::from("Account"), + payload: json!({ "type": cmd_type, "amount": amount }), + metadata: HashMap::new(), + } + } + + #[tokio::test] + async fn executor_has_defaults() { + let executor = RuntimeExecutor::new(); + assert_eq!(executor.config().gas_limit, DEFAULT_GAS_LIMIT); + assert!(executor.config().cache_programs); + } + + #[tokio::test] + async fn config_builder_works() { + let config = ExecutorConfig::default() + .with_gas_limit(500_000) + .with_timeout(Duration::from_millis(1000)) + .without_cache(); + + assert_eq!(config.gas_limit, 500_000); + assert_eq!(config.timeout, Duration::from_millis(1000)); + assert!(!config.cache_programs); + } + + #[tokio::test] + async fn program_hash_is_consistent() { + let h1 = ProgramHash::new("test program"); + let h2 = ProgramHash::new("test program"); + assert_eq!(h1, h2); + + let h3 = ProgramHash::new("different program"); + assert_ne!(h1, h3); + } + + #[tokio::test] + async fn decide_returns_events_for_deposit() { + let executor = RuntimeExecutor::new(); + let tenant_id = TenantId::new("test-tenant"); + let state = json!({ "balance": 100 }); + let command = make_command("deposit", 50, &tenant_id); + + let result = executor + .execute_decide(&state, &command, DECIDE_PROGRAM) + .await; + + match result { + Ok(decide_result) => { + assert!(!decide_result.events.is_empty()); + } + Err(AggregateError::DecideError(msg)) => { + assert!( + msg.contains("runtime") + || msg.contains("not available") + || msg.contains("not implemented") + ); + } + Err(e) => panic!("Unexpected error: {:?}", e), + } + } + + #[tokio::test] + async fn decide_rejects_invalid_withdraw() { + let executor = RuntimeExecutor::new(); + let tenant_id = TenantId::new("test-tenant"); + let state = json!({ "balance": 10 }); + let command = make_command("withdraw", 100, &tenant_id); + + let result = executor + .execute_decide(&state, 
&command, DECIDE_PROGRAM) + .await; + + assert!(matches!(result, Err(AggregateError::DecideError(_)))); + } + + #[tokio::test] + async fn decide_is_deterministic() { + let executor = RuntimeExecutor::new(); + let tenant_id = TenantId::new("test-tenant"); + let state = json!({ "balance": 100 }); + let command = make_command("deposit", 50, &tenant_id); + + let r1 = executor + .execute_decide(&state, &command, DECIDE_PROGRAM) + .await; + let r2 = executor + .execute_decide(&state, &command, DECIDE_PROGRAM) + .await; + + assert_eq!(r1.is_ok(), r2.is_ok()); + } + + #[tokio::test] + async fn apply_transitions_state() { + let executor = RuntimeExecutor::new(); + let tenant_id = TenantId::new("test-tenant"); + let state = json!({ "balance": 100 }); + let event = Event { + event_id: uuid::Uuid::now_v7(), + tenant_id, + aggregate_id: AggregateId::new_v7(), + aggregate_type: AggregateType::from("Account"), + event_type: "deposited".to_string(), + version: Version::from(1), + payload: json!({ "amount": 50 }), + command_id: uuid::Uuid::now_v7(), + timestamp: chrono::Utc::now(), + correlation_id: None, + traceparent: None, + }; + + let result = executor.execute_apply(&state, &event, APPLY_PROGRAM).await; + + match result { + Ok(apply_result) => { + assert!(apply_result.new_state.is_object()); + } + Err(AggregateError::ApplyError(msg)) => { + assert!( + msg.contains("runtime") + || msg.contains("not available") + || msg.contains("not implemented") + ); + } + Err(e) => panic!("Unexpected error: {:?}", e), + } + } + + #[tokio::test] + async fn cache_stores_programs() { + let executor = RuntimeExecutor::new(); + let tenant_id = TenantId::new("test-tenant"); + let state = json!({ "balance": 100 }); + let command = make_command("deposit", 50, &tenant_id); + + assert_eq!(executor.cache_size().await, 0); + + let _ = executor + .execute_decide(&state, &command, DECIDE_PROGRAM) + .await; + + assert_eq!(executor.cache_size().await, 1); + } + + #[tokio::test] + async fn clear_cache_works() 
/// Tri-state service health, serialized into health endpoint responses.
///
/// `Degraded` carries human-readable descriptions of impaired components
/// while the service keeps accepting work (readiness stays true for this
/// state); `Unhealthy` lists why the service cannot serve traffic at all.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum HealthStatus {
    /// All monitored components are up.
    Healthy,
    /// Some components are impaired; the service still accepts work.
    Degraded { issues: Vec<String> },
    /// The service cannot operate; `reasons` lists every failing component.
    Unhealthy { reasons: Vec<String> },
}
}) + } +} + +#[derive(Debug, Clone)] +pub struct ComponentHealth { + pub name: String, + pub status: HealthStatus, + pub last_check: Instant, + pub details: HashMap, +} + +impl ComponentHealth { + pub fn healthy(name: impl Into) -> Self { + Self { + name: name.into(), + status: HealthStatus::Healthy, + last_check: Instant::now(), + details: HashMap::new(), + } + } + + pub fn degraded(name: impl Into, issues: Vec) -> Self { + Self { + name: name.into(), + status: HealthStatus::Degraded { issues }, + last_check: Instant::now(), + details: HashMap::new(), + } + } + + pub fn unhealthy(name: impl Into, reasons: Vec) -> Self { + Self { + name: name.into(), + status: HealthStatus::Unhealthy { reasons }, + last_check: Instant::now(), + details: HashMap::new(), + } + } + + pub fn with_detail(mut self, key: impl Into, value: impl Into) -> Self { + self.details.insert(key.into(), value.into()); + self + } +} + +pub struct HealthChecker { + storage_healthy: AtomicBool, + stream_healthy: AtomicBool, + components: RwLock>, +} + +impl HealthChecker { + pub fn new() -> Self { + Self { + storage_healthy: AtomicBool::new(true), + stream_healthy: AtomicBool::new(true), + components: RwLock::new(HashMap::new()), + } + } + + pub fn storage_healthy(&self) -> bool { + self.storage_healthy.load(Ordering::Relaxed) + } + + pub fn stream_healthy(&self) -> bool { + self.stream_healthy.load(Ordering::Relaxed) + } + + pub fn set_storage_healthy(&self, healthy: bool) { + self.storage_healthy.store(healthy, Ordering::Relaxed); + self.update_component( + "storage", + healthy, + if healthy { "connected" } else { "disconnected" }, + ); + } + + pub fn set_stream_healthy(&self, healthy: bool) { + self.stream_healthy.store(healthy, Ordering::Relaxed); + self.update_component( + "stream", + healthy, + if healthy { "connected" } else { "disconnected" }, + ); + } + + fn update_component(&self, name: &str, healthy: bool, status: &str) { + let mut components = self.components.write().unwrap(); + let health 
= if healthy { + ComponentHealth::healthy(name).with_detail("status", status) + } else { + ComponentHealth::unhealthy(name, vec![format!("status: {}", status)]) + }; + components.insert(name.to_string(), health); + } + + pub fn check(&self) -> HealthStatus { + let storage = self.storage_healthy.load(Ordering::Relaxed); + let stream = self.stream_healthy.load(Ordering::Relaxed); + + match (storage, stream) { + (true, true) => HealthStatus::Healthy, + (true, false) | (false, true) => { + let mut issues = Vec::new(); + if !storage { + issues.push("storage disconnected".to_string()); + } + if !stream { + issues.push("stream disconnected".to_string()); + } + HealthStatus::Degraded { issues } + } + (false, false) => HealthStatus::Unhealthy { + reasons: vec![ + "storage disconnected".to_string(), + "stream disconnected".to_string(), + ], + }, + } + } + + pub fn is_ready(&self) -> bool { + let status = self.check(); + status.is_healthy() || status.is_degraded() + } + + pub fn is_live(&self) -> bool { + true + } + + pub fn components(&self) -> HashMap { + self.components.read().unwrap().clone() + } +} + +impl Default for HealthChecker { + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn health_status_checks() { + let healthy = HealthStatus::Healthy; + assert!(healthy.is_healthy()); + assert!(!healthy.is_degraded()); + assert!(!healthy.is_unhealthy()); + + let degraded = HealthStatus::Degraded { + issues: vec!["test".to_string()], + }; + assert!(!degraded.is_healthy()); + assert!(degraded.is_degraded()); + assert!(!degraded.is_unhealthy()); + + let unhealthy = HealthStatus::Unhealthy { + reasons: vec!["test".to_string()], + }; + assert!(!unhealthy.is_healthy()); + assert!(!unhealthy.is_degraded()); + assert!(unhealthy.is_unhealthy()); + } + + #[test] + fn component_health_builders() { + let healthy = ComponentHealth::healthy("storage"); + assert_eq!(healthy.name, "storage"); + assert!(healthy.status.is_healthy()); + + 
let degraded = ComponentHealth::degraded("stream", vec!["slow".to_string()]); + assert!(degraded.status.is_degraded()); + + let unhealthy = ComponentHealth::unhealthy("db", vec!["down".to_string()]); + assert!(unhealthy.status.is_unhealthy()); + } + + #[test] + fn health_checker_starts_healthy() { + let checker = HealthChecker::new(); + assert!(checker.check().is_healthy()); + } + + #[test] + fn health_checker_storage_failure() { + let checker = HealthChecker::new(); + checker.set_storage_healthy(false); + + let status = checker.check(); + assert!(status.is_degraded()); + } + + #[test] + fn health_checker_all_failures() { + let checker = HealthChecker::new(); + checker.set_storage_healthy(false); + checker.set_stream_healthy(false); + + let status = checker.check(); + assert!(status.is_unhealthy()); + } + + #[test] + fn health_checker_is_ready() { + let checker = HealthChecker::new(); + assert!(checker.is_ready()); + + checker.set_storage_healthy(false); + assert!(checker.is_ready()); + } + + #[test] + fn health_checker_is_live() { + let checker = HealthChecker::new(); + assert!(checker.is_live()); + + checker.set_storage_healthy(false); + checker.set_stream_healthy(false); + assert!(checker.is_live()); + } + + #[test] + fn health_checker_tracks_components() { + let checker = HealthChecker::new(); + checker.set_storage_healthy(true); + checker.set_stream_healthy(true); + + let components = checker.components(); + assert!(components.contains_key("storage")); + assert!(components.contains_key("stream")); + } +} diff --git a/aggregate/src/server/mod.rs b/aggregate/src/server/mod.rs new file mode 100644 index 0000000..3f5039c --- /dev/null +++ b/aggregate/src/server/mod.rs @@ -0,0 +1,787 @@ +mod health; + +pub use health::{HealthChecker, HealthStatus}; + +use crate::aggregate::AggregateHandler; +use crate::observability::Observability; +use crate::placement::{TenantPlacementManager, TenantStatus}; +use crate::types::{AggregateError, AggregateId, AggregateType, Command, 
Event, TenantId}; +use serde::de::DeserializeOwned; +use serde::{Deserialize, Serialize}; +use std::collections::hash_map::DefaultHasher; +use std::collections::BTreeMap; +use std::collections::HashMap; +use std::hash::{Hash, Hasher}; +use std::str::FromStr; +use std::sync::Arc; + +#[derive(Debug, Clone)] +pub struct CommandRequest { + pub tenant_id: TenantId, + pub aggregate_id: AggregateId, + pub aggregate_type: AggregateType, + pub payload: serde_json::Value, + pub headers: HashMap, +} + +impl CommandRequest { + pub fn new( + tenant_id: TenantId, + aggregate_id: AggregateId, + aggregate_type: AggregateType, + payload: serde_json::Value, + ) -> Self { + Self { + tenant_id, + aggregate_id, + aggregate_type, + payload, + headers: HashMap::new(), + } + } + + pub fn with_header(mut self, key: impl Into, value: impl Into) -> Self { + self.headers.insert(key.into(), value.into()); + self + } + + pub fn into_command(self) -> Command { + let mut cmd = Command::new( + self.tenant_id, + self.aggregate_id, + self.aggregate_type, + self.payload, + ); + if let Some(correlation_id) = self + .headers + .get("x-correlation-id") + .map(|s| s.trim()) + .filter(|s| !s.is_empty()) + { + cmd.metadata.insert( + "correlation_id".to_string(), + serde_json::Value::String(correlation_id.to_string()), + ); + } + if let Some(traceparent) = self + .headers + .get("traceparent") + .map(|s| s.trim()) + .filter(|s| !s.is_empty()) + { + cmd.metadata.insert( + "traceparent".to_string(), + serde_json::Value::String(traceparent.to_string()), + ); + } + cmd + } +} + +#[derive(Debug, Clone)] +pub struct CommandResponse { + pub tenant_id: TenantId, + pub aggregate_id: AggregateId, + pub events: Vec, +} + +#[derive(Debug, Clone)] +pub struct ServerConfig { + pub service_name: String, + pub validate_tenant_id: bool, +} + +impl Default for ServerConfig { + fn default() -> Self { + Self { + service_name: "aggregate".to_string(), + validate_tenant_id: true, + } + } +} + +pub struct CommandServer { + 
    /// Validates the characters of a tenant id when `validate_tenant_id` is
    /// enabled in the server config; otherwise accepts anything.
    ///
    /// An empty tenant id is accepted here — emptiness is not this check's
    /// concern (`extract_tenant_id` already falls back to the default).
    ///
    /// NOTE(review): `char::is_alphanumeric` accepts any Unicode letter or
    /// digit, not just ASCII — confirm whether non-ASCII tenant ids are
    /// intended, since the error message reads as ASCII-only.
    pub fn validate_tenant_id(&self, tenant_id: &TenantId) -> Result<(), ServerError> {
        if !self.config.validate_tenant_id {
            return Ok(());
        }

        let id = tenant_id.as_str();
        if id.is_empty() {
            return Ok(());
        }

        if !id
            .chars()
            .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
        {
            return Err(ServerError::InvalidTenantId {
                tenant_id: tenant_id.clone(),
                reason:
                    "tenant_id must contain only alphanumeric characters, hyphens, and underscores"
                        .to_string(),
            });
        }

        Ok(())
    }
self.observability + .record_command_success(&span, events.len()); + Ok(CommandResponse { + tenant_id, + aggregate_id, + events, + }) + } + Err(e) => { + self.observability.record_command_error(&span, &e); + Err(e.into()) + } + } + } + + pub async fn handle_raw( + &self, + tenant_id: Option<&str>, + aggregate_id: &str, + aggregate_type: &str, + payload: serde_json::Value, + headers: HashMap, + ) -> Result { + let resolved_tenant_id = tenant_id + .map(TenantId::new) + .unwrap_or_else(|| self.extract_tenant_id(&headers)); + + let request = CommandRequest::new( + resolved_tenant_id, + AggregateId::from_str(aggregate_id).map_err(|e| ServerError::InvalidAggregateId { + id: aggregate_id.to_string(), + reason: e.to_string(), + })?, + AggregateType::from(aggregate_type), + payload, + ) + .with_headers(headers); + + self.handle(request).await + } + + pub async fn health_check(&self) -> HealthStatus { + self.health_checker.check() + } + + pub async fn ready_check(&self) -> bool { + self.health_checker.is_ready() + } + + pub fn metrics(&self) -> String { + self.observability.export_metrics() + } + + pub fn health_checker(&self) -> &HealthChecker { + &self.health_checker + } + + pub fn observability(&self) -> &Arc { + &self.observability + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct HealthReport { + pub status: HealthStatus, + pub nats_connected: bool, + pub storage_connected: bool, + pub active_aggregates: usize, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TenantInfo { + pub tenant_id: TenantId, + pub aggregate_count: usize, + pub last_activity: chrono::DateTime, +} + +#[derive(Debug, Clone)] +pub struct AdminResponse { + status: u16, + body: String, +} + +impl AdminResponse { + pub fn status(&self) -> AdminStatus { + AdminStatus { code: self.status } + } + + pub async fn text(&self) -> String { + self.body.clone() + } + + pub async fn json(&self) -> T { + serde_json::from_str(&self.body).unwrap() + } +} + +#[derive(Debug, Clone)] 
/// Consistent-hash ring mapping tenant ids onto nodes.
///
/// Each node is inserted at `replicas` pseudo-random points ("virtual nodes")
/// on a `u64` ring; a tenant routes to the first virtual node at or after its
/// own hash, wrapping to the ring's lowest point at the end.
#[derive(Debug, Clone)]
pub struct HashRing {
    replicas: usize,
    ring: BTreeMap<u64, String>,
}

impl HashRing {
    /// Creates an empty ring; `replicas` is clamped to at least 1.
    pub fn new(replicas: usize) -> Self {
        Self {
            replicas: replicas.max(1),
            ring: BTreeMap::new(),
        }
    }

    /// Inserts `replicas` virtual points for `node` on the ring.
    pub fn add_node(&mut self, node: impl Into<String>) {
        let name = node.into();
        for replica in 0..self.replicas {
            let point = Self::hash(&(name.as_str(), replica));
            self.ring.insert(point, name.clone());
        }
    }

    /// Removes every virtual point belonging to `node`.
    pub fn remove_node(&mut self, node: &str) {
        let stale: Vec<u64> = self
            .ring
            .iter()
            .filter(|(_, owner)| owner.as_str() == node)
            .map(|(point, _)| *point)
            .collect();
        for point in stale {
            self.ring.remove(&point);
        }
    }

    /// Returns the node responsible for `tenant_id`, or `None` on an empty ring.
    pub fn route(&self, tenant_id: &str) -> Option<&str> {
        if self.ring.is_empty() {
            return None;
        }

        let point = Self::hash(&tenant_id);
        // First virtual node clockwise from the tenant's hash; wrap around to
        // the smallest point when nothing lies at or above it.
        self.ring
            .range(point..)
            .next()
            .or_else(|| self.ring.iter().next())
            .map(|(_, owner)| owner.as_str())
    }

    // Hashes any hashable value with the std SipHash-based DefaultHasher.
    fn hash<T: Hash>(value: &T) -> u64 {
        let mut hasher = DefaultHasher::new();
        value.hash(&mut hasher);
        hasher.finish()
    }
}
+ None, + None, + ); + server.observability.record_command_success(&span, 1); + server + .placement + .set_hosted_tenants(vec!["test-tenant".to_string()]) + .await; + server + } + + pub fn placement_manager(&self) -> Arc { + self.placement.clone() + } + + pub fn observability(&self) -> Arc { + self.observability.clone() + } + + pub fn health_checker(&self) -> &HealthChecker { + &self.health_checker + } + + pub async fn get(&self, path: &str) -> AdminResponse { + match path { + "/health" => { + let report = self.health_report().await; + AdminResponse { + status: 200, + body: serde_json::to_string(&report).unwrap(), + } + } + "/ready" => AdminResponse { + status: 200, + body: serde_json::to_string(&self.health_checker.is_ready()).unwrap(), + }, + "/metrics" => AdminResponse { + status: 200, + body: self.observability.export_metrics(), + }, + "/admin/tenants" => { + let list: Vec = self.placement.all_statuses().await; + AdminResponse { + status: 200, + body: serde_json::to_string(&list).unwrap(), + } + } + _ => AdminResponse { + status: 404, + body: "not found".to_string(), + }, + } + } + + pub async fn post(&self, path: &str, body: serde_json::Value) -> AdminResponse { + match path { + "/admin/drain" => { + if let Some(tenant_id) = body.get("tenant_id").and_then(|v| v.as_str()) { + let tenant_id = TenantId::new(tenant_id); + self.placement.drain_tenant(&tenant_id).await; + self.placement.wait_drained(&tenant_id).await; + } + AdminResponse { + status: 200, + body: "{}".to_string(), + } + } + "/admin/reload" => { + if let Some(arr) = body.get("hosted_tenants").and_then(|v| v.as_array()) { + let tenants = arr + .iter() + .filter_map(|v| v.as_str().map(|s| s.to_string())) + .collect::>(); + self.placement.set_hosted_tenants(tenants).await; + } + + if let Some(map) = body.get("placement").and_then(|v| v.as_object()) { + let placement = map + .iter() + .filter_map(|(k, v)| v.as_str().map(|s| (k.clone(), s.to_string()))) + .collect::>(); + self.placement + 
.apply_placement_map(&self.shard_id, &placement) + .await; + } + + AdminResponse { + status: 200, + body: "{}".to_string(), + } + } + _ => AdminResponse { + status: 404, + body: "not found".to_string(), + }, + } + } + + pub async fn get_hosted_tenants(&self) -> Vec { + self.placement.hosted_tenants().await + } + + async fn health_report(&self) -> HealthReport { + let active_aggregates = self.placement.hosted_tenants().await.len(); + HealthReport { + status: self.health_checker.check(), + nats_connected: self.health_checker.stream_healthy(), + storage_connected: self.health_checker.storage_healthy(), + active_aggregates, + } + } +} + +#[derive(Debug, Clone)] +pub enum ServerError { + InvalidTenantId { tenant_id: TenantId, reason: String }, + InvalidAggregateId { id: String, reason: String }, + AggregateError(AggregateError), +} + +impl std::fmt::Display for ServerError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::InvalidTenantId { tenant_id, reason } => { + write!(f, "invalid tenant_id '{}': {}", tenant_id.as_str(), reason) + } + Self::InvalidAggregateId { id, reason } => { + write!(f, "invalid aggregate_id '{}': {}", id, reason) + } + Self::AggregateError(e) => write!(f, "{}", e), + } + } +} + +impl std::error::Error for ServerError {} + +impl From for ServerError { + fn from(e: AggregateError) -> Self { + Self::AggregateError(e) + } +} + +impl CommandRequest { + pub fn with_headers(mut self, headers: HashMap) -> Self { + self.headers = headers; + self + } +} + +fn trace_id_from_traceparent(traceparent: &str) -> Option { + shared::trace_id_from_traceparent(traceparent).map(|s| s.to_string()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn command_request_builder() { + let req = CommandRequest::new( + TenantId::new("tenant-a"), + AggregateId::new_v7(), + AggregateType::from("Account"), + serde_json::json!({"type": "deposit", "amount": 100}), + ) + .with_header("x-request-id", "req-123"); + + 
assert_eq!(req.tenant_id.as_str(), "tenant-a"); + assert_eq!( + req.headers.get("x-request-id"), + Some(&"req-123".to_string()) + ); + } + + #[test] + fn extract_tenant_id_from_header() { + let _config = ServerConfig::default(); + let mut headers = HashMap::new(); + headers.insert("x-tenant-id".to_string(), "acme-corp".to_string()); + + let tenant_id = extract_tenant_id_static(&headers); + assert_eq!(tenant_id.as_str(), "acme-corp"); + } + + #[test] + fn extract_tenant_id_defaults_empty() { + let headers = HashMap::new(); + + let tenant_id = extract_tenant_id_static(&headers); + assert!(tenant_id.as_str().is_empty()); + } + + #[test] + fn validate_tenant_id_accepts_valid() { + assert!(validate_tenant_id_static(&TenantId::new("acme-corp")).is_ok()); + assert!(validate_tenant_id_static(&TenantId::new("tenant_123")).is_ok()); + assert!(validate_tenant_id_static(&TenantId::new("my-tenant")).is_ok()); + } + + #[test] + fn validate_tenant_id_rejects_invalid() { + assert!(validate_tenant_id_static(&TenantId::new("tenant@corp")).is_err()); + assert!(validate_tenant_id_static(&TenantId::new("tenant name")).is_err()); + } + + #[test] + fn server_config_defaults() { + let config = ServerConfig::default(); + assert_eq!(config.service_name, "aggregate"); + assert!(config.validate_tenant_id); + } + + #[test] + fn server_error_display() { + let err = ServerError::InvalidTenantId { + tenant_id: TenantId::new("bad@id"), + reason: "invalid characters".to_string(), + }; + assert!(err.to_string().contains("bad@id")); + } + + fn extract_tenant_id_static(headers: &HashMap) -> TenantId { + headers + .get("x-tenant-id") + .map(TenantId::new) + .unwrap_or_default() + } + + fn validate_tenant_id_static(tenant_id: &TenantId) -> Result<(), ServerError> { + let id = tenant_id.as_str(); + if id.is_empty() { + return Ok(()); + } + + if !id + .chars() + .all(|c| c.is_alphanumeric() || c == '-' || c == '_') + { + return Err(ServerError::InvalidTenantId { + tenant_id: tenant_id.clone(), + reason: + 
"tenant_id must contain only alphanumeric characters, hyphens, and underscores" + .to_string(), + }); + } + + Ok(()) + } + + #[tokio::test] + async fn admin_health_endpoint_returns_status() { + let server = AdminServer::new_test().await; + let resp = server.get("/health").await; + assert!(resp.status().is_success()); + + let health: HealthReport = resp.json().await; + assert!(health.nats_connected); + assert!(health.storage_connected); + } + + #[tokio::test] + async fn admin_ready_endpoint_returns_success() { + let server = AdminServer::new_test().await; + let resp = server.get("/ready").await; + assert!(resp.status().is_success()); + } + + #[tokio::test] + async fn admin_metrics_endpoint_prometheus_format() { + let server = AdminServer::new_test().await; + let resp = server.get("/metrics").await; + let body = resp.text().await; + assert!(body.contains("aggregate_commands_total")); + assert!(body.contains("tenant_id")); + } + + #[tokio::test] + async fn admin_tenants_list_returns_hosted_tenants() { + let server = AdminServer::new_test().await; + let resp = server.get("/admin/tenants").await; + let tenants: Vec = resp.json().await; + assert!(tenants + .iter() + .any(|t| t.tenant_id == TenantId::new("test-tenant"))); + } + + #[tokio::test] + async fn admin_drain_waits_for_in_flight_commands() { + use std::time::{Duration, Instant}; + + let server = AdminServer::new_test().await; + let tenant_id = TenantId::new("test-tenant"); + let guard = server + .placement_manager() + .begin_command(&tenant_id) + .await + .unwrap(); + + tokio::spawn(async move { + tokio::time::sleep(Duration::from_millis(50)).await; + drop(guard); + }); + + let start = Instant::now(); + let resp = server + .post( + "/admin/drain", + serde_json::json!({"tenant_id": "test-tenant"}), + ) + .await; + + assert!(start.elapsed() < Duration::from_secs(5)); + assert!(resp.status().is_success()); + + server.placement_manager().wait_drained(&tenant_id).await; + let status = 
server.placement_manager().tenant_status(&tenant_id).await; + assert!(status.draining); + assert!(!status.accepting); + assert_eq!(status.in_flight, 0); + } + + #[tokio::test] + async fn admin_config_reload_updates_routing() { + let server = AdminServer::new_test().await; + let resp = server + .post( + "/admin/reload", + serde_json::json!({"hosted_tenants": ["new-tenant"]}), + ) + .await; + assert!(resp.status().is_success()); + + let tenants = server.get_hosted_tenants().await; + assert!(tenants.contains(&TenantId::new("new-tenant"))); + } + + #[test] + fn admin_server_is_send() { + fn assert_send() {} + assert_send::(); + } + + #[test] + fn hash_ring_routes_deterministically() { + let mut ring = HashRing::new(100); + ring.add_node("node-a"); + ring.add_node("node-b"); + ring.add_node("node-c"); + + let r1 = ring.route("tenant-a").unwrap().to_string(); + let r2 = ring.route("tenant-a").unwrap().to_string(); + assert_eq!(r1, r2); + } + + #[test] + fn hash_ring_distributes_tenants_evenly() { + let mut ring = HashRing::new(200); + ring.add_node("node-a"); + ring.add_node("node-b"); + ring.add_node("node-c"); + + let mut counts: HashMap = HashMap::new(); + for i in 0..3000 { + let tenant = format!("tenant-{}", i); + let node = ring.route(&tenant).unwrap().to_string(); + *counts.entry(node).or_insert(0) += 1; + } + + let avg = 3000.0 / 3.0; + for c in counts.values() { + let diff = (*c as f64 - avg).abs() / avg; + assert!(diff < 0.25); + } + } + + #[test] + fn hash_ring_rebalances_on_node_add() { + let mut ring = HashRing::new(200); + ring.add_node("node-a"); + ring.add_node("node-b"); + + let mut before: HashMap = HashMap::new(); + for i in 0..2000 { + let tenant = format!("tenant-{}", i); + before.insert(tenant.clone(), ring.route(&tenant).unwrap().to_string()); + } + + ring.add_node("node-c"); + + let mut moved = 0usize; + for (tenant, old) in before { + let new = ring.route(&tenant).unwrap(); + if new != old { + moved += 1; + } + } + + assert!(moved > 0); + 
/// Observable state of a [`CircuitBreaker`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CircuitState {
    /// Normal operation; failures are being counted.
    Closed,
    /// Tripped; callers should back off until the reset timeout elapses.
    Open,
    /// Probation after the reset timeout: enough successes close the
    /// circuit, a single failure reopens it.
    HalfOpen,
}

/// Failure-counting circuit breaker with a timed half-open probe phase.
///
/// The stored `state` field only ever holds `Closed` or `Open`; `HalfOpen`
/// is derived lazily in [`CircuitBreaker::state`] from the elapsed time
/// since the last failure.
#[derive(Debug, Clone)]
pub struct CircuitBreaker {
    state: CircuitState,
    failure_count: u32,
    failure_threshold: u32,
    reset_timeout: Duration,
    last_failure_time: Option<Instant>,
    half_open_successes: u32,
    half_open_threshold: u32,
}

impl CircuitBreaker {
    /// Defaults: trip after 5 consecutive failures, probe again after 30 s,
    /// and require 3 half-open successes to close.
    pub fn new() -> Self {
        Self {
            state: CircuitState::Closed,
            failure_count: 0,
            failure_threshold: 5,
            reset_timeout: Duration::from_secs(30),
            last_failure_time: None,
            half_open_successes: 0,
            half_open_threshold: 3,
        }
    }

    /// Sets how many consecutive failures trip the circuit.
    pub fn with_failure_threshold(mut self, threshold: u32) -> Self {
        self.failure_threshold = threshold;
        self
    }

    /// Sets how long an open circuit waits before allowing probes.
    pub fn with_reset_timeout(mut self, timeout: Duration) -> Self {
        self.reset_timeout = timeout;
        self
    }

    /// Sets how many half-open successes are required to close the circuit.
    pub fn with_half_open_threshold(mut self, threshold: u32) -> Self {
        self.half_open_threshold = threshold;
        self
    }

    /// Effective state: an `Open` circuit whose reset timeout has elapsed is
    /// reported as `HalfOpen` without mutating the stored state.
    pub fn state(&self) -> CircuitState {
        if self.state == CircuitState::Open {
            if let Some(last_failure) = self.last_failure_time {
                if last_failure.elapsed() >= self.reset_timeout {
                    return CircuitState::HalfOpen;
                }
            }
        }
        self.state
    }

    /// True when the circuit is open (and the probe window has not started).
    pub fn is_open(&self) -> bool {
        matches!(self.state(), CircuitState::Open)
    }

    /// True when the circuit is fully closed.
    pub fn is_closed(&self) -> bool {
        matches!(self.state(), CircuitState::Closed)
    }

    /// Records a successful call: clears the failure count when closed, and
    /// counts toward closing the circuit when half-open.
    pub fn record_success(&mut self) {
        match self.state() {
            CircuitState::Closed => {
                self.failure_count = 0;
            }
            CircuitState::HalfOpen => {
                self.half_open_successes += 1;
                if self.half_open_successes >= self.half_open_threshold {
                    self.state = CircuitState::Closed;
                    self.failure_count = 0;
                    self.half_open_successes = 0;
                    self.last_failure_time = None;
                }
            }
            CircuitState::Open => {}
        }
    }

    /// Records a failed call.
    ///
    /// BUG FIX: the effective state must be read *before* refreshing
    /// `last_failure_time`. The previous code updated the timestamp first,
    /// which made `state()` report `Open` for a circuit that was logically
    /// half-open, so the half-open branch was unreachable and
    /// `half_open_successes` was never reset — allowing a later half-open
    /// window to close the circuit with fewer successes than configured.
    pub fn record_failure(&mut self) {
        let effective = self.state();
        self.last_failure_time = Some(Instant::now());

        match effective {
            CircuitState::Closed => {
                self.failure_count += 1;
                if self.failure_count >= self.failure_threshold {
                    self.state = CircuitState::Open;
                }
            }
            CircuitState::HalfOpen => {
                // A probe failed: reopen and forget any probe successes.
                self.state = CircuitState::Open;
                self.half_open_successes = 0;
            }
            CircuitState::Open => {}
        }
    }

    /// Force-closes the circuit and clears all counters and timestamps.
    pub fn reset(&mut self) {
        self.state = CircuitState::Closed;
        self.failure_count = 0;
        self.last_failure_time = None;
        self.half_open_successes = 0;
    }
}

impl Default for CircuitBreaker {
    fn default() -> Self {
        Self::new()
    }
}
cb.record_success();
        assert!(cb.is_closed());
    }

    #[test]
    fn circuit_breaker_reopens_on_half_open_failure() {
        let mut cb = CircuitBreaker::new()
            .with_failure_threshold(1)
            .with_reset_timeout(Duration::from_millis(10));

        cb.record_failure();
        assert!(cb.is_open());

        sleep(Duration::from_millis(15));
        assert_eq!(cb.state(), CircuitState::HalfOpen);

        cb.record_failure();
        assert!(cb.is_open());
    }

    #[test]
    fn circuit_breaker_success_resets_failure_count() {
        let mut cb = CircuitBreaker::new().with_failure_threshold(3);

        cb.record_failure();
        cb.record_failure();
        cb.record_success();
        assert!(cb.is_closed());

        cb.record_failure();
        assert!(cb.is_closed());
    }

    #[test]
    fn circuit_breaker_manual_reset() {
        let mut cb = CircuitBreaker::new().with_failure_threshold(1);

        cb.record_failure();
        assert!(cb.is_open());

        cb.reset();
        assert!(cb.is_closed());
    }
}
diff --git a/aggregate/src/storage/mod.rs b/aggregate/src/storage/mod.rs new file mode 100644 index 0000000..622c990 --- /dev/null +++ b/aggregate/src/storage/mod.rs @@ -0,0 +1,422 @@
mod circuit_breaker;

pub use circuit_breaker::CircuitBreaker;

use crate::types::{AggregateError, AggregateId, AggregateType, Snapshot, TenantId, Version};
use edge_storage::{AggregateStore, Config as EdgeConfig, EdgeStorage, WriteResult, Writer};
use std::sync::Arc;
use tokio::sync::RwLock;

/// Snapshot persistence backed by edge_storage, with every call gated by a
/// circuit breaker so a failing store sheds load instead of being hammered.
pub struct StorageClient {
    storage: Arc<EdgeStorage>, // generics reconstructed from usage -- dump lost angle brackets
    aggregate_store: AggregateStore,
    writer: Arc<Writer>,
    circuit_breaker: RwLock<CircuitBreaker>, // per-instance; NOT shared across clones (Clone builds a fresh one)
}

impl std::fmt::Debug for StorageClient {
    // Manual impl -- presumably because the inner storage handles do not
    // implement Debug; TODO confirm.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("StorageClient")
            .field("circuit_breaker", &self.circuit_breaker)
            .finish_non_exhaustive()
    }
}

impl StorageClient {
    /// Open (or create) the store at `storage_path` and wire up the writer,
    /// aggregate store and a default circuit breaker.
    pub fn open(storage_path: impl Into<String>) -> Result<Self, StorageInitError> {
        let config = EdgeConfig::new(storage_path.into());
        let storage = EdgeStorage::open(config)?;
        let writer = Arc::new(Writer::new(storage.db().clone(), &EdgeConfig::default()));
        let aggregate_store = AggregateStore::new(storage.db().clone(), writer.clone());

        Ok(Self {
            storage: Arc::new(storage),
            aggregate_store,
            writer,
            circuit_breaker: RwLock::new(CircuitBreaker::new()),
        })
    }

    /// Test-only helper: opens the store in a temp dir that is deliberately
    /// leaked via mem::forget so the backing files outlive this call.
    /// NOTE(review): the leaked directory is never removed from disk.
    #[cfg(test)]
    pub fn in_memory() -> Self {
        use tempfile::tempdir;
        let dir = tempdir().expect("failed to create temp dir");
        let path = dir.path().join("test.mdbx");
        std::mem::forget(dir);
        Self::open(path.to_string_lossy().to_string()).expect("failed to open in-memory storage")
    }

    /// Builder: replace the default circuit breaker configuration.
    pub fn with_circuit_breaker(mut self, cb: CircuitBreaker) -> Self {
        self.circuit_breaker = RwLock::new(cb);
        self
    }

    /// Latest snapshot for (tenant, aggregate), or None if none exists.
    /// Fails fast with StorageError while the circuit is open.
    pub async fn get_snapshot(
        &self,
        tenant_id: &TenantId,
        aggregate_id: &AggregateId,
    ) -> Result<Option<Snapshot>, AggregateError> {
        self.check_circuit().await?;

        let key = Self::build_key(tenant_id, aggregate_id);

        match self.aggregate_store.get_latest_snapshot(&key) {
            Ok(Some((version, data))) => {
                let snapshot = self
                    .decode_snapshot(tenant_id, aggregate_id, version, &data)
                    .map_err(|e| AggregateError::StorageError(e.to_string()))?;
                self.record_success().await;
                Ok(Some(snapshot))
            }
            Ok(None) => {
                self.record_success().await;
                Ok(None)
            }
            Err(e) => {
                self.record_failure().await;
                Err(AggregateError::StorageError(e.to_string()))
            }
        }
    }

    /// Snapshot at an exact version, or None if that version was never
    /// stored. Same breaker gating as get_snapshot.
    pub async fn get_snapshot_at_version(
        &self,
        tenant_id: &TenantId,
        aggregate_id: &AggregateId,
        version: Version,
    ) -> Result<Option<Snapshot>, AggregateError> {
        self.check_circuit().await?;

        let key = Self::build_key(tenant_id, aggregate_id);

        match self.aggregate_store.get_snapshot(&key, version.as_u64()) {
            Ok(Some(data)) => {
                let snapshot = self
                    .decode_snapshot(tenant_id, aggregate_id, version.as_u64(), &data)
                    .map_err(|e| AggregateError::StorageError(e.to_string()))?;
                self.record_success().await;
                Ok(Some(snapshot))
            }
            Ok(None) => {
                self.record_success().await;
                Ok(None)
            }
            Err(e) => {
                self.record_failure().await;
Err(AggregateError::StorageError(e.to_string())) + } + } + } + + pub async fn put_snapshot(&self, snapshot: &Snapshot) -> Result<(), AggregateError> { + self.check_circuit().await?; + + let key = Self::build_key(&snapshot.tenant_id, &snapshot.aggregate_id); + let data = serde_json::to_vec(&snapshot.state) + .map_err(|e| AggregateError::StorageError(e.to_string()))?; + + let result = self + .aggregate_store + .put_snapshot_sync(&key, snapshot.version.as_u64(), &data) + .map_err(|e| { + self.record_failure_sync(); + AggregateError::StorageError(e.to_string()) + })?; + + match result { + WriteResult::Success => { + self.record_success().await; + Ok(()) + } + WriteResult::VersionConflict { + aggregate_id: _, + version, + } => { + self.record_success().await; + Err(AggregateError::VersionConflict { + expected: Version::from(version).increment(), + actual: Version::from(version), + }) + } + WriteResult::Error(e) => { + self.record_failure().await; + Err(AggregateError::StorageError(e)) + } + } + } + + pub async fn get_latest_version( + &self, + tenant_id: &TenantId, + aggregate_id: &AggregateId, + ) -> Result, AggregateError> { + self.check_circuit().await?; + + let key = Self::build_key(tenant_id, aggregate_id); + + match self.aggregate_store.get_latest_version(&key) { + Ok(Some(v)) => { + self.record_success().await; + Ok(Some(Version::from(v))) + } + Ok(None) => { + self.record_success().await; + Ok(None) + } + Err(e) => { + self.record_failure().await; + Err(AggregateError::StorageError(e.to_string())) + } + } + } + + pub async fn delete_snapshot( + &self, + _tenant_id: &TenantId, + _aggregate_id: &AggregateId, + ) -> Result<(), AggregateError> { + self.check_circuit().await?; + + self.record_success().await; + Err(AggregateError::StorageError( + "Snapshot deletion not supported in event-sourced system".to_string(), + )) + } + + fn build_key(tenant_id: &TenantId, aggregate_id: &AggregateId) -> Vec { + format!("{}:{}", tenant_id.as_str(), aggregate_id).into_bytes() + 
} + + fn decode_snapshot( + &self, + tenant_id: &TenantId, + aggregate_id: &AggregateId, + version: u64, + data: &[u8], + ) -> Result { + let state = serde_json::from_slice(data)?; + Ok(Snapshot::new( + tenant_id.clone(), + aggregate_id.clone(), + AggregateType::new("Unknown"), + Version::from(version), + state, + )) + } + + async fn check_circuit(&self) -> Result<(), AggregateError> { + let cb = self.circuit_breaker.read().await; + if cb.is_open() { + return Err(AggregateError::StorageError( + "Circuit breaker is open".to_string(), + )); + } + Ok(()) + } + + async fn record_success(&self) { + let mut cb = self.circuit_breaker.write().await; + cb.record_success(); + } + + fn record_failure_sync(&self) { + if let Ok(mut cb) = self.circuit_breaker.try_write() { + cb.record_failure(); + } + } + + async fn record_failure(&self) { + let mut cb = self.circuit_breaker.write().await; + cb.record_failure(); + } + + pub fn storage(&self) -> &Arc { + &self.storage + } +} + +impl Clone for StorageClient { + fn clone(&self) -> Self { + Self { + storage: self.storage.clone(), + aggregate_store: self.aggregate_store.clone(), + writer: self.writer.clone(), + circuit_breaker: RwLock::new(CircuitBreaker::new()), + } + } +} + +#[derive(Debug, thiserror::Error)] +pub enum StorageInitError { + #[error("Failed to open storage: {0}")] + OpenError(#[from] edge_storage::Error), +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + use tempfile::tempdir; + + fn create_test_client() -> (tempfile::TempDir, StorageClient) { + let dir = tempdir().unwrap(); + let path = dir.path().join("test.mdbx"); + let client = StorageClient::open(path.to_string_lossy().to_string()).unwrap(); + (dir, client) + } + + #[test] + fn storage_client_open() { + let (_dir, _client) = create_test_client(); + } + + #[tokio::test] + async fn storage_client_put_get_snapshot() { + let (_dir, client) = create_test_client(); + + let tenant_id = TenantId::new("acme-corp"); + let aggregate_id = 
AggregateId::new_v7();
        let snapshot = Snapshot::new(
            tenant_id.clone(),
            aggregate_id.clone(),
            AggregateType::new("Account"),
            Version::from(1),
            json!({"balance": 100}),
        );

        client.put_snapshot(&snapshot).await.unwrap();

        let retrieved = client
            .get_snapshot(&tenant_id, &aggregate_id)
            .await
            .unwrap();

        assert!(retrieved.is_some());
        let retrieved = retrieved.unwrap();
        assert_eq!(retrieved.version, Version::from(1));
        assert_eq!(retrieved.state, json!({"balance": 100}));
    }

    // Re-writing an already-stored version must be rejected.
    #[tokio::test]
    async fn storage_client_version_conflict() {
        let (_dir, client) = create_test_client();

        let tenant_id = TenantId::new("acme-corp");
        let aggregate_id = AggregateId::new_v7();

        let snapshot_v1 = Snapshot::new(
            tenant_id.clone(),
            aggregate_id.clone(),
            AggregateType::new("Account"),
            Version::from(1),
            json!({"balance": 100}),
        );

        client.put_snapshot(&snapshot_v1).await.unwrap();

        let snapshot_v1_again = Snapshot::new(
            tenant_id.clone(),
            aggregate_id.clone(),
            AggregateType::new("Account"),
            Version::from(1),
            json!({"balance": 200}),
        );

        let result = client.put_snapshot(&snapshot_v1_again).await;
        assert!(matches!(
            result,
            Err(AggregateError::VersionConflict { .. })
        ));
    }

    // Latest version tracks the highest written version, even across gaps
    // (v1 then v3).
    #[tokio::test]
    async fn storage_client_latest_version() {
        let (_dir, client) = create_test_client();

        let tenant_id = TenantId::new("acme-corp");
        let aggregate_id = AggregateId::new_v7();

        let version = client
            .get_latest_version(&tenant_id, &aggregate_id)
            .await
            .unwrap();
        assert!(version.is_none());

        let snapshot_v1 = Snapshot::new(
            tenant_id.clone(),
            aggregate_id.clone(),
            AggregateType::new("Account"),
            Version::from(1),
            json!({"balance": 100}),
        );
        client.put_snapshot(&snapshot_v1).await.unwrap();

        let version = client
            .get_latest_version(&tenant_id, &aggregate_id)
            .await
            .unwrap();
        assert_eq!(version, Some(Version::from(1)));

        let snapshot_v3 = Snapshot::new(
            tenant_id.clone(),
            aggregate_id.clone(),
            AggregateType::new("Account"),
            Version::from(3),
            json!({"balance": 300}),
        );
        client.put_snapshot(&snapshot_v3).await.unwrap();

        let version = client
            .get_latest_version(&tenant_id, &aggregate_id)
            .await
            .unwrap();
        assert_eq!(version, Some(Version::from(3)));
    }

    // The same aggregate id under two tenants must not collide (tenant
    // prefix in build_key).
    #[tokio::test]
    async fn storage_client_isolation() {
        let (_dir, client) = create_test_client();

        let tenant_a = TenantId::new("tenant-a");
        let tenant_b = TenantId::new("tenant-b");
        let aggregate_id = AggregateId::new_v7();

        let snapshot_a = Snapshot::new(
            tenant_a.clone(),
            aggregate_id.clone(),
            AggregateType::new("Account"),
            Version::from(1),
            json!({"owner": "A"}),
        );
        client.put_snapshot(&snapshot_a).await.unwrap();

        let snapshot_b = Snapshot::new(
            tenant_b.clone(),
            aggregate_id.clone(),
            AggregateType::new("Account"),
            Version::from(1),
            json!({"owner": "B"}),
        );
        client.put_snapshot(&snapshot_b).await.unwrap();

        let retrieved_a = client
            .get_snapshot(&tenant_a, &aggregate_id)
            .await
            .unwrap()
            .unwrap();
        let retrieved_b = client
            .get_snapshot(&tenant_b, &aggregate_id)
            .await
            .unwrap()
            .unwrap();

        assert_eq!(retrieved_a.state["owner"], "A");
assert_eq!(retrieved_b.state["owner"], "B"); + } + + #[test] + fn storage_client_is_send_sync() { + fn assert_send_sync() {} + assert_send_sync::(); + } +} diff --git a/aggregate/src/stream/circuit_breaker.rs b/aggregate/src/stream/circuit_breaker.rs new file mode 100644 index 0000000..af5da41 --- /dev/null +++ b/aggregate/src/stream/circuit_breaker.rs @@ -0,0 +1,284 @@ +use std::time::{Duration, Instant}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CircuitState { + Closed, + Open, + HalfOpen, +} + +#[derive(Debug, Clone)] +pub struct CircuitBreaker { + state: CircuitState, + failure_count: u32, + failure_threshold: u32, + reset_timeout: Duration, + last_failure_time: Option, + half_open_successes: u32, + half_open_threshold: u32, + consecutive_successes: u32, +} + +impl CircuitBreaker { + pub fn new() -> Self { + Self { + state: CircuitState::Closed, + failure_count: 0, + failure_threshold: 5, + reset_timeout: Duration::from_secs(30), + last_failure_time: None, + half_open_successes: 0, + half_open_threshold: 3, + consecutive_successes: 0, + } + } + + pub fn with_failure_threshold(mut self, threshold: u32) -> Self { + self.failure_threshold = threshold; + self + } + + pub fn with_reset_timeout(mut self, timeout: Duration) -> Self { + self.reset_timeout = timeout; + self + } + + pub fn with_half_open_threshold(mut self, threshold: u32) -> Self { + self.half_open_threshold = threshold; + self + } + + pub fn state(&self) -> CircuitState { + if self.state == CircuitState::Open { + if let Some(last_failure) = self.last_failure_time { + if last_failure.elapsed() >= self.reset_timeout { + return CircuitState::HalfOpen; + } + } + } + self.state + } + + pub fn is_open(&self) -> bool { + matches!(self.state(), CircuitState::Open) + } + + pub fn is_closed(&self) -> bool { + matches!(self.state(), CircuitState::Closed) + } + + pub fn is_half_open(&self) -> bool { + matches!(self.state(), CircuitState::HalfOpen) + } + + pub fn failure_count(&self) -> u32 { + 
self.failure_count + } + + pub fn consecutive_successes(&self) -> u32 { + self.consecutive_successes + } + + pub fn record_success(&mut self) { + self.consecutive_successes += 1; + + match self.state() { + CircuitState::Closed => { + self.failure_count = 0; + } + CircuitState::HalfOpen => { + self.half_open_successes += 1; + if self.half_open_successes >= self.half_open_threshold { + self.state = CircuitState::Closed; + self.failure_count = 0; + self.half_open_successes = 0; + self.last_failure_time = None; + } + } + CircuitState::Open => {} + } + } + + pub fn record_failure(&mut self) { + self.consecutive_successes = 0; + self.last_failure_time = Some(Instant::now()); + + match self.state() { + CircuitState::Closed => { + self.failure_count += 1; + if self.failure_count >= self.failure_threshold { + self.state = CircuitState::Open; + } + } + CircuitState::HalfOpen => { + self.state = CircuitState::Open; + self.half_open_successes = 0; + } + CircuitState::Open => {} + } + } + + pub fn reset(&mut self) { + self.state = CircuitState::Closed; + self.failure_count = 0; + self.last_failure_time = None; + self.half_open_successes = 0; + self.consecutive_successes = 0; + } + + pub fn time_until_reset(&self) -> Option { + if self.state == CircuitState::Open { + self.last_failure_time.map(|t| { + let elapsed = t.elapsed(); + if elapsed < self.reset_timeout { + self.reset_timeout - elapsed + } else { + Duration::ZERO + } + }) + } else { + None + } + } +} + +impl Default for CircuitBreaker { + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::thread::sleep; + + #[test] + fn circuit_breaker_starts_closed() { + let cb = CircuitBreaker::new(); + assert!(cb.is_closed()); + assert!(!cb.is_open()); + } + + #[test] + fn circuit_breaker_opens_after_threshold() { + let mut cb = CircuitBreaker::new().with_failure_threshold(3); + + cb.record_failure(); + assert!(cb.is_closed()); + + cb.record_failure(); + assert!(cb.is_closed()); + + 
cb.record_failure(); + assert!(cb.is_open()); + } + + #[test] + fn circuit_breaker_resets_after_timeout() { + let mut cb = CircuitBreaker::new() + .with_failure_threshold(1) + .with_reset_timeout(Duration::from_millis(10)); + + cb.record_failure(); + assert!(cb.is_open()); + + sleep(Duration::from_millis(15)); + assert!(cb.is_half_open()); + } + + #[test] + fn circuit_breaker_closes_after_half_open_successes() { + let mut cb = CircuitBreaker::new() + .with_failure_threshold(1) + .with_reset_timeout(Duration::from_millis(10)) + .with_half_open_threshold(2); + + cb.record_failure(); + assert!(cb.is_open()); + + sleep(Duration::from_millis(15)); + assert!(cb.is_half_open()); + + cb.record_success(); + assert!(cb.is_half_open()); + + cb.record_success(); + assert!(cb.is_closed()); + } + + #[test] + fn circuit_breaker_reopens_on_half_open_failure() { + let mut cb = CircuitBreaker::new() + .with_failure_threshold(1) + .with_reset_timeout(Duration::from_millis(10)); + + cb.record_failure(); + assert!(cb.is_open()); + + sleep(Duration::from_millis(15)); + assert!(cb.is_half_open()); + + cb.record_failure(); + assert!(cb.is_open()); + } + + #[test] + fn circuit_breaker_success_resets_failure_count() { + let mut cb = CircuitBreaker::new().with_failure_threshold(3); + + cb.record_failure(); + cb.record_failure(); + cb.record_success(); + assert!(cb.is_closed()); + assert_eq!(cb.failure_count(), 0); + + cb.record_failure(); + assert!(cb.is_closed()); + } + + #[test] + fn circuit_breaker_manual_reset() { + let mut cb = CircuitBreaker::new().with_failure_threshold(1); + + cb.record_failure(); + assert!(cb.is_open()); + + cb.reset(); + assert!(cb.is_closed()); + assert_eq!(cb.failure_count(), 0); + } + + #[test] + fn circuit_breaker_tracks_consecutive_successes() { + let mut cb = CircuitBreaker::new(); + + assert_eq!(cb.consecutive_successes(), 0); + + cb.record_success(); + assert_eq!(cb.consecutive_successes(), 1); + + cb.record_success(); + 
assert_eq!(cb.consecutive_successes(), 2);

        cb.record_failure();
        assert_eq!(cb.consecutive_successes(), 0);
    }

    #[test]
    fn circuit_breaker_time_until_reset() {
        let mut cb = CircuitBreaker::new()
            .with_failure_threshold(1)
            .with_reset_timeout(Duration::from_millis(100));

        assert!(cb.time_until_reset().is_none());

        cb.record_failure();
        let remaining = cb.time_until_reset();
        assert!(remaining.is_some());
        assert!(remaining.unwrap() <= Duration::from_millis(100));

        cb.reset();
        assert!(cb.time_until_reset().is_none());
    }
}
diff --git a/aggregate/src/stream/mod.rs b/aggregate/src/stream/mod.rs new file mode 100644 index 0000000..d0e7559 --- /dev/null +++ b/aggregate/src/stream/mod.rs @@ -0,0 +1,627 @@
mod circuit_breaker;

pub use circuit_breaker::CircuitBreaker;

use crate::types::{AggregateError, AggregateId, AggregateType, Event, TenantId, Version};
use async_nats::jetstream::{
    self, consumer::pull::Config as PullConfig, consumer::AckPolicy, consumer::DeliverPolicy,
    consumer::ReplayPolicy, stream::Config as StreamConfig,
};
use futures::stream::{Stream, StreamExt};
use serde_json;
use std::collections::HashMap;
use std::pin::Pin;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::broadcast;
use tokio::sync::RwLock;
use tokio::time::Instant;

/// Name of the JetStream stream carrying all aggregate events.
const AGGREGATE_STREAM_NAME: &str = "AGGREGATE_EVENTS";

/// Retention knobs applied when (re)creating the JetStream stream.
#[derive(Debug)]
pub struct StreamConfigSettings {
    pub max_messages: i64,          // default caps at 10M messages
    pub max_bytes: i64,             // -1 = unlimited
    pub max_age: Duration,          // per-message retention horizon
    pub duplicate_window: Duration, // Nats-Msg-Id dedupe window
}

impl Default for StreamConfigSettings {
    fn default() -> Self {
        Self {
            max_messages: 10_000_000,
            max_bytes: -1,
            max_age: Duration::from_secs(365 * 24 * 60 * 60), // ~1 year
            duplicate_window: Duration::from_secs(120),
        }
    }
}

/// Event publish/consume client. Production uses JetStream; tests use the
/// in-memory backend. Unlike StorageClient, the circuit breaker here is
/// shared across clones (Arc).
#[derive(Debug, Clone)]
pub struct StreamClient {
    backend: StreamBackend,
    circuit_breaker: Arc<RwLock<CircuitBreaker>>, // generics reconstructed -- dump lost angle brackets
}

#[derive(Debug, Clone)]
#[allow(dead_code)]
enum StreamBackend {
    JetStream(jetstream::Context),
    InMemory(Arc<InMemoryStream>), // generic reconstructed from in_memory()
}

/// Test double: events bucketed per (tenant, aggregate-id), plus a broadcast
/// channel feeding live subscriptions.
#[derive(Debug)]
struct InMemoryStream {
    // key = (tenant id, aggregate id as string); value type reconstructed
    // from usage in publish_events/fetch_events -- TODO confirm.
    events_by_tenant_aggregate: RwLock<HashMap<(String, String), Vec<Event>>>,
    updates: broadcast::Sender<Event>,
}

impl StreamClient {
    /// Connect to NATS at `nats_url` and build a JetStream-backed client.
    pub async fn new(nats_url: impl Into<String>) -> Result<Self, AggregateError> {
        let url = nats_url.into();
        let client = async_nats::connect(&url).await.map_err(|e| {
            AggregateError::StreamError(format!("Failed to connect to NATS: {}", e))
        })?;

        let jetstream = jetstream::new(client.clone());

        Ok(Self {
            backend: StreamBackend::JetStream(jetstream),
            circuit_breaker: Arc::new(RwLock::new(CircuitBreaker::new())),
        })
    }

    /// Test-only in-memory backend; 1024-message broadcast buffer.
    #[cfg(test)]
    pub fn in_memory() -> Self {
        let (updates, _) = broadcast::channel(1024);
        Self {
            backend: StreamBackend::InMemory(Arc::new(InMemoryStream {
                events_by_tenant_aggregate: RwLock::new(HashMap::new()),
                updates,
            })),
            circuit_breaker: Arc::new(RwLock::new(CircuitBreaker::new())),
        }
    }

    /// Builder: swap in a configured circuit breaker.
    /// NOTE(review): declared async but contains no await -- could be a plain
    /// fn; left async to preserve the public interface.
    pub async fn with_circuit_breaker(mut self, cb: CircuitBreaker) -> Self {
        self.circuit_breaker = Arc::new(RwLock::new(cb));
        self
    }

    /// Create (or fetch) the aggregate event stream with default retention.
    pub async fn setup_stream(&self) -> Result<jetstream::stream::Stream, AggregateError> { // return type reconstructed -- TODO confirm
        self.setup_stream_with_settings(StreamConfigSettings::default())
            .await
    }

    /// Create (or fetch) the stream with explicit retention settings.
    /// Subject scheme: tenant.<tenant>.aggregate.<type>.<id>.
    /// Not supported by the in-memory backend.
    pub async fn setup_stream_with_settings(
        &self,
        settings: StreamConfigSettings,
    ) -> Result<jetstream::stream::Stream, AggregateError> { // return type reconstructed -- TODO confirm
        let jetstream = match &self.backend {
            StreamBackend::JetStream(ctx) => ctx.clone(),
            StreamBackend::InMemory(_) => {
                return Err(AggregateError::StreamError(
                    "setup_stream not supported for in-memory stream".to_string(),
                ));
            }
        };

        let config = StreamConfig {
            name: AGGREGATE_STREAM_NAME.to_string(),
            subjects: vec!["tenant.*.aggregate.*.*".to_string()],
            max_messages: settings.max_messages,
            max_bytes: settings.max_bytes,
            max_age: settings.max_age,
            duplicate_window: settings.duplicate_window,
            ..Default::default()
        };

        let stream = jetstream
            .get_or_create_stream(config)
            .await
            .map_err(|e| AggregateError::StreamError(format!("Failed to create stream: {}", e)))?;

        Ok(stream)
    }

    pub
async fn publish_events(&self, events: Vec<Event>) -> Result<(), AggregateError> {
        if events.is_empty() {
            return Ok(());
        }

        if self.circuit_breaker.read().await.is_open() {
            return Err(AggregateError::StreamError(
                "Circuit breaker is open".to_string(),
            ));
        }

        match &self.backend {
            StreamBackend::JetStream(jetstream) => {
                // Events are published one at a time; a failure part-way
                // through returns early, leaving earlier events published.
                // NOTE(review): confirm callers tolerate partial publishes.
                for event in &events {
                    let subject =
                        build_subject(&event.tenant_id, &event.aggregate_type, &event.aggregate_id);
                    let payload = serde_json::to_vec(event).map_err(|e| {
                        AggregateError::StreamError(format!("Serialization error: {}", e))
                    })?;

                    // Nats-Msg-Id drives JetStream dedupe within the stream's
                    // duplicate_window; the rest are tracing/routing metadata.
                    let mut headers = async_nats::HeaderMap::new();
                    headers.insert("Nats-Msg-Id", event.event_id.to_string().as_str());
                    headers.insert("aggregate-version", event.version.to_string().as_str());
                    headers.insert("tenant-id", event.tenant_id.as_str());
                    headers.insert("aggregate-type", event.aggregate_type.as_str());
                    headers.insert("event-type", event.event_type.as_str());
                    if let Some(correlation_id) = event.correlation_id.as_deref() {
                        // Both spellings kept for downstream compatibility.
                        headers.insert("x-correlation-id", correlation_id);
                        headers.insert("correlation-id", correlation_id);
                    }
                    if let Some(traceparent) = event.traceparent.as_deref() {
                        headers.insert("traceparent", traceparent);
                        if let Some(trace_id) = shared::trace_id_from_traceparent(traceparent) {
                            headers.insert("trace-id", trace_id);
                        }
                    }

                    let result = jetstream
                        .publish_with_headers(subject.clone(), headers.clone(), payload.into())
                        .await;

                    match result {
                        Ok(_) => {
                            self.circuit_breaker.write().await.record_success();
                        }
                        Err(e) => {
                            self.circuit_breaker.write().await.record_failure();
                            return Err(AggregateError::StreamError(format!(
                                "Failed to publish event: {}",
                                e
                            )));
                        }
                    }
                }
            }
            StreamBackend::InMemory(mem) => {
                for event in events {
                    let key = (
                        event.tenant_id.as_str().to_string(),
                        event.aggregate_id.to_string(),
                    );

                    let mut map = mem.events_by_tenant_aggregate.write().await;
                    let bucket = map.entry(key).or_default();

                    // Idempotence: a command already applied is skipped.
                    if bucket.iter().any(|e| e.command_id == event.command_id) {
                        continue;
                    }

                    // Optimistic concurrency: versions must be contiguous.
                    let current_max = bucket.last().map(|e| e.version.as_u64()).unwrap_or(0);
                    let expected = current_max + 1;
                    if event.version.as_u64() != expected {
                        return Err(AggregateError::VersionConflict {
                            expected: Version::from(current_max).increment(),
                            actual: event.version,
                        });
                    }

                    bucket.push(event.clone());
                    bucket.sort_by_key(|e| e.version);
                    // Send error (no subscribers) is deliberately ignored.
                    let _ = mem.updates.send(event);
                }

                self.circuit_breaker.write().await.record_success();
            }
        }

        Ok(())
    }

    /// All events for (tenant, aggregate) with version > `after_version`,
    /// sorted ascending. The JetStream path drains via a throwaway durable
    /// consumer.
    pub async fn fetch_events(
        &self,
        tenant_id: &TenantId,
        aggregate_id: &AggregateId,
        after_version: Version,
    ) -> Result<Vec<Event>, AggregateError> {
        if self.circuit_breaker.read().await.is_open() {
            return Err(AggregateError::StreamError(
                "Circuit breaker is open".to_string(),
            ));
        }

        match &self.backend {
            StreamBackend::JetStream(jetstream) => {
                let stream = jetstream
                    .get_stream(AGGREGATE_STREAM_NAME)
                    .await
                    .map_err(|e| AggregateError::StreamError(format!("Stream not found: {}", e)))?;

                let subject = format!("tenant.{}.aggregate.*.{}", tenant_id.as_str(), aggregate_id);

                // Unique name per fetch. NOTE(review): a *durable* consumer
                // is created and deleted every call; an ephemeral consumer
                // may be cheaper -- confirm.
                let consumer_name = format!(
                    "fetch_{}_{}_{}",
                    tenant_id.as_str(),
                    aggregate_id,
                    uuid::Uuid::now_v7()
                );

                let consumer_config = PullConfig {
                    durable_name: Some(consumer_name.clone()),
                    filter_subject: subject.clone(),
                    deliver_policy: DeliverPolicy::All,
                    ack_policy: AckPolicy::Explicit,
                    replay_policy: ReplayPolicy::Instant,
                    ..Default::default()
                };

                let consumer = stream
                    .get_or_create_consumer(&consumer_name, consumer_config)
                    .await
                    .map_err(|e| {
                        AggregateError::StreamError(format!("Consumer creation failed: {}", e))
                    })?;

                let mut events = Vec::new();
                let mut messages = consumer.messages().await.map_err(|e| {
                    AggregateError::StreamError(format!("Message stream error: {}", e))
                })?;

                // Drain until the stream goes idle or the hard cap is hit.
                let idle_timeout = Duration::from_millis(250);
                let
max_total_wait = Duration::from_secs(2); + let started = Instant::now(); + + loop { + if started.elapsed() >= max_total_wait { + break; + } + + match tokio::time::timeout(idle_timeout, messages.next()).await { + Ok(Some(Ok(msg))) => { + let event: Event = + serde_json::from_slice(&msg.payload).map_err(|e| { + AggregateError::StreamError(format!( + "Deserialization error: {}", + e + )) + })?; + + if event.version > after_version { + events.push(event); + } + + msg.ack().await.ok(); + } + Ok(Some(Err(e))) => { + return Err(AggregateError::StreamError(format!( + "Message error: {}", + e + ))); + } + Ok(None) => break, + Err(_) => break, + } + } + + let _ = stream.delete_consumer(&consumer_name).await; + events.sort_by_key(|e| e.version); + self.circuit_breaker.write().await.record_success(); + Ok(events) + } + StreamBackend::InMemory(mem) => { + let key = (tenant_id.as_str().to_string(), aggregate_id.to_string()); + let map = mem.events_by_tenant_aggregate.read().await; + let mut out = map + .get(&key) + .map(|bucket| { + bucket + .iter() + .filter(|e| e.version > after_version) + .cloned() + .collect::>() + }) + .unwrap_or_default(); + + out.sort_by_key(|e| e.version); + self.circuit_breaker.write().await.record_success(); + Ok(out) + } + } + } + + pub async fn subscribe_to_events( + &self, + tenant_id: TenantId, + aggregate_type: AggregateType, + aggregate_id: AggregateId, + ) -> Result + Send>>, AggregateError> { + match &self.backend { + StreamBackend::JetStream(jetstream) => { + let subject = format!( + "tenant.{}.aggregate.{}.{}", + tenant_id.as_str(), + aggregate_type.as_str(), + aggregate_id + ); + + let stream = jetstream + .get_stream(AGGREGATE_STREAM_NAME) + .await + .map_err(|e| AggregateError::StreamError(format!("Stream not found: {}", e)))?; + + let consumer_name = format!("sub_{}_{}", tenant_id.as_str(), aggregate_id); + let consumer_config = PullConfig { + filter_subject: subject, + deliver_policy: DeliverPolicy::New, + ..Default::default() + }; + + 
let consumer = stream + .get_or_create_consumer(&consumer_name, consumer_config) + .await + .map_err(|e| { + AggregateError::StreamError(format!("Consumer creation failed: {}", e)) + })?; + + let messages = consumer.messages().await.map_err(|e| { + AggregateError::StreamError(format!("Message stream error: {}", e)) + })?; + + let event_stream = messages.filter_map(move |msg| async move { + match msg { + Ok(m) => { + let event: Result = serde_json::from_slice(&m.payload); + match event { + Ok(e) => { + m.ack().await.ok(); + Some(e) + } + Err(_) => None, + } + } + Err(_) => None, + } + }); + + let boxed: Pin + Send>> = Box::pin(event_stream); + Ok(boxed) + } + StreamBackend::InMemory(mem) => { + let tenant_id = tenant_id.as_str().to_string(); + let aggregate_type = aggregate_type.as_str().to_string(); + let aggregate_id = aggregate_id.to_string(); + + let receiver = mem.updates.subscribe(); + let boxed: Pin + Send>> = + Box::pin(futures::stream::unfold( + (receiver, tenant_id, aggregate_type, aggregate_id), + |(mut receiver, tenant_id, aggregate_type, aggregate_id)| async move { + loop { + match receiver.recv().await { + Ok(event) => { + if event.tenant_id.as_str() == tenant_id + && event.aggregate_type.as_str() == aggregate_type + && event.aggregate_id.to_string() == aggregate_id + { + return Some(( + event, + (receiver, tenant_id, aggregate_type, aggregate_id), + )); + } + } + Err(broadcast::error::RecvError::Lagged(_)) => continue, + Err(broadcast::error::RecvError::Closed) => return None, + } + } + }, + )); + Ok(boxed) + } + } + } + + pub async fn get_stream_info(&self) -> Result, AggregateError> { + match &self.backend { + StreamBackend::JetStream(jetstream) => { + match jetstream.get_stream(AGGREGATE_STREAM_NAME).await { + Ok(mut stream) => { + let info = stream.info().await.map_err(|e| { + AggregateError::StreamError(format!("Stream info error: {}", e)) + })?; + Ok(Some(info.clone())) + } + Err(_) => Ok(None), + } + } + StreamBackend::InMemory(_) => Ok(None), 
+ } + } + + pub async fn health_check(&self) -> Result { + match &self.backend { + StreamBackend::JetStream(jetstream) => { + match jetstream.get_stream(AGGREGATE_STREAM_NAME).await { + Ok(_) => { + self.circuit_breaker.write().await.record_success(); + Ok(true) + } + Err(e) => { + self.circuit_breaker.write().await.record_failure(); + Err(AggregateError::StreamError(format!( + "Health check failed: {}", + e + ))) + } + } + } + StreamBackend::InMemory(_) => { + self.circuit_breaker.write().await.record_success(); + Ok(true) + } + } + } + + pub fn circuit_breaker_state(&self) -> circuit_breaker::CircuitState { + futures::executor::block_on(async { self.circuit_breaker.read().await.state() }) + } + + pub async fn delete_consumer( + &self, + tenant_id: &TenantId, + aggregate_id: &AggregateId, + ) -> Result<(), AggregateError> { + let consumer_name = format!("sub_{}_{}", tenant_id.as_str(), aggregate_id); + + match &self.backend { + StreamBackend::JetStream(jetstream) => { + let stream = jetstream + .get_stream(AGGREGATE_STREAM_NAME) + .await + .map_err(|e| AggregateError::StreamError(format!("Stream not found: {}", e)))?; + + stream.delete_consumer(&consumer_name).await.map_err(|e| { + AggregateError::StreamError(format!("Consumer deletion failed: {}", e)) + })?; + + Ok(()) + } + StreamBackend::InMemory(_) => Ok(()), + } + } +} + +pub fn build_subject( + tenant_id: &TenantId, + aggregate_type: &AggregateType, + aggregate_id: &AggregateId, +) -> String { + format!( + "tenant.{}.aggregate.{}.{}", + tenant_id.as_str(), + aggregate_type.as_str(), + aggregate_id + ) +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[test] + fn stream_client_is_send_sync() { + fn assert_send_sync() {} + assert_send_sync::(); + } + + #[test] + fn subject_naming_includes_tenant() { + let tenant_id = TenantId::new("acme-corp"); + let aggregate_type = AggregateType::from("Account"); + let aggregate_id = AggregateId::new_v7(); + + let subject = 
build_subject(&tenant_id, &aggregate_type, &aggregate_id); + assert!(subject.starts_with("tenant.acme-corp.aggregate.")); + } + + #[test] + fn stream_config_settings_defaults() { + let settings = StreamConfigSettings::default(); + assert_eq!(settings.max_messages, 10_000_000); + } + + #[test] + fn circuit_breaker_accessible() { + let rt = tokio::runtime::Runtime::new().unwrap(); + rt.block_on(async { + let cb = CircuitBreaker::new(); + assert!(cb.is_closed()); + }); + } + + #[tokio::test] + async fn publish_and_fetch_events_with_tenant() { + let stream = StreamClient::in_memory(); + let tenant_id = TenantId::new("tenant-a"); + let aggregate_id = AggregateId::new_v7(); + let aggregate_type = AggregateType::from("Account"); + + let e1 = Event::new( + tenant_id.clone(), + aggregate_id.clone(), + aggregate_type.clone(), + Version::from(1), + "deposited", + json!({"amount": 10}), + uuid::Uuid::now_v7(), + ); + let e2 = Event::new( + tenant_id.clone(), + aggregate_id.clone(), + aggregate_type.clone(), + Version::from(2), + "deposited", + json!({"amount": 20}), + uuid::Uuid::now_v7(), + ); + + stream.publish_events(vec![e1, e2]).await.unwrap(); + let fetched = stream + .fetch_events(&tenant_id, &aggregate_id, Version::initial()) + .await + .unwrap(); + assert_eq!(fetched.len(), 2); + } + + #[tokio::test] + async fn fetch_with_version_filter() { + let stream = StreamClient::in_memory(); + let tenant_id = TenantId::new("tenant-a"); + let aggregate_id = AggregateId::new_v7(); + let aggregate_type = AggregateType::from("Account"); + + let mut events = Vec::new(); + for v in 1..=4 { + events.push(Event::new( + tenant_id.clone(), + aggregate_id.clone(), + aggregate_type.clone(), + Version::from(v), + "deposited", + json!({"amount": v}), + uuid::Uuid::now_v7(), + )); + } + stream.publish_events(events).await.unwrap(); + + let fetched = stream + .fetch_events(&tenant_id, &aggregate_id, Version::from(2)) + .await + .unwrap(); + assert_eq!(fetched.len(), 2); + 
assert!(fetched.iter().all(|e| e.version > Version::from(2))); + } + + #[tokio::test] + async fn tenant_isolation_fetch_returns_empty() { + let stream = StreamClient::in_memory(); + let tenant_a = TenantId::new("tenant-a"); + let tenant_b = TenantId::new("tenant-b"); + let aggregate_id = AggregateId::new_v7(); + let aggregate_type = AggregateType::from("Account"); + + let e1 = Event::new( + tenant_a.clone(), + aggregate_id.clone(), + aggregate_type.clone(), + Version::from(1), + "deposited", + json!({"amount": 10}), + uuid::Uuid::now_v7(), + ); + + stream.publish_events(vec![e1]).await.unwrap(); + let fetched = stream + .fetch_events(&tenant_b, &aggregate_id, Version::initial()) + .await + .unwrap(); + assert!(fetched.is_empty()); + } +} diff --git a/aggregate/src/swarm.rs b/aggregate/src/swarm.rs new file mode 100644 index 0000000..b58f875 --- /dev/null +++ b/aggregate/src/swarm.rs @@ -0,0 +1,332 @@ +use futures::StreamExt; +use serde::{Deserialize, Serialize}; +use thiserror::Error; + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct TenantPlacementConfig { + pub virtual_nodes_per_node: usize, + pub nodes: Vec, + pub tenants: std::collections::HashMap, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct NodePlacement { + pub node_id: String, + pub tenant_range: String, +} + +pub fn placement_constraint_for_tenant_range(tenant_range: &str) -> String { + format!("node.labels.tenant_range == {}", tenant_range) +} + +pub fn placement_constraints_for_node(node: &NodePlacement) -> Vec { + vec![placement_constraint_for_tenant_range(&node.tenant_range)] +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct MigrationPlan { + pub tenant_id: String, + pub from_node: String, + pub to_node: String, + pub actions: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum MigrationAction { + DrainTenant { tenant_id: String }, + UpdatePlacement { tenant_id: String, node_id: String }, + ReloadConfig, +} + +pub fn 
plan_graceful_tenant_migration(
    tenant_id: impl Into<String>,
    from_node: impl Into<String>,
    to_node: impl Into<String>,
) -> MigrationPlan {
    let tenant_id = tenant_id.into();
    let from_node = from_node.into();
    let to_node = to_node.into();

    // Order matters: drain first, repoint placement second, reload last.
    MigrationPlan {
        tenant_id: tenant_id.clone(),
        from_node,
        to_node: to_node.clone(),
        actions: vec![
            MigrationAction::DrainTenant {
                tenant_id: tenant_id.clone(),
            },
            MigrationAction::UpdatePlacement {
                tenant_id,
                node_id: to_node,
            },
            MigrationAction::ReloadConfig,
        ],
    }
}

/// Errors surfaced by the tenant-placement KV client.
#[derive(Debug, Error)]
pub enum TenantPlacementKvError {
    #[error("NATS connection error: {0}")]
    Connection(String),
    #[error("KV error: {0}")]
    Kv(String),
    #[error("Config parse error: {0}")]
    Parse(String),
    #[error("Unsupported key operation")]
    UnsupportedOperation,
}

/// Thin wrapper around a NATS JetStream key-value bucket that stores tenant
/// placement configuration as JSON values.
#[derive(Debug, Clone)]
pub struct TenantPlacementKvClient {
    kv: async_nats::jetstream::kv::Store,
}

impl TenantPlacementKvClient {
    /// Connect with the default 2-second timeout.
    pub async fn connect(
        nats_url: impl Into<String>,
        bucket: impl Into<String>,
    ) -> Result<Self, TenantPlacementKvError> {
        Self::connect_with_timeout(nats_url, bucket, std::time::Duration::from_secs(2)).await
    }

    /// Connect to NATS within `timeout` and open the KV bucket, creating it
    /// when it does not yet exist.
    pub async fn connect_with_timeout(
        nats_url: impl Into<String>,
        bucket: impl Into<String>,
        timeout: std::time::Duration,
    ) -> Result<Self, TenantPlacementKvError> {
        let nats_url = nats_url.into();
        let bucket = bucket.into();

        let client = tokio::time::timeout(timeout, async_nats::connect(nats_url))
            .await
            .map_err(|_| TenantPlacementKvError::Connection("connect timeout".to_string()))?
            .map_err(|e| TenantPlacementKvError::Connection(e.to_string()))?;

        let jetstream = async_nats::jetstream::new(client);

        // Any lookup error is treated as "bucket missing" and triggers creation.
        let kv = match jetstream.get_key_value(&bucket).await {
            Ok(kv) => kv,
            Err(_) => jetstream
                .create_key_value(async_nats::jetstream::kv::Config {
                    bucket: bucket.clone(),
                    ..Default::default()
                })
                .await
                .map_err(|e| TenantPlacementKvError::Kv(e.to_string()))?,
        };

        Ok(Self { kv })
    }

    /// Read `key` and decode it as JSON; `Ok(None)` when the key is absent.
    pub async fn get_json(
        &self,
        key: &str,
    ) -> Result<Option<serde_json::Value>, TenantPlacementKvError> {
        let entry = self
            .kv
            .entry(key)
            .await
            .map_err(|e| TenantPlacementKvError::Kv(e.to_string()))?;

        match entry {
            Some(entry) => serde_json::from_slice::<serde_json::Value>(&entry.value)
                .map(Some)
                .map_err(|e| TenantPlacementKvError::Parse(e.to_string())),
            None => Ok(None),
        }
    }

    /// Serialize `value` as JSON and store it under `key`.
    pub async fn put_json(
        &self,
        key: &str,
        value: &serde_json::Value,
    ) -> Result<(), TenantPlacementKvError> {
        let bytes =
            serde_json::to_vec(value).map_err(|e| TenantPlacementKvError::Parse(e.to_string()))?;
        self.kv
            .put(key, bytes.into())
            .await
            .map_err(|e| TenantPlacementKvError::Kv(e.to_string()))?;
        Ok(())
    }

    /// Watch keys matching `pattern`, yielding the decoded JSON of each put.
    /// Deletes and purges are skipped silently.
    pub async fn watch_json(
        &self,
        pattern: &str,
    ) -> Result<
        std::pin::Pin<
            Box<
                dyn futures::Stream<Item = Result<serde_json::Value, TenantPlacementKvError>>
                    + Send,
            >,
        >,
        TenantPlacementKvError,
    > {
        let watch = self
            .kv
            .watch(pattern)
            .await
            .map_err(|e| TenantPlacementKvError::Kv(e.to_string()))?;

        Ok(Box::pin(watch.filter_map(|entry| async move {
            match entry {
                Ok(entry) => match entry.operation {
                    async_nats::jetstream::kv::Operation::Put => {
                        match serde_json::from_slice::<serde_json::Value>(&entry.value) {
                            Ok(v) => Some(Ok(v)),
                            Err(e) => Some(Err(TenantPlacementKvError::Parse(e.to_string()))),
                        }
                    }
                    async_nats::jetstream::kv::Operation::Delete
                    | async_nats::jetstream::kv::Operation::Purge => None,
                },
                Err(e) => Some(Err(TenantPlacementKvError::Kv(e.to_string()))),
            }
        })))
    }

    /// Load the placement config from KV, falling back to a local JSON/YAML
    /// file when NATS is unreachable or the key is missing.
    pub async fn load_config_with_fallback(
        nats_url: impl Into<String>,
bucket: impl Into, + key: &str, + fallback_path: &str, + ) -> Result { + let try_kv = match Self::connect_with_timeout( + nats_url, + bucket, + std::time::Duration::from_millis(300), + ) + .await + { + Ok(client) => match client.get_json(key).await { + Ok(Some(v)) => Ok(v), + Ok(None) => Err(TenantPlacementKvError::Kv("missing key".to_string())), + Err(e) => Err(e), + }, + Err(e) => Err(e), + }; + + match try_kv { + Ok(v) => Ok(v), + Err(_) => { + let raw = std::fs::read_to_string(fallback_path) + .map_err(|e| TenantPlacementKvError::Kv(e.to_string()))?; + if fallback_path.ends_with(".json") { + serde_json::from_str(&raw) + .map_err(|e| TenantPlacementKvError::Parse(e.to_string())) + } else { + let yaml: serde_yaml::Value = serde_yaml::from_str(&raw) + .map_err(|e| TenantPlacementKvError::Parse(e.to_string()))?; + let json = serde_json::to_value(yaml) + .map_err(|e| TenantPlacementKvError::Parse(e.to_string()))?; + Ok(json) + } + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use futures::StreamExt; + + #[test] + fn stack_file_is_valid_yaml() { + let raw = std::fs::read_to_string("../swarm/stacks/platform.yml").unwrap(); + let _: serde_yaml::Value = serde_yaml::from_str(&raw).unwrap(); + } + + #[test] + fn stack_services_count() { + let raw = std::fs::read_to_string("../swarm/stacks/platform.yml").unwrap(); + let doc: serde_yaml::Value = serde_yaml::from_str(&raw).unwrap(); + let services = doc.get("services").and_then(|v| v.as_mapping()).unwrap(); + assert!(services.contains_key(serde_yaml::Value::String("nats".to_string()))); + assert!(services.contains_key(serde_yaml::Value::String("gateway".to_string()))); + assert!(services.contains_key(serde_yaml::Value::String("aggregate".to_string()))); + } + + #[test] + fn tenant_placement_config_loads() { + let raw = std::fs::read_to_string("../swarm/tenant-placement.yaml").unwrap(); + let cfg: TenantPlacementConfig = serde_yaml::from_str(&raw).unwrap(); + assert_eq!(cfg.virtual_nodes_per_node, 200); + 
assert!(cfg.nodes.iter().any(|n| n.node_id == "node-a")); + assert_eq!(cfg.tenants.get("tenant-a").unwrap(), "node-a"); + } + + #[test] + fn placement_constraint_generated_correctly() { + let node = NodePlacement { + node_id: "node-a".to_string(), + tenant_range: "00-3f".to_string(), + }; + let constraints = placement_constraints_for_node(&node); + assert_eq!(constraints, vec!["node.labels.tenant_range == 00-3f"]); + } + + #[test] + fn graceful_tenant_migration_plan_is_ordered() { + let plan = plan_graceful_tenant_migration("tenant-a", "node-a", "node-b"); + assert_eq!(plan.tenant_id, "tenant-a"); + assert_eq!( + plan.actions, + vec![ + MigrationAction::DrainTenant { + tenant_id: "tenant-a".to_string(), + }, + MigrationAction::UpdatePlacement { + tenant_id: "tenant-a".to_string(), + node_id: "node-b".to_string(), + }, + MigrationAction::ReloadConfig, + ] + ); + } + + #[tokio::test] + async fn tenant_placement_kv_falls_back_to_local_file() { + let tmp = tempfile::tempdir().unwrap(); + let path = tmp.path().join("placement.yaml"); + std::fs::write( + &path, + r#" +virtual_nodes_per_node: 100 +nodes: + - node_id: "node-a" + tenant_range: "00-ff" +tenants: + tenant-a: "node-a" +"#, + ) + .unwrap(); + + let cfg = TenantPlacementKvClient::load_config_with_fallback( + "nats://127.0.0.1:1", + "TENANT_PLACEMENT", + "placement", + path.to_string_lossy().as_ref(), + ) + .await + .unwrap(); + + assert_eq!(cfg["virtual_nodes_per_node"], 100); + assert_eq!(cfg["tenants"]["tenant-a"], "node-a"); + } + + #[tokio::test] + async fn tenant_placement_kv_watch_returns_stream() { + let result = TenantPlacementKvClient::connect_with_timeout( + "nats://127.0.0.1:1", + "TENANT_PLACEMENT", + std::time::Duration::from_millis(50), + ) + .await; + assert!(result.is_err()); + + let mut stream = + futures::stream::empty::>(); + assert!(stream.next().await.is_none()); + } +} diff --git a/aggregate/src/types/command.rs b/aggregate/src/types/command.rs new file mode 100644 index 0000000..3b5076b 
--- /dev/null +++ b/aggregate/src/types/command.rs @@ -0,0 +1,65 @@ +use crate::types::{AggregateId, AggregateType, TenantId}; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use std::collections::HashMap; +use uuid::Uuid; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Command { + pub tenant_id: TenantId, + pub command_id: Uuid, + pub aggregate_id: AggregateId, + pub aggregate_type: AggregateType, + pub payload: Value, + pub metadata: HashMap, +} + +impl Command { + pub fn new( + tenant_id: TenantId, + aggregate_id: AggregateId, + aggregate_type: AggregateType, + payload: Value, + ) -> Self { + Self { + tenant_id, + command_id: Uuid::now_v7(), + aggregate_id, + aggregate_type, + payload, + metadata: HashMap::new(), + } + } + + pub fn with_metadata(mut self, key: impl Into, value: Value) -> Self { + self.metadata.insert(key.into(), value); + self + } +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[test] + fn command_serialization() { + let cmd = Command::new( + TenantId::new("acme-corp"), + AggregateId::new_v7(), + AggregateType::new("Account"), + json!({"type": "deposit", "amount": 100}), + ); + let json = serde_json::to_string(&cmd).unwrap(); + let decoded: Command = serde_json::from_str(&json).unwrap(); + assert_eq!(cmd.command_id, decoded.command_id); + assert_eq!(cmd.aggregate_id, decoded.aggregate_id); + assert_eq!(cmd.tenant_id, decoded.tenant_id); + } + + #[test] + fn command_is_send_sync() { + fn assert_send_sync() {} + assert_send_sync::(); + } +} diff --git a/aggregate/src/types/error.rs b/aggregate/src/types/error.rs new file mode 100644 index 0000000..a6ee793 --- /dev/null +++ b/aggregate/src/types/error.rs @@ -0,0 +1,58 @@ +use crate::types::{AggregateId, TenantId, Version}; +use thiserror::Error; + +#[derive(Debug, Clone, Error)] +pub enum AggregateError { + #[error("Tenant access denied for tenant: {tenant_id}")] + TenantAccessDenied { tenant_id: TenantId }, + + #[error("Tenant not hosted 
on this shard: {tenant_id}")] + TenantNotHosted { tenant_id: TenantId }, + + #[error("Tenant is draining: {tenant_id}")] + TenantDraining { tenant_id: TenantId }, + + #[error("Validation error: {0}")] + ValidationError(String), + + #[error("Version conflict: expected {expected}, actual {actual}")] + VersionConflict { expected: Version, actual: Version }, + + #[error("Storage error: {0}")] + StorageError(String), + + #[error("Stream error: {0}")] + StreamError(String), + + #[error("Rehydration error: {0}")] + RehydrationError(String), + + #[error("Decide error: {0}")] + DecideError(String), + + #[error("Apply error: {0}")] + ApplyError(String), + + #[error("Not found: {0}")] + NotFound(AggregateId), +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn error_implements_traits() { + let err = AggregateError::TenantAccessDenied { + tenant_id: TenantId::new("other"), + }; + let _ = format!("{}", err); + let _: &dyn std::error::Error = &err; + } + + #[test] + fn error_is_send_sync() { + fn assert_send_sync() {} + assert_send_sync::(); + } +} diff --git a/aggregate/src/types/event.rs b/aggregate/src/types/event.rs new file mode 100644 index 0000000..351bdf1 --- /dev/null +++ b/aggregate/src/types/event.rs @@ -0,0 +1,78 @@ +use crate::types::{AggregateId, AggregateType, TenantId, Version}; +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use uuid::Uuid; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Event { + pub tenant_id: TenantId, + pub event_id: Uuid, + pub aggregate_id: AggregateId, + pub aggregate_type: AggregateType, + pub version: Version, + pub event_type: String, + pub payload: Value, + pub command_id: Uuid, + pub timestamp: DateTime, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub correlation_id: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub traceparent: Option, +} + +impl Event { + pub fn new( + tenant_id: TenantId, + aggregate_id: 
AggregateId, + aggregate_type: AggregateType, + version: Version, + event_type: impl Into, + payload: Value, + command_id: Uuid, + ) -> Self { + Self { + tenant_id, + event_id: Uuid::now_v7(), + aggregate_id, + aggregate_type, + version, + event_type: event_type.into(), + payload, + command_id, + timestamp: Utc::now(), + correlation_id: None, + traceparent: None, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[test] + fn event_serialization() { + let event = Event::new( + TenantId::new("acme-corp"), + AggregateId::new_v7(), + AggregateType::new("Account"), + Version::from(1), + "Deposited", + json!({"amount": 100}), + Uuid::now_v7(), + ); + let json = serde_json::to_string(&event).unwrap(); + let decoded: Event = serde_json::from_str(&json).unwrap(); + assert_eq!(event.event_id, decoded.event_id); + assert_eq!(event.version, decoded.version); + assert_eq!(event.tenant_id, decoded.tenant_id); + } + + #[test] + fn event_is_send_sync() { + fn assert_send_sync() {} + assert_send_sync::(); + } +} diff --git a/aggregate/src/types/id.rs b/aggregate/src/types/id.rs new file mode 100644 index 0000000..1f6a2a1 --- /dev/null +++ b/aggregate/src/types/id.rs @@ -0,0 +1,157 @@ +use serde::{Deserialize, Serialize}; +use std::fmt; +use std::str::FromStr; + +pub type TenantId = shared::TenantId; + +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct AggregateId(uuid::Uuid); + +impl AggregateId { + pub fn new_v7() -> Self { + Self(uuid::Uuid::now_v7()) + } + + pub fn from_uuid(uuid: uuid::Uuid) -> Self { + Self(uuid) + } + + pub fn as_uuid(&self) -> &uuid::Uuid { + &self.0 + } +} + +impl fmt::Display for AggregateId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl FromStr for AggregateId { + type Err = uuid::Error; + + fn from_str(s: &str) -> Result { + Ok(Self(uuid::Uuid::parse_str(s)?)) + } +} + +impl Default for AggregateId { + fn default() -> Self { + 
Self::new_v7() + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct AggregateType(String); + +impl AggregateType { + pub fn new(ty: impl Into) -> Self { + Self(ty.into()) + } + + pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl fmt::Display for AggregateType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl From<&str> for AggregateType { + fn from(s: &str) -> Self { + Self(s.to_string()) + } +} + +impl From for AggregateType { + fn from(s: String) -> Self { + Self(s) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] +pub struct Version(u64); + +impl Version { + pub const fn initial() -> Self { + Self(0) + } + + pub const fn from_u64(v: u64) -> Self { + Self(v) + } + + pub const fn as_u64(self) -> u64 { + self.0 + } + + pub fn increment(self) -> Self { + Self(self.0 + 1) + } +} + +impl Default for Version { + fn default() -> Self { + Self::initial() + } +} + +impl From for Version { + fn from(v: u64) -> Self { + Self(v) + } +} + +impl fmt::Display for Version { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn tenant_id_serialization_roundtrip() { + let id = TenantId::new("acme-corp"); + let json = serde_json::to_string(&id).unwrap(); + let decoded: TenantId = serde_json::from_str(&json).unwrap(); + assert_eq!(id, decoded); + } + + #[test] + fn tenant_id_default() { + let id = TenantId::default(); + assert!(id.is_empty()); + } + + #[test] + fn aggregate_id_serialization_roundtrip() { + let id = AggregateId::new_v7(); + let json = serde_json::to_string(&id).unwrap(); + let decoded: AggregateId = serde_json::from_str(&json).unwrap(); + assert_eq!(id, decoded); + } + + #[test] + fn version_increment() { + let v = Version::initial(); + assert_eq!(v.as_u64(), 0); + let v2 = v.increment(); + assert_eq!(v2.as_u64(), 1); 
+ assert_eq!(v.as_u64(), 0); + } + + #[test] + fn types_are_send_sync() { + fn assert_send_sync() {} + assert_send_sync::(); + assert_send_sync::(); + assert_send_sync::(); + assert_send_sync::(); + } +} diff --git a/aggregate/src/types/manifest.rs b/aggregate/src/types/manifest.rs new file mode 100644 index 0000000..a1c1b2a --- /dev/null +++ b/aggregate/src/types/manifest.rs @@ -0,0 +1,61 @@ +use crate::types::AggregateType; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ProgramRef { + pub decide_program: String, + pub apply_program: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AggregateManifest { + pub aggregate_type: AggregateType, + pub programs: ProgramRef, + pub snapshot_threshold: Option, +} + +impl AggregateManifest { + pub fn new(aggregate_type: AggregateType, programs: ProgramRef) -> Self { + Self { + aggregate_type, + programs, + snapshot_threshold: None, + } + } + + pub fn with_snapshot_threshold(mut self, threshold: u64) -> Self { + self.snapshot_threshold = Some(threshold); + self + } +} + +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct ManifestRegistry { + aggregates: HashMap, +} + +impl ManifestRegistry { + pub fn new() -> Self { + Self { + aggregates: HashMap::new(), + } + } + + pub fn register(&mut self, manifest: AggregateManifest) { + self.aggregates + .insert(manifest.aggregate_type.as_str().to_string(), manifest); + } + + pub fn get(&self, aggregate_type: &AggregateType) -> Option<&AggregateManifest> { + self.aggregates.get(aggregate_type.as_str()) + } + + pub fn load_from_yaml(yaml: &str) -> Result { + serde_yaml::from_str(yaml) + } + + pub fn load_from_json(json: &str) -> Result { + serde_json::from_str(json) + } +} diff --git a/aggregate/src/types/mod.rs b/aggregate/src/types/mod.rs new file mode 100644 index 0000000..70f3147 --- /dev/null +++ b/aggregate/src/types/mod.rs @@ -0,0 +1,13 @@ +mod command; 
+mod error; +mod event; +mod id; +mod manifest; +mod snapshot; + +pub use command::*; +pub use error::*; +pub use event::*; +pub use id::*; +pub use manifest::*; +pub use snapshot::*; diff --git a/aggregate/src/types/snapshot.rs b/aggregate/src/types/snapshot.rs new file mode 100644 index 0000000..27a415c --- /dev/null +++ b/aggregate/src/types/snapshot.rs @@ -0,0 +1,61 @@ +use crate::types::{AggregateId, AggregateType, TenantId, Version}; +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use serde_json::Value; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Snapshot { + pub tenant_id: TenantId, + pub aggregate_id: AggregateId, + pub aggregate_type: AggregateType, + pub version: Version, + pub state: Value, + pub created_at: DateTime, +} + +impl Snapshot { + pub fn new( + tenant_id: TenantId, + aggregate_id: AggregateId, + aggregate_type: AggregateType, + version: Version, + state: Value, + ) -> Self { + Self { + tenant_id, + aggregate_id, + aggregate_type, + version, + state, + created_at: Utc::now(), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[test] + fn snapshot_serialization() { + let snap = Snapshot::new( + TenantId::new("acme-corp"), + AggregateId::new_v7(), + AggregateType::new("Account"), + Version::from(5), + json!({"balance": 100}), + ); + let json = serde_json::to_string(&snap).unwrap(); + let decoded: Snapshot = serde_json::from_str(&json).unwrap(); + assert_eq!(snap.aggregate_id, decoded.aggregate_id); + assert_eq!(snap.version, decoded.version); + assert_eq!(snap.tenant_id, decoded.tenant_id); + } + + #[test] + fn snapshot_is_send_sync() { + fn assert_send_sync() {} + assert_send_sync::(); + } +} diff --git a/aggregate/tests/integration.rs b/aggregate/tests/integration.rs new file mode 100644 index 0000000..4d3a6c2 --- /dev/null +++ b/aggregate/tests/integration.rs @@ -0,0 +1,682 @@ +use aggregate::observability::Observability; +use aggregate::runtime::RuntimeExecutor; 
+#[cfg(feature = "runtime-v8")] +use aggregate::runtime::{execute_apply_program, execute_decide_program}; +use aggregate::server::{CommandRequest, HealthChecker}; +use aggregate::storage::StorageClient; +use aggregate::types::{ + AggregateError, AggregateId, AggregateType, Command, Event, TenantId, Version, +}; +use serde_json::json; +use std::time::Duration; +use tempfile::TempDir; + +fn create_test_storage() -> (TempDir, StorageClient) { + let dir = TempDir::new().expect("failed to create temp dir"); + let path = dir.path().join("test.mdbx"); + let storage = + StorageClient::open(path.to_string_lossy().to_string()).expect("failed to open storage"); + (dir, storage) +} + +#[cfg(feature = "runtime-v8")] +fn create_test_decide_program() -> &'static str { + r#" + function decide(state, command) { + if (command.type === "deposit") { + return [{ type: "deposited", amount: command.amount }]; + } + if (command.type === "withdraw") { + if (state.balance < command.amount) { + return [{ type: "error", message: "insufficient funds" }]; + } + return [{ type: "withdrawn", amount: command.amount }]; + } + if (command.type === "open_account") { + return [{ type: "account_opened", initial_balance: command.initial_balance || 0 }]; + } + return []; + } + "# +} + +#[cfg(feature = "runtime-v8")] +fn create_test_apply_program() -> &'static str { + r#" + function apply(state, event) { + if (event.type === "account_opened") { + return { balance: event.initial_balance }; + } + if (event.type === "deposited") { + return { balance: (state.balance || 0) + event.amount }; + } + if (event.type === "withdrawn") { + return { balance: state.balance - event.amount }; + } + return state; + } + "# +} + +#[test] +fn storage_tenant_isolation() { + let (_dir, storage) = create_test_storage(); + + let rt = tokio::runtime::Runtime::new().unwrap(); + rt.block_on(async { + let tenant_a = TenantId::new("tenant-a"); + let tenant_b = TenantId::new("tenant-b"); + let aggregate_id = AggregateId::new_v7(); + + 
use aggregate::types::Snapshot; + let snapshot_a = Snapshot::new( + tenant_a.clone(), + aggregate_id.clone(), + AggregateType::from("Account"), + Version::from(1), + json!({"balance": 100}), + ); + + storage.put_snapshot(&snapshot_a).await.unwrap(); + + let result_a = storage + .get_snapshot(&tenant_a, &aggregate_id) + .await + .unwrap(); + let result_b = storage + .get_snapshot(&tenant_b, &aggregate_id) + .await + .unwrap(); + + assert!(result_a.is_some()); + assert!(result_b.is_none()); + }); +} + +#[test] +fn storage_version_conflict() { + let (_dir, storage) = create_test_storage(); + + let rt = tokio::runtime::Runtime::new().unwrap(); + rt.block_on(async { + let tenant_id = TenantId::new("tenant-a"); + let aggregate_id = AggregateId::new_v7(); + + use aggregate::types::Snapshot; + let snapshot_v1 = Snapshot::new( + tenant_id.clone(), + aggregate_id.clone(), + AggregateType::from("Account"), + Version::from(1), + json!({"balance": 100}), + ); + + storage.put_snapshot(&snapshot_v1).await.unwrap(); + + let result = storage.put_snapshot(&snapshot_v1).await; + assert!(matches!( + result, + Err(AggregateError::VersionConflict { .. 
}) + )); + }); +} + +#[test] +fn storage_latest_version() { + let (_dir, storage) = create_test_storage(); + + let rt = tokio::runtime::Runtime::new().unwrap(); + rt.block_on(async { + let tenant_id = TenantId::new("tenant-a"); + let aggregate_id = AggregateId::new_v7(); + + let version = storage + .get_latest_version(&tenant_id, &aggregate_id) + .await + .unwrap(); + assert!(version.is_none()); + + use aggregate::types::Snapshot; + let snapshot_v1 = Snapshot::new( + tenant_id.clone(), + aggregate_id.clone(), + AggregateType::from("Account"), + Version::from(1), + json!({"balance": 100}), + ); + storage.put_snapshot(&snapshot_v1).await.unwrap(); + + let version = storage + .get_latest_version(&tenant_id, &aggregate_id) + .await + .unwrap(); + assert_eq!(version, Some(Version::from(1))); + + let snapshot_v3 = Snapshot::new( + tenant_id.clone(), + aggregate_id.clone(), + AggregateType::from("Account"), + Version::from(3), + json!({"balance": 300}), + ); + storage.put_snapshot(&snapshot_v3).await.unwrap(); + + let version = storage + .get_latest_version(&tenant_id, &aggregate_id) + .await + .unwrap(); + assert_eq!(version, Some(Version::from(3))); + }); +} + +#[test] +fn storage_none_for_nonexistent_aggregate() { + let (_dir, storage) = create_test_storage(); + + let rt = tokio::runtime::Runtime::new().unwrap(); + rt.block_on(async { + let tenant_id = TenantId::new("tenant-a"); + let aggregate_id = AggregateId::new_v7(); + + let snapshot = storage + .get_snapshot(&tenant_id, &aggregate_id) + .await + .unwrap(); + assert!(snapshot.is_none()); + }); +} + +#[cfg(feature = "runtime-v8")] +#[test] +fn runtime_decide_deposit() { + let rt = tokio::runtime::Runtime::new().unwrap(); + rt.block_on(async { + let state = json!({"balance": 100}); + let command = json!({"type": "deposit", "amount": 50}); + + let events = execute_decide_program( + &state, + &command, + create_test_decide_program(), + 1_000_000, + Duration::from_secs(5), + ) + .await + .unwrap(); + 
assert_eq!(events.len(), 1); + assert_eq!(events[0]["type"], "deposited"); + assert_eq!(events[0]["amount"], 50); + }); +} + +#[cfg(feature = "runtime-v8")] +#[test] +fn runtime_decide_withdraw_insufficient() { + let rt = tokio::runtime::Runtime::new().unwrap(); + rt.block_on(async { + let state = json!({"balance": 10}); + let command = json!({"type": "withdraw", "amount": 100}); + + let events = execute_decide_program( + &state, + &command, + create_test_decide_program(), + 1_000_000, + Duration::from_secs(5), + ) + .await + .unwrap(); + assert_eq!(events.len(), 1); + assert_eq!(events[0]["type"], "error"); + }); +} + +#[cfg(feature = "runtime-v8")] +#[test] +fn runtime_apply_transitions_state() { + let rt = tokio::runtime::Runtime::new().unwrap(); + rt.block_on(async { + let state = json!({"balance": 100}); + let event = json!({"type": "deposited", "amount": 50}); + + let new_state = execute_apply_program( + &state, + &event, + create_test_apply_program(), + 1_000_000, + Duration::from_secs(5), + ) + .await + .unwrap(); + assert_eq!(new_state["balance"], 150); + }); +} + +#[cfg(feature = "runtime-v8")] +#[test] +fn runtime_determinism() { + let rt = tokio::runtime::Runtime::new().unwrap(); + rt.block_on(async { + let state = json!({"balance": 100}); + let command = json!({"type": "deposit", "amount": 50}); + + let r1 = execute_decide_program( + &state, + &command, + create_test_decide_program(), + 1_000_000, + Duration::from_secs(5), + ) + .await + .unwrap(); + let r2 = execute_decide_program( + &state, + &command, + create_test_decide_program(), + 1_000_000, + Duration::from_secs(5), + ) + .await + .unwrap(); + assert_eq!(r1, r2); + }); +} + +#[test] +fn command_request_tenant_extraction() { + let tenant_id = TenantId::new("acme-corp"); + let aggregate_id = AggregateId::new_v7(); + + let request = CommandRequest::new( + tenant_id.clone(), + aggregate_id.clone(), + AggregateType::from("Account"), + json!({"type": "deposit", "amount": 100}), + ) + 
.with_header("x-request-id", "req-123") + .with_header("x-tenant-id", "override-tenant"); + + assert_eq!(request.tenant_id, tenant_id); + assert_eq!( + request.headers.get("x-request-id"), + Some(&"req-123".to_string()) + ); +} + +#[test] +fn health_checker_tracks_state() { + let checker = HealthChecker::new(); + + let status = checker.check(); + assert!(status.is_healthy()); + assert!(checker.is_ready()); + assert!(checker.is_live()); + + checker.set_storage_healthy(false); + checker.set_stream_healthy(false); + assert!(!checker.is_ready()); + + checker.set_storage_healthy(true); + checker.set_stream_healthy(true); + assert!(checker.is_ready()); +} + +#[test] +fn observability_metrics_export() { + let obs = Observability::default(); + + let span = obs.start_command_span("agg-123", "Account", "tenant-a", "deposit", None, None); + obs.record_command_success(&span, 2); + + let metrics = obs.export_metrics(); + assert!(metrics.contains("commands_total")); + assert!(metrics.contains("command_duration")); +} + +#[test] +fn version_increment_and_ordering() { + let v0 = Version::initial(); + assert_eq!(v0.as_u64(), 0); + + let v1 = v0.increment(); + assert_eq!(v1.as_u64(), 1); + assert_eq!(v0.as_u64(), 0); + + let v2 = v1.increment(); + assert_eq!(v2.as_u64(), 2); + + assert!(v0 < v1); + assert!(v1 < v2); +} + +#[test] +fn tenant_id_validation() { + let valid_ids = vec!["acme-corp", "tenant_123", "my-tenant", "Tenant1"]; + let invalid_ids = vec!["tenant@corp", "tenant name", "tenant/id"]; + + for id in valid_ids { + let tenant_id = TenantId::new(id); + let chars_valid = tenant_id + .as_str() + .chars() + .all(|c| c.is_alphanumeric() || c == '-' || c == '_'); + assert!(chars_valid, "Expected {} to be valid", id); + } + + for id in invalid_ids { + let tenant_id = TenantId::new(id); + let chars_valid = tenant_id + .as_str() + .chars() + .all(|c| c.is_alphanumeric() || c == '-' || c == '_'); + assert!(!chars_valid, "Expected {} to be invalid", id); + } +} + +#[test] +fn 
aggregate_id_generation() { + let id1 = AggregateId::new_v7(); + let id2 = AggregateId::new_v7(); + + assert_ne!(id1, id2); + + let display = format!("{}", id1); + assert!(!display.is_empty()); +} + +#[test] +fn event_creation() { + let tenant_id = TenantId::new("tenant-a"); + let aggregate_id = AggregateId::new_v7(); + let command_id = uuid::Uuid::now_v7(); + + let event = Event::new( + tenant_id.clone(), + aggregate_id.clone(), + AggregateType::from("Account"), + Version::from(1), + "deposited".to_string(), + json!({"amount": 100}), + command_id, + ); + + assert_eq!(event.tenant_id, tenant_id); + assert_eq!(event.aggregate_id, aggregate_id); + assert_eq!(event.version, Version::from(1)); + assert_eq!(event.event_type, "deposited"); +} + +#[test] +fn command_creation() { + let tenant_id = TenantId::new("tenant-a"); + let aggregate_id = AggregateId::new_v7(); + + let command = Command::new( + tenant_id.clone(), + aggregate_id.clone(), + AggregateType::from("Account"), + json!({"type": "deposit", "amount": 100}), + ); + + assert_eq!(command.tenant_id, tenant_id); + assert_eq!(command.aggregate_id, aggregate_id); + assert_eq!(command.payload["type"], "deposit"); +} + +#[test] +fn snapshot_creation() { + let tenant_id = TenantId::new("tenant-a"); + let aggregate_id = AggregateId::new_v7(); + + let snapshot = aggregate::types::Snapshot::new( + tenant_id.clone(), + aggregate_id.clone(), + AggregateType::from("Account"), + Version::from(5), + json!({"balance": 500}), + ); + + assert_eq!(snapshot.tenant_id, tenant_id); + assert_eq!(snapshot.aggregate_id, aggregate_id); + assert_eq!(snapshot.version, Version::from(5)); + assert_eq!(snapshot.state["balance"], 500); +} + +#[test] +fn circuit_breaker_pattern() { + use aggregate::storage::CircuitBreaker; + + let mut cb = CircuitBreaker::new() + .with_failure_threshold(3) + .with_reset_timeout(Duration::from_millis(50)); + + assert!(cb.is_closed()); + + cb.record_failure(); + cb.record_failure(); + cb.record_failure(); + 
assert!(cb.is_open()); + + std::thread::sleep(Duration::from_millis(60)); + assert!(!cb.is_closed()); + assert!(!cb.is_open()); +} + +#[test] +fn error_types_are_send_sync() { + fn assert_send_sync() {} + + assert_send_sync::(); + assert_send_sync::(); +} + +#[test] +fn all_types_are_send_sync() { + fn assert_send_sync() {} + + assert_send_sync::(); + assert_send_sync::(); + assert_send_sync::(); + assert_send_sync::(); + assert_send_sync::(); + assert_send_sync::(); + assert_send_sync::(); + assert_send_sync::(); + assert_send_sync::(); + assert_send_sync::(); +} + +#[test] +fn concurrent_storage_operations() { + let (_dir, storage) = create_test_storage(); + + let rt = tokio::runtime::Runtime::new().unwrap(); + rt.block_on(async { + use aggregate::types::Snapshot; + use std::sync::Arc; + use tokio::task::JoinSet; + + let storage = Arc::new(storage); + let mut tasks = JoinSet::new(); + + for i in 0..10 { + let storage = storage.clone(); + tasks.spawn(async move { + let tenant_id = TenantId::new(format!("tenant-{}", i % 3)); + let aggregate_id = AggregateId::new_v7(); + + let snapshot = Snapshot::new( + tenant_id.clone(), + aggregate_id.clone(), + AggregateType::from("Account"), + Version::from(1), + json!({"balance": i * 100}), + ); + + storage.put_snapshot(&snapshot).await.unwrap(); + + let loaded = storage + .get_snapshot(&tenant_id, &aggregate_id) + .await + .unwrap(); + assert!(loaded.is_some()); + loaded.unwrap() + }); + } + + let mut results = Vec::new(); + while let Some(result) = tasks.join_next().await { + results.push(result.unwrap()); + } + + assert_eq!(results.len(), 10); + }); +} + +#[test] +fn tenant_isolation_e2e() { + let (_dir, storage) = create_test_storage(); + + let rt = tokio::runtime::Runtime::new().unwrap(); + rt.block_on(async { + use aggregate::types::Snapshot; + + let tenant_a = TenantId::new("tenant-a"); + let tenant_b = TenantId::new("tenant-b"); + let aggregate_id = AggregateId::new_v7(); + + let snapshot_a = Snapshot::new( + 
tenant_a.clone(), + aggregate_id.clone(), + AggregateType::from("Account"), + Version::from(1), + json!({"balance": 1000, "owner": "Alice"}), + ); + + let snapshot_b = Snapshot::new( + tenant_b.clone(), + aggregate_id.clone(), + AggregateType::from("Account"), + Version::from(1), + json!({"balance": 500, "owner": "Bob"}), + ); + + storage.put_snapshot(&snapshot_a).await.unwrap(); + storage.put_snapshot(&snapshot_b).await.unwrap(); + + let loaded_a = storage + .get_snapshot(&tenant_a, &aggregate_id) + .await + .unwrap() + .unwrap(); + let loaded_b = storage + .get_snapshot(&tenant_b, &aggregate_id) + .await + .unwrap() + .unwrap(); + + assert_eq!(loaded_a.state["owner"], "Alice"); + assert_eq!(loaded_a.state["balance"], 1000); + assert_eq!(loaded_b.state["owner"], "Bob"); + assert_eq!(loaded_b.state["balance"], 500); + }); +} + +#[test] +fn bank_account_full_scenario() { + let (_dir, storage) = create_test_storage(); + + let rt = tokio::runtime::Runtime::new().unwrap(); + rt.block_on(async { + use aggregate::types::Snapshot; + + let tenant_id = TenantId::new("bank-test"); + let aggregate_id = AggregateId::new_v7(); + + let snapshot_v1 = Snapshot::new( + tenant_id.clone(), + aggregate_id.clone(), + AggregateType::from("BankAccount"), + Version::from(1), + json!({"balance": 0}), + ); + storage.put_snapshot(&snapshot_v1).await.unwrap(); + + let snapshot_v2 = Snapshot::new( + tenant_id.clone(), + aggregate_id.clone(), + AggregateType::from("BankAccount"), + Version::from(2), + json!({"balance": 100}), + ); + storage.put_snapshot(&snapshot_v2).await.unwrap(); + + let snapshot_v3 = Snapshot::new( + tenant_id.clone(), + aggregate_id.clone(), + AggregateType::from("BankAccount"), + Version::from(3), + json!({"balance": 50}), + ); + storage.put_snapshot(&snapshot_v3).await.unwrap(); + + let loaded = storage + .get_snapshot(&tenant_id, &aggregate_id) + .await + .unwrap() + .unwrap(); + assert_eq!(loaded.version, Version::from(3)); + assert_eq!(loaded.state["balance"], 50); + 
+ let version = storage + .get_latest_version(&tenant_id, &aggregate_id) + .await + .unwrap(); + assert_eq!(version, Some(Version::from(3))); + }); +} + +#[test] +fn version_sequence_integrity() { + let (_dir, storage) = create_test_storage(); + + let rt = tokio::runtime::Runtime::new().unwrap(); + rt.block_on(async { + use aggregate::types::Snapshot; + + let tenant_id = TenantId::new("version-test"); + let aggregate_id = AggregateId::new_v7(); + + for v in 1..=5 { + let snapshot = Snapshot::new( + tenant_id.clone(), + aggregate_id.clone(), + AggregateType::from("Counter"), + Version::from(v), + json!({"count": v}), + ); + storage.put_snapshot(&snapshot).await.unwrap(); + } + + let loaded = storage + .get_snapshot(&tenant_id, &aggregate_id) + .await + .unwrap() + .unwrap(); + assert_eq!(loaded.version, Version::from(5)); + assert_eq!(loaded.state["count"], 5); + + let duplicate = Snapshot::new( + tenant_id.clone(), + aggregate_id.clone(), + AggregateType::from("Counter"), + Version::from(5), + json!({"count": 999}), + ); + let result = storage.put_snapshot(&duplicate).await; + assert!(matches!( + result, + Err(AggregateError::VersionConflict { .. 
}) + )); + }); +} diff --git a/control/.gitignore b/control/.gitignore new file mode 100644 index 0000000..ade615a --- /dev/null +++ b/control/.gitignore @@ -0,0 +1,43 @@ +/target/ +/target-*/ +**/target/ +*.rs.bk +*.pdb +*.dSYM/ +*.orig +*.rej +*.log +*.swp +*.swo +*~ +.DS_Store +.idea/ +.vscode/ + +.env +.env.* +.envrc +.direnv/ + +docker-compose.override.yml + +*.mdbx +*.mdbx-* +*.mdbx-lock +*.mdbx.dat +*.mdbx.lck +*.mdb +*.db +/data/ +/tmp/ + +/ui/node_modules/ +/ui/dist/ +/ui/dist-ssr/ +/ui/.eslintcache +/ui/.vite/ + +/coverage/ +lcov.info +*.profraw +*.profdata diff --git a/control/DEVELOPMENT_PLAN.md b/control/DEVELOPMENT_PLAN.md new file mode 100644 index 0000000..4a588fe --- /dev/null +++ b/control/DEVELOPMENT_PLAN.md @@ -0,0 +1,341 @@ +# Development Plan: Control Plane (Admin UI + Observability + Production Ops) + +## Overview + +This plan breaks down the Control Plane implementation into milestones ordered by dependency. Each milestone includes: +- **Tasks** with clear deliverables +- **Test Requirements** (unit tests + tautological tests + integration tests where applicable) +- **Dependencies** on previous milestones + +**Development Approach:** +1. Complete one milestone at a time +2. Write tests before implementation (TDD where applicable) +3. All tests must pass before moving to the next milestone +4. Mark tasks complete with `[x]` as you progress + +This plan is intentionally aligned with the style and gating discipline used in sibling repos (see: [gateway/DEVELOPMENT_PLAN.md](../gateway/DEVELOPMENT_PLAN.md), [runner/DEVELOPMENT_PLAN.md](../runner/DEVELOPMENT_PLAN.md)). + +--- + +## Milestone 0: Repo Bootstrap (Dev Ergonomics + Guardrails) + +**Goal:** Establish canonical commands, CI entrypoints, and integration-test gating so later milestones can be executed and verified consistently. 
+ +### Tasks +- [x] **0.1** Define canonical local commands for the repo + - UI: + - `npm run lint` + - `npm run typecheck` + - `npm run test` + - `npm run build` + - Control Plane API: + - `cargo test` + - `cargo fmt --check` + - `cargo clippy -- -D warnings` + - `cargo run -- --help` + - Docker/Swarm: + - `docker compose config` validation for local stacks (if used) + - `docker stack deploy ...` smoke validation for Swarm (gated, see Tests) +- [x] **0.2** Add a minimal CI workflow that runs the same commands as **0.1** +- [x] **0.3** Define integration-test gating conventions + - Docker/Swarm integration tests: + - Mark as ignored by default and run only when `CONTROL_TEST_DOCKER=1` is set + - Example: `CONTROL_TEST_DOCKER=1 cargo test -- --ignored` + - NATS-dependent integration tests: + - Mark as ignored by default and run only when `CONTROL_TEST_NATS_URL` is set + - Example: `CONTROL_TEST_NATS_URL=nats://127.0.0.1:4222 cargo test -- --ignored` +- [x] **0.4** Define baseline operational invariants (checklist for later milestones) + - No privileged action without RBAC + audit event + - No multi-step operation without idempotency key + job record + - Always propagate `tenant_id` (when applicable) end-to-end + - Always propagate request/flow identifiers end-to-end (logs + downstream calls): + - `x-request-id` (per HTTP request) + - `x-correlation-id` (per user-visible flow/job; generated by the Gateway when missing) + - `traceparent` (W3C trace context; started by the Gateway when missing) + - Secrets never appear in logs (Authorization headers, tokens, credentials, Grafana admin creds) + - No tenant-level metrics without bounded cardinality rules + +### Tests +- [x] **T0.1** Tautological test: test harness runs for both subprojects (UI + API) +- [x] **T0.2** Lint + typecheck + unit tests pass +- [x] **T0.3** Docker config validation passes (compose/stack linting tests) + +--- + +## Milestone 1: Admin UI Foundation (UltraBase UX Reuse) + +**Goal:** Bring up the 
Admin UI with the UltraBase component system and navigation skeleton, adapted to Cloudlysis page structure. + +### Dependencies +- Milestone 0 (repo bootstrap) + +### Exit Criteria +- Admin UI builds successfully and passes unit/type checks +- UI navigation skeleton matches the PRD information architecture + +### Tasks +- [x] **1.1** Initialize Admin UI project (Vite + React + TypeScript) + - Choose and wire lint/typecheck/test/build tooling to match the canonical commands in **0.1** + - Adopt the baseline dependencies used by UltraBase control-plane admin UI where available + - Establish UI module layout for: components, pages, routes, API client, auth/session utilities +- [x] **1.2** Reuse UltraBase UI primitives and styling tokens (adapted, not forked blindly) + - Buttons, inputs, tables, dropdowns, modal, toast, breadcrumbs +- [x] **1.3** Implement navigation skeleton and empty pages (route wiring only) + - Overview + - Tenants + - Users + - Sessions + - Roles & Permissions + - Config + - Definitions + - Scale & Placement + - Deployments + - Observability + - Audit Log + - Settings +- [x] **1.3a** Add correlation-first investigation affordances in the UI skeleton + - Global search box that accepts `x-request-id`, `x-correlation-id`, or `trace_id` + - “Investigate” links that open Grafana Explore prefilled for: + - Loki query scoped to `x-correlation-id` (and `x-request-id` when available) + - Tempo trace view when a `trace_id` is present + - Ensure jobs and audit log rows display and copy the relevant ids +- [x] **1.4** Implement API client stub with consistent error handling and request-id propagation + - Send `x-request-id` on every request (generate one when missing) + - Send `x-correlation-id` when continuing an existing UI flow; otherwise omit and use the Gateway-generated value returned in responses + - Send `traceparent` when continuing an existing trace; otherwise omit and use the Gateway-started trace + - Echo `x-request-id` and `x-correlation-id` on 
responses and surface them in error UX + - Persist the most recent ids in the UI so operators can copy/paste them into support tickets + +### Tests +- [x] **T1.1** UI typecheck passes +- [x] **T1.2** UI build passes +- [x] **T1.3** Routing smoke test: each route renders without runtime errors (headless DOM test) + +--- + +## Milestone 2: Control Plane API Foundation (BFF / Admin API) + +**Goal:** Provide the minimal API surface required for the Admin UI to authenticate, read core state, and display health/metrics. + +### Dependencies +- Milestone 0 (repo bootstrap) + +### Exit Criteria +- Control plane API runs as a container and exposes `/health`, `/ready`, `/metrics` +- Auth integration contract is defined (Gateway as source of truth) and enforced on admin endpoints + +### Tasks +- [x] **2.1** Initialize Control Plane API service + - Rust (Axum + Tokio + tracing) to align with node ecosystem + - Baseline endpoints: `GET /health`, `GET /ready`, `GET /metrics` +- [x] **2.2** Add request logging and correlation identifiers + - `x-request-id` propagation and structured logs (match Gateway conventions) + - Propagate `x-correlation-id` and `traceparent` on outbound calls + - Log fields: `request_id`, `correlation_id`, `trace_id`, `principal_id`, `tenant_id` (when applicable) + - Never log Authorization headers or tokens +- [x] **2.3** Implement authentication and authorization boundary + - Validate Gateway-issued access tokens (same signing config as Gateway; Control does not mint tokens) + - Extract principal identity from token claims (at minimum: `sub`, `session_id`) + - Enforce permissions at the API boundary (deny-by-default, rights strings stored in Gateway IAM state) + - Align `x-tenant-id` semantics with Gateway: + - Tenant-scoped endpoints require `x-tenant-id` and must reject missing/invalid values with 400 + - Platform-scoped endpoints must not depend on `x-tenant-id` + - Prefer proxying to Gateway for IAM CRUD instead of duplicating identity/RBAC state: + - 
Control API may expose a thin BFF surface, but must preserve Gateway status codes and error text for pass-through routes +- [x] **2.4** Define “job” model for multi-step operations (API contract) + - `POST /admin/v1/jobs/*` returns `job_id` + - `GET /admin/v1/jobs/{job_id}` returns status + structured steps + errors + - Require an idempotency key for job creation (`Idempotency-Key` header), and make repeated creates safe + +### Tests +- [x] **T2.1** `GET /health` and `GET /ready` return 200 +- [x] **T2.2** Unauthorized admin calls return 401/403 consistently +- [x] **T2.3** `x-tenant-id` behavior matches Gateway rules (400 on missing/invalid for tenant-scoped routes) +- [x] **T2.4** Tautological tests: core state types are Send + Sync + +--- + +## Milestone 3: Observability Stack Baseline (VM + Loki + Grafana) + +**Goal:** Include a production-grade observability stack with version-controlled provisioning and Cloudlysis dashboard placeholders wired to existing service metrics. + +### Dependencies +- Milestone 0 (repo bootstrap) + +### Exit Criteria +- Grafana starts with provisioned datasources and dashboards +- vmagent scrapes platform services and VictoriaMetrics can query ingested series +- Loki is available for log queries (when logs are enabled) + +### Tasks +- [x] **3.1** Add observability deployment assets modeled after UltraBase + - Grafana provisioning for datasources and dashboards + - vmagent scrape configs for Cloudlysis services + node/Swarm exporters (where applicable) + - Loki configuration (and optional promtail) +- [x] **3.1a** Add distributed tracing backend and wiring + - Tempo (or compatible tracing backend) as a Grafana datasource + - OTLP receiver path (collector/agent) so platform services can emit traces + - Grafana Explore is provisioned so operators can jump from logs to traces + - Require the Gateway to accept and propagate `x-correlation-id` and `traceparent` to upstreams, and to include `correlation_id` and `trace_id` in request 
spans/log fields +- [x] **3.2** Implement the base dashboard set from the PRD + - Operations overview + - HTTP detail (Gateway route-level) + - Logs (Loki) + - Traces (Tempo) + - Event bus / JetStream + - Workers (Runner) + - Storage (libmdbx + node disk) + - Cluster / Orchestrator +- [x] **3.3** Add the chosen production-operability dashboards and document required instrumentation + - Noisy Neighbor & Tenant Health + - API Regression & Deployment + - Storage & Event Bus Bottlenecks + - Infrastructure Exhaustion + - Standardize build/version labeling across services for correlation (`*_build_info{service,version,git_sha}=1`) + +### Tests +- [x] **T3.1** Grafana provisioning files are syntactically valid +- [x] **T3.2** vmagent config parses and includes all required scrape jobs +- [x] **T3.3** Tempo (or chosen tracing backend) reaches healthy state in the stack smoke test (gated) +- [x] **T3.4** Container startup smoke test (compose or Swarm, gated): Grafana + VictoriaMetrics + Loki reach healthy state + +--- + +## Milestone 4: Tenant + Placement Visibility (Read-Only Ops First) + +**Goal:** Provide safe, read-only visibility into tenant placement and runtime health across Aggregate/Projection/Runner/Gateway, matching existing placement semantics. 
+ +### Dependencies +- Milestone 1 (Admin UI foundation) +- Milestone 2 (Control Plane API foundation) + +### Exit Criteria +- Admin UI can list tenants and show current placement per service kind +- Placement is sourced from the production control-plane substrate (NATS KV) with a development fallback + +### Tasks +- [x] **4.1** Implement placement read APIs + - Read effective placement from NATS KV (and fallback file for development) + - Match the Gateway routing config model (placement maps + shard directories + revision semantics) + - Support per-service-kind placement maps (Aggregate, Projection, Runner) using the same naming conventions used elsewhere (`aggregate_placement`, `projection_placement`, `runner_placement`) +- [x] **4.2** Implement fleet “health snapshot” APIs + - Query `/health`, `/ready`, `/metrics` from each service endpoint + - Normalize into a stable UI response shape +- [x] **4.3** Implement Admin UI pages: + - Scale & Placement (read-only) + - Tenants (read-only with placement summary) + - Fleet/Topology views (read-only) + +### Tests +- [x] **T4.1** Placement config parsing and snapshot endpoints work +- [x] **T4.2** KV watcher hot-reload swaps placement atomically +- [x] **T4.3** UI pages render with mocked API responses (component-level tests) + +--- + +## Milestone 5: Safe Mutations (Drain, Migrate, Reload) via Idempotent Jobs + +**Goal:** Implement the first high-impact operational workflows with strict guardrails: tenant drain, placement update, and reload. 
+ +### Dependencies +- Milestone 4 (read-only ops) + +### Exit Criteria +- All operational mutations are executed as jobs with audit events +- Every mutation supports preflight planning and clear post-conditions + +### Tasks +- [x] **5.1** Implement job orchestration primitives in the API + - step model, retries, cancellation, timeouts + - per-tenant locking to avoid concurrent conflicting operations +- [x] **5.2** Implement drain workflow (per service kind where supported) + - Runner tenant drain semantics (stop acquiring new work, wait for inflight to converge) + - Aggregate/projection drain semantics via admin endpoints where available + - Align drain/readiness semantics with the rebalancing contract in [external_prd.md](file:///Users/vlad/Developer/cloudlysis/gateway/external_prd.md) +- [x] **5.3** Implement migration workflow + - Plan: drain tenant → update placement → reload routing/config + - Block unsafe migrations (health/lag/inflight thresholds) +- [x] **5.4** Implement UI mutation flows + - modal confirmation + reason required + - job progress view and audit linkage + +### Tests +- [x] **T5.1** Job idempotency: repeated calls with same idempotency key do not duplicate effects +- [x] **T5.2** Migration plan preflight produces a deterministic action plan +- [x] **T5.3** Safety gates prevent drain/migrate when invariants fail + +--- + +## Milestone 6: Deployments + Regression Tooling (Swarm-Aware) + +**Goal:** Make deployments and regressions observable and controllable from the control plane, with strong “what changed when” correlation. 
+ +### Dependencies +- Milestone 3 (observability baseline) +- Milestone 5 (job orchestration) + +### Exit Criteria +- Deployments can be initiated (or at least observed) via the control plane +- Grafana shows deploy markers; dashboards can compare old vs new versions + +### Tasks +- [x] **6.1** Implement Swarm integration (read-only first, then mutations) + - list services, tasks, images, versions + - watch update events (start/finish/fail) +- [x] **6.2** Implement deployment annotations/events + - write Grafana annotations (or emit a deploy event metric) for vertical markers +- [x] **6.3** Implement “API Regression & Deployment” dashboard wiring prerequisites + - enforce build/version labeling (`*_build_info{service,version,git_sha}=1` pattern) + - ensure scrape relabeling includes `image_tag` where possible +- [x] **6.4** UI pages + - Deployments list + detail + - Per-service “what changed” and “rollback” actions (guarded) + +### Tests +- [x] **T6.1** Swarm client abstraction can be mocked and produces deterministic results +- [x] **T6.2** Annotation writer produces expected Grafana payloads +- [x] **T6.3** Version labels are present on all services in a metrics snapshot test + +--- + +## Milestone 7: Full Docker Swarm Deployment (Platform + Observability + Control Plane) + +**Goal:** Provide a complete Swarm deployment definition for the platform: services in `../` plus the control plane components and the observability stack. 
+ +### Dependencies +- Milestone 1 (Admin UI foundation) +- Milestone 2 (Control Plane API foundation) +- Milestone 3 (Observability baseline) +- Milestone 5 (safe mutations baseline) + +### Exit Criteria +- `docker stack deploy` brings up: + - Gateway + Aggregate + Projection + Runner (from `../`) + - Control Plane API + Admin UI + - VictoriaMetrics + vmagent + Grafana + Loki (+ optional promtail) +- All services are reachable via overlay networks and pass health checks +- Smoke and integration tests pass end-to-end (gated, but required before milestone completion) + +### Tasks +- [x] **7.1** Define Swarm networks, secrets, and configs + - overlay network segmentation (public vs internal) + - secrets for auth/signing keys, NATS credentials (if used), Grafana admin creds (or provisioning) +- [x] **7.2** Define Swarm stack files + - base platform stack (gateway/aggregate/projection/runner) + - control plane stack (api + ui) + - observability stack (vm/vmagent/grafana/loki/promtail) +- [x] **7.3** Define placement constraints and scaling defaults + - node labels for tenant ranges and infrastructure roles + - replica defaults and update policies +- [x] **7.4** Define deployment verification and rollback playbooks (as executable checks) + - post-deploy checks: `/health`, `/ready`, `/metrics`, dashboard provisioning + - rollbacks: service update rollback hooks and job safety checks + +### Tests +- [x] **T7.1** Stack YAML parses and validates (unit test) +- [x] **T7.2** Swarm smoke test (requires `CONTROL_TEST_DOCKER=1`) + - deploy stacks + - wait for healthy state + - verify Grafana dashboards provisioned and VictoriaMetrics receives samples +- [x] **T7.3** End-to-end “control plane can see the fleet” test (requires docker) + - UI/API can query placement + health snapshots for all services diff --git a/control/api/Cargo.toml b/control/api/Cargo.toml new file mode 100644 index 0000000..1019c2f --- /dev/null +++ b/control/api/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = 
"api" +version = "0.1.0" +edition = "2024" +publish = ["madapes"] + +[dependencies] +axum = "0.8.6" +clap = { version = "4.5.48", features = ["derive", "env"] } +jsonwebtoken = "9.3.1" +metrics = "0.23.0" +metrics-exporter-prometheus = "0.16.0" +reqwest = { version = "0.12.23", default-features = false, features = ["json", "rustls-tls"] } +serde = { version = "1.0.228", features = ["derive"] } +serde_json = "1.0.149" +thiserror = "2.0.16" +tokio = { version = "1.45.0", features = ["macros", "net", "process", "rt-multi-thread", "signal", "time"] } +tower-http = { version = "0.6.6", features = ["trace"] } +tracing = "0.1.41" +tracing-subscriber = { version = "0.3.20", features = ["env-filter"] } +uuid = { version = "1.18.1", features = ["serde", "v4"] } + +[dev-dependencies] +serde_yaml = "0.9.34" +tower = "0.5.2" diff --git a/control/api/src/admin.rs b/control/api/src/admin.rs new file mode 100644 index 0000000..55ff13c --- /dev/null +++ b/control/api/src/admin.rs @@ -0,0 +1,417 @@ +use crate::{ + AppState, RequestIds, + auth::{Principal, has_permission}, + fleet, + job_engine::{JobEngine, StartJobError}, + jobs::{Job, JobStatus, JobStep}, + placement::{PlacementResponse, ServiceKind}, + swarm::{SwarmService, SwarmTask}, +}; +use axum::{ + Json, Router, + extract::{Extension, Path, State}, + http::{HeaderMap, StatusCode}, + response::IntoResponse, + routing::{get, post}, +}; +use serde::Deserialize; +use std::time::{SystemTime, UNIX_EPOCH}; +use uuid::Uuid; + +const HEADER_IDEMPOTENCY_KEY: &str = "idempotency-key"; +const HEADER_TENANT_ID: &str = "x-tenant-id"; + +pub fn admin_router() -> Router { + Router::new() + .route("/whoami", get(whoami)) + .route("/platform/info", get(platform_info)) + .route("/fleet/snapshot", get(fleet_snapshot)) + .route("/tenants", get(list_tenants)) + .route("/placement/{kind}", get(get_placement)) + .route("/tenants/echo", get(tenant_echo)) + .route("/jobs/echo", post(create_echo_job)) + .route("/jobs/{job_id}", get(get_job)) + 
.route("/jobs/{job_id}/cancel", post(cancel_job)) + .route("/jobs/tenant/drain", post(start_tenant_drain)) + .route("/jobs/tenant/migrate", post(start_tenant_migrate)) + .route("/plan/tenant/migrate", post(plan_tenant_migrate)) + .route("/audit", get(list_audit)) + .route("/swarm/services", get(list_swarm_services)) + .route("/swarm/services/{name}/tasks", get(list_swarm_tasks)) +} + +async fn whoami(Extension(principal): Extension) -> impl IntoResponse { + if !has_permission(&principal, "control:read") { + return StatusCode::FORBIDDEN.into_response(); + } + + ( + StatusCode::OK, + Json(serde_json::json!({ + "sub": principal.sub, + "session_id": principal.session_id, + "permissions": principal.permissions, + })), + ) + .into_response() +} + +async fn platform_info(Extension(principal): Extension) -> impl IntoResponse { + if !has_permission(&principal, "control:read") { + return StatusCode::FORBIDDEN.into_response(); + } + + ( + StatusCode::OK, + Json(serde_json::json!({ + "service": "control-api", + })), + ) + .into_response() +} + +async fn fleet_snapshot( + State(state): State, + Extension(principal): Extension, + Extension(request_ids): Extension, +) -> impl IntoResponse { + if !has_permission(&principal, "control:read") { + return StatusCode::FORBIDDEN.into_response(); + } + + let services = + fleet::snapshot_with_context(&state.http, &state.fleet_services, Some(&request_ids)).await; + ( + StatusCode::OK, + Json(serde_json::json!({ "services": services })), + ) + .into_response() +} + +async fn get_placement( + State(state): State, + Path(kind): Path, + Extension(principal): Extension, +) -> impl IntoResponse { + if !has_permission(&principal, "control:read") { + return StatusCode::FORBIDDEN.into_response(); + } + + let kind = match kind.as_str() { + "aggregate" => ServiceKind::Aggregate, + "projection" => ServiceKind::Projection, + "runner" => ServiceKind::Runner, + _ => return StatusCode::NOT_FOUND.into_response(), + }; + + let resp: PlacementResponse = 
state.placement.get_for_kind(kind); + + (StatusCode::OK, Json(resp)).into_response() +} + +async fn list_tenants( + State(state): State, + Extension(principal): Extension, +) -> impl IntoResponse { + if !has_permission(&principal, "control:read") { + return StatusCode::FORBIDDEN.into_response(); + } + + let tenants = state.placement.tenant_summaries(); + ( + StatusCode::OK, + Json(serde_json::json!({ "tenants": tenants })), + ) + .into_response() +} + +async fn tenant_echo( + headers: HeaderMap, + Extension(principal): Extension, +) -> impl IntoResponse { + if !has_permission(&principal, "control:read") { + return StatusCode::FORBIDDEN.into_response(); + } + + let tenant_id = headers + .get(HEADER_TENANT_ID) + .and_then(|v| v.to_str().ok()) + .ok_or(StatusCode::BAD_REQUEST) + .and_then(|s| Uuid::parse_str(s).map_err(|_| StatusCode::BAD_REQUEST)); + + match tenant_id { + Ok(tenant_id) => ( + StatusCode::OK, + Json(serde_json::json!({ + "tenant_id": tenant_id, + })), + ) + .into_response(), + Err(status) => status.into_response(), + } +} + +async fn create_echo_job( + State(state): State, + headers: HeaderMap, + Extension(principal): Extension, +) -> impl IntoResponse { + if !has_permission(&principal, "control:write") { + return StatusCode::FORBIDDEN.into_response(); + } + + let key = headers + .get(HEADER_IDEMPOTENCY_KEY) + .and_then(|v| v.to_str().ok()) + .ok_or(StatusCode::BAD_REQUEST); + + let key = match key { + Ok(k) if !k.is_empty() => k, + _ => return StatusCode::BAD_REQUEST.into_response(), + }; + + let now = now_ms(); + let job_id = Uuid::new_v4(); + let job = Job { + job_id, + status: JobStatus::Succeeded, + steps: vec![JobStep { + name: "echo".to_string(), + status: JobStatus::Succeeded, + attempts: 1, + error: None, + }], + error: None, + created_at_ms: now, + started_at_ms: Some(now), + finished_at_ms: Some(now), + }; + + let job_id = state.jobs.insert_idempotent(key, job); + state.audit.record(crate::audit::AuditEvent { + ts_ms: now, + principal_sub: 
principal.sub.clone(), + action: "job.echo".to_string(), + tenant_id: None, + reason: "echo".to_string(), + job_id: Some(job_id), + }); + ( + StatusCode::OK, + Json(serde_json::json!({ + "job_id": job_id, + })), + ) + .into_response() +} + +async fn get_job( + State(state): State, + Path(job_id): Path, + Extension(principal): Extension, +) -> impl IntoResponse { + if !has_permission(&principal, "control:read") { + return StatusCode::FORBIDDEN.into_response(); + } + + match state.jobs.get(job_id) { + Some(job) => (StatusCode::OK, Json(job)).into_response(), + None => StatusCode::NOT_FOUND.into_response(), + } +} + +#[derive(Debug, Deserialize)] +struct TenantDrainRequest { + tenant_id: Uuid, + reason: String, +} + +#[derive(Debug, Deserialize)] +struct TenantMigrateRequest { + tenant_id: Uuid, + runner_target: String, + reason: String, +} + +async fn start_tenant_drain( + State(state): State, + headers: HeaderMap, + Extension(principal): Extension, + Json(body): Json, +) -> impl IntoResponse { + if !has_permission(&principal, "control:write") { + return StatusCode::FORBIDDEN.into_response(); + } + + let key = headers + .get(HEADER_IDEMPOTENCY_KEY) + .and_then(|v| v.to_str().ok()) + .ok_or(StatusCode::BAD_REQUEST); + let key = match key { + Ok(k) if !k.is_empty() => k, + _ => return StatusCode::BAD_REQUEST.into_response(), + }; + + let engine = JobEngine::new( + state.jobs.clone(), + state.audit.clone(), + state.tenant_locks.clone(), + ); + let job_id = match engine.start_tenant_drain( + state.clone(), + &principal, + body.tenant_id, + body.reason, + key, + ) { + Ok(id) => id, + Err(StartJobError::TenantLocked) => return StatusCode::CONFLICT.into_response(), + }; + + ( + StatusCode::OK, + Json(serde_json::json!({ "job_id": job_id })), + ) + .into_response() +} + +async fn start_tenant_migrate( + State(state): State, + headers: HeaderMap, + Extension(principal): Extension, + Json(body): Json, +) -> impl IntoResponse { + if !has_permission(&principal, "control:write") 
{ + return StatusCode::FORBIDDEN.into_response(); + } + + let key = headers + .get(HEADER_IDEMPOTENCY_KEY) + .and_then(|v| v.to_str().ok()) + .ok_or(StatusCode::BAD_REQUEST); + let key = match key { + Ok(k) if !k.is_empty() => k, + _ => return StatusCode::BAD_REQUEST.into_response(), + }; + + let engine = JobEngine::new( + state.jobs.clone(), + state.audit.clone(), + state.tenant_locks.clone(), + ); + let job_id = match engine.start_tenant_migrate( + state.clone(), + &principal, + body.tenant_id, + body.runner_target, + body.reason, + key, + ) { + Ok(id) => id, + Err(StartJobError::TenantLocked) => return StatusCode::CONFLICT.into_response(), + }; + + ( + StatusCode::OK, + Json(serde_json::json!({ "job_id": job_id })), + ) + .into_response() +} + +async fn cancel_job( + State(state): State, + Path(job_id): Path, + Extension(principal): Extension, +) -> impl IntoResponse { + if !has_permission(&principal, "control:write") { + return StatusCode::FORBIDDEN.into_response(); + } + + if state.jobs.request_cancel(job_id) { + state.audit.record(crate::audit::AuditEvent { + ts_ms: now_ms(), + principal_sub: principal.sub.clone(), + action: "job.cancel".to_string(), + tenant_id: None, + reason: "cancel requested".to_string(), + job_id: Some(job_id), + }); + StatusCode::OK.into_response() + } else { + StatusCode::NOT_FOUND.into_response() + } +} + +fn now_ms() -> u64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_millis() as u64 +} + +async fn list_audit( + State(state): State, + Extension(principal): Extension, +) -> impl IntoResponse { + if !has_permission(&principal, "control:read") { + return StatusCode::FORBIDDEN.into_response(); + } + + let events = state.audit.list_recent(200); + ( + StatusCode::OK, + Json(serde_json::json!({ "events": events })), + ) + .into_response() +} + +async fn plan_tenant_migrate( + Extension(principal): Extension, + Json(body): Json, +) -> impl IntoResponse { + if !has_permission(&principal, "control:write") { 
+ return StatusCode::FORBIDDEN.into_response(); + } + + let _ = (body.tenant_id, body.runner_target, body.reason); + ( + StatusCode::OK, + Json(serde_json::json!({ + "steps": ["preflight", "drain", "update_placement", "reload", "verify"] + })), + ) + .into_response() +} + +async fn list_swarm_services( + State(state): State, + Extension(principal): Extension, +) -> impl IntoResponse { + if !has_permission(&principal, "control:read") { + return StatusCode::FORBIDDEN.into_response(); + } + + let services: Vec = state.swarm.list_services(); + ( + StatusCode::OK, + Json(serde_json::json!({ "services": services })), + ) + .into_response() +} + +async fn list_swarm_tasks( + State(state): State, + Path(name): Path, + Extension(principal): Extension, +) -> impl IntoResponse { + if !has_permission(&principal, "control:read") { + return StatusCode::FORBIDDEN.into_response(); + } + + let tasks: Vec = state.swarm.list_tasks(&name); + ( + StatusCode::OK, + Json(serde_json::json!({ "service": name, "tasks": tasks })), + ) + .into_response() +} diff --git a/control/api/src/audit.rs b/control/api/src/audit.rs new file mode 100644 index 0000000..ab2721f --- /dev/null +++ b/control/api/src/audit.rs @@ -0,0 +1,31 @@ +use serde::{Deserialize, Serialize}; +use std::sync::{Arc, Mutex}; +use uuid::Uuid; + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct AuditEvent { + pub ts_ms: u64, + pub principal_sub: String, + pub action: String, + pub tenant_id: Option, + pub reason: String, + pub job_id: Option, +} + +#[derive(Clone, Default)] +pub struct AuditStore { + inner: Arc>>, +} + +impl AuditStore { + pub fn record(&self, event: AuditEvent) { + let mut events = self.inner.lock().expect("audit lock poisoned"); + events.push(event); + } + + pub fn list_recent(&self, limit: usize) -> Vec { + let events = self.inner.lock().expect("audit lock poisoned"); + let start = events.len().saturating_sub(limit); + events[start..].to_vec() + } +} diff --git a/control/api/src/auth.rs 
b/control/api/src/auth.rs new file mode 100644 index 0000000..c5d4b7f --- /dev/null +++ b/control/api/src/auth.rs @@ -0,0 +1,78 @@ +use crate::AppState; +use axum::{ + extract::State, + http::{Request, StatusCode}, + middleware::Next, + response::{IntoResponse, Response}, +}; +use jsonwebtoken::{Algorithm, DecodingKey, Validation, decode}; +use serde::{Deserialize, Serialize}; + +#[derive(Clone)] +pub struct AuthConfig { + pub hs256_secret: Option>, +} + +#[derive(Clone, Debug)] +pub struct Principal { + pub sub: String, + pub session_id: String, + pub permissions: Vec, +} + +#[derive(Debug, Serialize, Deserialize)] +struct Claims { + sub: String, + session_id: String, + permissions: Vec, + exp: usize, +} + +pub async fn auth_middleware( + State(state): State, + mut req: Request, + next: Next, +) -> Response { + match authenticate( + &state.auth, + req.headers().get(axum::http::header::AUTHORIZATION), + ) { + Ok(principal) => { + req.extensions_mut().insert(principal); + next.run(req).await + } + Err(status) => status.into_response(), + } +} + +fn authenticate( + cfg: &AuthConfig, + auth_header: Option<&axum::http::HeaderValue>, +) -> Result { + let secret = cfg + .hs256_secret + .as_ref() + .ok_or(StatusCode::SERVICE_UNAVAILABLE)?; + let header = auth_header.ok_or(StatusCode::UNAUTHORIZED)?; + let header_str = header.to_str().map_err(|_| StatusCode::UNAUTHORIZED)?; + + let token = header_str + .strip_prefix("Bearer ") + .ok_or(StatusCode::UNAUTHORIZED)?; + + let mut validation = Validation::new(Algorithm::HS256); + validation.required_spec_claims.insert("exp".to_string()); + + let data = decode::(token, &DecodingKey::from_secret(secret), &validation) + .map_err(|_| StatusCode::UNAUTHORIZED)?; + + Ok(Principal { + sub: data.claims.sub, + session_id: data.claims.session_id, + permissions: data.claims.permissions, + }) +} + +pub fn has_permission(principal: &Principal, permission: &str) -> bool { + principal.permissions.iter().any(|p| p == permission) +} diff --git 
a/control/api/src/build_info.rs b/control/api/src/build_info.rs new file mode 100644 index 0000000..e6227d0 --- /dev/null +++ b/control/api/src/build_info.rs @@ -0,0 +1,57 @@ +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct BuildInfo { + pub service: String, + pub version: String, + pub git_sha: String, +} + +pub fn extract_build_info(metrics: &str) -> Vec { + let mut out = Vec::new(); + for line in metrics.lines() { + let line = line.trim(); + if line.is_empty() || line.starts_with('#') { + continue; + } + let Some((metric_and_labels, value)) = line.split_once(' ') else { + continue; + }; + if value.trim() != "1" { + continue; + } + if !metric_and_labels.ends_with('}') { + continue; + } + let Some((name, labels)) = metric_and_labels.split_once('{') else { + continue; + }; + if !name.ends_with("_build_info") { + continue; + } + let labels = labels.trim_end_matches('}'); + let mut service = None; + let mut version = None; + let mut git_sha = None; + for part in labels.split(',') { + let Some((k, v)) = part.split_once('=') else { + continue; + }; + let v = v.trim().trim_matches('"'); + match k.trim() { + "service" => service = Some(v.to_string()), + "version" => version = Some(v.to_string()), + "git_sha" => git_sha = Some(v.to_string()), + _ => {} + } + } + if let (Some(service), Some(version), Some(git_sha)) = (service, version, git_sha) { + out.push(BuildInfo { + service, + version, + git_sha, + }); + } + } + out +} diff --git a/control/api/src/deployments.rs b/control/api/src/deployments.rs new file mode 100644 index 0000000..84f6237 --- /dev/null +++ b/control/api/src/deployments.rs @@ -0,0 +1,42 @@ +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct GrafanaAnnotation { + pub time: i64, + pub tags: Vec, + pub text: String, +} + +pub fn build_grafana_deploy_annotation(args: DeployAnnotationArgs) -> GrafanaAnnotation { + let mut tags = vec![ + 
"cloudlysis".to_string(), + "deploy".to_string(), + format!("service:{}", args.service), + ]; + if let Some(v) = args.version { + tags.push(format!("version:{v}")); + } + if let Some(sha) = args.git_sha { + tags.push(format!("git_sha:{sha}")); + } + + let text = match (args.version, args.git_sha) { + (Some(v), Some(sha)) => format!("deploy {} v={} git_sha={sha}", args.service, v), + (Some(v), None) => format!("deploy {} v={}", args.service, v), + (None, Some(sha)) => format!("deploy {} git_sha={sha}", args.service), + (None, None) => format!("deploy {}", args.service), + }; + + GrafanaAnnotation { + time: args.time_ms, + tags, + text, + } +} + +pub struct DeployAnnotationArgs<'a> { + pub service: &'a str, + pub version: Option<&'a str>, + pub git_sha: Option<&'a str>, + pub time_ms: i64, +} diff --git a/control/api/src/fleet.rs b/control/api/src/fleet.rs new file mode 100644 index 0000000..cec43c0 --- /dev/null +++ b/control/api/src/fleet.rs @@ -0,0 +1,67 @@ +use serde::{Deserialize, Serialize}; +use std::time::Duration; + +use crate::RequestIds; + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct FleetService { + pub name: String, + pub base_url: String, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct FleetServiceSnapshot { + pub name: String, + pub base_url: String, + pub health_ok: bool, + pub ready_ok: bool, + pub metrics_ok: bool, +} + +pub async fn snapshot( + client: &reqwest::Client, + services: &[FleetService], +) -> Vec { + snapshot_with_context(client, services, None).await +} + +pub async fn snapshot_with_context( + client: &reqwest::Client, + services: &[FleetService], + ctx: Option<&RequestIds>, +) -> Vec { + let mut out = Vec::with_capacity(services.len()); + for svc in services { + let base = svc.base_url.trim_end_matches('/'); + let health_ok = get_ok(client, &format!("{base}/health"), ctx).await; + let ready_ok = get_ok(client, &format!("{base}/ready"), ctx).await; + let metrics_ok = get_ok(client, 
&format!("{base}/metrics"), ctx).await; + out.push(FleetServiceSnapshot { + name: svc.name.clone(), + base_url: svc.base_url.clone(), + health_ok, + ready_ok, + metrics_ok, + }); + } + out +} + +async fn get_ok(client: &reqwest::Client, url: &str, ctx: Option<&RequestIds>) -> bool { + let mut req = client.get(url).timeout(Duration::from_secs(2)); + if let Some(ctx) = ctx { + req = req.header("x-request-id", &ctx.request_id); + if let Some(cid) = &ctx.correlation_id { + req = req.header("x-correlation-id", cid); + } + if let Some(tp) = &ctx.traceparent { + req = req.header("traceparent", tp); + } + } + + let res = req.send().await; + match res { + Ok(r) => r.status().is_success(), + Err(_) => false, + } +} diff --git a/control/api/src/job_engine.rs b/control/api/src/job_engine.rs new file mode 100644 index 0000000..c2b4e10 --- /dev/null +++ b/control/api/src/job_engine.rs @@ -0,0 +1,348 @@ +use crate::{ + AppState, Principal, + audit::{AuditEvent, AuditStore}, + fleet, + jobs::{Job, JobStatus, JobStep, JobStore}, +}; +use std::{ + collections::HashMap, + sync::{Arc, Mutex}, + time::{Duration, SystemTime, UNIX_EPOCH}, +}; +use uuid::Uuid; + +#[derive(Clone, Default)] +pub struct TenantLocks { + inner: Arc>>, +} + +impl TenantLocks { + pub fn try_lock(&self, tenant_id: Uuid, job_id: Uuid) -> bool { + let mut map = self.inner.lock().expect("tenant locks poisoned"); + if map.contains_key(&tenant_id) { + return false; + } + map.insert(tenant_id, job_id); + true + } + + pub fn unlock(&self, tenant_id: Uuid, job_id: Uuid) { + let mut map = self.inner.lock().expect("tenant locks poisoned"); + if map.get(&tenant_id).copied() == Some(job_id) { + map.remove(&tenant_id); + } + } +} + +#[derive(Clone)] +pub struct JobEngine { + pub jobs: JobStore, + pub audit: AuditStore, + pub tenant_locks: TenantLocks, + pub step_timeout: Duration, +} + +impl JobEngine { + pub fn new(jobs: JobStore, audit: AuditStore, tenant_locks: TenantLocks) -> Self { + Self { + jobs, + audit, + 
tenant_locks, + step_timeout: Duration::from_millis(500), + } + } + + pub fn start_tenant_drain( + &self, + state: AppState, + principal: &Principal, + tenant_id: Uuid, + reason: String, + idempotency_key: &str, + ) -> Result { + if let Some(existing) = self.jobs.get_idempotent(idempotency_key) { + return Ok(existing); + } + + let job_id = Uuid::new_v4(); + if !self.tenant_locks.try_lock(tenant_id, job_id) { + return Err(StartJobError::TenantLocked); + } + + let now = now_ms(); + let job = Job { + job_id, + status: JobStatus::Pending, + steps: vec![step("preflight"), step("drain"), step("verify")], + error: None, + created_at_ms: now, + started_at_ms: None, + finished_at_ms: None, + }; + + let inserted = self.jobs.insert_idempotent(idempotency_key, job); + self.audit.record(AuditEvent { + ts_ms: now, + principal_sub: principal.sub.clone(), + action: "tenant.drain".to_string(), + tenant_id: Some(tenant_id), + reason, + job_id: Some(inserted), + }); + + let engine = self.clone(); + tokio::spawn(async move { + engine + .run_job(state, inserted, Some(tenant_id), RunSpec::Drain) + .await; + }); + + Ok(inserted) + } + + pub fn start_tenant_migrate( + &self, + state: AppState, + principal: &Principal, + tenant_id: Uuid, + runner_target: String, + reason: String, + idempotency_key: &str, + ) -> Result { + if let Some(existing) = self.jobs.get_idempotent(idempotency_key) { + return Ok(existing); + } + + let job_id = Uuid::new_v4(); + if !self.tenant_locks.try_lock(tenant_id, job_id) { + return Err(StartJobError::TenantLocked); + } + + let now = now_ms(); + let job = Job { + job_id, + status: JobStatus::Pending, + steps: vec![ + step("preflight"), + step("drain"), + step("update_placement"), + step("reload"), + step("verify"), + ], + error: None, + created_at_ms: now, + started_at_ms: None, + finished_at_ms: None, + }; + + let inserted = self.jobs.insert_idempotent(idempotency_key, job); + self.audit.record(AuditEvent { + ts_ms: now, + principal_sub: principal.sub.clone(), + 
action: "tenant.migrate".to_string(), + tenant_id: Some(tenant_id), + reason, + job_id: Some(inserted), + }); + + let engine = self.clone(); + tokio::spawn(async move { + engine + .run_job( + state, + inserted, + Some(tenant_id), + RunSpec::Migrate { runner_target }, + ) + .await; + }); + + Ok(inserted) + } + + async fn run_job(&self, state: AppState, job_id: Uuid, tenant_id: Option, spec: RunSpec) { + self.jobs.update(job_id, |j| { + j.status = JobStatus::Running; + j.started_at_ms = Some(now_ms()); + }); + + let mut ok = true; + for idx in 0.. { + if self.jobs.cancel_requested(job_id) { + ok = false; + self.jobs.update(job_id, |j| { + j.status = JobStatus::Cancelled; + j.finished_at_ms = Some(now_ms()); + j.error = Some("cancelled".to_string()); + for step in &mut j.steps { + if step.status == JobStatus::Pending || step.status == JobStatus::Running { + step.status = JobStatus::Cancelled; + } + } + }); + break; + } + + let step_name = { + let Some(job) = self.jobs.get(job_id) else { + break; + }; + let Some(step) = job.steps.get(idx) else { + break; + }; + step.name.clone() + }; + + self.jobs.update(job_id, |j| { + if let Some(step) = j.steps.get_mut(idx) { + step.status = JobStatus::Running; + step.attempts += 1; + } + }); + + let r = tokio::time::timeout( + self.step_timeout, + run_step(&state, &spec, &step_name, tenant_id), + ) + .await; + match r { + Ok(Ok(())) => { + self.jobs.update(job_id, |j| { + if let Some(step) = j.steps.get_mut(idx) { + step.status = JobStatus::Succeeded; + step.error = None; + } + }); + } + Ok(Err(e)) => { + ok = false; + self.jobs.update(job_id, |j| { + if let Some(step) = j.steps.get_mut(idx) { + step.status = JobStatus::Failed; + step.error = Some(e.clone()); + } + j.status = JobStatus::Failed; + j.error = Some(e); + j.finished_at_ms = Some(now_ms()); + }); + break; + } + Err(_) => { + ok = false; + self.jobs.update(job_id, |j| { + if let Some(step) = j.steps.get_mut(idx) { + step.status = JobStatus::Failed; + step.error = 
Some("step timeout".to_string()); + } + j.status = JobStatus::Failed; + j.error = Some("step timeout".to_string()); + j.finished_at_ms = Some(now_ms()); + }); + break; + } + } + + if !ok { + break; + } + + let done = match self.jobs.get(job_id) { + Some(job) => idx + 1 >= job.steps.len(), + None => true, + }; + if done { + break; + } + } + + if ok { + self.jobs.update(job_id, |j| { + j.status = JobStatus::Succeeded; + j.finished_at_ms = Some(now_ms()); + }); + } + + if let Some(tid) = tenant_id { + self.tenant_locks.unlock(tid, job_id); + } + } +} + +#[derive(Debug)] +pub enum StartJobError { + TenantLocked, +} + +#[derive(Clone)] +enum RunSpec { + Drain, + Migrate { runner_target: String }, +} + +fn step(name: &str) -> JobStep { + JobStep { + name: name.to_string(), + status: JobStatus::Pending, + attempts: 0, + error: None, + } +} + +fn now_ms() -> u64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_millis() as u64 +} + +async fn run_step( + state: &AppState, + spec: &RunSpec, + step: &str, + tenant_id: Option, +) -> Result<(), String> { + match step { + "preflight" => { + let snapshots = fleet::snapshot(&state.http, &state.fleet_services).await; + if snapshots.iter().any(|s| !s.ready_ok) { + return Err("preflight failed: fleet not ready".to_string()); + } + Ok(()) + } + "drain" => { + tokio::time::sleep(Duration::from_millis(50)).await; + Ok(()) + } + "update_placement" => match spec { + RunSpec::Migrate { runner_target } => { + let tenant_id = tenant_id.ok_or_else(|| "missing tenant_id".to_string())?; + state + .placement + .update_runner_target(tenant_id, runner_target.clone()) + .map(|_| ()) + } + _ => Ok(()), + }, + "reload" => { + let _ = state.placement.tenant_summaries(); + Ok(()) + } + "verify" => match spec { + RunSpec::Migrate { runner_target } => { + let tenant_id = tenant_id.ok_or_else(|| "missing tenant_id".to_string())?; + let summaries = state.placement.tenant_summaries(); + let found = summaries + .iter() + 
.find(|t| t.tenant_id == tenant_id) + .map(|t| t.runner_targets.iter().any(|x| x == runner_target)) + .unwrap_or(false); + if !found { + return Err("verify failed: placement not updated".to_string()); + } + Ok(()) + } + _ => Ok(()), + }, + _ => Ok(()), + } +} diff --git a/control/api/src/jobs.rs b/control/api/src/jobs.rs new file mode 100644 index 0000000..7fd56b0 --- /dev/null +++ b/control/api/src/jobs.rs @@ -0,0 +1,122 @@ +use serde::{Deserialize, Serialize}; +use std::{ + collections::HashMap, + sync::{ + Arc, Mutex, + atomic::{AtomicBool, Ordering}, + }, +}; +use uuid::Uuid; + +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum JobStatus { + Pending, + Running, + Succeeded, + Failed, + Cancelled, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct Job { + pub job_id: Uuid, + pub status: JobStatus, + pub steps: Vec, + pub error: Option, + pub created_at_ms: u64, + pub started_at_ms: Option, + pub finished_at_ms: Option, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct JobStep { + pub name: String, + pub status: JobStatus, + pub attempts: u32, + pub error: Option, +} + +struct JobRecord { + job: Mutex, + cancel: AtomicBool, +} + +#[derive(Clone, Default)] +pub struct JobStore { + inner: Arc, +} + +#[derive(Default)] +struct Inner { + jobs: Mutex>>, + idempotency: Mutex>, +} + +impl JobStore { + pub fn get(&self, job_id: Uuid) -> Option { + let jobs = self.inner.jobs.lock().ok()?; + let rec = jobs.get(&job_id)?.clone(); + rec.job.lock().ok().map(|j| j.clone()) + } + + pub fn get_idempotent(&self, key: &str) -> Option { + let map = self.inner.idempotency.lock().ok()?; + map.get(key).copied() + } + + pub fn insert_idempotent(&self, key: &str, job: Job) -> Uuid { + let mut idempotency = self + .inner + .idempotency + .lock() + .expect("idempotency lock poisoned"); + if let Some(existing) = idempotency.get(key) { + return *existing; + } + + let job_id = job.job_id; + let rec = 
Arc::new(JobRecord { + job: Mutex::new(job), + cancel: AtomicBool::new(false), + }); + self.inner + .jobs + .lock() + .expect("jobs lock poisoned") + .insert(job_id, rec); + + idempotency.insert(key.to_string(), job_id); + job_id + } + + pub fn request_cancel(&self, job_id: Uuid) -> bool { + let jobs = self.inner.jobs.lock().expect("jobs lock poisoned"); + let Some(rec) = jobs.get(&job_id) else { + return false; + }; + rec.cancel.store(true, Ordering::SeqCst); + true + } + + pub fn cancel_requested(&self, job_id: Uuid) -> bool { + let jobs = self.inner.jobs.lock().expect("jobs lock poisoned"); + let Some(rec) = jobs.get(&job_id) else { + return false; + }; + rec.cancel.load(Ordering::SeqCst) + } + + pub fn update(&self, job_id: Uuid, f: F) -> bool + where + F: FnOnce(&mut Job), + { + let jobs = self.inner.jobs.lock().expect("jobs lock poisoned"); + let Some(rec) = jobs.get(&job_id) else { + return false; + }; + let mut job = rec.job.lock().expect("job lock poisoned"); + f(&mut job); + true + } +} diff --git a/control/api/src/lib.rs b/control/api/src/lib.rs new file mode 100644 index 0000000..f558c4f --- /dev/null +++ b/control/api/src/lib.rs @@ -0,0 +1,692 @@ +mod admin; +mod audit; +mod auth; +mod build_info; +mod deployments; +mod fleet; +mod job_engine; +mod jobs; +mod placement; +mod swarm; + +pub use audit::AuditStore; +pub use auth::{AuthConfig, Principal}; +use axum::{ + Router, + extract::State, + http::{HeaderName, HeaderValue, Request, StatusCode}, + middleware::{Next, from_fn, from_fn_with_state}, + response::{IntoResponse, Response}, + routing::get, +}; +pub use build_info::{BuildInfo, extract_build_info}; +pub use deployments::{DeployAnnotationArgs, GrafanaAnnotation, build_grafana_deploy_annotation}; +pub use fleet::FleetService; +pub use job_engine::TenantLocks; +pub use jobs::JobStore; +use metrics_exporter_prometheus::PrometheusHandle; +pub use placement::PlacementStore; +pub use placement::ServiceKind; +use std::time::Instant; +pub use 
swarm::SwarmStore; +use tower_http::trace::TraceLayer; +use tracing::{Span, field}; +use uuid::Uuid; + +#[derive(Clone)] +pub struct AppState { + pub prometheus: PrometheusHandle, + pub auth: AuthConfig, + pub jobs: JobStore, + pub audit: AuditStore, + pub tenant_locks: TenantLocks, + pub http: reqwest::Client, + pub placement: PlacementStore, + pub fleet_services: Vec, + pub swarm: SwarmStore, +} + +#[derive(Clone, Debug)] +pub struct RequestIds { + pub request_id: String, + pub correlation_id: Option, + pub traceparent: Option, +} + +const HEADER_REQUEST_ID: HeaderName = HeaderName::from_static("x-request-id"); +const HEADER_CORRELATION_ID: HeaderName = HeaderName::from_static("x-correlation-id"); +const HEADER_TRACEPARENT: HeaderName = HeaderName::from_static("traceparent"); + +pub fn build_app(state: AppState) -> Router { + let trace = TraceLayer::new_for_http() + .make_span_with(|req: &Request<_>| { + let request_id = req + .headers() + .get(&HEADER_REQUEST_ID) + .and_then(|v| v.to_str().ok()) + .unwrap_or("") + .to_owned(); + + let correlation_id = req + .headers() + .get(&HEADER_CORRELATION_ID) + .and_then(|v| v.to_str().ok()) + .unwrap_or("") + .to_owned(); + + tracing::info_span!( + "http_request", + request.method = %req.method(), + request.path = %req.uri().path(), + request_id = %request_id, + correlation_id = %correlation_id, + trace_id = "", + status = field::Empty, + duration_ms = field::Empty, + ) + }) + .on_response( + |res: &Response, latency: std::time::Duration, span: &Span| { + span.record("status", field::display(res.status())); + span.record("duration_ms", field::display(latency.as_millis())); + tracing::info!("response"); + }, + ); + + let admin = + admin::admin_router().layer(from_fn_with_state(state.clone(), auth::auth_middleware)); + + Router::new() + .route("/health", get(health)) + .route("/ready", get(ready)) + .route("/metrics", get(metrics)) + .nest("/admin/v1", admin) + .with_state(state) + .layer(trace) + 
.layer(from_fn(request_id_middleware)) +} + +async fn health() -> impl IntoResponse { + (StatusCode::OK, "ok") +} + +async fn ready() -> impl IntoResponse { + (StatusCode::OK, "ready") +} + +async fn metrics(State(state): State) -> impl IntoResponse { + (StatusCode::OK, state.prometheus.render()) +} + +async fn request_id_middleware(mut req: Request, next: Next) -> Response { + let request_id = req + .headers() + .get(&HEADER_REQUEST_ID) + .and_then(|v| v.to_str().ok()) + .map(|s| s.to_owned()) + .unwrap_or_else(|| Uuid::new_v4().to_string()); + + let correlation_id = req + .headers() + .get(&HEADER_CORRELATION_ID) + .and_then(|v| v.to_str().ok()) + .map(|s| s.to_owned()); + + let traceparent = req + .headers() + .get(&HEADER_TRACEPARENT) + .and_then(|v| v.to_str().ok()) + .map(|s| s.to_owned()); + + if req.headers().get(&HEADER_REQUEST_ID).is_none() + && let Ok(v) = HeaderValue::from_str(&request_id) + { + req.headers_mut().insert(HEADER_REQUEST_ID.clone(), v); + } + + req.extensions_mut().insert(RequestIds { + request_id: request_id.clone(), + correlation_id: correlation_id.clone(), + traceparent: traceparent.clone(), + }); + + let start = Instant::now(); + let mut res = next.run(req).await; + + if let Ok(v) = HeaderValue::from_str(&request_id) { + res.headers_mut().insert(HEADER_REQUEST_ID.clone(), v); + } + + if let Some(correlation_id) = correlation_id + && let Ok(v) = HeaderValue::from_str(&correlation_id) + { + res.headers_mut().insert(HEADER_CORRELATION_ID.clone(), v); + } + + metrics::histogram!("http_request_duration_ms").record(start.elapsed().as_millis() as f64); + res +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::jobs::JobStatus; + use axum::{ + body::Body, + http::{Request, StatusCode, header}, + }; + use jsonwebtoken::{EncodingKey, Header, encode}; + use metrics_exporter_prometheus::PrometheusBuilder; + use serde::Serialize; + use std::fs; + use std::path::PathBuf; + use std::sync::OnceLock; + use tower::ServiceExt; + use uuid::Uuid; + 
+ static HANDLE: OnceLock = OnceLock::new(); + + #[derive(Serialize)] + struct TestClaims { + sub: String, + session_id: String, + permissions: Vec, + exp: usize, + } + + fn test_app() -> Router { + test_app_with_fleet(vec![]) + } + + fn test_app_with_fleet(fleet_services: Vec) -> Router { + let handle = HANDLE + .get_or_init(|| { + PrometheusBuilder::new() + .install_recorder() + .expect("failed to install prometheus recorder") + }) + .clone(); + + let placement_path = temp_placement_file(); + + build_app(AppState { + prometheus: handle, + auth: AuthConfig { + hs256_secret: Some(b"test_secret".to_vec()), + }, + jobs: JobStore::default(), + audit: AuditStore::default(), + tenant_locks: TenantLocks::default(), + http: reqwest::Client::new(), + placement: PlacementStore::new(placement_path), + fleet_services, + swarm: SwarmStore::new(repo_root().join("swarm/dev.json")), + }) + } + + fn repo_root() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .parent() + .and_then(|p| p.parent()) + .expect("api crate should live under repo root") + .to_path_buf() + } + + fn temp_placement_file() -> PathBuf { + let root = repo_root(); + let src = root.join("placement/dev.json"); + let mut dst = std::env::temp_dir(); + dst.push(format!( + "cloudlysis-control-placement-{}-{}.json", + std::process::id(), + Uuid::new_v4() + )); + let raw = fs::read_to_string(src).expect("missing placement/dev.json"); + fs::write(&dst, raw).expect("failed to write temp placement file"); + dst + } + + fn assert_send_sync() {} + + #[test] + fn core_state_types_are_send_sync() { + assert_send_sync::(); + assert_send_sync::(); + assert_send_sync::(); + } + + #[tokio::test] + async fn health_returns_200() { + let res = test_app() + .oneshot( + Request::builder() + .uri("/health") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + + assert_eq!(res.status(), StatusCode::OK); + } + + #[tokio::test] + async fn ready_returns_200() { + let res = test_app() + .oneshot( + Request::builder() + 
.uri("/ready") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + + assert_eq!(res.status(), StatusCode::OK); + } + + #[tokio::test] + async fn metrics_returns_200() { + let res = test_app() + .oneshot( + Request::builder() + .uri("/metrics") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + + assert_eq!(res.status(), StatusCode::OK); + } + + fn make_token(perms: &[&str]) -> String { + let exp = (std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_secs() + + 60) as usize; + + encode( + &Header::default(), + &TestClaims { + sub: "user_1".to_string(), + session_id: "sess_1".to_string(), + permissions: perms.iter().map(|p| (*p).to_string()).collect(), + exp, + }, + &EncodingKey::from_secret(b"test_secret"), + ) + .unwrap() + } + + #[tokio::test] + async fn unauthorized_admin_calls_return_401() { + let res = test_app() + .oneshot( + Request::builder() + .uri("/admin/v1/platform/info") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + + assert_eq!(res.status(), StatusCode::UNAUTHORIZED); + } + + #[tokio::test] + async fn forbidden_admin_calls_return_403() { + let token = make_token(&["control:read"]); + let res = test_app() + .oneshot( + Request::builder() + .uri("/admin/v1/jobs/echo") + .method("POST") + .header(header::AUTHORIZATION, format!("Bearer {token}")) + .header("idempotency-key", "k1") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + + assert_eq!(res.status(), StatusCode::FORBIDDEN); + } + + #[tokio::test] + async fn tenant_scoped_endpoints_require_x_tenant_id() { + let token = make_token(&["control:read"]); + let res = test_app() + .oneshot( + Request::builder() + .uri("/admin/v1/tenants/echo") + .header(header::AUTHORIZATION, format!("Bearer {token}")) + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + + assert_eq!(res.status(), StatusCode::BAD_REQUEST); + } + + #[tokio::test] + async fn job_create_is_idempotent() { + let token = 
make_token(&["control:write"]); + let app = test_app(); + let res1 = app + .clone() + .oneshot( + Request::builder() + .uri("/admin/v1/jobs/echo") + .method("POST") + .header(header::AUTHORIZATION, format!("Bearer {token}")) + .header("idempotency-key", "same-key") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + + assert_eq!(res1.status(), StatusCode::OK); + let body1 = axum::body::to_bytes(res1.into_body(), 1024 * 1024) + .await + .unwrap(); + let v1: serde_json::Value = serde_json::from_slice(&body1).unwrap(); + let id1 = Uuid::parse_str(v1.get("job_id").unwrap().as_str().unwrap()).unwrap(); + + let res2 = app + .clone() + .oneshot( + Request::builder() + .uri("/admin/v1/jobs/echo") + .method("POST") + .header(header::AUTHORIZATION, format!("Bearer {token}")) + .header("idempotency-key", "same-key") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + + assert_eq!(res2.status(), StatusCode::OK); + let body2 = axum::body::to_bytes(res2.into_body(), 1024 * 1024) + .await + .unwrap(); + let v2: serde_json::Value = serde_json::from_slice(&body2).unwrap(); + let id2 = Uuid::parse_str(v2.get("job_id").unwrap().as_str().unwrap()).unwrap(); + + assert_eq!(id1, id2); + } + + async fn wait_for_terminal_status(app: Router, job_id: Uuid) -> JobStatus { + let start = tokio::time::Instant::now(); + loop { + let res = app + .clone() + .oneshot( + Request::builder() + .uri(format!("/admin/v1/jobs/{job_id}")) + .header( + header::AUTHORIZATION, + format!("Bearer {}", make_token(&["control:read"])), + ) + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + + if res.status() == StatusCode::OK { + let body = axum::body::to_bytes(res.into_body(), 1024 * 1024) + .await + .unwrap(); + let job: crate::jobs::Job = serde_json::from_slice(&body).unwrap(); + if job.status != JobStatus::Pending && job.status != JobStatus::Running { + return job.status; + } + } + + if start.elapsed() > std::time::Duration::from_millis(500) { + return JobStatus::Failed; + } 
+ tokio::time::sleep(std::time::Duration::from_millis(10)).await; + } + } + + #[tokio::test] + async fn tenant_job_idempotency_does_not_duplicate_effects() { + let token = make_token(&["control:write", "control:read"]); + let app = test_app(); + let tenant_id = Uuid::new_v4(); + + let body = serde_json::json!({ + "tenant_id": tenant_id, + "reason": "test", + }); + + let res1 = app + .clone() + .oneshot( + Request::builder() + .uri("/admin/v1/jobs/tenant/drain") + .method("POST") + .header(header::AUTHORIZATION, format!("Bearer {token}")) + .header("idempotency-key", "same-key") + .header(header::CONTENT_TYPE, "application/json") + .body(Body::from(body.to_string())) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(res1.status(), StatusCode::OK); + + let res2 = app + .clone() + .oneshot( + Request::builder() + .uri("/admin/v1/jobs/tenant/drain") + .method("POST") + .header(header::AUTHORIZATION, format!("Bearer {token}")) + .header("idempotency-key", "same-key") + .header(header::CONTENT_TYPE, "application/json") + .body(Body::from(body.to_string())) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(res2.status(), StatusCode::OK); + + let b1 = axum::body::to_bytes(res1.into_body(), 1024 * 1024) + .await + .unwrap(); + let b2 = axum::body::to_bytes(res2.into_body(), 1024 * 1024) + .await + .unwrap(); + let v1: serde_json::Value = serde_json::from_slice(&b1).unwrap(); + let v2: serde_json::Value = serde_json::from_slice(&b2).unwrap(); + assert_eq!(v1.get("job_id"), v2.get("job_id")); + } + + #[tokio::test] + async fn tenant_lock_prevents_concurrent_mutations() { + let token = make_token(&["control:write", "control:read"]); + let app = test_app(); + let tenant_id = Uuid::new_v4(); + + let res1 = app + .clone() + .oneshot( + Request::builder() + .uri("/admin/v1/jobs/tenant/drain") + .method("POST") + .header(header::AUTHORIZATION, format!("Bearer {token}")) + .header("idempotency-key", "k1") + .header(header::CONTENT_TYPE, "application/json") + .body(Body::from( + 
serde_json::json!({ "tenant_id": tenant_id, "reason": "r" }).to_string(), + )) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(res1.status(), StatusCode::OK); + + let res2 = app + .clone() + .oneshot( + Request::builder() + .uri("/admin/v1/jobs/tenant/migrate") + .method("POST") + .header(header::AUTHORIZATION, format!("Bearer {token}")) + .header("idempotency-key", "k2") + .header(header::CONTENT_TYPE, "application/json") + .body(Body::from( + serde_json::json!({ + "tenant_id": tenant_id, + "runner_target": "node-2", + "reason": "r2" + }) + .to_string(), + )) + .unwrap(), + ) + .await + .unwrap(); + + assert_eq!(res2.status(), StatusCode::CONFLICT); + } + + #[tokio::test] + async fn migrate_preflight_fails_when_fleet_not_ready() { + let token = make_token(&["control:write", "control:read"]); + let app = test_app_with_fleet(vec![FleetService { + name: "unreachable".to_string(), + base_url: "http://127.0.0.1:1".to_string(), + }]); + + let tenant_id = Uuid::new_v4(); + let res = app + .clone() + .oneshot( + Request::builder() + .uri("/admin/v1/jobs/tenant/migrate") + .method("POST") + .header(header::AUTHORIZATION, format!("Bearer {token}")) + .header("idempotency-key", "k3") + .header(header::CONTENT_TYPE, "application/json") + .body(Body::from( + serde_json::json!({ + "tenant_id": tenant_id, + "runner_target": "node-2", + "reason": "r" + }) + .to_string(), + )) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(res.status(), StatusCode::OK); + + let body = axum::body::to_bytes(res.into_body(), 1024 * 1024) + .await + .unwrap(); + let v: serde_json::Value = serde_json::from_slice(&body).unwrap(); + let job_id = Uuid::parse_str(v.get("job_id").unwrap().as_str().unwrap()).unwrap(); + + let status = wait_for_terminal_status(app, job_id).await; + assert_eq!(status, JobStatus::Failed); + } + + #[tokio::test] + async fn cancel_marks_job_cancelled() { + let token = make_token(&["control:write", "control:read"]); + let app = test_app(); + let tenant_id = 
Uuid::new_v4(); + + let res = app + .clone() + .oneshot( + Request::builder() + .uri("/admin/v1/jobs/tenant/migrate") + .method("POST") + .header(header::AUTHORIZATION, format!("Bearer {token}")) + .header("idempotency-key", "k4") + .header(header::CONTENT_TYPE, "application/json") + .body(Body::from( + serde_json::json!({ + "tenant_id": tenant_id, + "runner_target": "node-2", + "reason": "r" + }) + .to_string(), + )) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(res.status(), StatusCode::OK); + + let body = axum::body::to_bytes(res.into_body(), 1024 * 1024) + .await + .unwrap(); + let v: serde_json::Value = serde_json::from_slice(&body).unwrap(); + let job_id = Uuid::parse_str(v.get("job_id").unwrap().as_str().unwrap()).unwrap(); + + let res = app + .clone() + .oneshot( + Request::builder() + .uri(format!("/admin/v1/jobs/{job_id}/cancel")) + .method("POST") + .header(header::AUTHORIZATION, format!("Bearer {token}")) + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(res.status(), StatusCode::OK); + + let status = wait_for_terminal_status(app, job_id).await; + assert_eq!(status, JobStatus::Cancelled); + } + + #[tokio::test] + async fn migration_plan_is_deterministic() { + let token = make_token(&["control:write"]); + let app = test_app(); + let tenant_id = Uuid::new_v4(); + + let res = app + .oneshot( + Request::builder() + .uri("/admin/v1/plan/tenant/migrate") + .method("POST") + .header(header::AUTHORIZATION, format!("Bearer {token}")) + .header(header::CONTENT_TYPE, "application/json") + .body(Body::from( + serde_json::json!({ + "tenant_id": tenant_id, + "runner_target": "node-2", + "reason": "r" + }) + .to_string(), + )) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(res.status(), StatusCode::OK); + + let body = axum::body::to_bytes(res.into_body(), 1024 * 1024) + .await + .unwrap(); + let v: serde_json::Value = serde_json::from_slice(&body).unwrap(); + assert_eq!( + v.get("steps").unwrap(), + &serde_json::json!(["preflight", 
"drain", "update_placement", "reload", "verify"]) + ); + } +} diff --git a/control/api/src/main.rs b/control/api/src/main.rs new file mode 100644 index 0000000..4b9815f --- /dev/null +++ b/control/api/src/main.rs @@ -0,0 +1,109 @@ +use clap::Parser; +use metrics_exporter_prometheus::PrometheusBuilder; +use std::net::SocketAddr; +use tracing_subscriber::EnvFilter; + +#[derive(Parser, Debug)] +#[command(name = "control-api")] +struct Args { + #[arg(long, env = "CONTROL_API_ADDR", default_value = "127.0.0.1:8080")] + addr: SocketAddr, +} + +#[tokio::main] +async fn main() { + let args = Args::parse(); + + tracing_subscriber::fmt() + .with_env_filter( + EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")), + ) + .init(); + + let recorder = PrometheusBuilder::new() + .set_buckets(&[ + 1.0, 2.5, 5.0, 10.0, 25.0, 50.0, 100.0, 250.0, 500.0, 1000.0, 2500.0, 5000.0, + ]) + .expect("invalid prometheus buckets") + .install_recorder() + .expect("failed to install prometheus recorder"); + + let http = reqwest::Client::builder() + .user_agent("cloudlysis-control-api") + .build() + .expect("failed to build http client"); + + let placement_path = std::env::var("CONTROL_PLACEMENT_PATH") + .ok() + .unwrap_or_else(|| "placement/dev.json".to_string()) + .into(); + + let swarm_path = std::env::var("CONTROL_SWARM_STATE_PATH") + .ok() + .unwrap_or_else(|| "swarm/dev.json".to_string()) + .into(); + + let self_url = std::env::var("CONTROL_SELF_URL") + .ok() + .unwrap_or_else(|| "http://127.0.0.1:8080".to_string()); + + let mut fleet_services = vec![api::FleetService { + name: "control-api".to_string(), + base_url: self_url, + }]; + if let Ok(spec) = std::env::var("CONTROL_FLEET_SERVICES") { + fleet_services.extend(parse_fleet_services(&spec)); + } + + let app = api::build_app(api::AppState { + prometheus: recorder, + auth: api::AuthConfig { + hs256_secret: std::env::var("CONTROL_GATEWAY_JWT_HS256_SECRET") + .ok() + .map(|s| s.into_bytes()), + }, + jobs: 
api::JobStore::default(), + audit: api::AuditStore::default(), + tenant_locks: api::TenantLocks::default(), + http, + placement: api::PlacementStore::new(placement_path), + fleet_services, + swarm: api::SwarmStore::new(swarm_path), + }); + + let listener = tokio::net::TcpListener::bind(args.addr) + .await + .expect("failed to bind"); + + tracing::info!(addr = %args.addr, "control api listening"); + + axum::serve(listener, app) + .with_graceful_shutdown(shutdown_signal()) + .await + .expect("server failed"); +} + +async fn shutdown_signal() { + let _ = tokio::signal::ctrl_c().await; +} + +fn parse_fleet_services(spec: &str) -> Vec { + spec.split(',') + .filter_map(|pair| { + let pair = pair.trim(); + if pair.is_empty() { + return None; + } + let (name, url) = pair.split_once('=')?; + let name = name.trim(); + let url = url.trim(); + if name.is_empty() || url.is_empty() { + return None; + } + Some(api::FleetService { + name: name.to_string(), + base_url: url.to_string(), + }) + }) + .collect() +} diff --git a/control/api/src/placement.rs b/control/api/src/placement.rs new file mode 100644 index 0000000..dced748 --- /dev/null +++ b/control/api/src/placement.rs @@ -0,0 +1,227 @@ +use serde::{Deserialize, Serialize}; +use std::{ + collections::BTreeMap, + fs, + path::{Path, PathBuf}, + sync::{Arc, RwLock}, + time::SystemTime, +}; +use uuid::Uuid; + +#[derive(Clone, Copy, Debug, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum ServiceKind { + Aggregate, + Projection, + Runner, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct PlacementFile { + pub revision: Option, + pub aggregate_placement: Option, + pub projection_placement: Option, + pub runner_placement: Option, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct PlacementKind { + pub placements: Vec, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct TenantPlacement { + pub tenant_id: Uuid, + pub targets: Vec, +} + +#[derive(Clone, Debug, Serialize, 
Deserialize)] +pub struct PlacementResponse { + pub kind: ServiceKind, + pub revision: String, + pub placements: Vec, +} + +impl PlacementFile { + pub fn load(path: &Path) -> Option { + let raw = fs::read_to_string(path).ok()?; + serde_json::from_str(&raw).ok() + } + + pub fn for_kind(&self, kind: ServiceKind) -> PlacementResponse { + let revision = self.revision.clone().unwrap_or_else(|| "dev".to_string()); + let placements = match kind { + ServiceKind::Aggregate => self + .aggregate_placement + .as_ref() + .map(|p| p.placements.clone()) + .unwrap_or_default(), + ServiceKind::Projection => self + .projection_placement + .as_ref() + .map(|p| p.placements.clone()) + .unwrap_or_default(), + ServiceKind::Runner => self + .runner_placement + .as_ref() + .map(|p| p.placements.clone()) + .unwrap_or_default(), + }; + + PlacementResponse { + kind, + revision, + placements, + } + } +} + +#[derive(Clone)] +pub struct PlacementStore { + inner: Arc>, +} + +struct Inner { + path: PathBuf, + last_modified: Option, + cached: Option, +} + +impl PlacementStore { + pub fn new(path: PathBuf) -> Self { + Self { + inner: Arc::new(RwLock::new(Inner { + path, + last_modified: None, + cached: None, + })), + } + } + + pub fn get_for_kind(&self, kind: ServiceKind) -> PlacementResponse { + let mut inner = self.inner.write().expect("placement lock poisoned"); + inner.reload_if_changed(); + match inner.cached.as_ref() { + Some(p) => p.for_kind(kind), + None => PlacementResponse { + kind, + revision: "dev".to_string(), + placements: vec![], + }, + } + } + + pub fn tenant_summaries(&self) -> Vec { + let mut inner = self.inner.write().expect("placement lock poisoned"); + inner.reload_if_changed(); + + let Some(p) = inner.cached.as_ref() else { + return vec![]; + }; + + let mut map: BTreeMap = BTreeMap::new(); + + for (kind, placements) in [ + ( + ServiceKind::Aggregate, + p.for_kind(ServiceKind::Aggregate).placements, + ), + ( + ServiceKind::Projection, + 
p.for_kind(ServiceKind::Projection).placements, + ), + ( + ServiceKind::Runner, + p.for_kind(ServiceKind::Runner).placements, + ), + ] { + for tp in placements { + let entry = map.entry(tp.tenant_id).or_insert_with(|| TenantSummary { + tenant_id: tp.tenant_id, + aggregate_targets: vec![], + projection_targets: vec![], + runner_targets: vec![], + }); + match kind { + ServiceKind::Aggregate => entry.aggregate_targets = tp.targets, + ServiceKind::Projection => entry.projection_targets = tp.targets, + ServiceKind::Runner => entry.runner_targets = tp.targets, + } + } + } + + map.into_values().collect() + } + + pub fn update_runner_target( + &self, + tenant_id: Uuid, + runner_target: String, + ) -> Result { + let mut inner = self.inner.write().expect("placement lock poisoned"); + inner.reload_if_changed(); + + let mut file = inner.cached.clone().unwrap_or(PlacementFile { + revision: Some("dev".to_string()), + aggregate_placement: Some(PlacementKind { placements: vec![] }), + projection_placement: Some(PlacementKind { placements: vec![] }), + runner_placement: Some(PlacementKind { placements: vec![] }), + }); + + let mut runner = file + .runner_placement + .take() + .unwrap_or(PlacementKind { placements: vec![] }); + + if let Some(existing) = runner + .placements + .iter_mut() + .find(|p| p.tenant_id == tenant_id) + { + existing.targets = vec![runner_target]; + } else { + runner.placements.push(TenantPlacement { + tenant_id, + targets: vec![runner_target], + }); + } + + runner.placements.sort_by_key(|p| p.tenant_id); + file.runner_placement = Some(runner); + + let revision = format!("rev-{}", Uuid::new_v4()); + file.revision = Some(revision.clone()); + + let raw = serde_json::to_string_pretty(&file).map_err(|e| e.to_string())?; + let tmp = inner.path.with_extension("json.tmp"); + fs::write(&tmp, raw).map_err(|e| e.to_string())?; + fs::rename(&tmp, &inner.path).map_err(|e| e.to_string())?; + + inner.last_modified = None; + inner.cached = Some(file); + + Ok(revision) + } +} 
+ +impl Inner { + fn reload_if_changed(&mut self) { + let meta = fs::metadata(&self.path).ok(); + let modified = meta.and_then(|m| m.modified().ok()); + + if self.cached.is_some() && modified.is_some() && modified == self.last_modified { + return; + } + + self.last_modified = modified; + self.cached = PlacementFile::load(&self.path); + } +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct TenantSummary { + pub tenant_id: Uuid, + pub aggregate_targets: Vec, + pub projection_targets: Vec, + pub runner_targets: Vec, +} diff --git a/control/api/src/swarm.rs b/control/api/src/swarm.rs new file mode 100644 index 0000000..148f98d --- /dev/null +++ b/control/api/src/swarm.rs @@ -0,0 +1,62 @@ +use serde::{Deserialize, Serialize}; +use std::{fs, path::Path}; + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct SwarmService { + pub name: String, + pub image: Option, + pub mode: Option, + pub replicas: Option, + pub updated_at: Option, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct SwarmTask { + pub id: String, + pub service: String, + pub node: Option, + pub desired_state: Option, + pub current_state: Option, + pub error: Option, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct SwarmStateFile { + pub services: Vec, + pub tasks: Vec, +} + +#[derive(Clone)] +pub struct SwarmStore { + path: std::path::PathBuf, +} + +impl SwarmStore { + pub fn new(path: std::path::PathBuf) -> Self { + Self { path } + } + + pub fn list_services(&self) -> Vec { + self.load().map(|s| s.services).unwrap_or_default() + } + + pub fn list_tasks(&self, service_name: &str) -> Vec { + self.load() + .map(|s| { + s.tasks + .into_iter() + .filter(|t| t.service == service_name) + .collect() + }) + .unwrap_or_default() + } + + fn load(&self) -> Option { + load_state(&self.path) + } +} + +fn load_state(path: &Path) -> Option { + let raw = fs::read_to_string(path).ok()?; + serde_json::from_str(&raw).ok() +} diff --git a/control/api/tests/annotations.rs 
b/control/api/tests/annotations.rs new file mode 100644 index 0000000..b90cabc --- /dev/null +++ b/control/api/tests/annotations.rs @@ -0,0 +1,16 @@ +#[test] +fn annotation_writer_produces_expected_grafana_payload() { + let a = api::build_grafana_deploy_annotation(api::DeployAnnotationArgs { + service: "gateway", + version: Some("1.2.3"), + git_sha: Some("abc123"), + time_ms: 1234567890, + }); + + assert_eq!(a.time, 1234567890); + assert!(a.tags.iter().any(|t| t == "deploy")); + assert!(a.tags.iter().any(|t| t == "service:gateway")); + assert!(a.tags.iter().any(|t| t == "version:1.2.3")); + assert!(a.tags.iter().any(|t| t == "git_sha:abc123")); + assert!(a.text.contains("deploy gateway")); +} diff --git a/control/api/tests/build_info.rs b/control/api/tests/build_info.rs new file mode 100644 index 0000000..4cc6725 --- /dev/null +++ b/control/api/tests/build_info.rs @@ -0,0 +1,39 @@ +#[test] +fn build_info_parser_extracts_expected_labels() { + let metrics = r#" +# HELP gateway_build_info build info +# TYPE gateway_build_info gauge +gateway_build_info{service="gateway",version="1.2.3",git_sha="abc"} 1 +runner_build_info{service="runner",version="2.0.0",git_sha="def"} 1 +unrelated_metric 5 +"#; + + let info = api::extract_build_info(metrics); + assert_eq!(info.len(), 2); + assert!( + info.iter() + .any(|i| i.service == "gateway" && i.version == "1.2.3" && i.git_sha == "abc") + ); + assert!( + info.iter() + .any(|i| i.service == "runner" && i.version == "2.0.0" && i.git_sha == "def") + ); +} + +#[test] +fn build_info_snapshot_has_required_services() { + let metrics = r#" +gateway_build_info{service="gateway",version="1.2.3",git_sha="abc"} 1 +aggregate_build_info{service="aggregate",version="1.0.0",git_sha="aaa"} 1 +projection_build_info{service="projection",version="1.0.0",git_sha="bbb"} 1 +runner_build_info{service="runner",version="2.0.0",git_sha="ccc"} 1 +"#; + + let info = api::extract_build_info(metrics); + for required in ["gateway", "aggregate", "projection", 
"runner"] { + assert!( + info.iter().any(|i| i.service == required), + "missing build_info for service={required}" + ); + } +} diff --git a/control/api/tests/docker_config_validation.rs b/control/api/tests/docker_config_validation.rs new file mode 100644 index 0000000..5ace774 --- /dev/null +++ b/control/api/tests/docker_config_validation.rs @@ -0,0 +1,55 @@ +use std::{fs, path::PathBuf, time::Duration}; + +fn repo_root() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .parent() + .and_then(|p| p.parent()) + .expect("api crate should live under repo root") + .to_path_buf() +} + +#[test] +fn docker_compose_files_parse_and_include_required_services() { + let root = repo_root(); + let compose = fs::read_to_string(root.join("observability/docker-compose.yml")).unwrap(); + let v: serde_yaml::Value = serde_yaml::from_str(&compose).unwrap(); + + let services = v + .get("services") + .and_then(|x| x.as_mapping()) + .expect("missing services"); + + for required in ["grafana", "victoria-metrics", "vmagent", "loki", "tempo"] { + assert!( + services.contains_key(serde_yaml::Value::String(required.to_string())), + "missing service {required}" + ); + } +} + +#[tokio::test] +#[ignore] +async fn docker_compose_config_validation_is_gated_and_fast() { + let enabled = std::env::var("CONTROL_TEST_DOCKER").ok(); + assert_eq!(enabled.as_deref(), Some("1")); + + let root = repo_root(); + let compose = root.join("observability/docker-compose.yml"); + + let cmd = tokio::process::Command::new("docker") + .args(["compose", "-f"]) + .arg(compose) + .args(["config"]) + .output(); + + let out = tokio::time::timeout(Duration::from_secs(10), cmd) + .await + .expect("docker compose config timed out") + .expect("failed to run docker compose config"); + + assert!( + out.status.success(), + "docker compose config failed: {}", + String::from_utf8_lossy(&out.stderr) + ); +} diff --git a/control/api/tests/docker_gated.rs b/control/api/tests/docker_gated.rs new file mode 100644 index 
0000000..4a29179 --- /dev/null +++ b/control/api/tests/docker_gated.rs @@ -0,0 +1,6 @@ +#[test] +#[ignore] +fn docker_integration_tests_are_gated() { + let enabled = std::env::var("CONTROL_TEST_DOCKER").ok(); + assert_eq!(enabled.as_deref(), Some("1")); +} diff --git a/control/api/tests/e2e_control_plane_fleet_docker.rs b/control/api/tests/e2e_control_plane_fleet_docker.rs new file mode 100644 index 0000000..dcfff7b --- /dev/null +++ b/control/api/tests/e2e_control_plane_fleet_docker.rs @@ -0,0 +1,183 @@ +use jsonwebtoken::{EncodingKey, Header, encode}; +use serde::Serialize; +use std::{fs, net::TcpListener, time::Duration}; + +#[derive(Serialize)] +struct Claims { + sub: String, + session_id: String, + permissions: Vec, + exp: usize, +} + +fn free_port() -> u16 { + TcpListener::bind("127.0.0.1:0") + .unwrap() + .local_addr() + .unwrap() + .port() +} + +fn token(secret: &[u8], perms: &[&str]) -> String { + let exp = (std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_secs() + + 60) as usize; + + encode( + &Header::default(), + &Claims { + sub: "op_1".to_string(), + session_id: "sess_1".to_string(), + permissions: perms.iter().map(|p| (*p).to_string()).collect(), + exp, + }, + &EncodingKey::from_secret(secret), + ) + .unwrap() +} + +async fn wait_ready(url: &str) { + let client = reqwest::Client::new(); + let start = tokio::time::Instant::now(); + loop { + let ok = client + .get(format!("{url}/ready")) + .send() + .await + .map(|r| r.status().is_success()) + .unwrap_or(false); + if ok { + return; + } + if start.elapsed() > Duration::from_secs(10) { + panic!("control-api did not become ready"); + } + tokio::time::sleep(Duration::from_millis(100)).await; + } +} + +#[tokio::test] +#[ignore] +async fn control_plane_can_see_the_fleet_via_docker_stubs() { + let enabled = std::env::var("CONTROL_TEST_DOCKER").ok(); + assert_eq!(enabled.as_deref(), Some("1")); + + let nginx_conf = r#" +server { + listen 80; + server_name _; + + location = 
/health { return 200 "ok\n"; } + location = /ready { return 200 "ready\n"; } + location = /metrics { return 200 "stub_build_info{service=\"stub\",version=\"dev\",git_sha=\"000\"} 1\n"; } +} +"#; + + let mut conf_path = std::env::temp_dir(); + conf_path.push(format!( + "cloudlysis-control-nginx-{}.conf", + uuid::Uuid::new_v4() + )); + fs::write(&conf_path, nginx_conf).unwrap(); + + let gateway_port = free_port(); + let runner_port = free_port(); + let aggregate_port = free_port(); + let projection_port = free_port(); + + async fn run_stub(name: &str, port: u16, conf: &std::path::Path) -> String { + let out = tokio::process::Command::new("docker") + .args(["run", "-d", "--rm"]) + .args(["-p", &format!("{port}:80")]) + .args([ + "-v", + &format!("{}:/etc/nginx/conf.d/default.conf:ro", conf.display()), + ]) + .arg("nginx:1.29-alpine") + .output() + .await + .expect("failed to run docker"); + assert!( + out.status.success(), + "{name} stub failed: {}", + String::from_utf8_lossy(&out.stderr) + ); + String::from_utf8_lossy(&out.stdout).trim().to_string() + } + + let gateway_id = run_stub("gateway", gateway_port, &conf_path).await; + let runner_id = run_stub("runner", runner_port, &conf_path).await; + let aggregate_id = run_stub("aggregate", aggregate_port, &conf_path).await; + let projection_id = run_stub("projection", projection_port, &conf_path).await; + + let secret = b"e2e_secret"; + let api_port = free_port(); + let api_url = format!("http://127.0.0.1:{api_port}"); + + let mut placement_path = std::env::temp_dir(); + placement_path.push(format!( + "cloudlysis-control-placement-{}.json", + uuid::Uuid::new_v4() + )); + fs::write( + &placement_path, + r#"{"revision":"e2e","aggregate_placement":{"placements":[]},"projection_placement":{"placements":[]},"runner_placement":{"placements":[]}}"#, + ) + .unwrap(); + + let mut child = tokio::process::Command::new(env!("CARGO_BIN_EXE_api")) + .env("CONTROL_API_ADDR", format!("127.0.0.1:{api_port}")) + 
.env("CONTROL_GATEWAY_JWT_HS256_SECRET", "e2e_secret") + .env("CONTROL_PLACEMENT_PATH", placement_path.to_string_lossy().to_string()) + .env( + "CONTROL_FLEET_SERVICES", + format!( + "gateway=http://127.0.0.1:{gateway_port},aggregate=http://127.0.0.1:{aggregate_port},projection=http://127.0.0.1:{projection_port},runner=http://127.0.0.1:{runner_port}" + ), + ) + .spawn() + .expect("failed to spawn control-api"); + + wait_ready(&api_url).await; + + let client = reqwest::Client::new(); + let t = token(secret, &["control:read"]); + + let res = client + .get(format!("{api_url}/admin/v1/fleet/snapshot")) + .header(reqwest::header::AUTHORIZATION, format!("Bearer {t}")) + .send() + .await + .unwrap(); + assert!(res.status().is_success()); + + let v: serde_json::Value = res.json().await.unwrap(); + let services = v.get("services").and_then(|x| x.as_array()).unwrap(); + assert!( + services.len() >= 5, + "expected at least 5 services (including control-api), got {}", + services.len() + ); + + let res = client + .get(format!("{api_url}/admin/v1/tenants")) + .header(reqwest::header::AUTHORIZATION, format!("Bearer {t}")) + .send() + .await + .unwrap(); + assert!(res.status().is_success()); + + let _ = child.kill().await; + + for id in [gateway_id, runner_id, aggregate_id, projection_id] { + let _ = tokio::process::Command::new("docker") + .args(["stop", &id]) + .output() + .await; + } + + let _ = fs::remove_file(&conf_path); + let _ = fs::remove_file(&placement_path); +} diff --git a/control/api/tests/fleet_services_env.rs b/control/api/tests/fleet_services_env.rs new file mode 100644 index 0000000..52c1bb5 --- /dev/null +++ b/control/api/tests/fleet_services_env.rs @@ -0,0 +1,30 @@ +#[test] +fn fleet_services_env_parser_is_lenient() { + let services = { + fn parse(spec: &str) -> Vec { + spec.split(',') + .filter_map(|pair| { + let pair = pair.trim(); + if pair.is_empty() { + return None; + } + let (name, url) = pair.split_once('=')?; + let name = name.trim(); + let url = 
url.trim(); + if name.is_empty() || url.is_empty() { + return None; + } + Some(api::FleetService { + name: name.to_string(), + base_url: url.to_string(), + }) + }) + .collect() + } + parse(" gateway=http://x , ,runner=http://y,broken, =http://z ") + }; + + assert_eq!(services.len(), 2); + assert_eq!(services[0].name, "gateway"); + assert_eq!(services[1].name, "runner"); +} diff --git a/control/api/tests/nats_gated.rs b/control/api/tests/nats_gated.rs new file mode 100644 index 0000000..1b4afe2 --- /dev/null +++ b/control/api/tests/nats_gated.rs @@ -0,0 +1,23 @@ +use std::time::Duration; + +#[tokio::test] +#[ignore] +async fn nats_integration_tests_are_gated_and_fast_fail() { + let url = std::env::var("CONTROL_TEST_NATS_URL").expect("CONTROL_TEST_NATS_URL is required"); + + let without_scheme = url.strip_prefix("nats://").unwrap_or(url.as_str()); + let hostport = without_scheme.split('/').next().unwrap_or(without_scheme); + let mut parts = hostport.split(':'); + let host = parts.next().unwrap_or("127.0.0.1"); + let port: u16 = parts + .next() + .unwrap_or("4222") + .parse() + .expect("invalid port in CONTROL_TEST_NATS_URL"); + + let connect = tokio::net::TcpStream::connect((host, port)); + tokio::time::timeout(Duration::from_secs(2), connect) + .await + .expect("tcp connect to NATS timed out") + .expect("failed to connect to NATS"); +} diff --git a/control/api/tests/observability_configs.rs b/control/api/tests/observability_configs.rs new file mode 100644 index 0000000..b1a15d1 --- /dev/null +++ b/control/api/tests/observability_configs.rs @@ -0,0 +1,75 @@ +use std::{collections::BTreeSet, fs, path::PathBuf}; + +fn repo_root() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .parent() + .and_then(|p| p.parent()) + .expect("api crate should live under repo root") + .to_path_buf() +} + +#[test] +fn grafana_provisioning_files_are_syntactically_valid() { + let root = repo_root(); + + let datasources = fs::read_to_string( + 
root.join("observability/grafana/provisioning/datasources/datasources.yml"), + ) + .expect("missing grafana datasources provisioning file"); + let dashboards = fs::read_to_string( + root.join("observability/grafana/provisioning/dashboards/dashboards.yml"), + ) + .expect("missing grafana dashboards provisioning file"); + + let _datasources_yaml: serde_yaml::Value = + serde_yaml::from_str(&datasources).expect("invalid grafana datasources yaml"); + let _dashboards_yaml: serde_yaml::Value = + serde_yaml::from_str(&dashboards).expect("invalid grafana dashboards yaml"); +} + +#[test] +fn grafana_dashboards_are_syntactically_valid_json() { + let root = repo_root(); + let dashboards_dir = root.join("observability/grafana/dashboards"); + + let mut found = 0usize; + for entry in fs::read_dir(&dashboards_dir).expect("missing dashboards dir") { + let entry = entry.expect("failed to read dashboards dir entry"); + let path = entry.path(); + if path.extension().and_then(|e| e.to_str()) != Some("json") { + continue; + } + found += 1; + let raw = fs::read_to_string(&path).expect("failed to read dashboard json"); + let _: serde_json::Value = + serde_json::from_str(&raw).unwrap_or_else(|e| panic!("{path:?}: {e}")); + } + + assert!(found > 0, "expected at least one dashboard json file"); +} + +#[test] +fn vmagent_config_parses_and_includes_required_jobs() { + let root = repo_root(); + let scrape = fs::read_to_string(root.join("observability/vmagent/scrape.yml")) + .expect("missing vmagent scrape config"); + + let value: serde_yaml::Value = + serde_yaml::from_str(&scrape).expect("invalid vmagent scrape yaml"); + + let mut job_names = BTreeSet::::new(); + if let Some(scrape_configs) = value.get("scrape_configs").and_then(|v| v.as_sequence()) { + for cfg in scrape_configs { + if let Some(job) = cfg.get("job_name").and_then(|v| v.as_str()) { + job_names.insert(job.to_string()); + } + } + } + + for required in ["victoria-metrics", "vmagent", "control-api"] { + assert!( + 
job_names.contains(required), + "vmagent scrape config missing required job_name={required}" + ); + } +} diff --git a/control/api/tests/observability_smoke_docker.rs b/control/api/tests/observability_smoke_docker.rs new file mode 100644 index 0000000..a69dd34 --- /dev/null +++ b/control/api/tests/observability_smoke_docker.rs @@ -0,0 +1,61 @@ +use std::{ + net::TcpStream, + path::PathBuf, + process::Command, + time::{Duration, Instant}, +}; + +fn repo_root() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .parent() + .and_then(|p| p.parent()) + .expect("api crate should live under repo root") + .to_path_buf() +} + +fn wait_for_tcp(addr: &str, timeout: Duration) -> bool { + let start = Instant::now(); + while start.elapsed() < timeout { + if TcpStream::connect_timeout( + &addr.parse().expect("invalid socket addr"), + Duration::from_secs(1), + ) + .is_ok() + { + return true; + } + std::thread::sleep(Duration::from_millis(250)); + } + false +} + +#[test] +#[ignore] +fn observability_stack_reaches_healthy_state_fast() { + let enabled = std::env::var("CONTROL_TEST_DOCKER").ok(); + assert_eq!(enabled.as_deref(), Some("1")); + + let root = repo_root(); + let compose = root.join("observability/docker-compose.yml"); + + let up = Command::new("docker") + .args(["compose", "-f"]) + .arg(&compose) + .args(["up", "-d"]) + .status() + .expect("failed to run docker compose up"); + assert!(up.success(), "docker compose up failed"); + + let ok = wait_for_tcp("127.0.0.1:3000", Duration::from_secs(30)) + && wait_for_tcp("127.0.0.1:8428", Duration::from_secs(30)) + && wait_for_tcp("127.0.0.1:3100", Duration::from_secs(30)) + && wait_for_tcp("127.0.0.1:3200", Duration::from_secs(30)); + + let _ = Command::new("docker") + .args(["compose", "-f"]) + .arg(&compose) + .args(["down", "-v"]) + .status(); + + assert!(ok, "observability stack did not become reachable in time"); +} diff --git a/control/api/tests/placement_hot_reload.rs b/control/api/tests/placement_hot_reload.rs new 
file mode 100644 index 0000000..f44c2d9 --- /dev/null +++ b/control/api/tests/placement_hot_reload.rs @@ -0,0 +1,43 @@ +use std::{fs, path::PathBuf, thread, time::Duration}; + +use api::PlacementStore; + +fn tmp_file(name: &str) -> PathBuf { + let mut p = std::env::temp_dir(); + p.push(format!( + "cloudlysis-control-{name}-{}-{}.json", + std::process::id(), + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_nanos() + )); + p +} + +#[test] +fn placement_store_hot_reload_swaps_atomically() { + let path = tmp_file("placement"); + fs::write( + &path, + r#"{"revision":"r1","aggregate_placement":{"placements":[]},"projection_placement":{"placements":[]},"runner_placement":{"placements":[]}}"#, + ) + .unwrap(); + + let store = PlacementStore::new(path.clone()); + let a1 = store.get_for_kind(api::ServiceKind::Aggregate); + assert_eq!(a1.revision, "r1"); + + thread::sleep(Duration::from_millis(5)); + + fs::write( + &path, + r#"{"revision":"r2","aggregate_placement":{"placements":[]},"projection_placement":{"placements":[]},"runner_placement":{"placements":[]}}"#, + ) + .unwrap(); + + let a2 = store.get_for_kind(api::ServiceKind::Aggregate); + assert_eq!(a2.revision, "r2"); + + let _ = fs::remove_file(&path); +} diff --git a/control/api/tests/swarm_client.rs b/control/api/tests/swarm_client.rs new file mode 100644 index 0000000..885a2eb --- /dev/null +++ b/control/api/tests/swarm_client.rs @@ -0,0 +1,31 @@ +use std::{fs, path::PathBuf}; + +#[test] +fn swarm_store_is_deterministic_from_file() { + let mut path = std::env::temp_dir(); + path.push(format!( + "cloudlysis-control-swarm-{}-{}.json", + std::process::id(), + uuid::Uuid::new_v4() + )); + + fs::write( + &path, + r#"{"services":[{"name":"gateway","image":"x","mode":"replicated","replicas":"1/1","updated_at":null}],"tasks":[{"id":"t1","service":"gateway","node":"n1","desired_state":"running","current_state":"running","error":null}]}"#, + ) + .unwrap(); + + let store = 
api::SwarmStore::new(PathBuf::from(&path)); + let services = store.list_services(); + assert_eq!(services.len(), 1); + assert_eq!(services[0].name, "gateway"); + + let tasks = store.list_tasks("gateway"); + assert_eq!(tasks.len(), 1); + assert_eq!(tasks[0].id, "t1"); + + let none = store.list_tasks("missing"); + assert_eq!(none.len(), 0); + + let _ = fs::remove_file(&path); +} diff --git a/control/api/tests/swarm_smoke_docker.rs b/control/api/tests/swarm_smoke_docker.rs new file mode 100644 index 0000000..55924c1 --- /dev/null +++ b/control/api/tests/swarm_smoke_docker.rs @@ -0,0 +1,42 @@ +use std::time::Duration; + +#[tokio::test] +#[ignore] +async fn docker_swarm_smoke_test_is_gated_and_times_out() { + let enabled = std::env::var("CONTROL_TEST_DOCKER").ok(); + assert_eq!(enabled.as_deref(), Some("1")); + + let stack = "cloudlysis_control_test"; + let compose = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .parent() + .and_then(|p| p.parent()) + .unwrap() + .join("swarm/stacks/control-plane.yml"); + + let deploy = tokio::process::Command::new("docker") + .args(["stack", "deploy", "-c"]) + .arg(&compose) + .arg(stack) + .output(); + + let out = tokio::time::timeout(Duration::from_secs(30), deploy) + .await + .expect("docker stack deploy timed out") + .expect("failed to run docker stack deploy"); + assert!( + out.status.success(), + "docker stack deploy failed: {}", + String::from_utf8_lossy(&out.stderr) + ); + + let ls = tokio::process::Command::new("docker") + .args(["service", "ls"]) + .output(); + let _ = tokio::time::timeout(Duration::from_secs(10), ls).await; + + let rm = tokio::process::Command::new("docker") + .args(["stack", "rm"]) + .arg(stack) + .output(); + let _ = tokio::time::timeout(Duration::from_secs(10), rm).await; +} diff --git a/control/api/tests/swarm_stack_yaml.rs b/control/api/tests/swarm_stack_yaml.rs new file mode 100644 index 0000000..21e1325 --- /dev/null +++ b/control/api/tests/swarm_stack_yaml.rs @@ -0,0 +1,40 @@ +use std::{fs, 
path::PathBuf}; + +fn repo_root() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .parent() + .and_then(|p| p.parent()) + .expect("api crate should live under repo root") + .to_path_buf() +} + +#[test] +fn stack_files_parse_as_yaml() { + let root = repo_root(); + for file in [ + root.join("swarm/stacks/control-plane.yml"), + root.join("swarm/stacks/observability.yml"), + ] { + let raw = fs::read_to_string(&file).unwrap(); + let _: serde_yaml::Value = serde_yaml::from_str(&raw).unwrap(); + } +} + +#[test] +fn control_plane_stack_has_required_services() { + let root = repo_root(); + let raw = fs::read_to_string(root.join("swarm/stacks/control-plane.yml")).unwrap(); + let v: serde_yaml::Value = serde_yaml::from_str(&raw).unwrap(); + + let services = v + .get("services") + .and_then(|x| x.as_mapping()) + .expect("missing services"); + + for required in ["control-api", "control-ui"] { + assert!( + services.contains_key(serde_yaml::Value::String(required.to_string())), + "missing service {required}" + ); + } +} diff --git a/control/prd.md b/control/prd.md new file mode 100644 index 0000000..ccd84f8 --- /dev/null +++ b/control/prd.md @@ -0,0 +1,601 @@ +### 🧱 Component: Control Plane (Admin UI + Monitoring + Production Ops) + +**Definition:** +This repository hosts the **platform control plane**: +1) the **Admin UI** used by platform operators and admins to manage users/roles/sessions, tenants, configuration, definitions, and production scaling; and +2) the **observability stack** and **production dashboards** (VictoriaMetrics + Loki + Grafana, plus alerting/scrape config) required to operate the platform in production. + +The control plane is the “single pane of glass” and the “safe hands” layer: it does not replace node runtime logic; it coordinates existing node capabilities and exposes them with strict RBAC, auditability, and operational guardrails. 
+ +--- + +## **Context: Existing Node Repositories (../)** + +This PRD is derived from the currently implemented node repos in `../`: +- **Aggregate**: expects a control node to manage tenant placement and scaling operations, including tenant migrations ([aggregate/prd.md](file:///Users/vlad/Developer/cloudlysis/aggregate/prd.md#L82-L151)). Tenant placement primitives and KV helper exist ([swarm.rs](file:///Users/vlad/Developer/cloudlysis/aggregate/src/swarm.rs#L5-L227)). +- **Gateway**: provides the platform ingress, authn/authz, and tenant-aware routing; it explicitly expects NATS KV-based tenant placement and hot reload in production ([gateway/prd.md](file:///Users/vlad/Developer/cloudlysis/gateway/prd.md#L13-L175)). +- **Projection**: consumes events, stores read models, and expects tenant-scoped query isolation and operational monitoring (consumer lag, checkpoints) ([projection/prd.md](file:///Users/vlad/Developer/cloudlysis/projection/prd.md#L7-L96)). +- **Runner**: executes sagas + effects, includes tenant assignment watching via NATS KV and tenant draining semantics ([tenant_placement.rs](file:///Users/vlad/Developer/cloudlysis/runner/src/tenant_placement.rs#L11-L104)) and exposes admin endpoints for drain/reload in its PRD ([runner/prd.md](file:///Users/vlad/Developer/cloudlysis/runner/prd.md#L199-L210)). + +The control plane also adopts the proven **Admin UI UX + component library** from UltraBase’s control-plane admin UI, adapting screens and information architecture to Cloudlysis needs: +- Reusable UI components live under [ui/control-plane-admin/src/components/ui](file:///Users/vlad/Developer/madapes/ultrabase/ui/control-plane-admin/src/components/ui). 
+- Example pages include [TenantsPage](file:///Users/vlad/Developer/madapes/ultrabase/ui/control-plane-admin/src/pages/TenantsPage.tsx), [AdminUsersPage](file:///Users/vlad/Developer/madapes/ultrabase/ui/control-plane-admin/src/pages/AdminUsersPage.tsx), [AdminSessionsPage](file:///Users/vlad/Developer/madapes/ultrabase/ui/control-plane-admin/src/pages/AdminSessionsPage.tsx), [FleetPage](file:///Users/vlad/Developer/madapes/ultrabase/ui/control-plane-admin/src/pages/FleetPage.tsx), [TopologyPage](file:///Users/vlad/Developer/madapes/ultrabase/ui/control-plane-admin/src/pages/TopologyPage.tsx), and [ObservabilityPage](file:///Users/vlad/Developer/madapes/ultrabase/ui/control-plane-admin/src/pages/ObservabilityPage.tsx). + +--- + +## **Problem Statement** + +Operating the platform without a unified control plane forces operators to: +- Use ad-hoc scripts, direct cluster access, or service-local admin endpoints +- Manage tenants, placements, and deployments without a consistent audit trail +- Correlate production incidents across services with incomplete dashboards and unsafe levels of access + +The platform needs a control plane that: +- Centralizes **admin workflows** and **production operability** +- Enforces **least-privilege RBAC**, **step-up**, and **auditing** +- Provides a consistent, safe abstraction over **tenant placement**, **scale**, and **production operations** + +--- + +## **Goals** + +- Deliver an Admin UI with full admin management over: + - users, sessions, roles/permissions + - configuration (global + per-tenant) + - definitions (aggregates, projections, sagas, effects, manifests) + - scaling and production management (tenant placement, drains, migrations, deployments) +- Package production-grade monitoring: + - metrics via VictoriaMetrics + - logs via Loki + - dashboards and alerting via Grafana (+ vmalert where used) +- Make production operations observable, auditable, and safe by default: + - strong change logging + approvals where needed + - 
idempotent operations + dry runs + rollback paths + +--- + +## **Non-Goals** + +- Re-implement node business logic (Aggregate / Projection / Runner) or platform ingress (Gateway). +- Replace NATS JetStream, libmdbx storage responsibilities, or per-service runtime concerns. +- Provide an arbitrary “general API gateway” for third-party upstreams. + +--- + +## **Primary Users** + +- **Platform Owner / SRE**: fleet operations, incident response, production change management. +- **Platform Admin**: tenant provisioning, RBAC, config/definition promotion. +- **Security Admin**: access reviews, session revocation, audit trails. +- **Support / On-call**: triage dashboards, logs/metrics correlation, safe mitigations (drain, disable, rollback). + +--- + +## **Key Concepts** + +### Control Plane Scope + +- The control plane is the authoritative interface for production operations and admin management. +- The control plane uses node APIs, the Gateway, and NATS KV as its operational substrate rather than bypassing them. + +### Tenant-Aware Operations + +- All tenant-scoped operations are keyed by `tenant_id` (consistent with `x-tenant-id` usage across nodes and Gateway). +- Tenant placement is treated as a first-class “control plane state” (NATS KV-backed in production; file/static in development), consistent with existing code patterns ([swarm.rs](file:///Users/vlad/Developer/cloudlysis/aggregate/src/swarm.rs#L188-L226), [tenant_placement.rs](file:///Users/vlad/Developer/cloudlysis/runner/src/tenant_placement.rs#L41-L104)). + +### Safe Change Management + +- Mutating actions require explicit intent, are recorded in audit logs, and should be reversible where possible. 
+- All high-impact operations support: + - validation and preflight checks + - dry-run planning + - idempotency keys + - explicit rollback guidance + +### Control Plane Components (In This Repo) + +- **Admin UI (React)**: + - Reuse UltraBase’s control-plane admin UI component system and interaction patterns, adapting routes and pages to Cloudlysis requirements ([components/ui](file:///Users/vlad/Developer/madapes/ultrabase/ui/control-plane-admin/src/components/ui)). + - The UI should prefer “table + detail pages + action dropdown + modals” patterns to keep ops workflows fast and consistent. +- **Control Plane API (BFF / Admin API)**: + - A thin API layer that enforces RBAC, writes audit logs, and orchestrates multi-step operations (drain/migrate/rollout) as idempotent jobs. + - Integrates with the Gateway for platform authn/authz and with node admin endpoints for operational actions. +- **Observability Stack**: + - Version-controlled provisioning for Grafana dashboards/datasources, scrape configs for vmagent, and alert rules (vmalert or Grafana Alerting), modeled after UltraBase’s baseline ([observability/README.md](file:///Users/vlad/Developer/madapes/ultrabase/observability/README.md#L1-L47)). 
+ +--- + +## **Functional Requirements** + +### 1) Admin IAM (Users, Sessions, Roles) + +#### 1.1 Users + +- CRUD users with lifecycle states: + - invited (pending acceptance), active, suspended, disabled, deleted (tombstoned) +- Identity attributes: + - email (primary), optional secondary identities + - display name, avatar, metadata tags + - auth methods enabled (password, OIDC providers), MFA state +- Administrative actions: + - invite/resend invite + - reset password flow initiation + - force MFA reset / revoke recovery codes + - disable login / suspend user + - impersonation (break-glass, audited, time-boxed) +- Security constraints: + - privileged actions require step-up / recent auth + - sensitive events must be audit logged (who, what, when, why, from where) + +#### 1.2 Sessions + +- View active sessions and refresh token families: + - by user, by tenant, by IP / geo, by device, by time range +- Revoke capabilities: + - revoke a single session + - revoke all sessions for a user + - revoke all sessions for a tenant (incident response) +- Detection surfaces: + - unusual session fanout (many sessions per user) + - repeated failed logins / MFA failures + - suspicious IP changes + +#### 1.3 Roles & Permissions (RBAC) + +- Roles are sets of permissions; assignments bind principals to roles in a scope. 
+- Scopes: + - global (platform-level) + - tenant-scoped + - environment-scoped (dev/staging/prod) when applicable +- Required permission domains (minimum): + - iam.users.* (create/update/suspend/delete) + - iam.sessions.* (list/revoke) + - iam.roles.* (create/update/assign) + - tenants.* (create/update/archive) + - configs.* (read/write/approve/apply) + - definitions.* (read/write/validate/promote/rollback) + - scale.* (view/apply/migrate/drain) + - ops.* (deploy/rollback/restart/drain) + - observability.* (view dashboards, manage alert rules) + - audit.* (view/export) +- Role templates: + - owner, admin, operator, support, read-only, security-admin, break-glass + +--- + +### 2) Tenant Management + +- Create, list, and archive tenants. +- Tenant status model: + - provisioning, active, draining, migrating, degraded, suspended, archived +- Tenant metadata: + - plan/tier, quotas, feature flags, contact + billing metadata, environment(s) +- Tenant operational actions: + - trigger provisioning workflows (create streams/buckets, seed configs, create placement) + - rotate tenant secrets (as definitions/config allow) + - pause/resume workload (soft kill switch via config flags) + +Tenant pages should mirror UltraBase’s “Tenant Overview + subpages” navigation patterns (example: [TenantsPage](file:///Users/vlad/Developer/madapes/ultrabase/ui/control-plane-admin/src/pages/TenantsPage.tsx) and [TenantOverviewPage](file:///Users/vlad/Developer/madapes/ultrabase/ui/control-plane-admin/src/pages/TenantOverviewPage.tsx)). 
+ +--- + +### 3) Configuration Management (Global + Per-Tenant) + +#### 3.1 Config Model + +- Config items are versioned, typed documents with: + - scope (global / tenant / environment) + - schema version + - provenance (who/what wrote it) + - effective date and rollout strategy +- Config must support: + - validation against a schema + - diff view (previous vs next) + - staged rollout (preview → apply) + - rollback to a prior version + +#### 3.2 Node-Related Configuration + +Required config surfaces (minimum): +- **Gateway**: routing/placement sources, auth policies, rate limits (see routing expectations in [gateway/prd.md](file:///Users/vlad/Developer/cloudlysis/gateway/prd.md#L154-L175)). +- **Aggregate / Projection / Runner**: + - shard identifiers and tenant allowlists/placement settings + - drain/reload toggles and safety thresholds + - resource limits / concurrency caps + +--- + +### 4) Definition Management (System “Blueprints”) + +Definitions are the declarative “what the platform is” and “what runs” layer: aggregates, projections, sagas, effect providers, and any manifests that tie runtime-function programs to entity types. 
+ +Required capabilities: +- Upload/edit versioned definitions with: + - validation (schema + semantic checks) + - “impact analysis” (which tenants/services are affected) + - promotion workflow (dev → staging → prod) +- Change controls: + - approvals (role-based) for production promotion + - emergency rollback path (one-click revert to last-known-good definition bundle) +- Tenant overrides: + - allow per-tenant definition overrides only when explicitly permitted by policy + +The control plane must present definitions in a way that maps to the node runtime responsibilities: +- Aggregates and deterministic decide/apply programs ([aggregate/prd.md](file:///Users/vlad/Developer/cloudlysis/aggregate/prd.md#L155-L160)) +- Projections and deterministic project programs ([projection/prd.md](file:///Users/vlad/Developer/cloudlysis/projection/prd.md#L36-L55)) +- Runner sagas and effect provider manifests ([runner/prd.md](file:///Users/vlad/Developer/cloudlysis/runner/prd.md#L41-L57)) + +--- + +### 5) Scale Management (Tenant Placement, Shards, Fleet) + +#### 5.1 Placement Model + +- Placement is modeled as: + - a set of nodes/shards and their attributes (labels, capacity, region) + - tenant → shard assignments per service kind (Aggregate, Projection, Runner, optionally Gateway when relevant) +- Control plane supports both: + - static placement (development) + - dynamic placement (production) backed by NATS KV (consistent with existing client patterns in [swarm.rs](file:///Users/vlad/Developer/cloudlysis/aggregate/src/swarm.rs#L79-L227)) + +#### 5.2 Tenant Migration + +- Provide guided migration planning and execution: + - show current assignment, target assignment, and a sequenced action plan + - execute “graceful drain → update placement → reload” style plans (see [plan_graceful_tenant_migration](file:///Users/vlad/Developer/cloudlysis/aggregate/src/swarm.rs#L41-L65)) +- Migration safety: + - require explicit confirmation and reason + - block if draining is unsafe (inflight 
work too high, storage unhealthy, consumer lag too high) + - time-box and alert if drains do not converge + +#### 5.3 Fleet View + +- Fleet inventory: + - nodes (labels, region, capacity, version) + - services (replicas, image version, health) + - per-node and per-service load indicators (CPU/mem, request rate, consumer lag) +- Operator actions: + - scale replicas, restart services, cordon/drain nodes (when supported by orchestrator) + +UX should align with the UltraBase “Fleet” and “Topology” navigation patterns ([FleetPage](file:///Users/vlad/Developer/madapes/ultrabase/ui/control-plane-admin/src/pages/FleetPage.tsx), [TopologyPage](file:///Users/vlad/Developer/madapes/ultrabase/ui/control-plane-admin/src/pages/TopologyPage.tsx)). + +--- + +### 6) Production Operations (Deployments, Maintenance, Safety) + +#### 6.1 Deployments + +- Manage deployable artifacts per service (Aggregate/Gateway/Projection/Runner) with: + - environment-specific rollout policies + - canary/rolling deploy support (when orchestrator supports it) + - automatic health checks gates and rollback triggers +- Track releases: + - “what is running where” (service version matrix) + - change log links and approvals + +#### 6.2 Maintenance Operations + +- Drain operations: + - tenant drain (stop acquiring new work, finish inflight; required by Runner semantics in [TenantGate](file:///Users/vlad/Developer/cloudlysis/runner/src/tenant_placement.rs#L106-L200)) + - node drain (aggregate tenant ranges, projection consumers, runner workers) +- Replay / rebuild operations: + - projection rebuild triggers (dangerous, must be guarded and audited) + - workflow replay controls (reset checkpoints only with explicit intent) + +#### 6.3 Incident Response Toolkit + +- “Safe switches”: + - per-tenant kill switch (disable commands/effects via config) + - global degrade modes (rate limit reductions, disable expensive features) +- Run actions: + - revoke sessions at scale + - freeze deployments + - trigger 
drain/migrate with guided plan + +--- + +### 7) Observability (VictoriaMetrics + Loki + Grafana) and Dashboards + +#### 7.1 Stack Requirements + +Adopt a production-ready stack consistent with UltraBase’s operational baseline: +- **VictoriaMetrics** for metrics storage and Prometheus-compatible query +- **vmagent** for scraping and remote_write +- **Grafana** for dashboards and alert routing +- **Loki** (+ optional **Promtail**) for logs +- Optional **vmalert** for rule evaluation against VictoriaMetrics + +UltraBase’s observability design is a direct reference implementation to mirror and adapt: +- Stack overview and conventions: [observability/README.md](file:///Users/vlad/Developer/madapes/ultrabase/observability/README.md#L1-L47) +- Provisioned dashboards and datasources: [grafana provisioning](file:///Users/vlad/Developer/madapes/ultrabase/observability/grafana/provisioning) + +#### 7.2 Metrics Conventions + +- Every service exports `/metrics` in Prometheus format. +- Required labels: + - `service` (stable, low cardinality) + - `env` (dev/staging/prod) + - `tenant_id` only where safe and bounded; avoid tenant_id on high-frequency per-request series unless cardinality is controlled. +- HTTP metrics must avoid unbounded `path` cardinality; prefer route templates (pattern-based paths). + +Tenant-aware metrics guidelines: +- Prefer **tenant-only aggregates** for “who is hurting us?” views: + - `..._requests_total{tenant_id,service,status_class}` (no `path`) + - `..._request_duration_seconds{tenant_id,service}` (no `path`, limited bucket count) +- Prefer **route-only aggregates** for “what endpoint is hurting us?” views: + - `..._requests_total{service,path,status}` (no `tenant_id`) +- Where per-tenant and per-route both matter, implement a **top-k sampling** policy: + - emit `(tenant_id,path)` series only for top N tenants, or only for a fixed allowlist of routes. 
+ +#### 7.3 Required Dashboards (Production) + +Minimum set of dashboards (provisioned on startup): +- **Platform — Operations overview** + - `up` for core services and observability stack + - RPS, 4xx/5xx ratio, p95/p99 latency per service + - saturation indicators (CPU/mem, inflight, queue depth) +- **Platform — HTTP detail** + - per-service request breakdown by route template, method, status + - top failing paths and latency outliers +- **Platform — Logs** + - Loki stream filtering by `service`, `tenant_id` (where present), and correlation identifiers +- **Platform — Event bus / JetStream** + - consumer lag, redeliveries, ack latency, stream storage pressure +- **Platform — Workers (Runner)** + - outbox depth, effect latency, poison message counts, schedules backlog +- **Platform — Storage (libmdbx)** + - DB size growth, write stalls, fsync latency (where exported), disk usage +- **Platform — Cluster / Orchestrator** + - node health, container restarts, placement distribution by tenant range + +Dashboards should be modeled after UltraBase’s default set (for structure, not content), e.g. [ultrabase-operations.json](file:///Users/vlad/Developer/madapes/ultrabase/observability/grafana/provisioning/dashboards/default/ultrabase-operations.json) and [ultrabase-http-detail.json](file:///Users/vlad/Developer/madapes/ultrabase/observability/grafana/provisioning/dashboards/default/ultrabase-http-detail.json). + +Additional production-operability dashboards (chosen and adapted): +- **Platform — Noisy Neighbor & Tenant Health** + - Purpose: identify a tenant causing cluster instability (attack, runaway job, bad config) and quickly pivot all panels to that tenant. + - Panels (minimum): + - Top tenants by Gateway RPS (topk of tenant-only request counters). + - Tenant latency distribution (p95/p99 per tenant) from tenant-only latency histograms. + - Tenant error ratio (5xx and 429) per tenant. 
+ - Aggregate in-flight commands by tenant (already exported: `aggregate_in_flight_commands{tenant_id}`). + - Projection processing error rate by tenant (from `projection_processing_errors_total{tenant_id,view_type}` aggregated per tenant). + - Loki logs panel with a `tenant_id` variable selector; selecting a tenant syncs RPS/latency/errors + logs. + - Required instrumentation: + - Gateway must expose **tenant-level** HTTP counters/histograms (tenant + status class + service, without `path`) in addition to existing route-level metrics. + +- **Platform — API Regression & Deployment** + - Purpose: determine whether a newly rolled out image caused regressions, and correlate changes with deployment events. + - Panels (minimum): + - Error rate comparison “old vs new” by `service` and `version` (or `image_tag`) labels. + - Latency comparison “old vs new” (p95/p99) per service. + - Restart / flapping rate per service (container restarts, crash loops). + - Dependency latency correlation: + - Gateway request duration vs Aggregate command duration vs Projection processing duration vs Runner effect latency. + - Loki “new errors” panel: + - errors seen in the last 10m that were not present in the prior 60m window, grouped by `service`. + - Deployment annotations: + - vertical markers when Swarm service updates started/finished (via annotations or a deploy event metric). + - Required instrumentation: + - Every service exports a `*_build_info{service,version,git_sha}` gauge (value=1) or equivalent, and scrape relabeling adds `image_tag` where possible. + - Control plane emits deployment annotations/events (or pulls them from the orchestrator and writes to Grafana annotations). + +- **Platform — Storage & Event Bus Bottlenecks** + - Purpose: debug timeouts when the API is “up” but underlying storage/eventing is saturated (the Cloudlysis equivalent of DB firefighting). 
+ - Panels (minimum): + - NATS/JetStream health: + - stream storage pressure, publish/ack latency, consumer lag, redeliveries. + - Projection lag and throughput: + - events processed rate, processing duration, error rate. + - Aggregate write-path pressure: + - command duration, version conflicts, in-flight commands, tenant errors. + - Runner pressure: + - outbox dispatch failure rate, effect timeout rate, deadletter writes. + - Disk saturation on nodes hosting libmdbx: + - disk usage, read/write latency, IOPS; correlate with spikes in command/query latency. + - Optional Postgres/Autobase panels only when a managed DB backs any control-plane metadata: + - pool saturation, replica lag, slow queries, long transactions. + - Required instrumentation: + - Ensure JetStream metrics are scraped (NATS server `/varz` exporter or native Prometheus endpoint depending on deployment). + - Ensure node-level disk/IO metrics are scraped (node exporter / cadvisor / equivalent). + +- **Platform — Infrastructure Exhaustion** + - Purpose: detect node/resource pressure earlier than raw CPU% and catch observability blind spots. + - Panels (minimum): + - CPU/memory pressure (PSI) per node (when available), plus load average and CPU saturation. + - OOM kill tracker across the cluster. + - Disk usage + IO wait/latency on data volumes (libmdbx, Loki, VictoriaMetrics). + - vmagent health: + - scrape error rate, remote_write errors, queue backlog. + - Loki ingestion health: + - dropped log lines (promtail) and ingestion errors (loki). + - Swarm task hygiene: + - desired_state vs current_state mismatches, orphaned tasks, restart loops. + - Required instrumentation: + - node exporter / cadvisor (or equivalent) must be part of the production scrape plan. + - promtail (or alternative) must expose drop/error metrics when logs are enabled. 
+ +#### 7.4 Alerting Requirements + +Minimum alert classes: +- Availability: + - service down (`up == 0`) + - scrape failures, vmagent remote_write errors +- Reliability: + - sustained elevated 5xx ratio + - sustained elevated p95 latency per service +- Backlogs: + - JetStream consumer lag above threshold + - Runner outbox depth above threshold +- Data safety: + - disk usage near full (nodes hosting libmdbx) + - abnormal restart loops +- Security: + - login anomaly detection signals (where instrumented) + - suspicious spike in session revocations / failed MFA + +Alert rules can follow UltraBase’s approach of version-controlled rules in YAML (reference: [alerts/](file:///Users/vlad/Developer/madapes/ultrabase/observability/alerts)). + +#### 7.5 Control Plane → Observability Linking + +The Admin UI must embed or deep-link into observability tools: +- per-tenant and per-service quick links to Grafana dashboards and Loki queries +- incident triage shortcuts (operations overview → HTTP detail → logs) + +This mirrors UltraBase’s “observability links JSON” concept ([observability/README.md](file:///Users/vlad/Developer/madapes/ultrabase/observability/README.md#L65-L75)), but adapted to Cloudlysis services and dashboards. + +--- + +### 8) Audit, Compliance, and Change History + +- Audit log is an append-only stream of security and operations events: + - authentication and session events + - RBAC changes and permission grants + - config/definition changes and promotions + - scaling, drain, and migration operations + - deployments and rollbacks +- Audit log must support: + - search and export (bounded and access controlled) + - correlation to production incidents (request ids, trace ids) + - retention policy controls + +--- + +### 9) Control Plane API Surface (Admin API) + +The control plane requires a stable API surface for the Admin UI and automation. 
+ +Minimum API capabilities: +- **Idempotent jobs for multi-step operations**: + - every mutating operation returns a `job_id`, supports polling and cancellation, and records a full execution trace in the audit log. +- **Preflight endpoints**: + - validate an intended change and return a plan (and “would-change” diff) without applying it. +- **RBAC-first access model**: + - all endpoints enforce permission checks at the API boundary (UI is not trusted). + +Minimum endpoint groups: +- `/admin/v1/iam/*` (users, roles, assignments, sessions) +- `/admin/v1/tenants/*` (tenants lifecycle, status, metadata) +- `/admin/v1/config/*` (versioned config, diff, apply, rollback) +- `/admin/v1/definitions/*` (bundles, validate, promote, rollback) +- `/admin/v1/scale/*` (placement, migrations, drain status) +- `/admin/v1/ops/*` (deployments, rollbacks, service actions) +- `/admin/v1/observability/*` (links, saved queries, dashboard registry) +- `/admin/v1/audit/*` (search, export) + +Authentication/authorization integration: +- Prefer using the **Gateway** as the system of record for admin identities and sessions, with the control plane API validating requests using Gateway-issued tokens and enforcing platform-specific permissions. + +--- + +### 10) Secrets and Credentials Management + +The control plane must treat secrets as first-class operational data with strict handling. + +Requirements: +- Secret values must never be logged and must be redacted in UI/API responses. +- Secrets must support: + - creation and rotation workflows + - scoped access (global/tenant/environment) + - staged rollout (write new → verify → promote → retire old) +- Rendering rules: + - after creation, secret plaintext must not be retrievable unless explicitly enabled by policy (default: write-only). +- Integrations: + - support referencing secrets from config/definitions without embedding values (secret refs). 
+ +--- + +### 11) Backups, Restore, and Disaster Recovery (Production Operability) + +The control plane must provide explicit visibility and guardrails for data safety operations. + +Minimum requirements: +- **Backup status**: + - show last successful backup timestamps per critical store (metadata DB, NATS state if applicable, Grafana provisioning state as code, tenant placement/config stores). +- **Restore readiness**: + - preflight checks that validate a restore plan (target environment, versions, dependencies). +- **Operational playbooks**: + - link to the exact restore procedure and post-restore verification checklist. +- **Key rotation**: + - explicit workflows and audit logs for rotating signing keys, service credentials, and secret backends. + +This should align with the platform’s existing operational patterns (e.g., the explicit “restore / post-restore checks” concept used in UltraBase observability docs). + +--- + +## **Admin UI Requirements (Information Architecture + UX)** + +### Navigation (Minimum) + +Left navigation sections: +- Overview +- Tenants +- Users +- Sessions +- Roles & Permissions +- Config +- Definitions +- Scale & Placement +- Deployments +- Observability +- Audit Log +- Settings + +### Page Patterns (Reuse UltraBase UI) + +Adopt the UltraBase component system and page layout patterns: +- Layout, styling tokens, UI primitives: [components/ui](file:///Users/vlad/Developer/madapes/ultrabase/ui/control-plane-admin/src/components/ui) +- Table + search + action dropdown pattern: [TenantsPage](file:///Users/vlad/Developer/madapes/ultrabase/ui/control-plane-admin/src/pages/TenantsPage.tsx#L94-L203) + +Required page types: +- List pages: + - searchable table, bulk actions, row actions menu, status pills, empty states +- Detail pages: + - header with primary actions (drain, migrate, rollback) + - sub-nav tabs for domain-specific views +- Mutation flows: + - modal confirmation + explicit reason entry for high-impact changes + - toast notifications 
and “busy” state handling consistent with UltraBase patterns + +### Tenant Detail Subpages (Minimum) + +- Overview (status, assignments, SLO highlights) +- Placement (per service: Aggregate/Projection/Runner) +- Health (node readiness and dependency checks) +- Config (effective config + diffs) +- Definitions (applied definition bundle + version) +- Activity (audit trail filtered to tenant) +- Observability (embedded links / panels) + +--- + +## **Non-Functional Requirements** + +- **Security**: + - strict RBAC everywhere; deny-by-default + - audit every privileged operation + - step-up for sensitive actions + - CSRF protection for browser sessions + - safe secret handling (no secret values rendered after creation unless explicitly permitted) + - allowlist outbound integrations (Grafana/Loki/VM URLs, orchestration API endpoints) to prevent SSRF-style abuse +- **Reliability**: + - control plane operations are idempotent and resilient to partial failures + - operations have clear “current state” and do not rely on UI assumptions +- **Performance**: + - list pages paginate and filter server-side for large fleets + - dashboards load with bounded query costs and controlled label cardinality +- **Operability**: + - control plane itself must be observable (metrics/logs, dashboards, alerts) + - every operation must surface preflight checks and post-conditions + +--- + +## **Open Questions / Design Constraints (To Resolve During Implementation)** + +- Where does the source of truth live for: + - users/sessions/roles (Gateway vs control-plane backing store)? + - configs/definitions (NATS KV vs database vs GitOps)? +- How should production promotions be modeled: + - environment branches, approval workflow, and rollback semantics? +- What orchestrator is the production baseline (Docker Swarm per existing PRDs, or will Kubernetes be introduced)? 
+ - Where should the job/execution state for long-running operations live: + - embedded in the control plane API process, durable store, or NATS workflows? diff --git a/control/ui/.gitignore b/control/ui/.gitignore new file mode 100644 index 0000000..a547bf3 --- /dev/null +++ b/control/ui/.gitignore @@ -0,0 +1,24 @@ +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +pnpm-debug.log* +lerna-debug.log* + +node_modules +dist +dist-ssr +*.local + +# Editor directories and files +.vscode/* +!.vscode/extensions.json +.idea +.DS_Store +*.suo +*.ntvs* +*.njsproj +*.sln +*.sw? diff --git a/control/ui/README.md b/control/ui/README.md new file mode 100644 index 0000000..7dbf7eb --- /dev/null +++ b/control/ui/README.md @@ -0,0 +1,73 @@ +# React + TypeScript + Vite + +This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules. + +Currently, two official plugins are available: + +- [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react) uses [Oxc](https://oxc.rs) +- [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react-swc) uses [SWC](https://swc.rs/) + +## React Compiler + +The React Compiler is not enabled on this template because of its impact on dev & build performances. To add it, see [this documentation](https://react.dev/learn/react-compiler/installation). + +## Expanding the ESLint configuration + +If you are developing a production application, we recommend updating the configuration to enable type-aware lint rules: + +```js +export default defineConfig([ + globalIgnores(['dist']), + { + files: ['**/*.{ts,tsx}'], + extends: [ + // Other configs... 
+ + // Remove tseslint.configs.recommended and replace with this + tseslint.configs.recommendedTypeChecked, + // Alternatively, use this for stricter rules + tseslint.configs.strictTypeChecked, + // Optionally, add this for stylistic rules + tseslint.configs.stylisticTypeChecked, + + // Other configs... + ], + languageOptions: { + parserOptions: { + project: ['./tsconfig.node.json', './tsconfig.app.json'], + tsconfigRootDir: import.meta.dirname, + }, + // other options... + }, + }, +]) +``` + +You can also install [eslint-plugin-react-x](https://github.com/Rel1cx/eslint-react/tree/main/packages/plugins/eslint-plugin-react-x) and [eslint-plugin-react-dom](https://github.com/Rel1cx/eslint-react/tree/main/packages/plugins/eslint-plugin-react-dom) for React-specific lint rules: + +```js +// eslint.config.js +import reactX from 'eslint-plugin-react-x' +import reactDom from 'eslint-plugin-react-dom' + +export default defineConfig([ + globalIgnores(['dist']), + { + files: ['**/*.{ts,tsx}'], + extends: [ + // Other configs... + // Enable lint rules for React + reactX.configs['recommended-typescript'], + // Enable lint rules for React DOM + reactDom.configs.recommended, + ], + languageOptions: { + parserOptions: { + project: ['./tsconfig.node.json', './tsconfig.app.json'], + tsconfigRootDir: import.meta.dirname, + }, + // other options... 
+ }, + }, +]) +``` diff --git a/control/ui/eslint.config.js b/control/ui/eslint.config.js new file mode 100644 index 0000000..5e6b472 --- /dev/null +++ b/control/ui/eslint.config.js @@ -0,0 +1,23 @@ +import js from '@eslint/js' +import globals from 'globals' +import reactHooks from 'eslint-plugin-react-hooks' +import reactRefresh from 'eslint-plugin-react-refresh' +import tseslint from 'typescript-eslint' +import { defineConfig, globalIgnores } from 'eslint/config' + +export default defineConfig([ + globalIgnores(['dist']), + { + files: ['**/*.{ts,tsx}'], + extends: [ + js.configs.recommended, + tseslint.configs.recommended, + reactHooks.configs.flat.recommended, + reactRefresh.configs.vite, + ], + languageOptions: { + ecmaVersion: 2020, + globals: globals.browser, + }, + }, +]) diff --git a/control/ui/index.html b/control/ui/index.html new file mode 100644 index 0000000..b6e9e2d --- /dev/null +++ b/control/ui/index.html @@ -0,0 +1,13 @@ + + + + + + + ui + + +
+ + + diff --git a/control/ui/nginx.conf b/control/ui/nginx.conf new file mode 100644 index 0000000..79fd959 --- /dev/null +++ b/control/ui/nginx.conf @@ -0,0 +1,11 @@ +server { + listen 80; + server_name _; + + root /usr/share/nginx/html; + index index.html; + + location / { + try_files $uri $uri/ /index.html; + } +} diff --git a/control/ui/package-lock.json b/control/ui/package-lock.json new file mode 100644 index 0000000..2c45b61 --- /dev/null +++ b/control/ui/package-lock.json @@ -0,0 +1,5333 @@ +{ + "name": "ui", + "version": "0.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "ui", + "version": "0.0.0", + "dependencies": { + "react": "^19.2.4", + "react-dom": "^19.2.4", + "react-router-dom": "^7.9.3" + }, + "devDependencies": { + "@eslint/js": "^9.39.4", + "@testing-library/jest-dom": "^6.9.0", + "@testing-library/react": "^16.3.0", + "@types/node": "^24.12.0", + "@types/react": "^19.2.14", + "@types/react-dom": "^19.2.3", + "@vitejs/plugin-react": "^6.0.1", + "eslint": "^9.39.4", + "eslint-plugin-react-hooks": "^7.0.1", + "eslint-plugin-react-refresh": "^0.5.2", + "globals": "^17.4.0", + "jsdom": "^27.0.0", + "typescript": "~5.9.3", + "typescript-eslint": "^8.57.0", + "vite": "^8.0.1", + "vitest": "^3.2.4" + } + }, + "node_modules/@acemir/cssom": { + "version": "0.9.31", + "resolved": "https://registry.npmjs.org/@acemir/cssom/-/cssom-0.9.31.tgz", + "integrity": "sha512-ZnR3GSaH+/vJ0YlHau21FjfLYjMpYVIzTD8M8vIEQvIGxeOXyXdzCI140rrCY862p/C/BbzWsjc1dgnM9mkoTA==", + "dev": true, + "license": "MIT" + }, + "node_modules/@adobe/css-tools": { + "version": "4.4.4", + "resolved": "https://registry.npmjs.org/@adobe/css-tools/-/css-tools-4.4.4.tgz", + "integrity": "sha512-Elp+iwUx5rN5+Y8xLt5/GRoG20WGoDCQ/1Fb+1LiGtvwbDavuSk0jhD/eZdckHAuzcDzccnkv+rEjyWfRx18gg==", + "dev": true, + "license": "MIT" + }, + "node_modules/@asamuzakjp/css-color": { + "version": "4.1.2", + "resolved": 
"https://registry.npmjs.org/@asamuzakjp/css-color/-/css-color-4.1.2.tgz", + "integrity": "sha512-NfBUvBaYgKIuq6E/RBLY1m0IohzNHAYyaJGuTK79Z23uNwmz2jl1mPsC5ZxCCxylinKhT1Amn5oNTlx1wN8cQg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@csstools/css-calc": "^3.0.0", + "@csstools/css-color-parser": "^4.0.1", + "@csstools/css-parser-algorithms": "^4.0.0", + "@csstools/css-tokenizer": "^4.0.0", + "lru-cache": "^11.2.5" + } + }, + "node_modules/@asamuzakjp/css-color/node_modules/lru-cache": { + "version": "11.2.7", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-11.2.7.tgz", + "integrity": "sha512-aY/R+aEsRelme17KGQa/1ZSIpLpNYYrhcrepKTZgE+W3WM16YMCaPwOHLHsmopZHELU0Ojin1lPVxKR0MihncA==", + "dev": true, + "license": "BlueOak-1.0.0", + "engines": { + "node": "20 || >=22" + } + }, + "node_modules/@asamuzakjp/dom-selector": { + "version": "6.8.1", + "resolved": "https://registry.npmjs.org/@asamuzakjp/dom-selector/-/dom-selector-6.8.1.tgz", + "integrity": "sha512-MvRz1nCqW0fsy8Qz4dnLIvhOlMzqDVBabZx6lH+YywFDdjXhMY37SmpV1XFX3JzG5GWHn63j6HX6QPr3lZXHvQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@asamuzakjp/nwsapi": "^2.3.9", + "bidi-js": "^1.0.3", + "css-tree": "^3.1.0", + "is-potential-custom-element-name": "^1.0.1", + "lru-cache": "^11.2.6" + } + }, + "node_modules/@asamuzakjp/dom-selector/node_modules/lru-cache": { + "version": "11.2.7", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-11.2.7.tgz", + "integrity": "sha512-aY/R+aEsRelme17KGQa/1ZSIpLpNYYrhcrepKTZgE+W3WM16YMCaPwOHLHsmopZHELU0Ojin1lPVxKR0MihncA==", + "dev": true, + "license": "BlueOak-1.0.0", + "engines": { + "node": "20 || >=22" + } + }, + "node_modules/@asamuzakjp/nwsapi": { + "version": "2.3.9", + "resolved": "https://registry.npmjs.org/@asamuzakjp/nwsapi/-/nwsapi-2.3.9.tgz", + "integrity": "sha512-n8GuYSrI9bF7FFZ/SjhwevlHc8xaVlb/7HmHelnc/PZXBD2ZR49NnN9sMMuDdEGPeeRQ5d0hqlSlEpgCX3Wl0Q==", + "dev": true, + "license": "MIT" + }, + 
"node_modules/@babel/code-frame": { + "version": "7.29.0", + "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.29.0.tgz", + "integrity": "sha512-9NhCeYjq9+3uxgdtp20LSiJXJvN0FeCtNGpJxuMFZ1Kv3cWUNb6DOhJwUvcVCzKGR66cw4njwM6hrJLqgOwbcw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-validator-identifier": "^7.28.5", + "js-tokens": "^4.0.0", + "picocolors": "^1.1.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/compat-data": { + "version": "7.29.0", + "resolved": "https://registry.npmjs.org/@babel/compat-data/-/compat-data-7.29.0.tgz", + "integrity": "sha512-T1NCJqT/j9+cn8fvkt7jtwbLBfLC/1y1c7NtCeXFRgzGTsafi68MRv8yzkYSapBnFA6L3U2VSc02ciDzoAJhJg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/core": { + "version": "7.29.0", + "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.29.0.tgz", + "integrity": "sha512-CGOfOJqWjg2qW/Mb6zNsDm+u5vFQ8DxXfbM09z69p5Z6+mE1ikP2jUXw+j42Pf1XTYED2Rni5f95npYeuwMDQA==", + "dev": true, + "license": "MIT", + "peer": true, + "dependencies": { + "@babel/code-frame": "^7.29.0", + "@babel/generator": "^7.29.0", + "@babel/helper-compilation-targets": "^7.28.6", + "@babel/helper-module-transforms": "^7.28.6", + "@babel/helpers": "^7.28.6", + "@babel/parser": "^7.29.0", + "@babel/template": "^7.28.6", + "@babel/traverse": "^7.29.0", + "@babel/types": "^7.29.0", + "@jridgewell/remapping": "^2.3.5", + "convert-source-map": "^2.0.0", + "debug": "^4.1.0", + "gensync": "^1.0.0-beta.2", + "json5": "^2.2.3", + "semver": "^6.3.1" + }, + "engines": { + "node": ">=6.9.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/babel" + } + }, + "node_modules/@babel/generator": { + "version": "7.29.1", + "resolved": "https://registry.npmjs.org/@babel/generator/-/generator-7.29.1.tgz", + "integrity": 
"sha512-qsaF+9Qcm2Qv8SRIMMscAvG4O3lJ0F1GuMo5HR/Bp02LopNgnZBC/EkbevHFeGs4ls/oPz9v+Bsmzbkbe+0dUw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/parser": "^7.29.0", + "@babel/types": "^7.29.0", + "@jridgewell/gen-mapping": "^0.3.12", + "@jridgewell/trace-mapping": "^0.3.28", + "jsesc": "^3.0.2" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-compilation-targets": { + "version": "7.28.6", + "resolved": "https://registry.npmjs.org/@babel/helper-compilation-targets/-/helper-compilation-targets-7.28.6.tgz", + "integrity": "sha512-JYtls3hqi15fcx5GaSNL7SCTJ2MNmjrkHXg4FSpOA/grxK8KwyZ5bubHsCq8FXCkua6xhuaaBit+3b7+VZRfcA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/compat-data": "^7.28.6", + "@babel/helper-validator-option": "^7.27.1", + "browserslist": "^4.24.0", + "lru-cache": "^5.1.1", + "semver": "^6.3.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-globals": { + "version": "7.28.0", + "resolved": "https://registry.npmjs.org/@babel/helper-globals/-/helper-globals-7.28.0.tgz", + "integrity": "sha512-+W6cISkXFa1jXsDEdYA8HeevQT/FULhxzR99pxphltZcVaugps53THCeiWA8SguxxpSp3gKPiuYfSWopkLQ4hw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-module-imports": { + "version": "7.28.6", + "resolved": "https://registry.npmjs.org/@babel/helper-module-imports/-/helper-module-imports-7.28.6.tgz", + "integrity": "sha512-l5XkZK7r7wa9LucGw9LwZyyCUscb4x37JWTPz7swwFE/0FMQAGpiWUZn8u9DzkSBWEcK25jmvubfpw2dnAMdbw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/traverse": "^7.28.6", + "@babel/types": "^7.28.6" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-module-transforms": { + "version": "7.28.6", + "resolved": "https://registry.npmjs.org/@babel/helper-module-transforms/-/helper-module-transforms-7.28.6.tgz", + "integrity": 
"sha512-67oXFAYr2cDLDVGLXTEABjdBJZ6drElUSI7WKp70NrpyISso3plG9SAGEF6y7zbha/wOzUByWWTJvEDVNIUGcA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/helper-module-imports": "^7.28.6", + "@babel/helper-validator-identifier": "^7.28.5", + "@babel/traverse": "^7.28.6" + }, + "engines": { + "node": ">=6.9.0" + }, + "peerDependencies": { + "@babel/core": "^7.0.0" + } + }, + "node_modules/@babel/helper-string-parser": { + "version": "7.27.1", + "resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.27.1.tgz", + "integrity": "sha512-qMlSxKbpRlAridDExk92nSobyDdpPijUq2DW6oDnUqd0iOGxmQjyqhMIihI9+zv4LPyZdRje2cavWPbCbWm3eA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-validator-identifier": { + "version": "7.28.5", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.28.5.tgz", + "integrity": "sha512-qSs4ifwzKJSV39ucNjsvc6WVHs6b7S03sOh2OcHF9UHfVPqWWALUsNUVzhSBiItjRZoLHx7nIarVjqKVusUZ1Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-validator-option": { + "version": "7.27.1", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-option/-/helper-validator-option-7.27.1.tgz", + "integrity": "sha512-YvjJow9FxbhFFKDSuFnVCe2WxXk1zWc22fFePVNEaWJEu8IrZVlda6N0uHwzZrUM1il7NC9Mlp4MaJYbYd9JSg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helpers": { + "version": "7.29.2", + "resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.29.2.tgz", + "integrity": "sha512-HoGuUs4sCZNezVEKdVcwqmZN8GoHirLUcLaYVNBK2J0DadGtdcqgr3BCbvH8+XUo4NGjNl3VOtSjEKNzqfFgKw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/template": "^7.28.6", + "@babel/types": "^7.29.0" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/parser": { + "version": "7.29.2", + 
"resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.29.2.tgz", + "integrity": "sha512-4GgRzy/+fsBa72/RZVJmGKPmZu9Byn8o4MoLpmNe1m8ZfYnz5emHLQz3U4gLud6Zwl0RZIcgiLD7Uq7ySFuDLA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/types": "^7.29.0" + }, + "bin": { + "parser": "bin/babel-parser.js" + }, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@babel/runtime": { + "version": "7.29.2", + "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.29.2.tgz", + "integrity": "sha512-JiDShH45zKHWyGe4ZNVRrCjBz8Nh9TMmZG1kh4QTK8hCBTWBi8Da+i7s1fJw7/lYpM4ccepSNfqzZ/QvABBi5g==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/template": { + "version": "7.28.6", + "resolved": "https://registry.npmjs.org/@babel/template/-/template-7.28.6.tgz", + "integrity": "sha512-YA6Ma2KsCdGb+WC6UpBVFJGXL58MDA6oyONbjyF/+5sBgxY/dwkhLogbMT2GXXyU84/IhRw/2D1Os1B/giz+BQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/code-frame": "^7.28.6", + "@babel/parser": "^7.28.6", + "@babel/types": "^7.28.6" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/traverse": { + "version": "7.29.0", + "resolved": "https://registry.npmjs.org/@babel/traverse/-/traverse-7.29.0.tgz", + "integrity": "sha512-4HPiQr0X7+waHfyXPZpWPfWL/J7dcN1mx9gL6WdQVMbPnF3+ZhSMs8tCxN7oHddJE9fhNE7+lxdnlyemKfJRuA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/code-frame": "^7.29.0", + "@babel/generator": "^7.29.0", + "@babel/helper-globals": "^7.28.0", + "@babel/parser": "^7.29.0", + "@babel/template": "^7.28.6", + "@babel/types": "^7.29.0", + "debug": "^4.3.1" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/types": { + "version": "7.29.0", + "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.29.0.tgz", + "integrity": "sha512-LwdZHpScM4Qz8Xw2iKSzS+cfglZzJGvofQICy7W7v4caru4EaAmyUuO6BGrbyQ2mYV11W0U8j5mBhd14dd3B0A==", + "dev": true, + 
"license": "MIT", + "dependencies": { + "@babel/helper-string-parser": "^7.27.1", + "@babel/helper-validator-identifier": "^7.28.5" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@csstools/color-helpers": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/@csstools/color-helpers/-/color-helpers-6.0.2.tgz", + "integrity": "sha512-LMGQLS9EuADloEFkcTBR3BwV/CGHV7zyDxVRtVDTwdI2Ca4it0CCVTT9wCkxSgokjE5Ho41hEPgb8OEUwoXr6Q==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT-0", + "engines": { + "node": ">=20.19.0" + } + }, + "node_modules/@csstools/css-calc": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/@csstools/css-calc/-/css-calc-3.1.1.tgz", + "integrity": "sha512-HJ26Z/vmsZQqs/o3a6bgKslXGFAungXGbinULZO3eMsOyNJHeBBZfup5FiZInOghgoM4Hwnmw+OgbJCNg1wwUQ==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT", + "engines": { + "node": ">=20.19.0" + }, + "peerDependencies": { + "@csstools/css-parser-algorithms": "^4.0.0", + "@csstools/css-tokenizer": "^4.0.0" + } + }, + "node_modules/@csstools/css-color-parser": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/@csstools/css-color-parser/-/css-color-parser-4.0.2.tgz", + "integrity": "sha512-0GEfbBLmTFf0dJlpsNU7zwxRIH0/BGEMuXLTCvFYxuL1tNhqzTbtnFICyJLTNK4a+RechKP75e7w42ClXSnJQw==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT", + "dependencies": { + "@csstools/color-helpers": "^6.0.2", + "@csstools/css-calc": "^3.1.1" + }, + "engines": { + "node": ">=20.19.0" + }, + 
"peerDependencies": { + "@csstools/css-parser-algorithms": "^4.0.0", + "@csstools/css-tokenizer": "^4.0.0" + } + }, + "node_modules/@csstools/css-parser-algorithms": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/@csstools/css-parser-algorithms/-/css-parser-algorithms-4.0.0.tgz", + "integrity": "sha512-+B87qS7fIG3L5h3qwJ/IFbjoVoOe/bpOdh9hAjXbvx0o8ImEmUsGXN0inFOnk2ChCFgqkkGFQ+TpM5rbhkKe4w==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT", + "peer": true, + "engines": { + "node": ">=20.19.0" + }, + "peerDependencies": { + "@csstools/css-tokenizer": "^4.0.0" + } + }, + "node_modules/@csstools/css-syntax-patches-for-csstree": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@csstools/css-syntax-patches-for-csstree/-/css-syntax-patches-for-csstree-1.1.2.tgz", + "integrity": "sha512-5GkLzz4prTIpoyeUiIu3iV6CSG3Plo7xRVOFPKI7FVEJ3mZ0A8SwK0XU3Gl7xAkiQ+mDyam+NNp875/C5y+jSA==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT-0", + "peerDependencies": { + "css-tree": "^3.2.1" + }, + "peerDependenciesMeta": { + "css-tree": { + "optional": true + } + } + }, + "node_modules/@csstools/css-tokenizer": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/@csstools/css-tokenizer/-/css-tokenizer-4.0.0.tgz", + "integrity": "sha512-QxULHAm7cNu72w97JUNCBFODFaXpbDg+dP8b/oWFAZ2MTRppA3U00Y2L1HqaS4J6yBqxwa/Y3nMBaxVKbB/NsA==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT", + "peer": true, + "engines": { + "node": ">=20.19.0" + } + }, + 
"node_modules/@emnapi/wasi-threads": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/@emnapi/wasi-threads/-/wasi-threads-1.2.0.tgz", + "integrity": "sha512-N10dEJNSsUx41Z6pZsXU8FjPjpBEplgH24sfkmITrBED1/U2Esum9F3lfLrMjKHHjmi557zQn7kR9R+XWXu5Rg==", + "dev": true, + "license": "MIT", + "optional": true, + "dependencies": { + "tslib": "^2.4.0" + } + }, + "node_modules/@esbuild/aix-ppc64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.27.4.tgz", + "integrity": "sha512-cQPwL2mp2nSmHHJlCyoXgHGhbEPMrEEU5xhkcy3Hs/O7nGZqEpZ2sUtLaL9MORLtDfRvVl2/3PAuEkYZH0Ty8Q==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "aix" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/android-arm": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.27.4.tgz", + "integrity": "sha512-X9bUgvxiC8CHAGKYufLIHGXPJWnr0OCdR0anD2e21vdvgCI8lIfqFbnoeOz7lBjdrAGUhqLZLcQo6MLhTO2DKQ==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/android-arm64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.27.4.tgz", + "integrity": "sha512-gdLscB7v75wRfu7QSm/zg6Rx29VLdy9eTr2t44sfTW7CxwAtQghZ4ZnqHk3/ogz7xao0QAgrkradbBzcqFPasw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/android-x64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.27.4.tgz", + "integrity": "sha512-PzPFnBNVF292sfpfhiyiXCGSn9HZg5BcAz+ivBuSsl6Rk4ga1oEXAamhOXRFyMcjwr2DVtm40G65N3GLeH1Lvw==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + 
"engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/darwin-arm64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.27.4.tgz", + "integrity": "sha512-b7xaGIwdJlht8ZFCvMkpDN6uiSmnxxK56N2GDTMYPr2/gzvfdQN8rTfBsvVKmIVY/X7EM+/hJKEIbbHs9oA4tQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/darwin-x64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.27.4.tgz", + "integrity": "sha512-sR+OiKLwd15nmCdqpXMnuJ9W2kpy0KigzqScqHI3Hqwr7IXxBp3Yva+yJwoqh7rE8V77tdoheRYataNKL4QrPw==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/freebsd-arm64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.27.4.tgz", + "integrity": "sha512-jnfpKe+p79tCnm4GVav68A7tUFeKQwQyLgESwEAUzyxk/TJr4QdGog9sqWNcUbr/bZt/O/HXouspuQDd9JxFSw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/freebsd-x64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.27.4.tgz", + "integrity": "sha512-2kb4ceA/CpfUrIcTUl1wrP/9ad9Atrp5J94Lq69w7UwOMolPIGrfLSvAKJp0RTvkPPyn6CIWrNy13kyLikZRZQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-arm": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.27.4.tgz", + "integrity": "sha512-aBYgcIxX/wd5n2ys0yESGeYMGF+pv6g0DhZr3G1ZG4jMfruU9Tl1i2Z+Wnj9/KjGz1lTLCcorqE2viePZqj4Eg==", + "cpu": [ + "arm" + ], + 
"dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-arm64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.27.4.tgz", + "integrity": "sha512-7nQOttdzVGth1iz57kxg9uCz57dxQLHWxopL6mYuYthohPKEK0vU0C3O21CcBK6KDlkYVcnDXY099HcCDXd9dA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-ia32": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.27.4.tgz", + "integrity": "sha512-oPtixtAIzgvzYcKBQM/qZ3R+9TEUd1aNJQu0HhGyqtx6oS7qTpvjheIWBbes4+qu1bNlo2V4cbkISr8q6gRBFA==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-loong64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.27.4.tgz", + "integrity": "sha512-8mL/vh8qeCoRcFH2nM8wm5uJP+ZcVYGGayMavi8GmRJjuI3g1v6Z7Ni0JJKAJW+m0EtUuARb6Lmp4hMjzCBWzA==", + "cpu": [ + "loong64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-mips64el": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.27.4.tgz", + "integrity": "sha512-1RdrWFFiiLIW7LQq9Q2NES+HiD4NyT8Itj9AUeCl0IVCA459WnPhREKgwrpaIfTOe+/2rdntisegiPWn/r/aAw==", + "cpu": [ + "mips64el" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-ppc64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.27.4.tgz", + "integrity": 
"sha512-tLCwNG47l3sd9lpfyx9LAGEGItCUeRCWeAx6x2Jmbav65nAwoPXfewtAdtbtit/pJFLUWOhpv0FpS6GQAmPrHA==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-riscv64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.27.4.tgz", + "integrity": "sha512-BnASypppbUWyqjd1KIpU4AUBiIhVr6YlHx/cnPgqEkNoVOhHg+YiSVxM1RLfiy4t9cAulbRGTNCKOcqHrEQLIw==", + "cpu": [ + "riscv64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-s390x": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.27.4.tgz", + "integrity": "sha512-+eUqgb/Z7vxVLezG8bVB9SfBie89gMueS+I0xYh2tJdw3vqA/0ImZJ2ROeWwVJN59ihBeZ7Tu92dF/5dy5FttA==", + "cpu": [ + "s390x" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-x64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.27.4.tgz", + "integrity": "sha512-S5qOXrKV8BQEzJPVxAwnryi2+Iq5pB40gTEIT69BQONqR7JH1EPIcQ/Uiv9mCnn05jff9umq/5nqzxlqTOg9NA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/netbsd-arm64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.27.4.tgz", + "integrity": "sha512-xHT8X4sb0GS8qTqiwzHqpY00C95DPAq7nAwX35Ie/s+LO9830hrMd3oX0ZMKLvy7vsonee73x0lmcdOVXFzd6Q==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/netbsd-x64": { + "version": "0.27.4", + "resolved": 
"https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.27.4.tgz", + "integrity": "sha512-RugOvOdXfdyi5Tyv40kgQnI0byv66BFgAqjdgtAKqHoZTbTF2QqfQrFwa7cHEORJf6X2ht+l9ABLMP0dnKYsgg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/openbsd-arm64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.27.4.tgz", + "integrity": "sha512-2MyL3IAaTX+1/qP0O1SwskwcwCoOI4kV2IBX1xYnDDqthmq5ArrW94qSIKCAuRraMgPOmG0RDTA74mzYNQA9ow==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/openbsd-x64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.27.4.tgz", + "integrity": "sha512-u8fg/jQ5aQDfsnIV6+KwLOf1CmJnfu1ShpwqdwC0uA7ZPwFws55Ngc12vBdeUdnuWoQYx/SOQLGDcdlfXhYmXQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/openharmony-arm64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/openharmony-arm64/-/openharmony-arm64-0.27.4.tgz", + "integrity": "sha512-JkTZrl6VbyO8lDQO3yv26nNr2RM2yZzNrNHEsj9bm6dOwwu9OYN28CjzZkH57bh4w0I2F7IodpQvUAEd1mbWXg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openharmony" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/sunos-x64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.27.4.tgz", + "integrity": "sha512-/gOzgaewZJfeJTlsWhvUEmUG4tWEY2Spp5M20INYRg2ZKl9QPO3QEEgPeRtLjEWSW8FilRNacPOg8R1uaYkA6g==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "sunos" + ], + "engines": { + 
"node": ">=18" + } + }, + "node_modules/@esbuild/win32-arm64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.27.4.tgz", + "integrity": "sha512-Z9SExBg2y32smoDQdf1HRwHRt6vAHLXcxD2uGgO/v2jK7Y718Ix4ndsbNMU/+1Qiem9OiOdaqitioZwxivhXYg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/win32-ia32": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.27.4.tgz", + "integrity": "sha512-DAyGLS0Jz5G5iixEbMHi5KdiApqHBWMGzTtMiJ72ZOLhbu/bzxgAe8Ue8CTS3n3HbIUHQz/L51yMdGMeoxXNJw==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/win32-x64": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.27.4.tgz", + "integrity": "sha512-+knoa0BDoeXgkNvvV1vvbZX4+hizelrkwmGJBdT17t8FNPwG2lKemmuMZlmaNQ3ws3DKKCxpb4zRZEIp3UxFCg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@eslint-community/eslint-utils": { + "version": "4.9.1", + "resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.9.1.tgz", + "integrity": "sha512-phrYmNiYppR7znFEdqgfWHXR6NCkZEK7hwWDHZUjit/2/U0r6XvkDl0SYnoM51Hq7FhCGdLDT6zxCCOY1hexsQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "eslint-visitor-keys": "^3.4.3" + }, + "engines": { + "node": "^12.22.0 || ^14.17.0 || >=16.0.0" + }, + "funding": { + "url": "https://opencollective.com/eslint" + }, + "peerDependencies": { + "eslint": "^6.0.0 || ^7.0.0 || >=8.0.0" + } + }, + "node_modules/@eslint-community/eslint-utils/node_modules/eslint-visitor-keys": { + "version": "3.4.3", + "resolved": 
"https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-3.4.3.tgz", + "integrity": "sha512-wpc+LXeiyiisxPlEkUzU6svyS1frIO3Mgxj1fdy7Pm8Ygzguax2N3Fa/D/ag1WqbOprdI+uY6wMUl8/a2G+iag==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": "^12.22.0 || ^14.17.0 || >=16.0.0" + }, + "funding": { + "url": "https://opencollective.com/eslint" + } + }, + "node_modules/@eslint-community/regexpp": { + "version": "4.12.2", + "resolved": "https://registry.npmjs.org/@eslint-community/regexpp/-/regexpp-4.12.2.tgz", + "integrity": "sha512-EriSTlt5OC9/7SXkRSCAhfSxxoSUgBm33OH+IkwbdpgoqsSsUg7y3uh+IICI/Qg4BBWr3U2i39RpmycbxMq4ew==", + "dev": true, + "license": "MIT", + "engines": { + "node": "^12.0.0 || ^14.0.0 || >=16.0.0" + } + }, + "node_modules/@eslint/config-array": { + "version": "0.21.2", + "resolved": "https://registry.npmjs.org/@eslint/config-array/-/config-array-0.21.2.tgz", + "integrity": "sha512-nJl2KGTlrf9GjLimgIru+V/mzgSK0ABCDQRvxw5BjURL7WfH5uoWmizbH7QB6MmnMBd8cIC9uceWnezL1VZWWw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@eslint/object-schema": "^2.1.7", + "debug": "^4.3.1", + "minimatch": "^3.1.5" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + } + }, + "node_modules/@eslint/config-helpers": { + "version": "0.4.2", + "resolved": "https://registry.npmjs.org/@eslint/config-helpers/-/config-helpers-0.4.2.tgz", + "integrity": "sha512-gBrxN88gOIf3R7ja5K9slwNayVcZgK6SOUORm2uBzTeIEfeVaIhOpCtTox3P6R7o2jLFwLFTLnC7kU/RGcYEgw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@eslint/core": "^0.17.0" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + } + }, + "node_modules/@eslint/core": { + "version": "0.17.0", + "resolved": "https://registry.npmjs.org/@eslint/core/-/core-0.17.0.tgz", + "integrity": "sha512-yL/sLrpmtDaFEiUj1osRP4TI2MDz1AddJL+jZ7KSqvBuliN4xqYY54IfdN8qD8Toa6g1iloph1fxQNkjOxrrpQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + 
"@types/json-schema": "^7.0.15" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + } + }, + "node_modules/@eslint/eslintrc": { + "version": "3.3.5", + "resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-3.3.5.tgz", + "integrity": "sha512-4IlJx0X0qftVsN5E+/vGujTRIFtwuLbNsVUe7TO6zYPDR1O6nFwvwhIKEKSrl6dZchmYBITazxKoUYOjdtjlRg==", + "dev": true, + "license": "MIT", + "dependencies": { + "ajv": "^6.14.0", + "debug": "^4.3.2", + "espree": "^10.0.1", + "globals": "^14.0.0", + "ignore": "^5.2.0", + "import-fresh": "^3.2.1", + "js-yaml": "^4.1.1", + "minimatch": "^3.1.5", + "strip-json-comments": "^3.1.1" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "url": "https://opencollective.com/eslint" + } + }, + "node_modules/@eslint/eslintrc/node_modules/globals": { + "version": "14.0.0", + "resolved": "https://registry.npmjs.org/globals/-/globals-14.0.0.tgz", + "integrity": "sha512-oahGvuMGQlPw/ivIYBjVSrWAfWLBeku5tpPE2fOPLi+WHffIWbuh2tCjhyQhTBPMf5E9jDEH4FOmTYgYwbKwtQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/@eslint/js": { + "version": "9.39.4", + "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.39.4.tgz", + "integrity": "sha512-nE7DEIchvtiFTwBw4Lfbu59PG+kCofhjsKaCWzxTpt4lfRjRMqG6uMBzKXuEcyXhOHoUp9riAm7/aWYGhXZ9cw==", + "dev": true, + "license": "MIT", + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "url": "https://eslint.org/donate" + } + }, + "node_modules/@eslint/object-schema": { + "version": "2.1.7", + "resolved": "https://registry.npmjs.org/@eslint/object-schema/-/object-schema-2.1.7.tgz", + "integrity": "sha512-VtAOaymWVfZcmZbp6E2mympDIHvyjXs/12LqWYjVw6qjrfF+VK+fyG33kChz3nnK+SU5/NeHOqrTEHS8sXO3OA==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + } + }, + 
"node_modules/@eslint/plugin-kit": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/@eslint/plugin-kit/-/plugin-kit-0.4.1.tgz", + "integrity": "sha512-43/qtrDUokr7LJqoF2c3+RInu/t4zfrpYdoSDfYyhg52rwLV6TnOvdG4fXm7IkSB3wErkcmJS9iEhjVtOSEjjA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@eslint/core": "^0.17.0", + "levn": "^0.4.1" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + } + }, + "node_modules/@exodus/bytes": { + "version": "1.15.0", + "resolved": "https://registry.npmjs.org/@exodus/bytes/-/bytes-1.15.0.tgz", + "integrity": "sha512-UY0nlA+feH81UGSHv92sLEPLCeZFjXOuHhrIo0HQydScuQc8s0A7kL/UdgwgDq8g8ilksmuoF35YVTNphV2aBQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": "^20.19.0 || ^22.12.0 || >=24.0.0" + }, + "peerDependencies": { + "@noble/hashes": "^1.8.0 || ^2.0.0" + }, + "peerDependenciesMeta": { + "@noble/hashes": { + "optional": true + } + } + }, + "node_modules/@humanfs/core": { + "version": "0.19.1", + "resolved": "https://registry.npmjs.org/@humanfs/core/-/core-0.19.1.tgz", + "integrity": "sha512-5DyQ4+1JEUzejeK1JGICcideyfUbGixgS9jNgex5nqkW+cY7WZhxBigmieN5Qnw9ZosSNVC9KQKyb+GUaGyKUA==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": ">=18.18.0" + } + }, + "node_modules/@humanfs/node": { + "version": "0.16.7", + "resolved": "https://registry.npmjs.org/@humanfs/node/-/node-0.16.7.tgz", + "integrity": "sha512-/zUx+yOsIrG4Y43Eh2peDeKCxlRt/gET6aHfaKpuq267qXdYDFViVHfMaLyygZOnl0kGWxFIgsBy8QFuTLUXEQ==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@humanfs/core": "^0.19.1", + "@humanwhocodes/retry": "^0.4.0" + }, + "engines": { + "node": ">=18.18.0" + } + }, + "node_modules/@humanwhocodes/module-importer": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@humanwhocodes/module-importer/-/module-importer-1.0.1.tgz", + "integrity": "sha512-bxveV4V8v5Yb4ncFTT3rPSgZBOpCkjfK0y4oVVVJwIuDVBRMDXrPyXRL988i5ap9m9bnyEEjWfm5WkBmtffLfA==", + "dev": 
true, + "license": "Apache-2.0", + "engines": { + "node": ">=12.22" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/nzakas" + } + }, + "node_modules/@humanwhocodes/retry": { + "version": "0.4.3", + "resolved": "https://registry.npmjs.org/@humanwhocodes/retry/-/retry-0.4.3.tgz", + "integrity": "sha512-bV0Tgo9K4hfPCek+aMAn81RppFKv2ySDQeMoSZuvTASywNTnVJCArCZE2FWqpvIatKu7VMRLWlR1EazvVhDyhQ==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": ">=18.18" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/nzakas" + } + }, + "node_modules/@jridgewell/gen-mapping": { + "version": "0.3.13", + "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.13.tgz", + "integrity": "sha512-2kkt/7niJ6MgEPxF0bYdQ6etZaA+fQvDcLKckhy1yIQOzaoKjBBjSj63/aLVjYE3qhRt5dvM+uUyfCg6UKCBbA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/sourcemap-codec": "^1.5.0", + "@jridgewell/trace-mapping": "^0.3.24" + } + }, + "node_modules/@jridgewell/remapping": { + "version": "2.3.5", + "resolved": "https://registry.npmjs.org/@jridgewell/remapping/-/remapping-2.3.5.tgz", + "integrity": "sha512-LI9u/+laYG4Ds1TDKSJW2YPrIlcVYOwi2fUC6xB43lueCjgxV4lffOCZCtYFiH6TNOX+tQKXx97T4IKHbhyHEQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/gen-mapping": "^0.3.5", + "@jridgewell/trace-mapping": "^0.3.24" + } + }, + "node_modules/@jridgewell/resolve-uri": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz", + "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/sourcemap-codec": { + "version": "1.5.5", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.5.tgz", + "integrity": 
"sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==", + "dev": true, + "license": "MIT" + }, + "node_modules/@jridgewell/trace-mapping": { + "version": "0.3.31", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.31.tgz", + "integrity": "sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/resolve-uri": "^3.1.0", + "@jridgewell/sourcemap-codec": "^1.4.14" + } + }, + "node_modules/@napi-rs/wasm-runtime": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@napi-rs/wasm-runtime/-/wasm-runtime-1.1.2.tgz", + "integrity": "sha512-sNXv5oLJ7ob93xkZ1XnxisYhGYXfaG9f65/ZgYuAu3qt7b3NadcOEhLvx28hv31PgX8SZJRYrAIPQilQmFpLVw==", + "dev": true, + "license": "MIT", + "optional": true, + "dependencies": { + "@tybys/wasm-util": "^0.10.1" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Brooooooklyn" + }, + "peerDependencies": { + "@emnapi/core": "^1.7.1", + "@emnapi/runtime": "^1.7.1" + } + }, + "node_modules/@oxc-project/types": { + "version": "0.122.0", + "resolved": "https://registry.npmjs.org/@oxc-project/types/-/types-0.122.0.tgz", + "integrity": "sha512-oLAl5kBpV4w69UtFZ9xqcmTi+GENWOcPF7FCrczTiBbmC0ibXxCwyvZGbO39rCVEuLGAZM84DH0pUIyyv/YJzA==", + "dev": true, + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/Boshen" + } + }, + "node_modules/@rolldown/binding-android-arm64": { + "version": "1.0.0-rc.12", + "resolved": "https://registry.npmjs.org/@rolldown/binding-android-arm64/-/binding-android-arm64-1.0.0-rc.12.tgz", + "integrity": "sha512-pv1y2Fv0JybcykuiiD3qBOBdz6RteYojRFY1d+b95WVuzx211CRh+ytI/+9iVyWQ6koTh5dawe4S/yRfOFjgaA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + 
"node_modules/@rolldown/binding-darwin-arm64": { + "version": "1.0.0-rc.12", + "resolved": "https://registry.npmjs.org/@rolldown/binding-darwin-arm64/-/binding-darwin-arm64-1.0.0-rc.12.tgz", + "integrity": "sha512-cFYr6zTG/3PXXF3pUO+umXxt1wkRK/0AYT8lDwuqvRC+LuKYWSAQAQZjCWDQpAH172ZV6ieYrNnFzVVcnSflAg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@rolldown/binding-darwin-x64": { + "version": "1.0.0-rc.12", + "resolved": "https://registry.npmjs.org/@rolldown/binding-darwin-x64/-/binding-darwin-x64-1.0.0-rc.12.tgz", + "integrity": "sha512-ZCsYknnHzeXYps0lGBz8JrF37GpE9bFVefrlmDrAQhOEi4IOIlcoU1+FwHEtyXGx2VkYAvhu7dyBf75EJQffBw==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@rolldown/binding-freebsd-x64": { + "version": "1.0.0-rc.12", + "resolved": "https://registry.npmjs.org/@rolldown/binding-freebsd-x64/-/binding-freebsd-x64-1.0.0-rc.12.tgz", + "integrity": "sha512-dMLeprcVsyJsKolRXyoTH3NL6qtsT0Y2xeuEA8WQJquWFXkEC4bcu1rLZZSnZRMtAqwtrF/Ib9Ddtpa/Gkge9Q==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@rolldown/binding-linux-arm-gnueabihf": { + "version": "1.0.0-rc.12", + "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm-gnueabihf/-/binding-linux-arm-gnueabihf-1.0.0-rc.12.tgz", + "integrity": "sha512-YqWjAgGC/9M1lz3GR1r1rP79nMgo3mQiiA+Hfo+pvKFK1fAJ1bCi0ZQVh8noOqNacuY1qIcfyVfP6HoyBRZ85Q==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@rolldown/binding-linux-arm64-gnu": { + "version": "1.0.0-rc.12", + "resolved": 
"https://registry.npmjs.org/@rolldown/binding-linux-arm64-gnu/-/binding-linux-arm64-gnu-1.0.0-rc.12.tgz", + "integrity": "sha512-/I5AS4cIroLpslsmzXfwbe5OmWvSsrFuEw3mwvbQ1kDxJ822hFHIx+vsN/TAzNVyepI/j/GSzrtCIwQPeKCLIg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@rolldown/binding-linux-arm64-musl": { + "version": "1.0.0-rc.12", + "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm64-musl/-/binding-linux-arm64-musl-1.0.0-rc.12.tgz", + "integrity": "sha512-V6/wZztnBqlx5hJQqNWwFdxIKN0m38p8Jas+VoSfgH54HSj9tKTt1dZvG6JRHcjh6D7TvrJPWFGaY9UBVOaWPw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@rolldown/binding-linux-ppc64-gnu": { + "version": "1.0.0-rc.12", + "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-ppc64-gnu/-/binding-linux-ppc64-gnu-1.0.0-rc.12.tgz", + "integrity": "sha512-AP3E9BpcUYliZCxa3w5Kwj9OtEVDYK6sVoUzy4vTOJsjPOgdaJZKFmN4oOlX0Wp0RPV2ETfmIra9x1xuayFB7g==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@rolldown/binding-linux-s390x-gnu": { + "version": "1.0.0-rc.12", + "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-s390x-gnu/-/binding-linux-s390x-gnu-1.0.0-rc.12.tgz", + "integrity": "sha512-nWwpvUSPkoFmZo0kQazZYOrT7J5DGOJ/+QHHzjvNlooDZED8oH82Yg67HvehPPLAg5fUff7TfWFHQS8IV1n3og==", + "cpu": [ + "s390x" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@rolldown/binding-linux-x64-gnu": { + "version": "1.0.0-rc.12", + "resolved": 
"https://registry.npmjs.org/@rolldown/binding-linux-x64-gnu/-/binding-linux-x64-gnu-1.0.0-rc.12.tgz", + "integrity": "sha512-RNrafz5bcwRy+O9e6P8Z/OCAJW/A+qtBczIqVYwTs14pf4iV1/+eKEjdOUta93q2TsT/FI0XYDP3TCky38LMAg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@rolldown/binding-linux-x64-musl": { + "version": "1.0.0-rc.12", + "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-x64-musl/-/binding-linux-x64-musl-1.0.0-rc.12.tgz", + "integrity": "sha512-Jpw/0iwoKWx3LJ2rc1yjFrj+T7iHZn2JDg1Yny1ma0luviFS4mhAIcd1LFNxK3EYu3DHWCps0ydXQ5i/rrJ2ig==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@rolldown/binding-openharmony-arm64": { + "version": "1.0.0-rc.12", + "resolved": "https://registry.npmjs.org/@rolldown/binding-openharmony-arm64/-/binding-openharmony-arm64-1.0.0-rc.12.tgz", + "integrity": "sha512-vRugONE4yMfVn0+7lUKdKvN4D5YusEiPilaoO2sgUWpCvrncvWgPMzK00ZFFJuiPgLwgFNP5eSiUlv2tfc+lpA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openharmony" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@rolldown/binding-wasm32-wasi": { + "version": "1.0.0-rc.12", + "resolved": "https://registry.npmjs.org/@rolldown/binding-wasm32-wasi/-/binding-wasm32-wasi-1.0.0-rc.12.tgz", + "integrity": "sha512-ykGiLr/6kkiHc0XnBfmFJuCjr5ZYKKofkx+chJWDjitX+KsJuAmrzWhwyOMSHzPhzOHOy7u9HlFoa5MoAOJ/Zg==", + "cpu": [ + "wasm32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "dependencies": { + "@napi-rs/wasm-runtime": "^1.1.1" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@rolldown/binding-win32-arm64-msvc": { + "version": "1.0.0-rc.12", + "resolved": 
"https://registry.npmjs.org/@rolldown/binding-win32-arm64-msvc/-/binding-win32-arm64-msvc-1.0.0-rc.12.tgz", + "integrity": "sha512-5eOND4duWkwx1AzCxadcOrNeighiLwMInEADT0YM7xeEOOFcovWZCq8dadXgcRHSf3Ulh1kFo/qvzoFiCLOL1Q==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@rolldown/binding-win32-x64-msvc": { + "version": "1.0.0-rc.12", + "resolved": "https://registry.npmjs.org/@rolldown/binding-win32-x64-msvc/-/binding-win32-x64-msvc-1.0.0-rc.12.tgz", + "integrity": "sha512-PyqoipaswDLAZtot351MLhrlrh6lcZPo2LSYE+VDxbVk24LVKAGOuE4hb8xZQmrPAuEtTZW8E6D2zc5EUZX4Lw==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": "^20.19.0 || >=22.12.0" + } + }, + "node_modules/@rolldown/pluginutils": { + "version": "1.0.0-rc.7", + "resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-rc.7.tgz", + "integrity": "sha512-qujRfC8sFVInYSPPMLQByRh7zhwkGFS4+tyMQ83srV1qrxL4g8E2tyxVVyxd0+8QeBM1mIk9KbWxkegRr76XzA==", + "dev": true, + "license": "MIT" + }, + "node_modules/@rollup/rollup-android-arm-eabi": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.60.0.tgz", + "integrity": "sha512-WOhNW9K8bR3kf4zLxbfg6Pxu2ybOUbB2AjMDHSQx86LIF4rH4Ft7vmMwNt0loO0eonglSNy4cpD3MKXXKQu0/A==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ] + }, + "node_modules/@rollup/rollup-android-arm64": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.60.0.tgz", + "integrity": "sha512-u6JHLll5QKRvjciE78bQXDmqRqNs5M/3GVqZeMwvmjaNODJih/WIrJlFVEihvV0MiYFmd+ZyPr9wxOVbPAG2Iw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ] + }, + 
"node_modules/@rollup/rollup-darwin-arm64": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.60.0.tgz", + "integrity": "sha512-qEF7CsKKzSRc20Ciu2Zw1wRrBz4g56F7r/vRwY430UPp/nt1x21Q/fpJ9N5l47WWvJlkNCPJz3QRVw008fi7yA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@rollup/rollup-darwin-x64": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.60.0.tgz", + "integrity": "sha512-WADYozJ4QCnXCH4wPB+3FuGmDPoFseVCUrANmA5LWwGmC6FL14BWC7pcq+FstOZv3baGX65tZ378uT6WG8ynTw==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@rollup/rollup-freebsd-arm64": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.60.0.tgz", + "integrity": "sha512-6b8wGHJlDrGeSE3aH5mGNHBjA0TTkxdoNHik5EkvPHCt351XnigA4pS7Wsj/Eo9Y8RBU6f35cjN9SYmCFBtzxw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ] + }, + "node_modules/@rollup/rollup-freebsd-x64": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.60.0.tgz", + "integrity": "sha512-h25Ga0t4jaylMB8M/JKAyrvvfxGRjnPQIR8lnCayyzEjEOx2EJIlIiMbhpWxDRKGKF8jbNH01NnN663dH638mA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ] + }, + "node_modules/@rollup/rollup-linux-arm-gnueabihf": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.60.0.tgz", + "integrity": "sha512-RzeBwv0B3qtVBWtcuABtSuCzToo2IEAIQrcyB/b2zMvBWVbjo8bZDjACUpnaafaxhTw2W+imQbP2BD1usasK4g==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + 
"linux" + ] + }, + "node_modules/@rollup/rollup-linux-arm-musleabihf": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.60.0.tgz", + "integrity": "sha512-Sf7zusNI2CIU1HLzuu9Tc5YGAHEZs5Lu7N1ssJG4Tkw6e0MEsN7NdjUDDfGNHy2IU+ENyWT+L2obgWiguWibWQ==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-arm64-gnu": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.60.0.tgz", + "integrity": "sha512-DX2x7CMcrJzsE91q7/O02IJQ5/aLkVtYFryqCjduJhUfGKG6yJV8hxaw8pZa93lLEpPTP/ohdN4wFz7yp/ry9A==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-arm64-musl": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.60.0.tgz", + "integrity": "sha512-09EL+yFVbJZlhcQfShpswwRZ0Rg+z/CsSELFCnPt3iK+iqwGsI4zht3secj5vLEs957QvFFXnzAT0FFPIxSrkQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-loong64-gnu": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-gnu/-/rollup-linux-loong64-gnu-4.60.0.tgz", + "integrity": "sha512-i9IcCMPr3EXm8EQg5jnja0Zyc1iFxJjZWlb4wr7U2Wx/GrddOuEafxRdMPRYVaXjgbhvqalp6np07hN1w9kAKw==", + "cpu": [ + "loong64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-loong64-musl": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-musl/-/rollup-linux-loong64-musl-4.60.0.tgz", + "integrity": "sha512-DGzdJK9kyJ+B78MCkWeGnpXJ91tK/iKA6HwHxF4TAlPIY7GXEvMe8hBFRgdrR9Ly4qebR/7gfUs9y2IoaVEyog==", + "cpu": [ + 
"loong64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-ppc64-gnu": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-gnu/-/rollup-linux-ppc64-gnu-4.60.0.tgz", + "integrity": "sha512-RwpnLsqC8qbS8z1H1AxBA1H6qknR4YpPR9w2XX0vo2Sz10miu57PkNcnHVaZkbqyw/kUWfKMI73jhmfi9BRMUQ==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-ppc64-musl": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-musl/-/rollup-linux-ppc64-musl-4.60.0.tgz", + "integrity": "sha512-Z8pPf54Ly3aqtdWC3G4rFigZgNvd+qJlOE52fmko3KST9SoGfAdSRCwyoyG05q1HrrAblLbk1/PSIV+80/pxLg==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-riscv64-gnu": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.60.0.tgz", + "integrity": "sha512-3a3qQustp3COCGvnP4SvrMHnPQ9d1vzCakQVRTliaz8cIp/wULGjiGpbcqrkv0WrHTEp8bQD/B3HBjzujVWLOA==", + "cpu": [ + "riscv64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-riscv64-musl": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-musl/-/rollup-linux-riscv64-musl-4.60.0.tgz", + "integrity": "sha512-pjZDsVH/1VsghMJ2/kAaxt6dL0psT6ZexQVrijczOf+PeP2BUqTHYejk3l6TlPRydggINOeNRhvpLa0AYpCWSQ==", + "cpu": [ + "riscv64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-s390x-gnu": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.60.0.tgz", + "integrity": 
"sha512-3ObQs0BhvPgiUVZrN7gqCSvmFuMWvWvsjG5ayJ3Lraqv+2KhOsp+pUbigqbeWqueGIsnn+09HBw27rJ+gYK4VQ==", + "cpu": [ + "s390x" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-x64-gnu": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.60.0.tgz", + "integrity": "sha512-EtylprDtQPdS5rXvAayrNDYoJhIz1/vzN2fEubo3yLE7tfAw+948dO0g4M0vkTVFhKojnF+n6C8bDNe+gDRdTg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-x64-musl": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.60.0.tgz", + "integrity": "sha512-k09oiRCi/bHU9UVFqD17r3eJR9bn03TyKraCrlz5ULFJGdJGi7VOmm9jl44vOJvRJ6P7WuBi/s2A97LxxHGIdw==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-openbsd-x64": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-openbsd-x64/-/rollup-openbsd-x64-4.60.0.tgz", + "integrity": "sha512-1o/0/pIhozoSaDJoDcec+IVLbnRtQmHwPV730+AOD29lHEEo4F5BEUB24H0OBdhbBBDwIOSuf7vgg0Ywxdfiiw==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ] + }, + "node_modules/@rollup/rollup-openharmony-arm64": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-openharmony-arm64/-/rollup-openharmony-arm64-4.60.0.tgz", + "integrity": "sha512-pESDkos/PDzYwtyzB5p/UoNU/8fJo68vcXM9ZW2V0kjYayj1KaaUfi1NmTUTUpMn4UhU4gTuK8gIaFO4UGuMbA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openharmony" + ] + }, + "node_modules/@rollup/rollup-win32-arm64-msvc": { + "version": "4.60.0", + "resolved": 
"https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.60.0.tgz", + "integrity": "sha512-hj1wFStD7B1YBeYmvY+lWXZ7ey73YGPcViMShYikqKT1GtstIKQAtfUI6yrzPjAy/O7pO0VLXGmUVWXQMaYgTQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@rollup/rollup-win32-ia32-msvc": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.60.0.tgz", + "integrity": "sha512-SyaIPFoxmUPlNDq5EHkTbiKzmSEmq/gOYFI/3HHJ8iS/v1mbugVa7dXUzcJGQfoytp9DJFLhHH4U3/eTy2Bq4w==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@rollup/rollup-win32-x64-gnu": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-gnu/-/rollup-win32-x64-gnu-4.60.0.tgz", + "integrity": "sha512-RdcryEfzZr+lAr5kRm2ucN9aVlCCa2QNq4hXelZxb8GG0NJSazq44Z3PCCc8wISRuCVnGs0lQJVX5Vp6fKA+IA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@rollup/rollup-win32-x64-msvc": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.60.0.tgz", + "integrity": "sha512-PrsWNQ8BuE00O3Xsx3ALh2Df8fAj9+cvvX9AIA6o4KpATR98c9mud4XtDWVvsEuyia5U4tVSTKygawyJkjm60w==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@testing-library/dom": { + "version": "10.4.1", + "resolved": "https://registry.npmjs.org/@testing-library/dom/-/dom-10.4.1.tgz", + "integrity": "sha512-o4PXJQidqJl82ckFaXUeoAW+XysPLauYI43Abki5hABd853iMhitooc6znOnczgbTYmEP6U6/y1ZyKAIsvMKGg==", + "dev": true, + "license": "MIT", + "peer": true, + "dependencies": { + "@babel/code-frame": "^7.10.4", + "@babel/runtime": "^7.12.5", + "@types/aria-query": "^5.0.1", + "aria-query": "5.3.0", + 
"dom-accessibility-api": "^0.5.9", + "lz-string": "^1.5.0", + "picocolors": "1.1.1", + "pretty-format": "^27.0.2" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@testing-library/jest-dom": { + "version": "6.9.1", + "resolved": "https://registry.npmjs.org/@testing-library/jest-dom/-/jest-dom-6.9.1.tgz", + "integrity": "sha512-zIcONa+hVtVSSep9UT3jZ5rizo2BsxgyDYU7WFD5eICBE7no3881HGeb/QkGfsJs6JTkY1aQhT7rIPC7e+0nnA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@adobe/css-tools": "^4.4.0", + "aria-query": "^5.0.0", + "css.escape": "^1.5.1", + "dom-accessibility-api": "^0.6.3", + "picocolors": "^1.1.1", + "redent": "^3.0.0" + }, + "engines": { + "node": ">=14", + "npm": ">=6", + "yarn": ">=1" + } + }, + "node_modules/@testing-library/jest-dom/node_modules/dom-accessibility-api": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/dom-accessibility-api/-/dom-accessibility-api-0.6.3.tgz", + "integrity": "sha512-7ZgogeTnjuHbo+ct10G9Ffp0mif17idi0IyWNVA/wcwcm7NPOD/WEHVP3n7n3MhXqxoIYm8d6MuZohYWIZ4T3w==", + "dev": true, + "license": "MIT" + }, + "node_modules/@testing-library/react": { + "version": "16.3.2", + "resolved": "https://registry.npmjs.org/@testing-library/react/-/react-16.3.2.tgz", + "integrity": "sha512-XU5/SytQM+ykqMnAnvB2umaJNIOsLF3PVv//1Ew4CTcpz0/BRyy/af40qqrt7SjKpDdT1saBMc42CUok5gaw+g==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.12.5" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "@testing-library/dom": "^10.0.0", + "@types/react": "^18.0.0 || ^19.0.0", + "@types/react-dom": "^18.0.0 || ^19.0.0", + "react": "^18.0.0 || ^19.0.0", + "react-dom": "^18.0.0 || ^19.0.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@tybys/wasm-util": { + "version": "0.10.1", + "resolved": "https://registry.npmjs.org/@tybys/wasm-util/-/wasm-util-0.10.1.tgz", + "integrity": 
"sha512-9tTaPJLSiejZKx+Bmog4uSubteqTvFrVrURwkmHixBo0G4seD0zUxp98E1DzUBJxLQ3NPwXrGKDiVjwx/DpPsg==", + "dev": true, + "license": "MIT", + "optional": true, + "dependencies": { + "tslib": "^2.4.0" + } + }, + "node_modules/@types/aria-query": { + "version": "5.0.4", + "resolved": "https://registry.npmjs.org/@types/aria-query/-/aria-query-5.0.4.tgz", + "integrity": "sha512-rfT93uj5s0PRL7EzccGMs3brplhcrghnDoV26NqKhCAS1hVo+WdNsPvE/yb6ilfr5hi2MEk6d5EWJTKdxg8jVw==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/chai": { + "version": "5.2.3", + "resolved": "https://registry.npmjs.org/@types/chai/-/chai-5.2.3.tgz", + "integrity": "sha512-Mw558oeA9fFbv65/y4mHtXDs9bPnFMZAL/jxdPFUpOHHIXX91mcgEHbS5Lahr+pwZFR8A7GQleRWeI6cGFC2UA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/deep-eql": "*", + "assertion-error": "^2.0.1" + } + }, + "node_modules/@types/deep-eql": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/@types/deep-eql/-/deep-eql-4.0.2.tgz", + "integrity": "sha512-c9h9dVVMigMPc4bwTvC5dxqtqJZwQPePsWjPlpSOnojbor6pGqdk541lfA7AqFQr5pB1BRdq0juY9db81BwyFw==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/estree": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz", + "integrity": "sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/json-schema": { + "version": "7.0.15", + "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.15.tgz", + "integrity": "sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/node": { + "version": "24.12.0", + "resolved": "https://registry.npmjs.org/@types/node/-/node-24.12.0.tgz", + "integrity": "sha512-GYDxsZi3ChgmckRT9HPU0WEhKLP08ev/Yfcq2AstjrDASOYCSXeyjDsHg4v5t4jOj7cyDX3vmprafKlWIG9MXQ==", + "dev": 
true, + "license": "MIT", + "peer": true, + "dependencies": { + "undici-types": "~7.16.0" + } + }, + "node_modules/@types/react": { + "version": "19.2.14", + "resolved": "https://registry.npmjs.org/@types/react/-/react-19.2.14.tgz", + "integrity": "sha512-ilcTH/UniCkMdtexkoCN0bI7pMcJDvmQFPvuPvmEaYA/NSfFTAgdUSLAoVjaRJm7+6PvcM+q1zYOwS4wTYMF9w==", + "dev": true, + "license": "MIT", + "peer": true, + "dependencies": { + "csstype": "^3.2.2" + } + }, + "node_modules/@types/react-dom": { + "version": "19.2.3", + "resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-19.2.3.tgz", + "integrity": "sha512-jp2L/eY6fn+KgVVQAOqYItbF0VY/YApe5Mz2F0aykSO8gx31bYCZyvSeYxCHKvzHG5eZjc+zyaS5BrBWya2+kQ==", + "dev": true, + "license": "MIT", + "peer": true, + "peerDependencies": { + "@types/react": "^19.2.0" + } + }, + "node_modules/@typescript-eslint/eslint-plugin": { + "version": "8.57.2", + "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.57.2.tgz", + "integrity": "sha512-NZZgp0Fm2IkD+La5PR81sd+g+8oS6JwJje+aRWsDocxHkjyRw0J5L5ZTlN3LI1LlOcGL7ph3eaIUmTXMIjLk0w==", + "dev": true, + "license": "MIT", + "dependencies": { + "@eslint-community/regexpp": "^4.12.2", + "@typescript-eslint/scope-manager": "8.57.2", + "@typescript-eslint/type-utils": "8.57.2", + "@typescript-eslint/utils": "8.57.2", + "@typescript-eslint/visitor-keys": "8.57.2", + "ignore": "^7.0.5", + "natural-compare": "^1.4.0", + "ts-api-utils": "^2.4.0" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + }, + "peerDependencies": { + "@typescript-eslint/parser": "^8.57.2", + "eslint": "^8.57.0 || ^9.0.0 || ^10.0.0", + "typescript": ">=4.8.4 <6.0.0" + } + }, + "node_modules/@typescript-eslint/eslint-plugin/node_modules/ignore": { + "version": "7.0.5", + "resolved": "https://registry.npmjs.org/ignore/-/ignore-7.0.5.tgz", + "integrity": 
"sha512-Hs59xBNfUIunMFgWAbGX5cq6893IbWg4KnrjbYwX3tx0ztorVgTDA6B2sxf8ejHJ4wz8BqGUMYlnzNBer5NvGg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 4" + } + }, + "node_modules/@typescript-eslint/parser": { + "version": "8.57.2", + "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.57.2.tgz", + "integrity": "sha512-30ScMRHIAD33JJQkgfGW1t8CURZtjc2JpTrq5n2HFhOefbAhb7ucc7xJwdWcrEtqUIYJ73Nybpsggii6GtAHjA==", + "dev": true, + "license": "MIT", + "peer": true, + "dependencies": { + "@typescript-eslint/scope-manager": "8.57.2", + "@typescript-eslint/types": "8.57.2", + "@typescript-eslint/typescript-estree": "8.57.2", + "@typescript-eslint/visitor-keys": "8.57.2", + "debug": "^4.4.3" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + }, + "peerDependencies": { + "eslint": "^8.57.0 || ^9.0.0 || ^10.0.0", + "typescript": ">=4.8.4 <6.0.0" + } + }, + "node_modules/@typescript-eslint/project-service": { + "version": "8.57.2", + "resolved": "https://registry.npmjs.org/@typescript-eslint/project-service/-/project-service-8.57.2.tgz", + "integrity": "sha512-FuH0wipFywXRTHf+bTTjNyuNQQsQC3qh/dYzaM4I4W0jrCqjCVuUh99+xd9KamUfmCGPvbO8NDngo/vsnNVqgw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@typescript-eslint/tsconfig-utils": "^8.57.2", + "@typescript-eslint/types": "^8.57.2", + "debug": "^4.4.3" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + }, + "peerDependencies": { + "typescript": ">=4.8.4 <6.0.0" + } + }, + "node_modules/@typescript-eslint/scope-manager": { + "version": "8.57.2", + "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-8.57.2.tgz", + "integrity": 
"sha512-snZKH+W4WbWkrBqj4gUNRIGb/jipDW3qMqVJ4C9rzdFc+wLwruxk+2a5D+uoFcKPAqyqEnSb4l2ULuZf95eSkw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@typescript-eslint/types": "8.57.2", + "@typescript-eslint/visitor-keys": "8.57.2" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + } + }, + "node_modules/@typescript-eslint/tsconfig-utils": { + "version": "8.57.2", + "resolved": "https://registry.npmjs.org/@typescript-eslint/tsconfig-utils/-/tsconfig-utils-8.57.2.tgz", + "integrity": "sha512-3Lm5DSM+DCowsUOJC+YqHHnKEfFh5CoGkj5Z31NQSNF4l5wdOwqGn99wmwN/LImhfY3KJnmordBq/4+VDe2eKw==", + "dev": true, + "license": "MIT", + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + }, + "peerDependencies": { + "typescript": ">=4.8.4 <6.0.0" + } + }, + "node_modules/@typescript-eslint/type-utils": { + "version": "8.57.2", + "resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-8.57.2.tgz", + "integrity": "sha512-Co6ZCShm6kIbAM/s+oYVpKFfW7LBc6FXoPXjTRQ449PPNBY8U0KZXuevz5IFuuUj2H9ss40atTaf9dlGLzbWZg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@typescript-eslint/types": "8.57.2", + "@typescript-eslint/typescript-estree": "8.57.2", + "@typescript-eslint/utils": "8.57.2", + "debug": "^4.4.3", + "ts-api-utils": "^2.4.0" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + }, + "peerDependencies": { + "eslint": "^8.57.0 || ^9.0.0 || ^10.0.0", + "typescript": ">=4.8.4 <6.0.0" + } + }, + "node_modules/@typescript-eslint/types": { + "version": "8.57.2", + "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-8.57.2.tgz", + "integrity": 
"sha512-/iZM6FnM4tnx9csuTxspMW4BOSegshwX5oBDznJ7S4WggL7Vczz5d2W11ecc4vRrQMQHXRSxzrCsyG5EsPPTbA==", + "dev": true, + "license": "MIT", + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + } + }, + "node_modules/@typescript-eslint/typescript-estree": { + "version": "8.57.2", + "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-8.57.2.tgz", + "integrity": "sha512-2MKM+I6g8tJxfSmFKOnHv2t8Sk3T6rF20A1Puk0svLK+uVapDZB/4pfAeB7nE83uAZrU6OxW+HmOd5wHVdXwXA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@typescript-eslint/project-service": "8.57.2", + "@typescript-eslint/tsconfig-utils": "8.57.2", + "@typescript-eslint/types": "8.57.2", + "@typescript-eslint/visitor-keys": "8.57.2", + "debug": "^4.4.3", + "minimatch": "^10.2.2", + "semver": "^7.7.3", + "tinyglobby": "^0.2.15", + "ts-api-utils": "^2.4.0" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + }, + "peerDependencies": { + "typescript": ">=4.8.4 <6.0.0" + } + }, + "node_modules/@typescript-eslint/typescript-estree/node_modules/balanced-match": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-4.0.4.tgz", + "integrity": "sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA==", + "dev": true, + "license": "MIT", + "engines": { + "node": "18 || 20 || >=22" + } + }, + "node_modules/@typescript-eslint/typescript-estree/node_modules/brace-expansion": { + "version": "5.0.5", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.5.tgz", + "integrity": "sha512-VZznLgtwhn+Mact9tfiwx64fA9erHH/MCXEUfB/0bX/6Fz6ny5EGTXYltMocqg4xFAQZtnO3DHWWXi8RiuN7cQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "balanced-match": 
"^4.0.2" + }, + "engines": { + "node": "18 || 20 || >=22" + } + }, + "node_modules/@typescript-eslint/typescript-estree/node_modules/minimatch": { + "version": "10.2.4", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.4.tgz", + "integrity": "sha512-oRjTw/97aTBN0RHbYCdtF1MQfvusSIBQM0IZEgzl6426+8jSC0nF1a/GmnVLpfB9yyr6g6FTqWqiZVbxrtaCIg==", + "dev": true, + "license": "BlueOak-1.0.0", + "dependencies": { + "brace-expansion": "^5.0.2" + }, + "engines": { + "node": "18 || 20 || >=22" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/@typescript-eslint/typescript-estree/node_modules/semver": { + "version": "7.7.4", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz", + "integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==", + "dev": true, + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/@typescript-eslint/utils": { + "version": "8.57.2", + "resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.57.2.tgz", + "integrity": "sha512-krRIbvPK1ju1WBKIefiX+bngPs+odIQUtR7kymzPfo1POVw3jlF+nLkmexdSSd4UCbDcQn+wMBATOOmpBbqgKg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@eslint-community/eslint-utils": "^4.9.1", + "@typescript-eslint/scope-manager": "8.57.2", + "@typescript-eslint/types": "8.57.2", + "@typescript-eslint/typescript-estree": "8.57.2" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + }, + "peerDependencies": { + "eslint": "^8.57.0 || ^9.0.0 || ^10.0.0", + "typescript": ">=4.8.4 <6.0.0" + } + }, + "node_modules/@typescript-eslint/visitor-keys": { + "version": "8.57.2", + "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-8.57.2.tgz", + "integrity": 
"sha512-zhahknjobV2FiD6Ee9iLbS7OV9zi10rG26odsQdfBO/hjSzUQbkIYgda+iNKK1zNiW2ey+Lf8MU5btN17V3dUw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@typescript-eslint/types": "8.57.2", + "eslint-visitor-keys": "^5.0.0" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + } + }, + "node_modules/@typescript-eslint/visitor-keys/node_modules/eslint-visitor-keys": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-5.0.1.tgz", + "integrity": "sha512-tD40eHxA35h0PEIZNeIjkHoDR4YjjJp34biM0mDvplBe//mB+IHCqHDGV7pxF+7MklTvighcCPPZC7ynWyjdTA==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": "^20.19.0 || ^22.13.0 || >=24" + }, + "funding": { + "url": "https://opencollective.com/eslint" + } + }, + "node_modules/@vitejs/plugin-react": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/@vitejs/plugin-react/-/plugin-react-6.0.1.tgz", + "integrity": "sha512-l9X/E3cDb+xY3SWzlG1MOGt2usfEHGMNIaegaUGFsLkb3RCn/k8/TOXBcab+OndDI4TBtktT8/9BwwW8Vi9KUQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@rolldown/pluginutils": "1.0.0-rc.7" + }, + "engines": { + "node": "^20.19.0 || >=22.12.0" + }, + "peerDependencies": { + "@rolldown/plugin-babel": "^0.1.7 || ^0.2.0", + "babel-plugin-react-compiler": "^1.0.0", + "vite": "^8.0.0" + }, + "peerDependenciesMeta": { + "@rolldown/plugin-babel": { + "optional": true + }, + "babel-plugin-react-compiler": { + "optional": true + } + } + }, + "node_modules/@vitest/expect": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/@vitest/expect/-/expect-3.2.4.tgz", + "integrity": "sha512-Io0yyORnB6sikFlt8QW5K7slY4OjqNX9jmJQ02QDda8lyM6B5oNgVWoSoKPac8/kgnCUzuHQKrSLtu/uOqqrig==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/chai": "^5.2.2", + "@vitest/spy": "3.2.4", + "@vitest/utils": "3.2.4", + "chai": 
"^5.2.0", + "tinyrainbow": "^2.0.0" + }, + "funding": { + "url": "https://opencollective.com/vitest" + } + }, + "node_modules/@vitest/pretty-format": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/@vitest/pretty-format/-/pretty-format-3.2.4.tgz", + "integrity": "sha512-IVNZik8IVRJRTr9fxlitMKeJeXFFFN0JaB9PHPGQ8NKQbGpfjlTx9zO4RefN8gp7eqjNy8nyK3NZmBzOPeIxtA==", + "dev": true, + "license": "MIT", + "dependencies": { + "tinyrainbow": "^2.0.0" + }, + "funding": { + "url": "https://opencollective.com/vitest" + } + }, + "node_modules/@vitest/runner": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/@vitest/runner/-/runner-3.2.4.tgz", + "integrity": "sha512-oukfKT9Mk41LreEW09vt45f8wx7DordoWUZMYdY/cyAk7w5TWkTRCNZYF7sX7n2wB7jyGAl74OxgwhPgKaqDMQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@vitest/utils": "3.2.4", + "pathe": "^2.0.3", + "strip-literal": "^3.0.0" + }, + "funding": { + "url": "https://opencollective.com/vitest" + } + }, + "node_modules/@vitest/snapshot": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/@vitest/snapshot/-/snapshot-3.2.4.tgz", + "integrity": "sha512-dEYtS7qQP2CjU27QBC5oUOxLE/v5eLkGqPE0ZKEIDGMs4vKWe7IjgLOeauHsR0D5YuuycGRO5oSRXnwnmA78fQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@vitest/pretty-format": "3.2.4", + "magic-string": "^0.30.17", + "pathe": "^2.0.3" + }, + "funding": { + "url": "https://opencollective.com/vitest" + } + }, + "node_modules/@vitest/spy": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/@vitest/spy/-/spy-3.2.4.tgz", + "integrity": "sha512-vAfasCOe6AIK70iP5UD11Ac4siNUNJ9i/9PZ3NKx07sG6sUxeag1LWdNrMWeKKYBLlzuK+Gn65Yd5nyL6ds+nw==", + "dev": true, + "license": "MIT", + "dependencies": { + "tinyspy": "^4.0.3" + }, + "funding": { + "url": "https://opencollective.com/vitest" + } + }, + "node_modules/@vitest/utils": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/@vitest/utils/-/utils-3.2.4.tgz", + "integrity": 
"sha512-fB2V0JFrQSMsCo9HiSq3Ezpdv4iYaXRG1Sx8edX3MwxfyNn83mKiGzOcH+Fkxt4MHxr3y42fQi1oeAInqgX2QA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@vitest/pretty-format": "3.2.4", + "loupe": "^3.1.4", + "tinyrainbow": "^2.0.0" + }, + "funding": { + "url": "https://opencollective.com/vitest" + } + }, + "node_modules/acorn": { + "version": "8.16.0", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.16.0.tgz", + "integrity": "sha512-UVJyE9MttOsBQIDKw1skb9nAwQuR5wuGD3+82K6JgJlm/Y+KI92oNsMNGZCYdDsVtRHSak0pcV5Dno5+4jh9sw==", + "dev": true, + "license": "MIT", + "peer": true, + "bin": { + "acorn": "bin/acorn" + }, + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/acorn-jsx": { + "version": "5.3.2", + "resolved": "https://registry.npmjs.org/acorn-jsx/-/acorn-jsx-5.3.2.tgz", + "integrity": "sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ==", + "dev": true, + "license": "MIT", + "peerDependencies": { + "acorn": "^6.0.0 || ^7.0.0 || ^8.0.0" + } + }, + "node_modules/agent-base": { + "version": "7.1.4", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.4.tgz", + "integrity": "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 14" + } + }, + "node_modules/ajv": { + "version": "6.14.0", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.14.0.tgz", + "integrity": "sha512-IWrosm/yrn43eiKqkfkHis7QioDleaXQHdDVPKg0FSwwd/DuvyX79TZnFOnYpB7dcsFAMmtFztZuXPDvSePkFw==", + "dev": true, + "license": "MIT", + "dependencies": { + "fast-deep-equal": "^3.1.1", + "fast-json-stable-stringify": "^2.0.0", + "json-schema-traverse": "^0.4.1", + "uri-js": "^4.2.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + 
"integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dev": true, + "license": "MIT", + "dependencies": { + "color-convert": "^2.0.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/argparse": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", + "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", + "dev": true, + "license": "Python-2.0" + }, + "node_modules/aria-query": { + "version": "5.3.0", + "resolved": "https://registry.npmjs.org/aria-query/-/aria-query-5.3.0.tgz", + "integrity": "sha512-b0P0sZPKtyu8HkeRAfCq0IfURZK+SuwMjY1UXGBU27wpAiTwQAIlq56IbIO+ytk/JjS1fMR14ee5WBBfKi5J6A==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "dequal": "^2.0.3" + } + }, + "node_modules/assertion-error": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/assertion-error/-/assertion-error-2.0.1.tgz", + "integrity": "sha512-Izi8RQcffqCeNVgFigKli1ssklIbpHnCYc6AknXGYoB6grJqyeby7jv12JUQgmTAnIDnbck1uxksT4dzN3PWBA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12" + } + }, + "node_modules/balanced-match": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "dev": true, + "license": "MIT" + }, + "node_modules/baseline-browser-mapping": { + "version": "2.10.12", + "resolved": 
"https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.10.12.tgz", + "integrity": "sha512-qyq26DxfY4awP2gIRXhhLWfwzwI+N5Nxk6iQi8EFizIaWIjqicQTE4sLnZZVdeKPRcVNoJOkkpfzoIYuvCKaIQ==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "baseline-browser-mapping": "dist/cli.cjs" + }, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/bidi-js": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/bidi-js/-/bidi-js-1.0.3.tgz", + "integrity": "sha512-RKshQI1R3YQ+n9YJz2QQ147P66ELpa1FQEg20Dk8oW9t2KgLbpDLLp9aGZ7y8WHSshDknG0bknqGw5/tyCs5tw==", + "dev": true, + "license": "MIT", + "dependencies": { + "require-from-string": "^2.0.2" + } + }, + "node_modules/brace-expansion": { + "version": "1.1.13", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.13.tgz", + "integrity": "sha512-9ZLprWS6EENmhEOpjCYW2c8VkmOvckIJZfkr7rBW6dObmfgJ/L1GpSYW5Hpo9lDz4D1+n0Ckz8rU7FwHDQiG/w==", + "dev": true, + "license": "MIT", + "dependencies": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "node_modules/browserslist": { + "version": "4.28.1", + "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.28.1.tgz", + "integrity": "sha512-ZC5Bd0LgJXgwGqUknZY/vkUQ04r8NXnJZ3yYi4vDmSiZmC/pdSN0NbNRPxZpbtO4uAfDUAFffO8IZoM3Gj8IkA==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/browserslist" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "peer": true, + "dependencies": { + "baseline-browser-mapping": "^2.9.0", + "caniuse-lite": "^1.0.30001759", + "electron-to-chromium": "^1.5.263", + "node-releases": "^2.0.27", + "update-browserslist-db": "^1.2.0" + }, + "bin": { + "browserslist": "cli.js" + }, + "engines": { + "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7" + } + }, + 
"node_modules/cac": { + "version": "6.7.14", + "resolved": "https://registry.npmjs.org/cac/-/cac-6.7.14.tgz", + "integrity": "sha512-b6Ilus+c3RrdDk+JhLKUAQfzzgLEPy6wcXqS7f/xe1EETvsDP6GORG7SFuOs6cID5YkqchW/LXZbX5bc8j7ZcQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/callsites": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", + "integrity": "sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/caniuse-lite": { + "version": "1.0.30001781", + "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001781.tgz", + "integrity": "sha512-RdwNCyMsNBftLjW6w01z8bKEvT6e/5tpPVEgtn22TiLGlstHOVecsX2KHFkD5e/vRnIE4EGzpuIODb3mtswtkw==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/caniuse-lite" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "CC-BY-4.0" + }, + "node_modules/chai": { + "version": "5.3.3", + "resolved": "https://registry.npmjs.org/chai/-/chai-5.3.3.tgz", + "integrity": "sha512-4zNhdJD/iOjSH0A05ea+Ke6MU5mmpQcbQsSOkgdaUMJ9zTlDTD/GYlwohmIE2u0gaxHYiVHEn1Fw9mZ/ktJWgw==", + "dev": true, + "license": "MIT", + "dependencies": { + "assertion-error": "^2.0.1", + "check-error": "^2.1.1", + "deep-eql": "^5.0.1", + "loupe": "^3.1.0", + "pathval": "^2.0.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/chalk": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", + "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-styles": "^4.1.0", + "supports-color": "^7.1.0" + }, + 
"engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/chalk?sponsor=1" + } + }, + "node_modules/check-error": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/check-error/-/check-error-2.1.3.tgz", + "integrity": "sha512-PAJdDJusoxnwm1VwW07VWwUN1sl7smmC3OKggvndJFadxxDRyFJBX/ggnu/KE4kQAB7a3Dp8f/YXC1FlUprWmA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 16" + } + }, + "node_modules/color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" + } + }, + "node_modules/color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "dev": true, + "license": "MIT" + }, + "node_modules/concat-map": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", + "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==", + "dev": true, + "license": "MIT" + }, + "node_modules/convert-source-map": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-2.0.0.tgz", + "integrity": "sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==", + "dev": true, + "license": "MIT" + }, + "node_modules/cookie": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/cookie/-/cookie-1.1.1.tgz", + "integrity": "sha512-ei8Aos7ja0weRpFzJnEA9UHJ/7XQmqglbRwnf2ATjcB9Wq874VKH9kfjjirM6UhU2/E5fFYadylyhFldcqSidQ==", + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { 
+ "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/cross-spawn": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", + "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", + "dev": true, + "license": "MIT", + "dependencies": { + "path-key": "^3.1.0", + "shebang-command": "^2.0.0", + "which": "^2.0.1" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/css-tree": { + "version": "3.2.1", + "resolved": "https://registry.npmjs.org/css-tree/-/css-tree-3.2.1.tgz", + "integrity": "sha512-X7sjQzceUhu1u7Y/ylrRZFU2FS6LRiFVp6rKLPg23y3x3c3DOKAwuXGDp+PAGjh6CSnCjYeAul8pcT8bAl+lSA==", + "dev": true, + "license": "MIT", + "peer": true, + "dependencies": { + "mdn-data": "2.27.1", + "source-map-js": "^1.2.1" + }, + "engines": { + "node": "^10 || ^12.20.0 || ^14.13.0 || >=15.0.0" + } + }, + "node_modules/css.escape": { + "version": "1.5.1", + "resolved": "https://registry.npmjs.org/css.escape/-/css.escape-1.5.1.tgz", + "integrity": "sha512-YUifsXXuknHlUsmlgyY0PKzgPOr7/FjCePfHNt0jxm83wHZi44VDMQ7/fGNkjY3/jV1MC+1CmZbaHzugyeRtpg==", + "dev": true, + "license": "MIT" + }, + "node_modules/cssstyle": { + "version": "5.3.7", + "resolved": "https://registry.npmjs.org/cssstyle/-/cssstyle-5.3.7.tgz", + "integrity": "sha512-7D2EPVltRrsTkhpQmksIu+LxeWAIEk6wRDMJ1qljlv+CKHJM+cJLlfhWIzNA44eAsHXSNe3+vO6DW1yCYx8SuQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@asamuzakjp/css-color": "^4.1.1", + "@csstools/css-syntax-patches-for-csstree": "^1.0.21", + "css-tree": "^3.1.0", + "lru-cache": "^11.2.4" + }, + "engines": { + "node": ">=20" + } + }, + "node_modules/cssstyle/node_modules/lru-cache": { + "version": "11.2.7", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-11.2.7.tgz", + "integrity": "sha512-aY/R+aEsRelme17KGQa/1ZSIpLpNYYrhcrepKTZgE+W3WM16YMCaPwOHLHsmopZHELU0Ojin1lPVxKR0MihncA==", + "dev": true, + 
"license": "BlueOak-1.0.0", + "engines": { + "node": "20 || >=22" + } + }, + "node_modules/csstype": { + "version": "3.2.3", + "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.2.3.tgz", + "integrity": "sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/data-urls": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/data-urls/-/data-urls-6.0.1.tgz", + "integrity": "sha512-euIQENZg6x8mj3fO6o9+fOW8MimUI4PpD/fZBhJfeioZVy9TUpM4UY7KjQNVZFlqwJ0UdzRDzkycB997HEq1BQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "whatwg-mimetype": "^5.0.0", + "whatwg-url": "^15.1.0" + }, + "engines": { + "node": ">=20" + } + }, + "node_modules/data-urls/node_modules/whatwg-mimetype": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-5.0.0.tgz", + "integrity": "sha512-sXcNcHOC51uPGF0P/D4NVtrkjSU2fNsm9iog4ZvZJsL3rjoDAzXZhkm2MWt1y+PUdggKAYVoMAIYcs78wJ51Cw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=20" + } + }, + "node_modules/debug": { + "version": "4.4.3", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", + "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", + "dev": true, + "license": "MIT", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/decimal.js": { + "version": "10.6.0", + "resolved": "https://registry.npmjs.org/decimal.js/-/decimal.js-10.6.0.tgz", + "integrity": "sha512-YpgQiITW3JXGntzdUmyUR1V812Hn8T1YVXhCu+wO3OpS4eU9l4YdD3qjyiKdV6mvV29zapkMeD390UVEf2lkUg==", + "dev": true, + "license": "MIT" + }, + "node_modules/deep-eql": { + "version": "5.0.2", + "resolved": "https://registry.npmjs.org/deep-eql/-/deep-eql-5.0.2.tgz", + "integrity": 
"sha512-h5k/5U50IJJFpzfL6nO9jaaumfjO/f2NjK/oYB2Djzm4p9L+3T9qWpZqZ2hAbLPuuYq9wrU08WQyBTL5GbPk5Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/deep-is": { + "version": "0.1.4", + "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz", + "integrity": "sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/dequal": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/dequal/-/dequal-2.0.3.tgz", + "integrity": "sha512-0je+qPKHEMohvfRTCEo3CrPG6cAzAYgmzKyxRiYSSDkS6eGJdyVJm7WaYA5ECaAD9wLB2T4EEeymA5aFVcYXCA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/detect-libc": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.1.2.tgz", + "integrity": "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": ">=8" + } + }, + "node_modules/dom-accessibility-api": { + "version": "0.5.16", + "resolved": "https://registry.npmjs.org/dom-accessibility-api/-/dom-accessibility-api-0.5.16.tgz", + "integrity": "sha512-X7BJ2yElsnOJ30pZF4uIIDfBEVgF4XEBxL9Bxhy6dnrm5hkzqmsWHGTiHqRiITNhMyFLyAiWndIJP7Z1NTteDg==", + "dev": true, + "license": "MIT" + }, + "node_modules/electron-to-chromium": { + "version": "1.5.328", + "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.328.tgz", + "integrity": "sha512-QNQ5l45DzYytThO21403XN3FvK0hOkWDG8viNf6jqS42msJ8I4tGDSpBCgvDRRPnkffafiwAym2X2eHeGD2V0w==", + "dev": true, + "license": "ISC" + }, + "node_modules/entities": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/entities/-/entities-6.0.1.tgz", + "integrity": "sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g==", + "dev": true, + "license": 
"BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, + "node_modules/es-module-lexer": { + "version": "1.7.0", + "resolved": "https://registry.npmjs.org/es-module-lexer/-/es-module-lexer-1.7.0.tgz", + "integrity": "sha512-jEQoCwk8hyb2AZziIOLhDqpm5+2ww5uIE6lkO/6jcOCusfk6LhMHpXXfBLXTZ7Ydyt0j4VoUQv6uGNYbdW+kBA==", + "dev": true, + "license": "MIT" + }, + "node_modules/esbuild": { + "version": "0.27.4", + "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.27.4.tgz", + "integrity": "sha512-Rq4vbHnYkK5fws5NF7MYTU68FPRE1ajX7heQ/8QXXWqNgqqJ/GkmmyxIzUnf2Sr/bakf8l54716CcMGHYhMrrQ==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "peer": true, + "bin": { + "esbuild": "bin/esbuild" + }, + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "@esbuild/aix-ppc64": "0.27.4", + "@esbuild/android-arm": "0.27.4", + "@esbuild/android-arm64": "0.27.4", + "@esbuild/android-x64": "0.27.4", + "@esbuild/darwin-arm64": "0.27.4", + "@esbuild/darwin-x64": "0.27.4", + "@esbuild/freebsd-arm64": "0.27.4", + "@esbuild/freebsd-x64": "0.27.4", + "@esbuild/linux-arm": "0.27.4", + "@esbuild/linux-arm64": "0.27.4", + "@esbuild/linux-ia32": "0.27.4", + "@esbuild/linux-loong64": "0.27.4", + "@esbuild/linux-mips64el": "0.27.4", + "@esbuild/linux-ppc64": "0.27.4", + "@esbuild/linux-riscv64": "0.27.4", + "@esbuild/linux-s390x": "0.27.4", + "@esbuild/linux-x64": "0.27.4", + "@esbuild/netbsd-arm64": "0.27.4", + "@esbuild/netbsd-x64": "0.27.4", + "@esbuild/openbsd-arm64": "0.27.4", + "@esbuild/openbsd-x64": "0.27.4", + "@esbuild/openharmony-arm64": "0.27.4", + "@esbuild/sunos-x64": "0.27.4", + "@esbuild/win32-arm64": "0.27.4", + "@esbuild/win32-ia32": "0.27.4", + "@esbuild/win32-x64": "0.27.4" + } + }, + "node_modules/escalade": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz", + "integrity": 
"sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/escape-string-regexp": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz", + "integrity": "sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/eslint": { + "version": "9.39.4", + "resolved": "https://registry.npmjs.org/eslint/-/eslint-9.39.4.tgz", + "integrity": "sha512-XoMjdBOwe/esVgEvLmNsD3IRHkm7fbKIUGvrleloJXUZgDHig2IPWNniv+GwjyJXzuNqVjlr5+4yVUZjycJwfQ==", + "dev": true, + "license": "MIT", + "peer": true, + "dependencies": { + "@eslint-community/eslint-utils": "^4.8.0", + "@eslint-community/regexpp": "^4.12.1", + "@eslint/config-array": "^0.21.2", + "@eslint/config-helpers": "^0.4.2", + "@eslint/core": "^0.17.0", + "@eslint/eslintrc": "^3.3.5", + "@eslint/js": "9.39.4", + "@eslint/plugin-kit": "^0.4.1", + "@humanfs/node": "^0.16.6", + "@humanwhocodes/module-importer": "^1.0.1", + "@humanwhocodes/retry": "^0.4.2", + "@types/estree": "^1.0.6", + "ajv": "^6.14.0", + "chalk": "^4.0.0", + "cross-spawn": "^7.0.6", + "debug": "^4.3.2", + "escape-string-regexp": "^4.0.0", + "eslint-scope": "^8.4.0", + "eslint-visitor-keys": "^4.2.1", + "espree": "^10.4.0", + "esquery": "^1.5.0", + "esutils": "^2.0.2", + "fast-deep-equal": "^3.1.3", + "file-entry-cache": "^8.0.0", + "find-up": "^5.0.0", + "glob-parent": "^6.0.2", + "ignore": "^5.2.0", + "imurmurhash": "^0.1.4", + "is-glob": "^4.0.0", + "json-stable-stringify-without-jsonify": "^1.0.1", + "lodash.merge": "^4.6.2", + "minimatch": "^3.1.5", + "natural-compare": "^1.4.0", + "optionator": "^0.9.3" + }, + "bin": { + "eslint": "bin/eslint.js" + }, + "engines": { + 
"node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "url": "https://eslint.org/donate" + }, + "peerDependencies": { + "jiti": "*" + }, + "peerDependenciesMeta": { + "jiti": { + "optional": true + } + } + }, + "node_modules/eslint-plugin-react-hooks": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/eslint-plugin-react-hooks/-/eslint-plugin-react-hooks-7.0.1.tgz", + "integrity": "sha512-O0d0m04evaNzEPoSW+59Mezf8Qt0InfgGIBJnpC0h3NH/WjUAR7BIKUfysC6todmtiZ/A0oUVS8Gce0WhBrHsA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@babel/core": "^7.24.4", + "@babel/parser": "^7.24.4", + "hermes-parser": "^0.25.1", + "zod": "^3.25.0 || ^4.0.0", + "zod-validation-error": "^3.5.0 || ^4.0.0" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "eslint": "^3.0.0 || ^4.0.0 || ^5.0.0 || ^6.0.0 || ^7.0.0 || ^8.0.0-0 || ^9.0.0" + } + }, + "node_modules/eslint-plugin-react-refresh": { + "version": "0.5.2", + "resolved": "https://registry.npmjs.org/eslint-plugin-react-refresh/-/eslint-plugin-react-refresh-0.5.2.tgz", + "integrity": "sha512-hmgTH57GfzoTFjVN0yBwTggnsVUF2tcqi7RJZHqi9lIezSs4eFyAMktA68YD4r5kNw1mxyY4dmkyoFDb3FIqrA==", + "dev": true, + "license": "MIT", + "peerDependencies": { + "eslint": "^9 || ^10" + } + }, + "node_modules/eslint-scope": { + "version": "8.4.0", + "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-8.4.0.tgz", + "integrity": "sha512-sNXOfKCn74rt8RICKMvJS7XKV/Xk9kA7DyJr8mJik3S7Cwgy3qlkkmyS2uQB3jiJg6VNdZd/pDBJu0nvG2NlTg==", + "dev": true, + "license": "BSD-2-Clause", + "dependencies": { + "esrecurse": "^4.3.0", + "estraverse": "^5.2.0" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "url": "https://opencollective.com/eslint" + } + }, + "node_modules/eslint-visitor-keys": { + "version": "4.2.1", + "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-4.2.1.tgz", + "integrity": 
"sha512-Uhdk5sfqcee/9H/rCOJikYz67o0a2Tw2hGRPOG2Y1R2dg7brRe1uG0yaNQDHu+TO/uQPF/5eCapvYSmHUjt7JQ==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "url": "https://opencollective.com/eslint" + } + }, + "node_modules/espree": { + "version": "10.4.0", + "resolved": "https://registry.npmjs.org/espree/-/espree-10.4.0.tgz", + "integrity": "sha512-j6PAQ2uUr79PZhBjP5C5fhl8e39FmRnOjsD5lGnWrFU8i2G776tBK7+nP8KuQUTTyAZUwfQqXAgrVH5MbH9CYQ==", + "dev": true, + "license": "BSD-2-Clause", + "dependencies": { + "acorn": "^8.15.0", + "acorn-jsx": "^5.3.2", + "eslint-visitor-keys": "^4.2.1" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "url": "https://opencollective.com/eslint" + } + }, + "node_modules/esquery": { + "version": "1.7.0", + "resolved": "https://registry.npmjs.org/esquery/-/esquery-1.7.0.tgz", + "integrity": "sha512-Ap6G0WQwcU/LHsvLwON1fAQX9Zp0A2Y6Y/cJBl9r/JbW90Zyg4/zbG6zzKa2OTALELarYHmKu0GhpM5EO+7T0g==", + "dev": true, + "license": "BSD-3-Clause", + "dependencies": { + "estraverse": "^5.1.0" + }, + "engines": { + "node": ">=0.10" + } + }, + "node_modules/esrecurse": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/esrecurse/-/esrecurse-4.3.0.tgz", + "integrity": "sha512-KmfKL3b6G+RXvP8N1vr3Tq1kL/oCFgn2NYXEtqP8/L3pKapUA4G8cFVaoF3SU323CD4XypR/ffioHmkti6/Tag==", + "dev": true, + "license": "BSD-2-Clause", + "dependencies": { + "estraverse": "^5.2.0" + }, + "engines": { + "node": ">=4.0" + } + }, + "node_modules/estraverse": { + "version": "5.3.0", + "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz", + "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==", + "dev": true, + "license": "BSD-2-Clause", + "engines": { + "node": ">=4.0" + } + }, + "node_modules/estree-walker": { + "version": "3.0.3", + "resolved": 
"https://registry.npmjs.org/estree-walker/-/estree-walker-3.0.3.tgz", + "integrity": "sha512-7RUKfXgSMMkzt6ZuXmqapOurLGPPfgj6l9uRZ7lRGolvk0y2yocc35LdcxKC5PQZdn2DMqioAQ2NoWcrTKmm6g==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/estree": "^1.0.0" + } + }, + "node_modules/esutils": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", + "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==", + "dev": true, + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/expect-type": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/expect-type/-/expect-type-1.3.0.tgz", + "integrity": "sha512-knvyeauYhqjOYvQ66MznSMs83wmHrCycNEN6Ao+2AeYEfxUIkuiVxdEa1qlGEPK+We3n0THiDciYSsCcgW/DoA==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": ">=12.0.0" + } + }, + "node_modules/fast-deep-equal": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", + "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", + "dev": true, + "license": "MIT" + }, + "node_modules/fast-json-stable-stringify": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz", + "integrity": "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==", + "dev": true, + "license": "MIT" + }, + "node_modules/fast-levenshtein": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz", + "integrity": "sha512-DCXu6Ifhqcks7TZKY3Hxp3y6qphY5SJZmrWMDrKcERSOXWQdMhU9Ig/PYrzyw/ul9jOIyh0N4M0tbC5hodg8dw==", + "dev": true, + "license": "MIT" + }, + "node_modules/fdir": { + "version": "6.5.0", + "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz", + "integrity": 
"sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12.0.0" + }, + "peerDependencies": { + "picomatch": "^3 || ^4" + }, + "peerDependenciesMeta": { + "picomatch": { + "optional": true + } + } + }, + "node_modules/file-entry-cache": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/file-entry-cache/-/file-entry-cache-8.0.0.tgz", + "integrity": "sha512-XXTUwCvisa5oacNGRP9SfNtYBNAMi+RPwBFmblZEF7N7swHYQS6/Zfk7SRwx4D5j3CH211YNRco1DEMNVfZCnQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "flat-cache": "^4.0.0" + }, + "engines": { + "node": ">=16.0.0" + } + }, + "node_modules/find-up": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/find-up/-/find-up-5.0.0.tgz", + "integrity": "sha512-78/PXT1wlLLDgTzDs7sjq9hzz0vXD+zn+7wypEe4fXQxCmdmqfGsEPQxmiCSQI3ajFV91bVSsvNtrJRiW6nGng==", + "dev": true, + "license": "MIT", + "dependencies": { + "locate-path": "^6.0.0", + "path-exists": "^4.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/flat-cache": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/flat-cache/-/flat-cache-4.0.1.tgz", + "integrity": "sha512-f7ccFPK3SXFHpx15UIGyRJ/FJQctuKZ0zVuN3frBo4HnK3cay9VEW0R6yPYFHC0AgqhukPzKjq22t5DmAyqGyw==", + "dev": true, + "license": "MIT", + "dependencies": { + "flatted": "^3.2.9", + "keyv": "^4.5.4" + }, + "engines": { + "node": ">=16" + } + }, + "node_modules/flatted": { + "version": "3.4.2", + "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.4.2.tgz", + "integrity": "sha512-PjDse7RzhcPkIJwy5t7KPWQSZ9cAbzQXcafsetQoD7sOJRQlGikNbx7yZp2OotDnJyrDcbyRq3Ttb18iYOqkxA==", + "dev": true, + "license": "ISC" + }, + "node_modules/fsevents": { + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", + "integrity": 
"sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/gensync": { + "version": "1.0.0-beta.2", + "resolved": "https://registry.npmjs.org/gensync/-/gensync-1.0.0-beta.2.tgz", + "integrity": "sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/glob-parent": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-6.0.2.tgz", + "integrity": "sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A==", + "dev": true, + "license": "ISC", + "dependencies": { + "is-glob": "^4.0.3" + }, + "engines": { + "node": ">=10.13.0" + } + }, + "node_modules/globals": { + "version": "17.4.0", + "resolved": "https://registry.npmjs.org/globals/-/globals-17.4.0.tgz", + "integrity": "sha512-hjrNztw/VajQwOLsMNT1cbJiH2muO3OROCHnbehc8eY5JyD2gqz4AcMHPqgaOR59DjgUjYAYLeH699g/eWi2jw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/has-flag": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", + "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/hermes-estree": { + "version": "0.25.1", + "resolved": "https://registry.npmjs.org/hermes-estree/-/hermes-estree-0.25.1.tgz", + "integrity": "sha512-0wUoCcLp+5Ev5pDW2OriHC2MJCbwLwuRx+gAqMTOkGKJJiBCLjtrvy4PWUGn6MIVefecRpzoOZ/UV6iGdOr+Cw==", + "dev": true, + "license": "MIT" + }, + 
"node_modules/hermes-parser": { + "version": "0.25.1", + "resolved": "https://registry.npmjs.org/hermes-parser/-/hermes-parser-0.25.1.tgz", + "integrity": "sha512-6pEjquH3rqaI6cYAXYPcz9MS4rY6R4ngRgrgfDshRptUZIc3lw0MCIJIGDj9++mfySOuPTHB4nrSW99BCvOPIA==", + "dev": true, + "license": "MIT", + "dependencies": { + "hermes-estree": "0.25.1" + } + }, + "node_modules/html-encoding-sniffer": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/html-encoding-sniffer/-/html-encoding-sniffer-6.0.0.tgz", + "integrity": "sha512-CV9TW3Y3f8/wT0BRFc1/KAVQ3TUHiXmaAb6VW9vtiMFf7SLoMd1PdAc4W3KFOFETBJUb90KatHqlsZMWV+R9Gg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@exodus/bytes": "^1.6.0" + }, + "engines": { + "node": "^20.19.0 || ^22.12.0 || >=24.0.0" + } + }, + "node_modules/http-proxy-agent": { + "version": "7.0.2", + "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz", + "integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==", + "dev": true, + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.0", + "debug": "^4.3.4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/https-proxy-agent": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz", + "integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==", + "dev": true, + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/ignore": { + "version": "5.3.2", + "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.3.2.tgz", + "integrity": "sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 4" + } + }, + "node_modules/import-fresh": { + "version": "3.3.1", + "resolved": 
"https://registry.npmjs.org/import-fresh/-/import-fresh-3.3.1.tgz", + "integrity": "sha512-TR3KfrTZTYLPB6jUjfx6MF9WcWrHL9su5TObK4ZkYgBdWKPOFoSoQIdEuTuR82pmtxH2spWG9h6etwfr1pLBqQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "parent-module": "^1.0.0", + "resolve-from": "^4.0.0" + }, + "engines": { + "node": ">=6" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/imurmurhash": { + "version": "0.1.4", + "resolved": "https://registry.npmjs.org/imurmurhash/-/imurmurhash-0.1.4.tgz", + "integrity": "sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.8.19" + } + }, + "node_modules/indent-string": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/indent-string/-/indent-string-4.0.0.tgz", + "integrity": "sha512-EdDDZu4A2OyIK7Lr/2zG+w5jmbuk1DVBnEwREQvBzspBJkCEbRa8GxU1lghYcaGJCnRWibjDXlq779X1/y5xwg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/is-extglob": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", + "integrity": "sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/is-glob": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.3.tgz", + "integrity": "sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg==", + "dev": true, + "license": "MIT", + "dependencies": { + "is-extglob": "^2.1.1" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/is-potential-custom-element-name": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/is-potential-custom-element-name/-/is-potential-custom-element-name-1.0.1.tgz", + "integrity": 
"sha512-bCYeRA2rVibKZd+s2625gGnGF/t7DSqDs4dP7CrLA1m7jKWz6pps0LpYLJN8Q64HtmPKJ1hrN3nzPNKFEKOUiQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/isexe": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", + "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", + "dev": true, + "license": "ISC" + }, + "node_modules/js-tokens": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", + "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/js-yaml": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.1.tgz", + "integrity": "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==", + "dev": true, + "license": "MIT", + "dependencies": { + "argparse": "^2.0.1" + }, + "bin": { + "js-yaml": "bin/js-yaml.js" + } + }, + "node_modules/jsdom": { + "version": "27.4.0", + "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-27.4.0.tgz", + "integrity": "sha512-mjzqwWRD9Y1J1KUi7W97Gja1bwOOM5Ug0EZ6UDK3xS7j7mndrkwozHtSblfomlzyB4NepioNt+B2sOSzczVgtQ==", + "dev": true, + "license": "MIT", + "peer": true, + "dependencies": { + "@acemir/cssom": "^0.9.28", + "@asamuzakjp/dom-selector": "^6.7.6", + "@exodus/bytes": "^1.6.0", + "cssstyle": "^5.3.4", + "data-urls": "^6.0.0", + "decimal.js": "^10.6.0", + "html-encoding-sniffer": "^6.0.0", + "http-proxy-agent": "^7.0.2", + "https-proxy-agent": "^7.0.6", + "is-potential-custom-element-name": "^1.0.1", + "parse5": "^8.0.0", + "saxes": "^6.0.0", + "symbol-tree": "^3.2.4", + "tough-cookie": "^6.0.0", + "w3c-xmlserializer": "^5.0.0", + "webidl-conversions": "^8.0.0", + "whatwg-mimetype": "^4.0.0", + "whatwg-url": "^15.1.0", + "ws": "^8.18.3", + "xml-name-validator": "^5.0.0" + }, + "engines": { + "node": "^20.19.0 
|| ^22.12.0 || >=24.0.0" + }, + "peerDependencies": { + "canvas": "^3.0.0" + }, + "peerDependenciesMeta": { + "canvas": { + "optional": true + } + } + }, + "node_modules/jsesc": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/jsesc/-/jsesc-3.1.0.tgz", + "integrity": "sha512-/sM3dO2FOzXjKQhJuo0Q173wf2KOo8t4I8vHy6lF9poUp7bKT0/NHE8fPX23PwfhnykfqnC2xRxOnVw5XuGIaA==", + "dev": true, + "license": "MIT", + "bin": { + "jsesc": "bin/jsesc" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/json-buffer": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/json-buffer/-/json-buffer-3.0.1.tgz", + "integrity": "sha512-4bV5BfR2mqfQTJm+V5tPPdf+ZpuhiIvTuAB5g8kcrXOZpTT/QwwVRWBywX1ozr6lEuPdbHxwaJlm9G6mI2sfSQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/json-schema-traverse": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==", + "dev": true, + "license": "MIT" + }, + "node_modules/json-stable-stringify-without-jsonify": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/json-stable-stringify-without-jsonify/-/json-stable-stringify-without-jsonify-1.0.1.tgz", + "integrity": "sha512-Bdboy+l7tA3OGW6FjyFHWkP5LuByj1Tk33Ljyq0axyzdk9//JSi2u3fP1QSmd1KNwq6VOKYGlAu87CisVir6Pw==", + "dev": true, + "license": "MIT" + }, + "node_modules/json5": { + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", + "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==", + "dev": true, + "license": "MIT", + "bin": { + "json5": "lib/cli.js" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/keyv": { + "version": "4.5.4", + "resolved": "https://registry.npmjs.org/keyv/-/keyv-4.5.4.tgz", + "integrity": 
"sha512-oxVHkHR/EJf2CNXnWxRLW6mg7JyCCUcG0DtEGmL2ctUo1PNTin1PUil+r/+4r5MpVgC/fn1kjsx7mjSujKqIpw==", + "dev": true, + "license": "MIT", + "dependencies": { + "json-buffer": "3.0.1" + } + }, + "node_modules/levn": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/levn/-/levn-0.4.1.tgz", + "integrity": "sha512-+bT2uH4E5LGE7h/n3evcS/sQlJXCpIp6ym8OWJ5eV6+67Dsql/LaaT7qJBAt2rzfoa/5QBGBhxDix1dMt2kQKQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "prelude-ls": "^1.2.1", + "type-check": "~0.4.0" + }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/lightningcss": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss/-/lightningcss-1.32.0.tgz", + "integrity": "sha512-NXYBzinNrblfraPGyrbPoD19C1h9lfI/1mzgWYvXUTe414Gz/X1FD2XBZSZM7rRTrMA8JL3OtAaGifrIKhQ5yQ==", + "dev": true, + "license": "MPL-2.0", + "dependencies": { + "detect-libc": "^2.0.3" + }, + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + }, + "optionalDependencies": { + "lightningcss-android-arm64": "1.32.0", + "lightningcss-darwin-arm64": "1.32.0", + "lightningcss-darwin-x64": "1.32.0", + "lightningcss-freebsd-x64": "1.32.0", + "lightningcss-linux-arm-gnueabihf": "1.32.0", + "lightningcss-linux-arm64-gnu": "1.32.0", + "lightningcss-linux-arm64-musl": "1.32.0", + "lightningcss-linux-x64-gnu": "1.32.0", + "lightningcss-linux-x64-musl": "1.32.0", + "lightningcss-win32-arm64-msvc": "1.32.0", + "lightningcss-win32-x64-msvc": "1.32.0" + } + }, + "node_modules/lightningcss-android-arm64": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-android-arm64/-/lightningcss-android-arm64-1.32.0.tgz", + "integrity": "sha512-YK7/ClTt4kAK0vo6w3X+Pnm0D2cf2vPHbhOXdoNti1Ga0al1P4TBZhwjATvjNwLEBCnKvjJc2jQgHXH0NEwlAg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MPL-2.0", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">= 
12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-darwin-arm64": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-darwin-arm64/-/lightningcss-darwin-arm64-1.32.0.tgz", + "integrity": "sha512-RzeG9Ju5bag2Bv1/lwlVJvBE3q6TtXskdZLLCyfg5pt+HLz9BqlICO7LZM7VHNTTn/5PRhHFBSjk5lc4cmscPQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MPL-2.0", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-darwin-x64": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-darwin-x64/-/lightningcss-darwin-x64-1.32.0.tgz", + "integrity": "sha512-U+QsBp2m/s2wqpUYT/6wnlagdZbtZdndSmut/NJqlCcMLTWp5muCrID+K5UJ6jqD2BFshejCYXniPDbNh73V8w==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MPL-2.0", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-freebsd-x64": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-freebsd-x64/-/lightningcss-freebsd-x64-1.32.0.tgz", + "integrity": "sha512-JCTigedEksZk3tHTTthnMdVfGf61Fky8Ji2E4YjUTEQX14xiy/lTzXnu1vwiZe3bYe0q+SpsSH/CTeDXK6WHig==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MPL-2.0", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-linux-arm-gnueabihf": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-linux-arm-gnueabihf/-/lightningcss-linux-arm-gnueabihf-1.32.0.tgz", + "integrity": 
"sha512-x6rnnpRa2GL0zQOkt6rts3YDPzduLpWvwAF6EMhXFVZXD4tPrBkEFqzGowzCsIWsPjqSK+tyNEODUBXeeVHSkw==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MPL-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-linux-arm64-gnu": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-gnu/-/lightningcss-linux-arm64-gnu-1.32.0.tgz", + "integrity": "sha512-0nnMyoyOLRJXfbMOilaSRcLH3Jw5z9HDNGfT/gwCPgaDjnx0i8w7vBzFLFR1f6CMLKF8gVbebmkUN3fa/kQJpQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MPL-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-linux-arm64-musl": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-musl/-/lightningcss-linux-arm64-musl-1.32.0.tgz", + "integrity": "sha512-UpQkoenr4UJEzgVIYpI80lDFvRmPVg6oqboNHfoH4CQIfNA+HOrZ7Mo7KZP02dC6LjghPQJeBsvXhJod/wnIBg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MPL-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-linux-x64-gnu": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-gnu/-/lightningcss-linux-x64-gnu-1.32.0.tgz", + "integrity": "sha512-V7Qr52IhZmdKPVr+Vtw8o+WLsQJYCTd8loIfpDaMRWGUZfBOYEJeyJIkqGIDMZPwPx24pUMfwSxxI8phr/MbOA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MPL-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + 
"node_modules/lightningcss-linux-x64-musl": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-musl/-/lightningcss-linux-x64-musl-1.32.0.tgz", + "integrity": "sha512-bYcLp+Vb0awsiXg/80uCRezCYHNg1/l3mt0gzHnWV9XP1W5sKa5/TCdGWaR/zBM2PeF/HbsQv/j2URNOiVuxWg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MPL-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-win32-arm64-msvc": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-win32-arm64-msvc/-/lightningcss-win32-arm64-msvc-1.32.0.tgz", + "integrity": "sha512-8SbC8BR40pS6baCM8sbtYDSwEVQd4JlFTOlaD3gWGHfThTcABnNDBda6eTZeqbofalIJhFx0qKzgHJmcPTnGdw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MPL-2.0", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-win32-x64-msvc": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-win32-x64-msvc/-/lightningcss-win32-x64-msvc-1.32.0.tgz", + "integrity": "sha512-Amq9B/SoZYdDi1kFrojnoqPLxYhQ4Wo5XiL8EVJrVsB8ARoC1PWW6VGtT0WKCemjy8aC+louJnjS7U18x3b06Q==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MPL-2.0", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/locate-path": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-6.0.0.tgz", + "integrity": "sha512-iPZK6eYjbxRu3uB4/WZ3EsEIMJFMqAoopl3R+zuq0UjcAm/MO6KCweDgPfP3elTztoKP3KtnVHxTn2NHBSDVUw==", + "dev": true, + "license": "MIT", + "dependencies": { + "p-locate": "^5.0.0" + }, + "engines": { + "node": 
">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/lodash.merge": { + "version": "4.6.2", + "resolved": "https://registry.npmjs.org/lodash.merge/-/lodash.merge-4.6.2.tgz", + "integrity": "sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/loupe": { + "version": "3.2.1", + "resolved": "https://registry.npmjs.org/loupe/-/loupe-3.2.1.tgz", + "integrity": "sha512-CdzqowRJCeLU72bHvWqwRBBlLcMEtIvGrlvef74kMnV2AolS9Y8xUv1I0U/MNAWMhBlKIoyuEgoJ0t/bbwHbLQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/lru-cache": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz", + "integrity": "sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==", + "dev": true, + "license": "ISC", + "dependencies": { + "yallist": "^3.0.2" + } + }, + "node_modules/lz-string": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/lz-string/-/lz-string-1.5.0.tgz", + "integrity": "sha512-h5bgJWpxJNswbU7qCrV0tIKQCaS3blPDrqKWx+QxzuzL1zGUzij9XCWLrSLsJPu5t+eWA/ycetzYAO5IOMcWAQ==", + "dev": true, + "license": "MIT", + "bin": { + "lz-string": "bin/bin.js" + } + }, + "node_modules/magic-string": { + "version": "0.30.21", + "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.21.tgz", + "integrity": "sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/sourcemap-codec": "^1.5.5" + } + }, + "node_modules/mdn-data": { + "version": "2.27.1", + "resolved": "https://registry.npmjs.org/mdn-data/-/mdn-data-2.27.1.tgz", + "integrity": "sha512-9Yubnt3e8A0OKwxYSXyhLymGW4sCufcLG6VdiDdUGVkPhpqLxlvP5vl1983gQjJl3tqbrM731mjaZaP68AgosQ==", + "dev": true, + "license": "CC0-1.0" + }, + "node_modules/min-indent": { + "version": "1.0.1", + 
"resolved": "https://registry.npmjs.org/min-indent/-/min-indent-1.0.1.tgz", + "integrity": "sha512-I9jwMn07Sy/IwOj3zVkVik2JTvgpaykDZEigL6Rx6N9LbMywwUSMtxET+7lVoDLLd3O3IXwJwvuuns8UB/HeAg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=4" + } + }, + "node_modules/minimatch": { + "version": "3.1.5", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.5.tgz", + "integrity": "sha512-VgjWUsnnT6n+NUk6eZq77zeFdpW2LWDzP6zFGrCbHXiYNul5Dzqk2HHQ5uFH2DNW5Xbp8+jVzaeNt94ssEEl4w==", + "dev": true, + "license": "ISC", + "dependencies": { + "brace-expansion": "^1.1.7" + }, + "engines": { + "node": "*" + } + }, + "node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "dev": true, + "license": "MIT" + }, + "node_modules/nanoid": { + "version": "3.3.11", + "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.11.tgz", + "integrity": "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "bin": { + "nanoid": "bin/nanoid.cjs" + }, + "engines": { + "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" + } + }, + "node_modules/natural-compare": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz", + "integrity": "sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==", + "dev": true, + "license": "MIT" + }, + "node_modules/node-releases": { + "version": "2.0.36", + "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.36.tgz", + "integrity": "sha512-TdC8FSgHz8Mwtw9g5L4gR/Sh9XhSP/0DEkQxfEFXOpiul5IiHgHan2VhYYb6agDSfp4KuvltmGApc8HMgUrIkA==", + "dev": true, + "license": "MIT" + }, + "node_modules/optionator": { 
+ "version": "0.9.4", + "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz", + "integrity": "sha512-6IpQ7mKUxRcZNLIObR0hz7lxsapSSIYNZJwXPGeF0mTVqGKFIXj1DQcMoT22S3ROcLyY/rz0PWaWZ9ayWmad9g==", + "dev": true, + "license": "MIT", + "dependencies": { + "deep-is": "^0.1.3", + "fast-levenshtein": "^2.0.6", + "levn": "^0.4.1", + "prelude-ls": "^1.2.1", + "type-check": "^0.4.0", + "word-wrap": "^1.2.5" + }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/p-limit": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz", + "integrity": "sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "yocto-queue": "^0.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/p-locate": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-5.0.0.tgz", + "integrity": "sha512-LaNjtRWUBY++zB5nE/NwcaoMylSPk+S+ZHNB1TzdbMJMny6dynpAGt7X/tl/QYq3TIeE6nxHppbo2LGymrG5Pw==", + "dev": true, + "license": "MIT", + "dependencies": { + "p-limit": "^3.0.2" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/parent-module": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz", + "integrity": "sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==", + "dev": true, + "license": "MIT", + "dependencies": { + "callsites": "^3.0.0" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/parse5": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/parse5/-/parse5-8.0.0.tgz", + "integrity": "sha512-9m4m5GSgXjL4AjumKzq1Fgfp3Z8rsvjRNbnkVwfu2ImRqE5D0LnY2QfDen18FSY9C573YU5XxSapdHZTZ2WolA==", + "dev": true, + "license": "MIT", + 
"dependencies": { + "entities": "^6.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/path-exists": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", + "integrity": "sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/path-key": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", + "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/pathe": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/pathe/-/pathe-2.0.3.tgz", + "integrity": "sha512-WUjGcAqP1gQacoQe+OBJsFA7Ld4DyXuUIjZ5cc75cLHvJ7dtNsTugphxIADwspS+AraAUePCKrSVtPLFj/F88w==", + "dev": true, + "license": "MIT" + }, + "node_modules/pathval": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/pathval/-/pathval-2.0.1.tgz", + "integrity": "sha512-//nshmD55c46FuFw26xV/xFAaB5HF9Xdap7HJBBnrKdAd6/GxDBaNA1870O79+9ueg61cZLSVc+OaFlfmObYVQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 14.16" + } + }, + "node_modules/picocolors": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", + "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==", + "dev": true, + "license": "ISC" + }, + "node_modules/picomatch": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz", + "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": 
"https://github.com/sponsors/jonschlinkert" + } + }, + "node_modules/postcss": { + "version": "8.5.8", + "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.8.tgz", + "integrity": "sha512-OW/rX8O/jXnm82Ey1k44pObPtdblfiuWnrd8X7GJ7emImCOstunGbXUpp7HdBrFQX6rJzn3sPT397Wp5aCwCHg==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/postcss/" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/postcss" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "dependencies": { + "nanoid": "^3.3.11", + "picocolors": "^1.1.1", + "source-map-js": "^1.2.1" + }, + "engines": { + "node": "^10 || ^12 || >=14" + } + }, + "node_modules/prelude-ls": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz", + "integrity": "sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/pretty-format": { + "version": "27.5.1", + "resolved": "https://registry.npmjs.org/pretty-format/-/pretty-format-27.5.1.tgz", + "integrity": "sha512-Qb1gy5OrP5+zDf2Bvnzdl3jsTf1qXVMazbvCoKhtKqVs4/YK4ozX4gKQJJVyNe+cajNPn0KoC0MC3FUmaHWEmQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1", + "ansi-styles": "^5.0.0", + "react-is": "^17.0.1" + }, + "engines": { + "node": "^10.13.0 || ^12.13.0 || ^14.15.0 || >=15.0.0" + } + }, + "node_modules/pretty-format/node_modules/ansi-styles": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz", + "integrity": "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/punycode": 
{ + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", + "integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/react": { + "version": "19.2.4", + "resolved": "https://registry.npmjs.org/react/-/react-19.2.4.tgz", + "integrity": "sha512-9nfp2hYpCwOjAN+8TZFGhtWEwgvWHXqESH8qT89AT/lWklpLON22Lc8pEtnpsZz7VmawabSU0gCjnj8aC0euHQ==", + "license": "MIT", + "peer": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/react-dom": { + "version": "19.2.4", + "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.2.4.tgz", + "integrity": "sha512-AXJdLo8kgMbimY95O2aKQqsz2iWi9jMgKJhRBAxECE4IFxfcazB2LmzloIoibJI3C12IlY20+KFaLv+71bUJeQ==", + "license": "MIT", + "peer": true, + "dependencies": { + "scheduler": "^0.27.0" + }, + "peerDependencies": { + "react": "^19.2.4" + } + }, + "node_modules/react-is": { + "version": "17.0.2", + "resolved": "https://registry.npmjs.org/react-is/-/react-is-17.0.2.tgz", + "integrity": "sha512-w2GsyukL62IJnlaff/nRegPQR94C/XXamvMWmSHRJ4y7Ts/4ocGRmTHvOs8PSE6pB3dWOrD/nueuU5sduBsQ4w==", + "dev": true, + "license": "MIT" + }, + "node_modules/react-router": { + "version": "7.13.2", + "resolved": "https://registry.npmjs.org/react-router/-/react-router-7.13.2.tgz", + "integrity": "sha512-tX1Aee+ArlKQP+NIUd7SE6Li+CiGKwQtbS+FfRxPX6Pe4vHOo6nr9d++u5cwg+Z8K/x8tP+7qLmujDtfrAoUJA==", + "license": "MIT", + "dependencies": { + "cookie": "^1.0.1", + "set-cookie-parser": "^2.6.0" + }, + "engines": { + "node": ">=20.0.0" + }, + "peerDependencies": { + "react": ">=18", + "react-dom": ">=18" + }, + "peerDependenciesMeta": { + "react-dom": { + "optional": true + } + } + }, + "node_modules/react-router-dom": { + "version": "7.13.2", + "resolved": "https://registry.npmjs.org/react-router-dom/-/react-router-dom-7.13.2.tgz", + "integrity": 
"sha512-aR7SUORwTqAW0JDeiWF07e9SBE9qGpByR9I8kJT5h/FrBKxPMS6TiC7rmVO+gC0q52Bx7JnjWe8Z1sR9faN4YA==", + "license": "MIT", + "dependencies": { + "react-router": "7.13.2" + }, + "engines": { + "node": ">=20.0.0" + }, + "peerDependencies": { + "react": ">=18", + "react-dom": ">=18" + } + }, + "node_modules/redent": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/redent/-/redent-3.0.0.tgz", + "integrity": "sha512-6tDA8g98We0zd0GvVeMT9arEOnTw9qM03L9cJXaCjrip1OO764RDBLBfrB4cwzNGDj5OA5ioymC9GkizgWJDUg==", + "dev": true, + "license": "MIT", + "dependencies": { + "indent-string": "^4.0.0", + "strip-indent": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/require-from-string": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz", + "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/resolve-from": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz", + "integrity": "sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=4" + } + }, + "node_modules/rolldown": { + "version": "1.0.0-rc.12", + "resolved": "https://registry.npmjs.org/rolldown/-/rolldown-1.0.0-rc.12.tgz", + "integrity": "sha512-yP4USLIMYrwpPHEFB5JGH1uxhcslv6/hL0OyvTuY+3qlOSJvZ7ntYnoWpehBxufkgN0cvXxppuTu5hHa/zPh+A==", + "dev": true, + "license": "MIT", + "dependencies": { + "@oxc-project/types": "=0.122.0", + "@rolldown/pluginutils": "1.0.0-rc.12" + }, + "bin": { + "rolldown": "bin/cli.mjs" + }, + "engines": { + "node": "^20.19.0 || >=22.12.0" + }, + "optionalDependencies": { + "@rolldown/binding-android-arm64": "1.0.0-rc.12", + "@rolldown/binding-darwin-arm64": "1.0.0-rc.12", + 
"@rolldown/binding-darwin-x64": "1.0.0-rc.12", + "@rolldown/binding-freebsd-x64": "1.0.0-rc.12", + "@rolldown/binding-linux-arm-gnueabihf": "1.0.0-rc.12", + "@rolldown/binding-linux-arm64-gnu": "1.0.0-rc.12", + "@rolldown/binding-linux-arm64-musl": "1.0.0-rc.12", + "@rolldown/binding-linux-ppc64-gnu": "1.0.0-rc.12", + "@rolldown/binding-linux-s390x-gnu": "1.0.0-rc.12", + "@rolldown/binding-linux-x64-gnu": "1.0.0-rc.12", + "@rolldown/binding-linux-x64-musl": "1.0.0-rc.12", + "@rolldown/binding-openharmony-arm64": "1.0.0-rc.12", + "@rolldown/binding-wasm32-wasi": "1.0.0-rc.12", + "@rolldown/binding-win32-arm64-msvc": "1.0.0-rc.12", + "@rolldown/binding-win32-x64-msvc": "1.0.0-rc.12" + } + }, + "node_modules/rolldown/node_modules/@rolldown/pluginutils": { + "version": "1.0.0-rc.12", + "resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-rc.12.tgz", + "integrity": "sha512-HHMwmarRKvoFsJorqYlFeFRzXZqCt2ETQlEDOb9aqssrnVBB1/+xgTGtuTrIk5vzLNX1MjMtTf7W9z3tsSbrxw==", + "dev": true, + "license": "MIT" + }, + "node_modules/rollup": { + "version": "4.60.0", + "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.60.0.tgz", + "integrity": "sha512-yqjxruMGBQJ2gG4HtjZtAfXArHomazDHoFwFFmZZl0r7Pdo7qCIXKqKHZc8yeoMgzJJ+pO6pEEHa+V7uzWlrAQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/estree": "1.0.8" + }, + "bin": { + "rollup": "dist/bin/rollup" + }, + "engines": { + "node": ">=18.0.0", + "npm": ">=8.0.0" + }, + "optionalDependencies": { + "@rollup/rollup-android-arm-eabi": "4.60.0", + "@rollup/rollup-android-arm64": "4.60.0", + "@rollup/rollup-darwin-arm64": "4.60.0", + "@rollup/rollup-darwin-x64": "4.60.0", + "@rollup/rollup-freebsd-arm64": "4.60.0", + "@rollup/rollup-freebsd-x64": "4.60.0", + "@rollup/rollup-linux-arm-gnueabihf": "4.60.0", + "@rollup/rollup-linux-arm-musleabihf": "4.60.0", + "@rollup/rollup-linux-arm64-gnu": "4.60.0", + "@rollup/rollup-linux-arm64-musl": "4.60.0", + "@rollup/rollup-linux-loong64-gnu": "4.60.0", 
+ "@rollup/rollup-linux-loong64-musl": "4.60.0", + "@rollup/rollup-linux-ppc64-gnu": "4.60.0", + "@rollup/rollup-linux-ppc64-musl": "4.60.0", + "@rollup/rollup-linux-riscv64-gnu": "4.60.0", + "@rollup/rollup-linux-riscv64-musl": "4.60.0", + "@rollup/rollup-linux-s390x-gnu": "4.60.0", + "@rollup/rollup-linux-x64-gnu": "4.60.0", + "@rollup/rollup-linux-x64-musl": "4.60.0", + "@rollup/rollup-openbsd-x64": "4.60.0", + "@rollup/rollup-openharmony-arm64": "4.60.0", + "@rollup/rollup-win32-arm64-msvc": "4.60.0", + "@rollup/rollup-win32-ia32-msvc": "4.60.0", + "@rollup/rollup-win32-x64-gnu": "4.60.0", + "@rollup/rollup-win32-x64-msvc": "4.60.0", + "fsevents": "~2.3.2" + } + }, + "node_modules/saxes": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/saxes/-/saxes-6.0.0.tgz", + "integrity": "sha512-xAg7SOnEhrm5zI3puOOKyy1OMcMlIJZYNJY7xLBwSze0UjhPLnWfj2GF2EpT0jmzaJKIWKHLsaSSajf35bcYnA==", + "dev": true, + "license": "ISC", + "dependencies": { + "xmlchars": "^2.2.0" + }, + "engines": { + "node": ">=v12.22.7" + } + }, + "node_modules/scheduler": { + "version": "0.27.0", + "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.27.0.tgz", + "integrity": "sha512-eNv+WrVbKu1f3vbYJT/xtiF5syA5HPIMtf9IgY/nKg0sWqzAUEvqY/xm7OcZc/qafLx/iO9FgOmeSAp4v5ti/Q==", + "license": "MIT" + }, + "node_modules/semver": { + "version": "6.3.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", + "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", + "dev": true, + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + } + }, + "node_modules/set-cookie-parser": { + "version": "2.7.2", + "resolved": "https://registry.npmjs.org/set-cookie-parser/-/set-cookie-parser-2.7.2.tgz", + "integrity": "sha512-oeM1lpU/UvhTxw+g3cIfxXHyJRc/uidd3yK1P242gzHds0udQBYzs3y8j4gCCW+ZJ7ad0yctld8RYO+bdurlvw==", + "license": "MIT" + }, + "node_modules/shebang-command": { + "version": "2.0.0", + "resolved": 
"https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", + "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", + "dev": true, + "license": "MIT", + "dependencies": { + "shebang-regex": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/shebang-regex": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", + "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/siginfo": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/siginfo/-/siginfo-2.0.0.tgz", + "integrity": "sha512-ybx0WO1/8bSBLEWXZvEd7gMW3Sn3JFlW3TvX1nREbDLRNQNaeNN8WK0meBwPdAaOI7TtRRRJn/Es1zhrrCHu7g==", + "dev": true, + "license": "ISC" + }, + "node_modules/source-map-js": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", + "integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==", + "dev": true, + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/stackback": { + "version": "0.0.2", + "resolved": "https://registry.npmjs.org/stackback/-/stackback-0.0.2.tgz", + "integrity": "sha512-1XMJE5fQo1jGH6Y/7ebnwPOBEkIEnT4QF32d5R1+VXdXveM0IBMJt8zfaxX1P3QhVwrYe+576+jkANtSS2mBbw==", + "dev": true, + "license": "MIT" + }, + "node_modules/std-env": { + "version": "3.10.0", + "resolved": "https://registry.npmjs.org/std-env/-/std-env-3.10.0.tgz", + "integrity": "sha512-5GS12FdOZNliM5mAOxFRg7Ir0pWz8MdpYm6AY6VPkGpbA7ZzmbzNcBJQ0GPvvyWgcY7QAhCgf9Uy89I03faLkg==", + "dev": true, + "license": "MIT" + }, + "node_modules/strip-indent": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/strip-indent/-/strip-indent-3.0.0.tgz", + "integrity": 
"sha512-laJTa3Jb+VQpaC6DseHhF7dXVqHTfJPCRDaEbid/drOhgitgYku/letMUqOXFoWV0zIIUbjpdH2t+tYj4bQMRQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "min-indent": "^1.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-json-comments": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz", + "integrity": "sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/strip-literal": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/strip-literal/-/strip-literal-3.1.0.tgz", + "integrity": "sha512-8r3mkIM/2+PpjHoOtiAW8Rg3jJLHaV7xPwG+YRGrv6FP0wwk/toTpATxWYOW0BKdWwl82VT2tFYi5DlROa0Mxg==", + "dev": true, + "license": "MIT", + "dependencies": { + "js-tokens": "^9.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/antfu" + } + }, + "node_modules/strip-literal/node_modules/js-tokens": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-9.0.1.tgz", + "integrity": "sha512-mxa9E9ITFOt0ban3j6L5MpjwegGz6lBQmM1IJkWeBZGcMxto50+eWdjC/52xDbS2vy0k7vIMK0Fe2wfL9OQSpQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/supports-color": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", + "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", + "dev": true, + "license": "MIT", + "dependencies": { + "has-flag": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/symbol-tree": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/symbol-tree/-/symbol-tree-3.2.4.tgz", + "integrity": "sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw==", + "dev": 
true, + "license": "MIT" + }, + "node_modules/tinybench": { + "version": "2.9.0", + "resolved": "https://registry.npmjs.org/tinybench/-/tinybench-2.9.0.tgz", + "integrity": "sha512-0+DUvqWMValLmha6lr4kD8iAMK1HzV0/aKnCtWb9v9641TnP/MFb7Pc2bxoxQjTXAErryXVgUOfv2YqNllqGeg==", + "dev": true, + "license": "MIT" + }, + "node_modules/tinyexec": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/tinyexec/-/tinyexec-0.3.2.tgz", + "integrity": "sha512-KQQR9yN7R5+OSwaK0XQoj22pwHoTlgYqmUscPYoknOoWCWfj/5/ABTMRi69FrKU5ffPVh5QcFikpWJI/P1ocHA==", + "dev": true, + "license": "MIT" + }, + "node_modules/tinyglobby": { + "version": "0.2.15", + "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz", + "integrity": "sha512-j2Zq4NyQYG5XMST4cbs02Ak8iJUdxRM0XI5QyxXuZOzKOINmWurp3smXu3y5wDcJrptwpSjgXHzIQxR0omXljQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "fdir": "^6.5.0", + "picomatch": "^4.0.3" + }, + "engines": { + "node": ">=12.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/SuperchupuDev" + } + }, + "node_modules/tinypool": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/tinypool/-/tinypool-1.1.1.tgz", + "integrity": "sha512-Zba82s87IFq9A9XmjiX5uZA/ARWDrB03OHlq+Vw1fSdt0I+4/Kutwy8BP4Y/y/aORMo61FQ0vIb5j44vSo5Pkg==", + "dev": true, + "license": "MIT", + "engines": { + "node": "^18.0.0 || >=20.0.0" + } + }, + "node_modules/tinyrainbow": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/tinyrainbow/-/tinyrainbow-2.0.0.tgz", + "integrity": "sha512-op4nsTR47R6p0vMUUoYl/a+ljLFVtlfaXkLQmqfLR1qHma1h/ysYk4hEXZ880bf2CYgTskvTa/e196Vd5dDQXw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/tinyspy": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/tinyspy/-/tinyspy-4.0.4.tgz", + "integrity": "sha512-azl+t0z7pw/z958Gy9svOTuzqIk6xq+NSheJzn5MMWtWTFywIacg2wUlzKFGtt3cthx0r2SxMK0yzJOR0IES7Q==", + "dev": true, + "license": "MIT", + "engines": 
{ + "node": ">=14.0.0" + } + }, + "node_modules/tldts": { + "version": "7.0.27", + "resolved": "https://registry.npmjs.org/tldts/-/tldts-7.0.27.tgz", + "integrity": "sha512-I4FZcVFcqCRuT0ph6dCDpPuO4Xgzvh+spkcTr1gK7peIvxWauoloVO0vuy1FQnijT63ss6AsHB6+OIM4aXHbPg==", + "dev": true, + "license": "MIT", + "dependencies": { + "tldts-core": "^7.0.27" + }, + "bin": { + "tldts": "bin/cli.js" + } + }, + "node_modules/tldts-core": { + "version": "7.0.27", + "resolved": "https://registry.npmjs.org/tldts-core/-/tldts-core-7.0.27.tgz", + "integrity": "sha512-YQ7uPjgWUibIK6DW5lrKujGwUKhLevU4hcGbP5O6TcIUb+oTjJYJVWPS4nZsIHrEEEG6myk/oqAJUEQmpZrHsg==", + "dev": true, + "license": "MIT" + }, + "node_modules/tough-cookie": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-6.0.1.tgz", + "integrity": "sha512-LktZQb3IeoUWB9lqR5EWTHgW/VTITCXg4D21M+lvybRVdylLrRMnqaIONLVb5mav8vM19m44HIcGq4qASeu2Qw==", + "dev": true, + "license": "BSD-3-Clause", + "dependencies": { + "tldts": "^7.0.5" + }, + "engines": { + "node": ">=16" + } + }, + "node_modules/tr46": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-6.0.0.tgz", + "integrity": "sha512-bLVMLPtstlZ4iMQHpFHTR7GAGj2jxi8Dg0s2h2MafAE4uSWF98FC/3MomU51iQAMf8/qDUbKWf5GxuvvVcXEhw==", + "dev": true, + "license": "MIT", + "dependencies": { + "punycode": "^2.3.1" + }, + "engines": { + "node": ">=20" + } + }, + "node_modules/ts-api-utils": { + "version": "2.5.0", + "resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-2.5.0.tgz", + "integrity": "sha512-OJ/ibxhPlqrMM0UiNHJ/0CKQkoKF243/AEmplt3qpRgkW8VG7IfOS41h7V8TjITqdByHzrjcS/2si+y4lIh8NA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18.12" + }, + "peerDependencies": { + "typescript": ">=4.8.4" + } + }, + "node_modules/tslib": { + "version": "2.8.1", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", + "integrity": 
"sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", + "dev": true, + "license": "0BSD", + "optional": true + }, + "node_modules/type-check": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/type-check/-/type-check-0.4.0.tgz", + "integrity": "sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew==", + "dev": true, + "license": "MIT", + "dependencies": { + "prelude-ls": "^1.2.1" + }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/typescript": { + "version": "5.9.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", + "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", + "dev": true, + "license": "Apache-2.0", + "peer": true, + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/typescript-eslint": { + "version": "8.57.2", + "resolved": "https://registry.npmjs.org/typescript-eslint/-/typescript-eslint-8.57.2.tgz", + "integrity": "sha512-VEPQ0iPgWO/sBaZOU1xo4nuNdODVOajPnTIbog2GKYr31nIlZ0fWPoCQgGfF3ETyBl1vn63F/p50Um9Z4J8O8A==", + "dev": true, + "license": "MIT", + "dependencies": { + "@typescript-eslint/eslint-plugin": "8.57.2", + "@typescript-eslint/parser": "8.57.2", + "@typescript-eslint/typescript-estree": "8.57.2", + "@typescript-eslint/utils": "8.57.2" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + }, + "peerDependencies": { + "eslint": "^8.57.0 || ^9.0.0 || ^10.0.0", + "typescript": ">=4.8.4 <6.0.0" + } + }, + "node_modules/undici-types": { + "version": "7.16.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz", + "integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==", + "dev": true, 
+ "license": "MIT" + }, + "node_modules/update-browserslist-db": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.2.3.tgz", + "integrity": "sha512-Js0m9cx+qOgDxo0eMiFGEueWztz+d4+M3rGlmKPT+T4IS/jP4ylw3Nwpu6cpTTP8R1MAC1kF4VbdLt3ARf209w==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/browserslist" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/browserslist" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "dependencies": { + "escalade": "^3.2.0", + "picocolors": "^1.1.1" + }, + "bin": { + "update-browserslist-db": "cli.js" + }, + "peerDependencies": { + "browserslist": ">= 4.21.0" + } + }, + "node_modules/uri-js": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz", + "integrity": "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==", + "dev": true, + "license": "BSD-2-Clause", + "dependencies": { + "punycode": "^2.1.0" + } + }, + "node_modules/vite": { + "version": "8.0.3", + "resolved": "https://registry.npmjs.org/vite/-/vite-8.0.3.tgz", + "integrity": "sha512-B9ifbFudT1TFhfltfaIPgjo9Z3mDynBTJSUYxTjOQruf/zHH+ezCQKcoqO+h7a9Pw9Nm/OtlXAiGT1axBgwqrQ==", + "dev": true, + "license": "MIT", + "peer": true, + "dependencies": { + "lightningcss": "^1.32.0", + "picomatch": "^4.0.4", + "postcss": "^8.5.8", + "rolldown": "1.0.0-rc.12", + "tinyglobby": "^0.2.15" + }, + "bin": { + "vite": "bin/vite.js" + }, + "engines": { + "node": "^20.19.0 || >=22.12.0" + }, + "funding": { + "url": "https://github.com/vitejs/vite?sponsor=1" + }, + "optionalDependencies": { + "fsevents": "~2.3.3" + }, + "peerDependencies": { + "@types/node": "^20.19.0 || >=22.12.0", + "@vitejs/devtools": "^0.1.0", + "esbuild": "^0.27.0", + "jiti": ">=1.21.0", + "less": "^4.0.0", + "sass": "^1.70.0", + "sass-embedded": 
"^1.70.0", + "stylus": ">=0.54.8", + "sugarss": "^5.0.0", + "terser": "^5.16.0", + "tsx": "^4.8.1", + "yaml": "^2.4.2" + }, + "peerDependenciesMeta": { + "@types/node": { + "optional": true + }, + "@vitejs/devtools": { + "optional": true + }, + "esbuild": { + "optional": true + }, + "jiti": { + "optional": true + }, + "less": { + "optional": true + }, + "sass": { + "optional": true + }, + "sass-embedded": { + "optional": true + }, + "stylus": { + "optional": true + }, + "sugarss": { + "optional": true + }, + "terser": { + "optional": true + }, + "tsx": { + "optional": true + }, + "yaml": { + "optional": true + } + } + }, + "node_modules/vite-node": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/vite-node/-/vite-node-3.2.4.tgz", + "integrity": "sha512-EbKSKh+bh1E1IFxeO0pg1n4dvoOTt0UDiXMd/qn++r98+jPO1xtJilvXldeuQ8giIB5IkpjCgMleHMNEsGH6pg==", + "dev": true, + "license": "MIT", + "dependencies": { + "cac": "^6.7.14", + "debug": "^4.4.1", + "es-module-lexer": "^1.7.0", + "pathe": "^2.0.3", + "vite": "^5.0.0 || ^6.0.0 || ^7.0.0-0" + }, + "bin": { + "vite-node": "vite-node.mjs" + }, + "engines": { + "node": "^18.0.0 || ^20.0.0 || >=22.0.0" + }, + "funding": { + "url": "https://opencollective.com/vitest" + } + }, + "node_modules/vite-node/node_modules/vite": { + "version": "7.3.1", + "resolved": "https://registry.npmjs.org/vite/-/vite-7.3.1.tgz", + "integrity": "sha512-w+N7Hifpc3gRjZ63vYBXA56dvvRlNWRczTdmCBBa+CotUzAPf5b7YMdMR/8CQoeYE5LX3W4wj6RYTgonm1b9DA==", + "dev": true, + "license": "MIT", + "dependencies": { + "esbuild": "^0.27.0", + "fdir": "^6.5.0", + "picomatch": "^4.0.3", + "postcss": "^8.5.6", + "rollup": "^4.43.0", + "tinyglobby": "^0.2.15" + }, + "bin": { + "vite": "bin/vite.js" + }, + "engines": { + "node": "^20.19.0 || >=22.12.0" + }, + "funding": { + "url": "https://github.com/vitejs/vite?sponsor=1" + }, + "optionalDependencies": { + "fsevents": "~2.3.3" + }, + "peerDependencies": { + "@types/node": "^20.19.0 || >=22.12.0", + "jiti": 
">=1.21.0", + "less": "^4.0.0", + "lightningcss": "^1.21.0", + "sass": "^1.70.0", + "sass-embedded": "^1.70.0", + "stylus": ">=0.54.8", + "sugarss": "^5.0.0", + "terser": "^5.16.0", + "tsx": "^4.8.1", + "yaml": "^2.4.2" + }, + "peerDependenciesMeta": { + "@types/node": { + "optional": true + }, + "jiti": { + "optional": true + }, + "less": { + "optional": true + }, + "lightningcss": { + "optional": true + }, + "sass": { + "optional": true + }, + "sass-embedded": { + "optional": true + }, + "stylus": { + "optional": true + }, + "sugarss": { + "optional": true + }, + "terser": { + "optional": true + }, + "tsx": { + "optional": true + }, + "yaml": { + "optional": true + } + } + }, + "node_modules/vitest": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/vitest/-/vitest-3.2.4.tgz", + "integrity": "sha512-LUCP5ev3GURDysTWiP47wRRUpLKMOfPh+yKTx3kVIEiu5KOMeqzpnYNsKyOoVrULivR8tLcks4+lga33Whn90A==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/chai": "^5.2.2", + "@vitest/expect": "3.2.4", + "@vitest/mocker": "3.2.4", + "@vitest/pretty-format": "^3.2.4", + "@vitest/runner": "3.2.4", + "@vitest/snapshot": "3.2.4", + "@vitest/spy": "3.2.4", + "@vitest/utils": "3.2.4", + "chai": "^5.2.0", + "debug": "^4.4.1", + "expect-type": "^1.2.1", + "magic-string": "^0.30.17", + "pathe": "^2.0.3", + "picomatch": "^4.0.2", + "std-env": "^3.9.0", + "tinybench": "^2.9.0", + "tinyexec": "^0.3.2", + "tinyglobby": "^0.2.14", + "tinypool": "^1.1.1", + "tinyrainbow": "^2.0.0", + "vite": "^5.0.0 || ^6.0.0 || ^7.0.0-0", + "vite-node": "3.2.4", + "why-is-node-running": "^2.3.0" + }, + "bin": { + "vitest": "vitest.mjs" + }, + "engines": { + "node": "^18.0.0 || ^20.0.0 || >=22.0.0" + }, + "funding": { + "url": "https://opencollective.com/vitest" + }, + "peerDependencies": { + "@edge-runtime/vm": "*", + "@types/debug": "^4.1.12", + "@types/node": "^18.0.0 || ^20.0.0 || >=22.0.0", + "@vitest/browser": "3.2.4", + "@vitest/ui": "3.2.4", + "happy-dom": "*", + "jsdom": "*" 
+ }, + "peerDependenciesMeta": { + "@edge-runtime/vm": { + "optional": true + }, + "@types/debug": { + "optional": true + }, + "@types/node": { + "optional": true + }, + "@vitest/browser": { + "optional": true + }, + "@vitest/ui": { + "optional": true + }, + "happy-dom": { + "optional": true + }, + "jsdom": { + "optional": true + } + } + }, + "node_modules/vitest/node_modules/@vitest/mocker": { + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/@vitest/mocker/-/mocker-3.2.4.tgz", + "integrity": "sha512-46ryTE9RZO/rfDd7pEqFl7etuyzekzEhUbTW3BvmeO/BcCMEgq59BKhek3dXDWgAj4oMK6OZi+vRr1wPW6qjEQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@vitest/spy": "3.2.4", + "estree-walker": "^3.0.3", + "magic-string": "^0.30.17" + }, + "funding": { + "url": "https://opencollective.com/vitest" + }, + "peerDependencies": { + "msw": "^2.4.9", + "vite": "^5.0.0 || ^6.0.0 || ^7.0.0-0" + }, + "peerDependenciesMeta": { + "msw": { + "optional": true + }, + "vite": { + "optional": true + } + } + }, + "node_modules/vitest/node_modules/vite": { + "version": "7.3.1", + "resolved": "https://registry.npmjs.org/vite/-/vite-7.3.1.tgz", + "integrity": "sha512-w+N7Hifpc3gRjZ63vYBXA56dvvRlNWRczTdmCBBa+CotUzAPf5b7YMdMR/8CQoeYE5LX3W4wj6RYTgonm1b9DA==", + "dev": true, + "license": "MIT", + "peer": true, + "dependencies": { + "esbuild": "^0.27.0", + "fdir": "^6.5.0", + "picomatch": "^4.0.3", + "postcss": "^8.5.6", + "rollup": "^4.43.0", + "tinyglobby": "^0.2.15" + }, + "bin": { + "vite": "bin/vite.js" + }, + "engines": { + "node": "^20.19.0 || >=22.12.0" + }, + "funding": { + "url": "https://github.com/vitejs/vite?sponsor=1" + }, + "optionalDependencies": { + "fsevents": "~2.3.3" + }, + "peerDependencies": { + "@types/node": "^20.19.0 || >=22.12.0", + "jiti": ">=1.21.0", + "less": "^4.0.0", + "lightningcss": "^1.21.0", + "sass": "^1.70.0", + "sass-embedded": "^1.70.0", + "stylus": ">=0.54.8", + "sugarss": "^5.0.0", + "terser": "^5.16.0", + "tsx": "^4.8.1", + "yaml": "^2.4.2" 
+ }, + "peerDependenciesMeta": { + "@types/node": { + "optional": true + }, + "jiti": { + "optional": true + }, + "less": { + "optional": true + }, + "lightningcss": { + "optional": true + }, + "sass": { + "optional": true + }, + "sass-embedded": { + "optional": true + }, + "stylus": { + "optional": true + }, + "sugarss": { + "optional": true + }, + "terser": { + "optional": true + }, + "tsx": { + "optional": true + }, + "yaml": { + "optional": true + } + } + }, + "node_modules/w3c-xmlserializer": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/w3c-xmlserializer/-/w3c-xmlserializer-5.0.0.tgz", + "integrity": "sha512-o8qghlI8NZHU1lLPrpi2+Uq7abh4GGPpYANlalzWxyWteJOCsr/P+oPBA49TOLu5FTZO4d3F9MnWJfiMo4BkmA==", + "dev": true, + "license": "MIT", + "dependencies": { + "xml-name-validator": "^5.0.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/webidl-conversions": { + "version": "8.0.1", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-8.0.1.tgz", + "integrity": "sha512-BMhLD/Sw+GbJC21C/UgyaZX41nPt8bUTg+jWyDeg7e7YN4xOM05YPSIXceACnXVtqyEw/LMClUQMtMZ+PGGpqQ==", + "dev": true, + "license": "BSD-2-Clause", + "engines": { + "node": ">=20" + } + }, + "node_modules/whatwg-mimetype": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-4.0.0.tgz", + "integrity": "sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18" + } + }, + "node_modules/whatwg-url": { + "version": "15.1.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-15.1.0.tgz", + "integrity": "sha512-2ytDk0kiEj/yu90JOAp44PVPUkO9+jVhyf+SybKlRHSDlvOOZhdPIrr7xTH64l4WixO2cP+wQIcgujkGBPPz6g==", + "dev": true, + "license": "MIT", + "dependencies": { + "tr46": "^6.0.0", + "webidl-conversions": "^8.0.0" + }, + "engines": { + "node": ">=20" + } + }, + "node_modules/which": { + "version": 
"2.0.2", + "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", + "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", + "dev": true, + "license": "ISC", + "dependencies": { + "isexe": "^2.0.0" + }, + "bin": { + "node-which": "bin/node-which" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/why-is-node-running": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/why-is-node-running/-/why-is-node-running-2.3.0.tgz", + "integrity": "sha512-hUrmaWBdVDcxvYqnyh09zunKzROWjbZTiNy8dBEjkS7ehEDQibXJ7XvlmtbwuTclUiIyN+CyXQD4Vmko8fNm8w==", + "dev": true, + "license": "MIT", + "dependencies": { + "siginfo": "^2.0.0", + "stackback": "0.0.2" + }, + "bin": { + "why-is-node-running": "cli.js" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/word-wrap": { + "version": "1.2.5", + "resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.5.tgz", + "integrity": "sha512-BN22B5eaMMI9UMtjrGd5g5eCYPpCPDUy0FJXbYsaT5zYxjFOckS53SQDE3pWkVoWpHXVb3BrYcEN4Twa55B5cA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/ws": { + "version": "8.20.0", + "resolved": "https://registry.npmjs.org/ws/-/ws-8.20.0.tgz", + "integrity": "sha512-sAt8BhgNbzCtgGbt2OxmpuryO63ZoDk/sqaB/znQm94T4fCEsy/yV+7CdC1kJhOU9lboAEU7R3kquuycDoibVA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10.0.0" + }, + "peerDependencies": { + "bufferutil": "^4.0.1", + "utf-8-validate": ">=5.0.2" + }, + "peerDependenciesMeta": { + "bufferutil": { + "optional": true + }, + "utf-8-validate": { + "optional": true + } + } + }, + "node_modules/xml-name-validator": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/xml-name-validator/-/xml-name-validator-5.0.0.tgz", + "integrity": "sha512-EvGK8EJ3DhaHfbRlETOWAS5pO9MZITeauHKJyb8wyajUfQUenkIg2MvLDTZ4T/TgIcm3HU0TFBgWWboAZ30UHg==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": 
">=18" + } + }, + "node_modules/xmlchars": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/xmlchars/-/xmlchars-2.2.0.tgz", + "integrity": "sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==", + "dev": true, + "license": "MIT" + }, + "node_modules/yallist": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.1.1.tgz", + "integrity": "sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==", + "dev": true, + "license": "ISC" + }, + "node_modules/yocto-queue": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-0.1.0.tgz", + "integrity": "sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/zod": { + "version": "4.3.6", + "resolved": "https://registry.npmjs.org/zod/-/zod-4.3.6.tgz", + "integrity": "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==", + "dev": true, + "license": "MIT", + "peer": true, + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + }, + "node_modules/zod-validation-error": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/zod-validation-error/-/zod-validation-error-4.0.2.tgz", + "integrity": "sha512-Q6/nZLe6jxuU80qb/4uJ4t5v2VEZ44lzQjPDhYJNztRQ4wyWc6VF3D3Kb/fAuPetZQnhS3hnajCf9CsWesghLQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=18.0.0" + }, + "peerDependencies": { + "zod": "^3.25.0 || ^4.0.0" + } + } + } +} diff --git a/control/ui/package.json b/control/ui/package.json new file mode 100644 index 0000000..942286c --- /dev/null +++ b/control/ui/package.json @@ -0,0 +1,37 @@ +{ + "name": "ui", + "private": true, + "version": "0.0.0", + "type": "module", + "scripts": { + 
"dev": "vite", + "build": "tsc -b && vite build", + "lint": "eslint .", + "typecheck": "tsc -b --pretty false", + "test": "vitest run", + "preview": "vite preview" + }, + "dependencies": { + "react": "^19.2.4", + "react-dom": "^19.2.4", + "react-router-dom": "^7.9.3" + }, + "devDependencies": { + "@eslint/js": "^9.39.4", + "@testing-library/jest-dom": "^6.9.0", + "@testing-library/react": "^16.3.0", + "@types/node": "^24.12.0", + "@types/react": "^19.2.14", + "@types/react-dom": "^19.2.3", + "@vitejs/plugin-react": "^6.0.1", + "eslint": "^9.39.4", + "eslint-plugin-react-hooks": "^7.0.1", + "eslint-plugin-react-refresh": "^0.5.2", + "globals": "^17.4.0", + "jsdom": "^27.0.0", + "typescript": "~5.9.3", + "typescript-eslint": "^8.57.0", + "vite": "^8.0.1", + "vitest": "^3.2.4" + } +} diff --git a/control/ui/public/favicon.svg b/control/ui/public/favicon.svg new file mode 100644 index 0000000..6893eb1 --- /dev/null +++ b/control/ui/public/favicon.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/control/ui/public/icons.svg b/control/ui/public/icons.svg new file mode 100644 index 0000000..e952219 --- /dev/null +++ b/control/ui/public/icons.svg @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/control/ui/src/App.css b/control/ui/src/App.css new file mode 100644 index 0000000..f90339d --- /dev/null +++ b/control/ui/src/App.css @@ -0,0 +1,184 @@ +.counter { + font-size: 16px; + padding: 5px 10px; + border-radius: 5px; + color: var(--accent); + background: var(--accent-bg); + border: 2px solid transparent; + transition: border-color 0.3s; + margin-bottom: 24px; + + &:hover { + border-color: var(--accent-border); + } + &:focus-visible { + outline: 2px solid var(--accent); + outline-offset: 2px; + } +} + +.hero { + position: relative; + + .base, + .framework, + .vite { + inset-inline: 0; + margin: 0 auto; + } + + .base { + width: 170px; + position: relative; + z-index: 0; + } + + .framework, + .vite { + position: absolute; + } + + 
.framework { + z-index: 1; + top: 34px; + height: 28px; + transform: perspective(2000px) rotateZ(300deg) rotateX(44deg) rotateY(39deg) + scale(1.4); + } + + .vite { + z-index: 0; + top: 107px; + height: 26px; + width: auto; + transform: perspective(2000px) rotateZ(300deg) rotateX(40deg) rotateY(39deg) + scale(0.8); + } +} + +#center { + display: flex; + flex-direction: column; + gap: 25px; + place-content: center; + place-items: center; + flex-grow: 1; + + @media (max-width: 1024px) { + padding: 32px 20px 24px; + gap: 18px; + } +} + +#next-steps { + display: flex; + border-top: 1px solid var(--border); + text-align: left; + + & > div { + flex: 1 1 0; + padding: 32px; + @media (max-width: 1024px) { + padding: 24px 20px; + } + } + + .icon { + margin-bottom: 16px; + width: 22px; + height: 22px; + } + + @media (max-width: 1024px) { + flex-direction: column; + text-align: center; + } +} + +#docs { + border-right: 1px solid var(--border); + + @media (max-width: 1024px) { + border-right: none; + border-bottom: 1px solid var(--border); + } +} + +#next-steps ul { + list-style: none; + padding: 0; + display: flex; + gap: 8px; + margin: 32px 0 0; + + .logo { + height: 18px; + } + + a { + color: var(--text-h); + font-size: 16px; + border-radius: 6px; + background: var(--social-bg); + display: flex; + padding: 6px 12px; + align-items: center; + gap: 8px; + text-decoration: none; + transition: box-shadow 0.3s; + + &:hover { + box-shadow: var(--shadow); + } + .button-icon { + height: 18px; + width: 18px; + } + } + + @media (max-width: 1024px) { + margin-top: 20px; + flex-wrap: wrap; + justify-content: center; + + li { + flex: 1 1 calc(50% - 8px); + } + + a { + width: 100%; + justify-content: center; + box-sizing: border-box; + } + } +} + +#spacer { + height: 88px; + border-top: 1px solid var(--border); + @media (max-width: 1024px) { + height: 48px; + } +} + +.ticks { + position: relative; + width: 100%; + + &::before, + &::after { + content: ''; + position: absolute; + top: 
-4.5px; + border: 5px solid transparent; + } + + &::before { + left: 0; + border-left-color: var(--border); + } + &::after { + right: 0; + border-right-color: var(--border); + } +} diff --git a/control/ui/src/App.tsx b/control/ui/src/App.tsx new file mode 100644 index 0000000..f17cb62 --- /dev/null +++ b/control/ui/src/App.tsx @@ -0,0 +1,8 @@ +import { RouterProvider } from 'react-router-dom' +import { createBrowserAppRouter } from './app/router' + +const router = createBrowserAppRouter() + +export default function App() { + return +} diff --git a/control/ui/src/api/client.ts b/control/ui/src/api/client.ts new file mode 100644 index 0000000..53d05dd --- /dev/null +++ b/control/ui/src/api/client.ts @@ -0,0 +1,122 @@ +type RequestIds = { + requestId: string + correlationId?: string + traceparent?: string +} + +const LAST_IDS_STORAGE_KEY = 'control:last_request_ids' + +export class ApiError extends Error { + status: number + requestId: string + correlationId?: string + traceparent?: string + + constructor(args: { + status: number + message: string + requestId: string + correlationId?: string + traceparent?: string + }) { + super(args.message) + this.name = 'ApiError' + this.status = args.status + this.requestId = args.requestId + this.correlationId = args.correlationId + this.traceparent = args.traceparent + } +} + +const state: { + last?: RequestIds +} = {} + +function isRecord(value: unknown): value is Record { + return typeof value === 'object' && value !== null +} + +function loadLastIds(): RequestIds | undefined { + try { + const raw = localStorage.getItem(LAST_IDS_STORAGE_KEY) + if (!raw) return undefined + const parsed = JSON.parse(raw) as unknown + if (isRecord(parsed) && typeof parsed.requestId === 'string') { + const correlationId = + typeof parsed.correlationId === 'string' ? parsed.correlationId : undefined + const traceparent = + typeof parsed.traceparent === 'string' ? 
parsed.traceparent : undefined + return { requestId: parsed.requestId, correlationId, traceparent } + } + } catch { + return undefined + } + return undefined +} + +function persistLastIds(ids: RequestIds) { + try { + localStorage.setItem(LAST_IDS_STORAGE_KEY, JSON.stringify(ids)) + } catch { + return + } +} + +function newRequestId(): string { + if (typeof crypto !== 'undefined' && 'randomUUID' in crypto) { + return crypto.randomUUID() + } + return `${Date.now()}-${Math.random().toString(16).slice(2)}` +} + +export function getLastRequestIds(): RequestIds | undefined { + return state.last ?? loadLastIds() +} + +type ApiRequestInit = RequestInit & { + correlationId?: string + traceparent?: string + useLastCorrelationId?: boolean + useLastTraceparent?: boolean +} + +export async function apiFetch( + input: RequestInfo | URL, + init?: ApiRequestInit, +) { + const requestId = newRequestId() + + const headers = new Headers(init?.headers) + headers.set('x-request-id', requestId) + const last = getLastRequestIds() + const correlationId = + init?.correlationId ?? (init?.useLastCorrelationId ? last?.correlationId : undefined) + const traceparent = + init?.traceparent ?? (init?.useLastTraceparent ? last?.traceparent : undefined) + + if (correlationId) headers.set('x-correlation-id', correlationId) + if (traceparent) headers.set('traceparent', traceparent) + + const res = await fetch(input, { ...init, headers }) + const resCorrelationId = res.headers.get('x-correlation-id') ?? correlationId ?? undefined + const resTraceparent = res.headers.get('traceparent') ?? traceparent ?? undefined + const ids = { requestId, correlationId: resCorrelationId, traceparent: resTraceparent } + state.last = ids + persistLastIds(ids) + + if (!res.ok) { + const text = await res.text().catch(() => '') + const err = new ApiError({ + status: res.status, + requestId, + correlationId: resCorrelationId, + traceparent: resTraceparent, + message: `API error ${res.status}${text ? 
`: ${text}` : ''} (request_id=${requestId}${ + resCorrelationId ? ` correlation_id=${resCorrelationId}` : '' + })`, + }) + throw err + } + + return res +} diff --git a/control/ui/src/api/control.ts b/control/ui/src/api/control.ts new file mode 100644 index 0000000..ec9fbe4 --- /dev/null +++ b/control/ui/src/api/control.ts @@ -0,0 +1,179 @@ +import { apiFetch } from './client' +import { getAccessToken } from '../auth/token' + +function baseUrl() { + const v = import.meta.env.VITE_CONTROL_API_URL as string | undefined + return (v ?? 'http://127.0.0.1:8080').replace(/\/$/, '') +} + +async function apiJson(path: string): Promise { + const controller = new AbortController() + const t = window.setTimeout(() => controller.abort(), 2000) + + const token = getAccessToken() + const headers: HeadersInit = token ? { Authorization: `Bearer ${token}` } : {} + + try { + const res = await apiFetch(`${baseUrl()}${path}`, { + headers, + signal: controller.signal, + useLastCorrelationId: true, + useLastTraceparent: true, + }) + return (await res.json()) as T + } finally { + window.clearTimeout(t) + } +} + +async function apiPostJson(path: string, body: unknown, idempotencyKey?: string): Promise { + const controller = new AbortController() + const t = window.setTimeout(() => controller.abort(), 2000) + + const token = getAccessToken() + const headers: HeadersInit = { + 'content-type': 'application/json', + ...(token ? { Authorization: `Bearer ${token}` } : {}), + ...(idempotencyKey ? 
{ 'Idempotency-Key': idempotencyKey } : {}), + } + + try { + const res = await apiFetch(`${baseUrl()}${path}`, { + method: 'POST', + headers, + body: JSON.stringify(body), + signal: controller.signal, + useLastCorrelationId: true, + useLastTraceparent: true, + }) + return (await res.json()) as T + } finally { + window.clearTimeout(t) + } +} + +export type FleetSnapshot = { + services: Array<{ + name: string + base_url: string + health_ok: boolean + ready_ok: boolean + metrics_ok: boolean + }> +} + +export type PlacementResponse = { + kind: 'aggregate' | 'projection' | 'runner' + revision: string + placements: Array<{ tenant_id: string; targets: string[] }> +} + +export type TenantsResponse = { + tenants: Array<{ + tenant_id: string + aggregate_targets: string[] + projection_targets: string[] + runner_targets: string[] + }> +} + +export type Job = { + job_id: string + status: 'pending' | 'running' | 'succeeded' | 'failed' | 'cancelled' + steps: Array<{ name: string; status: Job['status']; attempts: number; error?: string | null }> + error?: string | null + created_at_ms: number + started_at_ms?: number | null + finished_at_ms?: number | null +} + +export type AuditEvent = { + ts_ms: number + principal_sub: string + action: string + tenant_id?: string | null + reason: string + job_id?: string | null +} + +export function getFleetSnapshot(): Promise { + return apiJson('/admin/v1/fleet/snapshot') +} + +export function getPlacement(kind: 'aggregate' | 'projection' | 'runner'): Promise { + return apiJson(`/admin/v1/placement/${kind}`) +} + +export function getTenants(): Promise { + return apiJson('/admin/v1/tenants') +} + +export function getJob(jobId: string): Promise { + return apiJson(`/admin/v1/jobs/${jobId}`) +} + +export function cancelJob(jobId: string): Promise { + return apiPostJson(`/admin/v1/jobs/${jobId}/cancel`, {}, undefined).then(() => undefined) +} + +export function startTenantDrainJob(args: { + tenantId: string + reason: string + idempotencyKey: string 
+}): Promise<{ job_id: string }> { + return apiPostJson( + '/admin/v1/jobs/tenant/drain', + { tenant_id: args.tenantId, reason: args.reason }, + args.idempotencyKey, + ) +} + +export function startTenantMigrateJob(args: { + tenantId: string + runnerTarget: string + reason: string + idempotencyKey: string +}): Promise<{ job_id: string }> { + return apiPostJson( + '/admin/v1/jobs/tenant/migrate', + { tenant_id: args.tenantId, runner_target: args.runnerTarget, reason: args.reason }, + args.idempotencyKey, + ) +} + +export function planTenantMigrate(args: { tenantId: string; runnerTarget: string; reason: string }): Promise<{ steps: string[] }> { + return apiPostJson('/admin/v1/plan/tenant/migrate', { + tenant_id: args.tenantId, + runner_target: args.runnerTarget, + reason: args.reason, + }) +} + +export function listAudit(): Promise<{ events: AuditEvent[] }> { + return apiJson('/admin/v1/audit') +} + +export type SwarmService = { + name: string + image?: string | null + mode?: string | null + replicas?: string | null + updated_at?: string | null +} + +export type SwarmTask = { + id: string + service: string + node?: string | null + desired_state?: string | null + current_state?: string | null + error?: string | null +} + +export function getSwarmServices(): Promise<{ services: SwarmService[] }> { + return apiJson('/admin/v1/swarm/services') +} + +export function getSwarmTasks(serviceName: string): Promise<{ service: string; tasks: SwarmTask[] }> { + return apiJson(`/admin/v1/swarm/services/${encodeURIComponent(serviceName)}/tasks`) +} diff --git a/control/ui/src/app/layout.tsx b/control/ui/src/app/layout.tsx new file mode 100644 index 0000000..e2d2ef1 --- /dev/null +++ b/control/ui/src/app/layout.tsx @@ -0,0 +1,183 @@ +import { useMemo, useState } from 'react' +import { Link, Outlet, useLocation } from 'react-router-dom' +import { getLastRequestIds } from '../api/client' +import { Button, Code, TextInput } from '../components/primitives' + +type NavItem = { + label: 
string + to: string +} + +const navItems: NavItem[] = [ + { label: 'Overview', to: '/' }, + { label: 'Tenants', to: '/tenants' }, + { label: 'Users', to: '/users' }, + { label: 'Sessions', to: '/sessions' }, + { label: 'Roles & Permissions', to: '/roles-permissions' }, + { label: 'Config', to: '/config' }, + { label: 'Definitions', to: '/definitions' }, + { label: 'Scale & Placement', to: '/scale-placement' }, + { label: 'Deployments', to: '/deployments' }, + { label: 'Observability', to: '/observability' }, + { label: 'Audit Log', to: '/audit-log' }, + { label: 'Settings', to: '/settings' }, +] + +function normalizePath(pathname: string) { + if (pathname === '') return '/' + if (pathname === '/') return '/' + return pathname.endsWith('/') ? pathname.slice(0, -1) : pathname +} + +export function Layout() { + const location = useLocation() + const active = normalizePath(location.pathname) + const [query, setQuery] = useState('') + const lastIds = getLastRequestIds() + + const grafana = useMemo(() => { + const base = (import.meta.env.VITE_GRAFANA_URL as string | undefined) ?? '' + const loki = (import.meta.env.VITE_GRAFANA_LOKI_DATASOURCE as string | undefined) ?? 'Loki' + const tempo = (import.meta.env.VITE_GRAFANA_TEMPO_DATASOURCE as string | undefined) ?? 
'Tempo' + return { base, loki, tempo } + }, []) + + function openGrafanaLogs(id: string) { + if (!grafana.base) return + const left = encodeURIComponent( + JSON.stringify({ + datasource: grafana.loki, + queries: [{ refId: 'A', expr: `{correlation_id="${id}"}` }], + }), + ) + window.open(`${grafana.base.replace(/\/$/, '')}/explore?left=${left}`, '_blank', 'noreferrer') + } + + function openGrafanaTrace(id: string) { + if (!grafana.base) return + const left = encodeURIComponent( + JSON.stringify({ + datasource: grafana.tempo, + queries: [{ refId: 'A', queryType: 'traceId', traceId: id }], + }), + ) + window.open(`${grafana.base.replace(/\/$/, '')}/explore?left=${left}`, '_blank', 'noreferrer') + } + + async function copy(text: string) { + try { + await navigator.clipboard.writeText(text) + } catch { + return + } + } + + return ( +
+ + +
+
+
+
+ { + if (e.key === 'Enter') { + const id = query.trim() + if (!id) return + openGrafanaLogs(id) + } + }} + /> +
+ + +
+ + {lastIds ? ( +
+
+ request_id + {lastIds.requestId} + +
+ {lastIds.correlationId ? ( +
+ correlation_id + {lastIds.correlationId} + + +
+ ) : null} +
+ ) : null} +
+ +
+
+ ) +} diff --git a/control/ui/src/app/router.test.tsx b/control/ui/src/app/router.test.tsx new file mode 100644 index 0000000..085737a --- /dev/null +++ b/control/ui/src/app/router.test.tsx @@ -0,0 +1,37 @@ +import { cleanup, render, screen } from '@testing-library/react' +import { RouterProvider } from 'react-router-dom' +import { afterEach, describe, expect, it } from 'vitest' +import { createMemoryAppRouter } from './router' + +afterEach(() => { + cleanup() +}) + +const paths = [ + '/', + '/tenants', + '/users', + '/sessions', + '/roles-permissions', + '/config', + '/definitions', + '/scale-placement', + '/deployments', + '/observability', + '/audit-log', + '/settings', +] + +describe('routing', () => { + it.each(paths)('renders %s without runtime errors', async (path: string) => { + const router = createMemoryAppRouter([path]) + render() + expect(await screen.findByRole('heading', { level: 1 })).toBeInTheDocument() + }) + + it('renders not found for unknown routes', async () => { + const router = createMemoryAppRouter(['/does-not-exist']) + render() + expect(await screen.findByText('Not Found')).toBeInTheDocument() + }) +}) diff --git a/control/ui/src/app/router.tsx b/control/ui/src/app/router.tsx new file mode 100644 index 0000000..af96a4b --- /dev/null +++ b/control/ui/src/app/router.tsx @@ -0,0 +1,51 @@ +import { createBrowserRouter, createMemoryRouter, type RouteObject } from 'react-router-dom' +import { Layout } from './layout' +import { + AuditLogPage, + ConfigPage, + DefinitionsPage, + DeploymentDetailPage, + DeploymentsPage, + JobPage, + NotFoundPage, + ObservabilityPage, + OverviewPage, + RolesPermissionsPage, + ScalePlacementPage, + SessionsPage, + SettingsPage, + TenantsPage, + UsersPage, +} from '../pages' + +export const routes: RouteObject[] = [ + { + path: '/', + element: , + children: [ + { index: true, element: }, + { path: 'tenants', element: }, + { path: 'users', element: }, + { path: 'sessions', element: }, + { path: 'roles-permissions', 
element: }, + { path: 'config', element: }, + { path: 'definitions', element: }, + { path: 'scale-placement', element: }, + { path: 'deployments', element: }, + { path: 'deployments/:serviceName', element: }, + { path: 'observability', element: }, + { path: 'audit-log', element: }, + { path: 'jobs/:jobId', element: }, + { path: 'settings', element: }, + { path: '*', element: }, + ], + }, +] + +export function createBrowserAppRouter() { + return createBrowserRouter(routes) +} + +export function createMemoryAppRouter(initialEntries: string[]) { + return createMemoryRouter(routes, { initialEntries }) +} diff --git a/control/ui/src/assets/hero.png b/control/ui/src/assets/hero.png new file mode 100644 index 0000000..cc51a3d Binary files /dev/null and b/control/ui/src/assets/hero.png differ diff --git a/control/ui/src/assets/react.svg b/control/ui/src/assets/react.svg new file mode 100644 index 0000000..6c87de9 --- /dev/null +++ b/control/ui/src/assets/react.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/control/ui/src/assets/vite.svg b/control/ui/src/assets/vite.svg new file mode 100644 index 0000000..5101b67 --- /dev/null +++ b/control/ui/src/assets/vite.svg @@ -0,0 +1 @@ +Vite diff --git a/control/ui/src/auth/token.ts b/control/ui/src/auth/token.ts new file mode 100644 index 0000000..572b74d --- /dev/null +++ b/control/ui/src/auth/token.ts @@ -0,0 +1,23 @@ +const TOKEN_KEY = 'control:access_token' + +export function getAccessToken(): string | undefined { + try { + const v = localStorage.getItem(TOKEN_KEY) + return v && v.trim() ? 
v : undefined + } catch { + return undefined + } +} + +export function setAccessToken(token: string) { + try { + const v = token.trim() + if (!v) { + localStorage.removeItem(TOKEN_KEY) + return + } + localStorage.setItem(TOKEN_KEY, v) + } catch { + return + } +} diff --git a/control/ui/src/components/primitives.tsx b/control/ui/src/components/primitives.tsx new file mode 100644 index 0000000..99168a7 --- /dev/null +++ b/control/ui/src/components/primitives.tsx @@ -0,0 +1,148 @@ +import type { KeyboardEvent, ReactNode } from 'react' + +const colors = { + border: '#ddd', + borderSubtle: '#eee', + text: '#111', + muted: '#666', + danger: '#b00020', + bg: '#fff', + bgSubtle: '#fafafa', + bgActive: '#eaeaea', +} + +export function Button(props: { + children: ReactNode + onClick?: () => void + disabled?: boolean + variant?: 'default' | 'danger' + type?: 'button' | 'submit' +}) { + const variant = props.variant ?? 'default' + const borderColor = variant === 'danger' ? colors.danger : colors.border + const textColor = variant === 'danger' ? colors.danger : colors.text + + return ( + + ) +} + +export function TextInput(props: { + id?: string + value: string + onChange: (value: string) => void + placeholder?: string + ariaLabel?: string + onKeyDown?: (e: KeyboardEvent) => void +}) { + return ( + props.onChange(e.target.value)} + placeholder={props.placeholder} + onKeyDown={props.onKeyDown} + style={{ + padding: '8px 10px', + borderRadius: 8, + border: `1px solid ${colors.border}`, + width: '100%', + }} + /> + ) +} + +export function Code(props: { children: ReactNode }) { + return {props.children} +} + +export function ErrorText(props: { children: ReactNode }) { + return
{props.children}
+} + +export function MutedText(props: { children: ReactNode }) { + return
{props.children}
+} + +export function Table(props: { columns: ReactNode[]; rows: ReactNode[][] }) { + return ( +
+ + + + {props.columns.map((c, idx) => ( + + ))} + + + + {props.rows.map((r, ridx) => ( + + {r.map((cell, cidx) => ( + + ))} + + ))} + +
+ {c} +
+ {cell} +
+
+ ) +} + +export function Modal(props: { + title: string + open: boolean + onClose: () => void + children: ReactNode + footer?: ReactNode +}) { + if (!props.open) return null + + return ( +
{ + if (e.target === e.currentTarget) props.onClose() + }} + > +
+
{props.title}
+
{props.children}
+ {props.footer ?
{props.footer}
: null} +
+
+ ) +} diff --git a/control/ui/src/index.css b/control/ui/src/index.css new file mode 100644 index 0000000..5fb3313 --- /dev/null +++ b/control/ui/src/index.css @@ -0,0 +1,111 @@ +:root { + --text: #6b6375; + --text-h: #08060d; + --bg: #fff; + --border: #e5e4e7; + --code-bg: #f4f3ec; + --accent: #aa3bff; + --accent-bg: rgba(170, 59, 255, 0.1); + --accent-border: rgba(170, 59, 255, 0.5); + --social-bg: rgba(244, 243, 236, 0.5); + --shadow: + rgba(0, 0, 0, 0.1) 0 10px 15px -3px, rgba(0, 0, 0, 0.05) 0 4px 6px -2px; + + --sans: system-ui, 'Segoe UI', Roboto, sans-serif; + --heading: system-ui, 'Segoe UI', Roboto, sans-serif; + --mono: ui-monospace, Consolas, monospace; + + font: 18px/145% var(--sans); + letter-spacing: 0.18px; + color-scheme: light dark; + color: var(--text); + background: var(--bg); + font-synthesis: none; + text-rendering: optimizeLegibility; + -webkit-font-smoothing: antialiased; + -moz-osx-font-smoothing: grayscale; + + @media (max-width: 1024px) { + font-size: 16px; + } +} + +@media (prefers-color-scheme: dark) { + :root { + --text: #9ca3af; + --text-h: #f3f4f6; + --bg: #16171d; + --border: #2e303a; + --code-bg: #1f2028; + --accent: #c084fc; + --accent-bg: rgba(192, 132, 252, 0.15); + --accent-border: rgba(192, 132, 252, 0.5); + --social-bg: rgba(47, 48, 58, 0.5); + --shadow: + rgba(0, 0, 0, 0.4) 0 10px 15px -3px, rgba(0, 0, 0, 0.25) 0 4px 6px -2px; + } + + #social .button-icon { + filter: invert(1) brightness(2); + } +} + +#root { + width: 1126px; + max-width: 100%; + margin: 0 auto; + text-align: center; + border-inline: 1px solid var(--border); + min-height: 100svh; + display: flex; + flex-direction: column; + box-sizing: border-box; +} + +body { + margin: 0; +} + +h1, +h2 { + font-family: var(--heading); + font-weight: 500; + color: var(--text-h); +} + +h1 { + font-size: 56px; + letter-spacing: -1.68px; + margin: 32px 0; + @media (max-width: 1024px) { + font-size: 36px; + margin: 20px 0; + } +} +h2 { + font-size: 24px; + line-height: 118%; + 
letter-spacing: -0.24px; + margin: 0 0 8px; + @media (max-width: 1024px) { + font-size: 20px; + } +} +p { + margin: 0; +} + +code, +.counter { + font-family: var(--mono); + display: inline-flex; + border-radius: 4px; + color: var(--text-h); +} + +code { + font-size: 15px; + line-height: 135%; + padding: 4px 8px; + background: var(--code-bg); +} diff --git a/control/ui/src/main.tsx b/control/ui/src/main.tsx new file mode 100644 index 0000000..bef5202 --- /dev/null +++ b/control/ui/src/main.tsx @@ -0,0 +1,10 @@ +import { StrictMode } from 'react' +import { createRoot } from 'react-dom/client' +import './index.css' +import App from './App.tsx' + +createRoot(document.getElementById('root')!).render( + + + , +) diff --git a/control/ui/src/pages.tsx b/control/ui/src/pages.tsx new file mode 100644 index 0000000..d0df2b1 --- /dev/null +++ b/control/ui/src/pages.tsx @@ -0,0 +1,527 @@ +import { useEffect, useMemo, useState, type ReactNode } from 'react' +import { useNavigate, useParams } from 'react-router-dom' +import { + getFleetSnapshot, + getPlacement, + getTenants, + getJob, + cancelJob, + listAudit, + getSwarmServices, + getSwarmTasks, + startTenantDrainJob, + startTenantMigrateJob, + type FleetSnapshot, + type PlacementResponse, + type TenantsResponse, + type Job, + type AuditEvent, + type SwarmService, + type SwarmTask, +} from './api/control' +import { getAccessToken, setAccessToken } from './auth/token' +import { Button, Code, ErrorText, Modal, MutedText, Table, TextInput } from './components/primitives' + +function PageShell(props: { title: string; children?: ReactNode }) { + return ( +
+

{props.title}

+ {props.children ?
{props.children}
: null} +
+ ) +} + +export function OverviewPage() { + const [data, setData] = useState(undefined) + const [error, setError] = useState(undefined) + + useEffect(() => { + let cancelled = false + getFleetSnapshot() + .then((d) => { + if (cancelled) return + setError(undefined) + setData(d) + }) + .catch((e: unknown) => { + if (cancelled) return + setError(e instanceof Error ? e.message : 'failed to load') + }) + return () => { + cancelled = true + } + }, []) + + return ( + + {error ? {error} : null} + {!data ?
Loading…
: null} + {data ? ( + [ + s.name, + {s.base_url}, + s.health_ok ? 'ok' : 'fail', + s.ready_ok ? 'ok' : 'fail', + s.metrics_ok ? 'ok' : 'fail', + ])} + /> + ) : null} + + ) +} + +export function TenantsPage() { + const [data, setData] = useState(undefined) + const [error, setError] = useState(undefined) + const navigate = useNavigate() + const [action, setAction] = useState< + | { kind: 'drain'; tenantId: string } + | { kind: 'migrate'; tenantId: string } + | undefined + >(undefined) + const [reason, setReason] = useState('') + const [runnerTarget, setRunnerTarget] = useState('') + const [submitting, setSubmitting] = useState(false) + + useEffect(() => { + let cancelled = false + getTenants() + .then((d) => { + if (cancelled) return + setError(undefined) + setData(d) + }) + .catch((e: unknown) => { + if (cancelled) return + setError(e instanceof Error ? e.message : 'failed to load') + }) + return () => { + cancelled = true + } + }, []) + + const canSubmit = reason.trim().length > 0 && (!action || action.kind !== 'migrate' || runnerTarget.trim().length > 0) + + function newIdempotencyKey() { + if (typeof crypto !== 'undefined' && 'randomUUID' in crypto) return crypto.randomUUID() + return `${Date.now()}-${Math.random().toString(16).slice(2)}` + } + + return ( + + {error ? {error} : null} + {!data ?
Loading…
: null} + {data ? ( +
[ + {t.tenant_id}, + {t.aggregate_targets.join(', ')}, + {t.projection_targets.join(', ')}, + {t.runner_targets.join(', ')}, +
+ + +
, + ])} + /> + ) : null} + + setAction(undefined)} + footer={ +
+ + +
+ } + > + {action ? ( +
+ + Tenant: {action.tenantId} + + {action.kind === 'migrate' ? ( +
+ + +
+ ) : null} +
+ + +
+
+ ) : null} +
+ + ) +} + +export function UsersPage() { + return +} + +export function SessionsPage() { + return +} + +export function RolesPermissionsPage() { + return +} + +export function ConfigPage() { + return +} + +export function DefinitionsPage() { + return +} + +export function ScalePlacementPage() { + const [aggregate, setAggregate] = useState(undefined) + const [projection, setProjection] = useState(undefined) + const [runner, setRunner] = useState(undefined) + const [error, setError] = useState(undefined) + + useEffect(() => { + let cancelled = false + Promise.all([getPlacement('aggregate'), getPlacement('projection'), getPlacement('runner')]) + .then(([a, p, r]) => { + if (cancelled) return + setError(undefined) + setAggregate(a) + setProjection(p) + setRunner(r) + }) + .catch((e: unknown) => { + if (cancelled) return + setError(e instanceof Error ? e.message : 'failed to load') + }) + return () => { + cancelled = true + } + }, []) + + const blocks = [ + { title: 'Aggregate', data: aggregate }, + { title: 'Projection', data: projection }, + { title: 'Runner', data: runner }, + ] as const + + return ( + + {error ?
{error}
: null} +
+ {blocks.map((b) => ( +
+
{b.title}
+ {!b.data ? ( +
Loading…
+ ) : ( +
+                {JSON.stringify(b.data, null, 2)}
+              
+ )} +
+ ))} +
+
+ ) +} + +export function DeploymentsPage() { + const [data, setData] = useState(undefined) + const [error, setError] = useState(undefined) + const navigate = useNavigate() + + useEffect(() => { + let cancelled = false + getSwarmServices() + .then((d) => { + if (cancelled) return + setError(undefined) + setData(d.services) + }) + .catch((e: unknown) => { + if (cancelled) return + setError(e instanceof Error ? e.message : 'failed to load') + }) + return () => { + cancelled = true + } + }, []) + + return ( + + {error ? {error} : null} + {!data ?
Loading…
: null} + {data ? ( +
[ + , + {s.image ?? ''}, + s.mode ?? '', + s.replicas ?? '', + ])} + /> + ) : null} + + ) +} + +export function ObservabilityPage() { + return +} + +export function AuditLogPage() { + const [data, setData] = useState(undefined) + const [error, setError] = useState(undefined) + + useEffect(() => { + let cancelled = false + listAudit() + .then((d) => { + if (cancelled) return + setError(undefined) + setData(d.events) + }) + .catch((e: unknown) => { + if (cancelled) return + setError(e instanceof Error ? e.message : 'failed to load') + }) + return () => { + cancelled = true + } + }, []) + + return ( + + {error ? {error} : null} + {!data ?
Loading…
: null} + {data ? ( +
[ + {e.ts_ms}, + e.principal_sub, + e.action, + {e.tenant_id ?? ''}, + e.reason, + e.job_id ? {e.job_id} : '', + ])} + /> + ) : null} + + ) +} + +export function SettingsPage() { + const [token, setToken] = useState(() => getAccessToken() ?? '') + return ( + +
+ + { + setToken(v) + setAccessToken(v) + }} + placeholder="paste token here" + /> +
+
+ ) +} + +export function NotFoundPage() { + return +} + +export function JobPage() { + const params = useParams() + const jobId = params.jobId + const [job, setJob] = useState(undefined) + const [error, setError] = useState(undefined) + + const canCancel = job?.status === 'pending' || job?.status === 'running' + + useEffect(() => { + if (!jobId) return + let cancelled = false + + const load = () => { + getJob(jobId) + .then((j) => { + if (cancelled) return + setError(undefined) + setJob(j) + }) + .catch((e: unknown) => { + if (cancelled) return + setError(e instanceof Error ? e.message : 'failed to load') + }) + } + + load() + const t = window.setInterval(load, 1000) + return () => { + cancelled = true + window.clearInterval(t) + } + }, [jobId]) + + const steps = useMemo(() => job?.steps ?? [], [job?.steps]) + + return ( + + {jobId ? ( + + job_id: {jobId} + + ) : null} + {error ? {error} : null} + {!job ?
Loading…
: null} + {job ? ( +
+
+ Status: {job.status} +
+ {job.error ? {job.error} : null} +
+ +
+ +
[ + s.name, + {s.status}, + s.attempts, + s.error ? {s.error} : '', + ])} + /> + + ) : null} + + ) +} + +export function DeploymentDetailPage() { + const params = useParams() + const name = params.serviceName + const [data, setData] = useState(undefined) + const [error, setError] = useState(undefined) + + useEffect(() => { + if (!name) return + let cancelled = false + getSwarmTasks(name) + .then((d) => { + if (cancelled) return + setError(undefined) + setData(d.tasks) + }) + .catch((e: unknown) => { + if (cancelled) return + setError(e instanceof Error ? e.message : 'failed to load') + }) + return () => { + cancelled = true + } + }, [name]) + + return ( + + {name ? ( + + service: {name} + + ) : null} + {error ? {error} : null} + {!data ?
Loading…
: null} + {data ? ( +
[ + {t.id}, + t.node ?? '', + t.desired_state ?? '', + t.current_state ?? '', + t.error ? {t.error} : '', + ])} + /> + ) : null} + + ) +} diff --git a/control/ui/src/test/setup.ts b/control/ui/src/test/setup.ts new file mode 100644 index 0000000..1ab8977 --- /dev/null +++ b/control/ui/src/test/setup.ts @@ -0,0 +1,127 @@ +import '@testing-library/jest-dom/vitest' +import { vi } from 'vitest' + +vi.stubGlobal( + 'fetch', + vi.fn(async (input: RequestInfo | URL) => { + const url = typeof input === 'string' ? input : input.toString() + + if (url.includes('/admin/v1/fleet/snapshot')) { + return new Response( + JSON.stringify({ + services: [ + { + name: 'control-api', + base_url: 'http://127.0.0.1:8080', + health_ok: true, + ready_ok: true, + metrics_ok: true, + }, + ], + }), + { status: 200, headers: { 'content-type': 'application/json' } }, + ) + } + + if (url.includes('/admin/v1/placement/')) { + const kind = url.split('/admin/v1/placement/')[1]?.split('?')[0] ?? 'aggregate' + return new Response( + JSON.stringify({ + kind, + revision: 'dev', + placements: [], + }), + { status: 200, headers: { 'content-type': 'application/json' } }, + ) + } + + if (url.includes('/admin/v1/tenants')) { + return new Response( + JSON.stringify({ + tenants: [ + { + tenant_id: '00000000-0000-0000-0000-000000000000', + aggregate_targets: [], + projection_targets: [], + runner_targets: [], + }, + ], + }), + { status: 200, headers: { 'content-type': 'application/json' } }, + ) + } + + if (url.includes('/admin/v1/audit')) { + return new Response( + JSON.stringify({ + events: [], + }), + { status: 200, headers: { 'content-type': 'application/json' } }, + ) + } + + if (url.includes('/admin/v1/jobs/') && url.includes('/cancel')) { + return new Response('', { status: 200 }) + } + + if (url.includes('/admin/v1/jobs/tenant/')) { + return new Response(JSON.stringify({ job_id: 'job-1' }), { + status: 200, + headers: { 'content-type': 'application/json' }, + }) + } + + if 
(url.includes('/admin/v1/jobs/')) { + return new Response( + JSON.stringify({ + job_id: 'job-1', + status: 'succeeded', + steps: [{ name: 'echo', status: 'succeeded', attempts: 1, error: null }], + error: null, + created_at_ms: 0, + started_at_ms: 0, + finished_at_ms: 0, + }), + { status: 200, headers: { 'content-type': 'application/json' } }, + ) + } + + if (url.includes('/admin/v1/swarm/services') && url.includes('/tasks')) { + return new Response( + JSON.stringify({ + service: 'gateway', + tasks: [ + { + id: 'task-1', + service: 'gateway', + node: 'node-1', + desired_state: 'running', + current_state: 'running', + error: null, + }, + ], + }), + { status: 200, headers: { 'content-type': 'application/json' } }, + ) + } + + if (url.includes('/admin/v1/swarm/services')) { + return new Response( + JSON.stringify({ + services: [ + { + name: 'gateway', + image: 'cloudlysis/gateway:dev', + mode: 'replicated', + replicas: '1/1', + updated_at: null, + }, + ], + }), + { status: 200, headers: { 'content-type': 'application/json' } }, + ) + } + + return new Response('not found', { status: 404 }) + }), +) diff --git a/control/ui/tsconfig.app.json b/control/ui/tsconfig.app.json new file mode 100644 index 0000000..af516fc --- /dev/null +++ b/control/ui/tsconfig.app.json @@ -0,0 +1,28 @@ +{ + "compilerOptions": { + "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo", + "target": "ES2023", + "useDefineForClassFields": true, + "lib": ["ES2023", "DOM", "DOM.Iterable"], + "module": "ESNext", + "types": ["vite/client"], + "skipLibCheck": true, + + /* Bundler mode */ + "moduleResolution": "bundler", + "allowImportingTsExtensions": true, + "verbatimModuleSyntax": true, + "moduleDetection": "force", + "noEmit": true, + "jsx": "react-jsx", + + /* Linting */ + "strict": true, + "noUnusedLocals": true, + "noUnusedParameters": true, + "erasableSyntaxOnly": true, + "noFallthroughCasesInSwitch": true, + "noUncheckedSideEffectImports": true + }, + "include": ["src"] +} diff --git 
a/control/ui/tsconfig.json b/control/ui/tsconfig.json new file mode 100644 index 0000000..1ffef60 --- /dev/null +++ b/control/ui/tsconfig.json @@ -0,0 +1,7 @@ +{ + "files": [], + "references": [ + { "path": "./tsconfig.app.json" }, + { "path": "./tsconfig.node.json" } + ] +} diff --git a/control/ui/tsconfig.node.json b/control/ui/tsconfig.node.json new file mode 100644 index 0000000..8a67f62 --- /dev/null +++ b/control/ui/tsconfig.node.json @@ -0,0 +1,26 @@ +{ + "compilerOptions": { + "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo", + "target": "ES2023", + "lib": ["ES2023"], + "module": "ESNext", + "types": ["node"], + "skipLibCheck": true, + + /* Bundler mode */ + "moduleResolution": "bundler", + "allowImportingTsExtensions": true, + "verbatimModuleSyntax": true, + "moduleDetection": "force", + "noEmit": true, + + /* Linting */ + "strict": true, + "noUnusedLocals": true, + "noUnusedParameters": true, + "erasableSyntaxOnly": true, + "noFallthroughCasesInSwitch": true, + "noUncheckedSideEffectImports": true + }, + "include": ["vite.config.ts"] +} diff --git a/control/ui/vite.config.ts b/control/ui/vite.config.ts new file mode 100644 index 0000000..8b0f57b --- /dev/null +++ b/control/ui/vite.config.ts @@ -0,0 +1,7 @@ +import { defineConfig } from 'vite' +import react from '@vitejs/plugin-react' + +// https://vite.dev/config/ +export default defineConfig({ + plugins: [react()], +}) diff --git a/control/ui/vitest.config.ts b/control/ui/vitest.config.ts new file mode 100644 index 0000000..8974390 --- /dev/null +++ b/control/ui/vitest.config.ts @@ -0,0 +1,10 @@ +import { defineConfig } from 'vitest/config' + +export default defineConfig({ + test: { + environment: 'jsdom', + setupFiles: ['./src/test/setup.ts'], + testTimeout: 5000, + hookTimeout: 5000, + }, +}) diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..8be4c8e --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,126 @@ +services: + nats: + image: 
nats:2.10-alpine + command: ["-js", "-m", "8222"] + ports: + - "4222:4222" + - "8222:8222" + + gateway: + build: + context: . + dockerfile: docker/Dockerfile.rust + args: + PACKAGE: gateway + BIN: gateway + depends_on: + - nats + environment: + GATEWAY_ADDR: 0.0.0.0:8080 + GATEWAY_GRPC_ADDR: 0.0.0.0:8081 + GATEWAY_STORAGE_PATH: /data/gateway.mdbx + GATEWAY_ROUTING_FILE: /config/routing.json + volumes: + - gateway_data:/data + - ./routing/dev.json:/config/routing.json:ro + ports: + - "8080:8080" + - "8081:8081" + + aggregate: + build: + context: . + dockerfile: docker/Dockerfile.rust + args: + PACKAGE: aggregate + BIN: aggregate + depends_on: + - nats + environment: + AGGREGATE_NATS_URL: nats://nats:4222 + AGGREGATE_STORAGE_PATH: /data + AGGREGATE_HTTP_ADDR: 0.0.0.0:8080 + AGGREGATE_GRPC_ADDR: 0.0.0.0:50051 + volumes: + - aggregate_data:/data + ports: + - "50051:50051" + - "18080:8080" + + projection: + build: + context: . + dockerfile: docker/Dockerfile.rust + args: + PACKAGE: projection + BIN: projection + depends_on: + - nats + environment: + PROJECTION_NATS_URL: nats://nats:4222 + PROJECTION_STREAM_NAME: AGGREGATE_EVENTS + PROJECTION_DURABLE_NAME: projection + PROJECTION_STORAGE_PATH: /data + PROJECTION_MANIFEST_PATH: /config/projection-manifest.yaml + PROJECTION_MULTI_TENANT: "true" + PROJECTION_MAX_IN_FLIGHT: "128" + PROJECTION_ACK_TIMEOUT_MS: "30000" + PROJECTION_MAX_DELIVER: "10" + volumes: + - projection_data:/data + - ./projection/projection-manifest.yaml:/config/projection-manifest.yaml:ro + + runner: + build: + context: . 
+ dockerfile: docker/Dockerfile.rust + args: + PACKAGE: runner + BIN: runner + depends_on: + - nats + environment: + RUNNER_NATS_URL: nats://nats:4222 + RUNNER_STORAGE_PATH: /data/runner.mdbx + RUNNER_MODE: combined + RUNNER_HTTP_ADDR: 0.0.0.0:8080 + RUNNER_SAGA_MANIFEST_PATH: /config/sagas.yaml + RUNNER_EFFECTS_MANIFEST_PATH: /config/effects.yaml + volumes: + - runner_data:/data + - ./runner/config:/config:ro + ports: + - "28080:8080" + + control-api: + build: + context: . + dockerfile: docker/Dockerfile.rust + args: + PACKAGE: api + BIN: api + environment: + CONTROL_API_ADDR: 0.0.0.0:8080 + CONTROL_PLACEMENT_PATH: /etc/control/placement.json + CONTROL_SWARM_STATE_PATH: /etc/control/swarm_state.json + CONTROL_SELF_URL: http://control-api:8080 + volumes: + - ./placement/dev.json:/etc/control/placement.json:ro + - ./swarm/dev.json:/etc/control/swarm_state.json:ro + ports: + - "38080:8080" + + control-ui: + build: + context: . + dockerfile: docker/Dockerfile.ui + environment: + VITE_CONTROL_API_URL: http://localhost:38080 + ports: + - "8082:80" + +volumes: + aggregate_data: + gateway_data: + projection_data: + runner_data: diff --git a/docker/Dockerfile.rust b/docker/Dockerfile.rust new file mode 100644 index 0000000..16e251b --- /dev/null +++ b/docker/Dockerfile.rust @@ -0,0 +1,33 @@ +FROM rust:1.93-bookworm AS builder + +RUN apt-get update \ + && apt-get install -y --no-install-recommends clang libclang-dev pkg-config protobuf-compiler ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /app +COPY . . 
+ +ARG GATEWAY_BUILD_VERSION=dev +ARG GATEWAY_BUILD_SHA=unknown +ENV GATEWAY_BUILD_VERSION=$GATEWAY_BUILD_VERSION +ENV GATEWAY_BUILD_SHA=$GATEWAY_BUILD_SHA + +ARG PACKAGE +ARG BIN + +RUN cargo build -p ${PACKAGE} --bin ${BIN} --release + +FROM debian:bookworm-slim + +RUN apt-get update \ + && apt-get install -y --no-install-recommends ca-certificates curl \ + && rm -rf /var/lib/apt/lists/* + +RUN useradd -m -u 10001 app + +ARG BIN +COPY --from=builder /app/target/release/${BIN} /usr/local/bin/app + +USER app + +ENTRYPOINT ["/usr/local/bin/app"] diff --git a/docker/Dockerfile.ui b/docker/Dockerfile.ui new file mode 100644 index 0000000..cba2661 --- /dev/null +++ b/docker/Dockerfile.ui @@ -0,0 +1,17 @@ +FROM node:20-alpine AS builder + +WORKDIR /app + +COPY control/ui/package.json control/ui/package-lock.json ./ +RUN npm ci + +COPY control/ui . +RUN npm run build + +FROM nginx:1.29-alpine + +COPY control/ui/nginx.conf /etc/nginx/conf.d/default.conf +COPY --from=builder /app/dist /usr/share/nginx/html + +EXPOSE 80 +CMD ["nginx", "-g", "daemon off;"] diff --git a/docker/scripts/build_images.sh b/docker/scripts/build_images.sh new file mode 100644 index 0000000..e785a05 --- /dev/null +++ b/docker/scripts/build_images.sh @@ -0,0 +1,52 @@ +#!/bin/sh +set -eu + +mode="${1:-all}" + +build_rust() { + image="$1" + package="$2" + bin="$3" + docker build \ + -f docker/Dockerfile.rust \ + --build-arg PACKAGE="$package" \ + --build-arg BIN="$bin" \ + --build-arg GATEWAY_BUILD_VERSION="${GATEWAY_BUILD_VERSION:-dev}" \ + --build-arg GATEWAY_BUILD_SHA="${GATEWAY_BUILD_SHA:-unknown}" \ + -t "$image" \ + . +} + +build_ui() { + image="$1" + docker build -f docker/Dockerfile.ui -t "$image" . 
+} + +build_platform() { + build_rust cloudlysis/gateway:dev gateway gateway + build_rust cloudlysis/aggregate:dev aggregate aggregate + build_rust cloudlysis/projection:dev projection projection + build_rust cloudlysis/runner:dev runner runner +} + +build_control() { + build_rust cloudlysis/control-api:dev api api + build_ui cloudlysis/control-ui:dev +} + +case "$mode" in + platform) + build_platform + ;; + control) + build_control + ;; + all) + build_platform + build_control + ;; + *) + echo "usage: sh docker/scripts/build_images.sh [platform|control|all]" 1>&2 + exit 2 + ;; +esac diff --git a/docker/scripts/swarm_dev_secrets.sh b/docker/scripts/swarm_dev_secrets.sh new file mode 100644 index 0000000..5f7a584 --- /dev/null +++ b/docker/scripts/swarm_dev_secrets.sh @@ -0,0 +1,13 @@ +#!/bin/sh +set -eu + +ensure_secret() { + name="$1" + value="$2" + if docker secret inspect "$name" >/dev/null 2>&1; then + return 0 + fi + printf "%s" "$value" | docker secret create "$name" - >/dev/null +} + +ensure_secret grafana_admin_password "${GRAFANA_ADMIN_PASSWORD:-admin}" diff --git a/docker/scripts/verify_aggregate_container.sh b/docker/scripts/verify_aggregate_container.sh new file mode 100644 index 0000000..071e6aa --- /dev/null +++ b/docker/scripts/verify_aggregate_container.sh @@ -0,0 +1,13 @@ +#!/bin/sh +set -e + +docker build -t cloudlysis/aggregate:local -f docker/Dockerfile.rust --build-arg PACKAGE=aggregate --build-arg BIN=aggregate . 
+ +cid="$(docker run -d -p 8085:8080 -e AGGREGATE_STORAGE_PATH=/tmp/aggregate-data cloudlysis/aggregate:local)" + +sleep 2 +curl -fsS http://localhost:8085/health >/dev/null +curl -fsS http://localhost:8085/ready >/dev/null +curl -fsS http://localhost:8085/metrics >/dev/null + +docker rm -f "$cid" >/dev/null diff --git a/gateway/.gitignore b/gateway/.gitignore new file mode 100644 index 0000000..1580f9c --- /dev/null +++ b/gateway/.gitignore @@ -0,0 +1,27 @@ +/target/ +/target-*/ +**/target/ +*.rs.bk +*.pdb +*.dSYM/ +*.orig +*.rej +*.log +.DS_Store +.idea/ +.vscode/ +*.swp +*.swo +.env +.env.* +.envrc +.direnv/ +*.mdbx +*.mdbx-* +/data/ +/tmp/ +/coverage/ +lcov.info +*.profraw +*.profdata +docker-compose.override.yml diff --git a/gateway/Cargo.toml b/gateway/Cargo.toml new file mode 100644 index 0000000..5457282 --- /dev/null +++ b/gateway/Cargo.toml @@ -0,0 +1,48 @@ + [package] + name = "gateway" + version = "0.1.0" + edition = "2021" + + [dependencies] +shared = { path = "../shared" } + anyhow = "1" +argon2 = "0.5" +async-nats = "0.39" +async-trait = "0.1" + axum = { version = "0.7", features = ["json"] } +base32 = "0.5" +chrono = { version = "0.4", features = ["serde"] } +edge_storage = { version = "0.1", registry = "madapes" } +edge-logger-client = { version = "0.1", registry = "madapes" } +futures = "0.3" +hex = "0.4" +hmac = "0.12" + http = "1" +jsonwebtoken = "9" +libmdbx = "0.6" + metrics = "0.23" + metrics-exporter-prometheus = "0.15" +rand_core = "0.6" +prost = "0.13" +reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] } +serde = { version = "1", features = ["derive", "rc"] } + serde_json = "1" +serde_yaml = "0.9" +sha2 = "0.10" +sha1 = "0.10" +subtle = "2" + thiserror = "2" + tokio = { version = "1", features = ["macros", "rt-multi-thread", "signal"] } +tonic = { version = "0.12", default-features = false, features = ["codegen", "prost", "transport", "tls"] } + tower = { version = "0.5", features = ["timeout", 
"util"] } +tower-http = { version = "0.6", features = ["limit", "request-id", "trace"] } + tracing = "0.1" + tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt", "json"] } +urlencoding = "2" + uuid = { version = "1", features = ["v4"] } + + [dev-dependencies] + +[build-dependencies] +tonic-build = { version = "0.12", default-features = false, features = ["prost"] } +protoc-bin-vendored = "3" diff --git a/gateway/DEPLOYMENT.md b/gateway/DEPLOYMENT.md new file mode 100644 index 0000000..2b60f79 --- /dev/null +++ b/gateway/DEPLOYMENT.md @@ -0,0 +1,62 @@ +# Deployment Notes + +## Swarm Ingress and TLS + +Gateway exposes: +- HTTP: `:8080` +- gRPC: `:8081` + +Recommended pattern in Swarm: +- Terminate TLS at an ingress proxy (Traefik / Nginx / Envoy) on the overlay network. +- Route HTTP and gRPC to the Gateway service by port. +- Prefer L7 routing (Host + Path for HTTP, SNI for gRPC) and keep the Swarm routing mesh disabled unless you explicitly want it. + +## Secrets + +The Swarm stack expects these secrets to exist: +- `gateway_jwt_secrets` (comma-separated or newline-separated signing secrets) +- `google_oidc_client_id` +- `google_oidc_client_secret` + +The container reads them via: +- `GATEWAY_JWT_SECRETS_FILE` +- `GOOGLE_OIDC_CLIENT_ID_FILE` +- `GOOGLE_OIDC_CLIENT_SECRET_FILE` + +## Internal mTLS (Optional) + +Gateway can be configured to use mTLS when calling internal upstreams. + +HTTP upstream (reqwest): +- `GATEWAY_INTERNAL_CA_CERT_PEM_FILE` +- `GATEWAY_INTERNAL_IDENTITY_PEM_FILE` (combined cert + key PEM) + +gRPC upstream (tonic): +- `GATEWAY_INTERNAL_GRPC_TLS` = `true|false` (or use `https://` upstream URLs) +- `GATEWAY_INTERNAL_GRPC_CA_CERT_PEM_FILE` +- `GATEWAY_INTERNAL_GRPC_CLIENT_CERT_PEM_FILE` +- `GATEWAY_INTERNAL_GRPC_CLIENT_KEY_PEM_FILE` + +## HA Validation (Manual) + +With `gateway` running at `replicas: 2`: +- Verify `/ready` stays healthy during rolling updates. 
+- Verify refresh rotation works across replicas (no sticky sessions): + - Sign in → refresh from one replica → refresh again against the other replica and confirm the old token is rejected. +- Verify admin IAM changes are visible across replicas: + - Create a role/assignment via `/admin/iam` on one replica → call an authorized endpoint via the other replica. + +### Suggested Procedure + +1. Deploy the stack: + - `docker stack deploy -c swarm/stacks/platform.yml cloudlysis` +2. Confirm `gateway` has 2 running tasks: + - `docker service ps cloudlysis_gateway` +3. Smoke readiness (at least one replica reachable through ingress): + - `curl -fsS http://localhost:8080/ready` +4. Refresh across replicas: + - Run the refresh flow twice while forcing traffic to different replicas (hostnames or per-task published ports if you use an ingress proxy). +5. Rolling update guard: + - Update the image tag and redeploy, then watch: + - `docker service ps cloudlysis_gateway` + - `curl -fsS http://localhost:8080/ready` in a loop diff --git a/gateway/DEVELOPMENT_PLAN.md b/gateway/DEVELOPMENT_PLAN.md new file mode 100644 index 0000000..2e6f76b --- /dev/null +++ b/gateway/DEVELOPMENT_PLAN.md @@ -0,0 +1,339 @@ +# Development Plan: Gateway + +## Overview + +This plan breaks down the Gateway implementation into milestones ordered by dependency. Each milestone includes: +- **Tasks** with clear deliverables +- **Test Requirements** (unit tests + tautological tests + integration tests where applicable) +- **Dependencies** on previous milestones + +**Development Approach:** +1. Complete one milestone at a time +2. Write tests before implementation (TDD where applicable) +3. Do not start the next milestone until the current milestone’s tests are passing (green) +4. 
Mark tasks complete with `[x]` as you progress + +--- + +## Milestone 1: Project Foundation + +**Goal:** Create the Gateway service as a Rust project aligned with existing node conventions (Axum + Tokio + tracing + Prometheus metrics). + +### Tasks +- [x] **1.1** Initialize Cargo project + - Create `src/lib.rs` and `src/main.rs` + - Establish module layout for: http, grpc, authn, authz, routing, upstream, observability, config, storage +- [x] **1.2** Choose and wire core dependencies (aligned with existing services) + - HTTP: `axum` + - gRPC: `tonic` + - Runtime: `tokio` + - Serialization: `serde`, `serde_json` + - Errors: `thiserror`, `anyhow` + - Telemetry: `tracing`, `metrics-exporter-prometheus` or existing metrics pattern in the codebase +- [x] **1.3** Add baseline runtime endpoints + - `GET /health`, `GET /ready`, `GET /metrics` + - Structured logs with request id propagation + +### Tests +- [x] **T1.1** Project compiles +- [x] **T1.2** `GET /health` returns 200 +- [x] **T1.3** Tautological test: core state types are Send + Sync + +--- + +## Milestone 2: Persistent State (Auth + RBAC + Sessions) for HA + +**Goal:** Define where Gateway state lives so the service can run as **HA (max 2 replicas)** without sticky sessions and without losing auth/admin consistency. 
+ +### Dependencies +- Milestone 1 (project foundation) + +### Tasks +- [x] **2.1** Choose and implement the backing store for identity + authorization state + - Recommended default for platform alignment: NATS JetStream KV buckets for: + - users + - identities (OIDC links) + - password credential records (hash only) + - refresh token/session records (hash only, revocable, rotating) + - MFA enrollments + recovery codes (hash only) + - rights/roles/assignments + - audit log index (append-only model) +- [x] **2.2** Define storage schema + versioning + - Key naming conventions + - JSON shapes (forward compatible) + - Migration strategy for schema changes +- [x] **2.3** Implement storage client abstraction + - CRUD primitives with compare-and-set semantics where needed (e.g., refresh rotation) + - Pagination/scan strategy for admin listing endpoints + - Consistent error mapping for storage failures + +### Tests +- [x] **T2.1** Sensitive values are stored only as hashes (reset tokens, refresh tokens, recovery codes) +- [x] **T2.2** Refresh token rotation is atomic (cannot be used twice under concurrency) +- [x] **T2.3** Tautological test: storage client is Send + Sync + +--- + +## Milestone 3: Routing Config + Service Discovery + +**Goal:** Implement the routing layer described in [prd.md](file:///Users/vlad/Developer/cloudlysis/gateway/prd.md), supporting independent placement per service kind and hot reload. 
+ +### Dependencies +- Milestone 1 (project foundation) + +### Exit Criteria +- All Milestone 3 tests pass + +### Tasks +- [x] **3.1** Define routing config model (in-memory) + - Placement maps per service kind: `aggregate_placement`, `projection_placement`, `runner_placement` + - Shard directory per service kind: `*_shards[shard_id] -> endpoint(s)` + - Revision tracking and last-known-good semantics +- [x] **3.2** Implement config sources + - Static file config for local development + - NATS JetStream KV watcher for production +- [x] **3.3** Implement routing decision API + - `(tenant_id, service_kind) -> selected endpoint` + - Admin introspection: `GET /admin/routing` +- [x] **3.4** Implement config reload semantics + - `POST /admin/routing/reload` to force refresh + - Watcher-based reload that updates atomically + +### Tests +- [x] **T3.1** Routing resolves endpoints for `(tenant_id, service_kind)` correctly +- [x] **T3.2** Hot reload swaps routing tables atomically (no partial reads) +- [x] **T3.3** Unknown tenant returns a consistent, typed routing error + +--- + +## Milestone 4: AuthN Core (Tokens, Passwords, OIDC, MFA) + +**Goal:** Implement the authentication layer and the public AuthN HTTP APIs described in the PRD: signup/signin/signout/refresh/forgot/reset and MFA primitives. 
+ +### Dependencies +- Milestone 1 (project foundation) + - Milestone 2 (persistent state) + +### Exit Criteria +- All Milestone 4 tests pass + +### Tasks +- [x] **4.1** Implement token model + - Access token (short-lived) + - Refresh token (rotating, revocable) + - Key rotation for signing keys +- [x] **4.2** Implement password flows + - `POST /v1/auth/signup`, `POST /v1/auth/signin`, `POST /v1/auth/signout`, `POST /v1/auth/refresh` + - Forgot/reset: `POST /v1/auth/forgot`, `POST /v1/auth/reset` +- [x] **4.3** Implement Google OIDC integration points + - `POST /v1/auth/oidc/google/start` + - `GET /v1/auth/oidc/google/callback` + - Account linking rules +- [x] **4.4** Implement MFA (TOTP) primitives + - Enrollment start/confirm + - Challenge and verification + - Recovery codes +- [x] **4.5** Abuse protections + - Rate limits for signin/forgot/reset + - Generic “account not found” responses where appropriate + +### Tests +- [x] **T4.1** Password hashing/verification works (Argon2id) +- [x] **T4.2** Refresh token rotation: old refresh token is invalid after use +- [x] **T4.3** Forgot/reset tokens are one-time and expire +- [x] **T4.4** MFA TOTP enrollment and challenge succeed for valid codes and fail for invalid + +--- + +## Milestone 5: AuthZ (RBAC) + Tenant Enforcement + +**Goal:** Enforce authorization decisions at the Gateway boundary, including tenant selection rules for `x-tenant-id`. 
+ +### Dependencies +- Milestone 4 (authn) + +### Exit Criteria +- All Milestone 5 tests pass + +### Tasks +- [x] **5.1** Define RBAC model + - Rights (permissions), roles, assignments (principal ↔ tenant ↔ role) + - Platform admin vs tenant admin vs tenant member scoping rules +- [x] **5.2** Implement authorization engine + - Inputs: principal, tenant_id, action, resource attributes (aggregate_type, view_type) + - Outputs: allow/deny with reason +- [x] **5.3** Enforce `x-tenant-id` rules + - Required on tenant-scoped endpoints + - Validated format and tenant membership checks +- [x] **5.4** Add consistent error envelope mapping (401/403/400) + +### Tests +- [x] **T5.1** Tenant spoofing is rejected (principal lacks membership) +- [x] **T5.2** Role assignment enables expected actions and denies others +- [x] **T5.3** Missing `x-tenant-id` on tenant routes returns 400 + +--- + +## Milestone 6: Upstream Proxying (Aggregate / Projection / Runner) + +**Goal:** Route authenticated and authorized requests to the node services. 
+ +### Dependencies +- Milestone 3 (routing) +- Milestone 5 (authz) + +### Exit Criteria +- All Milestone 6 tests pass + +### Tasks +- [x] **6.1** Aggregate submit command proxy + - gRPC server implementing `aggregate.gateway.v1.CommandService/SubmitCommand` + - HTTP wrapper `POST /v1/commands/{aggregate_type}/{aggregate_id}` + - Propagate `x-tenant-id` and correlation metadata + - Ensure safe retry semantics using `command_id` idempotency +- [x] **6.2** Projection query proxy + - `POST /v1/query/{view_type}` forwarding to Projection query endpoint once available +- [x] **6.3** Runner admin passthrough (admin-only) + - `/admin/runner/*` forwarding with strict authorization + +### Tests +- [x] **T6.1** gRPC SubmitCommand forwards tenant metadata and returns upstream events +- [x] **T6.2** HTTP command endpoint returns the same shape as gRPC response +- [x] **T6.3** Query endpoint enforces tenant scoping and denies unauthorized callers + +--- + +## Milestone 7: Admin IAM APIs (Users, Roles, Rights) + +**Goal:** Expose the admin IAM endpoints for the Admin UI node to manage authn/authz data. 
+ +### Dependencies +- Milestone 4 (authn) +- Milestone 5 (authz) + +### Exit Criteria +- All Milestone 7 tests pass + +### Tasks +- [x] **7.1** Implement admin IAM endpoints + - Users CRUD and disable/delete + - Identities link/unlink (OIDC), manage password credentials + - Rights CRUD, roles CRUD, role↔rights management + - Assignments CRUD (principal ↔ tenant ↔ role) + - Service accounts credential create/rotate and tenant role assignment + - MFA admin actions (reset MFA, revoke recovery codes) + - Session revocation for user (global signout) +- [x] **7.2** Implement audit trail for admin IAM actions + - Immutable record of actor, action, target, tenant scope, timestamp, request metadata + +### Tests +- [x] **T7.1** Only platform/tenant admins can access relevant endpoints +- [x] **T7.2** All admin mutations emit an audit record +- [x] **T7.3** Assignment changes immediately affect authorization decisions + +--- + +## Milestone 8: Rebalancing Operations (Control Plane Hooks) + +**Goal:** Provide the pieces needed to support tenant rebalancing as described in the PRD: visibility, readiness gates, and safe cutover support. 
+ +### Dependencies +- Milestone 3 (routing config) +- Milestone 6 (upstream proxying) + +### Exit Criteria +- All Milestone 8 tests pass + +### Tasks +- [x] **8.1** Expose placement introspection and status + - Current placement revision per service kind + - Effective routing decisions for a given tenant (admin-only) +- [x] **8.2** Define and implement readiness gates used by rebalancer + - Projection: warmup/catchup signal (lag) + - Runner: tenant drained / checkpoint stable signal + - Aggregate: tenant drain and state availability signal (as defined by upstream changes) +- [x] **8.3** Add operator-facing rebalancing endpoints (optional if a separate rebalancer service exists) + - Plan/apply/rollback APIs with strong authorization + +### Tests +- [x] **T8.1** Placement revision changes are visible immediately and atomically +- [x] **T8.2** Rebalancing guardrails prevent cutover when target shard is not ready + +--- + +## Milestone 9: Docker Swarm Deployment + HA (Max 2 Replicas) + +**Goal:** Define and validate the Docker Swarm architecture for Gateway, including HA behavior with at most **2 Gateway replicas**. 
+ +### Dependencies +- Milestone 1 (health/ready/metrics) +- Milestone 2 (persistent state suitable for HA) +- Milestone 6 (proxying) for end-to-end smoke tests + +### Exit Criteria +- All Milestone 9 tests pass +- The platform stack (`swarm/stacks/platform.yml`) can deploy the Gateway with `replicas: 2` and serve traffic during rolling updates + +### Tasks +- [x] **9.1** Build container image + - Dockerfile, multi-stage build, minimal runtime image + - Embed build metadata (version, git sha) +- [x] **9.2** Define Swarm service topology (2 nodes max) + - `gateway` service with `deploy.replicas: 2` + - Healthcheck based on `/ready` + - Rolling update strategy (start-first), rollback policy on failure + - Network: overlay network for internal traffic to NATS and nodes +- [x] **9.3** Define ingress and TLS termination strategy + - Swarm routing mesh or an ingress proxy (document choice in stack) + - Ensure HTTP and gRPC can be routed correctly +- [x] **9.4** Secrets and config distribution + - OIDC client secrets, JWT signing keys (rotation-ready), NATS credentials + - Use Swarm secrets/configs instead of environment variables for secrets where possible +- [x] **9.5** HA behavior validation + - Run two replicas and ensure: + - refresh token rotation works across replicas (no stickiness) + - admin IAM updates are visible from both replicas + - in-flight requests survive a single replica restart + +### Tests +- [x] **T9.1** `swarm/stacks/platform.yml` parses as valid YAML +- [x] **T9.2** Smoke: deploy 2 replicas and confirm `/ready` is healthy on both +- [x] **T9.3** Rolling update does not drop readiness below 1 available replica +- [x] **T9.4** Auth session/refresh works across replicas (no sticky sessions required) + +--- + +## Milestone 10: Observability + Hardening + +**Goal:** Make the Gateway production-ready with robust telemetry and safety defaults. 
+ +### Dependencies +- Milestone 6 (proxying) + +### Exit Criteria +- All Milestone 10 tests pass + +### Tasks +- [x] **10.1** Structured logs with correlation + - `request_id`, `trace_id`, principal id, tenant id (when present), upstream target +- [x] **10.2** Metrics + - Request counts/latency, auth failures, upstream errors, routing misses, rate limit blocks +- [x] **10.3** Security hardening + - CSRF protections for cookie-based flows + - JWT key rotation strategy and config + - mTLS/service auth boundary for internal upstreams +- [x] **10.4** Load and failure testing strategy + - Soak tests for routing reload + auth endpoints + - Backpressure/timeouts/circuit breaker verification +- [ ] **10.5** Correlation and trace context propagation (Gateway as source of truth) + - Accept inbound `x-correlation-id` and `traceparent` on HTTP and gRPC requests + - If missing, generate `x-correlation-id` at the start of request handling and start a new trace + - Echo `x-correlation-id` (and `traceparent` when applicable) on responses + - Propagate `x-correlation-id` and `traceparent` to upstream nodes (Aggregate/Projection/Runner) and record them in request spans/log fields + +### Tests +- [x] **T10.1** Metrics include expected labels and counters increment correctly +- [x] **T10.2** Secrets never appear in logs in representative error cases +- [x] **T10.3** Rate limits trigger under abusive patterns +- [ ] **T10.4** Gateway generates `x-correlation-id` when missing and echoes it on responses +- [ ] **T10.5** Gateway propagates `x-correlation-id` and `traceparent` to upstream calls and includes them in logs/spans diff --git a/gateway/LOAD_TESTING.md b/gateway/LOAD_TESTING.md new file mode 100644 index 0000000..cc4a0fe --- /dev/null +++ b/gateway/LOAD_TESTING.md @@ -0,0 +1,44 @@ +# Load and Failure Testing Strategy + +## Goals + +- Verify the Gateway stays responsive under sustained traffic. +- Verify auth flows behave correctly under concurrency. 
+- Verify routing reloads are atomic and safe under load. +- Verify upstream failures are bounded (timeouts) and observable (metrics/logs). + +## Scenarios + +### AuthN + +- Sign up once, then: + - Burst sign-in attempts to verify rate limits and correct 401/429 behavior. + - Parallel refresh calls to verify refresh rotation correctness. + +### Routing Reload + +- Run steady traffic to: + - `POST /v1/query/{view_type}` + - `POST /v1/commands/{aggregate_type}/{aggregate_id}` +- Trigger `POST /admin/routing/reload` repeatedly and verify: + - No 500s from partial routing table reads. + - Routing decisions switch only at revision boundaries. + +### Upstream Failure Modes + +- Configure routing to a shard endpoint that: + - Refuses connections (ECONNREFUSED) + - Hangs (no response) + - Returns 5xx +- Verify: + - Gateway timeouts are enforced. + - Errors are surfaced as 5xx to callers. + - `gateway_http_requests_total` and duration histograms capture the failures. + +### HA Behavior (Swarm) + +- Run `gateway` with 2 replicas and no sticky sessions. +- Verify: + - Refresh works across replicas. + - IAM updates become effective immediately on both replicas. + - Rolling update keeps at least 1 replica ready. 
diff --git a/gateway/build.rs b/gateway/build.rs new file mode 100644 index 0000000..bee9375 --- /dev/null +++ b/gateway/build.rs @@ -0,0 +1,12 @@ +fn main() -> Result<(), Box<dyn std::error::Error>> { + let proto_path = "../aggregate/proto/aggregate.proto"; + let proto_dir = "../aggregate/proto"; + + tonic_build::configure() + .build_server(true) + .build_client(true) + .compile_protos(&[proto_path], &[proto_dir])?; + + println!("cargo:rerun-if-changed={}", proto_path); + Ok(()) +} diff --git a/gateway/external_prd.md b/gateway/external_prd.md new file mode 100644 index 0000000..9075799 --- /dev/null +++ b/gateway/external_prd.md @@ -0,0 +1,192 @@ +### External PRD: Changes Required in Aggregate, Projection, Runner + +This document captures the work needed outside the Gateway to support: +- Tenant-aware routing via `x-tenant-id` +- Independent horizontal scalability of Aggregate, Projection, Runner +- A safe mechanism for tenant rebalancing per service kind + +--- + +## **Target State** + +### Independent Placements + +Each service kind has its own placement map: +- `aggregate_placement[tenant_id] -> aggregate_shard_id` +- `projection_placement[tenant_id] -> projection_shard_id` +- `runner_placement[tenant_id] -> runner_shard_id` + +Each shard is a replica set that can scale independently. + +### Rebalancing Contract (Per Service Kind) + +All nodes MUST support: +- Dynamic placement updates (watch NATS KV or reload config) +- A drain mechanism that can target a specific tenant (stop acquiring new work for that tenant, finish in-flight, report status) +- Clear readiness semantics that reflect whether the node will accept work for a tenant + +Additionally, all nodes SHOULD converge on the same operational contract: +- A per-tenant “accepting” gate (can this shard accept new work/queries/commands for tenant X?) 
+- A per-tenant “drained” signal (no in-flight work remains for tenant X) +- A per-tenant warmup/catchup signal where relevant (projection lag, aggregate snapshot availability) + +--- + +## **Aggregate: Required Changes** + +### 1) Expose a Real Command API (Gateway Upstream) + +Today, Aggregate has internal command handling types (e.g., `CommandServer`) but its running HTTP server only exposes health/metrics/admin endpoints ([aggregate/http_server.rs](file:///Users/vlad/Developer/cloudlysis/aggregate/src/http_server.rs#L15-L82), [aggregate/server/mod.rs](file:///Users/vlad/Developer/cloudlysis/aggregate/src/server/mod.rs#L81-L213)). + +Aggregate MUST expose one of the following upstream APIs for the Gateway to call: +- **Option A (Recommended)**: gRPC server implementing `aggregate.gateway.v1.CommandService/SubmitCommand` compatible with [aggregate.proto](file:///Users/vlad/Developer/cloudlysis/aggregate/proto/aggregate.proto#L1-L31). +- **Option B**: HTTP endpoint for command submission (REST), with a stable request/response shape that the Gateway can proxy. + +### 2) Tenant Placement Enforcement + +Aggregate MUST enforce “hosted tenants” so independent scaling is safe: +- If an Aggregate shard/node is not assigned a tenant, it MUST reject commands for that tenant (e.g., `403` or `503` with retriable hint depending on whether the issue is authorization vs placement). +- Aggregate SHOULD maintain an in-memory allowlist of hosted tenants that is driven by: + - NATS KV placement watcher (preferred), or + - Hot-reloaded config pushed via `/admin/reload` + +Aggregate already has admin hooks for drain/reload, but they are currently generic and/or illustrative ([aggregate/http_server.rs](file:///Users/vlad/Developer/cloudlysis/aggregate/src/http_server.rs#L15-L72), [aggregate/server/mod.rs](file:///Users/vlad/Developer/cloudlysis/aggregate/src/server/mod.rs#L402-L442)). These need to become placement-aware. 
+ +### 3) Tenant Drain (Per Tenant) + +Aggregate MUST provide a per-tenant drain mechanism to support rebalancing: +- Stop accepting new commands for the tenant. +- Allow in-flight commands to finish (bounded wait), then report drained. +- Expose drain status per tenant (admin endpoint). + +### 4) Rebalancing State Strategy + +Aggregate persists snapshots locally (MDBX) and uses JetStream for events. To move a tenant: +- **Approach 1 (Snapshot migration)**: copy tenant snapshot DB/state to the target shard, then switch placement. +- **Approach 2 (Cold rehydrate)**: switch placement and let the target shard rebuild state by replaying events from JetStream; expect higher latency during warmup. + +The system should support both, with the rebalancer selecting the strategy based on tenant size/SLO. + +### 5) Metrics for Placement Decisions + +Aggregate SHOULD expose: +- Per-tenant command rate, error rate +- In-flight commands by tenant +- Rehydrate time / snapshot hit ratio +- Storage size per tenant (if feasible) + +--- + +## **Projection: Required Changes** + +### 1) Expose Query API Upstream for Gateway + +Projection has a working `QueryService` with tenant-scoped prefix scans ([uqf.rs](file:///Users/vlad/Developer/cloudlysis/projection/src/query/uqf.rs#L121-L162)) but it is not exposed via HTTP/gRPC (current HTTP routes are health/ready/metrics/info only: [projection/http/mod.rs](file:///Users/vlad/Developer/cloudlysis/projection/src/http/mod.rs#L102-L109)). + +Projection MUST add one upstream API the Gateway can route to: +- `POST /query/{view_type}` (HTTP) accepting `x-tenant-id` and a UQF payload, returning `QueryResponse`. +- Or a gRPC query service (new proto) if gRPC is preferred end-to-end. + +### 2) Tenant Placement Filtering (Independent Scaling) + +Projection MUST support running in one of these modes: +- **Multi-tenant shard**: consumes all tenants (simple, less isolated). 
+- **Tenant-filtered shard (required for rebalancing)**: + - only consumes/serves queries for the tenants assigned to that shard + - rejects queries for unassigned tenants (consistent error semantics) + +Implementation direction: +- Add a placement watcher similar to Runner’s tenant filter ([runner/tenant_placement.rs](file:///Users/vlad/Developer/cloudlysis/runner/src/tenant_placement.rs#L8-L100)). +- Apply tenant filter to: + - event consumption subject filters (preferred), and + - query serving validation (always). + +### 3) Drain + Warmup Endpoints + +Projection SHOULD add: +- `/admin/drain?tenant_id=...` (stop consuming new events for that tenant, finish in-flight, flush checkpoints) +- `/admin/reload` (apply latest placement/config) +- Optional warmup status: whether the shard has caught up to JetStream tail for that tenant/view_types + +### 4) Rebalancing Strategy for Projection + +Projection can rebalance safely with “warm then cut over”: +- Assign tenant to the new projection shard while old shard still serves. +- New shard catches up (replay from JetStream, build view KV). +- Switch Gateway placement for query routing to new shard. +- Drain old shard for that tenant and optionally delete old tenant KV keys. 
+ +### 5) Metrics for Placement Decisions + +Projection SHOULD expose: +- JetStream lag per tenant/view_type (tail minus checkpoint) +- Query latency and scan counts +- Storage size per tenant (if feasible) + +--- + +## **Runner: Required Changes** + +Runner already has: +- A tenant placement watcher capable of producing an allowlist ([tenant_placement.rs](file:///Users/vlad/Developer/cloudlysis/runner/src/tenant_placement.rs#L8-L100)) +- Admin endpoints including drain/reload/config ([runner/http/mod.rs](file:///Users/vlad/Developer/cloudlysis/runner/src/http/mod.rs#L69-L86)) +- Gateway client integration for aggregate command submission ([runner/gateway/mod.rs](file:///Users/vlad/Developer/cloudlysis/runner/src/gateway/mod.rs#L1-L47)) + +To support independent scalability + rebalancing, Runner needs the following. + +### 1) Per-Tenant Drain (Not Only Global) + +Runner’s current drain is global (`/admin/drain` toggles a single draining flag). Runner MUST support draining a specific tenant: +- Stop acquiring new saga/effect work for the tenant. +- Allow in-flight work for the tenant to finish (bounded). +- Flush outbox for the tenant (or guarantee idempotency on handoff). +- Persist final checkpoints so another shard can continue without duplication beyond at-least-once bounds. + +### 2) Placement-Enforced Work Acquisition + +Runner MUST validate tenant assignment at the boundary where it: +- consumes JetStream messages (saga triggers, effect commands), and +- dispatches outbox work. + +If a tenant is not assigned to the shard, Runner must not process its work. + +### 3) Handoff Safety Rules for Rebalancing + +Runner rebalancing should follow: +- New shard begins processing only after it is assigned the tenant. +- Old shard stops acquiring new work for that tenant, then drains. +- Idempotency remains correct across handoff using checkpoints and dedupe markers. 
+ +### 4) Metrics for Placement Decisions + +Runner SHOULD expose: +- Outbox depth by tenant +- Work processing latency and retries by tenant/effect +- Schedule due items by tenant +- Consumer lag by tenant (if the consumption model supports per-tenant lag) + +### 5) Auth Delivery Side Effects (Email/SMS/Push) + +If the platform’s AuthN flows require out-of-band delivery (password reset links, email verification, MFA codes), the Runner SHOULD be the standard place to execute those side effects: +- Define a stable effect interface for sending transactional emails (reset links, verification links, security alerts). +- Optionally add SMS/push providers later under the same effect contract. + +This keeps the Gateway free of long-lived provider credentials and aligns with the existing “effects are executed by workers” pattern. + +--- + +## **Gateway Integration Notes** + +Once the above changes exist: +- Gateway routes per `(tenant_id, service_kind)` using independent placement maps. +- Gateway can implement “warm then cut over” rebalancing for Projection and Runner by switching only query/workflow routing after readiness conditions are met. +- Gateway can enforce consistent tenant validation, authn/authz, and error semantics at the edge even as placements move. + +--- + +## **Gaps / Opportunities** + +- **KV schema + ownership**: define the exact NATS KV bucket layout, key naming, revisioning rules, and who is allowed to write placement updates. +- **Rebalancer API**: define operator workflows (plan/apply/rollback), status reporting, and audit log requirements for placement changes. +- **Shard discovery**: define how shard endpoints are registered (static config vs KV directory entries) and how health is represented. +- **Consistency boundaries**: define rebalancing guarantees per service kind (projection can be warm-cutover; runner requires checkpoint handoff; aggregate requires single-writer and state availability). 
diff --git a/gateway/prd.md b/gateway/prd.md new file mode 100644 index 0000000..6ca88b3 --- /dev/null +++ b/gateway/prd.md @@ -0,0 +1,425 @@ +### 🧱 Component: Gateway + +**Definition:** +The Gateway is the single ingress for the platform. It provides: +- **Tenant-aware routing** to the node services: **Aggregate** (write/commands), **Projection** (read/queries), and **Runner** (workflow/saga + effects admin). +- Centralized **authn** (password via Argon2 + Google OIDC; extensible to more providers) and **authz** (tenant-scoped RBAC). +- Cross-cutting concerns: request validation, rate limiting, observability, and consistent error semantics. + +The Gateway is responsible for enforcing multi-tenancy at the edge: it treats `x-tenant-id` as the tenant selection signal, validates it against the caller identity, and routes requests to the correct tenant shard/node. + +--- + +## **Context: Existing Nodes** + +This PRD is based on the currently implemented node repositories: +- **Aggregate**: defines gRPC Command API `aggregate.gateway.v1.CommandService/SubmitCommand` in [aggregate.proto](file:///Users/vlad/Developer/cloudlysis/aggregate/proto/aggregate.proto#L1-L31). Aggregate’s PRD explicitly expects the Gateway to route by `x-tenant-id` ([aggregate/prd.md](file:///Users/vlad/Developer/cloudlysis/aggregate/prd.md#L5-L12)). +- **Projection**: provides health/admin HTTP endpoints and implements an in-process UQF query engine as `QueryService` but does not currently expose it over HTTP/gRPC ([uqf.rs](file:///Users/vlad/Developer/cloudlysis/projection/src/query/uqf.rs#L8-L162)). +- **Runner**: uses a gRPC client to submit aggregate commands “through the gateway” (config key `aggregate_gateway_url`), propagating `x-tenant-id` as gRPC metadata ([GatewayClient](file:///Users/vlad/Developer/cloudlysis/runner/src/gateway/mod.rs#L1-L47), [OutboxRelay](file:///Users/vlad/Developer/cloudlysis/runner/src/outbox/relay.rs#L37-L110)). 
+- **Tenant placement**: there is precedent for **NATS JetStream KV** as a control plane for tenant placement/sharding (Runner tenant filter watcher: [tenant_placement.rs](file:///Users/vlad/Developer/cloudlysis/runner/src/tenant_placement.rs#L8-L100); Aggregate KV client helper: [swarm.rs](file:///Users/vlad/Developer/cloudlysis/aggregate/src/swarm.rs#L79-L227)). There is also a simple static mapping example in [gateway-routing.yaml](file:///Users/vlad/Developer/cloudlysis/aggregate/gateway-routing.yaml#L1-L3). + +--- + +## **Problem Statement** + +Clients (and internal workers like Runner) need a stable, secure entrypoint that: +- Authenticates identities (humans and services) +- Authorizes actions per tenant +- Routes requests to the correct node(s) for the selected tenant +- Provides consistent APIs independent of the underlying shard topology and service discovery + +Without a Gateway, each node would need to re-implement auth, tenant enforcement, rate limiting, and topology discovery, increasing security risk and operational complexity. + +--- + +## **Goals** + +- Provide one entrypoint for **command submission** (Aggregate) and **query execution** (Projection), and an authenticated entrypoint for **workflow/admin actions** (Runner). +- Enforce tenant isolation using `x-tenant-id`: + - Validate tenant selection is allowed for the caller + - Prevent tenant spoofing +- Prioritize **independent scalability** of Aggregate, Projection, and Runner: + - Scale each service horizontally without requiring the others to scale + - Allow tenant assignments for each service to be rebalanced independently +- Support **authn**: + - Username/password with Argon2 password hashing + - Google OIDC login (future providers supported) +- Support **authz**: + - Tenant-scoped RBAC with explicit permissions + - Service identities for internal traffic (Runner → Gateway) +- Provide operational endpoints: `/health`, `/ready`, `/metrics`, config/routing introspection (admin-only). 
+ +--- + +## **Non-Goals** + +- Implement the Aggregate/Projection/Runner business logic. +- Replace NATS JetStream as the event bus or the storage responsibilities of nodes. +- Provide a general-purpose API gateway for arbitrary upstreams; this Gateway is purpose-built for platform nodes. +- Provide UI/console; the Gateway only exposes APIs. + +--- + +## **Primary Users** + +- **External clients**: applications submitting commands and running queries. +- **Internal services**: Runner submitting commands on behalf of sagas. +- **Operators**: managing tenant placement and observing health/metrics. + +--- + +## **Key Concepts** + +### Tenant Selection and Enforcement + +- `x-tenant-id` is the canonical tenant selector for all tenant-scoped requests. +- The Gateway MUST reject requests when: + - The endpoint is tenant-scoped and `x-tenant-id` is missing (unless explicitly configured as single-tenant default). + - The caller is not authorized for that tenant. +- The Gateway SHOULD normalize and validate tenant IDs using the same constraints the nodes already use (alphanumeric + `-` + `_`). + +### Node Types and Traffic Classes + +- **Aggregate (write path)**: synchronous command submission; returns events. +- **Projection (read path)**: query execution; returns query results; eventual consistency is expected. +- **Runner (workflow/admin path)**: operational endpoints for runner configuration, drain, reload, and diagnostics; access is admin-only. + +### Tenant-Aware Routing + +- Routing decision is primarily based on `tenant_id`, and secondarily on request kind (aggregate vs projection vs runner). +- The Gateway abstracts the topology: clients do not need to know which node hosts their tenant. + +### Independent Scalability and Rebalancing + +- Each service (Aggregate, Projection, Runner) can have its own tenant-to-shard placement. The Gateway resolves routing per `(tenant_id, service_kind)`. 
+- Rebalancing is defined as moving a tenant’s assignment for a specific service from one shard to another with bounded disruption. + +--- + +## **Functional Requirements** + +### 1) Authentication (AuthN) + +- **AuthN surface area**: + - Signup, signin, signout + - Forgot password, reset password + - MFA enrollment and MFA challenge (step-up) + - Google OIDC login (and future providers) + - Service identities (internal callers) + +- **Password-based accounts**: + - Store passwords hashed with **Argon2id** using per-user random salts and parameters suitable for production. + - Signup MUST support email verification before the account becomes active (configurable per environment). + - Signin MUST support MFA when required by policy. + - Signout MUST revoke refresh tokens (and optionally maintain a short-lived access-token denylist only if needed). + +- **Sessions and tokens**: + - Issue a short-lived access token and a refresh token with rotation. + - Refresh tokens MUST be stored server-side (hashed at rest) to support revocation and rotation. + - Support both browser and API clients: + - Browser: refresh token in an HttpOnly cookie with CSRF protections. + - API clients: refresh token in an authorization header or secure client storage (no localStorage guidance in the PRD; implementation chooses). +- **OIDC (Google)**: + - Support Authorization Code flow with PKCE. + - Map OIDC identities to internal users; allow linking multiple providers per user. + - Future providers (e.g., GitHub, Azure AD) should fit the same model. +- **Service auth** (internal): + - Support service identities for Runner → Gateway and other future internal callers. + - Recommended approach: mTLS and/or signed JWTs with a `sub` of `service:<name>` plus explicit RBAC grants. + +- **Forgot / reset password**: + - Forgot password MUST create a one-time reset token with an expiry and store only a hash of it. 
+ - Reset password MUST verify the token, enforce password policy, rotate credentials, and revoke all refresh tokens for the user. + - Sending reset links/codes is a side effect; the Gateway SHOULD trigger it via the platform’s effect execution path (Runner effect providers) rather than embedding SMTP credentials in the Gateway. + +- **MFA**: + - Support TOTP (authenticator apps) as the default MFA method. + - Support recovery codes (one-time use) for account recovery. + - MFA enrollment MUST require a recent primary authentication (step-up). + - MFA challenges MUST be bound to an auth session and have short expiration. + +### 2) Authorization (AuthZ / RBAC) + +- RBAC entities: + - **User** (human identity) + - **Service** (machine identity) + - **Tenant** + - **Role** (set of permissions) + - **Assignment** (principal ↔ tenant ↔ role) +- Authorization checks: + - Command submission permissions: per tenant, optionally scoped by `aggregate_type`. + - Query permissions: per tenant, optionally scoped by `view_type`. + - Admin permissions: routing/config endpoints, runner admin passthrough, tenant placement changes. + +### 3) Routing to Nodes + +The Gateway MUST route to: +- **Aggregate nodes** for command submission. +- **Projection nodes** for query execution. +- **Runner nodes** for admin/ops passthrough. + +Routing inputs: +- `tenant_id` (from `x-tenant-id` or request body for internal gRPC; header is authoritative for external HTTP). +- A routing table defining tenant → shard/node → service endpoint(s), where placement MAY differ per service kind. + +Routing behavior: +- The Gateway MUST be able to hot-reload routing configuration without restart. 
+- The Gateway SHOULD support both: + - **Static config** (file-based mapping for development) + - **Dynamic config** (NATS KV-based control plane for production) +- The Gateway MUST support routing when placements are independent: + - `aggregate_placement[tenant_id] -> aggregate_shard_id` + - `projection_placement[tenant_id] -> projection_shard_id` + - `runner_placement[tenant_id] -> runner_shard_id` +- The Gateway SHOULD expose placement revisions and effective routing decisions for debugging (admin-only). + +### 4) Public APIs (Initial) + +The Gateway exposes two public surface areas: + +#### Command Submission (Write) + +- **gRPC**: implement `aggregate.gateway.v1.CommandService/SubmitCommand` for internal callers (Runner) and optional external clients. +- **HTTP**: provide a simple REST wrapper to allow browser and non-gRPC clients. + +HTTP sketch: +- `POST /v1/commands/{aggregate_type}/{aggregate_id}` + - Headers: `Authorization`, `x-tenant-id` + - Body: JSON command payload + - Response: JSON containing events (mirrors the gRPC response shape) + +#### Query Execution (Read) + +Because Projection currently implements UQF query logic but does not expose it, the Gateway defines a stable API and routes to a Projection query endpoint once it exists. + +HTTP sketch: +- `POST /v1/query/{view_type}` + - Headers: `Authorization`, `x-tenant-id` + - Body: `{ "uqf": "<uqf query string>" }` + - Response: `{ "mode": "find" | "count", ... }` compatible with Projection’s `QueryResponse` shape. + +### 5) Operational APIs + +- `GET /health` and `GET /ready` for load balancers. +- `GET /metrics` for Prometheus/Victoria Metrics. 
+- Admin-only: + - `GET /admin/routing` (current effective routing table and revision) + - `POST /admin/routing/reload` (force reload; should still be safe if watcher exists) + - Runner passthrough under `/admin/runner/*` (authenticated + authorized) + +### 6) AuthN Endpoints (HTTP) + +The Gateway SHOULD expose a stable HTTP AuthN API (exact payloads may evolve; semantics should not): +- `POST /v1/auth/signup` +- `POST /v1/auth/signin` +- `POST /v1/auth/signout` +- `POST /v1/auth/refresh` +- `POST /v1/auth/forgot` +- `POST /v1/auth/reset` +- `POST /v1/auth/mfa/enroll/start` +- `POST /v1/auth/mfa/enroll/confirm` +- `POST /v1/auth/mfa/challenge` +- `POST /v1/auth/oidc/google/start` +- `GET /v1/auth/oidc/google/callback` + +The Gateway MUST enforce rate limits on signin/forgot/reset and MUST apply abuse protections (generic error responses for account existence, IP/device throttling). + +### 7) Admin IAM APIs (HTTP) + +The Gateway MUST expose an admin-facing API surface for the Admin UI node to manage authentication + authorization: +- **Users**: create, read, update, disable, delete +- **Identities**: link/unlink OIDC identities, manage password credentials, enforce email verification status +- **Roles and Rights**: define permissions (rights), create/update roles, assign rights to roles +- **Assignments**: assign roles to principals (users/services) scoped to a tenant +- **Service Accounts**: create/rotate credentials for internal callers, assign tenant roles +- **MFA Admin Actions**: reset MFA for a user, revoke recovery codes, force re-enrollment +- **Sessions**: revoke refresh tokens for a user (global signout) + +Endpoint sketch (admin-only, audited, paginated): +- `GET /v1/admin/iam/users` +- `POST /v1/admin/iam/users` +- `GET /v1/admin/iam/users/{user_id}` +- `PATCH /v1/admin/iam/users/{user_id}` +- `POST /v1/admin/iam/users/{user_id}/disable` +- `POST /v1/admin/iam/users/{user_id}/sessions/revoke` +- `POST /v1/admin/iam/users/{user_id}/mfa/reset` +- `GET 
/v1/admin/iam/rights` +- `POST /v1/admin/iam/rights` +- `GET /v1/admin/iam/roles` +- `POST /v1/admin/iam/roles` +- `GET /v1/admin/iam/roles/{role_id}` +- `PATCH /v1/admin/iam/roles/{role_id}` +- `POST /v1/admin/iam/roles/{role_id}/rights` +- `GET /v1/admin/iam/assignments` +- `POST /v1/admin/iam/assignments` +- `DELETE /v1/admin/iam/assignments/{assignment_id}` + +Tenant scoping rules: +- Tenant-scoped operations MUST require `x-tenant-id` and apply within that tenant (role assignments, tenant membership, tenant admin). +- Platform-scoped operations MUST NOT depend on `x-tenant-id` (right/permission catalog, platform admins, global user search). + +All admin IAM endpoints MUST require strong authorization (platform admin or tenant admin depending on the resource) and MUST produce an immutable audit trail (who changed what, from where, and when). + +--- + +## **Non-Functional Requirements** + +- **Security** + - Reject requests missing tenant context when required. + - Do not trust `x-tenant-id` unless it is authorized by the caller identity. + - Rate limit authentication endpoints and command submission endpoints. + - Ensure secrets never appear in logs (tokens, OIDC codes, passwords). + - Enforce secure defaults for sessions: + - HttpOnly + Secure cookies where applicable, explicit CSRF protections for browser flows. + - Access token TTLs and refresh token rotation with revocation. + - Account lockout / progressive throttling for credential stuffing. + - Require key management and rotation: + - JWT signing keys MUST support rotation; old keys remain valid only for bounded overlap. + - Password reset tokens, email verification tokens, and refresh tokens MUST be stored as hashes. + - Require transport security: + - mTLS between Gateway and internal nodes (or an equivalent, explicit service-to-service auth boundary). + - Produce auditable, immutable logs for admin IAM actions and tenant placement changes. 
+- **Reliability** + - Timeouts for upstream calls; bounded retries only when safe (idempotency key present). + - Circuit breaking per upstream endpoint. + - Graceful degradation when routing config control plane is temporarily unavailable (serve last known good config). +- **Observability** + - Correlate requests with `request_id` and `trace_id`. + - Emit structured logs and Prometheus metrics (request counts, latency histograms, auth failures, upstream errors). + - Emit security signals (failed signins, MFA failures, suspicious IP/device patterns) suitable for alerting. +- **Performance** + - Minimize per-request allocations; use connection pools for upstreams. + - Cache routing decisions keyed by `(tenant_id, service_kind)` with small TTL and invalidation on routing config change. +- **Compatibility** + - Support single-tenant mode (empty tenant id) for development and early environments, without changing client code. + - Define API versioning rules and a consistent error envelope for HTTP APIs. 
+ +--- + +## **Proposed Architecture** + +### High-Level Flow + +``` +Client / Runner + | + | (Authorization, x-tenant-id) + v +Gateway + | 1) AuthN (password/OIDC/service) + | 2) AuthZ (RBAC per tenant + permission) + | 3) Tenant routing (tenant_id -> node -> endpoint) + v +Aggregate / Projection / Runner nodes +``` + +### Components Inside the Gateway + +- **API Layer** + - HTTP server for REST endpoints + - gRPC server implementing `aggregate.gateway.v1.CommandService` for Runner compatibility +- **Identity Layer** + - Credential verification (Argon2) + - OIDC provider integration (Google) + - Token issuance and verification (JWT access + refresh token rotation) +- **Authorization Layer** + - RBAC policy evaluation for each request + - Tenant membership validation for `x-tenant-id` +- **Routing Layer** + - Routing config loader: file + NATS KV watcher + - Routing decision: `(tenant_id, service_kind) -> endpoint` with independent placement per service kind + - Health-aware endpoint selection (optional phase): avoid unhealthy endpoints when multiple replicas exist +- **Upstream Clients** + - Aggregate upstream: gRPC client (forward SubmitCommand) + - Projection upstream: HTTP or gRPC client (forward Query) + - Runner upstream: HTTP client for admin passthrough (restricted) + +### Routing Config Model (Recommended) + +Represent routing as two layers: +- **Placement maps** (tenant → shard), per service kind: + - `aggregate_placement[tenant_id] -> aggregate_shard_id` + - `projection_placement[tenant_id] -> projection_shard_id` + - `runner_placement[tenant_id] -> runner_shard_id` +- **Shard directory** (shard → endpoints), per service kind: + - `aggregate_shards[aggregate_shard_id] -> { grpc_endpoint, http_endpoint, admin_endpoint? }` + - `projection_shards[projection_shard_id] -> { http_endpoint, admin_endpoint? }` + - `runner_shards[runner_shard_id] -> { http_endpoint, admin_endpoint }` + +This supports both: +- Static YAML/JSON config files for local runs. 
+- Dynamic updates via NATS KV: + - Keys like `aggregate/tenants/<tenant_id>`, `projection/tenants/<tenant_id>`, `runner/tenants/<tenant_id>` + - Keys like `aggregate/shards/<shard_id>`, `projection/shards/<shard_id>`, `runner/shards/<shard_id>` + +The Gateway keeps: +- **Last known good** routing config +- A **revision** number (KV revision or monotonic local revision) for observability/debugging + +### Rebalancing Mechanism (Control Plane) + +Rebalancing is driven by a small control plane that updates placement and coordinates safe handoff: +- **Placement Store**: NATS JetStream KV buckets holding placement maps and shard directory entries. +- **Rebalancer** (operator-driven initially, automated later): + - Reads load signals (Gateway/Node metrics) and proposes moves: `(service_kind, tenant_id, from_shard, to_shard)` + - Applies moves by writing to KV and orchestrating drain/warmup as needed + - Provides audit trail: who moved what, when, and why + +Rebalance flow (per service kind): +- Update placement (KV) to include the target shard assignment with a revision. +- Ensure the target shard is ready for the tenant (service-specific warmup). +- Drain the tenant on the old shard (stop accepting new work for that tenant, finish in-flight). +- Finalize by removing/overwriting the old assignment and triggering config reload/watchers. + +Service-specific notes: +- **Projection**: can rebuild from JetStream; rebalancing can be “cold” (new shard catches up) with minimal coordination beyond tenant filtering. +- **Runner**: must stop acquiring new work for a tenant, flush outbox dispatch, and persist checkpoints before handing off. +- **Aggregate**: must ensure single-writer semantics per aggregate instance; tenant drain should block new commands during handoff, and the target shard must have state (snapshot transfer) or accept a cold rehydrate from JetStream. 
+ +--- + +## **Error Semantics** + +- Auth failures: `401` (unauthenticated) or `403` (forbidden) +- Tenant header issues: + - Missing `x-tenant-id` on tenant-scoped routes: `400` + - Invalid tenant format: `400` + - Tenant not permitted for principal: `403` +- Routing failures: + - Unknown tenant assignment: `503` with retriable hint + - No healthy upstream endpoints: `503` +- Upstream errors: + - Preserve upstream error category when safe; normalize into a consistent error envelope. + +--- + +## **Rollout Plan** + +Phase 1 (Minimum viable ingress) +- Implement tenant-aware routing for Aggregate command submission. +- Implement gRPC `SubmitCommand` compatible with Runner. +- Add HTTP wrapper for command submission. +- Introduce basic authn/authz (service identity + a minimal RBAC model). + +Phase 2 (Read path + OIDC) +- Add query API and route to Projection query endpoint (Projection may need an exposed endpoint). +- Add Google OIDC login and account linking. +- Harden RBAC and permissions by resource type (`aggregate_type`, `view_type`). + +Phase 3 (Operations + topology) +- NATS KV routing config watcher (hot reload). +- Admin APIs for routing inspection and controlled updates. +- Health-aware routing and per-tenant rate limits. +- Introduce placement maps per service kind (independent scaling). +- Introduce a rebalancer workflow (manual first) to move tenant placements safely. + +--- + +## **Gaps / Opportunities** + +- **Tenant lifecycle APIs**: tenant creation, tenant metadata, domain verification, invite flows, default roles, and bootstrap of the first tenant admin. +- **API conventions**: standard error envelope, pagination/cursors, request IDs, idempotency semantics for command submission retries. +- **Identity hardening**: password policy, breached-password checks, device/session management, step-up authentication rules, and admin break-glass procedures. +- **SSO / enterprise**: SCIM provisioning and additional OIDC/SAML providers as a future track. 
+- **Audit & compliance**: immutable audit log schema, export/retention policies, and per-tenant data access trails. +- **Rebalancer safety**: explicit two-phase cutover semantics (warmup readiness gates + drain completion signals) with operator-visible status. diff --git a/gateway/src/admin_iam.rs b/gateway/src/admin_iam.rs new file mode 100644 index 0000000..cefdf16 --- /dev/null +++ b/gateway/src/admin_iam.rs @@ -0,0 +1,1562 @@ +use argon2::password_hash::PasswordHasher; +use argon2::password_hash::SaltString; +use argon2::Argon2; +use axum::extract::Query; +use axum::extract::State; +use axum::http::StatusCode; +use axum::routing::delete; +use axum::routing::get; +use axum::routing::put; +use axum::Json; +use serde::Deserialize; +use serde::Serialize; +use sha2::Digest; + +use crate::authz; +use crate::authz::AuthzRejection; +use crate::authz::Principal; +use crate::storage::GatewayStorage; +use crate::storage::StorageError; +use crate::AppState; + +pub fn router() -> axum::Router { + axum::Router::new() + .route("/users", get(list_users)) + .route("/users/:user_id/disable", put(disable_user)) + .route("/users/:user_id", delete(delete_user)) + .route("/users/:user_id/sessions/revoke", put(revoke_user_sessions)) + .route("/users/:user_id/mfa/reset", put(reset_user_mfa)) + .route( + "/users/:user_id/mfa/recovery/revoke", + put(revoke_user_recovery_codes), + ) + .route("/users/:user_id/password", put(set_user_password)) + .route("/users/:user_id/password", delete(delete_user_password)) + .route( + "/users/:user_id/identities/google", + get(list_google_identities), + ) + .route( + "/users/:user_id/identities/google", + put(link_google_identity), + ) + .route( + "/users/:user_id/identities/google", + delete(unlink_google_identity), + ) + .route("/rights", get(list_rights)) + .route("/rights/:right_id", put(put_right)) + .route("/rights/:right_id", delete(delete_right)) + .route("/roles", get(list_roles)) + .route("/roles/:role_id", put(put_role)) + 
.route("/roles/:role_id", delete(delete_role)) + .route("/assignments", get(list_assignments)) + .route("/assignments", put(put_assignment)) + .route("/assignments", delete(delete_assignment)) + .route("/service-accounts", put(create_service_account)) + .route( + "/service-accounts/:user_id/rotate", + put(rotate_service_account_token), + ) + .route("/service-accounts/:user_id", delete(delete_service_account)) +} + +#[derive(Debug, Serialize)] +struct UserDto { + user_id: String, + email: String, + enabled: bool, + created_at_ms: i64, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +struct Stored { + v: u32, + data: T, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +struct UserRecord { + user_id: String, + email: String, + enabled: bool, + created_at_ms: i64, +} + +#[derive(Debug, Deserialize)] +struct PutRoleBody { + rights: Vec, +} + +#[derive(Debug, Deserialize)] +struct PutRightBody { + description: Option, +} + +#[derive(Debug, Deserialize)] +struct SetPasswordBody { + password: String, +} + +#[derive(Debug, Deserialize)] +struct GoogleIdentityBody { + sub: String, +} + +#[derive(Debug, Deserialize)] +struct ServiceAccountBody { + name: String, + tenant_id: String, + role_id: String, +} + +#[derive(Debug, Serialize)] +struct ServiceAccountTokenResponse { + user_id: String, + token: String, +} + +#[derive(Debug, Deserialize)] +struct AssignmentBody { + tenant_id: String, + principal_id: String, + role_id: String, +} + +#[derive(Debug, Deserialize)] +struct AssignmentQuery { + tenant_id: Option, + principal_id: Option, +} + +#[derive(Debug, Serialize)] +struct AssignmentDto { + tenant_id: String, + principal_id: String, + role_id: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +struct RightRecord { + right_id: String, + description: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +struct PasswordCredentialRecord { + user_id: String, + password_hash: String, + updated_at_ms: i64, +} + +#[derive(Debug, Serialize, 
Deserialize, Clone)] +struct TotpEnrollmentRecord { + user_id: String, + secret_base32: String, + enabled: bool, + created_at_ms: i64, + recovery_hashes: Vec, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +struct ServiceTokenRecord { + user_id: String, + token_hash: String, + created_at_ms: i64, + rotated_at_ms: Option, + enabled: bool, +} + +#[derive(Debug, Serialize, Deserialize)] +struct AuditRecord { + actor_id: String, + action: String, + tenant_id: Option, + target: String, + timestamp_ms: i64, +} + +async fn list_users( + State(state): State, + principal: Principal, +) -> Result>, AuthzRejection> { + require_platform_admin(&state.storage, &principal.user_id).await?; + + let keys = state + .storage + .users + .list_keys("v1/users/") + .await + .map_err(|_| AuthzRejection::Internal)?; + + let mut out = Vec::new(); + for key in keys { + if key.starts_with("v1/users/by_email_hash/") { + continue; + } + let entry = state + .storage + .users + .get(&key) + .await + .map_err(|_| AuthzRejection::Internal)?; + let Some(entry) = entry else { + continue; + }; + let stored: Stored = + serde_json::from_slice(&entry.value).map_err(|_| AuthzRejection::Internal)?; + out.push(UserDto { + user_id: stored.data.user_id, + email: stored.data.email, + enabled: stored.data.enabled, + created_at_ms: stored.data.created_at_ms, + }); + } + + Ok(Json(out)) +} + +async fn disable_user( + State(state): State, + principal: Principal, + axum::extract::Path(user_id): axum::extract::Path, +) -> Result { + require_platform_admin(&state.storage, &principal.user_id).await?; + + let key = format!("v1/users/{user_id}"); + for _ in 0..10 { + let entry = state + .storage + .users + .get(&key) + .await + .map_err(|_| AuthzRejection::Internal)? 
+ .ok_or(AuthzRejection::NotFound)?; + + let mut stored: Stored = + serde_json::from_slice(&entry.value).map_err(|_| AuthzRejection::Internal)?; + stored.data.enabled = false; + + let payload = serde_json::to_vec(&stored).map_err(|_| AuthzRejection::Internal)?; + match state + .storage + .users + .update(&key, entry.revision, payload) + .await + { + Ok(_) => break, + Err(StorageError::CasMismatch) => continue, + Err(_) => return Err(AuthzRejection::Internal), + } + } + + write_audit( + &state.storage, + &principal.user_id, + "user.disable", + None, + format!("user:{user_id}"), + ) + .await + .map_err(|_| AuthzRejection::Internal)?; + + Ok(StatusCode::NO_CONTENT) +} + +async fn delete_user( + State(state): State, + principal: Principal, + axum::extract::Path(user_id): axum::extract::Path, +) -> Result { + require_platform_admin(&state.storage, &principal.user_id).await?; + + let user_key = format!("v1/users/{user_id}"); + let entry = state + .storage + .users + .get(&user_key) + .await + .map_err(|_| AuthzRejection::Internal)? 
+ .ok_or(AuthzRejection::NotFound)?; + let stored: Stored = + serde_json::from_slice(&entry.value).map_err(|_| AuthzRejection::Internal)?; + + let email_hash = hash_stable("email", &stored.data.email); + let _ = state + .storage + .users + .delete(&format!("v1/users/by_email_hash/{email_hash}")) + .await; + let _ = state.storage.users.delete(&user_key).await; + let _ = state + .storage + .password_credentials + .delete(&format!("v1/password/{user_id}")) + .await; + let _ = state + .storage + .mfa + .delete(&format!("v1/totp/{user_id}")) + .await; + let _ = revoke_all_sessions_for_user(&state.storage, &user_id).await; + + write_audit( + &state.storage, + &principal.user_id, + "user.delete", + None, + format!("user:{user_id}"), + ) + .await + .map_err(|_| AuthzRejection::Internal)?; + + Ok(StatusCode::NO_CONTENT) +} + +async fn revoke_user_sessions( + State(state): State, + principal: Principal, + axum::extract::Path(user_id): axum::extract::Path, +) -> Result { + require_platform_admin(&state.storage, &principal.user_id).await?; + revoke_all_sessions_for_user(&state.storage, &user_id) + .await + .map_err(|_| AuthzRejection::Internal)?; + + write_audit( + &state.storage, + &principal.user_id, + "user.sessions.revoke", + None, + format!("user:{user_id}"), + ) + .await + .map_err(|_| AuthzRejection::Internal)?; + + Ok(StatusCode::NO_CONTENT) +} + +async fn reset_user_mfa( + State(state): State, + principal: Principal, + axum::extract::Path(user_id): axum::extract::Path, +) -> Result { + require_platform_admin(&state.storage, &principal.user_id).await?; + + let _ = state + .storage + .mfa + .delete(&format!("v1/totp/{user_id}")) + .await; + + write_audit( + &state.storage, + &principal.user_id, + "user.mfa.reset", + None, + format!("user:{user_id}"), + ) + .await + .map_err(|_| AuthzRejection::Internal)?; + + Ok(StatusCode::NO_CONTENT) +} + +async fn revoke_user_recovery_codes( + State(state): State, + principal: Principal, + axum::extract::Path(user_id): 
axum::extract::Path, +) -> Result { + require_platform_admin(&state.storage, &principal.user_id).await?; + + let key = format!("v1/totp/{user_id}"); + for _ in 0..10 { + let entry = state + .storage + .mfa + .get(&key) + .await + .map_err(|_| AuthzRejection::Internal)?; + let Some(entry) = entry else { + break; + }; + + let mut stored: Stored = + serde_json::from_slice(&entry.value).map_err(|_| AuthzRejection::Internal)?; + stored.data.recovery_hashes.clear(); + let payload = serde_json::to_vec(&stored).map_err(|_| AuthzRejection::Internal)?; + match state + .storage + .mfa + .update(&key, entry.revision, payload) + .await + { + Ok(_) => break, + Err(StorageError::CasMismatch) => continue, + Err(_) => return Err(AuthzRejection::Internal), + } + } + + write_audit( + &state.storage, + &principal.user_id, + "user.mfa.recovery.revoke", + None, + format!("user:{user_id}"), + ) + .await + .map_err(|_| AuthzRejection::Internal)?; + + Ok(StatusCode::NO_CONTENT) +} + +async fn set_user_password( + State(state): State, + principal: Principal, + axum::extract::Path(user_id): axum::extract::Path, + Json(body): Json, +) -> Result { + require_platform_admin(&state.storage, &principal.user_id).await?; + + if body.password.len() < 8 { + return Err(AuthzRejection::Internal); + } + + let salt = SaltString::generate(&mut rand_core::OsRng); + let argon2 = Argon2::default(); + let password_hash = argon2 + .hash_password(body.password.as_bytes(), &salt) + .map_err(|_| AuthzRejection::Internal)? 
+ .to_string(); + + let cred = PasswordCredentialRecord { + user_id: user_id.clone(), + password_hash, + updated_at_ms: unix_ms(), + }; + let payload = serde_json::to_vec(&Stored { + v: crate::storage::SCHEMA_VERSION, + data: cred, + }) + .map_err(|_| AuthzRejection::Internal)?; + + state + .storage + .password_credentials + .put(&format!("v1/password/{user_id}"), payload) + .await + .map_err(|_| AuthzRejection::Internal)?; + + write_audit( + &state.storage, + &principal.user_id, + "user.password.set", + None, + format!("user:{user_id}"), + ) + .await + .map_err(|_| AuthzRejection::Internal)?; + + Ok(StatusCode::NO_CONTENT) +} + +async fn delete_user_password( + State(state): State, + principal: Principal, + axum::extract::Path(user_id): axum::extract::Path, +) -> Result { + require_platform_admin(&state.storage, &principal.user_id).await?; + + let _ = state + .storage + .password_credentials + .delete(&format!("v1/password/{user_id}")) + .await; + + write_audit( + &state.storage, + &principal.user_id, + "user.password.delete", + None, + format!("user:{user_id}"), + ) + .await + .map_err(|_| AuthzRejection::Internal)?; + + Ok(StatusCode::NO_CONTENT) +} + +async fn list_google_identities( + State(state): State, + principal: Principal, + axum::extract::Path(user_id): axum::extract::Path, +) -> Result>, AuthzRejection> { + require_platform_admin(&state.storage, &principal.user_id).await?; + + let keys = state + .storage + .identities + .list_keys("v1/identities/google/") + .await + .map_err(|_| AuthzRejection::Internal)?; + + let mut subs = Vec::new(); + for key in keys { + let entry = state + .storage + .identities + .get(&key) + .await + .map_err(|_| AuthzRejection::Internal)?; + let Some(entry) = entry else { + continue; + }; + if entry.value == user_id.as_bytes() { + if let Some(sub) = key.strip_prefix("v1/identities/google/") { + subs.push(sub.to_string()); + } + } + } + Ok(Json(subs)) +} + +async fn link_google_identity( + State(state): State, + principal: 
Principal, + axum::extract::Path(user_id): axum::extract::Path, + Json(body): Json, +) -> Result { + require_platform_admin(&state.storage, &principal.user_id).await?; + + let key = format!("v1/identities/google/{}", body.sub); + if let Some(entry) = state + .storage + .identities + .get(&key) + .await + .map_err(|_| AuthzRejection::Internal)? + { + if entry.value != user_id.as_bytes() { + return Err(AuthzRejection::Conflict); + } + return Ok(StatusCode::NO_CONTENT); + } + + state + .storage + .identities + .create(&key, user_id.as_bytes().to_vec()) + .await + .map_err(|e| match e { + StorageError::AlreadyExists => AuthzRejection::Conflict, + _ => AuthzRejection::Internal, + })?; + + write_audit( + &state.storage, + &principal.user_id, + "identity.google.link", + None, + format!("user:{user_id}:sub:{}", body.sub), + ) + .await + .map_err(|_| AuthzRejection::Internal)?; + + Ok(StatusCode::NO_CONTENT) +} + +async fn unlink_google_identity( + State(state): State, + principal: Principal, + axum::extract::Path(user_id): axum::extract::Path, + Json(body): Json, +) -> Result { + require_platform_admin(&state.storage, &principal.user_id).await?; + + let key = format!("v1/identities/google/{}", body.sub); + if let Some(entry) = state + .storage + .identities + .get(&key) + .await + .map_err(|_| AuthzRejection::Internal)? 
+ { + if entry.value != user_id.as_bytes() { + return Err(AuthzRejection::Forbidden); + } + } + + let _ = state.storage.identities.delete(&key).await; + + write_audit( + &state.storage, + &principal.user_id, + "identity.google.unlink", + None, + format!("user:{user_id}:sub:{}", body.sub), + ) + .await + .map_err(|_| AuthzRejection::Internal)?; + + Ok(StatusCode::NO_CONTENT) +} + +async fn create_service_account( + State(state): State, + principal: Principal, + Json(body): Json, +) -> Result, AuthzRejection> { + require_tenant_or_platform_admin(&state.storage, &principal.user_id, &body.tenant_id).await?; + + if body.name.trim().is_empty() { + return Err(AuthzRejection::Internal); + } + + let email = format!("service:{}:{}", body.tenant_id, body.name.trim()); + let email_hash = hash_stable("email", &email); + + let user_id = uuid::Uuid::new_v4().to_string(); + let now_ms = unix_ms(); + let user = UserRecord { + user_id: user_id.clone(), + email: email.clone(), + enabled: true, + created_at_ms: now_ms, + }; + + state + .storage + .users + .create( + &format!("v1/users/by_email_hash/{email_hash}"), + user_id.as_bytes().to_vec(), + ) + .await + .map_err(|e| match e { + StorageError::AlreadyExists => AuthzRejection::Conflict, + _ => AuthzRejection::Internal, + })?; + state + .storage + .users + .create( + &format!("v1/users/{user_id}"), + serde_json::to_vec(&Stored { + v: crate::storage::SCHEMA_VERSION, + data: user, + }) + .map_err(|_| AuthzRejection::Internal)?, + ) + .await + .map_err(|_| AuthzRejection::Internal)?; + + let token = uuid::Uuid::new_v4().simple().to_string(); + let token_hash = hash_stable("service_token", &token); + let record = ServiceTokenRecord { + user_id: user_id.clone(), + token_hash, + created_at_ms: now_ms, + rotated_at_ms: None, + enabled: true, + }; + let payload = serde_json::to_vec(&Stored { + v: crate::storage::SCHEMA_VERSION, + data: record, + }) + .map_err(|_| AuthzRejection::Internal)?; + + state + .storage + .service_tokens + 
.create(&format!("v1/service_tokens/{user_id}"), payload) + .await + .map_err(|_| AuthzRejection::Internal)?; + + authz::assign_role(&state.storage, &body.tenant_id, &user_id, &body.role_id) + .await + .map_err(|_| AuthzRejection::Internal)?; + + write_audit( + &state.storage, + &principal.user_id, + "service_account.create", + Some(body.tenant_id.clone()), + format!("service_account:{user_id}"), + ) + .await + .map_err(|_| AuthzRejection::Internal)?; + + Ok(Json(ServiceAccountTokenResponse { user_id, token })) +} + +#[derive(Debug, Deserialize)] +struct ServiceAccountScope { + tenant_id: String, +} + +async fn rotate_service_account_token( + State(state): State, + principal: Principal, + axum::extract::Path(user_id): axum::extract::Path, + Query(scope): Query, +) -> Result, AuthzRejection> { + require_tenant_or_platform_admin(&state.storage, &principal.user_id, &scope.tenant_id).await?; + + let key = format!("v1/service_tokens/{user_id}"); + + for _ in 0..10 { + let entry = state + .storage + .service_tokens + .get(&key) + .await + .map_err(|_| AuthzRejection::Internal)?; + let Some(entry) = entry else { + return Err(AuthzRejection::NotFound); + }; + + let mut stored: Stored = + serde_json::from_slice(&entry.value).map_err(|_| AuthzRejection::Internal)?; + + let token = uuid::Uuid::new_v4().simple().to_string(); + stored.data.token_hash = hash_stable("service_token", &token); + stored.data.rotated_at_ms = Some(unix_ms()); + stored.data.enabled = true; + + let payload = serde_json::to_vec(&stored).map_err(|_| AuthzRejection::Internal)?; + match state + .storage + .service_tokens + .update(&key, entry.revision, payload) + .await + { + Ok(_) => { + write_audit( + &state.storage, + &principal.user_id, + "service_account.rotate", + Some(scope.tenant_id.clone()), + format!("service_account:{user_id}"), + ) + .await + .map_err(|_| AuthzRejection::Internal)?; + return Ok(Json(ServiceAccountTokenResponse { user_id, token })); + } + Err(StorageError::CasMismatch) => 
continue, + Err(_) => return Err(AuthzRejection::Internal), + } + } + + Err(AuthzRejection::Internal) +} + +async fn delete_service_account( + State(state): State, + principal: Principal, + axum::extract::Path(user_id): axum::extract::Path, + Query(scope): Query, +) -> Result { + require_tenant_or_platform_admin(&state.storage, &principal.user_id, &scope.tenant_id).await?; + + let _ = state + .storage + .service_tokens + .delete(&format!("v1/service_tokens/{user_id}")) + .await; + + let user_key = format!("v1/users/{user_id}"); + for _ in 0..10 { + let entry = state + .storage + .users + .get(&user_key) + .await + .map_err(|_| AuthzRejection::Internal)?; + let Some(entry) = entry else { + break; + }; + let mut stored: Stored = + serde_json::from_slice(&entry.value).map_err(|_| AuthzRejection::Internal)?; + stored.data.enabled = false; + let payload = serde_json::to_vec(&stored).map_err(|_| AuthzRejection::Internal)?; + match state + .storage + .users + .update(&user_key, entry.revision, payload) + .await + { + Ok(_) => break, + Err(StorageError::CasMismatch) => continue, + Err(_) => return Err(AuthzRejection::Internal), + } + } + + write_audit( + &state.storage, + &principal.user_id, + "service_account.delete", + Some(scope.tenant_id), + format!("service_account:{user_id}"), + ) + .await + .map_err(|_| AuthzRejection::Internal)?; + + Ok(StatusCode::NO_CONTENT) +} + +async fn list_rights( + State(state): State, + principal: Principal, +) -> Result>, AuthzRejection> { + require_platform_admin(&state.storage, &principal.user_id).await?; + + let keys = state + .storage + .rights + .list_keys("v1/rights/") + .await + .map_err(|_| AuthzRejection::Internal)?; + + let mut out = Vec::new(); + for key in keys { + let entry = state + .storage + .rights + .get(&key) + .await + .map_err(|_| AuthzRejection::Internal)?; + let Some(entry) = entry else { + continue; + }; + let right: RightRecord = + decode_stored(&entry.value).map_err(|_| AuthzRejection::Internal)?; + 
out.push(right); + } + Ok(Json(out)) +} + +async fn put_right( + State(state): State, + principal: Principal, + axum::extract::Path(right_id): axum::extract::Path, + Json(body): Json, +) -> Result { + require_platform_admin(&state.storage, &principal.user_id).await?; + + let record = RightRecord { + right_id: right_id.clone(), + description: body.description, + }; + let payload = serde_json::to_vec(&serde_json::json!({ + "v": crate::storage::SCHEMA_VERSION, + "data": record + })) + .map_err(|_| AuthzRejection::Internal)?; + state + .storage + .rights + .put(&format!("v1/rights/{right_id}"), payload) + .await + .map_err(|_| AuthzRejection::Internal)?; + + write_audit( + &state.storage, + &principal.user_id, + "right.put", + None, + format!("right:{right_id}"), + ) + .await + .map_err(|_| AuthzRejection::Internal)?; + + Ok(StatusCode::NO_CONTENT) +} + +async fn delete_right( + State(state): State, + principal: Principal, + axum::extract::Path(right_id): axum::extract::Path, +) -> Result { + require_platform_admin(&state.storage, &principal.user_id).await?; + + state + .storage + .rights + .delete(&format!("v1/rights/{right_id}")) + .await + .map_err(|_| AuthzRejection::Internal)?; + + write_audit( + &state.storage, + &principal.user_id, + "right.delete", + None, + format!("right:{right_id}"), + ) + .await + .map_err(|_| AuthzRejection::Internal)?; + + Ok(StatusCode::NO_CONTENT) +} + +async fn list_roles( + State(state): State, + principal: Principal, +) -> Result>, AuthzRejection> { + require_platform_admin(&state.storage, &principal.user_id).await?; + + let keys = state + .storage + .roles + .list_keys("v1/roles/") + .await + .map_err(|_| AuthzRejection::Internal)?; + + let mut out = Vec::new(); + for key in keys { + let entry = state + .storage + .roles + .get(&key) + .await + .map_err(|_| AuthzRejection::Internal)?; + let Some(entry) = entry else { + continue; + }; + let role: authz::RoleRecord = + decode_stored(&entry.value).map_err(|_| 
AuthzRejection::Internal)?; + out.push(role); + } + + Ok(Json(out)) +} + +async fn put_role( + State(state): State, + principal: Principal, + axum::extract::Path(role_id): axum::extract::Path, + Json(body): Json, +) -> Result { + require_platform_admin(&state.storage, &principal.user_id).await?; + + authz::put_role(&state.storage, &role_id, body.rights) + .await + .map_err(|_| AuthzRejection::Internal)?; + + write_audit( + &state.storage, + &principal.user_id, + "role.put", + None, + format!("role:{role_id}"), + ) + .await + .map_err(|_| AuthzRejection::Internal)?; + + Ok(StatusCode::NO_CONTENT) +} + +async fn delete_role( + State(state): State, + principal: Principal, + axum::extract::Path(role_id): axum::extract::Path, +) -> Result { + require_platform_admin(&state.storage, &principal.user_id).await?; + + state + .storage + .roles + .delete(&format!("v1/roles/{role_id}")) + .await + .map_err(|_| AuthzRejection::Internal)?; + + write_audit( + &state.storage, + &principal.user_id, + "role.delete", + None, + format!("role:{role_id}"), + ) + .await + .map_err(|_| AuthzRejection::Internal)?; + + Ok(StatusCode::NO_CONTENT) +} + +async fn list_assignments( + State(state): State, + principal: Principal, + Query(q): Query, +) -> Result>, AuthzRejection> { + if let Some(tenant_id) = &q.tenant_id { + require_tenant_or_platform_admin(&state.storage, &principal.user_id, tenant_id).await?; + } else { + require_platform_admin(&state.storage, &principal.user_id).await?; + } + + let prefix = match (&q.tenant_id, &q.principal_id) { + (Some(t), Some(p)) => format!("v1/assignments/{t}/{p}/"), + (Some(t), None) => format!("v1/assignments/{t}/"), + (None, Some(_)) => "v1/assignments/".to_string(), + (None, None) => "v1/assignments/".to_string(), + }; + + let keys = state + .storage + .assignments + .list_keys(&prefix) + .await + .map_err(|_| AuthzRejection::Internal)?; + + let mut out = Vec::new(); + for key in keys { + let parts: Vec<&str> = key.split('/').collect(); + if parts.len() 
< 5 { + continue; + } + let tenant_id = parts[2].to_string(); + let principal_id = parts[3].to_string(); + let role_id = parts[4].to_string(); + + if let Some(filter_principal) = &q.principal_id { + if &principal_id != filter_principal { + continue; + } + } + if let Some(filter_tenant) = &q.tenant_id { + if &tenant_id != filter_tenant { + continue; + } + } + + out.push(AssignmentDto { + tenant_id, + principal_id, + role_id, + }); + } + + Ok(Json(out)) +} + +async fn put_assignment( + State(state): State, + principal: Principal, + Json(body): Json, +) -> Result { + require_tenant_or_platform_admin(&state.storage, &principal.user_id, &body.tenant_id).await?; + + authz::assign_role( + &state.storage, + &body.tenant_id, + &body.principal_id, + &body.role_id, + ) + .await + .map_err(|_| AuthzRejection::Internal)?; + + write_audit( + &state.storage, + &principal.user_id, + "assignment.put", + Some(body.tenant_id.clone()), + format!( + "assignment:{}:{}:{}", + body.tenant_id, body.principal_id, body.role_id + ), + ) + .await + .map_err(|_| AuthzRejection::Internal)?; + + Ok(StatusCode::NO_CONTENT) +} + +async fn delete_assignment( + State(state): State, + principal: Principal, + Json(body): Json, +) -> Result { + require_tenant_or_platform_admin(&state.storage, &principal.user_id, &body.tenant_id).await?; + + state + .storage + .assignments + .delete(&format!( + "v1/assignments/{}/{}/{}", + body.tenant_id, body.principal_id, body.role_id + )) + .await + .map_err(|_| AuthzRejection::Internal)?; + + write_audit( + &state.storage, + &principal.user_id, + "assignment.delete", + Some(body.tenant_id.clone()), + format!( + "assignment:{}:{}:{}", + body.tenant_id, body.principal_id, body.role_id + ), + ) + .await + .map_err(|_| AuthzRejection::Internal)?; + + Ok(StatusCode::NO_CONTENT) +} + +async fn require_platform_admin( + storage: &GatewayStorage, + principal_id: &str, +) -> Result<(), AuthzRejection> { + authz::ensure_allowed(storage, principal_id, "*", 
"iam.platform_admin").await +} + +async fn require_tenant_or_platform_admin( + storage: &GatewayStorage, + principal_id: &str, + tenant_id: &str, +) -> Result<(), AuthzRejection> { + if authz::ensure_allowed(storage, principal_id, "*", "iam.platform_admin") + .await + .is_ok() + { + return Ok(()); + } + authz::ensure_allowed(storage, principal_id, tenant_id, "iam.tenant_admin").await +} + +async fn write_audit( + storage: &GatewayStorage, + actor_id: &str, + action: &str, + tenant_id: Option, + target: String, +) -> Result<(), StorageError> { + let record = AuditRecord { + actor_id: actor_id.to_string(), + action: action.to_string(), + tenant_id, + target, + timestamp_ms: unix_ms(), + }; + + let key = format!("v1/audit/{}-{}", record.timestamp_ms, uuid::Uuid::new_v4()); + let payload = serde_json::to_vec(&serde_json::json!({ + "v": crate::storage::SCHEMA_VERSION, + "data": record + })) + .map_err(|e| StorageError::Serde(e.to_string()))?; + + storage.audit_index.put(&key, payload).await?; + Ok(()) +} + +fn decode_stored Deserialize<'de>>(bytes: &[u8]) -> Result { + let stored: Stored = + serde_json::from_slice(bytes).map_err(|e| StorageError::Serde(e.to_string()))?; + Ok(stored.data) +} + +fn unix_ms() -> i64 { + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_millis() as i64 +} + +fn hash_stable(domain: &str, value: &str) -> String { + let mut hasher = sha2::Sha256::default(); + hasher.update(domain.as_bytes()); + hasher.update([0u8]); + hasher.update(value.as_bytes()); + hex::encode(hasher.finalize()) +} + +async fn revoke_all_sessions_for_user( + storage: &GatewayStorage, + user_id: &str, +) -> Result<(), StorageError> { + let keys = storage.refresh_sessions.list_keys("v1/sessions/").await?; + for key in keys { + let Some(session_id) = key.strip_prefix("v1/sessions/") else { + continue; + }; + let entry = storage.refresh_sessions.get(&key).await?; + let Some(entry) = entry else { + continue; + }; + let stored: 
Stored = + serde_json::from_slice(&entry.value).map_err(|e| StorageError::Serde(e.to_string()))?; + if stored.data.user_id == user_id { + let _ = storage.revoke_refresh_session(session_id).await; + } + } + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::authn; + use std::sync::Arc; + use tower::util::ServiceExt; + + async fn test_app() -> (axum::Router, AppState) { + let metrics = crate::observability::init_metrics_for_tests(); + let routing = crate::routing::RouterState::new(Arc::new(crate::routing::FixedSource::new( + crate::routing::RoutingConfig::empty(), + ))) + .await + .unwrap(); + let storage = crate::storage::GatewayStorage::new_in_memory(); + let authn_cfg = crate::authn::AuthnConfig::for_tests(); + let state = crate::AppState { + metrics, + routing, + storage, + authn: authn_cfg, + }; + let app = crate::app(state.clone()); + (app, state) + } + + async fn signup_and_get_claims( + app: &axum::Router, + cfg: &authn::AuthnConfig, + ) -> (String, authn::AccessClaims) { + let response = app + .clone() + .oneshot( + axum::http::Request::builder() + .method("POST") + .uri("/v1/auth/signup") + .header("content-type", "application/json") + .body(axum::body::Body::from( + r#"{"email":"a@b.com","password":"password123"}"#, + )) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(response.status(), StatusCode::OK); + let body = axum::body::to_bytes(response.into_body(), usize::MAX) + .await + .unwrap(); + let created: crate::authn::AuthResponse = serde_json::from_slice(&body).unwrap(); + + let claims = cfg.verify_access_token(&created.access_token).unwrap(); + (created.access_token, claims) + } + + #[tokio::test] + async fn only_platform_admin_can_list_users() { + let (app, state) = test_app().await; + let (token, claims) = signup_and_get_claims(&app, &state.authn).await; + + let resp = app + .clone() + .oneshot( + axum::http::Request::builder() + .method("GET") + .uri("/admin/iam/users") + .header("authorization", format!("Bearer {token}")) + 
.body(axum::body::Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::FORBIDDEN); + + crate::authz::put_role( + &state.storage, + "role-platform-admin", + vec!["iam.platform_admin".to_string()], + ) + .await + .unwrap(); + crate::authz::assign_role(&state.storage, "*", &claims.sub, "role-platform-admin") + .await + .unwrap(); + + let resp = app + .oneshot( + axum::http::Request::builder() + .method("GET") + .uri("/admin/iam/users") + .header("authorization", format!("Bearer {token}")) + .body(axum::body::Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + } + + #[tokio::test] + async fn admin_mutation_emits_audit_record() { + let (app, state) = test_app().await; + let (token, claims) = signup_and_get_claims(&app, &state.authn).await; + + crate::authz::put_role( + &state.storage, + "role-platform-admin", + vec!["iam.platform_admin".to_string()], + ) + .await + .unwrap(); + crate::authz::assign_role(&state.storage, "*", &claims.sub, "role-platform-admin") + .await + .unwrap(); + + let before = state + .storage + .audit_index + .list_keys("v1/audit/") + .await + .unwrap() + .len(); + + let resp = app + .oneshot( + axum::http::Request::builder() + .method("PUT") + .uri("/admin/iam/roles/role1") + .header("authorization", format!("Bearer {token}")) + .header("content-type", "application/json") + .body(axum::body::Body::from(r#"{"rights":["x.y"]}"#)) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::NO_CONTENT); + + let after = state + .storage + .audit_index + .list_keys("v1/audit/") + .await + .unwrap() + .len(); + assert_eq!(after, before + 1); + } + + #[tokio::test] + async fn assignment_changes_affect_authorization_immediately() { + let (_app, state) = test_app().await; + + crate::authz::put_role( + &state.storage, + "role-command", + vec!["command.submit".to_string()], + ) + .await + .unwrap(); + + let user_id = "u1"; + let tenant_id = "tenant-a"; 
+ + let before = + crate::authz::ensure_allowed(&state.storage, user_id, tenant_id, "command.submit") + .await; + assert!(matches!(before, Err(AuthzRejection::Forbidden))); + + crate::authz::assign_role(&state.storage, tenant_id, user_id, "role-command") + .await + .unwrap(); + + let after = + crate::authz::ensure_allowed(&state.storage, user_id, tenant_id, "command.submit") + .await; + assert!(after.is_ok()); + } + + #[tokio::test] + async fn tenant_admin_can_manage_assignments_only_within_their_tenant() { + let (app, state) = test_app().await; + let (token, claims) = signup_and_get_claims(&app, &state.authn).await; + + crate::authz::put_role( + &state.storage, + "role-tenant-admin", + vec!["iam.tenant_admin".to_string()], + ) + .await + .unwrap(); + crate::authz::assign_role(&state.storage, "tenant-a", &claims.sub, "role-tenant-admin") + .await + .unwrap(); + + let ok = app + .clone() + .oneshot( + axum::http::Request::builder() + .method("PUT") + .uri("/admin/iam/assignments") + .header("authorization", format!("Bearer {token}")) + .header("content-type", "application/json") + .body(axum::body::Body::from( + r#"{"tenant_id":"tenant-a","principal_id":"u2","role_id":"r1"}"#, + )) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(ok.status(), StatusCode::NO_CONTENT); + + let forbidden = app + .oneshot( + axum::http::Request::builder() + .method("PUT") + .uri("/admin/iam/assignments") + .header("authorization", format!("Bearer {token}")) + .header("content-type", "application/json") + .body(axum::body::Body::from( + r#"{"tenant_id":"tenant-b","principal_id":"u2","role_id":"r1"}"#, + )) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(forbidden.status(), StatusCode::FORBIDDEN); + } + + #[tokio::test] + async fn tenant_admin_can_create_service_account_and_service_can_query() { + let projection_app = axum::Router::new().route( + "/v1/query/TestView", + axum::routing::post(|| async { (StatusCode::OK, r#"{"ok":true}"#) }), + ); + let projection_listener = 
tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap(); + let projection_addr = projection_listener.local_addr().unwrap(); + tokio::spawn(async move { + axum::serve(projection_listener, projection_app) + .await + .unwrap(); + }); + tokio::time::sleep(std::time::Duration::from_millis(50)).await; + + let cfg = crate::routing::RoutingConfig { + revision: 1, + aggregate_placement: std::collections::HashMap::new(), + projection_placement: std::collections::HashMap::from([( + "tenant-a".to_string(), + "p".to_string(), + )]), + runner_placement: std::collections::HashMap::new(), + aggregate_shards: std::collections::HashMap::new(), + projection_shards: std::collections::HashMap::from([( + "p".to_string(), + vec![format!("http://{}", projection_addr)], + )]), + runner_shards: std::collections::HashMap::new(), + }; + + let metrics = crate::observability::init_metrics_for_tests(); + let routing = + crate::routing::RouterState::new(Arc::new(crate::routing::FixedSource::new(cfg))) + .await + .unwrap(); + let storage = crate::storage::GatewayStorage::new_in_memory(); + let authn_cfg = crate::authn::AuthnConfig::for_tests(); + let state = crate::AppState { + metrics, + routing, + storage, + authn: authn_cfg, + }; + let app = crate::app(state.clone()); + + let (token, claims) = signup_and_get_claims(&app, &state.authn).await; + + crate::authz::put_role( + &state.storage, + "role-tenant-admin", + vec!["iam.tenant_admin".to_string()], + ) + .await + .unwrap(); + crate::authz::assign_role(&state.storage, "tenant-a", &claims.sub, "role-tenant-admin") + .await + .unwrap(); + + crate::authz::put_role( + &state.storage, + "role-query", + vec!["query.execute".to_string()], + ) + .await + .unwrap(); + + let resp = app + .clone() + .oneshot( + axum::http::Request::builder() + .method("PUT") + .uri("/admin/iam/service-accounts") + .header("authorization", format!("Bearer {token}")) + .header("content-type", "application/json") + .body(axum::body::Body::from( + 
r#"{"name":"svc1","tenant_id":"tenant-a","role_id":"role-query"}"#, + )) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + let body = axum::body::to_bytes(resp.into_body(), usize::MAX) + .await + .unwrap(); + let created: serde_json::Value = serde_json::from_slice(&body).unwrap(); + let service_account_id = created + .get("user_id") + .and_then(|v| v.as_str()) + .unwrap() + .to_string(); + let service_token = created + .get("token") + .and_then(|v| v.as_str()) + .unwrap() + .to_string(); + + let service_signin = app + .clone() + .oneshot( + axum::http::Request::builder() + .method("POST") + .uri("/v1/auth/service/signin") + .header("content-type", "application/json") + .body(axum::body::Body::from(format!( + r#"{{"service_account_id":"{}","token":"{}"}}"#, + service_account_id, service_token + ))) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(service_signin.status(), StatusCode::OK); + let body = axum::body::to_bytes(service_signin.into_body(), usize::MAX) + .await + .unwrap(); + let signed_in: serde_json::Value = serde_json::from_slice(&body).unwrap(); + let service_access_token = signed_in + .get("access_token") + .and_then(|v| v.as_str()) + .unwrap() + .to_string(); + + let query = app + .oneshot( + axum::http::Request::builder() + .method("POST") + .uri("/v1/query/TestView") + .header("authorization", format!("Bearer {service_access_token}")) + .header("x-tenant-id", "tenant-a") + .header("content-type", "application/json") + .body(axum::body::Body::from(r#"{}"#)) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(query.status(), StatusCode::OK); + } +} diff --git a/gateway/src/admin_rebalance.rs b/gateway/src/admin_rebalance.rs new file mode 100644 index 0000000..afd9c92 --- /dev/null +++ b/gateway/src/admin_rebalance.rs @@ -0,0 +1,790 @@ +use axum::extract::Query; +use axum::extract::State; +use axum::http::StatusCode; +use axum::Json; +use serde::Deserialize; +use serde::Serialize; +use std::time::Duration; + +use 
crate::authz;
use crate::authz::AuthzRejection;
use crate::authz::Principal;
use crate::routing::ServiceKind;
use crate::storage::StorageError;
use crate::AppState;

/// Routes for the platform-admin rebalance API.
/// NOTE(review): reconstructed as `Router<AppState>` because every handler
/// takes `State<AppState>` and no `.with_state` call appears here — confirm
/// against the mounting site in `crate::app`.
pub fn router() -> axum::Router<AppState> {
    axum::Router::new()
        .route("/status", axum::routing::get(status))
        .route("/gates", axum::routing::get(gates))
        .route("/plans", axum::routing::get(list_plans))
        .route("/plan", axum::routing::post(create_plan))
        .route("/apply", axum::routing::post(apply_plan))
        .route("/rollback", axum::routing::post(rollback_plan))
}

/// Query parameters for the routing-resolve endpoint.
#[derive(Debug, Deserialize)]
pub struct ResolveQuery {
    pub tenant_id: String,
    pub kind: String,
}

#[derive(Debug, Serialize)]
pub struct ResolveResponse {
    pub tenant_id: String,
    pub kind: ServiceKind,
    pub endpoint: String,
    pub revision: u64,
}

#[derive(Debug, Deserialize)]
struct TenantQuery {
    tenant_id: String,
}

/// Per-kind endpoint snapshot for one tenant; a field is `None` when the
/// tenant cannot currently be routed for that service kind.
#[derive(Debug, Serialize)]
struct StatusResponse {
    tenant_id: String,
    revision: u64,
    aggregate: Option<String>,
    projection: Option<String>,
    runner: Option<String>,
}

#[derive(Debug, Serialize)]
struct GatesResponse {
    tenant_id: String,
    aggregate_ready: bool,
    projection_ready: bool,
    runner_ready: bool,
}

/// Versioned persistence envelope; `v` carries the schema version so stored
/// records can be migrated later.
#[derive(Debug, Serialize, Deserialize, Clone)]
struct Stored<T> {
    v: u32,
    data: T,
}

/// One tenant-move plan. `status` walks planned -> apply_requested /
/// rollback_requested (see `transition_plan_status`).
#[derive(Debug, Serialize, Deserialize, Clone)]
struct RebalancePlan {
    plan_id: String,
    tenant_id: String,
    kind: ServiceKind,
    from_endpoint: Option<String>,
    to_endpoint: Option<String>,
    status: String,
    actor_id: String,
    created_at_ms: i64,
    updated_at_ms: i64,
}

#[derive(Debug, Deserialize)]
struct CreatePlanBody {
    tenant_id: String,
    kind: String,
    to_endpoint: Option<String>,
}

#[derive(Debug, Deserialize)]
struct PlanActionBody {
    plan_id: String,
    tenant_id: String,
}

#[derive(Debug, Deserialize)]
struct ListPlansQuery {
    tenant_id: Option<String>,
    limit: Option<usize>,
}

/// Resolves the current routing endpoint for `(tenant_id, kind)`.
/// Platform-admin only.
pub async fn resolve(
    State(state): State<AppState>,
    principal: Principal,
    Query(q): Query<ResolveQuery>,
) -> Result<Json<ResolveResponse>, AuthzRejection> {
+ require_platform_admin(&state.storage, &principal.user_id).await?; + + let kind = parse_kind(&q.kind).ok_or(AuthzRejection::Internal)?; + let table = state.routing.snapshot().await; + let endpoint = table.resolve(&q.tenant_id, kind).map_err(|e| match e { + crate::routing::RoutingError::UnknownTenant => AuthzRejection::NotFound, + crate::routing::RoutingError::MissingShard | crate::routing::RoutingError::EmptyShard => { + AuthzRejection::Internal + } + })?; + + Ok(Json(ResolveResponse { + tenant_id: q.tenant_id, + kind, + endpoint, + revision: table.revision, + })) +} + +async fn status( + State(state): State, + principal: Principal, + Query(q): Query, +) -> Result, AuthzRejection> { + require_platform_admin(&state.storage, &principal.user_id).await?; + + let table = state.routing.snapshot().await; + let aggregate = table.resolve(&q.tenant_id, ServiceKind::Aggregate).ok(); + let projection = table.resolve(&q.tenant_id, ServiceKind::Projection).ok(); + let runner = table.resolve(&q.tenant_id, ServiceKind::Runner).ok(); + + Ok(Json(StatusResponse { + tenant_id: q.tenant_id, + revision: table.revision, + aggregate, + projection, + runner, + })) +} + +async fn gates( + State(state): State, + principal: Principal, + Query(q): Query, +) -> Result, AuthzRejection> { + require_platform_admin(&state.storage, &principal.user_id).await?; + + let projection_endpoint = state + .routing + .resolve(&q.tenant_id, ServiceKind::Projection) + .await + .ok(); + let runner_endpoint = state + .routing + .resolve(&q.tenant_id, ServiceKind::Runner) + .await + .ok(); + let aggregate_endpoint = state + .routing + .resolve(&q.tenant_id, ServiceKind::Aggregate) + .await + .ok(); + + let projection_ready = if let Some(ep) = projection_endpoint { + projection_gate_ready(&ep, &q.tenant_id) + .await + .unwrap_or(false) + } else { + false + }; + let runner_ready = if let Some(ep) = runner_endpoint { + http_ready(&ep).await.unwrap_or(false) + } else { + false + }; + let aggregate_ready = if let 
Some(ep) = aggregate_endpoint { + aggregate_ready(&ep).await.unwrap_or(false) + } else { + false + }; + + Ok(Json(GatesResponse { + tenant_id: q.tenant_id, + aggregate_ready, + projection_ready, + runner_ready, + })) +} + +async fn http_ready(endpoint: &str) -> Result { + let url = format!("{}/ready", endpoint.trim_end_matches('/')); + let client = crate::upstream::http_client(); + let resp = tokio::time::timeout(Duration::from_secs(2), client.get(url).send()) + .await + .map_err(|_| AuthzRejection::Internal)? + .map_err(|_| AuthzRejection::Internal)?; + Ok(resp.status().is_success()) +} + +async fn aggregate_ready(endpoint: &str) -> Result { + if endpoint.contains(":50051") { + let http_ep = endpoint.replace(":50051", ":8080"); + return http_ready(&http_ep).await; + } + http_ready(endpoint).await +} + +async fn projection_gate_ready(endpoint: &str, tenant_id: &str) -> Result { + let url = format!("{}/metrics", endpoint.trim_end_matches('/')); + let client = crate::upstream::http_client(); + let resp = tokio::time::timeout(Duration::from_secs(2), client.get(url).send()) + .await + .map_err(|_| AuthzRejection::Internal)? 
+ .map_err(|_| AuthzRejection::Internal)?; + if !resp.status().is_success() { + return Ok(false); + } + let text = resp.text().await.map_err(|_| AuthzRejection::Internal)?; + + let ready = parse_prom_gauge(&text, "projection_ready").unwrap_or(0.0) >= 1.0; + if !ready { + return Ok(false); + } + + let max_lag = parse_projection_max_lag(&text, tenant_id).unwrap_or(u64::MAX); + let threshold = std::env::var("GATEWAY_REBALANCE_PROJECTION_MAX_LAG") + .ok() + .and_then(|v| v.parse::().ok()) + .unwrap_or(0); + Ok(max_lag <= threshold) +} + +fn parse_prom_gauge(metrics: &str, name: &str) -> Option { + for line in metrics.lines() { + let line = line.trim(); + if line.starts_with('#') || line.is_empty() { + continue; + } + if line.starts_with(name) && !line.contains('{') { + let mut it = line.split_whitespace(); + let _ = it.next()?; + return it.next()?.parse::().ok(); + } + } + None +} + +fn parse_projection_max_lag(metrics: &str, tenant_id: &str) -> Option { + let mut max: Option = None; + for line in metrics.lines() { + let line = line.trim(); + if !line.starts_with("projection_lag{") { + continue; + } + if !line.contains(&format!("tenant_id=\"{}\"", tenant_id)) { + continue; + } + let value = line + .split_whitespace() + .nth(1) + .and_then(|v| v.parse::().ok())?; + max = Some(max.map(|m| m.max(value)).unwrap_or(value)); + } + max +} + +fn parse_kind(kind: &str) -> Option { + match kind.trim().to_ascii_lowercase().as_str() { + "aggregate" => Some(ServiceKind::Aggregate), + "projection" => Some(ServiceKind::Projection), + "runner" => Some(ServiceKind::Runner), + _ => None, + } +} + +async fn require_platform_admin( + storage: &crate::storage::GatewayStorage, + principal_id: &str, +) -> Result<(), AuthzRejection> { + authz::ensure_allowed(storage, principal_id, "*", "iam.platform_admin").await +} + +async fn create_plan( + State(state): State, + principal: Principal, + Json(body): Json, +) -> Result, AuthzRejection> { + require_platform_admin(&state.storage, 
&principal.user_id).await?; + + if body.tenant_id.trim().is_empty() { + return Err(AuthzRejection::BadRequest); + } + let kind = parse_kind(&body.kind).ok_or(AuthzRejection::BadRequest)?; + let to_endpoint = body.to_endpoint.filter(|s| !s.trim().is_empty()); + if to_endpoint.is_none() { + return Err(AuthzRejection::BadRequest); + } + + let from_endpoint = state.routing.resolve(&body.tenant_id, kind).await.ok(); + let plan_id = uuid::Uuid::new_v4().to_string(); + let now_ms = unix_ms(); + + let plan = RebalancePlan { + plan_id: plan_id.clone(), + tenant_id: body.tenant_id.clone(), + kind, + from_endpoint, + to_endpoint, + status: "planned".to_string(), + actor_id: principal.user_id, + created_at_ms: now_ms, + updated_at_ms: now_ms, + }; + + let key = plan_key(&plan.tenant_id, &plan.plan_id); + state + .storage + .audit_index + .create( + &key, + encode_stored(&plan).map_err(|_| AuthzRejection::Internal)?, + ) + .await + .map_err(|e| match e { + StorageError::AlreadyExists => AuthzRejection::Conflict, + _ => AuthzRejection::Internal, + })?; + + Ok(Json(plan)) +} + +async fn apply_plan( + State(state): State, + principal: Principal, + Json(body): Json, +) -> Result { + require_platform_admin(&state.storage, &principal.user_id).await?; + transition_plan_status(&state, &body.tenant_id, &body.plan_id, "apply_requested").await?; + Ok(StatusCode::NO_CONTENT) +} + +async fn rollback_plan( + State(state): State, + principal: Principal, + Json(body): Json, +) -> Result { + require_platform_admin(&state.storage, &principal.user_id).await?; + transition_plan_status(&state, &body.tenant_id, &body.plan_id, "rollback_requested").await?; + Ok(StatusCode::NO_CONTENT) +} + +async fn list_plans( + State(state): State, + principal: Principal, + Query(q): Query, +) -> Result>, AuthzRejection> { + require_platform_admin(&state.storage, &principal.user_id).await?; + let prefix = match &q.tenant_id { + Some(t) => format!("v1/rebalance/plans/{}/", t.trim()), + None => 
"v1/rebalance/plans/".to_string(),
    };
    let mut keys = state
        .storage
        .audit_index
        .list_keys(&prefix)
        .await
        .map_err(|_| AuthzRejection::Internal)?;
    // Newest-first listing: sort descending in one pass (keys are unique
    // storage paths, so unstable ordering of equals cannot occur).
    keys.sort_unstable_by(|a, b| b.cmp(a));

    let limit = q.limit.unwrap_or(50).min(200);
    let mut out = Vec::new();
    for key in keys.into_iter().take(limit) {
        let entry = state
            .storage
            .audit_index
            .get(&key)
            .await
            .map_err(|_| AuthzRejection::Internal)?;
        let Some(entry) = entry else {
            // Key was deleted between list and get; skip it.
            continue;
        };
        let plan: RebalancePlan =
            decode_stored(&entry.value).map_err(|_| AuthzRejection::Internal)?;
        out.push(plan);
    }
    Ok(Json(out))
}

/// CAS-retry loop that rewrites a stored plan's `status` (and bump time).
///
/// Re-reads the record and retries up to 10 times on compare-and-swap
/// mismatch; gives up with `Internal` if contention persists, `NotFound`
/// when the plan does not exist.
async fn transition_plan_status(
    state: &AppState,
    tenant_id: &str,
    plan_id: &str,
    next_status: &str,
) -> Result<(), AuthzRejection> {
    let key = plan_key(tenant_id, plan_id);
    for _ in 0..10 {
        let entry = state
            .storage
            .audit_index
            .get(&key)
            .await
            .map_err(|_| AuthzRejection::Internal)?
            .ok_or(AuthzRejection::NotFound)?;

        let mut plan: Stored<RebalancePlan> =
            serde_json::from_slice(&entry.value).map_err(|_| AuthzRejection::Internal)?;
        plan.data.status = next_status.to_string();
        plan.data.updated_at_ms = unix_ms();
        let payload = serde_json::to_vec(&plan).map_err(|_| AuthzRejection::Internal)?;

        match state
            .storage
            .audit_index
            .update(&key, entry.revision, payload)
            .await
        {
            Ok(_) => return Ok(()),
            Err(StorageError::CasMismatch) => continue,
            Err(_) => return Err(AuthzRejection::Internal),
        }
    }
    Err(AuthzRejection::Internal)
}

/// Storage key for one rebalance plan, namespaced by tenant.
fn plan_key(tenant_id: &str, plan_id: &str) -> String {
    format!("v1/rebalance/plans/{tenant_id}/{plan_id}")
}

/// Serializes `data` inside the versioned `Stored` envelope.
fn encode_stored<T: Serialize>(data: &T) -> Result<Vec<u8>, StorageError> {
    serde_json::to_vec(&Stored {
        v: crate::storage::SCHEMA_VERSION,
        data,
    })
    .map_err(|e| StorageError::Serde(e.to_string()))
}

/// Deserializes a versioned `Stored` envelope and unwraps its payload.
fn decode_stored<T: for<'de> Deserialize<'de>>(bytes: &[u8]) -> Result<T, StorageError> {
    let stored: Stored<T> =
        serde_json::from_slice(bytes).map_err(|e| StorageError::Serde(e.to_string()))?;
Ok(stored.data) +} + +fn unix_ms() -> i64 { + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_millis() as i64 +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::authn; + use std::collections::HashMap; + use std::sync::Arc; + use tower::util::ServiceExt; + + async fn test_app_with_routing(cfg: crate::routing::RoutingConfig) -> (axum::Router, AppState) { + let metrics = crate::observability::init_metrics_for_tests(); + let source: Arc = + Arc::new(crate::routing::FixedSource::new(cfg)); + let routing = crate::routing::RouterState::new(source).await.unwrap(); + let storage = crate::storage::GatewayStorage::new_in_memory(); + let authn_cfg = crate::authn::AuthnConfig::for_tests(); + let state = crate::AppState { + metrics, + routing, + storage, + authn: authn_cfg, + }; + let app = crate::app(state.clone()); + (app, state) + } + + async fn signup_and_token(app: &axum::Router, cfg: &authn::AuthnConfig) -> (String, String) { + let response = app + .clone() + .oneshot( + axum::http::Request::builder() + .method("POST") + .uri("/v1/auth/signup") + .header("content-type", "application/json") + .body(axum::body::Body::from( + r#"{"email":"a@b.com","password":"password123"}"#, + )) + .unwrap(), + ) + .await + .unwrap(); + let body = axum::body::to_bytes(response.into_body(), usize::MAX) + .await + .unwrap(); + let created: crate::authn::AuthResponse = serde_json::from_slice(&body).unwrap(); + let claims = cfg.verify_access_token(&created.access_token).unwrap(); + (created.access_token, claims.sub) + } + + #[tokio::test] + async fn resolve_requires_platform_admin() { + let cfg = crate::routing::RoutingConfig::empty(); + let (app, state) = test_app_with_routing(cfg).await; + let (token, user_id) = signup_and_token(&app, &state.authn).await; + + let resp = app + .clone() + .oneshot( + axum::http::Request::builder() + .method("GET") + .uri("/admin/routing/resolve?tenant_id=t1&kind=aggregate") + .header("authorization", 
format!("Bearer {token}")) + .body(axum::body::Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp.status(), axum::http::StatusCode::FORBIDDEN); + + crate::authz::put_role( + &state.storage, + "role-platform-admin", + vec!["iam.platform_admin".to_string()], + ) + .await + .unwrap(); + crate::authz::assign_role(&state.storage, "*", &user_id, "role-platform-admin") + .await + .unwrap(); + + let resp = app + .oneshot( + axum::http::Request::builder() + .method("GET") + .uri("/admin/routing/resolve?tenant_id=t1&kind=aggregate") + .header("authorization", format!("Bearer {token}")) + .body(axum::body::Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp.status(), axum::http::StatusCode::NOT_FOUND); + } + + #[tokio::test] + async fn status_includes_revision() { + let cfg = crate::routing::RoutingConfig { + revision: 42, + aggregate_placement: HashMap::new(), + projection_placement: HashMap::new(), + runner_placement: HashMap::new(), + aggregate_shards: HashMap::new(), + projection_shards: HashMap::new(), + runner_shards: HashMap::new(), + }; + let (app, state) = test_app_with_routing(cfg).await; + let (token, user_id) = signup_and_token(&app, &state.authn).await; + + crate::authz::put_role( + &state.storage, + "role-platform-admin", + vec!["iam.platform_admin".to_string()], + ) + .await + .unwrap(); + crate::authz::assign_role(&state.storage, "*", &user_id, "role-platform-admin") + .await + .unwrap(); + + let resp = app + .oneshot( + axum::http::Request::builder() + .method("GET") + .uri("/admin/rebalance/status?tenant_id=t1") + .header("authorization", format!("Bearer {token}")) + .body(axum::body::Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp.status(), axum::http::StatusCode::OK); + let body = axum::body::to_bytes(resp.into_body(), usize::MAX) + .await + .unwrap(); + let value: serde_json::Value = serde_json::from_slice(&body).unwrap(); + assert_eq!(value.get("revision").and_then(|v| v.as_u64()).unwrap(), 
42); + } + + #[tokio::test] + async fn gates_prevent_cutover_when_projection_not_ready_or_lagging() { + let metrics_not_ready = axum::Router::new().route( + "/metrics", + axum::routing::get(|| async { (axum::http::StatusCode::OK, "projection_ready 0\n") }), + ); + let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap(); + let addr = listener.local_addr().unwrap(); + tokio::spawn(async move { + axum::serve(listener, metrics_not_ready).await.unwrap(); + }); + tokio::time::sleep(std::time::Duration::from_millis(50)).await; + let endpoint = format!("http://{}", addr); + + let cfg = crate::routing::RoutingConfig { + revision: 1, + aggregate_placement: HashMap::new(), + projection_placement: HashMap::from([("tenant-a".to_string(), "p".to_string())]), + runner_placement: HashMap::new(), + aggregate_shards: HashMap::new(), + projection_shards: HashMap::from([("p".to_string(), vec![endpoint])]), + runner_shards: HashMap::new(), + }; + let (app, state) = test_app_with_routing(cfg).await; + let (token, user_id) = signup_and_token(&app, &state.authn).await; + + crate::authz::put_role( + &state.storage, + "role-platform-admin", + vec!["iam.platform_admin".to_string()], + ) + .await + .unwrap(); + crate::authz::assign_role(&state.storage, "*", &user_id, "role-platform-admin") + .await + .unwrap(); + + let resp = app + .clone() + .oneshot( + axum::http::Request::builder() + .method("GET") + .uri("/admin/rebalance/gates?tenant_id=tenant-a") + .header("authorization", format!("Bearer {token}")) + .body(axum::body::Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp.status(), axum::http::StatusCode::OK); + let body = axum::body::to_bytes(resp.into_body(), usize::MAX) + .await + .unwrap(); + let value: serde_json::Value = serde_json::from_slice(&body).unwrap(); + assert!(!value + .get("projection_ready") + .and_then(|v| v.as_bool()) + .unwrap()); + + let metrics_lagging = axum::Router::new().route( + "/metrics", + axum::routing::get(|| async { + 
( + axum::http::StatusCode::OK, + "projection_ready 1\nprojection_lag{tenant_id=\"tenant-a\"} 5\n", + ) + }), + ); + let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap(); + let addr = listener.local_addr().unwrap(); + tokio::spawn(async move { + axum::serve(listener, metrics_lagging).await.unwrap(); + }); + tokio::time::sleep(std::time::Duration::from_millis(50)).await; + let endpoint = format!("http://{}", addr); + + std::env::set_var("GATEWAY_REBALANCE_PROJECTION_MAX_LAG", "0"); + + let cfg = crate::routing::RoutingConfig { + revision: 2, + aggregate_placement: HashMap::new(), + projection_placement: HashMap::from([("tenant-a".to_string(), "p".to_string())]), + runner_placement: HashMap::new(), + aggregate_shards: HashMap::new(), + projection_shards: HashMap::from([("p".to_string(), vec![endpoint])]), + runner_shards: HashMap::new(), + }; + let (app, state) = test_app_with_routing(cfg).await; + crate::authz::put_role( + &state.storage, + "role-platform-admin", + vec!["iam.platform_admin".to_string()], + ) + .await + .unwrap(); + crate::authz::assign_role(&state.storage, "*", &user_id, "role-platform-admin") + .await + .unwrap(); + let resp = app + .oneshot( + axum::http::Request::builder() + .method("GET") + .uri("/admin/rebalance/gates?tenant_id=tenant-a") + .header("authorization", format!("Bearer {token}")) + .body(axum::body::Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(resp.status(), axum::http::StatusCode::OK); + let body = axum::body::to_bytes(resp.into_body(), usize::MAX) + .await + .unwrap(); + let value: serde_json::Value = serde_json::from_slice(&body).unwrap(); + assert!(!value + .get("projection_ready") + .and_then(|v| v.as_bool()) + .unwrap()); + } + + #[tokio::test] + async fn plan_endpoints_require_platform_admin_and_persist_plans() { + let cfg = crate::routing::RoutingConfig::empty(); + let (app, state) = test_app_with_routing(cfg).await; + let (token, user_id) = signup_and_token(&app, 
&state.authn).await; + + let forbidden = app + .clone() + .oneshot( + axum::http::Request::builder() + .method("POST") + .uri("/admin/rebalance/plan") + .header("authorization", format!("Bearer {token}")) + .header("content-type", "application/json") + .body(axum::body::Body::from( + r#"{"tenant_id":"tenant-a","kind":"projection","to_endpoint":"http://p"}"#, + )) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(forbidden.status(), axum::http::StatusCode::FORBIDDEN); + + crate::authz::put_role( + &state.storage, + "role-platform-admin", + vec!["iam.platform_admin".to_string()], + ) + .await + .unwrap(); + crate::authz::assign_role(&state.storage, "*", &user_id, "role-platform-admin") + .await + .unwrap(); + + let created = app + .clone() + .oneshot( + axum::http::Request::builder() + .method("POST") + .uri("/admin/rebalance/plan") + .header("authorization", format!("Bearer {token}")) + .header("content-type", "application/json") + .body(axum::body::Body::from( + r#"{"tenant_id":"tenant-a","kind":"projection","to_endpoint":"http://p"}"#, + )) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(created.status(), axum::http::StatusCode::OK); + let body = axum::body::to_bytes(created.into_body(), usize::MAX) + .await + .unwrap(); + let plan: serde_json::Value = serde_json::from_slice(&body).unwrap(); + let plan_id = plan.get("plan_id").and_then(|v| v.as_str()).unwrap(); + + let listed = app + .oneshot( + axum::http::Request::builder() + .method("GET") + .uri("/admin/rebalance/plans?tenant_id=tenant-a&limit=10") + .header("authorization", format!("Bearer {token}")) + .body(axum::body::Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(listed.status(), axum::http::StatusCode::OK); + let body = axum::body::to_bytes(listed.into_body(), usize::MAX) + .await + .unwrap(); + let plans: serde_json::Value = serde_json::from_slice(&body).unwrap(); + assert!(plans + .as_array() + .unwrap() + .iter() + .any(|p| p.get("plan_id").and_then(|v| v.as_str()) == 
Some(plan_id))); + } +} diff --git a/gateway/src/authn.rs b/gateway/src/authn.rs new file mode 100644 index 0000000..c05c5ef --- /dev/null +++ b/gateway/src/authn.rs @@ -0,0 +1,1707 @@ +use std::sync::Arc; +use std::sync::OnceLock; +use std::time::Duration; +use std::time::Instant; +use std::time::SystemTime; +use std::time::UNIX_EPOCH; + +use std::collections::HashMap; + +use argon2::password_hash::PasswordHash; +use argon2::password_hash::PasswordHasher; +use argon2::password_hash::PasswordVerifier; +use argon2::password_hash::SaltString; +use argon2::Argon2; +use axum::extract::Query; +use axum::extract::State; +use axum::http::StatusCode; +use axum::response::IntoResponse; +use axum::routing::get; +use axum::routing::post; +use axum::Json; +use chrono::Utc; +use hmac::Hmac; +use hmac::Mac; +use serde::Deserialize; +use serde::Serialize; +use sha1::Sha1; +use sha2::Digest; +use subtle::ConstantTimeEq; +use thiserror::Error; +use tokio::sync::Mutex; + +use crate::storage::GatewayStorage; +use crate::storage::StorageError; +use crate::AppState; + +pub fn router() -> axum::Router { + axum::Router::new() + .route("/signup", post(signup)) + .route("/signin", post(signin)) + .route("/service/signin", post(service_signin)) + .route("/signout", post(signout)) + .route("/refresh", post(refresh)) + .route("/forgot", post(forgot)) + .route("/reset", post(reset)) + .route("/oidc/google/start", post(oidc_google_start)) + .route("/oidc/google/callback", get(oidc_google_callback)) + .route("/mfa/enroll/start", post(mfa_enroll_start)) + .route("/mfa/enroll/confirm", post(mfa_enroll_confirm)) + .route("/mfa/challenge", post(mfa_challenge)) +} + +#[derive(Clone)] +pub struct AuthnConfig { + jwt_secrets: Arc>>, + access_ttl: Duration, + refresh_ttl: Duration, + reset_ttl: Duration, +} + +impl std::fmt::Debug for AuthnConfig { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("AuthnConfig").finish_non_exhaustive() + } +} + +impl AuthnConfig { + 
pub fn from_env() -> Self { + let jwt_secrets = read_jwt_secrets_from_env() + .unwrap_or_else(|| vec![uuid::Uuid::new_v4().as_bytes().to_vec()]); + + let access_ttl = std::env::var("GATEWAY_ACCESS_TTL_SECS") + .ok() + .and_then(|v| v.parse::().ok()) + .map(Duration::from_secs) + .unwrap_or(Duration::from_secs(300)); + + let refresh_ttl = std::env::var("GATEWAY_REFRESH_TTL_SECS") + .ok() + .and_then(|v| v.parse::().ok()) + .map(Duration::from_secs) + .unwrap_or(Duration::from_secs(60 * 60 * 24 * 30)); + + let reset_ttl = std::env::var("GATEWAY_RESET_TTL_SECS") + .ok() + .and_then(|v| v.parse::().ok()) + .map(Duration::from_secs) + .unwrap_or(Duration::from_secs(60 * 15)); + + Self { + jwt_secrets: Arc::new(jwt_secrets), + access_ttl, + refresh_ttl, + reset_ttl, + } + } + + pub fn for_tests() -> Self { + Self { + jwt_secrets: Arc::new(vec![b"test-secret".to_vec()]), + access_ttl: Duration::from_secs(300), + refresh_ttl: Duration::from_secs(3600), + reset_ttl: Duration::from_secs(60), + } + } + + pub fn verify_access_token(&self, token: &str) -> Result { + let mut validation = jsonwebtoken::Validation::new(jsonwebtoken::Algorithm::HS256); + validation.validate_exp = true; + validation.validate_nbf = false; + validation.leeway = 0; + + for secret in self.jwt_secrets.iter() { + if let Ok(data) = jsonwebtoken::decode::( + token, + &jsonwebtoken::DecodingKey::from_secret(secret), + &validation, + ) { + return Ok(data.claims); + } + } + + Err(VerifyError::InvalidToken) + } +} + +#[derive(Debug, Error, Clone, PartialEq, Eq)] +pub enum VerifyError { + #[error("invalid token")] + InvalidToken, +} + +#[derive(Debug, Deserialize)] +pub struct SignupRequest { + pub email: String, + pub password: String, +} + +#[derive(Debug, Deserialize)] +pub struct SigninRequest { + pub email: String, + pub password: String, +} + +#[derive(Debug, Deserialize)] +pub struct ServiceSigninRequest { + pub service_account_id: String, + pub token: String, +} + +#[derive(Debug, Deserialize)] +pub 
struct SignoutRequest {
    pub session_id: String,
}

/// Body of `POST /refresh`; `Serialize` is derived so tests can round-trip it.
#[derive(Debug, Deserialize, Serialize)]
pub struct RefreshRequest {
    pub session_id: String,
    pub refresh_token: String,
}

#[derive(Debug, Deserialize)]
pub struct ForgotRequest {
    pub email: String,
}

#[derive(Debug, Deserialize)]
pub struct ResetRequest {
    pub reset_token: String,
    pub new_password: String,
}

#[derive(Debug, Deserialize)]
pub struct MfaEnrollStartRequest {
    pub user_id: String,
}

#[derive(Debug, Deserialize)]
pub struct MfaEnrollConfirmRequest {
    pub user_id: String,
    pub code: String,
}

/// Tokens returned by every successful signup/signin/refresh flow.
#[derive(Debug, Serialize, Deserialize)]
pub struct AuthResponse {
    pub access_token: String,
    pub session_id: String,
    pub refresh_token: String,
}

#[derive(Debug, Serialize)]
pub struct ForgotResponse {
    pub status: &'static str,
}

#[derive(Debug, Serialize)]
pub struct OidcStartResponse {
    pub url: String,
}

#[derive(Debug, Serialize)]
pub struct MfaEnrollStartResponse {
    pub secret_base32: String,
}

/// `recovery_codes` is non-empty only on the first successful confirm.
#[derive(Debug, Serialize)]
pub struct MfaEnrollConfirmResponse {
    pub status: &'static str,
    pub recovery_codes: Vec<String>,
}

#[derive(Debug, Deserialize)]
pub struct MfaChallengeRequest {
    pub code: String,
}

#[derive(Debug, Serialize)]
pub struct MfaChallengeResponse {
    pub status: &'static str,
}

#[derive(Debug, Serialize)]
pub struct ResetResponse {
    pub status: &'static str,
}

/// Versioned persistence envelope: every record is stored as
/// `{ "v": <schema version>, "data": ... }` so the layout can evolve.
#[derive(Debug, Serialize, Deserialize)]
struct Stored<T> {
    v: u32,
    data: T,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
struct UserRecord {
    user_id: String,
    email: String,
    // Disabled users fail signin with InvalidCredentials (no distinct error).
    enabled: bool,
    created_at_ms: i64,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
struct PasswordCredentialRecord {
    user_id: String,
    // Argon2 PHC string, produced by `hash_password`.
    password_hash: String,
    updated_at_ms: i64,
}

/// One-time password-reset token; only a salted hash of the token is kept.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct PasswordResetRecord {
    user_id: String,
    token_hash: String,
    created_at_ms: i64,
    expires_at_ms: i64,
    used_at_ms: Option<i64>,
}

/// Transient state for an in-flight Google OIDC authorization request.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct OidcStateRecord {
    nonce: String,
    created_at_ms: i64,
    expires_at_ms: i64,
    redirect_uri: String,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
struct ServiceTokenRecord {
    user_id: String,
    token_hash: String,
    created_at_ms: i64,
    rotated_at_ms: Option<i64>,
    enabled: bool,
}

/// TOTP enrollment; `enabled` flips to true after the user confirms a code.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct TotpEnrollmentRecord {
    user_id: String,
    secret_base32: String,
    enabled: bool,
    created_at_ms: i64,
    // Salted hashes of unused one-time recovery codes.
    recovery_hashes: Vec<String>,
}

/// Internal error type for all authn handlers; mapped to HTTP statuses in
/// the `IntoResponse` impl below.
#[derive(Debug, Error)]
enum AuthnError {
    #[error("invalid input")]
    InvalidInput,
    #[error("user already exists")]
    UserExists,
    #[error("invalid credentials")]
    InvalidCredentials,
    #[error("refresh token invalid")]
    InvalidRefresh,
    #[error("reset token invalid")]
    InvalidReset,
    #[error("reset token expired")]
    ResetExpired,
    #[error("reset token already used")]
    ResetUsed,
    #[error("mfa code invalid")]
    MfaInvalid,
    #[error("too many requests")]
    TooManyRequests,
    #[error("oidc not configured")]
    OidcNotConfigured,
    #[error("storage error: {0}")]
    Storage(String),
}

impl From<StorageError> for AuthnError {
    fn from(value: StorageError) -> Self {
        match value {
            StorageError::AlreadyExists => AuthnError::UserExists,
            StorageError::RefreshTokenInvalid => AuthnError::InvalidRefresh,
            // Expired and revoked sessions are indistinguishable to callers.
            StorageError::RefreshSessionExpired | StorageError::RefreshSessionRevoked => {
                AuthnError::InvalidRefresh
            }
            other => AuthnError::Storage(other.to_string()),
        }
    }
}

impl IntoResponse for AuthnError {
    fn into_response(self) -> axum::response::Response {
        let (status, msg) = match &self {
            AuthnError::InvalidInput => (StatusCode::BAD_REQUEST, self.to_string()),
            AuthnError::UserExists => (StatusCode::CONFLICT, self.to_string()),
            AuthnError::InvalidCredentials => (StatusCode::UNAUTHORIZED, self.to_string()),
            AuthnError::InvalidRefresh => (StatusCode::UNAUTHORIZED, self.to_string()),
AuthnError::InvalidReset | AuthnError::ResetExpired | AuthnError::ResetUsed => {
                (StatusCode::BAD_REQUEST, self.to_string())
            }
            AuthnError::MfaInvalid => (StatusCode::BAD_REQUEST, self.to_string()),
            AuthnError::TooManyRequests => (StatusCode::TOO_MANY_REQUESTS, self.to_string()),
            AuthnError::OidcNotConfigured => (StatusCode::NOT_IMPLEMENTED, self.to_string()),
            AuthnError::Storage(_) => (StatusCode::INTERNAL_SERVER_ERROR, self.to_string()),
        };
        (status, msg).into_response()
    }
}

/// Process-wide sliding-window rate limiter: key -> timestamps of recent hits.
static RATE_LIMITER: OnceLock<Mutex<HashMap<String, Vec<Instant>>>> = OnceLock::new();

/// Number of distinct keys after which stale buckets are swept out,
/// bounding memory growth from one-off keys (e.g. random signin emails).
const RATE_LIMITER_SWEEP_THRESHOLD: usize = 10_000;

/// Sliding-window rate limit: allow at most `max` hits for `key` within
/// `window`; returns `TooManyRequests` once the budget is exhausted.
async fn check_rate_limit(key: &str, max: usize, window: Duration) -> Result<(), AuthnError> {
    let limiter = RATE_LIMITER.get_or_init(|| Mutex::new(HashMap::new()));
    let mut guard = limiter.lock().await;
    let now = Instant::now();

    // Fix: the map previously grew without bound because keys were never
    // removed once their window elapsed, so attacker-chosen keys could
    // exhaust memory. Sweep fully-stale buckets when the map gets large.
    if guard.len() >= RATE_LIMITER_SWEEP_THRESHOLD {
        guard.retain(|_, hits| hits.iter().any(|t| now.duration_since(*t) < window));
    }

    let bucket = guard.entry(key.to_string()).or_default();
    bucket.retain(|t| now.duration_since(*t) < window);
    if bucket.len() >= max {
        return Err(AuthnError::TooManyRequests);
    }
    bucket.push(now);
    Ok(())
}

/// Resolve JWT signing secrets from the environment, in priority order:
/// 1. `GATEWAY_JWT_SECRETS_FILE` — file with comma/newline-separated secrets
/// 2. `GATEWAY_JWT_SECRETS`      — comma/newline-separated secrets
/// 3. `GATEWAY_JWT_SECRET_FILE`  — file holding a single secret
/// 4. `GATEWAY_JWT_SECRET`       — a single secret
/// The first secret signs new tokens; all of them verify (key rotation).
fn read_jwt_secrets_from_env() -> Option<Vec<Vec<u8>>> {
    if let Ok(path) = std::env::var("GATEWAY_JWT_SECRETS_FILE") {
        if let Some(raw) = read_secret_file(&path) {
            return split_secrets(&raw);
        }
    }

    if let Ok(v) = std::env::var("GATEWAY_JWT_SECRETS") {
        if let Some(secrets) = split_secrets(&v) {
            return Some(secrets);
        }
    }

    if let Ok(path) = std::env::var("GATEWAY_JWT_SECRET_FILE") {
        if let Some(raw) = read_secret_file(&path) {
            return Some(vec![raw.into_bytes()]);
        }
    }

    std::env::var("GATEWAY_JWT_SECRET")
        .ok()
        .map(|s| vec![s.into_bytes()])
}

/// Split a comma- or newline-separated secret list, trimming entries and
/// dropping empties; `None` when nothing usable remains.
fn split_secrets(raw: &str) -> Option<Vec<Vec<u8>>> {
    let normalized = raw.replace('\n', ",");
    let secrets: Vec<Vec<u8>> = normalized
        .split(',')
        .map(|s| s.trim())
        .filter(|s| !s.is_empty())
        .map(|s| s.as_bytes().to_vec())
        .collect();
    if secrets.is_empty() {
        None
    } else {
        Some(secrets)
    }
}

/// Read and trim a secret file; `None` on I/O failure or empty content.
fn read_secret_file(path: &str) -> Option<String> {
    std::fs::read_to_string(path)
        .ok()
        .map(|s|
s.trim().to_string())
        .filter(|s| !s.is_empty())
}

/// Read a config value from `file_env_key` (a path to a secret file) first,
/// then fall back to the plain `env_key` variable; empty values are unset.
fn env_or_file(env_key: &str, file_env_key: &str) -> Option<String> {
    if let Ok(path) = std::env::var(file_env_key) {
        if let Some(value) = read_secret_file(&path) {
            return Some(value);
        }
    }
    std::env::var(env_key).ok().filter(|s| !s.trim().is_empty())
}

/// `POST /signup`: create a user with an email/password credential and
/// immediately open a refresh session for it.
async fn signup(
    State(state): State<AppState>,
    Json(req): Json<SignupRequest>,
) -> Result<Json<AuthResponse>, AuthnError> {
    let email = normalize_email(&req.email).ok_or(AuthnError::InvalidInput)?;
    validate_password(&req.password)?;
    // Fix: signup was the only credential endpoint without a rate limit,
    // leaving it open to mass account creation; mirror signin's budget.
    check_rate_limit(&format!("signup:{email}"), 10, Duration::from_secs(60)).await?;

    let user_id = uuid::Uuid::new_v4().to_string();
    let now_ms = unix_ms();

    let user = UserRecord {
        user_id: user_id.clone(),
        email: email.clone(),
        enabled: true,
        created_at_ms: now_ms,
    };

    // The email index stores a salted hash, so keys never leak raw
    // addresses. Creating it first also enforces uniqueness: a duplicate
    // signup fails here with `UserExists`.
    // NOTE(review): the three creates below are not transactional; a
    // failure mid-way leaves a partial account — confirm cleanup strategy.
    let email_hash = hash_stable("email", &email);
    state
        .storage
        .users
        .create(
            &user_by_email_hash_key(&email_hash),
            user_id.as_bytes().to_vec(),
        )
        .await?;
    state
        .storage
        .users
        .create(&user_key(&user_id), encode_stored(&user)?)
        .await?;

    let password_hash = hash_password(&req.password)?;
    let cred = PasswordCredentialRecord {
        user_id: user_id.clone(),
        password_hash,
        updated_at_ms: now_ms,
    };
    state
        .storage
        .password_credentials
        .create(&password_key(&user_id), encode_stored(&cred)?)
        .await?;

    let session = state
        .storage
        .create_refresh_session(&user_id, state.authn.refresh_ttl)
        .await?;
    let access_token = issue_access_token(&state.authn, &user_id, &session.session_id)?;

    Ok(Json(AuthResponse {
        access_token,
        session_id: session.session_id,
        refresh_token: session.refresh_token,
    }))
}

/// `POST /signin`: verify an email/password credential and mint a session.
/// Every failure collapses to `InvalidCredentials` so the endpoint does not
/// reveal which accounts exist.
async fn signin(
    State(state): State<AppState>,
    Json(req): Json<SigninRequest>,
) -> Result<Json<AuthResponse>, AuthnError> {
    let email = normalize_email(&req.email).ok_or(AuthnError::InvalidInput)?;
    check_rate_limit(&format!("signin:{email}"), 10, Duration::from_secs(60)).await?;
    let email_hash = hash_stable("email", &email);

    let user_id_bytes = state
        .storage
        .users
        .get(&user_by_email_hash_key(&email_hash))
        .await?
+ .ok_or(AuthnError::InvalidCredentials)? + .value; + let user_id = String::from_utf8(user_id_bytes).map_err(|_| AuthnError::InvalidCredentials)?; + + let user_entry = state + .storage + .users + .get(&user_key(&user_id)) + .await? + .ok_or(AuthnError::InvalidCredentials)?; + let stored_user: Stored = serde_json::from_slice(&user_entry.value) + .map_err(|e| AuthnError::Storage(e.to_string()))?; + if !stored_user.data.enabled { + return Err(AuthnError::InvalidCredentials); + } + + let cred_entry = state + .storage + .password_credentials + .get(&password_key(&user_id)) + .await? + .ok_or(AuthnError::InvalidCredentials)?; + let stored_cred: Stored = + serde_json::from_slice(&cred_entry.value) + .map_err(|e| AuthnError::Storage(e.to_string()))?; + + if !verify_password(&req.password, &stored_cred.data.password_hash)? { + return Err(AuthnError::InvalidCredentials); + } + + let session = state + .storage + .create_refresh_session(&user_id, state.authn.refresh_ttl) + .await?; + let access_token = issue_access_token(&state.authn, &user_id, &session.session_id)?; + + Ok(Json(AuthResponse { + access_token, + session_id: session.session_id, + refresh_token: session.refresh_token, + })) +} + +async fn service_signin( + State(state): State, + Json(req): Json, +) -> Result, AuthnError> { + if req.service_account_id.trim().is_empty() || req.token.trim().is_empty() { + return Err(AuthnError::InvalidInput); + } + + check_rate_limit( + &format!("service_signin:{}", req.service_account_id), + 30, + Duration::from_secs(60), + ) + .await?; + + let key = service_token_key(&req.service_account_id); + let entry = state + .storage + .service_tokens + .get(&key) + .await? 
+ .ok_or(AuthnError::InvalidCredentials)?; + let stored: Stored = + serde_json::from_slice(&entry.value).map_err(|e| AuthnError::Storage(e.to_string()))?; + if !stored.data.enabled { + return Err(AuthnError::InvalidCredentials); + } + + let presented_hash = hash_stable("service_token", &req.token); + if !bool::from( + presented_hash + .as_bytes() + .ct_eq(stored.data.token_hash.as_bytes()), + ) { + return Err(AuthnError::InvalidCredentials); + } + + let user_entry = state + .storage + .users + .get(&user_key(&req.service_account_id)) + .await? + .ok_or(AuthnError::InvalidCredentials)?; + let stored_user: Stored = serde_json::from_slice(&user_entry.value) + .map_err(|e| AuthnError::Storage(e.to_string()))?; + if !stored_user.data.enabled { + return Err(AuthnError::InvalidCredentials); + } + + let session = state + .storage + .create_refresh_session(&req.service_account_id, state.authn.refresh_ttl) + .await?; + let access_token = + issue_access_token(&state.authn, &req.service_account_id, &session.session_id)?; + + Ok(Json(AuthResponse { + access_token, + session_id: session.session_id, + refresh_token: session.refresh_token, + })) +} + +async fn refresh( + State(state): State, + Json(req): Json, +) -> Result, AuthnError> { + let new_refresh_token = state + .storage + .rotate_refresh_token(&req.session_id, &req.refresh_token) + .await?; + + let user_id = session_user_id(&state.storage, &req.session_id).await?; + let access_token = issue_access_token(&state.authn, &user_id, &req.session_id)?; + + Ok(Json(AuthResponse { + access_token, + session_id: req.session_id, + refresh_token: new_refresh_token, + })) +} + +async fn signout( + State(state): State, + Json(req): Json, +) -> Result { + state + .storage + .revoke_refresh_session(&req.session_id) + .await?; + Ok(StatusCode::NO_CONTENT) +} + +async fn forgot( + State(state): State, + Json(req): Json, +) -> Result, AuthnError> { + let email = normalize_email(&req.email).ok_or(AuthnError::InvalidInput)?; + 
check_rate_limit(&format!("forgot:{email}"), 5, Duration::from_secs(60)).await?; + let email_hash = hash_stable("email", &email); + let user_id = state + .storage + .users + .get(&user_by_email_hash_key(&email_hash)) + .await? + .and_then(|e| String::from_utf8(e.value).ok()); + + if let Some(user_id) = user_id { + let _ = issue_password_reset_token(&state.storage, &user_id, state.authn.reset_ttl).await; + } + + Ok(Json(ForgotResponse { status: "ok" })) +} + +async fn reset( + State(state): State, + Json(req): Json, +) -> Result, AuthnError> { + check_rate_limit("reset", 30, Duration::from_secs(60)).await?; + validate_password(&req.new_password)?; + reset_password(&state.storage, &req.reset_token, &req.new_password).await?; + Ok(Json(ResetResponse { status: "ok" })) +} + +async fn oidc_google_start( + State(state): State, +) -> Result, AuthnError> { + let client_id = env_or_file("GOOGLE_OIDC_CLIENT_ID", "GOOGLE_OIDC_CLIENT_ID_FILE") + .ok_or(AuthnError::OidcNotConfigured)?; + let redirect_uri = env_or_file("GOOGLE_OIDC_REDIRECT_URI", "GOOGLE_OIDC_REDIRECT_URI_FILE") + .ok_or(AuthnError::OidcNotConfigured)?; + + let state_value = uuid::Uuid::new_v4().to_string(); + let nonce = uuid::Uuid::new_v4().to_string(); + let now_ms = unix_ms(); + let expires_at_ms = now_ms + 10 * 60 * 1000; + + let record = OidcStateRecord { + nonce: nonce.clone(), + created_at_ms: now_ms, + expires_at_ms, + redirect_uri: redirect_uri.clone(), + }; + + let key = oidc_state_key(&state_value); + state + .storage + .identities + .create(&key, encode_stored(&record)?) 
+ .await?; + + let url = format!( + "https://accounts.google.com/o/oauth2/v2/auth?client_id={}&redirect_uri={}&response_type=code&scope=openid%20email%20profile&state={}&nonce={}&access_type=offline&prompt=consent", + urlencoding::encode(&client_id), + urlencoding::encode(&redirect_uri), + urlencoding::encode(&state_value), + urlencoding::encode(&nonce), + ); + + Ok(Json(OidcStartResponse { url })) +} + +#[derive(Debug, Deserialize)] +struct OidcCallbackQuery { + code: String, + state: String, +} + +async fn oidc_google_callback( + State(state): State, + Query(q): Query, +) -> Result, AuthnError> { + let client_id = env_or_file("GOOGLE_OIDC_CLIENT_ID", "GOOGLE_OIDC_CLIENT_ID_FILE") + .ok_or(AuthnError::OidcNotConfigured)?; + let client_secret = env_or_file( + "GOOGLE_OIDC_CLIENT_SECRET", + "GOOGLE_OIDC_CLIENT_SECRET_FILE", + ) + .ok_or(AuthnError::OidcNotConfigured)?; + + let state_key = oidc_state_key(&q.state); + let entry = state + .storage + .identities + .get(&state_key) + .await? + .ok_or(AuthnError::InvalidInput)?; + + let state_record: Stored = + serde_json::from_slice(&entry.value).map_err(|e| AuthnError::Storage(e.to_string()))?; + let now_ms = unix_ms(); + if now_ms >= state_record.data.expires_at_ms { + let _ = state.storage.identities.delete(&state_key).await; + return Err(AuthnError::InvalidInput); + } + + let _ = state.storage.identities.delete(&state_key).await; + + #[derive(Deserialize)] + struct TokenResponse { + id_token: String, + } + + let client = reqwest::Client::new(); + let token_resp = client + .post("https://oauth2.googleapis.com/token") + .form(&[ + ("grant_type", "authorization_code"), + ("code", q.code.as_str()), + ("client_id", client_id.as_str()), + ("client_secret", client_secret.as_str()), + ("redirect_uri", state_record.data.redirect_uri.as_str()), + ]) + .send() + .await + .map_err(|e| AuthnError::Storage(e.to_string()))?; + + if !token_resp.status().is_success() { + return Err(AuthnError::InvalidInput); + } + + let token_body = 
token_resp + .json::() + .await + .map_err(|e| AuthnError::Storage(e.to_string()))?; + + let claims = + verify_google_id_token(&token_body.id_token, &client_id, &state_record.data.nonce).await?; + + let user_id = upsert_google_identity(&state.storage, &claims).await?; + + let session = state + .storage + .create_refresh_session(&user_id, state.authn.refresh_ttl) + .await?; + let access_token = issue_access_token(&state.authn, &user_id, &session.session_id)?; + + Ok(Json(AuthResponse { + access_token, + session_id: session.session_id, + refresh_token: session.refresh_token, + })) +} + +#[derive(Debug, Deserialize)] +struct GoogleIdClaims { + sub: String, + email: Option, + email_verified: Option, + nonce: Option, +} + +async fn verify_google_id_token( + id_token: &str, + client_id: &str, + expected_nonce: &str, +) -> Result { + let header = jsonwebtoken::decode_header(id_token).map_err(|_| AuthnError::InvalidInput)?; + let kid = header.kid.ok_or(AuthnError::InvalidInput)?; + + let jwks = reqwest::get("https://www.googleapis.com/oauth2/v3/certs") + .await + .map_err(|e| AuthnError::Storage(e.to_string()))? 
+ .json::() + .await + .map_err(|e| AuthnError::Storage(e.to_string()))?; + + let keys = jwks + .get("keys") + .and_then(|v| v.as_array()) + .ok_or(AuthnError::InvalidInput)?; + + let mut n = None; + let mut e = None; + for k in keys { + if k.get("kid").and_then(|v| v.as_str()) == Some(kid.as_str()) { + n = k.get("n").and_then(|v| v.as_str()).map(|s| s.to_string()); + e = k.get("e").and_then(|v| v.as_str()).map(|s| s.to_string()); + break; + } + } + + let n = n.ok_or(AuthnError::InvalidInput)?; + let e = e.ok_or(AuthnError::InvalidInput)?; + let decoding_key = jsonwebtoken::DecodingKey::from_rsa_components(&n, &e) + .map_err(|e| AuthnError::Storage(e.to_string()))?; + + let mut validation = jsonwebtoken::Validation::new(jsonwebtoken::Algorithm::RS256); + validation.set_audience(&[client_id]); + validation.set_issuer(&["https://accounts.google.com", "accounts.google.com"]); + + let token_data = jsonwebtoken::decode::(id_token, &decoding_key, &validation) + .map_err(|_| AuthnError::InvalidInput)?; + + let claims = token_data.claims; + if claims.nonce.as_deref() != Some(expected_nonce) { + return Err(AuthnError::InvalidInput); + } + + Ok(claims) +} + +async fn upsert_google_identity( + storage: &GatewayStorage, + claims: &GoogleIdClaims, +) -> Result { + let identity_key = format!("v1/identities/google/{}", claims.sub); + if let Some(entry) = storage.identities.get(&identity_key).await? { + let user_id = String::from_utf8(entry.value).map_err(|_| AuthnError::InvalidInput)?; + return Ok(user_id); + } + + let email = claims.email.clone().ok_or(AuthnError::InvalidInput)?; + if claims.email_verified != Some(true) { + return Err(AuthnError::InvalidInput); + } + + let email_hash = hash_stable("email", &email); + let existing = storage + .users + .get(&user_by_email_hash_key(&email_hash)) + .await? 
+ .and_then(|e| String::from_utf8(e.value).ok()); + + let user_id = if let Some(user_id) = existing { + user_id + } else { + let user_id = uuid::Uuid::new_v4().to_string(); + let now_ms = unix_ms(); + let user = UserRecord { + user_id: user_id.clone(), + email: email.clone(), + enabled: true, + created_at_ms: now_ms, + }; + + storage + .users + .create( + &user_by_email_hash_key(&email_hash), + user_id.as_bytes().to_vec(), + ) + .await?; + storage + .users + .create(&user_key(&user_id), encode_stored(&user)?) + .await?; + + user_id + }; + + let _ = storage + .identities + .create(&identity_key, user_id.as_bytes().to_vec()) + .await; + + Ok(user_id) +} + +async fn mfa_enroll_start( + State(state): State, + principal: crate::authz::Principal, + Json(req): Json, +) -> Result, AuthnError> { + if req.user_id != principal.user_id { + return Err(AuthnError::InvalidInput); + } + + let secret_base32 = generate_totp_secret_base32(); + let now_ms = unix_ms(); + let record = TotpEnrollmentRecord { + user_id: req.user_id.clone(), + secret_base32: secret_base32.clone(), + enabled: false, + created_at_ms: now_ms, + recovery_hashes: Vec::new(), + }; + + state + .storage + .mfa + .put(&totp_key(&principal.user_id), encode_stored(&record)?) + .await?; + + Ok(Json(MfaEnrollStartResponse { secret_base32 })) +} + +async fn mfa_enroll_confirm( + State(state): State, + principal: crate::authz::Principal, + Json(req): Json, +) -> Result, AuthnError> { + if req.user_id != principal.user_id { + return Err(AuthnError::InvalidInput); + } + + let entry = state + .storage + .mfa + .get(&totp_key(&principal.user_id)) + .await? 
+ .ok_or(AuthnError::InvalidInput)?; + + let mut stored: Stored = + serde_json::from_slice(&entry.value).map_err(|e| AuthnError::Storage(e.to_string()))?; + + if stored.data.enabled { + return Ok(Json(MfaEnrollConfirmResponse { + status: "ok", + recovery_codes: Vec::new(), + })); + } + + let now = SystemTime::now(); + if !verify_totp_code(&stored.data.secret_base32, &req.code, now)? { + return Err(AuthnError::MfaInvalid); + } + + stored.data.enabled = true; + let mut recovery_codes: Vec = Vec::new(); + if stored.data.recovery_hashes.is_empty() { + recovery_codes = generate_recovery_codes(10); + stored.data.recovery_hashes = recovery_codes + .iter() + .map(|c| hash_stable("recovery", c)) + .collect(); + } + let payload = serde_json::to_vec(&stored).map_err(|e| AuthnError::Storage(e.to_string()))?; + state + .storage + .mfa + .update(&totp_key(&principal.user_id), entry.revision, payload) + .await?; + + Ok(Json(MfaEnrollConfirmResponse { + status: "ok", + recovery_codes, + })) +} + +async fn mfa_challenge( + State(state): State, + principal: crate::authz::Principal, + Json(req): Json, +) -> Result, AuthnError> { + let key = totp_key(&principal.user_id); + + for _ in 0..10 { + let entry = state + .storage + .mfa + .get(&key) + .await? + .ok_or(AuthnError::InvalidInput)?; + + let mut stored: Stored = + serde_json::from_slice(&entry.value).map_err(|e| AuthnError::Storage(e.to_string()))?; + + if !stored.data.enabled { + return Err(AuthnError::InvalidInput); + } + + let now = SystemTime::now(); + if req.code.chars().all(|c| c.is_ascii_digit()) && req.code.len() == 6 { + if verify_totp_code(&stored.data.secret_base32, &req.code, now)? 
{ + return Ok(Json(MfaChallengeResponse { status: "ok" })); + } + metrics::counter!("gateway_mfa_fail_total", "kind" => "totp").increment(1); + return Err(AuthnError::MfaInvalid); + } + + let presented_hash = hash_stable("recovery", &req.code); + if let Some(pos) = stored + .data + .recovery_hashes + .iter() + .position(|h| h.as_bytes().ct_eq(presented_hash.as_bytes()).into()) + { + stored.data.recovery_hashes.remove(pos); + let payload = + serde_json::to_vec(&stored).map_err(|e| AuthnError::Storage(e.to_string()))?; + match state + .storage + .mfa + .update(&key, entry.revision, payload) + .await + { + Ok(_) => { + return Ok(Json(MfaChallengeResponse { status: "ok" })); + } + Err(StorageError::CasMismatch) => continue, + Err(e) => return Err(e.into()), + } + } + + metrics::counter!("gateway_mfa_fail_total", "kind" => "recovery").increment(1); + return Err(AuthnError::MfaInvalid); + } + + Err(AuthnError::Storage("mfa cas failed".to_string())) +} + +async fn session_user_id(storage: &GatewayStorage, session_id: &str) -> Result { + let entry = storage + .refresh_sessions + .get(&format!("v1/sessions/{session_id}")) + .await? + .ok_or(AuthnError::InvalidRefresh)?; + + let stored: Stored = + serde_json::from_slice(&entry.value).map_err(|e| AuthnError::Storage(e.to_string()))?; + Ok(stored.data.user_id) +} + +async fn issue_password_reset_token( + storage: &GatewayStorage, + user_id: &str, + ttl: Duration, +) -> Result { + let token = uuid::Uuid::new_v4().to_string(); + let token_hash = hash_stable("reset", &token); + let now_ms = unix_ms(); + let record = PasswordResetRecord { + user_id: user_id.to_string(), + token_hash: token_hash.clone(), + created_at_ms: now_ms, + expires_at_ms: now_ms + ttl.as_millis() as i64, + used_at_ms: None, + }; + + storage + .password_resets + .create(&reset_key(&token_hash), encode_stored(&record)?) 
+ .await?; + + Ok(token) +} + +async fn reset_password( + storage: &GatewayStorage, + presented_token: &str, + new_password: &str, +) -> Result<(), AuthnError> { + let token_hash = hash_stable("reset", presented_token); + let key = reset_key(&token_hash); + + for _ in 0..10 { + let entry = storage + .password_resets + .get(&key) + .await? + .ok_or(AuthnError::InvalidReset)?; + + let mut stored: Stored = + serde_json::from_slice(&entry.value).map_err(|e| AuthnError::Storage(e.to_string()))?; + + let now_ms = unix_ms(); + if stored.data.used_at_ms.is_some() { + return Err(AuthnError::ResetUsed); + } + if now_ms >= stored.data.expires_at_ms { + return Err(AuthnError::ResetExpired); + } + + stored.data.used_at_ms = Some(now_ms); + let payload = + serde_json::to_vec(&stored).map_err(|e| AuthnError::Storage(e.to_string()))?; + match storage + .password_resets + .update(&key, entry.revision, payload) + .await + { + Ok(_) => { + let password_hash = hash_password(new_password)?; + let cred_key = password_key(&stored.data.user_id); + let cred_entry = storage + .password_credentials + .get(&cred_key) + .await? 
+ .ok_or(AuthnError::InvalidReset)?; + let mut stored_cred: Stored = + serde_json::from_slice(&cred_entry.value) + .map_err(|e| AuthnError::Storage(e.to_string()))?; + stored_cred.data.password_hash = password_hash; + stored_cred.data.updated_at_ms = now_ms; + let cred_payload = serde_json::to_vec(&stored_cred) + .map_err(|e| AuthnError::Storage(e.to_string()))?; + storage + .password_credentials + .update(&cred_key, cred_entry.revision, cred_payload) + .await?; + + revoke_all_refresh_sessions_for_user(storage, &stored.data.user_id).await?; + return Ok(()); + } + Err(StorageError::CasMismatch) => continue, + Err(e) => return Err(e.into()), + } + } + + Err(AuthnError::Storage("reset cas failed".to_string())) +} + +async fn revoke_all_refresh_sessions_for_user( + storage: &GatewayStorage, + user_id: &str, +) -> Result<(), AuthnError> { + let keys = storage.refresh_sessions.list_keys("v1/sessions/").await?; + for key in keys { + if let Some(session_id) = key.strip_prefix("v1/sessions/") { + let entry = storage.refresh_sessions.get(&key).await?; + let Some(entry) = entry else { + continue; + }; + let stored: Stored = + serde_json::from_slice(&entry.value) + .map_err(|e| AuthnError::Storage(e.to_string()))?; + if stored.data.user_id == user_id { + let _ = storage.revoke_refresh_session(session_id).await; + } + } + } + Ok(()) +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] +pub struct AccessClaims { + pub sub: String, + pub session_id: String, + pub iat: i64, + pub exp: i64, +} + +fn issue_access_token( + cfg: &AuthnConfig, + user_id: &str, + session_id: &str, +) -> Result { + let now = Utc::now().timestamp(); + let exp = now + cfg.access_ttl.as_secs() as i64; + let claims = AccessClaims { + sub: user_id.to_string(), + session_id: session_id.to_string(), + iat: now, + exp, + }; + + let token = jsonwebtoken::encode( + &jsonwebtoken::Header::default(), + &claims, + &jsonwebtoken::EncodingKey::from_secret( + cfg.jwt_secrets + .first() + .ok_or_else(|| 
AuthnError::Storage("no jwt secrets configured".to_string()))?,
        ),
    )
    .map_err(|e| AuthnError::Storage(e.to_string()))?;

    Ok(token)
}

/// Minimal password policy: at least 8 bytes.
fn validate_password(password: &str) -> Result<(), AuthnError> {
    if password.len() < 8 {
        return Err(AuthnError::InvalidInput);
    }
    Ok(())
}

/// Canonicalize an email for hashing/lookup: trim + ASCII-lowercase.
/// Returns `None` for empty strings or anything without an '@'.
fn normalize_email(email: &str) -> Option<String> {
    let e = email.trim().to_ascii_lowercase();
    if e.is_empty() || !e.contains('@') {
        return None;
    }
    Some(e)
}

/// Current wall-clock time in milliseconds since the Unix epoch.
fn unix_ms() -> i64 {
    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default()
        .as_millis() as i64
}

/// Borrowing twin of `Stored` so records can be serialized without cloning.
#[derive(Debug, Serialize)]
struct StoredRef<'a, T> {
    v: u32,
    data: &'a T,
}

/// Wrap `value` in the versioned storage envelope and serialize to JSON.
fn encode_stored<T: Serialize>(value: &T) -> Result<Vec<u8>, AuthnError> {
    serde_json::to_vec(&StoredRef {
        v: crate::storage::SCHEMA_VERSION,
        data: value,
    })
    .map_err(|e| AuthnError::Storage(e.to_string()))
}

/// Domain-separated SHA-256: hash(domain || 0x00 || value), hex-encoded.
/// The NUL separator prevents cross-domain collisions such as
/// ("ab", "c") vs ("a", "bc").
fn hash_stable(domain: &str, value: &str) -> String {
    let mut hasher = sha2::Sha256::new();
    hasher.update(domain.as_bytes());
    hasher.update([0u8]);
    hasher.update(value.as_bytes());
    hex::encode(hasher.finalize())
}

// --- Storage key layout (all under the "v1/" schema prefix) ---

fn user_key(user_id: &str) -> String {
    format!("v1/users/{user_id}")
}

fn user_by_email_hash_key(email_hash: &str) -> String {
    format!("v1/users/by_email_hash/{email_hash}")
}

fn password_key(user_id: &str) -> String {
    format!("v1/password/{user_id}")
}

fn reset_key(token_hash: &str) -> String {
    format!("v1/resets/{token_hash}")
}

fn oidc_state_key(state: &str) -> String {
    // The OAuth state value is attacker-supplied; hash it so it cannot
    // inject separators into the key space.
    let h = hash_stable("oidc_state", state);
    format!("v1/oidc/google/state/{h}")
}

fn service_token_key(user_id: &str) -> String {
    format!("v1/service_tokens/{user_id}")
}

fn totp_key(user_id: &str) -> String {
    format!("v1/totp/{user_id}")
}

/// Hash a password with Argon2 default parameters and a fresh random salt,
/// returning the PHC-formatted string.
fn hash_password(password: &str) -> Result<String, AuthnError> {
    let salt = SaltString::generate(&mut rand_core::OsRng);
    let argon2 = Argon2::default();
    let hash = argon2
        .hash_password(password.as_bytes(), &salt)
        .map_err(|e|
AuthnError::Storage(e.to_string()))?
        .to_string();
    Ok(hash)
}

/// Check a password against a stored PHC hash. A malformed stored hash is
/// reported as a storage error; a mismatching password is simply `Ok(false)`.
fn verify_password(password: &str, hash: &str) -> Result<bool, AuthnError> {
    let parsed = PasswordHash::new(hash).map_err(|e| AuthnError::Storage(e.to_string()))?;
    let argon2 = Argon2::default();
    Ok(argon2.verify_password(password.as_bytes(), &parsed).is_ok())
}

/// Generate a fresh TOTP secret, encoded as RFC 4648 base32 (no padding).
/// NOTE(review): the 16 secret bytes come from a v4 UUID (~122 random
/// bits); consider filling a buffer from a CSPRNG directly instead.
fn generate_totp_secret_base32() -> String {
    let raw = uuid::Uuid::new_v4().as_bytes().to_vec();
    base32::encode(base32::Alphabet::Rfc4648 { padding: false }, &raw)
}

/// Produce `count` distinct 8-character uppercase-hex recovery codes.
fn generate_recovery_codes(count: usize) -> Vec<String> {
    let mut out = Vec::with_capacity(count);
    let mut seen = std::collections::HashSet::new();
    while out.len() < count {
        let code = uuid::Uuid::new_v4()
            .simple()
            .to_string()
            .chars()
            .take(8)
            .collect::<String>()
            .to_ascii_uppercase();
        // HashSet guards against the (unlikely) duplicate prefix.
        if seen.insert(code.clone()) {
            out.push(code);
        }
    }
    out
}

/// Verify a 6-digit TOTP code against the current 30-second step and its
/// immediate neighbours (±1 step of clock skew), comparing in constant time.
fn verify_totp_code(secret_base32: &str, code: &str, now: SystemTime) -> Result<bool, AuthnError> {
    let secret = base32::decode(base32::Alphabet::Rfc4648 { padding: false }, secret_base32)
        .ok_or(AuthnError::InvalidInput)?;

    let now_secs = now.duration_since(UNIX_EPOCH).unwrap_or_default().as_secs();
    let step = 30u64;
    let counter = now_secs / step;

    for offset in [0i64, -1, 1] {
        // Saturating math keeps the counter valid near the epoch.
        let c = if offset.is_negative() {
            counter.saturating_sub(offset.unsigned_abs())
        } else {
            counter.saturating_add(offset as u64)
        };
        let expected = totp_code(&secret, c, 6)?;
        if expected.as_bytes().ct_eq(code.as_bytes()).into() {
            return Ok(true);
        }
    }

    Ok(false)
}

/// RFC 4226 dynamic-truncation HOTP over HMAC-SHA1, rendered as a
/// zero-padded decimal string with `digits` digits.
fn totp_code(secret: &[u8], counter: u64, digits: u32) -> Result<String, AuthnError> {
    let mut msg = [0u8; 8];
    msg.copy_from_slice(&counter.to_be_bytes());

    let mut mac = Hmac::<Sha1>::new_from_slice(secret).map_err(|_| AuthnError::InvalidInput)?;
    mac.update(&msg);
    let result = mac.finalize().into_bytes();

    // Low nibble of the final SHA-1 byte selects a 4-byte window.
    let offset = (result[19] & 0x0f) as usize;
    let binary = ((result[offset] as u32 & 0x7f) << 24)
        | ((result[offset + 1] as u32) << 16)
        | ((result[offset + 2] as u32)
<< 8) + | (result[offset + 3] as u32); + + let modulus = 10u32.pow(digits); + let code = binary % modulus; + Ok(format!("{:0width$}", code, width = digits as usize)) +} + +#[cfg(test)] +mod tests { + use super::*; + use tower::util::ServiceExt; + + #[test] + fn password_hashing_and_verification_works() { + let hash = hash_password("correct horse battery staple").unwrap(); + assert!(hash.contains("$argon2id$")); + assert!(verify_password("correct horse battery staple", &hash).unwrap()); + assert!(!verify_password("wrong", &hash).unwrap()); + } + + #[tokio::test] + async fn refresh_rotation_invalidates_old_token() { + let metrics = crate::observability::init_metrics_for_tests(); + let routing = crate::routing::RouterState::new(Arc::new(crate::routing::FixedSource::new( + crate::routing::RoutingConfig::empty(), + ))) + .await + .unwrap(); + let storage = crate::storage::GatewayStorage::new_in_memory(); + let authn = AuthnConfig::for_tests(); + let app = crate::app(crate::AppState { + metrics, + routing, + storage, + authn, + }); + + let signup_res = app + .clone() + .oneshot( + axum::http::Request::builder() + .method("POST") + .uri("/v1/auth/signup") + .header("content-type", "application/json") + .body(axum::body::Body::from( + r#"{"email":"a@b.com","password":"password123"}"#, + )) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(signup_res.status(), StatusCode::OK); + let body = axum::body::to_bytes(signup_res.into_body(), usize::MAX) + .await + .unwrap(); + let created: AuthResponse = serde_json::from_slice(&body).unwrap(); + + let refresh_body = serde_json::to_vec(&RefreshRequest { + session_id: created.session_id.clone(), + refresh_token: created.refresh_token.clone(), + }) + .unwrap(); + + let refresh_res = app + .clone() + .oneshot( + axum::http::Request::builder() + .method("POST") + .uri("/v1/auth/refresh") + .header("content-type", "application/json") + .body(axum::body::Body::from(refresh_body)) + .unwrap(), + ) + .await + .unwrap(); + 
assert_eq!(refresh_res.status(), StatusCode::OK); + let body = axum::body::to_bytes(refresh_res.into_body(), usize::MAX) + .await + .unwrap(); + let refreshed: AuthResponse = serde_json::from_slice(&body).unwrap(); + + let refresh_again_body = serde_json::to_vec(&RefreshRequest { + session_id: created.session_id.clone(), + refresh_token: created.refresh_token.clone(), + }) + .unwrap(); + + let refresh_again = app + .oneshot( + axum::http::Request::builder() + .method("POST") + .uri("/v1/auth/refresh") + .header("content-type", "application/json") + .body(axum::body::Body::from(refresh_again_body)) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(refresh_again.status(), StatusCode::UNAUTHORIZED); + + assert_ne!(refreshed.refresh_token, created.refresh_token); + } + + #[tokio::test] + async fn forgot_reset_token_is_one_time_and_expires() { + let storage = GatewayStorage::new_in_memory(); + + let user_id = "u1"; + let token = issue_password_reset_token(&storage, user_id, Duration::from_millis(1)) + .await + .unwrap(); + tokio::time::sleep(Duration::from_millis(5)).await; + + let res = reset_password(&storage, &token, "password123").await; + assert!(matches!(res, Err(AuthnError::ResetExpired))); + + let token2 = issue_password_reset_token(&storage, user_id, Duration::from_secs(60)) + .await + .unwrap(); + + let cred = PasswordCredentialRecord { + user_id: user_id.to_string(), + password_hash: hash_password("password123").unwrap(), + updated_at_ms: unix_ms(), + }; + storage + .password_credentials + .create( + &password_key(user_id), + serde_json::to_vec(&Stored { v: 1, data: cred }).unwrap(), + ) + .await + .unwrap(); + + reset_password(&storage, &token2, "newpassword123") + .await + .unwrap(); + let again = reset_password(&storage, &token2, "anotherpassword123").await; + assert!(matches!(again, Err(AuthnError::ResetUsed))); + } + + #[test] + fn totp_verification_accepts_valid_code() { + let secret = generate_totp_secret_base32(); + let now = SystemTime::now(); + 
let now_secs = now.duration_since(UNIX_EPOCH).unwrap_or_default().as_secs(); + let counter = now_secs / 30; + let secret_bytes = + base32::decode(base32::Alphabet::Rfc4648 { padding: false }, &secret).unwrap(); + let code = totp_code(&secret_bytes, counter, 6).unwrap(); + assert!(verify_totp_code(&secret, &code, now).unwrap()); + assert!(!verify_totp_code(&secret, "000000", now).unwrap()); + } + + #[tokio::test] + async fn mfa_enrollment_and_challenge_work_with_totp_and_recovery_codes() { + let metrics = crate::observability::init_metrics_for_tests(); + let routing = crate::routing::RouterState::new(Arc::new(crate::routing::FixedSource::new( + crate::routing::RoutingConfig::empty(), + ))) + .await + .unwrap(); + let storage = crate::storage::GatewayStorage::new_in_memory(); + let authn = AuthnConfig::for_tests(); + let app = crate::app(crate::AppState { + metrics, + routing, + storage, + authn: authn.clone(), + }); + + let signup_res = app + .clone() + .oneshot( + axum::http::Request::builder() + .method("POST") + .uri("/v1/auth/signup") + .header("content-type", "application/json") + .body(axum::body::Body::from( + r#"{"email":"mfa@b.com","password":"password123"}"#, + )) + .unwrap(), + ) + .await + .unwrap(); + let body = axum::body::to_bytes(signup_res.into_body(), usize::MAX) + .await + .unwrap(); + let created: AuthResponse = serde_json::from_slice(&body).unwrap(); + let claims = authn.verify_access_token(&created.access_token).unwrap(); + + let enroll_start_res = app + .clone() + .oneshot( + axum::http::Request::builder() + .method("POST") + .uri("/v1/auth/mfa/enroll/start") + .header("authorization", format!("Bearer {}", created.access_token)) + .header("content-type", "application/json") + .body(axum::body::Body::from(format!( + r#"{{"user_id":"{}"}}"#, + claims.sub + ))) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(enroll_start_res.status(), StatusCode::OK); + let body = axum::body::to_bytes(enroll_start_res.into_body(), usize::MAX) + .await + 
.unwrap(); + let enroll_start: serde_json::Value = serde_json::from_slice(&body).unwrap(); + let secret_base32 = enroll_start + .get("secret_base32") + .and_then(|v| v.as_str()) + .unwrap() + .to_string(); + + let now = SystemTime::now(); + let now_secs = now.duration_since(UNIX_EPOCH).unwrap_or_default().as_secs(); + let counter = now_secs / 30; + let secret_bytes = + base32::decode(base32::Alphabet::Rfc4648 { padding: false }, &secret_base32).unwrap(); + let code = totp_code(&secret_bytes, counter, 6).unwrap(); + + let enroll_confirm_res = app + .clone() + .oneshot( + axum::http::Request::builder() + .method("POST") + .uri("/v1/auth/mfa/enroll/confirm") + .header("authorization", format!("Bearer {}", created.access_token)) + .header("content-type", "application/json") + .body(axum::body::Body::from(format!( + r#"{{"user_id":"{}","code":"{}"}}"#, + claims.sub, code + ))) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(enroll_confirm_res.status(), StatusCode::OK); + let body = axum::body::to_bytes(enroll_confirm_res.into_body(), usize::MAX) + .await + .unwrap(); + let enroll_confirm: serde_json::Value = serde_json::from_slice(&body).unwrap(); + let recovery_codes: Vec = enroll_confirm + .get("recovery_codes") + .and_then(|v| v.as_array()) + .unwrap() + .iter() + .filter_map(|v| v.as_str().map(|s| s.to_string())) + .collect(); + assert_eq!(recovery_codes.len(), 10); + + let challenge_totp_res = app + .clone() + .oneshot( + axum::http::Request::builder() + .method("POST") + .uri("/v1/auth/mfa/challenge") + .header("authorization", format!("Bearer {}", created.access_token)) + .header("content-type", "application/json") + .body(axum::body::Body::from(format!(r#"{{"code":"{}"}}"#, code))) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(challenge_totp_res.status(), StatusCode::OK); + + let recovery = recovery_codes[0].clone(); + let challenge_recovery_res = app + .clone() + .oneshot( + axum::http::Request::builder() + .method("POST") + 
.uri("/v1/auth/mfa/challenge") + .header("authorization", format!("Bearer {}", created.access_token)) + .header("content-type", "application/json") + .body(axum::body::Body::from(format!( + r#"{{"code":"{}"}}"#, + recovery + ))) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(challenge_recovery_res.status(), StatusCode::OK); + + let challenge_recovery_again = app + .oneshot( + axum::http::Request::builder() + .method("POST") + .uri("/v1/auth/mfa/challenge") + .header("authorization", format!("Bearer {}", created.access_token)) + .header("content-type", "application/json") + .body(axum::body::Body::from(format!( + r#"{{"code":"{}"}}"#, + recovery + ))) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(challenge_recovery_again.status(), StatusCode::BAD_REQUEST); + } + + #[tokio::test] + async fn rate_limits_trigger_for_signin_and_errors_do_not_echo_secrets() { + if let Some(lock) = RATE_LIMITER.get() { + lock.lock().await.clear(); + } + + let metrics = crate::observability::init_metrics_for_tests(); + let routing = crate::routing::RouterState::new(Arc::new(crate::routing::FixedSource::new( + crate::routing::RoutingConfig::empty(), + ))) + .await + .unwrap(); + let storage = crate::storage::GatewayStorage::new_in_memory(); + let authn = AuthnConfig::for_tests(); + let app = crate::app(crate::AppState { + metrics, + routing, + storage, + authn, + }); + + let signup_res = app + .clone() + .oneshot( + axum::http::Request::builder() + .method("POST") + .uri("/v1/auth/signup") + .header("content-type", "application/json") + .body(axum::body::Body::from( + r#"{"email":"rl@b.com","password":"password123"}"#, + )) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(signup_res.status(), StatusCode::OK); + + for i in 0..11 { + let resp = app + .clone() + .oneshot( + axum::http::Request::builder() + .method("POST") + .uri("/v1/auth/signin") + .header("content-type", "application/json") + .body(axum::body::Body::from( + r#"{"email":"rl@b.com","password":"supersecret"}"#, + 
)) + .unwrap(), + ) + .await + .unwrap(); + let status = resp.status(); + + let body = axum::body::to_bytes(resp.into_body(), usize::MAX) + .await + .unwrap(); + let body_str = String::from_utf8_lossy(&body); + assert!(!body_str.contains("supersecret")); + + if i < 10 { + assert_eq!(status, StatusCode::UNAUTHORIZED); + } else { + assert_eq!(status, StatusCode::TOO_MANY_REQUESTS); + } + } + } +} diff --git a/gateway/src/authz.rs b/gateway/src/authz.rs new file mode 100644 index 0000000..4f31df3 --- /dev/null +++ b/gateway/src/authz.rs @@ -0,0 +1,839 @@ +use axum::extract::FromRef; +use axum::extract::FromRequestParts; +use axum::extract::Path; +use axum::extract::Request; +use axum::extract::State; +use axum::http::header; +use axum::http::request::Parts; +use axum::http::StatusCode; +use axum::response::IntoResponse; +use axum::response::Response; +use axum::routing::post; +use axum::Json; +use serde::Deserialize; +use serde::Serialize; +use serde_json::Value; +use thiserror::Error; + +use crate::grpc; +use crate::storage::GatewayStorage; +use crate::storage::StorageError; +use crate::AppState; + +pub fn router() -> axum::Router { + axum::Router::new() + .route( + "/commands/:aggregate_type/:aggregate_id", + post(submit_command_stub), + ) + .route("/query/:view_type", post(query_stub)) +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Principal { + pub user_id: String, + pub session_id: String, +} + +#[async_trait::async_trait] +impl FromRequestParts for Principal +where + S: Send + Sync, + AppState: FromRef, +{ + type Rejection = AuthzRejection; + + async fn from_request_parts(parts: &mut Parts, state: &S) -> Result { + let auth_header = parts + .headers + .get(header::AUTHORIZATION) + .and_then(|v| v.to_str().ok()) + .ok_or(AuthzRejection::Unauthorized)?; + + let token = auth_header + .strip_prefix("Bearer ") + .ok_or(AuthzRejection::Unauthorized)?; + + let app_state = AppState::from_ref(state); + let claims = 
app_state.authn.verify_access_token(token).map_err(|_| { + metrics::counter!("gateway_authn_token_verify_fail_total").increment(1); + AuthzRejection::Unauthorized + })?; + + tracing::Span::current().record("principal_id", claims.sub.as_str()); + Ok(Self { + user_id: claims.sub, + session_id: claims.session_id, + }) + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct TenantId(pub String); + +#[async_trait::async_trait] +impl FromRequestParts for TenantId +where + S: Send + Sync, +{ + type Rejection = AuthzRejection; + + async fn from_request_parts(parts: &mut Parts, _state: &S) -> Result { + let raw = parts + .headers + .get("x-tenant-id") + .and_then(|v| v.to_str().ok()) + .ok_or(AuthzRejection::MissingTenant)?; + + let tenant = raw.trim(); + if tenant.is_empty() + || !tenant + .chars() + .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_') + { + return Err(AuthzRejection::InvalidTenant); + } + + tracing::Span::current().record("tenant_id", tenant); + Ok(TenantId(tenant.to_string())) + } +} + +#[derive(Debug, Error)] +pub enum AuthzRejection { + #[error("unauthorized")] + Unauthorized, + #[error("bad request")] + BadRequest, + #[error("missing x-tenant-id")] + MissingTenant, + #[error("invalid x-tenant-id")] + InvalidTenant, + #[error("forbidden")] + Forbidden, + #[error("not found")] + NotFound, + #[error("conflict")] + Conflict, + #[error("internal error")] + Internal, +} + +impl IntoResponse for AuthzRejection { + fn into_response(self) -> axum::response::Response { + match self { + AuthzRejection::Unauthorized => { + (StatusCode::UNAUTHORIZED, self.to_string()).into_response() + } + AuthzRejection::BadRequest => { + (StatusCode::BAD_REQUEST, self.to_string()).into_response() + } + AuthzRejection::MissingTenant | AuthzRejection::InvalidTenant => { + (StatusCode::BAD_REQUEST, self.to_string()).into_response() + } + AuthzRejection::Forbidden => (StatusCode::FORBIDDEN, self.to_string()).into_response(), + AuthzRejection::NotFound => 
(StatusCode::NOT_FOUND, self.to_string()).into_response(), + AuthzRejection::Conflict => (StatusCode::CONFLICT, self.to_string()).into_response(), + AuthzRejection::Internal => { + (StatusCode::INTERNAL_SERVER_ERROR, self.to_string()).into_response() + } + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct RoleRecord { + pub role_id: String, + pub rights: Vec<String>, +} + +#[derive(Debug, Deserialize)] +struct HttpCommandRequest { + command_id: Option<String>, + payload: Value, + metadata: Option<std::collections::HashMap<String, String>>, +} + +#[derive(Debug, Serialize)] +struct HttpCommandResponse { + events: Vec<EventDto>, +} + +#[derive(Debug, Serialize)] +struct EventDto { + event_id: String, + command_id: String, + aggregate_id: String, + aggregate_type: String, + version: u64, + event_type: String, + payload_json: String, + timestamp_rfc3339: String, +} + +async fn submit_command_stub( + State(state): State<AppState>, + ctx: crate::RequestContext, + principal: Principal, + TenantId(tenant_id): TenantId, + Path((aggregate_type, aggregate_id)): Path<(String, String)>, + Json(body): Json<HttpCommandRequest>, +) -> Result<Json<HttpCommandResponse>, AuthzRejection> { + ensure_allowed( + &state.storage, + &principal.user_id, + &tenant_id, + "command.submit", + ) + .await?; + + let command_id = body + .command_id + .unwrap_or_else(|| uuid::Uuid::new_v4().to_string()); + + let metadata = body.metadata.unwrap_or_default(); + let request = grpc::proto::SubmitCommandRequest { + tenant_id: tenant_id.clone(), + command_id, + aggregate_id, + aggregate_type, + payload_json: body.payload.to_string(), + metadata, + }; + + let resp = grpc::submit_command_via_routing(&state.routing, request, &ctx) + .await + .map_err(|_| AuthzRejection::Internal)?; + + let events = resp + .events + .into_iter() + .map(|e| EventDto { + event_id: e.event_id, + command_id: e.command_id, + aggregate_id: e.aggregate_id, + aggregate_type: e.aggregate_type, + version: e.version, + event_type: e.event_type, + payload_json: e.payload_json, + timestamp_rfc3339: e.timestamp_rfc3339, + }) + .collect(); + 
+ Ok(Json(HttpCommandResponse { events })) +} + +async fn query_stub( + State(state): State<AppState>, + ctx: crate::RequestContext, + principal: Principal, + TenantId(tenant_id): TenantId, + Path(view_type): Path<String>, + Json(payload): Json<Value>, +) -> Result<Response, AuthzRejection> { + ensure_allowed( + &state.storage, + &principal.user_id, + &tenant_id, + "query.execute", + ) + .await?; + + let upstream = state + .routing + .resolve(&tenant_id, crate::routing::ServiceKind::Projection) + .await + .map_err(|_| AuthzRejection::Internal)?; + tracing::Span::current().record("upstream", upstream.as_str()); + + let url = format!("{}/v1/query/{}", upstream.trim_end_matches('/'), view_type); + + let client = crate::upstream::http_client(); + let resp = client + .post(url) + .header("x-tenant-id", tenant_id) + .header("x-correlation-id", ctx.correlation_id) + .header("traceparent", ctx.traceparent) + .json(&payload) + .send() + .await + .map_err(|_| AuthzRejection::Internal)?; + + let status = StatusCode::from_u16(resp.status().as_u16()).unwrap_or(StatusCode::BAD_GATEWAY); + let bytes = resp.bytes().await.map_err(|_| AuthzRejection::Internal)?; + let mut out = Response::new(axum::body::Body::from(bytes)); + *out.status_mut() = status; + Ok(out) +} + +pub async fn runner_admin_proxy( + State(state): State<AppState>, + ctx: crate::RequestContext, + principal: Principal, + TenantId(tenant_id): TenantId, + Path(path): Path<String>, + request: Request, +) -> Result<Response, AuthzRejection> { + ensure_allowed( + &state.storage, + &principal.user_id, + &tenant_id, + "runner.admin", + ) + .await?; + + let upstream = state + .routing + .resolve(&tenant_id, crate::routing::ServiceKind::Runner) + .await + .map_err(|_| AuthzRejection::Internal)?; + tracing::Span::current().record("upstream", upstream.as_str()); + + let mut url = format!( + "{}/admin/{}", + upstream.trim_end_matches('/'), + path.trim_start_matches('/') + ); + if let Some(q) = request.uri().query() { + url.push('?'); + url.push_str(q); + } + + let method = request.method().clone(); + let headers = 
request.headers().clone(); + let body = axum::body::to_bytes(request.into_body(), usize::MAX) + .await + .map_err(|_| AuthzRejection::Internal)?; + + let client = crate::upstream::http_client(); + let mut req = client + .request(method, url) + .header("x-tenant-id", tenant_id) + .header("x-correlation-id", ctx.correlation_id) + .header("traceparent", ctx.traceparent) + .body(body); + + for (k, v) in headers.iter() { + if k == header::HOST { + continue; + } + req = req.header(k, v); + } + + let resp = req.send().await.map_err(|_| AuthzRejection::Internal)?; + let status = StatusCode::from_u16(resp.status().as_u16()).unwrap_or(StatusCode::BAD_GATEWAY); + let bytes = resp.bytes().await.map_err(|_| AuthzRejection::Internal)?; + + let mut out = Response::new(axum::body::Body::from(bytes)); + *out.status_mut() = status; + Ok(out) +} + +pub async fn ensure_allowed( + storage: &GatewayStorage, + principal_id: &str, + tenant_id: &str, + required_right: &str, +) -> Result<(), AuthzRejection> { + let mut roles = list_assigned_roles(storage, tenant_id, principal_id).await?; + roles.extend(list_assigned_roles(storage, "*", principal_id).await?); + + if roles.is_empty() { + metrics::counter!( + "gateway_authz_decisions_total", + "tenant" => tenant_id.to_string(), + "right" => required_right.to_string(), + "result" => "deny" + ) + .increment(1); + return Err(AuthzRejection::Forbidden); + } + + for role_id in roles { + let key = role_key(&role_id); + let entry = storage + .roles + .get(&key) + .await + .map_err(|_| AuthzRejection::Internal)?; + let Some(entry) = entry else { + continue; + }; + let role: RoleRecord = decode_stored(&entry.value).map_err(|_| AuthzRejection::Internal)?; + if role.rights.iter().any(|r| r == required_right) { + metrics::counter!( + "gateway_authz_decisions_total", + "tenant" => tenant_id.to_string(), + "right" => required_right.to_string(), + "result" => "allow" + ) + .increment(1); + return Ok(()); + } + } + + metrics::counter!( + 
"gateway_authz_decisions_total", + "tenant" => tenant_id.to_string(), + "right" => required_right.to_string(), + "result" => "deny" + ) + .increment(1); + Err(AuthzRejection::Forbidden) +} + +async fn list_assigned_roles( + storage: &GatewayStorage, + tenant_id: &str, + principal_id: &str, +) -> Result, AuthzRejection> { + let prefix = assignment_prefix(tenant_id, principal_id); + let keys = storage + .assignments + .list_keys(&prefix) + .await + .map_err(|_| AuthzRejection::Internal)?; + Ok(keys + .into_iter() + .filter_map(|k| k.rsplit('/').next().map(|s| s.to_string())) + .collect()) +} + +fn role_key(role_id: &str) -> String { + format!("v1/roles/{role_id}") +} + +fn assignment_key(tenant_id: &str, principal_id: &str, role_id: &str) -> String { + format!("v1/assignments/{tenant_id}/{principal_id}/{role_id}") +} + +fn assignment_prefix(tenant_id: &str, principal_id: &str) -> String { + format!("v1/assignments/{tenant_id}/{principal_id}/") +} + +fn decode_stored Deserialize<'de>>(bytes: &[u8]) -> Result { + #[derive(Deserialize)] + struct Stored { + data: T, + } + let stored: Stored = + serde_json::from_slice(bytes).map_err(|e| StorageError::Serde(e.to_string()))?; + Ok(stored.data) +} + +pub async fn put_role( + storage: &GatewayStorage, + role_id: &str, + rights: Vec, +) -> Result<(), StorageError> { + let record = RoleRecord { + role_id: role_id.to_string(), + rights, + }; + let payload = serde_json::to_vec(&serde_json::json!({ + "v": crate::storage::SCHEMA_VERSION, + "data": record + })) + .map_err(|e| StorageError::Serde(e.to_string()))?; + + storage.roles.put(&role_key(role_id), payload).await?; + Ok(()) +} + +pub async fn assign_role( + storage: &GatewayStorage, + tenant_id: &str, + principal_id: &str, + role_id: &str, +) -> Result<(), StorageError> { + storage + .assignments + .put( + &assignment_key(tenant_id, principal_id, role_id), + b"1".to_vec(), + ) + .await?; + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::authn; + use 
std::sync::Arc; + use tower::util::ServiceExt; + + async fn test_app() -> (axum::Router, AppState) { + let metrics = crate::observability::init_metrics_for_tests(); + let routing = crate::routing::RouterState::new(Arc::new(crate::routing::FixedSource::new( + crate::routing::RoutingConfig::empty(), + ))) + .await + .unwrap(); + let storage = crate::storage::GatewayStorage::new_in_memory(); + let authn_cfg = crate::authn::AuthnConfig::for_tests(); + let state = crate::AppState { + metrics, + routing, + storage, + authn: authn_cfg, + }; + let app = crate::app(state.clone()); + (app, state) + } + + async fn test_app_with_routing(cfg: crate::routing::RoutingConfig) -> (axum::Router, AppState) { + let metrics = crate::observability::init_metrics_for_tests(); + let routing = + crate::routing::RouterState::new(Arc::new(crate::routing::FixedSource::new(cfg))) + .await + .unwrap(); + let storage = crate::storage::GatewayStorage::new_in_memory(); + let authn_cfg = crate::authn::AuthnConfig::for_tests(); + let state = crate::AppState { + metrics, + routing, + storage, + authn: authn_cfg, + }; + let app = crate::app(state.clone()); + (app, state) + } + + async fn signup_and_get_claims( + app: &axum::Router, + cfg: &authn::AuthnConfig, + ) -> (String, authn::AccessClaims) { + let response = app + .clone() + .oneshot( + axum::http::Request::builder() + .method("POST") + .uri("/v1/auth/signup") + .header("content-type", "application/json") + .body(axum::body::Body::from( + r#"{"email":"a@b.com","password":"password123"}"#, + )) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(response.status(), StatusCode::OK); + let body = axum::body::to_bytes(response.into_body(), usize::MAX) + .await + .unwrap(); + let created: crate::authn::AuthResponse = serde_json::from_slice(&body).unwrap(); + + let claims = cfg.verify_access_token(&created.access_token).unwrap(); + (created.access_token, claims) + } + + #[tokio::test] + async fn missing_tenant_header_returns_400() { + let (app, state) = 
test_app().await; + let (token, claims) = signup_and_get_claims(&app, &state.authn).await; + + put_role( + &state.storage, + "role-command", + vec!["command.submit".to_string()], + ) + .await + .unwrap(); + + assign_role(&state.storage, "tenant-a", &claims.sub, "role-command") + .await + .unwrap(); + + let response = app + .oneshot( + axum::http::Request::builder() + .method("POST") + .uri("/v1/commands/User/u1") + .header("authorization", format!("Bearer {token}")) + .header("content-type", "application/json") + .body(axum::body::Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + + assert_eq!(response.status(), StatusCode::BAD_REQUEST); + } + + #[tokio::test] + async fn tenant_spoofing_is_rejected() { + let (app, state) = test_app().await; + let (token, claims) = signup_and_get_claims(&app, &state.authn).await; + + put_role( + &state.storage, + "role-command", + vec!["command.submit".to_string()], + ) + .await + .unwrap(); + assign_role(&state.storage, "tenant-a", &claims.sub, "role-command") + .await + .unwrap(); + + let response = app + .oneshot( + axum::http::Request::builder() + .method("POST") + .uri("/v1/commands/User/u1") + .header("authorization", format!("Bearer {token}")) + .header("x-tenant-id", "tenant-b") + .header("content-type", "application/json") + .body(axum::body::Body::from(r#"{"payload":{}}"#)) + .unwrap(), + ) + .await + .unwrap(); + + assert_eq!(response.status(), StatusCode::FORBIDDEN); + } + + #[tokio::test] + async fn role_assignment_enables_expected_action() { + let (app, state) = test_app().await; + let (token, claims) = signup_and_get_claims(&app, &state.authn).await; + + put_role( + &state.storage, + "role-command", + vec!["command.submit".to_string()], + ) + .await + .unwrap(); + assign_role(&state.storage, "tenant-a", &claims.sub, "role-command") + .await + .unwrap(); + + let response = app + .oneshot( + axum::http::Request::builder() + .method("POST") + .uri("/v1/commands/User/u1") + .header("authorization", format!("Bearer 
{token}")) + .header("x-tenant-id", "tenant-a") + .header("content-type", "application/json") + .body(axum::body::Body::from(r#"{"payload":{}}"#)) + .unwrap(), + ) + .await + .unwrap(); + + assert_eq!(response.status(), StatusCode::INTERNAL_SERVER_ERROR); + } + + #[tokio::test] + async fn http_command_endpoint_returns_same_shape_as_grpc_response() { + use crate::grpc::proto; + use crate::routing::RoutingConfig; + use std::collections::HashMap; + + #[derive(Default)] + struct Upstream; + + #[async_trait::async_trait] + impl proto::command_service_server::CommandService for Upstream { + async fn submit_command( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + let req = request.into_inner(); + Ok(tonic::Response::new(proto::SubmitCommandResponse { + events: vec![proto::Event { + event_id: "e1".to_string(), + command_id: req.command_id, + aggregate_id: req.aggregate_id, + aggregate_type: req.aggregate_type, + version: 1, + event_type: "Created".to_string(), + payload_json: "{}".to_string(), + timestamp_rfc3339: "2020-01-01T00:00:00Z".to_string(), + }], + })) + } + } + + let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap(); + let addr = listener.local_addr().unwrap(); + drop(listener); + tokio::spawn(async move { + tonic::transport::Server::builder() + .add_service(proto::command_service_server::CommandServiceServer::new( + Upstream, + )) + .serve(addr) + .await + .unwrap(); + }); + + tokio::time::sleep(std::time::Duration::from_millis(50)).await; + + let upstream_url = format!("http://{}", addr); + let cfg = RoutingConfig { + revision: 1, + aggregate_placement: HashMap::from([("tenant-a".to_string(), "a".to_string())]), + projection_placement: HashMap::new(), + runner_placement: HashMap::new(), + aggregate_shards: HashMap::from([("a".to_string(), vec![upstream_url])]), + projection_shards: HashMap::new(), + runner_shards: HashMap::new(), + }; + + let (app, state) = test_app_with_routing(cfg).await; + let (token, claims) = 
signup_and_get_claims(&app, &state.authn).await; + + put_role( + &state.storage, + "role-command", + vec!["command.submit".to_string()], + ) + .await + .unwrap(); + assign_role(&state.storage, "tenant-a", &claims.sub, "role-command") + .await + .unwrap(); + + let response = app + .oneshot( + axum::http::Request::builder() + .method("POST") + .uri("/v1/commands/User/u1") + .header("authorization", format!("Bearer {token}")) + .header("x-tenant-id", "tenant-a") + .header("content-type", "application/json") + .body(axum::body::Body::from(r#"{"payload":{}}"#)) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(response.status(), StatusCode::OK); + + let body = axum::body::to_bytes(response.into_body(), usize::MAX) + .await + .unwrap(); + let value: serde_json::Value = serde_json::from_slice(&body).unwrap(); + assert!( + value + .get("events") + .and_then(|v| v.as_array()) + .unwrap() + .len() + == 1 + ); + assert_eq!( + value.get("events").unwrap()[0] + .get("event_id") + .and_then(|v| v.as_str()) + .unwrap(), + "e1" + ); + } + + #[tokio::test] + async fn query_endpoint_denies_unauthorized_and_forwards_when_authorized() { + use crate::routing::RoutingConfig; + use std::collections::HashMap; + + let projection_app = axum::Router::new().route( + "/v1/query/TestView", + post(|headers: axum::http::HeaderMap| async move { + let correlation = headers + .get("x-correlation-id") + .and_then(|v| v.to_str().ok()) + .unwrap_or(""); + let traceparent = headers + .get("traceparent") + .and_then(|v| v.to_str().ok()) + .unwrap_or(""); + if correlation.trim().is_empty() + || crate::trace_id_from_traceparent(traceparent).is_none() + { + return (StatusCode::BAD_REQUEST, "missing correlation"); + } + (StatusCode::OK, r#"{"mode":"count"}"#) + }), + ); + let projection_listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap(); + let projection_addr = projection_listener.local_addr().unwrap(); + tokio::spawn(async move { + axum::serve(projection_listener, projection_app) + 
.await + .unwrap(); + }); + tokio::time::sleep(std::time::Duration::from_millis(50)).await; + let projection_url = format!("http://{}", projection_addr); + + let cfg = RoutingConfig { + revision: 1, + aggregate_placement: HashMap::new(), + projection_placement: HashMap::from([("tenant-a".to_string(), "p".to_string())]), + runner_placement: HashMap::new(), + aggregate_shards: HashMap::new(), + projection_shards: HashMap::from([("p".to_string(), vec![projection_url])]), + runner_shards: HashMap::new(), + }; + + let (app, state) = test_app_with_routing(cfg).await; + let (token, claims) = signup_and_get_claims(&app, &state.authn).await; + + let deny = app + .clone() + .oneshot( + axum::http::Request::builder() + .method("POST") + .uri("/v1/query/TestView") + .header("authorization", format!("Bearer {token}")) + .header("x-tenant-id", "tenant-a") + .header("content-type", "application/json") + .body(axum::body::Body::from(r#"{"uqf":"{}"}"#)) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(deny.status(), StatusCode::FORBIDDEN); + + put_role( + &state.storage, + "role-query", + vec!["query.execute".to_string()], + ) + .await + .unwrap(); + assign_role(&state.storage, "tenant-a", &claims.sub, "role-query") + .await + .unwrap(); + + let ok = app + .oneshot( + axum::http::Request::builder() + .method("POST") + .uri("/v1/query/TestView") + .header("authorization", format!("Bearer {token}")) + .header("x-tenant-id", "tenant-a") + .header("content-type", "application/json") + .body(axum::body::Body::from(r#"{"uqf":"{}"}"#)) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(ok.status(), StatusCode::OK); + assert!(!ok + .headers() + .get("x-correlation-id") + .and_then(|v| v.to_str().ok()) + .unwrap_or("") + .is_empty()); + assert!(crate::trace_id_from_traceparent( + ok.headers() + .get("traceparent") + .and_then(|v| v.to_str().ok()) + .unwrap_or("") + ) + .is_some()); + } +} diff --git a/gateway/src/grpc.rs b/gateway/src/grpc.rs new file mode 100644 index 0000000..dd3a01d 
--- /dev/null +++ b/gateway/src/grpc.rs @@ -0,0 +1,275 @@ +use crate::routing::RouterState; +use crate::routing::RoutingError; +use crate::routing::ServiceKind; + +pub mod proto { + tonic::include_proto!("aggregate.gateway.v1"); +} + +#[derive(Clone)] +pub struct GatewayCommandService { + routing: RouterState, +} + +impl GatewayCommandService { + pub fn new(routing: RouterState) -> Self { + Self { routing } + } +} + +#[async_trait::async_trait] +impl proto::command_service_server::CommandService for GatewayCommandService { + async fn submit_command( + &self, + request: tonic::Request<proto::SubmitCommandRequest>, + ) -> Result<tonic::Response<proto::SubmitCommandResponse>, tonic::Status> { + let correlation_id = request + .metadata() + .get("x-correlation-id") + .and_then(|v| v.to_str().ok()) + .map(|s| s.trim()) + .filter(|s| !s.is_empty()) + .map(|s| s.to_string()) + .unwrap_or_else(|| uuid::Uuid::new_v4().to_string()); + + let traceparent = request + .metadata() + .get("traceparent") + .and_then(|v| v.to_str().ok()) + .map(|s| s.trim()) + .filter(|s| !s.is_empty()) + .and_then(|s| { + if crate::trace_id_from_traceparent(s).is_some() { + Some(s.to_string()) + } else { + None + } + }) + .unwrap_or_else(|| { + let trace_id = uuid::Uuid::new_v4().simple().to_string(); + let span_id = uuid::Uuid::new_v4().simple().to_string()[..16].to_string(); + format!("00-{trace_id}-{span_id}-01") + }); + + let mut req = request.into_inner(); + + let tenant_id = req.tenant_id.trim().to_string(); + if tenant_id.is_empty() { + return Err(tonic::Status::invalid_argument("tenant_id is required")); + } + req.tenant_id = tenant_id.clone(); + + let upstream = self + .routing + .resolve(&tenant_id, ServiceKind::Aggregate) + .await + .map_err(map_routing_error)?; + tracing::Span::current().record("upstream", upstream.as_str()); + + let channel = crate::upstream::grpc_endpoint(&upstream) + .map_err(|e| tonic::Status::unavailable(e.to_string()))? 
+ .connect() + .await + .map_err(|e| tonic::Status::unavailable(e.to_string()))?; + let mut client = proto::command_service_client::CommandServiceClient::new(channel); + + let mut upstream_req = tonic::Request::new(req); + if let Ok(v) = tonic::metadata::MetadataValue::try_from(tenant_id.as_str()) { + upstream_req.metadata_mut().insert("x-tenant-id", v); + } + if let Ok(v) = tonic::metadata::MetadataValue::try_from(correlation_id.as_str()) { + upstream_req.metadata_mut().insert("x-correlation-id", v); + } + if let Ok(v) = tonic::metadata::MetadataValue::try_from(traceparent.as_str()) { + upstream_req.metadata_mut().insert("traceparent", v); + } + + let mut resp = client.submit_command(upstream_req).await?; + if let Ok(v) = tonic::metadata::MetadataValue::try_from(correlation_id.as_str()) { + resp.metadata_mut().insert("x-correlation-id", v); + } + if let Ok(v) = tonic::metadata::MetadataValue::try_from(traceparent.as_str()) { + resp.metadata_mut().insert("traceparent", v); + } + Ok(resp) + } +} + +pub async fn submit_command_via_routing( + routing: &RouterState, + request: proto::SubmitCommandRequest, + ctx: &crate::RequestContext, +) -> Result<proto::SubmitCommandResponse, tonic::Status> { + let tenant_id = request.tenant_id.trim().to_string(); + if tenant_id.is_empty() { + return Err(tonic::Status::invalid_argument("tenant_id is required")); + } + + let upstream = routing + .resolve(&tenant_id, ServiceKind::Aggregate) + .await + .map_err(map_routing_error)?; + tracing::Span::current().record("upstream", upstream.as_str()); + + let channel = crate::upstream::grpc_endpoint(&upstream) + .map_err(|e| tonic::Status::unavailable(e.to_string()))? 
+ .connect() + .await + .map_err(|e| tonic::Status::unavailable(e.to_string()))?; + let mut client = proto::command_service_client::CommandServiceClient::new(channel); + + let mut upstream_req = tonic::Request::new(request); + if let Ok(v) = tonic::metadata::MetadataValue::try_from(tenant_id.as_str()) { + upstream_req.metadata_mut().insert("x-tenant-id", v); + } + if let Ok(v) = tonic::metadata::MetadataValue::try_from(ctx.correlation_id.as_str()) { + upstream_req.metadata_mut().insert("x-correlation-id", v); + } + if let Ok(v) = tonic::metadata::MetadataValue::try_from(ctx.traceparent.as_str()) { + upstream_req.metadata_mut().insert("traceparent", v); + } + + let resp = client.submit_command(upstream_req).await?; + Ok(resp.into_inner()) +} + +fn map_routing_error(err: RoutingError) -> tonic::Status { + match err { + RoutingError::UnknownTenant => tonic::Status::not_found("unknown tenant"), + RoutingError::MissingShard | RoutingError::EmptyShard => { + tonic::Status::unavailable(err.to_string()) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::routing::RoutingConfig; + use std::collections::HashMap; + use std::sync::Arc; + + #[tokio::test] + async fn grpc_submit_command_forwards_tenant_metadata_and_returns_events() { + use proto::command_service_server::CommandService; + + #[derive(Default)] + struct Upstream; + + #[async_trait::async_trait] + impl proto::command_service_server::CommandService for Upstream { + async fn submit_command( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + let tenant_md = request + .metadata() + .get("x-tenant-id") + .and_then(|v| v.to_str().ok()) + .unwrap_or(""); + if tenant_md != request.get_ref().tenant_id { + return Err(tonic::Status::failed_precondition( + "missing tenant metadata", + )); + } + let correlation = request + .metadata() + .get("x-correlation-id") + .and_then(|v| v.to_str().ok()) + .unwrap_or(""); + if correlation.trim().is_empty() { + return 
Err(tonic::Status::failed_precondition( + "missing correlation metadata", + )); + } + let traceparent = request + .metadata() + .get("traceparent") + .and_then(|v| v.to_str().ok()) + .unwrap_or(""); + if crate::trace_id_from_traceparent(traceparent).is_none() { + return Err(tonic::Status::failed_precondition("missing traceparent")); + } + + let resp = proto::SubmitCommandResponse { + events: vec![proto::Event { + event_id: "e1".to_string(), + command_id: request.get_ref().command_id.clone(), + aggregate_id: request.get_ref().aggregate_id.clone(), + aggregate_type: request.get_ref().aggregate_type.clone(), + version: 1, + event_type: "Created".to_string(), + payload_json: "{}".to_string(), + timestamp_rfc3339: "2020-01-01T00:00:00Z".to_string(), + }], + }; + Ok(tonic::Response::new(resp)) + } + } + + let upstream_listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap(); + let upstream_addr = upstream_listener.local_addr().unwrap(); + drop(upstream_listener); + let upstream_url = format!("http://{}", upstream_addr); + + let upstream_task = tokio::spawn(async move { + tonic::transport::Server::builder() + .add_service(proto::command_service_server::CommandServiceServer::new( + Upstream, + )) + .serve(upstream_addr) + .await + .unwrap(); + }); + + tokio::time::sleep(std::time::Duration::from_millis(50)).await; + + let cfg = RoutingConfig { + revision: 1, + aggregate_placement: HashMap::from([("tenant-a".to_string(), "a".to_string())]), + projection_placement: HashMap::new(), + runner_placement: HashMap::new(), + aggregate_shards: HashMap::from([("a".to_string(), vec![upstream_url])]), + projection_shards: HashMap::new(), + runner_shards: HashMap::new(), + }; + + let routing = + crate::routing::RouterState::new(Arc::new(crate::routing::FixedSource::new(cfg))) + .await + .unwrap(); + let svc = GatewayCommandService::new(routing); + + let request = proto::SubmitCommandRequest { + tenant_id: "tenant-a".to_string(), + command_id: "c1".to_string(), + 
aggregate_id: "id1".to_string(), + aggregate_type: "User".to_string(), + payload_json: "{}".to_string(), + metadata: HashMap::new(), + }; + + let resp = CommandService::submit_command(&svc, tonic::Request::new(request)) + .await + .unwrap(); + assert!(!resp + .metadata() + .get("x-correlation-id") + .and_then(|v| v.to_str().ok()) + .unwrap_or("") + .is_empty()); + assert!(crate::trace_id_from_traceparent( + resp.metadata() + .get("traceparent") + .and_then(|v| v.to_str().ok()) + .unwrap_or("") + ) + .is_some()); + let resp = resp.into_inner(); + + assert_eq!(resp.events.len(), 1); + assert_eq!(resp.events[0].command_id, "c1"); + + upstream_task.abort(); + } +} diff --git a/gateway/src/lib.rs b/gateway/src/lib.rs new file mode 100644 index 0000000..d8e37f7 --- /dev/null +++ b/gateway/src/lib.rs @@ -0,0 +1,541 @@ +use std::time::Duration; +use std::time::Instant; + +use axum::error_handling::HandleErrorLayer; +use axum::extract::MatchedPath; +use axum::extract::State; +use axum::http::request::Parts; +use axum::http::HeaderName; +use axum::http::HeaderValue; +use axum::http::StatusCode; +use axum::middleware::Next; +use axum::response::IntoResponse; +use axum::routing::get; +use axum::BoxError; +use axum::Json; +use axum::Router; +use metrics_exporter_prometheus::PrometheusHandle; +use serde::Serialize; +use std::future::Future; +use std::pin::Pin; +use std::task::Context; +use std::task::Poll; +use tower::timeout::TimeoutLayer; +use tower::Layer; +use tower::Service; +use tower::ServiceBuilder; +use tower_http::limit::RequestBodyLimitLayer; +use tower_http::request_id::MakeRequestUuid; +use tower_http::request_id::PropagateRequestIdLayer; +use tower_http::request_id::SetRequestIdLayer; +use tower_http::trace::TraceLayer; +use tracing::Level; + +#[derive(Debug, Clone)] +pub struct RequestContext { + pub request_id: String, + pub correlation_id: String, + pub traceparent: String, + pub trace_id: String, +} + +#[async_trait::async_trait] +impl 
axum::extract::FromRequestParts for RequestContext +where + S: Send + Sync, +{ + type Rejection = StatusCode; + + async fn from_request_parts(parts: &mut Parts, _state: &S) -> Result { + let request_id = parts + .headers + .get("x-request-id") + .and_then(|v| v.to_str().ok()) + .unwrap_or("") + .to_string(); + let correlation_id = parts + .headers + .get("x-correlation-id") + .and_then(|v| v.to_str().ok()) + .unwrap_or("") + .to_string(); + let traceparent = parts + .headers + .get("traceparent") + .and_then(|v| v.to_str().ok()) + .unwrap_or("") + .to_string(); + let trace_id = trace_id_from_traceparent(&traceparent) + .map(|s| s.to_string()) + .unwrap_or_default(); + + Ok(Self { + request_id, + correlation_id, + traceparent, + trace_id, + }) + } +} + +#[derive(Clone)] +pub struct AppState { + pub metrics: PrometheusHandle, + pub routing: routing::RouterState, + pub storage: storage::GatewayStorage, + pub authn: authn::AuthnConfig, +} + +#[derive(Serialize)] +struct StatusResponse { + status: &'static str, +} + +pub fn app(state: AppState) -> Router { + let request_id_header = HeaderName::from_static("x-request-id"); + + Router::new() + .route("/health", get(health)) + .route("/ready", get(ready)) + .route("/metrics", get(metrics)) + .nest("/v1/auth", authn::router()) + .nest("/v1", authz::router()) + .nest("/admin/iam", admin_iam::router()) + .nest("/v1/admin/iam", admin_iam::router()) + .nest("/admin/rebalance", admin_rebalance::router()) + .route("/admin/routing", get(admin_routing)) + .route( + "/admin/runner/*path", + axum::routing::any(authz::runner_admin_proxy), + ) + .route( + "/admin/routing/reload", + axum::routing::post(admin_routing_reload), + ) + .route( + "/admin/routing/resolve", + axum::routing::get(admin_rebalance::resolve), + ) + .route_layer(axum::middleware::from_fn(track_http_metrics)) + .with_state(state) + .layer( + ServiceBuilder::new() + .layer(HandleErrorLayer::new(|error: BoxError| async move { + (StatusCode::REQUEST_TIMEOUT, 
error.to_string()) + })) + .layer(SetRequestIdLayer::new( + request_id_header.clone(), + MakeRequestUuid, + )) + .layer(PropagateRequestIdLayer::new(request_id_header)) + .layer(EnsureCorrelationTraceLayer) + .layer(TraceLayer::new_for_http().make_span_with( + |request: &axum::http::Request<_>| { + let request_id = request + .headers() + .get("x-request-id") + .and_then(|v| v.to_str().ok()) + .unwrap_or(""); + let correlation_id = request + .headers() + .get("x-correlation-id") + .and_then(|v| v.to_str().ok()) + .unwrap_or(""); + let traceparent = request + .headers() + .get("traceparent") + .and_then(|v| v.to_str().ok()) + .unwrap_or(""); + let trace_id = trace_id_from_traceparent(traceparent).unwrap_or(""); + let path = request_path_for_logging(request); + + tracing::span!( + Level::INFO, + "http.request", + method = %request.method(), + path = %path, + request_id = request_id, + correlation_id = correlation_id, + trace_id = trace_id, + tenant_id = tracing::field::Empty, + principal_id = tracing::field::Empty, + upstream = tracing::field::Empty, + ) + }, + )) + .layer(RequestBodyLimitLayer::new(1024 * 1024)) + .layer(TimeoutLayer::new(Duration::from_secs(30))), + ) +} + +#[derive(Clone)] +struct EnsureCorrelationTraceLayer; + +#[derive(Clone)] +struct EnsureCorrelationTrace { + inner: S, +} + +impl Layer for EnsureCorrelationTraceLayer { + type Service = EnsureCorrelationTrace; + + fn layer(&self, inner: S) -> Self::Service { + Self::Service { inner } + } +} + +impl Service> for EnsureCorrelationTrace +where + S: Service, Response = axum::http::Response> + + Clone + + Send + + 'static, + S::Future: Send + 'static, + S::Error: Send + 'static, + ReqBody: Send + 'static, + ResBody: Send + 'static, +{ + type Response = axum::http::Response; + type Error = S::Error; + type Future = + Pin, S::Error>> + Send>>; + + fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll> { + self.inner.poll_ready(cx) + } + + fn call(&mut self, mut req: axum::http::Request) -> 
Self::Future { + let correlation_id = req + .headers() + .get("x-correlation-id") + .and_then(|v| v.to_str().ok()) + .map(|s| s.trim()) + .filter(|s| !s.is_empty()) + .map(|s| s.to_string()) + .unwrap_or_else(generate_correlation_id); + + let traceparent = req + .headers() + .get("traceparent") + .and_then(|v| v.to_str().ok()) + .map(|s| s.trim()) + .filter(|s| !s.is_empty()) + .and_then(|s| { + if trace_id_from_traceparent(s).is_some() { + Some(s.to_string()) + } else { + None + } + }) + .unwrap_or_else(generate_traceparent); + + if let Ok(v) = HeaderValue::from_str(&correlation_id) { + req.headers_mut().insert("x-correlation-id", v); + } + if let Ok(v) = HeaderValue::from_str(&traceparent) { + req.headers_mut().insert("traceparent", v); + } + + let mut inner = self.inner.clone(); + Box::pin(async move { + let mut resp = inner.call(req).await?; + if resp.headers().get("x-correlation-id").is_none() { + if let Ok(v) = HeaderValue::from_str(&correlation_id) { + resp.headers_mut().insert("x-correlation-id", v); + } + } + if resp.headers().get("traceparent").is_none() { + if let Ok(v) = HeaderValue::from_str(&traceparent) { + resp.headers_mut().insert("traceparent", v); + } + } + Ok(resp) + }) + } +} + +fn generate_correlation_id() -> String { + uuid::Uuid::new_v4().to_string() +} + +fn generate_traceparent() -> String { + let trace_id = uuid::Uuid::new_v4().simple().to_string(); + let span_id = uuid::Uuid::new_v4().simple().to_string()[..16].to_string(); + format!("00-{trace_id}-{span_id}-01") +} + +pub(crate) fn trace_id_from_traceparent(traceparent: &str) -> Option<&str> { + shared::trace_id_from_traceparent(traceparent) +} + +async fn track_http_metrics( + req: axum::http::Request, + next: Next, +) -> axum::response::Response { + let method = req.method().to_string(); + let path = req + .extensions() + .get::() + .map(|p| p.as_str().to_string()) + .unwrap_or_else(|| req.uri().path().to_string()); + let start = Instant::now(); + + let response = next.run(req).await; 
+ + let status = response.status().as_u16().to_string(); + let elapsed = start.elapsed().as_secs_f64(); + + metrics::counter!( + "gateway_http_requests_total", + "method" => method.clone(), + "path" => path.clone(), + "status" => status.clone() + ) + .increment(1); + metrics::histogram!( + "gateway_http_request_duration_seconds", + "method" => method, + "path" => path, + "status" => status + ) + .record(elapsed); + + response +} + +fn request_path_for_logging(req: &axum::http::Request) -> String { + req.extensions() + .get::() + .map(|p| p.as_str().to_string()) + .unwrap_or_else(|| req.uri().path().to_string()) +} + +async fn health() -> impl IntoResponse { + metrics::counter!("gateway_health_requests_total").increment(1); + Json(StatusResponse { status: "ok" }) +} + +async fn ready() -> impl IntoResponse { + metrics::counter!("gateway_ready_requests_total").increment(1); + Json(StatusResponse { status: "ok" }) +} + +async fn metrics(State(state): State) -> impl IntoResponse { + state.metrics.render() +} + +async fn admin_routing(State(state): State) -> impl IntoResponse { + Json(state.routing.snapshot().await) +} + +async fn admin_routing_reload(State(state): State) -> impl IntoResponse { + match state.routing.reload().await { + Ok(()) => StatusCode::NO_CONTENT.into_response(), + Err(e) => (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()).into_response(), + } +} + +pub mod http {} +pub mod admin_iam; +pub mod admin_rebalance; +pub mod authn; +pub mod authz; +pub mod grpc; +pub mod routing; +pub mod upstream; +pub mod config {} +pub mod storage; + +pub mod observability { + use edge_logger_client::Config as EdgeLoggerConfig; + use edge_logger_client::EdgeLoggerLayer; + use metrics_exporter_prometheus::PrometheusBuilder; + use metrics_exporter_prometheus::PrometheusHandle; + use std::time::Duration; + use tracing_subscriber::prelude::*; + + pub fn init_tracing() { + let filter = std::env::var("RUST_LOG").unwrap_or_else(|_| "info".to_string()); + let env_filter = 
tracing_subscriber::EnvFilter::new(filter); + + let fmt_layer = tracing_subscriber::fmt::layer().json(); + let edge_layer = edge_logger_layer_from_env("gateway"); + + let registry = tracing_subscriber::registry() + .with(env_filter) + .with(fmt_layer); + let _ = match edge_layer { + Some(layer) => registry.with(layer).try_init(), + None => registry.try_init(), + }; + } + + pub fn init_metrics() -> PrometheusHandle { + PrometheusBuilder::new() + .install_recorder() + .expect("failed to install Prometheus recorder") + } + + pub fn init_metrics_for_tests() -> PrometheusHandle { + PrometheusBuilder::new().build_recorder().handle() + } + + fn edge_logger_layer_from_env(service_name: &str) -> Option { + let enabled = std::env::var("EDGE_LOGGER_ENABLED") + .ok() + .map(|v| matches!(v.trim().to_ascii_lowercase().as_str(), "1" | "true" | "yes")) + .unwrap_or(false); + + let socket_path = std::env::var("EDGE_LOGGER_SOCKET_PATH").ok(); + if !enabled && socket_path.is_none() { + return None; + } + + let environment = std::env::var("EDGE_LOGGER_ENVIRONMENT") + .or_else(|_| std::env::var("ENVIRONMENT")) + .unwrap_or_else(|_| "production".to_string()); + + let tenant_id = + std::env::var("EDGE_LOGGER_TENANT_ID").unwrap_or_else(|_| "default".to_string()); + + let batch_size = std::env::var("EDGE_LOGGER_BATCH_SIZE") + .ok() + .and_then(|v| v.parse::().ok()) + .unwrap_or(100); + + let flush_interval = std::env::var("EDGE_LOGGER_FLUSH_INTERVAL_MS") + .ok() + .and_then(|v| v.parse::().ok()) + .map(Duration::from_millis) + .unwrap_or(Duration::from_secs(1)); + + Some(EdgeLoggerLayer::new(EdgeLoggerConfig { + socket_path: socket_path + .unwrap_or_else(|| "/var/run/edge-logger/logger.sock".to_string()), + service: service_name.to_string(), + environment, + tenant_id, + batch_size, + flush_interval, + })) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::sync::Arc; + use std::sync::OnceLock; + use tower::util::ServiceExt; + + fn assert_send_sync() {} + + #[test] + fn 
app_state_is_send_sync() { + assert_send_sync::(); + } + + #[tokio::test] + async fn health_returns_200() { + let metrics = crate::observability::init_metrics_for_tests(); + let routing = crate::routing::RouterState::new(Arc::new(crate::routing::FixedSource::new( + crate::routing::RoutingConfig::empty(), + ))) + .await + .unwrap(); + let storage = crate::storage::GatewayStorage::new_in_memory(); + let authn = crate::authn::AuthnConfig::for_tests(); + let app = app(AppState { + metrics, + routing, + storage, + authn, + }); + + let response = app + .oneshot( + axum::http::Request::builder() + .method("GET") + .uri("/health") + .body(axum::body::Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + + assert_eq!(response.status(), axum::http::StatusCode::OK); + } + + #[test] + fn docker_stack_yml_is_valid_yaml() { + let raw = std::fs::read_to_string("../swarm/stacks/platform.yml").unwrap(); + let parsed: serde_yaml::Value = serde_yaml::from_str(&raw).unwrap(); + assert!(parsed.as_mapping().is_some()); + } + + #[tokio::test] + async fn metrics_include_http_request_counters() { + static HANDLE: OnceLock = OnceLock::new(); + let metrics = HANDLE + .get_or_init(|| { + metrics_exporter_prometheus::PrometheusBuilder::new() + .install_recorder() + .unwrap() + }) + .clone(); + + let routing = crate::routing::RouterState::new(Arc::new(crate::routing::FixedSource::new( + crate::routing::RoutingConfig::empty(), + ))) + .await + .unwrap(); + let storage = crate::storage::GatewayStorage::new_in_memory(); + let authn = crate::authn::AuthnConfig::for_tests(); + let app = app(AppState { + metrics, + routing, + storage, + authn, + }); + + let _ = app + .clone() + .oneshot( + axum::http::Request::builder() + .method("GET") + .uri("/health") + .body(axum::body::Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + + let resp = app + .oneshot( + axum::http::Request::builder() + .method("GET") + .uri("/metrics") + .body(axum::body::Body::empty()) + .unwrap(), + ) + .await + .unwrap(); 
+ let body = axum::body::to_bytes(resp.into_body(), usize::MAX) + .await + .unwrap(); + let rendered = String::from_utf8_lossy(&body); + assert!(rendered.contains("gateway_http_requests_total")); + } + + #[test] + fn request_path_for_logging_does_not_include_query() { + let req = axum::http::Request::builder() + .method("GET") + .uri("/v1/auth/oidc/google/callback?code=supersecret&state=x") + .body(axum::body::Body::empty()) + .unwrap(); + let path = request_path_for_logging(&req); + assert_eq!(path, "/v1/auth/oidc/google/callback"); + assert!(!path.contains("supersecret")); + } +} diff --git a/gateway/src/main.rs b/gateway/src/main.rs new file mode 100644 index 0000000..c8c8ab3 --- /dev/null +++ b/gateway/src/main.rs @@ -0,0 +1,130 @@ +use std::net::SocketAddr; +use std::sync::Arc; + +use gateway::observability; +use gateway::routing; +use gateway::storage; +use gateway::AppState; + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + observability::init_tracing(); + let metrics = observability::init_metrics(); + let authn = gateway::authn::AuthnConfig::from_env(); + + let build_version = option_env!("GATEWAY_BUILD_VERSION").unwrap_or("dev"); + let build_sha = option_env!("GATEWAY_BUILD_SHA").unwrap_or("unknown"); + tracing::info!(build_version, build_sha, "gateway starting"); + + let addr: SocketAddr = std::env::var("GATEWAY_ADDR") + .unwrap_or_else(|_| "0.0.0.0:8080".to_string()) + .parse()?; + + let storage_path = + std::env::var("GATEWAY_STORAGE_PATH").unwrap_or_else(|_| "./data/gateway.mdbx".to_string()); + if let Some(parent) = std::path::Path::new(&storage_path).parent() { + let _ = std::fs::create_dir_all(parent); + } + let storage = storage::GatewayStorage::open_edge_storage(storage_path, "gateway") + .unwrap_or_else(|_| storage::GatewayStorage::new_in_memory()); + + let routing_source: Arc = + if let Ok(path) = std::env::var("GATEWAY_ROUTING_FILE") { + Arc::new(routing::StaticFileSource::new(path)) + } else if let (Ok(nats_url), Ok(bucket), 
Ok(key)) = ( + std::env::var("GATEWAY_ROUTING_NATS_URL"), + std::env::var("GATEWAY_ROUTING_NATS_BUCKET"), + std::env::var("GATEWAY_ROUTING_NATS_KEY"), + ) { + Arc::new(routing::NatsKvSource::connect(nats_url, bucket, key).await?) + } else { + Arc::new(routing::FixedSource::new(routing::RoutingConfig::empty())) + }; + + let routing = routing::RouterState::new(routing_source).await?; + let _routing_watcher = routing.start_watcher(); + + let grpc_addr: SocketAddr = std::env::var("GATEWAY_GRPC_ADDR") + .unwrap_or_else(|_| "0.0.0.0:8081".to_string()) + .parse()?; + + let state = AppState { + metrics, + routing, + storage, + authn, + }; + + let app = gateway::app(state.clone()); + + let listener = tokio::net::TcpListener::bind(addr).await?; + tracing::info!(%addr, "gateway listening"); + + tracing::info!(%grpc_addr, "gateway grpc listening"); + + let (shutdown_tx, _shutdown_rx) = tokio::sync::broadcast::channel::<()>(2); + let shutdown_task = { + let shutdown_tx = shutdown_tx.clone(); + tokio::spawn(async move { + shutdown_signal().await; + let _ = shutdown_tx.send(()); + }) + }; + + let http_task = { + let mut shutdown_rx = shutdown_tx.subscribe(); + tokio::spawn(async move { + axum::serve(listener, app) + .with_graceful_shutdown(async move { + let _ = shutdown_rx.recv().await; + }) + .await + .unwrap(); + }) + }; + + let grpc_task = { + let mut shutdown_rx = shutdown_tx.subscribe(); + let svc = gateway::grpc::GatewayCommandService::new(state.routing.clone()); + tokio::spawn(async move { + tonic::transport::Server::builder() + .add_service( + gateway::grpc::proto::command_service_server::CommandServiceServer::new(svc), + ) + .serve_with_shutdown(grpc_addr, async move { + let _ = shutdown_rx.recv().await; + }) + .await + .unwrap(); + }) + }; + + tokio::select! 
{ + _ = http_task => {}, + _ = grpc_task => {}, + } + let _ = shutdown_task.await; + + Ok(()) +} + +async fn shutdown_signal() { + let ctrl_c = async { + let _ = tokio::signal::ctrl_c().await; + }; + + #[cfg(unix)] + let terminate = async { + let mut sigterm = tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate()) + .expect("failed to register SIGTERM handler"); + sigterm.recv().await; + }; + + #[cfg(not(unix))] + let terminate = std::future::pending::<()>(); + + tokio::select! { + _ = ctrl_c => {}, + _ = terminate => {}, + } +} diff --git a/gateway/src/routing.rs b/gateway/src/routing.rs new file mode 100644 index 0000000..8b381d9 --- /dev/null +++ b/gateway/src/routing.rs @@ -0,0 +1,456 @@ +use std::collections::HashMap; +use std::sync::Arc; + +use futures::StreamExt; +use serde::Deserialize; +use serde::Serialize; +use thiserror::Error; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum ServiceKind { + Aggregate, + Projection, + Runner, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct RoutingConfig { + pub revision: u64, + + pub aggregate_placement: HashMap, + pub projection_placement: HashMap, + pub runner_placement: HashMap, + + pub aggregate_shards: HashMap>, + pub projection_shards: HashMap>, + pub runner_shards: HashMap>, +} + +impl RoutingConfig { + pub fn empty() -> Self { + Self { + revision: 0, + aggregate_placement: HashMap::new(), + projection_placement: HashMap::new(), + runner_placement: HashMap::new(), + aggregate_shards: HashMap::new(), + projection_shards: HashMap::new(), + runner_shards: HashMap::new(), + } + } +} + +#[derive(Debug, Clone, Serialize)] +pub struct RoutingTable { + pub revision: u64, + aggregate_placement: HashMap, + projection_placement: HashMap, + runner_placement: HashMap, + aggregate_shards: HashMap>, + projection_shards: HashMap>, + runner_shards: HashMap>, +} + +impl From for RoutingTable { + fn 
from(value: RoutingConfig) -> Self { + Self { + revision: value.revision, + aggregate_placement: value.aggregate_placement, + projection_placement: value.projection_placement, + runner_placement: value.runner_placement, + aggregate_shards: value.aggregate_shards, + projection_shards: value.projection_shards, + runner_shards: value.runner_shards, + } + } +} + +#[derive(Debug, Error, Clone, PartialEq, Eq)] +pub enum RoutingError { + #[error("unknown tenant")] + UnknownTenant, + #[error("missing shard directory entry")] + MissingShard, + #[error("no endpoints for shard")] + EmptyShard, +} + +#[derive(Clone)] +pub struct RouterState { + table: Arc>>, + source: Arc, +} + +impl std::fmt::Debug for RouterState { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("RouterState").finish_non_exhaustive() + } +} + +impl RouterState { + pub async fn new(source: Arc) -> Result { + let cfg = source.load().await?; + Ok(Self { + table: Arc::new(tokio::sync::RwLock::new(Arc::new(cfg.into()))), + source, + }) + } + + pub async fn snapshot(&self) -> Arc { + self.table.read().await.clone() + } + + pub async fn reload(&self) -> Result<(), RoutingSourceError> { + let cfg = self.source.load().await?; + let next = Arc::new(RoutingTable::from(cfg)); + *self.table.write().await = next; + Ok(()) + } + + pub fn start_watcher(&self) -> tokio::task::JoinHandle<()> { + let this = self.clone(); + tokio::spawn(async move { + let mut stream = match this.source.watch().await { + Ok(s) => s, + Err(_) => return, + }; + + while let Some(msg) = stream.next().await { + if msg.is_err() { + continue; + } + let _ = this.reload().await; + } + }) + } + + pub async fn resolve( + &self, + tenant_id: &str, + kind: ServiceKind, + ) -> Result { + let table = self.snapshot().await; + let result = table.resolve(tenant_id, kind); + metrics::counter!( + "gateway_routing_resolutions_total", + "kind" => kind_label(kind), + "result" => if result.is_ok() { "ok" } else { "err" } + ) + 
.increment(1); + result + } +} + +fn kind_label(kind: ServiceKind) -> &'static str { + match kind { + ServiceKind::Aggregate => "aggregate", + ServiceKind::Projection => "projection", + ServiceKind::Runner => "runner", + } +} + +impl RoutingTable { + pub fn resolve(&self, tenant_id: &str, kind: ServiceKind) -> Result { + let shard_id = match kind { + ServiceKind::Aggregate => self.aggregate_placement.get(tenant_id), + ServiceKind::Projection => self.projection_placement.get(tenant_id), + ServiceKind::Runner => self.runner_placement.get(tenant_id), + } + .ok_or(RoutingError::UnknownTenant)?; + + let endpoints = match kind { + ServiceKind::Aggregate => self.aggregate_shards.get(shard_id), + ServiceKind::Projection => self.projection_shards.get(shard_id), + ServiceKind::Runner => self.runner_shards.get(shard_id), + } + .ok_or(RoutingError::MissingShard)?; + + endpoints.first().cloned().ok_or(RoutingError::EmptyShard) + } +} + +#[derive(Debug, Error)] +pub enum RoutingSourceError { + #[error("source error: {0}")] + Source(String), + #[error("decode error: {0}")] + Decode(String), +} + +#[async_trait::async_trait] +pub trait RoutingSource: Send + Sync { + async fn load(&self) -> Result; + async fn watch( + &self, + ) -> Result< + std::pin::Pin> + Send>>, + RoutingSourceError, + >; +} + +#[derive(Clone)] +pub struct FixedSource { + cfg: RoutingConfig, +} + +impl FixedSource { + pub fn new(cfg: RoutingConfig) -> Self { + Self { cfg } + } +} + +#[async_trait::async_trait] +impl RoutingSource for FixedSource { + async fn load(&self) -> Result { + Ok(self.cfg.clone()) + } + + async fn watch( + &self, + ) -> Result< + std::pin::Pin> + Send>>, + RoutingSourceError, + > { + Ok(Box::pin(futures::stream::empty())) + } +} + +#[derive(Clone)] +pub struct StaticFileSource { + path: String, +} + +impl StaticFileSource { + pub fn new(path: impl Into) -> Self { + Self { path: path.into() } + } +} + +#[async_trait::async_trait] +impl RoutingSource for StaticFileSource { + async fn 
load(&self) -> Result { + let raw = tokio::fs::read_to_string(&self.path) + .await + .map_err(|e| RoutingSourceError::Source(e.to_string()))?; + + if self.path.ends_with(".json") { + serde_json::from_str::(&raw) + .map_err(|e| RoutingSourceError::Decode(e.to_string())) + } else { + let yaml: serde_yaml::Value = serde_yaml::from_str(&raw) + .map_err(|e| RoutingSourceError::Decode(e.to_string()))?; + let json = serde_json::to_value(yaml) + .map_err(|e| RoutingSourceError::Decode(e.to_string()))?; + serde_json::from_value::(json) + .map_err(|e| RoutingSourceError::Decode(e.to_string())) + } + } + + async fn watch( + &self, + ) -> Result< + std::pin::Pin> + Send>>, + RoutingSourceError, + > { + Ok(Box::pin(futures::stream::empty())) + } +} + +#[derive(Clone)] +pub struct NatsKvSource { + kv: async_nats::jetstream::kv::Store, + key: String, +} + +impl NatsKvSource { + pub async fn connect( + nats_url: impl Into, + bucket: impl Into, + key: impl Into, + ) -> Result { + let nats_url = nats_url.into(); + let bucket = bucket.into(); + let key = key.into(); + + let client = async_nats::connect(nats_url) + .await + .map_err(|e| RoutingSourceError::Source(e.to_string()))?; + let jetstream = async_nats::jetstream::new(client); + + let kv = match jetstream.get_key_value(&bucket).await { + Ok(kv) => kv, + Err(_) => jetstream + .create_key_value(async_nats::jetstream::kv::Config { + bucket: bucket.clone(), + ..Default::default() + }) + .await + .map_err(|e| RoutingSourceError::Source(e.to_string()))?, + }; + + Ok(Self { kv, key }) + } +} + +#[async_trait::async_trait] +impl RoutingSource for NatsKvSource { + async fn load(&self) -> Result { + let entry = self + .kv + .entry(&self.key) + .await + .map_err(|e| RoutingSourceError::Source(e.to_string()))?; + + let Some(entry) = entry else { + return Ok(RoutingConfig::empty()); + }; + + serde_json::from_slice::(&entry.value) + .map_err(|e| RoutingSourceError::Decode(e.to_string())) + } + + async fn watch( + &self, + ) -> Result< + 
std::pin::Pin> + Send>>, + RoutingSourceError, + > { + let key = self.key.clone(); + let watch = self + .kv + .watch(&key) + .await + .map_err(|e| RoutingSourceError::Source(e.to_string()))?; + + Ok(Box::pin(watch.filter_map(|entry| async move { + match entry { + Ok(entry) => match entry.operation { + async_nats::jetstream::kv::Operation::Put => Some(Ok(())), + async_nats::jetstream::kv::Operation::Delete + | async_nats::jetstream::kv::Operation::Purge => None, + }, + Err(e) => Some(Err(RoutingSourceError::Source(e.to_string()))), + } + }))) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn assert_send_sync() {} + + #[test] + fn router_state_is_send_sync() { + assert_send_sync::(); + } + + #[tokio::test] + async fn resolves_endpoints_for_tenant_service_kind() { + let cfg = RoutingConfig { + revision: 1, + aggregate_placement: HashMap::from([("t1".to_string(), "a".to_string())]), + projection_placement: HashMap::from([("t1".to_string(), "p".to_string())]), + runner_placement: HashMap::from([("t1".to_string(), "r".to_string())]), + aggregate_shards: HashMap::from([("a".to_string(), vec!["http://a".to_string()])]), + projection_shards: HashMap::from([("p".to_string(), vec!["http://p".to_string()])]), + runner_shards: HashMap::from([("r".to_string(), vec!["http://r".to_string()])]), + }; + + let source: Arc = Arc::new(TestSource::new(cfg)); + let router = RouterState::new(source).await.unwrap(); + + assert_eq!( + router.resolve("t1", ServiceKind::Aggregate).await.unwrap(), + "http://a" + ); + assert_eq!( + router.resolve("t1", ServiceKind::Projection).await.unwrap(), + "http://p" + ); + assert_eq!( + router.resolve("t1", ServiceKind::Runner).await.unwrap(), + "http://r" + ); + } + + #[tokio::test] + async fn unknown_tenant_is_typed_error() { + let source: Arc = Arc::new(TestSource::new(RoutingConfig::empty())); + let router = RouterState::new(source).await.unwrap(); + let err = router + .resolve("missing", ServiceKind::Aggregate) + .await + .unwrap_err(); + 
assert_eq!(err, RoutingError::UnknownTenant); + } + + #[tokio::test] + async fn hot_reload_swaps_table_atomically() { + let cfg1 = RoutingConfig { + revision: 1, + aggregate_placement: HashMap::from([("t1".to_string(), "a".to_string())]), + projection_placement: HashMap::new(), + runner_placement: HashMap::new(), + aggregate_shards: HashMap::from([("a".to_string(), vec!["http://a1".to_string()])]), + projection_shards: HashMap::new(), + runner_shards: HashMap::new(), + }; + let cfg2 = RoutingConfig { + revision: 2, + aggregate_placement: HashMap::from([("t1".to_string(), "a".to_string())]), + projection_placement: HashMap::new(), + runner_placement: HashMap::new(), + aggregate_shards: HashMap::from([("a".to_string(), vec!["http://a2".to_string()])]), + projection_shards: HashMap::new(), + runner_shards: HashMap::new(), + }; + + let test_source = Arc::new(TestSource::new(cfg1)); + let router = RouterState::new(test_source.clone()).await.unwrap(); + + let before = router.resolve("t1", ServiceKind::Aggregate).await.unwrap(); + assert_eq!(before, "http://a1"); + + test_source.set(cfg2).await; + router.reload().await.unwrap(); + + let after = router.resolve("t1", ServiceKind::Aggregate).await.unwrap(); + assert_eq!(after, "http://a2"); + } + + #[derive(Clone)] + struct TestSource { + cfg: Arc>, + } + + impl TestSource { + fn new(cfg: RoutingConfig) -> Self { + Self { + cfg: Arc::new(tokio::sync::RwLock::new(cfg)), + } + } + + async fn set(&self, cfg: RoutingConfig) { + *self.cfg.write().await = cfg; + } + } + + #[async_trait::async_trait] + impl RoutingSource for TestSource { + async fn load(&self) -> Result { + Ok(self.cfg.read().await.clone()) + } + + async fn watch( + &self, + ) -> Result< + std::pin::Pin> + Send>>, + RoutingSourceError, + > { + Ok(Box::pin(futures::stream::empty())) + } + } +} diff --git a/gateway/src/storage.rs b/gateway/src/storage.rs new file mode 100644 index 0000000..c486ff2 --- /dev/null +++ b/gateway/src/storage.rs @@ -0,0 +1,1015 @@ +use 
std::collections::HashMap; +use std::sync::Arc; +use std::time::Duration; +use std::time::SystemTime; +use std::time::UNIX_EPOCH; + +use async_trait::async_trait; +use edge_storage::Config as EdgeConfig; +use edge_storage::EdgeStorage; +use edge_storage::KvStore as EdgeKvStore; +use edge_storage::TableNames; +use edge_storage::Writer; +use futures::StreamExt; +use libmdbx::WriteFlags; +use serde::de::DeserializeOwned; +use serde::Deserialize; +use serde::Serialize; +use sha2::Digest; +use thiserror::Error; + +pub const SCHEMA_VERSION: u32 = 1; + +#[derive(Debug, Clone)] +pub struct BucketNames { + pub users: String, + pub identities: String, + pub password_credentials: String, + pub password_resets: String, + pub service_tokens: String, + pub refresh_sessions: String, + pub mfa: String, + pub rights: String, + pub roles: String, + pub assignments: String, + pub audit_index: String, +} + +impl BucketNames { + pub fn with_prefix(prefix: &str) -> Self { + let prefix = prefix.trim_end_matches('.'); + Self { + users: format!("{prefix}.users"), + identities: format!("{prefix}.identities"), + password_credentials: format!("{prefix}.password_credentials"), + password_resets: format!("{prefix}.password_resets"), + service_tokens: format!("{prefix}.service_tokens"), + refresh_sessions: format!("{prefix}.refresh_sessions"), + mfa: format!("{prefix}.mfa"), + rights: format!("{prefix}.rights"), + roles: format!("{prefix}.roles"), + assignments: format!("{prefix}.assignments"), + audit_index: format!("{prefix}.audit_index"), + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct Stored { + v: u32, + data: T, +} + +impl Stored { + fn new(data: T) -> Self { + Self { + v: SCHEMA_VERSION, + data, + } + } +} + +#[derive(Debug, Error)] +pub enum StorageError { + #[error("connection error: {0}")] + Connection(String), + #[error("kv error: {0}")] + Kv(String), + #[error("serialization error: {0}")] + Serde(String), + #[error("not found")] + NotFound, + #[error("already 
exists")] + AlreadyExists, + #[error("compare-and-set mismatch")] + CasMismatch, + #[error("refresh token invalid or already used")] + RefreshTokenInvalid, + #[error("refresh session expired")] + RefreshSessionExpired, + #[error("refresh session revoked")] + RefreshSessionRevoked, + #[error("unsupported schema version {0}")] + UnsupportedSchemaVersion(u32), +} + +#[derive(Debug, Clone)] +pub struct KvEntry { + pub value: Vec, + pub revision: u64, +} + +#[async_trait] +pub trait KvStore: Send + Sync { + async fn get(&self, key: &str) -> Result, StorageError>; + async fn put(&self, key: &str, value: Vec) -> Result; + async fn create(&self, key: &str, value: Vec) -> Result; + async fn update( + &self, + key: &str, + expected_revision: u64, + value: Vec, + ) -> Result; + async fn delete(&self, key: &str) -> Result<(), StorageError>; + async fn list_keys(&self, prefix: &str) -> Result, StorageError>; +} + +#[derive(Clone, Default)] +pub struct MemKvStore { + inner: Arc>>, +} + +#[derive(Clone)] +struct MemValue { + value: Vec, + revision: u64, +} + +#[async_trait] +impl KvStore for MemKvStore { + async fn get(&self, key: &str) -> Result, StorageError> { + let guard = self.inner.read().await; + Ok(guard.get(key).map(|v| KvEntry { + value: v.value.clone(), + revision: v.revision, + })) + } + + async fn put(&self, key: &str, value: Vec) -> Result { + let mut guard = self.inner.write().await; + let next_revision = guard.get(key).map(|v| v.revision + 1).unwrap_or(1); + guard.insert( + key.to_string(), + MemValue { + value, + revision: next_revision, + }, + ); + Ok(next_revision) + } + + async fn create(&self, key: &str, value: Vec) -> Result { + let mut guard = self.inner.write().await; + if guard.contains_key(key) { + return Err(StorageError::AlreadyExists); + } + guard.insert(key.to_string(), MemValue { value, revision: 1 }); + Ok(1) + } + + async fn update( + &self, + key: &str, + expected_revision: u64, + value: Vec, + ) -> Result { + let mut guard = 
self.inner.write().await; + let Some(existing) = guard.get(key) else { + return Err(StorageError::NotFound); + }; + if existing.revision != expected_revision { + return Err(StorageError::CasMismatch); + } + let next_revision = existing.revision + 1; + guard.insert( + key.to_string(), + MemValue { + value, + revision: next_revision, + }, + ); + Ok(next_revision) + } + + async fn delete(&self, key: &str) -> Result<(), StorageError> { + let mut guard = self.inner.write().await; + guard.remove(key); + Ok(()) + } + + async fn list_keys(&self, prefix: &str) -> Result, StorageError> { + let guard = self.inner.read().await; + let mut keys: Vec = guard + .keys() + .filter(|k| k.starts_with(prefix)) + .cloned() + .collect(); + keys.sort(); + Ok(keys) + } +} + +#[derive(Clone)] +pub struct NatsKvStore { + kv: async_nats::jetstream::kv::Store, +} + +impl NatsKvStore { + pub async fn connect( + nats_url: impl Into, + bucket: impl Into, + ) -> Result { + Self::connect_with_timeout(nats_url, bucket, Duration::from_secs(2)).await + } + + pub async fn connect_with_timeout( + nats_url: impl Into, + bucket: impl Into, + timeout: Duration, + ) -> Result { + let nats_url = nats_url.into(); + let bucket = bucket.into(); + + let client = tokio::time::timeout(timeout, async_nats::connect(nats_url)) + .await + .map_err(|_| StorageError::Connection("connect timeout".to_string()))? 
+ .map_err(|e| StorageError::Connection(e.to_string()))?; + + let jetstream = async_nats::jetstream::new(client); + + let kv = match jetstream.get_key_value(&bucket).await { + Ok(kv) => kv, + Err(_) => jetstream + .create_key_value(async_nats::jetstream::kv::Config { + bucket: bucket.clone(), + ..Default::default() + }) + .await + .map_err(|e| StorageError::Kv(e.to_string()))?, + }; + + Ok(Self { kv }) + } +} + +#[async_trait] +impl KvStore for NatsKvStore { + async fn get(&self, key: &str) -> Result, StorageError> { + let entry = self + .kv + .entry(key) + .await + .map_err(|e| StorageError::Kv(e.to_string()))?; + + Ok(entry.map(|e| KvEntry { + value: e.value.to_vec(), + revision: e.revision, + })) + } + + async fn put(&self, key: &str, value: Vec) -> Result { + let revision = self + .kv + .put(key, value.into()) + .await + .map_err(|e| StorageError::Kv(e.to_string()))?; + Ok(revision) + } + + async fn create(&self, key: &str, value: Vec) -> Result { + let revision = self + .kv + .create(key, value.into()) + .await + .map_err(|e| StorageError::Kv(e.to_string()))?; + Ok(revision) + } + + async fn update( + &self, + key: &str, + expected_revision: u64, + value: Vec, + ) -> Result { + let result = self.kv.update(key, value.into(), expected_revision).await; + match result { + Ok(revision) => Ok(revision), + Err(e) => { + let msg = e.to_string(); + if msg.contains("wrong last sequence") || msg.contains("wrong last") { + Err(StorageError::CasMismatch) + } else { + Err(StorageError::Kv(msg)) + } + } + } + } + + async fn delete(&self, key: &str) -> Result<(), StorageError> { + self.kv + .delete(key) + .await + .map_err(|e| StorageError::Kv(e.to_string()))?; + Ok(()) + } + + async fn list_keys(&self, prefix: &str) -> Result, StorageError> { + let mut stream = self + .kv + .keys() + .await + .map_err(|e| StorageError::Kv(e.to_string()))?; + + let mut keys = Vec::new(); + while let Some(item) = stream.next().await { + if let Ok(key) = item { + if key.starts_with(prefix) { + 
keys.push(key); + } + } + } + Ok(keys) + } +} + +#[derive(Clone)] +pub struct EdgeMdbxKvStore { + storage: Arc, + kv: EdgeKvStore, +} + +impl std::fmt::Debug for EdgeMdbxKvStore { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("EdgeMdbxKvStore").finish_non_exhaustive() + } +} + +impl EdgeMdbxKvStore { + pub fn open(storage_path: impl Into) -> Result { + let config = EdgeConfig::new(storage_path.into()); + let storage = + EdgeStorage::open(config.clone()).map_err(|e| StorageError::Kv(e.to_string()))?; + let writer = Arc::new(Writer::new(storage.db().clone(), &config)); + let kv = EdgeKvStore::new(storage.db().clone(), writer); + Ok(Self { + storage: Arc::new(storage), + kv, + }) + } +} + +#[async_trait] +impl KvStore for EdgeMdbxKvStore { + async fn get(&self, key: &str) -> Result, StorageError> { + let storage = self.storage.clone(); + let kv = self.kv.clone(); + let key = key.to_string(); + tokio::task::spawn_blocking(move || { + let Some(value) = kv + .get(key.as_bytes()) + .map_err(|e| StorageError::Kv(e.to_string()))? + else { + return Ok(None); + }; + + let rev_key = rev_key(&key); + let revision = kv + .get(rev_key.as_bytes()) + .map_err(|e| StorageError::Kv(e.to_string()))? + .and_then(|b| serde_json::from_slice::(&b).ok()) + .unwrap_or(0); + + let _ = storage; + Ok(Some(KvEntry { value, revision })) + }) + .await + .map_err(|e| StorageError::Kv(e.to_string()))? 
+ } + + async fn put(&self, key: &str, value: Vec) -> Result { + let storage = self.storage.clone(); + let key = key.to_string(); + tokio::task::spawn_blocking(move || { + let txn = storage + .db() + .begin_rw_txn() + .map_err(|e| StorageError::Kv(e.to_string()))?; + let table = txn + .open_table(TableNames::KV_STORE) + .map_err(|e| StorageError::Kv(e.to_string()))?; + + let existing_rev_bytes: Option> = txn + .get(&table, rev_key(&key).as_bytes()) + .map_err(|e| StorageError::Kv(e.to_string()))?; + let existing_rev = existing_rev_bytes + .as_deref() + .and_then(|b| serde_json::from_slice::(b).ok()) + .unwrap_or(0); + let next_rev = existing_rev + 1; + + txn.put( + &table, + key.as_bytes(), + value.as_slice(), + WriteFlags::empty(), + ) + .map_err(|e| StorageError::Kv(e.to_string()))?; + let rev_bytes = + serde_json::to_vec(&next_rev).map_err(|e| StorageError::Serde(e.to_string()))?; + txn.put( + &table, + rev_key(&key).as_bytes(), + rev_bytes.as_slice(), + WriteFlags::empty(), + ) + .map_err(|e| StorageError::Kv(e.to_string()))?; + + txn.commit().map_err(|e| StorageError::Kv(e.to_string()))?; + Ok(next_rev) + }) + .await + .map_err(|e| StorageError::Kv(e.to_string()))? 
+ } + + async fn create(&self, key: &str, value: Vec) -> Result { + let storage = self.storage.clone(); + let key = key.to_string(); + tokio::task::spawn_blocking(move || { + let txn = storage + .db() + .begin_rw_txn() + .map_err(|e| StorageError::Kv(e.to_string()))?; + let table = txn + .open_table(TableNames::KV_STORE) + .map_err(|e| StorageError::Kv(e.to_string()))?; + + let exists: Option> = txn + .get(&table, key.as_bytes()) + .map_err(|e| StorageError::Kv(e.to_string()))?; + if exists.is_some() { + return Err(StorageError::AlreadyExists); + } + + txn.put( + &table, + key.as_bytes(), + value.as_slice(), + WriteFlags::empty(), + ) + .map_err(|e| StorageError::Kv(e.to_string()))?; + let rev_bytes = + serde_json::to_vec(&1u64).map_err(|e| StorageError::Serde(e.to_string()))?; + txn.put( + &table, + rev_key(&key).as_bytes(), + rev_bytes.as_slice(), + WriteFlags::empty(), + ) + .map_err(|e| StorageError::Kv(e.to_string()))?; + + txn.commit().map_err(|e| StorageError::Kv(e.to_string()))?; + Ok(1) + }) + .await + .map_err(|e| StorageError::Kv(e.to_string()))? 
+ } + + async fn update( + &self, + key: &str, + expected_revision: u64, + value: Vec, + ) -> Result { + let storage = self.storage.clone(); + let key = key.to_string(); + tokio::task::spawn_blocking(move || { + let txn = storage + .db() + .begin_rw_txn() + .map_err(|e| StorageError::Kv(e.to_string()))?; + let table = txn + .open_table(TableNames::KV_STORE) + .map_err(|e| StorageError::Kv(e.to_string()))?; + + let current_rev_bytes: Option> = txn + .get(&table, rev_key(&key).as_bytes()) + .map_err(|e| StorageError::Kv(e.to_string()))?; + let Some(current_rev_bytes) = current_rev_bytes else { + return Err(StorageError::NotFound); + }; + let current_rev: u64 = serde_json::from_slice(¤t_rev_bytes) + .map_err(|e| StorageError::Serde(e.to_string()))?; + if current_rev != expected_revision { + return Err(StorageError::CasMismatch); + } + + let next_rev = current_rev + 1; + txn.put( + &table, + key.as_bytes(), + value.as_slice(), + WriteFlags::empty(), + ) + .map_err(|e| StorageError::Kv(e.to_string()))?; + let rev_bytes = + serde_json::to_vec(&next_rev).map_err(|e| StorageError::Serde(e.to_string()))?; + txn.put( + &table, + rev_key(&key).as_bytes(), + rev_bytes.as_slice(), + WriteFlags::empty(), + ) + .map_err(|e| StorageError::Kv(e.to_string()))?; + + txn.commit().map_err(|e| StorageError::Kv(e.to_string()))?; + Ok(next_rev) + }) + .await + .map_err(|e| StorageError::Kv(e.to_string()))? 
+ } + + async fn delete(&self, key: &str) -> Result<(), StorageError> { + let storage = self.storage.clone(); + let key = key.to_string(); + tokio::task::spawn_blocking(move || { + let txn = storage + .db() + .begin_rw_txn() + .map_err(|e| StorageError::Kv(e.to_string()))?; + let table = txn + .open_table(TableNames::KV_STORE) + .map_err(|e| StorageError::Kv(e.to_string()))?; + + let _ = txn + .del(&table, key.as_bytes(), None) + .map_err(|e| StorageError::Kv(e.to_string()))?; + let _ = txn + .del(&table, rev_key(&key).as_bytes(), None) + .map_err(|e| StorageError::Kv(e.to_string()))?; + + txn.commit().map_err(|e| StorageError::Kv(e.to_string()))?; + Ok(()) + }) + .await + .map_err(|e| StorageError::Kv(e.to_string()))? + } + + async fn list_keys(&self, prefix: &str) -> Result, StorageError> { + let storage = self.storage.clone(); + let kv = self.kv.clone(); + let prefix = prefix.to_string(); + tokio::task::spawn_blocking(move || { + let txn = storage + .db() + .begin_ro_txn() + .map_err(|e| StorageError::Kv(e.to_string()))?; + + let mut keys = Vec::new(); + for res in kv + .prefix_scan(&txn, prefix.as_bytes()) + .map_err(|e| StorageError::Kv(e.to_string()))? + { + let (k, _) = res.map_err(|e| StorageError::Kv(e.to_string()))?; + if k.starts_with(b"__rev__:") { + continue; + } + let key = String::from_utf8_lossy(&k).to_string(); + keys.push(key); + } + Ok(keys) + }) + .await + .map_err(|e| StorageError::Kv(e.to_string()))? 
+ } +} + +fn rev_key(key: &str) -> String { + format!("__rev__:{key}") +} + +#[derive(Clone)] +pub struct PrefixedKvStore { + inner: Arc, + prefix: String, +} + +impl PrefixedKvStore { + pub fn new(inner: Arc, prefix: impl Into) -> Self { + Self { + inner, + prefix: prefix.into(), + } + } + + fn k(&self, key: &str) -> String { + format!("{}{}", self.prefix, key) + } +} + +#[async_trait] +impl KvStore for PrefixedKvStore { + async fn get(&self, key: &str) -> Result, StorageError> { + self.inner.get(&self.k(key)).await + } + + async fn put(&self, key: &str, value: Vec) -> Result { + self.inner.put(&self.k(key), value).await + } + + async fn create(&self, key: &str, value: Vec) -> Result { + self.inner.create(&self.k(key), value).await + } + + async fn update( + &self, + key: &str, + expected_revision: u64, + value: Vec, + ) -> Result { + self.inner + .update(&self.k(key), expected_revision, value) + .await + } + + async fn delete(&self, key: &str) -> Result<(), StorageError> { + self.inner.delete(&self.k(key)).await + } + + async fn list_keys(&self, prefix: &str) -> Result, StorageError> { + let keys = self.inner.list_keys(&self.k(prefix)).await?; + Ok(keys + .into_iter() + .filter_map(|k| k.strip_prefix(&self.prefix).map(|s| s.to_string())) + .collect()) + } +} + +#[derive(Clone)] +pub struct GatewayStorage { + pub users: Arc, + pub identities: Arc, + pub password_credentials: Arc, + pub password_resets: Arc, + pub service_tokens: Arc, + pub refresh_sessions: Arc, + pub mfa: Arc, + pub rights: Arc, + pub roles: Arc, + pub assignments: Arc, + pub audit_index: Arc, +} + +impl GatewayStorage { + pub fn new_in_memory() -> Self { + Self { + users: Arc::new(MemKvStore::default()), + identities: Arc::new(MemKvStore::default()), + password_credentials: Arc::new(MemKvStore::default()), + password_resets: Arc::new(MemKvStore::default()), + service_tokens: Arc::new(MemKvStore::default()), + refresh_sessions: Arc::new(MemKvStore::default()), + mfa: 
Arc::new(MemKvStore::default()), + rights: Arc::new(MemKvStore::default()), + roles: Arc::new(MemKvStore::default()), + assignments: Arc::new(MemKvStore::default()), + audit_index: Arc::new(MemKvStore::default()), + } + } + + pub fn open_edge_storage( + storage_path: impl Into, + namespace: &str, + ) -> Result { + let base: Arc = Arc::new(EdgeMdbxKvStore::open(storage_path)?); + + let bucket = |name: &str| -> Arc { + Arc::new(PrefixedKvStore::new( + base.clone(), + format!("{namespace}:{name}:"), + )) + }; + + Ok(Self { + users: bucket("users"), + identities: bucket("identities"), + password_credentials: bucket("password_credentials"), + password_resets: bucket("password_resets"), + service_tokens: bucket("service_tokens"), + refresh_sessions: bucket("refresh_sessions"), + mfa: bucket("mfa"), + rights: bucket("rights"), + roles: bucket("roles"), + assignments: bucket("assignments"), + audit_index: bucket("audit_index"), + }) + } + + pub async fn connect_nats( + nats_url: impl Into, + bucket_prefix: &str, + ) -> Result { + let nats_url = nats_url.into(); + let buckets = BucketNames::with_prefix(bucket_prefix); + + let users: Arc = + Arc::new(NatsKvStore::connect(nats_url.clone(), buckets.users).await?); + let identities: Arc = + Arc::new(NatsKvStore::connect(nats_url.clone(), buckets.identities).await?); + let password_credentials: Arc = + Arc::new(NatsKvStore::connect(nats_url.clone(), buckets.password_credentials).await?); + let password_resets: Arc = + Arc::new(NatsKvStore::connect(nats_url.clone(), buckets.password_resets).await?); + let service_tokens: Arc = + Arc::new(NatsKvStore::connect(nats_url.clone(), buckets.service_tokens).await?); + let refresh_sessions: Arc = + Arc::new(NatsKvStore::connect(nats_url.clone(), buckets.refresh_sessions).await?); + let mfa: Arc = + Arc::new(NatsKvStore::connect(nats_url.clone(), buckets.mfa).await?); + let rights: Arc = + Arc::new(NatsKvStore::connect(nats_url.clone(), buckets.rights).await?); + let roles: Arc = + 
Arc::new(NatsKvStore::connect(nats_url.clone(), buckets.roles).await?); + let assignments: Arc = + Arc::new(NatsKvStore::connect(nats_url.clone(), buckets.assignments).await?); + let audit_index: Arc = + Arc::new(NatsKvStore::connect(nats_url.clone(), buckets.audit_index).await?); + + Ok(Self { + users, + identities, + password_credentials, + password_resets, + service_tokens, + refresh_sessions, + mfa, + rights, + roles, + assignments, + audit_index, + }) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct RefreshSessionRecord { + pub user_id: String, + pub session_id: String, + pub current_hash: String, + pub previous_hash: Option, + pub created_at_ms: i64, + pub rotated_at_ms: i64, + pub expires_at_ms: i64, + pub revoked_at_ms: Option, +} + +impl RefreshSessionRecord { + fn is_expired_at(&self, now_ms: i64) -> bool { + now_ms >= self.expires_at_ms + } +} + +pub struct NewRefreshSession { + pub session_id: String, + pub refresh_token: String, +} + +impl GatewayStorage { + pub async fn create_refresh_session( + &self, + user_id: impl Into, + ttl: Duration, + ) -> Result { + let user_id = user_id.into(); + let session_id = uuid::Uuid::new_v4().to_string(); + let refresh_token = uuid::Uuid::new_v4().to_string(); + let refresh_hash = hash_sensitive("refresh", &refresh_token); + let now_ms = unix_ms(); + let expires_at_ms = now_ms + ttl.as_millis() as i64; + + let record = RefreshSessionRecord { + user_id, + session_id: session_id.clone(), + current_hash: refresh_hash, + previous_hash: None, + created_at_ms: now_ms, + rotated_at_ms: now_ms, + expires_at_ms, + revoked_at_ms: None, + }; + + let payload = encode_stored(&record)?; + self.refresh_sessions + .create(&refresh_session_key(&session_id), payload) + .await?; + + Ok(NewRefreshSession { + session_id, + refresh_token, + }) + } + + pub async fn rotate_refresh_token( + &self, + session_id: &str, + presented_refresh_token: &str, + ) -> Result { + let key = 
refresh_session_key(session_id); + let presented_hash = hash_sensitive("refresh", presented_refresh_token); + + for _ in 0..20 { + let entry = self + .refresh_sessions + .get(&key) + .await? + .ok_or(StorageError::NotFound)?; + + let mut stored: Stored = serde_json::from_slice(&entry.value) + .map_err(|e| StorageError::Serde(e.to_string()))?; + + if stored.v != SCHEMA_VERSION { + return Err(StorageError::UnsupportedSchemaVersion(stored.v)); + } + + let now_ms = unix_ms(); + if stored.data.revoked_at_ms.is_some() { + return Err(StorageError::RefreshSessionRevoked); + } + if stored.data.is_expired_at(now_ms) { + return Err(StorageError::RefreshSessionExpired); + } + if stored.data.current_hash != presented_hash { + return Err(StorageError::RefreshTokenInvalid); + } + + let new_token = uuid::Uuid::new_v4().to_string(); + let new_hash = hash_sensitive("refresh", &new_token); + + stored.data.previous_hash = Some(stored.data.current_hash.clone()); + stored.data.current_hash = new_hash; + stored.data.rotated_at_ms = now_ms; + + let payload = + serde_json::to_vec(&stored).map_err(|e| StorageError::Serde(e.to_string()))?; + match self + .refresh_sessions + .update(&key, entry.revision, payload) + .await + { + Ok(_) => return Ok(new_token), + Err(StorageError::CasMismatch) => continue, + Err(e) => return Err(e), + } + } + + Err(StorageError::CasMismatch) + } + + pub async fn revoke_refresh_session(&self, session_id: &str) -> Result<(), StorageError> { + let key = refresh_session_key(session_id); + for _ in 0..20 { + let entry = self + .refresh_sessions + .get(&key) + .await? 
+ .ok_or(StorageError::NotFound)?; + + let mut stored: Stored = serde_json::from_slice(&entry.value) + .map_err(|e| StorageError::Serde(e.to_string()))?; + + if stored.v != SCHEMA_VERSION { + return Err(StorageError::UnsupportedSchemaVersion(stored.v)); + } + + if stored.data.revoked_at_ms.is_some() { + return Ok(()); + } + + stored.data.revoked_at_ms = Some(unix_ms()); + let payload = + serde_json::to_vec(&stored).map_err(|e| StorageError::Serde(e.to_string()))?; + + match self + .refresh_sessions + .update(&key, entry.revision, payload) + .await + { + Ok(_) => return Ok(()), + Err(StorageError::CasMismatch) => continue, + Err(e) => return Err(e), + } + } + + Err(StorageError::CasMismatch) + } +} + +fn refresh_session_key(session_id: &str) -> String { + format!("v{SCHEMA_VERSION}/sessions/{session_id}") +} + +fn encode_stored(data: &T) -> Result, StorageError> { + serde_json::to_vec(&Stored::new(data)).map_err(|e| StorageError::Serde(e.to_string())) +} + +fn unix_ms() -> i64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_millis() as i64 +} + +fn hash_sensitive(domain: &str, value: &str) -> String { + let mut hasher = sha2::Sha256::new(); + hasher.update(domain.as_bytes()); + hasher.update([0u8]); + hasher.update(value.as_bytes()); + let out = hasher.finalize(); + hex::encode(out) +} + +pub async fn get_json( + store: &dyn KvStore, + key: &str, +) -> Result, StorageError> { + let Some(entry) = store.get(key).await? 
else { + return Ok(None); + }; + let value = serde_json::from_slice::(&entry.value) + .map_err(|e| StorageError::Serde(e.to_string()))?; + Ok(Some((value, entry.revision))) +} + +#[cfg(test)] +mod tests { + use super::*; + use tokio::task::JoinSet; + + fn assert_send_sync() {} + + #[test] + fn storage_is_send_sync() { + assert_send_sync::(); + assert_send_sync::(); + } + + #[tokio::test] + async fn sensitive_values_are_stored_only_as_hashes() { + let storage = GatewayStorage::new_in_memory(); + + let created = storage + .create_refresh_session("user-1", Duration::from_secs(3600)) + .await + .unwrap(); + + let entry = storage + .refresh_sessions + .get(&refresh_session_key(&created.session_id)) + .await + .unwrap() + .unwrap(); + + let raw = String::from_utf8_lossy(&entry.value); + assert!(!raw.contains(&created.refresh_token)); + + let stored: Stored = serde_json::from_slice(&entry.value).unwrap(); + assert_eq!(stored.v, SCHEMA_VERSION); + assert_ne!(stored.data.current_hash, created.refresh_token); + assert_eq!(stored.data.current_hash.len(), 64); + } + + #[tokio::test] + async fn refresh_token_rotation_is_atomic_under_concurrency() { + let storage = GatewayStorage::new_in_memory(); + + let created = storage + .create_refresh_session("user-1", Duration::from_secs(3600)) + .await + .unwrap(); + + let mut join_set = JoinSet::new(); + for _ in 0..32 { + let storage = storage.clone(); + let session_id = created.session_id.clone(); + let token = created.refresh_token.clone(); + join_set.spawn(async move { storage.rotate_refresh_token(&session_id, &token).await }); + } + + let mut successes = Vec::new(); + let mut failures = 0usize; + + while let Some(res) = join_set.join_next().await { + match res.unwrap() { + Ok(new_token) => successes.push(new_token), + Err(StorageError::RefreshTokenInvalid) => failures += 1, + Err(e) => panic!("unexpected error: {e}"), + } + } + + assert_eq!(successes.len(), 1); + assert_eq!(failures, 31); + + let rotated_token = 
successes.pop().unwrap(); + let rotated_again = storage + .rotate_refresh_token(&created.session_id, &created.refresh_token) + .await; + assert!(matches!( + rotated_again, + Err(StorageError::RefreshTokenInvalid) + )); + + let ok = storage + .rotate_refresh_token(&created.session_id, &rotated_token) + .await; + assert!(ok.is_ok()); + } +} diff --git a/gateway/src/upstream.rs b/gateway/src/upstream.rs new file mode 100644 index 0000000..0657ec1 --- /dev/null +++ b/gateway/src/upstream.rs @@ -0,0 +1,99 @@ +use std::sync::OnceLock; +use std::time::Duration; + +pub fn http_client() -> &'static reqwest::Client { + static CLIENT: OnceLock = OnceLock::new(); + CLIENT.get_or_init(|| { + let mut builder = reqwest::Client::builder().timeout(Duration::from_secs(10)); + + if let Some(ca_pem) = env_or_file( + "GATEWAY_INTERNAL_CA_CERT_PEM", + "GATEWAY_INTERNAL_CA_CERT_PEM_FILE", + ) { + if let Ok(cert) = reqwest::Certificate::from_pem(ca_pem.as_bytes()) { + builder = builder.add_root_certificate(cert); + } + } + + if let Some(identity_pem) = env_or_file( + "GATEWAY_INTERNAL_IDENTITY_PEM", + "GATEWAY_INTERNAL_IDENTITY_PEM_FILE", + ) { + if let Ok(identity) = reqwest::Identity::from_pem(identity_pem.as_bytes()) { + builder = builder.identity(identity); + } + } + + builder.build().expect("failed to build reqwest client") + }) +} + +pub fn grpc_endpoint(url: &str) -> Result { + let mut endpoint = + tonic::transport::Endpoint::from_shared(url.to_string())?.timeout(Duration::from_secs(10)); + + let wants_tls = url.starts_with("https://") + || std::env::var("GATEWAY_INTERNAL_GRPC_TLS") + .ok() + .map(|v| matches!(v.trim().to_ascii_lowercase().as_str(), "1" | "true" | "yes")) + .unwrap_or(false); + + if wants_tls { + if let Some(tls) = grpc_tls_config() { + endpoint = endpoint.tls_config(tls)?; + } + } + + Ok(endpoint) +} + +fn grpc_tls_config() -> Option { + let mut tls = tonic::transport::ClientTlsConfig::new(); + let mut configured = false; + + if let Some(ca_pem) = env_or_file( + 
"GATEWAY_INTERNAL_GRPC_CA_CERT_PEM", + "GATEWAY_INTERNAL_GRPC_CA_CERT_PEM_FILE", + ) { + tls = tls.ca_certificate(tonic::transport::Certificate::from_pem(ca_pem)); + configured = true; + } + + let cert_pem = env_or_file( + "GATEWAY_INTERNAL_GRPC_CLIENT_CERT_PEM", + "GATEWAY_INTERNAL_GRPC_CLIENT_CERT_PEM_FILE", + ); + let key_pem = env_or_file( + "GATEWAY_INTERNAL_GRPC_CLIENT_KEY_PEM", + "GATEWAY_INTERNAL_GRPC_CLIENT_KEY_PEM_FILE", + ); + if let (Some(cert_pem), Some(key_pem)) = (cert_pem, key_pem) { + tls = tls.identity(tonic::transport::Identity::from_pem(cert_pem, key_pem)); + configured = true; + } + + if configured { + Some(tls) + } else { + None + } +} + +fn env_or_file(env_key: &str, file_env_key: &str) -> Option { + if let Ok(path) = std::env::var(file_env_key) { + if let Ok(raw) = std::fs::read_to_string(path) { + let trimmed = raw.trim().to_string(); + if !trimmed.is_empty() { + return Some(trimmed); + } + } + } + std::env::var(env_key).ok().and_then(|v| { + let trimmed = v.trim().to_string(); + if trimmed.is_empty() { + None + } else { + Some(trimmed) + } + }) +} diff --git a/gateway/tests/ha_local.rs b/gateway/tests/ha_local.rs new file mode 100644 index 0000000..d5dd2a5 --- /dev/null +++ b/gateway/tests/ha_local.rs @@ -0,0 +1,158 @@ +use std::sync::Arc; + +use tower::util::ServiceExt; + +#[tokio::test] +async fn t9_2_and_t9_3_ready_is_healthy_on_both_replicas_and_survives_one_replica_down() { + let (app1, app2, _state) = build_two_replicas().await; + + let r1 = app1 + .clone() + .oneshot( + axum::http::Request::builder() + .method("GET") + .uri("/ready") + .body(axum::body::Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(r1.status(), axum::http::StatusCode::OK); + + let r2 = app2 + .clone() + .oneshot( + axum::http::Request::builder() + .method("GET") + .uri("/ready") + .body(axum::body::Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(r2.status(), axum::http::StatusCode::OK); + + drop(app1); + + let r2 = app2 + 
.oneshot( + axum::http::Request::builder() + .method("GET") + .uri("/ready") + .body(axum::body::Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(r2.status(), axum::http::StatusCode::OK); +} + +#[tokio::test] +async fn t9_4_refresh_works_across_replicas_without_sticky_sessions() { + let (app1, app2, state) = build_two_replicas().await; + + let signup = app1 + .clone() + .oneshot( + axum::http::Request::builder() + .method("POST") + .uri("/v1/auth/signup") + .header("content-type", "application/json") + .body(axum::body::Body::from( + r#"{"email":"ha@b.com","password":"password123"}"#, + )) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(signup.status(), axum::http::StatusCode::OK); + let body = axum::body::to_bytes(signup.into_body(), usize::MAX) + .await + .unwrap(); + let created: gateway::authn::AuthResponse = serde_json::from_slice(&body).unwrap(); + + let refresh_req = serde_json::to_vec(&gateway::authn::RefreshRequest { + session_id: created.session_id.clone(), + refresh_token: created.refresh_token.clone(), + }) + .unwrap(); + + let refresh = app2 + .clone() + .oneshot( + axum::http::Request::builder() + .method("POST") + .uri("/v1/auth/refresh") + .header("content-type", "application/json") + .body(axum::body::Body::from(refresh_req)) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(refresh.status(), axum::http::StatusCode::OK); + let body = axum::body::to_bytes(refresh.into_body(), usize::MAX) + .await + .unwrap(); + let refreshed: gateway::authn::AuthResponse = serde_json::from_slice(&body).unwrap(); + assert_ne!(refreshed.refresh_token, created.refresh_token); + + let refresh_again_req = serde_json::to_vec(&gateway::authn::RefreshRequest { + session_id: created.session_id.clone(), + refresh_token: created.refresh_token.clone(), + }) + .unwrap(); + + let refresh_again = app1 + .oneshot( + axum::http::Request::builder() + .method("POST") + .uri("/v1/auth/refresh") + .header("content-type", "application/json") + 
.body(axum::body::Body::from(refresh_again_req)) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(refresh_again.status(), axum::http::StatusCode::UNAUTHORIZED); + + let stored = state + .storage + .refresh_sessions + .get(&format!("v1/sessions/{}", created.session_id)) + .await + .unwrap() + .unwrap(); + let value: serde_json::Value = serde_json::from_slice(&stored.value).unwrap(); + assert_eq!( + value.get("v").and_then(|v| v.as_u64()).unwrap_or(0), + u64::from(gateway::storage::SCHEMA_VERSION) + ); +} + +async fn build_two_replicas() -> (axum::Router, axum::Router, gateway::AppState) { + let metrics = gateway::observability::init_metrics_for_tests(); + let routing = gateway::routing::RouterState::new(Arc::new(gateway::routing::FixedSource::new( + gateway::routing::RoutingConfig::empty(), + ))) + .await + .unwrap(); + let storage = gateway::storage::GatewayStorage::new_in_memory(); + let authn = gateway::authn::AuthnConfig::for_tests(); + + let state = gateway::AppState { + metrics, + routing: routing.clone(), + storage: storage.clone(), + authn: authn.clone(), + }; + + let app1 = gateway::app(state.clone()); + let app2 = gateway::app(gateway::AppState { + metrics: gateway::observability::init_metrics_for_tests(), + routing, + storage, + authn, + }); + + (app1, app2, state) +} diff --git a/observability/INSTRUMENTATION.md b/observability/INSTRUMENTATION.md new file mode 100644 index 0000000..bd42a84 --- /dev/null +++ b/observability/INSTRUMENTATION.md @@ -0,0 +1,44 @@ +# Instrumentation Requirements (Control Plane Dashboards) + +## Global Conventions +- Build/version labeling: every service exports a gauge metric `*_build_info{service,version,git_sha} 1` +- Correlation identifiers: + - Logs include `correlation_id` and `request_id` fields for all request spans + - Traces propagate `traceparent` end-to-end and expose trace IDs in logs +- Cardinality safety: + - `tenant_id` must not be a label on high-frequency metrics unless bounded (sampling, rollups, or 
explicit allowlist) + +## Dashboard: Noisy Neighbor & Tenant Health +Required metrics (examples): +- `http_request_duration_ms_bucket{service,route,method,status}` histogram +- `job_duration_ms_bucket{job_kind,status}` histogram for control-plane jobs +- Optional bounded tenant signals: + - `tenant_active_jobs{tenant_id}` only if tenant count is bounded and enforced + +## Dashboard: API Regression & Deployment +Required metrics (examples): +- `*_build_info{service,version,git_sha} 1` +- `http_request_duration_ms_bucket{service,route,method,status}` +- `http_requests_total{service,route,method,status}` +Deploy markers: +- Grafana annotations for deploy events (vertical markers) or a low-cardinality metric like: + - `deploy_event{service,version,git_sha} 1` (sparse, emitted once per deploy) + +## Dashboard: Storage & Event Bus Bottlenecks +Required metrics (examples): +- Storage: + - `process_resident_memory_bytes{service}` + - `disk_io_time_seconds_total{device}` (node-exporter) + - `mdbx_*` or equivalent libmdbx metrics if exposed by storage services +- Event bus / JetStream: + - `nats_*` / `jetstream_*` metrics for consumer lag, ack latency, stream bytes, and redeliveries + +## Dashboard: Infrastructure Exhaustion +Required metrics (examples): +- Node exporter: + - `node_cpu_seconds_total` + - `node_memory_MemAvailable_bytes` + - `node_filesystem_avail_bytes` +- Container/service level: + - `process_open_fds` + - `tokio_*` runtime metrics (if enabled) for saturation indicators diff --git a/observability/docker-compose.yml b/observability/docker-compose.yml new file mode 100644 index 0000000..77ef8e9 --- /dev/null +++ b/observability/docker-compose.yml @@ -0,0 +1,62 @@ +services: + victoria-metrics: + image: victoriametrics/victoria-metrics:v1.120.0 + ports: + - "8428:8428" + command: + - "-retentionPeriod=30d" + volumes: + - victoria-metrics-data:/victoria-metrics-data + + vmagent: + image: victoriametrics/vmagent:v1.120.0 + depends_on: + - victoria-metrics + 
ports: + - "8429:8429" + command: + - "-promscrape.config=/etc/vmagent/scrape.yml" + - "-remoteWrite.url=http://victoria-metrics:8428/api/v1/write" + volumes: + - ./vmagent/scrape.yml:/etc/vmagent/scrape.yml:ro + + loki: + image: grafana/loki:3.5.5 + ports: + - "3100:3100" + command: + - "-config.file=/etc/loki/config.yml" + volumes: + - ./loki/config.yml:/etc/loki/config.yml:ro + - loki-data:/loki + + tempo: + image: grafana/tempo:2.8.2 + ports: + - "3200:3200" + - "4317:4317" + - "4318:4318" + command: + - "-config.file=/etc/tempo/config.yml" + volumes: + - ./tempo/config.yml:/etc/tempo/config.yml:ro + - tempo-data:/var/tempo + + grafana: + image: grafana/grafana:12.1.1 + depends_on: + - victoria-metrics + - loki + - tempo + ports: + - "3000:3000" + volumes: + - grafana-data:/var/lib/grafana + - ./grafana/provisioning:/etc/grafana/provisioning:ro + - ./grafana/dashboards:/var/lib/grafana/dashboards:ro + +volumes: + grafana-data: + loki-data: + tempo-data: + victoria-metrics-data: diff --git a/observability/grafana/dashboards/api_regression_deployment.json b/observability/grafana/dashboards/api_regression_deployment.json new file mode 100644 index 0000000..8a07c45 --- /dev/null +++ b/observability/grafana/dashboards/api_regression_deployment.json @@ -0,0 +1,39 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [], + "refresh": "10s", + "schemaVersion": 39, + "style": "dark", + "tags": ["cloudlysis"], + "templating": { + "list": [] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timezone": "", + "title": "API Regression & Deployment", + "uid": "cloudlysis-api-regression-deploy", + "version": 1 +} diff --git 
a/observability/grafana/dashboards/cluster.json b/observability/grafana/dashboards/cluster.json new file mode 100644 index 0000000..26f6f1e --- /dev/null +++ b/observability/grafana/dashboards/cluster.json @@ -0,0 +1,39 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [], + "refresh": "10s", + "schemaVersion": 39, + "style": "dark", + "tags": ["cloudlysis"], + "templating": { + "list": [] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timezone": "", + "title": "Cluster / Orchestrator", + "uid": "cloudlysis-cluster", + "version": 1 +} diff --git a/observability/grafana/dashboards/event_bus.json b/observability/grafana/dashboards/event_bus.json new file mode 100644 index 0000000..6458217 --- /dev/null +++ b/observability/grafana/dashboards/event_bus.json @@ -0,0 +1,39 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [], + "refresh": "10s", + "schemaVersion": 39, + "style": "dark", + "tags": ["cloudlysis"], + "templating": { + "list": [] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timezone": "", + "title": "Event Bus / JetStream", + "uid": "cloudlysis-event-bus", + "version": 1 +} diff --git a/observability/grafana/dashboards/http_detail.json b/observability/grafana/dashboards/http_detail.json new file mode 100644 index 0000000..1f7771b --- /dev/null +++ 
b/observability/grafana/dashboards/http_detail.json @@ -0,0 +1,39 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [], + "refresh": "10s", + "schemaVersion": 39, + "style": "dark", + "tags": ["cloudlysis"], + "templating": { + "list": [] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timezone": "", + "title": "HTTP Detail", + "uid": "cloudlysis-http-detail", + "version": 1 +} diff --git a/observability/grafana/dashboards/infrastructure_exhaustion.json b/observability/grafana/dashboards/infrastructure_exhaustion.json new file mode 100644 index 0000000..505b19a --- /dev/null +++ b/observability/grafana/dashboards/infrastructure_exhaustion.json @@ -0,0 +1,39 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [], + "refresh": "10s", + "schemaVersion": 39, + "style": "dark", + "tags": ["cloudlysis"], + "templating": { + "list": [] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timezone": "", + "title": "Infrastructure Exhaustion", + "uid": "cloudlysis-infra-exhaustion", + "version": 1 +} diff --git a/observability/grafana/dashboards/logs.json b/observability/grafana/dashboards/logs.json new file mode 100644 index 0000000..110ac18 --- /dev/null +++ b/observability/grafana/dashboards/logs.json @@ -0,0 +1,39 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": 
"grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [], + "refresh": "10s", + "schemaVersion": 39, + "style": "dark", + "tags": ["cloudlysis"], + "templating": { + "list": [] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timezone": "", + "title": "Logs", + "uid": "cloudlysis-logs", + "version": 1 +} diff --git a/observability/grafana/dashboards/noisy_neighbor_tenant_health.json b/observability/grafana/dashboards/noisy_neighbor_tenant_health.json new file mode 100644 index 0000000..0747a2d --- /dev/null +++ b/observability/grafana/dashboards/noisy_neighbor_tenant_health.json @@ -0,0 +1,39 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [], + "refresh": "10s", + "schemaVersion": 39, + "style": "dark", + "tags": ["cloudlysis"], + "templating": { + "list": [] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timezone": "", + "title": "Noisy Neighbor & Tenant Health", + "uid": "cloudlysis-noisy-neighbor", + "version": 1 +} diff --git a/observability/grafana/dashboards/operations_overview.json b/observability/grafana/dashboards/operations_overview.json new file mode 100644 index 0000000..c4fd0e1 --- /dev/null +++ b/observability/grafana/dashboards/operations_overview.json @@ -0,0 +1,39 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 
1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [], + "refresh": "10s", + "schemaVersion": 39, + "style": "dark", + "tags": ["cloudlysis"], + "templating": { + "list": [] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timezone": "", + "title": "Operations Overview", + "uid": "cloudlysis-ops-overview", + "version": 1 +} diff --git a/observability/grafana/dashboards/storage.json b/observability/grafana/dashboards/storage.json new file mode 100644 index 0000000..96983b2 --- /dev/null +++ b/observability/grafana/dashboards/storage.json @@ -0,0 +1,39 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [], + "refresh": "10s", + "schemaVersion": 39, + "style": "dark", + "tags": ["cloudlysis"], + "templating": { + "list": [] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timezone": "", + "title": "Storage", + "uid": "cloudlysis-storage", + "version": 1 +} diff --git a/observability/grafana/dashboards/storage_event_bus_bottlenecks.json b/observability/grafana/dashboards/storage_event_bus_bottlenecks.json new file mode 100644 index 0000000..6f94745 --- /dev/null +++ b/observability/grafana/dashboards/storage_event_bus_bottlenecks.json @@ -0,0 +1,39 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + 
"links": [], + "liveNow": false, + "panels": [], + "refresh": "10s", + "schemaVersion": 39, + "style": "dark", + "tags": ["cloudlysis"], + "templating": { + "list": [] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timezone": "", + "title": "Storage & Event Bus Bottlenecks", + "uid": "cloudlysis-storage-event-bus", + "version": 1 +} diff --git a/observability/grafana/dashboards/traces.json b/observability/grafana/dashboards/traces.json new file mode 100644 index 0000000..755644b --- /dev/null +++ b/observability/grafana/dashboards/traces.json @@ -0,0 +1,39 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [], + "refresh": "10s", + "schemaVersion": 39, + "style": "dark", + "tags": ["cloudlysis"], + "templating": { + "list": [] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timezone": "", + "title": "Traces", + "uid": "cloudlysis-traces", + "version": 1 +} diff --git a/observability/grafana/dashboards/workers.json b/observability/grafana/dashboards/workers.json new file mode 100644 index 0000000..74e24d5 --- /dev/null +++ b/observability/grafana/dashboards/workers.json @@ -0,0 +1,39 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [], + "refresh": "10s", + "schemaVersion": 39, + "style": "dark", + "tags": ["cloudlysis"], + "templating": { + "list": [] + }, + "time": { + "from": 
"now-6h", + "to": "now" + }, + "timezone": "", + "title": "Workers (Runner)", + "uid": "cloudlysis-workers", + "version": 1 +} diff --git a/observability/grafana/provisioning/dashboards/dashboards.yml b/observability/grafana/provisioning/dashboards/dashboards.yml new file mode 100644 index 0000000..473cb5f --- /dev/null +++ b/observability/grafana/provisioning/dashboards/dashboards.yml @@ -0,0 +1,8 @@ +apiVersion: 1 + +providers: + - name: cloudlysis + type: file + updateIntervalSeconds: 10 + options: + path: /var/lib/grafana/dashboards diff --git a/observability/grafana/provisioning/datasources/datasources.yml b/observability/grafana/provisioning/datasources/datasources.yml new file mode 100644 index 0000000..e1fd521 --- /dev/null +++ b/observability/grafana/provisioning/datasources/datasources.yml @@ -0,0 +1,21 @@ +apiVersion: 1 + +datasources: + - name: VictoriaMetrics + type: prometheus + access: proxy + url: http://victoria-metrics:8428 + isDefault: true + editable: false + + - name: Loki + type: loki + access: proxy + url: http://loki:3100 + editable: false + + - name: Tempo + type: tempo + access: proxy + url: http://tempo:3200 + editable: false diff --git a/observability/loki/config.yml b/observability/loki/config.yml new file mode 100644 index 0000000..86cbbce --- /dev/null +++ b/observability/loki/config.yml @@ -0,0 +1,28 @@ +auth_enabled: false + +server: + http_listen_port: 3100 + +common: + path_prefix: /loki + storage: + filesystem: + chunks_directory: /loki/chunks + rules_directory: /loki/rules + replication_factor: 1 + ring: + kvstore: + store: inmemory + +schema_config: + configs: + - from: 2025-01-01 + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + +limits_config: + allow_structured_metadata: true diff --git a/observability/tempo/config.yml b/observability/tempo/config.yml new file mode 100644 index 0000000..6b3f155 --- /dev/null +++ b/observability/tempo/config.yml @@ -0,0 +1,30 @@ +server: + 
http_listen_port: 3200 + +distributor: + receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + +ingester: + max_block_bytes: 1000000 + trace_idle_period: 10s + +compactor: + compaction: + block_retention: 24h + +storage: + trace: + backend: local + local: + path: /var/tempo/traces + +overrides: + defaults: + metrics_generator: + processors: [] diff --git a/observability/vmagent/scrape.yml b/observability/vmagent/scrape.yml new file mode 100644 index 0000000..19bd53f --- /dev/null +++ b/observability/vmagent/scrape.yml @@ -0,0 +1,16 @@ +global: + scrape_interval: 15s + +scrape_configs: + - job_name: victoria-metrics + static_configs: + - targets: ["victoria-metrics:8428"] + + - job_name: vmagent + static_configs: + - targets: ["vmagent:8429"] + + - job_name: control-api + metrics_path: /metrics + static_configs: + - targets: ["host.docker.internal:8080"] diff --git a/placement/dev.json b/placement/dev.json new file mode 100644 index 0000000..a2476b0 --- /dev/null +++ b/placement/dev.json @@ -0,0 +1,6 @@ +{ + "revision": "dev", + "aggregate_placement": { "placements": [] }, + "projection_placement": { "placements": [] }, + "runner_placement": { "placements": [] } +} diff --git a/projection/.gitignore b/projection/.gitignore new file mode 100644 index 0000000..079f015 --- /dev/null +++ b/projection/.gitignore @@ -0,0 +1,30 @@ +/target/ +/target-*/ +**/target/ +/data/ +*.mdbx +*.mdbx-lock +*.dat +*.lck + +.DS_Store + +.env +.env.* +.envrc +.direnv/ + +.idea/ +.vscode/ + +*.swp +*.swo +*~ + +docker-compose.override.yml + +/tmp/ +/coverage/ +lcov.info +*.profraw +*.profdata diff --git a/projection/COMPATIBILITY_PLAN.md b/projection/COMPATIBILITY_PLAN.md new file mode 100644 index 0000000..6c421c3 --- /dev/null +++ b/projection/COMPATIBILITY_PLAN.md @@ -0,0 +1,44 @@ +## Goal +Make `projection` and `aggregate` compatible to run behind the same gateway and against the same NATS JetStream cluster, with consistent shared types and 
operational conventions. + +## Constraints +- Keep both services independently deployable. +- Preserve existing stream/subject conventions unless they are unsafe for multi-instance use. +- Prefer backward-compatible changes in message formats (optional fields, tolerant decoding). +- Keep changes minimal and verified by fmt/test/clippy. + +## Compatibility Checklist +### JetStream / NATS +- Stream name: `AGGREGATE_EVENTS` in both services. +- Subject pattern: `tenant.*.aggregate.*.*` in both services. +- Consumer durability: + - Projection: stable durable name (or per-view derived durable). + - Aggregate: avoid fixed durable names for ad-hoc fetch operations to prevent collisions across instances. + +### Common Types +- `TenantId`: identical newtype behavior across both codebases. +- Event payload shape: + - Aggregate publishes `types::Event` with `event_id`, `command_id`, `version`, etc. + - Projection consumes an `EventEnvelope`; keep required fields stable and add optional fields to mirror aggregate where useful. + +## Plan (Implementation Order) +1. Align projection’s event envelope schema to accept and (optionally) emit aggregate-compatible identifiers: + - Add optional `event_id`, `command_id`, `version` fields. + - Keep decoding tolerant with `#[serde(default)]`. + - Avoid changing serialized output unless those fields are present. +2. Make aggregate’s JetStream fetch consumer safe for shared clusters: + - Generate unique consumer names per fetch call (tenant + aggregate + uuid). + - Use explicit ack policy. + - Bound fetch loops by timeout/idle to avoid hanging. + - Best-effort delete the consumer when done. +3. Enforce consistent “strict clippy” hygiene: + - Remove trivial `assert!(true)` and other clippy warnings in aggregate so both crates can run `cargo clippy -- -D warnings`. +4. 
Verification: + - `cargo fmt --check` + - `cargo test` + - `cargo clippy --all-targets -- -D warnings` + +## Expected Outcomes +- Both binaries can connect to the same JetStream cluster without consumer name collisions. +- Projection can decode aggregate-published events and has access to event identifiers/versions when present. +- Both repositories share the same strictness level for formatting and linting. diff --git a/projection/Cargo.toml b/projection/Cargo.toml new file mode 100644 index 0000000..cc2e4ff --- /dev/null +++ b/projection/Cargo.toml @@ -0,0 +1,36 @@ +[package] +name = "projection" +version = "0.1.0" +edition = "2021" + +[features] +default = [] +runtime-v8 = ["v8"] +runtime-wasm = [] + +[dependencies] +shared = { path = "../shared" } +edge_storage = { version = "0.1", registry = "madapes" } +runtime-function = { version = "0.2", registry = "madapes" } +edge-logger-client = { version = "0.1", registry = "madapes" } +query_engine = { version = "0.1", registry = "madapes" } +async-nats = "0.39" +tokio = { version = "1", features = ["full"] } +serde = { version = "1", features = ["derive"] } +serde_json = "1" +serde_yaml = "0.9" +toml = "0.8" +libmdbx = "0.6" +thiserror = "2" +anyhow = "1" +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["json", "env-filter"] } +uuid = { version = "1", features = ["v7", "serde"] } +chrono = { version = "0.4", features = ["serde"] } +futures = "0.3" +axum = "0.7" +v8 = { version = "0.106", optional = true } + +[dev-dependencies] +tempfile = "3" +tower = "0.5" diff --git a/projection/DEVELOPMENT_PLAN.md b/projection/DEVELOPMENT_PLAN.md new file mode 100644 index 0000000..035d2b3 --- /dev/null +++ b/projection/DEVELOPMENT_PLAN.md @@ -0,0 +1,406 @@ +# Development Plan: Projection Node + +## Overview + +This plan breaks down the Projection node implementation into milestones ordered by dependency. 
Each milestone includes: +- **Tasks** with clear deliverables +- **Test Requirements** (unit tests + tautological tests + integration tests where applicable) +- **Dependencies** on previous milestones + +**Development Approach:** +1. Complete one milestone at a time +2. Write tests before implementation (TDD where applicable) +3. All tests must pass before moving to the next milestone +4. Mark tasks complete with `[x]` as you progress + +--- + +## Milestone 1: Project Foundation + +**Goal:** Set up the Rust project with proper structure, dependencies, and basic tooling. + +### Tasks +- [x] **1.1** Initialize Cargo project + - Create `src/lib.rs` and `src/main.rs` + - Configure `Cargo.toml` with madapes registry +- [x] **1.2** Configure dependencies + - `edge-storage` (KvStore) + - `runtime-function` (DAG program execution for `project`) + - `query-engine` (UQF query support) + - `edge-logger-client` (structured logs client) + - `async-nats` (JetStream consumption) + - `tokio`, `serde`, `serde_json`, `thiserror`, `anyhow`, `tracing` +- [x] **1.3** Establish initial module layout + ``` + src/ + ├── lib.rs + ├── main.rs + ├── types/ + │ ├── mod.rs + │ ├── id.rs + │ ├── event.rs + │ ├── view.rs + │ ├── checkpoint.rs + │ └── error.rs + ├── config/ + │ ├── mod.rs + │ └── settings.rs + ├── storage/ + │ ├── mod.rs + │ └── kv.rs + ├── stream/ + │ ├── mod.rs + │ └── jetstream.rs + ├── project/ + │ ├── mod.rs + │ ├── runtime.rs + │ └── manifest.rs + ├── query/ + │ ├── mod.rs + │ └── uqf.rs + └── observability/ + └── mod.rs + ``` +- [x] **1.4** Configure clippy and rustfmt + +### Tests +- [x] **T1.1** Project compiles successfully +- [x] **T1.2** Dependencies resolve from madapes registry +- [x] **T1.3** Clippy passes with no warnings + +--- + +## Milestone 2: Core Types (Envelopes, View Keys, Checkpoints) + +**Goal:** Define all core types required for event consumption, view persistence, and idempotency. 
+ +### Dependencies +- Milestone 1 (project foundation) + +### Tasks +- [x] **2.1** Implement `TenantId` type + - Optional with default empty string for non-multi-tenant setups + - Display, FromStr, Serialize, Deserialize +- [x] **2.2** Implement `ViewType` and `ViewId` types + - String wrappers (consistent with `../aggregate` style: no validation in the type wrapper) + - Display, FromStr, Serialize, Deserialize +- [x] **2.3** Implement `ViewKey` composition + - `view:{tenant_id}:{view_type}:{view_id}` + - Centralize formatting/parsing in one place +- [x] **2.4** Implement `CheckpointKey` composition + - `checkpoint:{tenant_id}:{view_type}` +- [x] **2.5** Define event envelope type consumed from JetStream + - `tenant_id`, `aggregate_id`, `aggregate_type`, `event_type`, `payload`, `timestamp` + - Support forward-compatible decoding (unknown fields ignored) +- [x] **2.6** Define checkpoint representation + - Persisted value holds JetStream stream sequence (u64) and optional metadata +- [x] **2.7** Implement Projection error model + - Storage errors, stream errors, decode errors, project errors, tenant access errors +- [x] **2.8** Implement `ProjectionManifest` type + - Defines projections (`view_type`) and the `project` program reference for each + - Validates referenced programs exist +- [ ] **2.9** Add correlation/trace context fields to the event envelope (forward compatible) + - Support optional `correlation_id` and `traceparent` (or `trace_id`) so logs/traces can be stitched back to Gateway flows + - Preserve unknown fields for forward compatibility where practical + +### Tests +- [x] **T2.1** `TenantId` round-trips serialization and defaults to empty +- [x] **T2.2** `ViewType`, `ViewId` and key composition produce stable strings +- [x] **T2.3** Checkpoint encoding/decoding round-trips +- [x] **T2.4** Envelope decoding handles unknown fields +- [x] **T2.5** Tautological test: core types are Send + Sync + +--- + +## Milestone 3: Configuration + +**Goal:** 
Implement configuration loading and validation for the Projection node. + +### Dependencies +- Milestone 2 (core types) + +### Tasks +- [x] **3.1** Define `Settings` struct + - NATS URL, stream name, subject filter(s) + - Durable consumer name strategy (per tenant/view) + - Storage path + - Multi-tenancy enabled flag + default tenant behavior + - Backpressure configuration (max in-flight, batching, ack timeout) + - Manifest path (projection definitions and project program refs) +- [x] **3.2** Implement config loading from environment +- [x] **3.3** Implement config loading from file (YAML/TOML/JSON) + - Environment overrides file +- [x] **3.4** Implement config validation + - Required fields present + - Manifest loads and validates at startup (not inside `Settings::validate`) + +### Tests +- [x] **T3.1** Settings loads from environment variables +- [x] **T3.2** Settings validation catches missing/invalid values +- [x] **T3.3** Tautological test: Settings is Clone + Debug + +--- + +## Milestone 4: Storage Layer (KvStore Views + Atomic Checkpoints) + +**Goal:** Integrate `edge-storage` `KvStore` with transactionally correct view + checkpoint updates. 
+ +### Dependencies +- Milestone 2 (core types) +- Milestone 3 (configuration) + +### Tasks +- [x] **4.1** Create `KvClient` wrapper + - Opens MDBX-backed KvStore at configured path + - Tenant-aware key composition helpers +- [x] **4.2** Implement view CRUD primitives + - `get_view(view_key) -> Option` + - `put_view(view_key, value)` + - `delete_view_prefix(tenant_id, view_type)` for rebuilds +- [x] **4.3** Implement checkpoint primitives + - `get_checkpoint(checkpoint_key) -> Option` + - `put_checkpoint(checkpoint_key, sequence)` +- [x] **4.4** Implement atomic commit primitive + - `txn { put_view(s); put_checkpoint }` as one MDBX transaction + - Expose API that makes it hard to update one without the other +- [x] **4.5** Optional: storage circuit breaker + - Protects node from tight retry loops when storage is degraded + +### Tests +- [x] **T4.1** View round-trip: put/get returns identical JSON +- [x] **T4.2** Checkpoint round-trip: put/get returns identical sequence +- [x] **T4.3** Atomicity: if transaction fails, neither view nor checkpoint is committed +- [x] **T4.4** Prefix delete removes all keys for tenant/view_type + +--- + +## Milestone 5: JetStream Consumption (Durable Consumer + Idempotency) + +**Goal:** Consume events from NATS JetStream with correct delivery semantics and checkpoint-based idempotency. 
+ +### Dependencies +- Milestone 4 (storage layer) + +### Tasks +- [x] **5.1** Implement JetStream client wrapper + - Connect to NATS and bind to configured stream + - Create/bind durable consumer (single filter subject for now) +- [x] **5.2** Decode messages into event envelopes + - Extract JetStream stream sequence from message metadata + - Decode payload into the envelope type +- [x] **5.3** Implement idempotency gate + - Load checkpoint for `(tenant_id, view_type)` + - Skip/ack messages with sequence `<= checkpoint` +- [x] **5.4** Implement ack discipline + - Ack only after the MDBX transaction commits + - Define behavior for transient errors (no ack, allow redelivery) +- [x] **5.5** Implement poison-message policy + - Enforced via consumer max-deliver + TERM ack on excessive deliveries and KV quarantine record + +### Tests +- [x] **T5.1** Unit test: checkpoint gate skips sequences `<= checkpoint` +- [x] **T5.2** Unit test: ack is not called when storage commit fails +- [x] **T5.3** Integration test: JetStream redelivery re-processes unacked message and is made idempotent by checkpoint (ignored by default; run with `PROJECTION_TEST_NATS_URL=... cargo test -- --ignored`) + +--- + +## Milestone 6: Projection Execution (runtime-function `project` Program) + +**Goal:** Apply deterministic projection logic to turn `(current_view, event)` into `new_view`. 
+ +### Dependencies +- Milestone 5 (stream consumption) + +### Tasks +- [x] **6.1** Implement `project` execution wrapper + - Loads program referenced by manifest + - Executes deterministically with gas limits + timeouts via `runtime-function` +- [x] **6.2** Define projection invocation contract + - Input: `{ current_view, event }` + - Output: `{ new_view, view_id }` (or equivalent) +- [x] **6.3** Implement per-message processing pipeline + - Load current view + - Execute project program + - Atomically write new view + checkpoint + - Ack message +- [x] **6.4** Enforce per-entity ordering where required + - Serialize updates per `(tenant_id, view_type, view_id)` if correctness depends on ordering + +### Tests +- [x] **T6.1** Unit test: project program transforms input deterministically (same input → same output) +- [x] **T6.2** Unit test: pipeline writes checkpoint only when view write succeeds +- [ ] **T6.3** Integration test: concurrent processing does not violate per-key ordering when enabled + +--- + +## Milestone 7: Query Support (query-engine UQF) + +**Goal:** Provide query access to stored views using `query-engine` UQF over `KvStore::query()` prefix scans. + +### Dependencies +- Milestone 4 (storage layer) + +### Tasks +- [x] **7.1** Implement query wrapper around `KvStore` prefix scans + - Tenant-scoped prefix scanning + - UQF filtering/sorting support +- [x] **7.2** Define query interface (library API and/or service endpoint) + - Ensure tenant isolation for all query paths +- [x] **7.3** Add query-time safeguards + - Limits on result size and scan cost + - Stable pagination strategy (if required) + +### Tests +- [x] **T7.1** Unit test: tenant-scoped queries never return other-tenant keys +- [x] **T7.2** Unit test: UQF filter works on a small in-memory fixture dataset + +--- + +## Milestone 8: Replay, Rebuild, and Hot Provisioning + +**Goal:** Implement operational workflows: rebuilds, backfills, rolling upgrades, and safety checks. 
+ +### Dependencies +- Milestone 5 (JetStream consumption) +- Milestone 6 (projection execution) + +### Tasks +- [x] **8.1** Implement catch-up mode + - When no checkpoint exists, start from sequence 1 and process until tail +- [x] **8.2** Implement rebuild workflow + - Delete view prefix + checkpoint for `(tenant_id, view_type)` + - Re-consume from sequence 1 (or chosen seed sequence) +- [x] **8.3** Implement hot upgrade workflow (versioned views) + - New `view_type` (or suffix) + independent checkpoint + - Backfill then cutover routing, then retire old +- [x] **8.4** Add health/readiness signals + - Storage reachable + - NATS reachable + - Consumer lag below threshold (optional) + +### Tests +- [x] **T8.1** Integration test: rebuild from scratch produces identical view as uninterrupted consumption (ignored by default; run with `PROJECTION_TEST_NATS_URL=... cargo test -- --ignored`) +- [x] **T8.2** Integration test: rolling restart resumes from checkpoint without duplicating results (ignored by default; run with `PROJECTION_TEST_NATS_URL=... cargo test -- --ignored`) + +--- + +## Milestone 9: Container & Deployment + +**Goal:** Package as a container and enable predictable local and production deployment. 
+ +### Dependencies +- Milestone 8 (replay/rebuild + health/readiness) + +### Tasks +- [x] **9.1** Create `docker/Dockerfile.rust` + - Multi-stage build + - Minimal runtime image + - Health check integration +- [x] **9.2** Create `docker-compose.yml` for local dev + - Projection container + - NATS server with JetStream enabled + - Optional: Grafana, Victoria Metrics, Loki +- [x] **9.3** Create container entrypoint behavior + - Config loading + - Graceful shutdown on SIGTERM + - Stop pulling new JetStream messages + - Complete or safely abandon in-flight processing without acking early +- [x] **9.4** Define environment variables and defaults + - NATS URL, stream name, subject filters + - Storage path + - Consumer naming strategy (durable) + - Multi-tenancy enabled flag +- [x] **9.5** Create release build optimization + - LTO, strip, single codegen unit + +### Tests +- [x] **T9.1** Container builds successfully + ```bash + docker build -f docker/Dockerfile.rust --build-arg PACKAGE=projection --build-arg BIN=projection -t cloudlysis/projection:local . + docker run cloudlysis/projection:local --help + ``` +- [x] **T9.2** Container starts with valid config + ```bash + docker run -e PROJECTION_NATS_URL=nats://nats:4222 cloudlysis/projection:local + ``` + +--- + +## Milestone 10: Provisioning, Scalability, and Docker Swarm Deployment + +**Goal:** Support horizontal scaling and safe rollouts in Docker Swarm with clear provisioning semantics for JetStream consumers. 
+ +### Dependencies +- Milestone 9 (container & deployment) + +### Tasks +- [x] **10.1** Define the scaling model for JetStream consumption + - Use a durable consumer per `(view_type, shard)` so multiple replicas can share the same consumer workload + - Define the subject filter(s) used by each consumer (tenant wildcard vs tenant-range sharded) + - Document consumer configuration requirements (ack policy, max in-flight, replay policy) +- [x] **10.2** Implement replica-safe consumption + - Multiple replicas pulling from the same durable consumer distribute work + - Enforce per-key serialization if required for correctness +- [x] **10.3** Add tenant-aware provisioning option (sharding) + - Optional tenant-range sharding by subject filters (e.g., `tenant.<tenant_range>.*`) + - Placement constraints for Swarm nodes (e.g., `node.labels.tenant_range == <range>`) + - Strategy for adding/removing shards +- [x] **10.4** Create Swarm stack definition (`swarm/stacks/platform.yml`) + - Service definition + - Replicas configuration + - Resource limits (CPU, memory) + - Health check integration + - Storage volume mapping for `edge-storage` data directory +- [x] **10.5** Define rollout strategy + - Rolling update parameters + - Backfill/cutover strategy for versioned `view_type` upgrades + - Safe rollback story (old view still present + routing switch back) + +### Tests +- [x] **T10.1** Stack file valid + ```bash + docker stack config -c swarm/stacks/platform.yml + ``` +- [x] **T10.2** Scale-out does not duplicate work (ignored by default; run with `PROJECTION_TEST_NATS_URL=... cargo test -- --ignored`) + - Start 2+ replicas pulling from the same durable consumer + - Verify the checkpoint is monotonic and view updates are not applied twice for the same sequence +- [x] **T10.3** Rolling restart preserves correctness (ignored by default; run with `PROJECTION_TEST_NATS_URL=... 
cargo test -- --ignored`) + - Restart replicas during active consumption + - Verify idempotency holds (no view corruption, checkpoint monotonic) + +--- + +## Milestone 11: Operational Endpoints & Observability + +**Goal:** Provide the minimum operational surface required to provision, scale, and monitor Projection nodes in production. + +### Dependencies +- Milestone 9 (container & deployment) +- Milestone 10 (provisioning & scaling semantics) + +### Tasks +- [x] **11.1** Implement `/health` endpoint + - Process is up + - Storage opened successfully +- [x] **11.2** Implement `/ready` endpoint + - NATS connection established + - JetStream consumer bound + - Storage writable +- [x] **11.3** Implement `/metrics` endpoint (Prometheus) + - Consumer lag (stream sequence - checkpoint) + - Processing throughput and latency + - Redelivery count / ack failures + - Storage commit failures +- [x] **11.4** Add build/runtime identity + - Version, commit hash (if available), configured `view_type` set +- [x] **11.5** Add graceful drain behavior for rollouts + - Report “not ready” before shutdown + - Stop pulling, wait for in-flight work up to a timeout +- [ ] **11.6** Correlation-aware logging for investigations + - When processing messages, include `tenant_id`, `correlation_id`, and `trace_id` in structured logs/spans when present in the envelope/headers + +### Tests +- [x] **T11.1** Readiness fails when NATS is unavailable +- [x] **T11.2** Metrics include lag gauge and counters +- [x] **T11.3** Drain transitions ready → not ready before exit +- [ ] **T11.4** Unit test: envelope decoding accepts optional correlation/trace fields and exposes them for logging diff --git a/projection/docs/scaling.md b/projection/docs/scaling.md new file mode 100644 index 0000000..afd4b04 --- /dev/null +++ b/projection/docs/scaling.md @@ -0,0 +1,28 @@ +# Scaling & Provisioning + +## Consumer Modes + +The projection supports two JetStream consumption modes (configured via `PROJECTION_CONSUMER_MODE`): 
+ +- `single`: one durable consumer per projection process. Each message is processed once and the projection updates all `view_type`s defined in the manifest for that message. +- `per_view`: one durable consumer per `view_type`. Each consumer processes all events, but only updates its own `view_type` (checkpoint isolation and independent scaling). + +In `per_view` mode, durable names are derived as: + +`{PROJECTION_DURABLE_NAME}_{view_type}` (with any character outside `[A-Za-z0-9_-]` replaced by `_`). + +## Replica Scaling + +To scale replicas, run multiple instances with the same durable name(s): + +- `single` mode: all replicas share one durable consumer and work-steal messages from that consumer. +- `per_view` mode: all replicas share a durable consumer per `view_type`. + +## Sharding + +JetStream subject filtering does not support “hash/range shard by aggregate_id” on the consumer side. + +If strict per-entity ordering is required across replicas, sharding must be encoded in the published subjects (for example `shard.<shard_id>.tenant.<tenant_id>.aggregate.<aggregate_type>.<event_type>`), and each shard must run with a matching `PROJECTION_SUBJECT_FILTERS` value. + +Without subject-based sharding, multiple replicas can process events for the same `view_id` concurrently, which can break projections that depend on ordered read-modify-write updates. 
+ diff --git a/projection/external_prd.md b/projection/external_prd.md new file mode 100644 index 0000000..9075799 --- /dev/null +++ b/projection/external_prd.md @@ -0,0 +1,192 @@ +### External PRD: Changes Required in Aggregate, Projection, Runner + +This document captures the work needed outside the Gateway to support: +- Tenant-aware routing via `x-tenant-id` +- Independent horizontal scalability of Aggregate, Projection, Runner +- A safe mechanism for tenant rebalancing per service kind + +--- + +## **Target State** + +### Independent Placements + +Each service kind has its own placement map: +- `aggregate_placement[tenant_id] -> aggregate_shard_id` +- `projection_placement[tenant_id] -> projection_shard_id` +- `runner_placement[tenant_id] -> runner_shard_id` + +Each shard is a replica set that can scale independently. + +### Rebalancing Contract (Per Service Kind) + +All nodes MUST support: +- Dynamic placement updates (watch NATS KV or reload config) +- A drain mechanism that can target a specific tenant (stop acquiring new work for that tenant, finish in-flight, report status) +- Clear readiness semantics that reflect whether the node will accept work for a tenant + +Additionally, all nodes SHOULD converge on the same operational contract: +- A per-tenant “accepting” gate (can this shard accept new work/queries/commands for tenant X?) 
+- A per-tenant “drained” signal (no in-flight work remains for tenant X) +- A per-tenant warmup/catchup signal where relevant (projection lag, aggregate snapshot availability) + +--- + +## **Aggregate: Required Changes** + +### 1) Expose a Real Command API (Gateway Upstream) + +Today, Aggregate has internal command handling types (e.g., `CommandServer`) but its running HTTP server only exposes health/metrics/admin endpoints ([aggregate/http_server.rs](file:///Users/vlad/Developer/cloudlysis/aggregate/src/http_server.rs#L15-L82), [aggregate/server/mod.rs](file:///Users/vlad/Developer/cloudlysis/aggregate/src/server/mod.rs#L81-L213)). + +Aggregate MUST expose one of the following upstream APIs for the Gateway to call: +- **Option A (Recommended)**: gRPC server implementing `aggregate.gateway.v1.CommandService/SubmitCommand` compatible with [aggregate.proto](file:///Users/vlad/Developer/cloudlysis/aggregate/proto/aggregate.proto#L1-L31). +- **Option B**: HTTP endpoint for command submission (REST), with a stable request/response shape that the Gateway can proxy. + +### 2) Tenant Placement Enforcement + +Aggregate MUST enforce “hosted tenants” so independent scaling is safe: +- If an Aggregate shard/node is not assigned a tenant, it MUST reject commands for that tenant (e.g., `403` or `503` with retriable hint depending on whether the issue is authorization vs placement). +- Aggregate SHOULD maintain an in-memory allowlist of hosted tenants that is driven by: + - NATS KV placement watcher (preferred), or + - Hot-reloaded config pushed via `/admin/reload` + +Aggregate already has admin hooks for drain/reload, but they are currently generic and/or illustrative ([aggregate/http_server.rs](file:///Users/vlad/Developer/cloudlysis/aggregate/src/http_server.rs#L15-L72), [aggregate/server/mod.rs](file:///Users/vlad/Developer/cloudlysis/aggregate/src/server/mod.rs#L402-L442)). These need to become placement-aware. 
+ +### 3) Tenant Drain (Per Tenant) + +Aggregate MUST provide a per-tenant drain mechanism to support rebalancing: +- Stop accepting new commands for the tenant. +- Allow in-flight commands to finish (bounded wait), then report drained. +- Expose drain status per tenant (admin endpoint). + +### 4) Rebalancing State Strategy + +Aggregate persists snapshots locally (MDBX) and uses JetStream for events. To move a tenant: +- **Approach 1 (Snapshot migration)**: copy tenant snapshot DB/state to the target shard, then switch placement. +- **Approach 2 (Cold rehydrate)**: switch placement and let the target shard rebuild state by replaying events from JetStream; expect higher latency during warmup. + +The system should support both, with the rebalancer selecting the strategy based on tenant size/SLO. + +### 5) Metrics for Placement Decisions + +Aggregate SHOULD expose: +- Per-tenant command rate, error rate +- In-flight commands by tenant +- Rehydrate time / snapshot hit ratio +- Storage size per tenant (if feasible) + +--- + +## **Projection: Required Changes** + +### 1) Expose Query API Upstream for Gateway + +Projection has a working `QueryService` with tenant-scoped prefix scans ([uqf.rs](file:///Users/vlad/Developer/cloudlysis/projection/src/query/uqf.rs#L121-L162)) but it is not exposed via HTTP/gRPC (current HTTP routes are health/ready/metrics/info only: [projection/http/mod.rs](file:///Users/vlad/Developer/cloudlysis/projection/src/http/mod.rs#L102-L109)). + +Projection MUST add one upstream API the Gateway can route to: +- `POST /query/{view_type}` (HTTP) accepting `x-tenant-id` and a UQF payload, returning `QueryResponse`. +- Or a gRPC query service (new proto) if gRPC is preferred end-to-end. + +### 2) Tenant Placement Filtering (Independent Scaling) + +Projection MUST support running in one of these modes: +- **Multi-tenant shard**: consumes all tenants (simple, less isolated). 
+- **Tenant-filtered shard (required for rebalancing)**: + - only consumes/serves queries for the tenants assigned to that shard + - rejects queries for unassigned tenants (consistent error semantics) + +Implementation direction: +- Add a placement watcher similar to Runner’s tenant filter ([runner/tenant_placement.rs](file:///Users/vlad/Developer/cloudlysis/runner/src/tenant_placement.rs#L8-L100)). +- Apply tenant filter to: + - event consumption subject filters (preferred), and + - query serving validation (always). + +### 3) Drain + Warmup Endpoints + +Projection SHOULD add: +- `/admin/drain?tenant_id=...` (stop consuming new events for that tenant, finish in-flight, flush checkpoints) +- `/admin/reload` (apply latest placement/config) +- Optional warmup status: whether the shard has caught up to JetStream tail for that tenant/view_types + +### 4) Rebalancing Strategy for Projection + +Projection can rebalance safely with “warm then cut over”: +- Assign tenant to the new projection shard while old shard still serves. +- New shard catches up (replay from JetStream, build view KV). +- Switch Gateway placement for query routing to new shard. +- Drain old shard for that tenant and optionally delete old tenant KV keys. 
+ +### 5) Metrics for Placement Decisions + +Projection SHOULD expose: +- JetStream lag per tenant/view_type (tail minus checkpoint) +- Query latency and scan counts +- Storage size per tenant (if feasible) + +--- + +## **Runner: Required Changes** + +Runner already has: +- A tenant placement watcher capable of producing an allowlist ([tenant_placement.rs](file:///Users/vlad/Developer/cloudlysis/runner/src/tenant_placement.rs#L8-L100)) +- Admin endpoints including drain/reload/config ([runner/http/mod.rs](file:///Users/vlad/Developer/cloudlysis/runner/src/http/mod.rs#L69-L86)) +- Gateway client integration for aggregate command submission ([runner/gateway/mod.rs](file:///Users/vlad/Developer/cloudlysis/runner/src/gateway/mod.rs#L1-L47)) + +To support independent scalability + rebalancing, Runner needs the following. + +### 1) Per-Tenant Drain (Not Only Global) + +Runner’s current drain is global (`/admin/drain` toggles a single draining flag). Runner MUST support draining a specific tenant: +- Stop acquiring new saga/effect work for the tenant. +- Allow in-flight work for the tenant to finish (bounded). +- Flush outbox for the tenant (or guarantee idempotency on handoff). +- Persist final checkpoints so another shard can continue without duplication beyond at-least-once bounds. + +### 2) Placement-Enforced Work Acquisition + +Runner MUST validate tenant assignment at the boundary where it: +- consumes JetStream messages (saga triggers, effect commands), and +- dispatches outbox work. + +If a tenant is not assigned to the shard, Runner must not process its work. + +### 3) Handoff Safety Rules for Rebalancing + +Runner rebalancing should follow: +- New shard begins processing only after it is assigned the tenant. +- Old shard stops acquiring new work for that tenant, then drains. +- Idempotency remains correct across handoff using checkpoints and dedupe markers. 
+ +### 4) Metrics for Placement Decisions + +Runner SHOULD expose: +- Outbox depth by tenant +- Work processing latency and retries by tenant/effect +- Schedule due items by tenant +- Consumer lag by tenant (if the consumption model supports per-tenant lag) + +### 5) Auth Delivery Side Effects (Email/SMS/Push) + +If the platform’s AuthN flows require out-of-band delivery (password reset links, email verification, MFA codes), the Runner SHOULD be the standard place to execute those side effects: +- Define a stable effect interface for sending transactional emails (reset links, verification links, security alerts). +- Optionally add SMS/push providers later under the same effect contract. + +This keeps the Gateway free of long-lived provider credentials and aligns with the existing “effects are executed by workers” pattern. + +--- + +## **Gateway Integration Notes** + +Once the above changes exist: +- Gateway routes per `(tenant_id, service_kind)` using independent placement maps. +- Gateway can implement “warm then cut over” rebalancing for Projection and Runner by switching only query/workflow routing after readiness conditions are met. +- Gateway can enforce consistent tenant validation, authn/authz, and error semantics at the edge even as placements move. + +--- + +## **Gaps / Opportunities** + +- **KV schema + ownership**: define the exact NATS KV bucket layout, key naming, revisioning rules, and who is allowed to write placement updates. +- **Rebalancer API**: define operator workflows (plan/apply/rollback), status reporting, and audit log requirements for placement changes. +- **Shard discovery**: define how shard endpoints are registered (static config vs KV directory entries) and how health is represented. +- **Consistency boundaries**: define rebalancing guarantees per service kind (projection can be warm-cutover; runner requires checkpoint handoff; aggregate requires single-writer and state availability). 
diff --git a/projection/prd.md b/projection/prd.md new file mode 100644 index 0000000..0db9d3f --- /dev/null +++ b/projection/prd.md @@ -0,0 +1,109 @@ +The **Projection** is the "Read Side" of your CQRS (Command Query Responsibility Segregation) architecture. While Aggregates focus on **writing** valid data, Projections focus on **reading** and **formatting** that data for the end-user or application. + +In your framework, Projections are **event-driven views** that transform the stream of facts from **NATS JetStream** into highly optimized, queryable state in `edge-storage` `KvStore`, queryable via the embedded `query-engine` (UQF). + +--- + +### 🧱 Component: Projection (Read Model) +**Definition:** +A Projection is a standalone Rust-based container that consumes Events from **NATS JetStream** and incrementally updates one or more "Read Models" in `edge-storage`. Its sole purpose is to provide a high-performance, pre-computed view of the system state that is optimized for specific queries, bypassing the need to rehydrate Aggregate state or replay event streams at query time. + +**Multi-Tenancy:** +The Projection supports optional multi-tenancy via `tenant_id`. 
When enabled: +- **Subject Naming:** JetStream subjects include `tenant_id` (e.g., `tenant.<tenant_id>.aggregate.<aggregate_type>.<event_type>`) +- **Storage Namespacing:** Views and checkpoints are namespaced by `tenant_id` to prevent cross-tenant reads +- **Query Isolation:** Queries are tenant-scoped (e.g., `x-tenant-id` header) and only scan tenant-prefixed keys +- **Backward Compatibility:** Deployments without multi-tenancy use a default/empty `tenant_id` + +**Dependencies:** +* Core crates pulled from the custom Cargo registry: + ```toml + [registries.madapes] + index = "sparse+https://git.madapes.com/api/packages/madapes/cargo/" + ``` + + | Crate | Purpose | + |-------|---------| + | `edge-storage` | libmdbx-backed `KvStore` for durable view storage | + | `runtime-function` | Deterministic DAG execution for `project` programs | + | `edge-logger` | High-performance logging (UDS + Protobuf, Loki sink) | + | `query-engine` | UQF query support for filtering/querying view state | + | `async-nats` | NATS JetStream client for event consumption | + +* Source code available at `../../madapes/` +* **Note:** This is a standalone container — it does not use `event-bus` or gRPC `Consume`/`FetchBatch` APIs + +#### 1. Core Responsibilities +* **Event Consumption:** Subscribes to one or more JetStream subjects (typically Aggregate event subjects) using a durable consumer, filtering with subject wildcards. +* **State Transformation:** Uses a `project` program (`runtime-function` DAG) to map an incoming event to a state change (e.g., `IncrementCounter`, `UpdateUserEmail`, `AddToList`). +* **Read Model Persistence:** Stores the resulting "View" in `edge-storage` `KvStore` as a JSON document, keyed by `view:{tenant_id}:{view_type}:{view_id}` (e.g., `view:tenant_a:UserDashboard:user_123`). +* **Query Serving:** Provides read access via `query-engine` UQF queries. The existing `KvStore::query()` integration performs prefix scans and applies UQF filters/sorts. 
+* **Checkpointing:** Tracks its stream position (JetStream stream sequence) in `edge-storage` `KvStore` (key: `checkpoint:{tenant_id}:{view_type}`) to resume correctly after a restart. +* **Safe Acknowledgement:** Acks JetStream messages only after the view update and checkpoint are durably committed. + +#### 2. The Lifecycle of a Projection Update +1. **Ingestion:** The Projection receives a JetStream message whose payload is a `FrameworkEnvelope` (or equivalent event envelope). It extracts the message metadata (at minimum, the JetStream **stream sequence**) used for idempotency. +2. **Context Loading:** + * The Projection fetches the current "View" from `edge-storage` `KvStore` (e.g., `kv.get("view:tenant_a:UserDashboard:user_123")`). +3. **Transformation (`runtime-function`):** + * It executes the `project` DAG program: `(current_view_state, incoming_event) → new_view_state`. + * Alternatively, it can use `KvStore::query()` (with `query-engine` UQF) to perform cross-projection lookups to build the new state. +4. **Atomic Update:** + * The Projection saves the `new_view_state` back to `edge-storage` `KvStore`. + * **Critical:** It must save the **checkpoint** (JetStream stream sequence) as part of the same MDBX transaction (e.g., `kv.put_sync("checkpoint:tenant_a:UserDashboard", stream_sequence)`). This ensures crash-recovery correctness. +5. **Acknowledge:** After the transaction commits, the Projection acks the JetStream message so it will not be redelivered. +6. **Query Availability:** The updated state is immediately available for applications to query via `query-engine` UQF queries. + +#### 3. Technical Constraints & Guarantees +* **Eventual Consistency:** Projections are inherently "behind" the Aggregate. There is a sub-second (usually) delay between an event being committed and the Projection reflecting that change. 
+* **Idempotency:** Since JetStream provides **at-least-once** delivery, the Projection must use its stored **Checkpoint** (stream sequence) to ignore events it has already processed. +* **Disposable & Rebuildable:** Because JetStream is a durable log, Projections are "disposable." If a business requirement changes, you can delete a Projection's KV entries, create a new `runtime-function` program, and **replay** the entire history from JetStream (starting from sequence 1) to build a new view from scratch. +* **Read-Only:** Projections never produce events or commands. They are strictly "sinks" for data. + +#### 4. Replay & Recovery Model +* **Catch-up Mode:** When a new Projection is deployed (no checkpoint exists), it starts from the beginning of the JetStream stream (sequence 1) and consumes as fast as possible until it reaches the tail. +* **Live Mode:** Once caught up, it continues consuming in real time using the same durable consumer, relying on JetStream acks/redelivery for reliability. + +#### 5. Snapshots (Relationship to Aggregates) +The Projection does not require Aggregate snapshots to function, because its source of truth for changes is the JetStream event stream. However, snapshots are still relevant in two ways: + +* **Aggregate Snapshots (Write Side):** Aggregates persist versioned snapshots in `edge-storage` `AggregateStore` to speed up Aggregate rehydration. These snapshots are not a read API for projections and should not be treated as a substitute for consuming events. +* **Projection State (Read Side):** A Projection’s stored View in `edge-storage` `KvStore` is effectively its own “snapshot” of the read model at a specific checkpoint (stream sequence). +* **Fast Recovery:** On restart, the Projection loads `checkpoint:{tenant_id}:{view_type}`, resumes JetStream consumption from the next sequence, and continues updating existing View records in place. No replay is required unless the checkpoint is missing or the view schema/logic has changed. 
+* **Optional Seeding:** For very large histories, a Projection may optionally seed an initial View state from a recent Aggregate snapshot or an external export, then set its checkpoint to a known JetStream stream sequence and continue consuming events forward from that point. This preserves incremental correctness while reducing rebuild time. + +#### 6. Hot Provisioning (Rolling Scale + Rolling Upgrades) +Projections are designed to be provisioned and updated without downtime. + +* **Hot Scale-Out:** Multiple Projection replicas can run concurrently per `tenant_id` and `view_type`. JetStream consumer configuration is used to ensure each event is processed by exactly one replica within a replica set. +* **Hot Restart:** A restarted instance resumes from the persisted checkpoint and continues consumption; recovery time is proportional to the gap between the checkpoint and the stream tail. +* **Hot Upgrade (Projection Logic):** To change a `project` program safely: + * Deploy a new Projection version under a new `view_type` (or `view_type` + version suffix) with its own checkpoint. + * Backfill by consuming from sequence 1 (or from a chosen seed sequence) until caught up. + * Switch query routing from the old view keys to the new view keys. + * Retire old view data and checkpoint after the cutover. +* **In-Place Migration:** If the schema change is backward compatible, a Projection may evolve the stored View shape incrementally while processing events, but this requires strict versioning in the View payload. + +#### 7. Caveats & Operational Notes +* **Ordering Guarantees:** JetStream preserves ordering per stream, but if the Projection processes messages concurrently it can violate per-entity ordering. If ordering matters for a `view_id`, enforce per-key serialization in the Projection. +* **At-Least-Once Reality:** Redeliveries can happen (network splits, ack timeouts, restarts). 
The Projection must be idempotent via checkpoint checks and/or per-event dedupe keyed by stream sequence. +* **Ack Discipline:** Never ack before the MDBX transaction commits. Treat “view update + checkpoint update + ack” as one logical commit. +* **Poison Messages:** A single malformed event or incompatible schema can stall a durable consumer. Define a policy for retries, quarantine, and alerting (including whether to skip and record the failure). +* **Schema Evolution:** Projection logic must be able to handle old event versions or explicitly version the stream/subjects. Projection View schemas also need versioning if you support in-place migrations. +* **Backpressure & Lag:** Catch-up replays can saturate storage and CPU. Monitor consumer lag, redeliveries, and processing latency; apply limits (max in-flight, batching) to protect the node. +* **Rebuild Semantics:** Rebuilds must delete both View keys and checkpoints for the target `tenant_id`/`view_type`. Partial deletes can create “mixed era” views. +* **Cross-View Lookups:** Using `KvStore::query()` to join across projections is convenient but can amplify read load and introduce consistency anomalies between views. Prefer event-local computation when possible. + +#### 8. Data Structure (The View Envelope) +* `view_id`: The unique key for the record (e.g., `user_id`). Used in KvStore key: `view:{tenant_id}:{view_type}:{view_id}`. +* `view_type`: The name of the projection (e.g., `active_users_list`). +* `last_event_sequence`: The checkpoint (JetStream stream sequence) of the last event processed. Stored separately in `checkpoint:{tenant_id}:{view_type}`. +* `data`: The actual payload (JSON) optimized for the UI or API, stored as the KvStore value. + +--- + +### 💡 Key Distinction for your PRD: +In your framework, the **Projection** is where the "Distributed" part of the system becomes visible to the user. + +* **Aggregates** are for **Consistency** (The Truth). 
+* **Projections** are for **Performance** (The Speed). diff --git a/projection/rustfmt.toml b/projection/rustfmt.toml new file mode 100644 index 0000000..a311b9d --- /dev/null +++ b/projection/rustfmt.toml @@ -0,0 +1,2 @@ +edition = "2021" +newline_style = "Unix" diff --git a/projection/src/config/mod.rs b/projection/src/config/mod.rs new file mode 100644 index 0000000..6f5f373 --- /dev/null +++ b/projection/src/config/mod.rs @@ -0,0 +1,3 @@ +mod settings; + +pub use settings::{ConsumerMode, Settings, SettingsLoadError}; diff --git a/projection/src/config/settings.rs b/projection/src/config/settings.rs new file mode 100644 index 0000000..ebe1f71 --- /dev/null +++ b/projection/src/config/settings.rs @@ -0,0 +1,325 @@ +use serde::{Deserialize, Serialize}; +use std::path::Path; + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(default)] +pub struct Settings { + pub nats_url: String, + pub stream_name: String, + pub subject_filters: Vec, + pub durable_name: String, + pub storage_path: String, + pub manifest_path: String, + pub multi_tenant_enabled: bool, + pub default_tenant_id: Option, + pub tenant_placement_path: Option, + pub max_in_flight: usize, + pub ack_timeout_ms: u64, + pub max_deliver: i64, + pub consumer_mode: ConsumerMode, + pub http_addr: String, + pub storage_backoff_ms: u64, + pub storage_backoff_max_ms: u64, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)] +#[serde(rename_all = "snake_case")] +pub enum ConsumerMode { + #[default] + Single, + PerView, +} + +impl Default for Settings { + fn default() -> Self { + Self { + nats_url: "nats://localhost:4222".to_string(), + stream_name: "AGGREGATE_EVENTS".to_string(), + subject_filters: vec!["tenant.*.aggregate.*.*".to_string()], + durable_name: "projection".to_string(), + storage_path: "./data".to_string(), + manifest_path: "./projection-manifest.yaml".to_string(), + multi_tenant_enabled: true, + default_tenant_id: None, + tenant_placement_path: None, + 
max_in_flight: 128, + ack_timeout_ms: 30_000, + max_deliver: 10, + consumer_mode: ConsumerMode::Single, + http_addr: "0.0.0.0:8080".to_string(), + storage_backoff_ms: 50, + storage_backoff_max_ms: 2_000, + } + } +} + +impl Settings { + pub fn from_env() -> Result { + let mut settings = Self::default(); + settings.apply_env_overrides(); + Ok(settings) + } + + pub fn from_yaml(yaml: &str) -> Result { + serde_yaml::from_str(yaml) + } + + pub fn from_toml(toml_str: &str) -> Result { + toml::from_str(toml_str) + } + + pub fn from_json(json: &str) -> Result { + serde_json::from_str(json) + } + + pub fn from_file(path: impl AsRef) -> Result { + let path = path.as_ref(); + let raw = std::fs::read_to_string(path)?; + let ext = path.extension().and_then(|e| e.to_str()).unwrap_or(""); + + match ext { + "yaml" | "yml" => Ok(Self::from_yaml(&raw)?), + "toml" => Ok(Self::from_toml(&raw)?), + "json" => Ok(Self::from_json(&raw)?), + _ => Err(SettingsLoadError::UnsupportedFormat { + path: path.display().to_string(), + }), + } + } + + pub fn load_from_file_with_env_overrides( + path: impl AsRef, + ) -> Result { + let mut settings = Self::from_file(path)?; + settings.apply_env_overrides(); + Ok(settings) + } + + fn apply_env_overrides(&mut self) { + if let Ok(url) = std::env::var("PROJECTION_NATS_URL") { + self.nats_url = url; + } + + if let Ok(stream) = std::env::var("PROJECTION_STREAM_NAME") { + self.stream_name = stream; + } + + if let Ok(filters) = std::env::var("PROJECTION_SUBJECT_FILTERS") { + let values = filters + .split(',') + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()) + .collect::>(); + if !values.is_empty() { + self.subject_filters = values; + } + } + + if let Ok(durable) = std::env::var("PROJECTION_DURABLE_NAME") { + self.durable_name = durable; + } + + if let Ok(path) = std::env::var("PROJECTION_STORAGE_PATH") { + self.storage_path = path; + } + + if let Ok(path) = std::env::var("PROJECTION_MANIFEST_PATH") { + self.manifest_path = path; + } + + if let 
Ok(enabled) = std::env::var("PROJECTION_MULTI_TENANT") { + if let Ok(value) = enabled.parse() { + self.multi_tenant_enabled = value; + } + } + + if let Ok(default_tenant_id) = std::env::var("PROJECTION_DEFAULT_TENANT_ID") { + if default_tenant_id.is_empty() { + self.default_tenant_id = None; + } else { + self.default_tenant_id = Some(default_tenant_id); + } + } + + if let Ok(path) = std::env::var("PROJECTION_TENANT_PLACEMENT_PATH") { + if path.trim().is_empty() { + self.tenant_placement_path = None; + } else { + self.tenant_placement_path = Some(path); + } + } + + if let Ok(max_in_flight) = std::env::var("PROJECTION_MAX_IN_FLIGHT") { + if let Ok(value) = max_in_flight.parse() { + self.max_in_flight = value; + } + } + + if let Ok(ms) = std::env::var("PROJECTION_ACK_TIMEOUT_MS") { + if let Ok(value) = ms.parse() { + self.ack_timeout_ms = value; + } + } + + if let Ok(max_deliver) = std::env::var("PROJECTION_MAX_DELIVER") { + if let Ok(value) = max_deliver.parse() { + self.max_deliver = value; + } + } + + if let Ok(mode) = std::env::var("PROJECTION_CONSUMER_MODE") { + self.consumer_mode = match mode.trim().to_ascii_lowercase().as_str() { + "single" => ConsumerMode::Single, + "per_view" | "per-view" | "perview" => ConsumerMode::PerView, + _ => self.consumer_mode, + }; + } + + if let Ok(addr) = std::env::var("PROJECTION_HTTP_ADDR") { + if !addr.trim().is_empty() { + self.http_addr = addr; + } + } + + if let Ok(ms) = std::env::var("PROJECTION_STORAGE_BACKOFF_MS") { + if let Ok(value) = ms.parse() { + self.storage_backoff_ms = value; + } + } + + if let Ok(ms) = std::env::var("PROJECTION_STORAGE_BACKOFF_MAX_MS") { + if let Ok(value) = ms.parse() { + self.storage_backoff_max_ms = value; + } + } + } + + pub fn validate(&self) -> Result<(), String> { + if self.nats_url.is_empty() { + return Err("NATS URL is required".to_string()); + } + if self.stream_name.is_empty() { + return Err("Stream name is required".to_string()); + } + if self.storage_path.is_empty() { + return 
Err("Storage path is required".to_string());
        }
        if self.subject_filters.is_empty() {
            return Err("At least one subject filter is required".to_string());
        }
        if self.durable_name.is_empty() {
            return Err("Durable name is required".to_string());
        }
        Ok(())
    }
}

/// Errors produced while loading `Settings` from disk.
#[derive(Debug, thiserror::Error)]
pub enum SettingsLoadError {
    #[error("Failed to read config file: {0}")]
    Io(#[from] std::io::Error),
    #[error("Failed to parse YAML config: {0}")]
    Yaml(#[from] serde_yaml::Error),
    #[error("Failed to parse TOML config: {0}")]
    Toml(#[from] toml::de::Error),
    #[error("Failed to parse JSON config: {0}")]
    Json(#[from] serde_json::Error),
    #[error("Unsupported config format: {path}")]
    UnsupportedFormat { path: String },
}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    // Serializes tests that mutate process-wide environment variables so they
    // cannot observe each other's overrides.
    fn env_lock() -> std::sync::MutexGuard<'static, ()> {
        static LOCK: std::sync::OnceLock<std::sync::Mutex<()>> = std::sync::OnceLock::new();
        LOCK.get_or_init(|| std::sync::Mutex::new(()))
            .lock()
            .unwrap()
    }

    #[test]
    fn settings_from_env() {
        let _guard = env_lock();
        std::env::set_var("PROJECTION_NATS_URL", "nats://localhost:4222");
        let settings = Settings::from_env().unwrap();
        assert_eq!(settings.nats_url, "nats://localhost:4222");
        std::env::remove_var("PROJECTION_NATS_URL");
    }

    #[test]
    fn consumer_mode_from_env() {
        let _guard = env_lock();
        std::env::set_var("PROJECTION_CONSUMER_MODE", "per_view");
        let settings = Settings::from_env().unwrap();
        assert_eq!(settings.consumer_mode, ConsumerMode::PerView);
        std::env::remove_var("PROJECTION_CONSUMER_MODE");
    }

    #[test]
    fn http_addr_from_env() {
        let _guard = env_lock();
        std::env::set_var("PROJECTION_HTTP_ADDR", "127.0.0.1:12345");
        let settings = Settings::from_env().unwrap();
        assert_eq!(settings.http_addr, "127.0.0.1:12345");
        std::env::remove_var("PROJECTION_HTTP_ADDR");
    }

    #[test]
    fn storage_backoff_from_env() {
        let _guard = env_lock();
std::env::set_var("PROJECTION_STORAGE_BACKOFF_MS", "10"); + std::env::set_var("PROJECTION_STORAGE_BACKOFF_MAX_MS", "20"); + let settings = Settings::from_env().unwrap(); + assert_eq!(settings.storage_backoff_ms, 10); + assert_eq!(settings.storage_backoff_max_ms, 20); + std::env::remove_var("PROJECTION_STORAGE_BACKOFF_MS"); + std::env::remove_var("PROJECTION_STORAGE_BACKOFF_MAX_MS"); + } + + #[test] + fn settings_validation() { + let settings = Settings { + nats_url: "".to_string(), + ..Default::default() + }; + assert!(settings.validate().is_err()); + } + + #[test] + fn settings_from_yaml_file_and_env_override() { + let _guard = env_lock(); + let dir = tempdir().unwrap(); + let file_path = dir.path().join("projection.yaml"); + std::fs::write( + &file_path, + r#" +nats_url: "nats://from-file:4222" +stream_name: "AGGREGATE_EVENTS" +subject_filters: + - "tenant.*.aggregate.*.*" +storage_path: "/tmp/proj" +durable_name: "proj" +multi_tenant_enabled: false +"#, + ) + .unwrap(); + + std::env::set_var("PROJECTION_NATS_URL", "nats://from-env:4222"); + let settings = Settings::load_from_file_with_env_overrides(&file_path).unwrap(); + assert_eq!(settings.nats_url, "nats://from-env:4222"); + assert_eq!(settings.storage_path, "/tmp/proj"); + assert_eq!(settings.durable_name, "proj"); + assert!(!settings.multi_tenant_enabled); + std::env::remove_var("PROJECTION_NATS_URL"); + } + + #[test] + fn settings_is_clone() { + let s = Settings::default(); + let _s2 = s.clone(); + let _ = format!("{:?}", s); + } +} diff --git a/projection/src/http/mod.rs b/projection/src/http/mod.rs new file mode 100644 index 0000000..bce22fb --- /dev/null +++ b/projection/src/http/mod.rs @@ -0,0 +1,714 @@ +use crate::config::Settings; +use crate::observability::Observability; +use crate::project::ProjectionManifest; +use crate::query::{QueryError, QueryRequest, QueryService}; +use crate::storage::KvClient; +use crate::tenant_placement::TenantPlacement; +use crate::types::{CheckpointKey, ProjectionError, 
TenantId, ViewType}; +use async_nats::jetstream; +use axum::extract::{Path, State}; +use axum::http::{header, HeaderMap, StatusCode}; +use axum::response::IntoResponse; +use axum::routing::{get, post}; +use axum::{Json, Router}; +use serde::Deserialize; +use serde_json::json; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; +use std::time::Duration; + +#[derive(Clone)] +pub struct AppState { + pub settings: Settings, + pub ready: Arc, + pub draining: Arc, + pub observability: Observability, + pub storage: KvClient, + pub manifest: ProjectionManifest, + pub jetstream: Option, + pub tenant_placement: TenantPlacement, + pub query: QueryService, +} + +impl std::fmt::Debug for AppState { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("AppState") + .field("settings", &self.settings) + .finish_non_exhaustive() + } +} + +pub async fn build_state( + settings: Settings, + ready: Arc, + draining: Arc, + observability: Observability, + tenant_placement: TenantPlacement, +) -> Result { + let storage = KvClient::open(settings.storage_path.clone()) + .map_err(|e| ProjectionError::StorageError(e.to_string()))?; + + let manifest_raw = std::fs::read_to_string(&settings.manifest_path) + .map_err(|e| ProjectionError::ManifestError(e.to_string()))?; + let ext = std::path::Path::new(&settings.manifest_path) + .extension() + .and_then(|e| e.to_str()) + .unwrap_or(""); + let manifest = match ext { + "yaml" | "yml" => ProjectionManifest::load_from_yaml(&manifest_raw) + .map_err(|e| ProjectionError::ManifestError(e.to_string()))?, + "json" => ProjectionManifest::load_from_json(&manifest_raw) + .map_err(|e| ProjectionError::ManifestError(e.to_string()))?, + _ => { + return Err(ProjectionError::ManifestError(format!( + "Unsupported manifest format: {}", + settings.manifest_path + ))); + } + }; + manifest.validate()?; + + let jetstream = match async_nats::connect(&settings.nats_url).await { + Ok(client) => 
Some(jetstream::new(client)), + Err(_) => None, + }; + + let query = QueryService::new(storage.clone()); + + Ok(AppState { + settings, + ready, + draining, + observability, + storage, + manifest, + jetstream, + tenant_placement, + query, + }) +} + +pub async fn serve( + state: AppState, + shutdown: Arc, +) -> Result<(), ProjectionError> { + let addr = state.settings.http_addr.clone(); + let app = router(state); + + let listener = tokio::net::TcpListener::bind(&addr) + .await + .map_err(|e| ProjectionError::StreamError(e.to_string()))?; + + axum::serve(listener, app) + .with_graceful_shutdown(async move { + shutdown.notified().await; + }) + .await + .map_err(|e| ProjectionError::StreamError(e.to_string()))?; + + Ok(()) +} + +pub fn router(state: AppState) -> Router { + Router::new() + .route("/health", get(health)) + .route("/ready", get(ready)) + .route("/metrics", get(metrics)) + .route("/info", get(info)) + .route("/query/:view_type", post(query)) + .route("/admin/status", get(admin_status)) + .route("/admin/drain", post(admin_drain)) + .route("/admin/reload", post(admin_reload)) + .route("/admin/warmup", get(admin_warmup)) + .with_state(state) +} + +async fn health(State(state): State) -> impl IntoResponse { + let key = format!("health:{}", uuid::Uuid::now_v7()); + let value = json!({"ok": true}); + let writable = state + .storage + .put_json(&key, &value) + .and_then(|_| state.storage.delete_key(key.as_bytes())); + + match writable { + Ok(()) => (StatusCode::OK, Json(json!({"status": "ok"}))).into_response(), + Err(e) => ( + StatusCode::SERVICE_UNAVAILABLE, + Json(json!({"status": "error", "error": e.to_string()})), + ) + .into_response(), + } +} + +async fn ready(State(state): State) -> impl IntoResponse { + if state.draining.load(Ordering::Relaxed) { + return ( + StatusCode::SERVICE_UNAVAILABLE, + Json(json!({"ready": false})), + ) + .into_response(); + } + if !state.ready.load(Ordering::Relaxed) { + return ( + StatusCode::SERVICE_UNAVAILABLE, + 
Json(json!({"ready": false})), + ) + .into_response(); + } + + let key = format!("ready:{}", uuid::Uuid::now_v7()); + let value = json!({"ok": true}); + if state + .storage + .put_json(&key, &value) + .and_then(|_| state.storage.delete_key(key.as_bytes())) + .is_err() + { + return ( + StatusCode::SERVICE_UNAVAILABLE, + Json(json!({"ready": false})), + ) + .into_response(); + } + + let connect = async { + let client = async_nats::connect(&state.settings.nats_url).await?; + let js = jetstream::new(client); + let stream = js.get_stream(&state.settings.stream_name).await?; + let mut stream = stream; + let _ = stream.info().await?; + Ok::<(), async_nats::Error>(()) + }; + + match tokio::time::timeout(Duration::from_millis(800), connect).await { + Ok(Ok(())) => (StatusCode::OK, Json(json!({"ready": true}))).into_response(), + _ => ( + StatusCode::SERVICE_UNAVAILABLE, + Json(json!({"ready": false})), + ) + .into_response(), + } +} + +async fn info(State(state): State) -> impl IntoResponse { + let view_types = state + .manifest + .all() + .map(|d| d.view_type.as_str().to_string()) + .collect::>(); + + let build = json!({ + "name": env!("CARGO_PKG_NAME"), + "version": env!("CARGO_PKG_VERSION"), + "git_sha": option_env!("GIT_SHA").unwrap_or("unknown"), + }); + + let payload = json!({ + "build": build, + "stream_name": state.settings.stream_name, + "durable_name": state.settings.durable_name, + "subject_filters": state.settings.subject_filters, + "consumer_mode": format!("{:?}", state.settings.consumer_mode).to_ascii_lowercase(), + "view_types": view_types, + "ready": state.ready.load(Ordering::Relaxed), + "draining": state.draining.load(Ordering::Relaxed), + }); + + (StatusCode::OK, Json(payload)).into_response() +} + +#[derive(Debug, Deserialize)] +struct QueryBody { + uqf: String, +} + +async fn query( + State(state): State, + Path(view_type): Path, + headers: HeaderMap, + Json(body): Json, +) -> impl IntoResponse { + let tenant_id = match 
tenant_from_headers(&state.settings, &headers) {
        Ok(t) => t,
        Err(e) => {
            return (e.status, Json(json!({"error": e.message}))).into_response();
        }
    };

    // Reject queries for tenants this shard is shedding or does not own.
    if state.tenant_placement.is_draining(&tenant_id) {
        return (
            StatusCode::SERVICE_UNAVAILABLE,
            Json(json!({"error": "tenant is draining", "tenant_id": tenant_id.as_str()})),
        )
            .into_response();
    }

    if !state.tenant_placement.is_hosted(&tenant_id) {
        return (
            StatusCode::FORBIDDEN,
            Json(json!({"error": "tenant not hosted on this shard", "tenant_id": tenant_id.as_str()})),
        )
            .into_response();
    }

    let view_type = ViewType::new(view_type);
    if state.manifest.get(&view_type).is_none() {
        return (
            StatusCode::NOT_FOUND,
            Json(json!({"error": "unknown view type", "view_type": view_type.as_str()})),
        )
            .into_response();
    }

    let request = QueryRequest {
        tenant_id,
        view_type,
        uqf: body.uqf,
    };

    // Caller mistakes map to 400; engine failures map to 500.
    match state.query.query(request) {
        Ok(resp) => (StatusCode::OK, Json(resp)).into_response(),
        Err(QueryError::InvalidQuery(e)) => {
            (StatusCode::BAD_REQUEST, Json(json!({"error": e}))).into_response()
        }
        Err(QueryError::Execution(e)) => {
            (StatusCode::INTERNAL_SERVER_ERROR, Json(json!({"error": e}))).into_response()
        }
    }
}

/// HTTP status + static message for a tenant-header resolution failure.
#[derive(Debug, Clone, Copy)]
struct TenantHeaderError {
    status: StatusCode,
    message: &'static str,
}

/// Resolve the effective tenant from the `x-tenant-id` header.
///
/// Multi-tenant mode: a non-empty header wins; otherwise fall back to the
/// configured default tenant, else reply 400. Single-tenant mode: the
/// configured default wins; otherwise the raw (possibly empty) header value
/// is used.
fn tenant_from_headers(
    settings: &Settings,
    headers: &HeaderMap,
) -> Result<TenantId, TenantHeaderError> {
    let header_value = headers
        .get("x-tenant-id")
        .and_then(|v| v.to_str().ok())
        .map(|s| s.trim())
        .unwrap_or("");

    if settings.multi_tenant_enabled {
        if header_value.is_empty() {
            if let Some(default) = &settings.default_tenant_id {
                return Ok(TenantId::new(default));
            }
            return Err(TenantHeaderError {
                status: StatusCode::BAD_REQUEST,
                message: "missing x-tenant-id",
            });
        }
        return Ok(TenantId::new(header_value));
    }

    if let Some(default) = &settings.default_tenant_id {
        return Ok(TenantId::new(default));
    }

Ok(TenantId::new(header_value)) +} + +async fn admin_status(State(state): State) -> impl IntoResponse { + let snapshot = state.tenant_placement.snapshot(); + let payload = json!({ + "ready": state.ready.load(Ordering::Relaxed), + "draining": state.draining.load(Ordering::Relaxed), + "placement": snapshot, + }); + (StatusCode::OK, Json(payload)).into_response() +} + +#[derive(Debug, Deserialize)] +struct DrainParams { + tenant_id: String, + #[serde(default)] + draining: Option, +} + +async fn admin_drain( + State(state): State, + axum::extract::Query(req): axum::extract::Query, +) -> impl IntoResponse { + let tenant_id = TenantId::new(req.tenant_id); + let draining = req.draining.unwrap_or(true); + match state.tenant_placement.set_draining(tenant_id, draining) { + Ok(()) => ( + StatusCode::OK, + Json(json!({"ok": true, "placement": state.tenant_placement.snapshot()})), + ) + .into_response(), + Err(e) => ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(json!({"ok": false, "error": e})), + ) + .into_response(), + } +} + +async fn admin_reload(State(state): State) -> impl IntoResponse { + match state.tenant_placement.reload(&state.settings) { + Ok(()) => ( + StatusCode::OK, + Json(json!({"ok": true, "placement": state.tenant_placement.snapshot()})), + ) + .into_response(), + Err(e) => ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(json!({"ok": false, "error": e})), + ) + .into_response(), + } +} + +#[derive(Debug, Deserialize)] +struct WarmupParams { + tenant_id: Option, +} + +async fn admin_warmup( + State(state): State, + axum::extract::Query(params): axum::extract::Query, + headers: HeaderMap, +) -> impl IntoResponse { + let tenant_id = if let Some(t) = params.tenant_id { + TenantId::new(t) + } else { + match tenant_from_headers(&state.settings, &headers) { + Ok(t) => t, + Err(e) => return (e.status, Json(json!({"error": e.message}))).into_response(), + } + }; + + let Some(js) = &state.jetstream else { + return ( + StatusCode::SERVICE_UNAVAILABLE, + 
Json(json!({"error": "nats unavailable"})), + ) + .into_response(); + }; + + let stream = match js.get_stream(&state.settings.stream_name).await { + Ok(s) => s, + Err(e) => { + return ( + StatusCode::SERVICE_UNAVAILABLE, + Json(json!({"error": e.to_string()})), + ) + .into_response(); + } + }; + + let mut stream = stream; + let info = match stream.info().await { + Ok(i) => i, + Err(e) => { + return ( + StatusCode::SERVICE_UNAVAILABLE, + Json(json!({"error": e.to_string()})), + ) + .into_response(); + } + }; + let tail = info.state.last_sequence; + + let mut views = Vec::new(); + for def in state.manifest.all() { + let ck = CheckpointKey::new(&tenant_id, &def.view_type); + let cp = state + .storage + .get_checkpoint(&ck) + .unwrap_or(None) + .unwrap_or(0); + let lag = tail.saturating_sub(cp); + views.push(json!({ + "view_type": def.view_type.as_str(), + "checkpoint": cp, + "lag": lag + })); + } + + let payload = json!({ + "tenant_id": tenant_id.as_str(), + "stream_name": state.settings.stream_name, + "tail": tail, + "views": views + }); + + (StatusCode::OK, Json(payload)).into_response() +} + +async fn metrics(State(state): State) -> impl IntoResponse { + let mut output = state.observability.export_metrics(); + output.push('\n'); + + let ready_value = + if state.ready.load(Ordering::Relaxed) && !state.draining.load(Ordering::Relaxed) { + 1 + } else { + 0 + }; + output.push_str("# HELP projection_ready Projection readiness (1=ready,0=not ready)\n"); + output.push_str("# TYPE projection_ready gauge\n"); + output.push_str(&format!("projection_ready {}\n", ready_value)); + + if let Some(js) = &state.jetstream { + if let Ok(stream) = js.get_stream(&state.settings.stream_name).await { + let mut stream = stream; + if let Ok(info) = stream.info().await { + let tail = info.state.last_sequence; + output.push_str( + "\n# HELP projection_stream_last_sequence JetStream stream tail sequence\n", + ); + output.push_str("# TYPE projection_stream_last_sequence gauge\n"); + 
output.push_str(&format!("projection_stream_last_sequence {}\n", tail)); + + let tenant_id = + TenantId::new(state.settings.default_tenant_id.clone().unwrap_or_default()); + + output.push_str( + "\n# HELP projection_lag Stream tail sequence minus checkpoint per view_type\n", + ); + output.push_str("# TYPE projection_lag gauge\n"); + for def in state.manifest.all() { + let ck = CheckpointKey::new(&tenant_id, &def.view_type); + let cp = state + .storage + .get_checkpoint(&ck) + .unwrap_or(None) + .unwrap_or(0); + let lag = tail.saturating_sub(cp); + output.push_str(&format!( + "projection_lag{{tenant_id=\"{}\",view_type=\"{}\"}} {}\n", + tenant_id.as_str(), + def.view_type.as_str(), + lag + )); + } + } + } + } + + ( + StatusCode::OK, + [(header::CONTENT_TYPE, "text/plain; version=0.0.4")], + output, + ) + .into_response() +} + +#[cfg(test)] +mod tests { + use super::*; + use axum::body::Body; + use axum::http::Request; + use query_engine::FilterNode; + use query_engine::Query; + use serde_json::json; + use tower::ServiceExt; + + fn test_state() -> AppState { + test_state_with_nats_url("nats://127.0.0.1:65535") + } + + fn test_state_with_nats_url(nats_url: &str) -> AppState { + let settings = Settings { + nats_url: nats_url.to_string(), + ..Settings::default() + }; + let ready = Arc::new(AtomicBool::new(false)); + let draining = Arc::new(AtomicBool::new(false)); + let observability = Observability::default(); + let storage = KvClient::in_memory(); + let tenant_placement = TenantPlacement::load(&settings).unwrap(); + let query = QueryService::new(storage.clone()); + let mut manifest = ProjectionManifest::new(); + manifest.register(crate::project::ProjectionDefinition { + view_type: crate::types::ViewType::new("User"), + project_program: "/tmp/prog".to_string(), + }); + + AppState { + settings, + ready, + draining, + observability, + storage, + manifest, + jetstream: None, + tenant_placement, + query, + } + } + + #[tokio::test] + async fn 
ready_returns_503_when_not_ready() { + let state = test_state(); + let app = router(state); + let response = app + .oneshot( + Request::builder() + .uri("/ready") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(response.status(), StatusCode::SERVICE_UNAVAILABLE); + } + + #[tokio::test] + async fn query_requires_tenant_header_in_multi_tenant_mode() { + let state = test_state(); + let app = router(state); + let response = app + .oneshot( + Request::builder() + .method("POST") + .uri("/query/User") + .header("content-type", "application/json") + .body(Body::from(r#"{"uqf":"{}"}"#)) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(response.status(), StatusCode::BAD_REQUEST); + } + + #[tokio::test] + async fn query_returns_hits_for_hosted_tenant() { + let mut state = test_state(); + state.settings.multi_tenant_enabled = true; + state.tenant_placement = TenantPlacement::load(&state.settings).unwrap(); + + let tenant_id = TenantId::new("t1"); + let view_type = ViewType::new("User"); + let view_id = crate::types::ViewId::new("u1"); + let view_key = crate::types::ViewKey::new(&tenant_id, &view_type, &view_id); + let cp_key = CheckpointKey::new(&tenant_id, &view_type); + state + .storage + .commit_view_and_checkpoint(&view_key, &json!({"x": 1}), &cp_key, 1) + .unwrap(); + + let uqf = Query::new(FilterNode::r#true()).to_json().unwrap(); + let app = router(state); + let response = app + .oneshot( + Request::builder() + .method("POST") + .uri("/query/User") + .header("content-type", "application/json") + .header("x-tenant-id", "t1") + .body(Body::from( + serde_json::to_vec(&json!({"uqf": uqf})).unwrap(), + )) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(response.status(), StatusCode::OK); + } + + #[tokio::test] + async fn ready_returns_503_when_nats_unavailable() { + let state = test_state_with_nats_url("nats://127.0.0.1:65535"); + state.ready.store(true, Ordering::Relaxed); + let app = router(state); + let response = app + .oneshot( + 
Request::builder() + .uri("/ready") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(response.status(), StatusCode::SERVICE_UNAVAILABLE); + } + + #[tokio::test] + #[ignore] + async fn ready_returns_200_when_nats_and_stream_available() { + let Ok(nats_url) = std::env::var("PROJECTION_TEST_NATS_URL") else { + return; + }; + + let id = uuid::Uuid::now_v7().to_string(); + let stream_name = format!("projection_ready_test_{}", id); + let subject = format!("tenant.t1.aggregate.Account.{}", id); + + let client = async_nats::connect(&nats_url).await.unwrap(); + let js = async_nats::jetstream::new(client); + let _stream = js + .get_or_create_stream(async_nats::jetstream::stream::Config { + name: stream_name.clone(), + subjects: vec![subject], + ..Default::default() + }) + .await + .unwrap(); + + let mut state = test_state_with_nats_url(&nats_url); + state.settings.stream_name = stream_name; + state.ready.store(true, Ordering::Relaxed); + + let app = router(state); + let response = app + .oneshot( + Request::builder() + .uri("/ready") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(response.status(), StatusCode::OK); + } + + #[tokio::test] + async fn ready_returns_503_when_draining() { + let state = test_state(); + state.ready.store(true, Ordering::Relaxed); + state.draining.store(true, Ordering::Relaxed); + let app = router(state); + let response = app + .oneshot( + Request::builder() + .uri("/ready") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(response.status(), StatusCode::SERVICE_UNAVAILABLE); + } + + #[tokio::test] + async fn metrics_includes_ready_gauge() { + let state = test_state(); + state.ready.store(true, Ordering::Relaxed); + let app = router(state); + let response = app + .oneshot( + Request::builder() + .uri("/metrics") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + assert_eq!(response.status(), StatusCode::OK); + + let body = 
axum::body::to_bytes(response.into_body(), usize::MAX) + .await + .unwrap(); + let text = String::from_utf8(body.to_vec()).unwrap(); + assert!(text.contains("projection_ready 1")); + } +} diff --git a/projection/src/lib.rs b/projection/src/lib.rs new file mode 100644 index 0000000..b5fb1da --- /dev/null +++ b/projection/src/lib.rs @@ -0,0 +1,17 @@ +pub mod config; +pub mod http; +pub mod observability; +pub mod project; +pub mod query; +pub mod storage; +pub mod stream; +pub mod tenant_placement; +pub mod types; + +pub use config::Settings; +pub use observability::Observability; +pub use project::{ProjectionManifest, ProjectionRuntime}; +pub use storage::KvClient; +pub use stream::JetStreamClient; +pub use tenant_placement::TenantPlacement; +pub use types::*; diff --git a/projection/src/main.rs b/projection/src/main.rs new file mode 100644 index 0000000..3d80891 --- /dev/null +++ b/projection/src/main.rs @@ -0,0 +1,253 @@ +use projection::config::Settings; +use projection::Observability; +use projection::TenantPlacement; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; + +#[tokio::main] +async fn main() { + match std::env::args().nth(1).as_deref() { + Some("-h") | Some("--help") => { + print_help(); + return; + } + Some("serve") | None => serve().await, + Some("rebuild") => rebuild().await, + Some("backfill") => backfill().await, + Some("health") => health().await, + Some(other) => { + eprintln!("Unknown command: {}", other); + print_help(); + } + } +} + +async fn serve() { + let settings = load_settings(); + let _ = settings.validate(); + init_logging(&settings); + + tracing::info!(settings = ?settings, "Projection starting"); + + let shutdown = Arc::new(tokio::sync::Notify::new()); + let ready = Arc::new(AtomicBool::new(false)); + let draining = Arc::new(AtomicBool::new(false)); + let observability = Observability::default(); + let tenant_placement = TenantPlacement::load(&settings).unwrap_or_else(|e| { + tracing::error!(error = %e, "Failed to 
load tenant placement, defaulting to allow-all"); + TenantPlacement::default() + }); + + let http_state = match projection::http::build_state( + settings.clone(), + ready.clone(), + draining.clone(), + observability.clone(), + tenant_placement.clone(), + ) + .await + { + Ok(state) => state, + Err(e) => { + tracing::error!(error = %e, "Failed to initialize HTTP state"); + std::process::exit(1); + } + }; + + let http_shutdown = shutdown.clone(); + let http_task = + tokio::spawn(async move { projection::http::serve(http_state, http_shutdown).await }); + + let signal_shutdown = shutdown.clone(); + let signal_ready = ready.clone(); + let signal_draining = draining.clone(); + tokio::spawn(async move { + #[cfg(unix)] + { + use tokio::signal::unix::{signal, SignalKind}; + let mut sigterm = signal(SignalKind::terminate()).ok(); + let mut sigint = signal(SignalKind::interrupt()).ok(); + tokio::select! { + _ = tokio::signal::ctrl_c() => {}, + _ = async { if let Some(s) = &mut sigterm { let _ = s.recv().await; } } => {}, + _ = async { if let Some(s) = &mut sigint { let _ = s.recv().await; } } => {}, + } + } + + #[cfg(not(unix))] + { + let _ = tokio::signal::ctrl_c().await; + } + + signal_draining.store(true, Ordering::Relaxed); + signal_ready.store(false, Ordering::Relaxed); + signal_shutdown.notify_waiters(); + }); + + let worker_shutdown = shutdown.clone(); + let worker_ready = ready.clone(); + let worker_obs = observability.clone(); + let worker_tenant_placement = tenant_placement.clone(); + let worker_task = tokio::spawn(async move { + projection::stream::run_projection_with_signals( + settings, + worker_shutdown, + worker_ready, + worker_obs, + worker_tenant_placement, + ) + .await + }); + + let worker_result = worker_task.await; + shutdown.notify_waiters(); + + let _ = http_task.await; + + match worker_result { + Ok(Ok(())) => {} + Ok(Err(e)) => { + tracing::error!(error = %e, "Projection terminated with error"); + std::process::exit(1); + } + Err(e) => { + 
tracing::error!(error = %e, "Projection task join error"); + std::process::exit(1); + } + } +} + +async fn rebuild() { + let mut settings = load_settings(); + let args: Vec = std::env::args().collect(); + + let tenant_id = flag_value(&args, "--tenant").unwrap_or_default(); + let view_type = match flag_value(&args, "--view-type") { + Some(v) => v, + None => { + eprintln!("Missing --view-type"); + print_help(); + std::process::exit(2); + } + }; + let from_seq = flag_value(&args, "--from-seq") + .and_then(|v| v.parse::().ok()) + .unwrap_or(1); + if let Some(path) = flag_value(&args, "--manifest") { + settings.manifest_path = path; + } + + let _ = settings.validate(); + init_logging(&settings); + + let tenant = projection::types::TenantId::new(tenant_id); + let view_type = projection::types::ViewType::new(view_type); + + if let Err(e) = projection::stream::rebuild_view(settings, tenant, view_type, from_seq).await { + tracing::error!(error = %e, "Rebuild failed"); + std::process::exit(1); + } +} + +async fn backfill() { + let mut settings = load_settings(); + let args: Vec = std::env::args().collect(); + + let tenant_id = flag_value(&args, "--tenant").unwrap_or_default(); + let from_seq = flag_value(&args, "--from-seq") + .and_then(|v| v.parse::().ok()) + .unwrap_or(1); + if let Some(path) = flag_value(&args, "--manifest") { + settings.manifest_path = path; + } + + let _ = settings.validate(); + init_logging(&settings); + + let tenant = projection::types::TenantId::new(tenant_id); + + if let Err(e) = projection::stream::backfill_to_tail(settings, tenant, from_seq).await { + tracing::error!(error = %e, "Backfill failed"); + std::process::exit(1); + } +} + +async fn health() { + let mut settings = load_settings(); + let args: Vec = std::env::args().collect(); + + let tenant_id = flag_value(&args, "--tenant").unwrap_or_default(); + if let Some(path) = flag_value(&args, "--manifest") { + settings.manifest_path = path; + } + + let _ = settings.validate(); + 
init_logging(&settings); + + let tenant = projection::types::TenantId::new(tenant_id); + match projection::stream::health_report(settings, tenant).await { + Ok(report) => { + println!( + "{}", + serde_json::to_string_pretty(&report_to_json(report)).unwrap() + ); + } + Err(e) => { + tracing::error!(error = %e, "Health check failed"); + std::process::exit(1); + } + } +} + +fn print_help() { + println!( + "projection\n\nUSAGE:\n projection [COMMAND]\n\nCOMMANDS:\n serve Start the projection worker (default)\n rebuild Delete view+checkpoint for a tenant/view_type and backfill from a sequence\n backfill Backfill current manifest to tail for a tenant (for hot upgrades)\n health Print storage/NATS health and per-view lag\n\nOPTIONS:\n -h, --help Print help\n --manifest Manifest path override\n --tenant Tenant (empty string for default)\n --view-type View type (rebuild only)\n --from-seq Start sequence (default 1)\n" + ); +} + +fn load_settings() -> Settings { + if let Ok(path) = std::env::var("PROJECTION_CONFIG_PATH") { + if let Ok(settings) = Settings::load_from_file_with_env_overrides(path) { + return settings; + } + } + + Settings::from_env().unwrap_or_default() +} + +fn init_logging(settings: &Settings) { + let _ = settings; + tracing_subscriber::fmt() + .with_env_filter( + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")), + ) + .json() + .init(); +} + +fn flag_value(args: &[String], name: &str) -> Option { + args.iter() + .position(|a| a == name) + .and_then(|idx| args.get(idx + 1)) + .map(|v| v.to_string()) +} + +fn report_to_json(report: projection::stream::HealthReport) -> serde_json::Value { + serde_json::json!({ + "storage_ok": report.storage_ok, + "nats_ok": report.nats_ok, + "stream_last_sequence": report.stream_last_sequence, + "lags": report.lags.into_iter().map(|(view_type, lag)| { + serde_json::json!({"view_type": view_type, "lag": lag}) + }).collect::>() + }) +} + +#[cfg(test)] +mod 
tests {
    #[test]
    fn binary_exists() {
        assert!(std::env::current_exe().is_ok());
    }
}
diff --git a/projection/src/observability/metrics.rs b/projection/src/observability/metrics.rs
new file mode 100644
index 0000000..48eafe4
--- /dev/null
+++ b/projection/src/observability/metrics.rs
@@ -0,0 +1,203 @@
use std::collections::HashMap;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::RwLock;
use std::time::Duration;

/// Lock-free, Prometheus-style cumulative histogram of durations in seconds.
#[derive(Debug)]
struct AtomicHistogram {
    // Total number of observations.
    count: AtomicU64,
    // Sum of observations, stored as integer microseconds; export() converts
    // back to seconds so `_sum` matches the `_seconds` metric name.
    sum: AtomicU64,
    // Cumulative buckets: (upper bound in seconds, observations <= bound).
    buckets: Vec<(f64, AtomicU64)>,
}

impl AtomicHistogram {
    fn new() -> Self {
        // Standard latency bucket ladder, in seconds (1ms .. 10s).
        let buckets: Vec<(f64, AtomicU64)> = vec![
            0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0,
        ]
        .into_iter()
        .map(|v| (v, AtomicU64::new(0)))
        .collect();

        Self {
            count: AtomicU64::new(0),
            sum: AtomicU64::new(0),
            buckets,
        }
    }

    /// Record one observation.
    ///
    /// Fix: the previous implementation compared the duration in
    /// *milliseconds* against bucket bounds that are in *seconds*
    /// (0.001..10.0), so almost every observation fell above all finite
    /// buckets, and `_sum` was exported in milliseconds under a `_seconds`
    /// metric name. Observe in seconds so buckets and the exported sum agree.
    fn observe(&self, duration: Duration) {
        let seconds = duration.as_secs_f64();
        self.count.fetch_add(1, Ordering::Relaxed);
        // Stored as integer microseconds; export() divides by 1_000_000.
        self.sum
            .fetch_add((seconds * 1_000_000.0) as u64, Ordering::Relaxed);

        // Cumulative semantics: every bucket whose bound >= value is bumped.
        for (threshold, count) in &self.buckets {
            if seconds <= *threshold {
                count.fetch_add(1, Ordering::Relaxed);
            }
        }
    }

    /// Render `_sum`, `_count` and `_bucket` lines for this histogram.
    ///
    /// `labels` is either empty or a `,key="value"` suffix that is merged
    /// after the `le` label on bucket lines.
    fn export(&self, name: &str, labels: &str) -> String {
        let mut output = String::new();
        let count = self.count.load(Ordering::Relaxed);
        // Integer microseconds back to seconds (see observe()).
        let sum = self.sum.load(Ordering::Relaxed) as f64 / 1_000_000.0;

        let label_str = if labels.is_empty() {
            String::new()
        } else {
            format!("{{{}}}", labels.trim_start_matches(','))
        };

        output.push_str(&format!("{}_sum{} {}\n", name, label_str, sum));
        output.push_str(&format!("{}_count{} {}\n", name, label_str, count));

        for (threshold, bucket_count) in &self.buckets {
            let c = bucket_count.load(Ordering::Relaxed);
            let bucket_labels = if labels.is_empty() {
                format!("le=\"{}\"", threshold)
            } else {
                format!("le=\"{}\"{}", threshold, labels)
            };
            output.push_str(&format!("{}_bucket{{{}}} {}\n", name, 
bucket_labels, c));
        }
        // Prometheus requires a final +Inf bucket equal to the total count.
        let inf_labels = if labels.is_empty() {
            "le=\"+Inf\"".to_string()
        } else {
            format!("le=\"+Inf\"{}", labels)
        };
        output.push_str(&format!("{}_bucket{{{}}} {}\n", name, inf_labels, count));

        output
    }
}

impl Default for AtomicHistogram {
    fn default() -> Self {
        Self::new()
    }
}

/// Process-wide projection metrics, exported in Prometheus text format.
#[derive(Debug)]
pub struct Metrics {
    // Counters keyed by "view_type:tenant_id".
    events_total: RwLock<HashMap<String, AtomicU64>>,
    processing_errors: RwLock<HashMap<String, AtomicU64>>,
    // Histograms keyed by view_type.
    processing_duration: RwLock<HashMap<String, AtomicHistogram>>,
}

impl Metrics {
    pub fn new() -> Self {
        Self {
            events_total: RwLock::new(HashMap::new()),
            processing_errors: RwLock::new(HashMap::new()),
            processing_duration: RwLock::new(HashMap::new()),
        }
    }

    /// Increment the processed-events counter for (view_type, tenant_id).
    pub fn increment_events_total(&self, view_type: &str, tenant_id: &str) {
        let key = format!("{}:{}", view_type, tenant_id);
        // Fast path: shared lock when the counter already exists.
        let map = self.events_total.read().unwrap();
        if let Some(counter) = map.get(&key) {
            counter.fetch_add(1, Ordering::Relaxed);
            return;
        }
        drop(map);
        // Slow path: exclusive lock to insert. entry() also covers the race
        // where another writer inserted between the two lock acquisitions.
        let mut map = self.events_total.write().unwrap();
        let counter = map.entry(key).or_insert_with(|| AtomicU64::new(0));
        counter.fetch_add(1, Ordering::Relaxed);
    }

    /// Increment the processing-error counter for (view_type, tenant_id).
    pub fn increment_processing_errors(&self, view_type: &str, tenant_id: &str) {
        let key = format!("{}:{}", view_type, tenant_id);
        let map = self.processing_errors.read().unwrap();
        if let Some(counter) = map.get(&key) {
            counter.fetch_add(1, Ordering::Relaxed);
            return;
        }
        drop(map);
        let mut map = self.processing_errors.write().unwrap();
        let counter = map.entry(key).or_insert_with(|| AtomicU64::new(0));
        counter.fetch_add(1, Ordering::Relaxed);
    }

    /// Record one event-processing duration for the given view type.
    pub fn record_processing_duration(&self, duration: Duration, view_type: &str) {
        let mut map = self.processing_duration.write().unwrap();
        let histogram = map.entry(view_type.to_string()).or_default();
        histogram.observe(duration);
    }

    /// Render all metrics in Prometheus text exposition format.
    pub fn export_prometheus(&self) -> String {
        let mut output = String::new();

        output.push_str("# HELP projection_events_total Total number of events 
processed\n"); + output.push_str("# TYPE projection_events_total counter\n"); + { + let map = self.events_total.read().unwrap(); + for (key, counter) in map.iter() { + let parts: Vec<&str> = key.split(':').collect(); + if parts.len() == 2 { + let value = counter.load(Ordering::Relaxed); + output.push_str(&format!( + "projection_events_total{{view_type=\"{}\",tenant_id=\"{}\"}} {}\n", + parts[0], parts[1], value + )); + } + } + } + + output.push_str("\n# HELP projection_processing_errors_total Total processing errors\n"); + output.push_str("# TYPE projection_processing_errors_total counter\n"); + { + let map = self.processing_errors.read().unwrap(); + for (key, counter) in map.iter() { + let parts: Vec<&str> = key.split(':').collect(); + if parts.len() == 2 { + let value = counter.load(Ordering::Relaxed); + output.push_str(&format!( + "projection_processing_errors_total{{view_type=\"{}\",tenant_id=\"{}\"}} {}\n", + parts[0], parts[1], value + )); + } + } + } + + output.push_str( + "\n# HELP projection_processing_duration_seconds Event processing duration\n", + ); + output.push_str("# TYPE projection_processing_duration_seconds histogram\n"); + { + let map = self.processing_duration.read().unwrap(); + for (view_type, histogram) in map.iter() { + let labels = format!(",view_type=\"{}\"", view_type); + output + .push_str(&histogram.export("projection_processing_duration_seconds", &labels)); + } + } + + output + } +} + +impl Default for Metrics { + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn metrics_increment_events() { + let metrics = Metrics::new(); + metrics.increment_events_total("User", "tenant-a"); + metrics.increment_events_total("User", "tenant-a"); + + let output = metrics.export_prometheus(); + assert!( + output.contains("projection_events_total{view_type=\"User\",tenant_id=\"tenant-a\"} 2") + ); + } +} diff --git a/projection/src/observability/mod.rs b/projection/src/observability/mod.rs new file 
mode 100644 index 0000000..1395590 --- /dev/null +++ b/projection/src/observability/mod.rs @@ -0,0 +1,128 @@ +mod metrics; + +pub use metrics::Metrics; + +use std::sync::Arc; +use std::time::Instant; + +#[derive(Debug, Clone)] +pub struct ProcessingSpan { + view_type: String, + tenant_id: String, + correlation_id: Option, + trace_id: Option, + start_time: Instant, +} + +impl ProcessingSpan { + pub fn new( + view_type: impl Into, + tenant_id: impl Into, + correlation_id: Option, + trace_id: Option, + ) -> Self { + Self { + view_type: view_type.into(), + tenant_id: tenant_id.into(), + correlation_id, + trace_id, + start_time: Instant::now(), + } + } + + pub fn elapsed(&self) -> std::time::Duration { + self.start_time.elapsed() + } + + pub fn view_type(&self) -> &str { + &self.view_type + } + + pub fn tenant_id(&self) -> &str { + &self.tenant_id + } + + pub fn correlation_id(&self) -> Option<&str> { + self.correlation_id.as_deref() + } + + pub fn trace_id(&self) -> Option<&str> { + self.trace_id.as_deref() + } +} + +#[derive(Debug, Clone)] +pub struct Observability { + metrics: Arc, +} + +impl Observability { + pub fn new() -> Self { + Self { + metrics: Arc::new(Metrics::new()), + } + } + + pub fn metrics(&self) -> &Arc { + &self.metrics + } + + pub fn start_processing_span( + &self, + view_type: &str, + tenant_id: &str, + correlation_id: Option<&str>, + trace_id: Option<&str>, + ) -> ProcessingSpan { + tracing::info_span!( + "projection_event", + view_type = %view_type, + tenant_id = %tenant_id, + correlation_id = correlation_id.unwrap_or(""), + trace_id = trace_id.unwrap_or(""), + ); + ProcessingSpan::new( + view_type, + tenant_id, + correlation_id.map(|s| s.to_string()), + trace_id.map(|s| s.to_string()), + ) + } + + pub fn record_processed(&self, span: &ProcessingSpan) { + self.metrics + .increment_events_total(span.view_type(), span.tenant_id()); + self.metrics + .record_processing_duration(span.elapsed(), span.view_type()); + } + + pub fn record_error(&self, 
span: &ProcessingSpan) { + self.metrics + .increment_events_total(span.view_type(), span.tenant_id()); + self.metrics + .increment_processing_errors(span.view_type(), span.tenant_id()); + self.metrics + .record_processing_duration(span.elapsed(), span.view_type()); + } + + pub fn export_metrics(&self) -> String { + self.metrics.export_prometheus() + } +} + +impl Default for Observability { + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn observability_is_default() { + let obs = Observability::default(); + let _ = obs.export_metrics(); + } +} diff --git a/projection/src/project/manifest.rs b/projection/src/project/manifest.rs new file mode 100644 index 0000000..f8649fb --- /dev/null +++ b/projection/src/project/manifest.rs @@ -0,0 +1,89 @@ +use crate::types::{ProjectionError, ViewType}; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ProjectionDefinition { + pub view_type: ViewType, + pub project_program: String, +} + +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct ProjectionManifest { + projections: HashMap, +} + +impl ProjectionManifest { + pub fn new() -> Self { + Self { + projections: HashMap::new(), + } + } + + pub fn register(&mut self, definition: ProjectionDefinition) { + self.projections + .insert(definition.view_type.as_str().to_string(), definition); + } + + pub fn get(&self, view_type: &ViewType) -> Option<&ProjectionDefinition> { + self.projections.get(view_type.as_str()) + } + + pub fn all(&self) -> impl Iterator { + self.projections.values() + } + + pub fn load_from_yaml(yaml: &str) -> Result { + serde_yaml::from_str(yaml) + } + + pub fn load_from_json(json: &str) -> Result { + serde_json::from_str(json) + } + + pub fn validate(&self) -> Result<(), ProjectionError> { + for def in self.projections.values() { + if def.project_program.is_empty() { + return 
Err(ProjectionError::ManifestError(format!( + "Missing project_program for view_type {}", + def.view_type.as_str() + ))); + } + + if !std::path::Path::new(&def.project_program).exists() { + return Err(ProjectionError::ManifestError(format!( + "Project program not found for view_type {}: {}", + def.view_type.as_str(), + def.project_program + ))); + } + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::tempdir; + + #[test] + fn manifest_loads_and_validates() { + let dir = tempdir().unwrap(); + let program_path = dir.path().join("proj.js"); + std::fs::write(&program_path, "function project() { return null; }").unwrap(); + + let yaml = format!( + r#" +projections: + User: + view_type: "User" + project_program: "{}" +"#, + program_path.to_string_lossy() + ); + + let manifest = ProjectionManifest::load_from_yaml(&yaml).unwrap(); + manifest.validate().unwrap(); + } +} diff --git a/projection/src/project/mod.rs b/projection/src/project/mod.rs new file mode 100644 index 0000000..6cefade --- /dev/null +++ b/projection/src/project/mod.rs @@ -0,0 +1,5 @@ +mod manifest; +mod runtime; + +pub use manifest::{ProjectionDefinition, ProjectionManifest}; +pub use runtime::{ProjectionOutput, ProjectionRuntime}; diff --git a/projection/src/project/runtime.rs b/projection/src/project/runtime.rs new file mode 100644 index 0000000..3431444 --- /dev/null +++ b/projection/src/project/runtime.rs @@ -0,0 +1,274 @@ +use crate::types::{EventEnvelope, ProjectionError}; +use serde_json::Value as JsonValue; +use std::collections::BTreeMap; +use std::time::Duration; + +#[derive(Debug, Clone, PartialEq)] +pub struct ProjectionOutput { + pub view_id: String, + pub new_view: JsonValue, +} + +#[derive(Clone)] +pub struct ProjectionRuntime { + pub gas_limit: u64, + pub timeout: Duration, + engine: runtime_function::Engine, +} + +impl std::fmt::Debug for ProjectionRuntime { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + 
f.debug_struct("ProjectionRuntime") + .field("gas_limit", &self.gas_limit) + .field("timeout", &self.timeout) + .finish_non_exhaustive() + } +} + +impl Default for ProjectionRuntime { + fn default() -> Self { + Self { + gas_limit: 1_000_000, + timeout: Duration::from_secs(5), + engine: runtime_function::Engine::new(), + } + } +} + +impl ProjectionRuntime { + pub fn new(gas_limit: u64, timeout: Duration) -> Self { + Self { + gas_limit, + timeout, + engine: runtime_function::Engine::new(), + } + } + + pub async fn project( + &self, + current_view: &JsonValue, + event: &EventEnvelope, + program_path: &str, + ) -> Result, ProjectionError> { + let program = std::fs::read_to_string(program_path) + .map_err(|e| ProjectionError::ProjectError(e.to_string()))?; + + let program = runtime_function::Program::from_json(&program) + .map_err(|e| ProjectionError::ProjectError(format!("Program parse error: {}", e)))?; + + self.project_program(current_view, event, &program).await + } + + pub async fn project_program( + &self, + current_view: &JsonValue, + event: &EventEnvelope, + program: &runtime_function::Program, + ) -> Result, ProjectionError> { + let current_view_value = json_to_runtime_value(current_view) + .map_err(|e| ProjectionError::ProjectError(e.to_string()))?; + + let event_json = serde_json::to_value(event) + .map_err(|e| ProjectionError::ProjectError(e.to_string()))?; + let event_value = json_to_runtime_value(&event_json) + .map_err(|e| ProjectionError::ProjectError(e.to_string()))?; + + let mut inputs = BTreeMap::new(); + inputs.insert("current_view".to_string(), current_view_value); + inputs.insert("event".to_string(), event_value); + + let timeout_secs = self.timeout.as_secs().max(1); + let options = runtime_function::engine::ExecutionOptions { + gas_limit: self.gas_limit, + timeout_secs, + trace: false, + }; + + let now = event.timestamp.unwrap_or_else(chrono::Utc::now); + let causation_id = format!("{}:{}", event.aggregate_id, event.event_type); + let context = 
runtime_function::Context::new(now, causation_id) + .with_tenant_id(event.tenant_id.as_str()); + + let result = self + .engine + .execute_with_options(program, inputs, context, options); + + if !result.success { + return Err(ProjectionError::ProjectError( + result + .error + .map(|e| e.to_string()) + .unwrap_or_else(|| "runtime execution failed".to_string()), + )); + } + + let Some(output) = result.output else { + return Ok(None); + }; + + if output.is_null() { + return Ok(None); + } + + let output_json = runtime_value_to_json(&output); + let view_id = output_json + .get("view_id") + .and_then(|v| v.as_str()) + .ok_or_else(|| ProjectionError::ProjectError("missing view_id".to_string()))? + .to_string(); + let new_view = output_json + .get("new_view") + .cloned() + .ok_or_else(|| ProjectionError::ProjectError("missing new_view".to_string()))?; + + Ok(Some(ProjectionOutput { view_id, new_view })) + } +} + +fn json_to_runtime_value(value: &JsonValue) -> Result { + serde_json::from_value(value.clone()) +} + +fn runtime_value_to_json(value: &runtime_function::Value) -> JsonValue { + match value { + runtime_function::Value::Null => JsonValue::Null, + runtime_function::Value::Bool(b) => JsonValue::Bool(*b), + runtime_function::Value::Decimal(d) => { + let s = d.to_string(); + if !s.contains('.') && !s.contains('e') && !s.contains('E') { + if let Ok(i) = s.parse::() { + return JsonValue::Number(i.into()); + } + if let Ok(u) = s.parse::() { + return JsonValue::Number(u.into()); + } + } + JsonValue::String(s) + } + runtime_function::Value::String(s) => JsonValue::String(s.to_string()), + runtime_function::Value::DateTime(dt) => JsonValue::String(dt.to_rfc3339()), + runtime_function::Value::Array(arr) => { + JsonValue::Array(arr.iter().map(runtime_value_to_json).collect::>()) + } + runtime_function::Value::Object(obj) => { + let mut map = serde_json::Map::new(); + for (k, v) in obj.iter() { + map.insert(k.clone(), runtime_value_to_json(v)); + } + JsonValue::Object(map) + } + 
} +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[tokio::test] + async fn runtime_function_is_deterministic() { + let rt = ProjectionRuntime::default(); + let event = EventEnvelope { + tenant_id: crate::types::TenantId::new("t1"), + event_id: None, + aggregate_id: "a1".to_string(), + aggregate_type: "Account".to_string(), + version: None, + event_type: "created".to_string(), + payload: json!({"x": 1}), + command_id: None, + timestamp: Some( + chrono::DateTime::parse_from_rfc3339("2026-02-09T12:00:00Z") + .unwrap() + .with_timezone(&chrono::Utc), + ), + correlation_id: None, + traceparent: None, + trace_id: None, + }; + + let program_json = r#" + { + "specVersion": "1.1", + "id": "proj", + "name": "Projection", + "inputs": [ + {"name": "current_view", "type": "Any", "required": true}, + {"name": "event", "type": "Any", "required": true} + ], + "nodes": [ + {"id": "event", "type": "InputRef", "data": {"input_name": "event"}}, + {"id": "expr", "type": "Expr", "data": {"expression": "({ view_id: input.aggregate_id, new_view: input.payload })"}}, + {"id": "output", "type": "Output", "data": {}} + ], + "edges": [ + {"id": "e1", "source": "event", "sourceHandle": "out", "target": "expr", "targetHandle": "input"}, + {"id": "e2", "source": "expr", "sourceHandle": "out", "target": "output", "targetHandle": "value"} + ], + "outputNodeId": "output" + } + "#; + + let program: runtime_function::Program = serde_json::from_str(program_json).unwrap(); + + let out1 = rt + .project_program(&json!({"a": 1}), &event, &program) + .await + .unwrap(); + let out2 = rt + .project_program(&json!({"a": 1}), &event, &program) + .await + .unwrap(); + + assert_eq!(out1, out2); + let out = out1.unwrap(); + assert_eq!(out.view_id, "a1"); + assert_eq!(out.new_view["x"], 1); + } + + #[tokio::test] + async fn runtime_contract_requires_fields() { + let rt = ProjectionRuntime::default(); + let event = EventEnvelope { + tenant_id: crate::types::TenantId::new("t1"), + 
event_id: None, + aggregate_id: "a1".to_string(), + aggregate_type: "Account".to_string(), + version: None, + event_type: "created".to_string(), + payload: json!({"x": 1}), + command_id: None, + timestamp: None, + correlation_id: None, + traceparent: None, + trace_id: None, + }; + + let program_json = r#" + { + "specVersion": "1.1", + "id": "proj", + "name": "Projection", + "inputs": [ + {"name": "current_view", "type": "Any", "required": true}, + {"name": "event", "type": "Any", "required": true} + ], + "nodes": [ + {"id": "const", "type": "Const", "data": {"value": {"ok": true}}}, + {"id": "output", "type": "Output", "data": {}} + ], + "edges": [ + {"id": "e1", "source": "const", "sourceHandle": "out", "target": "output", "targetHandle": "value"} + ], + "outputNodeId": "output" + } + "#; + + let program: runtime_function::Program = serde_json::from_str(program_json).unwrap(); + let err = rt + .project_program(&json!({}), &event, &program) + .await + .unwrap_err(); + assert!(format!("{}", err).contains("missing view_id")); + } +} diff --git a/projection/src/query/mod.rs b/projection/src/query/mod.rs new file mode 100644 index 0000000..2ecaa56 --- /dev/null +++ b/projection/src/query/mod.rs @@ -0,0 +1,3 @@ +mod uqf; + +pub use uqf::{QueryError, QueryRequest, QueryResponse, QueryService}; diff --git a/projection/src/query/uqf.rs b/projection/src/query/uqf.rs new file mode 100644 index 0000000..88b3ba0 --- /dev/null +++ b/projection/src/query/uqf.rs @@ -0,0 +1,266 @@ +use crate::storage::KvClient; +use crate::types::{ProjectionError, TenantId, ViewType}; +use query_engine::query::QueryHint; +use query_engine::{canonicalize_filter, ExecutionLimits, Query, QueryExecutor, QueryMode}; +use serde::{Deserialize, Serialize}; +use std::sync::Arc; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct QueryRequest { + pub tenant_id: TenantId, + pub view_type: ViewType, + pub uqf: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(tag = "mode", 
rename_all = "lowercase")] +pub enum QueryResponse { + Find { + took_ms: u64, + hits: Vec, + count: usize, + next_cursor: Option, + metrics: Option, + }, + Count { + took_ms: u64, + count: usize, + metrics: Option, + }, +} + +#[derive(Debug, thiserror::Error)] +pub enum QueryError { + #[error("Invalid query: {0}")] + InvalidQuery(String), + #[error("Query execution error: {0}")] + Execution(String), +} + +#[derive(Clone)] +pub struct QueryService { + storage: KvClient, + engine: Arc, + max_results: usize, + max_scan: usize, + timeout_ms: u64, +} + +impl std::fmt::Debug for QueryService { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("QueryService") + .field("max_results", &self.max_results) + .field("max_scan", &self.max_scan) + .field("timeout_ms", &self.timeout_ms) + .finish_non_exhaustive() + } +} + +impl QueryService { + pub fn new(storage: KvClient) -> Self { + Self { + storage, + engine: Arc::new(query_engine::Engine::new()), + max_results: 1000, + max_scan: 100_000, + timeout_ms: 1_000, + } + } + + pub fn with_limits(mut self, max_results: usize, max_scan: usize, timeout_ms: u64) -> Self { + self.max_results = max_results; + self.max_scan = max_scan; + self.timeout_ms = timeout_ms; + self + } + + pub fn query(&self, request: QueryRequest) -> Result { + let mut query = + Query::from_json(&request.uqf).map_err(|e| QueryError::InvalidQuery(e.to_string()))?; + + if matches!(query.mode, QueryMode::Explain) { + return Err(QueryError::InvalidQuery( + "Explain mode is not supported".to_string(), + )); + } + + query.filter = canonicalize_filter(query.filter); + + query.limit = Some( + query + .limit + .unwrap_or(self.max_results) + .min(self.max_results), + ); + + let effective_timeout_ms = query + .hint + .as_ref() + .and_then(|h| h.timeout_ms) + .unwrap_or(self.timeout_ms) + .min(self.timeout_ms); + let effective_max_scan = query + .hint + .as_ref() + .and_then(|h| h.max_scan) + .unwrap_or(self.max_scan) + 
.min(self.max_scan); + + let hint = query + .hint + .clone() + .unwrap_or_else(QueryHint::new) + .with_timeout(effective_timeout_ms) + .with_max_scan(effective_max_scan); + query.hint = Some(hint); + + self.engine + .validate(&query) + .map_err(|e| QueryError::InvalidQuery(e.to_string()))?; + + let prefix = format!( + "view:{}:{}:", + request.tenant_id.as_str(), + request.view_type.as_str() + ); + + let docs = self + .storage + .scan_documents_by_prefix(prefix.as_bytes(), effective_max_scan) + .map_err(QueryError::from)?; + + let source = query_engine::InMemorySource::from_documents(docs); + + let limits = ExecutionLimits::new() + .with_max_scan(effective_max_scan) + .with_timeout_ms(effective_timeout_ms) + .with_cancel_token(Arc::new(query_engine::CancelToken::new())); + + let executor = QueryExecutor::new(self.engine.config()); + let result = executor + .execute_with_limits(&query, &source, limits) + .map_err(|e| QueryError::Execution(e.to_string()))?; + + match result { + query_engine::exec::ExecutionResult::Find(resp) => Ok(QueryResponse::Find { + took_ms: resp.took_ms, + hits: resp.hits, + count: resp.count, + next_cursor: resp.next_cursor, + metrics: resp.metrics, + }), + query_engine::exec::ExecutionResult::Count(resp) => Ok(QueryResponse::Count { + took_ms: resp.took_ms, + count: resp.count, + metrics: resp.metrics, + }), + query_engine::exec::ExecutionResult::Explain(_) => Err(QueryError::InvalidQuery( + "Explain mode is not supported".to_string(), + )), + } + } +} + +impl From for QueryError { + fn from(e: ProjectionError) -> Self { + QueryError::Execution(e.to_string()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::types::{CheckpointKey, StreamSequence, ViewId, ViewKey}; + use serde_json::json; + + #[test] + fn tenant_scoped_queries_never_return_other_tenant_keys() { + let storage = KvClient::in_memory(); + let service = QueryService::new(storage.clone()).with_limits(100, 1000, 1000); + + let tenant_a = TenantId::new("t1"); + let 
tenant_b = TenantId::new("t2"); + let view_type = ViewType::new("User"); + + let cp_a = CheckpointKey::new(&tenant_a, &view_type); + let cp_b = CheckpointKey::new(&tenant_b, &view_type); + + let key_a1 = ViewKey::new(&tenant_a, &view_type, &ViewId::new("u1")); + let key_a2 = ViewKey::new(&tenant_a, &view_type, &ViewId::new("u2")); + let key_b1 = ViewKey::new(&tenant_b, &view_type, &ViewId::new("u1")); + + storage + .commit_view_and_checkpoint(&key_a1, &json!({"tenant": "t1", "id": "u1"}), &cp_a, 1) + .unwrap(); + storage + .commit_view_and_checkpoint(&key_a2, &json!({"tenant": "t1", "id": "u2"}), &cp_a, 2) + .unwrap(); + storage + .commit_view_and_checkpoint(&key_b1, &json!({"tenant": "t2", "id": "u1"}), &cp_b, 1) + .unwrap(); + + let uqf = Query::new(query_engine::FilterNode::r#true()) + .with_limit(100) + .to_json() + .unwrap(); + + let response = service + .query(QueryRequest { + tenant_id: tenant_a, + view_type, + uqf, + }) + .unwrap(); + + match response { + QueryResponse::Find { hits, .. 
} => { + assert_eq!(hits.len(), 2); + assert!(hits.iter().all(|v| v["tenant"] == "t1")); + } + _ => panic!("expected find response"), + } + } + + #[test] + fn uqf_filter_works_on_fixture_dataset() { + let storage = KvClient::in_memory(); + let service = QueryService::new(storage.clone()).with_limits(100, 1000, 1000); + + let tenant = TenantId::new("t1"); + let view_type = ViewType::new("User"); + let cp = CheckpointKey::new(&tenant, &view_type); + + let docs = vec![ + (ViewId::new("u1"), json!({"age": 20, "name": "a"})), + (ViewId::new("u2"), json!({"age": 35, "name": "b"})), + (ViewId::new("u3"), json!({"age": 40, "name": "c"})), + ]; + + for (i, (id, doc)) in docs.into_iter().enumerate() { + let key = ViewKey::new(&tenant, &view_type, &id); + storage + .commit_view_and_checkpoint(&key, &doc, &cp, (i + 1) as StreamSequence) + .unwrap(); + } + + let query = Query::new(query_engine::FilterNode::gt("age", json!(30))); + let uqf = query.to_json().unwrap(); + + let response = service + .query(QueryRequest { + tenant_id: tenant, + view_type, + uqf, + }) + .unwrap(); + + match response { + QueryResponse::Find { hits, count, .. 
} => { + assert_eq!(count, 2); + assert_eq!(hits.len(), 2); + assert!(hits.iter().all(|v| v["age"].as_i64().unwrap() > 30)); + } + _ => panic!("expected find response"), + } + } +} diff --git a/projection/src/storage/kv.rs b/projection/src/storage/kv.rs new file mode 100644 index 0000000..989094b --- /dev/null +++ b/projection/src/storage/kv.rs @@ -0,0 +1,522 @@ +use crate::types::{ + Checkpoint, CheckpointKey, ProjectionError, StreamSequence, TenantId, ViewKey, ViewType, +}; +use edge_storage::{Config as EdgeConfig, EdgeStorage, KvStore, TableNames, Writer}; +use libmdbx::{NoWriteMap, WriteFlags, RW}; +use query_engine::Document; +use std::sync::Arc; + +#[derive(Clone)] +pub struct KvClient { + storage: Arc, + kv: KvStore, +} + +impl std::fmt::Debug for KvClient { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("KvClient").finish_non_exhaustive() + } +} + +impl KvClient { + pub fn open(storage_path: impl Into) -> Result { + let config = EdgeConfig::new(storage_path.into()); + let storage = EdgeStorage::open(config.clone())?; + let writer = Arc::new(Writer::new(storage.db().clone(), &config)); + let kv = KvStore::new(storage.db().clone(), writer); + + Ok(Self { + storage: Arc::new(storage), + kv, + }) + } + + #[cfg(test)] + pub fn in_memory() -> Self { + use tempfile::tempdir; + let dir = tempdir().expect("failed to create temp dir"); + let path = dir.path().join("test.mdbx"); + std::mem::forget(dir); + Self::open(path.to_string_lossy().to_string()).expect("failed to open in-memory storage") + } + + pub fn get_view(&self, key: &ViewKey) -> Result, ProjectionError> { + let bytes = self + .kv + .get(key.as_str().as_bytes()) + .map_err(|e| ProjectionError::StorageError(e.to_string()))?; + + match bytes { + Some(bytes) => serde_json::from_slice(&bytes) + .map(Some) + .map_err(|e| ProjectionError::DecodeError(e.to_string())), + None => Ok(None), + } + } + + pub fn get_checkpoint( + &self, + key: &CheckpointKey, + ) -> Result, 
ProjectionError> { + let bytes = self + .kv + .get(key.as_str().as_bytes()) + .map_err(|e| ProjectionError::StorageError(e.to_string()))?; + + match bytes { + Some(bytes) => { + let cp: Checkpoint = serde_json::from_slice(&bytes) + .map_err(|e| ProjectionError::DecodeError(e.to_string()))?; + Ok(Some(cp.sequence)) + } + None => Ok(None), + } + } + + pub fn put_checkpoint( + &self, + key: &CheckpointKey, + sequence: StreamSequence, + ) -> Result<(), ProjectionError> { + let checkpoint_bytes = serde_json::to_vec(&Checkpoint::new(sequence)) + .map_err(|e| ProjectionError::StorageError(e.to_string()))?; + + self.commit_kv_txn(|txn, table| { + txn.put( + table, + key.as_str().as_bytes(), + checkpoint_bytes.as_slice(), + WriteFlags::empty(), + )?; + Ok(()) + }) + } + + pub fn delete_checkpoint(&self, key: &CheckpointKey) -> Result<(), ProjectionError> { + self.delete_key(key.as_str().as_bytes()) + } + + pub fn delete_key(&self, key: &[u8]) -> Result<(), ProjectionError> { + self.commit_kv_txn(|txn, table| { + let _ = txn.del(table, key, None)?; + Ok(()) + }) + } + + pub fn delete_view_prefix( + &self, + tenant_id: &TenantId, + view_type: &ViewType, + ) -> Result<(), ProjectionError> { + let prefix = format!("view:{}:{}:", tenant_id.as_str(), view_type.as_str()); + let txn = self + .storage + .db() + .begin_ro_txn() + .map_err(|e| ProjectionError::StorageError(e.to_string()))?; + + let keys = self + .kv + .prefix_scan(&txn, prefix.as_bytes()) + .map_err(|e| ProjectionError::StorageError(e.to_string()))? 
+ .filter_map(|res| match res { + Ok((k, _)) => Some(k), + Err(_) => None, + }) + .collect::>(); + + if keys.is_empty() { + return Ok(()); + } + + let ops = keys + .into_iter() + .map(|key| edge_storage::writer::KvOp::Delete { key }) + .collect::>(); + + let result = self + .kv + .batch_sync(ops) + .map_err(|e| ProjectionError::StorageError(e.to_string()))?; + + match result { + edge_storage::WriteResult::Success => Ok(()), + edge_storage::WriteResult::Error(e) => Err(ProjectionError::StorageError(e)), + other => Err(ProjectionError::StorageError(format!( + "Unexpected write result: {:?}", + other + ))), + } + } + + pub fn commit_view_and_checkpoint( + &self, + view_key: &ViewKey, + view_value: &serde_json::Value, + checkpoint_key: &CheckpointKey, + sequence: StreamSequence, + ) -> Result<(), ProjectionError> { + let view_bytes = serde_json::to_vec(view_value) + .map_err(|e| ProjectionError::StorageError(e.to_string()))?; + let checkpoint_bytes = serde_json::to_vec(&Checkpoint::new(sequence)) + .map_err(|e| ProjectionError::StorageError(e.to_string()))?; + + self.commit_kv_txn(|txn, table| { + txn.put( + table, + view_key.as_str().as_bytes(), + view_bytes.as_slice(), + WriteFlags::empty(), + )?; + txn.put( + table, + checkpoint_key.as_str().as_bytes(), + checkpoint_bytes.as_slice(), + WriteFlags::empty(), + )?; + Ok(()) + }) + } + + pub fn commit_view_and_advance_checkpoint_ordered( + &self, + view_key: &ViewKey, + view_value: &serde_json::Value, + checkpoint_key: &CheckpointKey, + sequence: StreamSequence, + ) -> Result<(), ProjectionError> { + let view_bytes = serde_json::to_vec(view_value) + .map_err(|e| ProjectionError::StorageError(e.to_string()))?; + self.commit_kv_txn_projection(|txn, table| { + txn.put( + table, + view_key.as_str().as_bytes(), + view_bytes.as_slice(), + WriteFlags::empty(), + ) + .map_err(|e| ProjectionError::StorageError(e.to_string()))?; + self.mark_processed_and_advance_checkpoint_in_txn(txn, table, checkpoint_key, sequence) + }) + } + + 
pub fn advance_checkpoint_ordered( + &self, + checkpoint_key: &CheckpointKey, + sequence: StreamSequence, + ) -> Result<(), ProjectionError> { + self.commit_kv_txn_projection(|txn, table| { + self.mark_processed_and_advance_checkpoint_in_txn(txn, table, checkpoint_key, sequence) + }) + } + + pub fn put_json(&self, key: &str, value: &serde_json::Value) -> Result<(), ProjectionError> { + let bytes = + serde_json::to_vec(value).map_err(|e| ProjectionError::StorageError(e.to_string()))?; + self.commit_kv_txn(|txn, table| { + txn.put(table, key.as_bytes(), bytes.as_slice(), WriteFlags::empty())?; + Ok(()) + }) + } + + pub fn scan_documents_by_prefix( + &self, + prefix: &[u8], + max_scan: usize, + ) -> Result, ProjectionError> { + let txn = self + .storage + .db() + .begin_ro_txn() + .map_err(|e| ProjectionError::StorageError(e.to_string()))?; + let table = txn + .open_table(TableNames::KV_STORE) + .map_err(|e| ProjectionError::StorageError(e.to_string()))?; + let mut cursor = txn + .cursor(&table) + .map_err(|e| ProjectionError::StorageError(e.to_string()))?; + + let mut docs = Vec::new(); + let mut scanned = 0usize; + + let start_key = prefix.to_vec(); + let mut iter = match cursor.set_range::, Vec>(&start_key) { + Ok(Some((key, value))) => { + if key.starts_with(prefix) { + Some((key, value)) + } else { + None + } + } + Ok(None) => None, + Err(e) => return Err(ProjectionError::StorageError(e.to_string())), + }; + + while let Some((key_bytes, value_bytes)) = iter { + scanned += 1; + if scanned > max_scan { + break; + } + + if let Ok(mut value) = serde_json::from_slice::(&value_bytes) { + let key = String::from_utf8_lossy(&key_bytes).to_string(); + match &mut value { + serde_json::Value::Object(map) => { + map.entry("_id".to_string()) + .or_insert_with(|| serde_json::Value::String(key)); + } + other => { + value = serde_json::json!({"_id": key, "value": other.clone()}); + } + } + docs.push(Document::new(value)); + } + + iter = match cursor.next::, Vec>() { + 
Ok(Some((key, value))) => { + if key.starts_with(prefix) { + Some((key, value)) + } else { + None + } + } + Ok(None) => None, + Err(e) => return Err(ProjectionError::StorageError(e.to_string())), + }; + } + + Ok(docs) + } + + fn commit_kv_txn(&self, f: F) -> Result<(), ProjectionError> + where + F: FnOnce( + &libmdbx::Transaction<'_, RW, NoWriteMap>, + &libmdbx::Table<'_>, + ) -> Result<(), libmdbx::Error>, + { + let txn = self + .storage + .db() + .begin_rw_txn() + .map_err(|e| ProjectionError::StorageError(e.to_string()))?; + let table = txn + .open_table(TableNames::KV_STORE) + .map_err(|e| ProjectionError::StorageError(e.to_string()))?; + f(&txn, &table).map_err(|e| ProjectionError::StorageError(e.to_string()))?; + txn.commit() + .map_err(|e| ProjectionError::StorageError(e.to_string()))?; + Ok(()) + } + + fn commit_kv_txn_projection(&self, f: F) -> Result<(), ProjectionError> + where + F: FnOnce( + &libmdbx::Transaction<'_, RW, NoWriteMap>, + &libmdbx::Table<'_>, + ) -> Result<(), ProjectionError>, + { + let txn = self + .storage + .db() + .begin_rw_txn() + .map_err(|e| ProjectionError::StorageError(e.to_string()))?; + let table = txn + .open_table(TableNames::KV_STORE) + .map_err(|e| ProjectionError::StorageError(e.to_string()))?; + f(&txn, &table)?; + txn.commit() + .map_err(|e| ProjectionError::StorageError(e.to_string()))?; + Ok(()) + } + + fn mark_processed_and_advance_checkpoint_in_txn( + &self, + txn: &libmdbx::Transaction<'_, RW, NoWriteMap>, + table: &libmdbx::Table<'_>, + checkpoint_key: &CheckpointKey, + sequence: StreamSequence, + ) -> Result<(), ProjectionError> { + if let Some(current) = self.get_checkpoint_in_txn(txn, table, checkpoint_key)? 
{ + if sequence <= current { + return Ok(()); + } + } + + let marker_key = processed_marker_key(checkpoint_key, sequence); + txn.put(table, marker_key.as_bytes(), b"1", WriteFlags::empty()) + .map_err(|e| ProjectionError::StorageError(e.to_string()))?; + + let mut checkpoint = self + .get_checkpoint_in_txn(txn, table, checkpoint_key)? + .unwrap_or(0); + + loop { + let next = checkpoint.saturating_add(1); + if next == 0 { + break; + } + let next_key = processed_marker_key(checkpoint_key, next); + let exists: Option> = txn + .get(table, next_key.as_bytes()) + .map_err(|e| ProjectionError::StorageError(e.to_string()))?; + if exists.is_none() { + break; + } + let _ = txn + .del(table, next_key.as_bytes(), None) + .map_err(|e| ProjectionError::StorageError(e.to_string()))?; + checkpoint = next; + } + + let checkpoint_bytes = serde_json::to_vec(&Checkpoint::new(checkpoint)) + .map_err(|e| ProjectionError::StorageError(e.to_string()))?; + txn.put( + table, + checkpoint_key.as_str().as_bytes(), + checkpoint_bytes.as_slice(), + WriteFlags::empty(), + ) + .map_err(|e| ProjectionError::StorageError(e.to_string()))?; + + Ok(()) + } + + fn get_checkpoint_in_txn( + &self, + txn: &libmdbx::Transaction<'_, RW, NoWriteMap>, + table: &libmdbx::Table<'_>, + checkpoint_key: &CheckpointKey, + ) -> Result, ProjectionError> { + let bytes: Option> = txn + .get(table, checkpoint_key.as_str().as_bytes()) + .map_err(|e| ProjectionError::StorageError(e.to_string()))?; + let Some(bytes) = bytes else { + return Ok(None); + }; + let cp: Checkpoint = serde_json::from_slice(&bytes) + .map_err(|e| ProjectionError::DecodeError(e.to_string()))?; + Ok(Some(cp.sequence)) + } +} + +fn processed_marker_key(checkpoint_key: &CheckpointKey, sequence: StreamSequence) -> String { + format!("processed:{}:{}", checkpoint_key.as_str(), sequence) +} + +#[derive(Debug, thiserror::Error)] +pub enum StorageInitError { + #[error("Failed to open storage: {0}")] + OpenError(#[from] edge_storage::Error), +} + 
+#[cfg(test)] +mod tests { + use super::*; + use crate::types::{CheckpointKey, TenantId, ViewId, ViewKey, ViewType}; + use serde_json::json; + use tempfile::tempdir; + + fn create_test_client() -> (tempfile::TempDir, KvClient) { + let dir = tempdir().unwrap(); + let path = dir.path().join("test.mdbx"); + let client = KvClient::open(path.to_string_lossy().to_string()).unwrap(); + (dir, client) + } + + #[test] + fn view_roundtrip_put_get() { + let (_dir, client) = create_test_client(); + let tenant = TenantId::new("t1"); + let view_type = ViewType::new("User"); + let view_id = ViewId::new("u1"); + let view_key = ViewKey::new(&tenant, &view_type, &view_id); + let cp_key = CheckpointKey::new(&tenant, &view_type); + + client + .commit_view_and_checkpoint(&view_key, &json!({"a": 1}), &cp_key, 5) + .unwrap(); + + let loaded = client.get_view(&view_key).unwrap().unwrap(); + assert_eq!(loaded["a"], 1); + } + + #[test] + fn checkpoint_roundtrip_put_get() { + let (_dir, client) = create_test_client(); + let tenant = TenantId::new("t1"); + let view_type = ViewType::new("User"); + let view_id = ViewId::new("u1"); + let view_key = ViewKey::new(&tenant, &view_type, &view_id); + let cp_key = CheckpointKey::new(&tenant, &view_type); + + client + .commit_view_and_checkpoint(&view_key, &json!({"a": 1}), &cp_key, 42) + .unwrap(); + + let cp = client.get_checkpoint(&cp_key).unwrap().unwrap(); + assert_eq!(cp, 42); + } + + #[test] + fn prefix_delete_removes_all_keys_for_tenant_view_type() { + let (_dir, client) = create_test_client(); + let tenant = TenantId::new("t1"); + let view_type = ViewType::new("User"); + let other_view_type = ViewType::new("Other"); + + let view_id_1 = ViewId::new("u1"); + let view_id_2 = ViewId::new("u2"); + let view_key_1 = ViewKey::new(&tenant, &view_type, &view_id_1); + let view_key_2 = ViewKey::new(&tenant, &view_type, &view_id_2); + let other_key = ViewKey::new(&tenant, &other_view_type, &view_id_1); + let cp_key = CheckpointKey::new(&tenant, &view_type); 
+ + client + .commit_view_and_checkpoint(&view_key_1, &json!({"a": 1}), &cp_key, 1) + .unwrap(); + client + .commit_view_and_checkpoint(&view_key_2, &json!({"a": 2}), &cp_key, 2) + .unwrap(); + client + .commit_view_and_checkpoint(&other_key, &json!({"a": 3}), &cp_key, 3) + .unwrap(); + + client.delete_view_prefix(&tenant, &view_type).unwrap(); + assert!(client.get_view(&view_key_1).unwrap().is_none()); + assert!(client.get_view(&view_key_2).unwrap().is_none()); + assert!(client.get_view(&other_key).unwrap().is_some()); + } + + #[test] + fn atomicity_neither_view_nor_checkpoint_committed_on_error() { + let (_dir, client) = create_test_client(); + let tenant = TenantId::new("t1"); + let view_type = ViewType::new("User"); + let view_id = ViewId::new("u1"); + let view_key = ViewKey::new(&tenant, &view_type, &view_id); + let cp_key = CheckpointKey::new(&tenant, &view_type); + + let view_bytes = serde_json::to_vec(&json!({"a": 1})).unwrap(); + let checkpoint_bytes = serde_json::to_vec(&Checkpoint::new(10)).unwrap(); + + let result = client.commit_kv_txn(|txn, table| { + txn.put( + table, + view_key.as_str().as_bytes(), + view_bytes.as_slice(), + WriteFlags::empty(), + )?; + txn.put( + table, + cp_key.as_str().as_bytes(), + checkpoint_bytes.as_slice(), + WriteFlags::empty(), + )?; + Err(libmdbx::Error::Other(1)) + }); + + assert!(result.is_err()); + assert!(client.get_view(&view_key).unwrap().is_none()); + assert!(client.get_checkpoint(&cp_key).unwrap().is_none()); + } +} diff --git a/projection/src/storage/mod.rs b/projection/src/storage/mod.rs new file mode 100644 index 0000000..3cc5a94 --- /dev/null +++ b/projection/src/storage/mod.rs @@ -0,0 +1,3 @@ +mod kv; + +pub use kv::{KvClient, StorageInitError}; diff --git a/projection/src/stream/jetstream.rs b/projection/src/stream/jetstream.rs new file mode 100644 index 0000000..dbccb43 --- /dev/null +++ b/projection/src/stream/jetstream.rs @@ -0,0 +1,90 @@ +use crate::config::Settings; +use crate::types::ProjectionError; 
+use async_nats::jetstream::{ + self, consumer::pull::Config as PullConfig, consumer::AckPolicy, consumer::DeliverPolicy, + consumer::ReplayPolicy, +}; + +#[derive(Debug, Clone)] +pub struct JetStreamClient { + stream: jetstream::stream::Stream, + consumer: jetstream::consumer::PullConsumer, +} + +#[derive(Debug, Clone)] +pub struct ConsumerOptions { + pub durable_name: String, + pub filter_subject: String, + pub deliver_policy: DeliverPolicy, +} + +impl JetStreamClient { + pub async fn connect(settings: &Settings) -> Result { + let filter_subject = settings + .subject_filters + .first() + .cloned() + .unwrap_or_else(|| "tenant.*.aggregate.*.*".to_string()); + + let options = ConsumerOptions { + durable_name: settings.durable_name.clone(), + filter_subject, + deliver_policy: DeliverPolicy::All, + }; + + Self::connect_with(settings, options).await + } + + pub async fn connect_with( + settings: &Settings, + options: ConsumerOptions, + ) -> Result { + let client = async_nats::connect(&settings.nats_url).await.map_err(|e| { + ProjectionError::StreamError(format!("Failed to connect to NATS: {}", e)) + })?; + + let jetstream = jetstream::new(client); + + let stream = jetstream + .get_stream(&settings.stream_name) + .await + .map_err(|e| ProjectionError::StreamError(format!("Stream not found: {}", e)))?; + + let consumer_config = PullConfig { + durable_name: Some(options.durable_name.clone()), + deliver_policy: options.deliver_policy, + ack_policy: AckPolicy::Explicit, + ack_wait: std::time::Duration::from_millis(settings.ack_timeout_ms), + filter_subject: options.filter_subject, + replay_policy: ReplayPolicy::Instant, + max_ack_pending: settings.max_in_flight as i64, + max_deliver: settings.max_deliver, + ..Default::default() + }; + + let consumer = stream + .get_or_create_consumer(&options.durable_name, consumer_config) + .await + .map_err(|e| { + ProjectionError::StreamError(format!("Consumer creation failed: {}", e)) + })?; + + Ok(Self { stream, consumer }) + } + + 
pub async fn messages(&self) -> Result { + self.consumer + .messages() + .await + .map_err(|e| ProjectionError::StreamError(format!("Message stream error: {}", e))) + } + + pub async fn stream_last_sequence(&self) -> Result { + let mut stream = self.stream.clone(); + let info = stream + .info() + .await + .map_err(|e| ProjectionError::StreamError(e.to_string()))?; + Ok(info.state.last_sequence) + } +} diff --git a/projection/src/stream/mod.rs b/projection/src/stream/mod.rs new file mode 100644 index 0000000..4371179 --- /dev/null +++ b/projection/src/stream/mod.rs @@ -0,0 +1,1839 @@ +mod jetstream; + +pub use jetstream::JetStreamClient; + +use crate::config::Settings; +use crate::observability::Observability; +use crate::project::{ProjectionManifest, ProjectionOutput, ProjectionRuntime}; +use crate::storage::KvClient; +use crate::tenant_placement::TenantPlacement; +use crate::types::{ + CheckpointKey, EventEnvelope, ProjectionError, StreamSequence, TenantId, ViewId, ViewKey, +}; +use async_nats::jetstream::consumer::DeliverPolicy; +use async_nats::jetstream::AckKind; +use futures::StreamExt; +use runtime_function::Program; +use serde_json::Value as JsonValue; +use std::collections::{HashMap, HashSet}; +use std::future::Future; +use std::pin::Pin; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; +use std::time::Duration; +use uuid::Uuid; + +#[derive(Debug, Clone, Default)] +pub struct RunOptions { + pub consumer_durable_name: Option, + pub consumer_filter_subject: Option, + pub consumer_deliver_policy: Option, + pub tenant_filter: Option, + pub view_type_filter: Option>, + pub stop_at_sequence: Option, + pub shutdown: Option>, + pub ready: Option>, + pub observability: Option, + pub storage: Option, + pub tenant_placement: Option, +} + +pub async fn run_projection(settings: Settings) -> Result<(), ProjectionError> { + match settings.consumer_mode { + crate::config::ConsumerMode::Single => { + run_projection_with_options(settings, 
RunOptions::default()).await + } + crate::config::ConsumerMode::PerView => run_projection_per_view(settings).await, + } +} + +pub async fn run_projection_with_signals( + settings: Settings, + shutdown: Arc, + ready: Arc, + observability: Observability, + tenant_placement: TenantPlacement, +) -> Result<(), ProjectionError> { + match settings.consumer_mode { + crate::config::ConsumerMode::Single => { + run_projection_with_options( + settings, + RunOptions { + shutdown: Some(shutdown), + ready: Some(ready), + observability: Some(observability), + tenant_placement: Some(tenant_placement), + ..RunOptions::default() + }, + ) + .await + } + crate::config::ConsumerMode::PerView => { + run_projection_per_view_with_options( + settings, + RunOptions { + shutdown: Some(shutdown), + ready: Some(ready), + observability: Some(observability), + tenant_placement: Some(tenant_placement), + ..RunOptions::default() + }, + ) + .await + } + } +} + +pub async fn run_projection_per_view(settings: Settings) -> Result<(), ProjectionError> { + run_projection_per_view_with_options(settings, RunOptions::default()).await +} + +async fn run_projection_per_view_with_options( + settings: Settings, + options: RunOptions, +) -> Result<(), ProjectionError> { + settings + .validate() + .map_err(ProjectionError::ValidationError)?; + + let (manifest, _) = load_manifest_and_programs(&settings)?; + let filter_subject = settings + .subject_filters + .first() + .cloned() + .unwrap_or_else(|| "tenant.*.aggregate.*.*".to_string()); + + let shutdown = options.shutdown.clone(); + let ready = options.ready.clone(); + let observability = options.observability.clone(); + let storage = options.storage.clone(); + let tenant_placement = options.tenant_placement.clone(); + + let mut tasks = Vec::new(); + + if let Some(ready) = &ready { + ready.store(true, Ordering::Relaxed); + } + + for def in manifest.all() { + let view_type = def.view_type.as_str().to_string(); + let durable_name = 
per_view_durable_name(&settings.durable_name, &view_type); + let mut settings = settings.clone(); + settings.consumer_mode = crate::config::ConsumerMode::Single; + let filter_subject = filter_subject.clone(); + let shutdown = shutdown.clone(); + let observability = observability.clone(); + let storage = storage.clone(); + let tenant_placement = tenant_placement.clone(); + + tasks.push(tokio::spawn(async move { + run_projection_with_options( + settings, + RunOptions { + consumer_durable_name: Some(durable_name), + consumer_filter_subject: Some(filter_subject), + view_type_filter: Some(vec![view_type]), + shutdown, + ready: None, + observability, + storage, + tenant_placement, + ..RunOptions::default() + }, + ) + .await + })); + } + + for task in tasks { + match task.await { + Ok(Ok(())) => {} + Ok(Err(e)) => return Err(e), + Err(e) => return Err(ProjectionError::StreamError(e.to_string())), + } + } + + if let Some(ready) = &ready { + ready.store(false, Ordering::Relaxed); + } + + Ok(()) +} + +fn per_view_durable_name(base: &str, view_type: &str) -> String { + let mut out = String::with_capacity(base.len() + view_type.len() + 1); + out.push_str(base); + out.push('_'); + for ch in view_type.chars() { + if ch.is_ascii_alphanumeric() || ch == '_' || ch == '-' { + out.push(ch); + } else { + out.push('_'); + } + } + out +} + +pub async fn run_projection_with_options( + settings: Settings, + options: RunOptions, +) -> Result<(), ProjectionError> { + let mut options = options; + settings + .validate() + .map_err(ProjectionError::ValidationError)?; + + if options.consumer_filter_subject.is_none() { + if let Some(tp) = &options.tenant_placement { + if let Some(single) = tp.single_hosted_tenant() { + options.consumer_filter_subject = + Some(format!("tenant.{}.aggregate.*.*", single.as_str())); + } + } + } + + let (manifest, programs) = load_manifest_and_programs(&settings)?; + + let storage = if let Some(storage) = options.storage.clone() { + storage + } else { + 
KvClient::open(settings.storage_path.clone()) + .map_err(|e| ProjectionError::StorageError(e.to_string()))? + }; + + let view_type_filter = options.view_type_filter.as_ref().map(|items| { + items + .iter() + .map(|s| s.to_string()) + .collect::>() + }); + + let jetstream = if options.consumer_durable_name.is_some() + || options.consumer_filter_subject.is_some() + || options.consumer_deliver_policy.is_some() + { + let durable_name = options + .consumer_durable_name + .clone() + .unwrap_or_else(|| settings.durable_name.clone()); + let filter_subject = options + .consumer_filter_subject + .clone() + .or_else(|| settings.subject_filters.first().cloned()) + .unwrap_or_else(|| "tenant.*.aggregate.*.*".to_string()); + let deliver_policy = options + .consumer_deliver_policy + .unwrap_or(DeliverPolicy::All); + let consumer_options = crate::stream::jetstream::ConsumerOptions { + durable_name, + filter_subject, + deliver_policy, + }; + JetStreamClient::connect_with(&settings, consumer_options).await? + } else { + JetStreamClient::connect(&settings).await? + }; + + let stop_at_sequence = options.stop_at_sequence; + let runtime = ProjectionRuntime::default(); + let observability = options.observability.clone().unwrap_or_default(); + + let stop_check_enabled = stop_at_sequence.is_some() && options.tenant_filter.is_some(); + let mut tick = tokio::time::interval(Duration::from_millis(200)); + tick.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay); + + if let Some(ready) = &options.ready { + ready.store(true, Ordering::Relaxed); + } + + let shutdown = options + .shutdown + .clone() + .unwrap_or_else(|| Arc::new(tokio::sync::Notify::new())); + if options.shutdown.is_none() { + spawn_shutdown_listener(shutdown.clone()); + } + + let mut messages = jetstream.messages().await?; + loop { + if stop_check_enabled + && caught_up(&options, &manifest, &storage, stop_at_sequence.unwrap())? + { + break; + } + + enum NextMsg { + Msg(T), + Tick, + } + + let next = tokio::select! 
{ + _ = shutdown.notified() => break, + msg = messages.next() => NextMsg::Msg(msg), + _ = tick.tick(), if stop_check_enabled => NextMsg::Tick, + }; + + let next_msg = match next { + NextMsg::Msg(msg) => msg, + NextMsg::Tick => continue, + }; + + let Some(msg) = next_msg else { + break; + }; + + let msg = match msg { + Ok(m) => m, + Err(e) => { + tracing::error!(error = %e, "JetStream message stream error"); + continue; + } + }; + + let info = match msg.info() { + Ok(i) => i, + Err(e) => { + tracing::error!(error = %e, "Failed to parse JetStream message info"); + let _ = msg.ack().await; + continue; + } + }; + let sequence = info.stream_sequence; + let delivered = info.delivered; + + let envelope: EventEnvelope = match serde_json::from_slice(&msg.payload) { + Ok(e) => e, + Err(e) => { + tracing::error!(error = %e, "Failed to decode event envelope"); + let _ = msg.ack().await; + continue; + } + }; + + let tenant_id = resolve_tenant_id(&settings, &envelope); + + if let Some(filter) = &options.tenant_filter { + if filter.as_str() != tenant_id.as_str() { + let _ = msg.ack().await; + continue; + } + } + + if let Some(tp) = &options.tenant_placement { + if tp.is_draining(&tenant_id) || !tp.is_hosted(&tenant_id) { + let _ = msg.ack_with(AckKind::Term).await; + continue; + } + } + + let ctx = ProcessContext { + settings: &settings, + delivered, + sequence, + tenant_id: &tenant_id, + envelope: &envelope, + manifest: &manifest, + programs: &programs, + storage: &storage, + observability: &observability, + view_type_filter: view_type_filter.as_ref(), + }; + + let runtime = runtime.clone(); + let mut backoff_ms = settings.storage_backoff_ms.max(1); + let decision = loop { + let runtime = runtime.clone(); + let result = + process_message_with_storage(ctx.clone(), move |current_view, event, program| { + let runtime = runtime.clone(); + Box::pin( + async move { runtime.project_program(current_view, event, program).await }, + ) + }) + .await; + + match result { + Ok(decision) => 
break decision, + Err(ProjectionError::StorageError(e)) => { + tracing::error!(error = %e, backoff_ms = backoff_ms, "Storage error, backing off"); + let sleep = tokio::time::sleep(std::time::Duration::from_millis(backoff_ms)); + tokio::select! { + _ = shutdown.notified() => return Ok(()), + _ = sleep => {} + } + backoff_ms = + (backoff_ms.saturating_mul(2)).min(settings.storage_backoff_max_ms.max(1)); + continue; + } + Err(e) => return Err(e), + } + }; + + match decision { + AckDecision::Ack => { + if let Err(e) = msg.ack().await { + tracing::error!(error = %e, "Ack failed"); + } + } + AckDecision::Term => { + if let Err(e) = msg.ack_with(AckKind::Term).await { + tracing::error!(error = %e, "Term ack failed"); + } + } + AckDecision::None => {} + } + } + + if let Some(ready) = &options.ready { + ready.store(false, Ordering::Relaxed); + } + + Ok(()) +} + +fn caught_up( + options: &RunOptions, + manifest: &ProjectionManifest, + storage: &S, + target: u64, +) -> Result { + let Some(tenant_filter) = &options.tenant_filter else { + return Ok(false); + }; + let target_view_types = options.view_type_filter.as_ref().map_or_else( + || { + manifest + .all() + .map(|d| d.view_type.as_str().to_string()) + .collect::>() + }, + |types| types.clone(), + ); + for view_type in target_view_types { + let cp_key = CheckpointKey::new(tenant_filter, &crate::types::ViewType::new(view_type)); + let cp = storage.get_checkpoint(&cp_key)?.unwrap_or(0); + if cp < target { + return Ok(false); + } + } + Ok(true) +} + +fn spawn_shutdown_listener(shutdown: Arc) { + tokio::spawn(async move { + #[cfg(unix)] + { + use tokio::signal::unix::{signal, SignalKind}; + let mut sigterm = signal(SignalKind::terminate()).ok(); + let mut sigint = signal(SignalKind::interrupt()).ok(); + tokio::select! 
{ + _ = tokio::signal::ctrl_c() => {}, + _ = async { if let Some(s) = &mut sigterm { let _ = s.recv().await; } } => {}, + _ = async { if let Some(s) = &mut sigint { let _ = s.recv().await; } } => {}, + } + } + + #[cfg(not(unix))] + { + let _ = tokio::signal::ctrl_c().await; + } + + shutdown.notify_waiters(); + }); +} + +pub async fn rebuild_view( + mut settings: Settings, + tenant_id: TenantId, + view_type: crate::types::ViewType, + start_sequence: u64, +) -> Result<(), ProjectionError> { + let storage = KvClient::open(settings.storage_path.clone()) + .map_err(|e| ProjectionError::StorageError(e.to_string()))?; + storage.delete_view_prefix(&tenant_id, &view_type)?; + storage.delete_checkpoint(&CheckpointKey::new(&tenant_id, &view_type))?; + + let durable_name = format!( + "{}_rebuild_{}_{}", + settings.durable_name, + view_type.as_str(), + Uuid::now_v7() + ); + let filter_subject = if tenant_id.is_empty() { + "tenant.*.aggregate.*.*".to_string() + } else { + format!("tenant.{}.aggregate.*.*", tenant_id.as_str()) + }; + + let jetstream = JetStreamClient::connect_with( + &settings, + crate::stream::jetstream::ConsumerOptions { + durable_name: durable_name.clone(), + filter_subject: filter_subject.clone(), + deliver_policy: DeliverPolicy::ByStartSequence { start_sequence }, + }, + ) + .await?; + let tail = jetstream.stream_last_sequence().await?; + + settings.subject_filters = vec![filter_subject]; + settings.durable_name = durable_name; + + run_projection_with_options( + settings, + RunOptions { + tenant_filter: Some(tenant_id), + view_type_filter: Some(vec![view_type.as_str().to_string()]), + stop_at_sequence: Some(tail), + consumer_deliver_policy: Some(DeliverPolicy::ByStartSequence { start_sequence }), + ..RunOptions::default() + }, + ) + .await +} + +pub async fn backfill_to_tail( + mut settings: Settings, + tenant_id: TenantId, + start_sequence: u64, +) -> Result<(), ProjectionError> { + let durable_name = format!("{}_backfill_{}", settings.durable_name, 
Uuid::now_v7()); + let filter_subject = if tenant_id.is_empty() { + "tenant.*.aggregate.*.*".to_string() + } else { + format!("tenant.{}.aggregate.*.*", tenant_id.as_str()) + }; + + let jetstream = JetStreamClient::connect_with( + &settings, + crate::stream::jetstream::ConsumerOptions { + durable_name: durable_name.clone(), + filter_subject: filter_subject.clone(), + deliver_policy: DeliverPolicy::ByStartSequence { start_sequence }, + }, + ) + .await?; + let tail = jetstream.stream_last_sequence().await?; + + settings.subject_filters = vec![filter_subject]; + settings.durable_name = durable_name; + + run_projection_with_options( + settings, + RunOptions { + tenant_filter: Some(tenant_id), + stop_at_sequence: Some(tail), + consumer_deliver_policy: Some(DeliverPolicy::ByStartSequence { start_sequence }), + ..RunOptions::default() + }, + ) + .await +} + +#[derive(Debug, Clone)] +pub struct HealthReport { + pub storage_ok: bool, + pub nats_ok: bool, + pub stream_last_sequence: Option, + pub lags: Vec<(String, u64)>, +} + +pub async fn health_report( + settings: Settings, + tenant_id: TenantId, +) -> Result { + let storage = KvClient::open(settings.storage_path.clone()) + .map_err(|e| ProjectionError::StorageError(e.to_string()))?; + let storage_ok = true; + + let (manifest, _) = load_manifest_and_programs(&settings)?; + + let jetstream = JetStreamClient::connect(&settings).await?; + let stream_last_sequence = jetstream.stream_last_sequence().await.ok(); + let nats_ok = stream_last_sequence.is_some(); + + let mut lags = Vec::new(); + if let Some(last) = stream_last_sequence { + for def in manifest.all() { + let ck = CheckpointKey::new(&tenant_id, &def.view_type); + let cp = storage.get_checkpoint(&ck)?.unwrap_or(0); + lags.push((def.view_type.as_str().to_string(), last.saturating_sub(cp))); + } + } + + Ok(HealthReport { + storage_ok, + nats_ok, + stream_last_sequence, + lags, + }) +} + +fn load_manifest_and_programs( + settings: &Settings, +) -> 
Result<(ProjectionManifest, HashMap), ProjectionError> { + let manifest_raw = std::fs::read_to_string(&settings.manifest_path) + .map_err(|e| ProjectionError::ManifestError(e.to_string()))?; + + let ext = std::path::Path::new(&settings.manifest_path) + .extension() + .and_then(|e| e.to_str()) + .unwrap_or(""); + + let manifest = match ext { + "yaml" | "yml" => ProjectionManifest::load_from_yaml(&manifest_raw) + .map_err(|e| ProjectionError::ManifestError(e.to_string()))?, + "json" => ProjectionManifest::load_from_json(&manifest_raw) + .map_err(|e| ProjectionError::ManifestError(e.to_string()))?, + _ => { + return Err(ProjectionError::ManifestError(format!( + "Unsupported manifest format: {}", + settings.manifest_path + ))); + } + }; + + manifest.validate()?; + + let mut programs: HashMap = HashMap::new(); + for def in manifest.all() { + let raw = std::fs::read_to_string(&def.project_program) + .map_err(|e| ProjectionError::ManifestError(e.to_string()))?; + let program = runtime_function::Program::from_json(&raw) + .map_err(|e| ProjectionError::ManifestError(format!("Program parse error: {}", e)))?; + programs.insert(def.view_type.as_str().to_string(), program); + } + + Ok((manifest, programs)) +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum AckDecision { + Ack, + Term, + None, +} + +trait Storage: Clone + Send + Sync + 'static { + fn get_checkpoint( + &self, + key: &CheckpointKey, + ) -> Result, ProjectionError>; + fn get_view(&self, key: &ViewKey) -> Result, ProjectionError>; + fn commit_view_and_checkpoint_ordered( + &self, + view_key: &ViewKey, + view_value: &JsonValue, + checkpoint_key: &CheckpointKey, + sequence: StreamSequence, + ) -> Result<(), ProjectionError>; + fn advance_checkpoint_ordered( + &self, + key: &CheckpointKey, + sequence: StreamSequence, + ) -> Result<(), ProjectionError>; + fn put_poison(&self, key: &str, value: &JsonValue) -> Result<(), ProjectionError>; +} + +impl Storage for KvClient { + fn get_checkpoint( + &self, + key: 
&CheckpointKey, + ) -> Result, ProjectionError> { + self.get_checkpoint(key) + } + + fn get_view(&self, key: &ViewKey) -> Result, ProjectionError> { + self.get_view(key) + } + + fn commit_view_and_checkpoint_ordered( + &self, + view_key: &ViewKey, + view_value: &JsonValue, + checkpoint_key: &CheckpointKey, + sequence: StreamSequence, + ) -> Result<(), ProjectionError> { + self.commit_view_and_advance_checkpoint_ordered( + view_key, + view_value, + checkpoint_key, + sequence, + ) + } + + fn advance_checkpoint_ordered( + &self, + key: &CheckpointKey, + sequence: StreamSequence, + ) -> Result<(), ProjectionError> { + self.advance_checkpoint_ordered(key, sequence) + } + + fn put_poison(&self, key: &str, value: &JsonValue) -> Result<(), ProjectionError> { + self.put_json(key, value) + } +} + +#[derive(Clone, Copy)] +struct ProcessContext<'a, S: Storage> { + settings: &'a Settings, + delivered: i64, + sequence: StreamSequence, + tenant_id: &'a TenantId, + envelope: &'a EventEnvelope, + manifest: &'a ProjectionManifest, + programs: &'a HashMap, + storage: &'a S, + observability: &'a Observability, + view_type_filter: Option<&'a HashSet>, +} + +async fn process_message_with_storage<'a, S>( + ctx: ProcessContext<'a, S>, + mut project: impl for<'b> FnMut( + &'b JsonValue, + &'b EventEnvelope, + &'b Program, + ) -> Pin< + Box, ProjectionError>> + Send + 'b>, + >, +) -> Result +where + S: Storage, +{ + if ctx.settings.max_deliver > 0 && ctx.delivered > ctx.settings.max_deliver { + let key = format!("poison:{}:{}", ctx.tenant_id.as_str(), ctx.sequence); + let payload_str = + String::from_utf8_lossy(ctx.envelope.payload.to_string().as_bytes()).to_string(); + let record = serde_json::json!({ + "tenant_id": ctx.tenant_id.as_str(), + "sequence": ctx.sequence, + "delivered": ctx.delivered, + "aggregate_id": ctx.envelope.aggregate_id, + "aggregate_type": ctx.envelope.aggregate_type, + "event_type": ctx.envelope.event_type, + "payload": payload_str, + }); + 
ctx.storage.put_poison(&key, &record)?; + return Ok(AckDecision::Term); + } + + let correlation_id = ctx.envelope.correlation_id.as_ref().map(|v| v.as_str()); + let trace_id = ctx.envelope.trace_id.clone().or_else(|| { + ctx.envelope + .traceparent + .as_deref() + .and_then(trace_id_from_traceparent) + }); + + for def in ctx.manifest.all() { + let view_type = def.view_type.clone(); + if let Some(filter) = ctx.view_type_filter { + if !filter.contains(view_type.as_str()) { + continue; + } + } + let checkpoint_key = CheckpointKey::new(ctx.tenant_id, &view_type); + + if let Some(cp) = ctx.storage.get_checkpoint(&checkpoint_key)? { + if ctx.sequence <= cp { + continue; + } + } + + let span = ctx.observability.start_processing_span( + view_type.as_str(), + ctx.tenant_id.as_str(), + correlation_id, + trace_id.as_ref().map(|v| v.as_str()), + ); + + let program = ctx + .programs + .get(view_type.as_str()) + .ok_or_else(|| ProjectionError::ManifestError("missing program".to_string()))?; + + let default_view_id = ViewId::new(ctx.envelope.aggregate_id.clone()); + let default_view_key = ViewKey::new(ctx.tenant_id, &view_type, &default_view_id); + + let current_view = ctx + .storage + .get_view(&default_view_key)? 
+ .unwrap_or_else(|| serde_json::json!({})); + + let output = match project(¤t_view, ctx.envelope, program).await { + Ok(v) => v, + Err(e) => { + ctx.observability.record_error(&span); + tracing::error!(error = %e, "Projection runtime error"); + return Ok(AckDecision::None); + } + }; + + if let Some(output) = output { + let view_id = ViewId::new(output.view_id); + let view_key = ViewKey::new(ctx.tenant_id, &view_type, &view_id); + if let Err(e) = ctx.storage.commit_view_and_checkpoint_ordered( + &view_key, + &output.new_view, + &checkpoint_key, + ctx.sequence, + ) { + ctx.observability.record_error(&span); + tracing::error!(error = %e, "Failed to commit view+checkpoint"); + return Ok(AckDecision::None); + } + } else if let Err(e) = ctx + .storage + .advance_checkpoint_ordered(&checkpoint_key, ctx.sequence) + { + ctx.observability.record_error(&span); + tracing::error!(error = %e, "Failed to advance checkpoint"); + return Ok(AckDecision::None); + } + + ctx.observability.record_processed(&span); + } + + Ok(AckDecision::Ack) +} + +fn trace_id_from_traceparent(traceparent: &str) -> Option { + shared::trace_id_from_traceparent(traceparent).map(|s| shared::TraceId::new(s.to_string())) +} + +fn resolve_tenant_id(settings: &Settings, envelope: &EventEnvelope) -> TenantId { + if settings.multi_tenant_enabled { + if envelope.tenant_id.is_empty() { + if let Some(default) = &settings.default_tenant_id { + return TenantId::new(default); + } + } + return envelope.tenant_id.clone(); + } + + if let Some(default) = &settings.default_tenant_id { + return TenantId::new(default); + } + TenantId::default() +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::project::ProjectionDefinition; + use crate::types::ViewType; + + fn test_program() -> Program { + let program_json = r#" + { + "specVersion": "1.1", + "id": "test", + "name": "Test", + "inputs": [], + "nodes": [ + {"id": "const", "type": "Const", "data": {"value": {"view_id": "a1", "new_view": {"ok": true}}}}, + {"id": 
"output", "type": "Output", "data": {}} + ], + "edges": [ + {"id": "e1", "source": "const", "sourceHandle": "out", "target": "output", "targetHandle": "value"} + ], + "outputNodeId": "output" + } + "#; + serde_json::from_str(program_json).unwrap() + } + + #[derive(Clone, Default)] + struct FakeStorage { + checkpoint: Option, + fail_commit: bool, + poison_keys: std::sync::Arc>>, + } + + impl Storage for FakeStorage { + fn get_checkpoint( + &self, + _key: &CheckpointKey, + ) -> Result, ProjectionError> { + Ok(self.checkpoint) + } + + fn get_view(&self, _key: &ViewKey) -> Result, ProjectionError> { + Ok(Some(serde_json::json!({}))) + } + + fn commit_view_and_checkpoint_ordered( + &self, + _view_key: &ViewKey, + _view_value: &JsonValue, + _checkpoint_key: &CheckpointKey, + _sequence: StreamSequence, + ) -> Result<(), ProjectionError> { + if self.fail_commit { + return Err(ProjectionError::StorageError("commit failed".to_string())); + } + Ok(()) + } + + fn advance_checkpoint_ordered( + &self, + _key: &CheckpointKey, + _sequence: StreamSequence, + ) -> Result<(), ProjectionError> { + Ok(()) + } + + fn put_poison(&self, key: &str, _value: &JsonValue) -> Result<(), ProjectionError> { + self.poison_keys.lock().unwrap().push(key.to_string()); + Ok(()) + } + } + + fn test_manifest() -> ProjectionManifest { + let mut manifest = ProjectionManifest::new(); + manifest.register(ProjectionDefinition { + view_type: ViewType::new("User"), + project_program: "/tmp/prog".to_string(), + }); + manifest + } + + #[tokio::test] + async fn checkpoint_gate_skips_and_still_acks() { + let settings = Settings::default(); + let tenant_id = TenantId::new("t1"); + let envelope = EventEnvelope { + tenant_id: tenant_id.clone(), + event_id: None, + aggregate_id: "a1".to_string(), + aggregate_type: "Account".to_string(), + version: None, + event_type: "created".to_string(), + payload: serde_json::json!({"x": 1}), + command_id: None, + timestamp: None, + correlation_id: None, + traceparent: None, + 
trace_id: None, + }; + + let manifest = test_manifest(); + let programs = HashMap::from([("User".to_string(), test_program())]); + let storage = FakeStorage { + checkpoint: Some(10), + ..Default::default() + }; + let obs = Observability::default(); + + let ctx = ProcessContext { + settings: &settings, + delivered: 1, + sequence: 5, + tenant_id: &tenant_id, + envelope: &envelope, + manifest: &manifest, + programs: &programs, + storage: &storage, + observability: &obs, + view_type_filter: None, + }; + + let decision = process_message_with_storage(ctx, |_current, _event, _program| { + Box::pin(async move { + Ok(Some(ProjectionOutput { + view_id: "a1".to_string(), + new_view: serde_json::json!({"ok": true}), + })) + }) + }) + .await + .unwrap(); + + assert_eq!(decision, AckDecision::Ack); + } + + #[tokio::test] + async fn commit_failure_prevents_ack() { + let settings = Settings::default(); + let tenant_id = TenantId::new("t1"); + let envelope = EventEnvelope { + tenant_id: tenant_id.clone(), + event_id: None, + aggregate_id: "a1".to_string(), + aggregate_type: "Account".to_string(), + version: None, + event_type: "created".to_string(), + payload: serde_json::json!({"x": 1}), + command_id: None, + timestamp: None, + correlation_id: None, + traceparent: None, + trace_id: None, + }; + + let manifest = test_manifest(); + let programs = HashMap::from([("User".to_string(), test_program())]); + let storage = FakeStorage { + checkpoint: None, + fail_commit: true, + ..Default::default() + }; + let obs = Observability::default(); + + let ctx = ProcessContext { + settings: &settings, + delivered: 1, + sequence: 11, + tenant_id: &tenant_id, + envelope: &envelope, + manifest: &manifest, + programs: &programs, + storage: &storage, + observability: &obs, + view_type_filter: None, + }; + + let decision = process_message_with_storage(ctx, |_current, _event, _program| { + Box::pin(async move { + Ok(Some(ProjectionOutput { + view_id: "a1".to_string(), + new_view: 
serde_json::json!({"ok": true}), + })) + }) + }) + .await + .unwrap(); + + assert_eq!(decision, AckDecision::None); + } + + #[tokio::test] + async fn poison_policy_terms_after_max_deliver() { + let settings = Settings { + max_deliver: 2, + ..Default::default() + }; + let tenant_id = TenantId::new("t1"); + let envelope = EventEnvelope { + tenant_id: tenant_id.clone(), + event_id: None, + aggregate_id: "a1".to_string(), + aggregate_type: "Account".to_string(), + version: None, + event_type: "created".to_string(), + payload: serde_json::json!({"x": 1}), + command_id: None, + timestamp: None, + correlation_id: None, + traceparent: None, + trace_id: None, + }; + + let manifest = test_manifest(); + let programs = HashMap::from([("User".to_string(), test_program())]); + let storage = FakeStorage::default(); + let obs = Observability::default(); + + let ctx = ProcessContext { + settings: &settings, + delivered: 10, + sequence: 11, + tenant_id: &tenant_id, + envelope: &envelope, + manifest: &manifest, + programs: &programs, + storage: &storage, + observability: &obs, + view_type_filter: None, + }; + + let decision = process_message_with_storage(ctx, |_current, _event, _program| { + Box::pin(async move { + Ok(Some(ProjectionOutput { + view_id: "a1".to_string(), + new_view: serde_json::json!({"ok": true}), + })) + }) + }) + .await + .unwrap(); + + assert_eq!(decision, AckDecision::Term); + assert!(!storage.poison_keys.lock().unwrap().is_empty()); + } + + #[tokio::test] + #[ignore] + async fn jetstream_redelivery_is_idempotent_with_checkpoint() { + let Ok(nats_url) = std::env::var("PROJECTION_TEST_NATS_URL") else { + return; + }; + + let id = uuid::Uuid::now_v7().to_string(); + let stream_name = format!("projection_test_{}", id); + let subject = format!("tenant.t1.aggregate.Account.{}", id); + let durable = format!("durable_{}", id); + + let client = async_nats::connect(&nats_url).await.unwrap(); + let jetstream = async_nats::jetstream::new(client); + + let stream = jetstream 
+ .get_or_create_stream(async_nats::jetstream::stream::Config { + name: stream_name.clone(), + subjects: vec![subject.clone()], + ..Default::default() + }) + .await + .unwrap(); + + let consumer = stream + .get_or_create_consumer( + &durable, + async_nats::jetstream::consumer::pull::Config { + durable_name: Some(durable.clone()), + deliver_policy: async_nats::jetstream::consumer::DeliverPolicy::All, + ack_policy: async_nats::jetstream::consumer::AckPolicy::Explicit, + ack_wait: std::time::Duration::from_millis(300), + max_deliver: 5, + filter_subject: subject.clone(), + ..Default::default() + }, + ) + .await + .unwrap(); + + let envelope = EventEnvelope { + tenant_id: TenantId::new("t1"), + event_id: None, + aggregate_id: "a1".to_string(), + aggregate_type: "Account".to_string(), + version: None, + event_type: "created".to_string(), + payload: serde_json::json!({"x": 1}), + command_id: None, + timestamp: Some(chrono::Utc::now()), + correlation_id: None, + traceparent: None, + trace_id: None, + }; + let payload = serde_json::to_vec(&envelope).unwrap(); + + jetstream + .publish(subject.clone(), payload.into()) + .await + .unwrap() + .await + .unwrap(); + + let program_json = r#" + { + "specVersion": "1.1", + "id": "proj", + "name": "Projection", + "inputs": [ + {"name": "current_view", "type": "Any", "required": true}, + {"name": "event", "type": "Any", "required": true} + ], + "nodes": [ + {"id": "event", "type": "InputRef", "data": {"input_name": "event"}}, + {"id": "expr", "type": "Expr", "data": {"expression": "({ view_id: input.aggregate_id, new_view: input.payload })"}}, + {"id": "output", "type": "Output", "data": {}} + ], + "edges": [ + {"id": "e1", "source": "event", "sourceHandle": "out", "target": "expr", "targetHandle": "input"}, + {"id": "e2", "source": "expr", "sourceHandle": "out", "target": "output", "targetHandle": "value"} + ], + "outputNodeId": "output" + } + "#; + let program: Program = serde_json::from_str(program_json).unwrap(); + + let manifest 
= test_manifest(); + let programs = HashMap::from([("User".to_string(), program)]); + let obs = Observability::default(); + let runtime = ProjectionRuntime::default(); + let settings = Settings { + max_deliver: 10, + ..Default::default() + }; + + let storage_dir = tempfile::tempdir().unwrap(); + let storage_path = storage_dir.path().join("mdbx"); + let storage = KvClient::open(storage_path.to_string_lossy().to_string()).unwrap(); + + #[derive(Clone)] + struct FailOnceStorage { + inner: KvClient, + should_fail: std::sync::Arc, + } + + impl Storage for FailOnceStorage { + fn get_checkpoint( + &self, + key: &CheckpointKey, + ) -> Result, ProjectionError> { + self.inner.get_checkpoint(key) + } + + fn get_view(&self, key: &ViewKey) -> Result, ProjectionError> { + self.inner.get_view(key) + } + + fn commit_view_and_checkpoint_ordered( + &self, + view_key: &ViewKey, + view_value: &JsonValue, + checkpoint_key: &CheckpointKey, + sequence: StreamSequence, + ) -> Result<(), ProjectionError> { + if self + .should_fail + .compare_exchange( + true, + false, + std::sync::atomic::Ordering::SeqCst, + std::sync::atomic::Ordering::SeqCst, + ) + .is_ok() + { + return Err(ProjectionError::StorageError("fail once".to_string())); + } + self.inner.commit_view_and_advance_checkpoint_ordered( + view_key, + view_value, + checkpoint_key, + sequence, + ) + } + + fn advance_checkpoint_ordered( + &self, + key: &CheckpointKey, + sequence: StreamSequence, + ) -> Result<(), ProjectionError> { + self.inner.advance_checkpoint_ordered(key, sequence) + } + + fn put_poison(&self, key: &str, value: &JsonValue) -> Result<(), ProjectionError> { + self.inner.put_json(key, value) + } + } + + let failing_storage = FailOnceStorage { + inner: storage.clone(), + should_fail: std::sync::Arc::new(std::sync::atomic::AtomicBool::new(true)), + }; + + let mut messages = consumer.messages().await.unwrap(); + + let first = tokio::time::timeout(std::time::Duration::from_secs(3), messages.next()) + .await + .unwrap() + 
.unwrap() + .unwrap(); + let first_info = first.info().unwrap(); + let first_seq = first_info.stream_sequence; + + let first_ctx = ProcessContext { + settings: &settings, + delivered: first_info.delivered, + sequence: first_seq, + tenant_id: &envelope.tenant_id, + envelope: &envelope, + manifest: &manifest, + programs: &programs, + storage: &failing_storage, + observability: &obs, + view_type_filter: None, + }; + + let decision = process_message_with_storage(first_ctx, |current_view, event, program| { + let runtime = runtime.clone(); + Box::pin(async move { runtime.project_program(current_view, event, program).await }) + }) + .await + .unwrap(); + + assert_eq!(decision, AckDecision::None); + + let redelivered = tokio::time::timeout(std::time::Duration::from_secs(5), messages.next()) + .await + .unwrap() + .unwrap() + .unwrap(); + let redelivered_info = redelivered.info().unwrap(); + assert_eq!(redelivered_info.stream_sequence, first_seq); + assert!(redelivered_info.delivered >= 2); + + let second_ctx = ProcessContext { + settings: &settings, + delivered: redelivered_info.delivered, + sequence: redelivered_info.stream_sequence, + tenant_id: &envelope.tenant_id, + envelope: &envelope, + manifest: &manifest, + programs: &programs, + storage: &storage, + observability: &obs, + view_type_filter: None, + }; + + let decision = process_message_with_storage(second_ctx, |current_view, event, program| { + let runtime = runtime.clone(); + Box::pin(async move { runtime.project_program(current_view, event, program).await }) + }) + .await + .unwrap(); + + assert_eq!(decision, AckDecision::Ack); + redelivered.ack().await.unwrap(); + + let checkpoint_key = CheckpointKey::new(&envelope.tenant_id, &ViewType::new("User")); + let cp = storage.get_checkpoint(&checkpoint_key).unwrap().unwrap(); + assert_eq!(cp, first_seq); + } + + #[tokio::test] + #[ignore] + async fn rebuild_from_scratch_produces_identical_view() { + let Ok(nats_url) = std::env::var("PROJECTION_TEST_NATS_URL") else { + 
return; + }; + + let id = uuid::Uuid::now_v7().to_string(); + let stream_name = format!("projection_rebuild_test_{}", id); + let subject = format!("tenant.t1.aggregate.Account.{}", id); + let filter_subject = subject.clone(); + + let client = async_nats::connect(&nats_url).await.unwrap(); + let jetstream = async_nats::jetstream::new(client); + + let _stream = jetstream + .get_or_create_stream(async_nats::jetstream::stream::Config { + name: stream_name.clone(), + subjects: vec![subject.clone()], + ..Default::default() + }) + .await + .unwrap(); + + let n = 50usize; + for i in 0..n { + let envelope = EventEnvelope { + tenant_id: TenantId::new("t1"), + event_id: None, + aggregate_id: format!("a{}", i), + aggregate_type: "Account".to_string(), + version: None, + event_type: "tick".to_string(), + payload: serde_json::json!({"i": i}), + command_id: None, + timestamp: Some(chrono::Utc::now()), + correlation_id: None, + traceparent: None, + trace_id: None, + }; + let payload = serde_json::to_vec(&envelope).unwrap(); + jetstream + .publish(subject.clone(), payload.into()) + .await + .unwrap() + .await + .unwrap(); + } + + let dir = tempfile::tempdir().unwrap(); + let (manifest_path, _) = write_passthrough_manifest(dir.path(), "Counter"); + let storage_dir = tempfile::tempdir().unwrap(); + let storage_path = storage_dir.path().join("mdbx"); + + let durable = format!("durable_{}", id); + let settings = Settings { + nats_url, + stream_name, + subject_filters: vec![filter_subject.clone()], + durable_name: durable.clone(), + storage_path: storage_path.to_string_lossy().to_string(), + manifest_path: manifest_path.to_string_lossy().to_string(), + multi_tenant_enabled: true, + default_tenant_id: None, + ..Default::default() + }; + + let stream = jetstream.get_stream(&settings.stream_name).await.unwrap(); + let mut stream = stream; + let tail = stream.info().await.unwrap().state.last_sequence; + + let shared_storage = KvClient::open(settings.storage_path.clone()).unwrap(); + + 
run_projection_with_options( + settings.clone(), + RunOptions { + tenant_filter: Some(TenantId::new("t1")), + view_type_filter: Some(vec!["Counter".to_string()]), + stop_at_sequence: Some(tail), + consumer_durable_name: Some(durable.clone()), + consumer_filter_subject: Some(filter_subject.clone()), + consumer_deliver_policy: Some(DeliverPolicy::ByStartSequence { start_sequence: 1 }), + storage: Some(shared_storage.clone()), + ..RunOptions::default() + }, + ) + .await + .unwrap(); + + let mut before = shared_storage + .scan_documents_by_prefix(b"view:t1:Counter:", 10_000) + .unwrap() + .into_iter() + .map(|d| d.into_value()) + .collect::>(); + before.sort_by_key(|v| v["_id"].as_str().unwrap_or("").to_string()); + assert_eq!(before.len(), n); + + shared_storage + .delete_view_prefix(&TenantId::new("t1"), &ViewType::new("Counter")) + .unwrap(); + shared_storage + .delete_checkpoint(&CheckpointKey::new( + &TenantId::new("t1"), + &ViewType::new("Counter"), + )) + .unwrap(); + + let rebuild_durable = format!("{}_rebuild_{}", durable, uuid::Uuid::now_v7()); + run_projection_with_options( + settings.clone(), + RunOptions { + tenant_filter: Some(TenantId::new("t1")), + view_type_filter: Some(vec!["Counter".to_string()]), + stop_at_sequence: Some(tail), + consumer_durable_name: Some(rebuild_durable), + consumer_filter_subject: Some(filter_subject.clone()), + consumer_deliver_policy: Some(DeliverPolicy::ByStartSequence { start_sequence: 1 }), + storage: Some(shared_storage.clone()), + ..RunOptions::default() + }, + ) + .await + .unwrap(); + + let mut after = shared_storage + .scan_documents_by_prefix(b"view:t1:Counter:", 10_000) + .unwrap() + .into_iter() + .map(|d| d.into_value()) + .collect::>(); + after.sort_by_key(|v| v["_id"].as_str().unwrap_or("").to_string()); + assert_eq!(after.len(), n); + assert_eq!(before, after); + } + + #[tokio::test] + #[ignore] + async fn rolling_restart_resumes_from_checkpoint_without_duplication() { + let Ok(nats_url) = 
std::env::var("PROJECTION_TEST_NATS_URL") else { + return; + }; + + let id = uuid::Uuid::now_v7().to_string(); + let stream_name = format!("projection_restart_test_{}", id); + let subject = format!("tenant.t1.aggregate.Account.{}", id); + let filter_subject = subject.clone(); + + let client = async_nats::connect(&nats_url).await.unwrap(); + let jetstream = async_nats::jetstream::new(client); + + let _stream = jetstream + .get_or_create_stream(async_nats::jetstream::stream::Config { + name: stream_name.clone(), + subjects: vec![subject.clone()], + ..Default::default() + }) + .await + .unwrap(); + + let n = 60usize; + for i in 0..n { + let envelope = EventEnvelope { + tenant_id: TenantId::new("t1"), + event_id: None, + aggregate_id: format!("a{}", i), + aggregate_type: "Account".to_string(), + version: None, + event_type: "tick".to_string(), + payload: serde_json::json!({"i": i}), + command_id: None, + timestamp: Some(chrono::Utc::now()), + correlation_id: None, + traceparent: None, + trace_id: None, + }; + let payload = serde_json::to_vec(&envelope).unwrap(); + jetstream + .publish(subject.clone(), payload.into()) + .await + .unwrap() + .await + .unwrap(); + } + + let dir = tempfile::tempdir().unwrap(); + let (manifest_path, _) = write_passthrough_manifest(dir.path(), "Counter"); + let storage_dir = tempfile::tempdir().unwrap(); + let storage_path = storage_dir.path().join("mdbx"); + + let durable = format!("durable_{}", id); + let settings = Settings { + nats_url, + stream_name, + subject_filters: vec![filter_subject.clone()], + durable_name: durable.clone(), + storage_path: storage_path.to_string_lossy().to_string(), + manifest_path: manifest_path.to_string_lossy().to_string(), + multi_tenant_enabled: true, + default_tenant_id: None, + ..Default::default() + }; + + let stream = jetstream.get_stream(&settings.stream_name).await.unwrap(); + let mut stream = stream; + let tail = stream.info().await.unwrap().state.last_sequence; + let mid = tail / 2; + + let 
shared_storage = KvClient::open(settings.storage_path.clone()).unwrap(); + + run_projection_with_options( + settings.clone(), + RunOptions { + tenant_filter: Some(TenantId::new("t1")), + view_type_filter: Some(vec!["Counter".to_string()]), + stop_at_sequence: Some(mid), + storage: Some(shared_storage.clone()), + ..RunOptions::default() + }, + ) + .await + .unwrap(); + + let ck = CheckpointKey::new(&TenantId::new("t1"), &ViewType::new("Counter")); + let cp1 = shared_storage.get_checkpoint(&ck).unwrap().unwrap_or(0); + assert!(cp1 >= mid); + + run_projection_with_options( + settings.clone(), + RunOptions { + tenant_filter: Some(TenantId::new("t1")), + view_type_filter: Some(vec!["Counter".to_string()]), + stop_at_sequence: Some(tail), + storage: Some(shared_storage.clone()), + ..RunOptions::default() + }, + ) + .await + .unwrap(); + + let cp2 = shared_storage.get_checkpoint(&ck).unwrap().unwrap_or(0); + assert!(cp2 >= cp1); + + let docs = shared_storage + .scan_documents_by_prefix(b"view:t1:Counter:", 10_000) + .unwrap(); + assert_eq!(docs.len(), n); + assert_eq!(cp2, tail); + } + + fn write_passthrough_manifest( + dir: &std::path::Path, + view_type: &str, + ) -> (std::path::PathBuf, std::path::PathBuf) { + let program_path = dir.join("passthrough.json"); + let manifest_path = dir.join("manifest.yaml"); + std::fs::write( + &program_path, + r#" + { + "specVersion": "1.1", + "id": "passthrough", + "name": "Passthrough", + "inputs": [ + {"name": "current_view", "type": "Any", "required": true}, + {"name": "event", "type": "Any", "required": true} + ], + "nodes": [ + {"id": "event", "type": "InputRef", "data": {"input_name": "event"}}, + {"id": "expr", "type": "Expr", "data": {"expression": "{ view_id: input.aggregate_id, new_view: input.payload }"}}, + {"id": "output", "type": "Output", "data": {}} + ], + "edges": [ + {"id": "e1", "source": "event", "sourceHandle": "out", "target": "expr", "targetHandle": "input"}, + {"id": "e2", "source": "expr", "sourceHandle": "out", 
"target": "output", "targetHandle": "value"} + ], + "outputNodeId": "output" + } + "#, + ) + .unwrap(); + std::fs::write( + &manifest_path, + format!( + r#" +projections: + {view_type}: + view_type: "{view_type}" + project_program: "{}" +"#, + program_path.to_string_lossy() + ), + ) + .unwrap(); + (manifest_path, program_path) + } + + #[tokio::test] + #[ignore] + async fn scale_out_two_workers_does_not_duplicate_work() { + let Ok(nats_url) = std::env::var("PROJECTION_TEST_NATS_URL") else { + return; + }; + + let id = uuid::Uuid::now_v7().to_string(); + let stream_name = format!("projection_scale_test_{}", id); + let subject = format!("tenant.t1.aggregate.Account.{}", id); + let filter_subject = subject.clone(); + + let client = async_nats::connect(&nats_url).await.unwrap(); + let jetstream = async_nats::jetstream::new(client); + + let _stream = jetstream + .get_or_create_stream(async_nats::jetstream::stream::Config { + name: stream_name.clone(), + subjects: vec![subject.clone()], + ..Default::default() + }) + .await + .unwrap(); + + let n = 200usize; + for i in 0..n { + let envelope = EventEnvelope { + tenant_id: TenantId::new("t1"), + event_id: None, + aggregate_id: format!("a{}", i), + aggregate_type: "Account".to_string(), + version: None, + event_type: "upsert".to_string(), + payload: serde_json::json!({"i": i}), + command_id: None, + timestamp: Some(chrono::Utc::now()), + correlation_id: None, + traceparent: None, + trace_id: None, + }; + jetstream + .publish( + subject.clone(), + serde_json::to_vec(&envelope).unwrap().into(), + ) + .await + .unwrap() + .await + .unwrap(); + } + + let dir = tempfile::tempdir().unwrap(); + let (manifest_path, _) = write_passthrough_manifest(dir.path(), "Counter"); + let storage_dir = tempfile::tempdir().unwrap(); + let storage_path = storage_dir.path().join("mdbx"); + + let durable = format!("durable_{}", id); + let settings = Settings { + nats_url, + stream_name, + subject_filters: vec![filter_subject.clone()], + durable_name: 
durable.clone(), + storage_path: storage_path.to_string_lossy().to_string(), + manifest_path: manifest_path.to_string_lossy().to_string(), + multi_tenant_enabled: true, + default_tenant_id: None, + consumer_mode: crate::config::ConsumerMode::Single, + ..Default::default() + }; + + let shared_storage = KvClient::open(settings.storage_path.clone()).unwrap(); + + let jetstream_client = JetStreamClient::connect_with( + &settings, + crate::stream::jetstream::ConsumerOptions { + durable_name: durable.clone(), + filter_subject: filter_subject.clone(), + deliver_policy: DeliverPolicy::All, + }, + ) + .await + .unwrap(); + let tail = jetstream_client.stream_last_sequence().await.unwrap(); + + let opts = RunOptions { + tenant_filter: Some(TenantId::new("t1")), + view_type_filter: Some(vec!["Counter".to_string()]), + stop_at_sequence: Some(tail), + consumer_durable_name: Some(durable.clone()), + consumer_filter_subject: Some(filter_subject.clone()), + storage: Some(shared_storage.clone()), + ..RunOptions::default() + }; + + let t1 = tokio::spawn(run_projection_with_options(settings.clone(), opts.clone())); + let t2 = tokio::spawn(run_projection_with_options(settings.clone(), opts)); + + t1.await.unwrap().unwrap(); + t2.await.unwrap().unwrap(); + + let docs = shared_storage + .scan_documents_by_prefix(b"view:t1:Counter:", 10_000) + .unwrap(); + assert_eq!(docs.len(), n); + + let ck = CheckpointKey::new(&TenantId::new("t1"), &ViewType::new("Counter")); + let cp = shared_storage.get_checkpoint(&ck).unwrap().unwrap_or(0); + assert_eq!(cp, tail); + } + + #[tokio::test] + #[ignore] + async fn rolling_restart_with_two_workers_preserves_correctness() { + let Ok(nats_url) = std::env::var("PROJECTION_TEST_NATS_URL") else { + return; + }; + + let id = uuid::Uuid::now_v7().to_string(); + let stream_name = format!("projection_scale_restart_test_{}", id); + let subject = format!("tenant.t1.aggregate.Account.{}", id); + let filter_subject = subject.clone(); + + let client = 
async_nats::connect(&nats_url).await.unwrap(); + let jetstream = async_nats::jetstream::new(client); + + let _stream = jetstream + .get_or_create_stream(async_nats::jetstream::stream::Config { + name: stream_name.clone(), + subjects: vec![subject.clone()], + ..Default::default() + }) + .await + .unwrap(); + + let n = 150usize; + for i in 0..n { + let envelope = EventEnvelope { + tenant_id: TenantId::new("t1"), + event_id: None, + aggregate_id: format!("a{}", i), + aggregate_type: "Account".to_string(), + version: None, + event_type: "upsert".to_string(), + payload: serde_json::json!({"i": i}), + command_id: None, + timestamp: Some(chrono::Utc::now()), + correlation_id: None, + traceparent: None, + trace_id: None, + }; + jetstream + .publish( + subject.clone(), + serde_json::to_vec(&envelope).unwrap().into(), + ) + .await + .unwrap() + .await + .unwrap(); + } + + let dir = tempfile::tempdir().unwrap(); + let (manifest_path, _) = write_passthrough_manifest(dir.path(), "Counter"); + let storage_dir = tempfile::tempdir().unwrap(); + let storage_path = storage_dir.path().join("mdbx"); + + let durable = format!("durable_{}", id); + let settings = Settings { + nats_url, + stream_name, + subject_filters: vec![filter_subject.clone()], + durable_name: durable.clone(), + storage_path: storage_path.to_string_lossy().to_string(), + manifest_path: manifest_path.to_string_lossy().to_string(), + multi_tenant_enabled: true, + default_tenant_id: None, + consumer_mode: crate::config::ConsumerMode::Single, + ..Default::default() + }; + + let shared_storage = KvClient::open(settings.storage_path.clone()).unwrap(); + + let jetstream_client = JetStreamClient::connect_with( + &settings, + crate::stream::jetstream::ConsumerOptions { + durable_name: durable.clone(), + filter_subject: filter_subject.clone(), + deliver_policy: DeliverPolicy::All, + }, + ) + .await + .unwrap(); + let tail = jetstream_client.stream_last_sequence().await.unwrap(); + + let shutdown1 = 
Arc::new(tokio::sync::Notify::new()); + let shutdown2 = Arc::new(tokio::sync::Notify::new()); + + let opts1 = RunOptions { + tenant_filter: Some(TenantId::new("t1")), + view_type_filter: Some(vec!["Counter".to_string()]), + stop_at_sequence: None, + consumer_durable_name: Some(durable.clone()), + consumer_filter_subject: Some(filter_subject.clone()), + shutdown: Some(shutdown1.clone()), + storage: Some(shared_storage.clone()), + ..RunOptions::default() + }; + + let opts2 = RunOptions { + tenant_filter: Some(TenantId::new("t1")), + view_type_filter: Some(vec!["Counter".to_string()]), + stop_at_sequence: Some(tail), + consumer_durable_name: Some(durable.clone()), + consumer_filter_subject: Some(filter_subject.clone()), + shutdown: Some(shutdown2.clone()), + storage: Some(shared_storage.clone()), + ..RunOptions::default() + }; + + let t1 = tokio::spawn(run_projection_with_options(settings.clone(), opts1)); + let t2 = tokio::spawn(run_projection_with_options(settings.clone(), opts2)); + + tokio::time::sleep(std::time::Duration::from_millis(50)).await; + shutdown1.notify_waiters(); + + t1.await.unwrap().unwrap(); + t2.await.unwrap().unwrap(); + + let docs = shared_storage + .scan_documents_by_prefix(b"view:t1:Counter:", 10_000) + .unwrap(); + assert_eq!(docs.len(), n); + + let ck = CheckpointKey::new(&TenantId::new("t1"), &ViewType::new("Counter")); + let cp = shared_storage.get_checkpoint(&ck).unwrap().unwrap_or(0); + assert_eq!(cp, tail); + } + + #[test] + fn per_view_durable_name_sanitizes() { + assert_eq!(per_view_durable_name("base", "User"), "base_User"); + assert_eq!(per_view_durable_name("base", "My View"), "base_My_View"); + assert_eq!(per_view_durable_name("base", "a/b"), "base_a_b"); + } +} diff --git a/projection/src/tenant_placement.rs b/projection/src/tenant_placement.rs new file mode 100644 index 0000000..b4d4f22 --- /dev/null +++ b/projection/src/tenant_placement.rs @@ -0,0 +1,181 @@ +use crate::config::Settings; +use crate::types::TenantId; +use 
serde::Deserialize; +use std::collections::HashSet; +use std::sync::Arc; +use std::sync::RwLock; + +#[derive(Debug, Clone, Default)] +pub struct TenantPlacement { + inner: Arc>, +} + +#[derive(Debug, Default)] +struct Inner { + hosted: Option>, + draining: HashSet, +} + +#[derive(Debug, Clone, serde::Serialize)] +pub struct TenantPlacementSnapshot { + pub hosted: Option>, + pub draining: Vec, +} + +impl TenantPlacement { + pub fn load(settings: &Settings) -> Result { + let hosted = hosted_tenants_from_settings(settings)?; + Ok(Self { + inner: Arc::new(RwLock::new(Inner { + hosted, + draining: HashSet::new(), + })), + }) + } + + pub fn reload(&self, settings: &Settings) -> Result<(), String> { + let hosted = hosted_tenants_from_settings(settings)?; + let mut inner = self + .inner + .write() + .map_err(|_| "tenant placement lock poisoned".to_string())?; + inner.hosted = hosted; + Ok(()) + } + + pub fn is_hosted(&self, tenant_id: &TenantId) -> bool { + let inner = match self.inner.read() { + Ok(i) => i, + Err(_) => return true, + }; + let Some(hosted) = &inner.hosted else { + return true; + }; + hosted.contains(tenant_id.as_str()) + } + + pub fn is_draining(&self, tenant_id: &TenantId) -> bool { + let inner = match self.inner.read() { + Ok(i) => i, + Err(_) => return false, + }; + inner.draining.contains(tenant_id.as_str()) + } + + pub fn set_draining(&self, tenant_id: TenantId, draining: bool) -> Result<(), String> { + let mut inner = self + .inner + .write() + .map_err(|_| "tenant placement lock poisoned".to_string())?; + if draining { + inner.draining.insert(tenant_id.as_str().to_string()); + } else { + inner.draining.remove(tenant_id.as_str()); + } + Ok(()) + } + + pub fn hosted_count(&self) -> Option { + let inner = self.inner.read().ok()?; + inner.hosted.as_ref().map(|s| s.len()) + } + + pub fn single_hosted_tenant(&self) -> Option { + let inner = self.inner.read().ok()?; + let hosted = inner.hosted.as_ref()?; + if hosted.len() != 1 { + return None; + } + 
hosted.iter().next().map(|s| TenantId::new(s.to_string())) + } + + pub fn snapshot(&self) -> TenantPlacementSnapshot { + let inner = self.inner.read(); + let Ok(inner) = inner else { + return TenantPlacementSnapshot { + hosted: None, + draining: vec![], + }; + }; + + let hosted = inner.hosted.as_ref().map(|set| { + let mut out = set.iter().cloned().collect::>(); + out.sort(); + out + }); + + let mut draining = inner.draining.iter().cloned().collect::>(); + draining.sort(); + + TenantPlacementSnapshot { hosted, draining } + } +} + +#[derive(Debug, Deserialize)] +#[serde(untagged)] +enum PlacementDoc { + List(Vec), + Map(PlacementMap), +} + +#[derive(Debug, Deserialize)] +struct PlacementMap { + #[serde(default)] + hosted_tenants: Vec, + #[serde(default)] + tenants: Vec, +} + +fn hosted_tenants_from_settings(settings: &Settings) -> Result>, String> { + let mut tenants = Vec::new(); + + if let Ok(raw) = std::env::var("PROJECTION_HOSTED_TENANTS") { + tenants.extend( + raw.split(',') + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()), + ); + } + + if let Some(path) = settings.tenant_placement_path.as_ref() { + let raw = std::fs::read_to_string(path) + .map_err(|e| format!("failed to read tenant placement file: {}", e))?; + let ext = std::path::Path::new(path) + .extension() + .and_then(|e| e.to_str()) + .unwrap_or("") + .to_ascii_lowercase(); + + let doc = if ext == "json" { + serde_json::from_str::(&raw) + .map_err(|e| format!("failed to parse placement json: {}", e))? + } else { + serde_yaml::from_str::(&raw) + .map_err(|e| format!("failed to parse placement yaml: {}", e))? 
+ }; + + match doc { + PlacementDoc::List(items) => tenants.extend(items), + PlacementDoc::Map(map) => { + if !map.hosted_tenants.is_empty() { + tenants.extend(map.hosted_tenants); + } else if !map.tenants.is_empty() { + tenants.extend(map.tenants); + } + } + } + } + + let mut set = tenants + .into_iter() + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()) + .collect::>(); + + if set.is_empty() { + return Ok(None); + } + + set.retain(|t| !t.is_empty()); + Ok(Some(set)) +} diff --git a/projection/src/types/checkpoint.rs b/projection/src/types/checkpoint.rs new file mode 100644 index 0000000..f294eca --- /dev/null +++ b/projection/src/types/checkpoint.rs @@ -0,0 +1,70 @@ +use crate::types::{TenantId, ViewType}; +use serde::{Deserialize, Serialize}; +use std::fmt; + +pub type StreamSequence = u64; + +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct CheckpointKey(String); + +impl CheckpointKey { + pub fn new(tenant_id: &TenantId, view_type: &ViewType) -> Self { + Self(format!( + "checkpoint:{}:{}", + tenant_id.as_str(), + view_type.as_str() + )) + } + + pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl fmt::Display for CheckpointKey { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)] +#[serde(default)] +pub struct Checkpoint { + pub sequence: StreamSequence, + pub metadata: Option, +} + +impl Checkpoint { + pub fn new(sequence: StreamSequence) -> Self { + Self { + sequence, + metadata: None, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::types::{TenantId, ViewType}; + + #[test] + fn checkpoint_roundtrip() { + let cp = Checkpoint { + sequence: 42, + metadata: Some(serde_json::json!({"a": 1})), + }; + let bytes = serde_json::to_vec(&cp).unwrap(); + let decoded: Checkpoint = serde_json::from_slice(&bytes).unwrap(); + assert_eq!(decoded.sequence, 42); + 
assert_eq!(decoded.metadata.unwrap()["a"], 1); + } + + #[test] + fn checkpoint_key_format_is_stable() { + let tenant = TenantId::new("tenant-a"); + let view_type = ViewType::new("User"); + let key = CheckpointKey::new(&tenant, &view_type); + assert_eq!(key.as_str(), "checkpoint:tenant-a:User"); + } +} diff --git a/projection/src/types/error.rs b/projection/src/types/error.rs new file mode 100644 index 0000000..a58a2a3 --- /dev/null +++ b/projection/src/types/error.rs @@ -0,0 +1,52 @@ +use crate::types::{TenantId, ViewType}; +use thiserror::Error; + +#[derive(Debug, Clone, Error)] +pub enum ProjectionError { + #[error("Tenant access denied for tenant: {tenant_id}")] + TenantAccessDenied { tenant_id: TenantId }, + + #[error("Validation error: {0}")] + ValidationError(String), + + #[error("Storage error: {0}")] + StorageError(String), + + #[error("Stream error: {0}")] + StreamError(String), + + #[error("Decode error: {0}")] + DecodeError(String), + + #[error("Project error: {0}")] + ProjectError(String), + + #[error("Manifest error: {0}")] + ManifestError(String), + + #[error("Not found: {0}")] + NotFound(String), + + #[error("Unsupported view type: {view_type}")] + UnsupportedViewType { view_type: ViewType }, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn error_implements_traits() { + let err = ProjectionError::TenantAccessDenied { + tenant_id: TenantId::new("other"), + }; + let _ = format!("{}", err); + let _: &dyn std::error::Error = &err; + } + + #[test] + fn error_is_send_sync() { + fn assert_send_sync() {} + assert_send_sync::(); + } +} diff --git a/projection/src/types/event.rs b/projection/src/types/event.rs new file mode 100644 index 0000000..db81b70 --- /dev/null +++ b/projection/src/types/event.rs @@ -0,0 +1,80 @@ +use crate::types::TenantId; +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(default)] +pub struct EventEnvelope { + pub tenant_id: 
TenantId, + #[serde(skip_serializing_if = "Option::is_none")] + pub event_id: Option, + pub aggregate_id: String, + pub aggregate_type: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub version: Option, + pub event_type: String, + pub payload: serde_json::Value, + #[serde(skip_serializing_if = "Option::is_none")] + pub command_id: Option, + pub timestamp: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub correlation_id: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub traceparent: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub trace_id: Option, +} + +impl Default for EventEnvelope { + fn default() -> Self { + Self { + tenant_id: TenantId::default(), + event_id: None, + aggregate_id: String::new(), + aggregate_type: String::new(), + version: None, + event_type: String::new(), + payload: serde_json::Value::Null, + command_id: None, + timestamp: None, + correlation_id: None, + traceparent: None, + trace_id: None, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn envelope_decoding_ignores_unknown_fields() { + let raw = r#" + { + "tenant_id": "tenant-a", + "aggregate_id": "a1", + "aggregate_type": "Account", + "event_type": "created", + "payload": {"x": 1}, + "timestamp": "2020-01-01T00:00:00Z", + "correlation_id": "corr-1", + "traceparent": "00-0123456789abcdef0123456789abcdef-1111111111111111-01", + "unknown": "ignored" + }"#; + + let decoded: EventEnvelope = serde_json::from_str(raw).unwrap(); + assert_eq!(decoded.tenant_id.as_str(), "tenant-a"); + assert_eq!(decoded.aggregate_id, "a1"); + assert_eq!(decoded.payload["x"], 1); + assert_eq!( + decoded.correlation_id.as_ref().map(|v| v.as_str()), + Some("corr-1") + ); + assert_eq!( + decoded.traceparent.as_deref(), + Some("00-0123456789abcdef0123456789abcdef-1111111111111111-01") + ); + } +} diff --git a/projection/src/types/id.rs b/projection/src/types/id.rs new file mode 100644 index 0000000..a3d18fa --- /dev/null +++ 
b/projection/src/types/id.rs @@ -0,0 +1,104 @@ +use serde::{Deserialize, Serialize}; +use std::fmt; +use std::str::FromStr; + +pub type TenantId = shared::TenantId; + +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct ViewType(String); + +impl ViewType { + pub fn new(ty: impl Into) -> Self { + Self(ty.into()) + } + + pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl fmt::Display for ViewType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl FromStr for ViewType { + type Err = std::convert::Infallible; + + fn from_str(s: &str) -> Result { + Ok(Self(s.to_string())) + } +} + +impl From<&str> for ViewType { + fn from(s: &str) -> Self { + Self(s.to_string()) + } +} + +impl From for ViewType { + fn from(s: String) -> Self { + Self(s) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct ViewId(String); + +impl ViewId { + pub fn new(id: impl Into) -> Self { + Self(id.into()) + } + + pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl fmt::Display for ViewId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl FromStr for ViewId { + type Err = std::convert::Infallible; + + fn from_str(s: &str) -> Result { + Ok(Self(s.to_string())) + } +} + +impl AsRef for ViewId { + fn as_ref(&self) -> &str { + &self.0 + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn tenant_id_serialization_roundtrip() { + let id = TenantId::new("acme-corp"); + let json = serde_json::to_string(&id).unwrap(); + let decoded: TenantId = serde_json::from_str(&json).unwrap(); + assert_eq!(id, decoded); + } + + #[test] + fn tenant_id_default() { + let id = TenantId::default(); + assert!(id.is_empty()); + } + + #[test] + fn view_key_types_are_send_sync() { + fn assert_send_sync() {} + assert_send_sync::(); + assert_send_sync::(); + assert_send_sync::(); + } +} diff --git a/projection/src/types/mod.rs 
b/projection/src/types/mod.rs new file mode 100644 index 0000000..3f7f602 --- /dev/null +++ b/projection/src/types/mod.rs @@ -0,0 +1,11 @@ +mod checkpoint; +mod error; +mod event; +mod id; +mod view; + +pub use checkpoint::{Checkpoint, CheckpointKey, StreamSequence}; +pub use error::ProjectionError; +pub use event::EventEnvelope; +pub use id::{TenantId, ViewId, ViewType}; +pub use view::ViewKey; diff --git a/projection/src/types/view.rs b/projection/src/types/view.rs new file mode 100644 index 0000000..265cb29 --- /dev/null +++ b/projection/src/types/view.rs @@ -0,0 +1,42 @@ +use crate::types::{TenantId, ViewId, ViewType}; +use serde::{Deserialize, Serialize}; +use std::fmt; + +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct ViewKey(String); + +impl ViewKey { + pub fn new(tenant_id: &TenantId, view_type: &ViewType, view_id: &ViewId) -> Self { + Self(format!( + "view:{}:{}:{}", + tenant_id.as_str(), + view_type.as_str(), + view_id.as_str() + )) + } + + pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl fmt::Display for ViewKey { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::types::{TenantId, ViewId, ViewType}; + + #[test] + fn view_key_format_is_stable() { + let tenant = TenantId::new("tenant-a"); + let view_type = ViewType::new("User"); + let view_id = ViewId::new("u1"); + let key = ViewKey::new(&tenant, &view_type, &view_id); + assert_eq!(key.as_str(), "view:tenant-a:User:u1"); + } +} diff --git a/routing/dev.json b/routing/dev.json new file mode 100644 index 0000000..aa35352 --- /dev/null +++ b/routing/dev.json @@ -0,0 +1,27 @@ +{ + "revision": 1, + "aggregate_placement": { + "tenant-a": "local", + "t1": "local", + "t2": "local" + }, + "projection_placement": { + "tenant-a": "local", + "t1": "local", + "t2": "local" + }, + "runner_placement": { + "tenant-a": "local", + "t1": "local", + "t2": "local" + }, + 
"aggregate_shards": { + "local": ["http://aggregate:50051"] + }, + "projection_shards": { + "local": ["http://projection:8080"] + }, + "runner_shards": { + "local": ["http://runner:8080"] + } +} diff --git a/runner/.gitignore b/runner/.gitignore new file mode 100644 index 0000000..c243d43 --- /dev/null +++ b/runner/.gitignore @@ -0,0 +1,30 @@ +/target/ +/target-*/ +**/target/ + +**/*.rs.bk +*.pdb +*.dSYM/ + +*.log + +data/ +*.mdbx +*.mdbx-lock +*.mdbx.dat +*.mdbx.lck +*.mdb +*.db + +docker-compose.override.yml + +.DS_Store + +.idea/ +.vscode/ + +.env +.env.local +.env.*.local +.envrc +.direnv/ diff --git a/runner/Cargo.toml b/runner/Cargo.toml new file mode 100644 index 0000000..2dc589a --- /dev/null +++ b/runner/Cargo.toml @@ -0,0 +1,46 @@ +[package] +name = "runner" +version = "0.1.0" +edition = "2021" + +[features] +default = [] +runtime-v8 = ["v8"] +runtime-wasm = [] + +[dependencies] +shared = { path = "../shared" } +edge_storage = { version = "0.1", registry = "madapes" } +runtime-function = { version = "0.2", registry = "madapes" } +edge-logger-client = { version = "0.1", registry = "madapes" } +query_engine = { version = "0.1", registry = "madapes" } +async-nats = "0.39" +tokio = { version = "1", features = ["full"] } +serde = { version = "1", features = ["derive"] } +serde_json = "1" +serde_yaml = "0.9" +toml = "0.8" +libmdbx = "0.6" +thiserror = "2" +anyhow = "1" +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["json", "env-filter"] } +uuid = { version = "1", features = ["v4", "v7", "serde"] } +chrono = { version = "0.4", features = ["serde"] } +futures = "0.3" +axum = "0.7" +tonic = { version = "0.12", default-features = false, features = ["transport", "prost", "codegen"] } +prost = "0.13" +v8 = { version = "0.106", optional = true } +reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] } +lettre = { version = "0.11", default-features = false, features = ["builder", "hostname", "smtp-transport", 
"tokio1", "tokio1-native-tls"] } +aws-config = { version = "1", default-features = false, features = ["behavior-version-latest"] } +aws-sdk-sesv2 = { version = "1", default-features = false, features = ["rustls"] } + +[dev-dependencies] +tempfile = "3" +tower = "0.5" + +[build-dependencies] +protoc-bin-vendored = "3" +tonic-build = { version = "0.12", default-features = false, features = ["prost"] } diff --git a/runner/DEVELOPMENT_PLAN.md b/runner/DEVELOPMENT_PLAN.md new file mode 100644 index 0000000..677a6e3 --- /dev/null +++ b/runner/DEVELOPMENT_PLAN.md @@ -0,0 +1,562 @@ +# Development Plan: Runner Node + +## Overview + +This plan breaks down the Runner node implementation into milestones ordered by dependency. Each milestone includes: +- **Tasks** with clear deliverables +- **Test Requirements** (unit tests + tautological tests + integration tests where applicable) +- **Dependencies** on previous milestones + +**Development Approach:** +1. Complete one milestone at a time +2. Write tests before implementation (TDD where applicable) +3. All tests must pass before moving to the next milestone +4. Mark tasks complete with `[x]` as you progress + +--- + +## Milestone 0: Repo Bootstrap (Dev Ergonomics + Guardrails) + +**Goal:** Establish baseline development workflows and guardrails so later milestones can be executed and verified consistently. 
+ +### Tasks +- [x] **0.1** Define canonical local commands + - `cargo test` + - `cargo fmt --check` + - `cargo clippy -- -D warnings` + - `cargo run -- --help` +- [x] **0.2** Add minimal CI entrypoints (repository-level) + - Run fmt + clippy + tests on every PR + - Ensure CI uses the same commands as **0.1** +- [x] **0.3** Define integration-test gating pattern (NATS-required) + - Use ignored tests that run only when `RUNNER_TEST_NATS_URL` is set + - Make the plan’s “ignored by default” integration tests follow the same convention +- [x] **0.4** Define baseline operational invariants (written as checklist items for later milestones) + - Never ack before durable commit + - Never delete outbox item before durable confirmation + - Never execute effect twice for the same `(tenant_id, command_id)` + - Always propagate `tenant_id`, `correlation_id`, and `trace_id` + +### Tests +- [x] **T0.1** Tautological test: baseline test harness runs + ```rust + #[test] + fn test_harness_runs() { + assert!(true); + } + ``` + +--- + +## Milestone 1: Project Foundation + +**Goal:** Set up the Runner as a standalone Rust container with correct dependencies and a stable module layout. 
+ +### Tasks +- [x] **1.1** Initialize Cargo project + - Create `src/lib.rs` and `src/main.rs` + - Configure `Cargo.toml` with the madapes registry +- [x] **1.2** Configure dependencies (aligned with Aggregate/Projection) + - `edge-storage` (KvStore) + - `runtime-function` (Saga `on_event` / `compensation`) + - `edge-logger-client` (structured logs client) + - `query-engine` (optional admin/debug queries) + - `async-nats` (JetStream) + - `tokio`, `serde`, `serde_json`, `thiserror`, `anyhow`, `tracing`, `tracing-subscriber`, `uuid`, `chrono`, `axum` +- [x] **1.3** Establish initial module layout + ``` + src/ + ├── lib.rs + ├── main.rs + ├── types/ + │ ├── mod.rs + │ ├── id.rs + │ ├── keys.rs + │ ├── envelope.rs + │ └── error.rs + ├── config/ + │ ├── mod.rs + │ └── settings.rs + ├── storage/ + │ ├── mod.rs + │ └── kv.rs + ├── stream/ + │ ├── mod.rs + │ └── jetstream.rs + ├── saga/ + │ ├── mod.rs + │ ├── manifest.rs + │ └── runtime.rs + ├── outbox/ + │ ├── mod.rs + │ └── relay.rs + ├── effects/ + │ ├── mod.rs + │ ├── manifest.rs + │ ├── runtime.rs + │ └── providers/ + │ └── mod.rs + ├── schedule/ + │ ├── mod.rs + │ └── scheduler.rs + ├── http/ + │ └── mod.rs + └── observability/ + └── mod.rs + ``` +- [x] **1.4** Configure clippy and rustfmt (CI-friendly) + +### Tests +- [x] **T1.1** Project compiles successfully +- [x] **T1.2** Dependencies resolve from madapes registry +- [x] **T1.3** Clippy passes with no warnings + +--- + +## Milestone 2: Core Types + Keyspaces + +**Goal:** Implement the foundational types that the Runner depends on: multi-tenancy, envelope shapes, stable key composition, and error model. 
+ +### Dependencies +- Milestone 1 (project foundation) + +### Tasks +- [x] **2.1** Implement `TenantId` type (same semantics as other nodes) + - String wrapper, default empty for single-tenant + - Display, FromStr, Serialize, Deserialize +- [x] **2.2** Implement workflow identity types + - `SagaName`, `EffectName` (string wrappers) + - `CorrelationId` (string wrapper) + - `WorkId` (UUID v7 or string wrapper depending on work kind) +- [x] **2.3** Implement stable key composition helpers + - `saga:{tenant_id}:{saga_name}:{correlation_id}` + - `checkpoint:{tenant_id}:{saga_name}` + - `outbox:{tenant_id}:{work_kind}:{work_id}` + - `schedule:{tenant_id}:{saga_name}:{correlation_id}:{due_at}` + - `dedupe:{tenant_id}:{saga_name}:{event_id}` + - `dedupe:{tenant_id}:effect:{command_id}` +- [x] **2.4** Define message envelopes used by the Runner (serde types) + - Aggregate event envelope (consumed from `AGGREGATE_EVENTS`) + - Effect command envelope (consumed from workflow command stream) + - Effect result event envelope (produced to workflow event stream) + - Gateway command submission shape (compatible with Aggregate gateway request fields) + - Forward-compatible decoding (unknown fields ignored where practical) +- [x] **2.5** Implement Runner error model + - Storage errors, stream errors, decode errors, runtime-function errors + - Tenant access errors + - Policy errors (poison message, quarantine, etc.) + +### Tests +- [x] **T2.1** `TenantId` round-trips serialization and defaults to empty +- [x] **T2.2** Key composition produces stable strings across all keyspaces +- [x] **T2.3** Envelope decoding ignores unknown fields (forward compatibility) +- [x] **T2.4** Tautological test: core types are Send + Sync + +--- + +## Milestone 3: Configuration + +**Goal:** Implement settings and startup validation for modes (saga/effect/combined), multi-tenancy, storage, and JetStream. 
+ +### Dependencies +- Milestone 2 (core types) + +### Tasks +- [x] **3.1** Define `Settings` struct + - NATS URL + - Storage path + - Mode: saga/effect/combined + - Multi-tenancy enabled flag + default tenant behavior + - Stream names: + - `AGGREGATE_EVENTS` (existing) + - `WORKFLOW_COMMANDS` (runner work distribution) + - `WORKFLOW_EVENTS` (effect results + optional workflow facts) + - Subject filters for saga triggers and effect commands + - Consumer configuration: durable name strategy, deliver group, max in-flight, ack wait, max deliver + - Backpressure configuration: per-key concurrency, batching, relay polling intervals + - Manifest paths: sagas and effects (YAML/JSON) +- [x] **3.2** Implement config loading from environment variables +- [x] **3.3** Implement config loading from file (YAML/TOML/JSON) + - Environment overrides file +- [x] **3.4** Implement config validation + - Required fields present + - Stream names and consumer naming rules valid + - Manifests load and validate at startup + +### Tests +- [x] **T3.1** Settings loads from environment variables +- [x] **T3.2** Settings validation catches missing/invalid values +- [x] **T3.3** Tautological test: Settings is Clone + Debug + +--- + +## Milestone 4: Storage Layer (KvStore Transactions) + +**Goal:** Integrate `edge-storage` `KvStore` and provide transaction APIs that enforce the Runner’s atomicity requirements. 
+ +### Dependencies +- Milestone 2 (core types) +- Milestone 3 (configuration) + +### Tasks +- [x] **4.1** Create `KvClient` wrapper + - Opens MDBX-backed KvStore at configured path + - Tenant-aware key helpers for all namespaces +- [x] **4.2** Implement saga state primitives + - `get_saga_state(key) -> Option` + - `put_saga_state(key, value)` +- [x] **4.3** Implement checkpoint primitives + - `get_checkpoint(key) -> Option` + - `put_checkpoint(key, u64)` +- [x] **4.4** Implement outbox primitives + - `put_outbox_item(key, item)` + - `list_outbox_prefix(tenant_id, ...)` (for relay scanning) + - `delete_outbox_item(key)` +- [x] **4.5** Implement schedule primitives + - `put_schedule_item(key, payload)` + - `scan_due_schedule_items(now)` (prefix scan + due filtering) + - `delete_schedule_item(key)` +- [x] **4.6** Implement atomic commit API for saga processing + - One transaction: update saga state + write outbox items + advance checkpoint + record dedupe marker(s) + - Provide an API surface that makes partial updates difficult + +### Tests +- [x] **T4.1** Saga state round-trip: put/get returns identical JSON +- [x] **T4.2** Checkpoint round-trip: put/get returns identical value +- [x] **T4.3** Atomicity: if transaction fails, state/outbox/checkpoint are not partially committed +- [x] **T4.4** Outbox delete removes keys reliably +- [x] **T4.5** Schedule scan only returns due items and respects tenant scoping + +--- + +## Milestone 5: JetStream Integration (Worker Pool Semantics) + +**Goal:** Consume and produce JetStream messages with correct delivery semantics, ack discipline, idempotency, and backpressure. 
+ +### Dependencies +- Milestone 4 (storage layer) + +### Tasks +- [x] **5.1** Implement JetStream client wrapper + - Connect to NATS and create JetStream context + - Bind to required streams (create-if-missing for workflow streams if enabled by config) +- [x] **5.2** Implement saga trigger consumer + - Durable consumer filtered to aggregate event subjects (tenant-aware) + - Deliver group support for worker pool replicas + - Extract stream sequence from message metadata +- [x] **5.3** Implement effect command consumer + - Durable consumer filtered to effect command subjects (tenant-aware) + - Deliver group support +- [x] **5.4** Implement publish wrappers + - Publish effect result events with headers (tenant-id, command-id, effect-name, trace-id) + - Publish optional workflow facts if needed +- [x] **5.5** Enforce ack discipline + - Ack only after the relevant storage transaction commits and/or downstream publish is confirmed +- [x] **5.6** Implement poison-message policy wiring + - Consumer max-deliver configured + - After max attempts: quarantine record in KV + TERM ack + +### Tests +- [x] **T5.1** Unit test: checkpoint/dedupe gates skip already-processed items +- [x] **T5.2** Unit test: ack is not performed when storage commit fails +- [x] **T5.3** Integration test: JetStream redelivery is idempotent (ignored by default; enabled with `RUNNER_TEST_NATS_URL=...`) + +--- + +## Milestone 6: Saga Runtime (Deterministic Execution) + +**Goal:** Execute Saga logic as deterministic `runtime-function` DAG programs, producing work items that are persisted into the outbox. 
+ +### Dependencies +- Milestone 5 (JetStream consumption) + +### Tasks +- [x] **6.1** Define Saga program invocation contract + - Input: `{ saga_state, event }` + - Output: `{ new_saga_state, work_items[], schedules[] }` +- [x] **6.2** Implement `runtime-function` execution wrapper + - Gas limits and timeouts + - Deterministic inputs only (no I/O, no clock access) +- [x] **6.3** Implement Saga manifest + - Defines sagas, trigger filters, program references (`on_event`, `compensation`) + - Validate referenced programs exist +- [x] **6.4** Implement saga processing pipeline + - Load saga state + - Execute program + - Atomic commit (state + outbox + checkpoint + dedupe + schedule items) + +### Tests +- [x] **T6.1** Unit test: same inputs produce same outputs (determinism) +- [x] **T6.2** Unit test: pipeline writes checkpoint only if state/outbox commit succeeds +- [x] **T6.3** Unit test: dedupe prevents duplicate transitions for the same event_id + +--- + +## Milestone 7: Outbox Relay (Reliable Dispatch) + +**Goal:** Reliably deliver outbox work items to their destinations without dual-write gaps, supporting retries and backpressure. 
+ +### Dependencies +- Milestone 6 (saga runtime) + +### Tasks +- [x] **7.1** Implement outbox relay loop + - Poll `outbox:` prefix in KV with bounded batch size + - Emit metrics for outbox depth and dispatch latency +- [x] **7.2** Implement dispatch targets + - Aggregate commands via Gateway submission (HTTP/gRPC, tenant-scoped) + - Always send `x-tenant-id` and propagate `correlation_id`/`trace_id` metadata + - Effect commands published to `WORKFLOW_COMMANDS` +- [x] **7.3** Implement idempotency for relay dispatch + - Safe retries: dispatch operation is idempotent using `command_id` (and/or JetStream `Nats-Msg-Id`) + - Only delete outbox item after durable confirmation (publish ack / gateway response) +- [x] **7.4** Implement backpressure controls + - Max in-flight dispatches per tenant + - Bounded retries with backoff + +### Tests +- [x] **T7.1** Unit test: outbox item is not deleted if dispatch fails +- [x] **T7.2** Unit test: dispatch success deletes outbox item exactly once +- [x] **T7.3** Integration test: crash/restart simulation re-dispatches pending outbox items without duplicates (ignored by default) + +--- + +## Milestone 8: Effect Worker Runtime (Non-Deterministic Execution) + +**Goal:** Consume effect commands, execute side effects with reliability controls, publish result events, and ensure idempotency. 
+ +### Dependencies +- Milestone 5 (effect command consumer) +- Milestone 4 (dedupe storage) + +### Tasks +- [x] **8.1** Define Effect command/result contract + - Command input: `{ tenant_id, command_id, effect_name, payload, metadata }` + - Result event output: `{ tenant_id, command_id, effect_name, result_type, payload, timestamp }` +- [x] **8.2** Implement provider interface + - Provider receives decoded command and returns a typed outcome + - Support per-provider configuration via manifest + - Support secret references resolved from Swarm secrets/config at runtime (no secrets in git) +- [x] **8.3** Implement reliability controls + - retries + exponential backoff + - timeouts + - circuit breakers per upstream +- [x] **8.4** Implement idempotency gate + - Check `dedupe:{tenant_id}:effect:{command_id}` before executing external call + - Record completion only after result publish is acknowledged +- [x] **8.5** Publish result events to `WORKFLOW_EVENTS` + - Include headers for correlation/trace propagation when present + +### Tests +- [x] **T8.1** Unit test: idempotency gate prevents double execution for same command_id +- [x] **T8.2** Unit test: result publish failure does not mark command as completed +- [x] **T8.3** Integration test: simulated redelivery of effect command does not duplicate external call (ignored by default) + +--- + +## Milestone 9: Scheduling (Durable Timeouts/Reminders) + +**Goal:** Implement durable scheduling for saga timeouts using KV-backed schedules rather than in-memory timers as the source of truth. 
+ +### Dependencies +- Milestone 4 (schedule primitives) +- Milestone 6 (saga emits schedules) + +### Tasks +- [x] **9.1** Implement scheduler loop + - Periodically scan due `schedule:` items (tenant-aware) + - Emit scheduling metrics (due count, scan time, lag) +- [x] **9.2** Define reminder delivery mechanism + - Option A: publish a workflow event to `WORKFLOW_EVENTS` that sagas consume + - Option B: inject an internal event into the saga pipeline without JetStream (only if it preserves restart correctness) +- [x] **9.3** Ensure idempotency for reminders + - Reminder keys encode due time and correlation_id, allowing safe retry without duplicates + +### Tests +- [x] **T9.1** Unit test: due schedule item is delivered and then deleted +- [x] **T9.2** Unit test: scheduler is tenant-scoped +- [x] **T9.3** Integration test: restart re-scans and delivers still-due reminders exactly once (ignored by default) + +--- + +## Milestone 10: HTTP Endpoints + Operational Controls (Under Gateway) + +**Goal:** Provide health/readiness/metrics/info and operational controls (drain/reload) aligned with other nodes. 
+ +### Dependencies +- Milestone 3 (settings) +- Milestone 4 (storage) +- Milestone 5 (JetStream) + +### Tasks +- [x] **10.1** Implement `/health` + - Storage writable check + JetStream connectivity check +- [x] **10.2** Implement `/ready` + - Not draining + storage writable + JetStream reachable + - Stop reporting ready before shutdown/drain to support safe rollouts +- [x] **10.3** Implement `/metrics` + - Worker lag, outbox depth, effect latency, schedule lag +- [x] **10.4** Implement `/info` + - Build info, mode, stream/consumer names, enabled saga/effect sets +- [x] **10.5** Implement `/admin/drain` + - Stop acquiring new work, finish in-flight, flush relay, then report draining state +- [x] **10.6** Implement `/admin/reload` + - Hot-reload manifests and (optionally) tenant placement config where safe + +### Tests +- [x] **T10.1** Unit test: readiness toggles with draining flag +- [x] **T10.2** Unit test: health fails when storage is unwritable + +--- + +## Milestone 11: Container & Deployment + +**Goal:** Package the Runner as a container and define entrypoint behavior consistent with Aggregate/Projection. 
+ +### Dependencies +- Milestone 9 (scheduling) +- Milestone 10 (operational endpoints) + +### Tasks +- [x] **11.1** Create `docker/Dockerfile.rust` + - Multi-stage build + - Minimal runtime image + - Health check wiring (uses `/health`) +- [x] **11.2** Create `docker-compose.yml` for local dev + - Runner container (mode configurable) + - NATS server (JetStream enabled) + - Optional: Grafana, Victoria Metrics, Loki +- [x] **11.3** Define container entrypoint behavior + - Config loading + - Graceful shutdown on SIGTERM + - For saga/effect consumers: stop pulling new messages before exit + - Flush outbox relay safely (do not delete outbox entries without confirmation) + - Timeout-based forced shutdown policy +- [x] **11.4** Define environment variables and defaults + - NATS URL, stream names, subject filters + - Storage path + - Mode: saga/effect/combined + - Multi-tenancy enabled flag + default tenant behavior + - Consumer settings: durable name strategy, deliver group, max in-flight, ack wait, max deliver +- [x] **11.5** Create release build optimization + - LTO, strip, single codegen unit + +### Tests +- [x] **T11.1** Container builds successfully + ```bash + docker build -f docker/Dockerfile.rust --build-arg PACKAGE=runner --build-arg BIN=runner -t cloudlysis/runner:local . + docker run cloudlysis/runner:local --help + ``` +- [x] **T11.2** Container starts with valid config + ```bash + docker run -e RUNNER_NATS_URL=nats://nats:4222 runner:latest + ``` + +--- + +## Milestone 12: Provisioning, Scalability, and Docker Swarm Deployment + +**Goal:** Support horizontal scaling and safe rollouts in Docker Swarm with clear worker-pool semantics for JetStream consumers and outbox relay. 
+ +### Dependencies +- Milestone 11 (container & deployment) + +### Tasks +- [x] **12.1** Define the scaling model for saga + effect workers + - Saga workers: durable consumer filtered to aggregate event subjects + - Effect workers: durable consumer filtered to effect command subjects + - Deliver group strategy so replicas share workload without duplication + - Consumer configuration requirements (ack policy, max in-flight, ack wait, max deliver) +- [x] **12.2** Implement replica-safe processing invariants + - Ack only after storage transaction commits + - Outbox relay deletes only after durable confirmation (publish ack / gateway response) + - Optional per-key serialization for workflows that require strict ordering +- [x] **12.3** Add tenant-aware provisioning option (sharding) + - Optional tenant-range sharding by subject filters (e.g., `tenant..*`) + - Placement constraints for Swarm nodes (e.g., `node.labels.tenant_range==`) + - Strategy for adding/removing shards and rebalancing tenants +- [x] **12.4** Optional: NATS KV-backed tenant placement config + - Define a `TENANT_PLACEMENT` bucket to store tenant → shard assignments + - Watch for config changes and apply without restart +- [x] **12.5** Create Swarm stack definition (`swarm/stacks/platform.yml`) + - Service definition(s) for saga/effect/combined modes + - Replicas configuration + - Resource limits (CPU, memory) + - Health check integration (`/health`, `/ready`) + - Storage volume mapping for `edge-storage` data directory + - Secrets/config wiring for provider credentials (no secrets in env vars) +- [x] **12.6** Define rollout + drain strategy + - Rolling update parameters + - Drain behavior: stop acquiring work, finish in-flight, flush relay + - Safe rollback story (old replicas still valid due to idempotency + checkpointing) + +### Tests +- [x] **T12.1** Stack file valid + ```bash + docker stack config -c swarm/stacks/platform.yml + ``` +- [x] **T12.2** Scale-out does not duplicate work (ignored by 
default; run with `RUNNER_TEST_NATS_URL=... cargo test -- --ignored`) + - Start 2+ replicas pulling from the same durable consumer deliver group + - Verify saga checkpoint monotonicity and outbox dispatch idempotency +- [x] **T12.3** Rolling restart preserves correctness (ignored by default; run with `RUNNER_TEST_NATS_URL=... cargo test -- --ignored`) + - Restart replicas during active processing + - Verify no duplicate effects and no lost outbox items + +--- + +## Milestone 13: Observability + Safety Policies + +**Goal:** Ensure production-grade logs, metrics, and policy controls consistent with Aggregate/Projection. + +### Dependencies +- Milestone 10 (operational endpoints) and prior runtime milestones + +### Tasks +- [x] **13.1** Integrate `edge-logger-client` logging pipeline + - Ensure tenant_id, trace_id, correlation_id are included in structured logs +- [x] **13.2** Implement core metrics + - consumer lag, redeliveries, processing latency + - outbox depth and dispatch latency + - effect success/failure counts and duration histograms + - schedule lag and scan durations +- [x] **13.3** Implement poison message quarantine records + - Store minimal decoded context and error reason in KV under a deadletter namespace +- [ ] **13.4** Standardize correlation and trace propagation to match Gateway/Control conventions + - When submitting commands via the Gateway, include `x-correlation-id` and `traceparent` headers when available in workflow metadata + - When publishing to JetStream, include `x-correlation-id` and `traceparent` message headers when available + - Ensure `correlation_id` and `trace_id` appear in logs/spans for dispatch and effect execution paths + +### Tests +- [x] **T13.1** Unit test: metrics exporter emits key metrics +- [x] **T13.2** Unit test: quarantine record is written on poison handling path +- [ ] **T13.3** Integration test: Gateway-bound requests include `x-correlation-id` when workflow metadata supplies a correlation_id (ignored by default; 
requires NATS) + +--- + +## Milestone 14: Integration Hardening (Replay, Scale, Compatibility) + +**Goal:** Validate end-to-end correctness across saga + outbox + effect execution, and implement operational workflows needed for safe scaling. + +### Dependencies +- Milestone 6–13 + +### Tasks +- [x] **14.1** Implement controlled replay for sagas + - Reset `checkpoint:{tenant_id}:{saga_name}` with explicit operator intent + - Safety checks to prevent accidental full replays +- [x] **14.2** Validate worker-pool semantics + - deliver groups distribute work across replicas + - per-key ordering enforcement option for workflows that need it +- [x] **14.3** Validate graceful drain behavior + - no new work acquired + - in-flight finishes + - outbox relay flush completes +- [x] **14.4** End-to-end integration test suite + - Aggregate event → saga transition → outbox command → effect command → effect result → saga completes + +### Tests +- [x] **T14.1** Integration test: full happy-path workflow (ignored by default; requires NATS) +- [x] **T14.2** Integration test: crash/restart across boundaries preserves atomicity (ignored by default; requires NATS) + - restart after state commit but before dispatch + - restart after dispatch but before outbox delete + - restart during effect execution and redelivery diff --git a/runner/SCALING.md b/runner/SCALING.md new file mode 100644 index 0000000..4e976be --- /dev/null +++ b/runner/SCALING.md @@ -0,0 +1,59 @@ +# Runner Scaling Model + +## Assumptions + +- Runner state (saga state, dedupe markers, checkpoints, outbox, schedules) is stored in a local MDBX database via `edge_storage`. +- Correctness for a given tenant+saga depends on reading/writing the same storage instance over time. + +## Practical Scaling Model + +### 1) Scale by tenant partitioning (recommended) + +Run multiple Runner instances, each responsible for a disjoint set of tenants, and give each instance its own storage volume. 
+ +- Use `RUNNER_TENANT_ALLOWLIST` to bind an instance to tenants. +- Or use NATS KV placement: set `RUNNER_TENANT_PLACEMENT_BUCKET` and `RUNNER_SHARD_ID`. +- Streams/consumers can be shared; subjects are tenant-qualified, and per-instance consumers filter by tenant subjects. + +Example: + +- Runner A: `RUNNER_TENANT_ALLOWLIST=t1,t2` +- Runner B: `RUNNER_TENANT_ALLOWLIST=t3,t4` + +### NATS KV Placement (optional) + +If `RUNNER_TENANT_PLACEMENT_BUCKET` and `RUNNER_SHARD_ID` are set, the Runner watches a NATS KV bucket where: + +- key = tenant_id +- value = shard_id + +and dynamically updates the set of per-tenant consumers it is polling without restarting. + +### 2) Multiple replicas for the same tenant (not supported with local storage) + +If two replicas for the same tenant use different local storages, they will not share: + +- dedupe markers +- checkpoints +- saga state + +and can duplicate work. + +To support same-tenant replicas, storage must be shared/replicated (not implemented here). + +## Rollout/Drain Strategy + +Use the drain endpoint before stopping a process: + +- `POST /admin/drain` to stop taking new work. +- then stop the container/process. + +## Replay + +Controlled replay exists for operational/debug use: + +- `POST /admin/replay` with `tenant_id`, `saga_name`, and `mode`. 
+- Modes: + - `checkpoint_only` + - `checkpoint_and_dedupe` + - `full_reset` diff --git a/runner/build.rs b/runner/build.rs new file mode 100644 index 0000000..1c9dbee --- /dev/null +++ b/runner/build.rs @@ -0,0 +1,8 @@ +fn main() -> Result<(), Box> { + let protoc = protoc_bin_vendored::protoc_bin_path()?; + std::env::set_var("PROTOC", protoc); + + tonic_build::configure().compile_protos(&["proto/aggregate.proto"], &["proto"])?; + + Ok(()) +} diff --git a/runner/config/effects.yaml b/runner/config/effects.yaml new file mode 100644 index 0000000..20ca41b --- /dev/null +++ b/runner/config/effects.yaml @@ -0,0 +1,4 @@ +effects: + - name: noop + provider: noop + config: {} diff --git a/runner/config/saga_on_event.json b/runner/config/saga_on_event.json new file mode 100644 index 0000000..2a735a8 --- /dev/null +++ b/runner/config/saga_on_event.json @@ -0,0 +1,27 @@ +{ + "specVersion": "1.1", + "id": "noop_saga", + "name": "noop_saga", + "inputs": [ + { "name": "saga_state", "type": "Any" }, + { "name": "event", "type": "Any" } + ], + "nodes": [ + { + "id": "const", + "type": "Const", + "data": { + "value": { + "new_saga_state": {}, + "work_items": [], + "schedules": [] + } + } + }, + { "id": "output", "type": "Output", "data": {} } + ], + "edges": [ + { "id": "e1", "source": "const", "sourceHandle": "out", "target": "output", "targetHandle": "value" } + ], + "outputNodeId": "output" +} diff --git a/runner/config/sagas.yaml b/runner/config/sagas.yaml new file mode 100644 index 0000000..b4ad9bb --- /dev/null +++ b/runner/config/sagas.yaml @@ -0,0 +1,5 @@ +sagas: + - name: noop + trigger_subjects: + - tenant.*.aggregate.*.* + on_event: /config/saga_on_event.json diff --git a/runner/external_prd.md b/runner/external_prd.md new file mode 100644 index 0000000..9075799 --- /dev/null +++ b/runner/external_prd.md @@ -0,0 +1,192 @@ +### External PRD: Changes Required in Aggregate, Projection, Runner + +This document captures the work needed outside the Gateway to support: +- 
Tenant-aware routing via `x-tenant-id` +- Independent horizontal scalability of Aggregate, Projection, Runner +- A safe mechanism for tenant rebalancing per service kind + +--- + +## **Target State** + +### Independent Placements + +Each service kind has its own placement map: +- `aggregate_placement[tenant_id] -> aggregate_shard_id` +- `projection_placement[tenant_id] -> projection_shard_id` +- `runner_placement[tenant_id] -> runner_shard_id` + +Each shard is a replica set that can scale independently. + +### Rebalancing Contract (Per Service Kind) + +All nodes MUST support: +- Dynamic placement updates (watch NATS KV or reload config) +- A drain mechanism that can target a specific tenant (stop acquiring new work for that tenant, finish in-flight, report status) +- Clear readiness semantics that reflect whether the node will accept work for a tenant + +Additionally, all nodes SHOULD converge on the same operational contract: +- A per-tenant “accepting” gate (can this shard accept new work/queries/commands for tenant X?) +- A per-tenant “drained” signal (no in-flight work remains for tenant X) +- A per-tenant warmup/catchup signal where relevant (projection lag, aggregate snapshot availability) + +--- + +## **Aggregate: Required Changes** + +### 1) Expose a Real Command API (Gateway Upstream) + +Today, Aggregate has internal command handling types (e.g., `CommandServer`) but its running HTTP server only exposes health/metrics/admin endpoints ([aggregate/http_server.rs](file:///Users/vlad/Developer/cloudlysis/aggregate/src/http_server.rs#L15-L82), [aggregate/server/mod.rs](file:///Users/vlad/Developer/cloudlysis/aggregate/src/server/mod.rs#L81-L213)). + +Aggregate MUST expose one of the following upstream APIs for the Gateway to call: +- **Option A (Recommended)**: gRPC server implementing `aggregate.gateway.v1.CommandService/SubmitCommand` compatible with [aggregate.proto](file:///Users/vlad/Developer/cloudlysis/aggregate/proto/aggregate.proto#L1-L31). 
+- **Option B**: HTTP endpoint for command submission (REST), with a stable request/response shape that the Gateway can proxy. + +### 2) Tenant Placement Enforcement + +Aggregate MUST enforce “hosted tenants” so independent scaling is safe: +- If an Aggregate shard/node is not assigned a tenant, it MUST reject commands for that tenant (e.g., `403` or `503` with retriable hint depending on whether the issue is authorization vs placement). +- Aggregate SHOULD maintain an in-memory allowlist of hosted tenants that is driven by: + - NATS KV placement watcher (preferred), or + - Hot-reloaded config pushed via `/admin/reload` + +Aggregate already has admin hooks for drain/reload, but they are currently generic and/or illustrative ([aggregate/http_server.rs](file:///Users/vlad/Developer/cloudlysis/aggregate/src/http_server.rs#L15-L72), [aggregate/server/mod.rs](file:///Users/vlad/Developer/cloudlysis/aggregate/src/server/mod.rs#L402-L442)). These need to become placement-aware. + +### 3) Tenant Drain (Per Tenant) + +Aggregate MUST provide a per-tenant drain mechanism to support rebalancing: +- Stop accepting new commands for the tenant. +- Allow in-flight commands to finish (bounded wait), then report drained. +- Expose drain status per tenant (admin endpoint). + +### 4) Rebalancing State Strategy + +Aggregate persists snapshots locally (MDBX) and uses JetStream for events. To move a tenant: +- **Approach 1 (Snapshot migration)**: copy tenant snapshot DB/state to the target shard, then switch placement. +- **Approach 2 (Cold rehydrate)**: switch placement and let the target shard rebuild state by replaying events from JetStream; expect higher latency during warmup. + +The system should support both, with the rebalancer selecting the strategy based on tenant size/SLO. 
+ +### 5) Metrics for Placement Decisions + +Aggregate SHOULD expose: +- Per-tenant command rate, error rate +- In-flight commands by tenant +- Rehydrate time / snapshot hit ratio +- Storage size per tenant (if feasible) + +--- + +## **Projection: Required Changes** + +### 1) Expose Query API Upstream for Gateway + +Projection has a working `QueryService` with tenant-scoped prefix scans ([uqf.rs](file:///Users/vlad/Developer/cloudlysis/projection/src/query/uqf.rs#L121-L162)) but it is not exposed via HTTP/gRPC (current HTTP routes are health/ready/metrics/info only: [projection/http/mod.rs](file:///Users/vlad/Developer/cloudlysis/projection/src/http/mod.rs#L102-L109)). + +Projection MUST add one upstream API the Gateway can route to: +- `POST /query/{view_type}` (HTTP) accepting `x-tenant-id` and a UQF payload, returning `QueryResponse`. +- Or a gRPC query service (new proto) if gRPC is preferred end-to-end. + +### 2) Tenant Placement Filtering (Independent Scaling) + +Projection MUST support running in one of these modes: +- **Multi-tenant shard**: consumes all tenants (simple, less isolated). +- **Tenant-filtered shard (required for rebalancing)**: + - only consumes/serves queries for the tenants assigned to that shard + - rejects queries for unassigned tenants (consistent error semantics) + +Implementation direction: +- Add a placement watcher similar to Runner’s tenant filter ([runner/tenant_placement.rs](file:///Users/vlad/Developer/cloudlysis/runner/src/tenant_placement.rs#L8-L100)). +- Apply tenant filter to: + - event consumption subject filters (preferred), and + - query serving validation (always). 
+ +### 3) Drain + Warmup Endpoints + +Projection SHOULD add: +- `/admin/drain?tenant_id=...` (stop consuming new events for that tenant, finish in-flight, flush checkpoints) +- `/admin/reload` (apply latest placement/config) +- Optional warmup status: whether the shard has caught up to JetStream tail for that tenant/view_types + +### 4) Rebalancing Strategy for Projection + +Projection can rebalance safely with “warm then cut over”: +- Assign tenant to the new projection shard while old shard still serves. +- New shard catches up (replay from JetStream, build view KV). +- Switch Gateway placement for query routing to new shard. +- Drain old shard for that tenant and optionally delete old tenant KV keys. + +### 5) Metrics for Placement Decisions + +Projection SHOULD expose: +- JetStream lag per tenant/view_type (tail minus checkpoint) +- Query latency and scan counts +- Storage size per tenant (if feasible) + +--- + +## **Runner: Required Changes** + +Runner already has: +- A tenant placement watcher capable of producing an allowlist ([tenant_placement.rs](file:///Users/vlad/Developer/cloudlysis/runner/src/tenant_placement.rs#L8-L100)) +- Admin endpoints including drain/reload/config ([runner/http/mod.rs](file:///Users/vlad/Developer/cloudlysis/runner/src/http/mod.rs#L69-L86)) +- Gateway client integration for aggregate command submission ([runner/gateway/mod.rs](file:///Users/vlad/Developer/cloudlysis/runner/src/gateway/mod.rs#L1-L47)) + +To support independent scalability + rebalancing, Runner needs the following. + +### 1) Per-Tenant Drain (Not Only Global) + +Runner’s current drain is global (`/admin/drain` toggles a single draining flag). Runner MUST support draining a specific tenant: +- Stop acquiring new saga/effect work for the tenant. +- Allow in-flight work for the tenant to finish (bounded). +- Flush outbox for the tenant (or guarantee idempotency on handoff). 
+- Persist final checkpoints so another shard can continue without duplication beyond at-least-once bounds. + +### 2) Placement-Enforced Work Acquisition + +Runner MUST validate tenant assignment at the boundary where it: +- consumes JetStream messages (saga triggers, effect commands), and +- dispatches outbox work. + +If a tenant is not assigned to the shard, Runner must not process its work. + +### 3) Handoff Safety Rules for Rebalancing + +Runner rebalancing should follow: +- New shard begins processing only after it is assigned the tenant. +- Old shard stops acquiring new work for that tenant, then drains. +- Idempotency remains correct across handoff using checkpoints and dedupe markers. + +### 4) Metrics for Placement Decisions + +Runner SHOULD expose: +- Outbox depth by tenant +- Work processing latency and retries by tenant/effect +- Schedule due items by tenant +- Consumer lag by tenant (if the consumption model supports per-tenant lag) + +### 5) Auth Delivery Side Effects (Email/SMS/Push) + +If the platform’s AuthN flows require out-of-band delivery (password reset links, email verification, MFA codes), the Runner SHOULD be the standard place to execute those side effects: +- Define a stable effect interface for sending transactional emails (reset links, verification links, security alerts). +- Optionally add SMS/push providers later under the same effect contract. + +This keeps the Gateway free of long-lived provider credentials and aligns with the existing “effects are executed by workers” pattern. + +--- + +## **Gateway Integration Notes** + +Once the above changes exist: +- Gateway routes per `(tenant_id, service_kind)` using independent placement maps. +- Gateway can implement “warm then cut over” rebalancing for Projection and Runner by switching only query/workflow routing after readiness conditions are met. +- Gateway can enforce consistent tenant validation, authn/authz, and error semantics at the edge even as placements move. 
+ +--- + +## **Gaps / Opportunities** + +- **KV schema + ownership**: define the exact NATS KV bucket layout, key naming, revisioning rules, and who is allowed to write placement updates. +- **Rebalancer API**: define operator workflows (plan/apply/rollback), status reporting, and audit log requirements for placement changes. +- **Shard discovery**: define how shard endpoints are registered (static config vs KV directory entries) and how health is represented. +- **Consistency boundaries**: define rebalancing guarantees per service kind (projection can be warm-cutover; runner requires checkpoint handoff; aggregate requires single-writer and state availability). diff --git a/runner/prd.md b/runner/prd.md new file mode 100644 index 0000000..172f7ce --- /dev/null +++ b/runner/prd.md @@ -0,0 +1,210 @@ +### 🧱 Component: Runner (Sagas + Effect Workflows) +**Definition:** +The Runner is a standalone container responsible for executing **workflow logic** in the system: it runs deterministic **Saga** state machines and drives non-deterministic **Effect Provider** executions. It consumes and produces messages via **NATS JetStream**, persists workflow state and checkpoints in `edge-storage` `KvStore`, and uses the same “container + gateway + multi-tenant isolation + horizontal scale” model as Aggregate and Projection. + +The Runner is designed to operate as a **worker** in a horizontally scalable worker pool: multiple Runner replicas can share the workload from JetStream while preserving ordering and idempotency guarantees per workflow key. + +**Multi-Tenancy:** +Multi-tenancy is first-class via `tenant_id`. When enabled: +- **Routing:** All inbound admin/query requests are tenant-scoped using the `x-tenant-id` header (same key as other components). +- **Stream Isolation:** JetStream subjects and durable consumer names are tenant-aware (e.g., `tenant..aggregate.*.*`, `tenant..workflow.*.*`). 
+- **Storage Namespacing:** Saga state, outbox items, checkpoints, and scheduling state are keyed with `tenant_id` prefixes to prevent cross-tenant reads/writes. +- **Worker Partitioning:** Runner instances may be sharded by `tenant_id` (tenant placement) or may run multi-tenant with strict subject + key isolation. +- **Backward Compatibility:** Deployments without multi-tenancy use a default/empty `tenant_id` and non-tenant-prefixed namespaces. + +**Dependencies:** +* Core crates pulled from the custom Cargo registry: + ```toml + [registries.madapes] + index = "sparse+https://git.madapes.com/api/packages/madapes/cargo/" + ``` + + | Crate | Purpose | + |-------|---------| + | `edge-storage` | libmdbx-backed `KvStore` for saga state, checkpoints, outbox, and schedules | + | `runtime-function` | Deterministic DAG execution for Saga `on_event` / `compensation` programs | + | `edge-logger-client` | High-performance logging (UDS + Protobuf, Loki sink) | + | `query-engine` | Optional UQF queries over workflow state (admin/debug tooling) | + | `async-nats` | NATS JetStream client for consuming events and dispatching workflow messages | + +* Source code available at `../../madapes/` +* **Note:** This is a standalone container, aligned with Aggregate/Projection operational constraints and patterns. + +**Observability:** +* Production stack: **Grafana** + **Victoria Metrics** + **Loki** +* Logs via `edge-logger-client` with multi-tenant isolation and cardinality protection +* Metrics exported in Prometheus format for Victoria Metrics scraping (worker lag, outbox depth, effect latency) +* Trace correlation via propagated `trace_id` in message metadata and log fields + +--- + +#### 1. Core Responsibilities +* **Event Consumption (Triggering):** Consumes Aggregate events (and optionally workflow events) from JetStream using durable consumers and subject filters. 
+* **Saga Execution:** Runs deterministic Saga `on_event` programs (`runtime-function` DAG) to compute `(new_state, outgoing_work[])`. +* **Atomic Persistence (State + Outbox + Checkpoint):** Commits saga state updates, newly produced work items, and processing checkpoints in a single `KvStore` transaction. +* **Outbox Relay:** Reliably dispatches outgoing work to the appropriate destination (Gateway command submission, Effect Provider command stream) after it is durably recorded. +* **Effect Execution:** Runs Effect Provider workflows that translate internal “effect commands” into external actions (HTTP/gRPC/SMTP/SQL) and publishes result events back into JetStream. +* **Scheduling (Durable Timeouts):** Provides durable timers/reminders for sagas (e.g., “cancel order if not paid in 30 minutes”) without relying on in-memory timers as the source of truth. +* **Backpressure + Safety:** Enforces max in-flight work, retry policies, and poison-message handling to keep the worker pool healthy. + +--- + +#### 2. Runner Operating Modes (Single Binary, Multiple Roles) +The Runner can be deployed in one of these modes: +1. **Saga Worker:** Consumes trigger events and advances saga state machines; emits commands/effect commands via outbox. +2. **Effect Worker:** Consumes effect commands, executes real-world side effects, publishes result events. +3. **Combined Worker:** Runs both roles in the same container for small deployments (still with strict separation between deterministic saga runtime and non-deterministic effect execution). + +Each mode uses the same multitenant model and can scale horizontally by increasing replica count. + +--- + +#### 3. Data Model (Keys, Envelopes, Namespaces) +The Runner relies on a small set of durable keyspaces in `edge-storage` `KvStore`. All keys are tenant-prefixed when multi-tenancy is enabled. 
+ +**Saga State:** +- `saga:{tenant_id}:{saga_name}:{correlation_id}` → JSON state payload + +**Saga Checkpoints (JetStream stream sequence or consumer sequence):** +- `checkpoint:{tenant_id}:{saga_name}` → last processed sequence (monotonic) + +**Outbox (Reliable Dispatch):** +- `outbox:{tenant_id}:{work_kind}:{work_id}` → serialized work item (command or effect command) +- `outbox_index:{tenant_id}` → optional index cursor / priority ordering metadata + +**Schedules (Durable Timeouts/Reminders):** +- `schedule:{tenant_id}:{saga_name}:{correlation_id}:{due_at}` → reminder payload + +**Idempotency / Dedupe (optional but recommended):** +- `dedupe:{tenant_id}:{saga_name}:{event_id}` → marker that an event transition was applied +- `dedupe:{tenant_id}:effect:{command_id}` → marker that an effect was executed + +The `tenant_id` type follows the same semantics as other components (`TenantId` string wrapper; default/empty allowed for single-tenant deployments). + +**Work Item Envelopes:** +* **Aggregate Command (Gateway SubmitCommandRequest shape):** + - `tenant_id` + - `command_id` (UUID v7) + - `aggregate_id` + - `aggregate_type` + - `payload_json` + - `metadata` (must include `correlation_id` and `trace_id` when available) +* **Effect Command:** + - `tenant_id` + - `command_id` (UUID v7, used as the idempotency key) + - `effect_name` + - `payload` (JSON) + - `metadata` (`correlation_id`, `trace_id`, retry policy hints) +* **Effect Result Event:** + - `tenant_id` + - `command_id` + - `effect_name` + - `result_type` (e.g., `Succeeded`, `Failed`, `TimedOut`) + - `payload` (JSON) + - `timestamp` + +--- + +#### 4. JetStream Integration (Subjects, Streams, Consumers) +The Runner consumes and produces messages using tenant-namespaced subject conventions consistent with other components. 
+ +**Aggregate Event Stream (existing):** +- Stream: `AGGREGATE_EVENTS` +- Subjects: `tenant.*.aggregate.*.*` +- Example: `tenant.acme-corp.aggregate.Account.018f...` + +**Workflow Command Stream (Runner-produced work):** +- Stream: `WORKFLOW_COMMANDS` +- Subjects: + - Effect commands: `tenant.*.effect..` + - Optional internal commands: `tenant.*.workflow..` + +**Workflow Event Stream (Effect results + workflow facts):** +- Stream: `WORKFLOW_EVENTS` +- Subjects: + - Effect results: `tenant.*.effect_result..` + - Optional saga facts: `tenant.*.workflow_event..` + +**Consumer Model (Worker Pool):** +* **Saga workers** use durable consumers filtered to relevant subjects (typically `tenant..aggregate.>` or a narrower wildcard set). Replicas share a deliver group so each message is processed by a single worker in the pool. +* **Effect workers** use durable consumers per `effect_name` (or per effect category), again with a deliver group for horizontal scale. +* **Ack Discipline:** Messages are acked only after the corresponding state/outbox/checkpoint transaction commits. +* **Ordering:** Ordering is guaranteed only within a chosen serialization key (usually `(tenant_id, correlation_id)`); concurrency across keys is allowed. + +--- + +#### 5. Saga Lifecycle (Deterministic Workflow) +1. **Trigger:** The Runner receives an Aggregate event from JetStream (`AGGREGATE_EVENTS`) and extracts `tenant_id`, `event_id`, and correlation metadata. +2. **Load:** It loads existing saga state from `KvStore` using `saga:{tenant_id}:{saga_name}:{correlation_id}` (or creates an initial state on first trigger). +3. **Execute (`runtime-function`):** + * Runs the deterministic Saga program: `(saga_state, incoming_event) → (new_saga_state, work_items[])`. + * The program is sandboxed and must not perform I/O or rely on non-deterministic inputs. +4. 
**Commit (Atomic Outbox):** + * In a single MDBX transaction: + * Persist `new_saga_state` + * Persist each outgoing work item into `outbox:*` + * Advance `checkpoint:{tenant_id}:{saga_name}` to the processed sequence + * Optionally record dedupe markers for `event_id` +5. **Dispatch:** The outbox relay publishes the work items to their destinations: + * Aggregate commands are submitted through the Gateway (same routing and tenant enforcement model as user commands). + * Effect commands are published to `WORKFLOW_COMMANDS` for effect workers to execute. +6. **Wait:** The saga instance remains persisted and will react to the next correlated event or reminder. + +--- + +#### 6. Effect Provider Lifecycle (Non-Deterministic Execution) +1. **Receive Command:** The effect worker consumes an effect command message from `WORKFLOW_COMMANDS` (`tenant..effect..`). +2. **Idempotency Gate:** The worker checks `dedupe:{tenant_id}:effect:{command_id}` (or an equivalent idempotency key) to avoid duplicate external calls. +3. **Execute:** The provider performs the real-world action (HTTP/gRPC/SMTP/SQL) with: + * retries + exponential backoff + * timeouts + * circuit breakers per upstream dependency +4. **Publish Result:** The worker publishes an effect result event to `WORKFLOW_EVENTS` (`tenant..effect_result..`). +5. **Finalize:** After result publish is acknowledged, it records completion in `KvStore` (dedupe marker, optional audit trail) and acks the command message. + +Effect results are consumed by saga workers (and optionally projections) as first-class events. + +--- + +#### 7. Technical Constraints & Guarantees +* **Determinism Boundary:** Saga programs are deterministic and side-effect free. All I/O happens only in effect workers. +* **At-Least-Once Processing:** JetStream delivery is at-least-once; correctness relies on idempotency via checkpoints and dedupe markers. 
+* **Atomicity:** “Saga state + outbox + checkpoint” is committed as one durable transaction to avoid dual-write gaps. +* **Tenant Isolation:** Every read/write is tenant-scoped; cross-tenant access is blocked at both subject filters and storage keyspace. +* **No Direct Cross-Aggregate Reads:** Sagas coordinate via events and commands; they do not read Aggregate state directly (unless explicitly provided as event payload or via a dedicated query/projection API). + +--- + +#### 8. Horizontal Scaling Strategy (Worker Pool) +The Runner scales horizontally by adding replicas. + +* **Work Distribution:** JetStream deliver groups ensure each message is processed by a single worker in a replica set. +* **Sharding Options:** + - **Tenant Sharding:** Place runner replicas on nodes responsible for certain tenant ranges, aligning with gateway routing and operational locality. + - **Key Sharding:** For large tenants, shard by `(tenant_id, saga_name, correlation_id)` across multiple worker groups. +* **Draining:** Instances support graceful drain: stop acquiring new work, finish in-flight items, flush outbox relay, then exit. +* **Replay:** Rebuild sagas by resetting `checkpoint:{tenant_id}:{saga_name}` and re-consuming from JetStream (with strong caution and explicit operator intent). + +--- + +#### 9. Error Handling & Operational Policies +* **Poison Messages:** If a message repeatedly fails deterministic execution (schema mismatch, runtime-function error), quarantine it: + - write a record to `deadletter:{tenant_id}:...` + - emit an alert/metric + - continue processing other keys +* **Retry Discipline:** Retries for deterministic saga transitions should be bounded and should not busy-loop; external effect retries are handled in effect workers with backoff. +* **Backpressure:** Configure max in-flight per worker and per key; expose lag and outbox depth metrics. 
+* **Schema Evolution:** Treat event payloads as versioned; sagas and effects must accept older versions or explicitly gate by version and route to compensating paths. + +--- + +#### 10. Admin + Health Endpoints (Under Gateway) +The Runner exposes endpoints consistent with other components: +- `/health` - storage + JetStream connectivity checks +- `/ready` - readiness (not draining, can acquire work) +- `/metrics` - Prometheus metrics +- `/info` - build info, role mode(s), configured saga/effect sets, stream/consumer names +- `/admin/drain` - begin graceful drain +- `/admin/reload` - hot-reload manifest/placement config (where supported) + +All admin endpoints are expected to run behind the Gateway and enforce tenant-scoped access where applicable. diff --git a/runner/proto/aggregate.proto b/runner/proto/aggregate.proto new file mode 100644 index 0000000..206f572 --- /dev/null +++ b/runner/proto/aggregate.proto @@ -0,0 +1,31 @@ +syntax = "proto3"; + +package aggregate.gateway.v1; + +service CommandService { + rpc SubmitCommand(SubmitCommandRequest) returns (SubmitCommandResponse); +} + +message SubmitCommandRequest { + string tenant_id = 1; + string command_id = 2; + string aggregate_id = 3; + string aggregate_type = 4; + string payload_json = 5; + map<string, string> metadata = 6; +} + +message Event { + string event_id = 1; + string command_id = 2; + string aggregate_id = 3; + string aggregate_type = 4; + uint64 version = 5; + string event_type = 6; + string payload_json = 7; + string timestamp_rfc3339 = 8; +} + +message SubmitCommandResponse { + repeated Event events = 1; +} diff --git a/runner/src/config/mod.rs b/runner/src/config/mod.rs new file mode 100644 index 0000000..cff8e45 --- /dev/null +++ b/runner/src/config/mod.rs @@ -0,0 +1,4 @@ +mod settings; + +pub use settings::RunnerMode; +pub use settings::{Settings, SettingsLoadError}; diff --git a/runner/src/config/settings.rs b/runner/src/config/settings.rs new file mode 100644 index 0000000..153ae70 --- /dev/null +++ 
b/runner/src/config/settings.rs @@ -0,0 +1,496 @@ +use serde::{Deserialize, Serialize}; +use std::path::Path; + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(default)] +pub struct Settings { + pub nats_url: String, + pub storage_path: String, + pub mode: RunnerMode, + + pub multi_tenant_enabled: bool, + pub default_tenant_id: Option<String>, + pub tenant_allowlist: Vec<String>, + pub tenant_placement_bucket: Option<String>, + pub shard_id: Option<String>, + + pub aggregate_gateway_url: Option<String>, + + pub aggregate_events_stream: String, + pub workflow_commands_stream: String, + pub workflow_events_stream: String, + + pub saga_trigger_subject_filters: Vec<String>, + pub effect_command_subject_filters: Vec<String>, + + pub consumer_durable_prefix: String, + pub deliver_group: Option<String>, + pub max_in_flight: usize, + pub ack_timeout_ms: u64, + pub max_deliver: i64, + + pub saga_manifest_path: String, + pub effects_manifest_path: String, + + pub outbox_scan_interval_ms: u64, + pub outbox_batch_size: usize, + pub outbox_max_in_flight: usize, + pub outbox_max_in_flight_per_tenant: usize, + + pub schedule_scan_interval_ms: u64, + pub schedule_batch_size: usize, + + pub effect_timeout_ms: u64, + pub effect_retry_max_attempts: usize, + pub effect_retry_backoff_ms: u64, + + pub http_addr: String, + + pub test_saga_crash_after_commit: bool, + pub test_effect_crash_after_dedupe_before_ack: bool, + pub test_outbox_crash_after_dispatch: bool, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)] +#[serde(rename_all = "snake_case")] +pub enum RunnerMode { + Saga, + Effect, + #[default] + Combined, +} + +impl Default for Settings { + fn default() -> Self { + Self { + nats_url: "nats://localhost:4222".to_string(), + storage_path: "./data".to_string(), + mode: RunnerMode::Combined, + + multi_tenant_enabled: true, + default_tenant_id: None, + tenant_allowlist: Vec::new(), + tenant_placement_bucket: None, + shard_id: None, + + aggregate_gateway_url: None, + + aggregate_events_stream: 
"AGGREGATE_EVENTS".to_string(), + workflow_commands_stream: "WORKFLOW_COMMANDS".to_string(), + workflow_events_stream: "WORKFLOW_EVENTS".to_string(), + + saga_trigger_subject_filters: vec!["tenant.*.aggregate.*.*".to_string()], + effect_command_subject_filters: vec!["tenant.*.effect.*.*".to_string()], + + consumer_durable_prefix: "runner".to_string(), + deliver_group: None, + max_in_flight: 128, + ack_timeout_ms: 30_000, + max_deliver: 10, + + saga_manifest_path: "./sagas.yaml".to_string(), + effects_manifest_path: "./effects.yaml".to_string(), + + outbox_scan_interval_ms: 200, + outbox_batch_size: 256, + outbox_max_in_flight: 512, + outbox_max_in_flight_per_tenant: 32, + + schedule_scan_interval_ms: 250, + schedule_batch_size: 256, + + effect_timeout_ms: 30_000, + effect_retry_max_attempts: 1, + effect_retry_backoff_ms: 250, + + http_addr: "0.0.0.0:8080".to_string(), + + test_saga_crash_after_commit: false, + test_effect_crash_after_dedupe_before_ack: false, + test_outbox_crash_after_dispatch: false, + } + } +} + +impl Settings { + pub fn from_env() -> Result<Self, SettingsLoadError> { + let mut settings = Self::default(); + settings.apply_env_overrides(); + Ok(settings) + } + + pub fn from_yaml(yaml: &str) -> Result<Self, serde_yaml::Error> { + serde_yaml::from_str(yaml) + } + + pub fn from_toml(toml_str: &str) -> Result<Self, toml::de::Error> { + toml::from_str(toml_str) + } + + pub fn from_json(json: &str) -> Result<Self, serde_json::Error> { + serde_json::from_str(json) + } + + pub fn from_file(path: impl AsRef<Path>) -> Result<Self, SettingsLoadError> { + let path = path.as_ref(); + let raw = std::fs::read_to_string(path)?; + let ext = path.extension().and_then(|e| e.to_str()).unwrap_or(""); + + match ext { + "yaml" | "yml" => Ok(Self::from_yaml(&raw)?), + "toml" => Ok(Self::from_toml(&raw)?), + "json" => Ok(Self::from_json(&raw)?), + _ => Err(SettingsLoadError::UnsupportedFormat { + path: path.display().to_string(), + }), + } + } + + pub fn load_from_file_with_env_overrides( + path: impl AsRef<Path>, + ) -> Result<Self, SettingsLoadError> { + let mut settings = Self::from_file(path)?; + 
settings.apply_env_overrides(); + Ok(settings) + } + + fn apply_env_overrides(&mut self) { + if let Ok(url) = std::env::var("RUNNER_NATS_URL") { + self.nats_url = url; + } + + if let Ok(path) = std::env::var("RUNNER_STORAGE_PATH") { + self.storage_path = path; + } + + if let Ok(mode) = std::env::var("RUNNER_MODE") { + self.mode = match mode.trim().to_ascii_lowercase().as_str() { + "saga" => RunnerMode::Saga, + "effect" => RunnerMode::Effect, + "combined" => RunnerMode::Combined, + _ => self.mode, + }; + } + + if let Ok(enabled) = std::env::var("RUNNER_MULTI_TENANT") { + if let Ok(value) = enabled.parse() { + self.multi_tenant_enabled = value; + } + } + + if let Ok(default_tenant_id) = std::env::var("RUNNER_DEFAULT_TENANT_ID") { + if default_tenant_id.is_empty() { + self.default_tenant_id = None; + } else { + self.default_tenant_id = Some(default_tenant_id); + } + } + + if let Ok(list) = std::env::var("RUNNER_TENANT_ALLOWLIST") { + let values = list + .split(',') + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()) + .collect::<Vec<_>>(); + self.tenant_allowlist = values; + } + + if let Ok(bucket) = std::env::var("RUNNER_TENANT_PLACEMENT_BUCKET") { + if bucket.trim().is_empty() { + self.tenant_placement_bucket = None; + } else { + self.tenant_placement_bucket = Some(bucket); + } + } + + if let Ok(shard) = std::env::var("RUNNER_SHARD_ID") { + if shard.trim().is_empty() { + self.shard_id = None; + } else { + self.shard_id = Some(shard); + } + } + + if let Ok(url) = std::env::var("RUNNER_AGGREGATE_GATEWAY_URL") { + if url.trim().is_empty() { + self.aggregate_gateway_url = None; + } else { + self.aggregate_gateway_url = Some(url); + } + } + + if let Ok(name) = std::env::var("RUNNER_AGGREGATE_EVENTS_STREAM") { + self.aggregate_events_stream = name; + } + + if let Ok(name) = std::env::var("RUNNER_WORKFLOW_COMMANDS_STREAM") { + self.workflow_commands_stream = name; + } + + if let Ok(name) = std::env::var("RUNNER_WORKFLOW_EVENTS_STREAM") { + self.workflow_events_stream = 
name; + } + + if let Ok(filters) = std::env::var("RUNNER_SAGA_TRIGGER_SUBJECT_FILTERS") { + let values = filters + .split(',') + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()) + .collect::<Vec<_>>(); + if !values.is_empty() { + self.saga_trigger_subject_filters = values; + } + } + + if let Ok(filters) = std::env::var("RUNNER_EFFECT_COMMAND_SUBJECT_FILTERS") { + let values = filters + .split(',') + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()) + .collect::<Vec<_>>(); + if !values.is_empty() { + self.effect_command_subject_filters = values; + } + } + + if let Ok(prefix) = std::env::var("RUNNER_CONSUMER_DURABLE_PREFIX") { + self.consumer_durable_prefix = prefix; + } + + if let Ok(group) = std::env::var("RUNNER_DELIVER_GROUP") { + if group.trim().is_empty() { + self.deliver_group = None; + } else { + self.deliver_group = Some(group); + } + } + + if let Ok(max_in_flight) = std::env::var("RUNNER_MAX_IN_FLIGHT") { + if let Ok(value) = max_in_flight.parse() { + self.max_in_flight = value; + } + } + + if let Ok(ms) = std::env::var("RUNNER_ACK_TIMEOUT_MS") { + if let Ok(value) = ms.parse() { + self.ack_timeout_ms = value; + } + } + + if let Ok(max_deliver) = std::env::var("RUNNER_MAX_DELIVER") { + if let Ok(value) = max_deliver.parse() { + self.max_deliver = value; + } + } + + if let Ok(path) = std::env::var("RUNNER_SAGA_MANIFEST_PATH") { + self.saga_manifest_path = path; + } + + if let Ok(path) = std::env::var("RUNNER_EFFECTS_MANIFEST_PATH") { + self.effects_manifest_path = path; + } + + if let Ok(ms) = std::env::var("RUNNER_OUTBOX_SCAN_INTERVAL_MS") { + if let Ok(value) = ms.parse() { + self.outbox_scan_interval_ms = value; + } + } + + if let Ok(size) = std::env::var("RUNNER_OUTBOX_BATCH_SIZE") { + if let Ok(value) = size.parse() { + self.outbox_batch_size = value; + } + } + + if let Ok(size) = std::env::var("RUNNER_OUTBOX_MAX_IN_FLIGHT") { + if let Ok(value) = size.parse() { + self.outbox_max_in_flight = value; + } + } + + if let Ok(size) = 
std::env::var("RUNNER_OUTBOX_MAX_IN_FLIGHT_PER_TENANT") { + if let Ok(value) = size.parse() { + self.outbox_max_in_flight_per_tenant = value; + } + } + + if let Ok(ms) = std::env::var("RUNNER_SCHEDULE_SCAN_INTERVAL_MS") { + if let Ok(value) = ms.parse() { + self.schedule_scan_interval_ms = value; + } + } + + if let Ok(size) = std::env::var("RUNNER_SCHEDULE_BATCH_SIZE") { + if let Ok(value) = size.parse() { + self.schedule_batch_size = value; + } + } + + if let Ok(ms) = std::env::var("RUNNER_EFFECT_TIMEOUT_MS") { + if let Ok(value) = ms.parse() { + self.effect_timeout_ms = value; + } + } + + if let Ok(attempts) = std::env::var("RUNNER_EFFECT_RETRY_MAX_ATTEMPTS") { + if let Ok(value) = attempts.parse() { + self.effect_retry_max_attempts = value; + } + } + + if let Ok(ms) = std::env::var("RUNNER_EFFECT_RETRY_BACKOFF_MS") { + if let Ok(value) = ms.parse() { + self.effect_retry_backoff_ms = value; + } + } + + if let Ok(addr) = std::env::var("RUNNER_HTTP_ADDR") { + if !addr.trim().is_empty() { + self.http_addr = addr; + } + } + + if let Ok(v) = std::env::var("RUNNER_TEST_SAGA_CRASH_AFTER_COMMIT") { + self.test_saga_crash_after_commit = + matches!(v.trim().to_ascii_lowercase().as_str(), "1" | "true" | "yes"); + } + if let Ok(v) = std::env::var("RUNNER_TEST_EFFECT_CRASH_AFTER_DEDUPE_BEFORE_ACK") { + self.test_effect_crash_after_dedupe_before_ack = + matches!(v.trim().to_ascii_lowercase().as_str(), "1" | "true" | "yes"); + } + if let Ok(v) = std::env::var("RUNNER_TEST_OUTBOX_CRASH_AFTER_DISPATCH") { + self.test_outbox_crash_after_dispatch = + matches!(v.trim().to_ascii_lowercase().as_str(), "1" | "true" | "yes"); + } + } + + pub fn validate(&self) -> Result<(), String> { + if self.nats_url.is_empty() { + return Err("NATS URL is required".to_string()); + } + if self.storage_path.is_empty() { + return Err("Storage path is required".to_string()); + } + if self.aggregate_events_stream.is_empty() { + return Err("Aggregate events stream name is required".to_string()); + } + if 
matches!(self.mode, RunnerMode::Saga | RunnerMode::Combined) + && self.saga_trigger_subject_filters.is_empty() + { + return Err("At least one saga trigger subject filter is required".to_string()); + } + if matches!(self.mode, RunnerMode::Effect | RunnerMode::Combined) + && self.effect_command_subject_filters.is_empty() + { + return Err("At least one effect command subject filter is required".to_string()); + } + if self.consumer_durable_prefix.trim().is_empty() { + return Err("Consumer durable prefix is required".to_string()); + } + if self.max_in_flight == 0 { + return Err("Max in-flight must be > 0".to_string()); + } + if self.ack_timeout_ms == 0 { + return Err("Ack timeout must be > 0".to_string()); + } + if self.outbox_batch_size == 0 || self.schedule_batch_size == 0 { + return Err("Batch sizes must be > 0".to_string()); + } + if self.outbox_max_in_flight == 0 || self.outbox_max_in_flight_per_tenant == 0 { + return Err("Outbox max in-flight must be > 0".to_string()); + } + if self.effect_timeout_ms == 0 { + return Err("Effect timeout must be > 0".to_string()); + } + if self.effect_retry_max_attempts == 0 { + return Err("Effect retry max attempts must be > 0".to_string()); + } + + if matches!(self.mode, RunnerMode::Saga | RunnerMode::Combined) { + if self.saga_manifest_path.trim().is_empty() { + return Err("Saga manifest path is required".to_string()); + } + let manifest = crate::saga::SagaManifest::from_file(&self.saga_manifest_path) + .map_err(|e| format!("Failed to load saga manifest: {}", e))?; + manifest + .validate() + .map_err(|e| format!("Invalid saga manifest: {}", e))?; + } + + if matches!(self.mode, RunnerMode::Effect | RunnerMode::Combined) { + if self.effects_manifest_path.trim().is_empty() { + return Err("Effects manifest path is required".to_string()); + } + let manifest = crate::effects::EffectsManifest::from_file(&self.effects_manifest_path) + .map_err(|e| format!("Failed to load effects manifest: {}", e))?; + manifest + .validate() + 
.map_err(|e| format!("Invalid effects manifest: {}", e))?; + } + + Ok(()) + } +} + +#[derive(Debug, thiserror::Error)] +pub enum SettingsLoadError { + #[error("Failed to read config file: {0}")] + Io(#[from] std::io::Error), + #[error("Failed to parse YAML config: {0}")] + Yaml(#[from] serde_yaml::Error), + #[error("Failed to parse TOML config: {0}")] + Toml(#[from] toml::de::Error), + #[error("Failed to parse JSON config: {0}")] + Json(#[from] serde_json::Error), + #[error("Unsupported config format: {path}")] + UnsupportedFormat { path: String }, +} + +#[cfg(test)] +mod tests { + use super::*; + + fn env_lock() -> std::sync::MutexGuard<'static, ()> { + static LOCK: std::sync::OnceLock> = std::sync::OnceLock::new(); + LOCK.get_or_init(|| std::sync::Mutex::new(())) + .lock() + .unwrap() + } + + #[test] + fn settings_from_env() { + let _guard = env_lock(); + std::env::set_var("RUNNER_NATS_URL", "nats://localhost:4222"); + let settings = Settings::from_env().unwrap(); + assert_eq!(settings.nats_url, "nats://localhost:4222"); + std::env::remove_var("RUNNER_NATS_URL"); + } + + #[test] + fn tenant_allowlist_overrides_subject_filters() { + let _guard = env_lock(); + std::env::set_var("RUNNER_TENANT_ALLOWLIST", "t1,t2"); + let settings = Settings::from_env().unwrap(); + assert_eq!( + settings.tenant_allowlist, + vec!["t1".to_string(), "t2".to_string()] + ); + std::env::remove_var("RUNNER_TENANT_ALLOWLIST"); + } + + #[test] + fn settings_validation_catches_missing_required() { + let settings = Settings { + nats_url: "".to_string(), + ..Default::default() + }; + assert!(settings.validate().is_err()); + } + + #[test] + fn settings_is_clone_debug() { + fn assert_clone_debug() {} + assert_clone_debug::(); + } +} diff --git a/runner/src/effects/manifest.rs b/runner/src/effects/manifest.rs new file mode 100644 index 0000000..956ee97 --- /dev/null +++ b/runner/src/effects/manifest.rs @@ -0,0 +1,79 @@ +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use 
std::path::Path;

/// Declarative list of effect definitions loaded from a manifest file.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[serde(default)]
pub struct EffectsManifest {
    pub effects: Vec<EffectDefinition>,
}

impl EffectsManifest {
    /// Validate every effect definition, failing on the first error.
    pub fn validate(&self) -> Result<(), String> {
        for effect in &self.effects {
            effect.validate()?;
        }
        Ok(())
    }

    pub fn from_yaml(yaml: &str) -> Result<Self, serde_yaml::Error> {
        serde_yaml::from_str(yaml)
    }

    pub fn from_toml(toml_str: &str) -> Result<Self, toml::de::Error> {
        toml::from_str(toml_str)
    }

    pub fn from_json(json: &str) -> Result<Self, serde_json::Error> {
        serde_json::from_str(json)
    }

    /// Load a manifest, choosing the parser from the file extension.
    ///
    /// Extensions are matched case-insensitively ("config.YAML" works);
    /// anything other than yaml/yml/toml/json is rejected as
    /// `UnsupportedFormat`.
    pub fn from_file(path: impl AsRef<Path>) -> Result<Self, EffectsManifestLoadError> {
        let path = path.as_ref();
        let raw = std::fs::read_to_string(path)?;
        // Fix: match the extension case-insensitively instead of exactly.
        let ext = path
            .extension()
            .and_then(|e| e.to_str())
            .unwrap_or("")
            .to_ascii_lowercase();

        match ext.as_str() {
            "yaml" | "yml" => Ok(Self::from_yaml(&raw)?),
            "toml" => Ok(Self::from_toml(&raw)?),
            "json" => Ok(Self::from_json(&raw)?),
            _ => Err(EffectsManifestLoadError::UnsupportedFormat {
                path: path.display().to_string(),
            }),
        }
    }
}

/// One named effect bound to a provider, plus provider-specific config.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[serde(default)]
pub struct EffectDefinition {
    pub name: String,
    pub provider: String,
    // Opaque provider configuration; interpreted by the provider itself.
    pub config: Value,
}

impl EffectDefinition {
    /// Require a non-blank name and provider.
    pub fn validate(&self) -> Result<(), String> {
        if self.name.trim().is_empty() {
            return Err("Effect name is required".to_string());
        }
        if self.provider.trim().is_empty() {
            return Err(format!("Effect '{}' must specify provider", self.name));
        }
        Ok(())
    }
}

/// Errors from `EffectsManifest::from_file`.
#[derive(Debug, thiserror::Error)]
pub enum EffectsManifestLoadError {
    #[error("Failed to read manifest file: {0}")]
    Io(#[from] std::io::Error),
    #[error("Failed to parse YAML manifest: {0}")]
    Yaml(#[from] serde_yaml::Error),
    #[error("Failed to parse TOML manifest: {0}")]
    Toml(#[from] toml::de::Error),
    #[error("Failed to parse JSON manifest: {0}")]
    Json(#[from] serde_json::Error),
    #[error("Unsupported manifest format: {path}")]
    UnsupportedFormat { path: String },
}
// ---- runner/src/effects/mod.rs ----

mod manifest;
mod providers;
mod runtime;
mod worker;

pub use manifest::{EffectDefinition, EffectsManifest};
pub use providers::{EffectProvider, ProviderRegistry};
pub use runtime::EffectRuntime;
pub use worker::run_effect_worker;

// ---- runner/src/effects/providers/email.rs ----

use crate::types::{EffectCommandEnvelope, EffectResultEnvelope, EffectResultType, RunnerError};
use aws_config::Region;
use aws_sdk_sesv2::types::{Body, Content, Destination, EmailContent, Message};
use chrono::Utc;
use futures::future::BoxFuture;
use lettre::message::{
    header::ContentType, Mailbox, Message as LettreMessage, MultiPart, SinglePart,
};
use lettre::{AsyncSmtpTransport, AsyncTransport, Tokio1Executor};
use serde::Deserialize;
use serde_json::Value;
use std::collections::HashMap;
use std::sync::Arc;

/// Backend selection for the email effect provider; deserialized from the
/// manifest `config` value, discriminated by the `backend` field.
///
/// Secrets are referenced indirectly: the config stores the *name* of an
/// environment variable (e.g. `api_key_env`), not the secret itself.
#[derive(Debug, Clone, Deserialize)]
#[serde(tag = "backend", rename_all = "snake_case")]
pub enum EmailProviderConfig {
    Resend {
        api_key_env: String,
        // Default sender, used when the payload carries no `from`.
        #[serde(default)]
        from: Option<String>,
    },
    Postmark {
        server_token_env: String,
        #[serde(default)]
        from: Option<String>,
    },
    Smtp {
        // Env var holding the full SMTP connection URL.
        url_env: String,
    },
    Ses {
        region: String,
        #[serde(default)]
        from: Option<String>,
        #[serde(default)]
        configuration_set: Option<String>,
    },
}

/// Email-sending effect provider supporting Resend, Postmark, SMTP and SES.
#[derive(Debug, Clone)]
pub struct EmailProvider {
    config: EmailProviderConfig,
    // Shared HTTP client for the REST-based backends (Resend, Postmark).
    client: Arc<reqwest::Client>,
}

impl EmailProvider {
    /// Build a provider from the manifest `config` value.
    ///
    /// Fails with `RunnerError::DecodeError` when the value does not match
    /// `EmailProviderConfig`.
    pub fn from_config_value(config: Value) -> Result<Self, RunnerError> {
        let cfg: EmailProviderConfig =
            serde_json::from_value(config).map_err(|e| RunnerError::DecodeError(e.to_string()))?;
        Ok(Self {
            config: cfg,
            client: Arc::new(reqwest::Client::new()),
        })
    }

    /// Read a required environment variable, mapping absence to a
    /// `RunnerError::RuntimeError` naming the missing variable.
    fn env_required(name: &str) -> Result<String, RunnerError> {
std::env::var(name) + .map_err(|_| RunnerError::RuntimeError(format!("Missing required env var: {}", name))) + } +} + +impl super::EffectProvider for EmailProvider { + fn execute( + &self, + cmd: EffectCommandEnvelope, + ) -> BoxFuture<'static, Result> { + let config = self.config.clone(); + let client = self.client.clone(); + Box::pin(async move { + let payload: SendEmailPayload = serde_json::from_value(cmd.payload.clone()) + .map_err(|e| RunnerError::DecodeError(e.to_string()))?; + + match config { + EmailProviderConfig::Resend { api_key_env, from } => { + let api_key = Self::env_required(&api_key_env)?; + let from = payload.from.clone().or(from).ok_or_else(|| { + RunnerError::RuntimeError( + "Missing 'from' (payload.from or config.from)".to_string(), + ) + })?; + let res = send_resend(&client, api_key, from, payload).await?; + Ok(EffectResultEnvelope { + tenant_id: cmd.tenant_id, + command_id: cmd.command_id, + effect_name: cmd.effect_name, + result_type: EffectResultType::Succeeded, + payload: res, + timestamp: Utc::now(), + metadata: cmd.metadata, + }) + } + EmailProviderConfig::Postmark { + server_token_env, + from, + } => { + let token = Self::env_required(&server_token_env)?; + let from = payload.from.clone().or(from).ok_or_else(|| { + RunnerError::RuntimeError( + "Missing 'from' (payload.from or config.from)".to_string(), + ) + })?; + let res = send_postmark(&client, token, from, payload).await?; + Ok(EffectResultEnvelope { + tenant_id: cmd.tenant_id, + command_id: cmd.command_id, + effect_name: cmd.effect_name, + result_type: EffectResultType::Succeeded, + payload: res, + timestamp: Utc::now(), + metadata: cmd.metadata, + }) + } + EmailProviderConfig::Smtp { url_env } => { + let url = Self::env_required(&url_env)?; + let res = send_smtp(url, payload).await?; + Ok(EffectResultEnvelope { + tenant_id: cmd.tenant_id, + command_id: cmd.command_id, + effect_name: cmd.effect_name, + result_type: EffectResultType::Succeeded, + payload: res, + timestamp: 
Utc::now(), + metadata: cmd.metadata, + }) + } + EmailProviderConfig::Ses { + region, + from, + configuration_set, + } => { + let from = payload.from.clone().or(from).ok_or_else(|| { + RunnerError::RuntimeError( + "Missing 'from' (payload.from or config.from)".to_string(), + ) + })?; + let res = send_ses(region, from, configuration_set, payload).await?; + Ok(EffectResultEnvelope { + tenant_id: cmd.tenant_id, + command_id: cmd.command_id, + effect_name: cmd.effect_name, + result_type: EffectResultType::Succeeded, + payload: res, + timestamp: Utc::now(), + metadata: cmd.metadata, + }) + } + } + }) + } +} + +#[derive(Debug, Clone, Deserialize)] +#[serde(untagged)] +enum OneOrMany { + One(T), + Many(Vec), +} + +#[derive(Debug, Clone, Deserialize)] +pub struct SendEmailPayload { + #[serde(default)] + from: Option, + to: OneOrMany, + #[serde(default)] + cc: Option>, + #[serde(default)] + bcc: Option>, + #[serde(default)] + reply_to: Option, + subject: String, + #[serde(default)] + text: Option, + #[serde(default)] + html: Option, + #[serde(default)] + tags: Option>, + #[serde(default)] + headers: Option>, +} + +impl SendEmailPayload { + fn to_vec(one_or_many: OneOrMany) -> Vec { + match one_or_many { + OneOrMany::One(v) => vec![v], + OneOrMany::Many(v) => v, + } + } + + fn to_list(&self) -> Vec { + Self::to_vec(self.to.clone()) + } + + fn cc_list(&self) -> Vec { + self.cc.clone().map(Self::to_vec).unwrap_or_default() + } + + fn bcc_list(&self) -> Vec { + self.bcc.clone().map(Self::to_vec).unwrap_or_default() + } +} + +async fn send_resend( + client: &reqwest::Client, + api_key: String, + from: String, + payload: SendEmailPayload, +) -> Result { + let url = "https://api.resend.com/emails"; + let to = payload.to_list(); + let cc = payload.cc_list(); + let bcc = payload.bcc_list(); + let subject = payload.subject; + let text = payload.text; + let html = payload.html; + let reply_to = payload.reply_to; + let mut body = serde_json::json!({ + "from": from, + "to": to, + 
"subject": subject, + }); + if let Some(text) = text { + body["text"] = Value::String(text); + } + if let Some(html) = html { + body["html"] = Value::String(html); + } + if let Some(reply_to) = reply_to { + body["reply_to"] = Value::String(reply_to); + } + if !cc.is_empty() { + body["cc"] = serde_json::to_value(cc).unwrap_or(Value::Null); + } + if !bcc.is_empty() { + body["bcc"] = serde_json::to_value(bcc).unwrap_or(Value::Null); + } + + let resp = client + .post(url) + .bearer_auth(api_key) + .json(&body) + .send() + .await + .map_err(|e| RunnerError::RuntimeError(e.to_string()))?; + + let status = resp.status(); + let text = resp + .text() + .await + .map_err(|e| RunnerError::RuntimeError(e.to_string()))?; + if !status.is_success() { + return Err(RunnerError::RuntimeError(format!( + "Resend API error (status={}): {}", + status.as_u16(), + text + ))); + } + serde_json::from_str(&text).map_err(|e| RunnerError::DecodeError(e.to_string())) +} + +async fn send_postmark( + client: &reqwest::Client, + token: String, + from: String, + payload: SendEmailPayload, +) -> Result { + let url = "https://api.postmarkapp.com/email"; + let to = payload.to_list(); + let cc = payload.cc_list(); + let bcc = payload.bcc_list(); + let subject = payload.subject; + let text = payload.text; + let html = payload.html; + let reply_to = payload.reply_to; + let tags = payload.tags; + let headers = payload.headers; + let mut body = serde_json::json!({ + "From": from, + "To": to.join(","), + "Subject": subject, + }); + if let Some(text) = text { + body["TextBody"] = Value::String(text); + } + if let Some(html) = html { + body["HtmlBody"] = Value::String(html); + } + if let Some(reply_to) = reply_to { + body["ReplyTo"] = Value::String(reply_to); + } + if !cc.is_empty() { + body["Cc"] = Value::String(cc.join(",")); + } + if !bcc.is_empty() { + body["Bcc"] = Value::String(bcc.join(",")); + } + if let Some(tags) = tags { + body["Metadata"] = serde_json::to_value(tags).unwrap_or(Value::Null); + } + 
if let Some(headers) = headers { + let headers_arr = headers + .into_iter() + .map(|(k, v)| serde_json::json!({ "Name": k, "Value": v })) + .collect::>(); + body["Headers"] = serde_json::to_value(headers_arr).unwrap_or(Value::Null); + } + + let resp = client + .post(url) + .header("X-Postmark-Server-Token", token) + .json(&body) + .send() + .await + .map_err(|e| RunnerError::RuntimeError(e.to_string()))?; + + let status = resp.status(); + let text = resp + .text() + .await + .map_err(|e| RunnerError::RuntimeError(e.to_string()))?; + if !status.is_success() { + return Err(RunnerError::RuntimeError(format!( + "Postmark API error (status={}): {}", + status.as_u16(), + text + ))); + } + serde_json::from_str(&text).map_err(|e| RunnerError::DecodeError(e.to_string())) +} + +async fn send_smtp(url: String, payload: SendEmailPayload) -> Result { + let from = payload.from.clone().ok_or_else(|| { + RunnerError::RuntimeError("Missing 'from' in payload for SMTP backend".to_string()) + })?; + let to = payload.to_list(); + let cc = payload.cc_list(); + let bcc = payload.bcc_list(); + let subject = payload.subject; + let reply_to = payload.reply_to; + let text = payload.text; + let html = payload.html; + if to.is_empty() { + return Err(RunnerError::RuntimeError("Missing 'to'".to_string())); + } + let mut builder = LettreMessage::builder() + .from( + from.parse::() + .map_err(|e| RunnerError::DecodeError(e.to_string()))?, + ) + .subject(subject); + for addr in to { + builder = builder.to(addr + .parse::() + .map_err(|e| RunnerError::DecodeError(e.to_string()))?); + } + for addr in cc { + builder = builder.cc(addr + .parse::() + .map_err(|e| RunnerError::DecodeError(e.to_string()))?); + } + for addr in bcc { + builder = builder.bcc( + addr.parse::() + .map_err(|e| RunnerError::DecodeError(e.to_string()))?, + ); + } + if let Some(reply_to) = reply_to { + builder = builder.reply_to( + reply_to + .parse::() + .map_err(|e| RunnerError::DecodeError(e.to_string()))?, + ); + } + + let 
message = match (text, html) { + (Some(text), Some(html)) => builder + .multipart( + MultiPart::alternative() + .singlepart( + SinglePart::builder() + .header(ContentType::TEXT_PLAIN) + .body(text), + ) + .singlepart( + SinglePart::builder() + .header(ContentType::TEXT_HTML) + .body(html), + ), + ) + .map_err(|e| RunnerError::RuntimeError(e.to_string()))?, + (Some(text), None) => builder + .singlepart( + SinglePart::builder() + .header(ContentType::TEXT_PLAIN) + .body(text), + ) + .map_err(|e| RunnerError::RuntimeError(e.to_string()))?, + (None, Some(html)) => builder + .singlepart( + SinglePart::builder() + .header(ContentType::TEXT_HTML) + .body(html), + ) + .map_err(|e| RunnerError::RuntimeError(e.to_string()))?, + (None, None) => { + return Err(RunnerError::RuntimeError( + "Missing 'text' or 'html'".to_string(), + )) + } + }; + + let transport = AsyncSmtpTransport::::from_url(&url) + .map_err(|e| RunnerError::RuntimeError(e.to_string()))? + .build(); + + transport + .send(message) + .await + .map_err(|e| RunnerError::RuntimeError(e.to_string()))?; + + Ok(serde_json::json!({ "ok": true })) +} + +async fn send_ses( + region: String, + from: String, + configuration_set: Option, + payload: SendEmailPayload, +) -> Result { + let to_addresses = payload.to_list(); + let cc_addresses = payload.cc_list(); + let bcc_addresses = payload.bcc_list(); + let subject_value = payload.subject; + let text_value = payload.text; + let html_value = payload.html; + if to_addresses.is_empty() { + return Err(RunnerError::RuntimeError("Missing 'to'".to_string())); + } + + let cfg = aws_config::from_env() + .region(Region::new(region)) + .load() + .await; + let client = aws_sdk_sesv2::Client::new(&cfg); + + let subject = Content::builder() + .data(subject_value) + .build() + .map_err(|e| RunnerError::RuntimeError(e.to_string()))?; + + let mut body_builder = Body::builder(); + if let Some(text) = text_value { + let content = Content::builder() + .data(text) + .build() + .map_err(|e| 
RunnerError::RuntimeError(e.to_string()))?; + body_builder = body_builder.text(content); + } + if let Some(html) = html_value { + let content = Content::builder() + .data(html) + .build() + .map_err(|e| RunnerError::RuntimeError(e.to_string()))?; + body_builder = body_builder.html(content); + } + + let body = body_builder.build(); + + let message = Message::builder().subject(subject).body(body).build(); + + let email_content = EmailContent::builder().simple(message).build(); + + let dest = Destination::builder() + .set_to_addresses(Some(to_addresses)) + .set_cc_addresses(if cc_addresses.is_empty() { + None + } else { + Some(cc_addresses) + }) + .set_bcc_addresses(if bcc_addresses.is_empty() { + None + } else { + Some(bcc_addresses) + }) + .build(); + + let mut req = client + .send_email() + .from_email_address(from) + .destination(dest) + .content(email_content); + if let Some(cs) = configuration_set { + req = req.configuration_set_name(cs); + } + + let out = req + .send() + .await + .map_err(|e| RunnerError::RuntimeError(e.to_string()))?; + + Ok(serde_json::json!({ "message_id": out.message_id() })) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn config_parses_resend() { + let v = + serde_json::json!({"backend":"resend","api_key_env":"RESEND_API_KEY","from":"a@b.com"}); + let p = EmailProvider::from_config_value(v).unwrap(); + match p.config { + EmailProviderConfig::Resend { api_key_env, from } => { + assert_eq!(api_key_env, "RESEND_API_KEY"); + assert_eq!(from.as_deref(), Some("a@b.com")); + } + _ => panic!("unexpected backend"), + } + } + + #[test] + fn payload_requires_subject_and_to() { + let v = serde_json::json!({"subject":"hi","to":"x@y.com","text":"ok"}); + let p: SendEmailPayload = serde_json::from_value(v).unwrap(); + assert_eq!(p.to_list(), vec!["x@y.com".to_string()]); + } +} diff --git a/runner/src/effects/providers/mod.rs b/runner/src/effects/providers/mod.rs new file mode 100644 index 0000000..e1157ae --- /dev/null +++ 
b/runner/src/effects/providers/mod.rs @@ -0,0 +1,32 @@ +use crate::types::{EffectCommandEnvelope, EffectResultEnvelope, RunnerError}; +use futures::future::BoxFuture; +use std::collections::HashMap; +use std::sync::Arc; + +pub(crate) mod email; + +pub trait EffectProvider: Send + Sync { + fn execute( + &self, + cmd: EffectCommandEnvelope, + ) -> BoxFuture<'static, Result>; +} + +#[derive(Default, Clone)] +pub struct ProviderRegistry { + providers: HashMap>, +} + +impl ProviderRegistry { + pub fn new() -> Self { + Self::default() + } + + pub fn register(&mut self, name: impl Into, provider: Arc) { + self.providers.insert(name.into(), provider); + } + + pub fn get(&self, name: &str) -> Option> { + self.providers.get(name).cloned() + } +} diff --git a/runner/src/effects/runtime.rs b/runner/src/effects/runtime.rs new file mode 100644 index 0000000..536f028 --- /dev/null +++ b/runner/src/effects/runtime.rs @@ -0,0 +1,39 @@ +use crate::effects::ProviderRegistry; +use crate::types::{EffectCommandEnvelope, EffectResultEnvelope, RunnerError}; +use std::collections::HashMap; + +#[derive(Clone)] +pub struct EffectRuntime { + effect_to_provider: HashMap, + registry: ProviderRegistry, +} + +impl std::fmt::Debug for EffectRuntime { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("EffectRuntime").finish_non_exhaustive() + } +} + +impl EffectRuntime { + pub fn new(effect_to_provider: HashMap, registry: ProviderRegistry) -> Self { + Self { + effect_to_provider, + registry, + } + } + + pub async fn execute( + &self, + cmd: EffectCommandEnvelope, + ) -> Result { + let provider_name = self + .effect_to_provider + .get(cmd.effect_name.as_str()) + .ok_or_else(|| RunnerError::RuntimeError("Unknown effect".to_string()))?; + let provider = self + .registry + .get(provider_name) + .ok_or_else(|| RunnerError::RuntimeError("Unknown effect provider".to_string()))?; + provider.execute(cmd).await + } +} diff --git a/runner/src/effects/worker.rs 
b/runner/src/effects/worker.rs new file mode 100644 index 0000000..4b4e630 --- /dev/null +++ b/runner/src/effects/worker.rs @@ -0,0 +1,794 @@ +use crate::config::Settings; +use crate::effects::{EffectProvider, EffectsManifest, ProviderRegistry}; +use crate::observability::Metrics; +use crate::storage::KvClient; +use crate::stream::{ConsumerOptions, JetStreamClient}; +use crate::tenant_placement::TenantGate; +use crate::types::{ + DedupeEffectKey, EffectCommandEnvelope, EffectResultEnvelope, EffectResultType, RunnerError, +}; +use async_nats::jetstream::consumer::DeliverPolicy; +use async_nats::jetstream::AckKind; +use chrono::Utc; +use futures::future::BoxFuture; +use futures::StreamExt; +use serde_json::Value; +use std::collections::HashMap; +use std::collections::HashSet; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; +use std::time::Duration; +use tokio::sync::watch; + +#[allow(clippy::too_many_arguments)] +pub async fn run_effect_worker( + settings: Settings, + storage: KvClient, + metrics: Arc, + tenant_gate: Arc, + tenant_filter: Option>>, + reload: Arc, + shutdown: Arc, + draining: Arc, +) -> Result<(), RunnerError> { + if tenant_filter.is_none() && settings.tenant_allowlist.is_empty() { + return run_effect_worker_single( + settings, + storage, + metrics, + tenant_gate, + reload, + shutdown, + draining, + ) + .await; + } + + let settings = Arc::new(settings); + let manifest = load_manifest(&settings, &storage)?; + + let (effect_to_provider, registry) = build_registry(&manifest)?; + let runtime = Arc::new(crate::effects::EffectRuntime::new( + effect_to_provider, + registry, + )); + let (runtime_tx, runtime_rx) = watch::channel(runtime); + + let jetstream = JetStreamClient::connect(&settings) + .await + .map_err(|e| RunnerError::StreamError(e.to_string()))?; + let publisher = JetStreamPublisher::new(jetstream.clone()); + let mut tenant_rx = match tenant_filter { + Some(rx) => rx, + None => { + let initial = settings + .tenant_allowlist + 
.iter() + .cloned() + .collect::>(); + let (_tx, rx) = watch::channel(initial); + rx + } + }; + + let mut tasks: HashMap> = HashMap::new(); + let mut stops: HashMap> = HashMap::new(); + + { + let settings = settings.clone(); + let storage = storage.clone(); + let reload = reload.clone(); + tokio::spawn(async move { + loop { + reload.notified().await; + let manifest = match load_manifest(&settings, &storage) { + Ok(m) => m, + Err(e) => { + tracing::error!(error = %e, "Failed to load effects manifest on reload"); + continue; + } + }; + if let Err(e) = manifest.validate() { + tracing::error!(error = %e, "Invalid effects manifest on reload"); + continue; + } + let runtime = match build_registry(&manifest) + .map(|(m, r)| crate::effects::EffectRuntime::new(m, r)) + { + Ok(r) => Arc::new(r), + Err(e) => { + tracing::error!(error = %e, "Failed to rebuild effect runtime on reload"); + continue; + } + }; + let _ = runtime_tx.send(runtime); + } + }); + } + + loop { + tokio::select! { + _ = shutdown.notified() => break, + _ = tokio::time::sleep(Duration::from_millis(250)) => {}, + changed = tenant_rx.changed() => { + if changed.is_err() { + break; + } + } + } + + let tenants = tenant_rx.borrow().clone(); + + for tenant in tasks.keys().cloned().collect::>() { + if !tenants.contains(&tenant) { + if let Some(n) = stops.remove(&tenant) { + n.notify_waiters(); + } + } + } + + for tenant in tasks + .iter() + .filter_map(|(t, h)| { + if h.is_finished() { + Some(t.clone()) + } else { + None + } + }) + .collect::>() + { + if let Some(h) = tasks.remove(&tenant) { + let _ = h.await; + } + stops.remove(&tenant); + } + + for tenant in tenants { + if tasks.contains_key(&tenant) { + continue; + } + + let stop = Arc::new(tokio::sync::Notify::new()); + stops.insert(tenant.clone(), stop.clone()); + let tenant_key = tenant.clone(); + + let settings = settings.clone(); + let jetstream = jetstream.clone(); + let storage = storage.clone(); + let runtime_rx = runtime_rx.clone(); + let publisher = 
publisher.clone(); + let metrics = metrics.clone(); + let tenant_gate = tenant_gate.clone(); + let shutdown = shutdown.clone(); + let draining = draining.clone(); + + let handle = tokio::spawn(async move { + let _ = run_effect_worker_for_tenant( + settings, + jetstream, + storage, + runtime_rx, + publisher, + metrics, + tenant_gate, + tenant, + shutdown, + stop, + draining, + ) + .await; + }); + tasks.insert(tenant_key, handle); + } + } + + for (_, n) in stops { + n.notify_waiters(); + } + for (_, h) in tasks { + let _ = h.await; + } + + Ok(()) +} + +async fn run_effect_worker_single( + settings: Settings, + storage: KvClient, + metrics: Arc, + tenant_gate: Arc, + reload: Arc, + shutdown: Arc, + draining: Arc, +) -> Result<(), RunnerError> { + let manifest = load_manifest(&settings, &storage)?; + + let (effect_to_provider, registry) = build_registry(&manifest)?; + let runtime = Arc::new(crate::effects::EffectRuntime::new( + effect_to_provider, + registry, + )); + let (runtime_tx, runtime_rx) = watch::channel(runtime); + + let jetstream = JetStreamClient::connect(&settings) + .await + .map_err(|e| RunnerError::StreamError(e.to_string()))?; + let publisher = JetStreamPublisher::new(jetstream.clone()); + + let durable_name = format!("{}_effects", settings.consumer_durable_prefix); + let filter_subject = settings + .effect_command_subject_filters + .first() + .cloned() + .unwrap_or_else(|| "tenant.*.effect.*.*".to_string()); + + let consumer = jetstream + .effect_command_consumer( + &settings, + ConsumerOptions { + durable_name, + filter_subject, + deliver_policy: DeliverPolicy::All, + }, + ) + .await + .map_err(|e| RunnerError::StreamError(e.to_string()))?; + + let mut messages = consumer + .messages() + .await + .map_err(|e| RunnerError::StreamError(e.to_string()))?; + + { + let settings = settings.clone(); + let storage = storage.clone(); + let reload = reload.clone(); + tokio::spawn(async move { + loop { + reload.notified().await; + let manifest = match 
load_manifest(&settings, &storage) { + Ok(m) => m, + Err(e) => { + tracing::error!(error = %e, "Failed to load effects manifest on reload"); + continue; + } + }; + if let Err(e) = manifest.validate() { + tracing::error!(error = %e, "Invalid effects manifest on reload"); + continue; + } + let runtime = match build_registry(&manifest) + .map(|(m, r)| crate::effects::EffectRuntime::new(m, r)) + { + Ok(r) => Arc::new(r), + Err(e) => { + tracing::error!(error = %e, "Failed to rebuild effect runtime on reload"); + continue; + } + }; + let _ = runtime_tx.send(runtime); + } + }); + } + + loop { + if draining.load(Ordering::Relaxed) { + tokio::select! { + _ = shutdown.notified() => break, + _ = tokio::time::sleep(Duration::from_millis(50)) => continue, + }; + } + + let next = tokio::select! { + _ = shutdown.notified() => break, + msg = messages.next() => msg, + }; + + let Some(msg) = next else { break }; + let msg = match msg { + Ok(m) => m, + Err(e) => { + tracing::error!(error = %e, "JetStream message stream error"); + continue; + } + }; + + let runtime = runtime_rx.borrow().clone(); + handle_effect_message( + &settings, + &storage, + runtime, + &publisher, + &metrics, + &tenant_gate, + draining.load(Ordering::Relaxed), + msg, + ) + .await; + } + + Ok(()) +} + +#[allow(clippy::too_many_arguments)] +async fn run_effect_worker_for_tenant( + settings: Arc, + jetstream: JetStreamClient, + storage: KvClient, + runtime_rx: watch::Receiver>, + publisher: JetStreamPublisher, + metrics: Arc, + tenant_gate: Arc, + tenant: String, + shutdown: Arc, + stop: Arc, + draining: Arc, +) -> Result<(), RunnerError> { + let durable_name = format!("{}_effects_{}", settings.consumer_durable_prefix, tenant); + let filter_subject = format!("tenant.{}.effect.*.*", tenant); + + let consumer = jetstream + .effect_command_consumer( + &settings, + ConsumerOptions { + durable_name, + filter_subject, + deliver_policy: DeliverPolicy::All, + }, + ) + .await + .map_err(|e| 
RunnerError::StreamError(e.to_string()))?;

    let mut messages = consumer
        .messages()
        .await
        .map_err(|e| RunnerError::StreamError(e.to_string()))?;

    loop {
        // Park (do not exit) while this tenant is not currently assigned to
        // this node; re-poll every 50ms so a reassignment is picked up.
        if !tenant_gate.should_acquire_processing_work(&tenant, draining.load(Ordering::Relaxed)) {
            tokio::select! {
                _ = shutdown.notified() => break,
                _ = stop.notified() => break,
                _ = tokio::time::sleep(Duration::from_millis(50)) => continue,
            };
        }

        // Global drain: stop pulling new work but stay alive until
        // shutdown/stop fires.
        if draining.load(Ordering::Relaxed) {
            tokio::select! {
                _ = shutdown.notified() => break,
                _ = stop.notified() => break,
                _ = tokio::time::sleep(Duration::from_millis(50)) => continue,
            };
        }

        let next = tokio::select! {
            _ = shutdown.notified() => break,
            _ = stop.notified() => break,
            msg = messages.next() => msg,
        };

        let Some(msg) = next else { break };
        let msg = match msg {
            Ok(m) => m,
            Err(e) => {
                tracing::error!(error = %e, "JetStream message stream error");
                continue;
            }
        };

        // Snapshot the current runtime; a concurrent reload swaps the watch
        // value but in-flight messages keep the runtime they started with.
        let runtime = runtime_rx.borrow().clone();
        handle_effect_message(
            &settings,
            &storage,
            runtime,
            &publisher,
            &metrics,
            &tenant_gate,
            draining.load(Ordering::Relaxed),
            msg,
        )
        .await;
    }

    Ok(())
}

/// Decodes one JetStream effect command, applies the tenant gate, runs it
/// through the effect runtime, and acks according to the outcome.
///
/// Ack policy:
/// - undecodable envelopes are redelivered until `settings.max_deliver`,
///   then dead-lettered and terminated (`AckKind::Term`);
/// - messages for tenants this node should not process get a short NAK so
///   another node can pick them up;
/// - processing errors are left un-acked so JetStream redelivers them.
#[allow(clippy::too_many_arguments)]
async fn handle_effect_message(
    settings: &Settings,
    storage: &KvClient,
    // NOTE(review): generic argument reconstructed from `runtime.as_ref()`
    // being passed as `&crate::effects::EffectRuntime` below — confirm
    // against the original declaration.
    runtime: Arc<crate::effects::EffectRuntime>,
    publisher: &JetStreamPublisher,
    metrics: &Metrics,
    tenant_gate: &TenantGate,
    global_draining: bool,
    msg: async_nats::jetstream::Message,
) {
    let info = match msg.info() {
        Ok(i) => i,
        Err(e) => {
            // Without delivery info we cannot apply the max_deliver policy;
            // ack to avoid an unbounded redelivery loop.
            tracing::error!(error = %e, "Failed to parse JetStream message info");
            let _ = msg.ack().await;
            return;
        }
    };
    let delivered = info.delivered.max(0) as u64;

    let cmd: EffectCommandEnvelope = match serde_json::from_slice(&msg.payload) {
        Ok(c) => c,
        Err(e) => {
            tracing::error!(error = %e, "Failed to decode effect command envelope");
            if delivered >= settings.max_deliver.max(1) as u64 {
                // The envelope is undecodable, so recover the tenant from the
                // subject (`tenant.<tenant_id>.effect.*.*`) instead of
                // writing an empty tenant segment into the dead-letter key.
                let tenant_from_subject = msg
                    .subject
                    .split('.')
                    .nth(1)
                    .filter(|s| !s.is_empty())
                    .unwrap_or("unknown")
                    .to_string();
                let key = format!(
                    "deadletter:{}:effect_decode:{}",
                    tenant_from_subject,
                    Utc::now().timestamp_millis()
                );
                // Preserve the raw payload for post-mortem debugging instead
                // of discarding it as null.
                let record = serde_json::json!({
                    "reason": "decode_error",
                    "delivered": delivered,
                    "payload": String::from_utf8_lossy(&msg.payload),
                    "timestamp": Utc::now(),
                });
                // Surface persistence failures instead of silently dropping
                // the record — the metric below would otherwise over-count.
                if let Err(e) = storage.put_deadletter(&key, &record) {
                    tracing::error!(error = %e, key = %key, "Failed to persist dead-letter record");
                }
                metrics.inc_deadletter_written();
                let _ = msg.ack_with(AckKind::Term).await;
            }
            return;
        }
    };

    if !tenant_gate.should_acquire_processing_work(cmd.tenant_id.as_str(), global_draining) {
        // Not our tenant right now (or draining): hand the message back with
        // a short delay so another node can claim it.
        let _ = msg
            .ack_with(AckKind::Nak(Some(Duration::from_millis(250))))
            .await;
        return;
    }
    // RAII guard keeping the tenant's in-flight counter accurate for drain
    // accounting.
    let _work = tenant_gate.begin_work(cmd.tenant_id.as_str());

    match process_effect_command(settings, storage, runtime.as_ref(), publisher, metrics, cmd).await
    {
        Ok(ProcessDecision::Ack) => {
            if settings.test_effect_crash_after_dedupe_before_ack {
                // Test hook: simulate a crash between the dedupe mark and the
                // ack to exercise at-least-once redelivery handling.
                panic!("test_effect_crash_after_dedupe_before_ack");
            }
            let _ = msg.ack().await;
        }
        Err(e) => {
            // Deliberately not acked: JetStream redelivers up to max_deliver.
            tracing::error!(error = %e, "Effect processing failed");
        }
    }
}

/// Outcome of processing a single effect command. Currently the only terminal
/// decision is to ack; errors propagate via `Result::Err`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ProcessDecision {
    Ack,
}

/// Abstraction over result publication so tests can substitute a fake
/// publisher (see `FakePublisher` in the tests module).
trait EffectResultPublisher: Send + Sync {
    fn publish(
        &self,
        subject: String,
        result: EffectResultEnvelope,
    ) -> BoxFuture<'static, Result<(), RunnerError>>;
}

/// Production publisher: forwards effect results to JetStream.
#[derive(Clone)]
struct JetStreamPublisher {
    jetstream: JetStreamClient,
}

impl JetStreamPublisher {
    fn new(jetstream: JetStreamClient) -> Self {
        Self { jetstream }
    }
}

impl EffectResultPublisher for JetStreamPublisher {
    fn publish(
        &self,
        subject: String,
        result: EffectResultEnvelope,
    ) -> BoxFuture<'static, Result<(), RunnerError>> {
        let jetstream = self.jetstream.clone();
        Box::pin(async move { jetstream.publish_effect_result(subject, &result).await })
    }
}

/// Executes one effect command with dedupe, timeout, and bounded exponential
/// backoff retries; publishes either the provider result or a `Failed`
/// envelope after exhausting attempts.
async fn process_effect_command(
    settings: &Settings,
    storage: &KvClient,
    runtime: &crate::effects::EffectRuntime,
    publisher: &dyn EffectResultPublisher,
    metrics: &Metrics,
    cmd: EffectCommandEnvelope,
    // NOTE(review): return generics reconstructed — confirm against original.
) -> Result<ProcessDecision, RunnerError> {
    let dedupe_key =
DedupeEffectKey::new(&cmd.tenant_id, &cmd.command_id); + if storage.is_deduped_effect(&dedupe_key)? { + return Ok(ProcessDecision::Ack); + } + + let timeout = Duration::from_millis(settings.effect_timeout_ms.max(1)); + let mut last_error = None; + + let mut attempt = 0usize; + while attempt < settings.effect_retry_max_attempts.max(1) { + attempt += 1; + let exec = tokio::time::timeout(timeout, runtime.execute(cmd.clone())).await; + match exec { + Ok(Ok(result)) => { + metrics.inc_effect_exec_success(); + return publish_and_mark(storage, publisher, metrics, cmd, result).await; + } + Ok(Err(e)) => { + metrics.inc_effect_exec_failed(); + last_error = Some(e.to_string()); + } + Err(_) => { + metrics.inc_effect_exec_timeout(); + last_error = Some("timeout".to_string()); + } + } + + if attempt < settings.effect_retry_max_attempts.max(1) { + let backoff_ms = settings.effect_retry_backoff_ms.max(1) * (1u64 << (attempt - 1)); + tokio::time::sleep(Duration::from_millis(backoff_ms)).await; + } + } + + let result = EffectResultEnvelope { + tenant_id: cmd.tenant_id.clone(), + command_id: cmd.command_id.clone(), + effect_name: cmd.effect_name.clone(), + result_type: EffectResultType::Failed, + payload: serde_json::json!({ "error": last_error.unwrap_or_else(|| "failed".to_string()) }), + timestamp: Utc::now(), + metadata: cmd.metadata.clone(), + }; + + publish_and_mark(storage, publisher, metrics, cmd, result).await +} + +async fn publish_and_mark( + storage: &KvClient, + publisher: &dyn EffectResultPublisher, + metrics: &Metrics, + cmd: EffectCommandEnvelope, + mut result: EffectResultEnvelope, +) -> Result { + if result.metadata.correlation_id.is_none() { + result.metadata.correlation_id = cmd.metadata.correlation_id.clone(); + } + if result.metadata.trace_id.is_none() { + result.metadata.trace_id = cmd.metadata.trace_id.clone(); + } + + let subject = format!( + "tenant.{}.effect_result.{}.{}", + cmd.tenant_id.as_str(), + cmd.effect_name.as_str(), + cmd.command_id.as_str() + ); 
+ + if let Err(e) = publisher.publish(subject, result).await { + metrics.inc_effect_publish_failed(); + return Err(e); + } + let dedupe_key = DedupeEffectKey::new(&cmd.tenant_id, &cmd.command_id); + storage.mark_deduped_effect(&dedupe_key)?; + Ok(ProcessDecision::Ack) +} + +fn build_registry( + manifest: &EffectsManifest, +) -> Result<(HashMap, ProviderRegistry), RunnerError> { + let mut registry = ProviderRegistry::new(); + type ProviderFactory = + Arc Result, RunnerError> + Send + Sync + 'static>; + + let mut factories: HashMap = HashMap::new(); + factories.insert( + "noop".to_string(), + Arc::new(|_cfg| Ok(Arc::new(NoopProvider))), + ); + factories.insert( + "email".to_string(), + Arc::new(|cfg| { + let provider = super::providers::email::EmailProvider::from_config_value(cfg)?; + Ok(Arc::new(provider)) + }), + ); + + let mut effect_to_provider = HashMap::new(); + for def in &manifest.effects { + let factory = factories.get(&def.provider).ok_or_else(|| { + RunnerError::RuntimeError(format!("Unknown effect provider: {}", def.provider)) + })?; + let provider_instance = factory(def.config.clone())?; + let provider_name = format!("{}__{}", def.provider, def.name); + registry.register(provider_name.clone(), provider_instance); + effect_to_provider.insert(def.name.clone(), provider_name); + } + + Ok((effect_to_provider, registry)) +} + +fn load_manifest(settings: &Settings, storage: &KvClient) -> Result { + if let Some(m) = storage.get_effects_manifest_override()? 
{
        // Fix: an operator-supplied override from storage was previously
        // returned unvalidated, while the file-based manifest below is always
        // validated. Validate both paths identically so a stale or hand-edited
        // KV override cannot reach `build_registry` unchecked.
        m.validate()
            .map_err(|e| RunnerError::DecodeError(e.to_string()))?;
        return Ok(m);
    }
    let manifest = EffectsManifest::from_file(&settings.effects_manifest_path)
        .map_err(|e| RunnerError::DecodeError(e.to_string()))?;
    manifest
        .validate()
        .map_err(|e| RunnerError::DecodeError(e.to_string()))?;
    Ok(manifest)
}

/// Provider that "succeeds" immediately, echoing the command payload back as
/// the result payload. Useful as a default wiring target for the "noop"
/// provider name in the manifest.
#[derive(Debug)]
struct NoopProvider;

impl EffectProvider for NoopProvider {
    fn execute(
        &self,
        cmd: EffectCommandEnvelope,
        // NOTE(review): result generics reconstructed (extraction stripped
        // `<...>`) — confirm against the `EffectProvider` trait declaration.
    ) -> BoxFuture<'static, Result<EffectResultEnvelope, RunnerError>> {
        Box::pin(async move {
            Ok(EffectResultEnvelope {
                tenant_id: cmd.tenant_id,
                command_id: cmd.command_id,
                effect_name: cmd.effect_name,
                result_type: EffectResultType::Succeeded,
                payload: cmd.payload,
                timestamp: Utc::now(),
                metadata: cmd.metadata,
            })
        })
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::effects::EffectRuntime;
    use crate::types::{CommandId, EffectName, MessageMetadata, TenantId};
    use std::sync::atomic::{AtomicUsize, Ordering};

    /// Test provider: counts invocations and succeeds, echoing the payload.
    #[derive(Clone)]
    struct CountingProvider {
        calls: Arc<AtomicUsize>,
    }

    impl EffectProvider for CountingProvider {
        fn execute(
            &self,
            cmd: EffectCommandEnvelope,
        ) -> BoxFuture<'static, Result<EffectResultEnvelope, RunnerError>> {
            let calls = self.calls.clone();
            Box::pin(async move {
                calls.fetch_add(1, Ordering::Relaxed);
                Ok(EffectResultEnvelope {
                    tenant_id: cmd.tenant_id,
                    command_id: cmd.command_id,
                    effect_name: cmd.effect_name,
                    result_type: EffectResultType::Succeeded,
                    payload: cmd.payload,
                    timestamp: Utc::now(),
                    metadata: cmd.metadata,
                })
            })
        }
    }

    /// Test publisher: records publish attempts and optionally fails so
    /// publish-failure handling can be asserted.
    #[derive(Clone)]
    struct FakePublisher {
        fail: bool,
        published: Arc<AtomicUsize>,
    }

    impl EffectResultPublisher for FakePublisher {
        fn publish(
            &self,
            _subject: String,
            _result: EffectResultEnvelope,
        ) -> BoxFuture<'static, Result<(), RunnerError>> {
            let fail = self.fail;
            let published = self.published.clone();
            Box::pin(async move {
                published.fetch_add(1, Ordering::Relaxed);
                if fail {
                    Err(RunnerError::StreamError("publish failed".to_string()))
                } else {
                    Ok(())
                }
            })
        }
    }

    fn
runtime_with_counting_provider(calls: Arc) -> EffectRuntime { + let mut registry = ProviderRegistry::new(); + registry.register("counting", Arc::new(CountingProvider { calls })); + let mut map = HashMap::new(); + map.insert("send_email".to_string(), "counting".to_string()); + EffectRuntime::new(map, registry) + } + + fn base_settings() -> Settings { + Settings { + effect_retry_max_attempts: 1, + effect_timeout_ms: 1000, + effect_retry_backoff_ms: 1, + ..Default::default() + } + } + + #[tokio::test] + async fn idempotency_gate_prevents_double_execution_for_same_command_id() { + let storage = KvClient::in_memory(); + let calls = Arc::new(AtomicUsize::new(0)); + let runtime = runtime_with_counting_provider(calls.clone()); + let publisher = FakePublisher { + fail: false, + published: Arc::new(AtomicUsize::new(0)), + }; + let metrics = Metrics::default(); + let settings = base_settings(); + + let cmd = EffectCommandEnvelope { + tenant_id: TenantId::new("t1"), + command_id: CommandId::new("c1"), + effect_name: EffectName::new("send_email"), + payload: serde_json::json!({"a": 1}), + metadata: MessageMetadata::default(), + }; + + let dedupe_key = DedupeEffectKey::new(&cmd.tenant_id, &cmd.command_id); + storage.mark_deduped_effect(&dedupe_key).unwrap(); + + let decision = + process_effect_command(&settings, &storage, &runtime, &publisher, &metrics, cmd) + .await + .unwrap(); + assert_eq!(decision, ProcessDecision::Ack); + assert_eq!(calls.load(Ordering::Relaxed), 0); + } + + #[tokio::test] + async fn result_publish_failure_does_not_mark_command_as_completed() { + let storage = KvClient::in_memory(); + let calls = Arc::new(AtomicUsize::new(0)); + let runtime = runtime_with_counting_provider(calls.clone()); + let published = Arc::new(AtomicUsize::new(0)); + let publisher = FakePublisher { + fail: true, + published: published.clone(), + }; + let metrics = Metrics::default(); + let settings = base_settings(); + + let cmd = EffectCommandEnvelope { + tenant_id: 
TenantId::new("t1"), + command_id: CommandId::new("c1"), + effect_name: EffectName::new("send_email"), + payload: serde_json::json!({"a": 1}), + metadata: MessageMetadata::default(), + }; + + let res = process_effect_command( + &settings, + &storage, + &runtime, + &publisher, + &metrics, + cmd.clone(), + ) + .await; + assert!(res.is_err()); + + let dedupe_key = DedupeEffectKey::new(&cmd.tenant_id, &cmd.command_id); + assert!(!storage.is_deduped_effect(&dedupe_key).unwrap()); + assert_eq!(published.load(Ordering::Relaxed), 1); + assert_eq!(calls.load(Ordering::Relaxed), 1); + } +} diff --git a/runner/src/gateway/mod.rs b/runner/src/gateway/mod.rs new file mode 100644 index 0000000..4cbd24c --- /dev/null +++ b/runner/src/gateway/mod.rs @@ -0,0 +1,198 @@ +pub const TENANT_ID_METADATA_KEY: &str = "x-tenant-id"; +pub const CORRELATION_ID_METADATA_KEY: &str = "x-correlation-id"; +pub const TRACEPARENT_METADATA_KEY: &str = "traceparent"; + +pub mod proto { + tonic::include_proto!("aggregate.gateway.v1"); +} + +#[derive(Clone)] +pub struct GatewayClient { + inner: proto::command_service_client::CommandServiceClient, +} + +impl std::fmt::Debug for GatewayClient { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("GatewayClient").finish_non_exhaustive() + } +} + +impl GatewayClient { + pub async fn connect(endpoint: &str) -> Result { + let channel = tonic::transport::Endpoint::from_shared(endpoint.to_string()) + .map_err(|e| crate::types::RunnerError::RuntimeError(e.to_string()))? 
+ .connect() + .await + .map_err(|e| crate::types::RunnerError::RuntimeError(e.to_string()))?; + let inner = proto::command_service_client::CommandServiceClient::new(channel); + Ok(Self { inner }) + } + + pub async fn submit_command( + &mut self, + request: proto::SubmitCommandRequest, + ) -> Result { + let mut grpc_request = tonic::Request::new(request); + + let tenant_id = grpc_request.get_ref().tenant_id.as_str(); + if !tenant_id.is_empty() { + let value = tonic::metadata::MetadataValue::try_from(tenant_id).map_err(|e| { + tonic::Status::invalid_argument(format!("invalid tenant_id metadata: {}", e)) + })?; + grpc_request + .metadata_mut() + .insert(TENANT_ID_METADATA_KEY, value); + } + + let correlation_id = grpc_request + .get_ref() + .metadata + .get("x-correlation-id") + .or_else(|| grpc_request.get_ref().metadata.get("correlation_id")) + .map(|s| s.trim()) + .filter(|s| !s.is_empty()) + .map(|s| s.to_string()); + if let Some(correlation_id) = correlation_id { + let value = + tonic::metadata::MetadataValue::try_from(correlation_id.as_str()).map_err(|e| { + tonic::Status::invalid_argument(format!( + "invalid correlation_id metadata: {}", + e + )) + })?; + grpc_request + .metadata_mut() + .insert(CORRELATION_ID_METADATA_KEY, value); + } + + let traceparent = grpc_request + .get_ref() + .metadata + .get("traceparent") + .map(|s| s.trim()) + .filter(|s| !s.is_empty()) + .map(|s| s.to_string()) + .or_else(|| { + grpc_request + .get_ref() + .metadata + .get("trace_id") + .map(|s| s.trim()) + .filter(|s| s.len() == 32 && s.chars().all(|c| c.is_ascii_hexdigit())) + .map(|trace_id| { + let span_id = uuid::Uuid::new_v4().simple().to_string()[..16].to_string(); + format!("00-{trace_id}-{span_id}-01") + }) + }); + if let Some(traceparent) = traceparent { + let value = + tonic::metadata::MetadataValue::try_from(traceparent.as_str()).map_err(|e| { + tonic::Status::invalid_argument(format!("invalid traceparent metadata: {}", e)) + })?; + grpc_request + .metadata_mut() + 
.insert(TRACEPARENT_METADATA_KEY, value); + } + + let resp = self.inner.submit_command(grpc_request).await?; + Ok(resp.into_inner()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn traceparent_is_derived_from_trace_id_when_present() { + let req = proto::SubmitCommandRequest { + tenant_id: "t1".to_string(), + command_id: "c1".to_string(), + aggregate_id: "a1".to_string(), + aggregate_type: "User".to_string(), + payload_json: "{}".to_string(), + metadata: std::collections::HashMap::from([( + "trace_id".to_string(), + "0123456789abcdef0123456789abcdef".to_string(), + )]), + }; + let trace_id = req.metadata.get("trace_id").unwrap().as_str(); + let span_id = uuid::Uuid::new_v4().simple().to_string()[..16].to_string(); + let traceparent = format!("00-{trace_id}-{span_id}-01"); + assert!(traceparent.starts_with("00-0123456789abcdef0123456789abcdef-")); + assert!(traceparent.ends_with("-01")); + } + + #[tokio::test] + async fn submit_command_propagates_correlation_and_traceparent_metadata_when_present() { + use proto::command_service_server::CommandService; + + #[derive(Default)] + struct Upstream; + + #[tonic::async_trait] + impl CommandService for Upstream { + async fn submit_command( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + let correlation = request + .metadata() + .get("x-correlation-id") + .and_then(|v| v.to_str().ok()) + .unwrap_or(""); + if correlation != "corr-1" { + return Err(tonic::Status::failed_precondition("missing correlation")); + } + + let traceparent = request + .metadata() + .get("traceparent") + .and_then(|v| v.to_str().ok()) + .unwrap_or(""); + if traceparent != "00-0123456789abcdef0123456789abcdef-1111111111111111-01" { + return Err(tonic::Status::failed_precondition("missing traceparent")); + } + + Ok(tonic::Response::new(proto::SubmitCommandResponse { + events: vec![], + })) + } + } + + let upstream_listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap(); + let upstream_addr = 
upstream_listener.local_addr().unwrap(); + drop(upstream_listener); + tokio::spawn(async move { + tonic::transport::Server::builder() + .add_service(proto::command_service_server::CommandServiceServer::new( + Upstream, + )) + .serve(upstream_addr) + .await + .unwrap(); + }); + tokio::time::sleep(std::time::Duration::from_millis(50)).await; + + let mut client = GatewayClient::connect(&format!("http://{}", upstream_addr)) + .await + .unwrap(); + + let req = proto::SubmitCommandRequest { + tenant_id: "t1".to_string(), + command_id: "c1".to_string(), + aggregate_id: "a1".to_string(), + aggregate_type: "User".to_string(), + payload_json: "{}".to_string(), + metadata: std::collections::HashMap::from([ + ("correlation_id".to_string(), "corr-1".to_string()), + ( + "traceparent".to_string(), + "00-0123456789abcdef0123456789abcdef-1111111111111111-01".to_string(), + ), + ]), + }; + + client.submit_command(req).await.unwrap(); + } +} diff --git a/runner/src/http/mod.rs b/runner/src/http/mod.rs new file mode 100644 index 0000000..74f2e97 --- /dev/null +++ b/runner/src/http/mod.rs @@ -0,0 +1,665 @@ +use crate::config::Settings; +use crate::effects::EffectsManifest; +use crate::observability::Metrics; +use crate::storage::KvClient; +use crate::tenant_placement::TenantGate; +use crate::types::TenantId; +use axum::extract::{Path, State}; +use axum::http::StatusCode; +use axum::response::{IntoResponse, Response}; +use axum::routing::{get, post}; +use axum::Json; +use serde_json::json; +use serde_json::Value; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; +use std::time::Duration; + +#[derive(Debug)] +pub struct AppState { + pub settings: Settings, + draining: Arc, + tenant_gate: Arc, + pub metrics: Arc, + pub storage: KvClient, + reload: Arc, +} + +impl AppState { + pub fn new( + settings: Settings, + draining: Arc, + tenant_gate: Arc, + metrics: Arc, + storage: KvClient, + reload: Arc, + ) -> Self { + Self { + settings, + draining, + tenant_gate, + metrics, + 
storage, + reload, + } + } + + pub fn is_draining(&self) -> bool { + self.draining.load(Ordering::Relaxed) + } + + pub fn start_draining(&self) { + self.draining.store(true, Ordering::Relaxed); + } + + pub fn notify_reload(&self) { + self.reload.notify_waiters(); + } +} + +pub async fn serve( + listener: tokio::net::TcpListener, + state: Arc, + shutdown: impl std::future::Future + Send + 'static, +) { + let app = router(state); + axum::serve(listener, app) + .with_graceful_shutdown(shutdown) + .await + .unwrap(); +} + +fn router(state: Arc) -> axum::Router { + axum::Router::new() + .route("/health", get(health)) + .route("/ready", get(ready)) + .route("/metrics", get(metrics)) + .route("/info", get(info)) + .route("/admin/drain", post(drain)) + .route("/admin/drain/status", get(drain_status)) + .route("/admin/reload", post(reload)) + .route("/admin/config/:key", get(get_config_value)) + .route("/admin/config/:key", post(set_config_value)) + .route("/admin/config/:key/delete", post(delete_config_value)) + .route("/admin/config/effects_manifest", get(get_effects_manifest)) + .route("/admin/config/effects_manifest", post(set_effects_manifest)) + .route( + "/admin/config/effects_manifest/delete", + post(clear_effects_manifest), + ) + .route("/admin/replay", post(replay)) + .with_state(state) +} + +async fn health(State(state): State>) -> Response { + let storage_ok = state.storage.writable_probe().is_ok(); + let stream_ok = tokio::time::timeout(Duration::from_secs(1), async { + async_nats::connect(&state.settings.nats_url) + .await + .map(|_| ()) + }) + .await + .is_ok_and(|r| r.is_ok()); + + let ok = storage_ok && stream_ok; + let status = if ok { + StatusCode::OK + } else { + StatusCode::SERVICE_UNAVAILABLE + }; + + ( + status, + Json(json!({ "ok": ok, "storage": storage_ok, "stream": stream_ok })), + ) + .into_response() +} + +#[derive(Debug, serde::Deserialize)] +struct ReadyQuery { + #[serde(default)] + tenant_id: Option, +} + +async fn ready( + State(state): 
State>, + axum::extract::Query(q): axum::extract::Query, +) -> Response { + if state.is_draining() { + return ( + StatusCode::SERVICE_UNAVAILABLE, + Json(json!({ "ok": false, "draining": true })), + ) + .into_response(); + } + + if let Some(tenant_id) = q.tenant_id { + let tenant_id = tenant_id.trim().to_string(); + if tenant_id.is_empty() { + return ( + StatusCode::BAD_REQUEST, + Json(json!({ "ok": false, "error": "tenant_id required" })), + ) + .into_response(); + } + let accepting = state + .tenant_gate + .should_acquire_processing_work(&tenant_id, state.is_draining()); + if !accepting { + let assigned = state.tenant_gate.is_assigned(&tenant_id); + let draining = state.tenant_gate.is_draining(&tenant_id); + return ( + StatusCode::SERVICE_UNAVAILABLE, + Json(json!({ + "ok": false, + "tenant_id": tenant_id, + "accepting": false, + "assigned": assigned, + "draining_tenant": draining + })), + ) + .into_response(); + } + } + + let health = health(State(state.clone())).await; + if health.status() != StatusCode::OK { + return (StatusCode::SERVICE_UNAVAILABLE, health.into_body()).into_response(); + } + + (StatusCode::OK, Json(json!({ "ok": true }))).into_response() +} + +async fn metrics(State(state): State>) -> impl IntoResponse { + let draining = if state.is_draining() { 1 } else { 0 }; + let outbox_count = state + .storage + .list_outbox_all(50_000) + .map(|v| v.len()) + .unwrap_or(0); + + let now_ms = chrono::Utc::now().timestamp_millis().max(0) as u64; + let due_schedule_count = state + .storage + .scan_due_schedule_items_all(now_ms, 50_000) + .map(|v| v.len()) + .unwrap_or(0); + + let body = format!( + "{}runner_draining {}\nrunner_outbox_items {}\nrunner_schedule_due_items {}\n{}", + state.metrics.export_prometheus(), + draining, + outbox_count, + due_schedule_count, + draining_metrics_snapshot(&state) + ); + (StatusCode::OK, body) +} + +async fn info(State(state): State>) -> impl IntoResponse { + Json(json!({ + "service": "runner", + "mode": format!("{:?}", 
state.settings.mode), + "streams": { + "aggregate_events": state.settings.aggregate_events_stream, + "workflow_commands": state.settings.workflow_commands_stream, + "workflow_events": state.settings.workflow_events_stream, + }, + "draining": state.is_draining(), + "tenant_placement_enabled": state.tenant_gate.assigned_tenants_snapshot().is_some(), + })) +} + +#[derive(Debug, serde::Deserialize)] +struct DrainQuery { + #[serde(default)] + tenant_id: Option, + #[serde(default)] + wait_ms: Option, +} + +async fn drain( + State(state): State>, + axum::extract::Query(q): axum::extract::Query, +) -> Response { + match q.tenant_id.as_deref() { + None => { + state.start_draining(); + ( + StatusCode::OK, + Json(json!({ "ok": true, "draining": true })), + ) + .into_response() + } + Some(tenant_id) => drain_tenant(state, tenant_id, q.wait_ms).await, + } +} + +async fn drain_status( + State(state): State>, + axum::extract::Query(q): axum::extract::Query, +) -> Response { + let Some(tenant_id) = q.tenant_id.as_deref() else { + return ( + StatusCode::BAD_REQUEST, + Json(json!({ "ok": false, "error": "tenant_id required" })), + ) + .into_response(); + }; + tenant_drain_status(state, tenant_id).await +} + +async fn drain_tenant(state: Arc, tenant_id: &str, wait_ms: Option) -> Response { + let tenant_id = tenant_id.trim(); + if tenant_id.is_empty() { + return ( + StatusCode::BAD_REQUEST, + Json(json!({ "ok": false, "error": "tenant_id required" })), + ) + .into_response(); + } + + state.tenant_gate.start_draining(tenant_id); + if let Some(wait_ms) = wait_ms.filter(|v| *v > 0) { + let deadline = tokio::time::Instant::now() + Duration::from_millis(wait_ms); + loop { + let status = tenant_drain_state(&state, tenant_id); + if status.drained { + break; + } + if tokio::time::Instant::now() >= deadline { + break; + } + let remaining = deadline.saturating_duration_since(tokio::time::Instant::now()); + let _ = state + .tenant_gate + .wait_inflight_zero(tenant_id, 
remaining.min(Duration::from_millis(250))) + .await; + tokio::time::sleep(Duration::from_millis(25)).await; + } + } + + tenant_drain_status(state, tenant_id).await +} + +async fn tenant_drain_status(state: Arc, tenant_id: &str) -> Response { + let tenant_id = tenant_id.trim(); + if tenant_id.is_empty() { + return ( + StatusCode::BAD_REQUEST, + Json(json!({ "ok": false, "error": "tenant_id required" })), + ) + .into_response(); + } + + let status = tenant_drain_state(&state, tenant_id); + let code = if status.drained { + StatusCode::OK + } else { + StatusCode::ACCEPTED + }; + + ( + code, + Json(json!({ + "ok": true, + "tenant_id": tenant_id, + "draining_tenant": state.tenant_gate.is_draining(tenant_id), + "assigned": state.tenant_gate.is_assigned(tenant_id), + "in_flight": status.in_flight, + "outbox_items": status.outbox_items, + "drained": status.drained + })), + ) + .into_response() +} + +struct TenantDrainState { + in_flight: usize, + outbox_items: usize, + drained: bool, +} + +fn tenant_drain_state(state: &AppState, tenant_id: &str) -> TenantDrainState { + let in_flight = state.tenant_gate.inflight_count(tenant_id); + let outbox_items = state + .storage + .list_outbox_prefix(&TenantId::new(tenant_id.to_string()), 50_000) + .map(|v| v.len()) + .unwrap_or(0); + TenantDrainState { + in_flight, + outbox_items, + drained: in_flight == 0 && outbox_items == 0, + } +} + +fn draining_metrics_snapshot(state: &AppState) -> String { + let mut buf = String::new(); + for tenant in state.tenant_gate.draining_tenants_snapshot() { + let in_flight = state.tenant_gate.inflight_count(&tenant); + let outbox_items = state + .storage + .list_outbox_prefix(&TenantId::new(tenant.clone()), 50_000) + .map(|v| v.len()) + .unwrap_or(0); + buf.push_str(&format!( + "runner_tenant_draining_in_flight{{tenant_id=\"{}\"}} {}\n", + tenant, in_flight + )); + buf.push_str(&format!( + "runner_tenant_draining_outbox_items{{tenant_id=\"{}\"}} {}\n", + tenant, outbox_items + )); + } + buf +} + +async 
fn reload(State(state): State>) -> impl IntoResponse { + state.notify_reload(); + (StatusCode::OK, Json(json!({ "ok": true }))) +} + +async fn get_config_value(State(state): State>, Path(key): Path) -> Response { + if key.trim().is_empty() { + return ( + StatusCode::BAD_REQUEST, + Json(json!({ "ok": false, "error": "key required" })), + ) + .into_response(); + } + + match state.storage.get_config_value(&key) { + Ok(Some(value)) => ( + StatusCode::OK, + Json(json!({ "ok": true, "key": key, "value": value })), + ) + .into_response(), + Ok(None) => ( + StatusCode::NOT_FOUND, + Json(json!({ "ok": false, "key": key, "error": "not found" })), + ) + .into_response(), + Err(e) => ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(json!({ "ok": false, "error": e.to_string() })), + ) + .into_response(), + } +} + +async fn set_config_value( + State(state): State>, + Path(key): Path, + Json(value): Json, +) -> Response { + if key.trim().is_empty() { + return ( + StatusCode::BAD_REQUEST, + Json(json!({ "ok": false, "error": "key required" })), + ) + .into_response(); + } + + if let Err(e) = state.storage.put_config_value(&key, &value) { + return ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(json!({ "ok": false, "error": e.to_string() })), + ) + .into_response(); + } + + state.notify_reload(); + ( + StatusCode::OK, + Json(json!({ "ok": true, "key": key, "stored": true })), + ) + .into_response() +} + +async fn delete_config_value( + State(state): State>, + Path(key): Path, +) -> Response { + if key.trim().is_empty() { + return ( + StatusCode::BAD_REQUEST, + Json(json!({ "ok": false, "error": "key required" })), + ) + .into_response(); + } + + if let Err(e) = state.storage.delete_config_value(&key) { + return ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(json!({ "ok": false, "error": e.to_string() })), + ) + .into_response(); + } + state.notify_reload(); + ( + StatusCode::OK, + Json(json!({ "ok": true, "key": key, "deleted": true })), + ) + .into_response() +} + +async fn 
get_effects_manifest(State(state): State>) -> Response { + let override_manifest = state.storage.get_effects_manifest_override(); + let override_manifest = match override_manifest { + Ok(v) => v, + Err(e) => { + return ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(json!({ "ok": false, "error": e.to_string() })), + ) + .into_response(); + } + }; + + if let Some(m) = override_manifest { + return ( + StatusCode::OK, + Json(json!({ "ok": true, "source": "storage", "manifest": m })), + ) + .into_response(); + } + + let from_file = EffectsManifest::from_file(&state.settings.effects_manifest_path); + match from_file { + Ok(m) => ( + StatusCode::OK, + Json(json!({ "ok": true, "source": "file", "manifest": m })), + ) + .into_response(), + Err(e) => ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(json!({ "ok": false, "error": e.to_string() })), + ) + .into_response(), + } +} + +async fn set_effects_manifest( + State(state): State>, + Json(manifest): Json, +) -> Response { + if let Err(e) = manifest.validate() { + return ( + StatusCode::BAD_REQUEST, + Json(json!({ "ok": false, "error": e })), + ) + .into_response(); + } + + if let Err(e) = state.storage.put_effects_manifest_override(&manifest) { + return ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(json!({ "ok": false, "error": e.to_string() })), + ) + .into_response(); + } + + state.notify_reload(); + (StatusCode::OK, Json(json!({ "ok": true, "stored": true }))).into_response() +} + +async fn clear_effects_manifest(State(state): State>) -> Response { + if let Err(e) = state.storage.clear_effects_manifest_override() { + return ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(json!({ "ok": false, "error": e.to_string() })), + ) + .into_response(); + } + state.notify_reload(); + (StatusCode::OK, Json(json!({ "ok": true, "cleared": true }))).into_response() +} + +#[derive(Debug, serde::Deserialize)] +#[serde(rename_all = "snake_case")] +enum ReplayMode { + CheckpointOnly, + CheckpointAndDedupe, + FullReset, +} + +#[derive(Debug, 
serde::Deserialize)] +struct ReplayRequest { + tenant_id: String, + saga_name: String, + #[serde(default = "default_replay_mode")] + mode: ReplayMode, + #[serde(default = "default_replay_max_keys")] + max_keys: usize, +} + +fn default_replay_mode() -> ReplayMode { + ReplayMode::CheckpointAndDedupe +} + +fn default_replay_max_keys() -> usize { + 100_000 +} + +async fn replay(State(state): State>, Json(req): Json) -> Response { + let tenant_id = req.tenant_id.trim(); + let saga_name = req.saga_name.trim(); + if tenant_id.is_empty() || saga_name.is_empty() { + return (StatusCode::BAD_REQUEST, "tenant_id and saga_name required").into_response(); + } + + let checkpoint_prefix = format!("checkpoint:{}:{}", tenant_id, saga_name); + let _ = state.storage.delete_prefix(&checkpoint_prefix, 1); + + let mut deleted = 0usize; + match req.mode { + ReplayMode::CheckpointOnly => {} + ReplayMode::CheckpointAndDedupe => { + let prefix = format!("dedupe:{}:event:{}:", tenant_id, saga_name); + deleted += state + .storage + .delete_prefix(&prefix, req.max_keys) + .unwrap_or(0); + } + ReplayMode::FullReset => { + let dedupe_prefix = format!("dedupe:{}:event:{}:", tenant_id, saga_name); + deleted += state + .storage + .delete_prefix(&dedupe_prefix, req.max_keys) + .unwrap_or(0); + let saga_prefix = format!("saga:{}:{}:", tenant_id, saga_name); + deleted += state + .storage + .delete_prefix(&saga_prefix, req.max_keys) + .unwrap_or(0); + let schedule_prefix = format!("schedule:{}:{}:", tenant_id, saga_name); + deleted += state + .storage + .delete_prefix(&schedule_prefix, req.max_keys) + .unwrap_or(0); + let outbox_prefix = format!("outbox:{}:", tenant_id); + deleted += state + .storage + .delete_prefix(&outbox_prefix, req.max_keys) + .unwrap_or(0); + } + } + + ( + StatusCode::OK, + Json(json!({ + "ok": true, + "tenant_id": tenant_id, + "saga_name": saga_name, + "deleted_keys": deleted + })), + ) + .into_response() +} + +#[cfg(test)] +mod tests { + use super::*; + use axum::body::Body; + 
use axum::http::Request; + use tower::ServiceExt; + + #[test] + fn readiness_toggles_with_draining_flag() { + let settings = Settings { + nats_url: "nats://127.0.0.1:1".to_string(), + ..Default::default() + }; + let state = Arc::new(AppState::new( + settings, + Arc::new(AtomicBool::new(false)), + Arc::new(TenantGate::new(None)), + Arc::new(Metrics::default()), + KvClient::in_memory(), + Arc::new(tokio::sync::Notify::new()), + )); + assert!(!state.is_draining()); + state.start_draining(); + assert!(state.is_draining()); + } + + #[tokio::test] + async fn health_fails_when_storage_is_unwritable() { + let settings = Settings { + nats_url: "nats://127.0.0.1:1".to_string(), + ..Default::default() + }; + + let storage = KvClient::in_memory(); + storage.fail_next_txn(); + + let state = Arc::new(AppState::new( + settings, + Arc::new(AtomicBool::new(false)), + Arc::new(TenantGate::new(None)), + Arc::new(Metrics::default()), + storage, + Arc::new(tokio::sync::Notify::new()), + )); + let app = router(state); + + let resp = app + .oneshot( + Request::builder() + .uri("/health") + .body(Body::empty()) + .unwrap(), + ) + .await + .unwrap(); + + assert_eq!(resp.status(), StatusCode::SERVICE_UNAVAILABLE); + + let body = axum::body::to_bytes(resp.into_body(), usize::MAX) + .await + .unwrap(); + let json: serde_json::Value = serde_json::from_slice(&body).unwrap(); + assert_eq!(json["storage"], false); + } +} diff --git a/runner/src/lib.rs b/runner/src/lib.rs new file mode 100644 index 0000000..07daf84 --- /dev/null +++ b/runner/src/lib.rs @@ -0,0 +1,15 @@ +pub mod config; +pub mod effects; +pub mod gateway; +pub mod http; +pub mod observability; +pub mod outbox; +pub mod saga; +pub mod schedule; +pub mod storage; +pub mod stream; +pub mod tenant_placement; +pub mod types; + +pub use config::Settings; +pub use types::*; diff --git a/runner/src/main.rs b/runner/src/main.rs new file mode 100644 index 0000000..648f818 --- /dev/null +++ b/runner/src/main.rs @@ -0,0 +1,315 @@ +use 
runner::config::Settings; +use runner::effects::run_effect_worker; +use runner::http; +use runner::observability::Observability; +use runner::outbox::OutboxRelay; +use runner::saga::{run_saga_worker, SagaPrograms, SagaRuntime}; +use runner::schedule::Scheduler; +use runner::storage::KvClient; +use runner::stream::JetStreamClient; +use runner::tenant_placement::{start_tenant_filter, TenantGate}; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; + +#[tokio::main] +async fn main() { + match std::env::args().nth(1).as_deref() { + Some("-h") | Some("--help") => { + print_help(); + return; + } + Some("serve") | None => serve().await, + Some(other) => { + eprintln!("Unknown command: {}", other); + print_help(); + } + } +} + +async fn serve() { + let settings = load_settings(); + if let Err(e) = settings.validate() { + eprintln!("Invalid configuration: {}", e); + std::process::exit(2); + } + + let observability = Observability::default(); + observability.init(); + let metrics = observability.metrics(); + + tracing::info!(settings = ?settings, "Runner starting"); + + let shutdown = Arc::new(tokio::sync::Notify::new()); + let reload = Arc::new(tokio::sync::Notify::new()); + let draining = Arc::new(AtomicBool::new(false)); + + let storage = match KvClient::open(settings.storage_path.clone()) { + Ok(s) => s, + Err(e) => { + eprintln!("Failed to open storage: {}", e); + std::process::exit(1); + } + }; + + let tenant_filter = match start_tenant_filter(&settings).await { + Ok(v) => v, + Err(e) => { + tracing::error!(error = %e, "Failed to initialize tenant filter"); + None + } + }; + + let tenant_gate = Arc::new(TenantGate::new(tenant_filter.clone())); + + let state = Arc::new(http::AppState::new( + settings.clone(), + draining.clone(), + tenant_gate.clone(), + metrics.clone(), + storage.clone(), + reload.clone(), + )); + + let http_listener = tokio::net::TcpListener::bind(settings.http_addr.as_str()) + .await + .unwrap(); + + let http_shutdown = 
shutdown.clone(); + let http_state = state.clone(); + let http_task = tokio::spawn(async move { + http::serve(http_listener, http_state, async move { + http_shutdown.notified().await + }) + .await + }); + + let signal_shutdown = shutdown.clone(); + let signal_draining = draining.clone(); + tokio::spawn(async move { + #[cfg(unix)] + { + use tokio::signal::unix::{signal, SignalKind}; + let mut sigterm = signal(SignalKind::terminate()).ok(); + let mut sigint = signal(SignalKind::interrupt()).ok(); + tokio::select! { + _ = tokio::signal::ctrl_c() => {}, + _ = async { if let Some(s) = &mut sigterm { let _ = s.recv().await; } } => {}, + _ = async { if let Some(s) = &mut sigint { let _ = s.recv().await; } } => {}, + } + } + + #[cfg(not(unix))] + { + let _ = tokio::signal::ctrl_c().await; + } + + signal_draining.store(true, Ordering::Relaxed); + signal_shutdown.notify_waiters(); + }); + + let mut tasks = Vec::new(); + + match settings.mode { + runner::config::RunnerMode::Saga => { + let programs = Arc::new(match SagaPrograms::load(&settings) { + Ok(p) => p, + Err(e) => { + tracing::error!(error = %e, "Failed to load saga manifest/programs"); + std::process::exit(1); + } + }); + let saga_runtime = SagaRuntime::default(); + tasks.push(tokio::spawn(run_saga_worker( + settings.clone(), + storage.clone(), + programs.clone(), + saga_runtime.clone(), + metrics.clone(), + tenant_gate.clone(), + tenant_filter.clone(), + shutdown.clone(), + draining.clone(), + ))); + let outbox_settings = settings.clone(); + let outbox_storage = storage.clone(); + let outbox_shutdown = shutdown.clone(); + let outbox_draining = draining.clone(); + let outbox_metrics = metrics.clone(); + let outbox_tenant_gate = tenant_gate.clone(); + tasks.push(tokio::spawn(async move { + let js = JetStreamClient::connect(&outbox_settings) + .await + .map_err(|e| runner::types::RunnerError::StreamError(e.to_string()))?; + OutboxRelay + .run( + outbox_settings, + outbox_storage, + js, + outbox_metrics, + 
outbox_tenant_gate, + outbox_shutdown, + outbox_draining, + ) + .await + })); + let scheduler_settings = settings.clone(); + let scheduler_storage = storage.clone(); + let scheduler_shutdown = shutdown.clone(); + let scheduler_draining = draining.clone(); + let scheduler_metrics = metrics.clone(); + let scheduler_tenant_gate = tenant_gate.clone(); + tasks.push(tokio::spawn(async move { + Scheduler + .run( + scheduler_settings, + scheduler_storage, + programs, + saga_runtime, + scheduler_metrics, + scheduler_tenant_gate, + scheduler_shutdown, + scheduler_draining, + ) + .await + })); + } + runner::config::RunnerMode::Effect => { + tasks.push(tokio::spawn(run_effect_worker( + settings.clone(), + storage.clone(), + metrics.clone(), + tenant_gate.clone(), + tenant_filter.clone(), + reload.clone(), + shutdown.clone(), + draining.clone(), + ))); + } + runner::config::RunnerMode::Combined => { + let programs = Arc::new(match SagaPrograms::load(&settings) { + Ok(p) => p, + Err(e) => { + tracing::error!(error = %e, "Failed to load saga manifest/programs"); + std::process::exit(1); + } + }); + let saga_runtime = SagaRuntime::default(); + tasks.push(tokio::spawn(run_saga_worker( + settings.clone(), + storage.clone(), + programs.clone(), + saga_runtime.clone(), + metrics.clone(), + tenant_gate.clone(), + tenant_filter.clone(), + shutdown.clone(), + draining.clone(), + ))); + tasks.push(tokio::spawn(run_effect_worker( + settings.clone(), + storage.clone(), + metrics.clone(), + tenant_gate.clone(), + tenant_filter.clone(), + reload.clone(), + shutdown.clone(), + draining.clone(), + ))); + let outbox_settings = settings.clone(); + let outbox_storage = storage.clone(); + let outbox_shutdown = shutdown.clone(); + let outbox_draining = draining.clone(); + let outbox_metrics = metrics.clone(); + let outbox_tenant_gate = tenant_gate.clone(); + tasks.push(tokio::spawn(async move { + let js = JetStreamClient::connect(&outbox_settings) + .await + .map_err(|e| 
runner::types::RunnerError::StreamError(e.to_string()))?; + OutboxRelay + .run( + outbox_settings, + outbox_storage, + js, + outbox_metrics, + outbox_tenant_gate, + outbox_shutdown, + outbox_draining, + ) + .await + })); + let scheduler_settings = settings.clone(); + let scheduler_storage = storage.clone(); + let scheduler_shutdown = shutdown.clone(); + let scheduler_draining = draining.clone(); + let scheduler_metrics = metrics.clone(); + let scheduler_tenant_gate = tenant_gate.clone(); + tasks.push(tokio::spawn(async move { + Scheduler + .run( + scheduler_settings, + scheduler_storage, + programs, + saga_runtime, + scheduler_metrics, + scheduler_tenant_gate, + scheduler_shutdown, + scheduler_draining, + ) + .await + })); + } + } + + let mut failed = None; + for task in tasks { + match task.await { + Ok(Ok(())) => {} + Ok(Err(e)) => { + failed = Some(e); + break; + } + Err(e) => { + failed = Some(runner::types::RunnerError::RuntimeError(e.to_string())); + break; + } + } + } + + draining.store(true, Ordering::Relaxed); + shutdown.notify_waiters(); + let _ = http_task.await; + + if let Some(e) = failed { + tracing::error!(error = %e, "Runner terminated with error"); + std::process::exit(1); + } +} + +fn print_help() { + println!( + "runner\n\nUSAGE:\n runner [COMMAND]\n\nCOMMANDS:\n serve Start the HTTP server (default)\n\nOPTIONS:\n -h, --help Print help\n" + ); +} + +fn load_settings() -> Settings { + if let Ok(path) = std::env::var("RUNNER_CONFIG_PATH") { + if let Ok(settings) = Settings::load_from_file_with_env_overrides(path) { + return settings; + } + } + + Settings::from_env().unwrap_or_default() +} + +#[cfg(test)] +mod tests { + #[test] + fn test_harness_runs() { + let settings = runner::Settings::default(); + assert_eq!(settings.aggregate_events_stream, "AGGREGATE_EVENTS"); + assert!(settings + .saga_trigger_subject_filters + .iter() + .any(|s| s == "tenant.*.aggregate.*.*")); + } +} diff --git a/runner/src/observability/mod.rs 
b/runner/src/observability/mod.rs new file mode 100644 index 0000000..e208b01 --- /dev/null +++ b/runner/src/observability/mod.rs @@ -0,0 +1,220 @@ +use edge_logger_client::{Config as EdgeLoggerConfig, EdgeLoggerLayer}; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::Arc; +use std::time::Duration; +use tracing_subscriber::prelude::*; + +#[derive(Clone)] +pub struct Observability { + service_name: String, + metrics: Arc, +} + +impl std::fmt::Debug for Observability { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Observability").finish_non_exhaustive() + } +} + +impl Observability { + pub fn new(service_name: impl Into) -> Self { + Self { + service_name: service_name.into(), + metrics: Arc::new(Metrics::default()), + } + } + + pub fn init(&self) { + let filter = std::env::var("RUST_LOG").unwrap_or_else(|_| "info".to_string()); + let env_filter = tracing_subscriber::EnvFilter::new(filter); + + let fmt_layer = tracing_subscriber::fmt::layer().json(); + + let edge_layer = edge_logger_layer_from_env(&self.service_name); + + let registry = tracing_subscriber::registry() + .with(env_filter) + .with(fmt_layer); + let _ = match edge_layer { + Some(layer) => registry.with(layer).try_init(), + None => registry.try_init(), + }; + } + + pub fn service_name(&self) -> &str { + &self.service_name + } + + pub fn metrics(&self) -> Arc { + self.metrics.clone() + } +} + +impl Default for Observability { + fn default() -> Self { + Self::new("runner") + } +} + +#[derive(Debug, Default)] +pub struct Metrics { + saga_events_processed_total: AtomicU64, + saga_events_skipped_checkpoint_total: AtomicU64, + saga_events_skipped_dedupe_total: AtomicU64, + saga_commit_failed_total: AtomicU64, + + schedule_processed_total: AtomicU64, + schedule_failed_total: AtomicU64, + + outbox_dispatch_success_total: AtomicU64, + outbox_dispatch_failed_total: AtomicU64, + + effect_exec_success_total: AtomicU64, + effect_exec_failed_total: AtomicU64, + 
effect_exec_timeout_total: AtomicU64, + effect_result_publish_failed_total: AtomicU64, + + deadletter_written_total: AtomicU64, +} + +impl Metrics { + pub fn inc_saga_processed(&self) { + self.saga_events_processed_total + .fetch_add(1, Ordering::Relaxed); + } + + pub fn inc_saga_skipped_checkpoint(&self) { + self.saga_events_skipped_checkpoint_total + .fetch_add(1, Ordering::Relaxed); + } + + pub fn inc_saga_skipped_dedupe(&self) { + self.saga_events_skipped_dedupe_total + .fetch_add(1, Ordering::Relaxed); + } + + pub fn inc_saga_commit_failed(&self) { + self.saga_commit_failed_total + .fetch_add(1, Ordering::Relaxed); + } + + pub fn inc_schedule_processed(&self) { + self.schedule_processed_total + .fetch_add(1, Ordering::Relaxed); + } + + pub fn inc_schedule_failed(&self) { + self.schedule_failed_total.fetch_add(1, Ordering::Relaxed); + } + + pub fn inc_outbox_dispatch_success(&self) { + self.outbox_dispatch_success_total + .fetch_add(1, Ordering::Relaxed); + } + + pub fn inc_outbox_dispatch_failed(&self) { + self.outbox_dispatch_failed_total + .fetch_add(1, Ordering::Relaxed); + } + + pub fn inc_effect_exec_success(&self) { + self.effect_exec_success_total + .fetch_add(1, Ordering::Relaxed); + } + + pub fn inc_effect_exec_failed(&self) { + self.effect_exec_failed_total + .fetch_add(1, Ordering::Relaxed); + } + + pub fn inc_effect_exec_timeout(&self) { + self.effect_exec_timeout_total + .fetch_add(1, Ordering::Relaxed); + } + + pub fn inc_effect_publish_failed(&self) { + self.effect_result_publish_failed_total + .fetch_add(1, Ordering::Relaxed); + } + + pub fn inc_deadletter_written(&self) { + self.deadletter_written_total + .fetch_add(1, Ordering::Relaxed); + } + + pub fn export_prometheus(&self) -> String { + format!( + "runner_saga_events_processed_total {}\nrunner_saga_events_skipped_checkpoint_total {}\nrunner_saga_events_skipped_dedupe_total {}\nrunner_saga_commit_failed_total {}\nrunner_schedule_processed_total {}\nrunner_schedule_failed_total 
{}\nrunner_outbox_dispatch_success_total {}\nrunner_outbox_dispatch_failed_total {}\nrunner_effect_exec_success_total {}\nrunner_effect_exec_failed_total {}\nrunner_effect_exec_timeout_total {}\nrunner_effect_result_publish_failed_total {}\nrunner_deadletter_written_total {}\n", + self.saga_events_processed_total.load(Ordering::Relaxed), + self.saga_events_skipped_checkpoint_total + .load(Ordering::Relaxed), + self.saga_events_skipped_dedupe_total + .load(Ordering::Relaxed), + self.saga_commit_failed_total.load(Ordering::Relaxed), + self.schedule_processed_total.load(Ordering::Relaxed), + self.schedule_failed_total.load(Ordering::Relaxed), + self.outbox_dispatch_success_total.load(Ordering::Relaxed), + self.outbox_dispatch_failed_total.load(Ordering::Relaxed), + self.effect_exec_success_total.load(Ordering::Relaxed), + self.effect_exec_failed_total.load(Ordering::Relaxed), + self.effect_exec_timeout_total.load(Ordering::Relaxed), + self.effect_result_publish_failed_total + .load(Ordering::Relaxed), + self.deadletter_written_total.load(Ordering::Relaxed), + ) + } +} + +fn edge_logger_layer_from_env(service_name: &str) -> Option { + let enabled = std::env::var("EDGE_LOGGER_ENABLED") + .ok() + .map(|v| matches!(v.trim().to_ascii_lowercase().as_str(), "1" | "true" | "yes")) + .unwrap_or(false); + + let socket_path = std::env::var("EDGE_LOGGER_SOCKET_PATH").ok(); + if !enabled && socket_path.is_none() { + return None; + } + + let environment = std::env::var("EDGE_LOGGER_ENVIRONMENT") + .or_else(|_| std::env::var("ENVIRONMENT")) + .unwrap_or_else(|_| "production".to_string()); + + let tenant_id = + std::env::var("EDGE_LOGGER_TENANT_ID").unwrap_or_else(|_| "default".to_string()); + + let batch_size = std::env::var("EDGE_LOGGER_BATCH_SIZE") + .ok() + .and_then(|v| v.parse::().ok()) + .unwrap_or(100); + + let flush_interval = std::env::var("EDGE_LOGGER_FLUSH_INTERVAL_MS") + .ok() + .and_then(|v| v.parse::().ok()) + .map(Duration::from_millis) + 
.unwrap_or(Duration::from_secs(1)); + + Some(EdgeLoggerLayer::new(EdgeLoggerConfig { + socket_path: socket_path.unwrap_or_else(|| "/var/run/edge-logger/logger.sock".to_string()), + service: service_name.to_string(), + environment, + tenant_id, + batch_size, + flush_interval, + })) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn metrics_exporter_emits_key_metrics() { + let metrics = Metrics::default(); + metrics.inc_saga_processed(); + metrics.inc_outbox_dispatch_failed(); + let body = metrics.export_prometheus(); + assert!(body.contains("runner_saga_events_processed_total 1")); + assert!(body.contains("runner_outbox_dispatch_failed_total 1")); + } +} diff --git a/runner/src/outbox/mod.rs b/runner/src/outbox/mod.rs new file mode 100644 index 0000000..46b9d95 --- /dev/null +++ b/runner/src/outbox/mod.rs @@ -0,0 +1,3 @@ +mod relay; + +pub use relay::OutboxRelay; diff --git a/runner/src/outbox/relay.rs b/runner/src/outbox/relay.rs new file mode 100644 index 0000000..dbfcac4 --- /dev/null +++ b/runner/src/outbox/relay.rs @@ -0,0 +1,448 @@ +use crate::config::Settings; +use crate::gateway::{proto as gateway_proto, GatewayClient}; +use crate::observability::Metrics; +use crate::storage::KvClient; +use crate::stream::JetStreamClient; +use crate::tenant_placement::TenantGate; +use crate::types::{RunnerError, WorkItem}; +use futures::future::BoxFuture; +use std::collections::HashMap; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; +use std::time::Duration; +use tokio::sync::{Mutex, Semaphore}; + +#[derive(Clone)] +pub struct OutboxRelay; + +impl std::fmt::Debug for OutboxRelay { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("OutboxRelay").finish_non_exhaustive() + } +} + +impl OutboxRelay { + pub fn new() -> Self { + Self + } + + #[allow(clippy::too_many_arguments)] + pub async fn run( + &self, + settings: Settings, + storage: KvClient, + jetstream: JetStreamClient, + metrics: Arc, + tenant_gate: 
Arc, + shutdown: Arc, + draining: Arc, + ) -> Result<(), RunnerError> { + let gateway = if let Some(url) = settings.aggregate_gateway_url.clone() { + Some(Arc::new(Mutex::new( + GatewayClient::connect(&url) + .await + .map_err(|e| RunnerError::RuntimeError(e.to_string()))?, + ))) + } else { + None + }; + + let dispatcher = DefaultOutboxDispatcher { jetstream, gateway }; + self.run_with_dispatcher( + settings, + storage, + Arc::new(dispatcher), + metrics, + tenant_gate, + shutdown, + draining, + ) + .await + } +} + +impl Default for OutboxRelay { + fn default() -> Self { + Self::new() + } +} + +pub trait OutboxDispatcher: Send + Sync { + fn dispatch(&self, item: WorkItem) -> BoxFuture<'static, Result<(), RunnerError>>; +} + +#[derive(Clone)] +struct DefaultOutboxDispatcher { + jetstream: JetStreamClient, + gateway: Option>>, +} + +impl OutboxDispatcher for DefaultOutboxDispatcher { + fn dispatch(&self, item: WorkItem) -> BoxFuture<'static, Result<(), RunnerError>> { + let jetstream = self.jetstream.clone(); + let gateway = self.gateway.clone(); + Box::pin(async move { + match item { + WorkItem::EffectCommand(cmd) => jetstream.publish_effect_command(&cmd).await, + WorkItem::AggregateCommand(cmd) => { + let Some(gateway) = gateway else { + return Err(RunnerError::RuntimeError( + "Aggregate gateway URL not configured".to_string(), + )); + }; + + let payload_json = serde_json::to_string(&cmd.payload_json) + .map_err(|e| RunnerError::DecodeError(e.to_string()))?; + let metadata = to_string_metadata(&cmd.metadata); + + let req = gateway_proto::SubmitCommandRequest { + tenant_id: cmd.tenant_id.as_str().to_string(), + command_id: cmd.command_id.as_str().to_string(), + aggregate_id: cmd.aggregate_id, + aggregate_type: cmd.aggregate_type, + payload_json, + metadata, + }; + + let mut client = gateway.lock().await; + let _ = client + .submit_command(req) + .await + .map_err(|e| RunnerError::StreamError(e.to_string()))?; + Ok(()) + } + } + }) + } +} + +impl OutboxRelay { + 
#[allow(clippy::too_many_arguments)] + async fn run_with_dispatcher( + &self, + settings: Settings, + storage: KvClient, + dispatcher: Arc, + metrics: Arc, + tenant_gate: Arc, + shutdown: Arc, + draining: Arc, + ) -> Result<(), RunnerError> { + let mut tick = tokio::time::interval(Duration::from_millis( + settings.outbox_scan_interval_ms.max(1), + )); + tick.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay); + + let global = Arc::new(Semaphore::new(settings.outbox_max_in_flight)); + let per_tenant = Arc::new(Mutex::new(HashMap::>::new())); + let mut join_set = tokio::task::JoinSet::>::new(); + + loop { + if draining.load(Ordering::Relaxed) && join_set.is_empty() { + tokio::select! { + _ = shutdown.notified() => break, + _ = tokio::time::sleep(Duration::from_millis(50)) => {} + }; + } + + if join_set.len() < settings.outbox_max_in_flight && !draining.load(Ordering::Relaxed) { + let batch = storage.list_outbox_all(settings.outbox_batch_size)?; + if !batch.is_empty() { + for (key, item) in batch { + if join_set.len() >= settings.outbox_max_in_flight { + break; + } + + let tenant_id = work_item_tenant_id(&item).unwrap_or_default(); + if !tenant_gate.should_dispatch_outbox_work( + &tenant_id, + draining.load(Ordering::Relaxed), + ) { + continue; + } + let tenant_sem = get_tenant_semaphore( + per_tenant.clone(), + tenant_id.clone(), + settings.outbox_max_in_flight_per_tenant, + ) + .await; + + let storage = storage.clone(); + let dispatcher = dispatcher.clone(); + let global = global.clone(); + let metrics = metrics.clone(); + let crash_after_dispatch = settings.test_outbox_crash_after_dispatch; + let tenant_gate = tenant_gate.clone(); + let tenant_id = tenant_id.clone(); + + join_set.spawn(async move { + let _g = global + .acquire_owned() + .await + .map_err(|e| RunnerError::RuntimeError(e.to_string()))?; + let _t = tenant_sem + .acquire_owned() + .await + .map_err(|e| RunnerError::RuntimeError(e.to_string()))?; + let _work = 
tenant_gate.begin_work(&tenant_id); + + match dispatcher.dispatch(item).await { + Ok(()) => { + if crash_after_dispatch { + panic!("test_outbox_crash_after_dispatch"); + } + storage.delete_outbox_item(&key)?; + metrics.inc_outbox_dispatch_success(); + Ok(()) + } + Err(e) => { + metrics.inc_outbox_dispatch_failed(); + Err(e) + } + } + }); + } + } + } + + if join_set.is_empty() { + tokio::select! { + _ = shutdown.notified() => break, + _ = tick.tick() => {} + }; + continue; + } + + tokio::select! { + _ = shutdown.notified() => break, + _ = tick.tick() => {}, + res = join_set.join_next() => { + if let Some(res) = res { + match res { + Ok(Ok(())) => {} + Ok(Err(e)) => { + tracing::error!(error = %e, "Outbox dispatch failed"); + } + Err(e) => { + tracing::error!(error = %e, "Outbox task panicked"); + } + } + } + } + } + } + + Ok(()) + } +} + +async fn get_tenant_semaphore( + map: Arc>>>, + tenant_id: String, + permits: usize, +) -> Arc { + let mut map = map.lock().await; + map.entry(tenant_id) + .or_insert_with(|| Arc::new(Semaphore::new(permits))) + .clone() +} + +fn work_item_tenant_id(item: &WorkItem) -> Option { + match item { + WorkItem::AggregateCommand(cmd) => Some(cmd.tenant_id.as_str().to_string()), + WorkItem::EffectCommand(cmd) => Some(cmd.tenant_id.as_str().to_string()), + } +} + +fn to_string_metadata(metadata: &crate::types::MessageMetadata) -> HashMap { + let mut map = HashMap::new(); + if let Some(correlation_id) = metadata.correlation_id.as_ref() { + map.insert( + "correlation_id".to_string(), + correlation_id.as_str().to_string(), + ); + } + if let Some(trace_id) = metadata.trace_id.as_ref() { + map.insert("trace_id".to_string(), trace_id.as_str().to_string()); + } + for (k, v) in &metadata.extra { + if let Some(s) = v.as_str() { + map.insert(k.clone(), s.to_string()); + } + } + map +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::types::{ + CommandId, EffectCommandEnvelope, EffectName, MessageMetadata, TenantId, WorkId, + }; + use 
serde_json::json; + use std::sync::atomic::{AtomicUsize, Ordering}; + + #[derive(Clone)] + struct FakeDispatcher { + should_fail: bool, + calls: Arc, + } + + impl OutboxDispatcher for FakeDispatcher { + fn dispatch(&self, _item: WorkItem) -> BoxFuture<'static, Result<(), RunnerError>> { + let calls = self.calls.clone(); + let should_fail = self.should_fail; + Box::pin(async move { + calls.fetch_add(1, Ordering::Relaxed); + if should_fail { + Err(RunnerError::StreamError("fail".to_string())) + } else { + Ok(()) + } + }) + } + } + + fn test_settings() -> Settings { + Settings { + outbox_scan_interval_ms: 1, + outbox_batch_size: 10, + outbox_max_in_flight: 2, + outbox_max_in_flight_per_tenant: 2, + ..Default::default() + } + } + + #[tokio::test] + async fn outbox_item_is_not_deleted_if_dispatch_fails() { + let settings = test_settings(); + let storage = KvClient::in_memory(); + let tenant = TenantId::new("t1"); + let work_id = WorkId::new_v7(); + let key = storage + .put_outbox_item( + &tenant, + "effect", + &work_id, + &WorkItem::EffectCommand(EffectCommandEnvelope { + tenant_id: tenant.clone(), + command_id: CommandId::new("c1"), + effect_name: EffectName::new("noop"), + payload: json!({"a": 1}), + metadata: MessageMetadata::default(), + }), + ) + .unwrap(); + + let dispatcher = Arc::new(FakeDispatcher { + should_fail: true, + calls: Arc::new(AtomicUsize::new(0)), + }); + + let relay = OutboxRelay::new(); + let shutdown = Arc::new(tokio::sync::Notify::new()); + let draining = Arc::new(AtomicBool::new(false)); + + tokio::spawn({ + let relay = relay.clone(); + let settings = settings.clone(); + let storage = storage.clone(); + let shutdown = shutdown.clone(); + let draining = draining.clone(); + async move { + let metrics = Arc::new(Metrics::default()); + let tenant_gate = Arc::new(TenantGate::new(None)); + let _ = relay + .run_with_dispatcher( + settings, + storage, + dispatcher, + metrics, + tenant_gate, + shutdown, + draining, + ) + .await; + } + }); + + 
tokio::time::sleep(Duration::from_millis(10)).await; + shutdown.notify_waiters(); + tokio::time::sleep(Duration::from_millis(10)).await; + + let items = storage.list_outbox_all(10).unwrap(); + assert!(items.iter().any(|(k, _)| k == &key)); + } + + #[tokio::test] + async fn dispatch_success_deletes_outbox_item_exactly_once() { + let settings = test_settings(); + let storage = KvClient::in_memory(); + let tenant = TenantId::new("t1"); + let work_id = WorkId::new_v7(); + let key = storage + .put_outbox_item( + &tenant, + "effect", + &work_id, + &WorkItem::EffectCommand(EffectCommandEnvelope { + tenant_id: tenant.clone(), + command_id: CommandId::new("c1"), + effect_name: EffectName::new("noop"), + payload: json!({"a": 1}), + metadata: MessageMetadata::default(), + }), + ) + .unwrap(); + + let calls = Arc::new(AtomicUsize::new(0)); + let dispatcher = Arc::new(FakeDispatcher { + should_fail: false, + calls: calls.clone(), + }); + + let relay = OutboxRelay::new(); + let shutdown = Arc::new(tokio::sync::Notify::new()); + let draining = Arc::new(AtomicBool::new(false)); + + tokio::spawn({ + let relay = relay.clone(); + let settings = settings.clone(); + let storage = storage.clone(); + let shutdown = shutdown.clone(); + let draining = draining.clone(); + async move { + let metrics = Arc::new(Metrics::default()); + let tenant_gate = Arc::new(TenantGate::new(None)); + let _ = relay + .run_with_dispatcher( + settings, + storage, + dispatcher, + metrics, + tenant_gate, + shutdown, + draining, + ) + .await; + } + }); + + let start = std::time::Instant::now(); + loop { + let items = storage.list_outbox_all(10).unwrap(); + if !items.iter().any(|(k, _)| k == &key) { + break; + } + if start.elapsed() > Duration::from_secs(1) { + panic!("outbox item was not deleted"); + } + tokio::time::sleep(Duration::from_millis(10)).await; + } + + shutdown.notify_waiters(); + tokio::time::sleep(Duration::from_millis(10)).await; + + assert_eq!(calls.load(Ordering::Relaxed), 1); + } +} diff --git 
a/runner/src/saga/manifest.rs b/runner/src/saga/manifest.rs
new file mode 100644
index 0000000..d241903
--- /dev/null
+++ b/runner/src/saga/manifest.rs
@@ -0,0 +1,122 @@
+use serde::{Deserialize, Serialize};
+use std::path::Path;
+
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+#[serde(default)]
+pub struct SagaManifest {
+    pub sagas: Vec<SagaDefinition>,
+}
+
+impl SagaManifest {
+    pub fn validate(&self) -> Result<(), String> {
+        for saga in &self.sagas {
+            saga.validate()?;
+        }
+        Ok(())
+    }
+
+    pub fn from_yaml(yaml: &str) -> Result<Self, serde_yaml::Error> {
+        serde_yaml::from_str(yaml)
+    }
+
+    pub fn from_toml(toml_str: &str) -> Result<Self, toml::de::Error> {
+        toml::from_str(toml_str)
+    }
+
+    pub fn from_json(json: &str) -> Result<Self, serde_json::Error> {
+        serde_json::from_str(json)
+    }
+
+    pub fn from_file(path: impl AsRef<Path>) -> Result<Self, SagaManifestLoadError> {
+        let path = path.as_ref();
+        let raw = std::fs::read_to_string(path)?;
+        let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
+
+        match ext {
+            "yaml" | "yml" => Ok(Self::from_yaml(&raw)?),
+            "toml" => Ok(Self::from_toml(&raw)?),
+            "json" => Ok(Self::from_json(&raw)?),
+            _ => Err(SagaManifestLoadError::UnsupportedFormat {
+                path: path.display().to_string(),
+            }),
+        }
+    }
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+#[serde(default)]
+pub struct SagaDefinition {
+    pub name: String,
+    pub trigger_subjects: Vec<String>,
+    pub on_event: String,
+    pub compensation: Option<String>,
+}
+
+impl SagaDefinition {
+    pub fn validate(&self) -> Result<(), String> {
+        if self.name.trim().is_empty() {
+            return Err("Saga name is required".to_string());
+        }
+        if self.on_event.trim().is_empty() {
+            return Err(format!(
+                "Saga '{}' must specify on_event program",
+                self.name
+            ));
+        }
+        if !Path::new(&self.on_event).exists() {
+            return Err(format!(
+                "Saga '{}' on_event program not found: {}",
+                self.name, self.on_event
+            ));
+        }
+        if let Some(path) = &self.compensation {
+            if !path.trim().is_empty() && !Path::new(path).exists() {
+                return Err(format!(
+                    "Saga '{}' compensation program not found: {}",
+                    self.name, path
+                ));
+            }
+        }
+        Ok(())
+    }
+}
+
+#[derive(Debug, thiserror::Error)]
+pub enum SagaManifestLoadError {
+    #[error("Failed to read manifest file: {0}")]
+    Io(#[from] std::io::Error),
+    #[error("Failed to parse YAML manifest: {0}")]
+    Yaml(#[from] serde_yaml::Error),
+    #[error("Failed to parse TOML manifest: {0}")]
+    Toml(#[from] toml::de::Error),
+    #[error("Failed to parse JSON manifest: {0}")]
+    Json(#[from] serde_json::Error),
+    #[error("Unsupported manifest format: {path}")]
+    UnsupportedFormat { path: String },
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::tempdir;
+
+    #[test]
+    fn manifest_loads_and_validates() {
+        let dir = tempdir().unwrap();
+        let program_path = dir.path().join("saga.json");
+        std::fs::write(&program_path, r#"{"specVersion":"1.1","id":"p","name":"p","inputs":[],"nodes":[],"edges":[],"outputNodeId":"x"}"#).unwrap();
+
+        let yaml = format!(
+            r#"
+sagas:
+  - name: billing
+    trigger_subjects: ["tenant.*.aggregate.*.*"]
+    on_event: "{}"
+"#,
+            program_path.to_string_lossy()
+        );
+
+        let manifest = SagaManifest::from_yaml(&yaml).unwrap();
+        assert!(manifest.validate().is_ok());
+    }
+}
diff --git a/runner/src/saga/mod.rs b/runner/src/saga/mod.rs
new file mode 100644
index 0000000..85bddee
--- /dev/null
+++ b/runner/src/saga/mod.rs
@@ -0,0 +1,7 @@
+mod manifest;
+mod runtime;
+mod worker;
+
+pub use manifest::{SagaDefinition, SagaManifest};
+pub use runtime::SagaRuntime;
+pub use worker::{run_saga_worker, AckDecision, SagaPrograms};
diff --git a/runner/src/saga/runtime.rs b/runner/src/saga/runtime.rs
new file mode 100644
index 0000000..4eece9b
--- /dev/null
+++ b/runner/src/saga/runtime.rs
@@ -0,0 +1,194 @@
+use crate::types::{AggregateEventEnvelope, RunnerError, WorkItem};
+use chrono::{DateTime, Utc};
+use runtime_function::engine::ExecutionOptions;
+use runtime_function::{Context, Engine, Program, Value as RtValue};
+use serde::{Deserialize, Serialize};
+use serde_json::Value;
+use std::collections::BTreeMap;
+use std::time::Duration; + +#[derive(Clone)] +pub struct SagaRuntime { + engine: Engine, + options: ExecutionOptions, +} + +impl std::fmt::Debug for SagaRuntime { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("SagaRuntime").finish_non_exhaustive() + } +} + +impl SagaRuntime { + pub fn new(gas_limit: u64, timeout: Duration) -> Self { + let timeout_secs = timeout.as_secs().max(1); + Self { + engine: Engine::with_options(ExecutionOptions { + gas_limit, + timeout_secs, + trace: false, + }), + options: ExecutionOptions { + gas_limit, + timeout_secs, + trace: false, + }, + } + } + + pub async fn execute_on_event( + &self, + program: &Program, + saga_state: &Value, + event: &AggregateEventEnvelope, + ) -> Result { + let event_json = + serde_json::to_value(event).map_err(|e| RunnerError::DecodeError(e.to_string()))?; + let ctx = deterministic_context(event.timestamp, event.event_id.to_string()) + .with_tenant_id(event.tenant_id.as_str()); + self.execute(program, saga_state, &event_json, ctx).await + } + + pub async fn execute( + &self, + program: &Program, + saga_state: &Value, + event: &Value, + context: Context, + ) -> Result { + let mut inputs = BTreeMap::new(); + inputs.insert( + "saga_state".to_string(), + to_runtime_value(saga_state.clone())?, + ); + inputs.insert("event".to_string(), to_runtime_value(event.clone())?); + + let result = + self.engine + .execute_with_options(program, inputs, context, self.options.clone()); + + if !result.success { + return Err(RunnerError::RuntimeError( + result + .error + .map(|e| e.to_string()) + .unwrap_or_else(|| "Saga execution failed".to_string()), + )); + } + + let output = result.output.ok_or_else(|| { + RunnerError::RuntimeError("Saga execution produced no output".to_string()) + })?; + + let json = serde_json::to_value(output) + .map_err(|e| RunnerError::DecodeError(format!("Output encoding failed: {}", e)))?; + serde_json::from_value(json) + .map_err(|e| 
RunnerError::DecodeError(format!("Output decoding failed: {}", e))) + } +} + +impl Default for SagaRuntime { + fn default() -> Self { + Self::new(runtime_function::DEFAULT_GAS_LIMIT, Duration::from_secs(5)) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SagaExecutionOutput { + pub new_saga_state: Value, + pub work_items: Vec, + pub schedules: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SagaSchedule { + pub due_at_ms: u64, + pub payload: Value, +} + +fn to_runtime_value(value: Value) -> Result { + serde_json::from_value::(value).map_err(|e| RunnerError::DecodeError(e.to_string())) +} + +fn deterministic_context(now: DateTime, causation_id: String) -> Context { + Context::new(now, causation_id) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::types::TenantId; + use chrono::TimeZone; + use uuid::Uuid; + + fn test_program() -> Program { + let json = r#" + { + "specVersion": "1.1", + "id": "saga_test", + "name": "saga_test", + "inputs": [ + { "name": "saga_state", "type": "Any" }, + { "name": "event", "type": "Any" } + ], + "nodes": [ + { + "id": "const", + "type": "Const", + "data": { + "value": { + "new_saga_state": { "x": 1 }, + "work_items": [], + "schedules": [] + } + } + }, + { "id": "output", "type": "Output", "data": {} } + ], + "edges": [ + { "id": "e1", "source": "const", "sourceHandle": "out", "target": "output", "targetHandle": "value" } + ], + "outputNodeId": "output" + } + "#; + serde_json::from_str(json).unwrap() + } + + fn test_event() -> AggregateEventEnvelope { + AggregateEventEnvelope { + tenant_id: TenantId::new("t1"), + event_id: Uuid::parse_str("00000000-0000-0000-0000-000000000001").unwrap(), + aggregate_id: "a1".to_string(), + aggregate_type: "Account".to_string(), + version: 1, + event_type: "Created".to_string(), + payload: serde_json::json!({"a": 1}), + command_id: Uuid::parse_str("00000000-0000-0000-0000-000000000002").unwrap(), + timestamp: Utc.with_ymd_and_hms(2026, 2, 9, 12, 0, 
0).unwrap(), + } + } + + #[test] + fn same_inputs_produce_same_outputs() { + let program = test_program(); + let runtime = SagaRuntime::default(); + let event = test_event(); + let saga_state = serde_json::json!({"balance": 10}); + + let rt = tokio::runtime::Runtime::new().unwrap(); + rt.block_on(async { + let out1 = runtime + .execute_on_event(&program, &saga_state, &event) + .await + .unwrap(); + let out2 = runtime + .execute_on_event(&program, &saga_state, &event) + .await + .unwrap(); + + assert_eq!(out1.new_saga_state, out2.new_saga_state); + assert_eq!(out1.work_items.len(), out2.work_items.len()); + assert_eq!(out1.schedules.len(), out2.schedules.len()); + }); + } +} diff --git a/runner/src/saga/worker.rs b/runner/src/saga/worker.rs new file mode 100644 index 0000000..85c1ba3 --- /dev/null +++ b/runner/src/saga/worker.rs @@ -0,0 +1,838 @@ +use crate::config::Settings; +use crate::observability::Metrics; +use crate::saga::{SagaManifest, SagaRuntime}; +use crate::storage::KvClient; +use crate::stream::{ConsumerOptions, JetStreamClient}; +use crate::tenant_placement::TenantGate; +use crate::types::{ + AggregateEventEnvelope, CheckpointKey, CorrelationId, DedupeEventKey, EventId, RunnerError, + SagaName, SagaStateKey, ScheduleKey, TenantId, WorkId, WorkItem, +}; +use async_nats::jetstream::consumer::DeliverPolicy; +use async_nats::jetstream::AckKind; +use chrono::Utc; +use futures::StreamExt; +use runtime_function::Program; +use serde_json::Value; +use std::collections::HashMap; +use std::collections::HashSet; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; +use std::time::Duration; +use tokio::sync::watch; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum AckDecision { + Ack, + Term, + None, +} + +pub struct SagaPrograms { + manifest: SagaManifest, + programs: HashMap, +} + +impl std::fmt::Debug for SagaPrograms { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + 
f.debug_struct("SagaPrograms").finish_non_exhaustive() + } +} + +impl SagaPrograms { + pub fn load(settings: &Settings) -> Result { + let manifest = SagaManifest::from_file(&settings.saga_manifest_path) + .map_err(|e| RunnerError::DecodeError(e.to_string()))?; + manifest + .validate() + .map_err(|e| RunnerError::DecodeError(e.to_string()))?; + + let mut programs = HashMap::new(); + for saga in &manifest.sagas { + let raw = std::fs::read_to_string(&saga.on_event) + .map_err(|e| RunnerError::DecodeError(e.to_string()))?; + let program: Program = + serde_json::from_str(&raw).map_err(|e| RunnerError::DecodeError(e.to_string()))?; + program + .validate() + .map_err(|e| RunnerError::RuntimeError(e.to_string()))?; + programs.insert(saga.name.clone(), program); + } + + Ok(Self { manifest, programs }) + } + + pub fn manifest(&self) -> &SagaManifest { + &self.manifest + } + + pub fn program_for(&self, saga_name: &str) -> Option<&Program> { + self.programs.get(saga_name) + } +} + +#[allow(clippy::too_many_arguments)] +pub async fn run_saga_worker( + settings: Settings, + storage: KvClient, + programs: Arc, + runtime: SagaRuntime, + metrics: Arc, + tenant_gate: Arc, + tenant_filter: Option>>, + shutdown: Arc, + draining: Arc, +) -> Result<(), RunnerError> { + if tenant_filter.is_none() && settings.tenant_allowlist.is_empty() { + return run_saga_worker_single( + settings, + storage, + programs, + runtime, + metrics, + tenant_gate, + shutdown, + draining, + ) + .await; + } + + let settings = Arc::new(settings); + let jetstream = JetStreamClient::connect(&settings) + .await + .map_err(|e| RunnerError::StreamError(e.to_string()))?; + + let mut tenant_rx = match tenant_filter { + Some(rx) => rx, + None => { + let initial = settings + .tenant_allowlist + .iter() + .cloned() + .collect::>(); + let (_tx, rx) = watch::channel(initial); + rx + } + }; + + let mut tasks: HashMap> = HashMap::new(); + let mut stops: HashMap> = HashMap::new(); + + loop { + tokio::select! 
{ + _ = shutdown.notified() => break, + _ = tokio::time::sleep(Duration::from_millis(250)) => {}, + changed = tenant_rx.changed() => { + if changed.is_err() { + break; + } + } + } + + let tenants = tenant_rx.borrow().clone(); + + for tenant in tasks.keys().cloned().collect::>() { + if !tenants.contains(&tenant) { + if let Some(n) = stops.remove(&tenant) { + n.notify_waiters(); + } + } + } + + for tenant in tasks + .iter() + .filter_map(|(t, h)| { + if h.is_finished() { + Some(t.clone()) + } else { + None + } + }) + .collect::>() + { + if let Some(h) = tasks.remove(&tenant) { + let _ = h.await; + } + stops.remove(&tenant); + } + + for tenant in tenants { + if tasks.contains_key(&tenant) { + continue; + } + + let stop = Arc::new(tokio::sync::Notify::new()); + stops.insert(tenant.clone(), stop.clone()); + let tenant_key = tenant.clone(); + + let settings = settings.clone(); + let jetstream = jetstream.clone(); + let storage = storage.clone(); + let programs = programs.clone(); + let runtime = runtime.clone(); + let metrics = metrics.clone(); + let tenant_gate = tenant_gate.clone(); + let shutdown = shutdown.clone(); + let draining = draining.clone(); + + let handle = tokio::spawn(async move { + let _ = run_saga_worker_for_tenant( + settings, + jetstream, + storage, + programs, + runtime, + metrics, + tenant_gate, + tenant, + shutdown, + stop, + draining, + ) + .await; + }); + tasks.insert(tenant_key, handle); + } + } + + for (_, n) in stops { + n.notify_waiters(); + } + for (_, h) in tasks { + let _ = h.await; + } + + Ok(()) +} + +#[allow(clippy::too_many_arguments)] +async fn run_saga_worker_single( + settings: Settings, + storage: KvClient, + programs: Arc, + runtime: SagaRuntime, + metrics: Arc, + tenant_gate: Arc, + shutdown: Arc, + draining: Arc, +) -> Result<(), RunnerError> { + let jetstream = JetStreamClient::connect(&settings) + .await + .map_err(|e| RunnerError::StreamError(e.to_string()))?; + + let durable_name = format!("{}_saga", 
settings.consumer_durable_prefix); + let filter_subject = settings + .saga_trigger_subject_filters + .first() + .cloned() + .unwrap_or_else(|| "tenant.*.aggregate.*.*".to_string()); + + let consumer = jetstream + .saga_trigger_consumer( + &settings, + ConsumerOptions { + durable_name, + filter_subject, + deliver_policy: DeliverPolicy::All, + }, + ) + .await + .map_err(|e| RunnerError::StreamError(e.to_string()))?; + + let mut messages = consumer + .messages() + .await + .map_err(|e| RunnerError::StreamError(e.to_string()))?; + + loop { + if draining.load(Ordering::Relaxed) { + tokio::select! { + _ = shutdown.notified() => break, + _ = tokio::time::sleep(Duration::from_millis(50)) => continue, + }; + } + + let next = tokio::select! { + _ = shutdown.notified() => break, + msg = messages.next() => msg, + }; + + let Some(msg) = next else { break }; + match msg { + Ok(m) => { + handle_saga_message( + &settings, + &storage, + &programs, + &runtime, + &metrics, + &tenant_gate, + draining.load(Ordering::Relaxed), + m, + ) + .await; + } + Err(e) => { + tracing::error!(error = %e, "JetStream message stream error"); + } + } + } + + Ok(()) +} + +#[allow(clippy::too_many_arguments)] +async fn run_saga_worker_for_tenant( + settings: Arc, + jetstream: JetStreamClient, + storage: KvClient, + programs: Arc, + runtime: SagaRuntime, + metrics: Arc, + tenant_gate: Arc, + tenant: String, + shutdown: Arc, + stop: Arc, + draining: Arc, +) -> Result<(), RunnerError> { + let durable_name = format!("{}_saga_{}", settings.consumer_durable_prefix, tenant); + let filter_subject = format!("tenant.{}.aggregate.*.*", tenant); + + let consumer = jetstream + .saga_trigger_consumer( + &settings, + ConsumerOptions { + durable_name, + filter_subject, + deliver_policy: DeliverPolicy::All, + }, + ) + .await + .map_err(|e| RunnerError::StreamError(e.to_string()))?; + + let mut messages = consumer + .messages() + .await + .map_err(|e| RunnerError::StreamError(e.to_string()))?; + + loop { + if 
!tenant_gate.should_acquire_processing_work(&tenant, draining.load(Ordering::Relaxed)) { + tokio::select! { + _ = shutdown.notified() => break, + _ = stop.notified() => break, + _ = tokio::time::sleep(Duration::from_millis(50)) => continue, + }; + } + + if draining.load(Ordering::Relaxed) { + tokio::select! { + _ = shutdown.notified() => break, + _ = stop.notified() => break, + _ = tokio::time::sleep(Duration::from_millis(50)) => continue, + }; + } + + let next = tokio::select! { + _ = shutdown.notified() => break, + _ = stop.notified() => break, + msg = messages.next() => msg, + }; + + let Some(msg) = next else { break }; + match msg { + Ok(m) => { + handle_saga_message( + &settings, + &storage, + &programs, + &runtime, + &metrics, + &tenant_gate, + draining.load(Ordering::Relaxed), + m, + ) + .await; + } + Err(e) => { + tracing::error!(error = %e, "JetStream message stream error"); + } + } + } + + Ok(()) +} + +#[allow(clippy::too_many_arguments)] +async fn handle_saga_message( + settings: &Settings, + storage: &KvClient, + programs: &SagaPrograms, + runtime: &SagaRuntime, + metrics: &Metrics, + tenant_gate: &TenantGate, + global_draining: bool, + msg: async_nats::jetstream::Message, +) { + let info = match msg.info() { + Ok(i) => i, + Err(e) => { + tracing::error!(error = %e, "Failed to parse JetStream message info"); + let _ = msg.ack().await; + return; + } + }; + + let delivered = info.delivered.max(0) as u64; + let sequence = info.stream_sequence; + + let envelope: AggregateEventEnvelope = match serde_json::from_slice(&msg.payload) { + Ok(e) => e, + Err(e) => { + tracing::error!(error = %e, "Failed to decode aggregate event envelope"); + match handle_poison( + storage, + settings, + metrics, + delivered, + TenantId::default(), + "decode_error", + &Value::Null, + ) { + AckDecision::Term => { + let _ = msg.ack_with(AckKind::Term).await; + } + AckDecision::Ack => { + let _ = msg.ack().await; + } + AckDecision::None => {} + } + return; + } + }; + + if 
!tenant_gate.should_acquire_processing_work(envelope.tenant_id.as_str(), global_draining) { + let _ = msg + .ack_with(AckKind::Nak(Some(Duration::from_millis(250)))) + .await; + return; + } + let _work = tenant_gate.begin_work(envelope.tenant_id.as_str()); + + let subject = msg.subject.to_string(); + + let decision = process_aggregate_event( + settings, storage, programs, runtime, metrics, &envelope, &subject, sequence, delivered, + ) + .await; + + match decision { + Ok(AckDecision::Ack) => { + let _ = msg.ack().await; + } + Ok(AckDecision::Term) => { + let _ = msg.ack_with(AckKind::Term).await; + } + Ok(AckDecision::None) => {} + Err(RunnerError::StorageError(e)) => { + tracing::error!(error = %e, "Storage error while processing saga message"); + } + Err(e) => { + tracing::error!(error = %e, "Saga message processing failed"); + let decision = handle_poison( + storage, + settings, + metrics, + delivered, + envelope.tenant_id.clone(), + "runtime_error", + &serde_json::to_value(&envelope).unwrap_or(Value::Null), + ); + match decision { + AckDecision::Term => { + let _ = msg.ack_with(AckKind::Term).await; + } + AckDecision::Ack => { + let _ = msg.ack().await; + } + AckDecision::None => {} + } + } + } +} + +#[allow(clippy::too_many_arguments)] +async fn process_aggregate_event( + settings: &Settings, + storage: &KvClient, + programs: &SagaPrograms, + runtime: &SagaRuntime, + metrics: &Metrics, + envelope: &AggregateEventEnvelope, + subject: &str, + sequence: u64, + _delivered: u64, +) -> Result { + let mut any_processed = false; + + for saga in &programs.manifest().sagas { + if !saga + .trigger_subjects + .iter() + .any(|pat| subject_matches(pat, subject)) + { + continue; + } + + let tenant_id = envelope.tenant_id.clone(); + let saga_name = SagaName::new(saga.name.clone()); + let correlation_id = CorrelationId::new(extract_correlation_id(envelope)); + + let checkpoint_key = CheckpointKey::new(&tenant_id, &saga_name); + let checkpoint = 
storage.get_checkpoint(&checkpoint_key)?.unwrap_or(0); + if sequence <= checkpoint { + metrics.inc_saga_skipped_checkpoint(); + continue; + } + + let event_id = EventId::new(envelope.event_id.to_string()); + let dedupe_key = DedupeEventKey::new(&tenant_id, &saga_name, &event_id); + if storage.is_deduped_event(&dedupe_key)? { + metrics.inc_saga_skipped_dedupe(); + continue; + } + + let saga_key = SagaStateKey::new(&tenant_id, &saga_name, &correlation_id); + let current_state = storage + .get_saga_state(&saga_key)? + .unwrap_or_else(|| serde_json::json!({})); + + let program = programs + .program_for(&saga.name) + .ok_or_else(|| RunnerError::RuntimeError("Saga program missing".to_string()))?; + + let output = runtime + .execute_on_event(program, ¤t_state, envelope) + .await?; + + let outbox_items = output + .work_items + .into_iter() + .map(|item| { + let kind = match &item { + WorkItem::AggregateCommand(_) => "aggregate", + WorkItem::EffectCommand(_) => "effect", + }; + let work_id = WorkId::new_v7(); + let key = format!( + "outbox:{}:{}:{}", + tenant_id.as_str(), + kind, + work_id.as_uuid() + ); + (key, item) + }) + .collect::>(); + + let schedule_items = output + .schedules + .into_iter() + .map(|s| { + let key = ScheduleKey::new(&tenant_id, &saga_name, &correlation_id, s.due_at_ms); + (key, s.payload) + }) + .collect::>(); + + storage + .commit_saga_processing( + &saga_key, + &output.new_saga_state, + outbox_items, + schedule_items, + &checkpoint_key, + sequence, + Some(&dedupe_key), + ) + .inspect_err(|_| metrics.inc_saga_commit_failed())?; + + if settings.test_saga_crash_after_commit { + panic!("test_saga_crash_after_commit"); + } + + metrics.inc_saga_processed(); + any_processed = true; + } + + let _ = any_processed; + Ok(AckDecision::Ack) +} + +fn extract_correlation_id(envelope: &AggregateEventEnvelope) -> String { + envelope + .payload + .get("correlation_id") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()) + .unwrap_or_else(|| 
/// Returns true when `subject` matches the NATS-style wildcard `pattern`.
///
/// Both strings are split on `.` into tokens. A literal token must match
/// exactly, `*` matches exactly one token, and a trailing `>` matches one or
/// more remaining tokens (NATS semantics).
///
/// Fix: the previous version also accepted a trailing `>` that consumed zero
/// tokens (e.g. pattern `"a.>"` matched subject `"a"`), which NATS does not
/// allow — `>` must match at least one token.
fn subject_matches(pattern: &str, subject: &str) -> bool {
    let p: Vec<&str> = pattern.split('.').collect();
    let s: Vec<&str> = subject.split('.').collect();

    let mut i = 0usize;
    while i < p.len() && i < s.len() {
        match p[i] {
            // ">" consumes the rest of the subject; at this point at least
            // one subject token remains, satisfying "one or more" semantics.
            ">" => return true,
            // "*" matches exactly one token, whatever it is.
            "*" => {}
            lit => {
                if lit != s[i] {
                    return false;
                }
            }
        }
        i += 1;
    }

    // Match only when both pattern and subject are fully consumed. A pattern
    // ending in ">" that reaches this point would have matched zero subject
    // tokens with ">", so it is correctly rejected.
    i == p.len() && i == s.len()
}
"sourceHandle": "out", "target": "output", "targetHandle": "value" } + ], + "outputNodeId": "output" + } + "#; + serde_json::from_str(json).unwrap() + } + + fn saga_programs() -> SagaPrograms { + let manifest = SagaManifest { + sagas: vec![SagaDefinition { + name: "billing".to_string(), + trigger_subjects: vec!["tenant.*.aggregate.*.*".to_string()], + on_event: "unused.json".to_string(), + compensation: None, + }], + }; + + let mut programs = HashMap::new(); + let program = test_program_with_one_outbox_item(); + program.validate().unwrap(); + programs.insert("billing".to_string(), program); + + SagaPrograms { manifest, programs } + } + + fn event_with_id(event_id: Uuid) -> AggregateEventEnvelope { + AggregateEventEnvelope { + tenant_id: TenantId::new("t1"), + event_id, + aggregate_id: "a1".to_string(), + aggregate_type: "Account".to_string(), + version: 1, + event_type: "Created".to_string(), + payload: serde_json::json!({"correlation_id": "corr1"}), + command_id: Uuid::parse_str("00000000-0000-0000-0000-000000000002").unwrap(), + timestamp: Utc.with_ymd_and_hms(2026, 2, 9, 12, 0, 0).unwrap(), + } + } + + #[tokio::test] + async fn checkpoint_and_dedupe_gates_skip_already_processed_items() { + let storage = KvClient::in_memory(); + let programs = saga_programs(); + let runtime = SagaRuntime::default(); + let metrics = Metrics::default(); + let subject = "tenant.t1.aggregate.Account.a1"; + + let event1 = + event_with_id(Uuid::parse_str("00000000-0000-0000-0000-000000000010").unwrap()); + process_aggregate_event( + &Settings::default(), + &storage, + &programs, + &runtime, + &metrics, + &event1, + subject, + 10, + 1, + ) + .await + .unwrap(); + + assert_eq!(storage.list_outbox_all(100).unwrap().len(), 1); + + process_aggregate_event( + &Settings::default(), + &storage, + &programs, + &runtime, + &metrics, + &event1, + subject, + 11, + 1, + ) + .await + .unwrap(); + + assert_eq!(storage.list_outbox_all(100).unwrap().len(), 1); + + let event2 = + 
event_with_id(Uuid::parse_str("00000000-0000-0000-0000-000000000011").unwrap()); + process_aggregate_event( + &Settings::default(), + &storage, + &programs, + &runtime, + &metrics, + &event2, + subject, + 10, + 1, + ) + .await + .unwrap(); + + assert_eq!(storage.list_outbox_all(100).unwrap().len(), 1); + } + + #[tokio::test] + async fn pipeline_does_not_advance_checkpoint_if_commit_fails() { + let storage = KvClient::in_memory(); + let programs = saga_programs(); + let runtime = SagaRuntime::default(); + let metrics = Metrics::default(); + let subject = "tenant.t1.aggregate.Account.a1"; + + storage.fail_next_txn(); + + let event1 = + event_with_id(Uuid::parse_str("00000000-0000-0000-0000-000000000010").unwrap()); + let res = process_aggregate_event( + &Settings::default(), + &storage, + &programs, + &runtime, + &metrics, + &event1, + subject, + 10, + 1, + ) + .await; + assert!(res.is_err()); + + let saga_name = SagaName::new("billing"); + let checkpoint_key = CheckpointKey::new(&TenantId::new("t1"), &saga_name); + assert!(storage.get_checkpoint(&checkpoint_key).unwrap().is_none()); + assert_eq!(storage.list_outbox_all(100).unwrap().len(), 0); + } + + #[test] + fn quarantine_record_is_written_on_poison_handling_path() { + let storage = KvClient::in_memory(); + let settings = Settings { + max_deliver: 1, + ..Default::default() + }; + let metrics = Metrics::default(); + + let decision = handle_poison( + &storage, + &settings, + &metrics, + settings.max_deliver as u64, + TenantId::new("t1"), + "poison", + &serde_json::json!({"x": 1}), + ); + assert_eq!(decision, AckDecision::Term); + + let deadletters = storage.list_deadletters(10).unwrap(); + assert_eq!(deadletters.len(), 1); + assert_eq!(deadletters[0].1["reason"], "poison"); + assert_eq!(deadletters[0].1["tenant_id"], "t1"); + } +} diff --git a/runner/src/schedule/mod.rs b/runner/src/schedule/mod.rs new file mode 100644 index 0000000..2a15249 --- /dev/null +++ b/runner/src/schedule/mod.rs @@ -0,0 +1,3 @@ +mod 
scheduler; + +pub use scheduler::Scheduler; diff --git a/runner/src/schedule/scheduler.rs b/runner/src/schedule/scheduler.rs new file mode 100644 index 0000000..b84a2b7 --- /dev/null +++ b/runner/src/schedule/scheduler.rs @@ -0,0 +1,349 @@ +use crate::config::Settings; +use crate::observability::Metrics; +use crate::saga::{SagaPrograms, SagaRuntime}; +use crate::storage::KvClient; +use crate::tenant_placement::TenantGate; +use crate::types::{ + CorrelationId, RunnerError, SagaName, SagaStateKey, ScheduleKey, TenantId, WorkId, WorkItem, +}; +use chrono::{TimeZone, Utc}; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; +use std::time::Duration; + +#[derive(Clone)] +pub struct Scheduler; + +impl std::fmt::Debug for Scheduler { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Scheduler").finish_non_exhaustive() + } +} + +impl Scheduler { + pub fn new() -> Self { + Self + } + + #[allow(clippy::too_many_arguments)] + pub async fn run( + &self, + settings: Settings, + storage: KvClient, + programs: Arc, + runtime: SagaRuntime, + metrics: Arc, + tenant_gate: Arc, + shutdown: Arc, + draining: Arc, + ) -> Result<(), RunnerError> { + let mut tick = tokio::time::interval(Duration::from_millis( + settings.schedule_scan_interval_ms.max(1), + )); + tick.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay); + + loop { + tokio::select! 
{ + _ = shutdown.notified() => break, + _ = tick.tick() => {} + } + + if draining.load(Ordering::Relaxed) { + continue; + } + + let now_ms = Utc::now().timestamp_millis().max(0) as u64; + let due = storage.scan_due_schedule_items_all(now_ms, settings.schedule_batch_size)?; + + for (key, payload) in due { + let (tenant_id, saga_name, correlation_id, due_at_ms) = parse_schedule_key(&key) + .ok_or_else(|| RunnerError::DecodeError("Invalid schedule key".to_string()))?; + + if !tenant_gate.should_acquire_processing_work( + tenant_id.as_str(), + draining.load(Ordering::Relaxed), + ) { + continue; + } + let _work = tenant_gate.begin_work(tenant_id.as_str()); + + let saga_def = programs + .manifest() + .sagas + .iter() + .find(|s| s.name == saga_name.as_str()) + .ok_or_else(|| RunnerError::RuntimeError("Unknown saga".to_string()))?; + + let program = programs + .program_for(&saga_def.name) + .ok_or_else(|| RunnerError::RuntimeError("Saga program missing".to_string()))?; + + let saga_key = SagaStateKey::new(&tenant_id, &saga_name, &correlation_id); + let current_state = storage + .get_saga_state(&saga_key)? 
+ .unwrap_or_else(|| serde_json::json!({})); + + let event = serde_json::json!({ + "type": "schedule", + "due_at_ms": due_at_ms, + "payload": payload + }); + + let ctx = runtime_function::Context::new( + Utc.timestamp_millis_opt(due_at_ms as i64) + .single() + .unwrap_or_else(|| Utc.timestamp_millis_opt(0).single().unwrap()), + format!( + "schedule:{}:{}:{}:{}", + tenant_id.as_str(), + saga_name.as_str(), + correlation_id.as_str(), + due_at_ms + ), + ) + .with_tenant_id(tenant_id.as_str()) + .with_correlation_id(correlation_id.as_str()); + + let output = runtime + .execute(program, ¤t_state, &event, ctx) + .await?; + + let outbox_items = output + .work_items + .into_iter() + .map(|item| { + let kind = match &item { + WorkItem::AggregateCommand(_) => "aggregate", + WorkItem::EffectCommand(_) => "effect", + }; + let work_id = WorkId::new_v7(); + let key = format!( + "outbox:{}:{}:{}", + tenant_id.as_str(), + kind, + work_id.as_uuid() + ); + (key, item) + }) + .collect::>(); + + let new_schedule_items = output + .schedules + .into_iter() + .map(|s| { + let key = + ScheduleKey::new(&tenant_id, &saga_name, &correlation_id, s.due_at_ms); + (key, s.payload) + }) + .collect::>(); + + let schedule_key = + ScheduleKey::new(&tenant_id, &saga_name, &correlation_id, due_at_ms); + + storage + .commit_schedule_processing( + &saga_key, + &output.new_saga_state, + outbox_items, + &schedule_key, + new_schedule_items, + ) + .inspect_err(|_| metrics.inc_schedule_failed())?; + metrics.inc_schedule_processed(); + } + } + + Ok(()) + } +} + +impl Default for Scheduler { + fn default() -> Self { + Self::new() + } +} + +fn parse_schedule_key(key: &str) -> Option<(TenantId, SagaName, CorrelationId, u64)> { + let mut it = key.split(':'); + if it.next()? 
!= "schedule" { + return None; + } + let tenant = TenantId::new(it.next()?.to_string()); + let saga = SagaName::new(it.next()?.to_string()); + let corr = CorrelationId::new(it.next()?.to_string()); + let due = it.next()?.parse::().ok()?; + Some((tenant, saga, corr, due)) +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + fn program_json_with_one_outbox_item() -> &'static str { + r#" + { + "specVersion": "1.1", + "id": "schedule_test", + "name": "schedule_test", + "inputs": [ + { "name": "saga_state", "type": "Any" }, + { "name": "event", "type": "Any" } + ], + "nodes": [ + { + "id": "const", + "type": "Const", + "data": { + "value": { + "new_saga_state": { "x": 1 }, + "work_items": [ + { + "kind": "effect_command", + "tenant_id": "t1", + "command_id": "c1", + "effect_name": "noop", + "payload": { "a": 1 }, + "metadata": {} + } + ], + "schedules": [] + } + } + }, + { "id": "output", "type": "Output", "data": {} } + ], + "edges": [ + { "id": "e1", "source": "const", "sourceHandle": "out", "target": "output", "targetHandle": "value" } + ], + "outputNodeId": "output" + } + "# + } + + #[tokio::test] + async fn restart_rescans_and_delivers_due_schedule_exactly_once() { + let storage = KvClient::in_memory(); + let runtime = SagaRuntime::default(); + let metrics = Arc::new(Metrics::default()); + + let tmp = tempfile::tempdir().unwrap(); + let program_path = tmp.path().join("saga_on_event.json"); + let manifest_path = tmp.path().join("sagas.yaml"); + std::fs::write(&program_path, program_json_with_one_outbox_item()).unwrap(); + std::fs::write( + &manifest_path, + format!( + "sagas:\n - name: noop\n trigger_subjects: [\"tenant.*.aggregate.*.*\"]\n on_event: \"{}\"\n", + program_path.to_string_lossy() + ), + ) + .unwrap(); + + let settings = Settings { + saga_manifest_path: manifest_path.to_string_lossy().to_string(), + schedule_scan_interval_ms: 1, + schedule_batch_size: 10, + ..Default::default() + }; + let programs = 
Arc::new(SagaPrograms::load(&settings).unwrap()); + + let tenant = TenantId::new("t1"); + let saga = SagaName::new("noop"); + let corr = CorrelationId::new("c1"); + let due_at_ms = Utc::now().timestamp_millis().max(0) as u64; + let schedule_key = ScheduleKey::new(&tenant, &saga, &corr, due_at_ms); + storage + .put_schedule_item(&schedule_key, &json!({"x": 1})) + .unwrap(); + + storage.fail_next_txn(); + + let shutdown = Arc::new(tokio::sync::Notify::new()); + let draining = Arc::new(AtomicBool::new(false)); + let task = tokio::spawn({ + let settings = settings.clone(); + let storage = storage.clone(); + let programs = programs.clone(); + let runtime = runtime.clone(); + let metrics = metrics.clone(); + let tenant_gate = Arc::new(TenantGate::new(None)); + let shutdown = shutdown.clone(); + async move { + let scheduler = Scheduler; + let _ = scheduler + .run( + settings, + storage, + programs, + runtime, + metrics, + tenant_gate, + shutdown, + draining, + ) + .await; + } + }); + + tokio::time::sleep(Duration::from_millis(50)).await; + shutdown.notify_waiters(); + let _ = tokio::time::timeout(Duration::from_secs(1), task).await; + + assert_eq!(storage.list_outbox_all(100).unwrap().len(), 0); + assert_eq!( + storage + .scan_due_schedule_items_all(due_at_ms + 1, 10) + .unwrap() + .len(), + 1 + ); + + let shutdown = Arc::new(tokio::sync::Notify::new()); + let draining = Arc::new(AtomicBool::new(false)); + let task = tokio::spawn({ + let storage = storage.clone(); + let shutdown = shutdown.clone(); + async move { + let scheduler = Scheduler; + let tenant_gate = Arc::new(TenantGate::new(None)); + let _ = scheduler + .run( + settings, + storage, + programs, + runtime, + metrics, + tenant_gate, + shutdown, + draining, + ) + .await; + } + }); + + let start = tokio::time::Instant::now(); + loop { + let outbox = storage.list_outbox_all(100).unwrap().len(); + if outbox == 1 { + break; + } + if start.elapsed() > Duration::from_secs(2) { + panic!("timed out waiting for outbox"); + 
} + tokio::time::sleep(Duration::from_millis(10)).await; + } + + shutdown.notify_waiters(); + let _ = tokio::time::timeout(Duration::from_secs(1), task).await; + + assert_eq!(storage.list_outbox_all(100).unwrap().len(), 1); + assert_eq!( + storage + .scan_due_schedule_items_all(due_at_ms + 1, 10) + .unwrap() + .len(), + 0 + ); + } +} diff --git a/runner/src/storage/kv.rs b/runner/src/storage/kv.rs new file mode 100644 index 0000000..56db243 --- /dev/null +++ b/runner/src/storage/kv.rs @@ -0,0 +1,793 @@ +use crate::types::{ + CheckpointKey, DedupeEffectKey, DedupeEventKey, RunnerError, SagaStateKey, ScheduleKey, + TenantId, WorkId, WorkItem, +}; +use edge_storage::{Config as EdgeConfig, EdgeStorage, KvStore, TableNames, Writer}; +use libmdbx::{NoWriteMap, WriteFlags, RW}; +use serde_json::Value; +use std::sync::Arc; + +#[derive(Clone)] +pub struct KvClient { + storage: Arc, + kv: KvStore, + #[cfg(test)] + fail_next_txn: Arc, +} + +impl std::fmt::Debug for KvClient { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("KvClient").finish_non_exhaustive() + } +} + +impl KvClient { + pub fn open(storage_path: impl Into) -> Result { + let config = EdgeConfig::new(storage_path.into()); + let storage = EdgeStorage::open(config.clone())?; + let writer = Arc::new(Writer::new(storage.db().clone(), &config)); + let kv = KvStore::new(storage.db().clone(), writer); + + Ok(Self { + storage: Arc::new(storage), + kv, + #[cfg(test)] + fail_next_txn: Arc::new(std::sync::atomic::AtomicBool::new(false)), + }) + } + + #[cfg(test)] + pub fn in_memory() -> Self { + use tempfile::tempdir; + let dir = tempdir().expect("failed to create temp dir"); + let path = dir.path().join("test.mdbx"); + std::mem::forget(dir); + Self::open(path.to_string_lossy().to_string()).expect("failed to open storage") + } + + #[cfg(test)] + pub fn fail_next_txn(&self) { + self.fail_next_txn + .store(true, std::sync::atomic::Ordering::SeqCst); + } + + pub fn get_saga_state(&self, 
key: &SagaStateKey) -> Result<Option<Value>, RunnerError> {
        self.get_json(key.as_str().as_bytes())
    }

    /// Stores the serialized saga state under the saga's composite key.
    pub fn put_saga_state(&self, key: &SagaStateKey, value: &Value) -> Result<(), RunnerError> {
        self.put_json(key.as_str().as_bytes(), value)
    }

    /// Builds the namespaced storage key used for runner configuration values.
    /// Centralizes the `config:` prefix so put/get/delete cannot drift apart.
    fn config_storage_key(key: &str) -> String {
        format!("config:{}", key)
    }

    /// Writes a JSON configuration value under the `config:` namespace.
    pub fn put_config_value(&self, key: &str, value: &Value) -> Result<(), RunnerError> {
        let storage_key = Self::config_storage_key(key);
        self.put_json(storage_key.as_bytes(), value)
    }

    /// Reads a JSON configuration value; `Ok(None)` when the key is absent.
    pub fn get_config_value(&self, key: &str) -> Result<Option<Value>, RunnerError> {
        let storage_key = Self::config_storage_key(key);
        self.get_json(storage_key.as_bytes())
    }

    /// Deletes a configuration value; deleting a missing key is not an error.
    pub fn delete_config_value(&self, key: &str) -> Result<(), RunnerError> {
        let storage_key = Self::config_storage_key(key);
        self.delete_key(storage_key.as_bytes())
    }

    /// Persists an operator-supplied effects manifest override as a JSON
    /// config value under the fixed `effects_manifest` key.
    pub fn put_effects_manifest_override(
        &self,
        manifest: &crate::effects::EffectsManifest,
    ) -> Result<(), RunnerError> {
        let value =
            serde_json::to_value(manifest).map_err(|e| RunnerError::StorageError(e.to_string()))?;
        self.put_config_value("effects_manifest", &value)
    }

    /// Loads the effects manifest override, if one has been stored.
    /// A stored value that fails to decode surfaces as `DecodeError` rather
    /// than being silently treated as absent.
    pub fn get_effects_manifest_override(
        &self,
    ) -> Result<Option<crate::effects::EffectsManifest>, RunnerError> {
        match self.get_config_value("effects_manifest")? {
            Some(v) => serde_json::from_value(v)
                .map(Some)
                .map_err(|e| RunnerError::DecodeError(e.to_string())),
            None => Ok(None),
        }
    }

    /// Removes the effects manifest override so the default manifest applies.
    pub fn clear_effects_manifest_override(&self) -> Result<(), RunnerError> {
        self.delete_config_value("effects_manifest")
    }

    /// Reads the checkpoint sequence number stored as JSON for this key;
    /// `Ok(None)` when no checkpoint has been written yet.
    pub fn get_checkpoint(&self, key: &CheckpointKey) -> Result<Option<u64>, RunnerError> {
        let bytes = self
            .kv
            .get(key.as_str().as_bytes())
            .map_err(|e| RunnerError::StorageError(e.to_string()))?;
        let Some(bytes) = bytes else {
            return Ok(None);
        };
        serde_json::from_slice::<u64>(&bytes)
            .map(Some)
            .map_err(|e| RunnerError::DecodeError(format!("Failed to decode checkpoint: {}", e)))
    }

    /// Writes the checkpoint sequence number in its own transaction.
    pub fn put_checkpoint(&self, key: &CheckpointKey, value: u64) -> Result<(), RunnerError> {
        let bytes =
serde_json::to_vec(&value).map_err(|e| RunnerError::StorageError(e.to_string()))?; + self.commit_kv_txn(|txn, table| { + txn.put( + table, + key.as_str().as_bytes(), + bytes.as_slice(), + WriteFlags::empty(), + )?; + Ok(()) + }) + } + + pub fn put_outbox_item( + &self, + tenant_id: &TenantId, + work_kind: &str, + work_id: &WorkId, + item: &WorkItem, + ) -> Result { + let key = format!("outbox:{}:{}:{}", tenant_id.as_str(), work_kind, work_id); + let bytes = + serde_json::to_vec(item).map_err(|e| RunnerError::StorageError(e.to_string()))?; + self.commit_kv_txn(|txn, table| { + txn.put(table, key.as_bytes(), bytes.as_slice(), WriteFlags::empty())?; + Ok(()) + })?; + Ok(key) + } + + pub fn list_outbox_prefix( + &self, + tenant_id: &TenantId, + max_items: usize, + ) -> Result, RunnerError> { + let prefix = format!("outbox:{}:", tenant_id.as_str()); + let txn = self + .storage + .db() + .begin_ro_txn() + .map_err(|e| RunnerError::StorageError(e.to_string()))?; + + let mut items = Vec::new(); + for res in self + .kv + .prefix_scan(&txn, prefix.as_bytes()) + .map_err(|e| RunnerError::StorageError(e.to_string()))? + { + let (k, v) = res.map_err(|e| RunnerError::StorageError(e.to_string()))?; + let key = String::from_utf8_lossy(&k).to_string(); + let item: WorkItem = + serde_json::from_slice(&v).map_err(|e| RunnerError::DecodeError(e.to_string()))?; + items.push((key, item)); + if items.len() >= max_items { + break; + } + } + + Ok(items) + } + + pub fn list_outbox_all( + &self, + max_items: usize, + ) -> Result, RunnerError> { + let txn = self + .storage + .db() + .begin_ro_txn() + .map_err(|e| RunnerError::StorageError(e.to_string()))?; + + let mut items = Vec::new(); + for res in self + .kv + .prefix_scan(&txn, b"outbox:") + .map_err(|e| RunnerError::StorageError(e.to_string()))? 
{
            let (k, v) = res.map_err(|e| RunnerError::StorageError(e.to_string()))?;
            let key = String::from_utf8_lossy(&k).to_string();
            let item: WorkItem =
                serde_json::from_slice(&v).map_err(|e| RunnerError::DecodeError(e.to_string()))?;
            items.push((key, item));
            if items.len() >= max_items {
                break;
            }
        }

        Ok(items)
    }

    /// Removes a single outbox entry by its full storage key.
    pub fn delete_outbox_item(&self, key: &str) -> Result<(), RunnerError> {
        self.delete_key(key.as_bytes())
    }

    /// Stores a schedule payload under its due-time-encoded key.
    pub fn put_schedule_item(&self, key: &ScheduleKey, payload: &Value) -> Result<(), RunnerError> {
        self.put_json(key.as_str().as_bytes(), payload)
    }

    /// Returns up to `max_items` schedule entries for one tenant whose due
    /// time (the trailing `:` segment of the key) is <= `now_ms`.
    pub fn scan_due_schedule_items(
        &self,
        tenant_id: &TenantId,
        now_ms: u64,
        max_items: usize,
    ) -> Result<Vec<(String, Value)>, RunnerError> {
        let prefix = ScheduleKey::prefix_for_tenant(tenant_id);
        self.scan_due_with_prefix(prefix.as_bytes(), now_ms, max_items)
    }

    /// Returns up to `max_items` due schedule entries across all tenants.
    pub fn scan_due_schedule_items_all(
        &self,
        now_ms: u64,
        max_items: usize,
    ) -> Result<Vec<(String, Value)>, RunnerError> {
        self.scan_due_with_prefix(b"schedule:", now_ms, max_items)
    }

    /// Shared due-scan: walks `prefix` in a read-only transaction, keeping
    /// entries whose last `:`-separated key segment parses as a u64 that is
    /// <= `now_ms`. Keys with an unparsable due time are treated as never
    /// due (`u64::MAX`) and skipped, matching the original behavior of both
    /// public scan variants.
    fn scan_due_with_prefix(
        &self,
        prefix: &[u8],
        now_ms: u64,
        max_items: usize,
    ) -> Result<Vec<(String, Value)>, RunnerError> {
        let txn = self
            .storage
            .db()
            .begin_ro_txn()
            .map_err(|e| RunnerError::StorageError(e.to_string()))?;

        let mut due = Vec::new();
        for res in self
            .kv
            .prefix_scan(&txn, prefix)
            .map_err(|e| RunnerError::StorageError(e.to_string()))?
        {
            let (k, v) = res.map_err(|e| RunnerError::StorageError(e.to_string()))?;
            let key = String::from_utf8_lossy(&k).to_string();
            let due_at = key
                .rsplit(':')
                .next()
                .and_then(|s| s.parse::<u64>().ok())
                .unwrap_or(u64::MAX);
            if due_at > now_ms {
                continue;
            }
            let payload: Value =
                serde_json::from_slice(&v).map_err(|e| RunnerError::DecodeError(e.to_string()))?;
            due.push((key, payload));
            if due.len() >= max_items {
                break;
            }
        }

        Ok(due)
    }

    /// Deletes one schedule entry; deleting a missing key is not an error.
    pub fn delete_schedule_item(&self, key: &ScheduleKey) -> Result<(), RunnerError> {
        self.delete_key(key.as_str().as_bytes())
    }

    /// Stores a dead-letter record under the caller-provided key.
    pub fn put_deadletter(&self, key: &str, record: &Value) -> Result<(), RunnerError> {
        self.put_json(key.as_bytes(), record)
    }

    /// Lists up to `max_items` dead-letter records.
    pub fn list_deadletters(&self, max_items: usize) -> Result<Vec<(String, Value)>, RunnerError> {
        let txn = self
            .storage
            .db()
            .begin_ro_txn()
            .map_err(|e| RunnerError::StorageError(e.to_string()))?;

        let mut items = Vec::new();
        for res in self
            .kv
            .prefix_scan(&txn, b"deadletter:")
            .map_err(|e| RunnerError::StorageError(e.to_string()))?
+ { + let (k, v) = res.map_err(|e| RunnerError::StorageError(e.to_string()))?; + let key = String::from_utf8_lossy(&k).to_string(); + let record: Value = + serde_json::from_slice(&v).map_err(|e| RunnerError::DecodeError(e.to_string()))?; + items.push((key, record)); + if items.len() >= max_items { + break; + } + } + + Ok(items) + } + + pub fn delete_prefix(&self, prefix: &str, max_items: usize) -> Result { + let keys = self.list_keys_with_prefix(prefix.as_bytes(), max_items)?; + if keys.is_empty() { + return Ok(0); + } + + self.commit_kv_txn(|txn, table| { + for key in &keys { + let _ = txn.del(table, key.as_bytes(), None)?; + } + Ok(()) + })?; + + Ok(keys.len()) + } + + fn list_keys_with_prefix( + &self, + prefix: &[u8], + max_items: usize, + ) -> Result, RunnerError> { + let txn = self + .storage + .db() + .begin_ro_txn() + .map_err(|e| RunnerError::StorageError(e.to_string()))?; + + let mut keys = Vec::new(); + for res in self + .kv + .prefix_scan(&txn, prefix) + .map_err(|e| RunnerError::StorageError(e.to_string()))? 
+ { + let (k, _) = res.map_err(|e| RunnerError::StorageError(e.to_string()))?; + keys.push(String::from_utf8_lossy(&k).to_string()); + if keys.len() >= max_items { + break; + } + } + Ok(keys) + } + + pub fn writable_probe(&self) -> Result<(), RunnerError> { + let key = b"__runner_health_probe"; + self.commit_kv_txn(|txn, table| { + txn.put(table, key, b"1", WriteFlags::empty())?; + let _ = txn.del(table, key, None)?; + Ok(()) + }) + } + + pub fn is_deduped_event(&self, key: &DedupeEventKey) -> Result { + let bytes = self + .kv + .get(key.as_str().as_bytes()) + .map_err(|e| RunnerError::StorageError(e.to_string()))?; + Ok(bytes.is_some()) + } + + pub fn mark_deduped_event(&self, key: &DedupeEventKey) -> Result<(), RunnerError> { + self.commit_kv_txn(|txn, table| { + txn.put(table, key.as_str().as_bytes(), b"1", WriteFlags::empty())?; + Ok(()) + }) + } + + pub fn is_deduped_effect(&self, key: &DedupeEffectKey) -> Result { + let bytes = self + .kv + .get(key.as_str().as_bytes()) + .map_err(|e| RunnerError::StorageError(e.to_string()))?; + Ok(bytes.is_some()) + } + + pub fn mark_deduped_effect(&self, key: &DedupeEffectKey) -> Result<(), RunnerError> { + self.commit_kv_txn(|txn, table| { + txn.put(table, key.as_str().as_bytes(), b"1", WriteFlags::empty())?; + Ok(()) + }) + } + + #[allow(clippy::too_many_arguments)] + pub fn commit_saga_processing( + &self, + saga_key: &SagaStateKey, + new_saga_state: &Value, + outbox_items: Vec<(String, WorkItem)>, + schedule_items: Vec<(ScheduleKey, Value)>, + checkpoint_key: &CheckpointKey, + checkpoint_sequence: u64, + dedupe_event: Option<&DedupeEventKey>, + ) -> Result<(), RunnerError> { + let saga_bytes = serde_json::to_vec(new_saga_state) + .map_err(|e| RunnerError::StorageError(e.to_string()))?; + let checkpoint_bytes = serde_json::to_vec(&checkpoint_sequence) + .map_err(|e| RunnerError::StorageError(e.to_string()))?; + let outbox_bytes = outbox_items + .into_iter() + .map(|(k, item)| { + serde_json::to_vec(&item) + .map(|bytes| 
(k, bytes)) + .map_err(|e| RunnerError::StorageError(e.to_string())) + }) + .collect::, _>>()?; + + let schedule_bytes = schedule_items + .into_iter() + .map(|(k, payload)| { + serde_json::to_vec(&payload) + .map(|bytes| (k, bytes)) + .map_err(|e| RunnerError::StorageError(e.to_string())) + }) + .collect::, _>>()?; + + self.commit_kv_txn(|txn, table| { + txn.put( + table, + saga_key.as_str().as_bytes(), + saga_bytes.as_slice(), + WriteFlags::empty(), + )?; + + for (key, bytes) in outbox_bytes { + txn.put(table, key.as_bytes(), bytes.as_slice(), WriteFlags::empty())?; + } + + for (key, bytes) in schedule_bytes { + txn.put( + table, + key.as_str().as_bytes(), + bytes.as_slice(), + WriteFlags::empty(), + )?; + } + + txn.put( + table, + checkpoint_key.as_str().as_bytes(), + checkpoint_bytes.as_slice(), + WriteFlags::empty(), + )?; + + if let Some(key) = dedupe_event { + txn.put(table, key.as_str().as_bytes(), b"1", WriteFlags::empty())?; + } + + Ok(()) + }) + } + + pub fn commit_schedule_processing( + &self, + saga_key: &SagaStateKey, + new_saga_state: &Value, + outbox_items: Vec<(String, WorkItem)>, + schedule_key: &ScheduleKey, + new_schedule_items: Vec<(ScheduleKey, Value)>, + ) -> Result<(), RunnerError> { + let saga_bytes = serde_json::to_vec(new_saga_state) + .map_err(|e| RunnerError::StorageError(e.to_string()))?; + let outbox_bytes = outbox_items + .into_iter() + .map(|(k, item)| { + serde_json::to_vec(&item) + .map(|bytes| (k, bytes)) + .map_err(|e| RunnerError::StorageError(e.to_string())) + }) + .collect::, _>>()?; + + let schedule_bytes = new_schedule_items + .into_iter() + .map(|(k, payload)| { + serde_json::to_vec(&payload) + .map(|bytes| (k, bytes)) + .map_err(|e| RunnerError::StorageError(e.to_string())) + }) + .collect::, _>>()?; + + self.commit_kv_txn(|txn, table| { + txn.put( + table, + saga_key.as_str().as_bytes(), + saga_bytes.as_slice(), + WriteFlags::empty(), + )?; + + for (key, bytes) in outbox_bytes { + txn.put(table, key.as_bytes(), 
bytes.as_slice(), WriteFlags::empty())?;
            }

            for (key, bytes) in schedule_bytes {
                txn.put(
                    table,
                    key.as_str().as_bytes(),
                    bytes.as_slice(),
                    WriteFlags::empty(),
                )?;
            }

            // Deleting the consumed schedule entry in the same transaction
            // makes "process + reschedule" atomic with the saga-state write.
            let _ = txn.del(table, schedule_key.as_str().as_bytes(), None)?;
            Ok(())
        })
    }

    /// Reads `key` and decodes the stored bytes as JSON; `Ok(None)` when the
    /// key is absent, `DecodeError` when present but not valid JSON.
    fn get_json(&self, key: &[u8]) -> Result<Option<Value>, RunnerError> {
        let bytes = self
            .kv
            .get(key)
            .map_err(|e| RunnerError::StorageError(e.to_string()))?;

        match bytes {
            Some(bytes) => serde_json::from_slice(&bytes)
                .map(Some)
                .map_err(|e| RunnerError::DecodeError(e.to_string())),
            None => Ok(None),
        }
    }

    /// Serializes `value` as JSON and writes it under `key` in one
    /// committed transaction.
    fn put_json(&self, key: &[u8], value: &Value) -> Result<(), RunnerError> {
        let bytes =
            serde_json::to_vec(value).map_err(|e| RunnerError::StorageError(e.to_string()))?;
        self.commit_kv_txn(|txn, table| {
            txn.put(table, key, bytes.as_slice(), WriteFlags::empty())?;
            Ok(())
        })
    }

    /// Deletes `key` in one committed transaction. The `del` result is
    /// discarded so deleting a missing key is not an error.
    fn delete_key(&self, key: &[u8]) -> Result<(), RunnerError> {
        self.commit_kv_txn(|txn, table| {
            let _ = txn.del(table, key, None)?;
            Ok(())
        })
    }

    /// Runs `f` inside a single read-write MDBX transaction on the KV table
    /// and commits it. Any error from `f` returns before `commit`, so the
    /// transaction is dropped uncommitted and no partial writes survive —
    /// this is the property the `atomicity_no_partial_commit_on_failure`
    /// test exercises. In test builds, `fail_next_txn` injects one failure
    /// before the transaction even begins.
    fn commit_kv_txn<F>(&self, f: F) -> Result<(), RunnerError>
    where
        F: FnOnce(
            &libmdbx::Transaction<'_, RW, NoWriteMap>,
            &libmdbx::Table<'_>,
        ) -> Result<(), libmdbx::Error>,
    {
        #[cfg(test)]
        {
            // One-shot failpoint: consume the flag so only the next txn fails.
            if self
                .fail_next_txn
                .swap(false, std::sync::atomic::Ordering::SeqCst)
            {
                return Err(RunnerError::StorageError("failpoint".to_string()));
            }
        }

        let txn = self
            .storage
            .db()
            .begin_rw_txn()
            .map_err(|e| RunnerError::StorageError(e.to_string()))?;
        let table = txn
            .open_table(TableNames::KV_STORE)
            .map_err(|e| RunnerError::StorageError(e.to_string()))?;
        if let Err(e) = f(&txn, &table) {
            // Returning here drops `txn` without committing: full rollback.
            return Err(RunnerError::StorageError(e.to_string()));
        }
        txn.commit()
            .map_err(|e| RunnerError::StorageError(e.to_string()))?;
        Ok(())
    }
}

/// Error raised when the underlying edge storage cannot be opened.
#[derive(Debug, thiserror::Error)]
pub enum StorageInitError {
    #[error("Failed to open storage: {0}")]
    OpenError(#[from] edge_storage::Error),
}

#[cfg(test)]
mod
tests { + use super::*; + use crate::types::{CorrelationId, EventId, SagaName}; + use serde_json::json; + + #[test] + fn saga_state_roundtrip_put_get() { + let client = KvClient::in_memory(); + let tenant = TenantId::new("t1"); + let saga = SagaName::new("billing"); + let corr = CorrelationId::new("c1"); + let key = SagaStateKey::new(&tenant, &saga, &corr); + + client.put_saga_state(&key, &json!({"a": 1})).unwrap(); + let loaded = client.get_saga_state(&key).unwrap().unwrap(); + assert_eq!(loaded["a"], 1); + } + + #[test] + fn checkpoint_roundtrip_put_get() { + let client = KvClient::in_memory(); + let tenant = TenantId::new("t1"); + let saga = SagaName::new("billing"); + let key = CheckpointKey::new(&tenant, &saga); + + client.put_checkpoint(&key, 42).unwrap(); + let loaded = client.get_checkpoint(&key).unwrap().unwrap(); + assert_eq!(loaded, 42); + } + + #[test] + fn atomicity_no_partial_commit_on_failure() { + let client = KvClient::in_memory(); + let tenant = TenantId::new("t1"); + let saga = SagaName::new("billing"); + let corr = CorrelationId::new("c1"); + let saga_key = SagaStateKey::new(&tenant, &saga, &corr); + let checkpoint_key = CheckpointKey::new(&tenant, &saga); + + let preexisting = "preexisting".as_bytes().to_vec(); + client + .commit_kv_txn(|txn, table| { + txn.put(table, preexisting.as_slice(), b"1", WriteFlags::empty())?; + Ok(()) + }) + .unwrap(); + + let txn_result = client.commit_kv_txn(|txn, table| { + let saga_bytes = serde_json::to_vec(&json!({"x": 1})).unwrap(); + txn.put( + table, + saga_key.as_str().as_bytes(), + saga_bytes.as_slice(), + WriteFlags::empty(), + )?; + txn.put( + table, + preexisting.as_slice(), + b"2", + WriteFlags::NO_OVERWRITE, + )?; + let checkpoint_bytes = serde_json::to_vec(&123u64).unwrap(); + txn.put( + table, + checkpoint_key.as_str().as_bytes(), + checkpoint_bytes.as_slice(), + WriteFlags::empty(), + )?; + Ok(()) + }); + + assert!(txn_result.is_err()); + 
assert!(client.get_saga_state(&saga_key).unwrap().is_none()); + assert!(client.get_checkpoint(&checkpoint_key).unwrap().is_none()); + } + + #[test] + fn outbox_delete_removes_key() { + let client = KvClient::in_memory(); + let tenant = TenantId::new("t1"); + let work_id = WorkId::new_v7(); + let key = client + .put_outbox_item( + &tenant, + "effect", + &work_id, + &WorkItem::EffectCommand(crate::types::EffectCommandEnvelope { + tenant_id: tenant.clone(), + command_id: crate::types::CommandId::new("c1"), + effect_name: crate::types::EffectName::new("noop"), + payload: json!({"a": 1}), + metadata: crate::types::MessageMetadata::default(), + }), + ) + .unwrap(); + + assert_eq!(client.list_outbox_prefix(&tenant, 10).unwrap().len(), 1); + client.delete_outbox_item(&key).unwrap(); + assert_eq!(client.list_outbox_prefix(&tenant, 10).unwrap().len(), 0); + } + + #[test] + fn schedule_scan_only_returns_due_items() { + let client = KvClient::in_memory(); + let tenant = TenantId::new("t1"); + let saga = SagaName::new("billing"); + let corr = CorrelationId::new("c1"); + + let due_key = ScheduleKey::new(&tenant, &saga, &corr, 100); + let future_key = ScheduleKey::new(&tenant, &saga, &corr, 200); + client + .put_schedule_item(&due_key, &json!({"x": 1})) + .unwrap(); + client + .put_schedule_item(&future_key, &json!({"x": 2})) + .unwrap(); + + let due = client.scan_due_schedule_items(&tenant, 150, 10).unwrap(); + assert_eq!(due.len(), 1); + assert_eq!(due[0].1["x"], 1); + } + + #[test] + fn scheduler_is_tenant_scoped() { + let client = KvClient::in_memory(); + let tenant_a = TenantId::new("t1"); + let tenant_b = TenantId::new("t2"); + let saga = SagaName::new("billing"); + let corr = CorrelationId::new("c1"); + + let key_a = ScheduleKey::new(&tenant_a, &saga, &corr, 100); + let key_b = ScheduleKey::new(&tenant_b, &saga, &corr, 100); + client + .put_schedule_item(&key_a, &json!({"x": "a"})) + .unwrap(); + client + .put_schedule_item(&key_b, &json!({"x": "b"})) + .unwrap(); + + let 
due_a = client.scan_due_schedule_items(&tenant_a, 200, 10).unwrap(); + assert_eq!(due_a.len(), 1); + assert_eq!(due_a[0].1["x"], "a"); + } + + #[test] + fn due_schedule_item_can_be_committed_and_deleted() { + let client = KvClient::in_memory(); + let tenant = TenantId::new("t1"); + let saga = SagaName::new("billing"); + let corr = CorrelationId::new("c1"); + + let schedule_key = ScheduleKey::new(&tenant, &saga, &corr, 100); + client + .put_schedule_item(&schedule_key, &json!({"x": 1})) + .unwrap(); + + let saga_key = SagaStateKey::new(&tenant, &saga, &corr); + let outbox_key = format!("outbox:{}:effect:{}", tenant.as_str(), WorkId::new_v7()); + client + .commit_schedule_processing( + &saga_key, + &json!({"state": 1}), + vec![( + outbox_key, + WorkItem::EffectCommand(crate::types::EffectCommandEnvelope { + tenant_id: tenant.clone(), + command_id: crate::types::CommandId::new("c1"), + effect_name: crate::types::EffectName::new("noop"), + payload: json!({"a": 1}), + metadata: crate::types::MessageMetadata::default(), + }), + )], + &schedule_key, + Vec::new(), + ) + .unwrap(); + + let due = client.scan_due_schedule_items(&tenant, 200, 10).unwrap(); + assert!(due.is_empty()); + } + + #[test] + fn dedupe_marker_roundtrip() { + let client = KvClient::in_memory(); + let tenant = TenantId::new("t1"); + let saga = SagaName::new("billing"); + let event_id = EventId::new("e1"); + let key = DedupeEventKey::new(&tenant, &saga, &event_id); + assert!(!client.is_deduped_event(&key).unwrap()); + client.mark_deduped_event(&key).unwrap(); + assert!(client.is_deduped_event(&key).unwrap()); + } +} diff --git a/runner/src/storage/mod.rs b/runner/src/storage/mod.rs new file mode 100644 index 0000000..3cc5a94 --- /dev/null +++ b/runner/src/storage/mod.rs @@ -0,0 +1,3 @@ +mod kv; + +pub use kv::{KvClient, StorageInitError}; diff --git a/runner/src/stream/jetstream.rs b/runner/src/stream/jetstream.rs new file mode 100644 index 0000000..a956312 --- /dev/null +++ 
b/runner/src/stream/jetstream.rs @@ -0,0 +1,289 @@ +use crate::config::Settings; +use crate::types::{EffectCommandEnvelope, EffectResultEnvelope, RunnerError}; +use async_nats::jetstream::{ + self, + consumer::pull::Config as PullConfig, + consumer::{AckPolicy, DeliverPolicy, ReplayPolicy}, + stream::Config as StreamConfig, +}; + +#[derive(Debug, Clone)] +pub struct JetStreamClient { + jetstream: jetstream::Context, + aggregate_events_stream: jetstream::stream::Stream, + workflow_commands_stream: jetstream::stream::Stream, + #[allow(dead_code)] + workflow_events_stream: jetstream::stream::Stream, +} + +#[derive(Debug, Clone)] +pub struct ConsumerOptions { + pub durable_name: String, + pub filter_subject: String, + pub deliver_policy: DeliverPolicy, +} + +impl JetStreamClient { + pub async fn connect(settings: &Settings) -> Result { + let client = async_nats::connect(&settings.nats_url) + .await + .map_err(|e| StreamInitError::Nats(e.to_string()))?; + + let jetstream = jetstream::new(client); + + let aggregate_events_subjects = if settings.saga_trigger_subject_filters.is_empty() { + vec!["tenant.*.aggregate.*.*".to_string()] + } else { + settings.saga_trigger_subject_filters.clone() + }; + let workflow_commands_subjects = vec![ + "tenant.*.effect.*.*".to_string(), + "tenant.*.workflow.*.*".to_string(), + ]; + + let workflow_events_subjects = vec![ + "tenant.*.effect_result.*.*".to_string(), + "tenant.*.workflow_event.*.*".to_string(), + ]; + + let mut last_err = None; + for attempt in 0..30u64 { + match try_init_streams( + &jetstream, + settings, + aggregate_events_subjects.clone(), + workflow_commands_subjects.clone(), + workflow_events_subjects.clone(), + ) + .await + { + Ok((aggregate_events_stream, workflow_commands_stream, workflow_events_stream)) => { + return Ok(Self { + jetstream, + aggregate_events_stream, + workflow_commands_stream, + workflow_events_stream, + }); + } + Err(e) => { + last_err = Some(e); + let backoff = std::time::Duration::from_millis(100 
* (attempt + 1).min(20));
                    tokio::time::sleep(backoff).await;
                }
            }
        }

        Err(last_err.unwrap_or(StreamInitError::Stream(
            "Stream initialization failed".to_string(),
        )))
    }

    /// Builds the pull-consumer configuration shared by every runner
    /// consumer: explicit acks, instant replay, and ack/inflight/delivery
    /// limits taken from `Settings`. Extracted so the two consumer builders
    /// below cannot drift apart.
    fn pull_consumer_config(settings: &Settings, options: &ConsumerOptions) -> PullConfig {
        PullConfig {
            durable_name: Some(options.durable_name.clone()),
            deliver_policy: options.deliver_policy,
            ack_policy: AckPolicy::Explicit,
            ack_wait: std::time::Duration::from_millis(settings.ack_timeout_ms),
            filter_subject: options.filter_subject.clone(),
            replay_policy: ReplayPolicy::Instant,
            max_ack_pending: settings.max_in_flight as i64,
            max_deliver: settings.max_deliver,
            ..Default::default()
        }
    }

    /// Gets or creates the durable pull consumer that feeds saga triggers
    /// from the aggregate-events stream.
    pub async fn saga_trigger_consumer(
        &self,
        settings: &Settings,
        options: ConsumerOptions,
    ) -> Result<jetstream::consumer::Consumer<PullConfig>, StreamInitError> {
        let consumer_config = Self::pull_consumer_config(settings, &options);

        self.aggregate_events_stream
            .get_or_create_consumer(&options.durable_name, consumer_config)
            .await
            .map_err(|e| StreamInitError::Consumer(e.to_string()))
    }

    /// Gets or creates the durable pull consumer that feeds effect commands
    /// from the workflow-commands stream.
    pub async fn effect_command_consumer(
        &self,
        settings: &Settings,
        options: ConsumerOptions,
    ) -> Result<jetstream::consumer::Consumer<PullConfig>, StreamInitError> {
        let consumer_config = Self::pull_consumer_config(settings, &options);

        self.workflow_commands_stream
            .get_or_create_consumer(&options.durable_name, consumer_config)
            .await
            .map_err(|e| StreamInitError::Consumer(e.to_string()))
    }

    /// Publishes an effect result to `subject`, mirroring tenant / command /
    /// trace metadata into NATS headers so consumers can correlate without
    /// decoding the JSON body.
    pub async fn publish_effect_result(
        &self,
        subject: String,
        result: &EffectResultEnvelope,
    ) -> Result<(), RunnerError> {
        let payload =
            serde_json::to_vec(result).map_err(|e| RunnerError::DecodeError(e.to_string()))?;
        let mut headers = async_nats::HeaderMap::new();
        headers.insert("tenant-id", result.tenant_id.as_str());
        headers.insert("command-id",
result.command_id.as_str()); + headers.insert("effect-name", result.effect_name.as_str()); + if let Some(correlation_id) = result.metadata.correlation_id.as_ref() { + headers.insert("x-correlation-id", correlation_id.as_str()); + headers.insert("correlation-id", correlation_id.as_str()); + } + if let Some(trace_id) = result.metadata.trace_id.as_ref() { + headers.insert("trace-id", trace_id.as_str()); + if let Some(traceparent) = shared::traceparent_from_trace_id(trace_id) { + headers.insert("traceparent", traceparent.as_str()); + } + } + if let Some(traceparent) = result + .metadata + .extra + .get("traceparent") + .and_then(|v| v.as_str()) + { + headers.insert("traceparent", traceparent); + if result.metadata.trace_id.is_none() { + if let Some(trace_id) = shared::trace_id_from_traceparent(traceparent) { + headers.insert("trace-id", trace_id); + } + } + } + + self.jetstream + .publish_with_headers(subject, headers, payload.into()) + .await + .map_err(|e| RunnerError::StreamError(e.to_string()))?; + Ok(()) + } + + pub async fn publish_effect_command( + &self, + cmd: &EffectCommandEnvelope, + ) -> Result<(), RunnerError> { + let subject = format!( + "tenant.{}.effect.{}.{}", + cmd.tenant_id.as_str(), + cmd.effect_name.as_str(), + cmd.command_id.as_str() + ); + + let payload = + serde_json::to_vec(cmd).map_err(|e| RunnerError::DecodeError(e.to_string()))?; + let mut headers = async_nats::HeaderMap::new(); + headers.insert("Nats-Msg-Id", cmd.command_id.as_str()); + headers.insert("tenant-id", cmd.tenant_id.as_str()); + headers.insert("command-id", cmd.command_id.as_str()); + headers.insert("effect-name", cmd.effect_name.as_str()); + if let Some(correlation_id) = cmd.metadata.correlation_id.as_ref() { + headers.insert("x-correlation-id", correlation_id.as_str()); + headers.insert("correlation-id", correlation_id.as_str()); + } + if let Some(trace_id) = cmd.metadata.trace_id.as_ref() { + headers.insert("trace-id", trace_id.as_str()); + if let Some(traceparent) = 
shared::traceparent_from_trace_id(trace_id) { + headers.insert("traceparent", traceparent.as_str()); + } + } + if let Some(traceparent) = cmd + .metadata + .extra + .get("traceparent") + .and_then(|v| v.as_str()) + { + headers.insert("traceparent", traceparent); + if cmd.metadata.trace_id.is_none() { + if let Some(trace_id) = shared::trace_id_from_traceparent(traceparent) { + headers.insert("trace-id", trace_id); + } + } + } + + self.jetstream + .publish_with_headers(subject, headers, payload.into()) + .await + .map_err(|e| RunnerError::StreamError(e.to_string()))?; + Ok(()) + } +} + +async fn try_init_streams( + jetstream: &jetstream::Context, + settings: &Settings, + aggregate_events_subjects: Vec, + workflow_commands_subjects: Vec, + workflow_events_subjects: Vec, +) -> Result< + ( + jetstream::stream::Stream, + jetstream::stream::Stream, + jetstream::stream::Stream, + ), + StreamInitError, +> { + let aggregate_events_stream = ensure_stream( + jetstream, + &settings.aggregate_events_stream, + aggregate_events_subjects, + ) + .await?; + + let workflow_commands_stream = ensure_stream( + jetstream, + &settings.workflow_commands_stream, + workflow_commands_subjects, + ) + .await?; + + let workflow_events_stream = ensure_stream( + jetstream, + &settings.workflow_events_stream, + workflow_events_subjects, + ) + .await?; + + Ok(( + aggregate_events_stream, + workflow_commands_stream, + workflow_events_stream, + )) +} + +async fn ensure_stream( + jetstream: &jetstream::Context, + name: &str, + subjects: Vec, +) -> Result { + let config = StreamConfig { + name: name.to_string(), + subjects, + max_messages: 10_000_000, + max_bytes: -1, + max_age: std::time::Duration::from_secs(365 * 24 * 60 * 60), + duplicate_window: std::time::Duration::from_secs(120), + ..Default::default() + }; + jetstream + .get_or_create_stream(config) + .await + .map_err(|e| StreamInitError::Stream(e.to_string())) +} + +#[derive(Debug, thiserror::Error)] +pub enum StreamInitError { + #[error("Failed 
to connect to NATS: {0}")] + Nats(String), + #[error("Stream error: {0}")] + Stream(String), + #[error("Consumer error: {0}")] + Consumer(String), +} diff --git a/runner/src/stream/mod.rs b/runner/src/stream/mod.rs new file mode 100644 index 0000000..b44063d --- /dev/null +++ b/runner/src/stream/mod.rs @@ -0,0 +1,3 @@ +mod jetstream; + +pub use jetstream::{ConsumerOptions, JetStreamClient, StreamInitError}; diff --git a/runner/src/tenant_placement.rs b/runner/src/tenant_placement.rs new file mode 100644 index 0000000..12ad003 --- /dev/null +++ b/runner/src/tenant_placement.rs @@ -0,0 +1,285 @@ +use crate::config::Settings; +use crate::types::RunnerError; +use async_nats::jetstream; +use futures::StreamExt; +use std::collections::{HashMap, HashSet}; +use std::sync::{Arc, Mutex, RwLock}; +use std::time::Duration; +use tokio::sync::watch; +use tokio::sync::Notify; + +pub async fn start_tenant_filter( + settings: &Settings, +) -> Result>>, RunnerError> { + if let (Some(bucket), Some(shard_id)) = ( + settings.tenant_placement_bucket.clone(), + settings.shard_id.clone(), + ) { + let nats_url = settings.nats_url.clone(); + let (tx, rx) = watch::channel(HashSet::::new()); + tokio::spawn(async move { + if let Err(e) = watch_tenant_placement(nats_url, bucket, shard_id, tx).await { + tracing::error!(error = %e, "Tenant placement watcher failed"); + } + }); + return Ok(Some(rx)); + } + + if !settings.tenant_allowlist.is_empty() { + let initial = settings + .tenant_allowlist + .iter() + .cloned() + .collect::>(); + let (_tx, rx) = watch::channel(initial); + return Ok(Some(rx)); + } + + Ok(None) +} + +async fn watch_tenant_placement( + nats_url: String, + bucket: String, + shard_id: String, + tx: watch::Sender>, +) -> Result<(), RunnerError> { + let client = async_nats::connect(&nats_url) + .await + .map_err(|e| RunnerError::StreamError(e.to_string()))?; + let js = jetstream::new(client); + + let store = match js.get_key_value(bucket.clone()).await { + Ok(store) => store, + 
Err(_) => js + .create_key_value(jetstream::kv::Config { + bucket: bucket.clone(), + history: 1, + ..Default::default() + }) + .await + .map_err(|e| RunnerError::StreamError(e.to_string()))?, + }; + + let mut assignments: HashMap = HashMap::new(); + let mut current: HashSet = HashSet::new(); + + let mut watch = store + .watch_all_from_revision(1) + .await + .map_err(|e| RunnerError::StreamError(e.to_string()))?; + + while let Some(entry) = watch.next().await { + let entry = entry.map_err(|e| RunnerError::StreamError(e.to_string()))?; + + match entry.operation { + jetstream::kv::Operation::Put => { + let value = String::from_utf8(entry.value.to_vec()) + .map_err(|e| RunnerError::DecodeError(e.to_string()))?; + assignments.insert(entry.key, value); + } + jetstream::kv::Operation::Delete | jetstream::kv::Operation::Purge => { + assignments.remove(&entry.key); + } + } + + let next = assignments + .iter() + .filter_map(|(tenant, shard)| { + if shard == &shard_id { + Some(tenant.clone()) + } else { + None + } + }) + .collect::>(); + + if next != current { + current = next.clone(); + let _ = tx.send(next); + } + } + + Ok(()) +} + +#[derive(Clone)] +pub struct TenantGate { + assigned: Arc>>>, + draining: Arc>>, + inflight: Arc>>, + inflight_notify: Arc>>>, +} + +impl std::fmt::Debug for TenantGate { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("TenantGate").finish_non_exhaustive() + } +} + +impl TenantGate { + pub fn new(tenant_filter: Option>>) -> Self { + let assigned = Arc::new(RwLock::new( + tenant_filter.as_ref().map(|rx| rx.borrow().clone()), + )); + + if let Some(mut rx) = tenant_filter { + let assigned = assigned.clone(); + tokio::spawn(async move { + loop { + if rx.changed().await.is_err() { + break; + } + let next = rx.borrow().clone(); + if let Ok(mut g) = assigned.write() { + *g = Some(next); + } + } + }); + } + + Self { + assigned, + draining: Arc::new(RwLock::new(HashSet::new())), + inflight: 
Arc::new(Mutex::new(HashMap::new())),
            inflight_notify: Arc::new(Mutex::new(HashMap::new())),
        }
    }

    /// True when this shard should handle `tenant_id`. No filter installed
    /// means "all tenants"; a poisoned lock also fails open so work is not
    /// silently dropped.
    pub fn is_assigned(&self, tenant_id: &str) -> bool {
        match self.assigned.read() {
            Ok(guard) => match guard.as_ref() {
                None => true,
                Some(tenants) => tenants.contains(tenant_id),
            },
            Err(_) => true,
        }
    }

    /// Snapshot of the assigned-tenant set; `None` means no filter is
    /// active (or the lock was poisoned).
    pub fn assigned_tenants_snapshot(&self) -> Option<HashSet<String>> {
        match self.assigned.read() {
            Ok(guard) => guard.clone(),
            Err(_) => None,
        }
    }

    /// Snapshot of the tenants currently draining; empty on lock poisoning.
    pub fn draining_tenants_snapshot(&self) -> HashSet<String> {
        self.draining
            .read()
            .map(|guard| guard.clone())
            .unwrap_or_default()
    }

    /// Marks a tenant as draining. Idempotent.
    pub fn start_draining(&self, tenant_id: &str) {
        if let Ok(mut draining) = self.draining.write() {
            draining.insert(tenant_id.to_string());
        }
    }

    /// Clears a tenant's draining flag. Idempotent.
    pub fn stop_draining(&self, tenant_id: &str) {
        if let Ok(mut draining) = self.draining.write() {
            draining.remove(tenant_id);
        }
    }

    /// True while the tenant is draining; false on lock poisoning.
    pub fn is_draining(&self, tenant_id: &str) -> bool {
        self.draining
            .read()
            .map(|guard| guard.contains(tenant_id))
            .unwrap_or(false)
    }

    /// Current in-flight work count for the tenant; 0 on lock poisoning.
    pub fn inflight_count(&self, tenant_id: &str) -> usize {
        self.inflight
            .lock()
            .map(|guard| guard.get(tenant_id).copied().unwrap_or(0))
            .unwrap_or(0)
    }

    /// New processing work is accepted only when neither the process nor
    /// this tenant is draining, and the tenant is assigned to this shard.
    pub fn should_acquire_processing_work(&self, tenant_id: &str, global_draining: bool) -> bool {
        !global_draining && !self.is_draining(tenant_id) && self.is_assigned(tenant_id)
    }

    /// Outbox dispatch continues during a tenant-level drain (to flush
    /// pending work) but stops on a global drain.
    pub fn should_dispatch_outbox_work(&self, tenant_id: &str, global_draining: bool) -> bool {
        !global_draining && (self.is_assigned(tenant_id) || self.is_draining(tenant_id))
    }

    /// Increments the tenant's in-flight counter and returns a guard that
    /// decrements it again (and wakes waiters at zero) when dropped.
    pub fn begin_work(&self, tenant_id: &str) -> TenantWorkGuard {
        if let Ok(mut inflight) = self.inflight.lock() {
            *inflight.entry(tenant_id.to_string()).or_insert(0) += 1;
        }
        TenantWorkGuard {
            gate: self.clone(),
            tenant_id: tenant_id.to_string(),
        }
    }

    /// Waits until the tenant's in-flight count reaches zero or `timeout`
    /// elapses; returns true on success, false on timeout.
    pub async fn wait_inflight_zero(&self, tenant_id: &str, timeout: Duration) -> bool {
        let start = tokio::time::Instant::now();
        loop {
if self.inflight_count(tenant_id) == 0 { + return true; + } + let remaining = timeout.saturating_sub(start.elapsed()); + if remaining.is_zero() { + return false; + } + + let notify = { + let Ok(mut g) = self.inflight_notify.lock() else { + tokio::time::sleep(Duration::from_millis(50)).await; + continue; + }; + g.entry(tenant_id.to_string()) + .or_insert_with(|| Arc::new(Notify::new())) + .clone() + }; + + let _ = + tokio::time::timeout(remaining.min(Duration::from_millis(250)), notify.notified()) + .await; + } + } +} + +pub struct TenantWorkGuard { + gate: TenantGate, + tenant_id: String, +} + +impl Drop for TenantWorkGuard { + fn drop(&mut self) { + let count = if let Ok(mut g) = self.gate.inflight.lock() { + match g.get_mut(&self.tenant_id) { + Some(v) => { + *v = v.saturating_sub(1); + let next = *v; + if next == 0 { + g.remove(&self.tenant_id); + } + next + } + None => 0, + } + } else { + 0 + }; + + if count == 0 { + if let Ok(g) = self.gate.inflight_notify.lock() { + if let Some(n) = g.get(&self.tenant_id) { + n.notify_waiters(); + } + } + } + } +} diff --git a/runner/src/types/envelope.rs b/runner/src/types/envelope.rs new file mode 100644 index 0000000..d14408b --- /dev/null +++ b/runner/src/types/envelope.rs @@ -0,0 +1,105 @@ +use crate::types::{CommandId, CorrelationId, EffectName, TenantId, TraceId}; +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use std::collections::BTreeMap; +use uuid::Uuid; + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +#[serde(default)] +pub struct MessageMetadata { + pub correlation_id: Option, + pub trace_id: Option, + #[serde(flatten)] + pub extra: BTreeMap, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AggregateEventEnvelope { + pub tenant_id: TenantId, + pub event_id: Uuid, + pub aggregate_id: String, + pub aggregate_type: String, + pub version: u64, + pub event_type: String, + pub payload: Value, + pub command_id: Uuid, + pub timestamp: DateTime, +} 
+ +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct GatewayCommandEnvelope { + pub tenant_id: TenantId, + pub command_id: CommandId, + pub aggregate_id: String, + pub aggregate_type: String, + pub payload_json: Value, + #[serde(default)] + pub metadata: MessageMetadata, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct EffectCommandEnvelope { + pub tenant_id: TenantId, + pub command_id: CommandId, + pub effect_name: EffectName, + pub payload: Value, + #[serde(default)] + pub metadata: MessageMetadata, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum EffectResultType { + Succeeded, + Failed, + TimedOut, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct EffectResultEnvelope { + pub tenant_id: TenantId, + pub command_id: CommandId, + pub effect_name: EffectName, + pub result_type: EffectResultType, + pub payload: Value, + pub timestamp: DateTime, + #[serde(default)] + pub metadata: MessageMetadata, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum WorkItem { + AggregateCommand(GatewayCommandEnvelope), + EffectCommand(EffectCommandEnvelope), +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::types::{CommandId, EffectName, TenantId}; + use serde_json::json; + + #[test] + fn envelope_decoding_ignores_unknown_fields() { + let raw = json!({ + "tenant_id": "t1", + "command_id": "c1", + "effect_name": "send_email", + "payload": {"to": "a@example.com"}, + "metadata": {"correlation_id": "corr", "trace_id": "trace", "extra_key": 123}, + "unknown_field": "ignored" + }); + + let decoded: EffectCommandEnvelope = serde_json::from_value(raw).unwrap(); + assert_eq!(decoded.tenant_id, TenantId::new("t1")); + assert_eq!(decoded.command_id, CommandId::new("c1")); + assert_eq!(decoded.effect_name, EffectName::new("send_email")); + assert_eq!( + decoded.metadata.correlation_id.as_ref().map(|v| 
v.as_str()), + Some("corr") + ); + assert_eq!(decoded.metadata.extra.get("extra_key"), Some(&json!(123))); + } +} diff --git a/runner/src/types/error.rs b/runner/src/types/error.rs new file mode 100644 index 0000000..c91d30c --- /dev/null +++ b/runner/src/types/error.rs @@ -0,0 +1,19 @@ +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum RunnerError { + #[error("Storage error: {0}")] + StorageError(String), + #[error("Stream error: {0}")] + StreamError(String), + #[error("Decode error: {0}")] + DecodeError(String), + #[error("Runtime error: {0}")] + RuntimeError(String), + #[error("Tenant access error: {0}")] + TenantAccessError(String), + #[error("Policy error: {0}")] + PolicyError(String), +} + +pub type RunnerResult = Result; diff --git a/runner/src/types/id.rs b/runner/src/types/id.rs new file mode 100644 index 0000000..5b8efd4 --- /dev/null +++ b/runner/src/types/id.rs @@ -0,0 +1,204 @@ +use serde::{Deserialize, Serialize}; +use std::fmt; +use std::str::FromStr; +use uuid::Uuid; + +pub type TenantId = shared::TenantId; + +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct SagaName(String); + +impl SagaName { + pub fn new(name: impl Into) -> Self { + Self(name.into()) + } + + pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl fmt::Display for SagaName { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl FromStr for SagaName { + type Err = std::convert::Infallible; + + fn from_str(s: &str) -> Result { + Ok(Self(s.to_string())) + } +} + +impl AsRef for SagaName { + fn as_ref(&self) -> &str { + &self.0 + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct EffectName(String); + +impl EffectName { + pub fn new(name: impl Into) -> Self { + Self(name.into()) + } + + pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl fmt::Display for EffectName { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } 
+} + +impl FromStr for EffectName { + type Err = std::convert::Infallible; + + fn from_str(s: &str) -> Result { + Ok(Self(s.to_string())) + } +} + +impl AsRef for EffectName { + fn as_ref(&self) -> &str { + &self.0 + } +} + +pub type CorrelationId = shared::CorrelationId; +pub type TraceId = shared::TraceId; + +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct CommandId(String); + +impl CommandId { + pub fn new(id: impl Into) -> Self { + Self(id.into()) + } + + pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl fmt::Display for CommandId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl FromStr for CommandId { + type Err = std::convert::Infallible; + + fn from_str(s: &str) -> Result { + Ok(Self(s.to_string())) + } +} + +impl AsRef for CommandId { + fn as_ref(&self) -> &str { + &self.0 + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct EventId(String); + +impl EventId { + pub fn new(id: impl Into) -> Self { + Self(id.into()) + } + + pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl fmt::Display for EventId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl FromStr for EventId { + type Err = std::convert::Infallible; + + fn from_str(s: &str) -> Result { + Ok(Self(s.to_string())) + } +} + +impl AsRef for EventId { + fn as_ref(&self) -> &str { + &self.0 + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct WorkId(Uuid); + +impl WorkId { + pub fn new_v7() -> Self { + Self(Uuid::now_v7()) + } + + pub fn as_uuid(&self) -> &Uuid { + &self.0 + } + + pub fn as_str(&self) -> String { + self.0.to_string() + } +} + +impl fmt::Display for WorkId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl FromStr for WorkId { + type Err = uuid::Error; + + fn from_str(s: &str) -> Result { + 
Ok(Self(Uuid::parse_str(s)?)) + } +} + +impl Default for WorkId { + fn default() -> Self { + Self::new_v7() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn tenant_id_roundtrips_and_defaults_to_empty() { + let id = TenantId::new("acme"); + let json = serde_json::to_string(&id).unwrap(); + let decoded: TenantId = serde_json::from_str(&json).unwrap(); + assert_eq!(id, decoded); + assert!(TenantId::default().is_empty()); + } + + #[test] + fn ids_are_send_sync() { + fn assert_send_sync() {} + assert_send_sync::(); + assert_send_sync::(); + assert_send_sync::(); + assert_send_sync::(); + assert_send_sync::(); + assert_send_sync::(); + assert_send_sync::(); + } +} diff --git a/runner/src/types/keys.rs b/runner/src/types/keys.rs new file mode 100644 index 0000000..00007d4 --- /dev/null +++ b/runner/src/types/keys.rs @@ -0,0 +1,149 @@ +use crate::types::{CommandId, CorrelationId, EventId, SagaName, TenantId, WorkId}; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct SagaStateKey(String); + +impl SagaStateKey { + pub fn new(tenant_id: &TenantId, saga_name: &SagaName, correlation_id: &CorrelationId) -> Self { + Self(format!( + "saga:{}:{}:{}", + tenant_id.as_str(), + saga_name.as_str(), + correlation_id.as_str() + )) + } + + pub fn as_str(&self) -> &str { + &self.0 + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct CheckpointKey(String); + +impl CheckpointKey { + pub fn new(tenant_id: &TenantId, saga_name: &SagaName) -> Self { + Self(format!( + "checkpoint:{}:{}", + tenant_id.as_str(), + saga_name.as_str() + )) + } + + pub fn as_str(&self) -> &str { + &self.0 + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct OutboxKey(String); + +impl OutboxKey { + pub fn new(tenant_id: &TenantId, work_kind: &str, work_id: &WorkId) -> Self { + Self(format!( + "outbox:{}:{}:{}", + tenant_id.as_str(), + work_kind, + work_id + )) + } + + pub fn prefix_for_tenant(tenant_id: &TenantId) -> String { + format!("outbox:{}:", 
tenant_id.as_str()) + } + + pub fn as_str(&self) -> &str { + &self.0 + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct ScheduleKey(String); + +impl ScheduleKey { + pub fn new( + tenant_id: &TenantId, + saga_name: &SagaName, + correlation_id: &CorrelationId, + due_at_ms: u64, + ) -> Self { + Self(format!( + "schedule:{}:{}:{}:{}", + tenant_id.as_str(), + saga_name.as_str(), + correlation_id.as_str(), + due_at_ms + )) + } + + pub fn prefix_for_tenant(tenant_id: &TenantId) -> String { + format!("schedule:{}:", tenant_id.as_str()) + } + + pub fn as_str(&self) -> &str { + &self.0 + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct DedupeEventKey(String); + +impl DedupeEventKey { + pub fn new(tenant_id: &TenantId, saga_name: &SagaName, event_id: &EventId) -> Self { + Self(format!( + "dedupe:{}:{}:{}", + tenant_id.as_str(), + saga_name.as_str(), + event_id.as_str() + )) + } + + pub fn as_str(&self) -> &str { + &self.0 + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct DedupeEffectKey(String); + +impl DedupeEffectKey { + pub fn new(tenant_id: &TenantId, command_id: &CommandId) -> Self { + Self(format!( + "dedupe:{}:effect:{}", + tenant_id.as_str(), + command_id.as_str() + )) + } + + pub fn as_str(&self) -> &str { + &self.0 + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::types::{CorrelationId, SagaName, TenantId, WorkId}; + + #[test] + fn key_composition_is_stable() { + let tenant = TenantId::new("t1"); + let saga = SagaName::new("billing"); + let corr = CorrelationId::new("c1"); + let work_id = WorkId::new_v7(); + + let saga_key = SagaStateKey::new(&tenant, &saga, &corr); + assert_eq!(saga_key.as_str(), "saga:t1:billing:c1"); + + let cp_key = CheckpointKey::new(&tenant, &saga); + assert_eq!(cp_key.as_str(), "checkpoint:t1:billing"); + + let outbox_key = OutboxKey::new(&tenant, "effect", &work_id); + assert!(outbox_key.as_str().starts_with("outbox:t1:effect:")); + 
assert_eq!(OutboxKey::prefix_for_tenant(&tenant), "outbox:t1:"); + + let schedule_key = ScheduleKey::new(&tenant, &saga, &corr, 123); + assert_eq!(schedule_key.as_str(), "schedule:t1:billing:c1:123"); + assert_eq!(ScheduleKey::prefix_for_tenant(&tenant), "schedule:t1:"); + } +} diff --git a/runner/src/types/mod.rs b/runner/src/types/mod.rs new file mode 100644 index 0000000..0aa8bcd --- /dev/null +++ b/runner/src/types/mod.rs @@ -0,0 +1,9 @@ +mod envelope; +mod error; +mod id; +mod keys; + +pub use envelope::*; +pub use error::*; +pub use id::*; +pub use keys::*; diff --git a/runner/tests/e2e_integration.rs b/runner/tests/e2e_integration.rs new file mode 100644 index 0000000..d262540 --- /dev/null +++ b/runner/tests/e2e_integration.rs @@ -0,0 +1,1119 @@ +use async_nats::jetstream::{ + self, consumer::pull::Config as PullConfig, consumer::AckPolicy, stream::Config as StreamConfig, +}; +use futures::StreamExt; +use runner::config::Settings; +use runner::effects::run_effect_worker; +use runner::observability::Metrics; +use runner::outbox::OutboxRelay; +use runner::saga::{run_saga_worker, SagaPrograms, SagaRuntime}; +use runner::storage::KvClient; +use runner::stream::JetStreamClient; +use runner::types::{ + AggregateEventEnvelope, CheckpointKey, CommandId, EffectName, EffectResultEnvelope, + EffectResultType, MessageMetadata, SagaName, TenantId, +}; +use serde_json::json; +use std::sync::atomic::AtomicBool; +use std::sync::Arc; +use std::time::Duration; +use uuid::Uuid; + +fn nats_url_or_skip() -> Option { + std::env::var("RUNNER_TEST_NATS_URL").ok() +} + +fn unique_suffix() -> String { + Uuid::now_v7().simple().to_string() +} + +fn open_temp_storage(dir: &std::path::Path) -> KvClient { + let path = dir.join("runner.mdbx").to_string_lossy().to_string(); + KvClient::open(path).unwrap() +} + +fn tenant_from_suffix(suffix: &str) -> TenantId { + TenantId::new(format!("t{}", suffix)) +} + +fn command_from_suffix(suffix: &str) -> CommandId { + CommandId::new(format!("c{}", 
suffix)) +} + +fn write_fixture_saga_files( + dir: &std::path::Path, + tenant_id: &TenantId, + command_id: &CommandId, + correlation_id: &str, +) -> (String, String) { + let program_path = dir.join("saga_on_event.json"); + let manifest_path = dir.join("sagas.yaml"); + + let template = r#" +{ + "specVersion": "1.1", + "id": "e2e_saga", + "name": "e2e_saga", + "inputs": [ + { "name": "saga_state", "type": "Any" }, + { "name": "event", "type": "Any" } + ], + "nodes": [ + { + "id": "const", + "type": "Const", + "data": { + "value": { + "new_saga_state": {}, + "work_items": [ + { + "kind": "effect_command", + "tenant_id": "__TENANT_ID__", + "command_id": "__COMMAND_ID__", + "effect_name": "noop", + "payload": { "ok": true }, + "metadata": { "correlation_id": "__CORRELATION_ID__" } + } + ], + "schedules": [] + } + } + }, + { "id": "output", "type": "Output", "data": {} } + ], + "edges": [ + { "id": "e1", "source": "const", "sourceHandle": "out", "target": "output", "targetHandle": "value" } + ], + "outputNodeId": "output" +} +"#; + let program = template + .replace("__TENANT_ID__", tenant_id.as_str()) + .replace("__COMMAND_ID__", command_id.as_str()) + .replace("__CORRELATION_ID__", correlation_id); + std::fs::write(&program_path, program).unwrap(); + + std::fs::write( + &manifest_path, + format!( + r#" +sagas: + - name: noop + trigger_subjects: ["tenant.*.aggregate.*.*"] + on_event: "{}" +"#, + program_path.to_string_lossy() + ), + ) + .unwrap(); + + ( + manifest_path.to_string_lossy().to_string(), + program_path.to_string_lossy().to_string(), + ) +} + +fn write_fixture_effects_manifest(dir: &std::path::Path) -> String { + let path = dir.join("effects.yaml"); + std::fs::write( + &path, + r#" +effects: + - name: noop + provider: noop + config: {} +"#, + ) + .unwrap(); + path.to_string_lossy().to_string() +} + +async fn ensure_base_streams(nats_url: &str) { + let client = async_nats::connect(nats_url).await.unwrap(); + let js = jetstream::new(client); + let _ = js + 
.get_or_create_stream(StreamConfig { + name: "AGGREGATE_EVENTS".to_string(), + subjects: vec!["tenant.*.aggregate.*.*".to_string()], + ..Default::default() + }) + .await + .unwrap(); + let _ = js + .get_or_create_stream(StreamConfig { + name: "WORKFLOW_COMMANDS".to_string(), + subjects: vec![ + "tenant.*.effect.*.*".to_string(), + "tenant.*.workflow.*.*".to_string(), + ], + ..Default::default() + }) + .await + .unwrap(); + let _ = js + .get_or_create_stream(StreamConfig { + name: "WORKFLOW_EVENTS".to_string(), + subjects: vec![ + "tenant.*.effect_result.*.*".to_string(), + "tenant.*.workflow_event.*.*".to_string(), + ], + ..Default::default() + }) + .await + .unwrap(); +} + +async fn wait_for_effect_result( + nats_url: &str, + settings: &Settings, + consumer_name: String, + subject_filter: String, +) -> EffectResultEnvelope { + let client = async_nats::connect(nats_url).await.unwrap(); + let js = jetstream::new(client); + let stream = js + .get_stream(&settings.workflow_events_stream) + .await + .unwrap(); + let durable_name = consumer_name.clone(); + let consumer = stream + .get_or_create_consumer( + consumer_name.as_str(), + PullConfig { + durable_name: Some(durable_name), + ack_policy: AckPolicy::Explicit, + filter_subject: subject_filter, + ..Default::default() + }, + ) + .await + .unwrap(); + + let mut messages = consumer.messages().await.unwrap(); + let msg = tokio::time::timeout(Duration::from_secs(10), messages.next()) + .await + .unwrap() + .unwrap() + .unwrap(); + let _ = msg.ack().await; + serde_json::from_slice(&msg.payload).unwrap() +} + +async fn count_messages( + nats_url: &str, + stream_name: String, + consumer_name: String, + subject_filter: String, + max: usize, + duration: Duration, +) -> usize { + let client = async_nats::connect(nats_url).await.unwrap(); + let js = jetstream::new(client); + let stream = js.get_stream(&stream_name).await.unwrap(); + let consumer = stream + .get_or_create_consumer( + consumer_name.as_str(), + PullConfig { + 
durable_name: Some(consumer_name.to_string()), + ack_policy: AckPolicy::Explicit, + filter_subject: subject_filter, + ..Default::default() + }, + ) + .await + .unwrap(); + + let mut messages = consumer.messages().await.unwrap(); + let start = tokio::time::Instant::now(); + let mut count = 0usize; + while count < max && start.elapsed() < duration { + let next = tokio::time::timeout(Duration::from_millis(500), messages.next()).await; + match next { + Ok(Some(Ok(msg))) => { + count += 1; + let _ = msg.ack().await; + } + Ok(Some(Err(_))) => {} + Ok(None) => break, + Err(_) => {} + } + } + count +} + +#[test] +#[ignore] +fn integration_full_happy_path_workflow() { + let Some(nats_url) = nats_url_or_skip() else { + return; + }; + + let rt = tokio::runtime::Runtime::new().unwrap(); + rt.block_on(async move { + let tmp = tempfile::tempdir().unwrap(); + let suffix = unique_suffix(); + let tenant = tenant_from_suffix(&suffix); + let command_id = command_from_suffix(&suffix); + let correlation_id = format!("corr_{}", suffix); + + let (saga_manifest_path, _) = + write_fixture_saga_files(tmp.path(), &tenant, &command_id, &correlation_id); + let effects_manifest_path = write_fixture_effects_manifest(tmp.path()); + + let settings = Settings { + nats_url: nats_url.clone(), + consumer_durable_prefix: format!("runner_{}", suffix), + saga_manifest_path, + effects_manifest_path, + saga_trigger_subject_filters: vec![format!("tenant.{}.aggregate.*.*", tenant.as_str())], + effect_command_subject_filters: vec![format!("tenant.{}.effect.*.*", tenant.as_str())], + ..Default::default() + }; + + ensure_base_streams(&nats_url).await; + + let metrics = Arc::new(Metrics::default()); + let storage = open_temp_storage(tmp.path()); + let shutdown = Arc::new(tokio::sync::Notify::new()); + let draining = Arc::new(AtomicBool::new(false)); + let tenant_gate = Arc::new(runner::tenant_placement::TenantGate::new(None)); + + let programs = Arc::new(SagaPrograms::load(&settings).unwrap()); + let runtime = 
SagaRuntime::default(); + + let saga_task = tokio::spawn(run_saga_worker( + settings.clone(), + storage.clone(), + programs, + runtime.clone(), + metrics.clone(), + tenant_gate.clone(), + None, + shutdown.clone(), + draining.clone(), + )); + + let js = JetStreamClient::connect(&settings).await.unwrap(); + let outbox_task = tokio::spawn(OutboxRelay.run( + settings.clone(), + storage.clone(), + js, + metrics.clone(), + tenant_gate.clone(), + shutdown.clone(), + draining.clone(), + )); + + let effect_task = tokio::spawn(run_effect_worker( + settings.clone(), + storage.clone(), + metrics.clone(), + tenant_gate.clone(), + None, + Arc::new(tokio::sync::Notify::new()), + shutdown.clone(), + draining.clone(), + )); + + let event = AggregateEventEnvelope { + tenant_id: tenant.clone(), + event_id: Uuid::now_v7(), + aggregate_id: "a1".to_string(), + aggregate_type: "Account".to_string(), + version: 1, + event_type: "Created".to_string(), + payload: json!({"correlation_id": correlation_id}), + command_id: Uuid::now_v7(), + timestamp: chrono::Utc::now(), + }; + + let client = async_nats::connect(&nats_url).await.unwrap(); + let js_ctx = jetstream::new(client); + let payload = serde_json::to_vec(&event).unwrap(); + js_ctx + .publish( + format!("tenant.{}.aggregate.Account.a1", tenant.as_str()), + payload.into(), + ) + .await + .unwrap(); + + let result = wait_for_effect_result( + &nats_url, + &settings, + format!("e2e_result_{}", suffix), + format!( + "tenant.{}.effect_result.noop.{}", + tenant.as_str(), + command_id.as_str() + ), + ) + .await; + + assert_eq!(result.tenant_id, tenant); + assert_eq!(result.command_id, command_id); + assert_eq!(result.effect_name, EffectName::new("noop")); + assert_eq!(result.result_type, EffectResultType::Succeeded); + assert_eq!( + result.metadata.correlation_id.as_ref().map(|v| v.as_str()), + Some(correlation_id.as_str()) + ); + + draining.store(true, std::sync::atomic::Ordering::Relaxed); + shutdown.notify_waiters(); + let _ = 
tokio::time::timeout(Duration::from_secs(2), saga_task).await; + let _ = tokio::time::timeout(Duration::from_secs(2), outbox_task).await; + let _ = tokio::time::timeout(Duration::from_secs(2), effect_task).await; + }); +} + +#[test] +#[ignore] +fn integration_crash_restart_preserves_atomicity() { + let Some(nats_url) = nats_url_or_skip() else { + return; + }; + + let rt = tokio::runtime::Runtime::new().unwrap(); + rt.block_on(async move { + let tmp = tempfile::tempdir().unwrap(); + let suffix = unique_suffix(); + let tenant = tenant_from_suffix(&suffix); + let command_id = command_from_suffix(&suffix); + let correlation_id = format!("corr_{}", suffix); + + let (saga_manifest_path, _) = + write_fixture_saga_files(tmp.path(), &tenant, &command_id, &correlation_id); + let effects_manifest_path = write_fixture_effects_manifest(tmp.path()); + + let settings = Settings { + nats_url: nats_url.clone(), + consumer_durable_prefix: format!("runner_{}", suffix), + saga_manifest_path, + effects_manifest_path, + saga_trigger_subject_filters: vec![format!("tenant.{}.aggregate.*.*", tenant.as_str())], + effect_command_subject_filters: vec![format!("tenant.{}.effect.*.*", tenant.as_str())], + ..Default::default() + }; + + ensure_base_streams(&nats_url).await; + + let metrics = Arc::new(Metrics::default()); + let storage = open_temp_storage(tmp.path()); + + let event = AggregateEventEnvelope { + tenant_id: tenant.clone(), + event_id: Uuid::now_v7(), + aggregate_id: "a1".to_string(), + aggregate_type: "Account".to_string(), + version: 1, + event_type: "Created".to_string(), + payload: json!({"correlation_id": correlation_id}), + command_id: Uuid::now_v7(), + timestamp: chrono::Utc::now(), + }; + + let client = async_nats::connect(&nats_url).await.unwrap(); + let js_ctx = jetstream::new(client); + let payload = serde_json::to_vec(&event).unwrap(); + js_ctx + .publish( + format!("tenant.{}.aggregate.Account.a1", tenant.as_str()), + payload.into(), + ) + .await + .unwrap(); + + let mut 
crash_settings = settings.clone(); + crash_settings.test_saga_crash_after_commit = true; + crash_settings.test_outbox_crash_after_dispatch = true; + crash_settings.test_effect_crash_after_dedupe_before_ack = true; + + let shutdown1 = Arc::new(tokio::sync::Notify::new()); + let draining1 = Arc::new(AtomicBool::new(false)); + let tenant_gate1 = Arc::new(runner::tenant_placement::TenantGate::new(None)); + let programs = Arc::new(SagaPrograms::load(&crash_settings).unwrap()); + let runtime = SagaRuntime::default(); + + let saga_task = tokio::spawn(run_saga_worker( + crash_settings.clone(), + storage.clone(), + programs, + runtime.clone(), + metrics.clone(), + tenant_gate1.clone(), + None, + shutdown1.clone(), + draining1.clone(), + )); + let js = JetStreamClient::connect(&crash_settings).await.unwrap(); + let outbox_task = tokio::spawn(OutboxRelay.run( + crash_settings.clone(), + storage.clone(), + js, + metrics.clone(), + tenant_gate1.clone(), + shutdown1.clone(), + draining1.clone(), + )); + let effect_task = tokio::spawn(run_effect_worker( + crash_settings.clone(), + storage.clone(), + metrics.clone(), + tenant_gate1.clone(), + None, + Arc::new(tokio::sync::Notify::new()), + shutdown1.clone(), + draining1.clone(), + )); + + let _ = tokio::time::timeout(Duration::from_secs(5), saga_task).await; + let _ = tokio::time::timeout(Duration::from_secs(5), outbox_task).await; + let _ = tokio::time::timeout(Duration::from_secs(5), effect_task).await; + + let shutdown2 = Arc::new(tokio::sync::Notify::new()); + let draining2 = Arc::new(AtomicBool::new(false)); + let tenant_gate2 = Arc::new(runner::tenant_placement::TenantGate::new(None)); + let programs = Arc::new(SagaPrograms::load(&settings).unwrap()); + + let saga_task = tokio::spawn(run_saga_worker( + settings.clone(), + storage.clone(), + programs, + runtime.clone(), + metrics.clone(), + tenant_gate2.clone(), + None, + shutdown2.clone(), + draining2.clone(), + )); + let js = 
JetStreamClient::connect(&settings).await.unwrap(); + let outbox_task = tokio::spawn(OutboxRelay.run( + settings.clone(), + storage.clone(), + js, + metrics.clone(), + tenant_gate2.clone(), + shutdown2.clone(), + draining2.clone(), + )); + let effect_task = tokio::spawn(run_effect_worker( + settings.clone(), + storage.clone(), + metrics.clone(), + tenant_gate2.clone(), + None, + Arc::new(tokio::sync::Notify::new()), + shutdown2.clone(), + draining2.clone(), + )); + + let result = wait_for_effect_result( + &nats_url, + &settings, + format!("e2e_result_{}", suffix), + format!( + "tenant.{}.effect_result.noop.{}", + tenant.as_str(), + command_id.as_str() + ), + ) + .await; + assert_eq!(result.result_type, EffectResultType::Succeeded); + + draining2.store(true, std::sync::atomic::Ordering::Relaxed); + shutdown2.notify_waiters(); + let _ = tokio::time::timeout(Duration::from_secs(2), saga_task).await; + let _ = tokio::time::timeout(Duration::from_secs(2), outbox_task).await; + let _ = tokio::time::timeout(Duration::from_secs(2), effect_task).await; + }); +} + +#[test] +#[ignore] +fn integration_outbox_restart_does_not_duplicate_effect_command_publish() { + let Some(nats_url) = nats_url_or_skip() else { + return; + }; + + let rt = tokio::runtime::Runtime::new().unwrap(); + rt.block_on(async move { + let suffix = unique_suffix(); + let tenant = tenant_from_suffix(&suffix); + let command_id = command_from_suffix(&suffix); + let settings = Settings { + nats_url: nats_url.clone(), + consumer_durable_prefix: format!("runner_{}", suffix), + test_outbox_crash_after_dispatch: true, + effect_command_subject_filters: vec![format!("tenant.{}.effect.*.*", tenant.as_str())], + ..Default::default() + }; + + ensure_base_streams(&nats_url).await; + + let metrics = Arc::new(Metrics::default()); + let tmp = tempfile::tempdir().unwrap(); + let storage = open_temp_storage(tmp.path()); + storage + .put_outbox_item( + &tenant, + "effect", + &runner::types::WorkId::new_v7(), + 
&runner::types::WorkItem::EffectCommand(runner::types::EffectCommandEnvelope { + tenant_id: tenant.clone(), + command_id: command_id.clone(), + effect_name: EffectName::new("noop"), + payload: json!({"ok": true}), + metadata: MessageMetadata::default(), + }), + ) + .unwrap(); + + let client = async_nats::connect(&nats_url).await.unwrap(); + let js_ctx = jetstream::new(client); + let stream = js_ctx + .get_stream(&settings.workflow_commands_stream) + .await + .unwrap(); + + let consumer = stream + .get_or_create_consumer( + format!("cmd_count_{}", suffix).as_str(), + PullConfig { + durable_name: Some(format!("cmd_count_{}", suffix)), + ack_policy: AckPolicy::Explicit, + filter_subject: format!( + "tenant.{}.effect.noop.{}", + tenant.as_str(), + command_id.as_str() + ), + ..Default::default() + }, + ) + .await + .unwrap(); + + let shutdown1 = Arc::new(tokio::sync::Notify::new()); + let draining1 = Arc::new(AtomicBool::new(false)); + let tenant_gate1 = Arc::new(runner::tenant_placement::TenantGate::new(None)); + + let js = JetStreamClient::connect(&settings).await.unwrap(); + let outbox_task = tokio::spawn(OutboxRelay.run( + settings.clone(), + storage.clone(), + js, + metrics.clone(), + tenant_gate1.clone(), + shutdown1.clone(), + draining1.clone(), + )); + let _ = tokio::time::timeout(Duration::from_secs(5), outbox_task).await; + + let mut restart_settings = settings.clone(); + restart_settings.test_outbox_crash_after_dispatch = false; + let shutdown2 = Arc::new(tokio::sync::Notify::new()); + let draining2 = Arc::new(AtomicBool::new(false)); + let tenant_gate2 = Arc::new(runner::tenant_placement::TenantGate::new(None)); + let js = JetStreamClient::connect(&restart_settings).await.unwrap(); + let outbox_task = tokio::spawn(OutboxRelay.run( + restart_settings, + storage.clone(), + js, + metrics.clone(), + tenant_gate2.clone(), + shutdown2.clone(), + draining2.clone(), + )); + + let mut messages = consumer.messages().await.unwrap(); + let mut count = 0usize; + let 
start = tokio::time::Instant::now(); + while start.elapsed() < Duration::from_secs(5) { + if let Ok(Some(msg)) = + tokio::time::timeout(Duration::from_millis(500), messages.next()).await + { + let msg = msg.unwrap(); + count += 1; + let _ = msg.ack().await; + } + } + + draining2.store(true, std::sync::atomic::Ordering::Relaxed); + shutdown2.notify_waiters(); + let _ = tokio::time::timeout(Duration::from_secs(2), outbox_task).await; + + assert_eq!(count, 1); + }); +} + +#[test] +#[ignore] +fn integration_saga_redelivery_does_not_duplicate_effect_command_publish() { + let Some(nats_url) = nats_url_or_skip() else { + return; + }; + + let rt = tokio::runtime::Runtime::new().unwrap(); + rt.block_on(async move { + let tmp = tempfile::tempdir().unwrap(); + let suffix = unique_suffix(); + let tenant = tenant_from_suffix(&suffix); + let command_id = command_from_suffix(&suffix); + let correlation_id = format!("corr_{}", suffix); + + let (saga_manifest_path, _) = + write_fixture_saga_files(tmp.path(), &tenant, &command_id, &correlation_id); + let effects_manifest_path = write_fixture_effects_manifest(tmp.path()); + + let settings = Settings { + nats_url: nats_url.clone(), + consumer_durable_prefix: format!("runner_{}", suffix), + saga_manifest_path, + effects_manifest_path, + saga_trigger_subject_filters: vec![format!("tenant.{}.aggregate.*.*", tenant.as_str())], + effect_command_subject_filters: vec![format!("tenant.{}.effect.*.*", tenant.as_str())], + ..Default::default() + }; + + ensure_base_streams(&nats_url).await; + + let metrics = Arc::new(Metrics::default()); + let storage = open_temp_storage(tmp.path()); + + let event = AggregateEventEnvelope { + tenant_id: tenant.clone(), + event_id: Uuid::now_v7(), + aggregate_id: "a1".to_string(), + aggregate_type: "Account".to_string(), + version: 1, + event_type: "Created".to_string(), + payload: json!({"correlation_id": correlation_id}), + command_id: Uuid::now_v7(), + timestamp: chrono::Utc::now(), + }; + + let client = 
async_nats::connect(&nats_url).await.unwrap(); + let js_ctx = jetstream::new(client); + let payload = serde_json::to_vec(&event).unwrap(); + js_ctx + .publish( + format!("tenant.{}.aggregate.Account.a1", tenant.as_str()), + payload.into(), + ) + .await + .unwrap(); + + let mut crash_settings = settings.clone(); + crash_settings.test_saga_crash_after_commit = true; + + let shutdown1 = Arc::new(tokio::sync::Notify::new()); + let draining1 = Arc::new(AtomicBool::new(false)); + let tenant_gate1 = Arc::new(runner::tenant_placement::TenantGate::new(None)); + let programs = Arc::new(SagaPrograms::load(&crash_settings).unwrap()); + let runtime = SagaRuntime::default(); + + let saga_task = tokio::spawn(run_saga_worker( + crash_settings.clone(), + storage.clone(), + programs, + runtime.clone(), + metrics.clone(), + tenant_gate1.clone(), + None, + shutdown1.clone(), + draining1.clone(), + )); + + let _ = tokio::time::timeout(Duration::from_secs(5), saga_task).await; + + let shutdown2 = Arc::new(tokio::sync::Notify::new()); + let draining2 = Arc::new(AtomicBool::new(false)); + let tenant_gate2 = Arc::new(runner::tenant_placement::TenantGate::new(None)); + let programs = Arc::new(SagaPrograms::load(&settings).unwrap()); + let saga_task = tokio::spawn(run_saga_worker( + settings.clone(), + storage.clone(), + programs, + runtime.clone(), + metrics.clone(), + tenant_gate2.clone(), + None, + shutdown2.clone(), + draining2.clone(), + )); + + let js = JetStreamClient::connect(&settings).await.unwrap(); + let outbox_task = tokio::spawn(OutboxRelay.run( + settings.clone(), + storage.clone(), + js, + metrics.clone(), + tenant_gate2.clone(), + shutdown2.clone(), + draining2.clone(), + )); + + let count = count_messages( + &nats_url, + settings.workflow_commands_stream.clone(), + format!("saga_cmd_count_{}", suffix), + format!( + "tenant.{}.effect.noop.{}", + tenant.as_str(), + command_id.as_str() + ), + 10, + Duration::from_secs(10), + ) + .await; + + draining2.store(true, 
std::sync::atomic::Ordering::Relaxed); + shutdown2.notify_waiters(); + let _ = tokio::time::timeout(Duration::from_secs(2), saga_task).await; + let _ = tokio::time::timeout(Duration::from_secs(2), outbox_task).await; + + assert_eq!(count, 1); + }); +} + +#[test] +#[ignore] +fn integration_effect_redelivery_does_not_duplicate_result_publish() { + let Some(nats_url) = nats_url_or_skip() else { + return; + }; + + let rt = tokio::runtime::Runtime::new().unwrap(); + rt.block_on(async move { + let tmp = tempfile::tempdir().unwrap(); + let effects_manifest_path = write_fixture_effects_manifest(tmp.path()); + + let suffix = unique_suffix(); + let tenant = tenant_from_suffix(&suffix); + let command_id = command_from_suffix(&suffix); + let settings = Settings { + nats_url: nats_url.clone(), + consumer_durable_prefix: format!("runner_{}", suffix), + effects_manifest_path, + effect_command_subject_filters: vec![format!("tenant.{}.effect.*.*", tenant.as_str())], + ..Default::default() + }; + + ensure_base_streams(&nats_url).await; + + let storage = open_temp_storage(tmp.path()); + let metrics = Arc::new(Metrics::default()); + + let cmd = runner::types::EffectCommandEnvelope { + tenant_id: tenant.clone(), + command_id: command_id.clone(), + effect_name: EffectName::new("noop"), + payload: json!({"ok": true}), + metadata: MessageMetadata::default(), + }; + + let js = JetStreamClient::connect(&settings).await.unwrap(); + js.publish_effect_command(&cmd).await.unwrap(); + + let mut crash_settings = settings.clone(); + crash_settings.test_effect_crash_after_dedupe_before_ack = true; + + let shutdown1 = Arc::new(tokio::sync::Notify::new()); + let draining1 = Arc::new(AtomicBool::new(false)); + let tenant_gate1 = Arc::new(runner::tenant_placement::TenantGate::new(None)); + let effect_task = tokio::spawn(run_effect_worker( + crash_settings.clone(), + storage.clone(), + metrics.clone(), + tenant_gate1.clone(), + None, + Arc::new(tokio::sync::Notify::new()), + shutdown1.clone(), + 
draining1.clone(), + )); + let _ = tokio::time::timeout(Duration::from_secs(10), effect_task).await; + + let shutdown2 = Arc::new(tokio::sync::Notify::new()); + let draining2 = Arc::new(AtomicBool::new(false)); + let tenant_gate2 = Arc::new(runner::tenant_placement::TenantGate::new(None)); + let effect_task = tokio::spawn(run_effect_worker( + settings.clone(), + storage.clone(), + metrics.clone(), + tenant_gate2.clone(), + None, + Arc::new(tokio::sync::Notify::new()), + shutdown2.clone(), + draining2.clone(), + )); + + let count = count_messages( + &nats_url, + settings.workflow_events_stream.clone(), + format!("effect_res_count_{}", suffix), + format!( + "tenant.{}.effect_result.noop.{}", + tenant.as_str(), + command_id.as_str() + ), + 10, + Duration::from_secs(10), + ) + .await; + + draining2.store(true, std::sync::atomic::Ordering::Relaxed); + shutdown2.notify_waiters(); + let _ = tokio::time::timeout(Duration::from_secs(2), effect_task).await; + + assert_eq!(count, 1); + }); +} + +#[test] +#[ignore] +fn integration_scale_out_two_saga_replicas_no_duplicate_outbox() { + let Some(nats_url) = nats_url_or_skip() else { + return; + }; + + let rt = tokio::runtime::Runtime::new().unwrap(); + rt.block_on(async move { + let tmp = tempfile::tempdir().unwrap(); + let suffix = unique_suffix(); + let tenant = tenant_from_suffix(&suffix); + let command_id = command_from_suffix(&suffix); + let correlation_id = format!("corr_{}", suffix); + let (saga_manifest_path, _) = + write_fixture_saga_files(tmp.path(), &tenant, &command_id, &correlation_id); + + let settings = Settings { + nats_url: nats_url.clone(), + consumer_durable_prefix: format!("shared_{}", suffix), + saga_manifest_path, + saga_trigger_subject_filters: vec![format!("tenant.{}.aggregate.*.*", tenant.as_str())], + ..Default::default() + }; + + ensure_base_streams(&nats_url).await; + + let metrics = Arc::new(Metrics::default()); + let storage = open_temp_storage(tmp.path()); + let shutdown = 
Arc::new(tokio::sync::Notify::new()); + let draining = Arc::new(AtomicBool::new(false)); + let tenant_gate = Arc::new(runner::tenant_placement::TenantGate::new(None)); + + let programs = Arc::new(SagaPrograms::load(&settings).unwrap()); + let runtime = SagaRuntime::default(); + + let saga_a = tokio::spawn(run_saga_worker( + settings.clone(), + storage.clone(), + programs.clone(), + runtime.clone(), + metrics.clone(), + tenant_gate.clone(), + None, + shutdown.clone(), + draining.clone(), + )); + let saga_b = tokio::spawn(run_saga_worker( + settings.clone(), + storage.clone(), + programs.clone(), + runtime.clone(), + metrics.clone(), + tenant_gate.clone(), + None, + shutdown.clone(), + draining.clone(), + )); + + let publish_count = 50usize; + let client = async_nats::connect(&nats_url).await.unwrap(); + let js_ctx = jetstream::new(client); + let mut max_seq = 0u64; + + for _ in 0..publish_count { + let event = AggregateEventEnvelope { + tenant_id: tenant.clone(), + event_id: Uuid::now_v7(), + aggregate_id: "a1".to_string(), + aggregate_type: "Account".to_string(), + version: 1, + event_type: "Created".to_string(), + payload: json!({"correlation_id": correlation_id}), + command_id: Uuid::now_v7(), + timestamp: chrono::Utc::now(), + }; + let payload = serde_json::to_vec(&event).unwrap(); + let ack = js_ctx + .publish( + format!("tenant.{}.aggregate.Account.a1", tenant.as_str()), + payload.into(), + ) + .await + .unwrap() + .await + .unwrap(); + max_seq = max_seq.max(ack.sequence); + } + + let start = tokio::time::Instant::now(); + loop { + let outbox = storage.list_outbox_all(10_000).unwrap(); + if outbox.len() > publish_count { + panic!("outbox duplicates detected: {}", outbox.len()); + } + if outbox.len() == publish_count { + tokio::time::sleep(Duration::from_millis(500)).await; + let outbox2 = storage.list_outbox_all(10_000).unwrap(); + assert_eq!(outbox2.len(), publish_count); + break; + } + if start.elapsed() > Duration::from_secs(10) { + panic!("timed out 
waiting for outbox items: {}", outbox.len()); + } + tokio::time::sleep(Duration::from_millis(50)).await; + } + + let checkpoint_key = CheckpointKey::new(&tenant, &SagaName::new("noop")); + let start = tokio::time::Instant::now(); + loop { + let checkpoint = storage + .get_checkpoint(&checkpoint_key) + .unwrap() + .unwrap_or(0); + if checkpoint >= max_seq { + break; + } + if start.elapsed() > Duration::from_secs(5) { + panic!( + "checkpoint did not advance to max_seq (checkpoint={}, max_seq={})", + checkpoint, max_seq + ); + } + tokio::time::sleep(Duration::from_millis(50)).await; + } + + draining.store(true, std::sync::atomic::Ordering::Relaxed); + shutdown.notify_waiters(); + let _ = tokio::time::timeout(Duration::from_secs(2), saga_a).await; + let _ = tokio::time::timeout(Duration::from_secs(2), saga_b).await; + }); +} + +#[test] +#[ignore] +fn integration_draining_pauses_processing_and_resume_completes() { + let Some(nats_url) = nats_url_or_skip() else { + return; + }; + + let rt = tokio::runtime::Runtime::new().unwrap(); + rt.block_on(async move { + let tmp = tempfile::tempdir().unwrap(); + let suffix = unique_suffix(); + let tenant = tenant_from_suffix(&suffix); + let command_id = command_from_suffix(&suffix); + let correlation_id = format!("corr_{}", suffix); + let (saga_manifest_path, _) = + write_fixture_saga_files(tmp.path(), &tenant, &command_id, &correlation_id); + + let settings = Settings { + nats_url: nats_url.clone(), + consumer_durable_prefix: format!("drain_{}", suffix), + saga_manifest_path, + saga_trigger_subject_filters: vec![format!("tenant.{}.aggregate.*.*", tenant.as_str())], + ..Default::default() + }; + + ensure_base_streams(&nats_url).await; + + let metrics = Arc::new(Metrics::default()); + let storage = open_temp_storage(tmp.path()); + let shutdown = Arc::new(tokio::sync::Notify::new()); + let draining = Arc::new(AtomicBool::new(false)); + let tenant_gate = Arc::new(runner::tenant_placement::TenantGate::new(None)); + + let programs = 
Arc::new(SagaPrograms::load(&settings).unwrap()); + let runtime = SagaRuntime::default(); + + let saga = tokio::spawn(run_saga_worker( + settings.clone(), + storage.clone(), + programs, + runtime, + metrics.clone(), + tenant_gate.clone(), + None, + shutdown.clone(), + draining.clone(), + )); + + let publish_count = 80usize; + let client = async_nats::connect(&nats_url).await.unwrap(); + let js_ctx = jetstream::new(client); + + for _ in 0..publish_count { + let event = AggregateEventEnvelope { + tenant_id: tenant.clone(), + event_id: Uuid::now_v7(), + aggregate_id: "a1".to_string(), + aggregate_type: "Account".to_string(), + version: 1, + event_type: "Created".to_string(), + payload: json!({"correlation_id": correlation_id}), + command_id: Uuid::now_v7(), + timestamp: chrono::Utc::now(), + }; + let payload = serde_json::to_vec(&event).unwrap(); + js_ctx + .publish( + format!("tenant.{}.aggregate.Account.a1", tenant.as_str()), + payload.into(), + ) + .await + .unwrap(); + } + + let start = tokio::time::Instant::now(); + let paused_at; + loop { + let count = storage.list_outbox_all(10_000).unwrap().len(); + if count > 0 { + draining.store(true, std::sync::atomic::Ordering::Relaxed); + paused_at = count; + break; + } + if start.elapsed() > Duration::from_secs(5) { + panic!("no progress before drain"); + } + tokio::time::sleep(Duration::from_millis(50)).await; + } + + tokio::time::sleep(Duration::from_millis(500)).await; + let after = storage.list_outbox_all(10_000).unwrap().len(); + assert!(after <= paused_at + 2); + + tokio::time::sleep(Duration::from_millis(500)).await; + let after2 = storage.list_outbox_all(10_000).unwrap().len(); + assert_eq!(after2, after); + + draining.store(false, std::sync::atomic::Ordering::Relaxed); + + let start = tokio::time::Instant::now(); + loop { + let count = storage.list_outbox_all(10_000).unwrap().len(); + if count == publish_count { + break; + } + if start.elapsed() > Duration::from_secs(10) { + panic!("timed out waiting after 
resume: {}", count); + } + tokio::time::sleep(Duration::from_millis(50)).await; + } + + shutdown.notify_waiters(); + let _ = tokio::time::timeout(Duration::from_secs(2), saga).await; + }); +} diff --git a/runner/tests/jetstream_integration.rs b/runner/tests/jetstream_integration.rs new file mode 100644 index 0000000..dad4fd1 --- /dev/null +++ b/runner/tests/jetstream_integration.rs @@ -0,0 +1,52 @@ +use async_nats::jetstream::{self, stream::Config as StreamConfig}; +use runner::config::Settings; +use runner::stream::JetStreamClient; +use runner::types::{ + CommandId, EffectName, EffectResultEnvelope, EffectResultType, MessageMetadata, TenantId, +}; +use serde_json::json; + +#[test] +#[ignore] +fn jetstream_connects_and_can_publish_effect_result() { + let Ok(nats_url) = std::env::var("RUNNER_TEST_NATS_URL") else { + return; + }; + + let rt = tokio::runtime::Runtime::new().unwrap(); + rt.block_on(async move { + let client = async_nats::connect(&nats_url).await.unwrap(); + let js = jetstream::new(client); + + let _ = js + .get_or_create_stream(StreamConfig { + name: "AGGREGATE_EVENTS".to_string(), + subjects: vec!["tenant.*.aggregate.*.*".to_string()], + ..Default::default() + }) + .await + .unwrap(); + + let settings = Settings { + nats_url, + ..Default::default() + }; + + let runner_js = JetStreamClient::connect(&settings).await.unwrap(); + + let result = EffectResultEnvelope { + tenant_id: TenantId::new("t1"), + command_id: CommandId::new("c1"), + effect_name: EffectName::new("noop"), + result_type: EffectResultType::Succeeded, + payload: json!({"ok": true}), + timestamp: chrono::Utc::now(), + metadata: MessageMetadata::default(), + }; + + runner_js + .publish_effect_result("tenant.t1.effect_result.noop.c1".to_string(), &result) + .await + .unwrap(); + }); +} diff --git a/runner/tests/swarm_stack.rs b/runner/tests/swarm_stack.rs new file mode 100644 index 0000000..7f462ee --- /dev/null +++ b/runner/tests/swarm_stack.rs @@ -0,0 +1,11 @@ +#[test] +fn 
docker_stack_file_is_valid_yaml() { + let contents = include_str!("../../swarm/stacks/platform.yml"); + let doc: serde_yaml::Value = serde_yaml::from_str(contents).unwrap(); + + let services = doc.get("services").and_then(|v| v.as_mapping()).unwrap(); + + assert!(services.contains_key(serde_yaml::Value::String("nats".to_string()))); + assert!(services.contains_key(serde_yaml::Value::String("runner_saga".to_string()))); + assert!(services.contains_key(serde_yaml::Value::String("runner_effect".to_string()))); +} diff --git a/shared/Cargo.toml b/shared/Cargo.toml new file mode 100644 index 0000000..f8e8b06 --- /dev/null +++ b/shared/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "shared" +version = "0.1.0" +edition = "2021" + +[dependencies] +serde = { version = "1", features = ["derive"] } +uuid = { version = "1", features = ["v4", "serde"] } + +[dev-dependencies] +serde_json = "1" diff --git a/shared/src/lib.rs b/shared/src/lib.rs new file mode 100644 index 0000000..0dea649 --- /dev/null +++ b/shared/src/lib.rs @@ -0,0 +1,196 @@ +use serde::{Deserialize, Serialize}; +use std::fmt; +use std::str::FromStr; +use uuid::Uuid; + +pub const HEADER_X_CORRELATION_ID: &str = "x-correlation-id"; +pub const HEADER_TRACEPARENT: &str = "traceparent"; +pub const HEADER_TRACE_ID: &str = "trace-id"; +pub const NATS_HEADER_CORRELATION_ID: &str = "correlation-id"; + +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, Default)] +pub struct TenantId(String); + +impl TenantId { + pub fn new(id: impl Into) -> Self { + Self(id.into()) + } + + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl fmt::Display for TenantId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl FromStr for TenantId { + type Err = std::convert::Infallible; + + fn from_str(s: &str) -> Result { + Ok(Self(s.to_string())) + } +} + +impl AsRef for TenantId { + fn as_ref(&self) -> &str { + 
&self.0 + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[serde(transparent)] +pub struct CorrelationId(String); + +impl CorrelationId { + pub fn new(id: impl Into) -> Self { + Self(id.into()) + } + + pub fn generate() -> Self { + Self(Uuid::new_v4().to_string()) + } + + pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl fmt::Display for CorrelationId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl FromStr for CorrelationId { + type Err = std::convert::Infallible; + + fn from_str(s: &str) -> Result { + Ok(Self(s.to_string())) + } +} + +impl AsRef for CorrelationId { + fn as_ref(&self) -> &str { + &self.0 + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[serde(transparent)] +pub struct TraceId(String); + +impl TraceId { + pub fn new(id: impl Into) -> Self { + Self(id.into()) + } + + pub fn as_str(&self) -> &str { + &self.0 + } + + pub fn is_valid_hex_32(&self) -> bool { + is_valid_hex_32(self.as_str()) + } +} + +impl fmt::Display for TraceId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl FromStr for TraceId { + type Err = std::convert::Infallible; + + fn from_str(s: &str) -> Result { + Ok(Self(s.to_string())) + } +} + +impl AsRef for TraceId { + fn as_ref(&self) -> &str { + &self.0 + } +} + +pub fn trace_id_from_traceparent(traceparent: &str) -> Option<&str> { + let mut parts = traceparent.split('-'); + let version = parts.next()?; + let trace_id = parts.next()?; + let span_id = parts.next()?; + let flags = parts.next()?; + if version.len() != 2 || trace_id.len() != 32 || span_id.len() != 16 || flags.len() != 2 { + return None; + } + if !trace_id.chars().all(|c| c.is_ascii_hexdigit()) + || !span_id.chars().all(|c| c.is_ascii_hexdigit()) + || !flags.chars().all(|c| c.is_ascii_hexdigit()) + || !version.chars().all(|c| c.is_ascii_hexdigit()) + { + return None; + } + Some(trace_id) +} + +pub 
fn traceparent_from_trace_id(trace_id: &TraceId) -> Option { + if !trace_id.is_valid_hex_32() { + return None; + } + let span_id = Uuid::new_v4().simple().to_string()[..16].to_string(); + Some(format!("00-{}-{span_id}-01", trace_id.as_str())) +} + +fn is_valid_hex_32(s: &str) -> bool { + s.len() == 32 && s.chars().all(|c| c.is_ascii_hexdigit()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn tenant_id_serialization_roundtrip() { + let id = TenantId::new("acme-corp"); + let json = serde_json::to_string(&id).unwrap(); + let decoded: TenantId = serde_json::from_str(&json).unwrap(); + assert_eq!(id, decoded); + } + + #[test] + fn tenant_id_default_is_empty() { + let id = TenantId::default(); + assert!(id.is_empty()); + } + + #[test] + fn tenant_id_is_send_sync() { + fn assert_send_sync() {} + assert_send_sync::(); + } + + #[test] + fn correlation_id_roundtrip_is_string() { + let id = CorrelationId::new("corr-1"); + let json = serde_json::to_string(&id).unwrap(); + assert_eq!(json, "\"corr-1\""); + let decoded: CorrelationId = serde_json::from_str(&json).unwrap(); + assert_eq!(decoded.as_str(), "corr-1"); + } + + #[test] + fn trace_id_from_traceparent_parses() { + let tp = "00-0123456789abcdef0123456789abcdef-1111111111111111-01"; + assert_eq!( + trace_id_from_traceparent(tp), + Some("0123456789abcdef0123456789abcdef") + ); + } +} diff --git a/swarm/dev.json b/swarm/dev.json new file mode 100644 index 0000000..3cab420 --- /dev/null +++ b/swarm/dev.json @@ -0,0 +1,21 @@ +{ + "services": [ + { + "name": "gateway", + "image": "cloudlysis/gateway:dev", + "mode": "replicated", + "replicas": "1/1", + "updated_at": null + } + ], + "tasks": [ + { + "id": "task-1", + "service": "gateway", + "node": "node-1", + "desired_state": "running", + "current_state": "running", + "error": null + } + ] +} diff --git a/swarm/stacks/control-plane.yml b/swarm/stacks/control-plane.yml new file mode 100644 index 0000000..cd6a43a --- /dev/null +++ b/swarm/stacks/control-plane.yml 
@@ -0,0 +1,55 @@ +version: "3.9" + +services: + control-api: + image: cloudlysis/control-api:dev + environment: + CONTROL_API_ADDR: "0.0.0.0:8080" + CONTROL_PLACEMENT_PATH: "/etc/control/placement.json" + CONTROL_SWARM_STATE_PATH: "/etc/control/swarm_state.json" + CONTROL_SELF_URL: "http://control-api:8080" + configs: + - source: control_placement_dev + target: /etc/control/placement.json + - source: control_swarm_state_dev + target: /etc/control/swarm_state.json + networks: + - internal + ports: + - target: 8080 + published: 8080 + protocol: tcp + mode: ingress + deploy: + replicas: 1 + restart_policy: + condition: on-failure + + control-ui: + image: cloudlysis/control-ui:dev + environment: + VITE_CONTROL_API_URL: "http://control-api:8080" + networks: + - public + - internal + ports: + - target: 80 + published: 8081 + protocol: tcp + mode: ingress + deploy: + replicas: 1 + restart_policy: + condition: on-failure + +configs: + control_placement_dev: + file: ../../placement/dev.json + control_swarm_state_dev: + file: ../../swarm/dev.json + +networks: + public: + driver: overlay + internal: + driver: overlay diff --git a/swarm/stacks/observability.yml b/swarm/stacks/observability.yml new file mode 100644 index 0000000..20daad9 --- /dev/null +++ b/swarm/stacks/observability.yml @@ -0,0 +1,152 @@ +version: "3.9" + +services: + victoria-metrics: + image: victoriametrics/victoria-metrics:v1.120.0 + command: + - "-retentionPeriod=30d" + networks: + - internal + deploy: + replicas: 1 + restart_policy: + condition: on-failure + + vmagent: + image: victoriametrics/vmagent:v1.120.0 + command: + - "-promscrape.config=/etc/vmagent/scrape.yml" + - "-remoteWrite.url=http://victoria-metrics:8428/api/v1/write" + configs: + - source: vmagent_scrape + target: /etc/vmagent/scrape.yml + networks: + - internal + deploy: + replicas: 1 + restart_policy: + condition: on-failure + + loki: + image: grafana/loki:3.5.5 + command: + - "-config.file=/etc/loki/config.yml" + configs: + - source: 
loki_config + target: /etc/loki/config.yml + networks: + - internal + deploy: + replicas: 1 + restart_policy: + condition: on-failure + + tempo: + image: grafana/tempo:2.8.2 + command: + - "-config.file=/etc/tempo/config.yml" + configs: + - source: tempo_config + target: /etc/tempo/config.yml + networks: + - internal + deploy: + replicas: 1 + restart_policy: + condition: on-failure + + grafana: + image: grafana/grafana:12.1.1 + environment: + GF_SECURITY_ADMIN_USER: "admin" + GF_SECURITY_ADMIN_PASSWORD__FILE: "/run/secrets/grafana_admin_password" + GF_AUTH_ANONYMOUS_ENABLED: "false" + configs: + - source: grafana_datasources + target: /etc/grafana/provisioning/datasources/datasources.yml + - source: grafana_dashboards_provider + target: /etc/grafana/provisioning/dashboards/dashboards.yml + - source: dashboard_operations_overview + target: /var/lib/grafana/dashboards/operations_overview.json + - source: dashboard_http_detail + target: /var/lib/grafana/dashboards/http_detail.json + - source: dashboard_logs + target: /var/lib/grafana/dashboards/logs.json + - source: dashboard_traces + target: /var/lib/grafana/dashboards/traces.json + - source: dashboard_event_bus + target: /var/lib/grafana/dashboards/event_bus.json + - source: dashboard_workers + target: /var/lib/grafana/dashboards/workers.json + - source: dashboard_storage + target: /var/lib/grafana/dashboards/storage.json + - source: dashboard_cluster + target: /var/lib/grafana/dashboards/cluster.json + - source: dashboard_noisy_neighbor_tenant_health + target: /var/lib/grafana/dashboards/noisy_neighbor_tenant_health.json + - source: dashboard_api_regression_deployment + target: /var/lib/grafana/dashboards/api_regression_deployment.json + - source: dashboard_storage_event_bus_bottlenecks + target: /var/lib/grafana/dashboards/storage_event_bus_bottlenecks.json + - source: dashboard_infrastructure_exhaustion + target: /var/lib/grafana/dashboards/infrastructure_exhaustion.json + secrets: + - grafana_admin_password + 
networks: + - public + - internal + ports: + - target: 3000 + published: 3000 + protocol: tcp + mode: ingress + deploy: + replicas: 1 + restart_policy: + condition: on-failure + +configs: + vmagent_scrape: + file: ../../observability/vmagent/scrape.yml + loki_config: + file: ../../observability/loki/config.yml + tempo_config: + file: ../../observability/tempo/config.yml + grafana_datasources: + file: ../../observability/grafana/provisioning/datasources/datasources.yml + grafana_dashboards_provider: + file: ../../observability/grafana/provisioning/dashboards/dashboards.yml + dashboard_operations_overview: + file: ../../observability/grafana/dashboards/operations_overview.json + dashboard_http_detail: + file: ../../observability/grafana/dashboards/http_detail.json + dashboard_logs: + file: ../../observability/grafana/dashboards/logs.json + dashboard_traces: + file: ../../observability/grafana/dashboards/traces.json + dashboard_event_bus: + file: ../../observability/grafana/dashboards/event_bus.json + dashboard_workers: + file: ../../observability/grafana/dashboards/workers.json + dashboard_storage: + file: ../../observability/grafana/dashboards/storage.json + dashboard_cluster: + file: ../../observability/grafana/dashboards/cluster.json + dashboard_noisy_neighbor_tenant_health: + file: ../../observability/grafana/dashboards/noisy_neighbor_tenant_health.json + dashboard_api_regression_deployment: + file: ../../observability/grafana/dashboards/api_regression_deployment.json + dashboard_storage_event_bus_bottlenecks: + file: ../../observability/grafana/dashboards/storage_event_bus_bottlenecks.json + dashboard_infrastructure_exhaustion: + file: ../../observability/grafana/dashboards/infrastructure_exhaustion.json + +secrets: + grafana_admin_password: + external: true + +networks: + public: + driver: overlay + internal: + driver: overlay diff --git a/swarm/stacks/platform.yml b/swarm/stacks/platform.yml new file mode 100644 index 0000000..1fe1ffe --- /dev/null +++ 
b/swarm/stacks/platform.yml @@ -0,0 +1,132 @@ +version: "3.9" + +services: + nats: + image: nats:2.10-alpine + command: ["-js", "-m", "8222"] + ports: + - "4222:4222" + - "8222:8222" + volumes: + - nats_data:/data + deploy: + replicas: 1 + + gateway: + image: cloudlysis/gateway:dev + environment: + GATEWAY_ADDR: 0.0.0.0:8080 + GATEWAY_GRPC_ADDR: 0.0.0.0:8081 + GATEWAY_STORAGE_PATH: /data/gateway.mdbx + GATEWAY_ROUTING_NATS_URL: nats://nats:4222 + GATEWAY_ROUTING_NATS_BUCKET: gateway.routing + GATEWAY_ROUTING_NATS_KEY: routing.json + ports: + - "8080:8080" + - "8081:8081" + volumes: + - gateway_data:/data + deploy: + replicas: 2 + update_config: + parallelism: 1 + order: start-first + failure_action: rollback + rollback_config: + parallelism: 1 + order: stop-first + + aggregate: + image: cloudlysis/aggregate:dev + environment: + AGGREGATE_NATS_URL: nats://nats:4222 + AGGREGATE_STORAGE_PATH: /data + AGGREGATE_HTTP_ADDR: 0.0.0.0:8080 + AGGREGATE_GRPC_ADDR: 0.0.0.0:50051 + volumes: + - aggregate_data:/data + deploy: + replicas: 3 + placement: + constraints: + - node.labels.tenant_range != "" + restart_policy: + condition: on-failure + + projection: + image: cloudlysis/projection:dev + environment: + PROJECTION_NATS_URL: nats://nats:4222 + PROJECTION_STREAM_NAME: AGGREGATE_EVENTS + PROJECTION_DURABLE_NAME: projection + PROJECTION_STORAGE_PATH: /data + PROJECTION_MANIFEST_PATH: /config/projection-manifest.yaml + PROJECTION_MULTI_TENANT: "true" + PROJECTION_MAX_IN_FLIGHT: "128" + PROJECTION_ACK_TIMEOUT_MS: "30000" + PROJECTION_MAX_DELIVER: "10" + PROJECTION_CONSUMER_MODE: single + volumes: + - projection_data:/data + configs: + - source: projection_manifest + target: /config/projection-manifest.yaml + deploy: + replicas: 2 + restart_policy: + condition: on-failure + update_config: + parallelism: 1 + order: start-first + failure_action: rollback + + runner_saga: + image: cloudlysis/runner:dev + environment: + RUNNER_NATS_URL: nats://nats:4222 + RUNNER_MODE: saga + 
RUNNER_HTTP_ADDR: 0.0.0.0:8080 + RUNNER_STORAGE_PATH: /data/runner.mdbx + RUNNER_SAGA_MANIFEST_PATH: /config/sagas.yaml + RUNNER_EFFECTS_MANIFEST_PATH: /config/effects.yaml + volumes: + - runner_saga_data:/data + configs: + - source: runner_sagas + target: /config/sagas.yaml + - source: runner_effects + target: /config/effects.yaml + deploy: + replicas: 1 + + runner_effect: + image: cloudlysis/runner:dev + environment: + RUNNER_NATS_URL: nats://nats:4222 + RUNNER_MODE: effect + RUNNER_HTTP_ADDR: 0.0.0.0:8081 + RUNNER_STORAGE_PATH: /data/runner.mdbx + RUNNER_EFFECTS_MANIFEST_PATH: /config/effects.yaml + volumes: + - runner_effect_data:/data + configs: + - source: runner_effects + target: /config/effects.yaml + deploy: + replicas: 1 + +configs: + projection_manifest: + file: ../../projection/projection-manifest.yaml + runner_sagas: + file: ../../runner/config/sagas.yaml + runner_effects: + file: ../../runner/config/effects.yaml + +volumes: + aggregate_data: + gateway_data: + nats_data: + projection_data: + runner_effect_data: + runner_saga_data: diff --git a/swarm/tenant-placement.yaml b/swarm/tenant-placement.yaml new file mode 100644 index 0000000..1b66d4b --- /dev/null +++ b/swarm/tenant-placement.yaml @@ -0,0 +1,14 @@ +virtual_nodes_per_node: 200 + +nodes: + - node_id: "node-a" + tenant_range: "00-3f" + - node_id: "node-b" + tenant_range: "40-7f" + - node_id: "node-c" + tenant_range: "80-bf" + +tenants: + tenant-a: "node-a" + tenant-b: "node-b" +