Add initial roadmap and implementation

2026-03-11 22:23:16 +02:00
parent 39b97a6db5
commit c0792f2e1d
62 changed files with 12410 additions and 1 deletion

storage/Cargo.toml (Normal file, 26 lines)

@@ -0,0 +1,26 @@
[package]
name = "storage"
version = "0.1.0"
edition = "2021"

[dependencies]
common = { workspace = true }
auth = { workspace = true }
tokio = { workspace = true }
axum = { workspace = true, features = ["multipart"] }
serde = { workspace = true }
serde_json = { workspace = true }
sqlx = { workspace = true }
tracing = { workspace = true }
futures = { workspace = true }
aws-sdk-s3 = { workspace = true }
aws-config = { workspace = true }
aws-types = { workspace = true }
bytes = "1.0"
anyhow = { workspace = true }
tower = "0.4"
tower-http = { version = "0.5", features = ["fs", "trace"] }
uuid = { workspace = true }
chrono = { workspace = true }
http-body-util = "0.1.3"

storage/src/handlers.rs (Normal file, 427 lines)

@@ -0,0 +1,427 @@
use auth::AuthContext;
use aws_sdk_s3::{primitives::ByteStream, Client};
use axum::{
body::{Body, Bytes},
extract::{FromRequest, Multipart, Path, Request, State},
http::{header::{self, CONTENT_TYPE}, HeaderMap, StatusCode},
response::{IntoResponse, Json},
Extension,
};
use common::{Config, ProjectContext};
use futures::stream::StreamExt;
use serde::{Deserialize, Serialize};
use serde_json::json;
use sqlx::{PgPool, Row};
use std::sync::Arc;
use uuid::Uuid;
use http_body_util::BodyExt; // for Body::collect()

#[derive(Clone)]
pub struct StorageState {
pub db: PgPool,
pub s3_client: Client,
pub config: Config,
pub bucket_name: String, // Global S3 Bucket Name
}

#[derive(Serialize, sqlx::FromRow)]
pub struct FileObject {
pub name: String,
pub id: Option<Uuid>,
pub updated_at: Option<chrono::DateTime<chrono::Utc>>,
pub created_at: Option<chrono::DateTime<chrono::Utc>>,
pub last_accessed_at: Option<chrono::DateTime<chrono::Utc>>,
pub metadata: Option<serde_json::Value>,
}
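
// For reference, the table shape these handlers assume, reconstructed from the
// queries below (a sketch only; the actual migration lives elsewhere and may
// differ):
//
//   CREATE TABLE storage.objects (
//       id               uuid PRIMARY KEY DEFAULT gen_random_uuid(),
//       bucket_id        text REFERENCES storage.buckets (id),
//       name             text NOT NULL,
//       owner            uuid,
//       created_at       timestamptz DEFAULT now(),
//       updated_at       timestamptz DEFAULT now(),
//       last_accessed_at timestamptz DEFAULT now(),
//       metadata         jsonb,
//       UNIQUE (bucket_id, name)   -- required by the ON CONFLICT upsert in upload_object
//   );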
pub async fn list_buckets(
State(state): State<StorageState>,
db: Option<Extension<PgPool>>,
Extension(auth_ctx): Extension<AuthContext>,
Extension(_project_ctx): Extension<ProjectContext>,
) -> Result<Json<Vec<String>>, (StatusCode, String)> {
// Query storage.buckets with RLS
let db = db.map(|Extension(p)| p).unwrap_or_else(|| state.db.clone());
let mut tx = db
.begin()
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
    // NOTE: role names cannot be bound as query parameters, hence the format!;
    // auth_ctx.role is assumed to come from a fixed allowlist upstream,
    // otherwise this statement would be injectable.
    let role_query = format!("SET LOCAL role = '{}'", auth_ctx.role);
sqlx::query(&role_query)
.execute(&mut *tx)
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Failed to set role: {}", e),
)
})?;
if let Some(claims) = &auth_ctx.claims {
let sub_query = "SELECT set_config('request.jwt.claim.sub', $1, true)";
sqlx::query(sub_query)
.bind(&claims.sub)
.execute(&mut *tx)
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Failed to set claims: {}", e),
)
})?;
}
    // Tenancy note: this MVP runs a single shared Postgres (including a single
    // auth.users table) across all projects, and the Supabase-style
    // storage.buckets schema has an owner column (user UUID) but no project_id.
    // That yields user-level multitenancy, not project-level: a user sees the
    // buckets they own regardless of which project created them. Real project
    // isolation would need a project_id column or a naming convention like
    // "{project_ref}_{bucket_name}". For the MVP we list whatever RLS allows.
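    // For illustration, the kind of policy this query relies on (an assumption,
    // not shipped in this file; the policy name is made up):
    //
    //   ALTER TABLE storage.buckets ENABLE ROW LEVEL SECURITY;
    //   CREATE POLICY buckets_owner_select ON storage.buckets FOR SELECT
    //       USING (owner::text = current_setting('request.jwt.claim.sub', true));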
let buckets: Vec<String> = sqlx::query_scalar("SELECT id FROM storage.buckets")
.fetch_all(&mut *tx)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
    // A "{project_ref}_{bucket_name}" prefix convention could enforce project
    // isolation here (filter by prefix, strip it on the way out), but that is
    // more complexity than the MVP needs; return whatever RLS exposes.
Ok(Json(buckets))
}

pub async fn list_objects(
State(state): State<StorageState>,
db: Option<Extension<PgPool>>,
Extension(auth_ctx): Extension<AuthContext>,
Extension(_project_ctx): Extension<ProjectContext>,
Path(bucket_id): Path<String>,
) -> Result<Json<Vec<FileObject>>, (StatusCode, String)> {
tracing::info!("Starting list_objects for bucket: {}", bucket_id);
let db = db.map(|Extension(p)| p).unwrap_or_else(|| state.db.clone());
let mut tx = db
.begin()
.await
.map_err(|e| {
tracing::error!("Failed to begin transaction: {}", e);
(StatusCode::INTERNAL_SERVER_ERROR, e.to_string())
})?;
let role_query = format!("SET LOCAL role = '{}'", auth_ctx.role);
sqlx::query(&role_query)
.execute(&mut *tx)
.await
.map_err(|e| {
tracing::error!("Failed to set role: {}", e);
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Failed to set role: {}", e),
)
})?;
if let Some(claims) = &auth_ctx.claims {
let sub_query = "SELECT set_config('request.jwt.claim.sub', $1, true)";
sqlx::query(sub_query)
.bind(&claims.sub)
.execute(&mut *tx)
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Failed to set claims: {}", e),
)
})?;
}
    // Project-level ownership checks (a project_id column, or pattern-matching
    // bucket_id) are deferred; for the MVP we trust RLS on storage.buckets.
let bucket_exists: Option<String> =
sqlx::query_scalar("SELECT id FROM storage.buckets WHERE id = $1")
.bind(&bucket_id)
.fetch_optional(&mut *tx)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
if bucket_exists.is_none() {
return Err((StatusCode::NOT_FOUND, "Bucket not found".to_string()));
}
let objects = sqlx::query_as::<_, FileObject>(
r#"
SELECT name, id, updated_at, created_at, last_accessed_at, metadata
FROM storage.objects
WHERE bucket_id = $1
"#,
)
.bind(&bucket_id)
.fetch_all(&mut *tx)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
Ok(Json(objects))
}

pub async fn upload_object(
State(state): State<StorageState>,
db: Option<Extension<PgPool>>,
Extension(auth_ctx): Extension<AuthContext>,
Extension(project_ctx): Extension<ProjectContext>,
Path((bucket_id, filename)): Path<(String, String)>,
request: Request,
) -> Result<impl IntoResponse, (StatusCode, String)> {
tracing::info!("Starting upload_object for bucket: {}, filename: {}", bucket_id, filename);
let content_type = request.headers().get(CONTENT_TYPE)
.and_then(|v| v.to_str().ok())
.unwrap_or("");
let data = if content_type.starts_with("multipart/form-data") {
let mut multipart = Multipart::from_request(request, &state).await
.map_err(|e| (StatusCode::BAD_REQUEST, e.to_string()))?;
let mut file_data = None;
        while let Ok(Some(field)) = multipart.next_field().await {
            // Accept the conventional "file" field name, or an unnamed field.
            if field.name() == Some("file") || field.name() == Some("") {
let bytes = field.bytes().await.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
file_data = Some(bytes);
break;
}
}
file_data.ok_or((StatusCode::BAD_REQUEST, "No file found in multipart".to_string()))?
} else {
// Raw body
let body = request.into_body();
body.collect().await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?
.to_bytes()
};
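    // Illustrative requests for the two accepted encodings (host and any path
    // prefix are placeholders; the route itself comes from lib.rs):
    //
    //   curl -X POST -F "file=@photo.png" http://<host>/object/my-bucket/photo.png
    //   curl -X POST --data-binary @photo.png -H "Content-Type: image/png" \
    //        http://<host>/object/my-bucket/photo.png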
let size = data.len();
tracing::info!("File size: {} bytes", size);
let db = db.map(|Extension(p)| p).unwrap_or_else(|| state.db.clone());
let mut tx = db
.begin()
.await
.map_err(|e| {
tracing::error!("Failed to begin transaction: {}", e);
(StatusCode::INTERNAL_SERVER_ERROR, e.to_string())
})?;
let role_query = format!("SET LOCAL role = '{}'", auth_ctx.role);
sqlx::query(&role_query)
.execute(&mut *tx)
.await
.map_err(|e| {
tracing::error!("Failed to set role: {}", e);
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Failed to set role: {}", e),
)
})?;
if let Some(claims) = &auth_ctx.claims {
let sub_query = "SELECT set_config('request.jwt.claim.sub', $1, true)";
sqlx::query(sub_query)
.bind(&claims.sub)
.execute(&mut *tx)
.await
.map_err(|e| {
tracing::error!("Failed to set claims: {}", e);
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Failed to set claims: {}", e),
)
})?;
}
let bucket_exists: Option<String> =
sqlx::query_scalar("SELECT id FROM storage.buckets WHERE id = $1")
.bind(&bucket_id)
.fetch_optional(&mut *tx)
.await
.map_err(|e| {
tracing::error!("Failed to check bucket existence: {}", e);
(StatusCode::INTERNAL_SERVER_ERROR, e.to_string())
})?;
if bucket_exists.is_none() {
tracing::warn!("Bucket not found: {}", bucket_id);
return Err((StatusCode::NOT_FOUND, "Bucket not found".to_string()));
}
let key = format!("{}/{}/{}", project_ctx.project_ref, bucket_id, filename);
tracing::info!("Uploading to S3 with key: {}", key);
state
.s3_client
.put_object()
.bucket(&state.bucket_name)
.key(&key)
.body(ByteStream::from(data))
.send()
.await
.map_err(|e| {
tracing::error!("S3 PutObject error: {:?}", e);
(StatusCode::INTERNAL_SERVER_ERROR, e.to_string())
})?;
tracing::info!("S3 upload successful");
let user_id = auth_ctx
.claims
.as_ref()
.and_then(|c| Uuid::parse_str(&c.sub).ok());
tracing::info!("Inserting metadata into DB");
let file_object = sqlx::query_as::<_, FileObject>(
r#"
INSERT INTO storage.objects (bucket_id, name, owner, metadata)
VALUES ($1, $2, $3, $4)
ON CONFLICT (bucket_id, name)
DO UPDATE SET updated_at = now(), metadata = $4
RETURNING name, id, updated_at, created_at, last_accessed_at, metadata
"#,
)
.bind(&bucket_id)
.bind(&filename)
.bind(user_id)
    .bind(serde_json::json!({ "size": size, "mimetype": "application/octet-stream" })) // TODO: persist the real content type instead of a hardcoded one
.fetch_one(&mut *tx)
.await
.map_err(|e| {
tracing::error!("DB Insert Object error: {:?}", e);
(StatusCode::FORBIDDEN, format!("Permission denied: {}", e))
})?;
tx.commit()
.await
.map_err(|e| {
tracing::error!("Commit error: {}", e);
(StatusCode::INTERNAL_SERVER_ERROR, e.to_string())
})?;
Ok((StatusCode::CREATED, Json(file_object)))
}

pub async fn download_object(
State(state): State<StorageState>,
db: Option<Extension<PgPool>>,
Extension(auth_ctx): Extension<AuthContext>,
Extension(project_ctx): Extension<ProjectContext>,
Path((bucket_id, filename)): Path<(String, String)>,
) -> Result<impl IntoResponse, (StatusCode, String)> {
let db = db.map(|Extension(p)| p).unwrap_or_else(|| state.db.clone());
let mut tx = db
.begin()
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
let role_query = format!("SET LOCAL role = '{}'", auth_ctx.role);
sqlx::query(&role_query)
.execute(&mut *tx)
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Failed to set role: {}", e),
)
})?;
if let Some(claims) = &auth_ctx.claims {
let sub_query = "SELECT set_config('request.jwt.claim.sub', $1, true)";
sqlx::query(sub_query)
.bind(&claims.sub)
.execute(&mut *tx)
.await
.map_err(|e| {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("Failed to set claims: {}", e),
)
})?;
}
let object_exists: Option<Uuid> =
sqlx::query_scalar("SELECT id FROM storage.objects WHERE bucket_id = $1 AND name = $2")
.bind(&bucket_id)
.bind(&filename)
.fetch_optional(&mut *tx)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
if object_exists.is_none() {
return Err((
StatusCode::NOT_FOUND,
"File not found or access denied".to_string(),
));
}
// S3 Key Namespacing: {project_ref}/{bucket_id}/{filename}
let key = format!("{}/{}/{}", project_ctx.project_ref, bucket_id, filename);
let resp = state
.s3_client
.get_object()
.bucket(&state.bucket_name)
.key(&key)
.send()
.await
.map_err(|_e| {
(
StatusCode::NOT_FOUND,
"File content not found in storage".to_string(),
)
})?;
let mut headers = HeaderMap::new();
if let Some(ct) = resp.content_type() {
if let Ok(val) = ct.parse() {
headers.insert("Content-Type", val);
}
}
let body_bytes = resp
.body
.collect()
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?
.into_bytes();
    // Don't echo file contents into the logs; record only the size, at debug level.
    tracing::debug!("Downloaded {} bytes for key {}", body_bytes.len(), key);
let body = Body::from(body_bytes);
Ok((headers, body))
}
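
The SET LOCAL role / request.jwt.claim.sub setup above is repeated verbatim in all four handlers. A possible refactor is sketched below; it is not part of this commit, and simply lifts the pattern the handlers already use into one function (the helper name is hypothetical):

// Hypothetical helper: open a transaction with the caller's RLS context applied.
async fn begin_rls_tx(
    db: &PgPool,
    auth_ctx: &AuthContext,
) -> Result<sqlx::Transaction<'static, sqlx::Postgres>, (StatusCode, String)> {
    let mut tx = db
        .begin()
        .await
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
    // Role names cannot be bound as parameters; assumes auth_ctx.role is allowlisted.
    sqlx::query(&format!("SET LOCAL role = '{}'", auth_ctx.role))
        .execute(&mut *tx)
        .await
        .map_err(|e| {
            (
                StatusCode::INTERNAL_SERVER_ERROR,
                format!("Failed to set role: {}", e),
            )
        })?;
    if let Some(claims) = &auth_ctx.claims {
        sqlx::query("SELECT set_config('request.jwt.claim.sub', $1, true)")
            .bind(&claims.sub)
            .execute(&mut *tx)
            .await
            .map_err(|e| {
                (
                    StatusCode::INTERNAL_SERVER_ERROR,
                    format!("Failed to set claims: {}", e),
                )
            })?;
    }
    Ok(tx)
}

Each handler body would then open with a single call: let mut tx = begin_rls_tx(&db, &auth_ctx).await?;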

storage/src/lib.rs (Normal file, 60 lines)

@@ -0,0 +1,60 @@
pub mod handlers;

use aws_config::BehaviorVersion;
use aws_sdk_s3::config::Credentials;
use aws_sdk_s3::{config::Region, Client};
use axum::{extract::DefaultBodyLimit, routing::{get, post}, Router};
use common::Config;
use handlers::StorageState;
use sqlx::PgPool;

pub async fn init(db: PgPool, config: Config) -> Router {
// Initialize S3 Client (MinIO)
let s3_endpoint =
std::env::var("S3_ENDPOINT").unwrap_or_else(|_| "http://localhost:9000".to_string());
let s3_access_key =
std::env::var("MINIO_ROOT_USER").unwrap_or_else(|_| "minioadmin".to_string());
let s3_secret_key =
std::env::var("MINIO_ROOT_PASSWORD").unwrap_or_else(|_| "minioadmin".to_string());
let s3_bucket = std::env::var("S3_BUCKET").unwrap_or_else(|_| "madbase".to_string());
let aws_config = aws_config::defaults(BehaviorVersion::latest())
.region(Region::new("us-east-1"))
.endpoint_url(&s3_endpoint)
.credentials_provider(Credentials::new(
s3_access_key,
s3_secret_key,
None,
None,
"static",
))
.load()
.await;
let s3_config = aws_sdk_s3::config::Builder::from(&aws_config)
.endpoint_url(&s3_endpoint)
        .force_path_style(true) // MinIO requires path-style bucket addressing
.build();
let s3_client = Client::from_conf(s3_config);
    // Create the bucket if it doesn't already exist; the result is ignored, which
    // also swallows real failures (acceptable for the MVP, worth revisiting).
    let _ = s3_client.create_bucket().bucket(&s3_bucket).send().await;
let state = StorageState {
db,
s3_client,
config,
bucket_name: s3_bucket,
};
Router::new()
.route("/bucket", get(handlers::list_buckets))
.route("/object/list/:bucket_id", post(handlers::list_objects))
.route(
"/object/:bucket_id/:filename",
get(handlers::download_object).post(handlers::upload_object),
)
.layer(DefaultBodyLimit::max(10 * 1024 * 1024)) // 10MB limit
.with_state(state)
}
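
For orientation, one way the returned Router might be mounted in a service binary (a sketch, not part of this commit; the nest path, bind address, and the Config constructor are assumptions):

// Hypothetical entry point wiring storage::init into an app.
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let db = sqlx::PgPool::connect(&std::env::var("DATABASE_URL")?).await?;
    let config = common::Config::default(); // assumed constructor
    let app = axum::Router::new().nest("/storage/v1", storage::init(db, config).await);
    let listener = tokio::net::TcpListener::bind("0.0.0.0:8080").await?;
    axum::serve(listener, app).await?;
    Ok(())
}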