Push new feature: auto config for S3.

This commit is contained in:
2026-04-25 00:37:07 +01:00
parent f75046e74d
commit 61fa4cc645
5 changed files with 1615 additions and 98 deletions

1298
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -6,6 +6,8 @@ description = "Opinionated Rust framework for queue-driven microservices"
license = "MIT"
[dependencies]
aws-config = "1"
aws-sdk-s3 = "1"
futures-util = "0.3"
lapin = "2"
reqwest = { version = "0.12", features = ["blocking", "json", "rustls-tls"], default-features = false }

View File

@@ -9,8 +9,9 @@ assumptions about a microservice:
1. A microservice listens to incoming requests on its own dedicated and
singular queue (RabbitMQ).
2. Incoming requests are in the form of a 64-bit unsigned integer (`u64`).
2. Microservices process requests via a `process` function, which takes one
argument: the incoming request (`u64`).
2. Microservices process requests via a `process` function, which takes three
arguments: the incoming request (`u64`), a `read_file` function, and a
`write_file` function.
3. The `process` function returns a set of IDs (also `u64`) that are the result
of processing the incoming request. Each of these IDs is also associated
with a "case variable" that is used for routing the result to the
@@ -21,8 +22,8 @@ assumptions about a microservice:
i. This service provides inbound queue name, as well as any outbound queues
and their corresponding case variables.
ii. It is also responsible for providing the RabbitMQ connection details
(host, port, username, password), and any bucket names if using S3 for
storage.
(host, port, username, password), and the object-storage host plus GNU
`pass` references for the S3 access key and secret key.
The `slingshot-microservice` framework handles setting up the RabbitMQ
connection, listening to the inbound queue and routing results based on case variables.
@@ -46,10 +47,25 @@ cargo build
```rust
use slingshot_microservice::Microservice;
use slingshot_microservice::{ProcessFuture, ReadFileFn, WriteFileFn};
use std::io::Write;
use tokio::io::AsyncReadExt;
fn process(request: u64) -> Vec<(u64, String)> {
// Example processing logic: return the request ID and a case variable
vec![(request, "case_a".to_string())]
fn process<'a>(
request: u64,
read_file: &'a ReadFileFn,
write_file: &'a WriteFileFn,
) -> ProcessFuture<'a, String> {
Box::pin(async move {
let mut input = String::new();
let mut reader = read_file("in", request)?;
reader.read_to_string(&mut input).await?;
let mut writer = write_file("out", request)?;
writer.write_all(input.as_bytes())?;
Ok(vec![(request, "case_a".to_string())])
})
}
fn main() {
@@ -111,12 +127,33 @@ to send results to based on a case variable that is either `false` or `true`:
The configuration service also provides the RabbitMQ connection details (host,
port, etc.):
Object storage credentials are fetched separately from
`https://sys-map.slingshot.cv/object-storage`. The access-key and secret-key
values returned there are GNU `pass` entry names, so the runtime resolves the
actual secrets with `pass show <key>` before constructing the S3 client.
When the microservice first starts up, it makes a request to the configuration
service to get the queue metadata. Then it starts to listen to the inbound
queue. Inbound requests are processed by the user-programmed `process`
function, which returns a set of tuples of the form `(result_id, case_variable)`.
The microservice then routes each `result_id` to the appropriate outbound
queue(s) based on the `case_variable`, using a process that looks like this:
Within each `process` pass:
1. `read_file(key, id)` treats `key` as a bucket reference such as `in`, not
as the canonical bucket name. On first use, the runtime fetches
`https://{HOSTNAME}/{MICROSERVICE_NAME}/{key}` to resolve the real bucket
name, caches that mapping, and then returns an async stream for object
`id` in that bucket using the AWS SDK (`get_object(...).body.into_async_read()`).
2. `write_file(key, id)` resolves `key` through the same cached lookup and
returns an opened local file handle for writing, staging the output for
`s3://{resolved_bucket}/{id}`.
3. After `process` returns, opened files are closed.
4. Then staged write files are uploaded to S3 with the AWS SDK, local staged
files are deleted, and local temporary directories are removed.
5. Only after file finalization is complete are output IDs published to
outbound queues.
The output queue routing step looks like this:
Peudocode:
```

View File

@@ -1,12 +1,27 @@
use slingshot_microservice::Microservice;
use slingshot_microservice::{Microservice, ProcessFuture, ReadFileFn, WriteFileFn};
use std::io::Write;
use tokio::io::AsyncReadExt;
fn process(request: u64) -> Vec<(u64, String)> {
vec![(request, "case_a".to_string())]
fn process<'a>(
request: u64,
read_file: &'a ReadFileFn,
write_file: &'a WriteFileFn,
) -> ProcessFuture<'a, String> {
Box::pin(async move {
let mut input = String::new();
let mut reader = read_file("in", request)?;
reader.read_to_string(&mut input).await?;
let mut writer = write_file("out", request)?;
writer.write_all(input.as_bytes())?;
Ok(vec![(request, "case_a".to_string())])
})
}
fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let microservice = Microservice::new("simple-microservice", "sys-map.slingshot.cv", process);
let microservice = Microservice::new("simple-microservice", "sys-map.slingshot.cv", process);
microservice.start()?;
Ok(())
microservice.start()?;
Ok(())
}

View File

@@ -1,8 +1,19 @@
use std::collections::HashMap;
use std::error::Error;
use std::future::Future;
use std::fs::{self, File};
use std::io::ErrorKind;
use std::pin::Pin;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::sync::Arc;
use std::sync::Mutex;
use std::sync::atomic::{AtomicU64, Ordering};
use aws_config::BehaviorVersion;
use aws_sdk_s3::Client;
use aws_sdk_s3::config::{Credentials, Region};
use aws_sdk_s3::primitives::ByteStream;
use futures_util::StreamExt;
use lapin::options::{
BasicAckOptions, BasicConsumeOptions, BasicNackOptions, BasicPublishOptions,
@@ -14,11 +25,21 @@ use lapin::{BasicProperties, Channel, Connection, ConnectionProperties};
use serde::Deserialize;
use serde::Serialize;
use serde_json::Value;
use tokio::io::AsyncRead;
use tracing_subscriber::EnvFilter;
type AnyError = Box<dyn Error + Send + Sync + 'static>;
pub type AnyError = Box<dyn Error + Send + Sync + 'static>;
type ProcessFn = dyn Fn(u64) -> Vec<(u64, Value)> + Send + Sync + 'static;
pub type ReadStream = Pin<Box<dyn AsyncRead + Send + Unpin + 'static>>;
pub type ReadFileFn = dyn Fn(&str, u64) -> Result<ReadStream, AnyError> + Send + Sync + 'static;
pub type WriteFileFn = dyn Fn(&str, u64) -> Result<File, AnyError> + Send + Sync + 'static;
pub type ProcessFuture<'a, C> = Pin<Box<dyn Future<Output = Result<Vec<(u64, C)>, AnyError>> + Send + 'a>>;
type ProcessFn = dyn for<'a> Fn(u64, &'a ReadFileFn, &'a WriteFileFn) -> ProcessFuture<'a, Value>
+ Send
+ Sync
+ 'static;
static REQUEST_FILE_CONTEXT_COUNTER: AtomicU64 = AtomicU64::new(1);
fn init_tracing() {
let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));
@@ -44,8 +65,9 @@ struct OutboundCase {
/// The microservice:
/// 1) Retrieves queue metadata from a configuration service,
/// 2) Consumes u64 IDs from an inbound queue,
/// 3) Runs the user-provided processing function,
/// 4) Routes each output ID to outbound queue(s) based on case variables.
/// 3) Runs the user-provided processing function with S3-backed file helpers,
/// 4) Closes/finalizes staged files and uploads writes,
/// 5) Routes each output ID to outbound queue(s) based on case variables.
pub struct Microservice {
name: String,
config_host: String,
@@ -55,24 +77,36 @@ pub struct Microservice {
impl Microservice {
/// Create a new microservice runtime.
///
/// `process` accepts an inbound request ID and returns a list of
/// `process` accepts an inbound request ID, a `read_file` function, and a
/// `write_file` function, and then returns a list of
/// `(result_id, case_variable)` tuples. Case variables can be any
/// serializable primitive, such as `String`, `bool`, or integers.
pub fn new<F, C>(name: impl Into<String>, config_host: impl Into<String>, process: F) -> Self
where
F: Fn(u64) -> Vec<(u64, C)> + Send + Sync + 'static,
C: Serialize,
F: for<'a> Fn(u64, &'a ReadFileFn, &'a WriteFileFn) -> ProcessFuture<'a, C>
+ Send
+ Sync
+ 'static,
C: Serialize + 'static,
{
init_tracing();
let process_wrapper = move |request: u64| -> Vec<(u64, Value)> {
process(request)
.into_iter()
.map(|(id, case)| {
let value = serde_json::to_value(case)
.expect("case variable must be serializable to JSON");
(id, value)
})
.collect()
let process_wrapper = move |
request: u64,
read_file: &ReadFileFn,
write_file: &WriteFileFn,
| -> ProcessFuture<'_, Value> {
let fut = process(request, read_file, write_file);
Box::pin(async move {
let outputs = fut.await?;
Ok(outputs
.into_iter()
.map(|(id, case)| {
let value = serde_json::to_value(case)
.expect("case variable must be serializable to JSON");
(id, value)
})
.collect())
})
};
Self {
@@ -91,8 +125,9 @@ impl Microservice {
let runtime = tokio::runtime::Builder::new_multi_thread()
.enable_all()
.build()?;
let s3_client = runtime.block_on(fetch_s3_client_from_sys_map())?;
runtime.block_on(self.run_consumer(config.inbound, route_map, amqp_url))
runtime.block_on(self.run_consumer(config.inbound, route_map, amqp_url, s3_client))
}
fn fetch_config(&self) -> Result<QueueConfig, AnyError> {
@@ -108,7 +143,9 @@ impl Microservice {
inbound_queue: String,
route_map: HashMap<String, Vec<String>>,
amqp_url: String,
s3_client: Arc<Client>,
) -> Result<(), AnyError> {
let bucket_name_cache = Arc::new(Mutex::new(HashMap::<String, String>::new()));
let connection = Connection::connect(&amqp_url, ConnectionProperties::default()).await?;
let channel = connection.create_channel().await?;
@@ -142,7 +179,51 @@ impl Microservice {
}
};
let outputs = (self.process)(request_id);
let file_context = Arc::new(Mutex::new(RequestFileContext::new(request_id)?));
let read_context = Arc::clone(&file_context);
let write_context = Arc::clone(&file_context);
let s3_read_client = Arc::clone(&s3_client);
let read_bucket_cache = Arc::clone(&bucket_name_cache);
let write_bucket_cache = Arc::clone(&bucket_name_cache);
let config_host = self.config_host.clone();
let microservice_name = self.name.clone();
let read_config_host = config_host.clone();
let read_microservice_name = microservice_name.clone();
let read_file = move |key: &str, id: u64| -> Result<ReadStream, AnyError> {
let bucket = resolve_bucket_name(
&read_config_host,
&read_microservice_name,
&read_bucket_cache,
key,
)?;
let mut guard = read_context
.lock()
.map_err(|e| format!("file context lock poisoned for read_file: {}", e))?;
guard.read_file(s3_read_client.as_ref(), &bucket, id)
};
let write_file = move |key: &str, id: u64| -> Result<File, AnyError> {
let bucket = resolve_bucket_name(
&config_host,
&microservice_name,
&write_bucket_cache,
key,
)?;
let mut guard = write_context
.lock()
.map_err(|e| format!("file context lock poisoned for write_file: {}", e))?;
guard.write_file(&bucket, id)
};
let outputs = (self.process)(request_id, &read_file, &write_file).await?;
{
let mut guard = file_context
.lock()
.map_err(|e| format!("file context lock poisoned for finalize: {}", e))?;
guard.finalize(s3_client.as_ref())?
}
publish_outputs(&channel, outputs, &route_map).await?;
delivery.ack(BasicAckOptions::default()).await?;
}
@@ -151,6 +232,141 @@ impl Microservice {
}
}
#[derive(Debug)]
struct PendingUpload {
bucket: String,
object_key: String,
local_path: PathBuf,
}
#[derive(Debug)]
struct RequestFileContext {
root_dir: PathBuf,
pending_uploads: Vec<PendingUpload>,
}
impl RequestFileContext {
fn new(request_id: u64) -> Result<Self, AnyError> {
let unique = REQUEST_FILE_CONTEXT_COUNTER.fetch_add(1, Ordering::Relaxed);
let root_dir = std::env::temp_dir().join(format!(
"slingshot-microservice-{}-{}-{}",
std::process::id(),
request_id,
unique
));
fs::create_dir_all(root_dir.join("write"))?;
Ok(Self {
root_dir,
pending_uploads: Vec::new(),
})
}
fn read_file(&mut self, s3_client: &Client, bucket: &str, id: u64) -> Result<ReadStream, AnyError> {
let bucket_name = bucket.to_string();
let object_key = id.to_string();
let client = s3_client.clone();
tokio::task::block_in_place(|| {
tokio::runtime::Handle::current().block_on(async move {
let response = client
.get_object()
.bucket(&bucket_name)
.key(&object_key)
.send()
.await?;
Ok::<ReadStream, AnyError>(Box::pin(response.body.into_async_read()))
})
})
}
fn write_file(&mut self, key: &str, id: u64) -> Result<File, AnyError> {
let object_key = id.to_string();
let local_path = self
.root_dir
.join("write")
.join(format!("{}.bin", self.pending_uploads.len()));
self.pending_uploads.push(PendingUpload {
bucket: key.to_string(),
object_key,
local_path: local_path.clone(),
});
Ok(File::create(local_path)?)
}
fn finalize(&mut self, s3_client: &Client) -> Result<(), AnyError> {
for upload in &self.pending_uploads {
upload_to_s3(s3_client, &upload.bucket, &upload.object_key, &upload.local_path)?;
remove_if_exists(&upload.local_path)?;
}
self.pending_uploads.clear();
remove_dir_if_exists(&self.root_dir)?;
Ok(())
}
}
impl Drop for RequestFileContext {
fn drop(&mut self) {
for upload in &self.pending_uploads {
let _ = remove_if_exists(&upload.local_path);
}
let _ = remove_dir_if_exists(&self.root_dir);
}
}
fn normalize_key_component(value: &str) -> String {
value.trim_matches('/').to_string()
}
fn upload_to_s3(
s3_client: &Client,
bucket: &str,
object_key: &str,
local_path: &Path,
) -> Result<(), AnyError> {
let client = s3_client.clone();
let bucket_name = bucket.to_string();
let key = object_key.to_string();
let path = local_path.to_path_buf();
tokio::task::block_in_place(|| {
tokio::runtime::Handle::current().block_on(async move {
let body = ByteStream::from_path(&path).await?;
client
.put_object()
.bucket(&bucket_name)
.key(&key)
.body(body)
.send()
.await?;
Ok::<(), AnyError>(())
})
})
}
fn remove_if_exists(path: &Path) -> Result<(), AnyError> {
match fs::remove_file(path) {
Ok(()) => Ok(()),
Err(err) if err.kind() == ErrorKind::NotFound => Ok(()),
Err(err) => Err(Box::new(err)),
}
}
fn remove_dir_if_exists(path: &Path) -> Result<(), AnyError> {
match fs::remove_dir_all(path) {
Ok(()) => Ok(()),
Err(err) if err.kind() == ErrorKind::NotFound => Ok(()),
Err(err) => Err(Box::new(err)),
}
}
#[derive(Debug, Deserialize)]
struct RabbitMqConfig {
port: Vec<u16>,
@@ -159,6 +375,15 @@ struct RabbitMqConfig {
pass: Vec<String>,
}
#[derive(Debug, Deserialize)]
struct ObjectStorageConfig {
host: Vec<String>,
#[serde(rename = "pass:access-key")]
pass_access_key: Vec<String>,
#[serde(rename = "pass:secret-key")]
pass_secret_key: Vec<String>,
}
fn fetch_rabbitmq_url_from_sys_map() -> Result<String, AnyError> {
let response = reqwest::blocking::get("https://sys-map.slingshot.cv/rabbitmq")?;
let response = response.error_for_status()?;
@@ -176,6 +401,70 @@ fn fetch_rabbitmq_url_from_sys_map() -> Result<String, AnyError> {
Ok(format!("amqp://{}:{}@{}:{}/%2f", username, pass, host, port))
}
async fn fetch_s3_client_from_sys_map() -> Result<Arc<Client>, AnyError> {
let response = reqwest::blocking::get("https://sys-map.slingshot.cv/object-storage")?;
let response = response.error_for_status()?;
let config = response.json::<ObjectStorageConfig>()?;
let host = single_value(&config.host, "host")?;
let access_key_ref = single_value(&config.pass_access_key, "pass:access-key")?;
let secret_key_ref = single_value(&config.pass_secret_key, "pass:secret-key")?;
let access_key = resolve_password_from_pass(&access_key_ref)?;
let secret_key = resolve_password_from_pass(&secret_key_ref)?;
info!("Fetched object storage config from sys-map: host={}", host);
let shared_config = aws_config::defaults(BehaviorVersion::latest())
.region(Region::new("us-east-1"))
.credentials_provider(Credentials::new(
access_key,
secret_key,
None,
None,
"sys-map",
))
.load()
.await;
let s3_config = aws_sdk_s3::config::Builder::from(&shared_config)
.endpoint_url(format!("https://{}", host))
.build();
Ok(Arc::new(Client::from_conf(s3_config)))
}
fn resolve_bucket_name(
config_host: &str,
microservice_name: &str,
cache: &Mutex<HashMap<String, String>>,
key: &str,
) -> Result<String, AnyError> {
{
let guard = cache
.lock()
.map_err(|e| format!("bucket-name cache lock poisoned: {}", e))?;
if let Some(bucket_name) = guard.get(key) {
return Ok(bucket_name.clone());
}
}
let url = bucket_mapping_url(config_host, microservice_name, key);
let response = reqwest::blocking::get(&url)?;
let response = response.error_for_status()?;
let bucket_name = response.text()?.trim().to_string();
if bucket_name.is_empty() {
return Err(format!("bucket mapping '{}' returned an empty bucket name", url).into());
}
let mut guard = cache
.lock()
.map_err(|e| format!("bucket-name cache lock poisoned: {}", e))?;
guard.insert(key.to_string(), bucket_name.clone());
Ok(bucket_name)
}
fn resolve_password_from_pass(pass_key: &str) -> Result<String, AnyError> {
let output = Command::new("pass").arg("show").arg(pass_key).output()?;
@@ -202,7 +491,7 @@ fn resolve_password_from_pass(pass_key: &str) -> Result<String, AnyError> {
fn single_value<T: Clone>(values: &[T], field_name: &str) -> Result<T, AnyError> {
if values.len() != 1 {
return Err(format!(
"sys-map.rabbitmq field '{}' must contain exactly one value, got {}",
"sys-map field '{}' must contain exactly one value, got {}",
field_name,
values.len()
)
@@ -220,6 +509,10 @@ fn config_url(host: &str, microservice_name: &str) -> String {
}
}
fn bucket_mapping_url(host: &str, microservice_name: &str, key: &str) -> String {
format!("{}/{}", config_url(host, microservice_name), key.trim_matches('/'))
}
fn build_route_map(outbound: &[OutboundCase]) -> HashMap<String, Vec<String>> {
let mut map = HashMap::new();
for entry in outbound {