use std::{
    io::SeekFrom,
    ops::Bound,
    sync::{
        Arc,
        atomic::{AtomicUsize, Ordering},
    },
    time::Duration,
};

use axum::body::BodyDataStream;
use base64::{Engine as _, prelude::BASE64_URL_SAFE_NO_PAD};
use bytes::{BufMut, Bytes, BytesMut};
use color_eyre::eyre::{self, WrapErr};
use hmac::Mac;
use img_parts::{DynImage, ImageEXIF};
use rand::distr::{Alphanumeric, SampleString};
use tokio::{
    fs::File,
    io::{AsyncReadExt, AsyncSeekExt},
};
use tokio_stream::StreamExt;
use tracing::{debug, error, info};
use twox_hash::XxHash3_128;

use crate::{cache, config, disk};

/// Various forms of upload data that can be sent to the client
pub enum UploadData {
    /// Send back the data from memory
    Cache(Bytes),
    /// Stream the file from disk to the client
    Disk(tokio::io::Take<File>),
}

/// Upload data and metadata needed to build a view response
pub struct UploadResponse {
    pub full_len: u64,
    pub range: (u64, u64),
    pub data: UploadData,
}

/// Non-error outcomes of an [`Engine::process`] call.
/// Some are rejections.
pub enum ProcessOutcome {
    /// The upload was successful.
    /// We give the user their file's URL (and deletion URL if one was created)
    Success {
        url: String,
        deletion_url: Option<String>,
    },

    /// Occurs when an upload exceeds the chosen maximum file size.
    UploadTooLarge,

    /// Occurs when a temporary upload is too big to fit in the cache.
    TemporaryUploadTooLarge,

    /// Occurs when the user-given lifetime is longer than we will allow
    TemporaryUploadLifetimeTooLong,
}

/// Non-error outcomes of an [`Engine::get`] call.
pub enum GetOutcome {
    /// Successfully read upload.
    Success(UploadResponse),

    /// The upload was not found anywhere
    NotFound,

    /// A range was requested that exceeds an upload's bounds
    RangeNotSatisfiable,
}

/// Type alias to make using HMAC SHA256 easier
type HmacSha256 = hmac::Hmac<sha2::Sha256>;

/// breeze engine
pub struct Engine {
    /// Cached count of uploaded files
    pub upl_count: AtomicUsize,

    /// Engine configuration
    pub cfg: config::EngineConfig,

    /// HMAC state initialised with the deletion secret (if present)
    pub deletion_hmac: Option<HmacSha256>,

    /// The in-memory cache that cached uploads are stored in
    cache: Arc<cache::Cache>,

    /// An interface to the on-disk upload store
    disk: disk::Disk,
}

/// Try to parse a `Range` header into an easier format to work with.
/// Assumes `full_len` is non-zero.
fn resolve_range(range: Option<headers::Range>, full_len: u64) -> Option<(u64, u64)> {
    let last_byte = full_len - 1;

    let (start, end) =
        if let Some((start, end)) = range.and_then(|r| r.satisfiable_ranges(full_len).next()) {
            // satisfiable_ranges will never return Excluded so this is ok
            let start = if let Bound::Included(start_incl) = start {
                start_incl
            } else {
                0
            };
            let end = if let Bound::Included(end_incl) = end {
                end_incl
            } else {
                last_byte
            };

            (start, end)
        } else {
            (0, last_byte)
        };

    // catch ranges we can't satisfy
    if end > last_byte || start > end {
        return None;
    }

    Some((start, end))
}
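// A minimal sanity check for `resolve_range`, written as a unit test. It
// assumes the `Range` type above comes from the `headers` crate (which is
// what provides `satisfiable_ranges`) and that axum's re-exported
// `HeaderValue` matches the one `headers` expects.
#[cfg(test)]
mod resolve_range_tests {
    use super::resolve_range;
    use axum::http::HeaderValue;
    use headers::Header as _;

    /// Parse a `Range` header value the way it would arrive in a request.
    fn parse(s: &'static str) -> headers::Range {
        let value = HeaderValue::from_static(s);
        headers::Range::decode(&mut std::iter::once(&value)).unwrap()
    }

    #[test]
    fn resolves_ranges() {
        // no Range header -> the whole file
        assert_eq!(resolve_range(None, 10), Some((0, 9)));
        // fully bounded range -> used as-is
        assert_eq!(resolve_range(Some(parse("bytes=0-4")), 10), Some((0, 4)));
        // open-ended range -> clamped to the last byte
        assert_eq!(resolve_range(Some(parse("bytes=2-")), 10), Some((2, 9)));
    }
}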
/// Calculate HMAC of field values.
pub fn update_hmac(hmac: &mut HmacSha256, saved_name: &str, hash: u128) {
    // mix deletion req fields into one buf
    let mut field_bytes = BytesMut::new();
    field_bytes.put(saved_name.as_bytes());
    field_bytes.put_u128(hash);

    // take the hmac
    hmac.update(&field_bytes);
}

/// How many bytes of a file should be used for hash calculation.
const SAMPLE_WANTED_BYTES: usize = 32768;

/// Format some info about an upload and hash it.
///
/// This should not change between versions!!
/// That would break deletion urls
fn calculate_hash(len: u64, data_sample: Bytes) -> u128 {
    let mut buf = BytesMut::new();

    buf.put_u64(len);
    buf.put(data_sample);

    XxHash3_128::oneshot(&buf)
}
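// A quick determinism check for `calculate_hash`. Deletion urls embed this
// value, so the same (len, sample) input must always produce the same hash,
// and the length must actually be mixed in. The sample bytes here are
// arbitrary test data.
#[cfg(test)]
mod calculate_hash_tests {
    use super::calculate_hash;
    use bytes::Bytes;

    #[test]
    fn stable_and_length_sensitive() {
        let sample = Bytes::from_static(b"some sampled bytes");

        let a = calculate_hash(18, sample.clone());
        let b = calculate_hash(18, sample.clone());
        let c = calculate_hash(19, sample);

        // same input -> same hash
        assert_eq!(a, b);
        // different len -> different hash
        assert_ne!(a, c);
    }
}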
impl Engine {
    /// Creates a new instance of the engine
    pub fn with_config(cfg: config::EngineConfig) -> Self {
        let deletion_hmac = cfg
            .deletion_secret
            .as_ref()
            .map(|s| HmacSha256::new_from_slice(s.as_bytes()).unwrap());

        let cache = cache::Cache::with_config(cfg.cache.clone());
        let disk = disk::Disk::with_config(cfg.disk.clone());

        let cache = Arc::new(cache);
        let cache_scanner = cache.clone();
        tokio::spawn(async move { cache_scanner.scanner().await });

        Self {
            // initialise our cached upload count. this doesn't include temp uploads!
            upl_count: AtomicUsize::new(disk.count()),
            deletion_hmac,
            cfg,
            cache,
            disk,
        }
    }

    /// Fetch an upload.
    ///
    /// This will first try to read from cache, and then disk after.
    /// If an upload is eligible to be cached, it will be cached and
    /// sent back as a cache response instead of a disk response.
    ///
    /// If there is a range, it is applied at the very end.
    pub async fn get(
        &self,
        saved_name: &str,
        range: Option<headers::Range>,
    ) -> eyre::Result<GetOutcome> {
        let data = if let Some(u) = self.cache.get(saved_name) {
            u
        } else {
            // now, check if we have it on disk
            let Some(mut f) = self.disk.open(saved_name).await? else {
                // file didn't exist
                return Ok(GetOutcome::NotFound);
            };

            let full_len = self.disk.len(&f).await?;

            // if possible, recache and send a cache response
            // else, send a disk response
            if self.cache.will_use(full_len) {
                // read file from disk
                let mut data = BytesMut::with_capacity(full_len.try_into()?);

                // read file from disk and if it fails at any point, return 500
                loop {
                    match f.read_buf(&mut data).await {
                        Ok(n) => {
                            if n == 0 {
                                break;
                            }
                        }
                        Err(e) => Err(e)?,
                    }
                }

                let data = data.freeze();

                // re-insert it into cache
                self.cache.add(saved_name, data.clone());

                data
            } else {
                let Some((start, end)) = resolve_range(range, full_len) else {
                    return Ok(GetOutcome::RangeNotSatisfiable);
                };

                let range_len = (end - start) + 1;

                f.seek(SeekFrom::Start(start)).await?;
                let f = f.take(range_len);

                let res = UploadResponse {
                    full_len,
                    range: (start, end),
                    data: UploadData::Disk(f),
                };
                return Ok(GetOutcome::Success(res));
            }
        };

        let full_len = data.len() as u64;
        let Some((start, end)) = resolve_range(range, full_len) else {
            return Ok(GetOutcome::RangeNotSatisfiable);
        };

        // cut down to range
        let data = data.slice((start as usize)..=(end as usize));

        // build response
        let res = UploadResponse {
            full_len,
            range: (start, end),
            data: UploadData::Cache(data),
        };
        Ok(GetOutcome::Success(res))
    }

    /// Check if we have an upload stored anywhere.
    ///
    /// This is only used to prevent `saved_name` collisions!!
    /// It is not used to deliver "not found" errors.
    pub async fn has(&self, saved_name: &str) -> bool {
        if self.cache.has(saved_name) {
            return true;
        }

        // sidestep handling the error properly
        // that way we can call this in gen_saved_name easier
        if self.disk.open(saved_name).await.is_ok_and(|f| f.is_some()) {
            return true;
        }

        false
    }

    /// Try to read a file and calculate a hash for it.
    pub async fn get_hash(&self, saved_name: &str) -> eyre::Result<Option<u128>> {
        // read out sample data and full len
        let (data_sample, len) = if let Some(full_data) = self.cache.get(saved_name) {
            // we found it in cache! take as many bytes as we can
            let taking = full_data.len().min(SAMPLE_WANTED_BYTES);
            let data = full_data.slice(0..taking);
            let len = full_data.len() as u64;

            info!("data len is {}", data.len());

            (data, len)
        } else {
            // not in cache, so try disk
            let Some(mut f) = self.disk.open(saved_name).await? else {
                // not found there either, so we just don't have it
                return Ok(None);
            };

            // find len..
            let len = f.seek(SeekFrom::End(0)).await?;
            f.rewind().await?;

            // only take wanted # of bytes for read
            let mut f = f.take(SAMPLE_WANTED_BYTES as u64);

            // try to read
            let mut data = Vec::with_capacity(SAMPLE_WANTED_BYTES);
            f.read_to_end(&mut data).await?;
            let data = Bytes::from(data);

            (data, len)
        };

        // calculate hash
        Ok(Some(calculate_hash(len, data_sample)))
    }

    /// Generate a new saved name for an upload.
    ///
    /// If it picks a name that already exists, it will try again.
    pub async fn gen_saved_name(&self, ext: Option<String>) -> String {
        loop {
            // generate a 6-character alphanumeric string
            let mut saved_name: String = Alphanumeric.sample_string(&mut rand::rng(), 6);

            // if we have an extension, add it now
            if let Some(ref ext) = ext {
                saved_name.push('.');
                saved_name.push_str(ext);
            }

            if !self.has(&saved_name).await {
                break saved_name;
            } else {
                // there was a name collision. loop and try again
                info!("name collision! saved_name = {}", saved_name);
            }
        }
    }

    /// Wipe out an upload from all storage.
    ///
    /// This is for deleting failed uploads only!!
    pub async fn remove(&self, saved_name: &str) -> eyre::Result<()> {
        info!(saved_name, "!! removing upload");

        self.cache.remove(saved_name);
        self.disk
            .remove(saved_name)
            .await
            .wrap_err("failed to remove file from disk")?;

        info!("!! successfully removed upload");

        Ok(())
    }

    /// Save a file to disk, and optionally cache.
    ///
    /// This also handles custom file lifetimes and EXIF data removal.
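    ///
    /// If the upload is cacheable, is a supported image type, is small enough
    /// to strip, and EXIF removal was requested, the whole body is coalesced
    /// in memory so the EXIF data can be stripped before it is handed to the
    /// i/o task; otherwise chunks are forwarded to disk as they arrive.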
    pub async fn save(
        &self,
        saved_name: &str,
        provided_len: u64,
        mut use_cache: bool,
        mut stream: BodyDataStream,
        lifetime: Option<Duration>,
        keep_exif: bool,
    ) -> eyre::Result<(Bytes, u64)> {
        // if we're using cache, make some space to store the upload in
        let mut data = if use_cache {
            BytesMut::with_capacity(provided_len.try_into()?)
        } else {
            BytesMut::new()
        };

        // don't begin a disk save if we're using temporary lifetimes
        let tx = if lifetime.is_none() {
            Some(self.disk.start_save(saved_name))
        } else {
            None
        };

        // whether or not we are going to coalesce the data
        // in order to strip the exif data at the end,
        // instead of just sending it off to the i/o task
        let coalesce_and_strip = use_cache
            && matches!(
                std::path::Path::new(saved_name)
                    .extension()
                    .map(|s| s.to_str()),
                Some(Some("png" | "jpg" | "jpeg" | "webp" | "tiff"))
            )
            && !keep_exif
            && provided_len <= self.cfg.max_strip_len;

        // buffer of sampled data for the deletion hash
        let mut hash_sample = BytesMut::with_capacity(SAMPLE_WANTED_BYTES);
        // actual number of bytes processed
        let mut observed_len = 0;

        // read and save upload
        while let Some(chunk) = stream.next().await {
            // if we error on a chunk, fail out
            let chunk = chunk?;

            // if we have an i/o task, send it off
            // also cloning this is okay because it's a Bytes
            if !coalesce_and_strip {
                if let Some(ref tx) = tx {
                    debug!("sending chunk to i/o task");
                    tx.send(chunk.clone())
                        .wrap_err("failed to send chunk to i/o task!")?;
                }
            }

            // add to sample if we need to
            let wanted = SAMPLE_WANTED_BYTES - hash_sample.len();
            if wanted != 0 {
                // take as many bytes as we can ...
                let taking = chunk.len().min(wanted);
                hash_sample.extend_from_slice(&chunk[0..taking]);
            }

            // record new len
            observed_len += chunk.len() as u64;

            if use_cache {
                debug!("receiving data into buffer");

                if data.len() + chunk.len() > data.capacity() {
                    info!(
                        "the amount of data sent exceeds the content-length provided by the client! caching will be cancelled for this upload."
                    );

                    // if we receive too much data, drop the buffer and stop using cache (it is still okay to use disk, probably)
                    data = BytesMut::new();
                    use_cache = false;
                } else {
                    data.put(chunk);
                }
            }
        }

        let data = data.freeze();

        // if we coalesced the data instead of streaming to disk,
        // strip the exif data and send it off now
        let data = if coalesce_and_strip {
            // strip the exif if we can
            // if we can't, then oh well
            let data = if let Ok(Some(data)) = DynImage::from_bytes(data.clone()).map(|o| {
                o.map(|mut img| {
                    img.set_exif(None);
                    img.encoder().bytes()
                })
            }) {
                info!("stripped exif data");
                data
            } else {
                info!("failed to strip exif data");
                data
            };

            // send what we did over to the i/o task, all in one chunk
            if let Some(ref tx) = tx {
                debug!("sending filled buffer to i/o task");
                tx.send(data.clone())
                    .wrap_err("failed to send coalesced buffer to i/o task!")?;
            }

            data
        } else {
            // or, we didn't do that
            // keep the data as it is
            data
        };

        // insert upload into cache if we're using it
        if use_cache {
            info!("caching upload!");
            match lifetime {
                Some(lt) => self.cache.add_with_lifetime(saved_name, data, lt, false),
                None => self.cache.add(saved_name, data),
            };
        }

        Ok((hash_sample.freeze(), observed_len))
    }
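    /// Process an incoming upload end-to-end.
    ///
    /// Validates the size and lifetime limits, generates a saved name,
    /// streams the body to storage via [`Engine::save`], and builds the
    /// final URL (plus a deletion URL if a deletion secret is configured).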
    pub async fn process(
        &self,
        ext: Option<String>,
        provided_len: u64,
        stream: BodyDataStream,
        lifetime: Option<Duration>,
        keep_exif: bool,
    ) -> eyre::Result<ProcessOutcome> {
        // if the upload size is greater than our max file size, deny it now
        if self.cfg.max_upload_len.is_some_and(|l| provided_len > l) {
            return Ok(ProcessOutcome::UploadTooLarge);
        }

        // if the upload size is smaller than the specified maximum, we use the cache!
        let use_cache = self.cache.will_use(provided_len);

        // if a temp file is too big for cache, reject it now
        if lifetime.is_some() && !use_cache {
            return Ok(ProcessOutcome::TemporaryUploadTooLarge);
        }

        // if a temp file's lifetime is too long, reject it now
        if lifetime.is_some_and(|lt| lt > self.cfg.max_temp_lifetime) {
            return Ok(ProcessOutcome::TemporaryUploadLifetimeTooLong);
        }

        // generate the file name
        let saved_name = self.gen_saved_name(ext).await;

        // save it
        let save_result = self
            .save(
                &saved_name,
                provided_len,
                use_cache,
                stream,
                lifetime,
                keep_exif,
            )
            .await;

        // handle result
        let (hash_sample, len) = match save_result {
            // okay, so just extract metadata
            Ok(m) => m,

            // if anything fails, delete the upload and return the error
            Err(err) => {
                error!("failed processing upload!");

                self.remove(&saved_name).await?;
                return Err(err);
            }
        };

        // if deletion urls are enabled, create one
        let deletion_url = self.deletion_hmac.clone().map(|mut hmac| {
            // calculate hash of file metadata
            let hash = calculate_hash(len, hash_sample);
            let mut hash_bytes = BytesMut::new();
            hash_bytes.put_u128(hash);
            let hash_b64 = BASE64_URL_SAFE_NO_PAD.encode(&hash_bytes);

            // take hmac
            update_hmac(&mut hmac, &saved_name, hash);
            let out = hmac.finalize().into_bytes();
            let out_b64 = BASE64_URL_SAFE_NO_PAD.encode(out);

            // format deletion url
            format!(
                "{}/del?name={saved_name}&hash={hash_b64}&hmac={out_b64}",
                self.cfg.base_url
            )
        });

        // format the url to send back
        let url = format!("{}/p/{saved_name}", self.cfg.base_url);

        // if all goes well, increment the cached upload counter
        self.upl_count.fetch_add(1, Ordering::Relaxed);

        info!("finished processing upload!");

        Ok(ProcessOutcome::Success { url, deletion_url })
    }
}
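// A sketch of how a deletion token round-trips through `update_hmac`,
// assuming the same HMAC construction the engine uses. A real deletion
// handler would recompute the tag from the stored upload's fields and
// compare it against the one embedded in the deletion url; the key, name
// and hash used here are arbitrary test values.
#[cfg(test)]
mod deletion_hmac_tests {
    use super::{HmacSha256, update_hmac};
    use hmac::Mac;

    #[test]
    fn hmac_round_trip() {
        let key = b"test deletion secret";

        // "issue" a tag, like `process` does when building a deletion url
        let mut signer = HmacSha256::new_from_slice(key).unwrap();
        update_hmac(&mut signer, "abc123.png", 42u128);
        let tag = signer.finalize().into_bytes();

        // "verify" it later with a fresh hmac over the same fields
        let mut verifier = HmacSha256::new_from_slice(key).unwrap();
        update_hmac(&mut verifier, "abc123.png", 42u128);
        verifier.verify_slice(&tag).expect("tag should verify");
    }
}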