From b65b6ca0027340a2c8ba3e77fc9cac586ec2798b Mon Sep 17 00:00:00 2001 From: min Date: Fri, 29 May 2026 17:20:27 -0400 Subject: [PATCH] disk refactor a little --- Cargo.lock | 17 +++++++--------- Cargo.toml | 8 +++++++- src/config.rs | 31 +++++++++++++++++++++++++---- src/delete.rs | 5 +---- src/disk.rs | 26 +++++++++++++++++++++++-- src/engine.rs | 54 ++++++++++++++++++++++++--------------------------- src/main.rs | 10 ++-------- 7 files changed, 93 insertions(+), 58 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b7ba9c3..86e254e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -240,7 +240,7 @@ dependencies = [ "tracing", "tracing-subscriber", "tracing-test", - "twox-hash", + "xxhash-rust", ] [[package]] @@ -1545,15 +1545,6 @@ dependencies = [ "syn", ] -[[package]] -name = "twox-hash" -version = "2.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" -dependencies = [ - "rand", -] - [[package]] name = "typenum" version = "1.20.0" @@ -1724,6 +1715,12 @@ version = "0.57.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e" +[[package]] +name = "xxhash-rust" +version = "0.8.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" + [[package]] name = "zerocopy" version = "0.8.48" diff --git a/Cargo.toml b/Cargo.toml index 8ea3c08..dd88990 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,12 @@ edition = "2024" [profile.dev.package] tikv-jemalloc-sys = { opt-level = 3 } +backtrace = { opt-level = 3 } + +[profile.release] +lto = true +codegen-units = 1 +debug = "line-tables-only" [dependencies] argh = "0.1.12" @@ -44,7 +50,7 @@ toml = { version = "0.9", default-features = false, features = [ ] } tracing = "0.1" tracing-subscriber = "0.3" -twox-hash = "2" +xxhash-rust = { version = "0.8", features = ["xxh3"] } [dev-dependencies] http-body-util = "0.1" diff --git a/src/config.rs b/src/config.rs index 41aedcb..c627cca 100644 --- a/src/config.rs +++ b/src/config.rs @@ -35,10 +35,6 @@ pub struct EngineConfig { /// If this secret is leaked, anyone can delete any file. Be careful!!! pub deletion_secret: Option, - /// Maximum size of an upload that will be accepted. - /// Files above this size can not be uploaded. - pub max_upload_len: Option, - /// Maximum lifetime of a temporary upload #[serde_as(as = "DurationSeconds")] pub max_temp_lifetime: Duration, @@ -55,10 +51,37 @@ pub struct EngineConfig { pub motd: String, } +#[serde_as] +#[derive(Deserialize, Default, Clone)] +pub struct DeleteWhenConfig { + /// Condition that is satisfied when + /// an upload reaches the specified age. + /// (in seconds) + #[serde_as(as = "DurationSeconds")] + pub older_than: Duration, + + /// Condition that is satisfied when + /// an upload has not been accessed + /// for the specified duration. (in seconds) + #[serde_as(as = "DurationSeconds")] + pub not_accessed_for: Duration, +} + #[derive(Deserialize, Clone)] pub struct DiskConfig { /// Location on disk the uploads are to be saved to pub save_path: PathBuf, + + /// Maximum size of an upload that will be + /// saved on this disk. Anything higher will + /// skip this disk. If no disks are suitable, + /// the upload will be rejected. (status 413) + pub max_save_len: Option, + + /// When this "AND" condition is satisfied + /// for an upload, it will be deleted. + #[serde(default)] + pub delete_when: DeleteWhenConfig, } #[serde_as] diff --git a/src/delete.rs b/src/delete.rs index 49fdb04..0841e2d 100644 --- a/src/delete.rs +++ b/src/delete.rs @@ -1,4 +1,4 @@ -use std::sync::{Arc, atomic::Ordering}; +use std::sync::Arc; use axum::extract::{Query, State}; use base64::{Engine as _, prelude::BASE64_URL_SAFE_NO_PAD}; @@ -82,8 +82,5 @@ pub async fn delete( return (StatusCode::INTERNAL_SERVER_ERROR, "Delete failed"); } - // decrement upload count - engine.upl_count.fetch_sub(1, Ordering::Relaxed); - (StatusCode::OK, "Deleted successfully!") } diff --git a/src/disk.rs b/src/disk.rs index 2de56fc..07250bc 100644 --- a/src/disk.rs +++ b/src/disk.rs @@ -16,8 +16,23 @@ pub struct Disk { } impl Disk { - pub fn with_config(cfg: config::DiskConfig) -> Self { - Self { cfg } + pub fn with_config(cfg: config::DiskConfig) -> io::Result { + // check path + if !cfg.save_path.exists() { + return Err(io::Error::new( + io::ErrorKind::NotFound, + "the save path does not exist", + )); + } + if !cfg.save_path.is_dir() { + return Err(io::Error::new( + io::ErrorKind::NotADirectory, + "the save path is not a directory", + )); + } + + // return + Ok(Self { cfg }) } /// Counts the number of files saved to disk we have @@ -31,6 +46,13 @@ impl Disk { }) } + /// Returns whether or not an upload + /// is allowed to be stored with this disk + #[inline] + pub fn will_use(&self, length: u64) -> bool { + self.cfg.max_save_len.is_none_or(|l| length <= l) + } + /// Formats the path on disk for a `saved_name`. fn path_for(&self, saved_name: &str) -> PathBuf { // try to prevent path traversal by ignoring everything except the file name diff --git a/src/engine.rs b/src/engine.rs index 7ef1197..9b1d29e 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -21,7 +21,7 @@ use tokio::{ }; use tokio_stream::StreamExt; use tracing::{debug, error, info}; -use twox_hash::XxHash3_128; +use xxhash_rust::xxh3; use crate::{cache, config, disk}; @@ -90,23 +90,7 @@ pub struct Engine { cache: Arc, /// An interface to the on-disk upload store - disk: Arc, -} - -/// Wipe out an upload from all storage. -/// * Intended for deletion URLs and failed uploads -/// * Separated from [`Engine`] for use in [`disk::Disk`] -async fn remove(cache: &cache::Cache, disk: &disk::Disk, saved_name: &str) -> eyre::Result<()> { - info!(saved_name, "!! removing upload"); - - cache.remove(saved_name); - disk.remove(saved_name) - .await - .wrap_err("failed to remove file from disk")?; - - info!("!! successfully removed upload"); - - Ok(()) + disk: disk::Disk, } /// Try to parse a `Range` header into an easier format to work with @@ -184,7 +168,7 @@ fn calculate_hash(len: u64, data_sample: Bytes) -> u128 { buf.put_u64(len); buf.put(data_sample); - XxHash3_128::oneshot(&buf) + xxh3::xxh3_128(&buf) } impl Engine { @@ -207,7 +191,7 @@ impl Engine { cfg, cache, - disk: Arc::new(disk), + disk, }) } @@ -387,17 +371,30 @@ impl Engine { } /// Wipe out an upload from all storage. - /// - /// (Intended for deletion URLs and failed uploads) + /// * Intended for deletion URLs and failed uploads pub async fn remove(&self, saved_name: &str) -> eyre::Result<()> { - remove(&self.cache, &self.disk, saved_name).await + info!(saved_name, "!! removing upload"); + + // removals + self.cache.remove(saved_name); + self.disk + .remove(saved_name) + .await + .wrap_err("failed to remove file from disk")?; + + info!("!! successfully removed upload"); + + // decrement upload count + self.upl_count.fetch_sub(1, Ordering::Relaxed); + + Ok(()) } /// Save a file to disk, and optionally cache. /// /// This also handles custom file lifetimes and EXIF data removal. pub async fn save( - &self, + self: &Arc, saved_name: &str, provided_len: u64, mut use_cache: bool, @@ -415,14 +412,13 @@ impl Engine { // don't begin a disk save if we're using temporary lifetimes let tx = if lifetime.is_none() { Some(self.disk.start_save(saved_name, { - let cache = self.cache.clone(); - let disk = self.disk.clone(); + let me = self.clone(); let saved_name = saved_name.to_string(); async move |err| { // try to delete the failed upload error!(%saved_name, %err, "error while saving file to disk"); - if let Err(err) = remove(&cache, &disk, &saved_name).await { + if let Err(err) = me.remove(&saved_name).await { error!(%saved_name, %err, "IO error callback failed to remove upload"); } } @@ -539,7 +535,7 @@ impl Engine { } pub async fn process( - &self, + self: &Arc, ext: Option, provided_len: u64, stream: BodyDataStream, @@ -547,7 +543,7 @@ impl Engine { keep_exif: bool, ) -> eyre::Result { // if the upload size is greater than our max file size, deny it now - if self.cfg.max_upload_len.is_some_and(|l| provided_len > l) { + if !self.disk.will_use(provided_len) { return Ok(ProcessOutcome::UploadTooLarge); } diff --git a/src/main.rs b/src/main.rs index 41570df..3a4a285 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,7 @@ use std::{path::PathBuf, sync::Arc}; use argh::FromArgs; -use color_eyre::eyre::{self, Context, bail}; +use color_eyre::eyre::{self, Context}; use engine::Engine; use axum::{ @@ -73,19 +73,13 @@ async fn main() -> eyre::Result<()> { .init(); // Check config - { - let save_path = cfg.disk.save_path.clone(); - if !save_path.exists() || !save_path.is_dir() { - bail!("the save path does not exist or is not a directory! this is invalid"); - } - } if cfg.engine.upload_key.is_empty() { warn!("engine upload_key is empty! no key will be required for uploading new files"); } // Create backends let cache = Arc::new(Cache::with_config(cfg.cache)?); - let disk = Disk::with_config(cfg.disk); + let disk = Disk::with_config(cfg.disk)?; // Start cache scanner tokio::spawn({