use std::{ collections::HashSet, path::{Path, PathBuf}, sync::{Arc, Weak}, }; use bytes::Bytes; use color_eyre::eyre::{self, bail}; use dashmap::DashMap; use tokio::{ fs::File, io::{self, AsyncWriteExt}, sync::{Mutex, mpsc}, }; use crate::config; /// An array of disk file stores with /// a similar API to the cache. pub struct DiskArray { /// Master set of disks. disks: Vec>, /// In-memory index of upload locations. /// /// [Weak] is used to make it easier to /// drop disks if a future update does that locations: DashMap>>, } impl DiskArray { pub fn with_configs(cfgs: Vec) -> eyre::Result { // create all let mut seen_save_paths = HashSet::new(); let mut disks = Vec::new(); let locations: DashMap<_, Vec<_>> = DashMap::new(); for cfg in cfgs { // make sure save paths are unique // if two disks have the same save path, // they will both try to save new uploads // to the exact same spot which probably // causes a lot of problems. also deletes // will try to delete the same file etcetc if !seen_save_paths.insert(cfg.save_path.clone()) { bail!("disk has duplicate save path: {:?}", cfg.save_path); } // init disk let disk = Arc::new(Disk::with_config(cfg)?); // index files for saved_name in disk.files()? { let saved_name = saved_name?; // add disk reference let disk = Arc::downgrade(&disk); let mut on_disks = locations.entry(saved_name).or_default(); on_disks.push(disk); } // add to disks disks.push(disk); } // return Ok(Self { disks, locations }) } /// Returns the amount of uploads stored /// across all disks pub fn count(&self) -> usize { self.locations.len() } /// Returns whether or not an upload /// can be stored on any disk pub fn will_use(&self, length: u64) -> bool { self.disks.iter().any(|d| d.will_use(length)) } /// Fast-path way to check if we have /// an upload using location index pub fn has(&self, saved_name: &str) -> bool { self.locations.contains_key(saved_name) } /// Get the size of an upload's file pub async fn len(&self, f: &File) -> io::Result { Ok(f.metadata().await?.len()) } /// Remove an upload from all disks pub async fn remove(&self, saved_name: &str) -> io::Result<()> { // find what disks the upload is stored on // (removing from location index) println!("get 1"); let Some((_, on_disks)) = self.locations.remove(saved_name) else { // that's not an upload return Err(io::Error::new( io::ErrorKind::NotFound, "file to remove wasn't found", )); }; println!("get 2"); // delete from all disks its stored on for disk in &on_disks { let Some(disk) = disk.upgrade() else { // dead disk so whatever continue; }; // try to delete file disk.remove(saved_name).await?; } // return Ok(()) } /// Start a save I/O task that directs /// to all disks pub fn start_save< Fut: Future + Send + 'static, F: FnOnce(eyre::Error) -> Fut + Send + 'static, >( &self, saved_name: &str, length: u64, fail_callback: F, ) -> mpsc::Sender { let (tx, mut rx) = mpsc::channel::(1000); // setup oneshot fail callback let fail_callback = Arc::new(Mutex::new(Some(fail_callback))); // add to location index let mut on_disks = self.locations.entry(saved_name.to_string()).or_default(); // start save tasks let mut txs = Vec::new(); for disk in &self.disks { if !disk.will_use(length) { // we don't want that really continue; } // update location index { let disk = Arc::downgrade(disk); on_disks.push(disk); } // start task let fail_callback = fail_callback.clone(); let tx = disk.start_save(saved_name, async move |err| { // run callback if we can if let Some(fail_callback) = fail_callback.lock().await.take() { fail_callback(err.into()).await; } // also so i remember- fail_callback is how late errors // get handled. by the time it is called we don't need // to care about channels }); txs.push(tx); } // start our bg task tokio::spawn(async move { while let Some(chunk) = rx.recv().await { // send to all disk tasks for tx in &txs { // handle error. if let Err(err) = tx.send(chunk.clone()).await { // try to report that if let Some(fail_callback) = fail_callback.lock().await.take() { fail_callback(err.into()).await; } // we dont want to talk // with dead channels return; } } } }); tx } /// Opens an upload on the first disk /// that works /// (in order of definition in config) pub async fn open(&self, saved_name: &str) -> io::Result> { // get location entry let Some(on_disks) = self.locations.get(saved_name) else { // that's not found..... return Ok(None); }; // start trying disks for disk in on_disks.iter() { let Some(disk) = disk.upgrade() else { // no more that disk :( // it would be nice to remove it from list continue; }; // try to open if let Some(f) = disk.open(saved_name).await? { return Ok(Some(f)); } } // none worked.... // it would be nice to delete the entry Ok(None) } } /// Provides an API to access the disk file store /// like we access the cache. struct Disk { cfg: config::DiskConfig, } impl Disk { fn with_config(cfg: config::DiskConfig) -> io::Result { // check path if !cfg.save_path.exists() { return Err(io::Error::new( io::ErrorKind::NotFound, "the save path does not exist", )); } if !cfg.save_path.is_dir() { return Err(io::Error::new( io::ErrorKind::NotADirectory, "the save path is not a directory", )); } // return Ok(Self { cfg }) } /// Returns an iterator of stored file names. fn files(&self) -> io::Result>> { Ok(std::fs::read_dir(&self.cfg.save_path)?.filter_map(|e| { // todo: refactor when try blocks are out^^ (|| { let e = e?; Ok(e.file_type()? .is_file() .then_some(e.file_name().into_string().ok()) .flatten()) })() .transpose() })) } /// Returns whether or not an upload /// is allowed to be stored with this disk #[inline] pub fn will_use(&self, length: u64) -> bool { self.cfg.max_save_len.is_none_or(|l| length <= l) } /// Formats the path on disk for a `saved_name`. fn path_for(&self, saved_name: &str) -> PathBuf { // try to prevent path traversal by ignoring everything except the file name let name = Path::new(saved_name).file_name().unwrap_or_default(); let mut p: PathBuf = self.cfg.save_path.clone(); p.push(name); p } /// Try to open a file on disk, and if we didn't find it, /// then return [`None`]. async fn open(&self, saved_name: &str) -> io::Result> { let p = self.path_for(saved_name); match File::open(p).await { Ok(f) => Ok(Some(f)), Err(e) => match e.kind() { io::ErrorKind::NotFound => Ok(None), _ => Err(e)?, // some other error, send it back }, } } /// Remove an upload from disk. async fn remove(&self, saved_name: &str) -> io::Result<()> { let p = self.path_for(saved_name); tokio::fs::remove_file(p).await } /// Create a background I/O task fn start_save Fut + Send + 'static>( &self, saved_name: &str, fail_callback: F, ) -> mpsc::Sender { // start a task that handles saving files to disk (we can save to cache/disk in parallel that way) // a large buffer size is chosen so uploads can be received quickly, // but with less possibility of running out of memory. // (thats probably only possible w very high link speed tho......) let (tx, mut rx) = mpsc::channel::(1000); let p = self.path_for(saved_name); tokio::spawn(async move { // create file to save upload to let mut file = match File::create(p).await { Ok(f) => f, Err(err) => { tracing::error!(%err, "could not open file! make sure your upload path is valid"); return; } }; // receive chunks and save them to file while let Some(chunk) = rx.recv().await { tracing::debug!(length = chunk.len(), "writing chunk to disk"); if let Err(err) = file.write_all(&chunk).await { drop(rx); fail_callback(err).await; return; } } // flush to disk if let Err(err) = file.flush().await { fail_callback(err).await; return; } // sync data+metadata to disk if let Err(err) = file.sync_all().await { fail_callback(err).await; } }); tx } }