From 698643988aa743e92146305fa8ec7da3bcc93636 Mon Sep 17 00:00:00 2001 From: minish Date: Sat, 25 May 2024 01:21:36 -0400 Subject: [PATCH] v0.2.0 release --- Cargo.lock | 317 ++++++++++++++++--------------- Cargo.toml | 10 +- README.md | 28 ++- archived/.gitignore | 2 - archived/Cargo.lock | 30 --- archived/Cargo.toml | 9 - archived/LICENSE | 22 --- archived/src/entry.rs | 26 --- archived/src/lib.rs | 172 ----------------- src/cache.rs | 243 ++++++++++++++++++++++++ src/config.rs | 19 +- src/disk.rs | 84 +++++++++ src/engine.rs | 421 ++++++++++++++++++++++-------------------- src/main.rs | 13 +- src/new.rs | 74 +++++--- src/view.rs | 79 ++------ 16 files changed, 844 insertions(+), 705 deletions(-) delete mode 100644 archived/.gitignore delete mode 100644 archived/Cargo.lock delete mode 100644 archived/Cargo.toml delete mode 100644 archived/LICENSE delete mode 100644 archived/src/entry.rs delete mode 100644 archived/src/lib.rs create mode 100644 src/cache.rs create mode 100644 src/disk.rs diff --git a/Cargo.lock b/Cargo.lock index 109f947..f0f913e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -78,7 +78,7 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" dependencies = [ - "windows-sys 0.48.0", + "windows-sys", ] [[package]] @@ -88,16 +88,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0699d10d2f4d628a98ee7b57b289abbc98ff3bad977cb3152709d4bf2330628" dependencies = [ "anstyle", - "windows-sys 0.48.0", + "windows-sys", ] [[package]] -name = "archived" -version = "0.2.0" -dependencies = [ - "bytes", - "once_cell", -] +name = "anyhow" +version = "1.0.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "080e9890a082662b09c1ad45f567faeeb47f22b5fb23895fbe1e651e718e25ca" [[package]] name = "async-recursion" @@ -118,7 +116,16 @@ checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.48", +] + +[[package]] +name = "atomic-time" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3424654267706036b8c23c0abadc4e0412416b9d0208d7ebe1e6978c8c31fec0" +dependencies = [ + "portable-atomic", ] [[package]] @@ -186,7 +193,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.48", ] [[package]] @@ -199,7 +206,7 @@ dependencies = [ "cc", "cfg-if", "libc", - "miniz_oxide", + "miniz_oxide 0.7.1", "object", "rustc-demangle", ] @@ -218,16 +225,19 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "breeze" -version = "0.1.5" +version = "0.2.0" dependencies = [ - "archived", + "anyhow", "async-recursion", + "atomic-time", "axum", "bytes", "clap", - "futures", + "dashmap", "hyper", + "img-parts", "rand", + "rayon", "serde", "serde_with", "tokio", @@ -238,7 +248,6 @@ dependencies = [ "tracing", "tracing-subscriber", "walkdir", - "xxhash-rust", ] [[package]] @@ -312,7 +321,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.48", ] [[package]] @@ -333,6 +342,45 @@ version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" +dependencies = [ + "cfg-if", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7" +dependencies = [ + "autocfg", + "cfg-if", + "crossbeam-utils", + "memoffset", + "scopeguard", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" + [[package]] name = "darling" version = "0.20.3" @@ -354,7 +402,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.38", + "syn 2.0.48", ] [[package]] @@ -365,7 +413,21 @@ checksum = "836a9bbc7ad63342d6d6e7b815ccab164bc77a2d95d84bc3117a8c0d5c98e2d5" dependencies = [ "darling_core", "quote", - "syn 2.0.38", + "syn 2.0.48", +] + +[[package]] +name = "dashmap" +version = "5.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" +dependencies = [ + "cfg-if", + "hashbrown 0.14.1", + "lock_api", + "once_cell", + "parking_lot_core", + "rayon", ] [[package]] @@ -378,6 +440,12 @@ dependencies = [ "serde", ] +[[package]] +name = "either" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" + [[package]] name = "equivalent" version = "1.0.1" @@ -399,21 +467,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "futures" -version = "0.3.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40" -dependencies = [ - "futures-channel", - "futures-core", - "futures-executor", - "futures-io", - "futures-sink", - "futures-task", - "futures-util", -] - [[package]] name = "futures-channel" version = "0.3.28" @@ -421,7 +474,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" dependencies = [ "futures-core", - "futures-sink", ] [[package]] @@ -430,17 +482,6 @@ version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" -[[package]] -name = "futures-executor" -version = "0.3.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0" -dependencies = [ - "futures-core", - "futures-task", - "futures-util", -] - [[package]] name = "futures-io" version = "0.3.28" @@ -455,7 +496,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.48", ] [[package]] @@ -476,13 +517,9 @@ version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" dependencies = [ - "futures-channel", "futures-core", - "futures-io", "futures-macro", - "futures-sink", "futures-task", - "memchr", "pin-project-lite", "pin-utils", "slab", @@ -647,6 +684,17 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" +[[package]] +name = "img-parts" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b19358258d99a5fc34466fed27a5318f92ae636c3e36165cf9b1e87b5b6701f0" +dependencies = [ + "bytes", + "crc32fast", + "miniz_oxide 0.5.4", +] + [[package]] name = "indexmap" version = "1.9.2" @@ -698,9 +746,9 @@ checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c" [[package]] name = "lock_api" -version = "0.4.9" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df" +checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" dependencies = [ "autocfg", "scopeguard", @@ -727,12 +775,30 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +[[package]] +name = "memoffset" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" +dependencies = [ + "autocfg", +] + [[package]] name = "mime" version = "0.3.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d" +[[package]] +name = "miniz_oxide" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96590ba8f175222643a85693f33d26e9c8a015f599c216509b1a6894af675d34" +dependencies = [ + "adler", +] + [[package]] name = "miniz_oxide" version = "0.7.1" @@ -750,7 +816,7 @@ checksum = "8f3d0b296e374a4e6f3c7b0a1f5a51d748a0d34c85e7dc48fc3fa9a87657fe09" dependencies = [ "libc", "wasi", - "windows-sys 0.48.0", + "windows-sys", ] [[package]] @@ -793,9 +859,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.16.0" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86f0b0d4bf799edbc74508c1e8bf170ff5f41238e5f8225603ca7caaae2b7860" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "overload" @@ -815,15 +881,15 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.5" +version = "0.9.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ff9f3fef3968a3ec5945535ed654cb38ff72d7495a25619e2247fb15a2ed9ba" +checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" dependencies = [ "cfg-if", "libc", "redox_syscall", "smallvec", - "windows-sys 0.42.0", + "windows-targets", ] [[package]] @@ -864,6 +930,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "portable-atomic" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" + [[package]] name = "powerfmt" version = "0.2.0" @@ -878,18 +950,18 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "proc-macro2" -version = "1.0.69" +version = "1.0.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "134c189feb4956b20f6f547d2cf727d4c0fe06722b20a0eec87ed445a97f92da" +checksum = "95fc56cda0b5c3325f5fbbd7ff9fda9e02bb00bb3dac51252d2f1bfa1cb8cc8c" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.28" +version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" dependencies = [ "proc-macro2", ] @@ -925,10 +997,30 @@ dependencies = [ ] [[package]] -name = "redox_syscall" -version = "0.2.16" +name = "rayon" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" +checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "redox_syscall" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" dependencies = [ "bitflags", ] @@ -983,7 +1075,7 @@ checksum = "1e48d1f918009ce3145511378cf68d613e3b3d9137d67272562080d68a2b32d5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.48", ] [[package]] @@ -1053,7 +1145,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.48", ] [[package]] @@ -1118,9 +1210,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.38" +version = "2.0.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e96b79aaa137db8f61e26363a0c9b47d8b4ec75da28b7d1d614c2303e232408b" +checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" dependencies = [ "proc-macro2", "quote", @@ -1189,7 +1281,7 @@ dependencies = [ "signal-hook-registry", "socket2", "tokio-macros", - "windows-sys 0.48.0", + "windows-sys", ] [[package]] @@ -1200,7 +1292,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.48", ] [[package]] @@ -1315,7 +1407,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.48", ] [[package]] @@ -1431,7 +1523,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.48", "wasm-bindgen-shared", ] @@ -1453,7 +1545,7 @@ checksum = "c5353b8dab669f5e10f5bd76df26a9360c748f054f862ff5f3f8aae0c7fb3907" dependencies = [ "proc-macro2", "quote", - "syn 2.0.38", + "syn 2.0.48", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -1504,21 +1596,6 @@ dependencies = [ "windows-targets", ] -[[package]] -name = "windows-sys" -version = "0.42.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" -dependencies = [ - "windows_aarch64_gnullvm 0.42.0", - "windows_aarch64_msvc 0.42.0", - "windows_i686_gnu 0.42.0", - "windows_i686_msvc 0.42.0", - "windows_x86_64_gnu 0.42.0", - "windows_x86_64_gnullvm 0.42.0", - "windows_x86_64_msvc 0.42.0", -] - [[package]] name = "windows-sys" version = "0.48.0" @@ -1534,93 +1611,51 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" dependencies = [ - "windows_aarch64_gnullvm 0.48.5", - "windows_aarch64_msvc 0.48.5", - "windows_i686_gnu 0.48.5", - "windows_i686_msvc 0.48.5", - "windows_x86_64_gnu 0.48.5", - "windows_x86_64_gnullvm 0.48.5", - "windows_x86_64_msvc 0.48.5", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", ] -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.42.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41d2aa71f6f0cbe00ae5167d90ef3cfe66527d6f613ca78ac8024c3ccab9a19e" - [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" -[[package]] -name = "windows_aarch64_msvc" -version = "0.42.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd0f252f5a35cac83d6311b2e795981f5ee6e67eb1f9a7f64eb4500fbc4dcdb4" - [[package]] name = "windows_aarch64_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" -[[package]] -name = "windows_i686_gnu" -version = "0.42.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbeae19f6716841636c28d695375df17562ca208b2b7d0dc47635a50ae6c5de7" - [[package]] name = "windows_i686_gnu" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" -[[package]] -name = "windows_i686_msvc" -version = "0.42.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84c12f65daa39dd2babe6e442988fc329d6243fdce47d7d2d155b8d874862246" - [[package]] name = "windows_i686_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" -[[package]] -name = "windows_x86_64_gnu" -version = "0.42.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf7b1b21b5362cbc318f686150e5bcea75ecedc74dd157d874d754a2ca44b0ed" - [[package]] name = "windows_x86_64_gnu" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.42.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09d525d2ba30eeb3297665bd434a54297e4170c7f1a44cad4ef58095b4cd2028" - [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" -[[package]] -name = "windows_x86_64_msvc" -version = "0.42.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f40009d85759725a34da6d89a94e63d7bdc50a862acf0dbc7c8e488f1edcb6f5" - [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -1635,9 +1670,3 @@ checksum = "a3b801d0e0a6726477cc207f60162da452f3a95adb368399bef20a946e06f65c" dependencies = [ "memchr", ] - -[[package]] -name = "xxhash-rust" -version = "0.8.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9828b178da53440fa9c766a3d2f73f7cf5d0ac1fe3980c1e5018d899fd19e07b" diff --git a/Cargo.toml b/Cargo.toml index 6f4985e..58bccea 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "breeze" -version = "0.1.5" +version = "0.2.0" edition = "2021" [dependencies] @@ -14,12 +14,14 @@ bytes = "1" rand = "0.8.5" async-recursion = "1.0.0" walkdir = "2" -futures = "0.3" tracing = "0.1" tracing-subscriber = "0.3" -archived = { path = "./archived" } -xxhash-rust = { version = "0.8.7", features = ["xxh3"] } serde = { version = "1.0.189", features = ["derive"] } toml = "0.8.2" clap = { version = "4.4.6", features = ["derive"] } serde_with = "3.4.0" +anyhow = "1.0.79" +dashmap = { version = "5.5.3", features = ["rayon", "inline"] } +rayon = "1.8" +atomic-time = "0.1.4" +img-parts = "0.3.0" diff --git a/README.md b/README.md index 52f2e9e..f4e9f49 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,8 @@ Compared to the old Express.js backend, breeze has - Streamed downloading (on larger files) - Upload caching - Generally faster speeds overall +- Temporary uploads +- Automatic exif data removal At this time, breeze does not support encrypted uploads on disk. @@ -34,12 +36,13 @@ services: - ./breeze.toml:/etc/breeze.toml ports: - - 8000:8000 + - 8383:8000 ``` For this configuration, it is expected that: -* there is a clone of the Git repository in the `./breeze` folder. +* there is a clone of the Git repository in the `./breeze` folder * there is a `breeze.toml` config file in current directory * there is a directory at `/srv/uploads` for storing uploads +* port 8383 will be made accessible to the Internet somehow (either forwarding the port through your firewall directly, or passing it through a reverse proxy) It can also be installed directly if you have the Rust toolchain installed: ```bash @@ -50,7 +53,7 @@ cargo install --path . ### Hosting Configuration is read through a toml file. -By default it'll try to read `./breeze.toml`, but you can specify a different path using the `-c`/`--config` command line switch. +The config file path is specified using the `-c`/`--config` command line switch. Here is an example config file: ```toml @@ -61,10 +64,6 @@ Here is an example config file: # upload urls of "https://picture.wtf/p/abcdef.png", etc. base_url = "http://127.0.0.1:8000" -# The location that uploads will be saved to. -# It should be a path to a directory on disk that you can write to. -save_path = "/data" - # OPTIONAL - If set, the static key specified will be required to upload new files. # If it is not set, no key will be required. upload_key = "hiiiiiiii" @@ -76,6 +75,17 @@ upload_key = "hiiiiiiii" # %version% - current breeze version (e.g. 0.1.5) motd = "my image host, currently hosting %uplcount% files" +# The maximum lifetime a temporary upload may be given, in seconds. +# It's okay to leave this somewhat high because large temporary uploads +# will just be bumped out of the cache when a new upload needs to be +# cached anyways. +max_temp_lifetime = 43200 + +[engine.disk] +# The location that uploads will be saved to. +# It should be a path to a directory on disk that you can write to. +save_path = "/data" + [engine.cache] # The file size (in bytes) that a file must be under # to get cached. @@ -104,10 +114,12 @@ level = "warn" ``` ### Uploading -The HTTP API is fairly simple, and it's pretty easy to make a ShareX configuration for it. +The HTTP API is pretty simple, and it's easy to make a ShareX configuration for it. Uploads should be sent to `/new?name={original filename}` as a POST request. If the server uses upload keys, it should be sent to `/new?name={original filename}&key={upload key}`. The uploaded file's content should be sent as raw binary in the request body. +Additionally, you may specify `&lastfor={time in seconds}` to make your upload temporary, or `&keepexif=true` to tell the server not to clear EXIF data on image uploads. (if you don't know what EXIF data is, just leave it as default. you'll know if you need it) + Here's an example ShareX configuration for it (with a key): ```json { diff --git a/archived/.gitignore b/archived/.gitignore deleted file mode 100644 index 928b102..0000000 --- a/archived/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -.idea -target diff --git a/archived/Cargo.lock b/archived/Cargo.lock deleted file mode 100644 index a678206..0000000 --- a/archived/Cargo.lock +++ /dev/null @@ -1,30 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 3 - -[[package]] -name = "archived" -version = "0.2.0" -dependencies = [ - "bytes", - "once_cell", - "rustc-hash", -] - -[[package]] -name = "bytes" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfb24e866b15a1af2a1b663f10c6b6b8f397a84aadb828f12e5b289ec23a3a3c" - -[[package]] -name = "once_cell" -version = "1.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c601810575c99596d4afc46f78a678c80105117c379eb3650cf99b8a21ce5b" - -[[package]] -name = "rustc-hash" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" diff --git a/archived/Cargo.toml b/archived/Cargo.toml deleted file mode 100644 index 7f659de..0000000 --- a/archived/Cargo.toml +++ /dev/null @@ -1,9 +0,0 @@ -[package] -name = "archived" -version = "0.2.0" -edition = "2018" -license = "MIT" - -[dependencies] -bytes = "1.3.0" -once_cell = "1.3.1" \ No newline at end of file diff --git a/archived/LICENSE b/archived/LICENSE deleted file mode 100644 index 0d6fadb..0000000 --- a/archived/LICENSE +++ /dev/null @@ -1,22 +0,0 @@ -MIT License - -Copyright (c) 2020 aikidos -Copyright (c) 2023 ot2t7, minish - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. \ No newline at end of file diff --git a/archived/src/entry.rs b/archived/src/entry.rs deleted file mode 100644 index 0b825b9..0000000 --- a/archived/src/entry.rs +++ /dev/null @@ -1,26 +0,0 @@ -use std::time::{Duration, SystemTime}; - -/// Represents a set of eviction and expiration details for a specific cache entry. -pub(crate) struct CacheEntry { - /// Entry value. - pub(crate) value: B, - - /// Expiration time. - /// - /// - [`None`] if the value must be kept forever. - pub(crate) expiration_time: SystemTime, -} - -impl CacheEntry { - pub(crate) fn new(value: B, lifetime: Duration) -> Self { - Self { - expiration_time: SystemTime::now() + lifetime, - value, - } - } - - /// Check if a entry is expired. - pub(crate) fn is_expired(&self, current_time: SystemTime) -> bool { - current_time >= self.expiration_time - } -} diff --git a/archived/src/lib.rs b/archived/src/lib.rs deleted file mode 100644 index cfb154a..0000000 --- a/archived/src/lib.rs +++ /dev/null @@ -1,172 +0,0 @@ -mod entry; - -use bytes::Bytes; - -use crate::entry::*; -use std::collections::hash_map::Entry; -use std::collections::HashMap; -use std::time::{Duration, SystemTime}; - -pub struct Archive { - cache_table: HashMap>, - full_scan_frequency: Option, - created_time: SystemTime, - last_scan_time: Option, - entry_lifetime: Duration, - capacity: usize, - length: usize, -} - -impl Archive { - /* pub fn new(capacity: usize) -> Self { - Self { - cache_table: HashMap::new(), - full_scan_frequency: None, - created_time: SystemTime::now(), - last_scan_time: None, - capacity, - length: 0, - } - } */ - - pub fn with_full_scan( - full_scan_frequency: Duration, - entry_lifetime: Duration, - capacity: usize, - ) -> Self { - Self { - cache_table: HashMap::with_capacity(256), - full_scan_frequency: Some(full_scan_frequency), - created_time: SystemTime::now(), - last_scan_time: None, - entry_lifetime, - capacity, - length: 0, - } - } - - pub fn contains_key(&self, key: &String) -> bool { - let now = SystemTime::now(); - - self.cache_table - .get(key) - .filter(|cache_entry| !cache_entry.is_expired(now)) - .is_some() - } - - pub fn get_last_scan_time(&self) -> Option { - self.last_scan_time - } - - pub fn get_full_scan_frequency(&self) -> Option { - self.full_scan_frequency - } - - pub fn get(&self, key: &String) -> Option<&Bytes> { - let now = SystemTime::now(); - - self.cache_table - .get(key) - .filter(|cache_entry| !cache_entry.is_expired(now)) - .map(|cache_entry| &cache_entry.value) - } - - pub fn get_or_insert(&mut self, key: String, factory: F) -> &Bytes - where - F: Fn() -> Bytes, - { - let now = SystemTime::now(); - - self.try_full_scan_expired_items(now); - - match self.cache_table.entry(key) { - Entry::Occupied(mut occupied) => { - if occupied.get().is_expired(now) { - occupied.insert(CacheEntry::new(factory(), self.entry_lifetime)); - } - - &occupied.into_mut().value - } - Entry::Vacant(vacant) => { - &vacant - .insert(CacheEntry::new(factory(), self.entry_lifetime)) - .value - } - } - } - - pub fn insert(&mut self, key: String, value: Bytes) -> Option { - let now = SystemTime::now(); - - self.try_full_scan_expired_items(now); - - if value.len() + self.length > self.capacity { - return None; - } - - self.length += value.len(); - - self.cache_table - .insert(key, CacheEntry::new(value, self.entry_lifetime)) - .filter(|cache_entry| !cache_entry.is_expired(now)) - .map(|cache_entry| cache_entry.value) - } - - pub fn remove(&mut self, key: &String) -> Option { - let now = SystemTime::now(); - - self.try_full_scan_expired_items(now); - - let mut removed_len: usize = 0; - let result = self - .cache_table - .remove(key) - .filter(|cache_entry| !cache_entry.is_expired(now)) - .and_then(|o| { - removed_len += o.value.len(); - return Some(o); - }) - .map(|cache_entry| cache_entry.value); - self.length -= removed_len; - return result; - } - - pub fn renew(&mut self, key: &String) -> Option<()> { - let now = SystemTime::now(); - - self.try_full_scan_expired_items(now); - - let entry = self.cache_table.get_mut(key); - - match entry { - Some(entry) => { - entry.expiration_time = now + self.entry_lifetime; - - Some(()) - } - None => None, - } - } - - fn try_full_scan_expired_items(&mut self, current_time: SystemTime) { - if let Some(full_scan_frequency) = self.full_scan_frequency { - let since = current_time - .duration_since(self.last_scan_time.unwrap_or(self.created_time)) - .unwrap(); - - if since >= full_scan_frequency { - let mut removed_len = 0; - self.cache_table.retain(|_, cache_entry| { - if cache_entry.is_expired(current_time) { - removed_len += cache_entry.value.len(); - return false; - } - return true; - }); - self.length -= removed_len; - - self.last_scan_time = Some(current_time); - } - } - } -} diff --git a/src/cache.rs b/src/cache.rs new file mode 100644 index 0000000..b6f54c3 --- /dev/null +++ b/src/cache.rs @@ -0,0 +1,243 @@ +use std::{ + sync::atomic::{AtomicUsize, Ordering}, + time::{Duration, SystemTime}, +}; + +use atomic_time::AtomicSystemTime; +use bytes::Bytes; +use dashmap::{mapref::one::Ref, DashMap}; +use rayon::prelude::*; +use tokio::time; + +use crate::config; + +/// An entry stored in the cache. +/// +/// It contains basic metadata and the actual value. +pub struct Entry { + /// The data held + value: Bytes, + + /// The last time this entry was read/written + last_used: AtomicSystemTime, + + /// Whether or not `last_used` should be updated + update_used: bool, + + /// How long the entry should last + lifetime: Duration, +} + +impl Entry { + fn new(value: Bytes, lifetime: Duration, update_used: bool) -> Self { + let now = AtomicSystemTime::now(); + + Self { + value, + last_used: now, + update_used, + lifetime, + } + } + + fn last_used(&self) -> SystemTime { + self.last_used.load(Ordering::Relaxed) + } + + fn is_expired(&self) -> bool { + match self.last_used().elapsed() { + Ok(d) => d >= self.lifetime, + Err(_) => false, // now > last_used + } + } +} + +/// A concurrent cache with a maximum memory size (w/ LRU) and expiration. +/// +/// It is designed to keep memory usage low. +pub struct Cache { + /// Where elements are stored + map: DashMap, + + /// Total length of data stored in cache currently + length: AtomicUsize, + + /// How should it behave + cfg: config::CacheConfig, +} + +impl Cache { + pub fn from_config(cfg: config::CacheConfig) -> Self { + Self { + map: DashMap::with_capacity(256), + length: AtomicUsize::new(0), + + cfg, + } + } + + /// Figure out who should be bumped out of cache next + fn next_out(&self, length: usize) -> Vec { + let mut sorted: Vec<_> = self.map.iter().collect(); + + // Sort by least recently used + sorted.par_sort_unstable_by(|e1, e2| e1.last_used().cmp(&e2.last_used())); + + // Total bytes we would be removing + let mut total = 0; + + // Pull entries until we have enough free space + sorted + .iter() + .take_while(|e| { + let need_more = total < length; + + if need_more { + total += e.value.len(); + } + + need_more + }) + .map(|e| e.key().clone()) + .collect() + } + + /// Remove an element from the cache + /// + /// Returns: [`Some`] if successful, [`None`] if element not found + pub fn remove(&self, key: &str) -> Option<()> { + // Skip expiry checks, we are removing it anyways + // And also that could cause an infinite loop which would be pretty stupid. + let e = self.map.get(key)?; + + // Atomically subtract from the total cache length + self.length.fetch_sub(e.value.len(), Ordering::Relaxed); + + // Drop the entry lock so we can actually remove it + drop(e); + + // Remove from map + self.map.remove(key); + + Some(()) + } + + /// Add a new element to the cache with a specified lifetime. + /// + /// Returns: `true` if no value is replaced, `false` if a value was replaced + pub fn add_with_lifetime( + &self, + key: &str, + value: Bytes, + lifetime: Duration, + is_renewable: bool, + ) -> bool { + let e = Entry::new(value, lifetime, is_renewable); + + let len = e.value.len(); + let cur_total = self.length.load(Ordering::Relaxed); + let new_total = cur_total + len; + + if new_total > self.cfg.mem_capacity { + // How far we went above the limit + let needed = new_total - self.cfg.mem_capacity; + + self.next_out(needed).par_iter().for_each(|k| { + // Remove the element, and ignore the result + // The only reason it should be failing is if it couldn't find it, + // in which case it was already removed + self.remove(k); + }) + } + + // Atomically add to total cached data length + self.length.fetch_add(len, Ordering::Relaxed); + + // Add to the map, return true if we didn't replace anything + self.map.insert(key.to_string(), e).is_none() + } + + /// Add a new element to the cache with the default lifetime. + /// + /// Returns: `true` if no value is replaced, `false` if a value was replaced + pub fn add(&self, key: &str, value: Bytes) -> bool { + self.add_with_lifetime(key, value, self.cfg.upload_lifetime, true) + } + + /// Internal function for retrieving entries. + /// + /// Returns: same as [`DashMap::get`], for our purposes + /// + /// It exists so we can run the expiry check before + /// actually working with any entries, so no weird bugs happen + fn _get(&self, key: &str) -> Option> { + let e = self.map.get(key)?; + + // if the entry is expired get rid of it now + if e.is_expired() { + // drop the reference so we don't deadlock + drop(e); + + // remove it and say we never had it + self.remove(key); + + return None; + } + + Some(e) + } + + /// Get an item from the cache, if it exists. + pub fn get(&self, key: &str) -> Option { + let e = self._get(key)?; + + if e.update_used { + e.last_used.store(SystemTime::now(), Ordering::Relaxed); + } + + Some(e.value.clone()) + } + + /// Check if we have an item in cache. + /// + /// Returns: `true` if key exists, `false` if it doesn't + /// + /// We don't use [`DashMap::contains_key`] here because it would just do + /// the exact same thing I do here, but without running the expiry check logic + pub fn has(&self, key: &str) -> bool { + self._get(key).is_some() + } + + /// Returns if an upload is able to be cached + /// with the current caching rules + #[inline(always)] + pub fn will_use(&self, length: usize) -> bool { + length <= self.cfg.max_length + } + + /// The background job that scans through the cache and removes inactive elements. + /// + /// TODO: see if this is actually less expensive than + /// letting each entry keep track of expiry with its own task + pub async fn scanner(&self) { + let mut interval = time::interval(self.cfg.scan_freq); + + loop { + // We put this first so that it doesn't scan the instant the server starts + interval.tick().await; + + // Save current timestamp so we aren't retrieving it constantly + // If we don't do this it'll be a LOT of system api calls + let now = SystemTime::now(); + + // Drop every expired entry + // If we fail to compare the times, we drop the entry + self.map.retain(|_, e| { + let elapsed = now.duration_since(e.last_used()).unwrap_or(Duration::MAX); + let is_expired = elapsed >= e.lifetime; + + !is_expired + }) + } + } +} diff --git a/src/config.rs b/src/config.rs index f26ef12..8532770 100644 --- a/src/config.rs +++ b/src/config.rs @@ -15,6 +15,7 @@ fn default_motd() -> String { "breeze file server (v%version%) - currently hosting %uplcount% files".to_string() } +#[serde_as] #[derive(Deserialize)] pub struct EngineConfig { /// The url that the instance of breeze is meant to be accessed from. @@ -22,16 +23,20 @@ pub struct EngineConfig { /// ex: https://picture.wtf would generate links like https://picture.wtf/p/abcdef.png pub base_url: String, - /// Location on disk the uploads are to be saved to - pub save_path: PathBuf, - /// Authentication key for new uploads, will be required if this is specified. (optional) #[serde(default)] pub upload_key: String, + /// Configuration for disk system + pub disk: DiskConfig, + /// Configuration for cache system pub cache: CacheConfig, + /// Maximum lifetime of a temporary upload + #[serde_as(as = "DurationSeconds")] + pub max_temp_lifetime: Duration, + /// Motd displayed when the server's index page is visited. /// /// This isn't explicitly engine-related but the engine is what gets passed to routes, @@ -40,8 +45,14 @@ pub struct EngineConfig { pub motd: String, } +#[derive(Deserialize, Clone)] +pub struct DiskConfig { + /// Location on disk the uploads are to be saved to + pub save_path: PathBuf, +} + #[serde_as] -#[derive(Deserialize)] +#[derive(Deserialize, Clone)] pub struct CacheConfig { /// The maximum length in bytes that a file can be /// before it skips cache (in seconds) diff --git a/src/disk.rs b/src/disk.rs new file mode 100644 index 0000000..b667952 --- /dev/null +++ b/src/disk.rs @@ -0,0 +1,84 @@ +use std::path::PathBuf; + +use bytes::Bytes; +use tokio::{ + fs::File, + io::{self, AsyncWriteExt}, + sync::mpsc::{self, Receiver, Sender}, +}; +use tracing::debug; +use walkdir::WalkDir; + +use crate::config; + +/// Provides an API to access the disk file store +/// like we access the cache. +pub struct Disk { + cfg: config::DiskConfig, +} + +impl Disk { + pub fn from_config(cfg: config::DiskConfig) -> Self { + Self { cfg } + } + + /// Counts the number of files saved to disk we have + pub fn count(&self) -> usize { + WalkDir::new(&self.cfg.save_path) + .min_depth(1) + .into_iter() + .count() + } + + /// Formats the path on disk for a `saved_name`. + fn path_for(&self, saved_name: &str) -> PathBuf { + let mut p = self.cfg.save_path.clone(); + p.push(&saved_name); + + p + } + + /// Try to open a file on disk, and if we didn't find it, + /// then return [`None`]. + pub async fn open(&self, saved_name: &str) -> Result, io::Error> { + let p = self.path_for(saved_name); + + match File::open(p).await { + Ok(f) => Ok(Some(f)), + Err(e) => match e.kind() { + io::ErrorKind::NotFound => Ok(None), + _ => Err(e)?, // some other error, send it back + }, + } + } + + /// Get the size of an upload's file + pub async fn len(&self, f: &File) -> Result { + Ok(f.metadata().await?.len() as usize) + } + + /// Create a background I/O task + pub async fn start_save(&self, saved_name: &str) -> Sender { + // start a task that handles saving files to disk (we can save to cache/disk in parallel that way) + let (tx, mut rx): (Sender, Receiver) = mpsc::channel(1); + + let p = self.path_for(saved_name); + + tokio::spawn(async move { + // create file to save upload to + let mut file = File::create(p) + .await + .expect("could not open file! make sure your upload path is valid"); + + // receive chunks and save them to file + while let Some(chunk) = rx.recv().await { + debug!("writing chunk to disk (length: {})", chunk.len()); + file.write_all(&chunk) + .await + .expect("error while writing file to disk"); + } + }); + + tx + } +} diff --git a/src/engine.rs b/src/engine.rs index 152eccd..3fb3c2a 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -1,176 +1,222 @@ use std::{ - ffi::OsStr, - path::{Path, PathBuf}, - sync::atomic::{AtomicUsize, Ordering}, + sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, + }, + time::Duration, }; -use archived::Archive; use axum::extract::BodyStream; use bytes::{BufMut, Bytes, BytesMut}; -use rand::Rng; -use tokio::{ - fs::File, - io::{AsyncReadExt, AsyncWriteExt}, - sync::{ - mpsc::{self, Receiver, Sender}, - RwLock, - }, -}; +use img_parts::{DynImage, ImageEXIF}; +use rand::distributions::{Alphanumeric, DistString}; +use tokio::{fs::File, io::AsyncReadExt}; use tokio_stream::StreamExt; -use tracing::{debug, error, info}; -use walkdir::WalkDir; +use tracing::{debug, info}; -use crate::{ - config, - view::{ViewError, ViewSuccess}, -}; +use crate::{cache, config, disk}; + +/// Various forms of upload data that can be sent to the client +pub enum UploadData { + /// Send back the data from memory + Cache(Bytes), + + /// Stream the file from disk to the client + Disk(File, usize), +} + +/// Rejection outcomes of an [`Engine::process`] call +pub enum ProcessOutcome { + /// The upload was successful. + /// We give the user their file's URL + Success(String), + + /// Occurs when a temporary upload is too big to fit in the cache. + TemporaryUploadTooLarge, + + /// Occurs when the user-given lifetime is longer than we will allow + TemporaryUploadLifetimeTooLong, +} /// breeze engine! this is the core of everything pub struct Engine { - /// The in-memory cache that cached uploads are stored in. - cache: RwLock, - /// Cached count of uploaded files. pub upl_count: AtomicUsize, /// Engine configuration pub cfg: config::EngineConfig, + + /// The in-memory cache that cached uploads are stored in. + cache: Arc, + + /// An interface to the on-disk upload store + disk: disk::Disk, } impl Engine { /// Creates a new instance of the breeze engine. - pub fn new(cfg: config::EngineConfig) -> Self { + pub fn from_config(cfg: config::EngineConfig) -> Self { + let cache = cache::Cache::from_config(cfg.cache.clone()); + let disk = disk::Disk::from_config(cfg.disk.clone()); + + let cache = Arc::new(cache); + + let cache_scanner = cache.clone(); + tokio::spawn(async move { cache_scanner.scanner().await }); + Self { - cache: RwLock::new(Archive::with_full_scan( - cfg.cache.scan_freq, - cfg.cache.upload_lifetime, - cfg.cache.mem_capacity, - )), - upl_count: AtomicUsize::new( - WalkDir::new(&cfg.save_path) - .min_depth(1) - .into_iter() - .count(), - ), // count the amount of files in the save path and initialise our cached count with it + // initialise our cached upload count. this doesn't include temp uploads! + upl_count: AtomicUsize::new(disk.count()), cfg, + + cache, + disk, } } - /// Returns if an upload would be able to be cached - #[inline(always)] - fn will_use_cache(&self, length: usize) -> bool { - length <= self.cfg.cache.max_length + /// Fetch an upload + /// + /// This will first try to read from cache, and then disk after. + /// If an upload is eligible to be cached, it will be cached and + /// sent back as a cache response instead of a disk response. + pub async fn get(&self, saved_name: &str) -> anyhow::Result> { + // check the cache first + if let Some(u) = self.cache.get(saved_name) { + return Ok(Some(UploadData::Cache(u))); + } + + // now, check if we have it on disk + let mut f = if let Some(f) = self.disk.open(saved_name).await? { + f + } else { + // file didn't exist + return Ok(None); + }; + + let len = self.disk.len(&f).await?; + + // can this be recached? + if self.cache.will_use(len) { + // read file from disk + let mut full = BytesMut::with_capacity(len); + + // read file from disk and if it fails at any point, return 500 + loop { + match f.read_buf(&mut full).await { + Ok(n) => { + if n == 0 { + break; + } + } + Err(e) => Err(e)?, + } + } + + let full = full.freeze(); + + // re-insert it into cache + self.cache.add(saved_name, full.clone()); + + return Ok(Some(UploadData::Cache(full))); + } + + Ok(Some(UploadData::Disk(f, len))) } - /// Check if an upload exists in cache or on disk - pub async fn upload_exists(&self, path: &Path) -> bool { - let cache = self.cache.read().await; - - // extract file name, since that's what cache uses - let name = path - .file_name() - .and_then(OsStr::to_str) - .unwrap_or_default() - .to_string(); - - // check in cache - if cache.contains_key(&name) { + pub async fn has(&self, saved_name: &str) -> bool { + if self.cache.has(saved_name) { return true; } - // check on disk - if path.exists() { + // sidestep handling the error properly + // that way we can call this in gen_saved_name easier + if self.disk.open(saved_name).await.is_ok_and(|f| f.is_some()) { return true; } - return false; + false } - /// Generate a new save path for an upload. + /// Generate a new saved name for an upload. /// /// This will call itself recursively if it picks /// a name that's already used. (it is rare) #[async_recursion::async_recursion] - pub async fn gen_path(&self, original_path: &PathBuf) -> PathBuf { + pub async fn gen_saved_name(&self, ext: &str) -> String { // generate a 6-character alphanumeric string - let id: String = rand::thread_rng() - .sample_iter(&rand::distributions::Alphanumeric) - .take(6) - .map(char::from) - .collect(); - - // extract the extension from the original path - let original_extension = original_path - .extension() - .and_then(OsStr::to_str) - .unwrap_or_default() - .to_string(); + let id: String = Alphanumeric.sample_string(&mut rand::thread_rng(), 6); // path on disk - let mut path = self.cfg.save_path.clone(); - path.push(&id); - path.set_extension(original_extension); + let saved_name = format!("{}.{}", id, ext); - if !self.upload_exists(&path).await { - path + if !self.has(&saved_name).await { + saved_name } else { // we had a name collision! try again.. - self.gen_path(original_path).await + info!("name collision! saved_name= {}", saved_name); + self.gen_saved_name(ext).await } } - /// Process an upload. - /// This is called by the /new route. - pub async fn process_upload( + /// Save a file to disk, and optionally cache. + /// + /// This also handles custom file lifetimes and EXIF data removal. + pub async fn save( &self, - path: PathBuf, - name: String, // we already extract it in the route handler, and it'd be wasteful to do it in gen_path - content_length: usize, + saved_name: &str, + provided_len: usize, + mut use_cache: bool, mut stream: BodyStream, - ) { - // if the upload size is smaller than the specified maximum, we use the cache! - let mut use_cache = self.will_use_cache(content_length); - + lifetime: Option, + keep_exif: bool, + ) -> Result<(), axum::Error> { // if we're using cache, make some space to store the upload in let mut data = if use_cache { - BytesMut::with_capacity(content_length) + BytesMut::with_capacity(provided_len) } else { BytesMut::new() }; - // start a task that handles saving files to disk (we can save to cache/disk in parallel that way) - let (tx, mut rx): (Sender, Receiver) = mpsc::channel(1); + // don't begin a disk save if we're using temporary lifetimes + let tx = if lifetime.is_none() { + Some(self.disk.start_save(saved_name).await) + } else { + None + }; - tokio::spawn(async move { - // create file to save upload to - let mut file = File::create(path) - .await - .expect("could not open file! make sure your upload path is valid"); + let tx: Option<&_> = tx.as_ref(); - // receive chunks and save them to file - while let Some(chunk) = rx.recv().await { - debug!("writing chunk to disk (length: {})", chunk.len()); - file.write_all(&chunk) - .await - .expect("error while writing file to disk"); - } - }); + // whether or not we're gonna coalesce the data + // in order to strip the exif data at the end, + // instead of just sending it off to the i/o task + let coalesce_and_strip = use_cache + && matches!( + std::path::Path::new(saved_name) + .extension() + .map(|s| s.to_str()), + Some(Some("png" | "jpg" | "jpeg" | "webp" | "tiff")) + ) + && !keep_exif + && provided_len <= 16_777_216; // read and save upload while let Some(chunk) = stream.next().await { - let chunk = chunk.unwrap(); + // if we error on a chunk, fail out + let chunk = chunk?; - // send chunk to io task - debug!("sending data to io task"); - tx.send(chunk.clone()) - .await - .expect("failed to send data to io task"); + // if we have an i/o task, send it off + // also cloning this is okay because it's a Bytes + if !coalesce_and_strip { + info!("sending chunk to i/o task"); + tx.map(|tx| tx.send(chunk.clone())); + } if use_cache { debug!("receiving data into buffer"); + if data.len() + chunk.len() > data.capacity() { - error!("the amount of data sent exceeds the content-length provided by the client! caching will be cancelled for this upload."); + info!("the amount of data sent exceeds the content-length provided by the client! caching will be cancelled for this upload."); // if we receive too much data, drop the buffer and stop using cache (it is still okay to use disk, probably) data = BytesMut::new(); @@ -181,109 +227,90 @@ impl Engine { } } - // insert upload into cache if necessary - if use_cache { - let mut cache = self.cache.write().await; + let data = data.freeze(); + // we coalesced the data instead of streaming to disk, + // strip the exif data and send it off now + let data = if coalesce_and_strip { + // strip the exif if we can + // if we can't, then oh well + let data = if let Ok(Some(data)) = DynImage::from_bytes(data.clone()).map(|o| { + o.map(|mut img| { + img.set_exif(None); + img.encoder().bytes() + }) + }) { + info!("stripped exif data"); + data + } else { + data + }; + + // send what we did over to the i/o task, all in one chunk + tx.map(|tx| tx.send(data.clone())); + + data + } else { + // or, we didn't do that + // keep the data as it is + data + }; + + // insert upload into cache if we're using it + if use_cache { info!("caching upload!"); - cache.insert(name, data.freeze()); + match lifetime { + Some(lt) => self.cache.add_with_lifetime(saved_name, data, lt, false), + None => self.cache.add(saved_name, data), + }; } info!("finished processing upload!!"); // if all goes well, increment the cached upload counter self.upl_count.fetch_add(1, Ordering::Relaxed); + + Ok(()) } - /// Read an upload from cache, if it exists. - /// - /// Previously, this would lock the cache as - /// writable to renew the upload's cache lifespan. - /// Locking the cache as readable allows multiple concurrent - /// readers though, which allows me to handle multiple views concurrently. - async fn read_cached_upload(&self, name: &String) -> Option { - let cache = self.cache.read().await; + pub async fn process( + &self, + ext: &str, + provided_len: usize, + stream: BodyStream, + lifetime: Option, + keep_exif: bool, + ) -> Result { + // if the upload size is smaller than the specified maximum, we use the cache! + let use_cache: bool = self.cache.will_use(provided_len); - // fetch upload data from cache - cache.get(name).map(ToOwned::to_owned) - } - - /// Reads an upload, from cache or on disk. - pub async fn get_upload(&self, original_path: &Path) -> Result { - // extract upload file name - let name = original_path - .file_name() - .and_then(OsStr::to_str) - .unwrap_or_default() - .to_string(); - - // path on disk - let mut path = self.cfg.save_path.clone(); - path.push(&name); - - // check if the upload exists, if not then 404 - if !self.upload_exists(&path).await { - return Err(ViewError::NotFound); + // if a temp file is too big for cache, reject it now + if lifetime.is_some() && !use_cache { + return Ok(ProcessOutcome::TemporaryUploadTooLarge); } - // attempt to read upload from cache - let cached_data = self.read_cached_upload(&name).await; - - if let Some(data) = cached_data { - info!("got upload from cache!"); - - Ok(ViewSuccess::FromCache(data)) - } else { - // we already know the upload exists by now so this is okay - let mut file = File::open(&path).await.unwrap(); - - // read upload length from disk - let metadata = file.metadata().await; - - if metadata.is_err() { - error!("failed to get upload file metadata!"); - return Err(ViewError::InternalServerError); - } - - let metadata = metadata.unwrap(); - - let length = metadata.len() as usize; - - debug!("read upload from disk, size = {}", length); - - // if the upload is okay to cache, recache it and send a fromcache response - if self.will_use_cache(length) { - // read file from disk - let mut data = BytesMut::with_capacity(length); - - // read file from disk and if it fails at any point, return 500 - loop { - match file.read_buf(&mut data).await { - Ok(n) => { - if n == 0 { - break; - } - } - Err(_) => { - return Err(ViewError::InternalServerError); - } - } - } - - let data = data.freeze(); - - // re-insert it into cache - let mut cache = self.cache.write().await; - cache.insert(name, data.clone()); - - info!("recached upload from disk!"); - - return Ok(ViewSuccess::FromCache(data)); - } - - info!("got upload from disk!"); - - Ok(ViewSuccess::FromDisk(file)) + // if a temp file's lifetime is too long, reject it now + if lifetime.is_some_and(|lt| lt > self.cfg.max_temp_lifetime) { + return Ok(ProcessOutcome::TemporaryUploadLifetimeTooLong); } + + // generate the file name + let saved_name = self.gen_saved_name(ext).await; + + // save it + self.save( + &saved_name, + provided_len, + use_cache, + stream, + lifetime, + keep_exif, + ) + .await?; + + // format and send back the url + let url = format!("{}/p/{}", self.cfg.base_url, saved_name); + + Ok(ProcessOutcome::Success(url)) } } diff --git a/src/main.rs b/src/main.rs index 25ac623..6af92e2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,5 @@ use std::{path::PathBuf, sync::Arc}; -extern crate axum; - use clap::Parser; use engine::Engine; @@ -12,7 +10,9 @@ use axum::{ use tokio::{fs, signal}; use tracing::{info, warn}; +mod cache; mod config; +mod disk; mod engine; mod index; mod new; @@ -41,8 +41,11 @@ async fn main() { .with_max_level(cfg.logger.level) .init(); - if !cfg.engine.save_path.exists() || !cfg.engine.save_path.is_dir() { - panic!("the save path does not exist or is not a directory! this is invalid"); + { + let save_path = cfg.engine.disk.save_path.clone(); + if !save_path.exists() || !save_path.is_dir() { + panic!("the save path does not exist or is not a directory! this is invalid"); + } } if cfg.engine.upload_key.is_empty() { @@ -50,7 +53,7 @@ async fn main() { } // create engine - let engine = Engine::new(cfg.engine); + let engine = Engine::from_config(cfg.engine); // build main router let app = Router::new() diff --git a/src/new.rs b/src/new.rs index eba5f80..50802de 100644 --- a/src/new.rs +++ b/src/new.rs @@ -1,47 +1,58 @@ -use std::{collections::HashMap, ffi::OsStr, path::PathBuf, sync::Arc}; +use std::{ffi::OsStr, path::PathBuf, sync::Arc, time::Duration}; use axum::{ extract::{BodyStream, Query, State}, http::HeaderValue, }; use hyper::{header, HeaderMap, StatusCode}; +use serde::Deserialize; +use serde_with::{serde_as, DurationSeconds}; + +use crate::engine::ProcessOutcome; + +fn default_keep_exif() -> bool { + false +} + +#[serde_as] +#[derive(Deserialize)] +pub struct NewRequest { + name: String, + key: Option, + + #[serde(rename = "lastfor")] + #[serde_as(as = "Option")] + last_for: Option, + + #[serde(rename = "keepexif", default = "default_keep_exif")] + keep_exif: bool, +} /// The request handler for the /new path. /// This handles all new uploads. #[axum::debug_handler] pub async fn new( State(engine): State>, - Query(params): Query>, + Query(req): Query, headers: HeaderMap, stream: BodyStream, ) -> Result { - let key = params.get("key"); - - const EMPTY_STRING: &String = &String::new(); - // check upload key, if i need to - if !engine.cfg.upload_key.is_empty() && key.unwrap_or(EMPTY_STRING) != &engine.cfg.upload_key { + if !engine.cfg.upload_key.is_empty() && req.key.unwrap_or_default() != engine.cfg.upload_key { return Err(StatusCode::FORBIDDEN); } - let original_name = params.get("name"); - // the original file name wasn't given, so i can't work out what the extension should be - if original_name.is_none() { + if req.name.is_empty() { return Err(StatusCode::BAD_REQUEST); } - let original_path = PathBuf::from(original_name.unwrap()); - - let path = engine.gen_path(&original_path).await; - let name = path - .file_name() + let extension = PathBuf::from(req.name) + .extension() .and_then(OsStr::to_str) .unwrap_or_default() .to_string(); - let url = format!("{}/p/{}", engine.cfg.base_url, name); - // read and parse content-length, and if it fails just assume it's really high so it doesn't cache let content_length = headers .get(header::CONTENT_LENGTH) @@ -52,9 +63,30 @@ pub async fn new( .unwrap_or(usize::MAX); // pass it off to the engine to be processed! - engine - .process_upload(path, name, content_length, stream) - .await; + match engine + .process( + &extension, + content_length, + stream, + req.last_for, + req.keep_exif, + ) + .await + { + Ok(outcome) => match outcome { + // 200 OK + ProcessOutcome::Success(url) => Ok(url), - Ok(url) + // 413 Payload Too Large + ProcessOutcome::TemporaryUploadTooLarge => { + Err(StatusCode::PAYLOAD_TOO_LARGE) + } + + // 400 Bad Request + ProcessOutcome::TemporaryUploadLifetimeTooLong => Err(StatusCode::BAD_REQUEST), + }, + + // 500 Internal Server Error + Err(_) => Err(StatusCode::INTERNAL_SERVER_ERROR), + } } diff --git a/src/view.rs b/src/view.rs index e2425dd..36ebe3c 100644 --- a/src/view.rs +++ b/src/view.rs @@ -1,6 +1,5 @@ use std::{ - path::{Component, PathBuf}, - sync::Arc, + ffi::OsStr, path::PathBuf, sync::Arc }; use axum::{ @@ -9,32 +8,11 @@ use axum::{ response::{IntoResponse, Response}, }; -use bytes::Bytes; use hyper::{http::HeaderValue, StatusCode}; -use tokio::{fs::File, runtime::Handle}; use tokio_util::io::ReaderStream; -use tracing::{error, debug, info}; +use tracing::info; -/// Responses for a successful view operation -pub enum ViewSuccess { - /// A file read from disk, suitable for larger files. - /// - /// The file provided will be streamed from disk and - /// back to the viewer. - /// - /// This is only ever used if a file exceeds the - /// cache's maximum file size. - FromDisk(File), - - /// A file read from in-memory cache, best for smaller files. - /// - /// The file is taken from the cache in its entirety - /// and sent back to the viewer. - /// - /// If a file can be fit into cache, this will be - /// used even if it's read from disk. - FromCache(Bytes), -} +use crate::engine::UploadData; /// Responses for a failed view operation pub enum ViewError { @@ -45,34 +23,12 @@ pub enum ViewError { InternalServerError, } -impl IntoResponse for ViewSuccess { +impl IntoResponse for UploadData { fn into_response(self) -> Response { match self { - ViewSuccess::FromDisk(file) => { - // get handle to current tokio runtime - // i use this to block on futures here (not async) - let handle = Handle::current(); - let _ = handle.enter(); - - // read the metadata of the file on disk - // this function isn't async - // .. so we have to use handle.block_on() to get the metadata - let metadata = futures::executor::block_on(file.metadata()); - - // if we error then return 500 - if metadata.is_err() { - error!("failed to read metadata from disk"); - return ViewError::InternalServerError.into_response(); - } - - // unwrap (which we know is safe) and read the file size as a string - let metadata = metadata.unwrap(); - let len_str = metadata.len().to_string(); - - debug!("file is {} bytes on disk", &len_str); - - // HeaderValue::from_str will never error if only visible ASCII characters are passed (32-127) - // .. so unwrapping this should be fine + UploadData::Disk(file, len) => { + // create our content-length header + let len_str = len.to_string(); let content_length = HeaderValue::from_str(&len_str).unwrap(); // create a streamed body response (we want to stream larger files) @@ -92,7 +48,7 @@ impl IntoResponse for ViewSuccess { res } - ViewSuccess::FromCache(data) => { + UploadData::Cache(data) => { // extract mutable headers from the response let mut res = data.into_response(); let headers = res.headers_mut(); @@ -128,16 +84,17 @@ impl IntoResponse for ViewError { pub async fn view( State(engine): State>, Path(original_path): Path, -) -> Result { - // (hopefully) prevent path traversal, just check for any non-file components - if original_path - .components() - .any(|x| !matches!(x, Component::Normal(_))) - { - info!("a request attempted path traversal"); +) -> Result { + let saved_name = if let Some(Some(n)) = original_path.file_name().map(OsStr::to_str) { + n + } else { return Err(ViewError::NotFound); - } + }; // get result from the engine! - engine.get_upload(&original_path).await + match engine.get(saved_name).await { + Ok(Some(u)) => Ok(u), + Ok(None) => Err(ViewError::NotFound), + Err(_) => Err(ViewError::InternalServerError), + } }