This commit is contained in:
minish 2025-03-23 00:34:51 -04:00
parent 9debe776f2
commit a0254dda72
Signed by: min
GPG Key ID: FEECFF24EF0CE9E9
4 changed files with 282 additions and 38 deletions

210
Cargo.lock generated
View File

@ -2,6 +2,18 @@
# It is not intended for manual editing.
version = 4
[[package]]
name = "autocfg"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
[[package]]
name = "bitflags"
version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36"
[[package]]
name = "byteorder"
version = "1.5.0"
@ -18,16 +30,71 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
name = "chains-gen"
version = "0.1.0"
dependencies = [
"dashmap",
"fnv",
"indexmap",
"rand",
"rayon",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
dependencies = [
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
[[package]]
name = "dashmap"
version = "6.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf"
dependencies = [
"cfg-if",
"crossbeam-utils",
"hashbrown 0.14.5",
"lock_api",
"once_cell",
"parking_lot_core",
"rayon",
]
[[package]]
name = "either"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
[[package]]
name = "equivalent"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
[[package]]
name = "fnv"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "getrandom"
version = "0.2.15"
@ -39,6 +106,12 @@ dependencies = [
"wasi",
]
[[package]]
name = "hashbrown"
version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
[[package]]
name = "hashbrown"
version = "0.15.2"
@ -52,7 +125,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652"
dependencies = [
"equivalent",
"hashbrown",
"hashbrown 0.15.2",
"rayon",
]
[[package]]
@ -61,6 +135,35 @@ version = "0.2.169"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
[[package]]
name = "lock_api"
version = "0.4.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17"
dependencies = [
"autocfg",
"scopeguard",
]
[[package]]
name = "once_cell"
version = "1.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775"
[[package]]
name = "parking_lot_core"
version = "0.9.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8"
dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"smallvec",
"windows-targets",
]
[[package]]
name = "ppv-lite86"
version = "0.2.20"
@ -118,6 +221,47 @@ dependencies = [
"getrandom",
]
[[package]]
name = "rayon"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
dependencies = [
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
dependencies = [
"crossbeam-deque",
"crossbeam-utils",
]
[[package]]
name = "redox_syscall"
version = "0.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834"
dependencies = [
"bitflags",
]
[[package]]
name = "scopeguard"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "smallvec"
version = "1.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
[[package]]
name = "syn"
version = "2.0.96"
@ -141,6 +285,70 @@ version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "windows-targets"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_gnullvm",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
[[package]]
name = "windows_i686_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
[[package]]
name = "windows_i686_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
[[package]]
name = "zerocopy"
version = "0.7.35"

View File

@ -4,5 +4,8 @@ version = "0.1.0"
edition = "2024"
[dependencies]
indexmap = "2.7.0"
dashmap = { version = "6.1.0", features = ["rayon"] }
fnv = "1.0.7"
indexmap = { version = "2.7.0", features = ["rayon"] }
rand = "0.8.5"
rayon = "1.10.0"

View File

@ -2,20 +2,21 @@ use std::time::Instant;
use markov::{MarkovAllNodes, MarkovNode, MarkovToken};
use rayon::{iter::ParallelIterator, str::ParallelString};
mod markov;
fn main() {
// chain creation
let content = std::fs::read_to_string("./data.txt").unwrap();
let lines = content.lines();
let mut all_nodes = MarkovAllNodes::new();
let all_nodes = MarkovAllNodes::new();
let root_node = MarkovNode::new(MarkovToken::Root);
let end_node = MarkovNode::new(MarkovToken::End);
let start = Instant::now();
for line in lines {
content.par_lines().for_each(|line| {
let mut nodes = line
.split_whitespace()
.filter(|s| s.chars().all(|c| c.is_ascii_alphanumeric()))
@ -30,13 +31,20 @@ fn main() {
break;
}
node.conns.borrow_mut().connect(next_node.clone());
node.conns.connect(next_node.clone());
node = next_node;
}
}
});
println!("took {:?} to create chain", start.elapsed());
// cache index maps
let start = Instant::now();
root_node.conns.index_map();
all_nodes.cache_index_maps();
println!("took {:?} to cache index maps", start.elapsed());
// generation
let mut rng = rand::thread_rng();
@ -52,7 +60,7 @@ fn main() {
.unwrap_or_else(|| root_node.clone());
loop {
let next_node = node.conns.borrow().random_weighted(&mut rng);
let next_node = node.conns.random_weighted(&mut rng);
if let MarkovToken::Value(value) = &node.value {
result += value;

View File

@ -1,8 +1,13 @@
use std::{cell::RefCell, collections::HashMap, hash::Hash, rc::Rc};
use std::{
hash::Hash,
sync::{Arc, OnceLock},
};
use indexmap::IndexMap;
use rand::{distributions::WeightedIndex, prelude::Distribution, rngs::ThreadRng};
/// `dashmap::DashMap` parameterised with the FNV build-hasher; used for the maps that are
/// written to through `&self` (`MarkovConns::conns`, `MarkovAllNodes`).
type FnvDashMap<K, V> = dashmap::DashMap<K, V, fnv::FnvBuildHasher>;
/// `indexmap::IndexMap` parameterised with the FNV build-hasher; used for the cached snapshot
/// that `MarkovConns::random_weighted` reads by position via `get_index`.
type FnvIndexMap<K, V> = indexmap::IndexMap<K, V, fnv::FnvBuildHasher>;
#[derive(PartialEq, Eq, Hash, Debug)]
pub enum MarkovToken {
Root,
@ -22,9 +27,14 @@ impl MarkovToken {
pub struct MarkovNode {
pub value: MarkovToken,
pub conns: RefCell<MarkovConns>,
pub conns: MarkovConns,
}
impl std::fmt::Debug for MarkovNode {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
self.value.fmt(f)
}
}
impl PartialEq for MarkovNode {
fn eq(&self, other: &Self) -> bool {
self.value.eq(&other.value)
@ -38,55 +48,70 @@ impl Hash for MarkovNode {
}
impl MarkovNode {
pub fn new(token: MarkovToken) -> Rc<Self> {
Rc::new(Self {
pub fn new(token: MarkovToken) -> Arc<Self> {
let conns = MarkovConns {
conns: FnvDashMap::default(),
im_cached: OnceLock::new(),
};
Arc::new(Self {
value: token,
conns: RefCell::new(MarkovConns(IndexMap::new())),
conns,
})
}
}
pub struct MarkovConns(pub IndexMap<Rc<MarkovNode>, u32>);
/// Outgoing connections of a single `MarkovNode`, with a count per target node.
///
/// Counts are recorded through `connect` into `conns`; `index_map` copies them once into
/// `im_cached`, which is what `random_weighted` samples from.
pub struct MarkovConns {
/// Target node -> number of times `connect` was called with that target.
conns: FnvDashMap<Arc<MarkovNode>, u32>,
/// Snapshot of `conns` built by the first `index_map` call. Because it lives in a `OnceLock`
/// it is initialised at most once, so counts added to `conns` afterwards are not reflected.
im_cached: OnceLock<FnvIndexMap<Arc<MarkovNode>, u32>>,
}
impl MarkovConns {
pub fn connect(&mut self, word: Rc<MarkovNode>) {
if let Some(count) = self.0.get_mut(&word) {
*count += 1;
} else {
self.0.insert(word, 1);
}
/// Records one more transition from this node to `n`.
///
/// A target seen for the first time ends up with a count of 1; a known target has its
/// count incremented by one.
///
/// NOTE(review): `index_map` snapshots the counts only once, so calls made after the first
/// `index_map` call will not be visible to `random_weighted` — confirm callers finish
/// connecting before caching.
pub fn connect(&self, n: Arc<MarkovNode>) {
    // Start missing entries at zero so that both cases reduce to a single increment.
    let mut count = self.conns.entry(n).or_insert(0);
    *count += 1;
}
fn index(&self, i: usize) -> Rc<MarkovNode> {
self.0.get_index(i).map(|(n, _)| n.clone()).unwrap()
pub fn index_map(&self) -> &FnvIndexMap<Arc<MarkovNode>, u32> {
self.im_cached.get_or_init(|| {
self.conns
.iter()
.map(|r| (r.key().clone(), *r.value()))
.collect::<FnvIndexMap<_, _>>()
})
}
pub fn random_weighted(&self, rng: &mut ThreadRng) -> Rc<MarkovNode> {
let wi = WeightedIndex::new(self.0.iter().map(|(_, c)| c)).unwrap(); // SAFETY: there should always be at least an end token
/// Picks one outgoing connection at random, weighted by its recorded count.
///
/// Sampling is done over the cached snapshot returned by `index_map` (built on first use),
/// and the chosen node is returned as a new `Arc` handle.
///
/// # Panics
///
/// Panics if the snapshot has no usable weights, i.e. this node has no outgoing
/// connections. Callers are expected to stop before sampling from such a node.
pub fn random_weighted(&self, rng: &mut ThreadRng) -> Arc<MarkovNode> {
    let im = self.index_map();
    // Invariant (not enforced here): every node that gets sampled has at least one
    // outgoing connection, typically the end token.
    let wi = WeightedIndex::new(im.values().copied())
        .expect("sampled node must have at least one outgoing connection");
    let i = wi.sample(rng);
    // `wi` was built from exactly `im.len()` weights, so `i` is always a valid position.
    let (next, _) = im
        .get_index(i)
        .expect("index sampled from the map's own weights is in bounds");
    Arc::clone(next)
}
}
pub struct MarkovAllNodes(HashMap<String, Rc<MarkovNode>>);
/// Registry of value nodes keyed by their word, so that each distinct word maps to a single
/// shared `Arc<MarkovNode>`. Backed by a `FnvDashMap`, which lets `node` insert through `&self`.
pub struct MarkovAllNodes(FnvDashMap<String, Arc<MarkovNode>>);
impl MarkovAllNodes {
pub fn new() -> Self {
Self(HashMap::new())
Self(FnvDashMap::default())
}
pub fn node(&mut self, word: &str) -> Rc<MarkovNode> {
if let Some(node) = self.0.get(word) {
node.clone()
} else {
let node = MarkovNode::new(MarkovToken::Value(word.to_string()));
self.0.insert(word.to_string(), node.clone());
node
}
/// Returns the shared node for `word`, creating and registering it if it does not exist yet.
///
/// Repeated calls with the same word return handles to the same node.
pub fn node(&self, word: &str) -> Arc<MarkovNode> {
    // Fast path: a word that is already registered needs neither a freshly allocated key
    // `String` nor the entry API. The lookup guard is released at the end of this `if let`,
    // before `entry` is called below.
    if let Some(existing) = self.0.get(word) {
        return Arc::clone(existing.value());
    }

    // Slow path: `or_insert_with` still handles the case where another thread registered
    // the word between the lookup above and this call, so only one node is ever kept.
    self.0
        .entry(word.to_string())
        .or_insert_with(|| MarkovNode::new(MarkovToken::Value(word.to_string())))
        .value()
        .clone()
}
pub fn try_node(&self, word: &str) -> Option<Rc<MarkovNode>> {
self.0.get(word).cloned()
/// Looks up the node registered for `word` without creating one.
///
/// Returns `None` when the word has never been registered.
pub fn try_node(&self, word: &str) -> Option<Arc<MarkovNode>> {
    let found = self.0.get(word)?;
    Some(Arc::clone(found.value()))
}
/// Builds the cached index map of every registered node up front, so that later
/// `random_weighted` calls on those nodes find it already initialised.
pub fn cache_index_maps(&self) {
    for entry in self.0.iter() {
        // Only the caching side effect matters; the returned reference is discarded.
        let _ = entry.value().conns.index_map();
    }
}
}