Initial commit

This commit is contained in:
minish 2025-01-28 20:17:30 -05:00
commit 9debe776f2
Signed by: min
GPG Key ID: FEECFF24EF0CE9E9
6 changed files with 347 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/target

163
Cargo.lock generated Normal file
View File

@ -0,0 +1,163 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "byteorder"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chains-gen"
version = "0.1.0"
dependencies = [
"indexmap",
"rand",
]
[[package]]
name = "equivalent"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
[[package]]
name = "getrandom"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "hashbrown"
version = "0.15.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289"
[[package]]
name = "indexmap"
version = "2.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652"
dependencies = [
"equivalent",
"hashbrown",
]
[[package]]
name = "libc"
version = "0.2.169"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
[[package]]
name = "ppv-lite86"
version = "0.2.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04"
dependencies = [
"zerocopy",
]
[[package]]
name = "proc-macro2"
version = "1.0.93"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"libc",
"rand_chacha",
"rand_core",
]
[[package]]
name = "rand_chacha"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
"getrandom",
]
[[package]]
name = "syn"
version = "2.0.96"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d5d0adab1ae378d7f53bdebc67a39f1f151407ef230f0ce2883572f5d8985c80"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "zerocopy"
version = "0.7.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
dependencies = [
"byteorder",
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.7.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
dependencies = [
"proc-macro2",
"quote",
"syn",
]

8
Cargo.toml Normal file
View File

@ -0,0 +1,8 @@
[package]
name = "chains-gen"
version = "0.1.0"
edition = "2024"
[dependencies]
indexmap = "2.7.0"
rand = "0.8.5"

10
README.md Normal file
View File

@ -0,0 +1,10 @@
# Chains-gen
chains-gen is a simple little program to create a Markov chain and then generate some text using it.
To use it, put your training data inside `data.txt` in a directory, and then run the program there.
> [!NOTE]
> You should try to keep each "entry" of data on a single line rather than spreading it across several. The results could be weird if you don't.
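If everything is okay, you should be able to press Enter to generate sentences. If you type a word before pressing Enter and that word appears in the training data, the sentence starts from it.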

73
src/main.rs Normal file
View File

@ -0,0 +1,73 @@
use std::time::Instant;
use markov::{MarkovAllNodes, MarkovNode, MarkovToken};
mod markov;
/// Builds a Markov chain from `./data.txt`, then generates one sentence for
/// every line read from stdin until stdin is closed.
///
/// Each line of the data file is treated as one chain: root -> word -> ... ->
/// end. Whitespace-separated words that contain any character other than an
/// ASCII letter or digit are skipped entirely.
///
/// During generation, the text typed on stdin (trimmed) is used as the starting
/// word when it is known; otherwise generation starts from the root node.
///
/// # Panics
///
/// Panics if `./data.txt` cannot be read or if reading from stdin fails.
fn main() {
    // chain creation
    let content = std::fs::read_to_string("./data.txt").expect("failed to read ./data.txt");

    let mut all_nodes = MarkovAllNodes::new();
    let root_node = MarkovNode::new(MarkovToken::Root);
    let end_node = MarkovNode::new(MarkovToken::End);

    let start = Instant::now();
    for line in content.lines() {
        let words = line
            .split_whitespace()
            .filter(|s| s.chars().all(|c| c.is_ascii_alphanumeric()))
            .map(|t| all_nodes.node(t));

        // Link root -> w1 -> w2 -> ... -> wn.
        let mut node = root_node.clone();
        for next_node in words {
            node.conns.borrow_mut().connect(next_node.clone());
            node = next_node;
        }
        // Still at the root means the line had no usable words: it would be an
        // empty chain, so skip it instead of linking root -> end.
        if !node.value.is_root() {
            node.conns.borrow_mut().connect(end_node.clone());
        }
    }
    println!("took {:?} to create chain", start.elapsed());

    // Without a single chain the root has nothing to sample from, and sampling
    // would panic; bail out with a readable message instead.
    if root_node.conns.borrow().0.is_empty() {
        eprintln!("no usable training data found in ./data.txt");
        return;
    }

    // generation
    let mut rng = rand::thread_rng();
    let stdin = std::io::stdin();
    let mut picked_start_word = String::new();
    loop {
        picked_start_word.clear();
        let bytes_read = stdin
            .read_line(&mut picked_start_word)
            .expect("failed to read from stdin");
        // `read_line` returns 0 at end of input; stop instead of spinning forever.
        if bytes_read == 0 {
            break;
        }

        let start = Instant::now();
        let mut result = String::new();
        let mut node = all_nodes
            .try_node(picked_start_word.trim())
            .unwrap_or_else(|| root_node.clone());
        loop {
            let next_node = node.conns.borrow().random_weighted(&mut rng);
            // The root sentinel carries no text, so only word nodes are emitted.
            if let MarkovToken::Value(value) = &node.value {
                result.push_str(value);
                result.push(' ');
            }
            if next_node.value.is_end() {
                break;
            }
            node = next_node;
        }
        let elapsed = start.elapsed();

        println!("output: {result}");
        println!("gen took {elapsed:?}");
    }
}

92
src/markov.rs Normal file
View File

@ -0,0 +1,92 @@
use std::{cell::RefCell, collections::HashMap, hash::Hash, rc::Rc};
use indexmap::IndexMap;
use rand::{distributions::WeightedIndex, prelude::Distribution, rngs::ThreadRng};
/// A single symbol in the Markov chain.
///
/// `Root` and `End` are sentinels without text; `Value` carries a word.
#[derive(PartialEq, Eq, Hash, Debug)]
pub enum MarkovToken {
    /// Sentinel used as the starting point of a chain.
    Root,
    /// A word, stored as owned text.
    Value(String),
    /// Sentinel used as the terminating point of a chain.
    End,
}

impl MarkovToken {
    /// Returns `true` only for the [`MarkovToken::Root`] sentinel.
    pub fn is_root(&self) -> bool {
        match self {
            Self::Root => true,
            Self::Value(_) | Self::End => false,
        }
    }

    /// Returns `true` only for the [`MarkovToken::End`] sentinel.
    pub fn is_end(&self) -> bool {
        match self {
            Self::End => true,
            Self::Root | Self::Value(_) => false,
        }
    }
}
/// A node of the Markov graph: one token together with its outgoing connections.
pub struct MarkovNode {
    /// The token this node stands for.
    pub value: MarkovToken,
    /// Outgoing connections, kept in a `RefCell` so they can be mutated
    /// through the shared `Rc` handle returned by [`MarkovNode::new`].
    pub conns: RefCell<MarkovConns>,
}

// Equality and hashing look at the token only; `conns` takes no part in either.
impl PartialEq for MarkovNode {
    fn eq(&self, other: &Self) -> bool {
        self.value == other.value
    }
}

impl Eq for MarkovNode {}

impl Hash for MarkovNode {
    fn hash<H>(&self, state: &mut H)
    where
        H: std::hash::Hasher,
    {
        Hash::hash(&self.value, state);
    }
}

impl MarkovNode {
    /// Creates a reference-counted node for `token` with no connections yet.
    pub fn new(token: MarkovToken) -> Rc<Self> {
        let conns = RefCell::new(MarkovConns(IndexMap::new()));
        Rc::new(Self {
            value: token,
            conns,
        })
    }
}
/// Outgoing connections of a node: each successor node mapped to the number
/// of times it has been connected.
pub struct MarkovConns(pub IndexMap<Rc<MarkovNode>, u32>);

impl MarkovConns {
    /// Records one more transition to `word`.
    ///
    /// A successor seen for the first time starts with a count of 1; otherwise
    /// its existing count is incremented.
    pub fn connect(&mut self, word: Rc<MarkovNode>) {
        *self.0.entry(word).or_insert(0) += 1;
    }

    /// Returns a handle to the successor stored at position `i`.
    ///
    /// # Panics
    ///
    /// Panics if `i` is out of bounds.
    fn index(&self, i: usize) -> Rc<MarkovNode> {
        let (node, _) = self
            .0
            .get_index(i)
            .expect("connection index out of bounds");
        Rc::clone(node)
    }

    /// Picks a random successor, weighting each one by its connection count.
    ///
    /// # Panics
    ///
    /// Panics if there are no connections to sample from. Callers are expected
    /// to sample only from nodes that have at least one connection (for word
    /// nodes there should always be at least an end token).
    pub fn random_weighted(&self, rng: &mut ThreadRng) -> Rc<MarkovNode> {
        let wi = WeightedIndex::new(self.0.values())
            .expect("cannot sample a successor from a node without connections");
        let i = wi.sample(rng);
        self.index(i)
    }
}
/// Registry of word nodes, keyed by the word's text.
pub struct MarkovAllNodes(HashMap<String, Rc<MarkovNode>>);

impl MarkovAllNodes {
    /// Creates an empty registry.
    pub fn new() -> Self {
        let registry = HashMap::new();
        Self(registry)
    }

    /// Returns the node registered for `word`, creating and registering a new
    /// [`MarkovToken::Value`] node when the word has not been seen before.
    pub fn node(&mut self, word: &str) -> Rc<MarkovNode> {
        if let Some(existing) = self.0.get(word) {
            return Rc::clone(existing);
        }

        let created = MarkovNode::new(MarkovToken::Value(word.to_string()));
        self.0.insert(word.to_string(), Rc::clone(&created));
        created
    }

    /// Returns the node registered for `word`, or `None` when the word is
    /// unknown. Never creates a node.
    pub fn try_node(&self, word: &str) -> Option<Rc<MarkovNode>> {
        self.0.get(word).map(Rc::clone)
    }
}