Initial commit
This commit is contained in:
commit
9debe776f2
|
@ -0,0 +1 @@
|
|||
/target
|
|
@ -0,0 +1,163 @@
|
|||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 4
|
||||
|
||||
[[package]]
|
||||
name = "byteorder"
|
||||
version = "1.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "chains-gen"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"indexmap",
|
||||
"rand",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "equivalent"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.2.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"wasi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.15.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289"
|
||||
|
||||
[[package]]
|
||||
name = "indexmap"
|
||||
version = "2.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652"
|
||||
dependencies = [
|
||||
"equivalent",
|
||||
"hashbrown",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.169"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
|
||||
|
||||
[[package]]
|
||||
name = "ppv-lite86"
|
||||
version = "0.2.20"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04"
|
||||
dependencies = [
|
||||
"zerocopy",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.93"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.38"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand"
|
||||
version = "0.8.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"rand_chacha",
|
||||
"rand_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_chacha"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
|
||||
dependencies = [
|
||||
"ppv-lite86",
|
||||
"rand_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_core"
|
||||
version = "0.6.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.96"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d5d0adab1ae378d7f53bdebc67a39f1f151407ef230f0ce2883572f5d8985c80"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034"
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
version = "0.11.0+wasi-snapshot-preview1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
|
||||
|
||||
[[package]]
|
||||
name = "zerocopy"
|
||||
version = "0.7.35"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"zerocopy-derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zerocopy-derive"
|
||||
version = "0.7.35"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
|
@ -0,0 +1,8 @@
|
|||
[package]
|
||||
name = "chains-gen"
|
||||
version = "0.1.0"
|
||||
edition = "2024"
|
||||
|
||||
[dependencies]
|
||||
indexmap = "2.7.0"
|
||||
rand = "0.8.5"
|
|
@ -0,0 +1,10 @@
|
|||
# Chains-gen
|
||||
|
||||
chains-gen is a simple little program to create a Markov chain and then generate some text using it.
|
||||
|
||||
To use it, put your training data inside `data.txt` in a directory, and then run the program there.
|
||||
|
||||
> [!NOTE]
|
||||
> You should try to keep each "entry" of data on a single line rather than letting it spill across several. The results could be weird if you don't.
|
||||
|
||||
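If everything is okay, you should be able to press Enter to generate a sentence. You can also type a word before pressing Enter to start the sentence from that word; if the word is not in the training data, generation starts from the beginning of the chain instead.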
|
|
@ -0,0 +1,73 @@
|
|||
use std::time::Instant;
|
||||
|
||||
use markov::{MarkovAllNodes, MarkovNode, MarkovToken};
|
||||
|
||||
mod markov;
|
||||
|
||||
fn main() {
|
||||
// chain creation
|
||||
|
||||
let content = std::fs::read_to_string("./data.txt").unwrap();
|
||||
let lines = content.lines();
|
||||
|
||||
let mut all_nodes = MarkovAllNodes::new();
|
||||
let root_node = MarkovNode::new(MarkovToken::Root);
|
||||
let end_node = MarkovNode::new(MarkovToken::End);
|
||||
|
||||
let start = Instant::now();
|
||||
for line in lines {
|
||||
let mut nodes = line
|
||||
.split_whitespace()
|
||||
.filter(|s| s.chars().all(|c| c.is_ascii_alphanumeric()))
|
||||
.map(|t| all_nodes.node(t));
|
||||
|
||||
let mut node = root_node.clone();
|
||||
while !node.value.is_end() {
|
||||
let next_node = nodes.next().unwrap_or_else(|| end_node.clone());
|
||||
|
||||
// it will be an empty chain so skip
|
||||
if node.value.is_root() && next_node.value.is_end() {
|
||||
break;
|
||||
}
|
||||
|
||||
node.conns.borrow_mut().connect(next_node.clone());
|
||||
|
||||
node = next_node;
|
||||
}
|
||||
}
|
||||
println!("took {:?} to create chain", start.elapsed());
|
||||
|
||||
// generation
|
||||
|
||||
let mut rng = rand::thread_rng();
|
||||
loop {
|
||||
let mut picked_start_word = String::new();
|
||||
std::io::stdin().read_line(&mut picked_start_word).unwrap();
|
||||
|
||||
let start = Instant::now();
|
||||
|
||||
let mut result = String::new();
|
||||
let mut node = all_nodes
|
||||
.try_node(picked_start_word.trim())
|
||||
.unwrap_or_else(|| root_node.clone());
|
||||
|
||||
loop {
|
||||
let next_node = node.conns.borrow().random_weighted(&mut rng);
|
||||
|
||||
if let MarkovToken::Value(value) = &node.value {
|
||||
result += value;
|
||||
result.push(' ');
|
||||
}
|
||||
|
||||
if next_node.value.is_end() {
|
||||
break;
|
||||
}
|
||||
|
||||
node = next_node;
|
||||
}
|
||||
|
||||
let elapsed = start.elapsed();
|
||||
println!("output: {result}");
|
||||
println!("gen took {elapsed:?}");
|
||||
}
|
||||
}
|
|
@ -0,0 +1,92 @@
|
|||
use std::{cell::RefCell, collections::HashMap, hash::Hash, rc::Rc};
|
||||
|
||||
use indexmap::IndexMap;
|
||||
use rand::{distributions::WeightedIndex, prelude::Distribution, rngs::ThreadRng};
|
||||
|
||||
/// What a node in the chain stands for: the start marker, a word, or the end
/// marker.
#[derive(PartialEq, Eq, Hash, Debug)]
pub enum MarkovToken {
    /// Marker placed before the first word of every entry.
    Root,
    /// A word taken from the training data.
    Value(String),
    /// Marker placed after the last word of every entry.
    End,
}

impl MarkovToken {
    /// Returns `true` only for [`MarkovToken::Root`].
    pub fn is_root(&self) -> bool {
        match self {
            Self::Root => true,
            Self::Value(_) | Self::End => false,
        }
    }

    /// Returns `true` only for [`MarkovToken::End`].
    pub fn is_end(&self) -> bool {
        match self {
            Self::End => true,
            Self::Root | Self::Value(_) => false,
        }
    }
}
|
||||
|
||||
pub struct MarkovNode {
|
||||
pub value: MarkovToken,
|
||||
pub conns: RefCell<MarkovConns>,
|
||||
}
|
||||
|
||||
impl PartialEq for MarkovNode {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.value.eq(&other.value)
|
||||
}
|
||||
}
|
||||
impl Eq for MarkovNode {}
|
||||
impl Hash for MarkovNode {
|
||||
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
|
||||
self.value.hash(state);
|
||||
}
|
||||
}
|
||||
|
||||
impl MarkovNode {
|
||||
pub fn new(token: MarkovToken) -> Rc<Self> {
|
||||
Rc::new(Self {
|
||||
value: token,
|
||||
conns: RefCell::new(MarkovConns(IndexMap::new())),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub struct MarkovConns(pub IndexMap<Rc<MarkovNode>, u32>);
|
||||
|
||||
impl MarkovConns {
|
||||
pub fn connect(&mut self, word: Rc<MarkovNode>) {
|
||||
if let Some(count) = self.0.get_mut(&word) {
|
||||
*count += 1;
|
||||
} else {
|
||||
self.0.insert(word, 1);
|
||||
}
|
||||
}
|
||||
|
||||
fn index(&self, i: usize) -> Rc<MarkovNode> {
|
||||
self.0.get_index(i).map(|(n, _)| n.clone()).unwrap()
|
||||
}
|
||||
|
||||
pub fn random_weighted(&self, rng: &mut ThreadRng) -> Rc<MarkovNode> {
|
||||
let wi = WeightedIndex::new(self.0.iter().map(|(_, c)| c)).unwrap(); // SAFETY: there should always be at least an end token
|
||||
let i = wi.sample(rng);
|
||||
|
||||
self.index(i)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct MarkovAllNodes(HashMap<String, Rc<MarkovNode>>);
|
||||
|
||||
impl MarkovAllNodes {
|
||||
pub fn new() -> Self {
|
||||
Self(HashMap::new())
|
||||
}
|
||||
|
||||
pub fn node(&mut self, word: &str) -> Rc<MarkovNode> {
|
||||
if let Some(node) = self.0.get(word) {
|
||||
node.clone()
|
||||
} else {
|
||||
let node = MarkovNode::new(MarkovToken::Value(word.to_string()));
|
||||
self.0.insert(word.to_string(), node.clone());
|
||||
node
|
||||
}
|
||||
}
|
||||
|
||||
pub fn try_node(&self, word: &str) -> Option<Rc<MarkovNode>> {
|
||||
self.0.get(word).cloned()
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue