From e9b82727065cd3c115720fe6ff9a870b5db4d2e7 Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Mon, 21 Apr 2025 11:26:08 +0100 Subject: [PATCH] Added generator abstraction --- Cargo.lock | 202 ++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 1 + src/generator/ast.rs | 174 ++++++++++++++++++++++++++++++++++ src/generator/markov.rs | 51 ++++++++++ src/generator/mod.rs | 14 +++ src/main.rs | 155 +++++++++++++++--------------- 6 files changed, 516 insertions(+), 81 deletions(-) create mode 100644 src/generator/ast.rs create mode 100644 src/generator/markov.rs create mode 100644 src/generator/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 91ea3c0..7b3774b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -32,6 +32,21 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "anstream" version = "0.6.18" @@ -205,6 +220,7 @@ version = "0.1.0" dependencies = [ "axum", "axum-server", + "chrono", "clap", "hashbrown", "itertools 0.14.0", @@ -257,6 +273,12 @@ version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" +[[package]] +name = "bumpalo" +version = "3.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" + [[package]] name = "bytes" version = "1.10.1" @@ -289,6 +311,20 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chrono" +version = "0.4.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a7964611d71df112cb1730f2ee67324fcf4d0fc6606acbbe9bfe06df124637c" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "wasm-bindgen", + "windows-link", +] + [[package]] name = "clang-sys" version = "1.8.1" @@ -355,6 +391,12 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + [[package]] name = "dunce" version = "1.0.5" @@ -621,6 +663,30 @@ dependencies = [ "tower-service", ] +[[package]] +name = "iana-time-zone" +version = "0.1.63" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + [[package]] name = "indexmap" version = "2.9.0" @@ -671,6 +737,16 @@ dependencies = [ "libc", ] +[[package]] +name = "js-sys" +version = "0.3.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + [[package]] name = "lazy_static" version = "1.5.0" @@ -765,6 +841,15 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "object" version = "0.36.7" @@ -1236,6 +1321,64 @@ dependencies = [ "wit-bindgen-rt", ] +[[package]] +name = "wasm-bindgen" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] + [[package]] name = "which" version = "4.4.2" @@ -1248,6 +1391,65 @@ dependencies = [ "rustix", ] +[[package]] +name = "windows-core" +version = "0.61.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4763c1de310c86d75a878046489e2e5ba02c649d185f21c67d4cf8a56d098980" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" + +[[package]] +name = "windows-result" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c64fd11a4fd95df68efcfee5f44a294fe71b8bc6a91993e2791938abcc712252" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2ba9642430ee452d5a7aa78d72907ebe8cfda358e8cb7918a2050581322f97" +dependencies = [ + "windows-link", +] + [[package]] name = "windows-sys" version = "0.52.0" diff --git a/Cargo.toml b/Cargo.toml index d986361..122db6d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ edition = "2024" [dependencies] axum = "0.8.3" axum-server = { version = "0.7.2", features = ["tls-rustls"] } +chrono = "0.4.40" clap = { version = "4.5.37", features = ["derive"] } hashbrown = "0.15.2" itertools = "0.14.0" diff --git a/src/generator/ast.rs b/src/generator/ast.rs new file mode 100644 index 0000000..41d6115 --- /dev/null +++ b/src/generator/ast.rs @@ -0,0 +1,174 @@ +use super::*; + +pub struct Ast {} + +impl Ast { + pub fn new() -> Self { + Self {} + } +} + +impl Generator for Ast { + fn word_stream<'a>( + &'a self, + approx_len: usize, + rng: &'a mut crate::Rng, + ) -> Box> + 'a> { + fn gen_adjective(words: &mut Vec<&'static str>, rng: &mut crate::Rng, len: usize) { + if len > 1 && rng.random_bool(0.15) { + words.extend(*[&["very"][..], &["not", "very"][..]].choose(rng).unwrap()); + } + + words.extend( + *[ + &["blue"][..], + &["red"][..], + &["big"][..], + &["nice"][..], + &["happy"][..], + &["small"][..], + &["angry"][..], + &["stupid"][..], + &["terrible"][..], + &["anti-capitalist"][..], + &["hateful"][..], + &["beautiful"][..], + &["spammy"][..], + &["funny"][..], + &["bald"][..], + &["long"][..], + &["short"][..], + &["enviable"][..], + &["articulate"][..], + &["fraudulent"][..], + &["communist"][..], + &["rotund"][..], + ] + .choose(rng) + .unwrap(), + ); + } + + fn gen_noun(words: &mut Vec<&'static str>, rng: &mut crate::Rng, len: usize) { + if len > 1 && rng.random_bool(0.25) { + words.extend( + *[ + &["my"][..], + &["your"][..], + &["my friend's"][..], + &["our"][..], + &["their"][..], + ] + .choose(rng) + .unwrap(), + ); + } + + if len > 2 && rng.random_bool(0.25) { + gen_adjective(words, rng, len - 1); + } + + words.extend( + *[ + &["me"][..], + &["kangaroos"][..], + &["chips"][..], + &["carrots"][..], + &["weather"][..], + &["dogs"][..], + &["cats"][..], + &["salami"][..], + &["cabbage"][..], + &["computer"][..], + &["onion"][..], + &["Sam Altman"][..], + &["goblin"][..], + &["eagle"][..], + &["democracy"][..], + &["america"][..], + ] + .choose(rng) + .unwrap(), + ); + } + + fn gen_qualifier(words: &mut Vec<&'static str>, rng: &mut crate::Rng, _len: usize) { + words.extend( + *[&["quickly"][..], &["slowly"][..], &["joyfully"][..]] + .choose(rng) + .unwrap(), + ); + } + + fn gen_verb(words: &mut Vec<&'static str>, rng: &mut crate::Rng, len: usize) { + words.extend( + *[ + &["eats"][..], + &["runs"][..], + &["walks"][..], + &["speaks"][..], + &["speaks"][..], + ] + .choose(rng) + .unwrap(), + ); + + if len > 1 && rng.random_bool(0.25) { + gen_qualifier(words, rng, len - 1); + } + } + + fn gen_action(words: &mut Vec<&'static str>, rng: &mut crate::Rng, len: usize) { + if len > 1 && rng.random_bool(0.25) { + gen_qualifier(words, rng, len - 1); + } + + words.extend(*[&["eats"][..], &["kills"][..]].choose(rng).unwrap()); + } + + fn gen_clause(words: &mut Vec<&'static str>, rng: &mut crate::Rng, len: usize) { + if len > 5 { + gen_clause(words, rng, len / 2); + words.extend( + *[ + &["because"][..], + &["and", "also"][..], + &["when"][..], + &["if"][..], + &["or"][..], + &["and"][..], + &["then"][..], + ] + .choose(rng) + .unwrap(), + ); + gen_clause(words, rng, len - len / 2); + } else { + enum Kind { + Gen(fn(&mut Vec<&'static str>, &mut crate::Rng, usize)), + Word(&'static str), + } + + let clauses = [ + &[Kind::Gen(gen_noun), Kind::Gen(gen_verb)][..], + &[ + Kind::Gen(gen_noun), + Kind::Gen(gen_action), + Kind::Gen(gen_noun), + ][..], + ]; + let clause = clauses.choose(rng).unwrap(); + for k in *clause { + match k { + Kind::Word(w) => words.push(w), + Kind::Gen(f) => f(words, rng, len / clause.len()), + } + } + } + } + + let mut words = Vec::new(); + gen_clause(&mut words, rng, approx_len); + Box::new(words.into_iter().map(|word| word.into())) + } +} diff --git a/src/generator/markov.rs b/src/generator/markov.rs new file mode 100644 index 0000000..5ad0ed1 --- /dev/null +++ b/src/generator/markov.rs @@ -0,0 +1,51 @@ +use super::*; + +pub struct Markov { + freq: HashMap>, +} + +impl Markov { + pub fn new(corpus: &str) -> Self { + let mut freq = HashMap::>::default(); + for (a, b) in corpus + .split_whitespace() + .map(|s| s.trim_matches(|c: char| !c.is_alphabetic())) + .tuple_windows() + { + *freq + .entry(a.to_string()) + .or_default() + .entry(b.to_string()) + .or_default() += 1 + } + Self { + freq: freq + .into_iter() + .map(|(k, v)| (k, v.into_iter().collect::>())) + .collect::>(), + } + } +} + +impl Generator for Markov { + fn word_stream<'a>( + &'a self, + approx_len: usize, + rng: &'a mut crate::Rng, + ) -> Box> + 'a> { + let mut word = self.freq.keys().choose(rng).unwrap(); + Box::new( + std::iter::from_fn(move || { + word = self + .freq + .get(&*word) + .and_then(|rhs| rhs.choose_weighted(rng, |(_, n)| *n).ok()) + .map(|(rhs, _)| rhs) + .unwrap_or(word); + + Some(word.clone().into()) + }) + .take(approx_len), + ) + } +} diff --git a/src/generator/mod.rs b/src/generator/mod.rs new file mode 100644 index 0000000..bdeeaf7 --- /dev/null +++ b/src/generator/mod.rs @@ -0,0 +1,14 @@ +use super::*; + +pub mod ast; +pub mod markov; + +pub use self::{ast::Ast, markov::Markov}; + +pub trait Generator: Send + Sync { + fn word_stream<'a>( + &'a self, + approx_len: usize, + rng: &'a mut crate::Rng, + ) -> Box> + 'a>; +} diff --git a/src/main.rs b/src/main.rs index f8a8be0..213b5c4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,12 +1,14 @@ +mod generator; + use axum::{Router, extract::Path, response::Html, routing::get}; use axum_server::{bind_rustls, tls_rustls::RustlsConfig}; use clap::Parser; use hashbrown::HashMap; use itertools::Itertools; -use rand::{prelude::*, random_range}; +use rand::{Rng as _, prelude::*, random_range}; use rand_chacha::{ChaCha8Rng, rand_core::SeedableRng}; use std::{ - net::SocketAddr, + borrow::Cow, path::PathBuf, sync::{ Arc, @@ -15,45 +17,34 @@ use std::{ time::Duration, }; +pub type Rng = ChaCha8Rng; + #[derive(Parser)] pub struct Args { #[arg(long)] - sock: String, + sock: Option, #[arg(long)] - pem_dir: PathBuf, + pem_dir: Option, } #[tokio::main] async fn main() { - println!("Starting..."); + let args = Args::parse(); - let mut freq = HashMap::>::default(); - include_str!("../wap.txt") - .split_whitespace() - .map(|s| s.trim_matches(|c: char| !c.is_alphabetic())) - .tuple_windows() - .for_each(|(a, b)| { - *freq - .entry(a.to_string()) - .or_default() - .entry(b.to_string()) - .or_default() += 1 - }); - let mut freq = freq - .into_iter() - .map(|(k, v)| (k, v.into_iter().collect::>())) - .collect::>(); - println!("Generated frequencies."); + let generators: Vec> = vec![ + Arc::new(generator::Markov::new(include_str!("../wap.txt"))), + Arc::new(generator::Ast::new()), + ]; - let mut counter = Arc::new(AtomicU64::new(0)); + let counter = Arc::new(AtomicU64::new(0)); - // build our application with a single route let app = { let counter = counter.clone(); Router::new().route( "/{id}", get(|Path(id): Path| async move { tokio::time::sleep(Duration::from_millis(random_range(200..1000))).await; + counter.fetch_add(1, Ordering::Relaxed); let mut seed = [0; 32]; for (i, b) in id @@ -64,83 +55,85 @@ async fn main() { { seed[i] = b; } - let mut rng = &mut ChaCha8Rng::from_seed(seed); + let mut rng = Rng::from_seed(seed); - fn choose_word<'a, 'b, V>( - freq: &'b HashMap, - rng: &'a mut ChaCha8Rng, - ) -> &'b str { - freq.keys().choose(rng).unwrap() - }; + let generator = generators.choose(&mut rng).unwrap(); - let title = (0..rng.gen_range(2..10)) - .map(|_| choose_word(&freq, rng)) + let title = generator + .word_stream(rng.random_range(2..10), &mut rng.clone()) .join(" "); - let mut word = freq.keys().choose(rng).unwrap(); - let mut content = word.clone(); - for _ in 0..rng.gen_range(50..5_000) { - // let word = choose_word(&corpus, rng).to_string(); - word = freq - .get(&*word) - .and_then(|rhs| rhs.choose_weighted(rng, |(_, n)| *n).ok()) - .map(|(rhs, _)| rhs) - .unwrap_or(word); + let content = generator + .word_stream(rng.random_range(50..5_000), &mut rng.clone()) + .fold(String::new(), |mut content, word| { + if rng.random_bool(0.05) { + let url = generator.word_stream(3, &mut rng.clone()).join("-"); + content += &format!(" {}", url, word); + } else if rng.random_bool(0.01) { + content += ".
"; + } else { + content += " "; + content += &word + } + content + }); - if rng.gen_bool(0.05) { - let url = (0..3).map(|_| choose_word(&freq, rng)).join("-"); - content += &format!(" {}", url, word); - } else if rng.gen_bool(0.01) { - content += ".
"; - } else { - content += " "; - content += &word; - }; - } - counter.fetch_add(1, Ordering::Relaxed); Html(format!( " - - - {title} - - + + + {title} + + -

{title}

-

{content}

+

{title}

+

{content}

- - " + + " )) }), ) }; - // run our app with hyper, listening globally on port 3000 - let listener = tokio::net::TcpListener::bind("0.0.0.0:3000").await.unwrap(); - println!("Started server."); - let mut interval = tokio::time::interval(Duration::from_secs(20)); tokio::spawn(async move { + let mut last = 0; loop { interval.tick().await; - println!( - "Served bollocks to {} clients!", - counter.load(Ordering::Relaxed) - ); + + let count = counter.load(Ordering::Relaxed); + + if count != last { + last = count; + println!( + "{} Served bollocks to {} clients!", + chrono::offset::Local::now(), + count, + ); + } } }); - let args = Args::parse(); - - let config = RustlsConfig::from_pem_file( - args.pem_dir.clone().join("cert.pem"), - args.pem_dir.clone().join("key.pem"), - ) - .await - .unwrap(); - bind_rustls(args.sock.parse().unwrap(), config) - .serve(app.into_make_service()) - .await + println!("Starting..."); + let sock = args + .sock + .as_deref() + .unwrap_or("0.0.0.0:4000") + .parse() .unwrap(); + if let Some(pem_dir) = args.pem_dir { + let config = RustlsConfig::from_pem_file(pem_dir.join("cert.pem"), pem_dir.join("key.pem")) + .await + .unwrap(); + bind_rustls(sock, config) + .serve(app.into_make_service()) + .await + .unwrap(); + } else { + axum_server::bind(sock) + .serve(app.into_make_service()) + .await + .unwrap() + } }