Added generator abstraction

This commit is contained in:
Joshua Barretto 2025-04-21 11:26:08 +01:00
parent 40f159df1c
commit e9b8272706
6 changed files with 516 additions and 81 deletions

202
Cargo.lock generated
View file

@ -32,6 +32,21 @@ version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
[[package]]
name = "android-tzdata"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0"
[[package]]
name = "android_system_properties"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
dependencies = [
"libc",
]
[[package]] [[package]]
name = "anstream" name = "anstream"
version = "0.6.18" version = "0.6.18"
@ -205,6 +220,7 @@ version = "0.1.0"
dependencies = [ dependencies = [
"axum", "axum",
"axum-server", "axum-server",
"chrono",
"clap", "clap",
"hashbrown", "hashbrown",
"itertools 0.14.0", "itertools 0.14.0",
@ -257,6 +273,12 @@ version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd"
[[package]]
name = "bumpalo"
version = "3.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf"
[[package]] [[package]]
name = "bytes" name = "bytes"
version = "1.10.1" version = "1.10.1"
@ -289,6 +311,20 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chrono"
version = "0.4.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a7964611d71df112cb1730f2ee67324fcf4d0fc6606acbbe9bfe06df124637c"
dependencies = [
"android-tzdata",
"iana-time-zone",
"js-sys",
"num-traits",
"wasm-bindgen",
"windows-link",
]
[[package]] [[package]]
name = "clang-sys" name = "clang-sys"
version = "1.8.1" version = "1.8.1"
@ -355,6 +391,12 @@ version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"
[[package]]
name = "core-foundation-sys"
version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
[[package]] [[package]]
name = "dunce" name = "dunce"
version = "1.0.5" version = "1.0.5"
@ -621,6 +663,30 @@ dependencies = [
"tower-service", "tower-service",
] ]
[[package]]
name = "iana-time-zone"
version = "0.1.63"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8"
dependencies = [
"android_system_properties",
"core-foundation-sys",
"iana-time-zone-haiku",
"js-sys",
"log",
"wasm-bindgen",
"windows-core",
]
[[package]]
name = "iana-time-zone-haiku"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
dependencies = [
"cc",
]
[[package]] [[package]]
name = "indexmap" name = "indexmap"
version = "2.9.0" version = "2.9.0"
@ -671,6 +737,16 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "js-sys"
version = "0.3.77"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f"
dependencies = [
"once_cell",
"wasm-bindgen",
]
[[package]] [[package]]
name = "lazy_static" name = "lazy_static"
version = "1.5.0" version = "1.5.0"
@ -765,6 +841,15 @@ dependencies = [
"minimal-lexical", "minimal-lexical",
] ]
[[package]]
name = "num-traits"
version = "0.2.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
dependencies = [
"autocfg",
]
[[package]] [[package]]
name = "object" name = "object"
version = "0.36.7" version = "0.36.7"
@ -1236,6 +1321,64 @@ dependencies = [
"wit-bindgen-rt", "wit-bindgen-rt",
] ]
[[package]]
name = "wasm-bindgen"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5"
dependencies = [
"cfg-if",
"once_cell",
"rustversion",
"wasm-bindgen-macro",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6"
dependencies = [
"bumpalo",
"log",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de"
dependencies = [
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d"
dependencies = [
"unicode-ident",
]
[[package]] [[package]]
name = "which" name = "which"
version = "4.4.2" version = "4.4.2"
@ -1248,6 +1391,65 @@ dependencies = [
"rustix", "rustix",
] ]
[[package]]
name = "windows-core"
version = "0.61.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4763c1de310c86d75a878046489e2e5ba02c649d185f21c67d4cf8a56d098980"
dependencies = [
"windows-implement",
"windows-interface",
"windows-link",
"windows-result",
"windows-strings",
]
[[package]]
name = "windows-implement"
version = "0.60.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "windows-interface"
version = "0.59.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "windows-link"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38"
[[package]]
name = "windows-result"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c64fd11a4fd95df68efcfee5f44a294fe71b8bc6a91993e2791938abcc712252"
dependencies = [
"windows-link",
]
[[package]]
name = "windows-strings"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a2ba9642430ee452d5a7aa78d72907ebe8cfda358e8cb7918a2050581322f97"
dependencies = [
"windows-link",
]
[[package]] [[package]]
name = "windows-sys" name = "windows-sys"
version = "0.52.0" version = "0.52.0"

View file

@ -6,6 +6,7 @@ edition = "2024"
[dependencies] [dependencies]
axum = "0.8.3" axum = "0.8.3"
axum-server = { version = "0.7.2", features = ["tls-rustls"] } axum-server = { version = "0.7.2", features = ["tls-rustls"] }
chrono = "0.4.40"
clap = { version = "4.5.37", features = ["derive"] } clap = { version = "4.5.37", features = ["derive"] }
hashbrown = "0.15.2" hashbrown = "0.15.2"
itertools = "0.14.0" itertools = "0.14.0"

174
src/generator/ast.rs Normal file
View file

@ -0,0 +1,174 @@
use super::*;
/// Grammar-driven text generator.
///
/// The type carries no state: all vocabulary and sentence rules live inside
/// its `Generator` implementation.
#[derive(Debug, Clone, Default)]
pub struct Ast {}

impl Ast {
    /// Creates a new generator. Equivalent to `Ast::default()`.
    pub fn new() -> Self {
        Self {}
    }
}
impl Generator for Ast {
    /// Builds one pseudo-sentence from a tiny hand-written grammar and streams
    /// its words.
    ///
    /// `approx_len` is a size budget, not an exact word count: budgets above
    /// five are split into two sub-clauses joined by a conjunction, and smaller
    /// budgets produce a single "noun verb" or "noun action noun" clause. All
    /// randomness is drawn from `rng`; the whole sentence is generated eagerly
    /// before the iterator is returned.
    fn word_stream<'a>(
        &'a self,
        approx_len: usize,
        rng: &'a mut crate::Rng,
    ) -> Box<dyn Iterator<Item = Cow<'a, str>> + 'a> {
        /// One vocabulary entry; an entry may span several words.
        type Phrase = &'static [&'static str];
        /// Signature shared by the grammar rules usable inside a simple clause.
        type Rule = fn(&mut Vec<&'static str>, &mut crate::Rng, usize);

        const INTENSIFIERS: &[Phrase] = &[&["very"], &["not", "very"]];
        const ADJECTIVES: &[Phrase] = &[
            &["blue"],
            &["red"],
            &["big"],
            &["nice"],
            &["happy"],
            &["small"],
            &["angry"],
            &["stupid"],
            &["terrible"],
            &["anti-capitalist"],
            &["hateful"],
            &["beautiful"],
            &["spammy"],
            &["funny"],
            &["bald"],
            &["long"],
            &["short"],
            &["enviable"],
            &["articulate"],
            &["fraudulent"],
            &["communist"],
            &["rotund"],
        ];
        const POSSESSIVES: &[Phrase] = &[
            &["my"],
            &["your"],
            &["my friend's"],
            &["our"],
            &["their"],
        ];
        const NOUNS: &[Phrase] = &[
            &["me"],
            &["kangaroos"],
            &["chips"],
            &["carrots"],
            &["weather"],
            &["dogs"],
            &["cats"],
            &["salami"],
            &["cabbage"],
            &["computer"],
            &["onion"],
            &["Sam Altman"],
            &["goblin"],
            &["eagle"],
            &["democracy"],
            &["america"],
        ];
        const QUALIFIERS: &[Phrase] = &[&["quickly"], &["slowly"], &["joyfully"]];
        // NOTE(review): "speaks" is listed twice, which doubles its weight —
        // confirm whether that is intended.
        const VERBS: &[Phrase] = &[
            &["eats"],
            &["runs"],
            &["walks"],
            &["speaks"],
            &["speaks"],
        ];
        const ACTIONS: &[Phrase] = &[&["eats"], &["kills"]];
        const CONJUNCTIONS: &[Phrase] = &[
            &["because"],
            &["and", "also"],
            &["when"],
            &["if"],
            &["or"],
            &["and"],
            &["then"],
        ];

        /// Appends one uniformly chosen phrase from `table` to `out`.
        fn emit(out: &mut Vec<&'static str>, rng: &mut crate::Rng, table: &[Phrase]) {
            let phrase = table.choose(rng).unwrap();
            out.extend(*phrase);
        }

        /// Adjective, optionally preceded by an intensifier.
        fn adjective(out: &mut Vec<&'static str>, rng: &mut crate::Rng, budget: usize) {
            if budget > 1 && rng.random_bool(0.15) {
                emit(out, rng, INTENSIFIERS);
            }
            emit(out, rng, ADJECTIVES);
        }

        /// Noun, optionally preceded by a possessive and/or an adjective.
        fn noun(out: &mut Vec<&'static str>, rng: &mut crate::Rng, budget: usize) {
            if budget > 1 && rng.random_bool(0.25) {
                emit(out, rng, POSSESSIVES);
            }
            if budget > 2 && rng.random_bool(0.25) {
                adjective(out, rng, budget - 1);
            }
            emit(out, rng, NOUNS);
        }

        /// Single adverb.
        fn qualifier(out: &mut Vec<&'static str>, rng: &mut crate::Rng) {
            emit(out, rng, QUALIFIERS);
        }

        /// Intransitive verb, optionally followed by an adverb.
        fn verb(out: &mut Vec<&'static str>, rng: &mut crate::Rng, budget: usize) {
            emit(out, rng, VERBS);
            if budget > 1 && rng.random_bool(0.25) {
                qualifier(out, rng);
            }
        }

        /// Transitive verb, optionally preceded by an adverb.
        fn action(out: &mut Vec<&'static str>, rng: &mut crate::Rng, budget: usize) {
            if budget > 1 && rng.random_bool(0.25) {
                qualifier(out, rng);
            }
            emit(out, rng, ACTIONS);
        }

        /// Clause: either a simple one, or two half-budget clauses joined by a
        /// conjunction when the budget exceeds five.
        fn clause(out: &mut Vec<&'static str>, rng: &mut crate::Rng, budget: usize) {
            if budget <= 5 {
                let shapes: [&[Rule]; 2] = [&[noun, verb], &[noun, action, noun]];
                let shape = *shapes.choose(rng).unwrap();
                // Each part of the clause gets an equal share of the budget.
                let share = budget / shape.len();
                for rule in shape {
                    rule(out, rng, share);
                }
                return;
            }

            let left = budget / 2;
            clause(out, rng, left);
            emit(out, rng, CONJUNCTIONS);
            clause(out, rng, budget - left);
        }

        let mut sentence = Vec::new();
        clause(&mut sentence, rng, approx_len);
        Box::new(sentence.into_iter().map(|w| w.into()))
    }
}

51
src/generator/markov.rs Normal file
View file

@ -0,0 +1,51 @@
use super::*;
/// First-order word chain built from a text corpus.
pub struct Markov {
// Maps a word to the words observed directly after it, each paired with
// the number of times that succession occurred in the corpus.
freq: HashMap<String, Vec<(String, usize)>>,
}
impl Markov {
pub fn new(corpus: &str) -> Self {
let mut freq = HashMap::<String, HashMap<String, usize>>::default();
for (a, b) in corpus
.split_whitespace()
.map(|s| s.trim_matches(|c: char| !c.is_alphabetic()))
.tuple_windows()
{
*freq
.entry(a.to_string())
.or_default()
.entry(b.to_string())
.or_default() += 1
}
Self {
freq: freq
.into_iter()
.map(|(k, v)| (k, v.into_iter().collect::<Vec<_>>()))
.collect::<HashMap<_, _>>(),
}
}
}
impl Generator for Markov {
    /// Streams `approx_len` words by walking the successor table.
    ///
    /// A start word is picked uniformly from the table's keys and is not
    /// itself emitted. Each step then moves to a successor chosen with
    /// probability proportional to its observed count and yields it. When the
    /// current word has no recorded successors, the walk stays on that word.
    ///
    /// Yields nothing if the table is empty. Words are borrowed from `self`,
    /// so no per-word allocation takes place.
    fn word_stream<'a>(
        &'a self,
        approx_len: usize,
        rng: &'a mut crate::Rng,
    ) -> Box<dyn Iterator<Item = Cow<'a, str>> + 'a> {
        // An empty table has no start word; return an empty stream rather
        // than panicking.
        let Some(mut word) = self.freq.keys().choose(rng) else {
            return Box::new(std::iter::empty::<Cow<'a, str>>());
        };

        Box::new(
            std::iter::from_fn(move || {
                word = self
                    .freq
                    .get(word)
                    .and_then(|rhs| rhs.choose_weighted(rng, |(_, n)| *n).ok())
                    .map(|(rhs, _)| rhs)
                    .unwrap_or(word);
                // `word` borrows from `self.freq`, which lives for `'a`.
                Some(Cow::Borrowed(word.as_str()))
            })
            .take(approx_len),
        )
    }
}

14
src/generator/mod.rs Normal file
View file

@ -0,0 +1,14 @@
use super::*;
pub mod ast;
pub mod markov;
pub use self::{ast::Ast, markov::Markov};
/// A source of pseudo-random words.
///
/// Implementors must be `Send + Sync`.
pub trait Generator: Send + Sync {
/// Returns a boxed iterator of words drawn using `rng`.
///
/// `approx_len` is the requested number of words; as the name suggests,
/// implementations may treat it as a target rather than an exact count.
/// The iterator may borrow from both `self` and `rng` for `'a`, and items
/// may be borrowed or owned strings.
fn word_stream<'a>(
&'a self,
approx_len: usize,
rng: &'a mut crate::Rng,
) -> Box<dyn Iterator<Item = Cow<'a, str>> + 'a>;
}

View file

@ -1,12 +1,14 @@
mod generator;
use axum::{Router, extract::Path, response::Html, routing::get}; use axum::{Router, extract::Path, response::Html, routing::get};
use axum_server::{bind_rustls, tls_rustls::RustlsConfig}; use axum_server::{bind_rustls, tls_rustls::RustlsConfig};
use clap::Parser; use clap::Parser;
use hashbrown::HashMap; use hashbrown::HashMap;
use itertools::Itertools; use itertools::Itertools;
use rand::{prelude::*, random_range}; use rand::{Rng as _, prelude::*, random_range};
use rand_chacha::{ChaCha8Rng, rand_core::SeedableRng}; use rand_chacha::{ChaCha8Rng, rand_core::SeedableRng};
use std::{ use std::{
net::SocketAddr, borrow::Cow,
path::PathBuf, path::PathBuf,
sync::{ sync::{
Arc, Arc,
@ -15,45 +17,34 @@ use std::{
time::Duration, time::Duration,
}; };
pub type Rng = ChaCha8Rng;
#[derive(Parser)] #[derive(Parser)]
pub struct Args { pub struct Args {
#[arg(long)] #[arg(long)]
sock: String, sock: Option<String>,
#[arg(long)] #[arg(long)]
pem_dir: PathBuf, pem_dir: Option<PathBuf>,
} }
#[tokio::main] #[tokio::main]
async fn main() { async fn main() {
println!("Starting..."); let args = Args::parse();
let mut freq = HashMap::<String, HashMap<String, usize>>::default(); let generators: Vec<Arc<dyn generator::Generator>> = vec![
include_str!("../wap.txt") Arc::new(generator::Markov::new(include_str!("../wap.txt"))),
.split_whitespace() Arc::new(generator::Ast::new()),
.map(|s| s.trim_matches(|c: char| !c.is_alphabetic())) ];
.tuple_windows()
.for_each(|(a, b)| {
*freq
.entry(a.to_string())
.or_default()
.entry(b.to_string())
.or_default() += 1
});
let mut freq = freq
.into_iter()
.map(|(k, v)| (k, v.into_iter().collect::<Vec<_>>()))
.collect::<HashMap<_, _>>();
println!("Generated frequencies.");
let mut counter = Arc::new(AtomicU64::new(0)); let counter = Arc::new(AtomicU64::new(0));
// build our application with a single route
let app = { let app = {
let counter = counter.clone(); let counter = counter.clone();
Router::new().route( Router::new().route(
"/{id}", "/{id}",
get(|Path(id): Path<String>| async move { get(|Path(id): Path<String>| async move {
tokio::time::sleep(Duration::from_millis(random_range(200..1000))).await; tokio::time::sleep(Duration::from_millis(random_range(200..1000))).await;
counter.fetch_add(1, Ordering::Relaxed);
let mut seed = [0; 32]; let mut seed = [0; 32];
for (i, b) in id for (i, b) in id
@ -64,83 +55,85 @@ async fn main() {
{ {
seed[i] = b; seed[i] = b;
} }
let mut rng = &mut ChaCha8Rng::from_seed(seed); let mut rng = Rng::from_seed(seed);
fn choose_word<'a, 'b, V>( let generator = generators.choose(&mut rng).unwrap();
freq: &'b HashMap<String, V>,
rng: &'a mut ChaCha8Rng,
) -> &'b str {
freq.keys().choose(rng).unwrap()
};
let title = (0..rng.gen_range(2..10)) let title = generator
.map(|_| choose_word(&freq, rng)) .word_stream(rng.random_range(2..10), &mut rng.clone())
.join(" "); .join(" ");
let mut word = freq.keys().choose(rng).unwrap(); let content = generator
let mut content = word.clone(); .word_stream(rng.random_range(50..5_000), &mut rng.clone())
for _ in 0..rng.gen_range(50..5_000) { .fold(String::new(), |mut content, word| {
// let word = choose_word(&corpus, rng).to_string(); if rng.random_bool(0.05) {
word = freq let url = generator.word_stream(3, &mut rng.clone()).join("-");
.get(&*word) content += &format!(" <a href=\"{}\">{}</a>", url, word);
.and_then(|rhs| rhs.choose_weighted(rng, |(_, n)| *n).ok()) } else if rng.random_bool(0.01) {
.map(|(rhs, _)| rhs) content += ".<br>";
.unwrap_or(word); } else {
content += " ";
content += &word
}
content
});
if rng.gen_bool(0.05) {
let url = (0..3).map(|_| choose_word(&freq, rng)).join("-");
content += &format!(" <a href=\"{}\">{}</a>", url, word);
} else if rng.gen_bool(0.01) {
content += ".<br>";
} else {
content += " ";
content += &word;
};
}
counter.fetch_add(1, Ordering::Relaxed);
Html(format!( Html(format!(
"<!DOCTYPE html> "<!DOCTYPE html>
<html> <html>
<head> <head>
<title>{title}</title> <title>{title}</title>
</head> </head>
<body> <body>
<h1>{title}</h1> <h1>{title}</h1>
<p>{content}</p> <p>{content}</p>
</body> </body>
</html>" </html>"
)) ))
}), }),
) )
}; };
// run our app with hyper, listening globally on port 3000
let listener = tokio::net::TcpListener::bind("0.0.0.0:3000").await.unwrap();
println!("Started server.");
let mut interval = tokio::time::interval(Duration::from_secs(20)); let mut interval = tokio::time::interval(Duration::from_secs(20));
tokio::spawn(async move { tokio::spawn(async move {
let mut last = 0;
loop { loop {
interval.tick().await; interval.tick().await;
println!(
"Served bollocks to {} clients!", let count = counter.load(Ordering::Relaxed);
counter.load(Ordering::Relaxed)
); if count != last {
last = count;
println!(
"{} Served bollocks to {} clients!",
chrono::offset::Local::now(),
count,
);
}
} }
}); });
let args = Args::parse(); println!("Starting...");
let sock = args
let config = RustlsConfig::from_pem_file( .sock
args.pem_dir.clone().join("cert.pem"), .as_deref()
args.pem_dir.clone().join("key.pem"), .unwrap_or("0.0.0.0:4000")
) .parse()
.await
.unwrap();
bind_rustls(args.sock.parse().unwrap(), config)
.serve(app.into_make_service())
.await
.unwrap(); .unwrap();
if let Some(pem_dir) = args.pem_dir {
let config = RustlsConfig::from_pem_file(pem_dir.join("cert.pem"), pem_dir.join("key.pem"))
.await
.unwrap();
bind_rustls(sock, config)
.serve(app.into_make_service())
.await
.unwrap();
} else {
axum_server::bind(sock)
.serve(app.into_make_service())
.await
.unwrap()
}
} }