Added generator abstraction

This commit is contained in:
Joshua Barretto 2025-04-21 11:26:08 +01:00
parent 40f159df1c
commit e9b8272706
6 changed files with 516 additions and 81 deletions

202
Cargo.lock generated
View file

@ -32,6 +32,21 @@ version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
[[package]]
name = "android-tzdata"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0"
[[package]]
name = "android_system_properties"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
dependencies = [
"libc",
]
[[package]]
name = "anstream"
version = "0.6.18"
@ -205,6 +220,7 @@ version = "0.1.0"
dependencies = [
"axum",
"axum-server",
"chrono",
"clap",
"hashbrown",
"itertools 0.14.0",
@ -257,6 +273,12 @@ version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd"
[[package]]
name = "bumpalo"
version = "3.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf"
[[package]]
name = "bytes"
version = "1.10.1"
@ -289,6 +311,20 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chrono"
version = "0.4.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a7964611d71df112cb1730f2ee67324fcf4d0fc6606acbbe9bfe06df124637c"
dependencies = [
"android-tzdata",
"iana-time-zone",
"js-sys",
"num-traits",
"wasm-bindgen",
"windows-link",
]
[[package]]
name = "clang-sys"
version = "1.8.1"
@ -355,6 +391,12 @@ version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"
[[package]]
name = "core-foundation-sys"
version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
[[package]]
name = "dunce"
version = "1.0.5"
@ -621,6 +663,30 @@ dependencies = [
"tower-service",
]
[[package]]
name = "iana-time-zone"
version = "0.1.63"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8"
dependencies = [
"android_system_properties",
"core-foundation-sys",
"iana-time-zone-haiku",
"js-sys",
"log",
"wasm-bindgen",
"windows-core",
]
[[package]]
name = "iana-time-zone-haiku"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
dependencies = [
"cc",
]
[[package]]
name = "indexmap"
version = "2.9.0"
@ -671,6 +737,16 @@ dependencies = [
"libc",
]
[[package]]
name = "js-sys"
version = "0.3.77"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f"
dependencies = [
"once_cell",
"wasm-bindgen",
]
[[package]]
name = "lazy_static"
version = "1.5.0"
@ -765,6 +841,15 @@ dependencies = [
"minimal-lexical",
]
[[package]]
name = "num-traits"
version = "0.2.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
dependencies = [
"autocfg",
]
[[package]]
name = "object"
version = "0.36.7"
@ -1236,6 +1321,64 @@ dependencies = [
"wit-bindgen-rt",
]
[[package]]
name = "wasm-bindgen"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5"
dependencies = [
"cfg-if",
"once_cell",
"rustversion",
"wasm-bindgen-macro",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6"
dependencies = [
"bumpalo",
"log",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de"
dependencies = [
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d"
dependencies = [
"unicode-ident",
]
[[package]]
name = "which"
version = "4.4.2"
@ -1248,6 +1391,65 @@ dependencies = [
"rustix",
]
[[package]]
name = "windows-core"
version = "0.61.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4763c1de310c86d75a878046489e2e5ba02c649d185f21c67d4cf8a56d098980"
dependencies = [
"windows-implement",
"windows-interface",
"windows-link",
"windows-result",
"windows-strings",
]
[[package]]
name = "windows-implement"
version = "0.60.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "windows-interface"
version = "0.59.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "windows-link"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38"
[[package]]
name = "windows-result"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c64fd11a4fd95df68efcfee5f44a294fe71b8bc6a91993e2791938abcc712252"
dependencies = [
"windows-link",
]
[[package]]
name = "windows-strings"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a2ba9642430ee452d5a7aa78d72907ebe8cfda358e8cb7918a2050581322f97"
dependencies = [
"windows-link",
]
[[package]]
name = "windows-sys"
version = "0.52.0"

View file

@ -6,6 +6,7 @@ edition = "2024"
[dependencies]
axum = "0.8.3"
axum-server = { version = "0.7.2", features = ["tls-rustls"] }
chrono = "0.4.40"
clap = { version = "4.5.37", features = ["derive"] }
hashbrown = "0.15.2"
itertools = "0.14.0"

174
src/generator/ast.rs Normal file
View file

@ -0,0 +1,174 @@
use super::*;
/// Stateless text generator that assembles sentences from a small,
/// hard-coded grammar (see its `Generator` implementation).
///
/// The type has no fields, so every instance is interchangeable.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct Ast {}

impl Ast {
    /// Creates the generator. Equivalent to `Ast::default()`.
    pub fn new() -> Self {
        Self::default()
    }
}
impl Generator for Ast {
    /// Builds one pseudo-English sentence with a budget of roughly
    /// `approx_len` words and returns it as a stream of borrowed words.
    ///
    /// The whole sentence is generated eagerly into a buffer before the
    /// iterator is returned. `approx_len` only steers how deeply clauses are
    /// nested and how many optional modifiers are attempted; the number of
    /// words produced is not exactly `approx_len`.
    fn word_stream<'a>(
        &'a self,
        approx_len: usize,
        rng: &'a mut crate::Rng,
    ) -> Box<dyn Iterator<Item = Cow<'a, str>> + 'a> {
        // Buffer that every grammar production appends to.
        type Words = Vec<&'static str>;
        // Shared signature of the productions, so they can be stored in tables.
        type Production = fn(&mut Words, &mut crate::Rng, usize);

        // Draws one entry of `pool` uniformly at random.
        fn pick<T: Copy>(pool: &[T], rng: &mut crate::Rng) -> T {
            *pool.choose(rng).expect("word pools are never empty")
        }

        // adjective := [intensifier] ADJECTIVE
        fn gen_adjective(words: &mut Words, rng: &mut crate::Rng, len: usize) {
            const INTENSIFIERS: &[&[&str]] = &[&["very"], &["not", "very"]];
            const ADJECTIVES: &[&str] = &[
                "blue",
                "red",
                "big",
                "nice",
                "happy",
                "small",
                "angry",
                "stupid",
                "terrible",
                "anti-capitalist",
                "hateful",
                "beautiful",
                "spammy",
                "funny",
                "bald",
                "long",
                "short",
                "enviable",
                "articulate",
                "fraudulent",
                "communist",
                "rotund",
            ];

            // The length check comes first so that no random draw is spent
            // when the budget does not allow an intensifier.
            if len > 1 && rng.random_bool(0.15) {
                words.extend_from_slice(pick(INTENSIFIERS, rng));
            }
            words.push(pick(ADJECTIVES, rng));
        }

        // noun := [possessive] [adjective] NOUN
        fn gen_noun(words: &mut Words, rng: &mut crate::Rng, len: usize) {
            const POSSESSIVES: &[&str] = &["my", "your", "my friend's", "our", "their"];
            const NOUNS: &[&str] = &[
                "me",
                "kangaroos",
                "chips",
                "carrots",
                "weather",
                "dogs",
                "cats",
                "salami",
                "cabbage",
                "computer",
                "onion",
                "Sam Altman",
                "goblin",
                "eagle",
                "democracy",
                "america",
            ];

            if len > 1 && rng.random_bool(0.25) {
                words.push(pick(POSSESSIVES, rng));
            }
            if len > 2 && rng.random_bool(0.25) {
                gen_adjective(words, rng, len - 1);
            }
            words.push(pick(NOUNS, rng));
        }

        // qualifier := ADVERB
        fn gen_qualifier(words: &mut Words, rng: &mut crate::Rng, _len: usize) {
            const ADVERBS: &[&str] = &["quickly", "slowly", "joyfully"];
            words.push(pick(ADVERBS, rng));
        }

        // verb := INTRANSITIVE_VERB [qualifier]
        fn gen_verb(words: &mut Words, rng: &mut crate::Rng, len: usize) {
            // "speaks" appears twice, which doubles its odds of being drawn.
            const VERBS: &[&str] = &["eats", "runs", "walks", "speaks", "speaks"];

            words.push(pick(VERBS, rng));
            if len > 1 && rng.random_bool(0.25) {
                gen_qualifier(words, rng, len - 1);
            }
        }

        // action := [qualifier] TRANSITIVE_VERB
        fn gen_action(words: &mut Words, rng: &mut crate::Rng, len: usize) {
            const ACTIONS: &[&str] = &["eats", "kills"];

            if len > 1 && rng.random_bool(0.25) {
                gen_qualifier(words, rng, len - 1);
            }
            words.push(pick(ACTIONS, rng));
        }

        // clause := clause CONJUNCTION clause      (budget above five words)
        //         | noun verb | noun action noun   (otherwise)
        fn gen_clause(words: &mut Words, rng: &mut crate::Rng, len: usize) {
            const CONJUNCTIONS: &[&[&str]] = &[
                &["because"],
                &["and", "also"],
                &["when"],
                &["if"],
                &["or"],
                &["and"],
                &["then"],
            ];

            if len <= 5 {
                // Small budget: emit a single simple clause from a template.
                let templates: [&[Production]; 2] = [
                    &[gen_noun, gen_verb],
                    &[gen_noun, gen_action, gen_noun],
                ];
                let template = pick(&templates, rng);
                // Each part of the template receives an equal share of the budget.
                let share = len / template.len();
                for &produce in template {
                    produce(words, rng, share);
                }
                return;
            }

            // Large budget: split it in two and join the halves.
            let first_half = len / 2;
            gen_clause(words, rng, first_half);
            words.extend_from_slice(pick(CONJUNCTIONS, rng));
            gen_clause(words, rng, len - first_half);
        }

        let mut words = Words::new();
        gen_clause(&mut words, rng, approx_len);
        Box::new(words.into_iter().map(Cow::Borrowed))
    }
}

51
src/generator/markov.rs Normal file
View file

@ -0,0 +1,51 @@
use super::*;
/// First-order word chain learned from a text corpus.
#[derive(Debug, Clone)]
pub struct Markov {
    /// For every word seen in the corpus, the words that directly followed
    /// it together with how many times each of them did.
    freq: HashMap<String, Vec<(String, usize)>>,
}

impl Markov {
    /// Builds the chain from `corpus`.
    ///
    /// The corpus is split on whitespace and each token has leading and
    /// trailing non-alphabetic characters trimmed. Tokens that contain no
    /// letters at all (for example `--` or `1812`) are skipped so that the
    /// empty string never becomes a word in the chain. A corpus with fewer
    /// than two usable tokens yields an empty chain.
    pub fn new(corpus: &str) -> Self {
        let mut counts = HashMap::<String, HashMap<String, usize>>::default();

        let mut tokens = corpus
            .split_whitespace()
            .map(|s| s.trim_matches(|c: char| !c.is_alphabetic()))
            .filter(|s| !s.is_empty());

        // Walk consecutive token pairs, counting each (word, follower) transition.
        if let Some(mut prev) = tokens.next() {
            for next in tokens {
                *counts
                    .entry(prev.to_string())
                    .or_default()
                    .entry(next.to_string())
                    .or_default() += 1;
                prev = next;
            }
        }

        let freq = counts
            .into_iter()
            .map(|(word, followers)| {
                let mut followers = followers.into_iter().collect::<Vec<_>>();
                // Fixed order, so the layout of each successor list does not
                // depend on the hash map's iteration order.
                followers.sort_unstable();
                (word, followers)
            })
            .collect::<HashMap<_, _>>();

        Self { freq }
    }
}
impl Generator for Markov {
    /// Streams up to `approx_len` words by walking the chain.
    ///
    /// A start word is drawn uniformly from the chain's keys (it is not
    /// itself emitted); each emitted word is then drawn from the successors
    /// of the previous one, weighted by how often it followed. Words are
    /// borrowed from the chain, so no allocation happens per word.
    ///
    /// If the chain is empty the stream is empty.
    fn word_stream<'a>(
        &'a self,
        approx_len: usize,
        rng: &'a mut crate::Rng,
    ) -> Box<dyn Iterator<Item = Cow<'a, str>> + 'a> {
        // With no transitions recorded there is no start word to pick;
        // return nothing instead of panicking.
        let Some(mut word) = self.freq.keys().choose(rng) else {
            return Box::new(std::iter::empty());
        };

        Box::new(
            std::iter::from_fn(move || {
                word = self
                    .freq
                    .get(word)
                    .and_then(|followers| followers.choose_weighted(rng, |(_, n)| *n).ok())
                    .map(|(next, _)| next)
                    // Dead end (the word has no recorded successors): jump to
                    // a fresh random word rather than repeating this one for
                    // the rest of the stream.
                    .or_else(|| self.freq.keys().choose(rng))
                    .unwrap_or(word);
                Some(Cow::Borrowed(word.as_str()))
            })
            .take(approx_len),
        )
    }
}

14
src/generator/mod.rs Normal file
View file

@ -0,0 +1,14 @@
use super::*;
pub mod ast;
pub mod markov;
pub use self::{ast::Ast, markov::Markov};
/// A source of generated words.
///
/// The `Send + Sync` supertraits are required of every implementor.
pub trait Generator: Send + Sync {
/// Returns a boxed iterator of words.
///
/// * `approx_len` - requested number of words. As the name suggests this is
///   a target rather than a guarantee; how closely it is honoured is up to
///   the implementation.
/// * `rng` - random source the words are drawn from. It is borrowed for
///   `'a`, the same lifetime as the returned iterator.
///
/// Items are `Cow<'a, str>`, so an implementation may yield either borrowed
/// or owned strings.
fn word_stream<'a>(
&'a self,
approx_len: usize,
rng: &'a mut crate::Rng,
) -> Box<dyn Iterator<Item = Cow<'a, str>> + 'a>;
}

View file

@ -1,12 +1,14 @@
mod generator;
use axum::{Router, extract::Path, response::Html, routing::get};
use axum_server::{bind_rustls, tls_rustls::RustlsConfig};
use clap::Parser;
use hashbrown::HashMap;
use itertools::Itertools;
use rand::{prelude::*, random_range};
use rand::{Rng as _, prelude::*, random_range};
use rand_chacha::{ChaCha8Rng, rand_core::SeedableRng};
use std::{
net::SocketAddr,
borrow::Cow,
path::PathBuf,
sync::{
Arc,
@ -15,45 +17,34 @@ use std::{
time::Duration,
};
pub type Rng = ChaCha8Rng;
#[derive(Parser)]
pub struct Args {
#[arg(long)]
sock: String,
sock: Option<String>,
#[arg(long)]
pem_dir: PathBuf,
pem_dir: Option<PathBuf>,
}
#[tokio::main]
async fn main() {
println!("Starting...");
let args = Args::parse();
let mut freq = HashMap::<String, HashMap<String, usize>>::default();
include_str!("../wap.txt")
.split_whitespace()
.map(|s| s.trim_matches(|c: char| !c.is_alphabetic()))
.tuple_windows()
.for_each(|(a, b)| {
*freq
.entry(a.to_string())
.or_default()
.entry(b.to_string())
.or_default() += 1
});
let mut freq = freq
.into_iter()
.map(|(k, v)| (k, v.into_iter().collect::<Vec<_>>()))
.collect::<HashMap<_, _>>();
println!("Generated frequencies.");
let generators: Vec<Arc<dyn generator::Generator>> = vec![
Arc::new(generator::Markov::new(include_str!("../wap.txt"))),
Arc::new(generator::Ast::new()),
];
let mut counter = Arc::new(AtomicU64::new(0));
let counter = Arc::new(AtomicU64::new(0));
// build our application with a single route
let app = {
let counter = counter.clone();
Router::new().route(
"/{id}",
get(|Path(id): Path<String>| async move {
tokio::time::sleep(Duration::from_millis(random_range(200..1000))).await;
counter.fetch_add(1, Ordering::Relaxed);
let mut seed = [0; 32];
for (i, b) in id
@ -64,40 +55,29 @@ async fn main() {
{
seed[i] = b;
}
let mut rng = &mut ChaCha8Rng::from_seed(seed);
let mut rng = Rng::from_seed(seed);
fn choose_word<'a, 'b, V>(
freq: &'b HashMap<String, V>,
rng: &'a mut ChaCha8Rng,
) -> &'b str {
freq.keys().choose(rng).unwrap()
};
let generator = generators.choose(&mut rng).unwrap();
let title = (0..rng.gen_range(2..10))
.map(|_| choose_word(&freq, rng))
let title = generator
.word_stream(rng.random_range(2..10), &mut rng.clone())
.join(" ");
let mut word = freq.keys().choose(rng).unwrap();
let mut content = word.clone();
for _ in 0..rng.gen_range(50..5_000) {
// let word = choose_word(&corpus, rng).to_string();
word = freq
.get(&*word)
.and_then(|rhs| rhs.choose_weighted(rng, |(_, n)| *n).ok())
.map(|(rhs, _)| rhs)
.unwrap_or(word);
if rng.gen_bool(0.05) {
let url = (0..3).map(|_| choose_word(&freq, rng)).join("-");
let content = generator
.word_stream(rng.random_range(50..5_000), &mut rng.clone())
.fold(String::new(), |mut content, word| {
if rng.random_bool(0.05) {
let url = generator.word_stream(3, &mut rng.clone()).join("-");
content += &format!(" <a href=\"{}\">{}</a>", url, word);
} else if rng.gen_bool(0.01) {
} else if rng.random_bool(0.01) {
content += ".<br>";
} else {
content += " ";
content += &word;
};
content += &word
}
counter.fetch_add(1, Ordering::Relaxed);
content
});
Html(format!(
"<!DOCTYPE html>
<html>
@ -116,31 +96,44 @@ async fn main() {
)
};
// run our app with hyper, listening globally on port 3000
let listener = tokio::net::TcpListener::bind("0.0.0.0:3000").await.unwrap();
println!("Started server.");
let mut interval = tokio::time::interval(Duration::from_secs(20));
tokio::spawn(async move {
let mut last = 0;
loop {
interval.tick().await;
let count = counter.load(Ordering::Relaxed);
if count != last {
last = count;
println!(
"Served bollocks to {} clients!",
counter.load(Ordering::Relaxed)
"{} Served bollocks to {} clients!",
chrono::offset::Local::now(),
count,
);
}
}
});
let args = Args::parse();
let config = RustlsConfig::from_pem_file(
args.pem_dir.clone().join("cert.pem"),
args.pem_dir.clone().join("key.pem"),
)
println!("Starting...");
let sock = args
.sock
.as_deref()
.unwrap_or("0.0.0.0:4000")
.parse()
.unwrap();
if let Some(pem_dir) = args.pem_dir {
let config = RustlsConfig::from_pem_file(pem_dir.join("cert.pem"), pem_dir.join("key.pem"))
.await
.unwrap();
bind_rustls(args.sock.parse().unwrap(), config)
bind_rustls(sock, config)
.serve(app.into_make_service())
.await
.unwrap();
} else {
axum_server::bind(sock)
.serve(app.into_make_service())
.await
.unwrap()
}
}