From 8b39e1fca64985e2330f583572c61798029168bf Mon Sep 17 00:00:00 2001 From: Joshua Barretto Date: Mon, 21 Apr 2025 12:01:11 +0100 Subject: [PATCH] Better AST generator --- README.md | 2 +- src/generator/ast.rs | 73 +++++++++++++++++++++++++++++++++----------- 2 files changed, 57 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 474cf22..998f501 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ with plenty of links. ## Why? - Divert and slow down LLM crawler traffic, protecting your main site -- Potentially poison LLM training data (likely not very effectuve) +- Potentially poison LLM training data (likely not very effective) - Collective defence; the more time a scraper spends swallowing babble, the less time it'll spend bulling someone else's site - Do your bit to protect the public commons from those who would readily see it destroyed for the sake of an investment diff --git a/src/generator/ast.rs b/src/generator/ast.rs index 41d6115..a9c2f46 100644 --- a/src/generator/ast.rs +++ b/src/generator/ast.rs @@ -43,6 +43,9 @@ impl Generator for Ast { &["fraudulent"][..], &["communist"][..], &["rotund"][..], + &["nazi"][..], + &["fascistic"][..], + &["careless"][..], ] .choose(rng) .unwrap(), @@ -70,13 +73,12 @@ impl Generator for Ast { words.extend( *[ - &["me"][..], - &["kangaroos"][..], - &["chips"][..], - &["carrots"][..], + &["kangaroo"][..], + &["chip"][..], + &["carrot"][..], &["weather"][..], - &["dogs"][..], - &["cats"][..], + &["dog"][..], + &["cat"][..], &["salami"][..], &["cabbage"][..], &["computer"][..], @@ -86,44 +88,80 @@ impl Generator for Ast { &["eagle"][..], &["democracy"][..], &["america"][..], + &["Elon Musk"][..], + &["tariff"][..], + &["nazi"][..], + &["liberty"][..], ] .choose(rng) .unwrap(), ); } - fn gen_qualifier(words: &mut Vec<&'static str>, rng: &mut crate::Rng, _len: usize) { + fn gen_adverb(words: &mut Vec<&'static str>, rng: &mut crate::Rng, _len: usize) { words.extend( - *[&["quickly"][..], &["slowly"][..], &["joyfully"][..]] - .choose(rng) - .unwrap(), + *[ + &["quickly"][..], + &["slowly"][..], + &["joyfully"][..], + &["often"][..], + &["carelessly"][..], + &["angrily"][..], + &["menacingly"][..], + &["deliberately"][..], + ] + .choose(rng) + .unwrap(), ); } fn gen_verb(words: &mut Vec<&'static str>, rng: &mut crate::Rng, len: usize) { words.extend( *[ + &["dies"][..], + &["speaks"][..], &["eats"][..], - &["runs"][..], - &["walks"][..], - &["speaks"][..], - &["speaks"][..], + &["sits"][..], + &["thought"][..], + &["explodes"][..], + &["screams"][..], + &["votes"][..], + &["salutes"][..], + &["flies"][..], ] .choose(rng) .unwrap(), ); if len > 1 && rng.random_bool(0.25) { - gen_qualifier(words, rng, len - 1); + gen_adverb(words, rng, len - 1); } } fn gen_action(words: &mut Vec<&'static str>, rng: &mut crate::Rng, len: usize) { if len > 1 && rng.random_bool(0.25) { - gen_qualifier(words, rng, len - 1); + gen_adverb(words, rng, len - 1); } - words.extend(*[&["eats"][..], &["kills"][..]].choose(rng).unwrap()); + words.extend( + *[ + &["eats"][..], + &["runs over"][..], + &["walks with"][..], + &["speaks to"][..], + &["talks to"][..], + &["lives with"][..], + &["cries with"][..], + &["screams at"][..], + &["shot"][..], + &["killed"][..], + &["murders"][..], + &["salutes"][..], + &["deports"][..], + ] + .choose(rng) + .unwrap(), + ); } fn gen_clause(words: &mut Vec<&'static str>, rng: &mut crate::Rng, len: usize) { @@ -138,6 +176,7 @@ impl Generator for Ast { &["or"][..], &["and"][..], &["then"][..], + &["before"][..], ] .choose(rng) .unwrap(),