Better AST generator

This commit is contained in:
Joshua Barretto 2025-04-21 12:01:11 +01:00
parent 41a442ce0a
commit 8b39e1fca6
2 changed files with 57 additions and 18 deletions

View file

@ -6,7 +6,7 @@ with plenty of links.
## Why? ## Why?
- Divert and slow down LLM crawler traffic, protecting your main site - Divert and slow down LLM crawler traffic, protecting your main site
- Potentially poison LLM training data (likely not very effectuve) - Potentially poison LLM training data (likely not very effective)
- Collective defence; the more time a scraper spends swallowing babble, the less time it'll spend bullying someone - Collective defence; the more time a scraper spends swallowing babble, the less time it'll spend bullying someone
else's site else's site
- Do your bit to protect the public commons from those who would readily see it destroyed for the sake of an investment - Do your bit to protect the public commons from those who would readily see it destroyed for the sake of an investment

View file

@ -43,6 +43,9 @@ impl Generator for Ast {
&["fraudulent"][..], &["fraudulent"][..],
&["communist"][..], &["communist"][..],
&["rotund"][..], &["rotund"][..],
&["nazi"][..],
&["fascistic"][..],
&["careless"][..],
] ]
.choose(rng) .choose(rng)
.unwrap(), .unwrap(),
@ -70,13 +73,12 @@ impl Generator for Ast {
words.extend( words.extend(
*[ *[
&["me"][..], &["kangaroo"][..],
&["kangaroos"][..], &["chip"][..],
&["chips"][..], &["carrot"][..],
&["carrots"][..],
&["weather"][..], &["weather"][..],
&["dogs"][..], &["dog"][..],
&["cats"][..], &["cat"][..],
&["salami"][..], &["salami"][..],
&["cabbage"][..], &["cabbage"][..],
&["computer"][..], &["computer"][..],
@ -86,44 +88,80 @@ impl Generator for Ast {
&["eagle"][..], &["eagle"][..],
&["democracy"][..], &["democracy"][..],
&["america"][..], &["america"][..],
&["Elon Musk"][..],
&["tariff"][..],
&["nazi"][..],
&["liberty"][..],
] ]
.choose(rng) .choose(rng)
.unwrap(), .unwrap(),
); );
} }
fn gen_qualifier(words: &mut Vec<&'static str>, rng: &mut crate::Rng, _len: usize) { fn gen_adverb(words: &mut Vec<&'static str>, rng: &mut crate::Rng, _len: usize) {
words.extend( words.extend(
*[&["quickly"][..], &["slowly"][..], &["joyfully"][..]] *[
.choose(rng) &["quickly"][..],
.unwrap(), &["slowly"][..],
&["joyfully"][..],
&["often"][..],
&["carelessly"][..],
&["angrily"][..],
&["menacingly"][..],
&["deliberately"][..],
]
.choose(rng)
.unwrap(),
); );
} }
fn gen_verb(words: &mut Vec<&'static str>, rng: &mut crate::Rng, len: usize) { fn gen_verb(words: &mut Vec<&'static str>, rng: &mut crate::Rng, len: usize) {
words.extend( words.extend(
*[ *[
&["dies"][..],
&["speaks"][..],
&["eats"][..], &["eats"][..],
&["runs"][..], &["sits"][..],
&["walks"][..], &["thought"][..],
&["speaks"][..], &["explodes"][..],
&["speaks"][..], &["screams"][..],
&["votes"][..],
&["salutes"][..],
&["flies"][..],
] ]
.choose(rng) .choose(rng)
.unwrap(), .unwrap(),
); );
if len > 1 && rng.random_bool(0.25) { if len > 1 && rng.random_bool(0.25) {
gen_qualifier(words, rng, len - 1); gen_adverb(words, rng, len - 1);
} }
} }
fn gen_action(words: &mut Vec<&'static str>, rng: &mut crate::Rng, len: usize) { fn gen_action(words: &mut Vec<&'static str>, rng: &mut crate::Rng, len: usize) {
if len > 1 && rng.random_bool(0.25) { if len > 1 && rng.random_bool(0.25) {
gen_qualifier(words, rng, len - 1); gen_adverb(words, rng, len - 1);
} }
words.extend(*[&["eats"][..], &["kills"][..]].choose(rng).unwrap()); words.extend(
*[
&["eats"][..],
&["runs over"][..],
&["walks with"][..],
&["speaks to"][..],
&["talks to"][..],
&["lives with"][..],
&["cries with"][..],
&["screams at"][..],
&["shot"][..],
&["killed"][..],
&["murders"][..],
&["salutes"][..],
&["deports"][..],
]
.choose(rng)
.unwrap(),
);
} }
fn gen_clause(words: &mut Vec<&'static str>, rng: &mut crate::Rng, len: usize) { fn gen_clause(words: &mut Vec<&'static str>, rng: &mut crate::Rng, len: usize) {
@ -138,6 +176,7 @@ impl Generator for Ast {
&["or"][..], &["or"][..],
&["and"][..], &["and"][..],
&["then"][..], &["then"][..],
&["before"][..],
] ]
.choose(rng) .choose(rng)
.unwrap(), .unwrap(),