forked from zesterer/babble
Better AST generator
This commit is contained in:
parent
41a442ce0a
commit
8b39e1fca6
2 changed files with 57 additions and 18 deletions
|
@ -6,7 +6,7 @@ with plenty of links.
|
|||
## Why?
|
||||
|
||||
- Divert and slow down LLM crawler traffic, protecting your main site
|
||||
- Potentially poison LLM training data (likely not very effectuve)
|
||||
- Potentially poison LLM training data (likely not very effective)
|
||||
- Collective defence; the more time a scraper spends swallowing babble, the less time it'll spend bullying someone
|
||||
else's site
|
||||
- Do your bit to protect the public commons from those who would readily see it destroyed for the sake of an investment
|
||||
|
|
|
@ -43,6 +43,9 @@ impl Generator for Ast {
|
|||
&["fraudulent"][..],
|
||||
&["communist"][..],
|
||||
&["rotund"][..],
|
||||
&["nazi"][..],
|
||||
&["fascistic"][..],
|
||||
&["careless"][..],
|
||||
]
|
||||
.choose(rng)
|
||||
.unwrap(),
|
||||
|
@ -70,13 +73,12 @@ impl Generator for Ast {
|
|||
|
||||
words.extend(
|
||||
*[
|
||||
&["me"][..],
|
||||
&["kangaroos"][..],
|
||||
&["chips"][..],
|
||||
&["carrots"][..],
|
||||
&["kangaroo"][..],
|
||||
&["chip"][..],
|
||||
&["carrot"][..],
|
||||
&["weather"][..],
|
||||
&["dogs"][..],
|
||||
&["cats"][..],
|
||||
&["dog"][..],
|
||||
&["cat"][..],
|
||||
&["salami"][..],
|
||||
&["cabbage"][..],
|
||||
&["computer"][..],
|
||||
|
@ -86,44 +88,80 @@ impl Generator for Ast {
|
|||
&["eagle"][..],
|
||||
&["democracy"][..],
|
||||
&["america"][..],
|
||||
&["Elon Musk"][..],
|
||||
&["tariff"][..],
|
||||
&["nazi"][..],
|
||||
&["liberty"][..],
|
||||
]
|
||||
.choose(rng)
|
||||
.unwrap(),
|
||||
);
|
||||
}
|
||||
|
||||
fn gen_qualifier(words: &mut Vec<&'static str>, rng: &mut crate::Rng, _len: usize) {
|
||||
fn gen_adverb(words: &mut Vec<&'static str>, rng: &mut crate::Rng, _len: usize) {
|
||||
words.extend(
|
||||
*[&["quickly"][..], &["slowly"][..], &["joyfully"][..]]
|
||||
.choose(rng)
|
||||
.unwrap(),
|
||||
*[
|
||||
&["quickly"][..],
|
||||
&["slowly"][..],
|
||||
&["joyfully"][..],
|
||||
&["often"][..],
|
||||
&["carelessly"][..],
|
||||
&["angrily"][..],
|
||||
&["menacingly"][..],
|
||||
&["deliberately"][..],
|
||||
]
|
||||
.choose(rng)
|
||||
.unwrap(),
|
||||
);
|
||||
}
|
||||
|
||||
fn gen_verb(words: &mut Vec<&'static str>, rng: &mut crate::Rng, len: usize) {
|
||||
words.extend(
|
||||
*[
|
||||
&["dies"][..],
|
||||
&["speaks"][..],
|
||||
&["eats"][..],
|
||||
&["runs"][..],
|
||||
&["walks"][..],
|
||||
&["speaks"][..],
|
||||
&["speaks"][..],
|
||||
&["sits"][..],
|
||||
&["thought"][..],
|
||||
&["explodes"][..],
|
||||
&["screams"][..],
|
||||
&["votes"][..],
|
||||
&["salutes"][..],
|
||||
&["flies"][..],
|
||||
]
|
||||
.choose(rng)
|
||||
.unwrap(),
|
||||
);
|
||||
|
||||
if len > 1 && rng.random_bool(0.25) {
|
||||
gen_qualifier(words, rng, len - 1);
|
||||
gen_adverb(words, rng, len - 1);
|
||||
}
|
||||
}
|
||||
|
||||
fn gen_action(words: &mut Vec<&'static str>, rng: &mut crate::Rng, len: usize) {
|
||||
if len > 1 && rng.random_bool(0.25) {
|
||||
gen_qualifier(words, rng, len - 1);
|
||||
gen_adverb(words, rng, len - 1);
|
||||
}
|
||||
|
||||
words.extend(*[&["eats"][..], &["kills"][..]].choose(rng).unwrap());
|
||||
words.extend(
|
||||
*[
|
||||
&["eats"][..],
|
||||
&["runs over"][..],
|
||||
&["walks with"][..],
|
||||
&["speaks to"][..],
|
||||
&["talks to"][..],
|
||||
&["lives with"][..],
|
||||
&["cries with"][..],
|
||||
&["screams at"][..],
|
||||
&["shot"][..],
|
||||
&["killed"][..],
|
||||
&["murders"][..],
|
||||
&["salutes"][..],
|
||||
&["deports"][..],
|
||||
]
|
||||
.choose(rng)
|
||||
.unwrap(),
|
||||
);
|
||||
}
|
||||
|
||||
fn gen_clause(words: &mut Vec<&'static str>, rng: &mut crate::Rng, len: usize) {
|
||||
|
@ -138,6 +176,7 @@ impl Generator for Ast {
|
|||
&["or"][..],
|
||||
&["and"][..],
|
||||
&["then"][..],
|
||||
&["before"][..],
|
||||
]
|
||||
.choose(rng)
|
||||
.unwrap(),
|
||||
|
|
Loading…
Add table
Reference in a new issue