Add robots.txt support

Joshua Barretto 2025-04-29 21:10:12 +01:00
parent 6b05f7fd52
commit a77eb52c56
3 changed files with 71 additions and 54 deletions

README.md

@@ -20,10 +20,15 @@ round
--sock <address> | Bind to the given socket. Defaults to 0.0.0.0:3000.
```
Babble will search for a `robots.txt` file in the working directory to use. If it does not find one, it will fall back
to a default that permits everything.
If you want to be nice to crawlers that *actually abide by `robots.txt`*, perhaps add an entry to warn search engines
away from it.
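One possible entry, using a real crawler's user-agent token purely as an illustration:
```
User-agent: Googlebot
Disallow: /
```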
Babble will periodically emit statistics into `stats.txt`, showing information about the worst-offending requesting
IPs.
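The report is a simple rank | requests | IP table (see the `format!` call in `src/main.rs`); an invented excerpt for illustration:
```
1    | 9421 | 198.51.100.23
2    | 7712 | 203.0.113.9
3    | 6003 | 192.0.2.41
```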
## Warning
Deploy it in a docker environment. It's probably safe, but no reason to take chances.
## Usage terms

src/main.rs

@@ -56,6 +56,7 @@ fn create_rng(seed_bytes: impl IntoIterator<Item = u8>) -> Rng {
const COUNT_FILE: &str = "count.txt";
const STATS_FILE: &str = "stats.txt";
const ROBOTS_TXT: &str = "robots.txt";
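// Response bodies are streamed out gradually (SLOW_CHUNK_SIZE bytes per SLOW_DURATION tick) to waste crawler time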
const SLOW_CHUNK_SIZE: usize = 100;
const SLOW_DURATION: Duration = Duration::from_millis(100);
@@ -119,62 +120,70 @@ async fn main() {
let (stats_tx, stats_rx) = flume::unbounded();
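// Use a robots.txt from the working directory if one exists, otherwise fall back to the default embedded at compile time via include_str!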
let robots_txt = std::fs::read_to_string(ROBOTS_TXT)
.ok()
.unwrap_or_else(|| include_str!("robots.txt").to_string());
let app = {
let counter = counter.clone();
let stats_tx = stats_tx.clone();
Router::new()
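// Serve the robots.txt chosen above; the handler closure captures it by move and clones it per request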
.route("/robots.txt", get(|| async move { robots_txt.clone() }))
.route(
"/{id}",
get(
|Path(id): Path<String>,
ConnectInfo(sock): ConnectInfo<SocketAddr>,
headers: HeaderMap| async move {
// Create a RNG for this path (deterministic, to simulate static pages)
let mut rng = create_rng(id.bytes());
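// Resolve the client IP: prefer the first entry of X-Forwarded-For (set by a reverse proxy), else the peer socket address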
let ip = headers
.get("X-Forwarded-For")
.and_then(|h| h.to_str().ok())
.and_then(|h| h.split(',').next())
.and_then(|s| s.trim().parse().ok())
.unwrap_or_else(|| sock.ip());
stats_tx.send(RequestStats { ip }).unwrap();
// Count the request. Also doubles as the non-deterministic seed
let count = counter.fetch_add(1, Ordering::Relaxed);
// Create a RNG for this session (non-deterministic)
let mut session_rng = create_rng(count.to_le_bytes());
// Artificially slow down connections as rudimentary DDoS protection, and to use up client resources
tokio::time::sleep(Duration::from_millis(
session_rng.random_range(200..1000),
))
.await;
// Choose a bullshit generator from our collection for this page
let generator = generators.choose(&mut rng).unwrap();
let title = generator
.word_stream(rng.random_range(2..10), &mut rng.clone())
.join(" ");
let stats = format!("Served rubbish to {count} clients so far");
let content = generator
.word_stream(rng.random_range(50..5_000), &mut rng.clone())
.fold(String::new(), |mut content, word| {
// Small chance of every word becoming a link back into the void
if rng.random_bool(0.05) {
let url = generator.word_stream(3, &mut rng.clone()).join("-");
content += &format!(" <a href=\"{}\">{}</a>", url, word);
} else {
// Also, a chance for every word to end with a newline. This should probably be controlled by the generator.
content += if rng.random_bool(0.01) { ".<br>" } else { " " };
content += &word
}
content
});
let html = format!(
"<!DOCTYPE html>
<html>
<head>
<title>{title}</title>
@@ -187,15 +196,15 @@ async fn main() {
</body>
</html>"
);
SlowBody {
bytes: html.into(),
interval: interval(SLOW_DURATION),
}
},
),
)
};
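// Every 20 seconds, aggregate the per-IP request stats received so far and write a report to STATS_FILE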
let mut interval = tokio::time::interval(Duration::from_secs(20));
@@ -221,6 +230,7 @@ async fn main() {
.rev()
.enumerate()
.map(|(i, (ip, n))| format!("{:<4} | {:<4} | {}\n", i + 1, n, ip))
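// Report only the top 30 offenders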
.take(30)
.collect::<String>();
let _ = std::fs::write(STATS_FILE, &stats);
}

src/robots.txt Normal file

@@ -0,0 +1,2 @@
User-agent: *
Disallow:
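A quick way to check the served file once Babble is running, assuming the default bind address from the usage table above:
```
curl http://localhost:3000/robots.txt
```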