diff --git a/README.md b/README.md
index 4719319..8aee7cf 100644
--- a/README.md
+++ b/README.md
@@ -20,10 +20,15 @@ round
--sock
| Bind to the given socket. Defaults to 0.0.0.0:3000.
```
-Deploy it in a docker environment. It's probably safe, but no reason to take chances.
+Babble will search for a `robots.txt` file in the working directory to use. If it does not find one, it will use a
+default one that denies everything.
-If you want to be nice to crawlers that *actually abide by `robots.txt`*, perhaps add an entry to warn search engines
-away from it.
+Babble will periodically emit statistics into `stats.txt`, showing information about the worst-offending requesting
+IPs.
+
+## Warning
+
+Deploy it in a docker environment. It's probably safe, but no reason to take chances.
## Usage terms
diff --git a/src/main.rs b/src/main.rs
index fedce03..55ab919 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -56,6 +56,7 @@ fn create_rng(seed_bytes: impl IntoIterator<Item = u8>) -> Rng {
const COUNT_FILE: &str = "count.txt";
const STATS_FILE: &str = "stats.txt";
+const ROBOTS_TXT: &str = "robots.txt";
const SLOW_CHUNK_SIZE: usize = 100;
const SLOW_DURATION: Duration = Duration::from_millis(100);
@@ -119,62 +120,70 @@ async fn main() {
let (stats_tx, stats_rx) = flume::unbounded();
+ let robots_txt = std::fs::read_to_string(ROBOTS_TXT)
+ .ok()
+ .unwrap_or_else(|| include_str!("robots.txt").to_string());
+
let app = {
let counter = counter.clone();
let stats_tx = stats_tx.clone();
- Router::new().route(
- "/{id}",
- get(
- |Path(id): Path<String>,
- ConnectInfo(sock): ConnectInfo<SocketAddr>,
- headers: HeaderMap| async move {
- // Create a RNG for this path (deterministic, to simulate static pages)
- let mut rng = create_rng(id.bytes());
+ Router::new()
+ .route("/robots.txt", get(|| async move { robots_txt.clone() }))
+ .route(
+ "/{id}",
+ get(
+ |Path(id): Path<String>,
+ ConnectInfo(sock): ConnectInfo<SocketAddr>,
+ headers: HeaderMap| async move {
+ // Create a RNG for this path (deterministic, to simulate static pages)
+ let mut rng = create_rng(id.bytes());
- let ip = headers
- .get("X-Forwarded-For")
- .and_then(|h| h.to_str().ok())
- .and_then(|h| h.split(',').next())
- .and_then(|s| s.trim().parse().ok())
- .unwrap_or_else(|| sock.ip());
- stats_tx.send(RequestStats { ip }).unwrap();
+ let ip = headers
+ .get("X-Forwarded-For")
+ .and_then(|h| h.to_str().ok())
+ .and_then(|h| h.split(',').next())
+ .and_then(|s| s.trim().parse().ok())
+ .unwrap_or_else(|| sock.ip());
+ stats_tx.send(RequestStats { ip }).unwrap();
- // Count the request. Also doubles as the non-deterministic seed
- let count = counter.fetch_add(1, Ordering::Relaxed);
+ // Count the request. Also doubles as the non-deterministic seed
+ let count = counter.fetch_add(1, Ordering::Relaxed);
- // Create a RNG for this session (non-deterministic)
- let mut session_rng = create_rng(count.to_le_bytes());
+ // Create a RNG for this session (non-deterministic)
+ let mut session_rng = create_rng(count.to_le_bytes());
- // Artificially slow down connections as rudimentary DDoS protection, and to use up client resources
- tokio::time::sleep(Duration::from_millis(session_rng.random_range(200..1000)))
+ // Artificially slow down connections as rudimentary DDoS protection, and to use up client resources
+ tokio::time::sleep(Duration::from_millis(
+ session_rng.random_range(200..1000),
+ ))
.await;
- // Choose a bullshit generator from our collection for this page
- let generator = generators.choose(&mut rng).unwrap();
+ // Choose a bullshit generator from our collection for this page
+ let generator = generators.choose(&mut rng).unwrap();
- let title = generator
- .word_stream(rng.random_range(2..10), &mut rng.clone())
- .join(" ");
+ let title = generator
+ .word_stream(rng.random_range(2..10), &mut rng.clone())
+ .join(" ");
- let stats = format!("Served rubbish to {count} clients so far");
+ let stats = format!("Served rubbish to {count} clients so far");
- let content = generator
- .word_stream(rng.random_range(50..5_000), &mut rng.clone())
- .fold(String::new(), |mut content, word| {
- // Small chance of every word becoming a link back into the void
- if rng.random_bool(0.05) {
- let url = generator.word_stream(3, &mut rng.clone()).join("-");
- content += &format!(" <a href=\"/{}\">{}</a>", url, word);
- } else {
- // Also, a chance for every word to end with a newline. This should probably be controlled by the generator.
- content += if rng.random_bool(0.01) { ".
" } else { " " };
- content += &word
- }
- content
- });
+ let content = generator
+ .word_stream(rng.random_range(50..5_000), &mut rng.clone())
+ .fold(String::new(), |mut content, word| {
+ // Small chance of every word becoming a link back into the void
+ if rng.random_bool(0.05) {
+ let url = generator.word_stream(3, &mut rng.clone()).join("-");
+ content += &format!(" <a href=\"/{}\">{}</a>", url, word);
+ } else {
+ // Also, a chance for every word to end with a newline. This should probably be controlled by the generator.
+ content += if rng.random_bool(0.01) { ".
" } else { " " };
+ content += &word
+ }
+ content
+ });
- let html = format!(
- "
+ let html = format!(
+ "
{title}
@@ -187,15 +196,15 @@ async fn main() {
"
- );
+ );
- SlowBody {
- bytes: html.into(),
- interval: interval(SLOW_DURATION),
- }
- },
- ),
- )
+ SlowBody {
+ bytes: html.into(),
+ interval: interval(SLOW_DURATION),
+ }
+ },
+ ),
+ )
};
let mut interval = tokio::time::interval(Duration::from_secs(20));
@@ -221,6 +230,7 @@ async fn main() {
.rev()
.enumerate()
.map(|(i, (ip, n))| format!("{:<4} | {:<4} | {}\n", i + 1, n, ip))
+ .take(30)
.collect::<String>();
let _ = std::fs::write(STATS_FILE, &stats);
}
diff --git a/src/robots.txt b/src/robots.txt
new file mode 100644
index 0000000..eb05362
--- /dev/null
+++ b/src/robots.txt
@@ -0,0 +1,2 @@
+User-agent: *
+Disallow: /