Add robots.txt support

Joshua Barretto 2025-04-29 21:10:12 +01:00
parent 6b05f7fd52
commit a77eb52c56
3 changed files with 71 additions and 54 deletions


@@ -20,10 +20,15 @@ round
 --sock <address> | Bind to the given socket. Defaults to 0.0.0.0:3000.
 ```
 
-Deploy it in a docker environment. It's probably safe, but no reason to take chances.
+Babble will search for a `robots.txt` file in the working directory to use. If it does not find one, it will use a
+default one that denies everything.
 
-If you want to be nice to crawlers that *actually abide by `robots.txt`*, perhaps add an entry to warn search engines
-away from it.
+Babble will periodically emit statistics into `stats.txt`, showing information about the worst-offending requesting
+IPs.
+
+## Warning
+
+Deploy it in a docker environment. It's probably safe, but no reason to take chances.
 
 ## Usage terms
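For illustration only: since Babble serves whatever `robots.txt` it finds in the working directory verbatim at `/robots.txt`, an operator who wants to turn away every well-behaved crawler could drop in something like the following (the policy itself is an example, not part of this commit):

```
User-agent: *
Disallow: /
```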


@@ -56,6 +56,7 @@ fn create_rng(seed_bytes: impl IntoIterator<Item = u8>) -> Rng {
 const COUNT_FILE: &str = "count.txt";
 const STATS_FILE: &str = "stats.txt";
+const ROBOTS_TXT: &str = "robots.txt";
 
 const SLOW_CHUNK_SIZE: usize = 100;
 const SLOW_DURATION: Duration = Duration::from_millis(100);
@@ -119,10 +120,16 @@ async fn main() {
     let (stats_tx, stats_rx) = flume::unbounded();
 
+    let robots_txt = std::fs::read_to_string(ROBOTS_TXT)
+        .ok()
+        .unwrap_or_else(|| include_str!("robots.txt").to_string());
+
     let app = {
         let counter = counter.clone();
         let stats_tx = stats_tx.clone();
-        Router::new().route(
+        Router::new()
+            .route("/robots.txt", get(|| async move { robots_txt.clone() }))
+            .route(
                 "/{id}",
                 get(
                     |Path(id): Path<String>,
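As an aside, the load-once-then-clone-per-request pattern above can be exercised in isolation. The sketch below assumes axum 0.8 and tokio (matching the crates visible in the surrounding diff), inlines the fallback as a string literal rather than `include_str!`, and uses an illustrative bind address:

```rust
use axum::{routing::get, Router};

#[tokio::main]
async fn main() {
    // Prefer a robots.txt from the working directory; otherwise fall back to
    // a built-in default (inlined here for the sake of the sketch).
    let robots_txt = std::fs::read_to_string("robots.txt")
        .unwrap_or_else(|_| "User-agent: *\nDisallow:\n".to_string());

    // axum clones the handler closure for each request, so returning a clone
    // of the captured string keeps the route reusable.
    let app = Router::new().route("/robots.txt", get(|| async move { robots_txt.clone() }));

    let listener = tokio::net::TcpListener::bind("0.0.0.0:3000").await.unwrap();
    axum::serve(listener, app).await.unwrap();
}
```

With this running, `curl http://localhost:3000/robots.txt` returns whichever policy was found (or the fallback).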
@@ -146,7 +153,9 @@ async fn main() {
                         let mut session_rng = create_rng(count.to_le_bytes());
 
                         // Artificially slow down connections as rudimentary DDoS protection, and to use up client resources
-                        tokio::time::sleep(Duration::from_millis(session_rng.random_range(200..1000)))
+                        tokio::time::sleep(Duration::from_millis(
+                            session_rng.random_range(200..1000),
+                        ))
                         .await;
 
                         // Choose a bullshit generator from our collection for this page
@@ -221,6 +230,7 @@ async fn main() {
                 .rev()
                 .enumerate()
                 .map(|(i, (ip, n))| format!("{:<4} | {:<4} | {}\n", i + 1, n, ip))
+                .take(30)
                 .collect::<String>();
             let _ = std::fs::write(STATS_FILE, &stats);
         }
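For reference, the `format!` call above writes one line per offending IP: a 1-based rank, the request count, and the address, with the list now capped at 30 entries by `.take(30)`. A purely hypothetical `stats.txt` excerpt (addresses and counts invented) would look like:

```
1    | 982  | 203.0.113.7
2    | 541  | 198.51.100.24
3    | 87   | 192.0.2.61
```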

src/robots.txt Normal file

@@ -0,0 +1,2 @@
+User-agent: *
+Disallow: