Add robots.txt support

This commit is contained in:
Joshua Barretto 2025-04-29 21:10:12 +01:00
parent 6b05f7fd52
commit a77eb52c56
3 changed files with 71 additions and 54 deletions

View file

@@ -20,10 +20,15 @@ round
--sock <address> | Bind to the given socket. Defaults to 0.0.0.0:3000. --sock <address> | Bind to the given socket. Defaults to 0.0.0.0:3000.
``` ```
- Deploy it in a docker environment. It's probably safe, but no reason to take chances.
- If you want to be nice to crawlers that *actually abide by `robots.txt`*, perhaps add an entry to warn search engines
- away from it.
+ Babble will search for a `robots.txt` file in the working directory to use. If it does not find one, it will use a
+ default one that denies everything.
+ Babble will periodically emit statistics into `stats.txt`, showing information about the worst-offending requesting
+ IPs.
## Warning
Deploy it in a docker environment. It's probably safe, but no reason to take chances.
## Usage terms ## Usage terms

View file

@@ -56,6 +56,7 @@ fn create_rng(seed_bytes: impl IntoIterator<Item = u8>) -> Rng {
const COUNT_FILE: &str = "count.txt"; const COUNT_FILE: &str = "count.txt";
const STATS_FILE: &str = "stats.txt"; const STATS_FILE: &str = "stats.txt";
const ROBOTS_TXT: &str = "robots.txt";
const SLOW_CHUNK_SIZE: usize = 100; const SLOW_CHUNK_SIZE: usize = 100;
const SLOW_DURATION: Duration = Duration::from_millis(100); const SLOW_DURATION: Duration = Duration::from_millis(100);
@@ -119,62 +120,70 @@ async fn main() {
let (stats_tx, stats_rx) = flume::unbounded(); let (stats_tx, stats_rx) = flume::unbounded();
let robots_txt = std::fs::read_to_string(ROBOTS_TXT)
.ok()
.unwrap_or_else(|| include_str!("robots.txt").to_string());
let app = { let app = {
let counter = counter.clone(); let counter = counter.clone();
let stats_tx = stats_tx.clone(); let stats_tx = stats_tx.clone();
Router::new().route( Router::new()
"/{id}", .route("/robots.txt", get(|| async move { robots_txt.clone() }))
get( .route(
|Path(id): Path<String>, "/{id}",
ConnectInfo(sock): ConnectInfo<SocketAddr>, get(
headers: HeaderMap| async move { |Path(id): Path<String>,
// Create a RNG for this path (deterministic, to simulate static pages) ConnectInfo(sock): ConnectInfo<SocketAddr>,
let mut rng = create_rng(id.bytes()); headers: HeaderMap| async move {
// Create a RNG for this path (deterministic, to simulate static pages)
let mut rng = create_rng(id.bytes());
let ip = headers let ip = headers
.get("X-Forwarded-For") .get("X-Forwarded-For")
.and_then(|h| h.to_str().ok()) .and_then(|h| h.to_str().ok())
.and_then(|h| h.split(',').next()) .and_then(|h| h.split(',').next())
.and_then(|s| s.trim().parse().ok()) .and_then(|s| s.trim().parse().ok())
.unwrap_or_else(|| sock.ip()); .unwrap_or_else(|| sock.ip());
stats_tx.send(RequestStats { ip }).unwrap(); stats_tx.send(RequestStats { ip }).unwrap();
// Count the request. Also doubles as the non-deterministic seed // Count the request. Also doubles as the non-deterministic seed
let count = counter.fetch_add(1, Ordering::Relaxed); let count = counter.fetch_add(1, Ordering::Relaxed);
// Create a RNG for this session (non-deterministic) // Create a RNG for this session (non-deterministic)
let mut session_rng = create_rng(count.to_le_bytes()); let mut session_rng = create_rng(count.to_le_bytes());
// Artificially slow down connections as rudimentary DDoS protection, and to use up client resources // Artificially slow down connections as rudimentary DDoS protection, and to use up client resources
tokio::time::sleep(Duration::from_millis(session_rng.random_range(200..1000))) tokio::time::sleep(Duration::from_millis(
session_rng.random_range(200..1000),
))
.await; .await;
// Choose a bullshit generator from our collection for this page // Choose a bullshit generator from our collection for this page
let generator = generators.choose(&mut rng).unwrap(); let generator = generators.choose(&mut rng).unwrap();
let title = generator let title = generator
.word_stream(rng.random_range(2..10), &mut rng.clone()) .word_stream(rng.random_range(2..10), &mut rng.clone())
.join(" "); .join(" ");
let stats = format!("Served rubbish to {count} clients so far"); let stats = format!("Served rubbish to {count} clients so far");
let content = generator let content = generator
.word_stream(rng.random_range(50..5_000), &mut rng.clone()) .word_stream(rng.random_range(50..5_000), &mut rng.clone())
.fold(String::new(), |mut content, word| { .fold(String::new(), |mut content, word| {
// Small chance of every word becoming a link back into the void // Small chance of every word becoming a link back into the void
if rng.random_bool(0.05) { if rng.random_bool(0.05) {
let url = generator.word_stream(3, &mut rng.clone()).join("-"); let url = generator.word_stream(3, &mut rng.clone()).join("-");
content += &format!(" <a href=\"{}\">{}</a>", url, word); content += &format!(" <a href=\"{}\">{}</a>", url, word);
} else { } else {
// Also, a chance for every word to end with a newline. This should probably be controlled by the generator. // Also, a chance for every word to end with a newline. This should probably be controlled by the generator.
content += if rng.random_bool(0.01) { ".<br>" } else { " " }; content += if rng.random_bool(0.01) { ".<br>" } else { " " };
content += &word content += &word
} }
content content
}); });
let html = format!( let html = format!(
"<!DOCTYPE html> "<!DOCTYPE html>
<html> <html>
<head> <head>
<title>{title}</title> <title>{title}</title>
@@ -187,15 +196,15 @@ async fn main() {
</body> </body>
</html>" </html>"
); );
SlowBody { SlowBody {
bytes: html.into(), bytes: html.into(),
interval: interval(SLOW_DURATION), interval: interval(SLOW_DURATION),
} }
}, },
), ),
) )
}; };
let mut interval = tokio::time::interval(Duration::from_secs(20)); let mut interval = tokio::time::interval(Duration::from_secs(20));
@@ -221,6 +230,7 @@ async fn main() {
.rev() .rev()
.enumerate() .enumerate()
.map(|(i, (ip, n))| format!("{:<4} | {:<4} | {}\n", i + 1, n, ip)) .map(|(i, (ip, n))| format!("{:<4} | {:<4} | {}\n", i + 1, n, ip))
.take(30)
.collect::<String>(); .collect::<String>();
let _ = std::fs::write(STATS_FILE, &stats); let _ = std::fs::write(STATS_FILE, &stats);
} }

2
src/robots.txt Normal file
View file

@@ -0,0 +1,2 @@
User-agent: *
Disallow: