Add robots.txt support
parent 6b05f7fd52
commit a77eb52c56
3 changed files with 71 additions and 54 deletions
README.md (11 changes)

@@ -20,10 +20,15 @@ round
 --sock <address> | Bind to the given socket. Defaults to 0.0.0.0:3000.
 ```

-Deploy it in a docker environment. It's probably safe, but no reason to take chances.
+Babble will search for a `robots.txt` file in the working directory to use. If it does not find one, it will use a
+default one that allows everything.
+
+If you want to be nice to crawlers that *actually abide by `robots.txt`*, perhaps add an entry to warn search engines
+away from it.

 Babble will periodically emit statistics into `stats.txt`, showing information about the worst-offending requesting
 IPs.

+## Warning
+
+Deploy it in a docker environment. It's probably safe, but no reason to take chances.
+
 ## Usage terms
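To warn crawlers away as the README suggests, drop an override `robots.txt` next to the binary. A sketch of a deny-all variant (illustrative only; tune the paths to taste):

```
# Hypothetical robots.txt placed in Babble's working directory.
# An empty Disallow allows everything; "Disallow: /" warns crawlers off every path.
User-agent: *
Disallow: /
```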
src/main.rs (112 changes)

@@ -56,6 +56,7 @@ fn create_rng(seed_bytes: impl IntoIterator<Item = u8>) -> Rng {
 const COUNT_FILE: &str = "count.txt";
 const STATS_FILE: &str = "stats.txt";
+const ROBOTS_TXT: &str = "robots.txt";

 const SLOW_CHUNK_SIZE: usize = 100;
 const SLOW_DURATION: Duration = Duration::from_millis(100);
@@ -119,62 +120,70 @@ async fn main() {
     let (stats_tx, stats_rx) = flume::unbounded();

+    let robots_txt = std::fs::read_to_string(ROBOTS_TXT)
+        .ok()
+        .unwrap_or_else(|| include_str!("robots.txt").to_string());
+
     let app = {
         let counter = counter.clone();
         let stats_tx = stats_tx.clone();
-        Router::new().route(
-            "/{id}",
-            get(
-                |Path(id): Path<String>,
-                 ConnectInfo(sock): ConnectInfo<SocketAddr>,
-                 headers: HeaderMap| async move {
-                    // Create a RNG for this path (deterministic, to simulate static pages)
-                    let mut rng = create_rng(id.bytes());
+        Router::new()
+            .route("/robots.txt", get(|| async move { robots_txt.clone() }))
+            .route(
+                "/{id}",
+                get(
+                    |Path(id): Path<String>,
+                     ConnectInfo(sock): ConnectInfo<SocketAddr>,
+                     headers: HeaderMap| async move {
+                        // Create a RNG for this path (deterministic, to simulate static pages)
+                        let mut rng = create_rng(id.bytes());

-                    let ip = headers
-                        .get("X-Forwarded-For")
-                        .and_then(|h| h.to_str().ok())
-                        .and_then(|h| h.split(',').next())
-                        .and_then(|s| s.trim().parse().ok())
-                        .unwrap_or_else(|| sock.ip());
-                    stats_tx.send(RequestStats { ip }).unwrap();
+                        let ip = headers
+                            .get("X-Forwarded-For")
+                            .and_then(|h| h.to_str().ok())
+                            .and_then(|h| h.split(',').next())
+                            .and_then(|s| s.trim().parse().ok())
+                            .unwrap_or_else(|| sock.ip());
+                        stats_tx.send(RequestStats { ip }).unwrap();

-                    // Count the request. Also doubles as the non-deterministic seed
-                    let count = counter.fetch_add(1, Ordering::Relaxed);
+                        // Count the request. Also doubles as the non-deterministic seed
+                        let count = counter.fetch_add(1, Ordering::Relaxed);

-                    // Create a RNG for this session (non-deterministic)
-                    let mut session_rng = create_rng(count.to_le_bytes());
+                        // Create a RNG for this session (non-deterministic)
+                        let mut session_rng = create_rng(count.to_le_bytes());

-                    // Artificially slow down connections as rudimentary DDoS protection, and to use up client resources
-                    tokio::time::sleep(Duration::from_millis(session_rng.random_range(200..1000)))
-                        .await;
+                        // Artificially slow down connections as rudimentary DDoS protection, and to use up client resources
+                        tokio::time::sleep(Duration::from_millis(
+                            session_rng.random_range(200..1000),
+                        ))
+                        .await;

-                    // Choose a bullshit generator from our collection for this page
-                    let generator = generators.choose(&mut rng).unwrap();
+                        // Choose a bullshit generator from our collection for this page
+                        let generator = generators.choose(&mut rng).unwrap();

-                    let title = generator
-                        .word_stream(rng.random_range(2..10), &mut rng.clone())
-                        .join(" ");
+                        let title = generator
+                            .word_stream(rng.random_range(2..10), &mut rng.clone())
+                            .join(" ");

-                    let stats = format!("Served rubbish to {count} clients so far");
+                        let stats = format!("Served rubbish to {count} clients so far");

-                    let content = generator
-                        .word_stream(rng.random_range(50..5_000), &mut rng.clone())
-                        .fold(String::new(), |mut content, word| {
-                            // Small chance of every word becoming a link back into the void
-                            if rng.random_bool(0.05) {
-                                let url = generator.word_stream(3, &mut rng.clone()).join("-");
-                                content += &format!(" <a href=\"{}\">{}</a>", url, word);
-                            } else {
-                                // Also, a chance for every word to end with a newline. This should probably be controlled by the generator.
-                                content += if rng.random_bool(0.01) { ".<br>" } else { " " };
-                                content += &word
-                            }
-                            content
-                        });
+                        let content = generator
+                            .word_stream(rng.random_range(50..5_000), &mut rng.clone())
+                            .fold(String::new(), |mut content, word| {
+                                // Small chance of every word becoming a link back into the void
+                                if rng.random_bool(0.05) {
+                                    let url = generator.word_stream(3, &mut rng.clone()).join("-");
+                                    content += &format!(" <a href=\"{}\">{}</a>", url, word);
+                                } else {
+                                    // Also, a chance for every word to end with a newline. This should probably be controlled by the generator.
+                                    content += if rng.random_bool(0.01) { ".<br>" } else { " " };
+                                    content += &word
+                                }
+                                content
+                            });

-                    let html = format!(
-                        "<!DOCTYPE html>
+                        let html = format!(
+                            "<!DOCTYPE html>
 <html>
 <head>
 <title>{title}</title>
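The `X-Forwarded-For` handling above keeps only the first comma-separated entry (the original client; proxies append themselves after it) and falls back to the socket address when the header is missing or malformed. Reduced to a standalone sketch, with a hypothetical helper name and made-up inputs:

```rust
use std::net::IpAddr;

// Mirrors the handler's fallback chain: first X-Forwarded-For entry, else socket IP.
fn client_ip(forwarded_for: Option<&str>, sock_ip: IpAddr) -> IpAddr {
    forwarded_for
        .and_then(|h| h.split(',').next())
        .and_then(|s| s.trim().parse().ok())
        .unwrap_or(sock_ip)
}

fn main() {
    let sock: IpAddr = "10.0.0.1".parse().unwrap();
    // The first entry wins when it parses as an address...
    assert_eq!(
        client_ip(Some("203.0.113.7, 198.51.100.2"), sock),
        "203.0.113.7".parse::<IpAddr>().unwrap()
    );
    // ...anything unparsable falls back to the socket address.
    assert_eq!(client_ip(Some("not-an-ip"), sock), sock);
}
```

Since the header is client-supplied, it is only as trustworthy as the reverse proxy that sets it; without one, the socket address is the honest signal.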
@@ -187,15 +196,15 @@ async fn main() {
 </body>
 </html>"
-                    );
+                        );

-                    SlowBody {
-                        bytes: html.into(),
-                        interval: interval(SLOW_DURATION),
-                    }
-                },
-            ),
-        )
+                        SlowBody {
+                            bytes: html.into(),
+                            interval: interval(SLOW_DURATION),
+                        }
+                    },
+                ),
+            )
     };

     let mut interval = tokio::time::interval(Duration::from_secs(20));
@@ -221,6 +230,7 @@ async fn main() {
             .rev()
             .enumerate()
             .map(|(i, (ip, n))| format!("{:<4} | {:<4} | {}\n", i + 1, n, ip))
+            .take(30)
             .collect::<String>();
         let _ = std::fs::write(STATS_FILE, &stats);
     }
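With the `{:<4} | {:<4} | {}` format above (rank, request count, and IP, the first two left-aligned to four columns), the periodic `stats.txt` should come out roughly like this. Values and addresses are made up for illustration:

```
1    | 9421 | 203.0.113.7
2    | 5130 | 198.51.100.23
3    | 812  | 192.0.2.101
```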
src/robots.txt (new file, 2 changes)

@@ -0,0 +1,2 @@
+User-agent: *
+Disallow:
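A quick way to check which `robots.txt` is actually being served, assuming the README's default bind address:

```
curl -i http://127.0.0.1:3000/robots.txt
```

With no override file in the working directory, this returns the embedded copy above: `include_str!("robots.txt")` bakes the default into the binary at compile time, so the fallback needs no file at runtime.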