forked from zesterer/babble
Add robots.txt support
This commit is contained in:
parent 6b05f7fd52
commit a77eb52c56

3 changed files with 71 additions and 54 deletions
README.md (11 changed lines)

@@ -20,10 +20,15 @@ round
 --sock <address> | Bind to the given socket. Defaults to 0.0.0.0:3000.
 ```
 
-Deploy it in a docker environment. It's probably safe, but no reason to take chances.
+Babble will search for a `robots.txt` file in the working directory to use. If it does not find one, it will use a
+default one that denies everything.
 
-If you want to be nice to crawlers that *actually abide by `robots.txt`*, perhaps add an entry to warn search engines
-away from it.
+Babble will periodically emit statistics into `stats.txt`, showing information about the worst-offending requesting
+IPs.
+
+## Warning
+
+Deploy it in a docker environment. It's probably safe, but no reason to take chances.
 
 ## Usage terms
 
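As the `src/main.rs` changes below show, the statistics file is rewritten every 20 seconds as a `rank | count | ip` table, now capped at the thirty worst offenders.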
src/main.rs (112 changed lines)
@@ -56,6 +56,7 @@ fn create_rng(seed_bytes: impl IntoIterator<Item = u8>) -> Rng {
 
 const COUNT_FILE: &str = "count.txt";
 const STATS_FILE: &str = "stats.txt";
+const ROBOTS_TXT: &str = "robots.txt";
 
 const SLOW_CHUNK_SIZE: usize = 100;
 const SLOW_DURATION: Duration = Duration::from_millis(100);
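The new `ROBOTS_TXT` constant feeds a lookup-with-fallback in the next hunk: the file is read from the working directory at startup, and `include_str!` bakes the bundled `src/robots.txt` into the binary at compile time as the default. A minimal standalone sketch of that pattern (the function name is hypothetical, and it assumes a `robots.txt` sits next to the source file, as this commit arranges):

```rust
// Sketch of the read-with-fallback pattern from the next hunk (not part of
// the commit). `include_str!` resolves its path relative to this source file
// at compile time, so the default text ships inside the binary even when no
// robots.txt exists on disk at runtime.
fn load_robots_txt() -> String {
    std::fs::read_to_string("robots.txt")
        .ok()
        .unwrap_or_else(|| include_str!("robots.txt").to_string())
}
```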
@@ -119,62 +120,70 @@ async fn main() {
 
     let (stats_tx, stats_rx) = flume::unbounded();
 
+    let robots_txt = std::fs::read_to_string(ROBOTS_TXT)
+        .ok()
+        .unwrap_or_else(|| include_str!("robots.txt").to_string());
+
     let app = {
         let counter = counter.clone();
         let stats_tx = stats_tx.clone();
-        Router::new().route(
-            "/{id}",
-            get(
-                |Path(id): Path<String>,
-                 ConnectInfo(sock): ConnectInfo<SocketAddr>,
-                 headers: HeaderMap| async move {
-                    // Create a RNG for this path (deterministic, to simulate static pages)
-                    let mut rng = create_rng(id.bytes());
+        Router::new()
+            .route("/robots.txt", get(|| async move { robots_txt.clone() }))
+            .route(
+                "/{id}",
+                get(
+                    |Path(id): Path<String>,
+                     ConnectInfo(sock): ConnectInfo<SocketAddr>,
+                     headers: HeaderMap| async move {
+                        // Create a RNG for this path (deterministic, to simulate static pages)
+                        let mut rng = create_rng(id.bytes());
 
                         let ip = headers
                             .get("X-Forwarded-For")
                             .and_then(|h| h.to_str().ok())
                             .and_then(|h| h.split(',').next())
                             .and_then(|s| s.trim().parse().ok())
                             .unwrap_or_else(|| sock.ip());
                         stats_tx.send(RequestStats { ip }).unwrap();
 
                         // Count the request. Also doubles as the non-deterministic seed
                         let count = counter.fetch_add(1, Ordering::Relaxed);
 
                         // Create a RNG for this session (non-deterministic)
                         let mut session_rng = create_rng(count.to_le_bytes());
 
                         // Artificially slow down connections as rudimentary DDoS protection, and to use up client resources
-                    tokio::time::sleep(Duration::from_millis(session_rng.random_range(200..1000)))
+                        tokio::time::sleep(Duration::from_millis(
+                            session_rng.random_range(200..1000),
+                        ))
                         .await;
 
                         // Choose a bullshit generator from our collection for this page
                         let generator = generators.choose(&mut rng).unwrap();
 
                         let title = generator
                             .word_stream(rng.random_range(2..10), &mut rng.clone())
                             .join(" ");
 
                         let stats = format!("Served rubbish to {count} clients so far");
 
                         let content = generator
                             .word_stream(rng.random_range(50..5_000), &mut rng.clone())
                             .fold(String::new(), |mut content, word| {
                                 // Small chance of every word becoming a link back into the void
                                 if rng.random_bool(0.05) {
                                     let url = generator.word_stream(3, &mut rng.clone()).join("-");
                                     content += &format!(" <a href=\"{}\">{}</a>", url, word);
                                 } else {
                                     // Also, a chance for every word to end with a newline. This should probably be controlled by the generator.
                                     content += if rng.random_bool(0.01) { ".<br>" } else { " " };
                                     content += &word
                                 }
                                 content
                             });
 
                         let html = format!(
                             "<!DOCTYPE html>
 <html>
 <head>
     <title>{title}</title>
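The handler prefers the first entry of `X-Forwarded-For` over the raw socket address because, behind a reverse proxy, the socket only ever shows the proxy's IP; the leftmost entry is the original client, with each intermediate proxy appended after it. The header is client-supplied, though, so the value is only trustworthy when a proxy you control sets it. A standalone sketch of the same parsing chain (hypothetical function, not part of the commit):

```rust
use std::net::IpAddr;

// Mirror of the header-parsing chain above: take the leftmost entry of
// X-Forwarded-For, and fall back to the peer socket address when the header
// is absent or unparseable.
fn client_ip(forwarded_for: Option<&str>, peer: IpAddr) -> IpAddr {
    forwarded_for
        .and_then(|h| h.split(',').next())
        .and_then(|s| s.trim().parse().ok())
        .unwrap_or(peer)
}

fn main() {
    let peer: IpAddr = "10.0.0.1".parse().unwrap();
    // Two proxies later, the original client is still the first entry.
    assert_eq!(
        client_ip(Some("203.0.113.7, 10.0.0.1"), peer),
        "203.0.113.7".parse::<IpAddr>().unwrap()
    );
    // Garbage falls back to the socket address.
    assert_eq!(client_ip(Some("not-an-ip"), peer), peer);
}
```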
@@ -187,15 +196,15 @@ async fn main() {
 
 </body>
 </html>"
                         );
 
                         SlowBody {
                             bytes: html.into(),
                             interval: interval(SLOW_DURATION),
                         }
                     },
                 ),
             )
     };
 
     let mut interval = tokio::time::interval(Duration::from_secs(20));
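The `SlowBody` returned here is what actually wastes a crawler's time: with `SLOW_CHUNK_SIZE` at 100 bytes and `SLOW_DURATION` at 100 ms, and assuming one chunk per interval tick (which is what those constants suggest), the body trickles out at roughly 1 KB/s, so a generated page of up to 5,000 words can hold a connection open for minutes.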
@@ -221,6 +230,7 @@ async fn main() {
             .rev()
             .enumerate()
             .map(|(i, (ip, n))| format!("{:<4} | {:<4} | {}\n", i + 1, n, ip))
+            .take(30)
             .collect::<String>();
         let _ = std::fs::write(STATS_FILE, &stats);
     }
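The only functional change in this last hunk is `.take(30)`, which caps `stats.txt` at the thirty busiest IPs rather than listing every requester; `.rev()` before `.enumerate()` implies the counts were sorted in ascending order just above, so the reversal ranks them from worst offender down. A self-contained sketch of the pipeline on hypothetical data:

```rust
// Hypothetical data and an assumed ascending sort (the sort itself is outside
// this hunk); the rest mirrors the pipeline above.
fn main() {
    let mut counts = vec![("198.51.100.2", 7u64), ("203.0.113.7", 42), ("192.0.2.9", 19)];
    counts.sort_by_key(|&(_, n)| n);
    let stats = counts
        .iter()
        .rev()
        .enumerate()
        .map(|(i, (ip, n))| format!("{:<4} | {:<4} | {}\n", i + 1, n, ip))
        .take(30)
        .collect::<String>();
    print!("{stats}");
    // 1    | 42   | 203.0.113.7
    // 2    | 19   | 192.0.2.9
    // 3    | 7    | 198.51.100.2
}
```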
src/robots.txt (2 changed lines, new file)

@@ -0,0 +1,2 @@
+User-agent: *
+Disallow:
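One subtlety when editing this file: in `robots.txt` syntax a bare `Disallow:` permits every path, while `Disallow: /` blocks them all. The bundled default therefore waves compliant crawlers *into* the tarpit rather than away from it, which suits Babble's purpose, though it reads differently from the README's "denies everything" wording. An operator who would rather repel well-behaved crawlers can drop a deny-all variant into the working directory:

```
User-agent: *
Disallow: /
```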