Add robots.txt support
parent 6b05f7fd52
commit a77eb52c56

3 changed files with 71 additions and 54 deletions
README.md (11 changes)

@@ -20,10 +20,15 @@ round
 --sock <address> | Bind to the given socket. Defaults to 0.0.0.0:3000.
 ```
 
-Deploy it in a docker environment. It's probably safe, but no reason to take chances.
+Babble will search for a `robots.txt` file in the working directory to use. If it does not find one, it will use a
+default one that denies everything.
 
-If you want to be nice to crawlers that *actually abide by `robots.txt`*, perhaps add an entry to warn search engines
-away from it.
+Babble will periodically emit statistics into `stats.txt`, showing information about the worst-offending requesting
+IPs.
+
+## Warning
+
+Deploy it in a docker environment. It's probably safe, but no reason to take chances.
 
 ## Usage terms
 
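As a sketch of the override path described above: any `robots.txt` placed in Babble's working directory is read once at startup and served verbatim in place of the built-in default. The file below is illustrative only (GPTBot is one real crawler that honors `robots.txt`), not something the project ships:

```
# Example override served at /robots.txt
User-agent: GPTBot
Disallow: /

User-agent: *
Disallow: /
```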
src/main.rs (14 changes)

@@ -56,6 +56,7 @@ fn create_rng(seed_bytes: impl IntoIterator<Item = u8>) -> Rng {
 
 const COUNT_FILE: &str = "count.txt";
 const STATS_FILE: &str = "stats.txt";
+const ROBOTS_TXT: &str = "robots.txt";
 
 const SLOW_CHUNK_SIZE: usize = 100;
 const SLOW_DURATION: Duration = Duration::from_millis(100);
@@ -119,10 +120,16 @@ async fn main() {
 
     let (stats_tx, stats_rx) = flume::unbounded();
 
+    let robots_txt = std::fs::read_to_string(ROBOTS_TXT)
+        .ok()
+        .unwrap_or_else(|| include_str!("robots.txt").to_string());
+
     let app = {
         let counter = counter.clone();
         let stats_tx = stats_tx.clone();
-        Router::new().route(
+        Router::new()
+            .route("/robots.txt", get(|| async move { robots_txt.clone() }))
+            .route(
                 "/{id}",
                 get(
                     |Path(id): Path<String>,
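A minimal, self-contained sketch of the pattern this hunk introduces, assuming axum 0.8 and tokio (not the project's full router; the port mirrors the README's default). A `robots.txt` found in the working directory wins; otherwise the copy embedded at compile time via `include_str!` is served:

```rust
use axum::{routing::get, Router};

#[tokio::main]
async fn main() {
    // Prefer robots.txt from the working directory; fall back to the copy
    // compiled in from src/robots.txt (the path is relative to this file).
    let robots_txt = std::fs::read_to_string("robots.txt")
        .unwrap_or_else(|_| include_str!("robots.txt").to_string());

    // The closure clones the String per request; axum serves a String as 200 text/plain.
    let app = Router::new().route("/robots.txt", get(|| async move { robots_txt.clone() }));

    let listener = tokio::net::TcpListener::bind("0.0.0.0:3000").await.unwrap();
    axum::serve(listener, app).await.unwrap();
}
```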
@@ -146,7 +153,9 @@ async fn main() {
                         let mut session_rng = create_rng(count.to_le_bytes());
 
                         // Artificially slow down connections as rudimentary DDoS protection, and to use up client resources
-                        tokio::time::sleep(Duration::from_millis(session_rng.random_range(200..1000)))
+                        tokio::time::sleep(Duration::from_millis(
+                            session_rng.random_range(200..1000),
+                        ))
                             .await;
 
                         // Choose a bullshit generator from our collection for this page
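The random initial delay above pairs with the `SLOW_CHUNK_SIZE`/`SLOW_DURATION` constants from the first hunk. The streaming code itself is outside this diff, so the following is only a hedged sketch of what drip-feeding with those constants can look like (`drip_feed` and `send_chunk` are hypothetical names):

```rust
use std::time::Duration;

const SLOW_CHUNK_SIZE: usize = 100;
const SLOW_DURATION: Duration = Duration::from_millis(100);

/// Hand out `body` in 100-byte chunks with a 100 ms pause between them,
/// so an abusive crawler holds its connection open for a long time.
async fn drip_feed(body: &[u8], mut send_chunk: impl FnMut(&[u8])) {
    for chunk in body.chunks(SLOW_CHUNK_SIZE) {
        send_chunk(chunk);
        tokio::time::sleep(SLOW_DURATION).await;
    }
}
```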
@@ -221,6 +230,7 @@ async fn main() {
                 .rev()
                 .enumerate()
                 .map(|(i, (ip, n))| format!("{:<4} | {:<4} | {}\n", i + 1, n, ip))
+                .take(30)
                 .collect::<String>();
             let _ = std::fs::write(STATS_FILE, &stats);
         }
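For readers skimming the hunk: the pipeline sorts per-IP request counts ascending, reverses so the worst offenders come first, numbers the rows, and, as of this commit, keeps only the top 30. A standalone sketch under assumed names (`hits` and `render_stats` are illustrative, not from the codebase):

```rust
use std::collections::HashMap;
use std::net::IpAddr;

/// Render a "rank | hits | ip" table of the 30 worst-offending IPs.
fn render_stats(hits: &HashMap<IpAddr, u64>) -> String {
    let mut rows: Vec<(IpAddr, u64)> = hits.iter().map(|(&ip, &n)| (ip, n)).collect();
    rows.sort_by_key(|&(_, n)| n); // ascending...
    rows.iter()
        .rev() // ...so reversing puts the biggest counts first
        .enumerate()
        .map(|(i, (ip, n))| format!("{:<4} | {:<4} | {}\n", i + 1, n, ip))
        .take(30)
        .collect()
}

// Usage, mirroring the diff: let _ = std::fs::write("stats.txt", render_stats(&hits));
```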
src/robots.txt (2 changes, new file)

@@ -0,0 +1,2 @@
+User-agent: *
+Disallow: /
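A note on the default's semantics: `Disallow: /` excludes every path, which is what the README means by a fallback that denies everything; an empty `Disallow:` directive would do the opposite and permit all crawling.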