forked from zesterer/babble
		
	Add robots.txt support
This commit is contained in:
		
							parent
							
								
									6b05f7fd52
								
							
						
					
					
						commit
						a77eb52c56
					
				
					 3 changed files with 71 additions and 54 deletions
				
			
		
							
								
								
									
										11
									
								
								README.md
									
										
									
									
									
								
							
							
						
						
									
										11
									
								
								README.md
									
										
									
									
									
								
							|  | @ -20,10 +20,15 @@ round | ||||||
| --sock <address> | Bind to the given socket. Defaults to 0.0.0.0:3000. | --sock <address> | Bind to the given socket. Defaults to 0.0.0.0:3000. | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
| Deploy it in a docker environment. It's probably safe, but no reason to take chances. | Babble will search for a `robots.txt` file in the working directory to use. If it does not find one, it will use a | ||||||
|  | default one that denies everything. | ||||||
| 
 | 
 | ||||||
| If you want to be nice to crawlers that *actually abide by `robots.txt`*, perhaps add an entry to warn search engines | Babble will periodically emit statistics into `stats.txt`, showing information about the worst-offending requesting | ||||||
| away from it. | IPs. | ||||||
|  | 
 | ||||||
|  | ## Warning | ||||||
|  | 
 | ||||||
|  | Deploy it in a Docker environment. It's probably safe, but there's no reason to take chances. | ||||||
| 
 | 
 | ||||||
| ## Usage terms | ## Usage terms | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
							
								
								
									
										14
									
								
								src/main.rs
									
										
									
									
									
								
							
							
						
						
									
										14
									
								
								src/main.rs
									
										
									
									
									
								
							|  | @ -56,6 +56,7 @@ fn create_rng(seed_bytes: impl IntoIterator<Item = u8>) -> Rng { | ||||||
| 
 | 
 | ||||||
| const COUNT_FILE: &str = "count.txt"; | const COUNT_FILE: &str = "count.txt"; | ||||||
| const STATS_FILE: &str = "stats.txt"; | const STATS_FILE: &str = "stats.txt"; | ||||||
|  | const ROBOTS_TXT: &str = "robots.txt"; | ||||||
| 
 | 
 | ||||||
| const SLOW_CHUNK_SIZE: usize = 100; | const SLOW_CHUNK_SIZE: usize = 100; | ||||||
| const SLOW_DURATION: Duration = Duration::from_millis(100); | const SLOW_DURATION: Duration = Duration::from_millis(100); | ||||||
|  | @ -119,10 +120,16 @@ async fn main() { | ||||||
| 
 | 
 | ||||||
|     let (stats_tx, stats_rx) = flume::unbounded(); |     let (stats_tx, stats_rx) = flume::unbounded(); | ||||||
| 
 | 
 | ||||||
|  |     let robots_txt = std::fs::read_to_string(ROBOTS_TXT) | ||||||
|  |         .ok() | ||||||
|  |         .unwrap_or_else(|| include_str!("robots.txt").to_string()); | ||||||
|  | 
 | ||||||
|     let app = { |     let app = { | ||||||
|         let counter = counter.clone(); |         let counter = counter.clone(); | ||||||
|         let stats_tx = stats_tx.clone(); |         let stats_tx = stats_tx.clone(); | ||||||
|         Router::new().route( |         Router::new() | ||||||
|  |             .route("/robots.txt", get(|| async move { robots_txt.clone() })) | ||||||
|  |             .route( | ||||||
|                 "/{id}", |                 "/{id}", | ||||||
|                 get( |                 get( | ||||||
|                     |Path(id): Path<String>, |                     |Path(id): Path<String>, | ||||||
|  | @ -146,7 +153,9 @@ async fn main() { | ||||||
|                         let mut session_rng = create_rng(count.to_le_bytes()); |                         let mut session_rng = create_rng(count.to_le_bytes()); | ||||||
| 
 | 
 | ||||||
|                         // Artificially slow down connections as rudimentary DDoS protection, and to use up client resources
 |                         // Artificially slow down connections as rudimentary DDoS protection, and to use up client resources
 | ||||||
|                     tokio::time::sleep(Duration::from_millis(session_rng.random_range(200..1000))) |                         tokio::time::sleep(Duration::from_millis( | ||||||
|  |                             session_rng.random_range(200..1000), | ||||||
|  |                         )) | ||||||
|                         .await; |                         .await; | ||||||
| 
 | 
 | ||||||
|                         // Choose a bullshit generator from our collection for this page
 |                         // Choose a bullshit generator from our collection for this page
 | ||||||
|  | @ -221,6 +230,7 @@ async fn main() { | ||||||
|                     .rev() |                     .rev() | ||||||
|                     .enumerate() |                     .enumerate() | ||||||
|                     .map(|(i, (ip, n))| format!("{:<4} | {:<4} | {}\n", i + 1, n, ip)) |                     .map(|(i, (ip, n))| format!("{:<4} | {:<4} | {}\n", i + 1, n, ip)) | ||||||
|  |                     .take(30) | ||||||
|                     .collect::<String>(); |                     .collect::<String>(); | ||||||
|                 let _ = std::fs::write(STATS_FILE, &stats); |                 let _ = std::fs::write(STATS_FILE, &stats); | ||||||
|             } |             } | ||||||
|  |  | ||||||
							
								
								
									
										2
									
								
								src/robots.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								src/robots.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,2 @@ | ||||||
|  | User-agent: * | ||||||
|  | Disallow: / | ||||||
		Loading…
	
	Add table
		
		Reference in a new issue