From 3b94fc49a7804a585324dd83541f5c2036d6ab8f Mon Sep 17 00:00:00 2001 From: g0tmi1k Date: Mon, 14 May 2018 15:12:35 +0100 Subject: [PATCH] Fix EOL issue when checking /robots.txt --- lib/wpscan/web_site/robots_txt.rb | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/lib/wpscan/web_site/robots_txt.rb b/lib/wpscan/web_site/robots_txt.rb index 2e928152..d8d52cfb 100644 --- a/lib/wpscan/web_site/robots_txt.rb +++ b/lib/wpscan/web_site/robots_txt.rb @@ -23,20 +23,32 @@ class WebSite return_object = [] response = Browser.get(robots_url.to_s) body = response.body + # Get all allow and disallow urls entries = body.scan(/^(?:dis)?allow:\s*(.*)$/i) if entries + #extract elements entries.flatten! + # Remove any leading/trailing spaces + entries.collect{|x| x.strip || x } + # End Of Line issues + entries.collect{|x| x.chomp! || x } + # Remove nil's and sort entries.compact.sort! + # Unique values only entries.uniq! + # Wordpress URL wordpress_path = @uri.path + + # Each "boring" value as defined below, remove RobotsTxt.known_dirs.each do |d| entries.delete(d) - # also delete when wordpress is installed in subdir + # Also delete when wordpress is installed in subdir dir_with_subdir = "#{wordpress_path}/#{d}".gsub(/\/+/, '/') entries.delete(dir_with_subdir) end + # Each value now, try and make it a full URL entries.each do |d| begin temp = @uri.clone @@ -46,17 +58,21 @@ class WebSite end return_object << temp.to_s end + end return_object end protected + # Useful ~ "function do_robots()" -> https://github.com/WordPress/WordPress/blob/master/wp-includes/functions.php + # # @return [ Array ] def self.known_dirs %w{ / /wp-admin/ + /wp-admin/admin-ajax.php /wp-includes/ /wp-content/ }